clusterkit 0.1.0.pre.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.simplecov +47 -0
  4. data/CHANGELOG.md +35 -0
  5. data/CLAUDE.md +226 -0
  6. data/Cargo.toml +8 -0
  7. data/Gemfile +17 -0
  8. data/IMPLEMENTATION_NOTES.md +143 -0
  9. data/LICENSE.txt +21 -0
  10. data/PYTHON_COMPARISON.md +183 -0
  11. data/README.md +499 -0
  12. data/Rakefile +245 -0
  13. data/clusterkit.gemspec +45 -0
  14. data/docs/KNOWN_ISSUES.md +130 -0
  15. data/docs/RUST_ERROR_HANDLING.md +164 -0
  16. data/docs/TEST_FIXTURES.md +170 -0
  17. data/docs/UMAP_EXPLAINED.md +362 -0
  18. data/docs/UMAP_TROUBLESHOOTING.md +284 -0
  19. data/docs/VERBOSE_OUTPUT.md +84 -0
  20. data/examples/hdbscan_example.rb +147 -0
  21. data/examples/optimal_kmeans_example.rb +96 -0
  22. data/examples/pca_example.rb +114 -0
  23. data/examples/reproducible_umap.rb +99 -0
  24. data/examples/verbose_control.rb +43 -0
  25. data/ext/clusterkit/Cargo.toml +25 -0
  26. data/ext/clusterkit/extconf.rb +4 -0
  27. data/ext/clusterkit/src/clustering/hdbscan_wrapper.rs +115 -0
  28. data/ext/clusterkit/src/clustering.rs +267 -0
  29. data/ext/clusterkit/src/embedder.rs +413 -0
  30. data/ext/clusterkit/src/lib.rs +22 -0
  31. data/ext/clusterkit/src/svd.rs +112 -0
  32. data/ext/clusterkit/src/tests.rs +16 -0
  33. data/ext/clusterkit/src/utils.rs +33 -0
  34. data/lib/clusterkit/clustering/hdbscan.rb +177 -0
  35. data/lib/clusterkit/clustering.rb +213 -0
  36. data/lib/clusterkit/clusterkit.rb +9 -0
  37. data/lib/clusterkit/configuration.rb +24 -0
  38. data/lib/clusterkit/dimensionality/pca.rb +251 -0
  39. data/lib/clusterkit/dimensionality/svd.rb +144 -0
  40. data/lib/clusterkit/dimensionality/umap.rb +311 -0
  41. data/lib/clusterkit/dimensionality.rb +29 -0
  42. data/lib/clusterkit/hdbscan_api_design.rb +142 -0
  43. data/lib/clusterkit/preprocessing.rb +106 -0
  44. data/lib/clusterkit/silence.rb +42 -0
  45. data/lib/clusterkit/utils.rb +51 -0
  46. data/lib/clusterkit/version.rb +5 -0
  47. data/lib/clusterkit.rb +93 -0
  48. data/lib/tasks/visualize.rake +641 -0
  49. metadata +194 -0
metadata ADDED
@@ -0,0 +1,194 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: clusterkit
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0.pre.1
5
+ platform: ruby
6
+ authors:
7
+ - Chris Petersen
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2025-08-20 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: csv
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '13.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '13.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake-compiler
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '1.2'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '1.2'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rb_sys
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '0.9'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '0.9'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rspec
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '3.0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '3.0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: simplecov
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '0.22'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '0.22'
97
+ - !ruby/object:Gem::Dependency
98
+ name: yard
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: '0.9'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: '0.9'
111
+ description: A comprehensive clustering toolkit for Ruby, providing UMAP, PCA, K-means,
112
+ HDBSCAN and more. Built on top of annembed and hdbscan Rust crates for blazing-fast
113
+ performance.
114
+ email:
115
+ - chris@petersen.io
116
+ executables: []
117
+ extensions:
118
+ - ext/clusterkit/extconf.rb
119
+ extra_rdoc_files: []
120
+ files:
121
+ - ".rspec"
122
+ - ".simplecov"
123
+ - CHANGELOG.md
124
+ - CLAUDE.md
125
+ - Cargo.toml
126
+ - Gemfile
127
+ - IMPLEMENTATION_NOTES.md
128
+ - LICENSE.txt
129
+ - PYTHON_COMPARISON.md
130
+ - README.md
131
+ - Rakefile
132
+ - clusterkit.gemspec
133
+ - docs/KNOWN_ISSUES.md
134
+ - docs/RUST_ERROR_HANDLING.md
135
+ - docs/TEST_FIXTURES.md
136
+ - docs/UMAP_EXPLAINED.md
137
+ - docs/UMAP_TROUBLESHOOTING.md
138
+ - docs/VERBOSE_OUTPUT.md
139
+ - examples/hdbscan_example.rb
140
+ - examples/optimal_kmeans_example.rb
141
+ - examples/pca_example.rb
142
+ - examples/reproducible_umap.rb
143
+ - examples/verbose_control.rb
144
+ - ext/clusterkit/Cargo.toml
145
+ - ext/clusterkit/extconf.rb
146
+ - ext/clusterkit/src/clustering.rs
147
+ - ext/clusterkit/src/clustering/hdbscan_wrapper.rs
148
+ - ext/clusterkit/src/embedder.rs
149
+ - ext/clusterkit/src/lib.rs
150
+ - ext/clusterkit/src/svd.rs
151
+ - ext/clusterkit/src/tests.rs
152
+ - ext/clusterkit/src/utils.rs
153
+ - lib/clusterkit.rb
154
+ - lib/clusterkit/clustering.rb
155
+ - lib/clusterkit/clustering/hdbscan.rb
156
+ - lib/clusterkit/clusterkit.rb
157
+ - lib/clusterkit/configuration.rb
158
+ - lib/clusterkit/dimensionality.rb
159
+ - lib/clusterkit/dimensionality/pca.rb
160
+ - lib/clusterkit/dimensionality/svd.rb
161
+ - lib/clusterkit/dimensionality/umap.rb
162
+ - lib/clusterkit/hdbscan_api_design.rb
163
+ - lib/clusterkit/preprocessing.rb
164
+ - lib/clusterkit/silence.rb
165
+ - lib/clusterkit/utils.rb
166
+ - lib/clusterkit/version.rb
167
+ - lib/tasks/visualize.rake
168
+ homepage: https://github.com/cpetersen/clusterkit
169
+ licenses:
170
+ - MIT
171
+ metadata:
172
+ homepage_uri: https://github.com/cpetersen/clusterkit
173
+ source_code_uri: https://github.com/cpetersen/clusterkit
174
+ changelog_uri: https://github.com/cpetersen/clusterkit/blob/main/CHANGELOG.md
175
+ post_install_message:
176
+ rdoc_options: []
177
+ require_paths:
178
+ - lib
179
+ required_ruby_version: !ruby/object:Gem::Requirement
180
+ requirements:
181
+ - - ">="
182
+ - !ruby/object:Gem::Version
183
+ version: 2.7.0
184
+ required_rubygems_version: !ruby/object:Gem::Requirement
185
+ requirements:
186
+ - - ">="
187
+ - !ruby/object:Gem::Version
188
+ version: '0'
189
+ requirements: []
190
+ rubygems_version: 3.5.3
191
+ signing_key:
192
+ specification_version: 4
193
+ summary: High-performance clustering and dimensionality reduction for Ruby
194
+ test_files: []