clusterkit 0.3.0-arm64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.simplecov +47 -0
  4. data/CHANGELOG.md +35 -0
  5. data/CLAUDE.md +226 -0
  6. data/Cargo.lock +3228 -0
  7. data/Cargo.toml +8 -0
  8. data/Gemfile +17 -0
  9. data/IMPLEMENTATION_NOTES.md +143 -0
  10. data/LICENSE.txt +21 -0
  11. data/PYTHON_COMPARISON.md +183 -0
  12. data/README.md +744 -0
  13. data/Rakefile +259 -0
  14. data/docs/KNOWN_ISSUES.md +130 -0
  15. data/docs/RUST_ERROR_HANDLING.md +164 -0
  16. data/docs/TEST_FIXTURES.md +170 -0
  17. data/docs/UMAP_EXPLAINED.md +362 -0
  18. data/docs/UMAP_TROUBLESHOOTING.md +284 -0
  19. data/docs/VERBOSE_OUTPUT.md +84 -0
  20. data/docs/assets/clusterkit-wide.png +0 -0
  21. data/docs/assets/clusterkit.png +0 -0
  22. data/docs/assets/visualization.png +0 -0
  23. data/examples/hdbscan_example.rb +147 -0
  24. data/examples/optimal_kmeans_example.rb +96 -0
  25. data/examples/pca_example.rb +114 -0
  26. data/examples/reproducible_umap.rb +99 -0
  27. data/examples/verbose_control.rb +43 -0
  28. data/ext/clusterkit/Cargo.toml +26 -0
  29. data/ext/clusterkit/extconf.rb +23 -0
  30. data/ext/clusterkit/src/clustering/hdbscan_wrapper.rs +80 -0
  31. data/ext/clusterkit/src/clustering.rs +221 -0
  32. data/ext/clusterkit/src/embedder.rs +349 -0
  33. data/ext/clusterkit/src/hnsw.rs +579 -0
  34. data/ext/clusterkit/src/lib.rs +24 -0
  35. data/ext/clusterkit/src/svd.rs +89 -0
  36. data/ext/clusterkit/src/tests.rs +16 -0
  37. data/ext/clusterkit/src/utils.rs +183 -0
  38. data/lib/clusterkit/3.1/clusterkit.bundle +0 -0
  39. data/lib/clusterkit/3.2/clusterkit.bundle +0 -0
  40. data/lib/clusterkit/3.3/clusterkit.bundle +0 -0
  41. data/lib/clusterkit/3.4/clusterkit.bundle +0 -0
  42. data/lib/clusterkit/clustering/hdbscan.rb +164 -0
  43. data/lib/clusterkit/clustering.rb +194 -0
  44. data/lib/clusterkit/clusterkit.rb +14 -0
  45. data/lib/clusterkit/configuration.rb +24 -0
  46. data/lib/clusterkit/data_validator.rb +132 -0
  47. data/lib/clusterkit/dimensionality/pca.rb +251 -0
  48. data/lib/clusterkit/dimensionality/svd.rb +175 -0
  49. data/lib/clusterkit/dimensionality/umap.rb +282 -0
  50. data/lib/clusterkit/dimensionality.rb +29 -0
  51. data/lib/clusterkit/hdbscan_api_design.rb +142 -0
  52. data/lib/clusterkit/hnsw.rb +251 -0
  53. data/lib/clusterkit/preprocessing.rb +106 -0
  54. data/lib/clusterkit/silence.rb +42 -0
  55. data/lib/clusterkit/utils.rb +51 -0
  56. data/lib/clusterkit/version.rb +5 -0
  57. data/lib/clusterkit.rb +105 -0
  58. data/lib/tasks/visualize.rake +641 -0
  59. metadata +214 -0
metadata ADDED
@@ -0,0 +1,214 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: clusterkit
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.3.0
5
+ platform: arm64-darwin
6
+ authors:
7
+ - Chris Petersen
8
+ bindir: exe
9
+ cert_chain: []
10
+ date: 1980-01-02 00:00:00.000000000 Z
11
+ dependencies:
12
+ - !ruby/object:Gem::Dependency
13
+ name: rb_sys
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - "~>"
17
+ - !ruby/object:Gem::Version
18
+ version: '0.9'
19
+ type: :runtime
20
+ prerelease: false
21
+ version_requirements: !ruby/object:Gem::Requirement
22
+ requirements:
23
+ - - "~>"
24
+ - !ruby/object:Gem::Version
25
+ version: '0.9'
26
+ - !ruby/object:Gem::Dependency
27
+ name: benchmark
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ type: :development
34
+ prerelease: false
35
+ version_requirements: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - ">="
38
+ - !ruby/object:Gem::Version
39
+ version: '0'
40
+ - !ruby/object:Gem::Dependency
41
+ name: csv
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: '0'
47
+ type: :development
48
+ prerelease: false
49
+ version_requirements: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ">="
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ - !ruby/object:Gem::Dependency
55
+ name: rake
56
+ requirement: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - "~>"
59
+ - !ruby/object:Gem::Version
60
+ version: '13.0'
61
+ type: :development
62
+ prerelease: false
63
+ version_requirements: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - "~>"
66
+ - !ruby/object:Gem::Version
67
+ version: '13.0'
68
+ - !ruby/object:Gem::Dependency
69
+ name: rake-compiler
70
+ requirement: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - "~>"
73
+ - !ruby/object:Gem::Version
74
+ version: '1.2'
75
+ type: :development
76
+ prerelease: false
77
+ version_requirements: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - "~>"
80
+ - !ruby/object:Gem::Version
81
+ version: '1.2'
82
+ - !ruby/object:Gem::Dependency
83
+ name: rspec
84
+ requirement: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - "~>"
87
+ - !ruby/object:Gem::Version
88
+ version: '3.0'
89
+ type: :development
90
+ prerelease: false
91
+ version_requirements: !ruby/object:Gem::Requirement
92
+ requirements:
93
+ - - "~>"
94
+ - !ruby/object:Gem::Version
95
+ version: '3.0'
96
+ - !ruby/object:Gem::Dependency
97
+ name: simplecov
98
+ requirement: !ruby/object:Gem::Requirement
99
+ requirements:
100
+ - - "~>"
101
+ - !ruby/object:Gem::Version
102
+ version: '0.22'
103
+ type: :development
104
+ prerelease: false
105
+ version_requirements: !ruby/object:Gem::Requirement
106
+ requirements:
107
+ - - "~>"
108
+ - !ruby/object:Gem::Version
109
+ version: '0.22'
110
+ - !ruby/object:Gem::Dependency
111
+ name: yard
112
+ requirement: !ruby/object:Gem::Requirement
113
+ requirements:
114
+ - - "~>"
115
+ - !ruby/object:Gem::Version
116
+ version: '0.9'
117
+ type: :development
118
+ prerelease: false
119
+ version_requirements: !ruby/object:Gem::Requirement
120
+ requirements:
121
+ - - "~>"
122
+ - !ruby/object:Gem::Version
123
+ version: '0.9'
124
+ description: A comprehensive clustering toolkit for Ruby, providing UMAP, PCA, K-means,
125
+ HDBSCAN and more. Built on top of annembed and hdbscan Rust crates for blazing-fast
126
+ performance.
127
+ email:
128
+ - chris@petersen.io
129
+ executables: []
130
+ extensions: []
131
+ extra_rdoc_files: []
132
+ files:
133
+ - ".rspec"
134
+ - ".simplecov"
135
+ - CHANGELOG.md
136
+ - CLAUDE.md
137
+ - Cargo.lock
138
+ - Cargo.toml
139
+ - Gemfile
140
+ - IMPLEMENTATION_NOTES.md
141
+ - LICENSE.txt
142
+ - PYTHON_COMPARISON.md
143
+ - README.md
144
+ - Rakefile
145
+ - docs/KNOWN_ISSUES.md
146
+ - docs/RUST_ERROR_HANDLING.md
147
+ - docs/TEST_FIXTURES.md
148
+ - docs/UMAP_EXPLAINED.md
149
+ - docs/UMAP_TROUBLESHOOTING.md
150
+ - docs/VERBOSE_OUTPUT.md
151
+ - docs/assets/clusterkit-wide.png
152
+ - docs/assets/clusterkit.png
153
+ - docs/assets/visualization.png
154
+ - examples/hdbscan_example.rb
155
+ - examples/optimal_kmeans_example.rb
156
+ - examples/pca_example.rb
157
+ - examples/reproducible_umap.rb
158
+ - examples/verbose_control.rb
159
+ - ext/clusterkit/Cargo.toml
160
+ - ext/clusterkit/extconf.rb
161
+ - ext/clusterkit/src/clustering.rs
162
+ - ext/clusterkit/src/clustering/hdbscan_wrapper.rs
163
+ - ext/clusterkit/src/embedder.rs
164
+ - ext/clusterkit/src/hnsw.rs
165
+ - ext/clusterkit/src/lib.rs
166
+ - ext/clusterkit/src/svd.rs
167
+ - ext/clusterkit/src/tests.rs
168
+ - ext/clusterkit/src/utils.rs
169
+ - lib/clusterkit.rb
170
+ - lib/clusterkit/3.1/clusterkit.bundle
171
+ - lib/clusterkit/3.2/clusterkit.bundle
172
+ - lib/clusterkit/3.3/clusterkit.bundle
173
+ - lib/clusterkit/3.4/clusterkit.bundle
174
+ - lib/clusterkit/clustering.rb
175
+ - lib/clusterkit/clustering/hdbscan.rb
176
+ - lib/clusterkit/clusterkit.rb
177
+ - lib/clusterkit/configuration.rb
178
+ - lib/clusterkit/data_validator.rb
179
+ - lib/clusterkit/dimensionality.rb
180
+ - lib/clusterkit/dimensionality/pca.rb
181
+ - lib/clusterkit/dimensionality/svd.rb
182
+ - lib/clusterkit/dimensionality/umap.rb
183
+ - lib/clusterkit/hdbscan_api_design.rb
184
+ - lib/clusterkit/hnsw.rb
185
+ - lib/clusterkit/preprocessing.rb
186
+ - lib/clusterkit/silence.rb
187
+ - lib/clusterkit/utils.rb
188
+ - lib/clusterkit/version.rb
189
+ - lib/tasks/visualize.rake
190
+ homepage: https://github.com/scientist-labs/clusterkit
191
+ licenses:
192
+ - MIT
193
+ metadata:
194
+ homepage_uri: https://github.com/scientist-labs/clusterkit
195
+ source_code_uri: https://github.com/scientist-labs/clusterkit
196
+ changelog_uri: https://github.com/scientist-labs/clusterkit/blob/main/CHANGELOG.md
197
+ rdoc_options: []
198
+ require_paths:
199
+ - lib
200
+ required_ruby_version: !ruby/object:Gem::Requirement
201
+ requirements:
202
+ - - ">="
203
+ - !ruby/object:Gem::Version
204
+ version: 2.7.0
205
+ required_rubygems_version: !ruby/object:Gem::Requirement
206
+ requirements:
207
+ - - ">="
208
+ - !ruby/object:Gem::Version
209
+ version: '0'
210
+ requirements: []
211
+ rubygems_version: 3.6.9
212
+ specification_version: 4
213
+ summary: High-performance clustering and dimensionality reduction for Ruby
214
+ test_files: []