clusterkit 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative '../clusterkit'
4
+ require_relative '../data_validator'
4
5
 
5
6
  module ClusterKit
6
7
  module Dimensionality
@@ -8,7 +9,7 @@ module ClusterKit
8
9
  # Decomposes a matrix into U, S, V^T components
9
10
  class SVD
10
11
  attr_reader :n_components, :n_iter, :random_seed
11
- attr_reader :u, :s, :vt
12
+ attr_reader :u, :s, :vt, :n_features
12
13
 
13
14
  # Initialize a new SVD instance
14
15
  # @param n_components [Integer] Number of components to compute
@@ -27,7 +28,8 @@ module ClusterKit
27
28
  def fit_transform(data)
28
29
  validate_input(data)
29
30
 
30
- # Store reference to original data for transform detection
31
+ # Store data characteristics for later transform operations
32
+ @n_features = data.first.size
31
33
  @original_data_id = data.object_id
32
34
 
33
35
  # Determine n_components if not set
@@ -77,26 +79,21 @@ module ClusterKit
77
79
 
78
80
  # Transform data using fitted SVD (project onto components)
79
81
  # @param data [Array<Array<Numeric>>] Data to transform
80
- # @return [Array<Array<Float>>] Transformed data (U * S)
82
+ # @return [Array<Array<Float>>] Transformed data projected onto SVD components
81
83
  def transform(data)
82
84
  raise RuntimeError, "Model must be fitted first" unless fitted?
83
- validate_input(data)
84
-
85
- # For SVD, transform typically means projecting onto the components
86
- # This is equivalent to data * V (or data * V^T.T)
87
- # But for dimensionality reduction, we usually want U * S
88
- # which is already computed in fit_transform
85
+ validate_transform_input(data)
89
86
 
90
- # If transforming new data, we'd need to project it
91
- # For now, return U * S for the fitted data
92
87
  if data.object_id == @original_data_id
93
88
  # Same data that was fitted - return U * S
94
89
  @u.map.with_index do |row, i|
95
90
  row.map.with_index { |val, j| val * @s[j] }
96
91
  end
97
92
  else
98
- # New data - would need proper projection
99
- raise NotImplementedError, "Transform for new data not yet implemented"
93
+ # New data - project onto V components: data × V
94
+ # Since we have V^T, we need to transpose it back to V
95
+ # V = V^T^T, so we project: data × V^T^T
96
+ transform_new_data(data)
100
97
  end
101
98
  end
102
99
 
@@ -135,9 +132,43 @@ module ClusterKit
135
132
  private
136
133
 
137
134
  def validate_input(data)
138
- raise ArgumentError, "Input must be an array" unless data.is_a?(Array)
139
- raise ArgumentError, "Input cannot be empty" if data.empty?
140
- raise ArgumentError, "Input must be a 2D array" unless data.first.is_a?(Array)
135
+ DataValidator.validate_standard(data, check_finite: false)
136
+ end
137
+
138
# Validate data passed to #transform against the fitted model.
#
# Delegates the structural checks to DataValidator.validate_standard (with
# check_finite: false) and then compares the first row's column count with
# @n_features, which fit_transform records from the training data.
#
# @param data [Array<Array<Numeric>>] Candidate data for transform
# @return [nil]
# @raise [ArgumentError] if the first row's feature count differs from @n_features
def validate_transform_input(data)
  DataValidator.validate_standard(data, check_finite: false)

  # Check feature count matches training data
  feature_count = data.first.size
  return if feature_count == @n_features

  raise ArgumentError, "New data has #{feature_count} features, but model was fitted with #{@n_features} features"
end
146
+
147
+ # Transform new data by projecting onto V components
148
+ # Mathematical operation: new_data × V, where V = V^T^T
149
# Project new samples onto the fitted components (new_data × V).
#
# @vt stores V^T with one row per component, so column c of V is simply row c
# of @vt. Each output value is therefore the dot product of a sample with one
# row of @vt and no explicit transpose is needed.
#
# Array#sum is used for the dot product; for floats it applies compensated
# summation, so low-order bits can differ marginally from a naive += loop.
#
# @param data [Array<Array<Numeric>>] Samples to project, one row per sample
# @return [Array<Array<Float>>] One row per sample, one value per row of @vt
def transform_new_data(data)
  data.map do |sample|
    @vt.map do |component|
      # Dot product: sample · component (this is sample · V[:, comp_idx])
      sample.zip(component).sum(0.0) { |value, weight| value * weight }
    end
  end
end
142
173
  end
143
174
  end
@@ -4,6 +4,7 @@ require 'fileutils'
4
4
  require 'json'
5
5
  require_relative '../configuration'
6
6
  require_relative '../silence'
7
+ require_relative '../data_validator'
7
8
 
8
9
  module ClusterKit
9
10
  module Dimensionality
@@ -224,44 +225,10 @@ module ClusterKit
224
225
  end
225
226
 
226
227
  def validate_input(data, check_min_samples: true)
227
- raise ArgumentError, "Input must be an array" unless data.is_a?(Array)
228
- raise ArgumentError, "Input cannot be empty" if data.empty?
228
+ # Use shared validation for common checks
229
+ DataValidator.validate_standard(data)
229
230
 
230
- first_row = data.first
231
- raise ArgumentError, "Input must be a 2D array (array of arrays)" unless first_row.is_a?(Array)
232
-
233
- row_length = first_row.length
234
- min_val = Float::INFINITY
235
- max_val = -Float::INFINITY
236
-
237
- # First validate data structure and types
238
- data.each_with_index do |row, i|
239
- unless row.is_a?(Array)
240
- raise ArgumentError, "Row #{i} is not an array"
241
- end
242
-
243
- if row.length != row_length
244
- raise ArgumentError, "All rows must have the same length (row #{i} has #{row.length} elements, expected #{row_length})"
245
- end
246
-
247
- row.each_with_index do |val, j|
248
- unless val.is_a?(Numeric)
249
- raise ArgumentError, "Element at position [#{i}, #{j}] is not numeric"
250
- end
251
-
252
- # Only check for NaN/Infinite on floats
253
- if val.is_a?(Float) && (val.nan? || val.infinite?)
254
- raise ArgumentError, "Element at position [#{i}, #{j}] is NaN or Infinite"
255
- end
256
-
257
- # Track data range
258
- val_f = val.to_f
259
- min_val = val_f if val_f < min_val
260
- max_val = val_f if val_f > max_val
261
- end
262
- end
263
-
264
- # Check for sufficient data points after validating structure (only for fit operations)
231
+ # UMAP-specific validations
265
232
  if check_min_samples && data.size < 10
266
233
  raise ::ClusterKit::InsufficientDataError, <<~MSG
267
234
  UMAP requires at least 10 data points, but only #{data.size} provided.
@@ -274,9 +241,9 @@ module ClusterKit
274
241
  end
275
242
 
276
243
  # Check for extreme data ranges that might cause numerical issues
277
- data_range = max_val - min_val
278
- if data_range > 1000
279
- warn "WARNING: Large data range detected (#{data_range.round(2)}). Consider normalizing your data to prevent numerical instability."
244
+ stats = DataValidator.data_statistics(data)
245
+ if stats[:data_range] > 1000
246
+ warn "WARNING: Large data range detected (#{stats[:data_range].round(2)}). Consider normalizing your data to prevent numerical instability."
280
247
  end
281
248
  end
282
249
 
@@ -0,0 +1,251 @@
1
+ # frozen_string_literal: true
2
+
3
module ClusterKit
  # HNSW (Hierarchical Navigable Small World) index for fast approximate nearest neighbor search
  #
  # @example Basic usage
  #   index = ClusterKit::HNSW.new(dim: 128, space: :euclidean)
  #   index.add_batch(vectors, labels: labels)
  #   neighbors = index.search(query_vector, k: 10)
  #
  # @example With metadata
  #   index = ClusterKit::HNSW.new(dim: 768, space: :cosine)
  #   index.add_item(vector, label: "doc_1", metadata: { title: "Introduction", date: "2024-01-01" })
  #   results = index.search_with_metadata(query, k: 5)
  #   # => [{ label: "doc_1", distance: 0.23, metadata: { title: "...", date: "..." } }, ...]
  class HNSW
    # Note: The actual HNSW class is defined in Rust (ext/clusterkit/src/hnsw.rs)
    # This Ruby file adds additional convenience methods and documentation.
    # The Rust implementation provides these core methods:
    # - new(kwargs) - constructor
    # - add_item(vector, kwargs) - add single item
    # - add_batch(vectors, kwargs) - add multiple items
    # - search(query, kwargs) - search for neighbors
    # - search_with_metadata(query, kwargs) - search with metadata
    # - size() - get number of items
    # - config() - get configuration
    # - stats() - get statistics
    # - set_ef(ef) - set search quality parameter
    # - save(path) - save to file

    # Initialize is actually handled by the Rust code
    # This documentation is for reference
    #
    # @param dim [Integer] Dimension of vectors (required)
    # @param space [Symbol] Distance metric: :euclidean, :cosine, or :inner_product (default: :euclidean)
    # @param max_elements [Integer] Maximum number of elements (default: 10_000)
    # @param m [Integer] Number of bi-directional links (default: 16)
    # @param ef_construction [Integer] Size of dynamic candidate list (default: 200)
    # @param random_seed [Integer, nil] Random seed for reproducible builds (default: nil)
    # @param dynamic_list [Boolean] Allow index to grow dynamically (not yet implemented)

    # Coerce input into a plain Array.
    #
    # Arrays are returned untouched; anything else must respond to #to_a
    # (e.g. a Numo::NArray). Defined on the class so that class-level
    # constructors such as from_embedding can use it too.
    #
    # @param data [Array, #to_a] Data to coerce
    # @return [Array]
    # @raise [ArgumentError] if data is not an Array and does not respond to #to_a
    def self.ensure_array(data)
      return data if data.is_a?(Array)
      return data.to_a if data.respond_to?(:to_a)

      raise ArgumentError, "Data must be convertible to Array"
    end

    # Fit the index with training data (alias for add_batch)
    #
    # @param data [Array<Array>, Numo::NArray] Training vectors
    # @param labels [Array, nil] Optional labels for vectors
    # @return [self]
    def fit(data, labels: nil)
      add_batch(data, labels: labels)
      self
    end

    # Fit the index (for compatibility with sklearn-like interfaces).
    #
    # No transformed representation is produced: the data is added via #fit and
    # the index itself is returned.
    #
    # @param data [Array<Array>, Numo::NArray] Training vectors
    # @return [self]
    def fit_transform(data)
      fit(data)
    end

    # Add a vector using the << operator
    #
    # @param vector [Array, Numo::NArray] Vector to add
    # @return [self]
    def <<(vector)
      # add_item takes its options as a trailing hash; none are supplied here
      add_item(vector, {})
      self
    end

    # Alias for search that always includes distances
    #
    # @param query [Array, Numo::NArray] Query vector
    # @param k [Integer] Number of neighbors
    # @param ef [Integer, nil] Search parameter (higher = better quality, slower)
    # @return [Array<Array>] Array of [indices, distances]
    def knn_query(query, k: 10, ef: nil)
      search(query, k: k, ef: ef, include_distances: true)
    end

    # Batch search for multiple queries
    #
    # Uses the optional `parallel` gem when it can be loaded; otherwise (or when
    # +parallel+ is false, or there is at most one query) the queries are run
    # sequentially.
    #
    # @param queries [Array<Array>, Numo::NArray] Multiple query vectors
    # @param k [Integer] Number of neighbors per query
    # @param parallel [Boolean] Process queries in parallel
    # @return [Array<Array>] Results for each query
    def batch_search(queries, k: 10, parallel: true)
      queries = ensure_array(queries)

      if parallel && queries.size > 1 && parallel_gem_available?
        Parallel.map(queries) { |query| search(query, k: k) }
      else
        queries.map { |query| search(query, k: k) }
      end
    end

    # Range search - find all points within a given radius
    #
    # Fetches up to +limit+ (or all) nearest neighbors with metadata and keeps
    # the ones whose :distance is within +radius+. An empty index yields [].
    #
    # @param query [Array, Numo::NArray] Query vector
    # @param radius [Float] Search radius
    # @param limit [Integer, nil] Maximum number of results
    # @return [Array<Hash>] Results within radius
    def range_search(query, radius:, limit: nil)
      # Nothing to search, and avoids asking the backend for k = 0 neighbors
      return [] if empty?

      # Never request more candidates than the index holds
      k = [limit || size, size].min
      within_radius = search_with_metadata(query, k: k).select { |r| r[:distance] <= radius }

      limit ? within_radius.take(limit) : within_radius
    end

    # Check if index is empty
    # @return [Boolean]
    def empty?
      size.zero?
    end

    # Clear all elements from the index
    #
    # Not implemented yet: always raises. Note that NotImplementedError is a
    # ScriptError, so a bare `rescue` (StandardError) will not catch it.
    #
    # @return [self]
    # @raise [NotImplementedError] always
    def clear!
      # Would need to recreate the index
      raise NotImplementedError, "Clear not yet implemented"
    end

    # Check if a label exists in the index
    #
    # FIXME: placeholder - always returns false, even for labels that were
    # added. A real lookup would need to be implemented in Rust.
    #
    # @param label [String, Integer] Label to check
    # @return [Boolean] currently always false
    def include?(label)
      false
    end

    # Get recall rate for a test set
    #
    # For every query the top-k search result is compared (as a set) with the
    # first k entries of the matching ground-truth row; recall is the number of
    # shared entries divided by the number of distinct ground-truth entries.
    #
    # @param test_queries [Array<Array>] Query vectors
    # @param ground_truth [Array<Array>] True nearest neighbors for each query
    # @param k [Integer] Number of neighbors to evaluate
    # @return [Float] Recall rate (0.0 to 1.0)
    # @raise [ArgumentError] if ground_truth has no row for one of the queries
    def recall(test_queries, ground_truth, k: 10)
      test_queries = ensure_array(test_queries)

      total_correct = 0
      total_possible = 0

      test_queries.each_with_index do |query, i|
        expected = ground_truth[i]
        raise ArgumentError, "ground_truth has no entry for query #{i}" if expected.nil?

        # Array#uniq / Array#& give set semantics without loading Set
        actual = expected.take(k).uniq
        predicted = search(query, k: k).to_a

        total_correct += (predicted & actual).size
        total_possible += actual.size # already capped at k by take(k)
      end

      total_possible.positive? ? total_correct.to_f / total_possible : 0.0
    end

    # Load an index from file
    # Note: This uses Box::leak internally to work around hnsw_rs lifetime constraints
    # This causes a small memory leak - the HnswIo struct won't be freed until program exit
    #
    # @param path [String] File path to load from
    # @return [HNSW] New HNSW instance loaded from file
    # (The actual implementation is in Rust)

    # Create an index from embeddings produced by UMAP or other dimensionality reduction
    #
    # The index dimension is taken from the length of the first embedding.
    #
    # @param embeddings [Array<Array>, Numo::NArray] Embedding vectors
    # @param kwargs [Hash] Additional options for HNSW initialization
    # @return [HNSW] New HNSW instance
    # @raise [ArgumentError] if embeddings is empty or cannot be converted to an Array
    def self.from_embedding(embeddings, **kwargs)
      embeddings = ensure_array(embeddings)
      # Without at least one vector the dimension cannot be inferred
      raise ArgumentError, "Embeddings cannot be empty" if embeddings.empty?

      index = new(dim: embeddings.first.size, **kwargs)
      index.fit(embeddings)
      index
    end

    # Builder pattern for creating HNSW indices
    #
    # Every setter stores one constructor option and returns the builder so
    # calls can be chained; #build passes the collected options to HNSW.new.
    class Builder
      def initialize
        @config = {}
      end

      # @param type [Symbol] Distance metric, stored as :space
      # @return [self]
      def space(type)
        @config[:space] = type
        self
      end

      # @param dim [Integer] Vector dimension, stored as :dim
      # @return [self]
      def dimensions(dim)
        @config[:dim] = dim
        self
      end

      # @param n [Integer] Stored as :max_elements
      # @return [self]
      def max_elements(n)
        @config[:max_elements] = n
        self
      end

      # @param m [Integer] Stored as :m
      # @return [self]
      def m_parameter(m)
        @config[:m] = m
        self
      end

      # @param ef [Integer] Stored as :ef_construction
      # @return [self]
      def ef_construction(ef)
        @config[:ef_construction] = ef
        self
      end

      # @param seed [Integer] Stored as :random_seed
      # @return [self]
      def seed(seed)
        @config[:random_seed] = seed
        self
      end

      # @return [HNSW] Index created from the collected options
      def build
        HNSW.new(**@config)
      end
    end

    private

    # Ensure input is a proper array format (instance-side entry point for
    # the class-level coercion helper)
    def ensure_array(data)
      self.class.ensure_array(data)
    end

    # Lazily load the optional `parallel` gem.
    #
    # @return [Boolean] true when the gem could be required, false otherwise
    def parallel_gem_available?
      require 'parallel'
      true
    rescue LoadError
      # Parallel gem not available, callers fall back to sequential
      false
    end
  end
end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module ClusterKit
4
- VERSION = "0.1.0"
4
+ VERSION = "0.1.1"
5
5
  end
data/lib/clusterkit.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative "clusterkit/version"
4
- require_relative "clusterkit/clusterkit"
4
+ require "clusterkit/clusterkit"
5
5
  require_relative "clusterkit/configuration"
6
6
 
7
7
  # Main module for ClusterKit gem
@@ -29,6 +29,7 @@ module ClusterKit
29
29
  # Load modules that depend on the extension
30
30
  require_relative "clusterkit/dimensionality"
31
31
  require_relative "clusterkit/clustering"
32
+ require_relative "clusterkit/hnsw"
32
33
 
33
34
  # Make RustUMAP private - it's an implementation detail
34
35
  # Users should use Dimensionality::UMAP instead
metadata CHANGED
@@ -1,17 +1,31 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: clusterkit
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Chris Petersen
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2025-08-21 00:00:00.000000000 Z
11
+ date: 2026-03-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: csv
14
+ name: rb_sys
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '0.9'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '0.9'
27
+ - !ruby/object:Gem::Dependency
28
+ name: benchmark
15
29
  requirement: !ruby/object:Gem::Requirement
16
30
  requirements:
17
31
  - - ">="
@@ -25,47 +39,47 @@ dependencies:
25
39
  - !ruby/object:Gem::Version
26
40
  version: '0'
27
41
  - !ruby/object:Gem::Dependency
28
- name: rake
42
+ name: csv
29
43
  requirement: !ruby/object:Gem::Requirement
30
44
  requirements:
31
- - - "~>"
45
+ - - ">="
32
46
  - !ruby/object:Gem::Version
33
- version: '13.0'
47
+ version: '0'
34
48
  type: :development
35
49
  prerelease: false
36
50
  version_requirements: !ruby/object:Gem::Requirement
37
51
  requirements:
38
- - - "~>"
52
+ - - ">="
39
53
  - !ruby/object:Gem::Version
40
- version: '13.0'
54
+ version: '0'
41
55
  - !ruby/object:Gem::Dependency
42
- name: rake-compiler
56
+ name: rake
43
57
  requirement: !ruby/object:Gem::Requirement
44
58
  requirements:
45
59
  - - "~>"
46
60
  - !ruby/object:Gem::Version
47
- version: '1.2'
61
+ version: '13.0'
48
62
  type: :development
49
63
  prerelease: false
50
64
  version_requirements: !ruby/object:Gem::Requirement
51
65
  requirements:
52
66
  - - "~>"
53
67
  - !ruby/object:Gem::Version
54
- version: '1.2'
68
+ version: '13.0'
55
69
  - !ruby/object:Gem::Dependency
56
- name: rb_sys
70
+ name: rake-compiler
57
71
  requirement: !ruby/object:Gem::Requirement
58
72
  requirements:
59
73
  - - "~>"
60
74
  - !ruby/object:Gem::Version
61
- version: '0.9'
75
+ version: '1.2'
62
76
  type: :development
63
77
  prerelease: false
64
78
  version_requirements: !ruby/object:Gem::Requirement
65
79
  requirements:
66
80
  - - "~>"
67
81
  - !ruby/object:Gem::Version
68
- version: '0.9'
82
+ version: '1.2'
69
83
  - !ruby/object:Gem::Dependency
70
84
  name: rspec
71
85
  requirement: !ruby/object:Gem::Requirement
@@ -122,6 +136,7 @@ files:
122
136
  - ".simplecov"
123
137
  - CHANGELOG.md
124
138
  - CLAUDE.md
139
+ - Cargo.lock
125
140
  - Cargo.toml
126
141
  - Gemfile
127
142
  - IMPLEMENTATION_NOTES.md
@@ -129,13 +144,15 @@ files:
129
144
  - PYTHON_COMPARISON.md
130
145
  - README.md
131
146
  - Rakefile
132
- - clusterkit.gemspec
133
147
  - docs/KNOWN_ISSUES.md
134
148
  - docs/RUST_ERROR_HANDLING.md
135
149
  - docs/TEST_FIXTURES.md
136
150
  - docs/UMAP_EXPLAINED.md
137
151
  - docs/UMAP_TROUBLESHOOTING.md
138
152
  - docs/VERBOSE_OUTPUT.md
153
+ - docs/assets/clusterkit-wide.png
154
+ - docs/assets/clusterkit.png
155
+ - docs/assets/visualization.png
139
156
  - examples/hdbscan_example.rb
140
157
  - examples/optimal_kmeans_example.rb
141
158
  - examples/pca_example.rb
@@ -146,6 +163,7 @@ files:
146
163
  - ext/clusterkit/src/clustering.rs
147
164
  - ext/clusterkit/src/clustering/hdbscan_wrapper.rs
148
165
  - ext/clusterkit/src/embedder.rs
166
+ - ext/clusterkit/src/hnsw.rs
149
167
  - ext/clusterkit/src/lib.rs
150
168
  - ext/clusterkit/src/svd.rs
151
169
  - ext/clusterkit/src/tests.rs
@@ -155,23 +173,25 @@ files:
155
173
  - lib/clusterkit/clustering/hdbscan.rb
156
174
  - lib/clusterkit/clusterkit.rb
157
175
  - lib/clusterkit/configuration.rb
176
+ - lib/clusterkit/data_validator.rb
158
177
  - lib/clusterkit/dimensionality.rb
159
178
  - lib/clusterkit/dimensionality/pca.rb
160
179
  - lib/clusterkit/dimensionality/svd.rb
161
180
  - lib/clusterkit/dimensionality/umap.rb
162
181
  - lib/clusterkit/hdbscan_api_design.rb
182
+ - lib/clusterkit/hnsw.rb
163
183
  - lib/clusterkit/preprocessing.rb
164
184
  - lib/clusterkit/silence.rb
165
185
  - lib/clusterkit/utils.rb
166
186
  - lib/clusterkit/version.rb
167
187
  - lib/tasks/visualize.rake
168
- homepage: https://github.com/cpetersen/clusterkit
188
+ homepage: https://github.com/scientist-labs/clusterkit
169
189
  licenses:
170
190
  - MIT
171
191
  metadata:
172
- homepage_uri: https://github.com/cpetersen/clusterkit
173
- source_code_uri: https://github.com/cpetersen/clusterkit
174
- changelog_uri: https://github.com/cpetersen/clusterkit/blob/main/CHANGELOG.md
192
+ homepage_uri: https://github.com/scientist-labs/clusterkit
193
+ source_code_uri: https://github.com/scientist-labs/clusterkit
194
+ changelog_uri: https://github.com/scientist-labs/clusterkit/blob/main/CHANGELOG.md
175
195
  post_install_message:
176
196
  rdoc_options: []
177
197
  require_paths:
@@ -187,7 +207,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
187
207
  - !ruby/object:Gem::Version
188
208
  version: '0'
189
209
  requirements: []
190
- rubygems_version: 3.5.3
210
+ rubygems_version: 3.5.22
191
211
  signing_key:
192
212
  specification_version: 4
193
213
  summary: High-performance clustering and dimensionality reduction for Ruby
data/clusterkit.gemspec DELETED
@@ -1,45 +0,0 @@
1
- require_relative "lib/clusterkit/version"
2
-
3
- Gem::Specification.new do |spec|
4
- spec.name = "clusterkit"
5
- spec.version = ClusterKit::VERSION
6
- spec.authors = ["Chris Petersen"]
7
- spec.email = ["chris@petersen.io"]
8
-
9
- spec.summary = "High-performance clustering and dimensionality reduction for Ruby"
10
- spec.description = "A comprehensive clustering toolkit for Ruby, providing UMAP, PCA, K-means, HDBSCAN and more. Built on top of annembed and hdbscan Rust crates for blazing-fast performance."
11
- spec.homepage = "https://github.com/cpetersen/clusterkit"
12
- spec.license = "MIT"
13
- spec.required_ruby_version = ">= 2.7.0"
14
-
15
- spec.metadata["homepage_uri"] = spec.homepage
16
- spec.metadata["source_code_uri"] = spec.homepage
17
- spec.metadata["changelog_uri"] = "#{spec.homepage}/blob/main/CHANGELOG.md"
18
-
19
- # Specify which files should be added to the gem when it is released.
20
- spec.files = Dir.chdir(__dir__) do
21
- `git ls-files -z`.split("\x0").reject do |f|
22
- (f == __FILE__) || f.match(%r{\A(?:(?:bin|test|spec|features)/|\.(?:git|travis|circleci)|appveyor)})
23
- end + Dir["ext/**/*.rs", "ext/**/*.toml"]
24
- end
25
- spec.bindir = "exe"
26
- spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
27
- spec.require_paths = ["lib"]
28
- spec.extensions = ["ext/clusterkit/extconf.rb"]
29
-
30
- # Runtime dependencies
31
- # Numo is optional but recommended for better performance
32
- # spec.add_dependency "numo-narray", "~> 0.9"
33
-
34
- # Development dependencies
35
- spec.add_development_dependency "csv"
36
- spec.add_development_dependency "rake", "~> 13.0"
37
- spec.add_development_dependency "rake-compiler", "~> 1.2"
38
- spec.add_development_dependency "rb_sys", "~> 0.9"
39
- spec.add_development_dependency "rspec", "~> 3.0"
40
- spec.add_development_dependency "simplecov", "~> 0.22"
41
- spec.add_development_dependency "yard", "~> 0.9"
42
-
43
- # For more information and examples about making a new gem, check out our
44
- # guide at: https://bundler.io/guides/creating_gem.html
45
- end