clusterkit 0.2.3 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- use magnus::{define_module, Error};
1
+ use magnus::{Error, Ruby};
2
2
 
3
3
  mod embedder;
4
4
  mod svd;
@@ -10,15 +10,15 @@ mod hnsw;
10
10
  mod tests;
11
11
 
12
12
  #[magnus::init]
13
- fn init() -> Result<(), Error> {
14
- let module = define_module("ClusterKit")?;
15
-
13
+ fn init(ruby: &Ruby) -> Result<(), Error> {
14
+ let module = ruby.define_module("ClusterKit")?;
15
+
16
16
  // Initialize submodules
17
17
  embedder::init(&module)?;
18
18
  svd::init(&module)?;
19
19
  utils::init(&module)?;
20
20
  clustering::init(&module)?;
21
21
  hnsw::init(&module)?;
22
-
22
+
23
23
  Ok(())
24
24
  }
@@ -1,91 +1,89 @@
1
- use magnus::{function, prelude::*, Error, Value, RArray};
1
+ use magnus::{function, prelude::*, Error, Value, RArray, Ruby};
2
2
  use annembed::tools::svdapprox::{SvdApprox, RangeApproxMode, RangeRank, MatRepr};
3
3
  use crate::utils::ruby_array_to_ndarray2;
4
4
 
5
5
  pub fn init(parent: &magnus::RModule) -> Result<(), Error> {
6
6
  let svd_module = parent.define_module("SVD")?;
7
-
7
+
8
8
  svd_module.define_singleton_method(
9
9
  "randomized_svd_rust",
10
10
  function!(randomized_svd, 3),
11
11
  )?;
12
-
12
+
13
13
  Ok(())
14
14
  }
15
15
 
16
16
  fn randomized_svd(matrix: Value, k: usize, n_iter: usize) -> Result<RArray, Error> {
17
+ let ruby = Ruby::get().unwrap();
18
+
17
19
  // Convert Ruby array to ndarray using shared helper
18
20
  let matrix_data = ruby_array_to_ndarray2(matrix)?;
19
21
  let (n_rows, n_cols) = matrix_data.dim();
20
-
22
+
21
23
  if k > n_rows.min(n_cols) {
22
24
  return Err(Error::new(
23
- magnus::exception::arg_error(),
25
+ ruby.exception_arg_error(),
24
26
  format!("k ({}) cannot be larger than min(rows, cols) = {}", k, n_rows.min(n_cols)),
25
27
  ));
26
28
  }
27
-
29
+
28
30
  // Create MatRepr for the full matrix
29
31
  let mat_repr = MatRepr::from_array2(matrix_data.clone());
30
-
32
+
31
33
  // Create SvdApprox instance
32
34
  let mut svd_approx = SvdApprox::new(&mat_repr);
33
-
35
+
34
36
  // Set up parameters for randomized SVD
35
- // Use RANK mode to specify the desired rank
36
37
  let params = RangeApproxMode::RANK(RangeRank::new(k, n_iter));
37
-
38
+
38
39
  // Perform SVD
39
40
  let svd_result = svd_approx.direct_svd(params)
40
- .map_err(|e| Error::new(magnus::exception::runtime_error(), e))?;
41
-
42
- // Extract U, S, V from the result - they are optional fields
41
+ .map_err(|e| Error::new(ruby.exception_runtime_error(), e))?;
42
+
43
+ // Extract U, S, V from the result
43
44
  let u_matrix = svd_result.u.ok_or_else(|| {
44
- Error::new(magnus::exception::runtime_error(), "No U matrix in SVD result")
45
+ Error::new(ruby.exception_runtime_error(), "No U matrix in SVD result")
45
46
  })?;
46
-
47
+
47
48
  let s_values = svd_result.s.ok_or_else(|| {
48
- Error::new(magnus::exception::runtime_error(), "No S values in SVD result")
49
+ Error::new(ruby.exception_runtime_error(), "No S values in SVD result")
49
50
  })?;
50
-
51
+
51
52
  let vt_matrix = svd_result.vt.ok_or_else(|| {
52
- Error::new(magnus::exception::runtime_error(), "No V^T matrix in SVD result")
53
+ Error::new(ruby.exception_runtime_error(), "No V^T matrix in SVD result")
53
54
  })?;
54
-
55
+
55
56
  // Convert results to Ruby arrays
56
- // U matrix - convert ndarray to Ruby nested array
57
- let u_ruby = RArray::new();
57
+ let u_ruby = ruby.ary_new();
58
58
  let u_shape = u_matrix.shape();
59
59
  for i in 0..u_shape[0] {
60
- let row = RArray::new();
60
+ let row = ruby.ary_new();
61
61
  for j in 0..u_shape[1] {
62
62
  row.push(u_matrix[[i, j]])?;
63
63
  }
64
64
  u_ruby.push(row)?;
65
65
  }
66
-
67
- // S values - convert to Ruby array
68
- let s_ruby = RArray::new();
66
+
67
+ let s_ruby = ruby.ary_new();
69
68
  for val in s_values.iter() {
70
69
  s_ruby.push(*val)?;
71
70
  }
72
-
73
- // V matrix (note: we have V^T, so we need to transpose)
74
- let v_ruby = RArray::new();
71
+
72
+ let v_ruby = ruby.ary_new();
75
73
  let vt_shape = vt_matrix.shape();
76
74
  for i in 0..vt_shape[0] {
77
- let row = RArray::new();
75
+ let row = ruby.ary_new();
78
76
  for j in 0..vt_shape[1] {
79
77
  row.push(vt_matrix[[i, j]])?;
80
78
  }
81
79
  v_ruby.push(row)?;
82
80
  }
83
-
81
+
84
82
  // Return [U, S, V^T] as a Ruby array
85
- let result = RArray::new();
83
+ let result = ruby.ary_new();
86
84
  result.push(u_ruby)?;
87
85
  result.push(s_ruby)?;
88
86
  result.push(v_ruby)?;
89
-
87
+
90
88
  Ok(result)
91
- }
89
+ }
@@ -1,34 +1,34 @@
1
- use magnus::{function, prelude::*, Error, Value, RArray, TryConvert, Float, Integer};
1
+ use magnus::{function, prelude::*, Error, Value, RArray, TryConvert, Float, Integer, Ruby};
2
2
  use ndarray::Array2;
3
3
 
4
4
  pub fn init(parent: &magnus::RModule) -> Result<(), Error> {
5
5
  let utils_module = parent.define_module("Utils")?;
6
-
6
+
7
7
  utils_module.define_singleton_method(
8
8
  "estimate_intrinsic_dimension_rust",
9
9
  function!(estimate_intrinsic_dimension, 2),
10
10
  )?;
11
-
11
+
12
12
  utils_module.define_singleton_method(
13
13
  "estimate_hubness_rust",
14
14
  function!(estimate_hubness, 1),
15
15
  )?;
16
-
16
+
17
17
  Ok(())
18
18
  }
19
19
 
20
20
  fn estimate_intrinsic_dimension(_data: Value, _k_neighbors: usize) -> Result<f64, Error> {
21
- // TODO: Implement using annembed
21
+ let ruby = Ruby::get().unwrap();
22
22
  Err(Error::new(
23
- magnus::exception::not_imp_error(),
23
+ ruby.exception_not_imp_error(),
24
24
  "Dimension estimation not implemented yet",
25
25
  ))
26
26
  }
27
27
 
28
28
  fn estimate_hubness(_data: Value) -> Result<Value, Error> {
29
- // TODO: Implement using annembed
29
+ let ruby = Ruby::get().unwrap();
30
30
  Err(Error::new(
31
- magnus::exception::not_imp_error(),
31
+ ruby.exception_not_imp_error(),
32
32
  "Hubness estimation not implemented yet",
33
33
  ))
34
34
  }
@@ -36,12 +36,13 @@ fn estimate_hubness(_data: Value) -> Result<Value, Error> {
36
36
  /// Convert Ruby 2D array to ndarray Array2<f64>
37
37
  /// Handles validation and provides consistent error messages
38
38
  pub fn ruby_array_to_ndarray2(data: Value) -> Result<Array2<f64>, Error> {
39
+ let ruby = Ruby::get().unwrap();
39
40
  let rarray: RArray = TryConvert::try_convert(data)?;
40
41
  let n_samples = rarray.len();
41
42
 
42
43
  if n_samples == 0 {
43
44
  return Err(Error::new(
44
- magnus::exception::arg_error(),
45
+ ruby.exception_arg_error(),
45
46
  "Data cannot be empty",
46
47
  ));
47
48
  }
@@ -52,7 +53,7 @@ pub fn ruby_array_to_ndarray2(data: Value) -> Result<Array2<f64>, Error> {
52
53
 
53
54
  if n_features == 0 {
54
55
  return Err(Error::new(
55
- magnus::exception::arg_error(),
56
+ ruby.exception_arg_error(),
56
57
  "Data rows cannot be empty",
57
58
  ));
58
59
  }
@@ -61,11 +62,11 @@ pub fn ruby_array_to_ndarray2(data: Value) -> Result<Array2<f64>, Error> {
61
62
  let mut data_array = Array2::<f64>::zeros((n_samples, n_features));
62
63
  for i in 0..n_samples {
63
64
  let row: RArray = rarray.entry(i as isize)?;
64
-
65
+
65
66
  // Validate row length consistency
66
67
  if row.len() != n_features {
67
68
  return Err(Error::new(
68
- magnus::exception::arg_error(),
69
+ ruby.exception_arg_error(),
69
70
  format!("Row {} has {} elements, expected {}", i, row.len(), n_features),
70
71
  ));
71
72
  }
@@ -80,14 +81,15 @@ pub fn ruby_array_to_ndarray2(data: Value) -> Result<Array2<f64>, Error> {
80
81
  }
81
82
 
82
83
  /// Convert Ruby 2D array to Vec<Vec<f64>>
83
- /// Handles validation and provides consistent error messages
84
+ /// Handles validation and provides consistent error messages
84
85
  pub fn ruby_array_to_vec_vec_f64(data: Value) -> Result<Vec<Vec<f64>>, Error> {
86
+ let ruby = Ruby::get().unwrap();
85
87
  let rarray: RArray = TryConvert::try_convert(data)?;
86
88
  let n_samples = rarray.len();
87
89
 
88
90
  if n_samples == 0 {
89
91
  return Err(Error::new(
90
- magnus::exception::arg_error(),
92
+ ruby.exception_arg_error(),
91
93
  "Data cannot be empty",
92
94
  ));
93
95
  }
@@ -98,13 +100,13 @@ pub fn ruby_array_to_vec_vec_f64(data: Value) -> Result<Vec<Vec<f64>>, Error> {
98
100
  for i in 0..n_samples {
99
101
  let row: RArray = rarray.entry(i as isize)?;
100
102
  let n_features = row.len();
101
-
103
+
102
104
  // Check row length consistency
103
105
  match expected_features {
104
106
  Some(expected) => {
105
107
  if n_features != expected {
106
108
  return Err(Error::new(
107
- magnus::exception::arg_error(),
109
+ ruby.exception_arg_error(),
108
110
  format!("Row {} has {} elements, expected {}", i, n_features, expected),
109
111
  ));
110
112
  }
@@ -126,12 +128,13 @@ pub fn ruby_array_to_vec_vec_f64(data: Value) -> Result<Vec<Vec<f64>>, Error> {
126
128
  /// Convert Ruby 2D array to Vec<Vec<f32>>
127
129
  /// For algorithms that require f32 precision (like UMAP)
128
130
  pub fn ruby_array_to_vec_vec_f32(data: Value) -> Result<Vec<Vec<f32>>, Error> {
131
+ let ruby = Ruby::get().unwrap();
129
132
  let rarray: RArray = TryConvert::try_convert(data)?;
130
133
  let array_len = rarray.len();
131
134
 
132
135
  if array_len == 0 {
133
136
  return Err(Error::new(
134
- magnus::exception::arg_error(),
137
+ ruby.exception_arg_error(),
135
138
  "Input data cannot be empty",
136
139
  ));
137
140
  }
@@ -142,7 +145,7 @@ pub fn ruby_array_to_vec_vec_f32(data: Value) -> Result<Vec<Vec<f32>>, Error> {
142
145
  let row = rarray.entry::<Value>(i as isize)?;
143
146
  let row_array = RArray::try_convert(row).map_err(|_| {
144
147
  Error::new(
145
- magnus::exception::type_error(),
148
+ ruby.exception_type_error(),
146
149
  "Expected array of arrays (2D array)",
147
150
  )
148
151
  })?;
@@ -158,7 +161,7 @@ pub fn ruby_array_to_vec_vec_f32(data: Value) -> Result<Vec<Vec<f32>>, Error> {
158
161
  i.to_i64()? as f32
159
162
  } else {
160
163
  return Err(Error::new(
161
- magnus::exception::type_error(),
164
+ ruby.exception_type_error(),
162
165
  "All values must be numeric",
163
166
  ));
164
167
  };
@@ -168,7 +171,7 @@ pub fn ruby_array_to_vec_vec_f32(data: Value) -> Result<Vec<Vec<f32>>, Error> {
168
171
  // Validate row length consistency
169
172
  if !rust_data.is_empty() && rust_row.len() != rust_data[0].len() {
170
173
  return Err(Error::new(
171
- magnus::exception::arg_error(),
174
+ ruby.exception_arg_error(),
172
175
  "All rows must have the same length",
173
176
  ));
174
177
  }
@@ -177,4 +180,4 @@ pub fn ruby_array_to_vec_vec_f32(data: Value) -> Result<Vec<Vec<f32>>, Error> {
177
180
  }
178
181
 
179
182
  Ok(rust_data)
180
- }
183
+ }
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module ClusterKit
4
- VERSION = "0.2.3"
4
+ VERSION = "0.2.5"
5
5
  end
data/lib/clusterkit.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative "clusterkit/version"
4
- require_relative "clusterkit/clusterkit"
4
+ require "clusterkit/clusterkit"
5
5
  require_relative "clusterkit/configuration"
6
6
 
7
7
  # Main module for ClusterKit gem
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: clusterkit
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.3
4
+ version: 0.2.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Chris Petersen
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2025-09-06 00:00:00.000000000 Z
11
+ date: 2026-03-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys
@@ -24,6 +24,20 @@ dependencies:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0.9'
27
+ - !ruby/object:Gem::Dependency
28
+ name: benchmark
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
27
41
  - !ruby/object:Gem::Dependency
28
42
  name: csv
29
43
  requirement: !ruby/object:Gem::Requirement
@@ -122,6 +136,7 @@ files:
122
136
  - ".simplecov"
123
137
  - CHANGELOG.md
124
138
  - CLAUDE.md
139
+ - Cargo.lock
125
140
  - Cargo.toml
126
141
  - Gemfile
127
142
  - IMPLEMENTATION_NOTES.md
@@ -129,7 +144,6 @@ files:
129
144
  - PYTHON_COMPARISON.md
130
145
  - README.md
131
146
  - Rakefile
132
- - clusterkit.gemspec
133
147
  - docs/KNOWN_ISSUES.md
134
148
  - docs/RUST_ERROR_HANDLING.md
135
149
  - docs/TEST_FIXTURES.md
@@ -193,7 +207,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
193
207
  - !ruby/object:Gem::Version
194
208
  version: '0'
195
209
  requirements: []
196
- rubygems_version: 3.5.3
210
+ rubygems_version: 3.5.22
197
211
  signing_key:
198
212
  specification_version: 4
199
213
  summary: High-performance clustering and dimensionality reduction for Ruby
data/clusterkit.gemspec DELETED
@@ -1,45 +0,0 @@
1
- require_relative "lib/clusterkit/version"
2
-
3
- Gem::Specification.new do |spec|
4
- spec.name = "clusterkit"
5
- spec.version = ClusterKit::VERSION
6
- spec.authors = ["Chris Petersen"]
7
- spec.email = ["chris@petersen.io"]
8
-
9
- spec.summary = "High-performance clustering and dimensionality reduction for Ruby"
10
- spec.description = "A comprehensive clustering toolkit for Ruby, providing UMAP, PCA, K-means, HDBSCAN and more. Built on top of annembed and hdbscan Rust crates for blazing-fast performance."
11
- spec.homepage = "https://github.com/scientist-labs/clusterkit"
12
- spec.license = "MIT"
13
- spec.required_ruby_version = ">= 2.7.0"
14
-
15
- spec.metadata["homepage_uri"] = spec.homepage
16
- spec.metadata["source_code_uri"] = spec.homepage
17
- spec.metadata["changelog_uri"] = "#{spec.homepage}/blob/main/CHANGELOG.md"
18
-
19
- # Specify which files should be added to the gem when it is released.
20
- spec.files = Dir.chdir(__dir__) do
21
- `git ls-files -z`.split("\x0").reject do |f|
22
- (f == __FILE__) || f.match(%r{\A(?:(?:bin|test|spec|features)/|\.(?:git|travis|circleci)|appveyor)})
23
- end + Dir["ext/**/*.rs", "ext/**/*.toml"]
24
- end
25
- spec.bindir = "exe"
26
- spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
27
- spec.require_paths = ["lib"]
28
- spec.extensions = ["ext/clusterkit/extconf.rb"]
29
-
30
- # Runtime dependencies
31
- # Numo is optional but recommended for better performance
32
- # spec.add_dependency "numo-narray", "~> 0.9"
33
- spec.add_dependency "rb_sys", "~> 0.9"
34
-
35
- # Development dependencies
36
- spec.add_development_dependency "csv"
37
- spec.add_development_dependency "rake", "~> 13.0"
38
- spec.add_development_dependency "rake-compiler", "~> 1.2"
39
- spec.add_development_dependency "rspec", "~> 3.0"
40
- spec.add_development_dependency "simplecov", "~> 0.22"
41
- spec.add_development_dependency "yard", "~> 0.9"
42
-
43
- # For more information and examples about making a new gem, check out our
44
- # guide at: https://bundler.io/guides/creating_gem.html
45
- end