clusterkit 0.2.1 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -2
- data/clusterkit.gemspec +1 -1
- data/docs/KNOWN_ISSUES.md +5 -5
- data/docs/RUST_ERROR_HANDLING.md +6 -6
- data/ext/clusterkit/Cargo.toml +3 -3
- data/lib/clusterkit/version.rb +1 -1
- metadata +5 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c3da45be55195b7646566e031639b24724f5ebb27bed4bc40a4121fccede290e
|
4
|
+
data.tar.gz: 9162028aaeb4956368e1a40ccec1d5a5af70e9c7ced9e8148da0dcb9b6711a12
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c1e86b6a03da44fa3b9e49b8b39eabe97aeebd3b4766731c0be629b35aab8a6566ee2d94a81bbbbf47e43c9c03da7d6f44c240461865f062f8e709987f6997ca
|
7
|
+
data.tar.gz: 114362ba8b87b4045da8c0092b8dc365436dc94c975658f63affcfa18c103fed27320353236e67c96ca5b3d5da1ad746176b864656db0ab1f96cf4eb940bf19e
|
data/README.md
CHANGED
@@ -720,7 +720,7 @@ COVERAGE=true bundle exec rspec
|
|
720
720
|
|
721
721
|
## Contributing
|
722
722
|
|
723
|
-
Bug reports and pull requests are welcome on GitHub at https://github.com/
|
723
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/scientist-labs/clusterkit.
|
724
724
|
|
725
725
|
## License
|
726
726
|
|
@@ -735,7 +735,7 @@ If you use ClusterKit in your research, please cite:
|
|
735
735
|
author = {Chris Petersen},
|
736
736
|
title = {ClusterKit: High-Performance Clustering and Dimensionality Reduction for Ruby},
|
737
737
|
year = {2024},
|
738
|
-
url = {https://github.com/
|
738
|
+
url = {https://github.com/scientist-labs/clusterkit}
|
739
739
|
}
|
740
740
|
```
|
741
741
|
|
data/clusterkit.gemspec
CHANGED
@@ -8,7 +8,7 @@ Gem::Specification.new do |spec|
|
|
8
8
|
|
9
9
|
spec.summary = "High-performance clustering and dimensionality reduction for Ruby"
|
10
10
|
spec.description = "A comprehensive clustering toolkit for Ruby, providing UMAP, PCA, K-means, HDBSCAN and more. Built on top of annembed and hdbscan Rust crates for blazing-fast performance."
|
11
|
-
spec.homepage = "https://github.com/
|
11
|
+
spec.homepage = "https://github.com/scientist-labs/clusterkit"
|
12
12
|
spec.license = "MIT"
|
13
13
|
spec.required_ruby_version = ">= 2.7.0"
|
14
14
|
|
data/docs/KNOWN_ISSUES.md
CHANGED
@@ -14,7 +14,7 @@ This gem has three main categories of limitations:
|
|
14
14
|
|
15
15
|
**Reason**: UMAP needs sufficient data to construct a meaningful manifold approximation. With fewer than 10 points, the algorithm cannot create a reliable graph structure.
|
16
16
|
|
17
|
-
**Workaround**:
|
17
|
+
**Workaround**:
|
18
18
|
- Use PCA for datasets with fewer than 10 points
|
19
19
|
- The `transform` method can handle smaller datasets once the model is fitted on adequate training data
|
20
20
|
|
@@ -30,12 +30,12 @@ This gem has three main categories of limitations:
|
|
30
30
|
|
31
31
|
**Previous Issue**: The box_size assertion would panic and crash the Ruby process.
|
32
32
|
|
33
|
-
**Current Status**: **FIXED** in `
|
33
|
+
**Current Status**: **FIXED** in `scientist-labs/annembed:fix-box-size-panic` branch
|
34
34
|
- The `"assertion failed: (*f).abs() <= box_size"` panic has been converted to a catchable error
|
35
35
|
- Extreme value ranges are now handled gracefully through normalization
|
36
36
|
- NaN/Infinite values are detected and reported with clear error messages
|
37
37
|
|
38
|
-
**Remaining Uncatchable Errors**:
|
38
|
+
**Remaining Uncatchable Errors**:
|
39
39
|
- Array bounds violations (accessing out-of-bounds indices)
|
40
40
|
- Some `.unwrap()` calls on `None` or `Err` values
|
41
41
|
- These are much less common in normal usage
|
@@ -98,7 +98,7 @@ def safe_umap_transform(data, options = {})
|
|
98
98
|
# Save data to temporary file before processing
|
99
99
|
temp_file = "temp_umap_data_#{Time.now.to_i}.json"
|
100
100
|
File.write(temp_file, JSON.dump(data))
|
101
|
-
|
101
|
+
|
102
102
|
begin
|
103
103
|
umap = ClusterKit::Dimensionality::UMAP.new(**options)
|
104
104
|
result = umap.fit_transform(data)
|
@@ -127,4 +127,4 @@ def reduce_dimensions(data, n_components: 2)
|
|
127
127
|
pca.fit_transform(data)
|
128
128
|
end
|
129
129
|
end
|
130
|
-
```
|
130
|
+
```
|
data/docs/RUST_ERROR_HANDLING.md
CHANGED
@@ -37,11 +37,11 @@ These use Rust's `assert!` or `panic!` macros and CANNOT be caught. They will cr
|
|
37
37
|
|
38
38
|
| Error | Source | Location | Trigger Condition |
|
39
39
|
|-------|--------|----------|-------------------|
|
40
|
-
| ~~Box size assertion~~ | ~~annembed~~ | ~~`set_data_box`~~ | **FIXED in
|
40
|
+
| ~~Box size assertion~~ | ~~annembed~~ | ~~`set_data_box`~~ | **FIXED in scientist-labs/annembed:fix-box-size-panic** |
|
41
41
|
| Array bounds | Various | Index operations | Accessing out-of-bounds indices |
|
42
42
|
| Unwrap failures | Various | `.unwrap()` calls | Unwrapping `None` or `Err` |
|
43
43
|
|
44
|
-
**Update (2025-08-19):** The box size assertion has been fixed in the `fix-box-size-panic` branch of
|
44
|
+
**Update (2025-08-19):** The box size assertion has been fixed in the `fix-box-size-panic` branch of scientist-labs/annembed. It now returns a proper `Result<(), anyhow::Error>` that can be caught and handled gracefully:
|
45
45
|
|
46
46
|
```rust
|
47
47
|
// Previously (would panic):
|
@@ -96,13 +96,13 @@ when /isolated point/i
|
|
96
96
|
|
97
97
|
**Previous Issue:** Would panic and crash the Ruby process
|
98
98
|
|
99
|
-
**Current Status:** Fixed in `
|
100
|
-
- Now returns a catchable `anyhow::Error`
|
99
|
+
**Current Status:** Fixed in `scientist-labs/annembed:fix-box-size-panic` branch
|
100
|
+
- Now returns a catchable `anyhow::Error`
|
101
101
|
- Detects NaN/Infinite values during normalization
|
102
102
|
- Handles constant data (max_max = 0) gracefully
|
103
103
|
- Extreme value ranges are normalized successfully
|
104
104
|
|
105
|
-
**User-visible behavior:**
|
105
|
+
**User-visible behavior:**
|
106
106
|
- Previously: Ruby process would crash with assertion failure
|
107
107
|
- Now: Raises a catchable Ruby exception with helpful error message
|
108
108
|
|
@@ -161,4 +161,4 @@ when /isolated point/i
|
|
161
161
|
|
162
162
|
The test suite mocks Rust errors to verify our error handling logic works correctly. However, actual panic conditions cannot be tested without crashing the test process.
|
163
163
|
|
164
|
-
See `spec/clusterkit/error_handling_spec.rb` for error handling tests.
|
164
|
+
See `spec/clusterkit/error_handling_spec.rb` for error handling tests.
|
data/ext/clusterkit/Cargo.toml
CHANGED
@@ -8,8 +8,8 @@ crate-type = ["cdylib"]
|
|
8
8
|
|
9
9
|
[dependencies]
|
10
10
|
magnus = { version = "0.6", features = ["embed"] }
|
11
|
-
annembed = { git = "https://github.com/
|
12
|
-
hnsw_rs = { git = "https://github.com/
|
11
|
+
annembed = { git = "https://github.com/scientist-labs/annembed", tag = "clusterkit-0.1.1" }
|
12
|
+
hnsw_rs = { git = "https://github.com/scientist-labs/hnswlib-rs", tag = "clusterkit-0.1.0" }
|
13
13
|
hdbscan = "0.11"
|
14
14
|
ndarray = "0.16"
|
15
15
|
num-traits = "0.2"
|
@@ -22,4 +22,4 @@ rand = "0.8"
|
|
22
22
|
default = ["openblas-static"]
|
23
23
|
openblas-static = ["annembed/openblas-static"]
|
24
24
|
openblas-system = ["annembed/openblas-system"]
|
25
|
-
intel-mkl-static = ["annembed/intel-mkl-static"]
|
25
|
+
intel-mkl-static = ["annembed/intel-mkl-static"]
|
data/lib/clusterkit/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: clusterkit
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.2.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Chris Petersen
|
@@ -171,13 +171,13 @@ files:
|
|
171
171
|
- lib/clusterkit/utils.rb
|
172
172
|
- lib/clusterkit/version.rb
|
173
173
|
- lib/tasks/visualize.rake
|
174
|
-
homepage: https://github.com/
|
174
|
+
homepage: https://github.com/scientist-labs/clusterkit
|
175
175
|
licenses:
|
176
176
|
- MIT
|
177
177
|
metadata:
|
178
|
-
homepage_uri: https://github.com/
|
179
|
-
source_code_uri: https://github.com/
|
180
|
-
changelog_uri: https://github.com/
|
178
|
+
homepage_uri: https://github.com/scientist-labs/clusterkit
|
179
|
+
source_code_uri: https://github.com/scientist-labs/clusterkit
|
180
|
+
changelog_uri: https://github.com/scientist-labs/clusterkit/blob/main/CHANGELOG.md
|
181
181
|
post_install_message:
|
182
182
|
rdoc_options: []
|
183
183
|
require_paths:
|