clusterkit 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # ClusterKit
1
+ <img src="/docs/assets/clusterkit-wide.png" alt="clusterkit" height="80px">
2
2
 
3
3
  A high-performance clustering and dimensionality reduction toolkit for Ruby, powered by best-in-class Rust implementations.
4
4
 
@@ -44,7 +44,7 @@ ClusterKit organizes its functionality into clear modules:
44
44
 
45
45
  - **`ClusterKit::Dimensionality`** - All dimensionality reduction algorithms
46
46
  - `ClusterKit::Dimensionality::UMAP` - UMAP implementation
47
- - `ClusterKit::Dimensionality::PCA` - PCA implementation
47
+ - `ClusterKit::Dimensionality::PCA` - PCA implementation
48
48
  - `ClusterKit::Dimensionality::SVD` - SVD implementation
49
49
  - **`ClusterKit::Clustering`** - All clustering algorithms
50
50
  - `ClusterKit::Clustering::KMeans` - K-means clustering
@@ -96,7 +96,7 @@ data = []
96
96
  3.times do |cluster|
97
97
  # Each cluster has a different center, well-separated
98
98
  center = Array.new(50) { rand * 0.1 + cluster * 2.0 }
99
-
99
+
100
100
  # Add 33 points around each center with controlled noise
101
101
  33.times do
102
102
  point = center.map { |c| c + (rand - 0.5) * 0.3 }
@@ -329,6 +329,223 @@ probabilities = hdbscan.probabilities # Cluster membership probabilities
329
329
  outlier_scores = hdbscan.outlier_scores # Outlier scores for each point
330
330
  ```
331
331
 
332
+ ### HNSW - Fast Nearest Neighbor Search
333
+
334
+ ClusterKit includes HNSW (Hierarchical Navigable Small World) for fast approximate nearest neighbor search, useful for building recommendation systems, similarity search, and as a building block for other algorithms.
335
+
336
+ Copy and paste this **entire block** into IRB to try HNSW with real embeddings (the example also requires the `red-candle` gem, which provides `candle`):
337
+
338
+ ```ruby
339
+ require 'clusterkit'
340
+ require 'candle'
341
+
342
+ # Step 1: Initialize the embedding model
343
+ puts "Loading embedding model..."
344
+ embedding_model = Candle::EmbeddingModel.from_pretrained(
345
+ 'sentence-transformers/all-MiniLM-L6-v2',
346
+ device: Candle::Device.best
347
+ )
348
+ puts " ✓ Model loaded: #{embedding_model.model_id}"
349
+
350
+ # Step 2: Create sample documents for semantic search
351
+ documents = [
352
+ "The cat sat on the mat",
353
+ "Dogs are loyal pets that love their owners",
354
+ "Machine learning algorithms can classify text documents",
355
+ "Natural language processing helps computers understand human language",
356
+ "Ruby is a programming language known for its simplicity",
357
+ "Python is popular for data science and machine learning",
358
+ "The weather today is sunny and warm",
359
+ "Climate change affects global weather patterns",
360
+ "Artificial intelligence is transforming many industries",
361
+ "Deep learning models require large amounts of training data",
362
+ "Cats and dogs are common household pets",
363
+ "Software engineering requires problem-solving skills",
364
+ "The ocean contains many different species of fish",
365
+ "Marine biology studies life in aquatic environments",
366
+ "Cooking requires understanding of ingredients and techniques"
367
+ ]
368
+
369
+ puts "\nGenerating embeddings for #{documents.size} documents..."
370
+
371
+ # Step 3: Generate embeddings for all documents
372
+ embeddings = documents.map do |doc|
373
+ embedding_model.embedding(doc).first.to_a
374
+ end
375
+ puts " ✓ Generated embeddings: #{embeddings.first.count} dimensions each"
376
+
377
+ # Step 4: Create HNSW index
378
+ puts "\nBuilding HNSW search index..."
379
+ index = ClusterKit::HNSW.new(
380
+ dim: embeddings.first.count, # 384 dimensions for all-MiniLM-L6-v2
381
+ space: :euclidean,
382
+ m: 16, # Good balance of speed vs accuracy
383
+ ef_construction: 200, # Build quality
384
+ max_elements: documents.size,
385
+ random_seed: 42 # For reproducible results
386
+ )
387
+
388
+ # Step 5: Add all documents to the index
389
+ documents.each_with_index do |doc, i|
390
+ index.add_item(
391
+ embeddings[i],
392
+ label: "doc_#{i}",
393
+ metadata: {
394
+ 'text' => doc,
395
+ 'length' => doc.length,
396
+ 'word_count' => doc.split.size
397
+ }
398
+ )
399
+ end
400
+ puts " ✓ Added #{documents.size} documents to index"
401
+
402
+ # Step 6: Perform semantic searches
403
+ puts "\n" + "="*50
404
+ puts "SEMANTIC SEARCH DEMO"
405
+ puts "="*50
406
+
407
+ queries = [
408
+ "pets and animals",
409
+ "computer programming",
410
+ "weather and environment"
411
+ ]
412
+
413
+ queries.each do |query|
414
+ puts "\nQuery: '#{query}'"
415
+ puts "-" * 30
416
+
417
+ # Generate query embedding
418
+ query_embedding = embedding_model.embedding(query).first.to_a
419
+
420
+ # Search for similar documents
421
+ results = index.search_with_metadata(query_embedding, k: 3)
422
+
423
+ results.each_with_index do |result, i|
424
+ similarity = (1.0 - result[:distance]).round(3) # Rough similarity score; negative when distance exceeds 1.0
425
+ text = result[:metadata]['text']
426
+ puts " #{i+1}. [#{similarity}] #{text}"
427
+ end
428
+ end
429
+
430
+ # Step 7: Demonstrate advanced features
431
+ puts "\n" + "="*50
432
+ puts "ADVANCED FEATURES"
433
+ puts "="*50
434
+
435
+ # Show search quality adjustment
436
+ puts "\nAdjusting search quality (ef parameter):"
437
+ index.set_ef(50) # Lower ef = faster but potentially less accurate
438
+ fast_results = index.search(embeddings[0], k: 3)
439
+ puts " Fast search (ef=50): #{fast_results}"
440
+
441
+ index.set_ef(200) # Higher ef = slower but more accurate
442
+ accurate_results = index.search(embeddings[0], k: 3)
443
+ puts " Accurate search (ef=200): #{accurate_results}"
444
+
445
+ # Show batch operations
446
+ puts "\nBatch search example:"
447
+ query_embeddings = [embeddings[0], embeddings[5], embeddings[10]]
448
+ batch_results = query_embeddings.map { |emb| index.search(emb, k: 2) }
449
+ puts " Found #{batch_results.size} result sets"
450
+
451
+ # Save and load demonstration
452
+ puts "\nSaving and loading index:"
453
+ index.save('demo_index')
454
+ puts " ✓ Index saved to 'demo_index'"
455
+
456
+ loaded_index = ClusterKit::HNSW.load('demo_index')
457
+ test_results = loaded_index.search(embeddings[0], k: 2)
458
+ puts " ✓ Loaded index works: #{test_results}"
459
+
460
+ puts "\n✅ HNSW demo complete!"
461
+ puts "\nTry your own queries by running:"
462
+ puts "query_embedding = embedding_model.embedding('your search query').first.to_a"
463
+ puts "results = index.search_with_metadata(query_embedding, k: 5)"
464
+ ```
465
+
466
+ #### When to Use HNSW
467
+
468
+ HNSW is ideal for:
469
+ - **Recommendation Systems**: Find similar items/users quickly
470
+ - **Semantic Search**: Find documents with similar embeddings
471
+ - **Duplicate Detection**: Identify near-duplicate content
472
+ - **Clustering Support**: As a fast neighbor graph for HDBSCAN
473
+ - **Real-time Applications**: When you need sub-millisecond search times
474
+
475
+ #### Configuration Guidelines
476
+
477
+ ```ruby
478
+ # High recall (>0.95) - Best quality, slower
479
+ ClusterKit::HNSW.new(
480
+ dim: dim,
481
+ m: 32,
482
+ ef_construction: 400
483
+ ).tap { |idx| idx.set_ef(100) }
484
+
485
+ # Balanced (>0.90 recall) - Good quality, fast
486
+ ClusterKit::HNSW.new(
487
+ dim: dim,
488
+ m: 16,
489
+ ef_construction: 200
490
+ ).tap { |idx| idx.set_ef(50) }
491
+
492
+ # Speed optimized (>0.85 recall) - Fastest, acceptable quality
493
+ ClusterKit::HNSW.new(
494
+ dim: dim,
495
+ m: 8,
496
+ ef_construction: 100
497
+ ).tap { |idx| idx.set_ef(20) }
498
+ ```
499
+
500
+ #### Important Notes
501
+
502
+ 1. **Memory Usage**: HNSW keeps the entire index in memory. Estimate: `(num_items * (dim * 4 + m * 16))` bytes
503
+ 2. **Distance Metrics**: Currently only Euclidean distance is fully supported
504
+ 3. **Loading Behavior**: Due to Rust lifetime constraints, loading an index creates a small memory leak (the index metadata persists until program exit). This is negligible for most applications, but because it occurs on every load, it can add up in a long-running process that loads indexes repeatedly.
505
+ 4. **Build Time**: Index construction is O(N * log(N)). For large datasets (>1M items), consider building the index offline, saving it with `save`, and loading it at runtime with `ClusterKit::HNSW.load`.
506
+
507
+ #### Example: Semantic Search System
508
+
509
+ ```ruby
510
+ # Build a simple semantic search system
511
+ documents = load_documents()
512
+ embeddings = generate_embeddings(documents) # Use red-candle or similar
513
+
514
+ # Build search index
515
+ search_index = ClusterKit::HNSW.new(
516
+ dim: embeddings.first.size,
517
+ m: 16,
518
+ ef_construction: 200,
519
+ max_elements: documents.size
520
+ )
521
+
522
+ # Add all documents
523
+ documents.each_with_index do |doc, i|
524
+ search_index.add_item(
525
+ embeddings[i],
526
+ label: i,
527
+ metadata: { title: doc[:title], url: doc[:url] }
528
+ )
529
+ end
530
+
531
+ # Search function
532
+ def search(query, index, k: 10)
533
+ query_embedding = generate_embedding(query)
534
+ results = index.search_with_metadata(query_embedding, k: k)
535
+
536
+ results.map do |result|
537
+ {
538
+ title: result[:metadata]['title'],
539
+ url: result[:metadata]['url'],
540
+ similarity: 1.0 - result[:distance] # Rough similarity score; negative when distance exceeds 1.0
541
+ }
542
+ end
543
+ end
544
+
545
+ # Save for later use
546
+ search_index.save('document_index')
547
+ ```
548
+
332
549
  ### Visualization
333
550
 
334
551
  ClusterKit includes a built-in visualization tool:
@@ -350,6 +567,9 @@ This creates an interactive HTML file with:
350
567
  - Performance metrics
351
568
  - Interactive Plotly.js charts
352
569
 
570
+ <img src="/docs/assets/visualization.png" alt="rake clusterkit:visualize">
571
+
572
+
353
573
  ## Choosing the Right Algorithm
354
574
 
355
575
  ### Dimensionality Reduction
@@ -454,7 +674,7 @@ This error occurs when UMAP cannot find enough neighbors for some points. Soluti
454
674
  ```ruby
455
675
  # Bad: Pure random data with no structure
456
676
  data = Array.new(100) { Array.new(50) { rand } }
457
-
677
+
458
678
  # Good: Data with clusters or patterns (see Quick Start example)
459
679
  # Create clusters with centers and add points around them
460
680
  ```
@@ -500,7 +720,7 @@ COVERAGE=true bundle exec rspec
500
720
 
501
721
  ## Contributing
502
722
 
503
- Bug reports and pull requests are welcome on GitHub at https://github.com/cpetersen/clusterkit.
723
+ Bug reports and pull requests are welcome on GitHub at https://github.com/scientist-labs/clusterkit.
504
724
 
505
725
  ## License
506
726
 
@@ -515,10 +735,10 @@ If you use ClusterKit in your research, please cite:
515
735
  author = {Chris Petersen},
516
736
  title = {ClusterKit: High-Performance Clustering and Dimensionality Reduction for Ruby},
517
737
  year = {2024},
518
- url = {https://github.com/cpetersen/clusterkit}
738
+ url = {https://github.com/scientist-labs/clusterkit}
519
739
  }
520
740
  ```
521
741
 
522
742
  And please also cite the underlying libraries:
523
743
  - [annembed](https://github.com/jean-pierreBoth/annembed) for dimensionality reduction algorithms
524
- - [hdbscan](https://github.com/petabi/hdbscan) for HDBSCAN clustering
744
+ - [hdbscan](https://github.com/petabi/hdbscan) for HDBSCAN clustering
data/docs/KNOWN_ISSUES.md CHANGED
@@ -14,7 +14,7 @@ This gem has three main categories of limitations:
14
14
 
15
15
  **Reason**: UMAP needs sufficient data to construct a meaningful manifold approximation. With fewer than 10 points, the algorithm cannot create a reliable graph structure.
16
16
 
17
- **Workaround**:
17
+ **Workaround**:
18
18
  - Use PCA for datasets with fewer than 10 points
19
19
  - The `transform` method can handle smaller datasets once the model is fitted on adequate training data
20
20
 
@@ -30,12 +30,12 @@ This gem has three main categories of limitations:
30
30
 
31
31
  **Previous Issue**: The box_size assertion would panic and crash the Ruby process.
32
32
 
33
- **Current Status**: **FIXED** in `cpetersen/annembed:fix-box-size-panic` branch
33
+ **Current Status**: **FIXED** in `scientist-labs/annembed:fix-box-size-panic` branch
34
34
  - The `"assertion failed: (*f).abs() <= box_size"` panic has been converted to a catchable error
35
35
  - Extreme value ranges are now handled gracefully through normalization
36
36
  - NaN/Infinite values are detected and reported with clear error messages
37
37
 
38
- **Remaining Uncatchable Errors**:
38
+ **Remaining Uncatchable Errors**:
39
39
  - Array bounds violations (accessing out-of-bounds indices)
40
40
  - Some `.unwrap()` calls on `None` or `Err` values
41
41
  - These are much less common in normal usage
@@ -98,7 +98,7 @@ def safe_umap_transform(data, options = {})
98
98
  # Save data to temporary file before processing
99
99
  temp_file = "temp_umap_data_#{Time.now.to_i}.json"
100
100
  File.write(temp_file, JSON.dump(data))
101
-
101
+
102
102
  begin
103
103
  umap = ClusterKit::Dimensionality::UMAP.new(**options)
104
104
  result = umap.fit_transform(data)
@@ -127,4 +127,4 @@ def reduce_dimensions(data, n_components: 2)
127
127
  pca.fit_transform(data)
128
128
  end
129
129
  end
130
- ```
130
+ ```
@@ -37,11 +37,11 @@ These use Rust's `assert!` or `panic!` macros and CANNOT be caught. They will cr
37
37
 
38
38
  | Error | Source | Location | Trigger Condition |
39
39
  |-------|--------|----------|-------------------|
40
- | ~~Box size assertion~~ | ~~annembed~~ | ~~`set_data_box`~~ | **FIXED in cpetersen/annembed:fix-box-size-panic** |
40
+ | ~~Box size assertion~~ | ~~annembed~~ | ~~`set_data_box`~~ | **FIXED in scientist-labs/annembed:fix-box-size-panic** |
41
41
  | Array bounds | Various | Index operations | Accessing out-of-bounds indices |
42
42
  | Unwrap failures | Various | `.unwrap()` calls | Unwrapping `None` or `Err` |
43
43
 
44
- **Update (2025-08-19):** The box size assertion has been fixed in the `fix-box-size-panic` branch of cpetersen/annembed. It now returns a proper `Result<(), anyhow::Error>` that can be caught and handled gracefully:
44
+ **Update (2025-08-19):** The box size assertion has been fixed in the `fix-box-size-panic` branch of scientist-labs/annembed. It now returns a proper `Result<(), anyhow::Error>` that can be caught and handled gracefully:
45
45
 
46
46
  ```rust
47
47
  // Previously (would panic):
@@ -96,13 +96,13 @@ when /isolated point/i
96
96
 
97
97
  **Previous Issue:** Would panic and crash the Ruby process
98
98
 
99
- **Current Status:** Fixed in `cpetersen/annembed:fix-box-size-panic` branch
100
- - Now returns a catchable `anyhow::Error`
99
+ **Current Status:** Fixed in `scientist-labs/annembed:fix-box-size-panic` branch
100
+ - Now returns a catchable `anyhow::Error`
101
101
  - Detects NaN/Infinite values during normalization
102
102
  - Handles constant data (max_max = 0) gracefully
103
103
  - Extreme value ranges are normalized successfully
104
104
 
105
- **User-visible behavior:**
105
+ **User-visible behavior:**
106
106
  - Previously: Ruby process would crash with assertion failure
107
107
  - Now: Raises a catchable Ruby exception with helpful error message
108
108
 
@@ -161,4 +161,4 @@ when /isolated point/i
161
161
 
162
162
  The test suite mocks Rust errors to verify our error handling logic works correctly. However, actual panic conditions cannot be tested without crashing the test process.
163
163
 
164
- See `spec/clusterkit/error_handling_spec.rb` for error handling tests.
164
+ See `spec/clusterkit/error_handling_spec.rb` for error handling tests.
Binary file
Binary file
Binary file
@@ -7,9 +7,9 @@ edition = "2021"
7
7
  crate-type = ["cdylib"]
8
8
 
9
9
  [dependencies]
10
- magnus = { version = "0.6", features = ["embed"] }
11
- annembed = { git = "https://github.com/cpetersen/annembed", tag = "clusterkit-0.1.0" }
12
- hnsw_rs = { git = "https://github.com/cpetersen/hnswlib-rs", tag = "clusterkit-0.1.0" }
10
+ magnus = { version = "0.8", features = ["embed"] }
11
+ annembed = { git = "https://github.com/scientist-labs/annembed", tag = "clusterkit-0.1.1" }
12
+ hnsw_rs = { git = "https://github.com/scientist-labs/hnswlib-rs", tag = "clusterkit-0.1.0" }
13
13
  hdbscan = "0.11"
14
14
  ndarray = "0.16"
15
15
  num-traits = "0.2"
@@ -22,4 +22,5 @@ rand = "0.8"
22
22
  default = ["openblas-static"]
23
23
  openblas-static = ["annembed/openblas-static"]
24
24
  openblas-system = ["annembed/openblas-system"]
25
- intel-mkl-static = ["annembed/intel-mkl-static"]
25
+ intel-mkl-static = ["annembed/intel-mkl-static"]
26
+ macos-accelerate = ["annembed/macos-accelerate"]
@@ -1,4 +1,12 @@
1
1
  require "mkmf"
2
2
  require "rb_sys/mkmf"
3
3
 
4
- create_rust_makefile("clusterkit/clusterkit")
4
+ create_rust_makefile("clusterkit/clusterkit") do |r|
5
+ if ENV["CLUSTERKIT_FEATURES"]
6
+ r.extra_cargo_args += ["--no-default-features"]
7
+ r.features = ENV["CLUSTERKIT_FEATURES"].split(",")
8
+ elsif RUBY_PLATFORM =~ /darwin/
9
+ r.extra_cargo_args += ["--no-default-features"]
10
+ r.features = ["macos-accelerate"]
11
+ end
12
+ end
@@ -1,5 +1,6 @@
1
- use magnus::{function, prelude::*, Error, Value, RArray, RHash, Integer, TryConvert};
1
+ use magnus::{function, prelude::*, Error, Value, RHash, Ruby};
2
2
  use hdbscan::{Hdbscan, HdbscanHyperParams};
3
+ use crate::utils::ruby_array_to_vec_vec_f64;
3
4
 
4
5
  /// Perform HDBSCAN clustering
5
6
  /// Returns a hash with labels and basic statistics
@@ -9,98 +10,62 @@ pub fn hdbscan_fit(
9
10
  min_cluster_size: usize,
10
11
  metric: String,
11
12
  ) -> Result<RHash, Error> {
12
- // Convert Ruby array to ndarray
13
- let rarray: RArray = TryConvert::try_convert(data)?;
14
- let n_samples = rarray.len();
15
-
16
- if n_samples == 0 {
17
- return Err(Error::new(
18
- magnus::exception::arg_error(),
19
- "Data cannot be empty",
20
- ));
21
- }
22
-
23
- // Get dimensions
24
- let first_row: RArray = rarray.entry::<RArray>(0)?;
25
- let n_features = first_row.len();
26
-
27
- // Convert to Vec<Vec<f64>> format expected by hdbscan crate
28
- let mut data_vec: Vec<Vec<f64>> = Vec::with_capacity(n_samples);
29
- for i in 0..n_samples {
30
- let row: RArray = rarray.entry(i as isize)?;
31
- let mut row_vec: Vec<f64> = Vec::with_capacity(n_features);
32
- for j in 0..n_features {
33
- let val: f64 = row.entry(j as isize)?;
34
- row_vec.push(val);
35
- }
36
- data_vec.push(row_vec);
37
- }
38
-
39
- // Note: hdbscan crate doesn't support custom metrics directly
40
- // We'll use the default Euclidean distance for now
13
+ let ruby = Ruby::get().unwrap();
14
+
15
+ // Convert Ruby array to Vec<Vec<f64>> using shared helper
16
+ let data_vec = ruby_array_to_vec_vec_f64(data)?;
17
+ let n_samples = data_vec.len();
18
+
41
19
  if metric != "euclidean" && metric != "l2" {
42
20
  eprintln!("Warning: Current hdbscan version only supports Euclidean distance. Using Euclidean.");
43
21
  }
44
-
22
+
45
23
  // Adjust parameters to avoid index out of bounds errors
46
- // The hdbscan crate has issues when min_samples >= n_samples
47
24
  let adjusted_min_samples = min_samples.min(n_samples.saturating_sub(1)).max(1);
48
25
  let adjusted_min_cluster_size = min_cluster_size.min(n_samples).max(2);
49
-
26
+
50
27
  // Create hyperparameters
51
28
  let hyper_params = HdbscanHyperParams::builder()
52
29
  .min_cluster_size(adjusted_min_cluster_size)
53
30
  .min_samples(adjusted_min_samples)
54
31
  .build();
55
-
32
+
56
33
  // Create HDBSCAN instance and run clustering
57
34
  let clusterer = Hdbscan::new(&data_vec, hyper_params);
58
-
59
- // Run the clustering algorithm - cluster() returns Result<Vec<i32>, HdbscanError>
35
+
60
36
  let labels = clusterer.cluster().map_err(|e| {
61
37
  Error::new(
62
- magnus::exception::runtime_error(),
38
+ ruby.exception_runtime_error(),
63
39
  format!("HDBSCAN clustering failed: {:?}", e)
64
40
  )
65
41
  })?;
66
-
42
+
67
43
  // Convert results to Ruby types
68
- let ruby = magnus::Ruby::get().unwrap();
69
- let result = RHash::new();
70
-
71
- // Convert labels (i32 to Ruby Integer, -1 for noise)
72
- let labels_array = RArray::new();
44
+ let result = ruby.hash_new();
45
+
46
+ let labels_array = ruby.ary_new();
73
47
  for &label in labels.iter() {
74
- labels_array.push(Integer::from_value(
75
- ruby.eval(&format!("{}", label)).unwrap()
76
- ).unwrap())?;
48
+ labels_array.push(ruby.integer_from_i64(label as i64))?;
77
49
  }
78
50
  result.aset("labels", labels_array)?;
79
-
80
- // For now, we'll create dummy probabilities and outlier scores
81
- // since the basic hdbscan crate doesn't provide these
82
- // In the future, we could calculate these ourselves or use a more advanced implementation
83
-
84
- // Create probabilities array (all 1.0 for clustered points, 0.0 for noise)
85
- let probs_array = RArray::new();
51
+
52
+ let probs_array = ruby.ary_new();
86
53
  for &label in labels.iter() {
87
54
  let prob = if label == -1 { 0.0 } else { 1.0 };
88
55
  probs_array.push(prob)?;
89
56
  }
90
57
  result.aset("probabilities", probs_array)?;
91
-
92
- // Create outlier scores array (0.0 for clustered points, 1.0 for noise)
93
- let outlier_array = RArray::new();
58
+
59
+ let outlier_array = ruby.ary_new();
94
60
  for &label in labels.iter() {
95
61
  let score = if label == -1 { 1.0 } else { 0.0 };
96
62
  outlier_array.push(score)?;
97
63
  }
98
64
  result.aset("outlier_scores", outlier_array)?;
99
-
100
- // Create empty cluster persistence hash for now
101
- let persistence_hash = RHash::new();
65
+
66
+ let persistence_hash = ruby.hash_new();
102
67
  result.aset("cluster_persistence", persistence_hash)?;
103
-
68
+
104
69
  Ok(result)
105
70
  }
106
71
 
@@ -110,6 +75,6 @@ pub fn init(clustering_module: &magnus::RModule) -> Result<(), Error> {
110
75
  "hdbscan_rust",
111
76
  function!(hdbscan_fit, 4),
112
77
  )?;
113
-
78
+
114
79
  Ok(())
115
- }
80
+ }