RubyGems - clusterkit - Versions diffs - 0.3.0-arm64-darwin - Mend

clusterkit 0.3.0-arm64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (59) hide show

checksums.yaml +7 -0
data/.rspec +3 -0
data/.simplecov +47 -0
data/CHANGELOG.md +35 -0
data/CLAUDE.md +226 -0
data/Cargo.lock +3228 -0
data/Cargo.toml +8 -0
data/Gemfile +17 -0
data/IMPLEMENTATION_NOTES.md +143 -0
data/LICENSE.txt +21 -0
data/PYTHON_COMPARISON.md +183 -0
data/README.md +744 -0
data/Rakefile +259 -0
data/docs/KNOWN_ISSUES.md +130 -0
data/docs/RUST_ERROR_HANDLING.md +164 -0
data/docs/TEST_FIXTURES.md +170 -0
data/docs/UMAP_EXPLAINED.md +362 -0
data/docs/UMAP_TROUBLESHOOTING.md +284 -0
data/docs/VERBOSE_OUTPUT.md +84 -0
data/docs/assets/clusterkit-wide.png +0 -0
data/docs/assets/clusterkit.png +0 -0
data/docs/assets/visualization.png +0 -0
data/examples/hdbscan_example.rb +147 -0
data/examples/optimal_kmeans_example.rb +96 -0
data/examples/pca_example.rb +114 -0
data/examples/reproducible_umap.rb +99 -0
data/examples/verbose_control.rb +43 -0
data/ext/clusterkit/Cargo.toml +26 -0
data/ext/clusterkit/extconf.rb +23 -0
data/ext/clusterkit/src/clustering/hdbscan_wrapper.rs +80 -0
data/ext/clusterkit/src/clustering.rs +221 -0
data/ext/clusterkit/src/embedder.rs +349 -0
data/ext/clusterkit/src/hnsw.rs +579 -0
data/ext/clusterkit/src/lib.rs +24 -0
data/ext/clusterkit/src/svd.rs +89 -0
data/ext/clusterkit/src/tests.rs +16 -0
data/ext/clusterkit/src/utils.rs +183 -0
data/lib/clusterkit/3.1/clusterkit.bundle +0 -0
data/lib/clusterkit/3.2/clusterkit.bundle +0 -0
data/lib/clusterkit/3.3/clusterkit.bundle +0 -0
data/lib/clusterkit/3.4/clusterkit.bundle +0 -0
data/lib/clusterkit/clustering/hdbscan.rb +164 -0
data/lib/clusterkit/clustering.rb +194 -0
data/lib/clusterkit/clusterkit.rb +14 -0
data/lib/clusterkit/configuration.rb +24 -0
data/lib/clusterkit/data_validator.rb +132 -0
data/lib/clusterkit/dimensionality/pca.rb +251 -0
data/lib/clusterkit/dimensionality/svd.rb +175 -0
data/lib/clusterkit/dimensionality/umap.rb +282 -0
data/lib/clusterkit/dimensionality.rb +29 -0
data/lib/clusterkit/hdbscan_api_design.rb +142 -0
data/lib/clusterkit/hnsw.rb +251 -0
data/lib/clusterkit/preprocessing.rb +106 -0
data/lib/clusterkit/silence.rb +42 -0
data/lib/clusterkit/utils.rb +51 -0
data/lib/clusterkit/version.rb +5 -0
data/lib/clusterkit.rb +105 -0
data/lib/tasks/visualize.rake +641 -0
metadata +214 -0

data/examples/pca_example.rb ADDED Viewed

@@ -0,0 +1,114 @@
+#!/usr/bin/env ruby
+# Example script: runs ClusterKit's PCA on synthetic data, prints the explained
+# variance per component, reconstructs a sample, and compares PCA with SVD.
+require 'bundler/setup'
+require 'clusterkit'
+puts "PCA Example - Dimensionality Reduction and Variance Analysis"
+puts "=" * 60
+# Generate sample data with clear structure
+# High variance in first 2 dimensions, low variance in others
+def generate_structured_data(n_samples: 100, n_features: 20)
+  data = []
+  n_samples.times do
+    point = []
+    # First dimension: high variance (range ~10)
+    point << rand * 10
+    # Second dimension: medium variance (range ~5)
+    point << rand * 5
+    # Third dimension: some variance (range ~2)
+    point << rand * 2
+    # Remaining dimensions: very low variance (noise)
+    (n_features - 3).times do
+      point << rand * 0.1
+    end
+    data << point
+  end
+  data
+end
+# Generate data
+data = generate_structured_data(n_samples: 100, n_features: 20)
+puts "\nGenerated #{data.size} samples with #{data.first.size} features"
+# Perform PCA with different numbers of components
+# A new PCA object is created and fitted on the same data for every setting.
+[2, 3, 5, 10].each do |n_components|
+  puts "\n" + "-" * 40
+  puts "PCA with #{n_components} components:"
+  pca = ClusterKit::PCA.new(n_components: n_components)
+  transformed = pca.fit_transform(data)
+  puts "  Transformed shape: #{transformed.size} x #{transformed.first.size}"
+  # Show explained variance for each component
+  puts "  Explained variance ratio:"
+  pca.explained_variance_ratio.each_with_index do |ratio, i|
+    puts "    PC#{i+1}: #{(ratio * 100).round(2)}%"
+  end
+  # Show cumulative explained variance
+  # The last element of the cumulative series is reported as the total.
+  cumulative = pca.cumulative_explained_variance_ratio[-1]
+  puts "  Total variance explained: #{(cumulative * 100).round(2)}%"
+end
+# Demonstrate reconstruction
+puts "\n" + "=" * 60
+puts "Reconstruction Example:"
+puts "-" * 40
+# Use 2 components (should capture most variance)
+pca_2 = ClusterKit::PCA.new(n_components: 2)
+compressed = pca_2.fit_transform(data)
+reconstructed = pca_2.inverse_transform(compressed)
+# Calculate reconstruction error
+# Only the first sample is inspected; the error below is for that one point.
+sample_idx = 0
+original = data[sample_idx]
+recon = reconstructed[sample_idx]
+puts "\nOriginal data point (first 5 features):"
+puts "  #{original[0..4].map { |v| v.round(3) }.join(', ')}"
+puts "\nReconstructed from 2 components (first 5 features):"
+puts "  #{recon[0..4].map { |v| v.round(3) }.join(', ')}"
+# Calculate mean squared error
+# Averaged over the features of the selected sample.
+mse = original.zip(recon).map { |o, r| (o - r) ** 2 }.sum / original.size
+puts "\nReconstruction MSE: #{mse.round(4)}"
+# Demonstrate data compression ratio
+# Sizes are counted in stored values (rows x columns); the ratio is the
+# percentage of values removed by keeping only the reduced representation.
+original_size = data.size * data.first.size
+compressed_size = compressed.size * compressed.first.size
+compression_ratio = (1 - compressed_size.to_f / original_size) * 100
+puts "\nData Compression:"
+puts "  Original size: #{original_size} values"
+puts "  Compressed size: #{compressed_size} values"
+puts "  Compression ratio: #{compression_ratio.round(1)}%"
+puts "  Variance retained: #{(pca_2.cumulative_explained_variance_ratio[-1] * 100).round(1)}%"
+# Compare with SVD
+puts "\n" + "=" * 60
+puts "PCA vs SVD Comparison:"
+puts "-" * 40
+# PCA (with mean centering)
+pca = ClusterKit::PCA.new(n_components: 2)
+pca_result = pca.fit_transform(data)
+# SVD (without mean centering)
+u, s, vt = ClusterKit.svd(data, 2)
+svd_result = u
+puts "PCA result (first point): #{pca_result[0].map { |v| v.round(3) }}"
+puts "SVD result (first point): #{svd_result[0].map { |v| v.round(3) }}"
+puts "\nNote: PCA centers the data (subtracts mean), SVD does not."
+puts "This makes PCA better for finding principal components of variation."

data/examples/reproducible_umap.rb ADDED Viewed

@@ -0,0 +1,99 @@
+#!/usr/bin/env ruby
+# Example: Achieving reproducibility with UMAP despite random seed issues
+require_relative '../lib/clusterkit'
+require 'json'
+# Due to upstream limitations, UMAP doesn't give perfectly reproducible results
+# even with a fixed random_seed. Here are workarounds:
+# Generate sample data
+srand(42)
+data = []
+3.times do |cluster|
+  center = Array.new(50) { rand * 0.1 + cluster * 2.0 }
+  30.times do
+    point = center.map { |c| c + (rand - 0.5) * 0.3 }
+    data << point
+  end
+end
+puts "Workaround 1: Cache transformed results"
+puts "=" * 60
+# First run: transform and save results
+# Later runs read the JSON file instead of re-running UMAP. The path is
+# relative, so the cache is looked up in the current working directory.
+cache_file = "umap_results_cache.json"
+if File.exist?(cache_file)
+  puts "Loading cached results from #{cache_file}"
+  embedded = JSON.parse(File.read(cache_file))
+else
+  puts "No cache found, running UMAP..."
+  umap = ClusterKit::Dimensionality::UMAP.new(
+    n_components: 2,
+    n_neighbors: 5,
+    random_seed: 42  # Still use for *some* consistency
+  )
+  embedded = umap.fit_transform(data)
+  # Save results for reproducibility
+  File.write(cache_file, JSON.pretty_generate(embedded))
+  puts "Results cached to #{cache_file}"
+end
+# Print the first three embedded points, rounded for readability.
+puts "First 3 points:"
+embedded[0..2].each_with_index do |point, i|
+  puts "  Point #{i}: [#{point[0].round(3)}, #{point[1].round(3)}]"
+end
+puts "\nWorkaround 2: Save and load fitted models"
+puts "=" * 60
+# Relative path: the model file is looked up in the current working directory.
+model_file = "umap_model.bin"
+# Train and save model once
+if File.exist?(model_file)
+  puts "Loading existing model from #{model_file}"
+  umap = ClusterKit::Dimensionality::UMAP.load(model_file)
+else
+  puts "Training new model..."
+  umap = ClusterKit::Dimensionality::UMAP.new(
+    n_components: 2,
+    n_neighbors: 5,
+    random_seed: 42
+  )
+  umap.fit(data)
+  umap.save(model_file)
+  puts "Model saved to #{model_file}"
+end
+# Now transform new data with the same model
+# The "new" data is simply a slice of the training data, for demonstration.
+new_data = data[0..9]  # Take first 10 points as "new" data
+transformed = umap.transform(new_data)
+puts "Transformed 10 new points with saved model"
+puts "First 3 transformed points:"
+transformed[0..2].each_with_index do |point, i|
+  puts "  Point #{i}: [#{point[0].round(3)}, #{point[1].round(3)}]"
+end
+puts "\nWorkaround 3: Use PCA for deterministic reduction"
+puts "=" * 60
+# PCA is deterministic - same input always gives same output
+# The same PCA object is fitted twice on the same data and the first
+# projected point of each run is compared.
+pca = ClusterKit::Dimensionality::PCA.new(n_components: 2)
+pca_result1 = pca.fit_transform(data)
+pca_result2 = pca.fit_transform(data)  # Do it again
+puts "PCA results are identical: #{pca_result1[0] == pca_result2[0]}"
+puts "First point from run 1: [#{pca_result1[0][0].round(3)}, #{pca_result1[0][1].round(3)}]"
+puts "First point from run 2: [#{pca_result2[0][0].round(3)}, #{pca_result2[0][1].round(3)}]"
+puts "\nRecommendations:"
+puts "-" * 40
+puts "1. For production pipelines, cache UMAP results"
+puts "2. For model deployment, save fitted models and reuse them"
+puts "3. For testing/CI, use PCA or cached test data"
+puts "4. Accept small variations in UMAP results as normal"
+# Clean up example files (uncomment to remove)
+# Both files are created in the working directory by the workarounds above.
+# File.delete(cache_file) if File.exist?(cache_file)
+# File.delete(model_file) if File.exist?(model_file)

data/examples/verbose_control.rb ADDED Viewed

@@ -0,0 +1,43 @@
+#!/usr/bin/env ruby
+# Example demonstrating how to control verbose output from clusterkit
+require 'bundler/setup'
+require 'clusterkit'
+# Generate some random test data
+data = Array.new(50) { Array.new(20) { rand } }
+puts "=" * 60
+puts "clusterkit Verbose Output Control Demo"
+puts "=" * 60
+puts "\n1. Default behavior (quiet mode):"
+puts "-" * 40
+umap1 = ClusterKit::UMAP.new(n_components: 2, n_neighbors: 10)
+result1 = umap1.fit_transform(data)
+puts "✓ UMAP completed silently"
+puts "  Result shape: #{result1.length} x #{result1.first.length}"
+puts "\n2. Enable verbose output:"
+puts "-" * 40
+ClusterKit.configure do |config|
+  config.verbose = true
+end
+umap2 = ClusterKit::UMAP.new(n_components: 2, n_neighbors: 10)
+puts "Running UMAP with verbose output enabled..."
+result2 = umap2.fit_transform(data)
+puts "✓ UMAP completed with debug output"
+puts "\n3. Back to quiet mode:"
+puts "-" * 40
+ClusterKit.configuration.verbose = false
+umap3 = ClusterKit::UMAP.new(n_components: 2, n_neighbors: 10)
+result3 = umap3.fit_transform(data)
+puts "✓ UMAP completed silently again"
+puts "\n" + "=" * 60
+puts "You can also set verbose mode via environment variable:"
+puts "  ANNEMBED_VERBOSE=true ruby your_script.rb"
+puts "=" * 60

data/ext/clusterkit/Cargo.toml ADDED Viewed

@@ -0,0 +1,26 @@
+[package]
+name = "clusterkit"
+version = "0.1.0"
+edition = "2021"
+[lib]
+crate-type = ["cdylib"]
+[dependencies]
+magnus = { version = "0.8", features = ["embed"] }
+annembed = { git = "https://github.com/scientist-labs/annembed", tag = "clusterkit-0.2.6" }
+hnsw_rs = { git = "https://github.com/scientist-labs/hnswlib-rs", tag = "clusterkit-0.1.0" }
+hdbscan = "0.11"
+ndarray = "0.16"
+num-traits = "0.2"
+rayon = "1.7"
+serde = { version = "1.0", features = ["derive"] }
+bincode = "1.3"
+rand = "0.8"
+[features]
+default = ["openblas-static"]
+openblas-static = ["annembed/openblas-static"]
+openblas-system = ["annembed/openblas-system"]
+intel-mkl-static = ["annembed/intel-mkl-static"]
+macos-accelerate = ["annembed/macos-accelerate"]

data/ext/clusterkit/extconf.rb ADDED Viewed

@@ -0,0 +1,23 @@
+require "mkmf"
+require "rb_sys/mkmf"
+create_rust_makefile("clusterkit/clusterkit") do |r|
+  if ENV["CLUSTERKIT_FEATURES"]
+    # Explicit override wins (set CLUSTERKIT_FEATURES=openblas-static,... to force a backend).
+    r.extra_cargo_args += ["--no-default-features"]
+    r.features = ENV["CLUSTERKIT_FEATURES"].split(",")
+  elsif RUBY_PLATFORM =~ /darwin/
+    # macOS links the system Accelerate framework — no OpenBLAS build needed.
+    r.extra_cargo_args += ["--no-default-features"]
+    r.features = ["macos-accelerate"]
+  elsif RUBY_PLATFORM =~ /linux/
+    # Linux: link the SYSTEM OpenBLAS/LAPACK (apt: libopenblas-dev liblapack-dev
+    # gfortran, provided by the rust-gem-cross image) instead of the default
+    # `openblas-static` feature, which compiles OpenBLAS from C+Fortran source.
+    # rb-sys-dock does NOT forward host env to extconf and a .cargo/config.toml
+    # [env] only reaches cargo-spawned procs (not mkmf), so this backend choice
+    # must live in committed code — it cannot be passed via a workflow input.
+    r.extra_cargo_args += ["--no-default-features"]
+    r.features = ["openblas-system"]
+  end
+end

data/ext/clusterkit/src/clustering/hdbscan_wrapper.rs ADDED Viewed

@@ -0,0 +1,80 @@
+use magnus::{function, prelude::*, Error, Value, RHash, Ruby};
+use hdbscan::{Hdbscan, HdbscanHyperParams};
+use crate::utils::ruby_array_to_vec_vec_f64;
+/// Run HDBSCAN on `data` and return the outcome as a Ruby hash.
+///
+/// The hash holds four string keys:
+/// * `"labels"` - one integer per sample (`-1` marks noise),
+/// * `"probabilities"` - `0.0` for noise points and `1.0` for clustered points,
+/// * `"outlier_scores"` - `1.0` for noise points and `0.0` for clustered points,
+/// * `"cluster_persistence"` - an empty hash.
+///
+/// `metric` is only inspected to warn on stderr when it is neither
+/// `"euclidean"` nor `"l2"`; the hyperparameters passed on are the two clamped
+/// sizes only. A clustering failure is raised as a Ruby `RuntimeError`.
+pub fn hdbscan_fit(
+    data: Value,
+    min_samples: usize,
+    min_cluster_size: usize,
+    metric: String,
+) -> Result<RHash, Error> {
+    let ruby = Ruby::get().unwrap();
+    // Shared helper turns the Ruby array of arrays into Vec<Vec<f64>>
+    let points = ruby_array_to_vec_vec_f64(data)?;
+    let point_count = points.len();
+    let metric_is_euclidean = metric == "euclidean" || metric == "l2";
+    if !metric_is_euclidean {
+        eprintln!("Warning: Current hdbscan version only supports Euclidean distance. Using Euclidean.");
+    }
+    // Clamp both sizes against the sample count to avoid index out of bounds errors
+    let clamped_min_samples = min_samples.min(point_count.saturating_sub(1)).max(1);
+    let clamped_min_cluster_size = min_cluster_size.min(point_count).max(2);
+    let params = HdbscanHyperParams::builder()
+        .min_cluster_size(clamped_min_cluster_size)
+        .min_samples(clamped_min_samples)
+        .build();
+    let clusterer = Hdbscan::new(&points, params);
+    let assignments = match clusterer.cluster() {
+        Ok(found) => found,
+        Err(e) => {
+            return Err(Error::new(
+                ruby.exception_runtime_error(),
+                format!("HDBSCAN clustering failed: {:?}", e)
+            ));
+        }
+    };
+    // Fill the three per-sample arrays in a single pass over the labels
+    let labels_out = ruby.ary_new();
+    let probabilities_out = ruby.ary_new();
+    let outlier_scores_out = ruby.ary_new();
+    for &label in assignments.iter() {
+        let is_noise = label == -1;
+        let probability: f64 = if is_noise { 0.0 } else { 1.0 };
+        let outlier_score: f64 = if is_noise { 1.0 } else { 0.0 };
+        labels_out.push(ruby.integer_from_i64(label as i64))?;
+        probabilities_out.push(probability)?;
+        outlier_scores_out.push(outlier_score)?;
+    }
+    // Keys are inserted in the same order as before so hash ordering is unchanged
+    let summary = ruby.hash_new();
+    summary.aset("labels", labels_out)?;
+    summary.aset("probabilities", probabilities_out)?;
+    summary.aset("outlier_scores", outlier_scores_out)?;
+    summary.aset("cluster_persistence", ruby.hash_new())?;
+    Ok(summary)
+}
+/// Register the HDBSCAN entry point on `clustering_module`.
+///
+/// Exposes [`hdbscan_fit`] as the singleton method `hdbscan_rust` taking four
+/// arguments, and returns any error reported while defining it.
+pub fn init(clustering_module: &magnus::RModule) -> Result<(), Error> {
+    clustering_module.define_singleton_method("hdbscan_rust", function!(hdbscan_fit, 4))
+}

data/ext/clusterkit/src/clustering.rs ADDED Viewed

@@ -0,0 +1,221 @@
+use magnus::{function, prelude::*, Error, Value, RArray, Ruby};
+use ndarray::{Array1, Array2, ArrayView1, Axis};
+use rand::prelude::*;
+use rand::rngs::StdRng;
+use rand::SeedableRng;
+use crate::utils::ruby_array_to_ndarray2;
+mod hdbscan_wrapper;
+/// Define the `Clustering` module under `parent` and register its native functions.
+///
+/// Registers `kmeans_rust` (4 arguments) and `kmeans_predict_rust` (2 arguments)
+/// as singleton methods, then lets the HDBSCAN wrapper add its own.
+pub fn init(parent: &magnus::RModule) -> Result<(), Error> {
+    let clustering_module = parent.define_module("Clustering")?;
+    clustering_module.define_singleton_method("kmeans_rust", function!(kmeans, 4))?;
+    clustering_module.define_singleton_method("kmeans_predict_rust", function!(kmeans_predict, 2))?;
+    // The HDBSCAN wrapper's result becomes this function's result
+    hdbscan_wrapper::init(&clustering_module)
+}
+/// Perform K-means clustering
+/// Returns (labels, centroids, inertia)
+///
+/// * `data` - Ruby array of samples, converted with the shared ndarray helper
+/// * `k` - number of clusters; must be between 1 and the number of samples
+/// * `max_iter` - upper bound on assignment/update rounds
+/// * `random_seed` - optional seed forwarded to the K-means++ initialization
+///
+/// `labels` holds one cluster index per sample, `centroids` is a `k`-row array
+/// of feature arrays, and `inertia` is the sum of squared distances from each
+/// sample to its assigned centroid.
+///
+/// # Errors
+/// Raises a Ruby `ArgumentError` when `k` is zero or exceeds the sample count,
+/// and propagates conversion or array-building errors.
+fn kmeans(data: Value, k: usize, max_iter: usize, random_seed: Option<i64>) -> Result<(RArray, RArray, f64), Error> {
+    let ruby = Ruby::get().unwrap();
+    // Convert Ruby array to ndarray using shared helper
+    let data_array = ruby_array_to_ndarray2(data)?;
+    let (n_samples, n_features) = data_array.dim();
+    // With k == 0 the initialization would write to a centroid row that does
+    // not exist and panic; report it as an argument error instead.
+    if k == 0 {
+        return Err(Error::new(
+            ruby.exception_arg_error(),
+            "k must be at least 1",
+        ));
+    }
+    if k > n_samples {
+        return Err(Error::new(
+            ruby.exception_arg_error(),
+            format!("k ({}) cannot be larger than number of samples ({})", k, n_samples),
+        ));
+    }
+    // Initialize centroids using K-means++
+    let mut centroids = kmeans_plusplus(&data_array, k, random_seed)?;
+    let mut labels = vec![0usize; n_samples];
+    // K-means iterations
+    for iteration in 0..max_iter {
+        // Assign points to nearest centroid
+        let mut changed = false;
+        for i in 0..n_samples {
+            let point = data_array.row(i);
+            let mut min_dist = f64::INFINITY;
+            let mut best_cluster = 0;
+            for (j, centroid) in centroids.axis_iter(Axis(0)).enumerate() {
+                let dist = euclidean_distance(&point, &centroid);
+                if dist < min_dist {
+                    min_dist = dist;
+                    best_cluster = j;
+                }
+            }
+            if labels[i] != best_cluster {
+                changed = true;
+            }
+            labels[i] = best_cluster;
+        }
+        // Check for convergence
+        // The first round is never treated as converged: labels start at 0, so
+        // "nothing changed" there only means every point landed in cluster 0.
+        if !changed && iteration > 0 {
+            break;
+        }
+        // Update centroids
+        for j in 0..k {
+            let mut sum = Array1::<f64>::zeros(n_features);
+            let mut count = 0;
+            for i in 0..n_samples {
+                if labels[i] == j {
+                    sum += &data_array.row(i);
+                    count += 1;
+                }
+            }
+            // A cluster that received no points keeps its previous centroid
+            if count > 0 {
+                centroids.row_mut(j).assign(&(sum / count as f64));
+            }
+        }
+    }
+    // Calculate inertia (sum of squared distances to nearest centroid)
+    let mut inertia = 0.0;
+    for i in 0..n_samples {
+        let point = data_array.row(i);
+        let centroid = centroids.row(labels[i]);
+        inertia += euclidean_distance(&point, &centroid).powi(2);
+    }
+    // Convert results to Ruby arrays
+    let labels_array = ruby.ary_new();
+    for label in labels {
+        labels_array.push(ruby.integer_from_i64(label as i64))?;
+    }
+    let centroids_array = ruby.ary_new();
+    for i in 0..k {
+        let row_array = ruby.ary_new();
+        for j in 0..n_features {
+            row_array.push(centroids[[i, j]])?;
+        }
+        centroids_array.push(row_array)?;
+    }
+    Ok((labels_array, centroids_array, inertia))
+}
+/// Predict cluster labels for new data given centroids
+///
+/// * `data` - Ruby array of samples to label
+/// * `centroids` - Ruby array of centroid rows
+///
+/// Returns a Ruby array with, for each sample, the index of the centroid at the
+/// smallest Euclidean distance (the first one wins on ties).
+///
+/// # Errors
+/// Raises a Ruby `ArgumentError` when there are samples to label but no
+/// centroids, or when samples and centroids have different feature counts
+/// (the distance would otherwise be computed over the shorter of the two).
+/// Conversion errors from the shared helper are propagated.
+fn kmeans_predict(data: Value, centroids: Value) -> Result<RArray, Error> {
+    let ruby = Ruby::get().unwrap();
+    // Convert inputs using shared helpers
+    let data_matrix = ruby_array_to_ndarray2(data)?;
+    let centroids_matrix = ruby_array_to_ndarray2(centroids)?;
+    let (n_samples, n_features) = data_matrix.dim();
+    let (n_centroids, centroid_features) = centroids_matrix.dim();
+    // Only validate when there is something to label, so empty input still
+    // yields an empty result.
+    if n_samples > 0 {
+        if n_centroids == 0 {
+            return Err(Error::new(
+                ruby.exception_arg_error(),
+                "centroids cannot be empty",
+            ));
+        }
+        if n_features != centroid_features {
+            return Err(Error::new(
+                ruby.exception_arg_error(),
+                format!(
+                    "data has {} features but centroids have {}",
+                    n_features, centroid_features
+                ),
+            ));
+        }
+    }
+    // Predict labels
+    let labels_array = ruby.ary_new();
+    for i in 0..n_samples {
+        let point = data_matrix.row(i);
+        let mut min_dist = f64::INFINITY;
+        let mut best_cluster = 0;
+        for (j, centroid) in centroids_matrix.axis_iter(Axis(0)).enumerate() {
+            let dist = euclidean_distance(&point, &centroid);
+            if dist < min_dist {
+                min_dist = dist;
+                best_cluster = j;
+            }
+        }
+        labels_array.push(ruby.integer_from_i64(best_cluster as i64))?;
+    }
+    Ok(labels_array)
+}
+/// K-means++ initialization
+///
+/// Picks `k` rows of `data` as starting centroids: the first uniformly at
+/// random, each later one with probability proportional to its squared distance
+/// from the nearest centroid chosen so far.
+///
+/// * `data` - sample matrix, one row per sample
+/// * `k` - number of centroids to produce
+/// * `random_seed` - when present, seeds a `StdRng` (the `i64` is reinterpreted
+///   as `u64`); otherwise the thread-local RNG is used
+///
+/// Returns a `k x n_features` matrix; for `k == 0` the matrix has no rows.
+///
+/// # Errors
+/// Raises a Ruby `ArgumentError` when `k > 0` and `data` has no rows.
+fn kmeans_plusplus(data: &Array2<f64>, k: usize, random_seed: Option<i64>) -> Result<Array2<f64>, Error> {
+    let n_samples = data.nrows();
+    let n_features = data.ncols();
+    // Nothing to pick: return an empty matrix rather than writing to row 0
+    if k == 0 {
+        return Ok(Array2::<f64>::zeros((0, n_features)));
+    }
+    // Drawing an index from an empty range would panic
+    if n_samples == 0 {
+        let ruby = Ruby::get().unwrap();
+        return Err(Error::new(
+            ruby.exception_arg_error(),
+            "cannot initialize centroids from an empty dataset",
+        ));
+    }
+    // Use seeded RNG if seed is provided, otherwise use thread_rng
+    let mut rng: Box<dyn RngCore> = match random_seed {
+        Some(seed) => {
+            let seed_u64 = seed as u64;
+            Box::new(StdRng::seed_from_u64(seed_u64))
+        },
+        None => Box::new(thread_rng()),
+    };
+    let mut centroids = Array2::<f64>::zeros((k, n_features));
+    // Choose first centroid randomly
+    let first_idx = rng.gen_range(0..n_samples);
+    centroids.row_mut(0).assign(&data.row(first_idx));
+    // distances[j] is the distance from sample j to its nearest chosen centroid.
+    // It is kept across rounds: only the centroid added last can lower a
+    // minimum, so each round compares against that one centroid alone.
+    let mut distances = vec![f64::INFINITY; n_samples];
+    // Choose remaining centroids
+    for i in 1..k {
+        let newest = centroids.row(i - 1);
+        for (j, nearest) in distances.iter_mut().enumerate() {
+            let dist = euclidean_distance(&data.row(j), &newest);
+            if dist < *nearest {
+                *nearest = dist;
+            }
+        }
+        let total: f64 = distances.iter().map(|d| d * d).sum();
+        // Every sample coincides with a chosen centroid, so weighted sampling
+        // is impossible; fall back to a fixed row.
+        if total == 0.0 {
+            if i < n_samples {
+                centroids.row_mut(i).assign(&data.row(i));
+            } else {
+                centroids.row_mut(i).assign(&data.row(0));
+            }
+            continue;
+        }
+        // Weighted draw: walk the cumulative squared distances until the
+        // random threshold is reached.
+        let mut cumsum = 0.0;
+        let rand_val: f64 = rng.gen::<f64>() * total;
+        for j in 0..n_samples {
+            cumsum += distances[j] * distances[j];
+            if cumsum >= rand_val {
+                centroids.row_mut(i).assign(&data.row(j));
+                break;
+            }
+        }
+    }
+    Ok(centroids)
+}
+/// Euclidean (L2) distance between the points `a` and `b`.
+///
+/// Elements are paired positionally; if the views differ in length, only the
+/// leading elements present in both contribute.
+fn euclidean_distance(a: &ArrayView1<f64>, b: &ArrayView1<f64>) -> f64 {
+    let squared_total: f64 = a
+        .iter()
+        .zip(b.iter())
+        .map(|(lhs, rhs)| {
+            let delta = lhs - rhs;
+            delta.powi(2)
+        })
+        .sum();
+    squared_total.sqrt()
+}