gte 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,31 @@
1
+ pub mod embedder;
2
+ pub mod error;
3
+ pub mod model_config;
4
+ pub mod postprocess;
5
+ pub mod session;
6
+ pub mod tokenizer;
7
+
8
+ #[cfg(feature = "ruby-ffi")]
9
+ mod ruby_embedder;
10
+
11
+ #[cfg(feature = "ruby-ffi")]
12
+ use magnus::{prelude::*, Error, Ruby};
13
+
14
#[cfg(feature = "ruby-ffi")]
#[magnus::init]
fn init(ruby: &Ruby) -> Result<(), Error> {
    // Extension entry point: define the root GTE module, a GTE::Error class
    // (subclass of StandardError) that Rust-side failures surface through,
    // and register the Embedder/Tensor classes.
    let module = ruby.define_module("GTE")?;
    module.define_error("Error", ruby.exception_standard_error())?;
    crate::ruby_embedder::register(ruby)?;
    // Log panics that escape Rust code before they hit the VM boundary.
    // NOTE(review): this hook is process-global and replaces any hook set by
    // other native extensions in the same process — confirm acceptable.
    std::panic::set_hook(Box::new(|info| {
        let msg = info
            .payload()
            .downcast_ref::<&str>()
            .copied()
            .or_else(|| info.payload().downcast_ref::<String>().map(|s| s.as_str()))
            .unwrap_or("unknown panic");
        eprintln!("GTE Rust panic: {msg}");
    }));

    Ok(())
}
@@ -0,0 +1,17 @@
1
/// How the final embedding is extracted from the model's output tensor.
#[derive(Debug, Clone, Copy)]
pub enum ExtractorMode {
    /// Keep the hidden state of a single token at the given sequence index
    /// (e.g. 0 for a CLS-style embedding); requires rank-3 output.
    Token(usize),
    /// Attention-mask-weighted average over the sequence axis; requires
    /// rank-3 output.
    MeanPool,
    /// Use the model output as-is; requires rank-2 output.
    Raw,
}
7
+
8
/// Static configuration describing how a model is loaded and queried.
#[derive(Debug, Clone)]
pub struct ModelConfig {
    /// Maximum token length; longer inputs are truncated by the tokenizer.
    pub max_length: usize,
    /// Name of the model output tensor holding the embeddings/hidden states.
    pub output_tensor: String,
    /// Strategy used to turn the output tensor into per-text embeddings.
    pub mode: ExtractorMode,
    /// Whether the tokenizer should emit `token_type_ids` for the model.
    pub with_type_ids: bool,
    /// Whether to feed `attention_mask` as a model input.
    pub with_attention_mask: bool,
    /// Intra-op thread count for the session; 0 keeps the runtime default.
    pub num_threads: usize,
    /// Graph optimization level: 0 = disabled, 1/2 = partial, 3+ = full.
    pub optimization_level: u8,
}
@@ -0,0 +1,113 @@
1
+ use crate::error::{GteError, Result};
2
+ use ndarray::{Array2, ArrayView2, ArrayView3};
3
+
4
+ pub fn mean_pool(
5
+ hidden_states: ArrayView3<'_, f32>,
6
+ attention_mask: ArrayView2<'_, i64>,
7
+ ) -> Result<Array2<f32>> {
8
+ let (batch, seq, dim) = hidden_states.dim();
9
+ if attention_mask.dim() != (batch, seq) {
10
+ return Err(GteError::Inference(format!(
11
+ "attention mask shape {:?} does not match hidden state shape ({batch}, {seq}, {dim})",
12
+ attention_mask.dim()
13
+ )));
14
+ }
15
+
16
+ let mut pooled = Array2::<f32>::zeros((batch, dim));
17
+
18
+ if let (Some(hidden), Some(mask), Some(output)) = (
19
+ hidden_states.as_slice_memory_order(),
20
+ attention_mask.as_slice_memory_order(),
21
+ pooled.as_slice_memory_order_mut(),
22
+ ) {
23
+ mean_pool_contiguous(hidden, mask, output, batch, seq, dim);
24
+ return Ok(pooled);
25
+ }
26
+
27
+ for batch_index in 0..batch {
28
+ let mut weight_sum = 0.0f32;
29
+ for token_index in 0..seq {
30
+ let weight = attention_mask[[batch_index, token_index]];
31
+ if weight <= 0 {
32
+ continue;
33
+ }
34
+
35
+ let weight = weight as f32;
36
+ for dim_index in 0..dim {
37
+ pooled[[batch_index, dim_index]] +=
38
+ hidden_states[[batch_index, token_index, dim_index]] * weight;
39
+ }
40
+ weight_sum += weight;
41
+ }
42
+
43
+ if weight_sum > 0.0 {
44
+ let inverse = weight_sum.recip();
45
+ pooled
46
+ .row_mut(batch_index)
47
+ .map_inplace(|value| *value *= inverse);
48
+ }
49
+ }
50
+
51
+ Ok(pooled)
52
+ }
53
+
54
+ pub fn normalize_l2(mut embeddings: Array2<f32>) -> Array2<f32> {
55
+ let cols = embeddings.ncols();
56
+ if let Some(data) = embeddings.as_slice_mut() {
57
+ for row in data.chunks_mut(cols) {
58
+ let norm = row.iter().map(|v| v * v).sum::<f32>().sqrt();
59
+ if norm > 0.0 {
60
+ let inv = norm.recip();
61
+ for v in row.iter_mut() {
62
+ *v *= inv;
63
+ }
64
+ }
65
+ }
66
+ return embeddings;
67
+ }
68
+ // non-contiguous fallback
69
+ for mut row in embeddings.rows_mut() {
70
+ let norm = row.iter().map(|value| value * value).sum::<f32>().sqrt();
71
+ if norm > 0.0 {
72
+ row.map_inplace(|value| *value *= norm.recip());
73
+ }
74
+ }
75
+ embeddings
76
+ }
77
+
78
/// Weighted mean-pool over contiguous row-major buffers.
///
/// `hidden` is (batch, seq, dim) flattened, `attention_mask` is (batch, seq)
/// flattened, `output` is (batch, dim) flattened and assumed zero-initialized
/// by the caller. Rows whose mask weights sum to zero are left untouched.
fn mean_pool_contiguous(
    hidden: &[f32],
    attention_mask: &[i64],
    output: &mut [f32],
    batch: usize,
    seq: usize,
    dim: usize,
) {
    if dim == 0 {
        // Nothing to accumulate; also keeps the chunk size below non-zero.
        return;
    }

    for batch_index in 0..batch {
        let out_row = &mut output[batch_index * dim..(batch_index + 1) * dim];
        let mask_row = &attention_mask[batch_index * seq..(batch_index + 1) * seq];
        let token_rows =
            hidden[batch_index * seq * dim..(batch_index + 1) * seq * dim].chunks(dim);
        let mut total_weight = 0.0f32;

        for (&mask_value, token_row) in mask_row.iter().zip(token_rows) {
            if mask_value <= 0 {
                // Padding token: contributes nothing.
                continue;
            }
            let w = mask_value as f32;
            for (acc, &component) in out_row.iter_mut().zip(token_row) {
                *acc += component * w;
            }
            total_weight += w;
        }

        if total_weight > 0.0 {
            let scale = total_weight.recip();
            for acc in out_row.iter_mut() {
                *acc *= scale;
            }
        }
    }
}
@@ -0,0 +1,222 @@
1
+ #![cfg(feature = "ruby-ffi")]
2
+
3
+ use crate::embedder::{normalize_l2, Embedder};
4
+ use crate::error::GteError;
5
+ use magnus::{function, method, prelude::*, wrap, Error, RArray, Ruby};
6
+ use std::os::raw::c_void;
7
+ use std::panic::{catch_unwind, AssertUnwindSafe};
8
+ use std::sync::Arc;
9
+
10
/// Ruby-visible wrapper (`GTE::Embedder`) around the native embedder.
#[wrap(class = "GTE::Embedder", free_immediately, size)]
pub struct RbEmbedder {
    // Arc so inference can be run through a raw pointer while the GVL is
    // released, without tying the embedder's lifetime to the Ruby wrapper.
    inner: Arc<Embedder>,
}
14
+
15
/// Ruby-visible wrapper (`GTE::Tensor`) holding a dense (rows x cols)
/// matrix of f32 embeddings.
#[wrap(class = "GTE::Tensor", free_immediately, size)]
pub struct RbTensor {
    rows: usize,
    cols: usize,
    // Row-major: element (r, c) lives at data[r * cols + c].
    data: Vec<f32>,
}
21
+
22
/// Argument bundle passed by raw pointer through
/// `rb_thread_call_without_gvl` to the no-GVL callback; `result` is filled
/// in by the callback before it returns.
struct InferArgs {
    embedder: *const Embedder,
    texts: *const Vec<String>,
    result: Option<Result<ndarray::Array2<f32>, GteError>>,
}

// SAFETY: the raw pointers reference an Arc-backed embedder and a local Vec
// that `infer_without_gvl` keeps alive for the full, blocking duration of
// the rb_thread_call_without_gvl call. NOTE(review): soundness also relies
// on Embedder being safe to use from the no-GVL context (effectively Sync)
// — confirm.
unsafe impl Send for InferArgs {}
29
+
30
/// Renders a caught panic payload as a human-readable message.
///
/// Panics raised via `panic!("...")` carry a `&str`, formatted panics carry
/// a `String`; anything else falls back to a generic message.
fn panic_payload_to_string(payload: Box<dyn std::any::Any + Send>) -> String {
    payload
        .downcast_ref::<&str>()
        .map(|msg| msg.to_string())
        .or_else(|| payload.downcast_ref::<String>().cloned())
        .unwrap_or_else(|| "unknown panic payload".to_string())
}
39
+
40
/// Runs tokenization + inference for `texts` with the Ruby GVL released, so
/// other Ruby threads can make progress during the (potentially long) model
/// call.
///
/// # Errors
/// Propagates tokenizer/inference failures as `magnus::Error`, and reports
/// a dedicated error if the callback never produced a result.
fn infer_without_gvl(embedder: &Arc<Embedder>, texts: Vec<String>) -> Result<ndarray::Array2<f32>, Error> {
    let embeddings = unsafe {
        // SAFETY: both raw pointers stay valid for the whole call —
        // `embedder` is borrowed for the duration of this function and
        // `texts` is owned by this stack frame, which blocks until
        // rb_thread_call_without_gvl returns.
        let mut args = InferArgs {
            embedder: Arc::as_ptr(embedder),
            texts: &texts as *const Vec<String>,
            result: None,
        };
        rb_sys::rb_thread_call_without_gvl(
            Some(run_without_gvl),
            &mut args as *mut InferArgs as *mut c_void,
            None, // no unblock function: the call is not interruptible
            std::ptr::null_mut(),
        );
        // The callback always stores Some(..); None here means it never ran.
        let result = args.result.take().ok_or_else(|| {
            magnus::Error::from(GteError::Inference(
                "inference did not return a result".to_string(),
            ))
        })?;
        result.map_err(magnus::Error::from)?
    };
    Ok(embeddings)
}
62
+
63
/// C callback executed while the GVL is released. It must not touch Ruby
/// state; panics are caught and converted into an error value so they never
/// unwind across the FFI boundary.
unsafe extern "C" fn run_without_gvl(ptr: *mut c_void) -> *mut c_void {
    // SAFETY: `ptr` is the &mut InferArgs passed by infer_without_gvl and
    // is valid and exclusively ours for the duration of this call.
    let args = &mut *(ptr as *mut InferArgs);
    let run_result = catch_unwind(AssertUnwindSafe(|| {
        let tokenized = (*args.embedder).tokenize(&*args.texts)?;
        let embeddings = (*args.embedder).run(&tokenized)?;
        Ok(normalize_l2(embeddings))
    }));
    args.result = Some(match run_result {
        Ok(result) => result,
        Err(payload) => Err(GteError::Inference(format!(
            "panic during inference: {}",
            panic_payload_to_string(payload),
        ))),
    });
    std::ptr::null_mut()
}
79
+
80
+ fn tensor_from_array(embeddings: ndarray::Array2<f32>) -> Result<RbTensor, Error> {
81
+ let rows = embeddings.nrows();
82
+ let cols = embeddings.ncols();
83
+ let (data, offset) = embeddings.into_raw_vec_and_offset();
84
+ if let Some(off) = offset.filter(|&o| o != 0) {
85
+ return Err(magnus::Error::from(GteError::Inference(format!(
86
+ "unexpected non-zero tensor offset: {}",
87
+ off
88
+ ))));
89
+ }
90
+ Ok(RbTensor { rows, cols, data })
91
+ }
92
+
93
impl RbEmbedder {
    /// `GTE::Embedder.new(dir_path, num_threads, optimization_level)` —
    /// loads the model and tokenizer from `dir_path`.
    pub fn rb_new(
        _ruby: &Ruby,
        dir_path: String,
        num_threads: usize,
        optimization_level: u8,
    ) -> Result<Self, Error> {
        let embedder = Embedder::from_dir(&dir_path, num_threads, optimization_level)
            .map_err(magnus::Error::from)?;
        Ok(RbEmbedder {
            inner: Arc::new(embedder),
        })
    }

    /// `embedder.embed(texts)` — embeds an array of strings; one tensor row
    /// per input text. Runs inference with the GVL released.
    pub fn rb_embed(_ruby: &Ruby, rb_self: &Self, texts: RArray) -> Result<RbTensor, Error> {
        let texts: Vec<String> = texts.to_vec()?;
        let embeddings = infer_without_gvl(&rb_self.inner, texts)?;
        tensor_from_array(embeddings)
    }

    /// `embedder.embed_one(text)` — convenience wrapper producing a
    /// single-row tensor.
    pub fn rb_embed_one(_ruby: &Ruby, rb_self: &Self, text: String) -> Result<RbTensor, Error> {
        let embeddings = infer_without_gvl(&rb_self.inner, vec![text])?;
        tensor_from_array(embeddings)
    }
}
118
+
119
impl RbTensor {
    /// Row count; backs Ruby's `size`/`length`.
    pub fn len(&self) -> usize {
        self.rows
    }

    /// Row count; backs Ruby's `rows`.
    pub fn rows(&self) -> usize {
        self.rows
    }

    /// Embedding dimensionality (column count); backs Ruby's `dim`.
    pub fn dim(&self) -> usize {
        self.cols
    }

    /// `[rows, cols]` as a Ruby array; backs Ruby's `shape`.
    pub fn shape(ruby: &Ruby, rb_self: &Self) -> Result<RArray, Error> {
        let out = ruby.ary_new_capa(2);
        out.push(rb_self.rows)?;
        out.push(rb_self.cols)?;
        Ok(out)
    }

    /// Row `index` as a Ruby array of floats.
    ///
    /// # Errors
    /// Out-of-bounds indices return a `GTE::Error` instead of panicking.
    pub fn row(ruby: &Ruby, rb_self: &Self, index: usize) -> Result<RArray, Error> {
        if index >= rb_self.rows {
            return Err(magnus::Error::from(GteError::Inference(format!(
                "row index {} out of bounds for {} rows",
                index, rb_self.rows
            ))));
        }

        // Row-major layout: row `index` occupies data[start..end].
        let start = index * rb_self.cols;
        let end = start + rb_self.cols;
        let out = ruby.ary_new_capa(rb_self.cols);
        for &value in &rb_self.data[start..end] {
            out.push(value)?;
        }
        Ok(out)
    }

    /// First row; errors on an empty tensor (same bounds check as `row`).
    pub fn first(ruby: &Ruby, rb_self: &Self) -> Result<RArray, Error> {
        Self::row(ruby, rb_self, 0)
    }

    /// Row `index` packed as raw native-endian f32 bytes in a Ruby string.
    pub fn row_binary_f32(
        ruby: &Ruby,
        rb_self: &Self,
        index: usize,
    ) -> Result<magnus::RString, Error> {
        if index >= rb_self.rows {
            return Err(magnus::Error::from(GteError::Inference(format!(
                "row index {} out of bounds for {} rows",
                index, rb_self.rows
            ))));
        }

        let start = index * rb_self.cols;
        let end = start + rb_self.cols;
        // SAFETY: reinterpreting an in-bounds f32 slice as bytes is valid —
        // the pointer is non-null, u8 has no alignment requirement, and the
        // byte length equals the element count times size_of::<f32>().
        let bytes = unsafe {
            std::slice::from_raw_parts(
                rb_self.data[start..end].as_ptr() as *const u8,
                rb_self.cols * std::mem::size_of::<f32>(),
            )
        };
        Ok(ruby.str_from_slice(bytes))
    }

    /// Whole tensor as a Ruby array of row arrays.
    pub fn to_a(ruby: &Ruby, rb_self: &Self) -> Result<RArray, Error> {
        let outer = ruby.ary_new_capa(rb_self.rows);
        for row_idx in 0..rb_self.rows {
            outer.push(Self::row(ruby, rb_self, row_idx)?)?;
        }
        Ok(outer)
    }

    /// Whole tensor packed as raw native-endian f32 bytes, row-major.
    pub fn to_binary_f32(ruby: &Ruby, rb_self: &Self) -> Result<magnus::RString, Error> {
        // SAFETY: same f32-to-byte reinterpretation as `row_binary_f32`,
        // applied to the full backing buffer.
        let bytes = unsafe {
            std::slice::from_raw_parts(
                rb_self.data.as_ptr() as *const u8,
                rb_self.data.len() * std::mem::size_of::<f32>(),
            )
        };
        Ok(ruby.str_from_slice(bytes))
    }
}
201
+
202
/// Registers the `GTE::Embedder` and `GTE::Tensor` Ruby classes and their
/// methods. Called from the extension's `init` entry point.
pub fn register(ruby: &Ruby) -> Result<(), Error> {
    let module = ruby.define_module("GTE")?;
    let embedder_class = module.define_class("Embedder", ruby.class_object())?;
    embedder_class.define_singleton_method("new", function!(RbEmbedder::rb_new, 3))?;
    embedder_class.define_method("embed", method!(RbEmbedder::rb_embed, 1))?;
    embedder_class.define_method("embed_one", method!(RbEmbedder::rb_embed_one, 1))?;

    let tensor_class = module.define_class("Tensor", ruby.class_object())?;
    tensor_class.define_method("rows", method!(RbTensor::rows, 0))?;
    // size/length are Ruby-idiomatic aliases for the row count.
    tensor_class.define_method("size", method!(RbTensor::len, 0))?;
    tensor_class.define_method("length", method!(RbTensor::len, 0))?;
    tensor_class.define_method("dim", method!(RbTensor::dim, 0))?;
    tensor_class.define_method("shape", method!(RbTensor::shape, 0))?;
    // `[]` indexes rows, same as `row`.
    tensor_class.define_method("[]", method!(RbTensor::row, 1))?;
    tensor_class.define_method("row", method!(RbTensor::row, 1))?;
    tensor_class.define_method("first", method!(RbTensor::first, 0))?;
    tensor_class.define_method("row_binary_f32", method!(RbTensor::row_binary_f32, 1))?;
    tensor_class.define_method("to_a", method!(RbTensor::to_a, 0))?;
    tensor_class.define_method("to_binary_f32", method!(RbTensor::to_binary_f32, 0))?;
    Ok(())
}
@@ -0,0 +1,123 @@
1
+ use crate::error::{GteError, Result};
2
+ use crate::model_config::{ExtractorMode, ModelConfig};
3
+ use crate::postprocess::mean_pool;
4
+ use crate::tokenizer::Tokenized;
5
+ use ndarray::{Array2, ArrayView2, Ix2};
6
+ use ort::execution_providers::{
7
+ CoreMLExecutionProvider, ExecutionProviderDispatch, XNNPACKExecutionProvider,
8
+ };
9
+ use ort::session::Session;
10
+ use ort::session::SessionInputValue;
11
+ use ort::value::Value;
12
+ use std::path::Path;
13
+
14
+ pub fn build_session<P: AsRef<Path>>(model_path: P, config: &ModelConfig) -> Result<Session> {
15
+ let opt_level = match config.optimization_level {
16
+ 0 => ort::session::builder::GraphOptimizationLevel::Disable,
17
+ 1 => ort::session::builder::GraphOptimizationLevel::Level1,
18
+ 2 => ort::session::builder::GraphOptimizationLevel::Level2,
19
+ _ => ort::session::builder::GraphOptimizationLevel::Level3,
20
+ };
21
+
22
+ let mut builder = Session::builder()?
23
+ .with_optimization_level(opt_level)?
24
+ .with_memory_pattern(true)?;
25
+
26
+ let providers = preferred_execution_providers();
27
+ if !providers.is_empty() {
28
+ builder = builder.with_execution_providers(providers)?;
29
+ }
30
+
31
+ if config.num_threads > 0 {
32
+ builder = builder.with_intra_threads(config.num_threads)?;
33
+ }
34
+
35
+ Ok(builder.commit_from_file(model_path)?)
36
+ }
37
+
38
+ fn preferred_execution_providers() -> Vec<ExecutionProviderDispatch> {
39
+ let default_providers = if cfg!(all(target_os = "macos", target_arch = "aarch64")) {
40
+ "xnnpack,coreml"
41
+ } else {
42
+ "xnnpack"
43
+ };
44
+ let order = std::env::var("GTE_EXECUTION_PROVIDERS")
45
+ .unwrap_or_else(|_| default_providers.to_string())
46
+ .to_ascii_lowercase();
47
+
48
+ let mut providers = Vec::new();
49
+ for provider in order.split(',').map(str::trim).filter(|p| !p.is_empty()) {
50
+ match provider {
51
+ "xnnpack" => providers.push(XNNPACKExecutionProvider::default().build().fail_silently()),
52
+ "coreml" => providers.push(CoreMLExecutionProvider::default().build().fail_silently()),
53
+ "none" => {}
54
+ _ => {}
55
+ }
56
+ }
57
+ providers
58
+ }
59
+
60
/// Runs one forward pass for `tokenized` and extracts (batch, dim)
/// embeddings according to `config.mode`.
///
/// # Errors
/// Returns `GteError` when input buffer shapes are inconsistent, the
/// configured output tensor is missing, or the output rank does not match
/// the extraction mode.
pub fn run_session(
    session: &Session,
    tokenized: &Tokenized,
    config: &ModelConfig,
) -> Result<Array2<f32>> {
    // Borrowing views over the flat row-major buffers; from_shape fails if
    // a buffer length does not equal rows * cols.
    let input_ids_view: ArrayView2<'_, i64> =
        ArrayView2::from_shape((tokenized.rows, tokenized.cols), tokenized.input_ids.as_slice())?;
    let attn_masks_view: ArrayView2<'_, i64> =
        ArrayView2::from_shape((tokenized.rows, tokenized.cols), tokenized.attn_masks.as_slice())?;

    let mut inputs = Vec::with_capacity(2 + usize::from(tokenized.type_ids.is_some()));
    inputs.push((
        "input_ids",
        SessionInputValue::from(Value::from_array(input_ids_view)?),
    ));
    if config.with_attention_mask {
        inputs.push((
            "attention_mask",
            SessionInputValue::from(Value::from_array(attn_masks_view)?),
        ));
    }
    // NOTE(review): whether token_type_ids are fed is decided by their
    // presence in `tokenized`, not by `config.with_type_ids` — confirm the
    // tokenizer is always configured consistently with the model config.
    if let Some(type_ids) = tokenized.type_ids.as_deref() {
        let type_ids_view: ArrayView2<'_, i64> =
            ArrayView2::from_shape((tokenized.rows, tokenized.cols), type_ids)?;
        inputs.push((
            "token_type_ids",
            SessionInputValue::from(Value::from_array(type_ids_view)?),
        ));
    }

    let outputs = session.run(inputs)?;
    let tensor_value = outputs.get(config.output_tensor.as_str()).ok_or_else(|| {
        GteError::Inference(format!(
            "output tensor '{}' not found in model outputs",
            &config.output_tensor
        ))
    })?;

    let array = tensor_value.try_extract_tensor::<f32>()?;

    match config.mode {
        // Rank-3 (batch, seq, dim) output: keep token `idx`'s hidden state.
        ExtractorMode::Token(idx) => {
            let shape = array.shape();
            if shape.len() != 3 || idx >= shape[1] {
                return Err(GteError::Inference(format!(
                    "token extraction index {} out of bounds for output shape {:?}",
                    idx, shape
                )));
            }
            Ok(array.slice(ndarray::s![.., idx, ..]).into_owned())
        }
        // Rank-3 output: attention-weighted mean over the sequence axis.
        ExtractorMode::MeanPool => {
            let ndim = array.ndim();
            let hidden_states = array.into_dimensionality::<ndarray::Ix3>().map_err(|_| {
                GteError::Inference(format!(
                    "mean pooling requires rank-3 output, got rank {}",
                    ndim
                ))
            })?;
            mean_pool(hidden_states, attn_masks_view)
        }
        // Model already emits rank-2 (batch, dim).
        ExtractorMode::Raw => Ok(array.into_dimensionality::<Ix2>()?.into_owned()),
    }
}
@@ -0,0 +1,130 @@
1
+ use crate::error::{GteError, Result};
2
+ use std::path::Path;
3
+ use tokenizers::{PaddingParams, PaddingStrategy, TruncationParams};
4
+
5
/// Flat, row-major tokenization result for a batch of texts.
pub struct Tokenized {
    /// Number of texts in the batch.
    pub rows: usize,
    /// Padded sequence length shared by every row.
    pub cols: usize,
    /// Token ids, length rows * cols, row-major.
    pub input_ids: Vec<i64>,
    /// Attention mask (non-zero marks real tokens), same layout as ids.
    pub attn_masks: Vec<i64>,
    /// Optional token type ids, emitted when the tokenizer is configured
    /// with `with_type_ids`.
    pub type_ids: Option<Vec<i64>>,
}
12
+
13
/// Wrapper around a HuggingFace tokenizer preconfigured with truncation
/// and batch-longest padding.
pub struct Tokenizer {
    tokenizer: tokenizers::Tokenizer,
    // Whether tokenize() should also emit token type ids.
    with_type_ids: bool,
}
17
+
18
impl Tokenizer {
    /// Loads a tokenizer file (e.g. `tokenizer.json`) from `tokenizer_path`
    /// and configures truncation to `max_length` plus batch-longest padding.
    ///
    /// # Errors
    /// Returns `GteError::Tokenizer` when the file cannot be loaded or the
    /// truncation parameters are rejected.
    pub fn new<P: AsRef<Path>>(
        tokenizer_path: P,
        max_length: usize,
        with_type_ids: bool,
    ) -> Result<Self> {
        let mut tokenizer = tokenizers::Tokenizer::from_file(tokenizer_path)
            .map_err(|e| GteError::Tokenizer(e.to_string()))?;

        let truncation = TruncationParams {
            max_length,
            ..Default::default()
        };
        tokenizer
            .with_truncation(Some(truncation))
            .map_err(|e| GteError::Tokenizer(e.to_string()))?;

        // Pad every sequence in a batch to the longest member so the flat
        // row-major buffers in `Tokenized` stay rectangular.
        let padding = PaddingParams {
            strategy: PaddingStrategy::BatchLongest,
            ..Default::default()
        };
        tokenizer.with_padding(Some(padding));

        Ok(Self {
            tokenizer,
            with_type_ids,
        })
    }

    /// Tokenizes `texts` into flat row-major id/mask buffers.
    /// Single-text inputs take a cheaper non-batch path.
    ///
    /// # Errors
    /// Returns `GteError::Tokenizer` when encoding fails.
    pub fn tokenize(&self, texts: &[String]) -> Result<Tokenized> {
        if texts.len() == 1 {
            let encoding = self
                .tokenizer
                .encode_fast(texts[0].as_str(), true)
                .map_err(|e| GteError::Tokenizer(e.to_string()))?;
            return build_tokenized_single(&encoding, self.with_type_ids);
        }

        let encode_inputs: Vec<&str> = texts.iter().map(String::as_str).collect();
        let encodings = self
            .tokenizer
            .encode_batch_fast(encode_inputs, true)
            .map_err(|e| GteError::Tokenizer(e.to_string()))?;

        build_tokenized(&encodings, self.with_type_ids)
    }
}
65
+
66
+ fn build_tokenized_single(encoding: &tokenizers::Encoding, with_type_ids: bool) -> Result<Tokenized> {
67
+ let cols = encoding.len();
68
+
69
+ let input_ids: Vec<i64> = encoding.get_ids().iter().map(|&value| i64::from(value)).collect();
70
+ let attn_masks: Vec<i64> = encoding
71
+ .get_attention_mask()
72
+ .iter()
73
+ .map(|&value| i64::from(value))
74
+ .collect();
75
+ let type_ids: Option<Vec<i64>> = with_type_ids.then(|| {
76
+ encoding
77
+ .get_type_ids()
78
+ .iter()
79
+ .map(|&value| i64::from(value))
80
+ .collect()
81
+ });
82
+
83
+ Ok(Tokenized {
84
+ rows: 1,
85
+ cols,
86
+ input_ids,
87
+ attn_masks,
88
+ type_ids,
89
+ })
90
+ }
91
+
92
+ fn build_tokenized(encodings: &[tokenizers::Encoding], with_type_ids: bool) -> Result<Tokenized> {
93
+ let rows = encodings.len();
94
+ let cols = encodings
95
+ .first()
96
+ .map(|encoding| encoding.len())
97
+ .unwrap_or(0);
98
+ let len = rows * cols;
99
+
100
+ let mut input_ids = Vec::with_capacity(len);
101
+ let mut attn_masks = Vec::with_capacity(len);
102
+ let mut type_ids = with_type_ids.then(|| Vec::with_capacity(len));
103
+
104
+ for encoding in encodings {
105
+ input_ids.extend(encoding.get_ids().iter().map(|&value| i64::from(value)));
106
+ attn_masks.extend(
107
+ encoding
108
+ .get_attention_mask()
109
+ .iter()
110
+ .map(|&value| i64::from(value)),
111
+ );
112
+
113
+ if let Some(type_ids) = type_ids.as_mut() {
114
+ type_ids.extend(
115
+ encoding
116
+ .get_type_ids()
117
+ .iter()
118
+ .map(|&value| i64::from(value)),
119
+ );
120
+ }
121
+ }
122
+
123
+ Ok(Tokenized {
124
+ rows,
125
+ cols,
126
+ input_ids,
127
+ attn_masks,
128
+ type_ids,
129
+ })
130
+ }
@@ -0,0 +1,39 @@
1
+ use gte::embedder::normalize_l2;
2
+ use ndarray::array;
3
+
4
#[test]
fn test_normalize_l2_basic() {
    let input = array![[3.0f32, 4.0], [1.0, 0.0]];
    let result = normalize_l2(input);

    // 3-4-5 triangle row normalizes to (0.6, 0.8).
    let row0 = result.row(0);
    assert!((row0[0] - 0.6).abs() < 1e-6);
    assert!((row0[1] - 0.8).abs() < 1e-6);

    // Previously constructed but never checked: the already-unit second row
    // must come back unchanged.
    let row1 = result.row(1);
    assert!((row1[0] - 1.0).abs() < 1e-6);
    assert!(row1[1].abs() < 1e-6);
}
13
+
14
#[test]
fn test_normalize_l2_zero_vector_unchanged() {
    // A zero vector has zero norm; normalize_l2 must skip it (no divide by
    // zero, no NaNs) and return it unchanged.
    let input = array![[0.0f32, 0.0, 0.0]];
    let result = normalize_l2(input);
    let row = result.row(0);
    assert!(row.iter().all(|&x| x == 0.0));
}
21
+
22
#[test]
fn test_normalize_l2_unit_norm() {
    // Every non-zero row must have an L2 norm of exactly 1 afterwards.
    let input = array![[1.0f32, 2.0, 3.0], [4.0, 5.0, 6.0]];
    let result = normalize_l2(input);

    for row in result.rows() {
        let norm: f32 = row.mapv(|x: f32| x * x).sum().sqrt();
        assert!((norm - 1.0).abs() < 1e-6);
    }
}
32
+
33
+ #[test]
34
+ fn test_normalize_l2_already_unit_unchanged() {
35
+ let input = array![[1.0f32, 0.0, 0.0]];
36
+ let result = normalize_l2(input.clone());
37
+ let row = result.row(0);
38
+ assert!((row[0] - 1.0).abs() < 1e-6 && row[1] == 0.0 && row[2] == 0.0);
39
+ }