gte 0.0.6 → 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +16 -8
- data/Rakefile +38 -3
- data/VERSION +1 -1
- data/ext/gte/Cargo.toml +4 -4
- data/ext/gte/src/embedder.rs +42 -33
- data/ext/gte/src/model_config.rs +18 -0
- data/ext/gte/src/model_profile.rs +129 -33
- data/ext/gte/src/pipeline.rs +12 -9
- data/ext/gte/src/reranker.rs +49 -31
- data/ext/gte/src/ruby_embedder.rs +73 -113
- data/ext/gte/src/session.rs +279 -15
- data/ext/gte/src/tokenizer.rs +99 -14
- data/ext/gte/tests/inference_integration_test.rs +5 -4
- data/ext/gte/tests/tokenizer_unit_test.rs +5 -2
- data/lib/gte/config.rb +2 -2
- data/lib/gte/embedder.rb +7 -4
- data/lib/gte/reranker.rb +3 -1
- data/lib/gte.rb +1 -10
- metadata +6 -6
checksums.yaml
CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 2c754b4675ee105e9a280cd9deafa00a81b9e02ee629131f3e908400006b6ae4
+  data.tar.gz: 40a0d3e04c3d2943ae50910164d644ecb763eac99a02044dc962cc141a0e13c5
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 16614e01e7a33a53339ba9fe7cf32fe7606041518a24177258d7a6e5550516e8cff741d0f0df02b7e5863fc763c02ae81b943dc4b18295701a4cafdec6627cb0
+  data.tar.gz: 348e1fd1d9f4c44214b5101ba339109b5ececfbef18b48b7c11324a64481f476d8da831cc5148d17a85c41b525ee753c296d4421a4fb2adda269a3f5fe38cda6
data/README.md
CHANGED

@@ -33,14 +33,15 @@ raw_model = GTE.config(ENV.fetch("GTE_MODEL_DIR")) do |config|
   config.with(normalize: false)
 end
 
-
-  config.with(threads:
+single_thread = GTE.config(ENV.fetch("GTE_MODEL_DIR")) do |config|
+  config.with(threads: 1)
 end
 
 custom = GTE.config(ENV.fetch("GTE_MODEL_DIR")) do |config|
   config.with(
     output_tensor: "last_hidden_state",
     max_length: 256,
+    padding: "batch_longest",
     optimization_level: 3
   )
 end
@@ -49,12 +50,13 @@ end
 Config fields and defaults:
 
 - `model_dir`: absolute path to model directory
-- `threads`: `
+- `threads`: `1` (default tuned for p95 latency; use `0` for ONNX Runtime auto-thread mode)
 - `optimization_level`: `3`
 - `model_name`: `nil`
 - `normalize`: `true` (L2 normalization at Ruby-facing API)
 - `output_tensor`: `nil` (auto-select output tensor)
 - `max_length`: `nil` (uses tokenizer/model defaults)
+- `padding`: `nil` (auto; accepts `auto`, `batch_longest`, `fixed`)
 - `execution_providers`: `nil` (falls back to `GTE_EXECUTION_PROVIDERS` / CPU default)
 
 Notes:
@@ -66,7 +68,7 @@ Low-level embedder setup (without model cache):
 
 ```ruby
 embedder = GTE::Embedder.config(ENV.fetch("GTE_MODEL_DIR")) do |config|
-  config.with(threads:
+  config.with(threads: 1, execution_providers: "cpu")
 end
 ```
 
@@ -76,7 +78,7 @@ Use `GTE::Reranker.config(model_dir)` for cross-encoder reranking.
 
 ```ruby
 reranker = GTE::Reranker.config(ENV.fetch("GTE_RERANK_DIR")) do |config|
-  config.with(sigmoid: true, threads:
+  config.with(sigmoid: true, threads: 1)
 end
 
 query = "how to train a neural network?"
@@ -100,12 +102,13 @@ ranked = reranker.rerank(query: query, candidates: candidates)
 Reranker config fields and defaults:
 
 - `model_dir`: absolute path to model directory
-- `threads`: `
+- `threads`: `1`
 - `optimization_level`: `3`
 - `model_name`: `nil`
 - `sigmoid`: `false` (set `true` if you want bounded [0,1] style scores)
 - `output_tensor`: `nil`
 - `max_length`: `nil`
+- `padding`: `nil` (auto; accepts `auto`, `batch_longest`, `fixed`)
 - `execution_providers`: `nil`
 
 ## Runtime + Result Examples
@@ -171,7 +174,7 @@ make ci
 
 ## Benchmark
 
-The repo includes
+The repo includes a shared multi-runtime benchmark harness:
 
 ```bash
 make bench
@@ -180,6 +183,11 @@ nix develop -c bundle exec rake bench:matrix_sweep
 nix develop -c bundle exec ruby bench/memory_probe.rb --compare-pure
 ```
 
+- `make bench`: Puma-like single-request comparison at concurrency `16`
+- `rake bench:pure_compare`: batch amortization comparison
+- `rake bench:matrix_sweep`: GTE provider/thread sweep using the shared result schema
+- Optional Python comparisons use `bench/python_onnxruntime.py` and are skipped automatically if local dependencies are unavailable.
+
 To run benchmark + append a `RUNS.md` entry + enforce goal checks:
 
 ```bash
@@ -188,5 +196,5 @@ make bench-record
 
 `bench/runs_ledger.rb check` is goal-focused by default:
 
-- Enforces goal metric (`response_time_p95`
+- Enforces the goal metric (`response_time_p95`) across every enabled competitor.
 - Does not require current-version coverage in `RUNS.md` unless explicitly enabled.
data/Rakefile
CHANGED

@@ -10,17 +10,52 @@ rescue LoadError
 end
 
 spec = Gem::Specification.load('gte.gemspec')
+cross_target = ENV.fetch('RUBY_TARGET', nil)
 
-
+if cross_target == 'arm64-darwin'
+  # rb-sys-dock's darwin image can expose an unusable default LIBRARY_PATH.
+  # Force the compiler-rt darwin runtime directory so -lclang_rt.osx resolves.
+  ENV['LIBRARY_PATH'] = '/usr/lib/llvm-10/lib/clang/10.0.0/lib/darwin'
+end
+
+extension_task = Rake::ExtensionTask.new('gte', spec) do |ext|
   ext.lib_dir = 'lib/gte'
   ext.cross_compile = true
-
+  # rb-sys-dock invokes `rake native:$RUBY_TARGET gem` without the `cross` task,
+  # so scope platforms during dock builds to avoid host-Ruby fallback copy tasks.
+  cross_platforms = if cross_target && !cross_target.empty?
+    [cross_target]
+  else
+    %w[x86_64-linux aarch64-linux arm64-darwin]
+  end
+  ext.cross_platform = cross_platforms
+end
+
+if cross_target && !cross_target.empty? && ENV['RUBY_CC_VERSION']
+  ruby_version = ENV['RUBY_CC_VERSION'].split(':').first
+  lib_binary_path = File.join(extension_task.lib_dir, File.basename(extension_task.binary(cross_target)))
+  copy_task = "copy:gte:#{cross_target}:#{ruby_version}"
+
+  if Rake::Task.task_defined?(lib_binary_path) && Rake::Task.task_defined?(copy_task)
+    Rake::Task[lib_binary_path].prerequisites.clear
+    Rake::Task[lib_binary_path].enhance([copy_task])
+  end
 end
 
 task default: %i[compile spec]
 
+def bundler_env
+  root = File.expand_path(__dir__)
+  {
+    'BUNDLE_DISABLE_SHARED_GEMS' => '1',
+    'GEM_HOME' => File.join(root, '.bundle-gems'),
+    'GEM_PATH' => File.join(root, '.bundle-gems'),
+    'BUNDLE_PATH' => File.join(root, 'vendor/bundle')
+  }
+end
+
 def run_in_nix(*command)
-  sh('nix', 'develop', '-c', *command)
+  sh(bundler_env, 'nix', 'develop', '-c', *command)
 end
 
 namespace :bench do
data/VERSION
CHANGED

@@ -1 +1 @@
-0.0.6
+0.0.8
data/ext/gte/Cargo.toml
CHANGED

@@ -1,6 +1,6 @@
 [package]
 name = "gte"
-version = "0.0.
+version = "0.0.8"
 edition = "2021"
 authors = ["elcuervo <elcuervo@elcuervo.net>"]
 license = "MIT"
@@ -21,10 +21,10 @@ ruby-ffi = ["dep:magnus", "dep:rb-sys"]
 [dependencies]
 rb-sys = { version = "0.9", features = ["stable-api-compiled-fallback"], optional = true }
 magnus = { version = "0.8", optional = true }
-ort = { version = "=2.0.0-rc.
-ort-sys = "=2.0.0-rc.
+ort = { version = "=2.0.0-rc.12", features = ["ndarray", "xnnpack"] }
+ort-sys = "=2.0.0-rc.12"
 tokenizers = "0.21.0"
-ndarray = "0.
+ndarray = "0.17"
 half = "2"
 serde = { version = "1", features = ["derive"] }
 serde_json = "1"
data/ext/gte/src/embedder.rs
CHANGED

@@ -1,19 +1,18 @@
 use crate::error::{GteError, Result};
-use crate::model_config::{ExtractorMode, ModelConfig};
+use crate::model_config::{ExtractorMode, ModelConfig, ModelLoadOverrides, PaddingMode};
 use crate::model_profile::{
-    has_input, infer_extraction_mode,
-    resolve_tokenizer_path, select_output_tensor, validate_supported_text_inputs,
+    has_input, infer_extraction_mode, read_tokenizer_profile, resolve_default_text_model,
+    resolve_named_model, resolve_tokenizer_path, select_output_tensor, validate_supported_text_inputs,
 };
 use crate::postprocess::normalize_l2 as normalize_l2_rows;
-use crate::session::{build_session, run_session};
-use crate::tokenizer::{Tokenized, Tokenizer};
+use crate::session::{build_session, run_session, SessionPool};
+use crate::tokenizer::{parse_padding_mode_override, Tokenized, Tokenizer};
 use ndarray::Array2;
-use
-use std::path::Path;
+use std::path::{Path, PathBuf};
 
 pub struct Embedder {
     tokenizer: Tokenizer,
-
+    pool: SessionPool,
     config: ModelConfig,
 }
 
@@ -23,23 +22,24 @@ impl Embedder {
         P1: AsRef<Path>,
         P2: AsRef<Path>,
     {
-        let tokenizer = Tokenizer::new(
-
-
-
-
-
-
+        let tokenizer = Tokenizer::new(
+            tokenizer_path,
+            config.max_length,
+            config.with_type_ids,
+            config.padding_mode,
+            None,
+        )?;
+        let model_path = model_path.as_ref().to_path_buf();
+        let session = build_session(&model_path, &config)?;
+        let pool = SessionPool::new(session, model_path, config.clone());
+        Ok(Self { tokenizer, pool, config })
     }
 
     pub fn from_dir<P: AsRef<Path>>(
         dir: P,
         num_threads: usize,
         optimization_level: u8,
-
-        output_tensor_override: Option<&str>,
-        max_length_override: Option<usize>,
-        execution_providers_override: Option<&str>,
+        overrides: ModelLoadOverrides<'_>,
     ) -> Result<Self> {
         const PREFERRED_EMBEDDING_OUTPUTS: [&str; 4] = [
             "pooler_output",
@@ -50,31 +50,35 @@ impl Embedder {
 
         let dir = dir.as_ref();
         let tokenizer_path = resolve_tokenizer_path(dir)?;
-        let model_path = match model_name.filter(|s| !s.is_empty()) {
+        let model_path: PathBuf = match overrides.model_name.filter(|s| !s.is_empty()) {
             Some(name) => resolve_named_model(dir, name)?,
             None => resolve_default_text_model(dir)?,
         };
 
-        let
+        let tokenizer_profile = read_tokenizer_profile(dir);
+        let max_length = if let Some(override_value) = overrides.max_length {
             if override_value == 0 {
                 return Err(GteError::Inference(
                     "max_length override must be greater than 0".to_string(),
                 ));
             }
-            override_value
+            override_value.min(tokenizer_profile.safe_max_length)
         } else {
-
+            tokenizer_profile.default_max_length
        };
+        let padding_mode =
+            parse_padding_mode_override(overrides.padding)?.unwrap_or(PaddingMode::Auto);
 
         let session_config = ModelConfig {
             max_length,
+            padding_mode,
             output_tensor: String::new(),
             mode: ExtractorMode::Raw,
             with_type_ids: false,
             with_attention_mask: true,
             num_threads,
             optimization_level,
-            execution_providers:
+            execution_providers: overrides.execution_providers.map(str::to_string),
         };
         let session = build_session(&model_path, &session_config)?;
 
@@ -82,7 +86,7 @@ impl Embedder {
         let with_type_ids = has_input(&session, "token_type_ids");
         let with_attention_mask = has_input(&session, "attention_mask");
         let output_tensor =
-            select_output_tensor(&session,
+            select_output_tensor(&session, overrides.output_tensor, &PREFERRED_EMBEDDING_OUTPUTS)?;
         let mode = infer_extraction_mode(&session, output_tensor.as_str())?;
         if matches!(mode, ExtractorMode::MeanPool) && !with_attention_mask {
             return Err(GteError::Inference(
@@ -92,22 +96,26 @@ impl Embedder {
 
         let config = ModelConfig {
             max_length,
+            padding_mode,
             output_tensor,
             mode,
             with_type_ids,
             with_attention_mask,
             num_threads,
             optimization_level,
-            execution_providers:
+            execution_providers: overrides.execution_providers.map(str::to_string),
         };
 
-        let tokenizer = Tokenizer::new(
+        let tokenizer = Tokenizer::new(
+            &tokenizer_path,
+            config.max_length,
+            config.with_type_ids,
+            config.padding_mode,
+            tokenizer_profile.fixed_padding_length,
+        )?;
 
-
-
-            session,
-            config,
-        })
+        let pool = SessionPool::new(session, model_path, session_config);
+        Ok(Self { tokenizer, pool, config })
     }
 
     pub fn embed(&self, texts: Vec<String>) -> Result<Array2<f32>> {
@@ -120,7 +128,8 @@ impl Embedder {
     }
 
     pub fn run(&self, tokenized: &Tokenized) -> crate::error::Result<Array2<f32>> {
-
+        let mut session = self.pool.acquire()?;
+        run_session(&mut session, tokenized, &self.config)
     }
 }
 
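Note on the change above: `Embedder` now stores a `SessionPool` instead of a single session, and `run` checks a session out per call (`self.pool.acquire()?`) before invoking `run_session(&mut session, ...)`. The pool itself lives in `session.rs` (+279 -15), which is not part of this excerpt, so the following is only a minimal standalone sketch of an acquire/release pool under that assumption; the `Pool` type, its `Mutex<Vec<T>>` storage, and the `String` stand-in for a session are illustrative, not the crate's API.

```rust
use std::sync::Mutex;

// Hypothetical stand-in for the crate's SessionPool (not shown in this diff).
struct Pool<T> {
    idle: Mutex<Vec<T>>,
}

impl<T> Pool<T> {
    fn new(first: T) -> Self {
        Self { idle: Mutex::new(vec![first]) }
    }

    // Hands out an idle item; the real pool presumably rebuilds a session from
    // the stored model path and ModelConfig when none is idle.
    fn acquire(&self) -> Option<T> {
        self.idle.lock().unwrap().pop()
    }

    fn release(&self, item: T) {
        self.idle.lock().unwrap().push(item);
    }
}

fn main() {
    let pool = Pool::new(String::from("session-0"));
    let mut session = pool.acquire().expect("one idle session");
    session.push_str(" (in use)"); // stand-in for run_session(&mut session, ...)
    pool.release(session);
    assert!(pool.acquire().is_some());
}
```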
data/ext/gte/src/model_config.rs
CHANGED

@@ -5,9 +5,18 @@ pub enum ExtractorMode {
     Raw,
 }
 
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
+pub enum PaddingMode {
+    #[default]
+    Auto,
+    BatchLongest,
+    Fixed,
+}
+
 #[derive(Debug, Clone)]
 pub struct ModelConfig {
     pub max_length: usize,
+    pub padding_mode: PaddingMode,
     pub output_tensor: String,
     pub mode: ExtractorMode,
     pub with_type_ids: bool,
@@ -16,3 +25,12 @@ pub struct ModelConfig {
     pub optimization_level: u8,
     pub execution_providers: Option<String>,
 }
+
+#[derive(Debug, Clone, Copy, Default)]
+pub struct ModelLoadOverrides<'a> {
+    pub model_name: Option<&'a str>,
+    pub output_tensor: Option<&'a str>,
+    pub max_length: Option<usize>,
+    pub padding: Option<&'a str>,
+    pub execution_providers: Option<&'a str>,
+}
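The new `padding` override stays a plain string in `ModelLoadOverrides` and only becomes a `PaddingMode` via `parse_padding_mode_override(...)?.unwrap_or(PaddingMode::Auto)` (see embedder.rs above). That parser lives in tokenizer.rs, which is outside this excerpt, so the snippet below is a self-contained sketch of the mapping implied by the README values `auto`, `batch_longest`, and `fixed`; the `String` error type and the exact match arms are assumptions, not the crate's implementation.

```rust
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
enum PaddingMode {
    #[default]
    Auto,
    BatchLongest,
    Fixed,
}

// Sketch of the string -> PaddingMode mapping; the real parser lives in
// tokenizer.rs and returns the crate's error type instead of String.
fn parse_padding_mode_override(raw: Option<&str>) -> Result<Option<PaddingMode>, String> {
    match raw.map(str::trim).filter(|s| !s.is_empty()) {
        None => Ok(None),
        Some("auto") => Ok(Some(PaddingMode::Auto)),
        Some("batch_longest") => Ok(Some(PaddingMode::BatchLongest)),
        Some("fixed") => Ok(Some(PaddingMode::Fixed)),
        Some(other) => Err(format!("unsupported padding mode: {other}")),
    }
}

fn main() {
    let mode = parse_padding_mode_override(Some("batch_longest"))
        .unwrap()
        .unwrap_or(PaddingMode::Auto);
    assert_eq!(mode, PaddingMode::BatchLongest);
}
```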
data/ext/gte/src/model_profile.rs
CHANGED

@@ -1,9 +1,19 @@
 use crate::error::{GteError, Result};
 use crate::model_config::ExtractorMode;
 use ort::session::Session;
+use serde_json::Value;
 use std::path::{Path, PathBuf};
 
 const SUPPORTED_INPUTS: [&str; 3] = ["input_ids", "attention_mask", "token_type_ids"];
+const DEFAULT_MAX_LENGTH: usize = 512;
+const MAX_SUPPORTED_LENGTH: usize = 8192;
+
+#[derive(Debug, Clone, Copy)]
+pub struct TokenizerProfile {
+    pub default_max_length: usize,
+    pub safe_max_length: usize,
+    pub fixed_padding_length: Option<usize>,
+}
 
 pub fn resolve_tokenizer_path(dir: &Path) -> Result<PathBuf> {
     let tokenizer_path = dir.join("tokenizer.json");
@@ -48,27 +58,84 @@ pub fn resolve_default_text_model(dir: &Path) -> Result<PathBuf> {
     )))
 }
 
-pub fn
-
-
-
-
-
-
-
-
-
-    Some(
-
-
+pub fn read_tokenizer_profile(dir: &Path) -> TokenizerProfile {
+    let tokenizer_config = read_json(dir.join("tokenizer_config.json"));
+    let tokenizer_json = read_json(dir.join("tokenizer.json"));
+
+    let fixed_padding_length = tokenizer_json
+        .as_ref()
+        .and_then(parse_fixed_padding_length_from_tokenizer_json);
+
+    let mut candidates = Vec::new();
+    if let Some(config) = tokenizer_config.as_ref() {
+        if let Some(v) = config.get("max_length").and_then(parse_positive_usize) {
+            candidates.push(v.min(MAX_SUPPORTED_LENGTH));
+        }
+        if let Some(v) = config.get("model_max_length").and_then(parse_positive_usize) {
+            candidates.push(v.min(MAX_SUPPORTED_LENGTH));
+        }
+    }
+
+    if let Some(tokenizer) = tokenizer_json.as_ref() {
+        if let Some(v) = tokenizer
+            .get("truncation")
+            .and_then(|truncation| truncation.get("max_length"))
+            .and_then(parse_positive_usize)
+        {
+            candidates.push(v.min(MAX_SUPPORTED_LENGTH));
+        }
+    }
+
+    if let Some(v) = fixed_padding_length {
+        candidates.push(v.min(MAX_SUPPORTED_LENGTH));
+    }
+
+    let default_max_length = candidates
+        .iter()
+        .copied()
+        .min()
+        .unwrap_or(DEFAULT_MAX_LENGTH)
+        .max(1);
+    let safe_max_length = fixed_padding_length.unwrap_or(default_max_length).max(1);
+
+    TokenizerProfile {
+        default_max_length,
+        safe_max_length,
+        fixed_padding_length,
+    }
+}
+
+fn read_json(path: PathBuf) -> Option<Value> {
+    let contents = std::fs::read_to_string(path).ok()?;
+    serde_json::from_str(&contents).ok()
+}
+
+fn parse_positive_usize(value: &Value) -> Option<usize> {
+    let raw = value
+        .as_u64()
+        .or_else(|| {
+            value
+                .as_f64()
+                .filter(|&v| v.is_finite() && v > 0.0)
+                .map(|v| v as u64)
+        })
+        .or_else(|| value.as_str().and_then(|s| s.parse::<u64>().ok()))?;
+    let parsed = usize::try_from(raw).ok()?;
+    (parsed > 0).then_some(parsed)
+}
+
+fn parse_fixed_padding_length_from_tokenizer_json(tokenizer_json: &Value) -> Option<usize> {
+    tokenizer_json
+        .get("padding")
+        .and_then(|padding| padding.get("strategy"))
+        .and_then(|strategy| strategy.get("Fixed"))
+        .and_then(parse_positive_usize)
 }
 
 pub fn validate_supported_text_inputs(session: &Session, api_label: &str) -> Result<()> {
-    let unsupported: Vec<String> = session
-        .
-        .
-        .filter(|i| !SUPPORTED_INPUTS.contains(&i.name.as_str()))
-        .map(|i| i.name.clone())
+    let unsupported: Vec<String> = session.inputs().iter()
+        .filter(|i| !SUPPORTED_INPUTS.contains(&i.name()))
+        .map(|i| i.name().to_owned())
         .collect();
 
     if unsupported.is_empty() {
@@ -91,7 +158,7 @@ pub fn validate_supported_text_inputs(session: &Session, api_label: &str) -> Result<()> {
 }
 
 pub fn has_input(session: &Session, name: &str) -> bool {
-    session.inputs.iter().any(|input| input.name == name)
+    session.inputs().iter().any(|input| input.name() == name)
 }
 
 fn output_name_matches(name: &str, preferred: &str) -> bool {
@@ -106,16 +173,16 @@ pub fn select_output_tensor(
 ) -> Result<String> {
     if let Some(requested_name) = requested.map(str::trim).filter(|name| !name.is_empty()) {
         if let Some(output) = session
-            .outputs
+            .outputs()
             .iter()
-            .find(|o| output_name_matches(o.name
+            .find(|o| output_name_matches(o.name(), requested_name))
         {
-            return Ok(output.name.
+            return Ok(output.name().to_owned());
         }
         let available = session
-            .outputs
+            .outputs()
             .iter()
-            .map(|o| o.name
+            .map(|o| o.name())
             .collect::<Vec<_>>()
             .join(", ");
         return Err(GteError::Inference(format!(
@@ -126,18 +193,18 @@ pub fn select_output_tensor(
 
     for preferred in preferred_outputs {
         if let Some(output) = session
-            .outputs
+            .outputs()
             .iter()
-            .find(|o| output_name_matches(o.name
+            .find(|o| output_name_matches(o.name(), preferred))
         {
-            return Ok(output.name.
+            return Ok(output.name().to_owned());
         }
     }
 
     session
-        .outputs
+        .outputs()
         .first()
-        .map(|o| o.name.
+        .map(|o| o.name().to_owned())
         .ok_or_else(|| GteError::Inference("model has no outputs".into()))
 }
 
@@ -147,9 +214,9 @@ fn output_basename(name: &str) -> &str {
 
 pub fn infer_extraction_mode(session: &Session, output_tensor: &str) -> Result<ExtractorMode> {
     let output = session
-        .outputs
+        .outputs()
         .iter()
-        .find(|o| o.name == output_tensor)
+        .find(|o| o.name() == output_tensor)
         .ok_or_else(|| {
             GteError::Inference(format!(
                 "output tensor '{}' not found in model outputs",
@@ -157,8 +224,8 @@ pub fn infer_extraction_mode(session: &Session, output_tensor: &str) -> Result<ExtractorMode> {
             ))
         })?;
 
-    let ndims = match
-        ort::value::ValueType::Tensor {
+    let ndims = match output.dtype() {
+        ort::value::ValueType::Tensor { shape, .. } => shape.len(),
         other => {
             return Err(GteError::Inference(format!(
                 "output is not a tensor: {:?}",
@@ -177,3 +244,32 @@ pub fn infer_extraction_mode(session: &Session, output_tensor: &str) -> Result<ExtractorMode> {
         ))),
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::{parse_fixed_padding_length_from_tokenizer_json, parse_positive_usize};
+    use serde_json::json;
+
+    #[test]
+    fn parse_positive_usize_handles_integer_float_and_string() {
+        assert_eq!(parse_positive_usize(&json!(64)), Some(64));
+        assert_eq!(parse_positive_usize(&json!(64.0)), Some(64));
+        assert_eq!(parse_positive_usize(&json!("64")), Some(64));
+        assert_eq!(parse_positive_usize(&json!(0)), None);
+    }
+
+    #[test]
+    fn parse_fixed_padding_length_reads_fixed_padding_strategy() {
+        let tokenizer_json = json!({
+            "padding": {
+                "strategy": {
+                    "Fixed": 64
+                }
+            }
+        });
+        assert_eq!(
+            parse_fixed_padding_length_from_tokenizer_json(&tokenizer_json),
+            Some(64)
+        );
+    }
+}
data/ext/gte/src/pipeline.rs
CHANGED

@@ -1,8 +1,8 @@
 use crate::error::{GteError, Result};
 use crate::tokenizer::Tokenized;
-use ndarray::ArrayView2;
+use ndarray::{ArrayView2, ArrayViewD};
 use ort::session::SessionInputValue;
-use ort::value::
+use ort::value::TensorRef;
 
 pub struct InputTensors<'a> {
     pub inputs: Vec<(&'static str, SessionInputValue<'a>)>,
@@ -23,13 +23,13 @@ impl<'a> InputTensors<'a> {
         let mut inputs = Vec::with_capacity(2 + usize::from(tokenized.type_ids.is_some()));
         inputs.push((
             "input_ids",
-            SessionInputValue::from(
+            SessionInputValue::from(TensorRef::from_array_view(input_ids_view)?),
         ));
 
         if with_attention_mask {
             inputs.push((
                 "attention_mask",
-                SessionInputValue::from(
+                SessionInputValue::from(TensorRef::from_array_view(attention_mask)?),
             ));
         }
 
@@ -38,7 +38,7 @@ impl<'a> InputTensors<'a> {
                 ArrayView2::from_shape((tokenized.rows, tokenized.cols), type_ids)?;
             inputs.push((
                 "token_type_ids",
-                SessionInputValue::from(
+                SessionInputValue::from(TensorRef::from_array_view(type_ids_view)?),
             ));
         }
 
@@ -50,11 +50,14 @@ impl<'a> InputTensors<'a> {
 }
 
 pub fn extract_output_tensor<'a>(
-    outputs: &'a ort::session::SessionOutputs<'
+    outputs: &'a ort::session::SessionOutputs<'_>,
     output_name: &str,
-) -> Result<
+) -> Result<ArrayViewD<'a, f32>> {
     let tensor_value = outputs.get(output_name).ok_or_else(|| {
-        GteError::Inference(format!(
+        GteError::Inference(format!(
+            "output tensor '{}' not found in model outputs",
+            output_name
+        ))
     })?;
-    Ok(tensor_value.
+    Ok(tensor_value.try_extract_array::<f32>()?)
 }