gte 0.0.11 → 0.0.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +8 -9
- data/Rakefile +1 -1
- data/VERSION +1 -1
- data/ext/gte/Cargo.toml +1 -1
- data/ext/gte/benches/hot_path.rs +9 -5
- data/ext/gte/src/embedder.rs +5 -4
- data/ext/gte/src/model_config.rs +0 -1
- data/ext/gte/src/model_profile.rs +8 -4
- data/ext/gte/src/reranker.rs +0 -2
- data/ext/gte/src/ruby_embedder.rs +21 -26
- data/ext/gte/src/session.rs +32 -43
- data/ext/gte/tests/inference_integration_test.rs +4 -4
- data/lib/gte/config.rb +2 -2
- data/lib/gte/embedder.rb +0 -3
- data/lib/gte/reranker.rb +0 -2
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: fd947255138b34b53e9ddf15a5c0e69295df78d95b87e87fa0eb618ae18f1ece
|
|
4
|
+
data.tar.gz: 87ace8f9622fa68ef2f4d614fba5a18da11891c825f9b3b751896a366c7bf130
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 1b1dbf36073caeced7595047f53cc4c8dc4ae425b1bf06024f10d6835a61cc7270bf3a1768a9e3ba9e5d4e39115d3266db28767f353aaf968633a8c0a450f7f4
|
|
7
|
+
data.tar.gz: a18206611d3f7c0ddc175514502c471dabb22e0ff225081f73b3f4c267d47f0b85364d99e781f9df7f5184ada30bc31c17351ae1824f05f906bd041f2a0c9e45
|
data/README.md
CHANGED
|
@@ -33,10 +33,6 @@ raw_model = GTE.config(ENV.fetch("GTE_MODEL_DIR")) do |config|
|
|
|
33
33
|
config.with(normalize: false)
|
|
34
34
|
end
|
|
35
35
|
|
|
36
|
-
single_thread = GTE.config(ENV.fetch("GTE_MODEL_DIR")) do |config|
|
|
37
|
-
config.with(threads: 1)
|
|
38
|
-
end
|
|
39
|
-
|
|
40
36
|
custom = GTE.config(ENV.fetch("GTE_MODEL_DIR")) do |config|
|
|
41
37
|
config.with(
|
|
42
38
|
output_tensor: "last_hidden_state",
|
|
@@ -50,7 +46,6 @@ end
|
|
|
50
46
|
Config fields and defaults:
|
|
51
47
|
|
|
52
48
|
- `model_dir`: absolute path to model directory
|
|
53
|
-
- `threads`: `1` (default tuned for p95 latency; use `0` for ONNX Runtime auto-thread mode)
|
|
54
49
|
- `optimization_level`: `3`
|
|
55
50
|
- `model_name`: `nil`
|
|
56
51
|
- `normalize`: `true` (L2 normalization at Ruby-facing API)
|
|
@@ -68,7 +63,7 @@ Low-level embedder setup (without model cache):
|
|
|
68
63
|
|
|
69
64
|
```ruby
|
|
70
65
|
embedder = GTE::Embedder.config(ENV.fetch("GTE_MODEL_DIR")) do |config|
|
|
71
|
-
config.with(
|
|
66
|
+
config.with(execution_providers: "cpu")
|
|
72
67
|
end
|
|
73
68
|
```
|
|
74
69
|
|
|
@@ -78,7 +73,7 @@ Use `GTE::Reranker.config(model_dir)` for cross-encoder reranking.
|
|
|
78
73
|
|
|
79
74
|
```ruby
|
|
80
75
|
reranker = GTE::Reranker.config(ENV.fetch("GTE_RERANK_DIR")) do |config|
|
|
81
|
-
config.with(sigmoid: true
|
|
76
|
+
config.with(sigmoid: true)
|
|
82
77
|
end
|
|
83
78
|
|
|
84
79
|
query = "how to train a neural network?"
|
|
@@ -102,7 +97,6 @@ ranked = reranker.rerank(query: query, candidates: candidates)
|
|
|
102
97
|
Reranker config fields and defaults:
|
|
103
98
|
|
|
104
99
|
- `model_dir`: absolute path to model directory
|
|
105
|
-
- `threads`: `1`
|
|
106
100
|
- `optimization_level`: `3`
|
|
107
101
|
- `model_name`: `nil`
|
|
108
102
|
- `sigmoid`: `false` (set `true` if you want bounded [0,1] style scores)
|
|
@@ -111,6 +105,11 @@ Reranker config fields and defaults:
|
|
|
111
105
|
- `padding`: `nil` (auto; accepts `auto`, `batch_longest`, `fixed`)
|
|
112
106
|
- `execution_providers`: `nil`
|
|
113
107
|
|
|
108
|
+
Session pool sizing:
|
|
109
|
+
|
|
110
|
+
- `GTE_SESSION_POOL_CAP`: optional positive integer cap for internal ONNX session pool size.
|
|
111
|
+
- Unset by default; runtime uses available CPU parallelism.
|
|
112
|
+
|
|
114
113
|
## Runtime + Result Examples
|
|
115
114
|
|
|
116
115
|
Process-local reuse (recommended for Puma/web servers):
|
|
@@ -185,7 +184,7 @@ nix develop -c bundle exec ruby bench/memory_probe.rb --compare-pure
|
|
|
185
184
|
|
|
186
185
|
- `make bench`: Puma-like single-request comparison at concurrency `16`
|
|
187
186
|
- `rake bench:pure_compare`: batch amortization comparison
|
|
188
|
-
- `rake bench:matrix_sweep`: GTE provider
|
|
187
|
+
- `rake bench:matrix_sweep`: GTE provider sweep using the shared result schema
|
|
189
188
|
- Optional Python comparisons use `bench/python_onnxruntime.py` and are skipped automatically if local dependencies are unavailable.
|
|
190
189
|
|
|
191
190
|
To run benchmark + append a `RUNS.md` entry + enforce goal checks:
|
data/Rakefile
CHANGED
|
@@ -74,7 +74,7 @@ namespace :bench do
|
|
|
74
74
|
)
|
|
75
75
|
end
|
|
76
76
|
|
|
77
|
-
desc 'Sweep execution-provider
|
|
77
|
+
desc 'Sweep execution-provider settings for Puma-like benchmark'
|
|
78
78
|
task :matrix_sweep do
|
|
79
79
|
run_in_nix(
|
|
80
80
|
'bundle', 'exec', 'ruby', 'bench/puma_matrix_sweep.rb',
|
data/VERSION
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
0.0.
|
|
1
|
+
0.0.12
|
data/ext/gte/Cargo.toml
CHANGED
data/ext/gte/benches/hot_path.rs
CHANGED
|
@@ -90,7 +90,7 @@ fn bench_padding_impact(c: &mut Criterion) {
|
|
|
90
90
|
// GTE_BENCH_E5_DIR — sentence-transformers / E5-style text model dir
|
|
91
91
|
// GTE_BENCH_SIGLIP2_DIR — siglip2 text encoder dir
|
|
92
92
|
// GTE_BENCH_CLIP_DIR — clip text encoder dir
|
|
93
|
-
// Sweeps
|
|
93
|
+
// Sweeps execution providers for quick local comparison.
|
|
94
94
|
fn bench_embedding_e2e(c: &mut Criterion) {
|
|
95
95
|
let cases = [
|
|
96
96
|
("e5", "GTE_BENCH_E5_DIR", "query: cat", "query: ".to_string() + &"the quick brown fox jumps over the lazy dog ".repeat(20)),
|
|
@@ -106,17 +106,21 @@ fn bench_embedding_e2e(c: &mut Criterion) {
|
|
|
106
106
|
continue;
|
|
107
107
|
};
|
|
108
108
|
|
|
109
|
-
for
|
|
110
|
-
let
|
|
109
|
+
for provider in ["cpu", "xnnpack"] {
|
|
110
|
+
let overrides = ModelLoadOverrides {
|
|
111
|
+
execution_providers: Some(provider),
|
|
112
|
+
..ModelLoadOverrides::default()
|
|
113
|
+
};
|
|
114
|
+
let embedder = match Embedder::from_dir(&dir, 3, overrides) {
|
|
111
115
|
Ok(e) => e,
|
|
112
116
|
Err(err) => {
|
|
113
|
-
eprintln!("skip {model_label}
|
|
117
|
+
eprintln!("skip {model_label} provider={provider}: {err}");
|
|
114
118
|
continue;
|
|
115
119
|
}
|
|
116
120
|
};
|
|
117
121
|
|
|
118
122
|
for (input_label, input) in [("short", short_input.to_string()), ("long", long_input.clone())] {
|
|
119
|
-
let id = BenchmarkId::from_parameter(format!("{model_label}/
|
|
123
|
+
let id = BenchmarkId::from_parameter(format!("{model_label}/{provider}/{input_label}"));
|
|
120
124
|
group.bench_with_input(id, &input, |b, text| {
|
|
121
125
|
b.iter(|| {
|
|
122
126
|
embedder
|
data/ext/gte/src/embedder.rs
CHANGED
|
@@ -37,7 +37,6 @@ impl Embedder {
|
|
|
37
37
|
|
|
38
38
|
pub fn from_dir<P: AsRef<Path>>(
|
|
39
39
|
dir: P,
|
|
40
|
-
num_threads: usize,
|
|
41
40
|
optimization_level: u8,
|
|
42
41
|
overrides: ModelLoadOverrides<'_>,
|
|
43
42
|
) -> Result<Self> {
|
|
@@ -76,7 +75,6 @@ impl Embedder {
|
|
|
76
75
|
mode: ExtractorMode::Raw,
|
|
77
76
|
with_type_ids: false,
|
|
78
77
|
with_attention_mask: true,
|
|
79
|
-
num_threads,
|
|
80
78
|
optimization_level,
|
|
81
79
|
execution_providers: overrides.execution_providers.map(str::to_string),
|
|
82
80
|
};
|
|
@@ -101,7 +99,6 @@ impl Embedder {
|
|
|
101
99
|
mode,
|
|
102
100
|
with_type_ids,
|
|
103
101
|
with_attention_mask,
|
|
104
|
-
num_threads,
|
|
105
102
|
optimization_level,
|
|
106
103
|
execution_providers: overrides.execution_providers.map(str::to_string),
|
|
107
104
|
};
|
|
@@ -119,7 +116,11 @@ impl Embedder {
|
|
|
119
116
|
}
|
|
120
117
|
|
|
121
118
|
pub fn embed(&self, texts: Vec<String>) -> Result<Array2<f32>> {
|
|
122
|
-
|
|
119
|
+
self.embed_ref(&texts)
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
pub fn embed_ref(&self, texts: &[String]) -> Result<Array2<f32>> {
|
|
123
|
+
let tokenized = self.tokenize(texts)?;
|
|
123
124
|
self.run(&tokenized)
|
|
124
125
|
}
|
|
125
126
|
|
data/ext/gte/src/model_config.rs
CHANGED
|
@@ -201,10 +201,14 @@ pub fn select_output_tensor(
|
|
|
201
201
|
}
|
|
202
202
|
}
|
|
203
203
|
|
|
204
|
-
session
|
|
205
|
-
|
|
206
|
-
.
|
|
207
|
-
.
|
|
204
|
+
let outputs = session.outputs();
|
|
205
|
+
let best = outputs
|
|
206
|
+
.iter()
|
|
207
|
+
.find(|o| {
|
|
208
|
+
matches!(o.dtype(), ort::value::ValueType::Tensor { shape, .. } if shape.len() == 2)
|
|
209
|
+
})
|
|
210
|
+
.or_else(|| outputs.first());
|
|
211
|
+
best.map(|o| o.name().to_owned())
|
|
208
212
|
.ok_or_else(|| GteError::Inference("model has no outputs".into()))
|
|
209
213
|
}
|
|
210
214
|
|
data/ext/gte/src/reranker.rs
CHANGED
|
@@ -28,7 +28,6 @@ pub struct Reranker {
|
|
|
28
28
|
impl Reranker {
|
|
29
29
|
pub fn from_dir<P: AsRef<Path>>(
|
|
30
30
|
dir: P,
|
|
31
|
-
num_threads: usize,
|
|
32
31
|
optimization_level: u8,
|
|
33
32
|
overrides: ModelLoadOverrides<'_>,
|
|
34
33
|
) -> Result<Self> {
|
|
@@ -60,7 +59,6 @@ impl Reranker {
|
|
|
60
59
|
mode: crate::model_config::ExtractorMode::Raw,
|
|
61
60
|
with_type_ids: false,
|
|
62
61
|
with_attention_mask: true,
|
|
63
|
-
num_threads,
|
|
64
62
|
optimization_level,
|
|
65
63
|
execution_providers: overrides.execution_providers.map(str::to_string),
|
|
66
64
|
};
|
|
@@ -4,7 +4,6 @@ use crate::embedder::{normalize_l2, Embedder};
|
|
|
4
4
|
use crate::error::GteError;
|
|
5
5
|
use crate::model_config::ModelLoadOverrides;
|
|
6
6
|
use crate::reranker::Reranker;
|
|
7
|
-
use crate::tokenizer::Tokenized;
|
|
8
7
|
use magnus::{function, method, prelude::*, wrap, Error, RArray, Ruby};
|
|
9
8
|
use std::os::raw::c_void;
|
|
10
9
|
use std::panic::{catch_unwind, AssertUnwindSafe};
|
|
@@ -33,10 +32,9 @@ pub struct RbTensor {
|
|
|
33
32
|
// GVL-release helpers
|
|
34
33
|
// ---------------------------------------------------------------------------
|
|
35
34
|
|
|
36
|
-
// Tokenized holds only Vec<i64> fields — safe to send across threads.
|
|
37
35
|
struct InferArgs {
|
|
38
36
|
embedder: *const Embedder,
|
|
39
|
-
|
|
37
|
+
texts: *const Vec<String>,
|
|
40
38
|
normalize: bool,
|
|
41
39
|
result: Option<crate::error::Result<ndarray::Array2<f32>>>,
|
|
42
40
|
}
|
|
@@ -45,7 +43,8 @@ unsafe impl Send for InferArgs {}
|
|
|
45
43
|
|
|
46
44
|
struct ScoreArgs {
|
|
47
45
|
reranker: *const Reranker,
|
|
48
|
-
|
|
46
|
+
query: *const String,
|
|
47
|
+
candidates: *const Vec<String>,
|
|
49
48
|
apply_sigmoid: bool,
|
|
50
49
|
result: Option<crate::error::Result<Vec<f32>>>,
|
|
51
50
|
}
|
|
@@ -62,12 +61,11 @@ fn panic_payload_to_string(payload: Box<dyn std::any::Any + Send>) -> String {
|
|
|
62
61
|
}
|
|
63
62
|
}
|
|
64
63
|
|
|
65
|
-
unsafe extern "C" fn
|
|
64
|
+
unsafe extern "C" fn run_embed_without_gvl(ptr: *mut c_void) -> *mut c_void {
|
|
66
65
|
let args = &mut *(ptr as *mut InferArgs);
|
|
67
66
|
let run_result = catch_unwind(AssertUnwindSafe(|| {
|
|
68
|
-
//
|
|
69
|
-
|
|
70
|
-
let embeddings = (*args.embedder).run(&*args.tokenized)?;
|
|
67
|
+
// Full embedding path (tokenization + inference) runs without the GVL.
|
|
68
|
+
let embeddings = (*args.embedder).embed_ref(&*args.texts)?;
|
|
71
69
|
if args.normalize { Ok(normalize_l2(embeddings)) } else { Ok(embeddings) }
|
|
72
70
|
}));
|
|
73
71
|
args.result = Some(match run_result {
|
|
@@ -83,7 +81,7 @@ unsafe extern "C" fn run_without_gvl(ptr: *mut c_void) -> *mut c_void {
|
|
|
83
81
|
unsafe extern "C" fn run_score_without_gvl(ptr: *mut c_void) -> *mut c_void {
|
|
84
82
|
let args = &mut *(ptr as *mut ScoreArgs);
|
|
85
83
|
let run_result = catch_unwind(AssertUnwindSafe(|| {
|
|
86
|
-
(*args.reranker).
|
|
84
|
+
(*args.reranker).score(&*args.query, &*args.candidates, args.apply_sigmoid)
|
|
87
85
|
}));
|
|
88
86
|
args.result = Some(match run_result {
|
|
89
87
|
Ok(result) => result,
|
|
@@ -98,17 +96,17 @@ unsafe extern "C" fn run_score_without_gvl(ptr: *mut c_void) -> *mut c_void {
|
|
|
98
96
|
fn infer_without_gvl(
|
|
99
97
|
embedder: &Arc<Embedder>,
|
|
100
98
|
normalize: bool,
|
|
101
|
-
|
|
99
|
+
texts: Vec<String>,
|
|
102
100
|
) -> Result<ndarray::Array2<f32>, Error> {
|
|
103
101
|
let embeddings = unsafe {
|
|
104
102
|
let mut args = InferArgs {
|
|
105
103
|
embedder: Arc::as_ptr(embedder),
|
|
106
|
-
|
|
104
|
+
texts: &texts as *const Vec<String>,
|
|
107
105
|
normalize,
|
|
108
106
|
result: None,
|
|
109
107
|
};
|
|
110
108
|
rb_sys::rb_thread_call_without_gvl(
|
|
111
|
-
Some(
|
|
109
|
+
Some(run_embed_without_gvl),
|
|
112
110
|
&mut args as *mut InferArgs as *mut c_void,
|
|
113
111
|
None,
|
|
114
112
|
std::ptr::null_mut(),
|
|
@@ -125,13 +123,15 @@ fn infer_without_gvl(
|
|
|
125
123
|
|
|
126
124
|
fn score_without_gvl(
|
|
127
125
|
reranker: &Arc<Reranker>,
|
|
128
|
-
|
|
126
|
+
query: String,
|
|
127
|
+
candidates: Vec<String>,
|
|
129
128
|
apply_sigmoid: bool,
|
|
130
129
|
) -> Result<Vec<f32>, Error> {
|
|
131
130
|
let scores = unsafe {
|
|
132
131
|
let mut args = ScoreArgs {
|
|
133
132
|
reranker: Arc::as_ptr(reranker),
|
|
134
|
-
|
|
133
|
+
query: &query as *const String,
|
|
134
|
+
candidates: &candidates as *const Vec<String>,
|
|
135
135
|
apply_sigmoid,
|
|
136
136
|
result: None,
|
|
137
137
|
};
|
|
@@ -170,7 +170,6 @@ impl RbEmbedder {
|
|
|
170
170
|
pub fn rb_new(
|
|
171
171
|
_ruby: &Ruby,
|
|
172
172
|
dir_path: String,
|
|
173
|
-
num_threads: usize,
|
|
174
173
|
optimization_level: u8,
|
|
175
174
|
model_name: String,
|
|
176
175
|
normalize: bool,
|
|
@@ -191,21 +190,19 @@ impl RbEmbedder {
|
|
|
191
190
|
padding: padding_override,
|
|
192
191
|
execution_providers: execution_providers_override,
|
|
193
192
|
};
|
|
194
|
-
let embedder = Embedder::from_dir(&dir_path,
|
|
193
|
+
let embedder = Embedder::from_dir(&dir_path, optimization_level, overrides)
|
|
195
194
|
.map_err(magnus::Error::from)?;
|
|
196
195
|
Ok(RbEmbedder { inner: Arc::new(embedder), normalize })
|
|
197
196
|
}
|
|
198
197
|
|
|
199
198
|
pub fn rb_embed(_ruby: &Ruby, rb_self: &Self, texts: RArray) -> Result<RbTensor, Error> {
|
|
200
199
|
let texts: Vec<String> = texts.to_vec()?;
|
|
201
|
-
let
|
|
202
|
-
let embeddings = infer_without_gvl(&rb_self.inner, rb_self.normalize, &tokenized)?;
|
|
200
|
+
let embeddings = infer_without_gvl(&rb_self.inner, rb_self.normalize, texts)?;
|
|
203
201
|
tensor_from_array(embeddings)
|
|
204
202
|
}
|
|
205
203
|
|
|
206
204
|
pub fn rb_embed_one(_ruby: &Ruby, rb_self: &Self, text: String) -> Result<RbTensor, Error> {
|
|
207
|
-
let
|
|
208
|
-
let embeddings = infer_without_gvl(&rb_self.inner, rb_self.normalize, &tokenized)?;
|
|
205
|
+
let embeddings = infer_without_gvl(&rb_self.inner, rb_self.normalize, vec![text])?;
|
|
209
206
|
tensor_from_array(embeddings)
|
|
210
207
|
}
|
|
211
208
|
}
|
|
@@ -214,7 +211,6 @@ impl RbReranker {
|
|
|
214
211
|
pub fn rb_new(
|
|
215
212
|
_ruby: &Ruby,
|
|
216
213
|
dir_path: String,
|
|
217
|
-
num_threads: usize,
|
|
218
214
|
optimization_level: u8,
|
|
219
215
|
model_name: String,
|
|
220
216
|
sigmoid: bool,
|
|
@@ -235,7 +231,7 @@ impl RbReranker {
|
|
|
235
231
|
padding: padding_override,
|
|
236
232
|
execution_providers: execution_providers_override,
|
|
237
233
|
};
|
|
238
|
-
let reranker = Reranker::from_dir(&dir_path,
|
|
234
|
+
let reranker = Reranker::from_dir(&dir_path, optimization_level, overrides)
|
|
239
235
|
.map_err(magnus::Error::from)?;
|
|
240
236
|
Ok(RbReranker { inner: Arc::new(reranker), sigmoid })
|
|
241
237
|
}
|
|
@@ -247,8 +243,7 @@ impl RbReranker {
|
|
|
247
243
|
candidates: RArray,
|
|
248
244
|
) -> Result<RArray, Error> {
|
|
249
245
|
let candidates: Vec<String> = candidates.to_vec()?;
|
|
250
|
-
let
|
|
251
|
-
let scores = score_without_gvl(&rb_self.inner, pairs, rb_self.sigmoid)?;
|
|
246
|
+
let scores = score_without_gvl(&rb_self.inner, query, candidates, rb_self.sigmoid)?;
|
|
252
247
|
let out = ruby.ary_new_capa(scores.len());
|
|
253
248
|
for score in scores {
|
|
254
249
|
out.push(score)?;
|
|
@@ -345,12 +340,12 @@ impl RbTensor {
|
|
|
345
340
|
pub fn register(ruby: &Ruby) -> Result<(), Error> {
|
|
346
341
|
let module = ruby.define_module("GTE")?;
|
|
347
342
|
let embedder_class = module.define_class("Embedder", ruby.class_object())?;
|
|
348
|
-
embedder_class.define_singleton_method("new", function!(RbEmbedder::rb_new,
|
|
343
|
+
embedder_class.define_singleton_method("new", function!(RbEmbedder::rb_new, 8))?;
|
|
349
344
|
embedder_class.define_method("embed", method!(RbEmbedder::rb_embed, 1))?;
|
|
350
345
|
embedder_class.define_method("embed_one", method!(RbEmbedder::rb_embed_one, 1))?;
|
|
351
346
|
|
|
352
347
|
let reranker_class = module.define_class("Reranker", ruby.class_object())?;
|
|
353
|
-
reranker_class.define_singleton_method("new", function!(RbReranker::rb_new,
|
|
348
|
+
reranker_class.define_singleton_method("new", function!(RbReranker::rb_new, 8))?;
|
|
354
349
|
reranker_class.define_method("score", method!(RbReranker::rb_score, 2))?;
|
|
355
350
|
|
|
356
351
|
let tensor_class = module.define_class("Tensor", ruby.class_object())?;
|
data/ext/gte/src/session.rs
CHANGED
|
@@ -27,8 +27,6 @@ pub fn build_session<P: AsRef<Path>>(model_path: P, config: &ModelConfig) -> Res
|
|
|
27
27
|
let mut builder = Session::builder()
|
|
28
28
|
.map_err(ort_err)?
|
|
29
29
|
.with_optimization_level(opt_level)
|
|
30
|
-
.map_err(ort_err)?
|
|
31
|
-
.with_memory_pattern(false)
|
|
32
30
|
.map_err(ort_err)?;
|
|
33
31
|
|
|
34
32
|
let providers = preferred_execution_providers(config.execution_providers.as_deref());
|
|
@@ -38,15 +36,6 @@ pub fn build_session<P: AsRef<Path>>(model_path: P, config: &ModelConfig) -> Res
|
|
|
38
36
|
.map_err(ort_err)?;
|
|
39
37
|
}
|
|
40
38
|
|
|
41
|
-
if config.num_threads > 0 {
|
|
42
|
-
builder = builder
|
|
43
|
-
.with_intra_threads(config.num_threads)
|
|
44
|
-
.map_err(ort_err)?;
|
|
45
|
-
builder = builder
|
|
46
|
-
.with_inter_threads(config.num_threads)
|
|
47
|
-
.map_err(ort_err)?;
|
|
48
|
-
}
|
|
49
|
-
|
|
50
39
|
builder.commit_from_file(model_path).map_err(ort_err)
|
|
51
40
|
}
|
|
52
41
|
|
|
@@ -54,25 +43,17 @@ pub fn build_session<P: AsRef<Path>>(model_path: P, config: &ModelConfig) -> Res
|
|
|
54
43
|
// Session pool
|
|
55
44
|
// ---------------------------------------------------------------------------
|
|
56
45
|
|
|
57
|
-
fn pool_capacity(
|
|
58
|
-
let
|
|
46
|
+
fn pool_capacity() -> usize {
|
|
47
|
+
let available = std::thread::available_parallelism()
|
|
59
48
|
.map(|n| n.get())
|
|
60
49
|
.unwrap_or(1);
|
|
61
|
-
|
|
50
|
+
parse_pool_capacity_override().map_or(available, |cap| cap.min(available).max(1))
|
|
62
51
|
}
|
|
63
52
|
|
|
64
|
-
fn
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
// Auto-thread mode: ORT grabs all cores per session. One session avoids
|
|
70
|
-
// N² intra-op oversubscription when multiple Ruby threads call concurrently.
|
|
71
|
-
if num_threads == 0 {
|
|
72
|
-
return 1;
|
|
73
|
-
}
|
|
74
|
-
|
|
75
|
-
available_parallelism.div_ceil(num_threads).max(1)
|
|
53
|
+
fn parse_pool_capacity_override() -> Option<usize> {
|
|
54
|
+
let raw = std::env::var("GTE_SESSION_POOL_CAP").ok()?;
|
|
55
|
+
let parsed = raw.trim().parse::<usize>().ok()?;
|
|
56
|
+
(parsed > 0).then_some(parsed)
|
|
76
57
|
}
|
|
77
58
|
|
|
78
59
|
pub struct SessionPool {
|
|
@@ -86,7 +67,7 @@ pub struct SessionPool {
|
|
|
86
67
|
|
|
87
68
|
impl SessionPool {
|
|
88
69
|
pub fn new(initial: Session, model_path: PathBuf, build_config: ModelConfig) -> Self {
|
|
89
|
-
let capacity = pool_capacity(
|
|
70
|
+
let capacity = pool_capacity();
|
|
90
71
|
Self {
|
|
91
72
|
sessions: Mutex::new(vec![initial]),
|
|
92
73
|
available: Condvar::new(),
|
|
@@ -282,7 +263,7 @@ mod tests {
|
|
|
282
263
|
use ndarray::{array, ArrayView2};
|
|
283
264
|
|
|
284
265
|
use super::{
|
|
285
|
-
extract_embeddings,
|
|
266
|
+
extract_embeddings, parse_pool_capacity_override, parse_provider_registrations,
|
|
286
267
|
resolve_provider_order_with_env,
|
|
287
268
|
};
|
|
288
269
|
|
|
@@ -294,7 +275,6 @@ mod tests {
|
|
|
294
275
|
mode,
|
|
295
276
|
with_type_ids: false,
|
|
296
277
|
with_attention_mask: true,
|
|
297
|
-
num_threads: 1,
|
|
298
278
|
optimization_level: 3,
|
|
299
279
|
execution_providers: None,
|
|
300
280
|
}
|
|
@@ -343,21 +323,30 @@ mod tests {
|
|
|
343
323
|
}
|
|
344
324
|
|
|
345
325
|
#[test]
|
|
346
|
-
fn
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
assert_eq!(
|
|
351
|
-
assert_eq!(pool_capacity_with_parallelism(0, 8), 1);
|
|
352
|
-
}
|
|
326
|
+
fn parse_pool_capacity_override_uses_positive_integer_only() {
|
|
327
|
+
unsafe {
|
|
328
|
+
std::env::remove_var("GTE_SESSION_POOL_CAP");
|
|
329
|
+
}
|
|
330
|
+
assert_eq!(parse_pool_capacity_override(), None);
|
|
353
331
|
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
assert_eq!(
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
332
|
+
unsafe {
|
|
333
|
+
std::env::set_var("GTE_SESSION_POOL_CAP", "0");
|
|
334
|
+
}
|
|
335
|
+
assert_eq!(parse_pool_capacity_override(), None);
|
|
336
|
+
|
|
337
|
+
unsafe {
|
|
338
|
+
std::env::set_var("GTE_SESSION_POOL_CAP", "4");
|
|
339
|
+
}
|
|
340
|
+
assert_eq!(parse_pool_capacity_override(), Some(4));
|
|
341
|
+
|
|
342
|
+
unsafe {
|
|
343
|
+
std::env::set_var("GTE_SESSION_POOL_CAP", "abc");
|
|
344
|
+
}
|
|
345
|
+
assert_eq!(parse_pool_capacity_override(), None);
|
|
346
|
+
|
|
347
|
+
unsafe {
|
|
348
|
+
std::env::remove_var("GTE_SESSION_POOL_CAP");
|
|
349
|
+
}
|
|
361
350
|
}
|
|
362
351
|
|
|
363
352
|
#[test]
|
|
@@ -8,7 +8,7 @@ fn model_dir(env_var: &str) -> Option<String> {
|
|
|
8
8
|
#[test]
|
|
9
9
|
fn test_e5_single_embedding_shape() {
|
|
10
10
|
let Some(dir) = model_dir("GTE_BENCH_E5_DIR") else { return };
|
|
11
|
-
let embedder = Embedder::from_dir(&dir, 0,
|
|
11
|
+
let embedder = Embedder::from_dir(&dir, 0, ModelLoadOverrides::default())
|
|
12
12
|
.expect("embedder should initialize");
|
|
13
13
|
let result = embedder
|
|
14
14
|
.embed(vec!["query: Hello world".to_string()])
|
|
@@ -21,7 +21,7 @@ fn test_e5_single_embedding_shape() {
|
|
|
21
21
|
#[test]
|
|
22
22
|
fn test_clip_single_embedding_shape() {
|
|
23
23
|
let Some(dir) = model_dir("GTE_BENCH_CLIP_DIR") else { return };
|
|
24
|
-
let embedder = Embedder::from_dir(&dir, 0,
|
|
24
|
+
let embedder = Embedder::from_dir(&dir, 0, ModelLoadOverrides::default())
|
|
25
25
|
.expect("embedder should initialize");
|
|
26
26
|
let result = embedder
|
|
27
27
|
.embed(vec!["a photo of a cat".to_string()])
|
|
@@ -34,7 +34,7 @@ fn test_clip_single_embedding_shape() {
|
|
|
34
34
|
#[test]
|
|
35
35
|
fn test_e5_batch_embedding_shape() {
|
|
36
36
|
let Some(dir) = model_dir("GTE_BENCH_E5_DIR") else { return };
|
|
37
|
-
let embedder = Embedder::from_dir(&dir, 0,
|
|
37
|
+
let embedder = Embedder::from_dir(&dir, 0, ModelLoadOverrides::default())
|
|
38
38
|
.expect("embedder should initialize");
|
|
39
39
|
let texts = vec![
|
|
40
40
|
"query: first sentence".to_string(),
|
|
@@ -51,7 +51,7 @@ fn test_e5_batch_embedding_shape() {
|
|
|
51
51
|
#[test]
|
|
52
52
|
fn test_e5_long_input_truncation_no_error() {
|
|
53
53
|
let Some(dir) = model_dir("GTE_BENCH_E5_DIR") else { return };
|
|
54
|
-
let embedder = Embedder::from_dir(&dir, 0,
|
|
54
|
+
let embedder = Embedder::from_dir(&dir, 0, ModelLoadOverrides::default())
|
|
55
55
|
.expect("embedder should initialize");
|
|
56
56
|
let very_long_text = "word ".repeat(1000);
|
|
57
57
|
let result = embedder
|
data/lib/gte/config.rb
CHANGED
|
@@ -3,12 +3,12 @@
|
|
|
3
3
|
module GTE
|
|
4
4
|
module Config
|
|
5
5
|
Text = Data.define(
|
|
6
|
-
:model_dir, :
|
|
6
|
+
:model_dir, :optimization_level,
|
|
7
7
|
:model_name, :normalize, :output_tensor, :max_length, :padding, :execution_providers
|
|
8
8
|
)
|
|
9
9
|
|
|
10
10
|
Reranker = Data.define(
|
|
11
|
-
:model_dir, :
|
|
11
|
+
:model_dir, :optimization_level,
|
|
12
12
|
:model_name, :sigmoid, :output_tensor, :max_length, :padding, :execution_providers
|
|
13
13
|
)
|
|
14
14
|
end
|
data/lib/gte/embedder.rb
CHANGED
|
@@ -2,7 +2,6 @@
|
|
|
2
2
|
|
|
3
3
|
module GTE
|
|
4
4
|
class Embedder
|
|
5
|
-
DEFAULT_THREADS = 0
|
|
6
5
|
DEFAULT_OPTIMIZATION_LEVEL = 3
|
|
7
6
|
|
|
8
7
|
class << self
|
|
@@ -15,7 +14,6 @@ module GTE
|
|
|
15
14
|
def from_config(config)
|
|
16
15
|
new(
|
|
17
16
|
config.model_dir,
|
|
18
|
-
config.threads,
|
|
19
17
|
config.optimization_level,
|
|
20
18
|
config.model_name.to_s,
|
|
21
19
|
config.normalize,
|
|
@@ -29,7 +27,6 @@ module GTE
|
|
|
29
27
|
def default_config(model_dir)
|
|
30
28
|
Config::Text.new(
|
|
31
29
|
model_dir: File.expand_path(model_dir),
|
|
32
|
-
threads: DEFAULT_THREADS,
|
|
33
30
|
optimization_level: DEFAULT_OPTIMIZATION_LEVEL,
|
|
34
31
|
model_name: nil,
|
|
35
32
|
normalize: true,
|
data/lib/gte/reranker.rb
CHANGED
|
@@ -19,7 +19,6 @@ module GTE
|
|
|
19
19
|
def default_config(model_dir)
|
|
20
20
|
Config::Reranker.new(
|
|
21
21
|
model_dir: File.expand_path(model_dir),
|
|
22
|
-
threads: 1,
|
|
23
22
|
optimization_level: 3,
|
|
24
23
|
model_name: nil,
|
|
25
24
|
sigmoid: false,
|
|
@@ -33,7 +32,6 @@ module GTE
|
|
|
33
32
|
def build(cfg)
|
|
34
33
|
new(
|
|
35
34
|
cfg.model_dir,
|
|
36
|
-
cfg.threads,
|
|
37
35
|
cfg.optimization_level,
|
|
38
36
|
cfg.model_name.to_s,
|
|
39
37
|
cfg.sigmoid,
|