gte 0.0.14 → 0.0.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/VERSION +1 -1
- data/ext/gte/Cargo.toml +2 -1
- data/ext/gte/src/session.rs +86 -31
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 9edaf702ddb74aa67328a218eef5ce5c211868ae9e59aac312dd4bd471c5ba51
|
|
4
|
+
data.tar.gz: fb7ab71c6ee482afc2db321b2a84222956716da906d8df5eee1e962eb26dbd7f
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 8ddb7b0a4f2b84c5d48c4007b51143b8275980332fe48c47ae444e47dc71691cb24f9dcbb386f17eb80e58ae30d981544f377be238d3872f11ee0c64be404ed7
|
|
7
|
+
data.tar.gz: 95173dde395043b1a792c8954548a7334d6bc1e528c39f4d54d9bed6796e949806e1c359e42b17d403b316f46005a31f0033785055fa6dc725a50a8e2599117c
|
data/VERSION
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
0.0.14
|
|
1
|
+
0.0.15
|
data/ext/gte/Cargo.toml
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[package]
|
|
2
2
|
name = "gte"
|
|
3
|
-
version = "0.0.14"
|
|
3
|
+
version = "0.0.15"
|
|
4
4
|
edition = "2021"
|
|
5
5
|
authors = ["elcuervo <elcuervo@elcuervo.net>"]
|
|
6
6
|
license = "MIT"
|
|
@@ -22,6 +22,7 @@ ruby-ffi = ["dep:magnus", "dep:rb-sys"]
|
|
|
22
22
|
rb-sys = { version = "0.9", features = ["stable-api-compiled-fallback"], optional = true }
|
|
23
23
|
magnus = { version = "0.8", optional = true }
|
|
24
24
|
ort = { version = "=2.0.0-rc.12", features = ["ndarray", "xnnpack"] }
|
|
25
|
+
parking_lot = "0.12"
|
|
25
26
|
tokenizers = "0.21.0"
|
|
26
27
|
ndarray = "0.17"
|
|
27
28
|
serde_json = "1"
|
data/ext/gte/src/session.rs
CHANGED
|
@@ -6,44 +6,57 @@ use crate::tokenizer::Tokenized;
|
|
|
6
6
|
use ndarray::{Array2, ArrayView2, ArrayViewD, Ix2};
|
|
7
7
|
use ort::execution_providers::{CoreMLExecutionProvider, ExecutionProviderDispatch, XNNPACKExecutionProvider};
|
|
8
8
|
use ort::session::{OutputSelector, RunOptions, Session};
|
|
9
|
-
use
|
|
10
|
-
use std::collections::hash_map::Entry;
|
|
11
|
-
use std::collections::HashMap;
|
|
9
|
+
use parking_lot::Mutex;
|
|
12
10
|
use std::path::{Path, PathBuf};
|
|
13
11
|
use std::sync::atomic::{AtomicUsize, Ordering};
|
|
12
|
+
use std::sync::Arc;
|
|
14
13
|
|
|
15
14
|
// ---------------------------------------------------------------------------
|
|
16
|
-
//
|
|
17
|
-
//
|
|
15
|
+
// Lazy session pool — starts with 1 session, grows on contention, capped.
|
|
16
|
+
//
|
|
17
|
+
// Pool max is resolved in order:
|
|
18
|
+
// 1. GTE_SESSION_POOL_SIZE env var (explicit override)
|
|
19
|
+
// 2. Auto: 2 (conservative: 2× pure Ruby memory at peak, no OOM risk)
|
|
20
|
+
//
|
|
21
|
+
// At idle the pool holds 1 session (same memory as pure Ruby's single
|
|
22
|
+
// OnnxRuntime::Model). When all existing sessions are busy and the cap
|
|
23
|
+
// hasn't been reached, a new session is created on-demand.
|
|
18
24
|
// ---------------------------------------------------------------------------
|
|
19
25
|
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
26
|
+
fn resolve_pool_cap() -> usize {
|
|
27
|
+
if let Some(n) =
|
|
28
|
+
std::env::var("GTE_SESSION_POOL_SIZE").ok().and_then(|v| v.trim().parse::<usize>().ok()).filter(|&n| n > 0)
|
|
29
|
+
{
|
|
30
|
+
return n;
|
|
31
|
+
}
|
|
32
|
+
2
|
|
25
33
|
}
|
|
26
34
|
|
|
27
|
-
|
|
28
|
-
|
|
35
|
+
pub struct SessionPool {
|
|
36
|
+
inner: Mutex<PoolInner>,
|
|
37
|
+
next_idx: AtomicUsize,
|
|
38
|
+
cap: usize,
|
|
29
39
|
}
|
|
30
40
|
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
41
|
+
struct PoolInner {
|
|
42
|
+
sessions: Vec<Arc<Mutex<Session>>>,
|
|
43
|
+
model_path: PathBuf,
|
|
44
|
+
build_config: ModelConfig,
|
|
34
45
|
}
|
|
35
46
|
|
|
36
47
|
impl SessionPool {
|
|
37
48
|
pub fn new(initial: Session, model_path: &Path, build_config: &ModelConfig) -> Result<Self> {
|
|
38
|
-
let
|
|
39
|
-
|
|
40
|
-
SESSIONS.with(|map| {
|
|
41
|
-
_ = map.borrow_mut().insert(pool_id, initial);
|
|
42
|
-
});
|
|
49
|
+
let cap = resolve_pool_cap();
|
|
50
|
+
let sessions = vec![Arc::new(Mutex::new(initial))];
|
|
43
51
|
|
|
44
52
|
Ok(Self {
|
|
45
|
-
|
|
46
|
-
|
|
53
|
+
inner: Mutex::new(PoolInner {
|
|
54
|
+
sessions,
|
|
55
|
+
model_path: model_path.to_path_buf(),
|
|
56
|
+
build_config: build_config.clone(),
|
|
57
|
+
}),
|
|
58
|
+
next_idx: AtomicUsize::new(0),
|
|
59
|
+
cap,
|
|
47
60
|
})
|
|
48
61
|
}
|
|
49
62
|
|
|
@@ -55,17 +68,59 @@ impl SessionPool {
|
|
|
55
68
|
where
|
|
56
69
|
F: FnOnce(&mut Session) -> Result<R>,
|
|
57
70
|
{
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
71
|
+
const SPIN_LIMIT: u32 = 64;
|
|
72
|
+
|
|
73
|
+
loop {
|
|
74
|
+
// Snapshot the pool under the outer lock so the scan below
|
|
75
|
+
// doesn't contend on that lock at all.
|
|
76
|
+
let arcs: Vec<Arc<Mutex<Session>>> = {
|
|
77
|
+
let inner = self.inner.lock();
|
|
78
|
+
inner.sessions.clone()
|
|
79
|
+
};
|
|
80
|
+
let len = arcs.len();
|
|
81
|
+
let start = self.next_idx.fetch_add(1, Ordering::Relaxed) % len;
|
|
82
|
+
|
|
83
|
+
for offset in 0..len {
|
|
84
|
+
let idx = (start + offset) % len;
|
|
85
|
+
if let Some(mut guard) = arcs[idx].try_lock() {
|
|
86
|
+
return f(&mut guard);
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
// All sessions busy — try to grow the pool
|
|
91
|
+
let grew = {
|
|
92
|
+
let mut inner = self.inner.lock();
|
|
93
|
+
if inner.sessions.len() < self.cap {
|
|
94
|
+
match build_session(&inner.model_path, &inner.build_config) {
|
|
95
|
+
Ok(session) => {
|
|
96
|
+
inner.sessions.push(Arc::new(Mutex::new(session)));
|
|
97
|
+
true
|
|
98
|
+
}
|
|
99
|
+
Err(e) => return Err(e),
|
|
100
|
+
}
|
|
101
|
+
} else {
|
|
102
|
+
false
|
|
65
103
|
}
|
|
66
104
|
};
|
|
67
|
-
|
|
68
|
-
|
|
105
|
+
|
|
106
|
+
if grew {
|
|
107
|
+
continue;
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
// At cap — spin briefly, then block on a session
|
|
111
|
+
let idx = self.next_idx.fetch_add(1, Ordering::Relaxed) % len;
|
|
112
|
+
let arc = Arc::clone(&arcs[idx]);
|
|
113
|
+
|
|
114
|
+
for _ in 0..SPIN_LIMIT {
|
|
115
|
+
if let Some(mut guard) = arc.try_lock() {
|
|
116
|
+
return f(&mut guard);
|
|
117
|
+
}
|
|
118
|
+
std::hint::spin_loop();
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
let mut guard = arc.lock();
|
|
122
|
+
return f(&mut guard);
|
|
123
|
+
}
|
|
69
124
|
}
|
|
70
125
|
}
|
|
71
126
|
|