gte 0.0.14 → 0.0.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 37ad2ef3f640b8bbaefa14cae29541f329c3b99950a2867b9054b8e8854ca242
4
- data.tar.gz: 0b54c757ca510ccc8644e0d3c1519aa7b6576fa1da4ba9012535e0b0b7d598dc
3
+ metadata.gz: 9edaf702ddb74aa67328a218eef5ce5c211868ae9e59aac312dd4bd471c5ba51
4
+ data.tar.gz: fb7ab71c6ee482afc2db321b2a84222956716da906d8df5eee1e962eb26dbd7f
5
5
  SHA512:
6
- metadata.gz: 5d67fc8c73aa2b162bf804082accc849ce972b4df51e2ec86bb7d977cd760e2bf5b760d7de141b5fbd5b6f68dd0e51d2bd15c84d1cb8dc9a8f4bf424b62490ba
7
- data.tar.gz: c6cefec4d42f7ca72980e4d3454447aa681f05f0b72f96ceec226b5efd5ae32bade13d85ae55f032098d7cb7f72b530d8332f1eaf25fa364d0c33703da13e043
6
+ metadata.gz: 8ddb7b0a4f2b84c5d48c4007b51143b8275980332fe48c47ae444e47dc71691cb24f9dcbb386f17eb80e58ae30d981544f377be238d3872f11ee0c64be404ed7
7
+ data.tar.gz: 95173dde395043b1a792c8954548a7334d6bc1e528c39f4d54d9bed6796e949806e1c359e42b17d403b316f46005a31f0033785055fa6dc725a50a8e2599117c
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.14
1
+ 0.0.15
data/ext/gte/Cargo.toml CHANGED
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "gte"
3
- version = "0.0.14"
3
+ version = "0.0.15"
4
4
  edition = "2021"
5
5
  authors = ["elcuervo <elcuervo@elcuervo.net>"]
6
6
  license = "MIT"
@@ -22,6 +22,7 @@ ruby-ffi = ["dep:magnus", "dep:rb-sys"]
22
22
  rb-sys = { version = "0.9", features = ["stable-api-compiled-fallback"], optional = true }
23
23
  magnus = { version = "0.8", optional = true }
24
24
  ort = { version = "=2.0.0-rc.12", features = ["ndarray", "xnnpack"] }
25
+ parking_lot = "0.12"
25
26
  tokenizers = "0.21.0"
26
27
  ndarray = "0.17"
27
28
  serde_json = "1"
@@ -6,44 +6,57 @@ use crate::tokenizer::Tokenized;
6
6
  use ndarray::{Array2, ArrayView2, ArrayViewD, Ix2};
7
7
  use ort::execution_providers::{CoreMLExecutionProvider, ExecutionProviderDispatch, XNNPACKExecutionProvider};
8
8
  use ort::session::{OutputSelector, RunOptions, Session};
9
- use std::cell::RefCell;
10
- use std::collections::hash_map::Entry;
11
- use std::collections::HashMap;
9
+ use parking_lot::Mutex;
12
10
  use std::path::{Path, PathBuf};
13
11
  use std::sync::atomic::{AtomicUsize, Ordering};
12
+ use std::sync::Arc;
14
13
 
15
14
  // ---------------------------------------------------------------------------
16
- // Thread-local session storage — each OS thread lazily creates its own ONNX
17
- // session the first time it calls into a given pool. No Mutex, no contention.
15
+ // Lazy session pool — starts with 1 session, grows on contention, capped.
16
+ //
17
+ // Pool max is resolved in order:
18
+ // 1. GTE_SESSION_POOL_SIZE env var (explicit override)
19
+ // 2. Auto: 2 (conservative: 2× pure Ruby memory at peak, no OOM risk)
20
+ //
21
+ // At idle the pool holds 1 session (same memory as pure Ruby's single
22
+ // OnnxRuntime::Model). When all existing sessions are busy and the cap
23
+ // hasn't been reached, a new session is created on-demand.
18
24
  // ---------------------------------------------------------------------------
19
25
 
20
- static NEXT_POOL_ID: AtomicUsize = AtomicUsize::new(1);
21
-
22
- struct SessionRecipe {
23
- model_path: PathBuf,
24
- build_config: ModelConfig,
26
+ fn resolve_pool_cap() -> usize {
27
+ if let Some(n) =
28
+ std::env::var("GTE_SESSION_POOL_SIZE").ok().and_then(|v| v.trim().parse::<usize>().ok()).filter(|&n| n > 0)
29
+ {
30
+ return n;
31
+ }
32
+ 2
25
33
  }
26
34
 
27
- thread_local! {
28
- static SESSIONS: RefCell<HashMap<usize, Session>> = RefCell::new(HashMap::new());
35
+ pub struct SessionPool {
36
+ inner: Mutex<PoolInner>,
37
+ next_idx: AtomicUsize,
38
+ cap: usize,
29
39
  }
30
40
 
31
- pub struct SessionPool {
32
- pool_id: usize,
33
- recipe: SessionRecipe,
41
+ struct PoolInner {
42
+ sessions: Vec<Arc<Mutex<Session>>>,
43
+ model_path: PathBuf,
44
+ build_config: ModelConfig,
34
45
  }
35
46
 
36
47
  impl SessionPool {
37
48
  pub fn new(initial: Session, model_path: &Path, build_config: &ModelConfig) -> Result<Self> {
38
- let pool_id = NEXT_POOL_ID.fetch_add(1, Ordering::Relaxed);
39
-
40
- SESSIONS.with(|map| {
41
- _ = map.borrow_mut().insert(pool_id, initial);
42
- });
49
+ let cap = resolve_pool_cap();
50
+ let sessions = vec![Arc::new(Mutex::new(initial))];
43
51
 
44
52
  Ok(Self {
45
- pool_id,
46
- recipe: SessionRecipe { model_path: model_path.to_path_buf(), build_config: build_config.clone() },
53
+ inner: Mutex::new(PoolInner {
54
+ sessions,
55
+ model_path: model_path.to_path_buf(),
56
+ build_config: build_config.clone(),
57
+ }),
58
+ next_idx: AtomicUsize::new(0),
59
+ cap,
47
60
  })
48
61
  }
49
62
 
@@ -55,17 +68,59 @@ impl SessionPool {
55
68
  where
56
69
  F: FnOnce(&mut Session) -> Result<R>,
57
70
  {
58
- SESSIONS.with(|map| {
59
- let mut map = map.borrow_mut();
60
- let session = match map.entry(self.pool_id) {
61
- Entry::Occupied(e) => e.into_mut(),
62
- Entry::Vacant(e) => {
63
- let session = build_session(&self.recipe.model_path, &self.recipe.build_config)?;
64
- e.insert(session)
71
+ const SPIN_LIMIT: u32 = 64;
72
+
73
+ loop {
74
+ // Snapshot the pool under the outer lock so the scan below
75
+ // doesn't contend on that lock at all.
76
+ let arcs: Vec<Arc<Mutex<Session>>> = {
77
+ let inner = self.inner.lock();
78
+ inner.sessions.clone()
79
+ };
80
+ let len = arcs.len();
81
+ let start = self.next_idx.fetch_add(1, Ordering::Relaxed) % len;
82
+
83
+ for offset in 0..len {
84
+ let idx = (start + offset) % len;
85
+ if let Some(mut guard) = arcs[idx].try_lock() {
86
+ return f(&mut guard);
87
+ }
88
+ }
89
+
90
+ // All sessions busy — try to grow the pool
91
+ let grew = {
92
+ let mut inner = self.inner.lock();
93
+ if inner.sessions.len() < self.cap {
94
+ match build_session(&inner.model_path, &inner.build_config) {
95
+ Ok(session) => {
96
+ inner.sessions.push(Arc::new(Mutex::new(session)));
97
+ true
98
+ }
99
+ Err(e) => return Err(e),
100
+ }
101
+ } else {
102
+ false
65
103
  }
66
104
  };
67
- f(session)
68
- })
105
+
106
+ if grew {
107
+ continue;
108
+ }
109
+
110
+ // At cap — spin briefly, then block on a session
111
+ let idx = self.next_idx.fetch_add(1, Ordering::Relaxed) % len;
112
+ let arc = Arc::clone(&arcs[idx]);
113
+
114
+ for _ in 0..SPIN_LIMIT {
115
+ if let Some(mut guard) = arc.try_lock() {
116
+ return f(&mut guard);
117
+ }
118
+ std::hint::spin_loop();
119
+ }
120
+
121
+ let mut guard = arc.lock();
122
+ return f(&mut guard);
123
+ }
69
124
  }
70
125
  }
71
126
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gte
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.14
4
+ version: 0.0.15
5
5
  platform: ruby
6
6
  authors:
7
7
  - elcuervo