embedding_util 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/README.md +6 -2
- data/lib/embedding_util/cli.rb +2 -0
- data/lib/embedding_util/configuration.rb +2 -1
- data/lib/embedding_util/providers/self_hosted.rb +22 -5
- data/lib/embedding_util/runtime_command.rb +9 -3
- data/lib/embedding_util/server_manager.rb +96 -19
- data/lib/embedding_util/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 25117f1c8eb2df3a23d26b45a76df8f850af2e96a9dc7c5ecfc9f2820a8cd913
|
|
4
|
+
data.tar.gz: a1d173a4933740d65cadbc6dad877cf605859029a0bd351c3123a7899dd49fe3
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 5b61c1d0e518af3aa80acbb19db28314412ff24611681dc61ce1a8b2d1a989da0bbe181e73c488ac0ab409d2ab579b598e2ca41b23871667ed19faec59e6e9a1
|
|
7
|
+
data.tar.gz: 9eb004c7a36be9a82638bd7883d865219f6f09dfe443d016bd73267c8124cac7f41d9f94960d86ea34d02828f7c186b49d5220a00808f0eec1195ff7c40769de
|
data/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,11 @@
|
|
|
1
|
+
## [0.1.4] - 2026-06-10
|
|
2
|
+
|
|
3
|
+
- Stabilize managed Ramalama reranker startup, restart, and idle cleanup
|
|
4
|
+
- Add request-aware idle tracking so active embedding/reranking requests are not stopped during quiet server output
|
|
5
|
+
- Add `ramalama_device` configuration, CLI, and environment support for hosts that need `--device none`
|
|
6
|
+
- Retry managed reranker EOF/connection-drop failures once after escalating batch settings
|
|
7
|
+
- Verify stability through repeated `index_util/examples/ruby_api` queries
|
|
8
|
+
|
|
1
9
|
## [0.1.3] - 2026-06-10
|
|
2
10
|
|
|
3
11
|
- Set self-hosted reranker `--batch-size` and `--ubatch-size` together
|
data/README.md
CHANGED
|
@@ -67,7 +67,7 @@ embedding_util serve --model embedding-small_multilingual_v1
|
|
|
67
67
|
embedding_util serve --model reranker-small_multilingual_v1
|
|
68
68
|
```
|
|
69
69
|
|
|
70
|
-
`serve` starts one model server per command and runs until stopped. Add `--shutdown-idle SECONDS` only when you want that manually managed server to stop itself after idle
|
|
70
|
+
`serve` starts one model server per command and runs until stopped. Add `--shutdown-idle SECONDS` only when you want that manually managed server to stop itself after it is idle; omit it, set it to `nil`, or pass `0` to disable idle shutdown.
|
|
71
71
|
|
|
72
72
|
## CLI
|
|
73
73
|
|
|
@@ -86,12 +86,14 @@ embedding_util rerank \
|
|
|
86
86
|
|
|
87
87
|
`embed` prints a JSON array. `rerank` prints JSON objects with `index`, `document`, `score`, and `metadata`.
|
|
88
88
|
|
|
89
|
-
`serve` starts one local model server. The default model is `embedding-small_multilingual_v1`; use `reranker-small_multilingual_v1` for the reranker server. By default, `serve` uses Ramalama when available and falls back to direct `llama-server`. It runs until stopped unless a positive `--shutdown-idle` value is provided.
|
|
89
|
+
`serve` starts one local model server. The default model is `embedding-small_multilingual_v1`; use `reranker-small_multilingual_v1` for the reranker server. By default, `serve` uses Ramalama when available and falls back to direct `llama-server`. It runs until stopped unless a positive `--shutdown-idle` value is provided. Idle shutdown is request-aware for `embedding_util`-managed calls, so long-running embedding or reranking requests are not stopped just because the model server is temporarily quiet.
|
|
90
90
|
|
|
91
91
|
Explicit `serve --port PORT` requires that exact port to be free. Without `--port`, `serve` prefers the profile default port and chooses the next free local port if needed.
|
|
92
92
|
|
|
93
93
|
Use `--verbose` on `embed` or `rerank` to print self-hosting diagnostics, including the background `serve` command and log path. First-time model downloads are expected to work with the default startup timeout; use `--startup-timeout` only when you explicitly want to shorten or extend that wait.
|
|
94
94
|
|
|
95
|
+
If Ramalama's automatic device passthrough does not work on a host, pass `--ramalama-device none` or set `EMBEDDING_UTIL_RAMALAMA_DEVICE=none` to force CPU/container-only serving.
|
|
96
|
+
|
|
95
97
|
## API
|
|
96
98
|
|
|
97
99
|
- `EmbeddingUtil.embed(text)` returns one embedding array.
|
|
@@ -137,6 +139,7 @@ EmbeddingUtil.configure do |config|
|
|
|
137
139
|
config.shutdown_idle = 300
|
|
138
140
|
config.reranker_ubatch_size = 1024
|
|
139
141
|
config.reranker_max_ubatch_size = 4096
|
|
142
|
+
config.ramalama_device = nil
|
|
140
143
|
config.timeout = 60
|
|
141
144
|
end
|
|
142
145
|
```
|
|
@@ -165,6 +168,7 @@ Environment variables are also supported:
|
|
|
165
168
|
- `EMBEDDING_UTIL_RERANKER_PORT`
|
|
166
169
|
- `EMBEDDING_UTIL_RERANKER_UBATCH_SIZE`
|
|
167
170
|
- `EMBEDDING_UTIL_RERANKER_MAX_UBATCH_SIZE`
|
|
171
|
+
- `EMBEDDING_UTIL_RAMALAMA_DEVICE`
|
|
168
172
|
|
|
169
173
|
## Development
|
|
170
174
|
|
data/lib/embedding_util/cli.rb
CHANGED
|
@@ -16,6 +16,7 @@ module EmbeddingUtil
|
|
|
16
16
|
shutdown_idle: :to_i.to_proc,
|
|
17
17
|
reranker_ubatch_size: :to_i.to_proc,
|
|
18
18
|
reranker_max_ubatch_size: :to_i.to_proc,
|
|
19
|
+
ramalama_device: ->(value) { value },
|
|
19
20
|
verbose: ->(value) { value }
|
|
20
21
|
}.freeze
|
|
21
22
|
|
|
@@ -29,6 +30,7 @@ module EmbeddingUtil
|
|
|
29
30
|
class_option :shutdown_idle, type: :numeric, desc: "Stop self-hosted server after this many seconds without stdout/stderr activity"
|
|
30
31
|
class_option :reranker_ubatch_size, type: :numeric, desc: "llama.cpp physical batch size for self-hosted reranker servers"
|
|
31
32
|
class_option :reranker_max_ubatch_size, type: :numeric, desc: "Largest reranker physical batch size for automatic retry"
|
|
33
|
+
class_option :ramalama_device, type: :string, desc: "Ramalama device option, for example none"
|
|
32
34
|
class_option :verbose, type: :boolean, desc: "Print self-hosting diagnostics"
|
|
33
35
|
|
|
34
36
|
desc "support", "Display configured provider support"
|
data/lib/embedding_util/configuration.rb
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
module EmbeddingUtil
|
|
4
4
|
class Configuration
|
|
5
5
|
attr_accessor :profile, :provider, :endpoint, :embedding_endpoint, :reranker_endpoint, :timeout, :startup_timeout, :shutdown_idle, :host,
|
|
6
|
-
:embedding_port, :reranker_port, :state_dir, :verbose, :reranker_ubatch_size, :reranker_max_ubatch_size
|
|
6
|
+
:embedding_port, :reranker_port, :state_dir, :verbose, :reranker_ubatch_size, :reranker_max_ubatch_size, :ramalama_device
|
|
7
7
|
attr_reader :runtime
|
|
8
8
|
|
|
9
9
|
def initialize
|
|
@@ -21,6 +21,7 @@ module EmbeddingUtil
|
|
|
21
21
|
@reranker_port = Integer(ENV.fetch("EMBEDDING_UTIL_RERANKER_PORT", "18081"))
|
|
22
22
|
@reranker_ubatch_size = Integer(ENV.fetch("EMBEDDING_UTIL_RERANKER_UBATCH_SIZE", "1024"))
|
|
23
23
|
@reranker_max_ubatch_size = Integer(ENV.fetch("EMBEDDING_UTIL_RERANKER_MAX_UBATCH_SIZE", "4096"))
|
|
24
|
+
@ramalama_device = ENV["EMBEDDING_UTIL_RAMALAMA_DEVICE"]
|
|
24
25
|
@state_dir = ENV.fetch("EMBEDDING_UTIL_STATE_DIR", File.expand_path("~/.local/state/embedding_util"))
|
|
25
26
|
@verbose = ENV.fetch("EMBEDDING_UTIL_VERBOSE", "false").match?(/\A(?:1|true|yes|on)\z/i)
|
|
26
27
|
end
|
data/lib/embedding_util/providers/self_hosted.rb
CHANGED
|
@@ -22,20 +22,23 @@ module EmbeddingUtil
|
|
|
22
22
|
end
|
|
23
23
|
|
|
24
24
|
def embed(texts, profile: config.resolved_profile)
|
|
25
|
-
|
|
26
|
-
|
|
25
|
+
manager = ServerManager.new(config: config)
|
|
26
|
+
endpoint = manager.ensure_server(:embedding, profile: profile)
|
|
27
|
+
manager.track_activity(:embedding, profile: profile) do
|
|
28
|
+
endpoint_provider(embedding_endpoint: endpoint).embed(texts, profile: profile)
|
|
29
|
+
end
|
|
27
30
|
end
|
|
28
31
|
|
|
29
32
|
def rerank(query, documents, profile: config.resolved_profile)
|
|
30
33
|
manager = ServerManager.new(config: config)
|
|
31
34
|
endpoint = manager.ensure_server(:reranker, profile: profile)
|
|
32
|
-
|
|
35
|
+
rerank_with_activity(manager, endpoint, query, documents, profile)
|
|
33
36
|
rescue EndpointError => e
|
|
34
|
-
raise unless
|
|
37
|
+
raise unless retryable_reranker_error?(e) && can_escalate_reranker_ubatch?
|
|
35
38
|
|
|
36
39
|
config.reranker_ubatch_size = config.reranker_max_ubatch_size
|
|
37
40
|
endpoint = manager.restart_server(:reranker, profile: profile)
|
|
38
|
-
|
|
41
|
+
rerank_with_activity(manager, endpoint, query, documents, profile)
|
|
39
42
|
end
|
|
40
43
|
|
|
41
44
|
private
|
|
@@ -47,10 +50,24 @@ module EmbeddingUtil
|
|
|
47
50
|
Endpoint.new(config: endpoint_config)
|
|
48
51
|
end
|
|
49
52
|
|
|
53
|
+
def rerank_with_activity(manager, endpoint, query, documents, profile)
|
|
54
|
+
manager.track_activity(:reranker, profile: profile) do
|
|
55
|
+
endpoint_provider(reranker_endpoint: endpoint).rerank(query, documents, profile: profile)
|
|
56
|
+
end
|
|
57
|
+
end
|
|
58
|
+
|
|
50
59
|
def reranker_batch_size_error?(error)
|
|
51
60
|
error.message.include?("increase the physical batch size")
|
|
52
61
|
end
|
|
53
62
|
|
|
63
|
+
def retryable_reranker_error?(error)
|
|
64
|
+
reranker_batch_size_error?(error) || reranker_connection_dropped?(error)
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
def reranker_connection_dropped?(error)
|
|
68
|
+
error.message.match?(%r{could not reach http://[^ ]+/v1/rerank: (?:end of file reached|Connection reset|connection reset|stream closed)})
|
|
69
|
+
end
|
|
70
|
+
|
|
54
71
|
def can_escalate_reranker_ubatch?
|
|
55
72
|
config.reranker_ubatch_size < config.reranker_max_ubatch_size
|
|
56
73
|
end
|
data/lib/embedding_util/runtime_command.rb
CHANGED
|
@@ -2,14 +2,15 @@
|
|
|
2
2
|
|
|
3
3
|
module EmbeddingUtil
|
|
4
4
|
class RuntimeCommand
|
|
5
|
-
attr_reader :runtime, :server_model, :host, :port, :server_flags
|
|
5
|
+
attr_reader :runtime, :server_model, :host, :port, :server_flags, :ramalama_device
|
|
6
6
|
|
|
7
|
-
def initialize(runtime:, server_model:, host:, port:,
|
|
7
|
+
def initialize(runtime:, server_model:, host:, port:, **options)
|
|
8
8
|
@runtime = self.class.normalize_runtime(runtime)
|
|
9
9
|
@server_model = server_model
|
|
10
10
|
@host = host
|
|
11
11
|
@port = port
|
|
12
|
-
@server_flags = server_flags || server_model.settings.fetch(:server_flags)
|
|
12
|
+
@server_flags = options[:server_flags] || server_model.settings.fetch(:server_flags)
|
|
13
|
+
@ramalama_device = options[:ramalama_device]
|
|
13
14
|
end
|
|
14
15
|
|
|
15
16
|
def self.available?(runtime)
|
|
@@ -85,6 +86,7 @@ module EmbeddingUtil
|
|
|
85
86
|
[
|
|
86
87
|
"ramalama", "--runtime=llama.cpp", "serve",
|
|
87
88
|
"--name", server_name,
|
|
89
|
+
*ramalama_device_args,
|
|
88
90
|
"--host", host,
|
|
89
91
|
"--port", port.to_s,
|
|
90
92
|
"--runtime-args=#{server_flags.join(' ')}",
|
|
@@ -106,5 +108,9 @@ module EmbeddingUtil
|
|
|
106
108
|
def huggingface_model
|
|
107
109
|
"hf://#{server_model.settings.fetch(:repo)}/#{server_model.settings.fetch(:file)}"
|
|
108
110
|
end
|
|
111
|
+
|
|
112
|
+
def ramalama_device_args
|
|
113
|
+
ramalama_device.to_s.empty? ? [] : ["--device", ramalama_device.to_s]
|
|
114
|
+
end
|
|
109
115
|
end
|
|
110
116
|
end
|
data/lib/embedding_util/server_manager.rb
CHANGED
|
@@ -11,6 +11,8 @@ require "uri"
|
|
|
11
11
|
|
|
12
12
|
module EmbeddingUtil
|
|
13
13
|
class ServerManager
|
|
14
|
+
STOP_TIMEOUT = 30
|
|
15
|
+
|
|
14
16
|
attr_reader :config
|
|
15
17
|
|
|
16
18
|
def initialize(config: EmbeddingUtil.configuration)
|
|
@@ -47,10 +49,10 @@ module EmbeddingUtil
|
|
|
47
49
|
previous_traps = install_interrupt_traps
|
|
48
50
|
Open3.popen2e(*command.argv) do |_stdin, output, wait_thread|
|
|
49
51
|
url = "http://#{host}:#{selected_port}"
|
|
50
|
-
write_state(server_model, pid: wait_thread
|
|
52
|
+
write_state(server_model, pid: state_pid(command, wait_thread), url: url, runtime: command.label, port: selected_port)
|
|
51
53
|
last_output_at_mutex = Mutex.new
|
|
52
54
|
reader = stream_output(output) { last_output_at_mutex.synchronize { last_output_at = Time.now } }
|
|
53
|
-
wait_for_runtime_serving(command, server_model, url, wait_thread
|
|
55
|
+
wait_for_runtime_serving(command, server_model, url, wait_thread)
|
|
54
56
|
supervise_runtime(command, wait_thread, shutdown_idle) { last_output_at_mutex.synchronize { last_output_at } }
|
|
55
57
|
ensure
|
|
56
58
|
cleanup_runtime(command, wait_thread)
|
|
@@ -65,13 +67,22 @@ module EmbeddingUtil
|
|
|
65
67
|
server_model = ServerModel.for(capability, profile)
|
|
66
68
|
|
|
67
69
|
with_lock(server_model) do
|
|
68
|
-
stop_server(server_model)
|
|
70
|
+
stopped_url = stop_server(server_model)
|
|
71
|
+
wait_for_stopped(server_model, stopped_url)
|
|
69
72
|
start_background(server_model)
|
|
70
73
|
end
|
|
71
74
|
|
|
72
75
|
wait_for_healthy(server_model, log_path: server_log_path(server_model))
|
|
73
76
|
end
|
|
74
77
|
|
|
78
|
+
def track_activity(capability, profile: config.resolved_profile)
|
|
79
|
+
server_model = ServerModel.for(capability, profile)
|
|
80
|
+
update_activity(server_model, 1)
|
|
81
|
+
yield
|
|
82
|
+
ensure
|
|
83
|
+
update_activity(server_model, -1) if server_model
|
|
84
|
+
end
|
|
85
|
+
|
|
75
86
|
private
|
|
76
87
|
|
|
77
88
|
def start_background(server_model)
|
|
@@ -88,6 +99,7 @@ module EmbeddingUtil
|
|
|
88
99
|
argv.push("--shutdown-idle", config.shutdown_idle.to_s) unless config.shutdown_idle.nil?
|
|
89
100
|
argv.push("--reranker-ubatch-size", config.reranker_ubatch_size.to_s)
|
|
90
101
|
argv.push("--reranker-max-ubatch-size", config.reranker_max_ubatch_size.to_s)
|
|
102
|
+
argv.push("--ramalama-device", config.ramalama_device.to_s) unless config.ramalama_device.to_s.empty?
|
|
91
103
|
warn "starting #{server_model.name} in background: #{argv.join(' ')}" if config.verbose
|
|
92
104
|
warn "#{server_model.name} log: #{log_path}" if config.verbose
|
|
93
105
|
pid = Process.spawn(*argv, out: [log_path, "a"], err: %i[child out], pgroup: true)
|
|
@@ -119,7 +131,8 @@ module EmbeddingUtil
|
|
|
119
131
|
server_model: server_model,
|
|
120
132
|
host: host,
|
|
121
133
|
port: port,
|
|
122
|
-
server_flags: server_flags(server_model)
|
|
134
|
+
server_flags: server_flags(server_model),
|
|
135
|
+
ramalama_device: config.ramalama_device
|
|
123
136
|
)
|
|
124
137
|
end
|
|
125
138
|
|
|
@@ -210,12 +223,16 @@ module EmbeddingUtil
|
|
|
210
223
|
end
|
|
211
224
|
end
|
|
212
225
|
|
|
213
|
-
def wait_for_runtime_serving(command, server_model, url,
|
|
226
|
+
def wait_for_runtime_serving(command, server_model, url, wait_thread)
|
|
214
227
|
warn "waiting for #{server_model.name} at #{url}" if config.verbose
|
|
215
|
-
wait_for_serving(server_model, url, pid, check_process: !command.detached_server?)
|
|
228
|
+
wait_for_serving(server_model, url, wait_thread.pid, wait_thread: wait_thread, check_process: !command.detached_server?)
|
|
216
229
|
warn "#{server_model.name} is healthy" if config.verbose
|
|
217
230
|
end
|
|
218
231
|
|
|
232
|
+
def state_pid(command, wait_thread)
|
|
233
|
+
command.detached_server? ? Process.pid : wait_thread.pid
|
|
234
|
+
end
|
|
235
|
+
|
|
219
236
|
def supervise_runtime(command, wait_thread, shutdown_idle, &last_output_at)
|
|
220
237
|
warn "supervising #{command.server_name}" if config.verbose && command.detached_server?
|
|
221
238
|
return supervise_detached_server(command, shutdown_idle, &last_output_at) if command.detached_server?
|
|
@@ -226,10 +243,11 @@ module EmbeddingUtil
|
|
|
226
243
|
watchdog&.kill
|
|
227
244
|
end
|
|
228
245
|
|
|
229
|
-
def wait_for_serving(server_model, url, pid, check_process: true)
|
|
246
|
+
def wait_for_serving(server_model, url, pid, wait_thread: nil, check_process: true)
|
|
230
247
|
deadline = Time.now + config.startup_timeout
|
|
231
248
|
loop do
|
|
232
249
|
return if healthy_url?(url)
|
|
250
|
+
raise UnsupportedProviderError, "#{server_model.name} runtime launcher exited before server became healthy" if launcher_failed?(wait_thread)
|
|
233
251
|
raise UnsupportedProviderError, "#{server_model.name} server process exited before becoming healthy" if check_process && !process_running?(pid)
|
|
234
252
|
raise UnsupportedProviderError, "timed out after #{config.startup_timeout}s waiting for #{server_model.name} to become healthy" if Time.now >= deadline
|
|
235
253
|
|
|
@@ -237,9 +255,15 @@ module EmbeddingUtil
|
|
|
237
255
|
end
|
|
238
256
|
end
|
|
239
257
|
|
|
258
|
+
def launcher_failed?(wait_thread)
|
|
259
|
+
return false unless wait_thread && !wait_thread.alive?
|
|
260
|
+
|
|
261
|
+
!wait_thread.value.success?
|
|
262
|
+
end
|
|
263
|
+
|
|
240
264
|
def supervise_detached_server(command, shutdown_idle)
|
|
241
265
|
loop do
|
|
242
|
-
if idle_expired?(shutdown_idle, yield)
|
|
266
|
+
if idle_expired?(shutdown_idle, command.server_model, yield)
|
|
243
267
|
warn "stopping #{command.server_name} after #{shutdown_idle}s idle" if config.verbose
|
|
244
268
|
stop_detached_server(command)
|
|
245
269
|
return 0
|
|
@@ -252,8 +276,29 @@ module EmbeddingUtil
|
|
|
252
276
|
130
|
|
253
277
|
end
|
|
254
278
|
|
|
255
|
-
def idle_expired?(shutdown_idle, last_output_at)
|
|
256
|
-
shutdown_idle&.positive?
|
|
279
|
+
def idle_expired?(shutdown_idle, server_model, last_output_at)
|
|
280
|
+
return false unless shutdown_idle&.positive?
|
|
281
|
+
|
|
282
|
+
activity = activity_state(server_model, last_output_at)
|
|
283
|
+
activity.fetch(:active_requests).zero? && Time.now - activity.fetch(:last_activity_at) >= shutdown_idle
|
|
284
|
+
end
|
|
285
|
+
|
|
286
|
+
def activity_state(server_model, fallback_time)
|
|
287
|
+
state = read_state(server_model)
|
|
288
|
+
last_activity_at = parse_state_time(state&.fetch("last_activity_at", nil)) || fallback_time
|
|
289
|
+
last_output_at = [fallback_time, last_activity_at].max
|
|
290
|
+
{
|
|
291
|
+
active_requests: Integer(state&.fetch("active_requests", 0) || 0),
|
|
292
|
+
last_activity_at: last_output_at
|
|
293
|
+
}
|
|
294
|
+
rescue ArgumentError
|
|
295
|
+
{ active_requests: 0, last_activity_at: fallback_time }
|
|
296
|
+
end
|
|
297
|
+
|
|
298
|
+
def parse_state_time(value)
|
|
299
|
+
Time.iso8601(value) if value
|
|
300
|
+
rescue ArgumentError
|
|
301
|
+
nil
|
|
257
302
|
end
|
|
258
303
|
|
|
259
304
|
def stop_detached_server(command)
|
|
@@ -268,13 +313,28 @@ module EmbeddingUtil
|
|
|
268
313
|
|
|
269
314
|
runtime = state.fetch("runtime", config.runtime)
|
|
270
315
|
port = state.fetch("port", server_model.default_port(config))
|
|
316
|
+
url = state["url"]
|
|
271
317
|
command = runtime_command(runtime, server_model, config.host, port)
|
|
272
318
|
if command.detached_server?
|
|
273
319
|
stop_detached_server(command)
|
|
274
320
|
else
|
|
275
321
|
terminate_runtime_process(command, state["pid"])
|
|
322
|
+
stop_detached_server(runtime_command(:ramalama, server_model, config.host, port))
|
|
276
323
|
end
|
|
277
324
|
delete_state(server_model)
|
|
325
|
+
url
|
|
326
|
+
end
|
|
327
|
+
|
|
328
|
+
def wait_for_stopped(server_model, url)
|
|
329
|
+
return unless url
|
|
330
|
+
|
|
331
|
+
deadline = Time.now + STOP_TIMEOUT
|
|
332
|
+
loop do
|
|
333
|
+
return unless healthy_url?(url)
|
|
334
|
+
raise UnsupportedProviderError, "#{server_model.name} did not stop before restart" if Time.now >= deadline
|
|
335
|
+
|
|
336
|
+
sleep 0.25
|
|
337
|
+
end
|
|
278
338
|
end
|
|
279
339
|
|
|
280
340
|
def cleanup_runtime(command, wait_thread)
|
|
@@ -382,15 +442,32 @@ module EmbeddingUtil
|
|
|
382
442
|
end
|
|
383
443
|
|
|
384
444
|
def write_state(server_model, pid:, url:, runtime:, port:)
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
445
|
+
state = {
|
|
446
|
+
pid: pid,
|
|
447
|
+
url: url,
|
|
448
|
+
profile: server_model.profile.name,
|
|
449
|
+
capability: server_model.capability,
|
|
450
|
+
runtime: runtime,
|
|
451
|
+
port: port,
|
|
452
|
+
active_requests: 0,
|
|
453
|
+
last_activity_at: Time.now.utc.iso8601,
|
|
454
|
+
updated_at: Time.now.utc.iso8601
|
|
455
|
+
}
|
|
456
|
+
File.write(state_path(server_model), JSON.pretty_generate(state))
|
|
457
|
+
end
|
|
458
|
+
|
|
459
|
+
def update_activity(server_model, delta)
|
|
460
|
+
with_lock(server_model) do
|
|
461
|
+
state = read_state(server_model)
|
|
462
|
+
next unless state
|
|
463
|
+
|
|
464
|
+
state["active_requests"] = [Integer(state.fetch("active_requests", 0)) + delta, 0].max
|
|
465
|
+
state["last_activity_at"] = Time.now.utc.iso8601
|
|
466
|
+
state["updated_at"] = Time.now.utc.iso8601
|
|
467
|
+
File.write(state_path(server_model), JSON.pretty_generate(state))
|
|
468
|
+
end
|
|
469
|
+
rescue ArgumentError
|
|
470
|
+
nil
|
|
394
471
|
end
|
|
395
472
|
|
|
396
473
|
def read_state(server_model)
|