embedding_util 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/lib/embedding_util/cli.rb +4 -0
- data/lib/embedding_util/runtime_command.rb +25 -0
- data/lib/embedding_util/server_manager.rb +102 -10
- data/lib/embedding_util/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: f388bd90069b46caca18046622511f7fe2bd5c25f2a56a8d02fc01cc673bc682
|
|
4
|
+
data.tar.gz: 75b484781ae6689e6bf471007257c1e63f4e8aa2485b558e0d41b2f03643749f
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 4bd65f54bc228373056843d55aa866d510660361e294bff33b18ec8440f02ae807f4bf5773e6250eb2a5152e53d3656dadef0f624b50f0c1ec32fab689f78367
|
|
7
|
+
data.tar.gz: 9f51238b20d2aabfda68aace11f276e79cbdeb291468bd888933330869a5717c52f33db2ceb7a46b870a8f138a1b9fcbdbc94e4e5108f529ba7d4aa5ab299e6a
|
data/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,9 @@
|
|
|
1
|
+
## [0.1.1] - 2026-06-08
|
|
2
|
+
|
|
3
|
+
- Fix local server lifecycle cleanup for Ramalama and direct `llama-server`
|
|
4
|
+
- Stop named Ramalama servers after idle shutdown and fall back to Podman/Docker cleanup when needed
|
|
5
|
+
- Ensure direct `llama-server` child processes are terminated on idle shutdown and interruption
|
|
6
|
+
|
|
1
7
|
## [0.1.0] - 2026-06-08
|
|
2
8
|
|
|
3
9
|
- Initial release
|
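data/lib/embedding_util/cli.rb
CHANGED
|
[The +4 -0 hunk for this file is missing from this capture.]
|
data/lib/embedding_util/runtime_command.rb
CHANGED
|
@@ -54,11 +54,36 @@ module EmbeddingUtil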
|
|
|
54
54
|
runtime == :llama_server ? "llama-server" : runtime.to_s
|
|
55
55
|
end
|
|
56
56
|
|
|
57
|
+
def detached_server?
|
|
58
|
+
runtime == :ramalama
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
def stop_argv
|
|
62
|
+
return unless detached_server?
|
|
63
|
+
|
|
64
|
+
stop_argvs.first
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
def stop_argvs
|
|
68
|
+
return [] unless detached_server?
|
|
69
|
+
|
|
70
|
+
[
|
|
71
|
+
["ramalama", "stop", server_name],
|
|
72
|
+
["podman", "stop", "--time", "0", server_name],
|
|
73
|
+
["docker", "stop", server_name]
|
|
74
|
+
].select { |argv| self.class.command_path(argv.first) }
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
def server_name
|
|
78
|
+
"embedding-util-#{server_model.name}".tr("_", "-")
|
|
79
|
+
end
|
|
80
|
+
|
|
57
81
|
private
|
|
58
82
|
|
|
59
83
|
def ramalama_argv
|
|
60
84
|
[
|
|
61
85
|
"ramalama", "--runtime=llama.cpp", "serve",
|
|
86
|
+
"--name", server_name,
|
|
62
87
|
"--host", host,
|
|
63
88
|
"--port", port.to_s,
|
|
64
89
|
"--runtime-args=#{server_model.settings.fetch(:server_flags).join(' ')}",
|
|
data/lib/embedding_util/server_manager.rb
CHANGED
|
@@ -44,18 +44,20 @@ module EmbeddingUtil
|
|
|
44
44
|
puts "starting #{server_model.name} with #{command.label} on http://#{host}:#{selected_port}"
|
|
45
45
|
puts "shutdown idle: #{shutdown_idle}s" if shutdown_idle&.positive?
|
|
46
46
|
|
|
47
|
+
previous_traps = install_interrupt_traps
|
|
47
48
|
Open3.popen2e(*command.argv) do |_stdin, output, wait_thread|
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
output.
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
49
|
+
url = "http://#{host}:#{selected_port}"
|
|
50
|
+
write_state(server_model, pid: wait_thread.pid, url: url, runtime: command.label, port: selected_port)
|
|
51
|
+
last_output_at_mutex = Mutex.new
|
|
52
|
+
reader = stream_output(output) { last_output_at_mutex.synchronize { last_output_at = Time.now } }
|
|
53
|
+
wait_for_runtime_serving(command, server_model, url, wait_thread.pid)
|
|
54
|
+
supervise_runtime(command, wait_thread, shutdown_idle) { last_output_at_mutex.synchronize { last_output_at } }
|
|
55
|
+
ensure
|
|
56
|
+
cleanup_runtime(command, wait_thread)
|
|
57
|
+
reader&.kill
|
|
58
|
+
reader&.join
|
|
57
59
|
delete_state(server_model)
|
|
58
|
-
|
|
60
|
+
restore_interrupt_traps(previous_traps)
|
|
59
61
|
end
|
|
60
62
|
end
|
|
61
63
|
|
|
@@ -150,6 +152,96 @@ module EmbeddingUtil
|
|
|
150
152
|
end
|
|
151
153
|
end
|
|
152
154
|
|
|
155
|
+
def stream_output(output)
|
|
156
|
+
Thread.new do
|
|
157
|
+
output.each_line do |line|
|
|
158
|
+
yield
|
|
159
|
+
print line
|
|
160
|
+
end
|
|
161
|
+
end
|
|
162
|
+
end
|
|
163
|
+
|
|
164
|
+
def wait_for_runtime_serving(command, server_model, url, pid)
|
|
165
|
+
warn "waiting for #{server_model.name} at #{url}" if config.verbose
|
|
166
|
+
wait_for_serving(server_model, url, pid, check_process: !command.detached_server?)
|
|
167
|
+
warn "#{server_model.name} is healthy" if config.verbose
|
|
168
|
+
end
|
|
169
|
+
|
|
170
|
+
def supervise_runtime(command, wait_thread, shutdown_idle, &last_output_at)
|
|
171
|
+
warn "supervising #{command.server_name}" if config.verbose && command.detached_server?
|
|
172
|
+
return supervise_detached_server(command, shutdown_idle, &last_output_at) if command.detached_server?
|
|
173
|
+
|
|
174
|
+
watchdog = start_watchdog(wait_thread.pid, shutdown_idle, &last_output_at)
|
|
175
|
+
wait_thread.value.exitstatus
|
|
176
|
+
ensure
|
|
177
|
+
watchdog&.kill
|
|
178
|
+
end
|
|
179
|
+
|
|
180
|
+
def wait_for_serving(server_model, url, pid, check_process: true)
|
|
181
|
+
deadline = Time.now + config.startup_timeout
|
|
182
|
+
loop do
|
|
183
|
+
return if healthy_url?(url)
|
|
184
|
+
raise UnsupportedProviderError, "#{server_model.name} server process exited before becoming healthy" if check_process && !process_running?(pid)
|
|
185
|
+
raise UnsupportedProviderError, "timed out after #{config.startup_timeout}s waiting for #{server_model.name} to become healthy" if Time.now >= deadline
|
|
186
|
+
|
|
187
|
+
sleep 0.25
|
|
188
|
+
end
|
|
189
|
+
end
|
|
190
|
+
|
|
191
|
+
def supervise_detached_server(command, shutdown_idle)
|
|
192
|
+
loop do
|
|
193
|
+
if idle_expired?(shutdown_idle, yield)
|
|
194
|
+
warn "stopping #{command.server_name} after #{shutdown_idle}s idle" if config.verbose
|
|
195
|
+
stop_detached_server(command)
|
|
196
|
+
return 0
|
|
197
|
+
end
|
|
198
|
+
|
|
199
|
+
sleep [shutdown_idle.to_f / 5.0, 1].max
|
|
200
|
+
end
|
|
201
|
+
rescue Interrupt
|
|
202
|
+
stop_detached_server(command)
|
|
203
|
+
130
|
|
204
|
+
end
|
|
205
|
+
|
|
206
|
+
def idle_expired?(shutdown_idle, last_output_at)
|
|
207
|
+
shutdown_idle&.positive? && Time.now - last_output_at >= shutdown_idle
|
|
208
|
+
end
|
|
209
|
+
|
|
210
|
+
def stop_detached_server(command)
|
|
211
|
+
command.stop_argvs.any? do |stop_argv|
|
|
212
|
+
system(*stop_argv, out: File::NULL, err: File::NULL)
|
|
213
|
+
end
|
|
214
|
+
end
|
|
215
|
+
|
|
216
|
+
def cleanup_runtime(command, wait_thread)
|
|
217
|
+
return unless command
|
|
218
|
+
|
|
219
|
+
if command.detached_server?
|
|
220
|
+
stop_detached_server(command)
|
|
221
|
+
else
|
|
222
|
+
terminate_runtime_process(command, wait_thread&.pid)
|
|
223
|
+
end
|
|
224
|
+
end
|
|
225
|
+
|
|
226
|
+
def terminate_runtime_process(command, pid)
|
|
227
|
+
return if command.detached_server? || !pid || pid == Process.pid || !process_running?(pid)
|
|
228
|
+
|
|
229
|
+
terminate_idle_process(pid)
|
|
230
|
+
rescue Errno::ESRCH
|
|
231
|
+
nil
|
|
232
|
+
end
|
|
233
|
+
|
|
234
|
+
def install_interrupt_traps
|
|
235
|
+
%w[INT TERM].to_h do |signal|
|
|
236
|
+
previous = Signal.trap(signal) { Thread.main.raise Interrupt }
|
|
237
|
+
[signal, previous]
|
|
238
|
+
end
|
|
239
|
+
end
|
|
240
|
+
|
|
241
|
+
def restore_interrupt_traps(previous_traps)
|
|
242
|
+
previous_traps&.each { |signal, handler| Signal.trap(signal, handler) }
|
|
243
|
+
end
|
|
244
|
+
|
|
153
245
|
def terminate_idle_process(pid)
|
|
154
246
|
Process.kill("TERM", pid)
|
|
155
247
|
sleep 5
|