embedding_util 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 41974235f403d3694132565fa69ecf07ca338789bd197c34e2563eb7bd38ad06
4
- data.tar.gz: c05864e6cda0f5881d44d676ab8b8cb7dc9d7ceee70aff8394a3ee79f8166a69
3
+ metadata.gz: f388bd90069b46caca18046622511f7fe2bd5c25f2a56a8d02fc01cc673bc682
4
+ data.tar.gz: 75b484781ae6689e6bf471007257c1e63f4e8aa2485b558e0d41b2f03643749f
5
5
  SHA512:
6
- metadata.gz: b3500877638960c73de3ebf04ef07441392b86f22108014e4f27e872b95d9ff52cf1850e423443c51bab16ea4c5149a851f619faa53d7f44480f99ae7ebdd759
7
- data.tar.gz: e55f3761208466504a9865115d23e0f1ab68bfb146a08ddfcb1c2ae7df4b37a859a80d99aa52139bf6b005fa6ddf4624a96046d58c80f923d183316234042232
6
+ metadata.gz: 4bd65f54bc228373056843d55aa866d510660361e294bff33b18ec8440f02ae807f4bf5773e6250eb2a5152e53d3656dadef0f624b50f0c1ec32fab689f78367
7
+ data.tar.gz: 9f51238b20d2aabfda68aace11f276e79cbdeb291468bd888933330869a5717c52f33db2ceb7a46b870a8f138a1b9fcbdbc94e4e5108f529ba7d4aa5ab299e6a
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
1
+ ## [0.1.1] - 2026-06-08
2
+
3
+ - Fix local server lifecycle cleanup for Ramalama and direct `llama-server`
4
+ - Stop named Ramalama servers after idle shutdown and fall back to Podman/Docker cleanup when needed
5
+ - Ensure direct `llama-server` child processes are terminated on idle shutdown and interruption
6
+
1
7
  ## [0.1.0] - 2026-06-08
2
8
 
3
9
  - Initial release
@@ -87,6 +87,10 @@ module EmbeddingUtil
87
87
  host: options[:host],
88
88
  port: options[:port]&.to_i
89
89
  )
90
+ rescue Error => e
91
+ abort e.message
92
+ rescue Interrupt
93
+ exit 130
90
94
  end
91
95
 
92
96
  no_commands do
@@ -54,11 +54,36 @@ module EmbeddingUtil
54
54
  runtime == :llama_server ? "llama-server" : runtime.to_s
55
55
  end
56
56
 
57
+ def detached_server?
58
+ runtime == :ramalama
59
+ end
60
+
61
+ def stop_argv
62
+ return unless detached_server?
63
+
64
+ stop_argvs.first
65
+ end
66
+
67
+ def stop_argvs
68
+ return [] unless detached_server?
69
+
70
+ [
71
+ ["ramalama", "stop", server_name],
72
+ ["podman", "stop", "--time", "0", server_name],
73
+ ["docker", "stop", server_name]
74
+ ].select { |argv| self.class.command_path(argv.first) }
75
+ end
76
+
77
+ def server_name
78
+ "embedding-util-#{server_model.name}".tr("_", "-")
79
+ end
80
+
57
81
  private
58
82
 
59
83
  def ramalama_argv
60
84
  [
61
85
  "ramalama", "--runtime=llama.cpp", "serve",
86
+ "--name", server_name,
62
87
  "--host", host,
63
88
  "--port", port.to_s,
64
89
  "--runtime-args=#{server_model.settings.fetch(:server_flags).join(' ')}",
@@ -44,18 +44,20 @@ module EmbeddingUtil
44
44
  puts "starting #{server_model.name} with #{command.label} on http://#{host}:#{selected_port}"
45
45
  puts "shutdown idle: #{shutdown_idle}s" if shutdown_idle&.positive?
46
46
 
47
+ previous_traps = install_interrupt_traps
47
48
  Open3.popen2e(*command.argv) do |_stdin, output, wait_thread|
48
- write_state(server_model, pid: wait_thread.pid, url: "http://#{host}:#{selected_port}", runtime: command.label, port: selected_port)
49
- watchdog = start_watchdog(wait_thread.pid, shutdown_idle) { last_output_at }
50
-
51
- output.each_line do |line|
52
- last_output_at = Time.now
53
- print line
54
- end
55
-
56
- watchdog&.kill
49
+ url = "http://#{host}:#{selected_port}"
50
+ write_state(server_model, pid: wait_thread.pid, url: url, runtime: command.label, port: selected_port)
51
+ last_output_at_mutex = Mutex.new
52
+ reader = stream_output(output) { last_output_at_mutex.synchronize { last_output_at = Time.now } }
53
+ wait_for_runtime_serving(command, server_model, url, wait_thread.pid)
54
+ supervise_runtime(command, wait_thread, shutdown_idle) { last_output_at_mutex.synchronize { last_output_at } }
55
+ ensure
56
+ cleanup_runtime(command, wait_thread)
57
+ reader&.kill
58
+ reader&.join
57
59
  delete_state(server_model)
58
- wait_thread.value.exitstatus
60
+ restore_interrupt_traps(previous_traps)
59
61
  end
60
62
  end
61
63
 
@@ -150,6 +152,96 @@ module EmbeddingUtil
150
152
  end
151
153
  end
152
154
 
155
+ def stream_output(output)
156
+ Thread.new do
157
+ output.each_line do |line|
158
+ yield
159
+ print line
160
+ end
161
+ end
162
+ end
163
+
164
+ def wait_for_runtime_serving(command, server_model, url, pid)
165
+ warn "waiting for #{server_model.name} at #{url}" if config.verbose
166
+ wait_for_serving(server_model, url, pid, check_process: !command.detached_server?)
167
+ warn "#{server_model.name} is healthy" if config.verbose
168
+ end
169
+
170
+ def supervise_runtime(command, wait_thread, shutdown_idle, &last_output_at)
171
+ warn "supervising #{command.server_name}" if config.verbose && command.detached_server?
172
+ return supervise_detached_server(command, shutdown_idle, &last_output_at) if command.detached_server?
173
+
174
+ watchdog = start_watchdog(wait_thread.pid, shutdown_idle, &last_output_at)
175
+ wait_thread.value.exitstatus
176
+ ensure
177
+ watchdog&.kill
178
+ end
179
+
180
+ def wait_for_serving(server_model, url, pid, check_process: true)
181
+ deadline = Time.now + config.startup_timeout
182
+ loop do
183
+ return if healthy_url?(url)
184
+ raise UnsupportedProviderError, "#{server_model.name} server process exited before becoming healthy" if check_process && !process_running?(pid)
185
+ raise UnsupportedProviderError, "timed out after #{config.startup_timeout}s waiting for #{server_model.name} to become healthy" if Time.now >= deadline
186
+
187
+ sleep 0.25
188
+ end
189
+ end
190
+
191
+ def supervise_detached_server(command, shutdown_idle)
192
+ loop do
193
+ if idle_expired?(shutdown_idle, yield)
194
+ warn "stopping #{command.server_name} after #{shutdown_idle}s idle" if config.verbose
195
+ stop_detached_server(command)
196
+ return 0
197
+ end
198
+
199
+ sleep [shutdown_idle.to_f / 5.0, 1].max
200
+ end
201
+ rescue Interrupt
202
+ stop_detached_server(command)
203
+ 130
204
+ end
205
+
206
+ def idle_expired?(shutdown_idle, last_output_at)
207
+ shutdown_idle&.positive? && Time.now - last_output_at >= shutdown_idle
208
+ end
209
+
210
+ def stop_detached_server(command)
211
+ command.stop_argvs.any? do |stop_argv|
212
+ system(*stop_argv, out: File::NULL, err: File::NULL)
213
+ end
214
+ end
215
+
216
+ def cleanup_runtime(command, wait_thread)
217
+ return unless command
218
+
219
+ if command.detached_server?
220
+ stop_detached_server(command)
221
+ else
222
+ terminate_runtime_process(command, wait_thread&.pid)
223
+ end
224
+ end
225
+
226
+ def terminate_runtime_process(command, pid)
227
+ return if command.detached_server? || !pid || pid == Process.pid || !process_running?(pid)
228
+
229
+ terminate_idle_process(pid)
230
+ rescue Errno::ESRCH
231
+ nil
232
+ end
233
+
234
+ def install_interrupt_traps
235
+ %w[INT TERM].to_h do |signal|
236
+ previous = Signal.trap(signal) { Thread.main.raise Interrupt }
237
+ [signal, previous]
238
+ end
239
+ end
240
+
241
+ def restore_interrupt_traps(previous_traps)
242
+ previous_traps&.each { |signal, handler| Signal.trap(signal, handler) }
243
+ end
244
+
153
245
  def terminate_idle_process(pid)
154
246
  Process.kill("TERM", pid)
155
247
  sleep 5
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module EmbeddingUtil
4
- VERSION = "0.1.0"
4
+ VERSION = "0.1.1"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embedding_util
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - hmdne