legion-llm 0.5.13 → 0.5.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +4 -0
- data/CHANGELOG.md +7 -0
- data/lib/legion/llm/daemon_client.rb +56 -3
- data/lib/legion/llm/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: ceb7fa0519b3985439d577579889de688b42a9e8d3c5fdbc6a1be5b22c7fb2ba
|
|
4
|
+
data.tar.gz: 80fd2cc36a19cf49783f9429e4aca545e26c3b8ca130fa649800955ce52001d2
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 82f00569a04406cc64983f447e3fbd5fbb4c9765f9caa59543a7db0dc612ca659368dae3441f4fa77ae27aaf74347dac997cccb842dc3a2e3e99184ac52f591e
|
|
7
|
+
data.tar.gz: 40d6ec150d5832f0a4d0ac8f9f9e25754c58e6d4d5a528eb13c2c1420a62a8e6e579c3dda417fdd4dcff8f9d6ba888d7613baba266596c442e787f041a1a50a0
|
data/.rubocop.yml
CHANGED
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,12 @@
|
|
|
1
1
|
# Legion LLM Changelog
|
|
2
2
|
|
|
3
|
+
## [0.5.14] - 2026-03-27
|
|
4
|
+
|
|
5
|
+
### Added
|
|
6
|
+
- `DaemonClient.inference` method for conversation-level routing — accepts a full `messages:` array and optional `tools:`, `model:`, `provider:`, and `timeout:` keyword args, posts to `POST /api/llm/inference`, and returns a structured `{ status: :ok, data: { content:, tool_calls:, stop_reason:, model:, input_tokens:, output_tokens: } }` hash on success
|
|
7
|
+
- `http_post` now accepts an optional `timeout:` keyword argument (default `DEFAULT_TIMEOUT = 60`) so callers like `inference` can pass a longer timeout (120s) without affecting existing `chat` calls
|
|
8
|
+
- `interpret_inference_response` private helper that maps the `/api/llm/inference` HTTP response — 200 returns `:ok` with structured fields, 4xx/5xx follow the same error handling as `interpret_response`
|
|
9
|
+
|
|
3
10
|
## [0.5.13] - 2026-03-27
|
|
4
11
|
|
|
5
12
|
### Changed
|
|
@@ -102,13 +102,29 @@ module Legion
|
|
|
102
102
|
http.request(request)
|
|
103
103
|
end
|
|
104
104
|
|
|
105
|
+
# Sends a whole conversation to the daemon for inference.
#
# The request body always carries +messages+ and +tools+; +model+ and
# +provider+ are included only when a truthy value is supplied. The body is
# POSTed to /api/llm/inference and +timeout+ is forwarded to http_post
# (120 by default).
#
# Returns whatever interpret_inference_response produces for the HTTP
# response. If anything raises a StandardError along the way, the client is
# marked unhealthy and { status: :unavailable, error: <message> } is returned.
def inference(messages:, tools: [], model: nil, provider: nil, timeout: 120)
  # Drop the optional routing hints that were not provided (nil/false).
  routing = { model: model, provider: provider }.select { |_key, value| value }
  payload = { messages: messages, tools: tools }.merge(routing)

  interpret_inference_response(
    http_post('/api/llm/inference', payload, timeout: timeout)
  )
rescue StandardError => e
  mark_unhealthy
  { status: :unavailable, error: e.message }
end
|
|
119
|
+
|
|
105
120
|
# Builds and sends a POST request with a JSON body.
|
|
106
121
|
# Returns Net::HTTPResponse.
|
|
107
|
-
|
|
122
|
+
# The optional timeout: keyword overrides the default read timeout.
|
|
123
|
+
def http_post(path, body, timeout: DEFAULT_TIMEOUT)
|
|
108
124
|
uri = URI.parse("#{daemon_url}#{path}")
|
|
109
125
|
http = Net::HTTP.new(uri.host, uri.port)
|
|
110
126
|
http.open_timeout = 5
|
|
111
|
-
http.read_timeout =
|
|
127
|
+
http.read_timeout = timeout
|
|
112
128
|
request = Net::HTTP::Post.new(uri.request_uri)
|
|
113
129
|
request['Content-Type'] = 'application/json'
|
|
114
130
|
request.body = ::JSON.dump(body)
|
|
@@ -180,7 +196,44 @@ module Legion
|
|
|
180
196
|
0
|
|
181
197
|
end
|
|
182
198
|
|
|
183
|
-
|
|
199
|
+
# Maps an HTTP response from /api/llm/inference to a structured status hash.
#
# @param response [#code, #body] HTTP response as returned by http_post
# @return [Hash] one of
#   - { status: :ok, data: { content:, tool_calls:, stop_reason:, model:,
#     input_tokens:, output_tokens: } } for a 200 with a Hash body
#   - { status: :denied, error: } for 403
#   - { status: :rate_limited, retry_after: } for 429
#   - { status: :unavailable } for 503
#   - { status: :error, code:, body: } for every other case, including a 200
#     whose parsed body is not a Hash
def interpret_inference_response(response)
  code = response.code.to_i
  parsed = safe_parse(response.body)
  structured = parsed.is_a?(Hash)

  if code == 200 && structured
    # Prefer the :data envelope, but fall back to the top-level body when the
    # envelope is missing or is not a Hash (e.g. JSON `"data": null`). Indexing
    # a nil/String/Array envelope would raise, and the caller's blanket rescue
    # would then mark a healthy daemon as unhealthy.
    envelope = parsed[:data]
    data = envelope.is_a?(Hash) ? envelope : parsed

    return {
      status: :ok,
      data: {
        content: data[:content],
        tool_calls: data[:tool_calls] || [],
        stop_reason: data[:stop_reason],
        model: data[:model],
        input_tokens: data[:input_tokens],
        output_tokens: data[:output_tokens]
      }
    }
  end

  case code
  when 403
    Legion::Logging.warn("Daemon returned 403 Denied url=#{daemon_url}") if defined?(Legion::Logging)
    # A non-Hash body has no :error key to fetch; report the body itself.
    { status: :denied, error: structured ? parsed.fetch(:error, parsed) : parsed }
  when 429
    retry_after = extract_retry_after(response, parsed)
    if defined?(Legion::Logging)
      Legion::Logging.warn("Daemon returned 429 RateLimited url=#{daemon_url} retry_after=#{retry_after}")
    end
    { status: :rate_limited, retry_after: retry_after }
  when 503
    { status: :unavailable }
  else
    # Also reached for a 200 whose body could not be treated as a Hash.
    { status: :error, code: code, body: parsed }
  end
end
|
|
235
|
+
|
|
236
|
+
private_class_method :fetch_daemon_url, :safe_parse, :extract_retry_after, :interpret_inference_response
|
|
184
237
|
end
|
|
185
238
|
end
|
|
186
239
|
end
|
data/lib/legion/llm/version.rb
CHANGED