legion-llm 0.5.13 → 0.5.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: fa7ba49d17c9d418f172c576bc3d1b2d5174757c489928cecf52a403c43c4544
4
- data.tar.gz: a1339abda9a424179721b7b7af22e5ec972c5847976876a967547b5080f48c29
3
+ metadata.gz: ceb7fa0519b3985439d577579889de688b42a9e8d3c5fdbc6a1be5b22c7fb2ba
4
+ data.tar.gz: 80fd2cc36a19cf49783f9429e4aca545e26c3b8ca130fa649800955ce52001d2
5
5
  SHA512:
6
- metadata.gz: 3283898e616549b10b78d722210c538e9f30135a5b20eed49fea1f326c300ca9be4cfbb269ab698080eaee9f8f3d0ccee2bf7350474b195b59130cc55ec5cbe6
7
- data.tar.gz: 76c4137add776f2c40634daf05baaf459d99a865147b6021e24f22c442564cca569ac6ff63d6bc3b7c7bb43aafa01690f02a2ec948e8bbf840ca7a43bc89c34f
6
+ metadata.gz: 82f00569a04406cc64983f447e3fbd5fbb4c9765f9caa59543a7db0dc612ca659368dae3441f4fa77ae27aaf74347dac997cccb842dc3a2e3e99184ac52f591e
7
+ data.tar.gz: 40d6ec150d5832f0a4d0ac8f9f9e25754c58e6d4d5a528eb13c2c1420a62a8e6e579c3dda417fdd4dcff8f9d6ba888d7613baba266596c442e787f041a1a50a0
data/.rubocop.yml CHANGED
@@ -54,3 +54,7 @@ Naming/PredicateMethod:
54
54
 
55
55
  Metrics/ParameterLists:
56
56
  Max: 9
57
+
58
+ Style/RedundantConstantBase:
59
+ Exclude:
60
+ - 'spec/**/*'
data/CHANGELOG.md CHANGED
@@ -1,5 +1,12 @@
1
1
  # Legion LLM Changelog
2
2
 
3
+ ## [0.5.14] - 2026-03-27
4
+
5
+ ### Added
6
+ - `DaemonClient.inference` method for conversation-level routing — accepts a full `messages:` array and optional `tools:`, `model:`, `provider:`, and `timeout:` keyword args, sends a `POST /api/llm/inference` request, and returns a structured `{ status: :ok, data: { content:, tool_calls:, stop_reason:, model:, input_tokens:, output_tokens: } }` hash on success
7
+ - `http_post` now accepts an optional `timeout:` keyword argument (default `DEFAULT_TIMEOUT = 60`) that sets the read timeout, so callers like `inference` can pass a longer timeout (120s) without affecting existing `chat` calls
8
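+ - `interpret_inference_response` private helper that maps the `/api/llm/inference` HTTP response — 200 returns `:ok` with structured fields; 403, 429, and 503 map to `:denied`, `:rate_limited`, and `:unavailable`, and any other status returns `:error`, following the same error handling as `interpret_response`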
9
+
3
10
  ## [0.5.13] - 2026-03-27
4
11
 
5
12
  ### Changed
@@ -102,13 +102,29 @@ module Legion
102
102
  http.request(request)
103
103
  end
104
104
 
105
+ # POSTs a conversation-level inference request to the daemon REST API (POST /api/llm/inference).
106
+ # Sends messages and tools in the JSON body; model and provider are added only when truthy, and timeout (default 120) is passed to http_post.
107
+ # Returns the hash built by interpret_inference_response; on any StandardError it calls mark_unhealthy and returns { status: :unavailable, error: e.message }.
108
+ def inference(messages:, tools: [], model: nil, provider: nil, timeout: 120)
109
+ body = { messages: messages, tools: tools }
110
+ body[:model] = model if model
111
+ body[:provider] = provider if provider
112
+
113
+ response = http_post('/api/llm/inference', body, timeout: timeout)
114
+ interpret_inference_response(response)
115
+ rescue StandardError => e
116
+ mark_unhealthy
117
+ { status: :unavailable, error: e.message }
118
+ end
119
+
105
120
  # Builds and sends a POST request with a JSON body.
106
121
  # Returns Net::HTTPResponse.
107
- def http_post(path, body)
122
+ # The optional timeout: keyword overrides the default read timeout.
123
+ def http_post(path, body, timeout: DEFAULT_TIMEOUT)
108
124
  uri = URI.parse("#{daemon_url}#{path}")
109
125
  http = Net::HTTP.new(uri.host, uri.port)
110
126
  http.open_timeout = 5
111
- http.read_timeout = DEFAULT_TIMEOUT
127
+ http.read_timeout = timeout
112
128
  request = Net::HTTP::Post.new(uri.request_uri)
113
129
  request['Content-Type'] = 'application/json'
114
130
  request.body = ::JSON.dump(body)
@@ -180,7 +196,44 @@ module Legion
180
196
  0
181
197
  end
182
198
 
183
- private_class_method :fetch_daemon_url, :safe_parse, :extract_retry_after
199
+ # Maps an HTTP response from /api/llm/inference to a structured status hash.
200
+ # On 200 returns :ok with content, tool_calls (defaulting to []), stop_reason, model, input_tokens and output_tokens read from the parsed body's :data entry, or from the parsed body itself when :data is absent.
201
+ # Otherwise 403 maps to :denied, 429 to :rate_limited (with retry_after), 503 to :unavailable, and any other code to :error with the code and parsed body; 403 and 429 also log a warning when Legion::Logging is defined. NOTE(review): said to mirror interpret_response, which is not visible here — confirm.
202
+ def interpret_inference_response(response)
203
+ code = response.code.to_i
204
+ parsed = safe_parse(response.body)
205
+
206
+ if code == 200
207
+ data = parsed.fetch(:data, parsed)
208
+ return {
209
+ status: :ok,
210
+ data: {
211
+ content: data[:content],
212
+ tool_calls: data[:tool_calls] || [],
213
+ stop_reason: data[:stop_reason],
214
+ model: data[:model],
215
+ input_tokens: data[:input_tokens],
216
+ output_tokens: data[:output_tokens]
217
+ }
218
+ }
219
+ end
220
+
221
+ case code
222
+ when 403
223
+ Legion::Logging.warn("Daemon returned 403 Denied url=#{daemon_url}") if defined?(Legion::Logging)
224
+ { status: :denied, error: parsed.fetch(:error, parsed) }
225
+ when 429
226
+ retry_after = extract_retry_after(response, parsed)
227
+ Legion::Logging.warn("Daemon returned 429 RateLimited url=#{daemon_url} retry_after=#{retry_after}") if defined?(Legion::Logging)
228
+ { status: :rate_limited, retry_after: retry_after }
229
+ when 503
230
+ { status: :unavailable }
231
+ else
232
+ { status: :error, code: code, body: parsed }
233
+ end
234
+ end
235
+
236
+ private_class_method :fetch_daemon_url, :safe_parse, :extract_retry_after, :interpret_inference_response
184
237
  end
185
238
  end
186
239
  end
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Legion
4
4
  module LLM
5
- VERSION = '0.5.13'
5
+ VERSION = '0.5.14'
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: legion-llm
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.13
4
+ version: 0.5.14
5
5
  platform: ruby
6
6
  authors:
7
7
  - Esity