lex-llm-ollama 0.2.9 → 0.2.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/lib/legion/extensions/llm/ollama/provider.rb +117 -5
- data/lib/legion/extensions/llm/ollama/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 98c1040c20dfe6ead193a404a6e34fda71e98188859f1e9c828a9778fff1b262
|
|
4
|
+
data.tar.gz: e5ce31cff8c62f5cab9ce6a59d33e90847bfbbe89e2d9445d8f51ce8540fe67c
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: cf3f73f8533cf2bd1145e3beffd12f602150eae3b4ad0031df24eaecac36bef6536d04e18a15b20cb2d6f529812a5cb63f2dfad28ed0638820a582f6e0ff37c9
|
|
7
|
+
data.tar.gz: da96da090f714b1d0811d475d091f637d4124931168039a2d0de2190b4022860e40a0c099819fb1d359c2ee055daba984d87a9748d9481e635fbe9673ea6eab6
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,11 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.2.10 - 2026-05-16
|
|
4
|
+
|
|
5
|
+
- Stop assuming every non-embedding Ollama model supports tools; fallback chat discovery now advertises completion, streaming, and vision only.
|
|
6
|
+
- Add canonical Ollama capability normalization so reported `tools`/function-calling metadata is preserved and streaming is inferred for chat/completion models.
|
|
7
|
+
- Include reported capability metadata from `/api/show` model detail responses.
|
|
8
|
+
|
|
3
9
|
## 0.2.9 - 2026-05-13
|
|
4
10
|
|
|
5
11
|
- Add `fetch_model_detail` — calls POST `/api/show` to retrieve the real context window from Ollama.
|
|
@@ -90,7 +90,7 @@ module Legion
|
|
|
90
90
|
def fetch_model_detail(model_name)
|
|
91
91
|
raw = show_model(model_name)
|
|
92
92
|
context_window = extract_context_window(raw)
|
|
93
|
-
{ context_window: context_window }.compact
|
|
93
|
+
{ context_window: context_window, capabilities: extract_capabilities(raw) }.compact
|
|
94
94
|
rescue StandardError => e
|
|
95
95
|
handle_exception(e, level: :warn, handled: true, operation: 'ollama.fetch_model_detail',
|
|
96
96
|
model: model_name)
|
|
@@ -249,7 +249,7 @@ module Legion
|
|
|
249
249
|
model: model_id,
|
|
250
250
|
messages: format_messages(messages),
|
|
251
251
|
stream: stream,
|
|
252
|
-
think: thinking
|
|
252
|
+
think: thinking == true,
|
|
253
253
|
keep_alive: ollama_keep_alive,
|
|
254
254
|
format: schema_format(schema),
|
|
255
255
|
options: { temperature: temperature }.compact,
|
|
@@ -258,6 +258,77 @@ module Legion
|
|
|
258
258
|
}.compact
|
|
259
259
|
end
|
|
260
260
|
|
|
261
|
+
def stream_response(connection, payload, additional_headers = {}, &block)
|
|
262
|
+
buffer = +''
|
|
263
|
+
chunks = []
|
|
264
|
+
|
|
265
|
+
connection.post(stream_url, payload) do |req|
|
|
266
|
+
req.headers = additional_headers.merge(req.headers) unless additional_headers.empty?
|
|
267
|
+
req.options.on_data = ndjson_handler(buffer, chunks, block)
|
|
268
|
+
end
|
|
269
|
+
|
|
270
|
+
finalize_stream(chunks)
|
|
271
|
+
end
|
|
272
|
+
|
|
273
|
+
def ndjson_handler(buffer, chunks, block)
|
|
274
|
+
proc do |chunk_data, _bytes, env|
|
|
275
|
+
next if env.respond_to?(:status) && env.status && env.status != 200
|
|
276
|
+
|
|
277
|
+
buffer << chunk_data.to_s
|
|
278
|
+
drain_ndjson_buffer(buffer, chunks, block)
|
|
279
|
+
end
|
|
280
|
+
end
|
|
281
|
+
|
|
282
|
+
def drain_ndjson_buffer(buffer, chunks, block)
|
|
283
|
+
while (idx = buffer.index("\n"))
|
|
284
|
+
line = buffer.slice!(0..idx).strip
|
|
285
|
+
next if line.empty?
|
|
286
|
+
|
|
287
|
+
parse_ndjson_line(line, chunks, block)
|
|
288
|
+
end
|
|
289
|
+
end
|
|
290
|
+
|
|
291
|
+
def parse_ndjson_line(line, chunks, block)
|
|
292
|
+
parsed = Legion::JSON.parse(line, symbolize_names: false)
|
|
293
|
+
return unless parsed.is_a?(Hash)
|
|
294
|
+
|
|
295
|
+
built = build_chunk(parsed)
|
|
296
|
+
chunks << built
|
|
297
|
+
block&.call(built)
|
|
298
|
+
rescue Legion::JSON::ParseError => e
|
|
299
|
+
handle_exception(e, level: :debug, handled: true, operation: 'ollama.stream_parse')
|
|
300
|
+
end
|
|
301
|
+
|
|
302
|
+
def finalize_stream(chunks)
|
|
303
|
+
return Legion::Extensions::Llm::Message.new(role: :assistant, content: nil) if chunks.empty?
|
|
304
|
+
|
|
305
|
+
Legion::Extensions::Llm::Message.new(
|
|
306
|
+
role: :assistant,
|
|
307
|
+
content: join_stream_content(chunks),
|
|
308
|
+
thinking: join_stream_thinking(chunks),
|
|
309
|
+
tool_calls: merge_stream_tool_calls(chunks),
|
|
310
|
+
model_id: chunks.last.model_id,
|
|
311
|
+
input_tokens: chunks.last.input_tokens,
|
|
312
|
+
output_tokens: chunks.last.output_tokens,
|
|
313
|
+
raw: chunks.last.raw
|
|
314
|
+
)
|
|
315
|
+
end
|
|
316
|
+
|
|
317
|
+
def join_stream_content(chunks)
|
|
318
|
+
text = chunks.filter_map { |c| c.content&.to_s }.join
|
|
319
|
+
text.empty? ? nil : text
|
|
320
|
+
end
|
|
321
|
+
|
|
322
|
+
def join_stream_thinking(chunks)
|
|
323
|
+
parts = chunks.filter_map { |c| c.thinking&.text }
|
|
324
|
+
Thinking.build(text: parts.empty? ? nil : parts.join)
|
|
325
|
+
end
|
|
326
|
+
|
|
327
|
+
def merge_stream_tool_calls(chunks)
|
|
328
|
+
merged = chunks.filter_map(&:tool_calls).reject(&:empty?).reduce({}, :merge)
|
|
329
|
+
merged.empty? ? nil : merged
|
|
330
|
+
end
|
|
331
|
+
|
|
261
332
|
def format_messages(messages)
|
|
262
333
|
messages.map do |message|
|
|
263
334
|
content = message.content
|
|
@@ -312,11 +383,13 @@ module Legion
|
|
|
312
383
|
def parse_completion_response(response)
|
|
313
384
|
body = response.body
|
|
314
385
|
message = body.fetch('message', {})
|
|
386
|
+
content, thinking = extract_thinking_from_completion(message)
|
|
315
387
|
Legion::Extensions::Llm::Message.new(
|
|
316
388
|
role: :assistant,
|
|
317
|
-
content: message['content'],
|
|
389
|
+
content: content,
|
|
318
390
|
model_id: body['model'],
|
|
319
391
|
tool_calls: parse_tool_calls(message['tool_calls']),
|
|
392
|
+
thinking: thinking,
|
|
320
393
|
input_tokens: body['prompt_eval_count'],
|
|
321
394
|
output_tokens: body['eval_count'],
|
|
322
395
|
raw: body
|
|
@@ -325,9 +398,12 @@ module Legion
|
|
|
325
398
|
|
|
326
399
|
def build_chunk(data)
|
|
327
400
|
message = data.fetch('message', {})
|
|
401
|
+
thinking = message['thinking']
|
|
328
402
|
Legion::Extensions::Llm::Chunk.new(
|
|
329
403
|
role: :assistant,
|
|
330
404
|
content: message['content'],
|
|
405
|
+
thinking: thinking ? Thinking.build(text: thinking) : nil,
|
|
406
|
+
tool_calls: parse_tool_calls(message['tool_calls']),
|
|
331
407
|
model_id: data['model'],
|
|
332
408
|
input_tokens: data['prompt_eval_count'],
|
|
333
409
|
output_tokens: data['eval_count'],
|
|
@@ -335,6 +411,22 @@ module Legion
|
|
|
335
411
|
)
|
|
336
412
|
end
|
|
337
413
|
|
|
414
|
+
def extract_thinking_from_completion(message)
|
|
415
|
+
extraction = Responses::ThinkingExtractor.extract(
|
|
416
|
+
message['content'],
|
|
417
|
+
metadata: thinking_metadata(message)
|
|
418
|
+
)
|
|
419
|
+
|
|
420
|
+
[
|
|
421
|
+
extraction.content,
|
|
422
|
+
Thinking.build(text: extraction.thinking, signature: extraction.signature)
|
|
423
|
+
]
|
|
424
|
+
end
|
|
425
|
+
|
|
426
|
+
def thinking_metadata(message)
|
|
427
|
+
{ thinking: message['thinking'] }.compact
|
|
428
|
+
end
|
|
429
|
+
|
|
338
430
|
def parse_tool_calls(tool_calls)
|
|
339
431
|
return nil unless tool_calls
|
|
340
432
|
|
|
@@ -372,15 +464,35 @@ module Legion
|
|
|
372
464
|
end
|
|
373
465
|
|
|
374
466
|
def infer_capabilities(name, family, api_caps)
|
|
375
|
-
|
|
467
|
+
normalized = normalize_ollama_capabilities(api_caps)
|
|
468
|
+
return normalized unless normalized.empty?
|
|
376
469
|
|
|
377
470
|
if embedding_model?(name, family)
|
|
378
471
|
[:embedding]
|
|
379
472
|
else
|
|
380
|
-
%i[completion streaming vision tools]
|
|
473
|
+
%i[completion streaming vision]
|
|
381
474
|
end
|
|
382
475
|
end
|
|
383
476
|
|
|
477
|
+
def normalize_ollama_capabilities(capabilities)
|
|
478
|
+
Array(capabilities).compact.each_with_object([]) do |capability, result|
|
|
479
|
+
capability_sym = capability.to_s.downcase.strip.to_sym
|
|
480
|
+
next if capability_sym.to_s.empty?
|
|
481
|
+
|
|
482
|
+
result << capability_sym
|
|
483
|
+
result << :tools if %i[function_calling functions tool tool_use].include?(capability_sym)
|
|
484
|
+
result << :streaming if %i[chat completion].include?(capability_sym)
|
|
485
|
+
end.uniq
|
|
486
|
+
end
|
|
487
|
+
|
|
488
|
+
def extract_capabilities(raw)
|
|
489
|
+
return nil unless raw.is_a?(Hash)
|
|
490
|
+
|
|
491
|
+
caps = raw['capabilities'] || raw[:capabilities]
|
|
492
|
+
normalized = normalize_ollama_capabilities(caps)
|
|
493
|
+
normalized unless normalized.empty?
|
|
494
|
+
end
|
|
495
|
+
|
|
384
496
|
def embedding_model?(name, family)
|
|
385
497
|
name.to_s.match?(/embed|embedding/i) || family.to_s.match?(/bert|nomic/i)
|
|
386
498
|
end
|