lex-llm-ollama 0.2.10 → 0.2.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: ae0169129db1cf8833dcbc252f5b06b6c3ed6e2ab106a262b9dc90789919da55
|
|
4
|
+
data.tar.gz: f8af5ef3ccea0f79638bbdf77dfb43392cc7332a602aedbc01a2514e229d8e03
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 208c7d4cd46c681bffd89ff263378beb2460af6e27554ca79c04e2850771bd69b9722e3fc547093af2dc20332ee29e17da72363aab8e92b435f1b93d3615a77f
|
|
7
|
+
data.tar.gz: 92333b402f15d5eb694a747a45cb6b37289d6d2d6fc43dcb2784647e59c12c4eca2b959491f3fe4d567f4b134f8a37d4589be7b76b3ad887b1630bcaa46662cc
|
data/.rubocop.yml
CHANGED
|
@@ -14,5 +14,21 @@ Metrics/BlockLength:
|
|
|
14
14
|
- spec/**/*
|
|
15
15
|
Metrics/MethodLength:
|
|
16
16
|
Enabled: false
|
|
17
|
+
Metrics/ParameterLists:
|
|
18
|
+
Enabled: false
|
|
19
|
+
Metrics/AbcSize:
|
|
20
|
+
Enabled: false
|
|
21
|
+
Metrics/CyclomaticComplexity:
|
|
22
|
+
Enabled: false
|
|
23
|
+
Metrics/PerceivedComplexity:
|
|
24
|
+
Enabled: false
|
|
17
25
|
RSpec/MultipleExpectations:
|
|
18
26
|
Enabled: false
|
|
27
|
+
RSpec/ExampleLength:
|
|
28
|
+
Enabled: false
|
|
29
|
+
RSpec/LeakyConstantDeclaration:
|
|
30
|
+
Enabled: false
|
|
31
|
+
RSpec/InstanceVariable:
|
|
32
|
+
Enabled: false
|
|
33
|
+
Style/Documentation:
|
|
34
|
+
Enabled: false
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,14 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.2.12 - 2026-05-21
|
|
4
|
+
|
|
5
|
+
- Add `default_transport`/`default_tier` class declarations, remove duplicate instance methods
|
|
6
|
+
- Add `model_allowed?` filtering in `discover_offerings`
|
|
7
|
+
- Add `DiscoveryRefresh` actor (Every, 30min, run_now) for non-blocking model discovery
|
|
8
|
+
- Identity headers included via base provider
|
|
9
|
+
- api_base reads from settings[:endpoint] fallback
|
|
10
|
+
|
|
11
|
+
|
|
3
12
|
## 0.2.10 - 2026-05-16
|
|
4
13
|
|
|
5
14
|
- Stop assuming every non-embedding Ollama model supports tools; fallback chat discovery now advertises completion, streaming, and vision only.
|
data/lib/legion/extensions/llm/ollama/actors/discovery_refresh.rb
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
begin
|
|
4
|
+
require 'legion/extensions/actors/every'
|
|
5
|
+
rescue LoadError => e
|
|
6
|
+
warn(e.message) if $VERBOSE
|
|
7
|
+
end
|
|
8
|
+
|
|
9
|
+
return unless defined?(Legion::Extensions::Actors::Every)
|
|
10
|
+
|
|
11
|
+
module Legion
|
|
12
|
+
module Extensions
|
|
13
|
+
module Llm
|
|
14
|
+
module Ollama
|
|
15
|
+
module Actor
|
|
16
|
+
class DiscoveryRefresh < Legion::Extensions::Actors::Every
|
|
17
|
+
include Legion::Logging::Helper
|
|
18
|
+
|
|
19
|
+
REFRESH_INTERVAL = 1800
|
|
20
|
+
|
|
21
|
+
def runner_class = self.class
|
|
22
|
+
def runner_function = 'manual'
|
|
23
|
+
def run_now? = true
|
|
24
|
+
def use_runner? = false
|
|
25
|
+
def check_subtask? = false
|
|
26
|
+
def generate_task? = false
|
|
27
|
+
|
|
28
|
+
def time
|
|
29
|
+
return REFRESH_INTERVAL unless defined?(Legion::Settings)
|
|
30
|
+
|
|
31
|
+
Legion::Settings.dig(:extensions, :llm, :ollama, :discovery_interval) || REFRESH_INTERVAL
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
def manual
|
|
35
|
+
log.debug('[ollama][discovery_refresh] refreshing model list')
|
|
36
|
+
return unless defined?(Legion::LLM::Discovery)
|
|
37
|
+
|
|
38
|
+
Legion::LLM::Discovery.run
|
|
39
|
+
if defined?(Legion::LLM::Router) && Legion::LLM::Router.respond_to?(:populate_auto_rules)
|
|
40
|
+
Legion::LLM::Router.populate_auto_rules(Legion::LLM::Discovery.discovered_instances)
|
|
41
|
+
end
|
|
42
|
+
if defined?(Legion::LLM::Inventory) && Legion::LLM::Inventory.respond_to?(:invalidate_offerings_cache!)
|
|
43
|
+
Legion::LLM::Inventory.invalidate_offerings_cache!
|
|
44
|
+
end
|
|
45
|
+
rescue StandardError => e
|
|
46
|
+
handle_exception(e, level: :warn, handled: true, operation: 'ollama.actor.discovery_refresh')
|
|
47
|
+
end
|
|
48
|
+
end
|
|
49
|
+
end
|
|
50
|
+
end
|
|
51
|
+
end
|
|
52
|
+
end
|
|
53
|
+
end
|
data/lib/legion/extensions/llm/ollama/provider.rb
CHANGED
|
@@ -16,6 +16,8 @@ module Legion
|
|
|
16
16
|
|
|
17
17
|
def slug = 'ollama'
|
|
18
18
|
def local? = true
|
|
19
|
+
def default_transport = :http
|
|
20
|
+
def default_tier = :local
|
|
19
21
|
def configuration_requirements = []
|
|
20
22
|
def capabilities = Capabilities
|
|
21
23
|
|
|
@@ -40,7 +42,7 @@ module Legion
|
|
|
40
42
|
end
|
|
41
43
|
|
|
42
44
|
def api_base
|
|
43
|
-
resolve_base_url || normalize_url(settings[:base_url] || '127.0.0.1:11434')
|
|
45
|
+
resolve_base_url || normalize_url(settings[:base_url] || settings[:endpoint] || 'http://127.0.0.1:11434')
|
|
44
46
|
end
|
|
45
47
|
|
|
46
48
|
def config_base_url
|
|
@@ -110,9 +112,13 @@ module Legion
|
|
|
110
112
|
log.debug do
|
|
111
113
|
"ollama provider discovering offerings live=#{live} cached_model_count=#{Array(@cached_models).size}"
|
|
112
114
|
end
|
|
113
|
-
resolve_models(live).
|
|
114
|
-
|
|
115
|
+
offerings = resolve_models(live).filter_map do |model_info|
|
|
116
|
+
next unless model_allowed?(model_info.id)
|
|
117
|
+
|
|
118
|
+
offering_from_model(model_info)
|
|
115
119
|
end
|
|
120
|
+
log.debug { "ollama provider built offering_count=#{offerings.size} live=#{live}" }
|
|
121
|
+
offerings
|
|
116
122
|
rescue Faraday::ConnectionFailed => e
|
|
117
123
|
log.warn("[ollama] instance=#{provider_instance_id} unreachable: #{e.message}")
|
|
118
124
|
[]
|
|
@@ -167,14 +173,6 @@ module Legion
|
|
|
167
173
|
)
|
|
168
174
|
end
|
|
169
175
|
|
|
170
|
-
def offering_transport
|
|
171
|
-
config.respond_to?(:transport) ? config.transport : :http
|
|
172
|
-
end
|
|
173
|
-
|
|
174
|
-
def offering_tier
|
|
175
|
-
config.respond_to?(:tier) ? config.tier : :local
|
|
176
|
-
end
|
|
177
|
-
|
|
178
176
|
def offering_usage_type(model_info)
|
|
179
177
|
model_info.embedding? ? :embedding : :inference
|
|
180
178
|
end
|
|
@@ -238,7 +236,7 @@ module Legion
|
|
|
238
236
|
settings[:keep_alive]
|
|
239
237
|
end
|
|
240
238
|
|
|
241
|
-
def render_payload(messages, tools:, temperature:, model:, stream:, schema:, thinking:, tool_prefs:)
|
|
239
|
+
def render_payload(messages, tools:, temperature:, model:, stream:, schema:, thinking:, tool_prefs:)
|
|
242
240
|
model_id = model.respond_to?(:id) ? model.id : model
|
|
243
241
|
log.debug do
|
|
244
242
|
"ollama provider rendering chat payload model=#{model_id} message_count=#{messages.size} " \
|
|
@@ -249,7 +247,7 @@ module Legion
|
|
|
249
247
|
model: model_id,
|
|
250
248
|
messages: format_messages(messages),
|
|
251
249
|
stream: stream,
|
|
252
|
-
think: thinking
|
|
250
|
+
think: thinking == true,
|
|
253
251
|
keep_alive: ollama_keep_alive,
|
|
254
252
|
format: schema_format(schema),
|
|
255
253
|
options: { temperature: temperature }.compact,
|
|
@@ -258,6 +256,77 @@ module Legion
|
|
|
258
256
|
}.compact
|
|
259
257
|
end
|
|
260
258
|
|
|
259
|
+
def stream_response(connection, payload, additional_headers = {}, &block)
|
|
260
|
+
buffer = +''
|
|
261
|
+
chunks = []
|
|
262
|
+
|
|
263
|
+
connection.post(stream_url, payload) do |req|
|
|
264
|
+
req.headers = additional_headers.merge(req.headers) unless additional_headers.empty?
|
|
265
|
+
req.options.on_data = ndjson_handler(buffer, chunks, block)
|
|
266
|
+
end
|
|
267
|
+
|
|
268
|
+
finalize_stream(chunks)
|
|
269
|
+
end
|
|
270
|
+
|
|
271
|
+
def ndjson_handler(buffer, chunks, block)
|
|
272
|
+
proc do |chunk_data, _bytes, env|
|
|
273
|
+
next if env.respond_to?(:status) && env.status && env.status != 200
|
|
274
|
+
|
|
275
|
+
buffer << chunk_data.to_s
|
|
276
|
+
drain_ndjson_buffer(buffer, chunks, block)
|
|
277
|
+
end
|
|
278
|
+
end
|
|
279
|
+
|
|
280
|
+
def drain_ndjson_buffer(buffer, chunks, block)
|
|
281
|
+
while (idx = buffer.index("\n"))
|
|
282
|
+
line = buffer.slice!(0..idx).strip
|
|
283
|
+
next if line.empty?
|
|
284
|
+
|
|
285
|
+
parse_ndjson_line(line, chunks, block)
|
|
286
|
+
end
|
|
287
|
+
end
|
|
288
|
+
|
|
289
|
+
def parse_ndjson_line(line, chunks, block)
|
|
290
|
+
parsed = Legion::JSON.parse(line, symbolize_names: false)
|
|
291
|
+
return unless parsed.is_a?(Hash)
|
|
292
|
+
|
|
293
|
+
built = build_chunk(parsed)
|
|
294
|
+
chunks << built
|
|
295
|
+
block&.call(built)
|
|
296
|
+
rescue Legion::JSON::ParseError => e
|
|
297
|
+
handle_exception(e, level: :debug, handled: true, operation: 'ollama.stream_parse')
|
|
298
|
+
end
|
|
299
|
+
|
|
300
|
+
def finalize_stream(chunks)
|
|
301
|
+
return Legion::Extensions::Llm::Message.new(role: :assistant, content: nil) if chunks.empty?
|
|
302
|
+
|
|
303
|
+
Legion::Extensions::Llm::Message.new(
|
|
304
|
+
role: :assistant,
|
|
305
|
+
content: join_stream_content(chunks),
|
|
306
|
+
thinking: join_stream_thinking(chunks),
|
|
307
|
+
tool_calls: merge_stream_tool_calls(chunks),
|
|
308
|
+
model_id: chunks.last.model_id,
|
|
309
|
+
input_tokens: chunks.last.input_tokens,
|
|
310
|
+
output_tokens: chunks.last.output_tokens,
|
|
311
|
+
raw: chunks.last.raw
|
|
312
|
+
)
|
|
313
|
+
end
|
|
314
|
+
|
|
315
|
+
def join_stream_content(chunks)
|
|
316
|
+
text = chunks.filter_map { |c| c.content&.to_s }.join
|
|
317
|
+
text.empty? ? nil : text
|
|
318
|
+
end
|
|
319
|
+
|
|
320
|
+
def join_stream_thinking(chunks)
|
|
321
|
+
parts = chunks.filter_map { |c| c.thinking&.text }
|
|
322
|
+
Thinking.build(text: parts.empty? ? nil : parts.join)
|
|
323
|
+
end
|
|
324
|
+
|
|
325
|
+
def merge_stream_tool_calls(chunks)
|
|
326
|
+
merged = chunks.filter_map(&:tool_calls).reject(&:empty?).reduce({}, :merge)
|
|
327
|
+
merged.empty? ? nil : merged
|
|
328
|
+
end
|
|
329
|
+
|
|
261
330
|
def format_messages(messages)
|
|
262
331
|
messages.map do |message|
|
|
263
332
|
content = message.content
|
|
@@ -312,11 +381,13 @@ module Legion
|
|
|
312
381
|
def parse_completion_response(response)
|
|
313
382
|
body = response.body
|
|
314
383
|
message = body.fetch('message', {})
|
|
384
|
+
content, thinking = extract_thinking_from_completion(message)
|
|
315
385
|
Legion::Extensions::Llm::Message.new(
|
|
316
386
|
role: :assistant,
|
|
317
|
-
content:
|
|
387
|
+
content: content,
|
|
318
388
|
model_id: body['model'],
|
|
319
389
|
tool_calls: parse_tool_calls(message['tool_calls']),
|
|
390
|
+
thinking: thinking,
|
|
320
391
|
input_tokens: body['prompt_eval_count'],
|
|
321
392
|
output_tokens: body['eval_count'],
|
|
322
393
|
raw: body
|
|
@@ -325,9 +396,12 @@ module Legion
|
|
|
325
396
|
|
|
326
397
|
def build_chunk(data)
|
|
327
398
|
message = data.fetch('message', {})
|
|
399
|
+
thinking = message['thinking']
|
|
328
400
|
Legion::Extensions::Llm::Chunk.new(
|
|
329
401
|
role: :assistant,
|
|
330
402
|
content: message['content'],
|
|
403
|
+
thinking: thinking ? Thinking.build(text: thinking) : nil,
|
|
404
|
+
tool_calls: parse_tool_calls(message['tool_calls']),
|
|
331
405
|
model_id: data['model'],
|
|
332
406
|
input_tokens: data['prompt_eval_count'],
|
|
333
407
|
output_tokens: data['eval_count'],
|
|
@@ -335,6 +409,22 @@ module Legion
|
|
|
335
409
|
)
|
|
336
410
|
end
|
|
337
411
|
|
|
412
|
+
def extract_thinking_from_completion(message)
|
|
413
|
+
extraction = Responses::ThinkingExtractor.extract(
|
|
414
|
+
message['content'],
|
|
415
|
+
metadata: thinking_metadata(message)
|
|
416
|
+
)
|
|
417
|
+
|
|
418
|
+
[
|
|
419
|
+
extraction.content,
|
|
420
|
+
Thinking.build(text: extraction.thinking, signature: extraction.signature)
|
|
421
|
+
]
|
|
422
|
+
end
|
|
423
|
+
|
|
424
|
+
def thinking_metadata(message)
|
|
425
|
+
{ thinking: message['thinking'] }.compact
|
|
426
|
+
end
|
|
427
|
+
|
|
338
428
|
def parse_tool_calls(tool_calls)
|
|
339
429
|
return nil unless tool_calls
|
|
340
430
|
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: lex-llm-ollama
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.10
|
|
4
|
+
version: 0.2.12
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- LegionIO
|
|
@@ -97,6 +97,7 @@ files:
|
|
|
97
97
|
- README.md
|
|
98
98
|
- lex-llm-ollama.gemspec
|
|
99
99
|
- lib/legion/extensions/llm/ollama.rb
|
|
100
|
+
- lib/legion/extensions/llm/ollama/actors/discovery_refresh.rb
|
|
100
101
|
- lib/legion/extensions/llm/ollama/actors/fleet_worker.rb
|
|
101
102
|
- lib/legion/extensions/llm/ollama/provider.rb
|
|
102
103
|
- lib/legion/extensions/llm/ollama/runners/fleet_worker.rb
|