lex-llm-ollama 0.2.10 → 0.2.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 367e593d0f98eed36e553e82356f0c86e37dd8e4a1324674c388f0383e59034d
4
- data.tar.gz: f9d24ec554a9be7be90d33c76ddbb7d475a976c8158ceda71f596fcc8bfa42dd
3
+ metadata.gz: ae0169129db1cf8833dcbc252f5b06b6c3ed6e2ab106a262b9dc90789919da55
4
+ data.tar.gz: f8af5ef3ccea0f79638bbdf77dfb43392cc7332a602aedbc01a2514e229d8e03
5
5
  SHA512:
6
- metadata.gz: 5be0235ede9a88b3b355cd2d6232c885332f6bcf8f5e0041922d496a76b5335c1533edd193b50c8ffe1f04953305b6527d7c60ac1a46b8d35d9cf28535902c36
7
- data.tar.gz: cce2dd847b6fa13f1c8bf2c2a84df522315ed1d8af2f0e524e0564309a24494b4336432a57c60e3b2570402354923f1de805c7d721131d30939e633624f8aa4e
6
+ metadata.gz: 208c7d4cd46c681bffd89ff263378beb2460af6e27554ca79c04e2850771bd69b9722e3fc547093af2dc20332ee29e17da72363aab8e92b435f1b93d3615a77f
7
+ data.tar.gz: 92333b402f15d5eb694a747a45cb6b37289d6d2d6fc43dcb2784647e59c12c4eca2b959491f3fe4d567f4b134f8a37d4589be7b76b3ad887b1630bcaa46662cc
data/.rubocop.yml CHANGED
@@ -14,5 +14,21 @@ Metrics/BlockLength:
14
14
  - spec/**/*
15
15
  Metrics/MethodLength:
16
16
  Enabled: false
17
+ Metrics/ParameterLists:
18
+ Enabled: false
19
+ Metrics/AbcSize:
20
+ Enabled: false
21
+ Metrics/CyclomaticComplexity:
22
+ Enabled: false
23
+ Metrics/PerceivedComplexity:
24
+ Enabled: false
17
25
  RSpec/MultipleExpectations:
18
26
  Enabled: false
27
+ RSpec/ExampleLength:
28
+ Enabled: false
29
+ RSpec/LeakyConstantDeclaration:
30
+ Enabled: false
31
+ RSpec/InstanceVariable:
32
+ Enabled: false
33
+ Style/Documentation:
34
+ Enabled: false
data/CHANGELOG.md CHANGED
@@ -1,5 +1,14 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.2.12 - 2026-05-21
4
+
5
+ - Add `default_transport`/`default_tier` class declarations, remove duplicate instance methods
6
+ - Add `model_allowed?` filtering in `discover_offerings`
7
+ - Add `DiscoveryRefresh` actor (Every, 30min, run_now) for non-blocking model discovery
8
+ - Identity headers included via base provider
9
+ - `api_base` falls back to `settings[:endpoint]` when `settings[:base_url]` is not set
10
+
11
+
3
12
  ## 0.2.10 - 2026-05-16
4
13
 
5
14
  - Stop assuming every non-embedding Ollama model supports tools; fallback chat discovery now advertises completion, streaming, and vision only.
@@ -0,0 +1,53 @@
1
+ # frozen_string_literal: true
2
+
3
begin
  require 'legion/extensions/actors/every'
rescue LoadError => e
  # The actor framework is optional; only surface the failure in verbose mode.
  warn(e.message) if $VERBOSE
end

# Without the Every base class there is nothing to subclass, so skip the file.
return unless defined?(Legion::Extensions::Actors::Every)

module Legion
  module Extensions
    module Llm
      module Ollama
        module Actor
          # Interval actor that re-runs LLM discovery and then refreshes the
          # router rules and the offerings cache derived from it.
          #
          # The predicate methods below are hooks read by the Every base class;
          # their exact semantics live there (not visible here — confirm).
          class DiscoveryRefresh < Legion::Extensions::Actors::Every
            include Legion::Logging::Helper

            # Default number of seconds between refreshes (30 minutes).
            REFRESH_INTERVAL = 1800

            def runner_class = self.class
            def runner_function = 'manual'
            def run_now? = true
            def use_runner? = false
            def check_subtask? = false
            def generate_task? = false

            # Seconds between refresh runs.
            #
            # Reads +extensions.llm.ollama.discovery_interval+ from settings and
            # falls back to REFRESH_INTERVAL when settings are unavailable or the
            # configured value is not a positive real number.
            #
            # @return [Numeric]
            def time
              return REFRESH_INTERVAL unless defined?(Legion::Settings)

              configured = Legion::Settings.dig(:extensions, :llm, :ollama, :discovery_interval)
              usable_interval?(configured) ? configured : REFRESH_INTERVAL
            end

            # Runs one discovery pass. Each downstream step is skipped when its
            # constant or method is not available; any StandardError is handed
            # to +handle_exception+ at warn level instead of propagating.
            def manual
              log.debug('[ollama][discovery_refresh] refreshing model list')
              return unless defined?(Legion::LLM::Discovery)

              Legion::LLM::Discovery.run
              if defined?(Legion::LLM::Router) && Legion::LLM::Router.respond_to?(:populate_auto_rules)
                Legion::LLM::Router.populate_auto_rules(Legion::LLM::Discovery.discovered_instances)
              end
              if defined?(Legion::LLM::Inventory) && Legion::LLM::Inventory.respond_to?(:invalidate_offerings_cache!)
                Legion::LLM::Inventory.invalidate_offerings_cache!
              end
            rescue StandardError => e
              handle_exception(e, level: :warn, handled: true, operation: 'ollama.actor.discovery_refresh')
            end

            private

            # True when +value+ can be used as a timer interval.
            def usable_interval?(value)
              value.is_a?(Numeric) && value.real? && value.positive?
            end
          end
        end
      end
    end
  end
end
@@ -16,6 +16,8 @@ module Legion
16
16
 
17
17
  def slug = 'ollama'
18
18
  def local? = true
19
+ def default_transport = :http
20
+ def default_tier = :local
19
21
  def configuration_requirements = []
20
22
  def capabilities = Capabilities
21
23
 
@@ -40,7 +42,7 @@ module Legion
40
42
  end
41
43
 
42
44
  def api_base
43
- resolve_base_url || normalize_url(settings[:base_url] || '127.0.0.1:11434')
45
+ resolve_base_url || normalize_url(settings[:base_url] || settings[:endpoint] || 'http://127.0.0.1:11434')
44
46
  end
45
47
 
46
48
  def config_base_url
@@ -110,9 +112,13 @@ module Legion
110
112
  log.debug do
111
113
  "ollama provider discovering offerings live=#{live} cached_model_count=#{Array(@cached_models).size}"
112
114
  end
113
- resolve_models(live).map { |model_info| offering_from_model(model_info) }.tap do |offerings|
114
- log.debug { "ollama provider built offering_count=#{offerings.size} live=#{live}" }
115
+ offerings = resolve_models(live).filter_map do |model_info|
116
+ next unless model_allowed?(model_info.id)
117
+
118
+ offering_from_model(model_info)
115
119
  end
120
+ log.debug { "ollama provider built offering_count=#{offerings.size} live=#{live}" }
121
+ offerings
116
122
  rescue Faraday::ConnectionFailed => e
117
123
  log.warn("[ollama] instance=#{provider_instance_id} unreachable: #{e.message}")
118
124
  []
@@ -167,14 +173,6 @@ module Legion
167
173
  )
168
174
  end
169
175
 
170
- def offering_transport
171
- config.respond_to?(:transport) ? config.transport : :http
172
- end
173
-
174
- def offering_tier
175
- config.respond_to?(:tier) ? config.tier : :local
176
- end
177
-
178
176
  def offering_usage_type(model_info)
179
177
  model_info.embedding? ? :embedding : :inference
180
178
  end
@@ -238,7 +236,7 @@ module Legion
238
236
  settings[:keep_alive]
239
237
  end
240
238
 
241
- def render_payload(messages, tools:, temperature:, model:, stream:, schema:, thinking:, tool_prefs:) # rubocop:disable Metrics/ParameterLists
239
+ def render_payload(messages, tools:, temperature:, model:, stream:, schema:, thinking:, tool_prefs:)
242
240
  model_id = model.respond_to?(:id) ? model.id : model
243
241
  log.debug do
244
242
  "ollama provider rendering chat payload model=#{model_id} message_count=#{messages.size} " \
@@ -249,7 +247,7 @@ module Legion
249
247
  model: model_id,
250
248
  messages: format_messages(messages),
251
249
  stream: stream,
252
- think: thinking ? true : nil,
250
+ think: thinking == true,
253
251
  keep_alive: ollama_keep_alive,
254
252
  format: schema_format(schema),
255
253
  options: { temperature: temperature }.compact,
@@ -258,6 +256,77 @@ module Legion
258
256
  }.compact
259
257
  end
260
258
 
259
# Posts a streaming chat request and assembles the final message.
#
# The response body is consumed through an +on_data+ callback that parses
# newline-delimited JSON incrementally; each parsed chunk is passed to the
# optional block as it arrives.
#
# @param connection [#post] HTTP connection (Faraday-style; yields the request)
# @param payload [Hash] request body
# @param additional_headers [Hash] extra request headers; headers already set
#   on the request take precedence on conflict
# @yield [chunk] each chunk built from one NDJSON line
# @return whatever +finalize_stream+ builds from the collected chunks
def stream_response(connection, payload, additional_headers = {}, &block)
  buffer = +''
  chunks = []

  connection.post(stream_url, payload) do |req|
    req.headers = additional_headers.merge(req.headers) unless additional_headers.empty?
    req.options.on_data = ndjson_handler(buffer, chunks, block)
  end

  # The data handler only consumes newline-terminated lines, so a final
  # record sent without a trailing newline is still sitting in the buffer.
  remainder = buffer.strip
  buffer.clear
  parse_ndjson_line(remainder, chunks, block) unless remainder.empty?

  finalize_stream(chunks)
end
270
+
271
# Builds the +on_data+ callback used while streaming.
#
# The callback appends each received piece to +buffer+ and drains complete
# lines from it. Pieces are ignored when the environment reports a status
# other than 200.
#
# @param buffer [String] mutable accumulator shared across callbacks
# @param chunks [Array] collector for parsed chunks
# @param block [Proc, nil] optional per-chunk callback
# @return [Proc]
def ndjson_handler(buffer, chunks, block)
  proc do |data, _size, env|
    status = env.respond_to?(:status) ? env.status : nil
    next if status && status != 200

    buffer << data.to_s
    drain_ndjson_buffer(buffer, chunks, block)
  end
end
279
+
280
# Consumes every complete (newline-terminated) line from +buffer+.
#
# Each line is removed from the buffer, stripped, and handed to
# +parse_ndjson_line+ unless it is blank. Any trailing partial line stays in
# the buffer.
#
# @return [nil]
def drain_ndjson_buffer(buffer, chunks, block)
  loop do
    newline_at = buffer.index("\n")
    break unless newline_at

    record = buffer.slice!(0, newline_at + 1).strip
    parse_ndjson_line(record, chunks, block) unless record.empty?
  end
end
288
+
289
# Parses one NDJSON line into a chunk.
#
# A line that parses to a Hash is converted with +build_chunk+, appended to
# +chunks+, and passed to +block+ when one was given. Other JSON values are
# ignored. A parse error is reported through +handle_exception+ at debug level
# and does not propagate.
def parse_ndjson_line(line, chunks, block)
  payload = Legion::JSON.parse(line, symbolize_names: false)
  if payload.is_a?(Hash)
    chunk = build_chunk(payload)
    chunks << chunk
    block&.call(chunk)
  end
rescue Legion::JSON::ParseError => e
  handle_exception(e, level: :debug, handled: true, operation: 'ollama.stream_parse')
end
299
+
300
# Collapses the streamed chunks into a single assistant message.
#
# Content, thinking and tool calls are aggregated over all chunks; model id,
# token counts and the raw payload are taken from the last chunk. With no
# chunks at all, an assistant message with nil content is returned.
#
# @param chunks [Array] chunks collected while streaming
# @return [Legion::Extensions::Llm::Message]
def finalize_stream(chunks)
  message_class = Legion::Extensions::Llm::Message
  return message_class.new(role: :assistant, content: nil) if chunks.empty?

  attributes = {
    role: :assistant,
    content: join_stream_content(chunks),
    thinking: join_stream_thinking(chunks),
    tool_calls: merge_stream_tool_calls(chunks)
  }
  final = chunks.last
  attributes[:model_id] = final.model_id
  attributes[:input_tokens] = final.input_tokens
  attributes[:output_tokens] = final.output_tokens
  attributes[:raw] = final.raw

  message_class.new(**attributes)
end
314
+
315
# Concatenates the content of all chunks in order.
#
# Chunks whose content is nil are skipped; every other content value is
# converted with +to_s+.
#
# @return [String, nil] the combined text, or nil when it would be empty
def join_stream_content(chunks)
  pieces = chunks.map(&:content).compact.map(&:to_s)
  combined = pieces.join
  combined unless combined.empty?
end
319
+
320
# Combines the thinking text of all chunks, in order, into one Thinking value.
#
# Chunks without thinking, or whose thinking has no text, contribute nothing.
# When no chunk contributes, +Thinking.build+ is called with +text: nil+.
#
# @return whatever +Thinking.build+ returns
def join_stream_thinking(chunks)
  fragments = chunks.map { |chunk| chunk.thinking&.text }.select(&:itself)
  combined = fragments.join unless fragments.empty?
  Thinking.build(text: combined)
end
324
+
325
# Merges the tool-call hashes of all chunks into one hash.
#
# Chunks with no tool calls, or an empty collection, are skipped. On duplicate
# keys the value from the later chunk wins.
#
# @return [Hash, nil] the merged tool calls, or nil when there are none
def merge_stream_tool_calls(chunks)
  combined = chunks.each_with_object({}) do |chunk, acc|
    calls = chunk.tool_calls
    next unless calls
    next if calls.empty?

    acc.merge!(calls)
  end
  combined unless combined.empty?
end
329
+
261
330
  def format_messages(messages)
262
331
  messages.map do |message|
263
332
  content = message.content
@@ -312,11 +381,13 @@ module Legion
312
381
  def parse_completion_response(response)
313
382
  body = response.body
314
383
  message = body.fetch('message', {})
384
+ content, thinking = extract_thinking_from_completion(message)
315
385
  Legion::Extensions::Llm::Message.new(
316
386
  role: :assistant,
317
- content: message['content'],
387
+ content: content,
318
388
  model_id: body['model'],
319
389
  tool_calls: parse_tool_calls(message['tool_calls']),
390
+ thinking: thinking,
320
391
  input_tokens: body['prompt_eval_count'],
321
392
  output_tokens: body['eval_count'],
322
393
  raw: body
@@ -325,9 +396,12 @@ module Legion
325
396
 
326
397
  def build_chunk(data)
327
398
  message = data.fetch('message', {})
399
+ thinking = message['thinking']
328
400
  Legion::Extensions::Llm::Chunk.new(
329
401
  role: :assistant,
330
402
  content: message['content'],
403
+ thinking: thinking ? Thinking.build(text: thinking) : nil,
404
+ tool_calls: parse_tool_calls(message['tool_calls']),
331
405
  model_id: data['model'],
332
406
  input_tokens: data['prompt_eval_count'],
333
407
  output_tokens: data['eval_count'],
@@ -335,6 +409,22 @@ module Legion
335
409
  )
336
410
  end
337
411
 
412
# Splits a completion message into visible content and thinking.
#
# The message content is passed to +Responses::ThinkingExtractor.extract+
# together with the metadata from +thinking_metadata+.
#
# @param message [Hash] the 'message' object of a completion response
# @return [Array] two elements: the extracted content, and the value of
#   +Thinking.build+ for the extracted thinking text and signature
def extract_thinking_from_completion(message)
  result = Responses::ThinkingExtractor.extract(message['content'], metadata: thinking_metadata(message))
  visible = result.content
  reasoning = Thinking.build(text: result.thinking, signature: result.signature)

  [visible, reasoning]
end
423
+
424
# Builds the metadata hash handed to the thinking extractor.
#
# @param message [Hash] the 'message' object of a completion response
# @return [Hash] +{ thinking: ... }+ when the message has a non-nil 'thinking'
#   value, otherwise an empty hash
def thinking_metadata(message)
  reasoning = message['thinking']
  reasoning.nil? ? {} : { thinking: reasoning }
end
427
+
338
428
  def parse_tool_calls(tool_calls)
339
429
  return nil unless tool_calls
340
430
 
@@ -4,7 +4,7 @@ module Legion
4
4
  module Extensions
5
5
  module Llm
6
6
  module Ollama
7
- VERSION = '0.2.10'
7
+ VERSION = '0.2.12'
8
8
  end
9
9
  end
10
10
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lex-llm-ollama
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.10
4
+ version: 0.2.12
5
5
  platform: ruby
6
6
  authors:
7
7
  - LegionIO
@@ -97,6 +97,7 @@ files:
97
97
  - README.md
98
98
  - lex-llm-ollama.gemspec
99
99
  - lib/legion/extensions/llm/ollama.rb
100
+ - lib/legion/extensions/llm/ollama/actors/discovery_refresh.rb
100
101
  - lib/legion/extensions/llm/ollama/actors/fleet_worker.rb
101
102
  - lib/legion/extensions/llm/ollama/provider.rb
102
103
  - lib/legion/extensions/llm/ollama/runners/fleet_worker.rb