lex-llm-ollama 0.2.14 → 0.2.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8bbec813c20e8b5c62b97209466439569cfcde42251acfe9e87f2bb0fce79e9d
4
- data.tar.gz: 59822c6527476ec0000af57ec2a5884672d03065ca3212e0fb655faf809cc0da
3
+ metadata.gz: 9105e1f109fd5c83078224391ceb669ea321943a2075d85d5a77cf48b73d16e7
4
+ data.tar.gz: 1bd18adb284f8b8fa5c12e0049f310973989efb5a20070dcbb172754f64f941b
5
5
  SHA512:
6
- metadata.gz: 6f41591f42a566ab7f3344e6d9963393db977dfea257b61c4a4aea798943804cc1db9f9525522cb690e5ab0448bc4b1451ee2ec3a67b615caccb049e84607bc4
7
- data.tar.gz: effc50944c4583c1732ea4b23563c2f1ac00f660c0571e6c001403f3a140027303fe3bd77d2e0e64c61db39828a5230f9a35f30e7a1ab8663a3dd4e8b56bc185
6
+ metadata.gz: 175570e4fdf0574998741b731718675d643f03c1163183bd41a28a7c90151c71919150060c9d87ff27840f0bcf5203b90dba85e81680f1ec74d3f494b22c85f0
7
+ data.tar.gz: 9ebb03d6cdf2078303f8ac674cfe939a73770a0ce7b721e86c6daaeb63169c0039dfca9e7063138e83896322820939b203f68265da9a2125066ee4e05ecefcd4
data/.rubocop.yml CHANGED
@@ -22,6 +22,12 @@ Metrics/CyclomaticComplexity:
22
22
  Enabled: false
23
23
  Metrics/PerceivedComplexity:
24
24
  Enabled: false
25
+ Metrics/ClassLength:
26
+ Enabled: false
27
+ Lint/DuplicateBranch:
28
+ Enabled: false
29
+ Lint/UselessConstantScoping:
30
+ Enabled: false
25
31
  RSpec/MultipleExpectations:
26
32
  Enabled: false
27
33
  RSpec/ExampleLength:
@@ -32,3 +38,5 @@ RSpec/InstanceVariable:
32
38
  Enabled: false
33
39
  Style/Documentation:
34
40
  Enabled: false
41
+ Style/AsciiComments:
42
+ Enabled: false
data/CHANGELOG.md CHANGED
@@ -1,5 +1,47 @@
1
1
  # Changelog
2
2
 
3
+ ## [0.2.22] - 2026-06-20
4
+
5
+ ### Fixed
6
+ - Stub shared registry publishing through `RegistryPublisher#schedule` in specs so async availability-event coverage stays stable after the shared publisher moved off raw `Thread.new`.
7
+
8
+ ## [0.2.21] - 2026-06-20
9
+
10
+ ### Fixed
11
+ - Stop bulk-publishing Ollama model availability from `list_models`; discovery now emits one registry event per seen model from the shared `lex-llm` policy-filter path so blocked models stay observable without duplicate publishes.
12
+
13
+ ## [0.2.20] - 2026-06-20
14
+
15
+ ### Changed
16
+ - Slow the live discovery refresh cadence from 60 seconds to 300 seconds for Ollama instances; `extensions.llm.ollama.discovery_interval` still overrides the default.
17
+
18
+ ## [0.2.19] - 2026-06-20
19
+
20
+ ### Fixed
21
+ - Route Ollama capability overrides through the shared `lex-llm` provider contract and preserve the canonical singular `:embedding` capability on embedding offerings.
22
+
23
+ ## [0.2.18] - 2026-06-19
24
+
25
+ ### Changed
26
+ - Adopt `Legion::Extensions::Llm::Inventory::ScopedRefresher` mixin (lex-llm 0.6.0). Discovery
27
+ refresh actors now write directly to the live `Inventory` catalog via `Inventory.write_lane`.
28
+ - Pin `lex-llm >= 0.6.0` and `legion-llm >= 0.14.0` in gemspec.
29
+ - Standard `weight: 100` default added to provider instance settings schema.
30
+
31
+ ## 0.2.17 - 2026-06-16
32
+
33
+ - dependency updates, code quality improvements
34
+
35
+ ## 0.2.16 - 2026-06-15
36
+
37
+ - **CapabilityPolicy integration** — Optional capabilities default false; API-provided capabilities tagged as `:model_metadata`. Settings overrides at provider/instance/model level supported.
38
+
39
+ ## 0.2.15 - 2026-06-13
40
+
41
+ - **Gemfile cleanup** — Remove local path overrides; dependencies resolve from gemspec via rubygems.
42
+ - **Canonical tool support** — Use `ToolSchema.extract`, add `:tools` capability, canonical normalization for tool parameter schemas.
43
+ - 147 examples, 0 failures; 17 files, 0 rubocop offenses.
44
+
3
45
  ## 0.2.14 - 2026-06-05
4
46
 
5
47
  - Verified specs and RuboCop compliance (52 examples, 0 failures; 15 files, 0 offenses)
data/Gemfile CHANGED
@@ -2,13 +2,6 @@
2
2
 
3
3
  source 'https://rubygems.org'
4
4
 
5
- group :test do
6
- llm_base_path = ENV.fetch('LEX_LLM_PATH', File.expand_path('../lex-llm', __dir__))
7
- transport_path = ENV.fetch('LEGION_TRANSPORT_PATH', File.expand_path('../../legion-transport', __dir__))
8
- gem 'legion-transport', path: transport_path if File.directory?(transport_path)
9
- gem 'lex-llm', path: llm_base_path if File.directory?(llm_base_path)
10
- end
11
-
12
5
  gemspec
13
6
 
14
7
  group :development do
@@ -27,5 +27,5 @@ Gem::Specification.new do |spec|
27
27
  spec.add_dependency 'legion-logging', '>= 1.3.2'
28
28
  spec.add_dependency 'legion-settings', '>= 1.3.14'
29
29
  spec.add_dependency 'legion-transport', '>= 1.4.14'
30
- spec.add_dependency 'lex-llm', '>= 0.4.3'
30
+ spec.add_dependency 'lex-llm', '>= 0.6.0'
31
31
  end
@@ -1,11 +1,19 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'digest'
4
+
3
5
  begin
4
6
  require 'legion/extensions/actors/every'
5
7
  rescue LoadError => e
6
8
  warn(e.message) if $VERBOSE
7
9
  end
8
10
 
11
+ begin
12
+ require 'legion/extensions/llm/inventory/scoped_refresher'
13
+ rescue LoadError => e
14
+ warn(e.message) if $VERBOSE
15
+ end
16
+
9
17
  return unless defined?(Legion::Extensions::Actors::Every)
10
18
 
11
19
  module Legion
@@ -16,7 +24,11 @@ module Legion
16
24
  class DiscoveryRefresh < Legion::Extensions::Actors::Every
17
25
  include Legion::Logging::Helper
18
26
 
19
- REFRESH_INTERVAL = 1800
27
+ if defined?(Legion::Extensions::Llm::Inventory::ScopedRefresher)
28
+ include Legion::Extensions::Llm::Inventory::ScopedRefresher
29
+ end
30
+
31
+ def self.every_seconds = 300
20
32
 
21
33
  def runner_class = self.class
22
34
  def runner_function = 'manual'
@@ -26,25 +38,143 @@ module Legion
26
38
  def generate_task? = false
27
39
 
28
40
  def time
29
- return REFRESH_INTERVAL unless defined?(Legion::Settings)
41
+ return self.class.every_seconds unless defined?(Legion::Settings)
30
42
 
31
- Legion::Settings.dig(:extensions, :llm, :ollama, :discovery_interval) || REFRESH_INTERVAL
43
+ Legion::Settings.dig(:extensions, :llm, :ollama, :discovery_interval) || self.class.every_seconds
32
44
  end
33
45
 
34
- def manual
35
- log.debug('[ollama][discovery_refresh] refreshing model list')
36
- return unless defined?(Legion::LLM::Discovery)
46
+ def scope_key(**)
47
+ { provider: :ollama }
48
+ end
37
49
 
38
- Legion::LLM::Discovery.refresh_discovered_models!(provider: :ollama)
39
- if defined?(Legion::LLM::Router) && Legion::LLM::Router.respond_to?(:populate_auto_rules)
40
- Legion::LLM::Router.populate_auto_rules(Legion::LLM::Discovery.discovered_instances)
41
- end
42
- if defined?(Legion::LLM::Inventory) && Legion::LLM::Inventory.respond_to?(:invalidate_offerings_cache!)
43
- Legion::LLM::Inventory.invalidate_offerings_cache!
50
+ def compute_lanes_for_scope(**)
51
+ return [] unless defined?(Legion::LLM::Call::Registry)
52
+
53
+ lanes = []
54
+ ollama_instances.each do |instance|
55
+ collect_lanes_for_instance(instance, lanes)
56
+ rescue StandardError => e
57
+ handle_exception(e, level: :warn, handled: true,
58
+ operation: 'ollama.discovery_refresh.compute_lanes',
59
+ instance: instance[:instance])
44
60
  end
61
+ lanes
62
+ rescue StandardError => e
63
+ handle_exception(e, level: :warn, handled: true,
64
+ operation: 'ollama.discovery_refresh.compute_lanes_for_scope')
65
+ []
66
+ end
67
+
68
+ def credential_hash(**)
69
+ settings = ollama_settings
70
+ Digest::SHA256.hexdigest(settings[:api_key].to_s + settings[:instances].to_s)[0, 16]
71
+ rescue StandardError => e
72
+ handle_exception(e, level: :warn, handled: true, operation: 'ollama.discovery_refresh.credential_hash')
73
+ 'unknown'
74
+ end
75
+
76
+ def manual(**)
77
+ tick if defined?(Legion::Extensions::Llm::Inventory::ScopedRefresher) &&
78
+ respond_to?(:tick, true)
45
79
  rescue StandardError => e
46
80
  handle_exception(e, level: :warn, handled: true, operation: 'ollama.actor.discovery_refresh')
47
81
  end
82
+
83
+ private
84
+
85
+ def ollama_instances
86
+ Legion::LLM::Call::Registry.all_instances.select do |e|
87
+ (e[:provider] || '').to_sym == :ollama
88
+ end
89
+ end
90
+
91
+ def collect_lanes_for_instance(instance, lanes)
92
+ adapter = instance[:adapter]
93
+ return unless adapter.respond_to?(:discover_offerings)
94
+
95
+ Array(adapter.discover_offerings(live: true)).each do |raw_offering|
96
+ offering = offering_to_hash(raw_offering)
97
+ next unless offering
98
+
99
+ model = offering[:model] || offering['model']
100
+ next unless model
101
+
102
+ lane = build_lane(offering, instance)
103
+ lanes << lane
104
+ lanes << fleet_lane(lane, instance) if emit_fleet_lane?(lane)
105
+ end
106
+ end
107
+
108
+ def offering_to_hash(offering)
109
+ return nil if offering.nil?
110
+ return offering if offering.is_a?(Hash)
111
+
112
+ hash = offering.to_h
113
+ hash[:type] ||= hash[:usage_type]
114
+ hash[:enabled] = offering.respond_to?(:enabled?) ? offering.enabled? : true
115
+ hash
116
+ end
117
+
118
+ def build_lane(offering, instance)
119
+ instance_id = instance[:instance] || instance[:instance_id] || instance[:id]
120
+ raw_tier = offering[:tier] || :local
121
+ offer_type = offering[:type]
122
+ type = %i[embed embedding].include?(offer_type) ? :embedding : :inference
123
+ capabilities = normalize_capabilities(offering[:capabilities] || [])
124
+ model = offering[:model] || offering['model']
125
+
126
+ lane_id = Legion::Extensions::Llm::Inventory::ScopedRefresher.compose_id(
127
+ tier: raw_tier, provider_family: :ollama,
128
+ instance_id: instance_id, type: type, model: model
129
+ )
130
+
131
+ {
132
+ id: lane_id,
133
+ tier: raw_tier,
134
+ provider_family: :ollama,
135
+ instance_id: instance_id,
136
+ model: model,
137
+ canonical_model_alias: offering[:canonical_model_alias] || offering[:name],
138
+ type: type,
139
+ capabilities: capabilities,
140
+ limits: offering[:limits] || {},
141
+ enabled: offering.fetch(:enabled, true),
142
+ cost: offering[:cost] || {}
143
+ }
144
+ end
145
+
146
+ def emit_fleet_lane?(lane)
147
+ return false unless lane[:type] == :inference
148
+
149
+ ollama_settings&.dig(:fleet, :dispatch, :enabled)
150
+ end
151
+
152
+ def fleet_lane(lane, instance)
153
+ fleet_id = Legion::Extensions::Llm::Inventory::ScopedRefresher.compose_id(
154
+ tier: :fleet, provider_family: :ollama,
155
+ instance_id: instance[:instance] || instance[:instance_id],
156
+ type: lane[:type], model: lane[:model]
157
+ )
158
+ lane.merge(id: fleet_id, tier: :fleet)
159
+ end
160
+
161
+ def normalize_capabilities(caps)
162
+ # Inventory::Capabilities lives in lex-llm; the previous fallback (`return []
163
+ # unless defined?(...)`) silently swallowed every capability the operator
164
+ # declared via enable_thinking/enable_tools when the constant wasn't loaded.
165
+ # Always normalize through the shared vocabulary so aliases collapse.
166
+ if defined?(Legion::Extensions::Llm::Inventory::Capabilities)
167
+ Legion::Extensions::Llm::Inventory::Capabilities.normalize(caps)
168
+ else
169
+ Array(caps).compact.map(&:to_sym).uniq
170
+ end
171
+ end
172
+
173
+ def ollama_settings
174
+ Legion::Settings.dig(:extensions, :llm, :ollama)
175
+ rescue StandardError
176
+ {}
177
+ end
48
178
  end
49
179
  end
50
180
  end
@@ -8,7 +8,7 @@ module Legion
8
8
  module Llm
9
9
  module Ollama
10
10
  # Ollama provider implementation for the Legion::Extensions::Llm base provider contract.
11
- class Provider < Legion::Extensions::Llm::Provider # rubocop:disable Metrics/ClassLength
11
+ class Provider < Legion::Extensions::Llm::Provider
12
12
  include Legion::Logging::Helper
13
13
 
14
14
  class << self
@@ -41,6 +41,10 @@ module Legion
41
41
  Ollama.default_settings
42
42
  end
43
43
 
44
+ def translator
45
+ @translator ||= Translator.new(config: config)
46
+ end
47
+
44
48
  def api_base
45
49
  resolve_base_url || normalize_url(settings[:base_url] || settings[:endpoint] || 'http://127.0.0.1:11434')
46
50
  end
@@ -73,14 +77,27 @@ module Legion
73
77
  end
74
78
  end
75
79
 
76
- def list_models
80
+ def list_models(live: false, **filters)
77
81
  log.debug { "ollama provider discovering models endpoint=#{api_base}#{models_url}" }
78
82
  super.tap do |models|
79
83
  log.debug { "ollama provider discovered model_count=#{models.size}" }
80
- self.class.registry_publisher.publish_models_async(models, readiness: readiness(live: false))
81
84
  end
82
85
  end
83
86
 
87
+ def discover_offerings(live: false, raise_on_unreachable: false, **filters)
88
+ return filter_cached_offerings(Array(@cached_offerings), filters) unless live
89
+
90
+ provider_health = health(live:)
91
+ @cached_offerings = discover_live_offerings(filters, provider_health, live:)
92
+ log_discover_complete(@cached_offerings)
93
+ @cached_offerings
94
+ rescue Faraday::ConnectionFailed, Faraday::TimeoutError => e
95
+ log.warn("[#{slug}] instance=#{provider_instance_id} unreachable: #{e.message}")
96
+ raise if raise_on_unreachable
97
+
98
+ []
99
+ end
100
+
84
101
  def show_model(model)
85
102
  log.debug { "ollama provider fetching model details model=#{model}" }
86
103
  connection.post(show_model_url, { model: model }).body
@@ -108,24 +125,42 @@ module Legion
108
125
  raise
109
126
  end
110
127
 
111
- def discover_offerings(live: false, **)
112
- log.debug do
113
- "ollama provider discovering offerings live=#{live} cached_model_count=#{Array(@cached_models).size}"
114
- end
115
- offerings = resolve_models(live).filter_map do |model_info|
116
- next unless model_allowed?(model_info.id)
128
+ private
117
129
 
118
- offering_from_model(model_info)
130
+ def discovery_registry_readiness(provider_health, live:)
131
+ {
132
+ provider: slug.to_sym,
133
+ configured: configured?,
134
+ ready: provider_health[:ready] == true,
135
+ live: live,
136
+ health: provider_health
137
+ }
138
+ end
139
+
140
+ def discover_live_offerings(filters, provider_health, live:)
141
+ readiness = discovery_registry_readiness(provider_health, live:)
142
+ Array(list_models(live:, **filters)).filter_map do |model|
143
+ self.class.registry_publisher.publish_models_async([model], readiness:)
144
+ next unless model_matches_filters?(model, filters)
145
+ next unless model_allowed?(model.id)
146
+
147
+ log_model_discovered(model)
148
+ offering_from_model(model, health: provider_health)
119
149
  end
120
- log.debug { "ollama provider built offering_count=#{offerings.size} live=#{live}" }
121
- offerings
122
- rescue Faraday::ConnectionFailed => e
123
- log.warn("[ollama] instance=#{provider_instance_id} unreachable: #{e.message}")
124
- []
125
- rescue StandardError => e
126
- handle_exception(e, level: :warn, handled: true, operation: 'ollama.discover_offerings',
127
- backtrace_limit: 3)
128
- []
150
+ end
151
+
152
+ def log_model_discovered(model)
153
+ log.debug(
154
+ "[#{slug}] instance=#{provider_instance_id} action=model_discovered " \
155
+ "model=#{model.id} family=#{model.family}"
156
+ )
157
+ end
158
+
159
+ def log_discover_complete(offerings)
160
+ log.info(
161
+ "[#{slug}] instance=#{provider_instance_id} action=discover_complete " \
162
+ "model_count=#{Array(offerings).size}"
163
+ )
129
164
  end
130
165
 
131
166
  CONTEXT_WINDOWS = {
@@ -149,8 +184,6 @@ module Legion
149
184
  'bge' => 512
150
185
  }.freeze
151
186
 
152
- private
153
-
154
187
  def resolve_models(live)
155
188
  if live
156
189
  @cached_models = list_models
@@ -159,7 +192,26 @@ module Legion
159
192
  end
160
193
  end
161
194
 
162
- def offering_from_model(model_info)
195
+ def running_model_ids
196
+ Array(list_running_models).filter_map do |m|
197
+ m['name'] || m[:name] || m['model'] || m[:model]
198
+ end.map(&:to_s)
199
+ end
200
+
201
+ def offering_from_model(model_info, health: {})
202
+ loaded = begin
203
+ running_model_ids.include?(model_info.id.to_s)
204
+ rescue StandardError
205
+ health.is_a?(Hash) ? health.fetch(:loaded, false) : false
206
+ end
207
+ policy = resolve_capability_policy(model_info)
208
+ embedding_model = model_info.embedding?
209
+ capabilities = embedding_model ? [:embedding] : policy[:capabilities]
210
+ capability_sources = if embedding_model
211
+ policy[:sources].merge(embedding: { value: true, source: :model_metadata })
212
+ else
213
+ policy[:sources]
214
+ end
163
215
  Legion::Extensions::Llm::Routing::ModelOffering.new(
164
216
  provider_family: :ollama,
165
217
  instance_id: config.respond_to?(:instance_id) ? config.instance_id : :default,
@@ -167,18 +219,35 @@ module Legion
167
219
  tier: offering_tier,
168
220
  model: model_info.id,
169
221
  usage_type: offering_usage_type(model_info),
170
- capabilities: offering_capabilities(model_info),
222
+ capabilities: capabilities,
223
+ capability_sources: capability_sources,
171
224
  limits: offering_limits(model_info),
172
- metadata: offering_metadata(model_info)
225
+ metadata: offering_metadata(model_info).merge(loaded: loaded)
173
226
  )
174
227
  end
175
228
 
176
- def offering_usage_type(model_info)
177
- model_info.embedding? ? :embedding : :inference
229
+ def resolve_capability_policy(model_info)
230
+ model_id = model_info.id.to_s
231
+ Legion::Extensions::Llm::CapabilityPolicy.resolve(
232
+ real: capabilities_from_api(model_info),
233
+ provider_catalog: {},
234
+ probe: {},
235
+ provider_envelope: { streaming: true },
236
+ provider_config: provider_capability_config,
237
+ instance_config: instance_capability_config,
238
+ model_config: model_capability_config(model_id)
239
+ )
240
+ end
241
+
242
+ def capabilities_from_api(model_info)
243
+ Array(model_info.capabilities).each_with_object({}) do |cap, hash|
244
+ sym = cap.to_s.downcase.to_sym
245
+ hash[sym] = true
246
+ end
178
247
  end
179
248
 
180
- def offering_capabilities(model_info)
181
- model_info.capabilities.map(&:to_s)
249
+ def offering_usage_type(model_info)
250
+ model_info.embedding? ? :embedding : :inference
182
251
  end
183
252
 
184
253
  def offering_limits(model_info)
@@ -294,7 +363,7 @@ module Legion
294
363
  chunks << built
295
364
  block&.call(built)
296
365
  rescue Legion::JSON::ParseError => e
297
- handle_exception(e, level: :debug, handled: true, operation: 'ollama.stream_parse')
366
+ handle_exception(e, level: :warn, handled: true, operation: 'ollama.stream_parse')
298
367
  end
299
368
 
300
369
  def finalize_stream(chunks)
@@ -357,16 +426,16 @@ module Legion
357
426
  def format_tools(tools)
358
427
  return nil if tools.empty?
359
428
 
360
- tool_names = tools.values.filter_map { |tool| tool.respond_to?(:name) ? tool.name : nil }
429
+ tool_names = tools.values.filter_map { |tool| Legion::Extensions::Llm::Canonical::ToolSchema.tool_name(tool) }
361
430
  log.debug { "ollama provider formatting tools count=#{tools.size} names=#{tool_names.join(',')}" }
362
431
 
363
432
  tools.values.map do |tool|
364
433
  {
365
434
  type: 'function',
366
435
  function: {
367
- name: tool.name,
368
- description: tool.description,
369
- parameters: tool.params_schema || { type: 'object', properties: {} }
436
+ name: Legion::Extensions::Llm::Canonical::ToolSchema.tool_name(tool),
437
+ description: Legion::Extensions::Llm::Canonical::ToolSchema.tool_description(tool),
438
+ parameters: Legion::Extensions::Llm::Canonical::ToolSchema.extract(tool)
370
439
  }
371
440
  }
372
441
  end
@@ -380,67 +449,74 @@ module Legion
380
449
 
381
450
  def parse_completion_response(response)
382
451
  body = response.body
383
- message = body.fetch('message', {})
384
- content, thinking = extract_thinking_from_completion(message)
452
+ canonical = translator.parse_response(body)
453
+ to_legacy_message(canonical, body)
454
+ end
455
+
456
+ def build_chunk(data)
457
+ canonical_chunk = translator.parse_chunk(data)
458
+ return nil if canonical_chunk.nil?
459
+
460
+ to_legacy_chunk(canonical_chunk, data)
461
+ end
462
+
463
+ def to_legacy_message(canonical, raw_body)
464
+ usage = canonical.usage
385
465
  Legion::Extensions::Llm::Message.new(
386
466
  role: :assistant,
387
- content: content,
388
- model_id: body['model'],
389
- tool_calls: parse_tool_calls(message['tool_calls']),
390
- thinking: thinking,
391
- input_tokens: body['prompt_eval_count'],
392
- output_tokens: body['eval_count'],
393
- raw: body
467
+ content: canonical.text,
468
+ model_id: canonical.model,
469
+ thinking: if canonical.thinking
470
+ Legion::Extensions::Llm::Thinking.build(
471
+ text: canonical.thinking.content, signature: canonical.thinking.signature
472
+ )
473
+ end,
474
+ tool_calls: legacy_tool_calls(canonical.tool_calls),
475
+ input_tokens: usage&.input_tokens,
476
+ output_tokens: usage&.output_tokens,
477
+ raw: raw_body
394
478
  )
395
479
  end
396
480
 
397
- def build_chunk(data)
398
- message = data.fetch('message', {})
399
- thinking = message['thinking']
481
+ def to_legacy_chunk(canonical_chunk, raw_data)
400
482
  Legion::Extensions::Llm::Chunk.new(
401
483
  role: :assistant,
402
- content: message['content'],
403
- thinking: thinking ? Thinking.build(text: thinking) : nil,
404
- tool_calls: parse_tool_calls(message['tool_calls']),
405
- model_id: data['model'],
406
- input_tokens: data['prompt_eval_count'],
407
- output_tokens: data['eval_count'],
408
- raw: data
484
+ content: canonical_chunk.text_delta? ? canonical_chunk.delta : nil,
485
+ thinking: if canonical_chunk.thinking_delta?
486
+ Legion::Extensions::Llm::Thinking.build(
487
+ text: canonical_chunk.delta
488
+ )
489
+ end,
490
+ tool_calls: legacy_streaming_tool_calls(canonical_chunk),
491
+ model_id: raw_data['model'] || raw_data[:model],
492
+ input_tokens: canonical_chunk.usage&.input_tokens ||
493
+ raw_data['prompt_eval_count'] || raw_data[:prompt_eval_count],
494
+ output_tokens: canonical_chunk.usage&.output_tokens ||
495
+ raw_data['eval_count'] || raw_data[:eval_count],
496
+ raw: raw_data
409
497
  )
410
498
  end
411
499
 
412
- def extract_thinking_from_completion(message)
413
- extraction = Responses::ThinkingExtractor.extract(
414
- message['content'],
415
- metadata: thinking_metadata(message)
416
- )
417
-
418
- [
419
- extraction.content,
420
- Thinking.build(text: extraction.thinking, signature: extraction.signature)
421
- ]
422
- end
500
+ def legacy_tool_calls(canonical_tool_calls)
501
+ return nil if canonical_tool_calls.nil? || canonical_tool_calls.empty?
423
502
 
424
- def thinking_metadata(message)
425
- { thinking: message['thinking'] }.compact
503
+ canonical_tool_calls.to_h do |tc|
504
+ [
505
+ (tc.name || tc.id).to_s.to_sym,
506
+ Legion::Extensions::Llm::ToolCall.new(id: tc.id, name: tc.name, arguments: tc.arguments || {})
507
+ ]
508
+ end
426
509
  end
427
510
 
428
- def parse_tool_calls(tool_calls)
429
- return nil unless tool_calls
511
+ def legacy_streaming_tool_calls(canonical_chunk)
512
+ return nil unless canonical_chunk.tool_call_delta?
430
513
 
431
- log.debug { "ollama provider parsing tool_call_count=#{tool_calls.size}" }
514
+ tc = canonical_chunk.tool_call
515
+ return nil unless tc
432
516
 
433
- tool_calls.to_h do |call|
434
- function = call.fetch('function', {})
435
- [
436
- function.fetch('name').to_sym,
437
- Legion::Extensions::Llm::ToolCall.new(
438
- id: call['id'] || function['name'],
439
- name: function['name'],
440
- arguments: function['arguments'] || {}
441
- )
442
- ]
443
- end
517
+ { (tc.name || tc.id).to_s.to_sym => Legion::Extensions::Llm::ToolCall.new(
518
+ id: tc.id, name: tc.name, arguments: tc.arguments || ''
519
+ ) }
444
520
  end
445
521
 
446
522
  def parse_list_models_response(response, provider, _capabilities)
@@ -0,0 +1,497 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'legion/extensions/llm/canonical'
4
+ require 'legion/extensions/llm/responses/thinking_extractor'
5
+ require 'legion/json'
6
+ require 'legion/logging'
7
+
8
+ module Legion
9
+ module Extensions
10
+ module Llm
11
+ module Ollama
12
+ # Canonical provider translator for Ollama (/api/chat NDJSON wire format).
13
+ #
14
+ # Implements render_request, parse_response, parse_chunk, and capabilities.
15
+ # Ollama uses NDJSON streaming (not SSE), native tool calling, and the `think`
16
+ # flag for extended thinking support.
17
+ #
18
+ # Ollama quirks (declared in capabilities):
19
+ # - tool_calls_as_text: false — Ollama returns structured tool_calls natively.
20
+ # - forced_tool_choice: false — Ollama does not support forced tool selection.
21
+ # - assistant_prefill: false — Ollama does not support assistant prefill.
22
+ class Translator
23
+ include Legion::Logging::Helper
24
+
25
+ # Ollama-specific stop_reason mapping (done_reason field).
26
+ OLLAMA_STOP_REASON_MAP = {
27
+ 'stop' => :end_turn,
28
+ 'tool_use' => :tool_use,
29
+ 'length' => :max_tokens
30
+ }.freeze
31
+ FALLBACK_STOP_REASON = :end_turn
32
+
33
+ # G18 parameter mapping: canonical params -> Ollama options keys.
34
+ PARAM_OPTIONS_KEYS = {
35
+ max_tokens: :num_predict,
36
+ temperature: :temperature,
37
+ top_p: :top_p,
38
+ top_k: :top_k,
39
+ stop_sequences: :stop,
40
+ seed: :seed,
41
+ frequency_penalty: :frequency_penalty,
42
+ presence_penalty: :presence_penalty
43
+ }.freeze
44
+
45
+ SUPPORTED_PARAMS = %i[
46
+ max_tokens temperature top_p top_k stop_sequences
47
+ seed frequency_penalty presence_penalty
48
+ ].freeze
49
+
50
+ def initialize(config: nil)
51
+ @config = config
52
+ end
53
+
54
+ # Render a canonical request into Ollama /api/chat wire payload.
55
+ def render_request(request)
56
+ model = request.metadata&.dig(:model) || 'default'
57
+ messages = format_messages(request)
58
+ payload = {
59
+ model: model,
60
+ messages: messages,
61
+ stream: request.stream
62
+ }
63
+
64
+ payload[:tools] = format_tools(request.tools) unless request.tools.to_h.empty?
65
+ apply_options(payload, request.params)
66
+ apply_thinking_config(payload, request)
67
+ apply_response_format(payload, request.params)
68
+
69
+ log.debug do
70
+ "[llm][ollama-translator] action=render_request model=#{model} stream=#{request.stream} " \
71
+ "message_count=#{messages.size} tools=#{request.tools&.size || 0}"
72
+ end
73
+
74
+ payload.compact
75
+ end
76
+
77
+ # Parse an Ollama /api/chat completion response into a Canonical::Response.
78
+ def parse_response(wire)
79
+ return canonical_error_response(wire) unless wire.is_a?(Hash)
80
+ return Canonical::Response.from_hash(wire) if canonical_response?(wire)
81
+
82
+ message = wire[:message] || wire['message'] || {}
83
+ content = message[:content] || message['content'] || ''
84
+ tool_calls_raw = message[:tool_calls] || message['tool_calls']
85
+ model = wire[:model] || wire['model']
86
+ done_reason = wire[:done_reason] || wire['done_reason']
87
+ done = wire[:done] || wire['done']
88
+
89
+ extraction = Responses::ThinkingExtractor.extract(
90
+ content,
91
+ metadata: thinking_metadata(message)
92
+ )
93
+
94
+ text = extraction.content || ''
95
+ thinking = build_canonical_thinking(extraction)
96
+ tool_calls = parse_tool_calls(tool_calls_raw)
97
+ stop_reason = map_stop_reason(done_reason, done)
98
+
99
+ usage = Canonical::Usage.from_hash({
100
+ input_tokens: wire[:prompt_eval_count] || wire['prompt_eval_count'],
101
+ output_tokens: wire[:eval_count] || wire['eval_count']
102
+ })
103
+
104
+ Canonical::Response.build(
105
+ text: text.to_s,
106
+ thinking: thinking,
107
+ tool_calls: tool_calls,
108
+ usage: usage,
109
+ stop_reason: stop_reason,
110
+ model: model,
111
+ metadata: {}
112
+ )
113
+ rescue StandardError => e
114
+ handle_exception(e, level: :error, handled: false, operation: 'ollama.translator.parse_response')
115
+ raise
116
+ end
117
+
118
+ # Parse a single NDJSON chunk into a Canonical::Chunk or nil.
119
+ def parse_chunk(raw)
120
+ return nil if raw.nil?
121
+
122
+ data = normalize_chunk_input(raw)
123
+ return nil if data.nil?
124
+
125
+ # Handle canonical-form chunks (from conformance fixtures)
126
+ return handle_canonical_chunk(data) if data['type'] || data[:type]
127
+
128
+ parse_ollama_chunk(data)
129
+ rescue StandardError => e
130
+ handle_exception(e, level: :error, handled: false, operation: 'ollama.translator.parse_chunk')
131
+ raise
132
+ end
133
+
134
+ # Declared capabilities for the Ollama provider.
135
+ def capabilities
136
+ {
137
+ provider: 'ollama',
138
+ streaming: true,
139
+ tool_calls: true,
140
+ thinking: true,
141
+ vision: true,
142
+ embeddings: true,
143
+ tool_calls_as_text: false,
144
+ forced_tool_choice: false,
145
+ assistant_prefill: false
146
+ }.freeze
147
+ end
148
+
149
+ private
150
+
151
+ attr_reader :config
152
+
153
+ # -- Message formatting --
154
+
155
+ def format_messages(request)
156
+ messages = format_request_messages(request.messages)
157
+
158
+ if request.system.to_s.strip.empty?
159
+ messages
160
+ else
161
+ [{ role: 'system', content: request.system.strip }] + messages
162
+ end
163
+ end
164
+
165
+ def format_request_messages(messages)
166
+ return [] if messages.nil? || messages.empty?
167
+
168
+ messages.map { |msg| format_message(msg) }
169
+ end
170
+
171
+ def format_message(msg)
172
+ role = msg.role.to_s
173
+ content = format_message_content(msg)
174
+ result = { role: role, content: content }
175
+
176
+ images = extract_images(msg.content)
177
+ result[:images] = images unless images.empty?
178
+
179
+ result[:tool_call_id] = msg.tool_call_id if msg.tool_call_id
180
+ result.compact
181
+ end
182
+
183
+ def format_message_content(msg)
184
+ content = msg.content
185
+ return content if content.is_a?(String)
186
+
187
+ case content
188
+ when Array
189
+ extract_text_from_blocks(content)
190
+ when Canonical::ContentBlock
191
+ content.text? ? content.text.to_s : content.to_s
192
+ else
193
+ content.to_s
194
+ end
195
+ end
196
+
197
+ def extract_text_from_blocks(blocks)
198
+ parts = blocks.filter_map do |block|
199
+ case block
200
+ when Canonical::ContentBlock
201
+ format_content_block_text(block)
202
+ when Hash
203
+ block_hash = block.transform_keys(&:to_sym)
204
+ block_hash[:text]&.to_s
205
+ else
206
+ block.to_s
207
+ end
208
+ end
209
+ parts.join
210
+ end
211
+
212
+ def format_content_block_text(block)
213
+ case block.type
214
+ when :text, :thinking
215
+ block.text.to_s
216
+ when :tool_use
217
+ Legion::JSON.dump({ name: block.name, arguments: block.input || {} })
218
+ when :tool_result
219
+ block.text.to_s
220
+ end
221
+ end
222
+
223
+ def extract_images(content)
224
+ return [] unless content.is_a?(Array)
225
+
226
+ content.filter_map do |block|
227
+ next unless block.is_a?(Canonical::ContentBlock) && block.type == :image
228
+
229
+ block.data
230
+ end
231
+ end
232
+
233
+ # -- Tool formatting --
234
+
235
+ def format_tools(tools)
236
+ return nil if tools.to_h.empty?
237
+
238
+ tools.to_h.values.map do |tool|
239
+ tool_hash = if tool.is_a?(Canonical::ToolDefinition)
240
+ { name: tool.name, description: tool.description, parameters: tool.parameters }
241
+ elsif tool.is_a?(Hash)
242
+ tool.transform_keys(&:to_sym)
243
+ else
244
+ tool
245
+ end
246
+
247
+ name = tool_hash[:name] || tool_hash['name']
248
+ description = (tool_hash[:description] || tool_hash['description'] || '').to_s
249
+ raw_params = tool_hash[:parameters] || tool_hash[:input_schema]
250
+ raw_params = raw_params.to_h if raw_params.respond_to?(:to_h) && !raw_params.is_a?(Hash)
251
+ parameters = Legion::Extensions::Llm::Canonical::ToolDefinition.normalize_parameters(raw_params)
252
+
253
+ {
254
+ type: 'function',
255
+ function: {
256
+ name: name.to_s,
257
+ description: description,
258
+ parameters: parameters
259
+ }
260
+ }
261
+ end
262
+ end
263
+
264
+ # -- Parameter mapping (G18) --
265
+
266
# Copies supported sampling parameters from +params+ into +payload[:options]+.
#
# Does nothing unless +params+ is a Canonical::Params. Every key listed in
# SUPPORTED_PARAMS whose value is non-nil is written under its wire name from
# PARAM_OPTIONS_KEYS; +stop_sequences+ is always wrapped with +Array()+.
# +payload[:options]+ is only set when at least one option was collected.
# +max_thinking_tokens+ is never forwarded; when it is set, a debug line
# records that it was dropped.
def apply_options(payload, params)
  return unless params.is_a?(Canonical::Params)

  collected = SUPPORTED_PARAMS.each_with_object({}) do |param_key, acc|
    value = params.public_send(param_key)
    next if value.nil?

    acc[PARAM_OPTIONS_KEYS[param_key]] = param_key == :stop_sequences ? Array(value) : value
  end
  payload[:options] = collected unless collected.empty?

  return unless params.max_thinking_tokens

  log.debug do
    '[llm][ollama-translator] action=drop_unsupported_param param=max_thinking_tokens ' \
      "value=#{params.max_thinking_tokens} reason=ollama_not_supported"
  end
end
292
+
293
+ # -- Thinking configuration --
294
+
295
# Sets +payload[:think]+ to true when +enable_thinking?+ approves the request;
# otherwise the payload is left untouched and nil is returned.
def apply_thinking_config(payload, request)
  payload[:think] = true if enable_thinking?(request)
end
300
+
301
# Decides whether the request asks for thinking output.
#
# * A Canonical::Thinking::Config enables thinking when its +enabled?+ is truthy.
# * A Hash enables thinking unless it carries an explicit +enabled: false+.
#   The flag is read under either the symbol or the string key, so
#   +{ 'enabled' => false }+ (e.g. a config decoded from JSON) disables
#   thinking just like +{ enabled: false }+ does.
# * Anything else (including nil) leaves thinking disabled.
#
# Always returns true or false.
def enable_thinking?(request)
  thinking = request.thinking
  return true if thinking.is_a?(Canonical::Thinking::Config) && thinking.enabled?
  return false unless thinking.is_a?(Hash)

  # Prefer the symbol key; fall back to the string key only when it is absent.
  explicit = thinking.fetch(:enabled) { thinking['enabled'] }
  explicit != false
end
307
+
308
+ # -- Response format --
309
+
310
# Writes the requested response format into +payload[:format]+.
#
# Does nothing unless +params+ is a Canonical::Params with a truthy
# +response_format+. For a Hash the first truthy value found under
# +:schema+, +'schema'+, +:json_schema+, +'json_schema+' (in that order) is
# used; when none is present the Hash itself is sent. Non-Hash values are
# passed through unchanged.
def apply_response_format(payload, params)
  return unless params.is_a?(Canonical::Params) && params.response_format

  requested = params.response_format
  embedded = nil
  if requested.is_a?(Hash)
    schema_keys = [:schema, 'schema', :json_schema, 'json_schema']
    # Lazy so lookups stop at the first truthy entry, like a chain of `||`.
    embedded = schema_keys.lazy.map { |key| requested[key] }.find(&:itself)
  end

  payload[:format] = embedded || requested
end
322
+
323
+ # -- Response parsing --
324
+
325
# True when +wire+ has a +text+ or +stop_reason+ key (symbol or string) —
# the markers this translator uses to recognise an already-canonical response.
def canonical_response?(wire)
  [:text, 'text', :stop_reason, 'stop_reason'].any? { |marker| wire.key?(marker) }
end
328
+
329
# Builds a canonical response with +stop_reason: :error+ from a failed wire payload.
#
# A non-Hash +wire+ is treated as an empty body. +error+, +usage+ and +model+
# are read from the body with the string key taking precedence over the
# symbol key. When the body carries no error, a generic +parse_error+
# descriptor is placed in +metadata[:error]+.
def canonical_error_response(wire)
  body = wire.is_a?(Hash) ? wire : {}
  read = ->(name) { body[name] || body[name.to_sym] }
  fallback_error = { type: 'parse_error', message: 'Failed to parse response' }

  Canonical::Response.build(
    text: '',
    tool_calls: [],
    usage: Canonical::Usage.from_hash(read.call('usage') || {}),
    stop_reason: :error,
    model: read.call('model'),
    metadata: { error: read.call('error') || fallback_error }
  )
end
343
+
344
# Returns +{ thinking: ... }+ when the message carries a truthy +thinking+
# value under the symbol or string key, and an empty Hash otherwise.
def thinking_metadata(message)
  reasoning = message[:thinking] || message['thinking']
  reasoning ? { thinking: reasoning } : {}
end
350
+
351
# Wraps an extraction's thinking text and signature in a Canonical::Thinking.
# Returns nil when the extraction has neither.
def build_canonical_thinking(extraction)
  content = extraction.thinking
  signature = extraction.signature
  return nil if !content && !signature

  Canonical::Thinking.new(content: content, signature: signature)
end
359
+
360
# Converts a raw +tool_calls+ array from the wire into canonical tool calls.
#
# Anything other than a non-empty Array yields +[]+. Each entry and its
# +function+ sub-hash are key-symbolised when they are Hashes. The call id
# falls back to the function name when the entry has no +id+. An entry that
# raises while being converted is reported through +handle_exception+ and
# dropped; the remaining entries are still returned.
def parse_tool_calls(tool_calls_raw)
  return [] if !tool_calls_raw.is_a?(Array) || tool_calls_raw.empty?

  tool_calls_raw.filter_map do |raw_call|
    entry = raw_call.is_a?(Hash) ? raw_call.transform_keys(&:to_sym) : raw_call
    fn = entry[:function] || entry['function'] || {}
    fn = fn.transform_keys(&:to_sym) if fn.is_a?(Hash)

    tool_name = fn[:name] || fn['name']
    call_id = entry[:id] || entry['id'] || tool_name
    parsed_args = parse_tool_arguments(fn[:arguments] || fn['arguments'])

    Canonical::ToolCall.build(
      id: call_id.to_s,
      name: tool_name.to_s,
      arguments: parsed_args,
      source: :client
    )
  rescue StandardError => e
    handle_exception(e, level: :warn, handled: true, operation: 'ollama.translator.parse_tool_call')
    nil
  end
end
383
+
384
# Normalises tool-call arguments into a Hash.
#
# * nil or an empty string -> +{}+
# * a Hash                 -> returned as-is
# * anything else          -> decoded with Legion::JSON.load
#
# A payload that fails to parse yields +{}+. So does a payload that parses
# to something other than a Hash (for example the JSON documents +null+,
# +[1]+ or +42+): tool arguments are a name => value mapping, and every
# other branch of this method already returns a Hash.
def parse_tool_arguments(arguments)
  return {} if arguments.nil? || arguments == ''
  return arguments if arguments.is_a?(Hash)

  decoded = Legion::JSON.load(arguments)
  decoded.is_a?(Hash) ? decoded : {}
rescue Legion::JSON::ParseError
  {}
end
392
+
393
# Translates a wire +done_reason+ into a canonical stop reason.
#
# A present +done_reason+ is looked up (as a String) in OLLAMA_STOP_REASON_MAP,
# defaulting to FALLBACK_STOP_REASON for unknown values. Without a
# +done_reason+, FALLBACK_STOP_REASON is returned only when +done+ is truthy;
# otherwise the result is nil.
def map_stop_reason(done_reason, done = nil)
  return OLLAMA_STOP_REASON_MAP.fetch(done_reason.to_s, FALLBACK_STOP_REASON) if done_reason

  done ? FALLBACK_STOP_REASON : nil
end
400
+
401
+ # -- Chunk parsing --
402
+
403
# Turns a raw stream chunk into a Hash when possible.
#
# Hashes are returned untouched, blank strings yield nil, and every other
# value is handed to +parse_json_safely+.
def normalize_chunk_input(raw)
  case raw
  when Hash
    raw
  when String
    raw.strip.empty? ? nil : parse_json_safely(raw)
  else
    parse_json_safely(raw)
  end
end
408
+
409
# Builds a Canonical::Chunk from an already-canonical chunk hash.
#
# Every key that responds to +to_sym+ is symbolised before the hash is passed
# to Canonical::Chunk.from_hash. All keys are normalised rather than sampling
# only the first one, so a mixed-key hash such as
# +{ type: 'text_delta', 'delta' => 'hi' }+ no longer keeps its string keys.
#
# Any failure (including +data+ not being a Hash) is logged at debug level
# and reported as nil.
def handle_canonical_chunk(data)
  normalized = data.transform_keys { |key| key.respond_to?(:to_sym) ? key.to_sym : key }
  Canonical::Chunk.from_hash(normalized)
rescue StandardError => e
  log.debug { "[llm][ollama-translator] action=canonical_chunk_parse_error error=#{e.message}" }
  nil
end
416
+
417
# Translates one streamed wire chunk into at most one canonical chunk.
#
# Exactly one result is produced per call, chosen in this order:
#   1. a tool-call chunk when +message.tool_calls+ is non-empty,
#   2. a thinking delta when +message.thinking+ is non-empty,
#   3. a text delta when +message.content+ is non-empty (this also applies to
#      chunks flagged +done+, since the final chunk may carry text),
#   4. a done chunk when +done+ is truthy,
#   5. nil otherwise.
# Keys are read as symbols first, then as strings. The request id comes from
# +request_id+, falling back to +id+.
def parse_ollama_chunk(data)
  message = data[:message] || data['message'] || {}
  request_id = data[:request_id] || data['request_id'] || data[:id] || data['id']

  calls = message[:tool_calls] || message['tool_calls']
  return build_tool_call_chunk(calls, request_id) unless Array(calls).empty?

  reasoning = (message[:thinking] || message['thinking']).to_s
  return Canonical::Chunk.thinking_delta(delta: reasoning, request_id: request_id) unless reasoning.empty?

  text = (message[:content] || message['content']).to_s
  return Canonical::Chunk.text_delta(delta: text, request_id: request_id) unless text.empty?

  # Nothing left to emit: only a finished stream produces a chunk from here.
  finished = data[:done] || data['done']
  return nil unless finished

  build_done_chunk(data, data[:done_reason] || data['done_reason'], request_id)
end
450
+
451
# Builds the terminal chunk of a stream.
#
# Token usage is taken from the wire's +prompt_eval_count+ (input) and
# +eval_count+ (output), read as symbol or string keys. The stop reason is
# mapped through +map_stop_reason+ with the done flag set.
def build_done_chunk(data, done_reason, request_id)
  token_counts = {
    input_tokens: data[:prompt_eval_count] || data['prompt_eval_count'],
    output_tokens: data[:eval_count] || data['eval_count']
  }
  usage = Canonical::Usage.from_hash(token_counts)
  stop_reason = map_stop_reason(done_reason, true)

  Canonical::Chunk.done(request_id: request_id, usage: usage, stop_reason: stop_reason)
end
463
+
464
# Wraps a streamed tool call in a canonical tool-call delta chunk.
#
# Only the first entry of +tool_calls+ is converted; any further entries are
# not emitted by this method. The entry and its +function+ sub-hash are
# key-symbolised when they are Hashes. The call id falls back to the function
# name and finally to the literal 'synthesized'.
def build_tool_call_chunk(tool_calls, request_id)
  head = tool_calls.first
  head = head.transform_keys(&:to_sym) if head.is_a?(Hash)
  fn = head[:function] || head['function'] || {}
  fn = fn.transform_keys(&:to_sym) if fn.is_a?(Hash)

  call_id = head[:id] || head['id'] || fn[:name] || 'synthesized'
  tool_call = Canonical::ToolCall.build(
    id: call_id.to_s,
    name: (fn[:name] || fn['name']).to_s,
    arguments: parse_tool_arguments(fn[:arguments] || fn['arguments']),
    source: :client
  )

  Canonical::Chunk.tool_call_delta(tool_call: tool_call, request_id: request_id)
end
482
+
483
+ # -- JSON helpers --
484
+
485
# Decodes a JSON string with Legion::JSON.load.
#
# Non-String input yields nil without attempting to parse. A parse error is
# logged at debug level and also reported as nil.
def parse_json_safely(raw)
  raw.is_a?(String) ? Legion::JSON.load(raw) : nil
rescue Legion::JSON::ParseError => e
  log.debug { "[llm][ollama-translator] action=json_parse_error error=#{e.message}" }
  nil
end
493
+ end
494
+ end
495
+ end
496
+ end
497
+ end
@@ -4,7 +4,7 @@ module Legion
4
4
  module Extensions
5
5
  module Llm
6
6
  module Ollama
7
- VERSION = '0.2.14'
7
+ VERSION = '0.2.22'
8
8
  end
9
9
  end
10
10
  end
@@ -2,8 +2,10 @@
2
2
 
3
3
  require 'legion/extensions/llm'
4
4
  require 'legion/extensions/llm/ollama/provider'
5
+ require 'legion/extensions/llm/ollama/translator'
5
6
  require 'legion/extensions/llm/ollama/version'
6
7
  require 'legion/logging/helper'
8
+ require_relative 'ollama/actors/discovery_refresh'
7
9
 
8
10
  module Legion
9
11
  module Extensions
@@ -30,10 +32,7 @@ module Legion
30
32
  fleet: {
31
33
  enabled: false,
32
34
  respond_to_requests: false,
33
- capabilities: %i[chat stream_chat embed],
34
- lanes: [],
35
- concurrency: 1,
36
- queue_suffix: nil
35
+ capabilities: %i[chat stream_chat embed tools]
37
36
  }
38
37
  }
39
38
  )
@@ -73,7 +72,8 @@ module Legion
73
72
  instances[:local] = {
74
73
  base_url: 'http://127.0.0.1:11434',
75
74
  tier: :local,
76
- capabilities: %i[completion embedding vision]
75
+ capabilities: {},
76
+ provider_capabilities: { streaming: true }
77
77
  }
78
78
  end
79
79
 
@@ -85,7 +85,8 @@ module Legion
85
85
  configured.each do |name, config|
86
86
  instances[name.to_sym] = normalize_instance_config(config).merge(
87
87
  tier: :direct,
88
- capabilities: %i[completion embedding vision]
88
+ capabilities: {},
89
+ provider_capabilities: { streaming: true }
89
90
  )
90
91
  end
91
92
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lex-llm-ollama
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.14
4
+ version: 0.2.22
5
5
  platform: ruby
6
6
  authors:
7
7
  - LegionIO
@@ -71,14 +71,14 @@ dependencies:
71
71
  requirements:
72
72
  - - ">="
73
73
  - !ruby/object:Gem::Version
74
- version: 0.4.3
74
+ version: 0.6.0
75
75
  type: :runtime
76
76
  prerelease: false
77
77
  version_requirements: !ruby/object:Gem::Requirement
78
78
  requirements:
79
79
  - - ">="
80
80
  - !ruby/object:Gem::Version
81
- version: 0.4.3
81
+ version: 0.6.0
82
82
  description: Ollama provider integration for the LegionIO LLM routing framework.
83
83
  email:
84
84
  - matthewdiverson@gmail.com
@@ -101,6 +101,7 @@ files:
101
101
  - lib/legion/extensions/llm/ollama/actors/fleet_worker.rb
102
102
  - lib/legion/extensions/llm/ollama/provider.rb
103
103
  - lib/legion/extensions/llm/ollama/runners/fleet_worker.rb
104
+ - lib/legion/extensions/llm/ollama/translator.rb
104
105
  - lib/legion/extensions/llm/ollama/version.rb
105
106
  homepage: https://github.com/LegionIO/lex-llm-ollama
106
107
  licenses: