lex-llm-ollama 0.2.14 → 0.2.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8bbec813c20e8b5c62b97209466439569cfcde42251acfe9e87f2bb0fce79e9d
4
- data.tar.gz: 59822c6527476ec0000af57ec2a5884672d03065ca3212e0fb655faf809cc0da
3
+ metadata.gz: 7850eb1a4f0fcf50d9d0a86de7b9c2e60fa47154e1c6b330a492abeb00c25803
4
+ data.tar.gz: 24e040db015065dd7e508a995aa0f2b072910da41552029a1ff561993961331a
5
5
  SHA512:
6
- metadata.gz: 6f41591f42a566ab7f3344e6d9963393db977dfea257b61c4a4aea798943804cc1db9f9525522cb690e5ab0448bc4b1451ee2ec3a67b615caccb049e84607bc4
7
- data.tar.gz: effc50944c4583c1732ea4b23563c2f1ac00f660c0571e6c001403f3a140027303fe3bd77d2e0e64c61db39828a5230f9a35f30e7a1ab8663a3dd4e8b56bc185
6
+ metadata.gz: 762912cf8067d8b1c9019ea2d1d10261e234abac127ad1eeeecb5d2b7e41219c09f6294f68f022f0ad33b1f2eb95332db8b1ed3521eeef84aecfbeb11c3f186e
7
+ data.tar.gz: 9f99c4bc9f342d1061077d9dd8f663b35a0a9c962515cddbcf76d19a55fa734040ab8f4b7f7fd6767395d3dd9582913c6b2dbd3ac3010d10c787e932f218fd6a
data/.rubocop.yml CHANGED
@@ -22,6 +22,12 @@ Metrics/CyclomaticComplexity:
22
22
  Enabled: false
23
23
  Metrics/PerceivedComplexity:
24
24
  Enabled: false
25
+ Metrics/ClassLength:
26
+ Enabled: false
27
+ Lint/DuplicateBranch:
28
+ Enabled: false
29
+ Lint/UselessConstantScoping:
30
+ Enabled: false
25
31
  RSpec/MultipleExpectations:
26
32
  Enabled: false
27
33
  RSpec/ExampleLength:
@@ -32,3 +38,5 @@ RSpec/InstanceVariable:
32
38
  Enabled: false
33
39
  Style/Documentation:
34
40
  Enabled: false
41
+ Style/AsciiComments:
42
+ Enabled: false
data/CHANGELOG.md CHANGED
@@ -1,5 +1,19 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.2.17 - 2026-06-16
4
+
5
+ - dependency updates, code quality improvements
6
+
7
+ ## 0.2.16 - 2026-06-15
8
+
9
+ - **CapabilityPolicy integration** — Optional capabilities default false; API-provided capabilities tagged as `:model_metadata`. Settings overrides at provider/instance/model level supported.
10
+
11
+ ## 0.2.15 - 2026-06-13
12
+
13
+ - **Gemfile cleanup** — Remove local path overrides; dependencies resolve from gemspec via rubygems.
14
+ - **Canonical tool support** — Use `ToolSchema.extract`, add `:tools` capability, canonical normalization for tool parameter schemas.
15
+ - 147 examples, 0 failures; 17 files, 0 rubocop offenses.
16
+
3
17
  ## 0.2.14 - 2026-06-05
4
18
 
5
19
  - Verified specs and RuboCop compliance (52 examples, 0 failures; 15 files, 0 offenses)
data/Gemfile CHANGED
@@ -2,13 +2,6 @@
2
2
 
3
3
  source 'https://rubygems.org'
4
4
 
5
- group :test do
6
- llm_base_path = ENV.fetch('LEX_LLM_PATH', File.expand_path('../lex-llm', __dir__))
7
- transport_path = ENV.fetch('LEGION_TRANSPORT_PATH', File.expand_path('../../legion-transport', __dir__))
8
- gem 'legion-transport', path: transport_path if File.directory?(transport_path)
9
- gem 'lex-llm', path: llm_base_path if File.directory?(llm_base_path)
10
- end
11
-
12
5
  gemspec
13
6
 
14
7
  group :development do
@@ -27,5 +27,5 @@ Gem::Specification.new do |spec|
27
27
  spec.add_dependency 'legion-logging', '>= 1.3.2'
28
28
  spec.add_dependency 'legion-settings', '>= 1.3.14'
29
29
  spec.add_dependency 'legion-transport', '>= 1.4.14'
30
- spec.add_dependency 'lex-llm', '>= 0.4.3'
30
+ spec.add_dependency 'lex-llm', '>= 0.5.0'
31
31
  end
@@ -8,7 +8,7 @@ module Legion
8
8
  module Llm
9
9
  module Ollama
10
10
  # Ollama provider implementation for the Legion::Extensions::Llm base provider contract.
11
- class Provider < Legion::Extensions::Llm::Provider # rubocop:disable Metrics/ClassLength
11
+ class Provider < Legion::Extensions::Llm::Provider
12
12
  include Legion::Logging::Helper
13
13
 
14
14
  class << self
@@ -41,6 +41,10 @@ module Legion
41
41
  Ollama.default_settings
42
42
  end
43
43
 
44
+ def translator
45
+ @translator ||= Translator.new(config: config)
46
+ end
47
+
44
48
  def api_base
45
49
  resolve_base_url || normalize_url(settings[:base_url] || settings[:endpoint] || 'http://127.0.0.1:11434')
46
50
  end
@@ -112,10 +116,11 @@ module Legion
112
116
  log.debug do
113
117
  "ollama provider discovering offerings live=#{live} cached_model_count=#{Array(@cached_models).size}"
114
118
  end
119
+ running_ids = live ? running_model_ids : []
115
120
  offerings = resolve_models(live).filter_map do |model_info|
116
121
  next unless model_allowed?(model_info.id)
117
122
 
118
- offering_from_model(model_info)
123
+ offering_from_model(model_info, loaded: running_ids.include?(model_info.id.to_s))
119
124
  end
120
125
  log.debug { "ollama provider built offering_count=#{offerings.size} live=#{live}" }
121
126
  offerings
@@ -159,7 +164,14 @@ module Legion
159
164
  end
160
165
  end
161
166
 
162
- def offering_from_model(model_info)
167
+ def running_model_ids
168
+ Array(list_running_models).filter_map do |m|
169
+ m['name'] || m[:name] || m['model'] || m[:model]
170
+ end.map(&:to_s)
171
+ end
172
+
173
+ def offering_from_model(model_info, loaded: false)
174
+ policy = resolve_capability_policy(model_info)
163
175
  Legion::Extensions::Llm::Routing::ModelOffering.new(
164
176
  provider_family: :ollama,
165
177
  instance_id: config.respond_to?(:instance_id) ? config.instance_id : :default,
@@ -167,18 +179,64 @@ module Legion
167
179
  tier: offering_tier,
168
180
  model: model_info.id,
169
181
  usage_type: offering_usage_type(model_info),
170
- capabilities: offering_capabilities(model_info),
182
+ capabilities: policy[:capabilities],
183
+ capability_sources: policy[:sources],
171
184
  limits: offering_limits(model_info),
172
- metadata: offering_metadata(model_info)
185
+ metadata: offering_metadata(model_info).merge(loaded: loaded)
173
186
  )
174
187
  end
175
188
 
176
- def offering_usage_type(model_info)
177
- model_info.embedding? ? :embedding : :inference
189
+ def resolve_capability_policy(model_info)
190
+ model_id = model_info.id.to_s
191
+ Legion::Extensions::Llm::CapabilityPolicy.resolve(
192
+ real: capabilities_from_api(model_info),
193
+ provider_catalog: {},
194
+ probe: {},
195
+ provider_envelope: { streaming: true },
196
+ provider_config: provider_level_config,
197
+ instance_config: instance_level_config,
198
+ model_config: model_level_config(model_id)
199
+ )
178
200
  end
179
201
 
180
- def offering_capabilities(model_info)
181
- model_info.capabilities.map(&:to_s)
202
+ def capabilities_from_api(model_info)
203
+ Array(model_info.capabilities).each_with_object({}) do |cap, hash|
204
+ sym = cap.to_s.downcase.to_sym
205
+ hash[sym] = true
206
+ end
207
+ end
208
+
209
+ def provider_level_config
210
+ raw = CredentialSources.setting(:extensions, :llm, :ollama)
211
+ return {} unless raw.is_a?(Hash)
212
+
213
+ raw.reject { |k, _| k.to_sym == :instances }
214
+ end
215
+
216
+ def instance_level_config
217
+ extract_config_hash
218
+ end
219
+
220
+ def model_level_config(model_id)
221
+ data = extract_config_hash
222
+ models = data[:models]
223
+ return {} unless models.is_a?(Hash)
224
+
225
+ models[model_id.to_sym] || models[model_id.to_s] || models[model_id] || {}
226
+ end
227
+
228
+ def extract_config_hash
229
+ return config.to_h if config.respond_to?(:to_h) && !config.is_a?(Legion::Extensions::Llm::HashConfig)
230
+
231
+ if config.is_a?(Legion::Extensions::Llm::HashConfig)
232
+ config.instance_variable_get(:@data) || {}
233
+ else
234
+ {}
235
+ end
236
+ end
237
+
238
+ def offering_usage_type(model_info)
239
+ model_info.embedding? ? :embedding : :inference
182
240
  end
183
241
 
184
242
  def offering_limits(model_info)
@@ -357,16 +415,16 @@ module Legion
357
415
  def format_tools(tools)
358
416
  return nil if tools.empty?
359
417
 
360
- tool_names = tools.values.filter_map { |tool| tool.respond_to?(:name) ? tool.name : nil }
418
+ tool_names = tools.values.filter_map { |tool| Legion::Extensions::Llm::Canonical::ToolSchema.tool_name(tool) }
361
419
  log.debug { "ollama provider formatting tools count=#{tools.size} names=#{tool_names.join(',')}" }
362
420
 
363
421
  tools.values.map do |tool|
364
422
  {
365
423
  type: 'function',
366
424
  function: {
367
- name: tool.name,
368
- description: tool.description,
369
- parameters: tool.params_schema || { type: 'object', properties: {} }
425
+ name: Legion::Extensions::Llm::Canonical::ToolSchema.tool_name(tool),
426
+ description: Legion::Extensions::Llm::Canonical::ToolSchema.tool_description(tool),
427
+ parameters: Legion::Extensions::Llm::Canonical::ToolSchema.extract(tool)
370
428
  }
371
429
  }
372
430
  end
@@ -380,67 +438,74 @@ module Legion
380
438
 
381
439
  def parse_completion_response(response)
382
440
  body = response.body
383
- message = body.fetch('message', {})
384
- content, thinking = extract_thinking_from_completion(message)
441
+ canonical = translator.parse_response(body)
442
+ to_legacy_message(canonical, body)
443
+ end
444
+
445
+ def build_chunk(data)
446
+ canonical_chunk = translator.parse_chunk(data)
447
+ return nil if canonical_chunk.nil?
448
+
449
+ to_legacy_chunk(canonical_chunk, data)
450
+ end
451
+
452
+ def to_legacy_message(canonical, raw_body)
453
+ usage = canonical.usage
385
454
  Legion::Extensions::Llm::Message.new(
386
455
  role: :assistant,
387
- content: content,
388
- model_id: body['model'],
389
- tool_calls: parse_tool_calls(message['tool_calls']),
390
- thinking: thinking,
391
- input_tokens: body['prompt_eval_count'],
392
- output_tokens: body['eval_count'],
393
- raw: body
456
+ content: canonical.text,
457
+ model_id: canonical.model,
458
+ thinking: if canonical.thinking
459
+ Legion::Extensions::Llm::Thinking.build(
460
+ text: canonical.thinking.content, signature: canonical.thinking.signature
461
+ )
462
+ end,
463
+ tool_calls: legacy_tool_calls(canonical.tool_calls),
464
+ input_tokens: usage&.input_tokens,
465
+ output_tokens: usage&.output_tokens,
466
+ raw: raw_body
394
467
  )
395
468
  end
396
469
 
397
- def build_chunk(data)
398
- message = data.fetch('message', {})
399
- thinking = message['thinking']
470
+ def to_legacy_chunk(canonical_chunk, raw_data)
400
471
  Legion::Extensions::Llm::Chunk.new(
401
472
  role: :assistant,
402
- content: message['content'],
403
- thinking: thinking ? Thinking.build(text: thinking) : nil,
404
- tool_calls: parse_tool_calls(message['tool_calls']),
405
- model_id: data['model'],
406
- input_tokens: data['prompt_eval_count'],
407
- output_tokens: data['eval_count'],
408
- raw: data
473
+ content: canonical_chunk.text_delta? ? canonical_chunk.delta : nil,
474
+ thinking: if canonical_chunk.thinking_delta?
475
+ Legion::Extensions::Llm::Thinking.build(
476
+ text: canonical_chunk.delta
477
+ )
478
+ end,
479
+ tool_calls: legacy_streaming_tool_calls(canonical_chunk),
480
+ model_id: raw_data['model'] || raw_data[:model],
481
+ input_tokens: canonical_chunk.usage&.input_tokens ||
482
+ raw_data['prompt_eval_count'] || raw_data[:prompt_eval_count],
483
+ output_tokens: canonical_chunk.usage&.output_tokens ||
484
+ raw_data['eval_count'] || raw_data[:eval_count],
485
+ raw: raw_data
409
486
  )
410
487
  end
411
488
 
412
- def extract_thinking_from_completion(message)
413
- extraction = Responses::ThinkingExtractor.extract(
414
- message['content'],
415
- metadata: thinking_metadata(message)
416
- )
489
+ def legacy_tool_calls(canonical_tool_calls)
490
+ return nil if canonical_tool_calls.nil? || canonical_tool_calls.empty?
417
491
 
418
- [
419
- extraction.content,
420
- Thinking.build(text: extraction.thinking, signature: extraction.signature)
421
- ]
422
- end
423
-
424
- def thinking_metadata(message)
425
- { thinking: message['thinking'] }.compact
492
+ canonical_tool_calls.to_h do |tc|
493
+ [
494
+ (tc.name || tc.id).to_s.to_sym,
495
+ Legion::Extensions::Llm::ToolCall.new(id: tc.id, name: tc.name, arguments: tc.arguments || {})
496
+ ]
497
+ end
426
498
  end
427
499
 
428
- def parse_tool_calls(tool_calls)
429
- return nil unless tool_calls
500
+ def legacy_streaming_tool_calls(canonical_chunk)
501
+ return nil unless canonical_chunk.tool_call_delta?
430
502
 
431
- log.debug { "ollama provider parsing tool_call_count=#{tool_calls.size}" }
503
+ tc = canonical_chunk.tool_call
504
+ return nil unless tc
432
505
 
433
- tool_calls.to_h do |call|
434
- function = call.fetch('function', {})
435
- [
436
- function.fetch('name').to_sym,
437
- Legion::Extensions::Llm::ToolCall.new(
438
- id: call['id'] || function['name'],
439
- name: function['name'],
440
- arguments: function['arguments'] || {}
441
- )
442
- ]
443
- end
506
+ { (tc.name || tc.id).to_s.to_sym => Legion::Extensions::Llm::ToolCall.new(
507
+ id: tc.id, name: tc.name, arguments: tc.arguments || ''
508
+ ) }
444
509
  end
445
510
 
446
511
  def parse_list_models_response(response, provider, _capabilities)
@@ -0,0 +1,497 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'legion/extensions/llm/canonical'
4
+ require 'legion/extensions/llm/responses/thinking_extractor'
5
+ require 'legion/json'
6
+ require 'legion/logging'
7
+
8
+ module Legion
9
+ module Extensions
10
+ module Llm
11
+ module Ollama
12
+ # Canonical provider translator for Ollama (/api/chat NDJSON wire format).
13
+ #
14
+ # Implements render_request, parse_response, parse_chunk, and capabilities.
15
+ # Ollama uses NDJSON streaming (not SSE), native tool calling, and the `think`
16
+ # flag for extended thinking support.
17
+ #
18
+ # Ollama quirks (declared in capabilities):
19
+ # - tool_calls_as_text: false — Ollama returns structured tool_calls natively.
20
+ # - forced_tool_choice: false — Ollama does not support forced tool selection.
21
+ # - assistant_prefill: false — Ollama does not support assistant prefill.
22
+ class Translator
23
+ include Legion::Logging::Helper
24
+
25
+ # Ollama-specific stop_reason mapping (done_reason field).
26
+ OLLAMA_STOP_REASON_MAP = {
27
+ 'stop' => :end_turn,
28
+ 'tool_use' => :tool_use,
29
+ 'length' => :max_tokens
30
+ }.freeze
31
+ FALLBACK_STOP_REASON = :end_turn
32
+
33
+ # G18 parameter mapping: canonical params -> Ollama options keys.
34
+ PARAM_OPTIONS_KEYS = {
35
+ max_tokens: :num_predict,
36
+ temperature: :temperature,
37
+ top_p: :top_p,
38
+ top_k: :top_k,
39
+ stop_sequences: :stop,
40
+ seed: :seed,
41
+ frequency_penalty: :frequency_penalty,
42
+ presence_penalty: :presence_penalty
43
+ }.freeze
44
+
45
+ SUPPORTED_PARAMS = %i[
46
+ max_tokens temperature top_p top_k stop_sequences
47
+ seed frequency_penalty presence_penalty
48
+ ].freeze
49
+
50
+ def initialize(config: nil)
51
+ @config = config
52
+ end
53
+
54
+ # Render a canonical request into Ollama /api/chat wire payload.
55
+ def render_request(request)
56
+ model = request.metadata&.dig(:model) || 'default'
57
+ messages = format_messages(request)
58
+ payload = {
59
+ model: model,
60
+ messages: messages,
61
+ stream: request.stream
62
+ }
63
+
64
+ payload[:tools] = format_tools(request.tools) unless request.tools.to_h.empty?
65
+ apply_options(payload, request.params)
66
+ apply_thinking_config(payload, request)
67
+ apply_response_format(payload, request.params)
68
+
69
+ log.debug do
70
+ "[llm][ollama-translator] action=render_request model=#{model} stream=#{request.stream} " \
71
+ "message_count=#{messages.size} tools=#{request.tools&.size || 0}"
72
+ end
73
+
74
+ payload.compact
75
+ end
76
+
77
+ # Parse an Ollama /api/chat completion response into a Canonical::Response.
78
+ def parse_response(wire)
79
+ return canonical_error_response(wire) unless wire.is_a?(Hash)
80
+ return Canonical::Response.from_hash(wire) if canonical_response?(wire)
81
+
82
+ message = wire[:message] || wire['message'] || {}
83
+ content = message[:content] || message['content'] || ''
84
+ tool_calls_raw = message[:tool_calls] || message['tool_calls']
85
+ model = wire[:model] || wire['model']
86
+ done_reason = wire[:done_reason] || wire['done_reason']
87
+ done = wire[:done] || wire['done']
88
+
89
+ extraction = Responses::ThinkingExtractor.extract(
90
+ content,
91
+ metadata: thinking_metadata(message)
92
+ )
93
+
94
+ text = extraction.content || ''
95
+ thinking = build_canonical_thinking(extraction)
96
+ tool_calls = parse_tool_calls(tool_calls_raw)
97
+ stop_reason = map_stop_reason(done_reason, done)
98
+
99
+ usage = Canonical::Usage.from_hash({
100
+ input_tokens: wire[:prompt_eval_count] || wire['prompt_eval_count'],
101
+ output_tokens: wire[:eval_count] || wire['eval_count']
102
+ })
103
+
104
+ Canonical::Response.build(
105
+ text: text.to_s,
106
+ thinking: thinking,
107
+ tool_calls: tool_calls,
108
+ usage: usage,
109
+ stop_reason: stop_reason,
110
+ model: model,
111
+ metadata: {}
112
+ )
113
+ rescue StandardError => e
114
+ handle_exception(e, level: :error, handled: false, operation: 'ollama.translator.parse_response')
115
+ raise
116
+ end
117
+
118
+ # Parse a single NDJSON chunk into a Canonical::Chunk or nil.
119
+ def parse_chunk(raw)
120
+ return nil if raw.nil?
121
+
122
+ data = normalize_chunk_input(raw)
123
+ return nil if data.nil?
124
+
125
+ # Handle canonical-form chunks (from conformance fixtures)
126
+ return handle_canonical_chunk(data) if data['type'] || data[:type]
127
+
128
+ parse_ollama_chunk(data)
129
+ rescue StandardError => e
130
+ handle_exception(e, level: :error, handled: false, operation: 'ollama.translator.parse_chunk')
131
+ raise
132
+ end
133
+
134
+ # Declared capabilities for the Ollama provider.
135
+ def capabilities
136
+ {
137
+ provider: 'ollama',
138
+ streaming: true,
139
+ tool_calls: true,
140
+ thinking: true,
141
+ vision: true,
142
+ embeddings: true,
143
+ tool_calls_as_text: false,
144
+ forced_tool_choice: false,
145
+ assistant_prefill: false
146
+ }.freeze
147
+ end
148
+
149
+ private
150
+
151
+ attr_reader :config
152
+
153
+ # -- Message formatting --
154
+
155
+ def format_messages(request)
156
+ messages = format_request_messages(request.messages)
157
+
158
+ if request.system.to_s.strip.empty?
159
+ messages
160
+ else
161
+ [{ role: 'system', content: request.system.strip }] + messages
162
+ end
163
+ end
164
+
165
+ def format_request_messages(messages)
166
+ return [] if messages.nil? || messages.empty?
167
+
168
+ messages.map { |msg| format_message(msg) }
169
+ end
170
+
171
+ def format_message(msg)
172
+ role = msg.role.to_s
173
+ content = format_message_content(msg)
174
+ result = { role: role, content: content }
175
+
176
+ images = extract_images(msg.content)
177
+ result[:images] = images unless images.empty?
178
+
179
+ result[:tool_call_id] = msg.tool_call_id if msg.tool_call_id
180
+ result.compact
181
+ end
182
+
183
+ def format_message_content(msg)
184
+ content = msg.content
185
+ return content if content.is_a?(String)
186
+
187
+ case content
188
+ when Array
189
+ extract_text_from_blocks(content)
190
+ when Canonical::ContentBlock
191
+ content.text? ? content.text.to_s : content.to_s
192
+ else
193
+ content.to_s
194
+ end
195
+ end
196
+
197
+ def extract_text_from_blocks(blocks)
198
+ parts = blocks.filter_map do |block|
199
+ case block
200
+ when Canonical::ContentBlock
201
+ format_content_block_text(block)
202
+ when Hash
203
+ block_hash = block.transform_keys(&:to_sym)
204
+ block_hash[:text]&.to_s
205
+ else
206
+ block.to_s
207
+ end
208
+ end
209
+ parts.join
210
+ end
211
+
212
+ def format_content_block_text(block)
213
+ case block.type
214
+ when :text, :thinking
215
+ block.text.to_s
216
+ when :tool_use
217
+ Legion::JSON.dump({ name: block.name, arguments: block.input || {} })
218
+ when :tool_result
219
+ block.text.to_s
220
+ end
221
+ end
222
+
223
+ def extract_images(content)
224
+ return [] unless content.is_a?(Array)
225
+
226
+ content.filter_map do |block|
227
+ next unless block.is_a?(Canonical::ContentBlock) && block.type == :image
228
+
229
+ block.data
230
+ end
231
+ end
232
+
233
+ # -- Tool formatting --
234
+
235
+ def format_tools(tools)
236
+ return nil if tools.to_h.empty?
237
+
238
+ tools.to_h.values.map do |tool|
239
+ tool_hash = if tool.is_a?(Canonical::ToolDefinition)
240
+ { name: tool.name, description: tool.description, parameters: tool.parameters }
241
+ elsif tool.is_a?(Hash)
242
+ tool.transform_keys(&:to_sym)
243
+ else
244
+ tool
245
+ end
246
+
247
+ name = tool_hash[:name] || tool_hash['name']
248
+ description = (tool_hash[:description] || tool_hash['description'] || '').to_s
249
+ raw_params = tool_hash[:parameters] || tool_hash[:input_schema]
250
+ raw_params = raw_params.to_h if raw_params.respond_to?(:to_h) && !raw_params.is_a?(Hash)
251
+ parameters = Legion::Extensions::Llm::Canonical::ToolDefinition.normalize_parameters(raw_params)
252
+
253
+ {
254
+ type: 'function',
255
+ function: {
256
+ name: name.to_s,
257
+ description: description,
258
+ parameters: parameters
259
+ }
260
+ }
261
+ end
262
+ end
263
+
264
+ # -- Parameter mapping (G18) --
265
+
266
+ def apply_options(payload, params)
267
+ return unless params.is_a?(Canonical::Params)
268
+
269
+ options = {}
270
+ SUPPORTED_PARAMS.each do |param_key|
271
+ value = params.public_send(param_key)
272
+ next if value.nil?
273
+
274
+ wire_key = PARAM_OPTIONS_KEYS[param_key]
275
+ options[wire_key] = case param_key
276
+ when :stop_sequences
277
+ Array(value)
278
+ else
279
+ value
280
+ end
281
+ end
282
+
283
+ payload[:options] = options unless options.empty?
284
+
285
+ return unless params.max_thinking_tokens
286
+
287
+ log.debug do
288
+ '[llm][ollama-translator] action=drop_unsupported_param param=max_thinking_tokens ' \
289
+ "value=#{params.max_thinking_tokens} reason=ollama_not_supported"
290
+ end
291
+ end
292
+
293
+ # -- Thinking configuration --
294
+
295
+ def apply_thinking_config(payload, request)
296
+ return unless enable_thinking?(request)
297
+
298
+ payload[:think] = true
299
+ end
300
+
301
+ def enable_thinking?(request)
302
+ return true if request.thinking.is_a?(Canonical::Thinking::Config) && request.thinking.enabled?
303
+ return true if request.thinking.is_a?(Hash) && (request.thinking[:enabled] != false)
304
+
305
+ false
306
+ end
307
+
308
+ # -- Response format --
309
+
310
+ def apply_response_format(payload, params)
311
+ return unless params.is_a?(Canonical::Params) && params.response_format
312
+
313
+ format_value = params.response_format
314
+ payload[:format] = if format_value.is_a?(Hash)
315
+ schema = format_value[:schema] || format_value['schema'] ||
316
+ format_value[:json_schema] || format_value['json_schema']
317
+ schema || format_value
318
+ else
319
+ format_value
320
+ end
321
+ end
322
+
323
+ # -- Response parsing --
324
+
325
+ def canonical_response?(wire)
326
+ wire.key?(:text) || wire.key?('text') || wire.key?(:stop_reason) || wire.key?('stop_reason')
327
+ end
328
+
329
+ def canonical_error_response(wire)
330
+ body = wire.is_a?(Hash) ? wire : {}
331
+ error_info = body['error'] || body[:error] ||
332
+ { type: 'parse_error', message: 'Failed to parse response' }
333
+
334
+ Canonical::Response.build(
335
+ text: '',
336
+ tool_calls: [],
337
+ usage: Canonical::Usage.from_hash(body['usage'] || body[:usage] || {}),
338
+ stop_reason: :error,
339
+ model: body['model'] || body[:model],
340
+ metadata: { error: error_info }
341
+ )
342
+ end
343
+
344
+ def thinking_metadata(message)
345
+ thinking = message[:thinking] || message['thinking']
346
+ return {} unless thinking
347
+
348
+ { thinking: thinking }
349
+ end
350
+
351
+ def build_canonical_thinking(extraction)
352
+ return nil unless extraction.thinking || extraction.signature
353
+
354
+ Canonical::Thinking.new(
355
+ content: extraction.thinking,
356
+ signature: extraction.signature
357
+ )
358
+ end
359
+
360
+ def parse_tool_calls(tool_calls_raw)
361
+ return [] unless tool_calls_raw.is_a?(Array) && !tool_calls_raw.empty?
362
+
363
+ tool_calls_raw.filter_map do |call|
364
+ call = call.transform_keys(&:to_sym) if call.is_a?(Hash)
365
+ function = call[:function] || call['function'] || {}
366
+ function = function.transform_keys(&:to_sym) if function.is_a?(Hash)
367
+
368
+ name = function[:name] || function['name']
369
+ id = call[:id] || call['id'] || name
370
+ args = parse_tool_arguments(function[:arguments] || function['arguments'])
371
+
372
+ Canonical::ToolCall.build(
373
+ id: id.to_s,
374
+ name: name.to_s,
375
+ arguments: args,
376
+ source: :client
377
+ )
378
+ rescue StandardError => e
379
+ handle_exception(e, level: :warn, handled: true, operation: 'ollama.translator.parse_tool_call')
380
+ nil
381
+ end
382
+ end
383
+
384
+ def parse_tool_arguments(arguments)
385
+ return {} if arguments.nil? || arguments == ''
386
+ return arguments if arguments.is_a?(Hash)
387
+
388
+ Legion::JSON.load(arguments)
389
+ rescue Legion::JSON::ParseError
390
+ {}
391
+ end
392
+
393
+ def map_stop_reason(done_reason, done = nil)
394
+ if done_reason
395
+ OLLAMA_STOP_REASON_MAP.fetch(done_reason.to_s, FALLBACK_STOP_REASON)
396
+ elsif done
397
+ FALLBACK_STOP_REASON
398
+ end
399
+ end
400
+
401
+ # -- Chunk parsing --
402
+
403
+ def normalize_chunk_input(raw)
404
+ return nil if raw.is_a?(String) && raw.strip.empty?
405
+
406
+ raw.is_a?(Hash) ? raw : parse_json_safely(raw)
407
+ end
408
+
409
+ def handle_canonical_chunk(data)
410
+ normalized = data.is_a?(Hash) && data.keys.first.is_a?(Symbol) ? data : data.transform_keys(&:to_sym)
411
+ Canonical::Chunk.from_hash(normalized)
412
+ rescue StandardError => e
413
+ log.debug { "[llm][ollama-translator] action=canonical_chunk_parse_error error=#{e.message}" }
414
+ nil
415
+ end
416
+
417
+ def parse_ollama_chunk(data)
418
+ message = data[:message] || data['message'] || {}
419
+ done = data[:done] || data['done']
420
+ done_reason = data[:done_reason] || data['done_reason']
421
+ request_id = data[:request_id] || data['request_id'] || data[:id] || data['id']
422
+
423
+ # Tool call delta
424
+ tool_calls = message[:tool_calls] || message['tool_calls']
425
+ return build_tool_call_chunk(tool_calls, request_id) unless Array(tool_calls).empty?
426
+
427
+ # Thinking delta
428
+ thinking_content = message[:thinking] || message['thinking']
429
+ unless thinking_content.to_s.empty?
430
+ return Canonical::Chunk.thinking_delta(
431
+ delta: thinking_content.to_s,
432
+ request_id: request_id
433
+ )
434
+ end
435
+
436
+ # Text delta — emit content even on done chunks (Ollama's final chunk may carry text)
437
+ content = message[:content] || message['content']
438
+ unless content.to_s.empty?
439
+ return Canonical::Chunk.text_delta(
440
+ delta: content.to_s,
441
+ request_id: request_id
442
+ )
443
+ end
444
+
445
+ # Done chunk (only when no content/thinking/tool_calls to emit)
446
+ return build_done_chunk(data, done_reason, request_id) if done
447
+
448
+ nil
449
+ end
450
+
451
+ def build_done_chunk(data, done_reason, request_id)
452
+ usage = Canonical::Usage.from_hash({
453
+ input_tokens: data[:prompt_eval_count] || data['prompt_eval_count'],
454
+ output_tokens: data[:eval_count] || data['eval_count']
455
+ })
456
+
457
+ Canonical::Chunk.done(
458
+ request_id: request_id,
459
+ usage: usage,
460
+ stop_reason: map_stop_reason(done_reason, true)
461
+ )
462
+ end
463
+
464
+ def build_tool_call_chunk(tool_calls, request_id)
465
+ first_call = tool_calls.first
466
+ first_call = first_call.transform_keys(&:to_sym) if first_call.is_a?(Hash)
467
+ function = first_call[:function] || first_call['function'] || {}
468
+ function = function.transform_keys(&:to_sym) if function.is_a?(Hash)
469
+
470
+ tc = Canonical::ToolCall.build(
471
+ id: (first_call[:id] || first_call['id'] || function[:name] || 'synthesized').to_s,
472
+ name: (function[:name] || function['name']).to_s,
473
+ arguments: parse_tool_arguments(function[:arguments] || function['arguments']),
474
+ source: :client
475
+ )
476
+
477
+ Canonical::Chunk.tool_call_delta(
478
+ tool_call: tc,
479
+ request_id: request_id
480
+ )
481
+ end
482
+
483
+ # -- JSON helpers --
484
+
485
+ def parse_json_safely(raw)
486
+ return nil unless raw.is_a?(String)
487
+
488
+ Legion::JSON.load(raw)
489
+ rescue Legion::JSON::ParseError => e
490
+ log.debug { "[llm][ollama-translator] action=json_parse_error error=#{e.message}" }
491
+ nil
492
+ end
493
+ end
494
+ end
495
+ end
496
+ end
497
+ end
@@ -4,7 +4,7 @@ module Legion
4
4
  module Extensions
5
5
  module Llm
6
6
  module Ollama
7
- VERSION = '0.2.14'
7
+ VERSION = '0.2.17'
8
8
  end
9
9
  end
10
10
  end
@@ -2,8 +2,10 @@
2
2
 
3
3
  require 'legion/extensions/llm'
4
4
  require 'legion/extensions/llm/ollama/provider'
5
+ require 'legion/extensions/llm/ollama/translator'
5
6
  require 'legion/extensions/llm/ollama/version'
6
7
  require 'legion/logging/helper'
8
+ require_relative 'ollama/actors/discovery_refresh'
7
9
 
8
10
  module Legion
9
11
  module Extensions
@@ -30,10 +32,7 @@ module Legion
30
32
  fleet: {
31
33
  enabled: false,
32
34
  respond_to_requests: false,
33
- capabilities: %i[chat stream_chat embed],
34
- lanes: [],
35
- concurrency: 1,
36
- queue_suffix: nil
35
+ capabilities: %i[chat stream_chat embed tools]
37
36
  }
38
37
  }
39
38
  )
@@ -73,7 +72,8 @@ module Legion
73
72
  instances[:local] = {
74
73
  base_url: 'http://127.0.0.1:11434',
75
74
  tier: :local,
76
- capabilities: %i[completion embedding vision]
75
+ capabilities: {},
76
+ provider_capabilities: { streaming: true }
77
77
  }
78
78
  end
79
79
 
@@ -85,7 +85,8 @@ module Legion
85
85
  configured.each do |name, config|
86
86
  instances[name.to_sym] = normalize_instance_config(config).merge(
87
87
  tier: :direct,
88
- capabilities: %i[completion embedding vision]
88
+ capabilities: {},
89
+ provider_capabilities: { streaming: true }
89
90
  )
90
91
  end
91
92
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lex-llm-ollama
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.14
4
+ version: 0.2.17
5
5
  platform: ruby
6
6
  authors:
7
7
  - LegionIO
@@ -71,14 +71,14 @@ dependencies:
71
71
  requirements:
72
72
  - - ">="
73
73
  - !ruby/object:Gem::Version
74
- version: 0.4.3
74
+ version: 0.5.0
75
75
  type: :runtime
76
76
  prerelease: false
77
77
  version_requirements: !ruby/object:Gem::Requirement
78
78
  requirements:
79
79
  - - ">="
80
80
  - !ruby/object:Gem::Version
81
- version: 0.4.3
81
+ version: 0.5.0
82
82
  description: Ollama provider integration for the LegionIO LLM routing framework.
83
83
  email:
84
84
  - matthewdiverson@gmail.com
@@ -101,6 +101,7 @@ files:
101
101
  - lib/legion/extensions/llm/ollama/actors/fleet_worker.rb
102
102
  - lib/legion/extensions/llm/ollama/provider.rb
103
103
  - lib/legion/extensions/llm/ollama/runners/fleet_worker.rb
104
+ - lib/legion/extensions/llm/ollama/translator.rb
104
105
  - lib/legion/extensions/llm/ollama/version.rb
105
106
  homepage: https://github.com/LegionIO/lex-llm-ollama
106
107
  licenses: