lex-llm-ollama 0.2.7 → 0.2.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: bd3556fdfe59e46af218aa97edcc1f81b3ffb39c8af1c144f4b3f0e23bf17bea
- data.tar.gz: cff453247d66070976c3cb74e1f5524e718fbded408d340dc7cb74892750b39c
+ metadata.gz: 90c9bd5b9b740637320d528012401d93d38e1d0b38396c3baa51ad45067547aa
+ data.tar.gz: 1024e115de9bea52c58b3fb20bbbf1fc561bc6299f4bdfea4a95d2c7899618c8
  SHA512:
- metadata.gz: a3cac452d99bfb4e0c34285b38979a59e0937a98b3dac849fa401fde6022d9e04887062b062185199294c6d4dafe1ea5f65c9fdbb9be4b5e3cfb0fcbfd9a1159
- data.tar.gz: ecbd0541f0ca31ee2979f1424034fd719533d0f86b6631be510ff696b775ee23b4a314bf86596e3062c223a90d3b65d4bc5a1b30c5f69833cf7596cea00295fe
+ metadata.gz: 7222b463dd74c7ba154d363dabaf451979562831313583ac2e6fc034ddda595c13b3c4b1ae6772906842a92aa83842bb9a37c2c99f198c5a4985c82a3f61174c
+ data.tar.gz: 682b890f4c30d28d0786c6d3f7d03ac1d024fbec025119373346f9bcb87cad22391328b02238007ab9550cf6dd8ef07f565cc69628e26084fc2815a4b2d8e095
data/CHANGELOG.md CHANGED
@@ -1,5 +1,19 @@
  # Changelog

+ ## 0.2.9 - 2026-05-13
+
+ - Add `fetch_model_detail` — calls POST `/api/show` to retrieve the real context window from Ollama.
+ - Add `resolve_context_window` — tries the live model-detail cache first, then falls back to the static prefix map (see the sketch after this section).
+ - Add `extract_context_window` — parses `num_ctx` from the `model_info` hash or the `parameters` string in the `/api/show` response.
+ - Add `CONTEXT_WINDOWS`, a static fallback map covering common Ollama model families.
+ - Rescue `Faraday::ConnectionFailed` in `discover_offerings` with a concise warning log instead of an unhandled exception.
+ - Add `show_model_url` endpoint helper returning `/api/show`.
+
+ ## 0.2.8 - 2026-05-12
+
+ - Include `Legion::Logging::Helper` directly in the Ollama provider, actor, and fleet runner runtime surfaces.
+ - Add sanitized debug logging for provider discovery, payload rendering, tool formatting, embeddings, offerings, and fleet handoff.
+
  ## 0.2.7 - 2026-05-07

  - Render Ollama embedding payloads with the canonical model id when callers pass `Model::Info` objects.
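The 0.2.9 entries above describe a three-step lookup for a model's context window: live detail from POST `/api/show`, then the `CONTEXT_WINDOWS` prefix map, then nothing. A minimal standalone sketch of that fallback order, mirroring the provider code further down this diff; the model id, the stubbed live lookup, and the two-entry map excerpt are illustrative, not part of the gem:

```ruby
# Sketch of the 0.2.9 context-window fallback; no Legion runtime required.
CONTEXT_WINDOWS = { 'llama3.1' => 128_000, 'phi4' => 16_384 }.freeze # excerpt of the gem's map

# Step 1: the gem consults live detail from POST /api/show first.
# Stubbed here as if the Ollama instance were unreachable.
def fetch_model_detail(_model_name)
  nil
end

# Step 2: prefix match against the static map (same logic as infer_context_window).
def infer_context_window(model_id)
  name = model_id.to_s.split(':').first # 'llama3.1:8b' -> 'llama3.1'
  CONTEXT_WINDOWS.find { |prefix, _| name.start_with?(prefix) }&.last
end

model_id = 'llama3.1:8b' # hypothetical tag
detail = fetch_model_detail(model_id)
context_window = (detail && detail[:context_window]) || infer_context_window(model_id)
puts context_window # => 128000
```

In the gem these are provider instance methods and the live path hits a real Ollama instance; the sketch only shows the precedence.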
@@ -12,6 +12,7 @@ end

  require 'legion/extensions/llm/ollama'
  require 'legion/extensions/llm/fleet/provider_responder'
+ require 'legion/logging/helper'

  module Legion
  module Extensions
@@ -20,6 +21,8 @@ module Legion
  module Actor
  # Subscription actor for Ollama fleet request consumption.
  class FleetWorker < Legion::Extensions::Actors::Subscription
+ include Legion::Logging::Helper
+
  def runner_class
  'Legion::Extensions::Llm::Ollama::Runners::FleetWorker'
  end
@@ -33,7 +36,9 @@ module Legion
  end

  def enabled?
- Legion::Extensions::Llm::Fleet::ProviderResponder.enabled_for?(Ollama.discover_instances)
+ enabled = Legion::Extensions::Llm::Fleet::ProviderResponder.enabled_for?(Ollama.discover_instances)
+ log.debug { "ollama fleet worker actor enabled=#{enabled}" }
+ enabled
  end
  end
  end
@@ -1,6 +1,7 @@
  # frozen_string_literal: true

  require 'legion/extensions/llm'
+ require 'legion/logging/helper'

  module Legion
  module Extensions
@@ -8,6 +9,8 @@ module Legion
  module Ollama
  # Ollama provider implementation for the Legion::Extensions::Llm base provider contract.
  class Provider < Legion::Extensions::Llm::Provider # rubocop:disable Metrics/ClassLength
+ include Legion::Logging::Helper
+
  class << self
  attr_writer :registry_publisher

@@ -54,7 +57,7 @@ module Legion
  def version_url = '/api/version'

  def list_running_models
- log.info { "listing running models from #{api_base}#{running_models_url}" }
+ log.debug { "ollama provider listing running models endpoint=#{api_base}#{running_models_url}" }
  connection.get(running_models_url).body.fetch('models', [])
  rescue StandardError => e
  handle_exception(e, level: :error, handled: true, operation: 'ollama.list_running_models')
@@ -62,29 +65,40 @@ module Legion
  end

  def readiness(live: false)
- log.info { "checking readiness live=#{live} at #{api_base}" }
+ log.debug { "ollama provider checking readiness live=#{live} endpoint=#{api_base}" }
  super.tap do |metadata|
  self.class.registry_publisher.publish_readiness_async(metadata) if live
  end
  end

  def list_models
- log.info { "discovering models from #{api_base}#{models_url}" }
+ log.debug { "ollama provider discovering models endpoint=#{api_base}#{models_url}" }
  super.tap do |models|
- log.info { "discovered #{models.size} model(s) from Ollama" }
+ log.debug { "ollama provider discovered model_count=#{models.size}" }
  self.class.registry_publisher.publish_models_async(models, readiness: readiness(live: false))
  end
  end

  def show_model(model)
- log.info { "fetching model details for #{model}" }
+ log.debug { "ollama provider fetching model details model=#{model}" }
  connection.post(show_model_url, { model: model }).body
  rescue StandardError => e
  handle_exception(e, level: :error, handled: true, operation: 'ollama.show_model')
  raise
  end

+ def fetch_model_detail(model_name)
+ raw = show_model(model_name)
+ context_window = extract_context_window(raw)
+ { context_window: context_window }.compact
+ rescue StandardError => e
+ handle_exception(e, level: :warn, handled: true, operation: 'ollama.fetch_model_detail',
+ model: model_name)
+ nil
+ end
+
  def pull_model(model, stream: false)
+ log.debug { "ollama provider pulling model=#{model} stream=#{stream}" }
  log.info { "pulling model #{model} stream=#{stream}" }
  connection.post(pull_url, { model: model, stream: stream }).body
  rescue StandardError => e
@@ -93,19 +107,52 @@ module Legion
  end

  def discover_offerings(live: false, **)
- models = if live
- @cached_models = list_models
- else
- Array(@cached_models)
- end
- models.map { |model_info| offering_from_model(model_info) }
+ log.debug do
+ "ollama provider discovering offerings live=#{live} cached_model_count=#{Array(@cached_models).size}"
+ end
+ resolve_models(live).map { |model_info| offering_from_model(model_info) }.tap do |offerings|
+ log.debug { "ollama provider built offering_count=#{offerings.size} live=#{live}" }
+ end
+ rescue Faraday::ConnectionFailed => e
+ log.warn("[ollama] instance=#{provider_instance_id} unreachable: #{e.message}")
+ []
  rescue StandardError => e
- handle_exception(e, level: :warn, handled: true, operation: 'ollama.discover_offerings')
+ handle_exception(e, level: :warn, handled: true, operation: 'ollama.discover_offerings',
+ backtrace_limit: 3)
  []
  end

+ CONTEXT_WINDOWS = {
+ 'qwen3' => 128_000,
+ 'qwen2.5' => 128_000,
+ 'llama3' => 128_000,
+ 'llama3.1' => 128_000,
+ 'llama3.2' => 128_000,
+ 'llama3.3' => 128_000,
+ 'gemma2' => 8_192,
+ 'gemma3' => 128_000,
+ 'mistral' => 128_000,
+ 'deepseek' => 128_000,
+ 'phi3' => 128_000,
+ 'phi4' => 16_384,
+ 'command-r' => 128_000,
+ 'codellama' => 16_384,
+ 'nomic-embed' => 8_192,
+ 'mxbai-embed' => 512,
+ 'snowflake' => 512,
+ 'bge' => 512
+ }.freeze
+
  private

+ def resolve_models(live)
+ if live
+ @cached_models = list_models
+ else
+ Array(@cached_models)
+ end
+ end
+
  def offering_from_model(model_info)
  Legion::Extensions::Llm::Routing::ModelOffering.new(
  provider_family: :ollama,
@@ -137,7 +184,46 @@ module Legion
  end

  def offering_limits(model_info)
- { context_window: model_info.context_length }.compact
+ ctx = model_info.context_length || resolve_context_window(model_info.id)
+ ctx ? { context_window: ctx } : {}
+ end
+
+ def resolve_context_window(model_id)
+ detail = model_detail(model_id)
+ return detail[:context_window] if detail.is_a?(Hash) && detail[:context_window]
+
+ infer_context_window(model_id)
+ end
+
+ def infer_context_window(model_id)
+ name = model_id.to_s.split(':').first
+ CONTEXT_WINDOWS.find { |prefix, _| name.start_with?(prefix) }&.last
+ end
+
+ def extract_context_window(raw)
+ return nil unless raw.is_a?(Hash)
+
+ from_model_info(raw) || from_parameters_string(raw)
+ end
+
+ def from_model_info(raw)
+ model_info = raw['model_info'] || raw[:model_info]
+ return unless model_info.is_a?(Hash)
+
+ num_ctx_from_hash(model_info)&.to_i
+ end
+
+ def num_ctx_from_hash(model_info)
+ model_info['num_ctx'] || model_info[:num_ctx] ||
+ model_info.find { |k, _| k.to_s.end_with?('.context_length') }&.last
+ end
+
+ def from_parameters_string(raw)
+ params = raw['parameters'] || raw[:parameters]
+ return unless params.is_a?(String)
+
+ match = params.match(/num_ctx\s+(\d+)/)
+ match[1].to_i if match
  end

  def offering_metadata(model_info)
@@ -153,8 +239,14 @@ module Legion
  end

  def render_payload(messages, tools:, temperature:, model:, stream:, schema:, thinking:, tool_prefs:) # rubocop:disable Metrics/ParameterLists
+ model_id = model.respond_to?(:id) ? model.id : model
+ log.debug do
+ "ollama provider rendering chat payload model=#{model_id} message_count=#{messages.size} " \
+ "stream=#{stream} tools=#{tools.size} schema=#{!schema.nil?} thinking=#{thinking ? true : false}"
+ end
+
  {
- model: model.id,
+ model: model_id,
  messages: format_messages(messages),
  stream: stream,
  think: thinking ? true : nil,
@@ -196,6 +288,9 @@ module Legion
  def format_tools(tools)
  return nil if tools.empty?

+ tool_names = tools.values.filter_map { |tool| tool.respond_to?(:name) ? tool.name : nil }
+ log.debug { "ollama provider formatting tools count=#{tools.size} names=#{tool_names.join(',')}" }
+
  tools.values.map do |tool|
  {
  type: 'function',
@@ -243,6 +338,8 @@ module Legion
  def parse_tool_calls(tool_calls)
  return nil unless tool_calls

+ log.debug { "ollama provider parsing tool_call_count=#{tool_calls.size}" }
+
  tool_calls.to_h do |call|
  function = call.fetch('function', {})
  [
@@ -289,7 +386,11 @@ module Legion
  end

  def render_embedding_payload(text, model:, dimensions:)
- { model: model.respond_to?(:id) ? model.id : model, input: text, dimensions: dimensions }.compact
+ model_id = model.respond_to?(:id) ? model.id : model
+ input_count = text.respond_to?(:size) ? text.size : 1
+ log.debug { "ollama provider rendering embedding payload model=#{model_id} input_count=#{input_count}" }
+
+ { model: model_id, input: text, dimensions: dimensions }.compact
  end

  def parse_embedding_response(response, model:, text:)
@@ -302,6 +403,9 @@ module Legion
  body['embeddings']&.first
  end

+ vector_count = vectors.respond_to?(:size) ? vectors.size : 0
+ log.debug { "ollama provider parsed embedding response model=#{model} vector_count=#{vector_count}" }
+
  Legion::Extensions::Llm::Embedding.new(vectors: vectors, model: model,
  input_tokens: body['prompt_eval_count'].to_i)
  end
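For reference, the two parse paths `extract_context_window` takes through an `/api/show` response can be exercised standalone. The sketch below mirrors the string-key branches of the logic in the hunks above; the response hashes and field values are made-up examples, not captured Ollama output:

```ruby
# Standalone mirror of the extract_context_window logic in the diff above.
def extract_context_window(raw)
  return nil unless raw.is_a?(Hash)

  # Path 1: num_ctx or an architecture-scoped *.context_length key in model_info.
  model_info = raw['model_info']
  if model_info.is_a?(Hash)
    hit = model_info['num_ctx'] ||
          model_info.find { |k, _| k.to_s.end_with?('.context_length') }&.last
    return hit.to_i if hit
  end

  # Path 2: a "num_ctx <n>" line inside the plain-text parameters block.
  params = raw['parameters']
  return nil unless params.is_a?(String)

  match = params.match(/num_ctx\s+(\d+)/)
  match[1].to_i if match
end

puts extract_context_window('model_info' => { 'llama.context_length' => 131_072 }) # => 131072
puts extract_context_window('parameters' => "num_ctx 8192\ntemperature 0.7")       # => 8192
```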
@@ -2,6 +2,7 @@

  require 'legion/extensions/llm/fleet/provider_responder'
  require 'legion/extensions/llm/ollama'
+ require 'legion/logging/helper'

  module Legion
  module Extensions
@@ -10,9 +11,17 @@ module Legion
  module Runners
  # Runner entrypoint for Ollama fleet request execution.
  module FleetWorker
+ extend Legion::Logging::Helper
+
  module_function

  def handle_fleet_request(payload, delivery: nil, properties: nil)
+ payload_keys = payload.respond_to?(:keys) ? payload.keys.join(',') : payload.class
+ log.debug do
+ "ollama fleet worker handing off request payload_keys=#{payload_keys} " \
+ "delivery=#{!delivery.nil?} properties=#{!properties.nil?}"
+ end
+
  Legion::Extensions::Llm::Fleet::ProviderResponder.call(
  payload: payload,
  provider_family: Ollama::PROVIDER_FAMILY,
@@ -4,7 +4,7 @@ module Legion
  module Extensions
  module Llm
  module Ollama
- VERSION = '0.2.7'
+ VERSION = '0.2.9'
  end
  end
  end
@@ -3,6 +3,7 @@
  require 'legion/extensions/llm'
  require 'legion/extensions/llm/ollama/provider'
  require 'legion/extensions/llm/ollama/version'
+ require 'legion/logging/helper'

  module Legion
  module Extensions
@@ -49,24 +50,10 @@ module Legion
  def self.discover_instances
  instances = {}

- if CredentialSources.socket_open?('127.0.0.1', 11_434, timeout: 0.1)
- instances[:local] = {
- base_url: 'http://127.0.0.1:11434',
- tier: :local,
- capabilities: %i[completion embedding vision]
- }
- end
-
- configured = CredentialSources.setting(:extensions, :llm, :ollama, :instances)
- if configured.is_a?(Hash)
- configured.each do |name, config|
- instances[name.to_sym] = normalize_instance_config(config).merge(
- tier: :direct,
- capabilities: %i[completion embedding vision]
- )
- end
- end
+ discover_local_instance(instances)
+ discover_configured_instances(instances)

+ log.debug { "ollama discovery returning instance_count=#{instances.size}" }
  instances
  end

@@ -77,6 +64,33 @@ module Legion
  normalized[:base_url] ||= normalized.delete(:endpoint)
  normalized.compact
  end
+
+ def self.discover_local_instance(instances)
+ log.debug { 'ollama discovery probing local socket host=127.0.0.1 port=11434' }
+ return unless CredentialSources.socket_open?('127.0.0.1', 11_434, timeout: 0.1)
+
+ log.debug { 'ollama discovery found local socket instance' }
+ instances[:local] = {
+ base_url: 'http://127.0.0.1:11434',
+ tier: :local,
+ capabilities: %i[completion embedding vision]
+ }
+ end
+
+ def self.discover_configured_instances(instances)
+ configured = CredentialSources.setting(:extensions, :llm, :ollama, :instances)
+ return unless configured.is_a?(Hash)
+
+ log.debug { "ollama discovery loading configured instance_count=#{configured.size}" }
+ configured.each do |name, config|
+ instances[name.to_sym] = normalize_instance_config(config).merge(
+ tier: :direct,
+ capabilities: %i[completion embedding vision]
+ )
+ end
+ end
+
+ private_class_method :discover_local_instance, :discover_configured_instances
  end
  end
  end
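As a reading aid for the refactored discovery above: `discover_configured_instances` expects the `:extensions, :llm, :ollama, :instances` setting to be a hash of name => config, and `normalize_instance_config` accepts `:endpoint` as an alias for `:base_url`. A sketch of that shape with the normalization and merge inlined; the instance names and hosts are hypothetical:

```ruby
# Hypothetical value for the instances setting; hosts and names are invented.
configured = {
  gpu_box: { base_url: 'http://10.0.0.5:11434' },
  lab: { endpoint: 'http://lab.internal:11434' } # :endpoint aliases :base_url
}

# Inline mirror of normalize_instance_config plus the merge from the diff above.
instances = configured.to_h do |name, config|
  normalized = config.transform_keys(&:to_sym)
  normalized[:base_url] ||= normalized.delete(:endpoint)
  [name.to_sym, normalized.compact.merge(
    tier: :direct,
    capabilities: %i[completion embedding vision]
  )]
end

p instances[:lab]
# => {:base_url=>"http://lab.internal:11434", :tier=>:direct,
#     :capabilities=>[:completion, :embedding, :vision]}
```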
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: lex-llm-ollama
  version: !ruby/object:Gem::Version
- version: 0.2.7
+ version: 0.2.9
  platform: ruby
  authors:
  - LegionIO