lex-llm-ollama 0.2.17 → 0.2.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7850eb1a4f0fcf50d9d0a86de7b9c2e60fa47154e1c6b330a492abeb00c25803
4
- data.tar.gz: 24e040db015065dd7e508a995aa0f2b072910da41552029a1ff561993961331a
3
+ metadata.gz: 9105e1f109fd5c83078224391ceb669ea321943a2075d85d5a77cf48b73d16e7
4
+ data.tar.gz: 1bd18adb284f8b8fa5c12e0049f310973989efb5a20070dcbb172754f64f941b
5
5
  SHA512:
6
- metadata.gz: 762912cf8067d8b1c9019ea2d1d10261e234abac127ad1eeeecb5d2b7e41219c09f6294f68f022f0ad33b1f2eb95332db8b1ed3521eeef84aecfbeb11c3f186e
7
- data.tar.gz: 9f99c4bc9f342d1061077d9dd8f663b35a0a9c962515cddbcf76d19a55fa734040ab8f4b7f7fd6767395d3dd9582913c6b2dbd3ac3010d10c787e932f218fd6a
6
+ metadata.gz: 175570e4fdf0574998741b731718675d643f03c1163183bd41a28a7c90151c71919150060c9d87ff27840f0bcf5203b90dba85e81680f1ec74d3f494b22c85f0
7
+ data.tar.gz: 9ebb03d6cdf2078303f8ac674cfe939a73770a0ce7b721e86c6daaeb63169c0039dfca9e7063138e83896322820939b203f68265da9a2125066ee4e05ecefcd4
data/CHANGELOG.md CHANGED
@@ -1,5 +1,33 @@
1
1
  # Changelog
2
2
 
3
+ ## [0.2.22] - 2026-06-20
4
+
5
+ ### Fixed
6
+ - Stub shared registry publishing through `RegistryPublisher#schedule` in specs so async availability-event coverage stays stable after the shared publisher moved off raw `Thread.new`.
7
+
8
+ ## [0.2.21] - 2026-06-20
9
+
10
+ ### Fixed
11
+ - Stop bulk-publishing Ollama model availability from `list_models`; discovery now emits one registry event per seen model from the shared `lex-llm` policy-filter path so blocked models stay observable without duplicate publishes.
12
+
13
+ ## [0.2.20] - 2026-06-20
14
+
15
+ ### Changed
16
+ - Slow the live discovery refresh cadence from 60 seconds to 300 seconds for Ollama instances; `extensions.llm.ollama.discovery_interval` still overrides the default.
17
+
18
+ ## [0.2.19] - 2026-06-20
19
+
20
+ ### Fixed
21
+ - Route Ollama capability overrides through the shared `lex-llm` provider contract and preserve the canonical singular `:embedding` capability on embedding offerings.
22
+
23
+ ## [0.2.18] - 2026-06-19
24
+
25
+ ### Changed
26
+ - Adopt `Legion::Extensions::Llm::Inventory::ScopedRefresher` mixin (lex-llm 0.6.0). Discovery
27
+ refresh actors now write directly to the live `Inventory` catalog via `Inventory.write_lane`.
28
+ - Pin `lex-llm >= 0.6.0` and `legion-llm >= 0.14.0` in gemspec.
29
+ - Standard `weight: 100` default added to provider instance settings schema.
30
+
3
31
  ## 0.2.17 - 2026-06-16
4
32
 
5
33
  - dependency updates, code quality improvements
@@ -27,5 +27,5 @@ Gem::Specification.new do |spec|
27
27
  spec.add_dependency 'legion-logging', '>= 1.3.2'
28
28
  spec.add_dependency 'legion-settings', '>= 1.3.14'
29
29
  spec.add_dependency 'legion-transport', '>= 1.4.14'
30
- spec.add_dependency 'lex-llm', '>= 0.5.0'
30
+ spec.add_dependency 'lex-llm', '>= 0.6.0'
31
31
  end
@@ -1,11 +1,19 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'digest'
4
+
3
5
  begin
4
6
  require 'legion/extensions/actors/every'
5
7
  rescue LoadError => e
6
8
  warn(e.message) if $VERBOSE
7
9
  end
8
10
 
11
+ begin
12
+ require 'legion/extensions/llm/inventory/scoped_refresher'
13
+ rescue LoadError => e
14
+ warn(e.message) if $VERBOSE
15
+ end
16
+
9
17
  return unless defined?(Legion::Extensions::Actors::Every)
10
18
 
11
19
  module Legion
@@ -16,7 +24,11 @@ module Legion
16
24
  class DiscoveryRefresh < Legion::Extensions::Actors::Every
17
25
  include Legion::Logging::Helper
18
26
 
19
- REFRESH_INTERVAL = 1800
27
+ if defined?(Legion::Extensions::Llm::Inventory::ScopedRefresher)
28
+ include Legion::Extensions::Llm::Inventory::ScopedRefresher
29
+ end
30
+
31
+ def self.every_seconds = 300
20
32
 
21
33
  def runner_class = self.class
22
34
  def runner_function = 'manual'
@@ -26,25 +38,143 @@ module Legion
26
38
  def generate_task? = false
27
39
 
28
40
  def time
29
- return REFRESH_INTERVAL unless defined?(Legion::Settings)
41
+ return self.class.every_seconds unless defined?(Legion::Settings)
30
42
 
31
- Legion::Settings.dig(:extensions, :llm, :ollama, :discovery_interval) || REFRESH_INTERVAL
43
+ Legion::Settings.dig(:extensions, :llm, :ollama, :discovery_interval) || self.class.every_seconds
32
44
  end
33
45
 
34
- def manual
35
- log.debug('[ollama][discovery_refresh] refreshing model list')
36
- return unless defined?(Legion::LLM::Discovery)
46
+ def scope_key(**)
47
+ { provider: :ollama }
48
+ end
37
49
 
38
- Legion::LLM::Discovery.refresh_discovered_models!(provider: :ollama)
39
- if defined?(Legion::LLM::Router) && Legion::LLM::Router.respond_to?(:populate_auto_rules)
40
- Legion::LLM::Router.populate_auto_rules(Legion::LLM::Discovery.discovered_instances)
41
- end
42
- if defined?(Legion::LLM::Inventory) && Legion::LLM::Inventory.respond_to?(:invalidate_offerings_cache!)
43
- Legion::LLM::Inventory.invalidate_offerings_cache!
50
+ def compute_lanes_for_scope(**)
51
+ return [] unless defined?(Legion::LLM::Call::Registry)
52
+
53
+ lanes = []
54
+ ollama_instances.each do |instance|
55
+ collect_lanes_for_instance(instance, lanes)
56
+ rescue StandardError => e
57
+ handle_exception(e, level: :warn, handled: true,
58
+ operation: 'ollama.discovery_refresh.compute_lanes',
59
+ instance: instance[:instance])
44
60
  end
61
+ lanes
62
+ rescue StandardError => e
63
+ handle_exception(e, level: :warn, handled: true,
64
+ operation: 'ollama.discovery_refresh.compute_lanes_for_scope')
65
+ []
66
+ end
67
+
68
+ def credential_hash(**)
69
+ settings = ollama_settings
70
+ Digest::SHA256.hexdigest(settings[:api_key].to_s + settings[:instances].to_s)[0, 16]
71
+ rescue StandardError => e
72
+ handle_exception(e, level: :warn, handled: true, operation: 'ollama.discovery_refresh.credential_hash')
73
+ 'unknown'
74
+ end
75
+
76
+ def manual(**)
77
+ tick if defined?(Legion::Extensions::Llm::Inventory::ScopedRefresher) &&
78
+ respond_to?(:tick, true)
45
79
  rescue StandardError => e
46
80
  handle_exception(e, level: :warn, handled: true, operation: 'ollama.actor.discovery_refresh')
47
81
  end
82
+
83
+ private
84
+
85
+ def ollama_instances
86
+ Legion::LLM::Call::Registry.all_instances.select do |e|
87
+ (e[:provider] || '').to_sym == :ollama
88
+ end
89
+ end
90
+
91
+ def collect_lanes_for_instance(instance, lanes)
92
+ adapter = instance[:adapter]
93
+ return unless adapter.respond_to?(:discover_offerings)
94
+
95
+ Array(adapter.discover_offerings(live: true)).each do |raw_offering|
96
+ offering = offering_to_hash(raw_offering)
97
+ next unless offering
98
+
99
+ model = offering[:model] || offering['model']
100
+ next unless model
101
+
102
+ lane = build_lane(offering, instance)
103
+ lanes << lane
104
+ lanes << fleet_lane(lane, instance) if emit_fleet_lane?(lane)
105
+ end
106
+ end
107
+
108
+ def offering_to_hash(offering)
109
+ return nil if offering.nil?
110
+ return offering if offering.is_a?(Hash)
111
+
112
+ hash = offering.to_h
113
+ hash[:type] ||= hash[:usage_type]
114
+ hash[:enabled] = offering.respond_to?(:enabled?) ? offering.enabled? : true
115
+ hash
116
+ end
117
+
118
+ def build_lane(offering, instance)
119
+ instance_id = instance[:instance] || instance[:instance_id] || instance[:id]
120
+ raw_tier = offering[:tier] || :local
121
+ offer_type = offering[:type]
122
+ type = %i[embed embedding].include?(offer_type) ? :embedding : :inference
123
+ capabilities = normalize_capabilities(offering[:capabilities] || [])
124
+ model = offering[:model] || offering['model']
125
+
126
+ lane_id = Legion::Extensions::Llm::Inventory::ScopedRefresher.compose_id(
127
+ tier: raw_tier, provider_family: :ollama,
128
+ instance_id: instance_id, type: type, model: model
129
+ )
130
+
131
+ {
132
+ id: lane_id,
133
+ tier: raw_tier,
134
+ provider_family: :ollama,
135
+ instance_id: instance_id,
136
+ model: model,
137
+ canonical_model_alias: offering[:canonical_model_alias] || offering[:name],
138
+ type: type,
139
+ capabilities: capabilities,
140
+ limits: offering[:limits] || {},
141
+ enabled: offering.fetch(:enabled, true),
142
+ cost: offering[:cost] || {}
143
+ }
144
+ end
145
+
146
+ def emit_fleet_lane?(lane)
147
+ return false unless lane[:type] == :inference
148
+
149
+ ollama_settings&.dig(:fleet, :dispatch, :enabled)
150
+ end
151
+
152
+ def fleet_lane(lane, instance)
153
+ fleet_id = Legion::Extensions::Llm::Inventory::ScopedRefresher.compose_id(
154
+ tier: :fleet, provider_family: :ollama,
155
+ instance_id: instance[:instance] || instance[:instance_id],
156
+ type: lane[:type], model: lane[:model]
157
+ )
158
+ lane.merge(id: fleet_id, tier: :fleet)
159
+ end
160
+
161
+ def normalize_capabilities(caps)
162
+ # Inventory::Capabilities lives in lex-llm; the previous fallback (`return []
163
+ # unless defined?(...)`) silently swallowed every capability the operator
164
+ # declared via enable_thinking/enable_tools when the constant wasn't loaded.
165
+ # Always normalize through the shared vocabulary so aliases collapse.
166
+ if defined?(Legion::Extensions::Llm::Inventory::Capabilities)
167
+ Legion::Extensions::Llm::Inventory::Capabilities.normalize(caps)
168
+ else
169
+ Array(caps).compact.map(&:to_sym).uniq
170
+ end
171
+ end
172
+
173
+ def ollama_settings
174
+ Legion::Settings.dig(:extensions, :llm, :ollama)
175
+ rescue StandardError
176
+ {}
177
+ end
48
178
  end
49
179
  end
50
180
  end
@@ -77,14 +77,27 @@ module Legion
77
77
  end
78
78
  end
79
79
 
80
- def list_models
80
+ def list_models(live: false, **filters)
81
81
  log.debug { "ollama provider discovering models endpoint=#{api_base}#{models_url}" }
82
82
  super.tap do |models|
83
83
  log.debug { "ollama provider discovered model_count=#{models.size}" }
84
- self.class.registry_publisher.publish_models_async(models, readiness: readiness(live: false))
85
84
  end
86
85
  end
87
86
 
87
+ def discover_offerings(live: false, raise_on_unreachable: false, **filters)
88
+ return filter_cached_offerings(Array(@cached_offerings), filters) unless live
89
+
90
+ provider_health = health(live:)
91
+ @cached_offerings = discover_live_offerings(filters, provider_health, live:)
92
+ log_discover_complete(@cached_offerings)
93
+ @cached_offerings
94
+ rescue Faraday::ConnectionFailed, Faraday::TimeoutError => e
95
+ log.warn("[#{slug}] instance=#{provider_instance_id} unreachable: #{e.message}")
96
+ raise if raise_on_unreachable
97
+
98
+ []
99
+ end
100
+
88
101
  def show_model(model)
89
102
  log.debug { "ollama provider fetching model details model=#{model}" }
90
103
  connection.post(show_model_url, { model: model }).body
@@ -112,25 +125,42 @@ module Legion
112
125
  raise
113
126
  end
114
127
 
115
- def discover_offerings(live: false, **)
116
- log.debug do
117
- "ollama provider discovering offerings live=#{live} cached_model_count=#{Array(@cached_models).size}"
118
- end
119
- running_ids = live ? running_model_ids : []
120
- offerings = resolve_models(live).filter_map do |model_info|
121
- next unless model_allowed?(model_info.id)
128
+ private
122
129
 
123
- offering_from_model(model_info, loaded: running_ids.include?(model_info.id.to_s))
130
+ def discovery_registry_readiness(provider_health, live:)
131
+ {
132
+ provider: slug.to_sym,
133
+ configured: configured?,
134
+ ready: provider_health[:ready] == true,
135
+ live: live,
136
+ health: provider_health
137
+ }
138
+ end
139
+
140
+ def discover_live_offerings(filters, provider_health, live:)
141
+ readiness = discovery_registry_readiness(provider_health, live:)
142
+ Array(list_models(live:, **filters)).filter_map do |model|
143
+ self.class.registry_publisher.publish_models_async([model], readiness:)
144
+ next unless model_matches_filters?(model, filters)
145
+ next unless model_allowed?(model.id)
146
+
147
+ log_model_discovered(model)
148
+ offering_from_model(model, health: provider_health)
124
149
  end
125
- log.debug { "ollama provider built offering_count=#{offerings.size} live=#{live}" }
126
- offerings
127
- rescue Faraday::ConnectionFailed => e
128
- log.warn("[ollama] instance=#{provider_instance_id} unreachable: #{e.message}")
129
- []
130
- rescue StandardError => e
131
- handle_exception(e, level: :warn, handled: true, operation: 'ollama.discover_offerings',
132
- backtrace_limit: 3)
133
- []
150
+ end
151
+
152
+ def log_model_discovered(model)
153
+ log.debug(
154
+ "[#{slug}] instance=#{provider_instance_id} action=model_discovered " \
155
+ "model=#{model.id} family=#{model.family}"
156
+ )
157
+ end
158
+
159
+ def log_discover_complete(offerings)
160
+ log.info(
161
+ "[#{slug}] instance=#{provider_instance_id} action=discover_complete " \
162
+ "model_count=#{Array(offerings).size}"
163
+ )
134
164
  end
135
165
 
136
166
  CONTEXT_WINDOWS = {
@@ -154,8 +184,6 @@ module Legion
154
184
  'bge' => 512
155
185
  }.freeze
156
186
 
157
- private
158
-
159
187
  def resolve_models(live)
160
188
  if live
161
189
  @cached_models = list_models
@@ -170,8 +198,20 @@ module Legion
170
198
  end.map(&:to_s)
171
199
  end
172
200
 
173
- def offering_from_model(model_info, loaded: false)
201
+ def offering_from_model(model_info, health: {})
202
+ loaded = begin
203
+ running_model_ids.include?(model_info.id.to_s)
204
+ rescue StandardError
205
+ health.is_a?(Hash) ? health.fetch(:loaded, false) : false
206
+ end
174
207
  policy = resolve_capability_policy(model_info)
208
+ embedding_model = model_info.embedding?
209
+ capabilities = embedding_model ? [:embedding] : policy[:capabilities]
210
+ capability_sources = if embedding_model
211
+ policy[:sources].merge(embedding: { value: true, source: :model_metadata })
212
+ else
213
+ policy[:sources]
214
+ end
175
215
  Legion::Extensions::Llm::Routing::ModelOffering.new(
176
216
  provider_family: :ollama,
177
217
  instance_id: config.respond_to?(:instance_id) ? config.instance_id : :default,
@@ -179,8 +219,8 @@ module Legion
179
219
  tier: offering_tier,
180
220
  model: model_info.id,
181
221
  usage_type: offering_usage_type(model_info),
182
- capabilities: policy[:capabilities],
183
- capability_sources: policy[:sources],
222
+ capabilities: capabilities,
223
+ capability_sources: capability_sources,
184
224
  limits: offering_limits(model_info),
185
225
  metadata: offering_metadata(model_info).merge(loaded: loaded)
186
226
  )
@@ -193,9 +233,9 @@ module Legion
193
233
  provider_catalog: {},
194
234
  probe: {},
195
235
  provider_envelope: { streaming: true },
196
- provider_config: provider_level_config,
197
- instance_config: instance_level_config,
198
- model_config: model_level_config(model_id)
236
+ provider_config: provider_capability_config,
237
+ instance_config: instance_capability_config,
238
+ model_config: model_capability_config(model_id)
199
239
  )
200
240
  end
201
241
 
@@ -206,35 +246,6 @@ module Legion
206
246
  end
207
247
  end
208
248
 
209
- def provider_level_config
210
- raw = CredentialSources.setting(:extensions, :llm, :ollama)
211
- return {} unless raw.is_a?(Hash)
212
-
213
- raw.reject { |k, _| k.to_sym == :instances }
214
- end
215
-
216
- def instance_level_config
217
- extract_config_hash
218
- end
219
-
220
- def model_level_config(model_id)
221
- data = extract_config_hash
222
- models = data[:models]
223
- return {} unless models.is_a?(Hash)
224
-
225
- models[model_id.to_sym] || models[model_id.to_s] || models[model_id] || {}
226
- end
227
-
228
- def extract_config_hash
229
- return config.to_h if config.respond_to?(:to_h) && !config.is_a?(Legion::Extensions::Llm::HashConfig)
230
-
231
- if config.is_a?(Legion::Extensions::Llm::HashConfig)
232
- config.instance_variable_get(:@data) || {}
233
- else
234
- {}
235
- end
236
- end
237
-
238
249
  def offering_usage_type(model_info)
239
250
  model_info.embedding? ? :embedding : :inference
240
251
  end
@@ -352,7 +363,7 @@ module Legion
352
363
  chunks << built
353
364
  block&.call(built)
354
365
  rescue Legion::JSON::ParseError => e
355
- handle_exception(e, level: :debug, handled: true, operation: 'ollama.stream_parse')
366
+ handle_exception(e, level: :warn, handled: true, operation: 'ollama.stream_parse')
356
367
  end
357
368
 
358
369
  def finalize_stream(chunks)
@@ -4,7 +4,7 @@ module Legion
4
4
  module Extensions
5
5
  module Llm
6
6
  module Ollama
7
- VERSION = '0.2.17'
7
+ VERSION = '0.2.22'
8
8
  end
9
9
  end
10
10
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lex-llm-ollama
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.17
4
+ version: 0.2.22
5
5
  platform: ruby
6
6
  authors:
7
7
  - LegionIO
@@ -71,14 +71,14 @@ dependencies:
71
71
  requirements:
72
72
  - - ">="
73
73
  - !ruby/object:Gem::Version
74
- version: 0.5.0
74
+ version: 0.6.0
75
75
  type: :runtime
76
76
  prerelease: false
77
77
  version_requirements: !ruby/object:Gem::Requirement
78
78
  requirements:
79
79
  - - ">="
80
80
  - !ruby/object:Gem::Version
81
- version: 0.5.0
81
+ version: 0.6.0
82
82
  description: Ollama provider integration for the LegionIO LLM routing framework.
83
83
  email:
84
84
  - matthewdiverson@gmail.com