lex-llm-ollama 0.2.17 → 0.2.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 9105e1f109fd5c83078224391ceb669ea321943a2075d85d5a77cf48b73d16e7
|
|
4
|
+
data.tar.gz: 1bd18adb284f8b8fa5c12e0049f310973989efb5a20070dcbb172754f64f941b
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 175570e4fdf0574998741b731718675d643f03c1163183bd41a28a7c90151c71919150060c9d87ff27840f0bcf5203b90dba85e81680f1ec74d3f494b22c85f0
|
|
7
|
+
data.tar.gz: 9ebb03d6cdf2078303f8ac674cfe939a73770a0ce7b721e86c6daaeb63169c0039dfca9e7063138e83896322820939b203f68265da9a2125066ee4e05ecefcd4
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,33 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [0.2.22] - 2026-06-20
|
|
4
|
+
|
|
5
|
+
### Fixed
|
|
6
|
+
- Stub shared registry publishing through `RegistryPublisher#schedule` in specs so async availability-event coverage stays stable after the shared publisher moved off raw `Thread.new`.
|
|
7
|
+
|
|
8
|
+
## [0.2.21] - 2026-06-20
|
|
9
|
+
|
|
10
|
+
### Fixed
|
|
11
|
+
- Stop bulk-publishing Ollama model availability from `list_models`; discovery now emits one registry event per seen model from the shared `lex-llm` policy-filter path so blocked models stay observable without duplicate publishes.
|
|
12
|
+
|
|
13
|
+
## [0.2.20] - 2026-06-20
|
|
14
|
+
|
|
15
|
+
### Changed
|
|
16
|
+
- Slow the live discovery refresh cadence from 60 seconds to 300 seconds for Ollama instances; `extensions.llm.ollama.discovery_interval` still overrides the default.
|
|
17
|
+
|
|
18
|
+
## [0.2.19] - 2026-06-20
|
|
19
|
+
|
|
20
|
+
### Fixed
|
|
21
|
+
- Route Ollama capability overrides through the shared `lex-llm` provider contract and preserve the canonical singular `:embedding` capability on embedding offerings.
|
|
22
|
+
|
|
23
|
+
## [0.2.18] - 2026-06-19
|
|
24
|
+
|
|
25
|
+
### Changed
|
|
26
|
+
- Adopt `Legion::Extensions::Llm::Inventory::ScopedRefresher` mixin (lex-llm 0.6.0). Discovery
|
|
27
|
+
refresh actors now write directly to the live `Inventory` catalog via `Inventory.write_lane`.
|
|
28
|
+
- Pin `lex-llm >= 0.6.0` and `legion-llm >= 0.14.0` in gemspec.
|
|
29
|
+
- Standard `weight: 100` default added to provider instance settings schema.
|
|
30
|
+
|
|
3
31
|
## 0.2.17 - 2026-06-16
|
|
4
32
|
|
|
5
33
|
- dependency updates, code quality improvements
|
data/lex-llm-ollama.gemspec
CHANGED
|
@@ -27,5 +27,5 @@ Gem::Specification.new do |spec|
|
|
|
27
27
|
spec.add_dependency 'legion-logging', '>= 1.3.2'
|
|
28
28
|
spec.add_dependency 'legion-settings', '>= 1.3.14'
|
|
29
29
|
spec.add_dependency 'legion-transport', '>= 1.4.14'
|
|
30
|
-
spec.add_dependency 'lex-llm', '>= 0.
|
|
30
|
+
spec.add_dependency 'lex-llm', '>= 0.6.0'
|
|
31
31
|
end
|
|
@@ -1,11 +1,19 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require 'digest'
|
|
4
|
+
|
|
3
5
|
begin
|
|
4
6
|
require 'legion/extensions/actors/every'
|
|
5
7
|
rescue LoadError => e
|
|
6
8
|
warn(e.message) if $VERBOSE
|
|
7
9
|
end
|
|
8
10
|
|
|
11
|
+
begin
|
|
12
|
+
require 'legion/extensions/llm/inventory/scoped_refresher'
|
|
13
|
+
rescue LoadError => e
|
|
14
|
+
warn(e.message) if $VERBOSE
|
|
15
|
+
end
|
|
16
|
+
|
|
9
17
|
return unless defined?(Legion::Extensions::Actors::Every)
|
|
10
18
|
|
|
11
19
|
module Legion
|
|
@@ -16,7 +24,11 @@ module Legion
|
|
|
16
24
|
class DiscoveryRefresh < Legion::Extensions::Actors::Every
|
|
17
25
|
include Legion::Logging::Helper
|
|
18
26
|
|
|
19
|
-
|
|
27
|
+
if defined?(Legion::Extensions::Llm::Inventory::ScopedRefresher)
|
|
28
|
+
include Legion::Extensions::Llm::Inventory::ScopedRefresher
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
def self.every_seconds = 300
|
|
20
32
|
|
|
21
33
|
def runner_class = self.class
|
|
22
34
|
def runner_function = 'manual'
|
|
@@ -26,25 +38,143 @@ module Legion
|
|
|
26
38
|
def generate_task? = false
|
|
27
39
|
|
|
28
40
|
def time
|
|
29
|
-
return
|
|
41
|
+
return self.class.every_seconds unless defined?(Legion::Settings)
|
|
30
42
|
|
|
31
|
-
Legion::Settings.dig(:extensions, :llm, :ollama, :discovery_interval) ||
|
|
43
|
+
Legion::Settings.dig(:extensions, :llm, :ollama, :discovery_interval) || self.class.every_seconds
|
|
32
44
|
end
|
|
33
45
|
|
|
34
|
-
def
|
|
35
|
-
|
|
36
|
-
|
|
46
|
+
def scope_key(**)
|
|
47
|
+
{ provider: :ollama }
|
|
48
|
+
end
|
|
37
49
|
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
50
|
+
def compute_lanes_for_scope(**)
|
|
51
|
+
return [] unless defined?(Legion::LLM::Call::Registry)
|
|
52
|
+
|
|
53
|
+
lanes = []
|
|
54
|
+
ollama_instances.each do |instance|
|
|
55
|
+
collect_lanes_for_instance(instance, lanes)
|
|
56
|
+
rescue StandardError => e
|
|
57
|
+
handle_exception(e, level: :warn, handled: true,
|
|
58
|
+
operation: 'ollama.discovery_refresh.compute_lanes',
|
|
59
|
+
instance: instance[:instance])
|
|
44
60
|
end
|
|
61
|
+
lanes
|
|
62
|
+
rescue StandardError => e
|
|
63
|
+
handle_exception(e, level: :warn, handled: true,
|
|
64
|
+
operation: 'ollama.discovery_refresh.compute_lanes_for_scope')
|
|
65
|
+
[]
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
def credential_hash(**)
|
|
69
|
+
settings = ollama_settings
|
|
70
|
+
Digest::SHA256.hexdigest(settings[:api_key].to_s + settings[:instances].to_s)[0, 16]
|
|
71
|
+
rescue StandardError => e
|
|
72
|
+
handle_exception(e, level: :warn, handled: true, operation: 'ollama.discovery_refresh.credential_hash')
|
|
73
|
+
'unknown'
|
|
74
|
+
end
|
|
75
|
+
|
|
76
|
+
def manual(**)
|
|
77
|
+
tick if defined?(Legion::Extensions::Llm::Inventory::ScopedRefresher) &&
|
|
78
|
+
respond_to?(:tick, true)
|
|
45
79
|
rescue StandardError => e
|
|
46
80
|
handle_exception(e, level: :warn, handled: true, operation: 'ollama.actor.discovery_refresh')
|
|
47
81
|
end
|
|
82
|
+
|
|
83
|
+
private
|
|
84
|
+
|
|
85
|
+
def ollama_instances
|
|
86
|
+
Legion::LLM::Call::Registry.all_instances.select do |e|
|
|
87
|
+
(e[:provider] || '').to_sym == :ollama
|
|
88
|
+
end
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
def collect_lanes_for_instance(instance, lanes)
|
|
92
|
+
adapter = instance[:adapter]
|
|
93
|
+
return unless adapter.respond_to?(:discover_offerings)
|
|
94
|
+
|
|
95
|
+
Array(adapter.discover_offerings(live: true)).each do |raw_offering|
|
|
96
|
+
offering = offering_to_hash(raw_offering)
|
|
97
|
+
next unless offering
|
|
98
|
+
|
|
99
|
+
model = offering[:model] || offering['model']
|
|
100
|
+
next unless model
|
|
101
|
+
|
|
102
|
+
lane = build_lane(offering, instance)
|
|
103
|
+
lanes << lane
|
|
104
|
+
lanes << fleet_lane(lane, instance) if emit_fleet_lane?(lane)
|
|
105
|
+
end
|
|
106
|
+
end
|
|
107
|
+
|
|
108
|
+
def offering_to_hash(offering)
|
|
109
|
+
return nil if offering.nil?
|
|
110
|
+
return offering if offering.is_a?(Hash)
|
|
111
|
+
|
|
112
|
+
hash = offering.to_h
|
|
113
|
+
hash[:type] ||= hash[:usage_type]
|
|
114
|
+
hash[:enabled] = offering.respond_to?(:enabled?) ? offering.enabled? : true
|
|
115
|
+
hash
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
def build_lane(offering, instance)
|
|
119
|
+
instance_id = instance[:instance] || instance[:instance_id] || instance[:id]
|
|
120
|
+
raw_tier = offering[:tier] || :local
|
|
121
|
+
offer_type = offering[:type]
|
|
122
|
+
type = %i[embed embedding].include?(offer_type) ? :embedding : :inference
|
|
123
|
+
capabilities = normalize_capabilities(offering[:capabilities] || [])
|
|
124
|
+
model = offering[:model] || offering['model']
|
|
125
|
+
|
|
126
|
+
lane_id = Legion::Extensions::Llm::Inventory::ScopedRefresher.compose_id(
|
|
127
|
+
tier: raw_tier, provider_family: :ollama,
|
|
128
|
+
instance_id: instance_id, type: type, model: model
|
|
129
|
+
)
|
|
130
|
+
|
|
131
|
+
{
|
|
132
|
+
id: lane_id,
|
|
133
|
+
tier: raw_tier,
|
|
134
|
+
provider_family: :ollama,
|
|
135
|
+
instance_id: instance_id,
|
|
136
|
+
model: model,
|
|
137
|
+
canonical_model_alias: offering[:canonical_model_alias] || offering[:name],
|
|
138
|
+
type: type,
|
|
139
|
+
capabilities: capabilities,
|
|
140
|
+
limits: offering[:limits] || {},
|
|
141
|
+
enabled: offering.fetch(:enabled, true),
|
|
142
|
+
cost: offering[:cost] || {}
|
|
143
|
+
}
|
|
144
|
+
end
|
|
145
|
+
|
|
146
|
+
def emit_fleet_lane?(lane)
|
|
147
|
+
return false unless lane[:type] == :inference
|
|
148
|
+
|
|
149
|
+
ollama_settings&.dig(:fleet, :dispatch, :enabled)
|
|
150
|
+
end
|
|
151
|
+
|
|
152
|
+
def fleet_lane(lane, instance)
|
|
153
|
+
fleet_id = Legion::Extensions::Llm::Inventory::ScopedRefresher.compose_id(
|
|
154
|
+
tier: :fleet, provider_family: :ollama,
|
|
155
|
+
instance_id: instance[:instance] || instance[:instance_id],
|
|
156
|
+
type: lane[:type], model: lane[:model]
|
|
157
|
+
)
|
|
158
|
+
lane.merge(id: fleet_id, tier: :fleet)
|
|
159
|
+
end
|
|
160
|
+
|
|
161
|
+
def normalize_capabilities(caps)
|
|
162
|
+
# Inventory::Capabilities lives in lex-llm; the previous fallback (`return []
|
|
163
|
+
# unless defined?(...)`) silently swallowed every capability the operator
|
|
164
|
+
# declared via enable_thinking/enable_tools when the constant wasn't loaded.
|
|
165
|
+
# Always normalize through the shared vocabulary so aliases collapse.
|
|
166
|
+
if defined?(Legion::Extensions::Llm::Inventory::Capabilities)
|
|
167
|
+
Legion::Extensions::Llm::Inventory::Capabilities.normalize(caps)
|
|
168
|
+
else
|
|
169
|
+
Array(caps).compact.map(&:to_sym).uniq
|
|
170
|
+
end
|
|
171
|
+
end
|
|
172
|
+
|
|
173
|
+
def ollama_settings
|
|
174
|
+
Legion::Settings.dig(:extensions, :llm, :ollama)
|
|
175
|
+
rescue StandardError
|
|
176
|
+
{}
|
|
177
|
+
end
|
|
48
178
|
end
|
|
49
179
|
end
|
|
50
180
|
end
|
|
@@ -77,14 +77,27 @@ module Legion
|
|
|
77
77
|
end
|
|
78
78
|
end
|
|
79
79
|
|
|
80
|
-
def list_models
|
|
80
|
+
def list_models(live: false, **filters)
|
|
81
81
|
log.debug { "ollama provider discovering models endpoint=#{api_base}#{models_url}" }
|
|
82
82
|
super.tap do |models|
|
|
83
83
|
log.debug { "ollama provider discovered model_count=#{models.size}" }
|
|
84
|
-
self.class.registry_publisher.publish_models_async(models, readiness: readiness(live: false))
|
|
85
84
|
end
|
|
86
85
|
end
|
|
87
86
|
|
|
87
|
+
def discover_offerings(live: false, raise_on_unreachable: false, **filters)
|
|
88
|
+
return filter_cached_offerings(Array(@cached_offerings), filters) unless live
|
|
89
|
+
|
|
90
|
+
provider_health = health(live:)
|
|
91
|
+
@cached_offerings = discover_live_offerings(filters, provider_health, live:)
|
|
92
|
+
log_discover_complete(@cached_offerings)
|
|
93
|
+
@cached_offerings
|
|
94
|
+
rescue Faraday::ConnectionFailed, Faraday::TimeoutError => e
|
|
95
|
+
log.warn("[#{slug}] instance=#{provider_instance_id} unreachable: #{e.message}")
|
|
96
|
+
raise if raise_on_unreachable
|
|
97
|
+
|
|
98
|
+
[]
|
|
99
|
+
end
|
|
100
|
+
|
|
88
101
|
def show_model(model)
|
|
89
102
|
log.debug { "ollama provider fetching model details model=#{model}" }
|
|
90
103
|
connection.post(show_model_url, { model: model }).body
|
|
@@ -112,25 +125,42 @@ module Legion
|
|
|
112
125
|
raise
|
|
113
126
|
end
|
|
114
127
|
|
|
115
|
-
|
|
116
|
-
log.debug do
|
|
117
|
-
"ollama provider discovering offerings live=#{live} cached_model_count=#{Array(@cached_models).size}"
|
|
118
|
-
end
|
|
119
|
-
running_ids = live ? running_model_ids : []
|
|
120
|
-
offerings = resolve_models(live).filter_map do |model_info|
|
|
121
|
-
next unless model_allowed?(model_info.id)
|
|
128
|
+
private
|
|
122
129
|
|
|
123
|
-
|
|
130
|
+
def discovery_registry_readiness(provider_health, live:)
|
|
131
|
+
{
|
|
132
|
+
provider: slug.to_sym,
|
|
133
|
+
configured: configured?,
|
|
134
|
+
ready: provider_health[:ready] == true,
|
|
135
|
+
live: live,
|
|
136
|
+
health: provider_health
|
|
137
|
+
}
|
|
138
|
+
end
|
|
139
|
+
|
|
140
|
+
def discover_live_offerings(filters, provider_health, live:)
|
|
141
|
+
readiness = discovery_registry_readiness(provider_health, live:)
|
|
142
|
+
Array(list_models(live:, **filters)).filter_map do |model|
|
|
143
|
+
self.class.registry_publisher.publish_models_async([model], readiness:)
|
|
144
|
+
next unless model_matches_filters?(model, filters)
|
|
145
|
+
next unless model_allowed?(model.id)
|
|
146
|
+
|
|
147
|
+
log_model_discovered(model)
|
|
148
|
+
offering_from_model(model, health: provider_health)
|
|
124
149
|
end
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
log.
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
150
|
+
end
|
|
151
|
+
|
|
152
|
+
def log_model_discovered(model)
|
|
153
|
+
log.debug(
|
|
154
|
+
"[#{slug}] instance=#{provider_instance_id} action=model_discovered " \
|
|
155
|
+
"model=#{model.id} family=#{model.family}"
|
|
156
|
+
)
|
|
157
|
+
end
|
|
158
|
+
|
|
159
|
+
def log_discover_complete(offerings)
|
|
160
|
+
log.info(
|
|
161
|
+
"[#{slug}] instance=#{provider_instance_id} action=discover_complete " \
|
|
162
|
+
"model_count=#{Array(offerings).size}"
|
|
163
|
+
)
|
|
134
164
|
end
|
|
135
165
|
|
|
136
166
|
CONTEXT_WINDOWS = {
|
|
@@ -154,8 +184,6 @@ module Legion
|
|
|
154
184
|
'bge' => 512
|
|
155
185
|
}.freeze
|
|
156
186
|
|
|
157
|
-
private
|
|
158
|
-
|
|
159
187
|
def resolve_models(live)
|
|
160
188
|
if live
|
|
161
189
|
@cached_models = list_models
|
|
@@ -170,8 +198,20 @@ module Legion
|
|
|
170
198
|
end.map(&:to_s)
|
|
171
199
|
end
|
|
172
200
|
|
|
173
|
-
def offering_from_model(model_info,
|
|
201
|
+
def offering_from_model(model_info, health: {})
|
|
202
|
+
loaded = begin
|
|
203
|
+
running_model_ids.include?(model_info.id.to_s)
|
|
204
|
+
rescue StandardError
|
|
205
|
+
health.is_a?(Hash) ? health.fetch(:loaded, false) : false
|
|
206
|
+
end
|
|
174
207
|
policy = resolve_capability_policy(model_info)
|
|
208
|
+
embedding_model = model_info.embedding?
|
|
209
|
+
capabilities = embedding_model ? [:embedding] : policy[:capabilities]
|
|
210
|
+
capability_sources = if embedding_model
|
|
211
|
+
policy[:sources].merge(embedding: { value: true, source: :model_metadata })
|
|
212
|
+
else
|
|
213
|
+
policy[:sources]
|
|
214
|
+
end
|
|
175
215
|
Legion::Extensions::Llm::Routing::ModelOffering.new(
|
|
176
216
|
provider_family: :ollama,
|
|
177
217
|
instance_id: config.respond_to?(:instance_id) ? config.instance_id : :default,
|
|
@@ -179,8 +219,8 @@ module Legion
|
|
|
179
219
|
tier: offering_tier,
|
|
180
220
|
model: model_info.id,
|
|
181
221
|
usage_type: offering_usage_type(model_info),
|
|
182
|
-
capabilities:
|
|
183
|
-
capability_sources:
|
|
222
|
+
capabilities: capabilities,
|
|
223
|
+
capability_sources: capability_sources,
|
|
184
224
|
limits: offering_limits(model_info),
|
|
185
225
|
metadata: offering_metadata(model_info).merge(loaded: loaded)
|
|
186
226
|
)
|
|
@@ -193,9 +233,9 @@ module Legion
|
|
|
193
233
|
provider_catalog: {},
|
|
194
234
|
probe: {},
|
|
195
235
|
provider_envelope: { streaming: true },
|
|
196
|
-
provider_config:
|
|
197
|
-
instance_config:
|
|
198
|
-
model_config:
|
|
236
|
+
provider_config: provider_capability_config,
|
|
237
|
+
instance_config: instance_capability_config,
|
|
238
|
+
model_config: model_capability_config(model_id)
|
|
199
239
|
)
|
|
200
240
|
end
|
|
201
241
|
|
|
@@ -206,35 +246,6 @@ module Legion
|
|
|
206
246
|
end
|
|
207
247
|
end
|
|
208
248
|
|
|
209
|
-
def provider_level_config
|
|
210
|
-
raw = CredentialSources.setting(:extensions, :llm, :ollama)
|
|
211
|
-
return {} unless raw.is_a?(Hash)
|
|
212
|
-
|
|
213
|
-
raw.reject { |k, _| k.to_sym == :instances }
|
|
214
|
-
end
|
|
215
|
-
|
|
216
|
-
def instance_level_config
|
|
217
|
-
extract_config_hash
|
|
218
|
-
end
|
|
219
|
-
|
|
220
|
-
def model_level_config(model_id)
|
|
221
|
-
data = extract_config_hash
|
|
222
|
-
models = data[:models]
|
|
223
|
-
return {} unless models.is_a?(Hash)
|
|
224
|
-
|
|
225
|
-
models[model_id.to_sym] || models[model_id.to_s] || models[model_id] || {}
|
|
226
|
-
end
|
|
227
|
-
|
|
228
|
-
def extract_config_hash
|
|
229
|
-
return config.to_h if config.respond_to?(:to_h) && !config.is_a?(Legion::Extensions::Llm::HashConfig)
|
|
230
|
-
|
|
231
|
-
if config.is_a?(Legion::Extensions::Llm::HashConfig)
|
|
232
|
-
config.instance_variable_get(:@data) || {}
|
|
233
|
-
else
|
|
234
|
-
{}
|
|
235
|
-
end
|
|
236
|
-
end
|
|
237
|
-
|
|
238
249
|
def offering_usage_type(model_info)
|
|
239
250
|
model_info.embedding? ? :embedding : :inference
|
|
240
251
|
end
|
|
@@ -352,7 +363,7 @@ module Legion
|
|
|
352
363
|
chunks << built
|
|
353
364
|
block&.call(built)
|
|
354
365
|
rescue Legion::JSON::ParseError => e
|
|
355
|
-
handle_exception(e, level: :
|
|
366
|
+
handle_exception(e, level: :warn, handled: true, operation: 'ollama.stream_parse')
|
|
356
367
|
end
|
|
357
368
|
|
|
358
369
|
def finalize_stream(chunks)
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: lex-llm-ollama
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.17
|
|
4
|
+
version: 0.2.22
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- LegionIO
|
|
@@ -71,14 +71,14 @@ dependencies:
|
|
|
71
71
|
requirements:
|
|
72
72
|
- - ">="
|
|
73
73
|
- !ruby/object:Gem::Version
|
|
74
|
-
version: 0.
|
|
74
|
+
version: 0.6.0
|
|
75
75
|
type: :runtime
|
|
76
76
|
prerelease: false
|
|
77
77
|
version_requirements: !ruby/object:Gem::Requirement
|
|
78
78
|
requirements:
|
|
79
79
|
- - ">="
|
|
80
80
|
- !ruby/object:Gem::Version
|
|
81
|
-
version: 0.
|
|
81
|
+
version: 0.6.0
|
|
82
82
|
description: Ollama provider integration for the LegionIO LLM routing framework.
|
|
83
83
|
email:
|
|
84
84
|
- matthewdiverson@gmail.com
|