lex-ollama 0.3.5 → 0.3.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +3 -0
- data/CHANGELOG.md +33 -1
- data/Gemfile +6 -0
- data/README.md +31 -6
- data/lex-ollama.gemspec +5 -0
- data/lib/legion/extensions/ollama/actors/model_worker.rb +391 -24
- data/lib/legion/extensions/ollama/transport/exchanges/llm_registry.rb +18 -0
- data/lib/legion/extensions/ollama/transport/messages/registry_event.rb +41 -0
- data/lib/legion/extensions/ollama/version.rb +1 -1
- data/lib/legion/extensions/ollama.rb +107 -19
- metadata +73 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 382e4c199c00f2ddd25fe9eea874e9e06340cefe26cc384e7e92a65504263241
|
|
4
|
+
data.tar.gz: cc45f497e285427b21b899ef1a282c5254fdf19c2d71ebb3a52457440b2892b3
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 867a24f1bb87195ef55dfeede679eafdf612c5eac98e25fd733dbebb75e79e3e210459694de8feeb068a7808cf0ce55a52a45f35b2254cebfba6af4eeb8c890e
|
|
7
|
+
data.tar.gz: 3e4b46cbba3ce845379f62209c7ced95879134294e171226b06263d76b1aaa196dc440c34f6c6a6ebbccc5059d796225555da385a3d2ec350ae157365943084e
|
data/.gitignore
CHANGED
data/CHANGELOG.md
CHANGED
|
@@ -1,11 +1,43 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
-
## [0.3.
|
|
3
|
+
## [0.3.10] - 2026-04-28
|
|
4
|
+
|
|
5
|
+
### Fixed
|
|
6
|
+
- Require `lex-llm >= 0.1.6` so registry availability publishing always has the shared `RegistryEvent` envelope implementation it depends on.
|
|
7
|
+
|
|
8
|
+
## [0.3.9] - 2026-04-28
|
|
9
|
+
|
|
10
|
+
### Fixed
|
|
11
|
+
- Declare explicit shared Legion JSON, logging, and settings dependencies used by the legacy Ollama extension during the LLM uplift transition.
|
|
12
|
+
|
|
13
|
+
## [0.3.8] - 2026-04-28
|
|
14
|
+
|
|
15
|
+
### Added
|
|
16
|
+
- Publish nonblocking `llm.registry` availability, unavailability, heartbeat, and degraded events from `Actor::ModelWorker` using `lex-llm` `Legion::Extensions::Llm::Routing::RegistryEvent` envelopes when transport is available.
|
|
17
|
+
- Add local `Transport::Exchanges::LlmRegistry` and `Transport::Messages::RegistryEvent` wrappers for `llm.registry` topic publishing without requiring a database.
|
|
18
|
+
|
|
19
|
+
## [0.3.7] - 2026-04-28
|
|
20
|
+
|
|
21
|
+
### Fixed
|
|
22
|
+
- Declare the `legion-llm` runtime dependency required by the fleet exchange, response, and error classes inherited by lex-ollama fleet workers.
|
|
23
|
+
|
|
24
|
+
## [0.3.6] - 2026-04-28
|
|
25
|
+
|
|
26
|
+
### Added
|
|
27
|
+
- `Actor::ModelWorker` can now bind opt-in exact offering lanes compatible with legion-llm's `llm.fleet.offering.<instance>.<model>.<operation>` routing style while preserving the existing shared `llm.fleet.*` lanes
|
|
28
|
+
- `legion.ollama.fleet.offering_lanes` settings default to disabled with no instance id, so existing shared-lane fleet workers keep their current behavior unless exact offering lanes are explicitly enabled
|
|
29
|
+
|
|
30
|
+
## [0.3.5] - 2026-04-28
|
|
4
31
|
|
|
5
32
|
### Added
|
|
6
33
|
- Fleet model workers now bind transient classic queues to shared `llm.fleet` model lanes, with configurable consumer priority, queue expiration, and message TTL.
|
|
7
34
|
- Subscription entries can provide a context window so inference workers bind lanes like `llm.fleet.inference.qwen3-5-27b.ctx32768`.
|
|
8
35
|
|
|
36
|
+
### Changed
|
|
37
|
+
- `Actor::ModelWorker` now defaults endpoint fleet workers to explicit `basic_get` polling with a process-wide lane lock so local one-model-at-a-time devices do not reserve messages from multiple model queues; GPU/datacenter workers can opt back into RabbitMQ subscriptions with `legion.ollama.fleet.scheduler: :subscription`
|
|
38
|
+
- Fleet worker queue names and routing keys now use shared `llm.fleet.*` lanes (`llm.fleet.embed.<model>` and `llm.fleet.inference.<model>.ctx<context>`) instead of legacy `llm.request.ollama.*` keys
|
|
39
|
+
- `Ollama.build_actors` now orders generated model workers with embeddings first, then inference/chat workers from smallest to largest configured context window
|
|
40
|
+
|
|
9
41
|
## [0.3.4] - 2026-04-24
|
|
10
42
|
|
|
11
43
|
### Fixed
|
data/Gemfile
CHANGED
|
@@ -3,6 +3,12 @@
|
|
|
3
3
|
source 'https://rubygems.org'
|
|
4
4
|
gemspec
|
|
5
5
|
|
|
6
|
+
legion_llm_path = File.expand_path('../../legion-llm', __dir__)
|
|
7
|
+
gem 'legion-llm', path: legion_llm_path if Dir.exist?(legion_llm_path)
|
|
8
|
+
|
|
9
|
+
lex_llm_path = File.expand_path('../lex-llm', __dir__)
|
|
10
|
+
gem 'lex-llm', path: lex_llm_path if Dir.exist?(lex_llm_path)
|
|
11
|
+
|
|
6
12
|
group :test do
|
|
7
13
|
gem 'rake'
|
|
8
14
|
gem 'rspec'
|
data/README.md
CHANGED
|
@@ -49,11 +49,19 @@ gem install lex-ollama
|
|
|
49
49
|
- `handle_request` - Dispatch inbound fleet AMQP messages to the appropriate runner (chat/embed/generate)
|
|
50
50
|
|
|
51
51
|
When `Legion::Extensions::Core` is present, lex-ollama subscribes to model-scoped queues on the
|
|
52
|
-
`llm.
|
|
52
|
+
`llm.fleet` topic exchange, accepting routed LLM inference work from other Legion fleet members.
|
|
53
53
|
|
|
54
|
-
Each configured `(type, model)` pair gets its own
|
|
55
|
-
`llm.
|
|
56
|
-
|
|
54
|
+
Each configured `(type, model)` pair gets its own durable quorum lane queue. Shared lanes use
|
|
55
|
+
`llm.fleet.embed.<model>` for embeddings and `llm.fleet.inference.<model>.ctx<context>` for
|
|
56
|
+
generation/chat subscriptions with a configured context window. Endpoint workers default to
|
|
57
|
+
explicit `basic_get` polling with a process-wide lane lock, so local one-model-at-a-time
|
|
58
|
+
devices do not reserve work from multiple model queues. GPU or datacenter workers can opt into
|
|
59
|
+
RabbitMQ consumer subscriptions with `legion.ollama.fleet.scheduler: :subscription`.
|
|
60
|
+
|
|
61
|
+
When offering lanes are enabled, workers also bind exact `legion-llm` compatible lanes in the
|
|
62
|
+
form `llm.fleet.offering.<instance>.<model>.<operation>`. Workers publish nonblocking
|
|
63
|
+
availability, heartbeat, degraded, and unavailable events to `llm.registry` when the transport
|
|
64
|
+
runtime is loaded.
|
|
57
65
|
|
|
58
66
|
```yaml
|
|
59
67
|
legion:
|
|
@@ -67,12 +75,29 @@ legion:
|
|
|
67
75
|
- "qwen3.5:4b"
|
|
68
76
|
- "nomic-embed-text:latest"
|
|
69
77
|
fleet:
|
|
70
|
-
|
|
78
|
+
scheduler: basic_get
|
|
79
|
+
consumer_priority: 10
|
|
80
|
+
queue_expires_ms: 60000
|
|
81
|
+
message_ttl_ms: 120000
|
|
82
|
+
queue_max_length: 100
|
|
83
|
+
delivery_limit: 3
|
|
84
|
+
consumer_ack_timeout_ms: 300000
|
|
85
|
+
endpoint:
|
|
86
|
+
enabled: false
|
|
87
|
+
empty_lane_backoff_ms: 250
|
|
88
|
+
idle_backoff_ms: 1000
|
|
89
|
+
max_consecutive_pulls_per_lane: 0
|
|
90
|
+
offering_lanes:
|
|
91
|
+
enabled: false
|
|
92
|
+
instance_id: "macbook-m4"
|
|
93
|
+
registry:
|
|
94
|
+
heartbeat_interval_seconds: 30
|
|
71
95
|
subscriptions:
|
|
72
96
|
- type: embed
|
|
73
97
|
model: nomic-embed-text
|
|
74
98
|
- type: chat
|
|
75
99
|
model: "qwen3.5:27b"
|
|
100
|
+
context_window: 32768
|
|
76
101
|
```
|
|
77
102
|
|
|
78
103
|
**Auto-provisioning**: When `s3` and `default_models` are configured, the `ModelSync` actor
|
|
@@ -163,7 +188,7 @@ result[:usage] # => { input_tokens: 1, output_tokens: 5, total_duration: ..., .
|
|
|
163
188
|
|
|
164
189
|
## Version
|
|
165
190
|
|
|
166
|
-
0.3.
|
|
191
|
+
0.3.10
|
|
167
192
|
|
|
168
193
|
## License
|
|
169
194
|
|
data/lex-ollama.gemspec
CHANGED
|
@@ -27,5 +27,10 @@ Gem::Specification.new do |spec|
|
|
|
27
27
|
spec.require_paths = ['lib']
|
|
28
28
|
|
|
29
29
|
spec.add_dependency 'faraday', '>= 2.0'
|
|
30
|
+
spec.add_dependency 'legion-json', '>= 1.2.1'
|
|
31
|
+
spec.add_dependency 'legion-llm', '>= 0.8.32'
|
|
32
|
+
spec.add_dependency 'legion-logging', '>= 1.3.2'
|
|
33
|
+
spec.add_dependency 'legion-settings', '>= 1.3.14'
|
|
34
|
+
spec.add_dependency 'lex-llm', '>= 0.1.6'
|
|
30
35
|
spec.add_dependency 'lex-s3', '>= 0.2'
|
|
31
36
|
end
|
|
@@ -4,8 +4,12 @@ module Legion
|
|
|
4
4
|
module Extensions
|
|
5
5
|
module Ollama
|
|
6
6
|
module Actor
|
|
7
|
-
#
|
|
7
|
+
# Fleet actor that listens on a model-scoped queue and forwards
|
|
8
8
|
# inbound LLM request messages to Runners::Fleet#handle_request.
|
|
9
|
+
# Endpoint workers default to explicit basic_get polling so a local
|
|
10
|
+
# one-model-at-a-time device does not reserve messages from every lane.
|
|
11
|
+
# Set legion.ollama.fleet.scheduler to :subscription for GPU/datacenter
|
|
12
|
+
# workers that should use RabbitMQ consumer priority and prefetch.
|
|
9
13
|
#
|
|
10
14
|
# One instance is created per (request_type, model) entry in settings:
|
|
11
15
|
#
|
|
@@ -19,17 +23,27 @@ module Legion
|
|
|
19
23
|
# - type: chat
|
|
20
24
|
# model: "qwen3.5:27b"
|
|
21
25
|
#
|
|
22
|
-
#
|
|
23
|
-
# llm.fleet.embed.<model>
|
|
24
|
-
# llm.fleet.inference.<model>.ctx<
|
|
25
|
-
# when
|
|
26
|
+
# Queue names and routing keys follow the shared fleet lane schema:
|
|
27
|
+
# llm.fleet.embed.<model-slug>
|
|
28
|
+
# llm.fleet.inference.<model-slug>.ctx<context-window>
|
|
29
|
+
# or, when explicitly enabled, exact offering lanes:
|
|
30
|
+
# llm.fleet.offering.<instance>.<model-slug>.<operation>
|
|
26
31
|
class ModelWorker < Legion::Extensions::Actors::Subscription
|
|
27
|
-
|
|
32
|
+
POLLING_SCHEDULERS = %i[basic_get poll polling].freeze
|
|
33
|
+
SUBSCRIPTION_SCHEDULERS = %i[subscribe subscription basic_consume consumer].freeze
|
|
34
|
+
POLL_LOCK = Mutex.new
|
|
35
|
+
REGISTRY_HEARTBEAT_INTERVAL = 30.0
|
|
28
36
|
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
37
|
+
attr_reader :request_type, :model_name, :context_window, :offering_instance_id
|
|
38
|
+
|
|
39
|
+
def initialize(request_type:, model:, context_window: nil, lane_style: :shared,
|
|
40
|
+
offering_instance_id: nil, **)
|
|
41
|
+
@request_type = request_type.to_s
|
|
42
|
+
@model_name = model.to_s
|
|
43
|
+
@context_window = normalize_context_window(context_window)
|
|
44
|
+
@lane_style = lane_style.to_s
|
|
45
|
+
@offering_instance_id = offering_instance_id&.to_s
|
|
46
|
+
@polling = false
|
|
33
47
|
super(**)
|
|
34
48
|
end
|
|
35
49
|
|
|
@@ -95,19 +109,94 @@ module Legion
|
|
|
95
109
|
base.merge(arguments: { 'x-priority' => consumer_priority })
|
|
96
110
|
end
|
|
97
111
|
|
|
112
|
+
def prepare
|
|
113
|
+
return super unless endpoint_polling?
|
|
114
|
+
|
|
115
|
+
@queue = queue.new
|
|
116
|
+
@polling = true
|
|
117
|
+
log.info "[ModelWorker] prepared polling lane #{lane_key}" if defined?(log)
|
|
118
|
+
rescue StandardError => e
|
|
119
|
+
handle_exception(e, level: :fatal)
|
|
120
|
+
end
|
|
121
|
+
|
|
122
|
+
def activate
|
|
123
|
+
result = if endpoint_polling?
|
|
124
|
+
@polling = true
|
|
125
|
+
@poll_task = async.run_basic_get_loop
|
|
126
|
+
log.info "[ModelWorker] activated polling lane #{lane_key}" if defined?(log)
|
|
127
|
+
@poll_task
|
|
128
|
+
else
|
|
129
|
+
super
|
|
130
|
+
end
|
|
131
|
+
publish_registry_event_async(:available)
|
|
132
|
+
start_registry_heartbeat
|
|
133
|
+
result
|
|
134
|
+
rescue StandardError => e
|
|
135
|
+
publish_registry_event_async(:degraded, error: e)
|
|
136
|
+
handle_exception(e, level: :fatal)
|
|
137
|
+
end
|
|
138
|
+
|
|
139
|
+
def cancel
|
|
140
|
+
@polling = false
|
|
141
|
+
stop_registry_heartbeat
|
|
142
|
+
publish_registry_event_async(:unavailable)
|
|
143
|
+
return true unless instance_variable_defined?(:@consumer) && @consumer
|
|
144
|
+
|
|
145
|
+
super
|
|
146
|
+
end
|
|
147
|
+
|
|
148
|
+
def endpoint_polling?
|
|
149
|
+
scheduler = fleet_scheduler
|
|
150
|
+
return true if POLLING_SCHEDULERS.include?(scheduler)
|
|
151
|
+
return false if SUBSCRIPTION_SCHEDULERS.include?(scheduler)
|
|
152
|
+
|
|
153
|
+
nested_setting(settings, :fleet, :endpoint, :enabled) == true
|
|
154
|
+
rescue StandardError
|
|
155
|
+
false
|
|
156
|
+
end
|
|
157
|
+
|
|
158
|
+
def lane_key
|
|
159
|
+
@lane_key ||= offering_lane? ? offering_lane_key : shared_lane_key
|
|
160
|
+
end
|
|
161
|
+
alias routing_key lane_key
|
|
162
|
+
|
|
163
|
+
def run_basic_get_loop
|
|
164
|
+
consecutive_pulls = 0
|
|
165
|
+
while @polling && !shutting_down?
|
|
166
|
+
pulled = POLL_LOCK.synchronize { pull_one_message }
|
|
167
|
+
consecutive_pulls = pulled ? consecutive_pulls + 1 : 0
|
|
168
|
+
sleep(pulled ? post_pull_backoff(consecutive_pulls) : empty_lane_backoff)
|
|
169
|
+
end
|
|
170
|
+
end
|
|
171
|
+
|
|
172
|
+
def pull_one_message
|
|
173
|
+
delivery_info, metadata, payload = @queue.pop(manual_ack: manual_ack)
|
|
174
|
+
return false unless delivery_info
|
|
175
|
+
|
|
176
|
+
handle_delivery(delivery_info, metadata, payload)
|
|
177
|
+
true
|
|
178
|
+
rescue StandardError => e
|
|
179
|
+
handle_exception(e)
|
|
180
|
+
reject_or_retry(delivery_info, metadata, payload) if manual_ack && delivery_info
|
|
181
|
+
true
|
|
182
|
+
end
|
|
183
|
+
|
|
98
184
|
# Returns a queue CLASS (not instance) bound to the llm.fleet exchange
|
|
99
|
-
# with the routing key for this worker's model
|
|
185
|
+
# with the routing key for this worker's model lane.
|
|
100
186
|
# The Subscription base class calls queue.new in initialize, so this must
|
|
101
187
|
# return a class, not an instance.
|
|
102
188
|
def queue
|
|
103
189
|
@queue ||= build_queue_class
|
|
104
190
|
end
|
|
105
191
|
|
|
106
|
-
def self.queue_class_for(request_type:, model:, context_window: nil, queue_config: {}
|
|
192
|
+
def self.queue_class_for(request_type:, model:, context_window: nil, queue_config: {},
|
|
193
|
+
lane_style: :shared, offering_instance_id: nil)
|
|
107
194
|
worker = allocate
|
|
108
195
|
worker.instance_variable_set(:@request_type, request_type.to_s)
|
|
109
196
|
worker.instance_variable_set(:@model_name, model.to_s)
|
|
110
197
|
worker.instance_variable_set(:@context_window, context_window&.to_i)
|
|
198
|
+
worker.instance_variable_set(:@lane_style, lane_style.to_s)
|
|
199
|
+
worker.instance_variable_set(:@offering_instance_id, offering_instance_id&.to_s)
|
|
111
200
|
worker.send(:build_queue_class, queue_config)
|
|
112
201
|
end
|
|
113
202
|
|
|
@@ -128,29 +217,199 @@ module Legion
|
|
|
128
217
|
}
|
|
129
218
|
end
|
|
130
219
|
|
|
131
|
-
def routing_key
|
|
132
|
-
parts = ['llm.fleet', lane_kind, sanitized_model]
|
|
133
|
-
parts << "ctx#{@context_window}" if lane_kind == 'inference' && @context_window
|
|
134
|
-
parts.join('.')
|
|
135
|
-
end
|
|
136
|
-
|
|
137
220
|
# Enrich every inbound message with the worker's own request_type and model
|
|
138
221
|
# so Runners::Fleet#handle_request always has them, even if the sender omitted
|
|
139
222
|
# them. Also defaults message_context to {} if absent.
|
|
140
223
|
def process_message(payload, metadata, delivery_info)
|
|
141
224
|
msg = super
|
|
142
|
-
msg[:request_type]
|
|
143
|
-
msg[:model]
|
|
225
|
+
msg[:request_type] ||= @request_type
|
|
226
|
+
msg[:model] ||= @model_name
|
|
144
227
|
msg[:message_context] ||= {}
|
|
145
228
|
msg
|
|
146
229
|
end
|
|
147
230
|
|
|
148
231
|
private
|
|
149
232
|
|
|
233
|
+
def start_registry_heartbeat
|
|
234
|
+
return unless registry_publishing_available?
|
|
235
|
+
return if @registry_heartbeat_thread&.alive?
|
|
236
|
+
|
|
237
|
+
@registry_heartbeat_running = true
|
|
238
|
+
@registry_heartbeat_thread = Thread.new do
|
|
239
|
+
Thread.current.abort_on_exception = false
|
|
240
|
+
while @registry_heartbeat_running && !shutting_down?
|
|
241
|
+
sleep registry_heartbeat_interval
|
|
242
|
+
publish_registry_event(:heartbeat) if @registry_heartbeat_running && !shutting_down?
|
|
243
|
+
end
|
|
244
|
+
rescue StandardError => e
|
|
245
|
+
log_registry_publish_failure(e, level: :debug)
|
|
246
|
+
end
|
|
247
|
+
rescue StandardError => e
|
|
248
|
+
log_registry_publish_failure(e, level: :debug)
|
|
249
|
+
end
|
|
250
|
+
|
|
251
|
+
def stop_registry_heartbeat
|
|
252
|
+
@registry_heartbeat_running = false
|
|
253
|
+
@registry_heartbeat_thread&.kill if @registry_heartbeat_thread&.alive?
|
|
254
|
+
end
|
|
255
|
+
|
|
256
|
+
def registry_heartbeat_interval
|
|
257
|
+
configured = nested_setting(settings, :fleet, :registry, :heartbeat_interval_seconds) ||
|
|
258
|
+
nested_setting(settings, :fleet, :registry_heartbeat_interval_seconds)
|
|
259
|
+
interval = configured.nil? ? REGISTRY_HEARTBEAT_INTERVAL : Float(configured)
|
|
260
|
+
interval.positive? ? interval : REGISTRY_HEARTBEAT_INTERVAL
|
|
261
|
+
rescue StandardError
|
|
262
|
+
REGISTRY_HEARTBEAT_INTERVAL
|
|
263
|
+
end
|
|
264
|
+
|
|
265
|
+
def publish_registry_event_async(kind, error: nil)
|
|
266
|
+
return unless registry_publishing_available?
|
|
267
|
+
|
|
268
|
+
Thread.new do
|
|
269
|
+
Thread.current.abort_on_exception = false
|
|
270
|
+
publish_registry_event(kind, error: error)
|
|
271
|
+
rescue StandardError => e
|
|
272
|
+
log_registry_publish_failure(e, level: :debug)
|
|
273
|
+
end
|
|
274
|
+
rescue StandardError => e
|
|
275
|
+
log_registry_publish_failure(e, level: :debug)
|
|
276
|
+
end
|
|
277
|
+
|
|
278
|
+
def publish_registry_event(kind, error: nil)
|
|
279
|
+
event = registry_event_for(kind, error: error)
|
|
280
|
+
Transport::Messages::RegistryEvent.new(event: event).publish(spool: false)
|
|
281
|
+
rescue StandardError => e
|
|
282
|
+
log_registry_publish_failure(e)
|
|
283
|
+
end
|
|
284
|
+
|
|
285
|
+
def registry_event_for(kind, error: nil)
|
|
286
|
+
registry_event_class.public_send(
|
|
287
|
+
registry_event_method(kind),
|
|
288
|
+
registry_offering,
|
|
289
|
+
runtime: registry_runtime,
|
|
290
|
+
capacity: registry_capacity,
|
|
291
|
+
health: registry_health(kind, error: error),
|
|
292
|
+
lane: lane_key,
|
|
293
|
+
metadata: registry_metadata
|
|
294
|
+
)
|
|
295
|
+
end
|
|
296
|
+
|
|
297
|
+
def registry_event_method(kind)
|
|
298
|
+
case kind.to_sym
|
|
299
|
+
when :available then :available
|
|
300
|
+
when :unavailable then :unavailable
|
|
301
|
+
when :heartbeat then :heartbeat
|
|
302
|
+
else :degraded
|
|
303
|
+
end
|
|
304
|
+
end
|
|
305
|
+
|
|
306
|
+
def registry_offering
|
|
307
|
+
limits = {}
|
|
308
|
+
limits[:context_window] = @context_window if @context_window
|
|
309
|
+
{
|
|
310
|
+
provider_family: :ollama,
|
|
311
|
+
provider_instance: registry_provider_instance,
|
|
312
|
+
transport: :rabbitmq,
|
|
313
|
+
model: @model_name,
|
|
314
|
+
usage_type: registry_usage_type,
|
|
315
|
+
capabilities: registry_capabilities,
|
|
316
|
+
limits: limits,
|
|
317
|
+
routing_metadata: { lane: lane_key },
|
|
318
|
+
metadata: { lex: :ollama, lane_style: @lane_style || 'shared' }
|
|
319
|
+
}
|
|
320
|
+
end
|
|
321
|
+
|
|
322
|
+
def registry_runtime
|
|
323
|
+
{
|
|
324
|
+
node: registry_provider_instance,
|
|
325
|
+
scheduler: fleet_scheduler,
|
|
326
|
+
polling: endpoint_polling?
|
|
327
|
+
}
|
|
328
|
+
end
|
|
329
|
+
|
|
330
|
+
def registry_capacity
|
|
331
|
+
{
|
|
332
|
+
concurrency: 1,
|
|
333
|
+
consumer_priority: consumer_priority,
|
|
334
|
+
queue_max_length: queue_max_length
|
|
335
|
+
}
|
|
336
|
+
end
|
|
337
|
+
|
|
338
|
+
def registry_health(kind, error: nil)
|
|
339
|
+
health = {
|
|
340
|
+
ready: %i[available heartbeat].include?(kind.to_sym),
|
|
341
|
+
status: registry_health_status(kind)
|
|
342
|
+
}
|
|
343
|
+
health[:error_class] = error.class.name if error
|
|
344
|
+
health[:error] = error.message if error
|
|
345
|
+
health
|
|
346
|
+
end
|
|
347
|
+
|
|
348
|
+
def registry_health_status(kind)
|
|
349
|
+
case kind.to_sym
|
|
350
|
+
when :available, :heartbeat then :available
|
|
351
|
+
when :unavailable then :unavailable
|
|
352
|
+
else :degraded
|
|
353
|
+
end
|
|
354
|
+
end
|
|
355
|
+
|
|
356
|
+
def registry_metadata
|
|
357
|
+
{
|
|
358
|
+
extension: :lex_ollama,
|
|
359
|
+
request_type: @request_type,
|
|
360
|
+
lane_kind: lane_kind
|
|
361
|
+
}
|
|
362
|
+
end
|
|
363
|
+
|
|
364
|
+
def registry_usage_type
|
|
365
|
+
lane_kind == 'embed' ? :embedding : :inference
|
|
366
|
+
end
|
|
367
|
+
|
|
368
|
+
def registry_capabilities
|
|
369
|
+
return %i[embedding] if lane_kind == 'embed'
|
|
370
|
+
return %i[completion] if @request_type == 'generate'
|
|
371
|
+
|
|
372
|
+
%i[chat]
|
|
373
|
+
end
|
|
374
|
+
|
|
375
|
+
def registry_provider_instance
|
|
376
|
+
@offering_instance_id || node_identity
|
|
377
|
+
end
|
|
378
|
+
|
|
379
|
+
def node_identity
|
|
380
|
+
return Legion::Settings.dig(:node, :canonical_name).to_s if defined?(Legion::Settings) &&
|
|
381
|
+
Legion::Settings.dig(:node, :canonical_name)
|
|
382
|
+
|
|
383
|
+
'unknown'
|
|
384
|
+
rescue StandardError
|
|
385
|
+
'unknown'
|
|
386
|
+
end
|
|
387
|
+
|
|
388
|
+
def registry_publishing_available?
|
|
389
|
+
defined?(::Legion::Transport) &&
|
|
390
|
+
defined?(::Legion::Extensions::Llm::Routing::RegistryEvent) &&
|
|
391
|
+
defined?(Transport::Messages::RegistryEvent)
|
|
392
|
+
end
|
|
393
|
+
|
|
394
|
+
def registry_event_class
|
|
395
|
+
::Legion::Extensions::Llm::Routing::RegistryEvent
|
|
396
|
+
end
|
|
397
|
+
|
|
398
|
+
def log_registry_publish_failure(error, level: :warn)
|
|
399
|
+
message = "[ModelWorker] llm.registry publish failed lane=#{lane_key}: #{error.class}: #{error.message}"
|
|
400
|
+
if defined?(log) && log.respond_to?(level)
|
|
401
|
+
log.public_send(level, message)
|
|
402
|
+
elsif defined?(log) && log.respond_to?(:debug)
|
|
403
|
+
log.debug(message)
|
|
404
|
+
end
|
|
405
|
+
rescue StandardError
|
|
406
|
+
nil
|
|
407
|
+
end
|
|
408
|
+
|
|
150
409
|
def build_queue_class(queue_config = {})
|
|
151
|
-
lane_key
|
|
152
|
-
exchange_class
|
|
153
|
-
queue_settings
|
|
410
|
+
lane_key = self.lane_key
|
|
411
|
+
exchange_class = Transport::Exchanges::LlmRequest
|
|
412
|
+
queue_settings = {
|
|
154
413
|
queue_expires_ms: queue_expires_ms,
|
|
155
414
|
message_ttl_ms: message_ttl_ms,
|
|
156
415
|
queue_max_length: queue_max_length,
|
|
@@ -181,12 +440,38 @@ module Legion
|
|
|
181
440
|
end
|
|
182
441
|
end
|
|
183
442
|
|
|
443
|
+
def handle_delivery(delivery_info, metadata, payload)
|
|
444
|
+
message = process_message(payload, metadata, delivery_info)
|
|
445
|
+
fn = find_function(message)
|
|
446
|
+
log.debug "[ModelWorker] basic_get message received: #{lex_name}/#{fn}" if defined?(log)
|
|
447
|
+
|
|
448
|
+
affinity_result = check_region_affinity(message)
|
|
449
|
+
if affinity_result == :reject
|
|
450
|
+
log.warn '[ModelWorker] nack: region affinity mismatch' if defined?(log)
|
|
451
|
+
@queue.reject(delivery_info.delivery_tag) if manual_ack
|
|
452
|
+
return
|
|
453
|
+
end
|
|
454
|
+
|
|
455
|
+
record_cross_region_metric(message) if affinity_result == :remote
|
|
456
|
+
|
|
457
|
+
if use_runner?
|
|
458
|
+
dispatch_runner(message, runner_class, fn, check_subtask?, generate_task?)
|
|
459
|
+
else
|
|
460
|
+
runner_class.send(fn, **message)
|
|
461
|
+
end
|
|
462
|
+
@queue.acknowledge(delivery_info.delivery_tag) if manual_ack
|
|
463
|
+
end
|
|
464
|
+
|
|
184
465
|
def fleet_settings
|
|
185
466
|
setting_value(settings, :fleet) || {}
|
|
186
467
|
rescue NameError
|
|
187
468
|
{}
|
|
188
469
|
end
|
|
189
470
|
|
|
471
|
+
def fleet_scheduler
|
|
472
|
+
(setting_value(fleet_settings, :scheduler) || :basic_get).to_sym
|
|
473
|
+
end
|
|
474
|
+
|
|
190
475
|
def setting_value(hash, key)
|
|
191
476
|
return nil unless hash.respond_to?(:key?)
|
|
192
477
|
|
|
@@ -196,12 +481,94 @@ module Legion
|
|
|
196
481
|
hash[key] if hash.key?(key)
|
|
197
482
|
end
|
|
198
483
|
|
|
484
|
+
def nested_setting(hash, *keys)
|
|
485
|
+
keys.reduce(hash) do |current, key|
|
|
486
|
+
return nil unless current.respond_to?(:key?)
|
|
487
|
+
|
|
488
|
+
setting_value(current, key)
|
|
489
|
+
end
|
|
490
|
+
end
|
|
491
|
+
|
|
199
492
|
def lane_kind
|
|
200
493
|
%w[embed embedding embeddings].include?(@request_type) ? 'embed' : 'inference'
|
|
201
494
|
end
|
|
202
495
|
|
|
203
496
|
def sanitized_model
|
|
204
|
-
@model_name
|
|
497
|
+
sanitize_segment(@model_name)
|
|
498
|
+
end
|
|
499
|
+
|
|
500
|
+
def offering_lane?
|
|
501
|
+
@lane_style == 'offering'
|
|
502
|
+
end
|
|
503
|
+
|
|
504
|
+
def shared_lane_key
|
|
505
|
+
parts = ['llm.fleet', lane_kind, sanitized_model]
|
|
506
|
+
parts << "ctx#{@context_window}" if lane_kind == 'inference' && @context_window
|
|
507
|
+
parts.join('.')
|
|
508
|
+
end
|
|
509
|
+
|
|
510
|
+
def offering_lane_key
|
|
511
|
+
[
|
|
512
|
+
'llm',
|
|
513
|
+
'fleet',
|
|
514
|
+
'offering',
|
|
515
|
+
public_segment(:offering_instance_id, @offering_instance_id),
|
|
516
|
+
sanitized_model,
|
|
517
|
+
lane_kind
|
|
518
|
+
].join('.')
|
|
519
|
+
end
|
|
520
|
+
|
|
521
|
+
def sanitize_segment(value)
|
|
522
|
+
value.to_s.downcase.gsub(/[^a-z0-9]+/, '-').gsub(/\A-+|-+\z/, '').squeeze('-')
|
|
523
|
+
end
|
|
524
|
+
|
|
525
|
+
def public_segment(label, value)
|
|
526
|
+
segment = sanitize_segment(value)
|
|
527
|
+
raise ArgumentError, "#{label} is empty after sanitization" if segment.empty?
|
|
528
|
+
raise ArgumentError, "#{label} exceeds 64 characters" if segment.length > 64
|
|
529
|
+
|
|
530
|
+
segment
|
|
531
|
+
end
|
|
532
|
+
|
|
533
|
+
def normalize_context_window(value)
|
|
534
|
+
return nil if value.nil? || value.to_s.empty?
|
|
535
|
+
|
|
536
|
+
Integer(value)
|
|
537
|
+
rescue ArgumentError, TypeError
|
|
538
|
+
nil
|
|
539
|
+
end
|
|
540
|
+
|
|
541
|
+
def empty_lane_backoff
|
|
542
|
+
milliseconds = nested_setting(settings, :fleet, :endpoint, :empty_lane_backoff_ms) || 250
|
|
543
|
+
milliseconds.to_f / 1000.0
|
|
544
|
+
rescue StandardError
|
|
545
|
+
0.25
|
|
546
|
+
end
|
|
547
|
+
|
|
548
|
+
def idle_backoff
|
|
549
|
+
milliseconds = nested_setting(settings, :fleet, :endpoint, :idle_backoff_ms) || 1_000
|
|
550
|
+
milliseconds.to_f / 1000.0
|
|
551
|
+
rescue StandardError
|
|
552
|
+
1.0
|
|
553
|
+
end
|
|
554
|
+
|
|
555
|
+
def max_consecutive_pulls_per_lane
|
|
556
|
+
Integer(nested_setting(settings, :fleet, :endpoint, :max_consecutive_pulls_per_lane) || 0)
|
|
557
|
+
rescue StandardError
|
|
558
|
+
0
|
|
559
|
+
end
|
|
560
|
+
|
|
561
|
+
def post_pull_backoff(consecutive_pulls)
|
|
562
|
+
max_pulls = max_consecutive_pulls_per_lane
|
|
563
|
+
return 0 if max_pulls.zero? || consecutive_pulls < max_pulls
|
|
564
|
+
|
|
565
|
+
idle_backoff
|
|
566
|
+
end
|
|
567
|
+
|
|
568
|
+
def shutting_down?
|
|
569
|
+
defined?(Legion::Settings) && Legion::Settings.dig(:client, :shutting_down)
|
|
570
|
+
rescue StandardError
|
|
571
|
+
false
|
|
205
572
|
end
|
|
206
573
|
end
|
|
207
574
|
end
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Legion
|
|
4
|
+
module Extensions
|
|
5
|
+
module Ollama
|
|
6
|
+
module Transport
|
|
7
|
+
module Exchanges
|
|
8
|
+
# Topic exchange for provider availability events consumed by LLM routing registries.
|
|
9
|
+
class LlmRegistry < Legion::Transport::Exchange
|
|
10
|
+
def exchange_name
|
|
11
|
+
'llm.registry'
|
|
12
|
+
end
|
|
13
|
+
end
|
|
14
|
+
end
|
|
15
|
+
end
|
|
16
|
+
end
|
|
17
|
+
end
|
|
18
|
+
end
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'legion/extensions/ollama/transport/exchanges/llm_registry'
|
|
4
|
+
|
|
5
|
+
module Legion
|
|
6
|
+
module Extensions
|
|
7
|
+
module Ollama
|
|
8
|
+
module Transport
|
|
9
|
+
module Messages
|
|
10
|
+
# Publishes lex-llm RegistryEvent envelopes to the llm.registry exchange.
|
|
11
|
+
class RegistryEvent < Legion::Transport::Message
|
|
12
|
+
def initialize(event:, **options)
|
|
13
|
+
envelope = event.to_h
|
|
14
|
+
super(**envelope.merge(options))
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
def exchange
|
|
18
|
+
Transport::Exchanges::LlmRegistry
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
def routing_key
|
|
22
|
+
@options[:routing_key] || "llm.registry.#{@options.fetch(:event_type)}"
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
def type
|
|
26
|
+
'llm.registry.event'
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
def app_id
|
|
30
|
+
'lex-ollama'
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
def persistent
|
|
34
|
+
false
|
|
35
|
+
end
|
|
36
|
+
end
|
|
37
|
+
end
|
|
38
|
+
end
|
|
39
|
+
end
|
|
40
|
+
end
|
|
41
|
+
end
|
|
@@ -24,7 +24,9 @@ end
|
|
|
24
24
|
# so the gem still works as a standalone HTTP client without any AMQP runtime.
|
|
25
25
|
if Legion::Extensions.const_defined?(:Core, false)
|
|
26
26
|
require 'legion/extensions/ollama/transport/exchanges/llm_request'
|
|
27
|
+
require 'legion/extensions/ollama/transport/exchanges/llm_registry'
|
|
27
28
|
require 'legion/extensions/ollama/transport/messages/llm_response'
|
|
29
|
+
require 'legion/extensions/ollama/transport/messages/registry_event'
|
|
28
30
|
require 'legion/extensions/ollama/transport'
|
|
29
31
|
require 'legion/extensions/ollama/actors/model_worker'
|
|
30
32
|
require 'legion/extensions/ollama/actors/endpoint_puller'
|
|
@@ -53,6 +55,10 @@ module Legion
|
|
|
53
55
|
idle_backoff_ms: 1_000,
|
|
54
56
|
max_consecutive_pulls_per_lane: 0,
|
|
55
57
|
accept_when: []
|
|
58
|
+
},
|
|
59
|
+
offering_lanes: {
|
|
60
|
+
enabled: false,
|
|
61
|
+
instance_id: nil
|
|
56
62
|
}
|
|
57
63
|
}
|
|
58
64
|
}
|
|
@@ -75,29 +81,111 @@ module Legion
|
|
|
75
81
|
return unless subs.is_a?(Array)
|
|
76
82
|
return if fleet_scheduler == :basic_get
|
|
77
83
|
|
|
78
|
-
subs.each do |sub|
|
|
79
|
-
request_type
|
|
80
|
-
model
|
|
81
|
-
|
|
82
|
-
context_window = setting_value(sub, :context_window) ||
|
|
83
|
-
setting_value(limits, :context_window)
|
|
84
|
+
sorted_subscriptions(subs).each do |sub|
|
|
85
|
+
request_type = setting_value(sub, :type)&.to_s
|
|
86
|
+
model = setting_value(sub, :model)&.to_s
|
|
87
|
+
context_window = context_window_for(sub)
|
|
84
88
|
next unless request_type && model
|
|
85
89
|
|
|
86
|
-
|
|
87
|
-
worker_class = Class.new(Legion::Extensions::Ollama::Actor::ModelWorker) do
|
|
88
|
-
define_method(:initialize) do
|
|
89
|
-
super(request_type: request_type, model: model, context_window: context_window)
|
|
90
|
-
end
|
|
91
|
-
end
|
|
90
|
+
register_model_worker(request_type: request_type, model: model, context_window: context_window)
|
|
92
91
|
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
92
|
+
offering_instance_id = offering_instance_for(sub)
|
|
93
|
+
next unless offering_instance_id
|
|
94
|
+
|
|
95
|
+
register_model_worker(request_type: request_type, model: model, context_window: context_window,
|
|
96
|
+
lane_style: :offering, offering_instance_id: offering_instance_id)
|
|
97
|
+
end
|
|
98
|
+
end
|
|
99
|
+
|
|
100
|
+
def self.sorted_subscriptions(subscriptions)
|
|
101
|
+
subscriptions.sort_by do |sub|
|
|
102
|
+
type = setting_value(sub, :type).to_s
|
|
103
|
+
[
|
|
104
|
+
type == 'embed' ? 0 : 1,
|
|
105
|
+
context_window_for(sub) || Float::INFINITY,
|
|
106
|
+
setting_value(sub, :model).to_s
|
|
107
|
+
]
|
|
108
|
+
end
|
|
109
|
+
end
|
|
110
|
+
|
|
111
|
+
def self.context_window_for(subscription)
|
|
112
|
+
limits = setting_value(subscription, :limits) || {}
|
|
113
|
+
raw = setting_value(subscription, :context_window) ||
|
|
114
|
+
setting_value(subscription, :max_context) ||
|
|
115
|
+
setting_value(subscription, :max_input_tokens) ||
|
|
116
|
+
setting_value(limits, :context_window) ||
|
|
117
|
+
setting_value(limits, :max_input_tokens)
|
|
118
|
+
return nil if raw.nil? || raw.to_s.empty?
|
|
119
|
+
|
|
120
|
+
Integer(raw)
|
|
121
|
+
rescue ArgumentError, TypeError
|
|
122
|
+
nil
|
|
123
|
+
end
|
|
124
|
+
|
|
125
|
+
# Builds an anonymous ModelWorker subclass bound to one request type / model /
# lane combination and records it in @actors under a generated actor name.
#
# @param request_type [String] request type handled by the worker
# @param model [String] model name handled by the worker
# @param context_window [Integer, nil] context window passed through to the worker
# @param lane_style [Symbol] lane style passed through to the worker (default :shared)
# @param offering_instance_id [String, nil] offering instance id passed through to the worker
# @return [Hash] the actor entry stored in @actors
def self.register_model_worker(request_type:, model:, context_window:, lane_style: :shared,
                               offering_instance_id: nil)
  # Keyword arguments every instance of the generated class hands to ModelWorker#initialize.
  worker_kwargs = {
    request_type: request_type,
    model: model,
    context_window: context_window,
    lane_style: lane_style,
    offering_instance_id: offering_instance_id
  }.freeze

  name = model_worker_actor_name(
    request_type: request_type,
    model: model,
    lane_style: lane_style,
    offering_instance_id: offering_instance_id
  )

  # The subclass takes no constructor arguments: its configuration is captured by the closure.
  klass = Class.new(Legion::Extensions::Ollama::Actor::ModelWorker) do
    define_method(:initialize) { super(**worker_kwargs) }
  end

  @actors[name] = {
    extension: 'lex-ollama',
    extension_name: :ollama,
    actor_name: name,
    actor_class: klass,
    type: 'literal'
  }
end
|
|
153
|
+
|
|
154
|
+
# Determines the offering instance id for a subscription.
#
# Returns nil immediately when offering lanes are not enabled. Otherwise the
# subscription's own keys are consulted first (:offering_instance_id,
# :provider_instance, :instance_id), then the fleet offering-lane settings
# (:instance_id, :provider_instance, :offering_instance_id).
#
# @param subscription [Object] subscription settings readable via setting_value
# @return [String, nil] the id as a non-empty string, or nil when none applies
def self.offering_instance_for(subscription)
  return nil unless offering_lanes_enabled?

  from_subscription = setting_value(subscription, :offering_instance_id) ||
                      setting_value(subscription, :provider_instance) ||
                      setting_value(subscription, :instance_id)
  candidate = from_subscription ||
              fleet_offering_lane_setting(:instance_id) ||
              fleet_offering_lane_setting(:provider_instance) ||
              fleet_offering_lane_setting(:offering_instance_id)

  id_text = candidate&.to_s
  # A blank id is treated the same as a missing one.
  if id_text.nil? || id_text.empty?
    nil
  else
    id_text
  end
end
|
|
168
|
+
|
|
169
|
+
# Reports whether offering lanes are switched on in the fleet settings.
#
# Only a literal `true` for the :enabled setting counts; any other value, or
# any StandardError raised while reading the setting, yields false.
#
# @return [Boolean]
def self.offering_lanes_enabled?
  flag = fleet_offering_lane_setting(:enabled)
  flag == true
rescue StandardError
  false
end
|
|
174
|
+
|
|
175
|
+
# Reads one key from the fleet -> offering_lanes section of the extension settings.
#
# @param key [Symbol] setting name inside the offering_lanes section
# @return [Object, nil] whatever setting_value returns for that key; an empty
#   hash stands in for the section when it is not configured
def self.fleet_offering_lane_setting(key)
  lanes_section = nested_setting(settings, :fleet, :offering_lanes)
  setting_value(lanes_section || {}, key)
end
|
|
179
|
+
|
|
180
|
+
# Generates the actor name (a Symbol) under which a model worker is registered.
#
# Shared lanes use "model_worker_<request_type>_<model>", with ':' and '.' in
# the model replaced by '_'. Every other lane style joins lane style, request
# type, model and offering instance id (nil parts dropped) and normalises the
# result through actor_suffix.
#
# @param request_type [String] request type handled by the worker
# @param model [String] model name
# @param lane_style [Symbol, String] lane style; 'shared' selects the first form
# @param offering_instance_id [String, nil] offering instance id, if any
# @return [Symbol]
def self.model_worker_actor_name(request_type:, model:, lane_style:, offering_instance_id:)
  if lane_style.to_s == 'shared'
    sanitized_model = model.tr(':.', '__')
    return :"model_worker_#{request_type}_#{sanitized_model}"
  end

  parts = [lane_style, request_type, model, offering_instance_id].compact
  :"model_worker_#{actor_suffix(parts.join('_'))}"
end
|
|
186
|
+
|
|
187
|
+
# Normalises an arbitrary value into a lowercase, underscore-separated token.
#
# The value is stringified and downcased, every run of characters outside
# a-z / 0-9 becomes a single '_', and leading/trailing underscores are removed.
#
# @param value [Object] anything responding to to_s
# @return [String]
def self.actor_suffix(value)
  collapsed = value.to_s.downcase.gsub(/[^a-z0-9]+/, '_')
  collapsed.gsub(/\A_+|_+\z/, '')
end
|
|
102
190
|
|
|
103
191
|
def self.fleet_scheduler
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: lex-ollama
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.3.5
|
|
4
|
+
version: 0.3.10
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Esity
|
|
@@ -23,6 +23,76 @@ dependencies:
|
|
|
23
23
|
- - ">="
|
|
24
24
|
- !ruby/object:Gem::Version
|
|
25
25
|
version: '2.0'
|
|
26
|
+
- !ruby/object:Gem::Dependency
|
|
27
|
+
name: legion-json
|
|
28
|
+
requirement: !ruby/object:Gem::Requirement
|
|
29
|
+
requirements:
|
|
30
|
+
- - ">="
|
|
31
|
+
- !ruby/object:Gem::Version
|
|
32
|
+
version: 1.2.1
|
|
33
|
+
type: :runtime
|
|
34
|
+
prerelease: false
|
|
35
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
36
|
+
requirements:
|
|
37
|
+
- - ">="
|
|
38
|
+
- !ruby/object:Gem::Version
|
|
39
|
+
version: 1.2.1
|
|
40
|
+
- !ruby/object:Gem::Dependency
|
|
41
|
+
name: legion-llm
|
|
42
|
+
requirement: !ruby/object:Gem::Requirement
|
|
43
|
+
requirements:
|
|
44
|
+
- - ">="
|
|
45
|
+
- !ruby/object:Gem::Version
|
|
46
|
+
version: 0.8.32
|
|
47
|
+
type: :runtime
|
|
48
|
+
prerelease: false
|
|
49
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
50
|
+
requirements:
|
|
51
|
+
- - ">="
|
|
52
|
+
- !ruby/object:Gem::Version
|
|
53
|
+
version: 0.8.32
|
|
54
|
+
- !ruby/object:Gem::Dependency
|
|
55
|
+
name: legion-logging
|
|
56
|
+
requirement: !ruby/object:Gem::Requirement
|
|
57
|
+
requirements:
|
|
58
|
+
- - ">="
|
|
59
|
+
- !ruby/object:Gem::Version
|
|
60
|
+
version: 1.3.2
|
|
61
|
+
type: :runtime
|
|
62
|
+
prerelease: false
|
|
63
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
64
|
+
requirements:
|
|
65
|
+
- - ">="
|
|
66
|
+
- !ruby/object:Gem::Version
|
|
67
|
+
version: 1.3.2
|
|
68
|
+
- !ruby/object:Gem::Dependency
|
|
69
|
+
name: legion-settings
|
|
70
|
+
requirement: !ruby/object:Gem::Requirement
|
|
71
|
+
requirements:
|
|
72
|
+
- - ">="
|
|
73
|
+
- !ruby/object:Gem::Version
|
|
74
|
+
version: 1.3.14
|
|
75
|
+
type: :runtime
|
|
76
|
+
prerelease: false
|
|
77
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
78
|
+
requirements:
|
|
79
|
+
- - ">="
|
|
80
|
+
- !ruby/object:Gem::Version
|
|
81
|
+
version: 1.3.14
|
|
82
|
+
- !ruby/object:Gem::Dependency
|
|
83
|
+
name: lex-llm
|
|
84
|
+
requirement: !ruby/object:Gem::Requirement
|
|
85
|
+
requirements:
|
|
86
|
+
- - ">="
|
|
87
|
+
- !ruby/object:Gem::Version
|
|
88
|
+
version: 0.1.6
|
|
89
|
+
type: :runtime
|
|
90
|
+
prerelease: false
|
|
91
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
92
|
+
requirements:
|
|
93
|
+
- - ">="
|
|
94
|
+
- !ruby/object:Gem::Version
|
|
95
|
+
version: 0.1.6
|
|
26
96
|
- !ruby/object:Gem::Dependency
|
|
27
97
|
name: lex-s3
|
|
28
98
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -72,8 +142,10 @@ files:
|
|
|
72
142
|
- lib/legion/extensions/ollama/runners/s3_models.rb
|
|
73
143
|
- lib/legion/extensions/ollama/runners/version.rb
|
|
74
144
|
- lib/legion/extensions/ollama/transport.rb
|
|
145
|
+
- lib/legion/extensions/ollama/transport/exchanges/llm_registry.rb
|
|
75
146
|
- lib/legion/extensions/ollama/transport/exchanges/llm_request.rb
|
|
76
147
|
- lib/legion/extensions/ollama/transport/messages/llm_response.rb
|
|
148
|
+
- lib/legion/extensions/ollama/transport/messages/registry_event.rb
|
|
77
149
|
- lib/legion/extensions/ollama/version.rb
|
|
78
150
|
homepage: https://github.com/LegionIO/lex-ollama
|
|
79
151
|
licenses:
|