lex-llm 0.5.4 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +24 -0
- data/lib/legion/extensions/llm/capabilities.rb +69 -0
- data/lib/legion/extensions/llm/capability_policy.rb +27 -18
- data/lib/legion/extensions/llm/credential_sources.rb +6 -6
- data/lib/legion/extensions/llm/fleet/provider_responder.rb +1 -1
- data/lib/legion/extensions/llm/fleet/settings.rb +2 -2
- data/lib/legion/extensions/llm/inventory/capabilities.rb +40 -0
- data/lib/legion/extensions/llm/inventory/scoped_refresher.rb +105 -0
- data/lib/legion/extensions/llm/model/info.rb +12 -1
- data/lib/legion/extensions/llm/provider.rb +216 -12
- data/lib/legion/extensions/llm/registry_event_builder.rb +4 -3
- data/lib/legion/extensions/llm/registry_publisher.rb +11 -8
- data/lib/legion/extensions/llm/routing/model_offering.rb +2 -9
- data/lib/legion/extensions/llm/taxonomies.rb +14 -0
- data/lib/legion/extensions/llm/version.rb +1 -1
- data/lib/legion/extensions/llm.rb +6 -0
- data/spec/legion/extensions/llm/capabilities_spec.rb +50 -0
- data/spec/legion/extensions/llm/capability_policy_spec.rb +28 -5
- data/spec/legion/extensions/llm/inventory/capabilities_spec.rb +43 -0
- data/spec/legion/extensions/llm/inventory/scoped_refresher_spec.rb +209 -0
- data/spec/legion/extensions/llm/provider/open_ai_compatible_spec.rb +1 -1
- data/spec/legion/extensions/llm/provider_spec.rb +20 -0
- data/spec/legion/extensions/llm/routing/model_offering_spec.rb +3 -2
- data/spec/legion/extensions/llm/taxonomies_spec.rb +28 -0
- metadata +9 -1
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'spec_helper'
|
|
4
|
+
require 'legion/extensions/llm/inventory/scoped_refresher'
|
|
5
|
+
|
|
6
|
+
RSpec.describe Legion::Extensions::Llm::Inventory::ScopedRefresher do
|
|
7
|
+
describe '.compose_id' do
|
|
8
|
+
it 'builds a 5-part colon-separated id (G22)' do
|
|
9
|
+
id = described_class.compose_id(
|
|
10
|
+
tier: :direct, provider_family: :vllm, instance_id: :apollo,
|
|
11
|
+
type: :inference, model: 'gemma-12b'
|
|
12
|
+
)
|
|
13
|
+
expect(id).to eq('direct:vllm:apollo:inference:gemma-12b')
|
|
14
|
+
expect(id.split(':').size).to eq(5)
|
|
15
|
+
end
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
describe '#tick write-then-delete-orphans (G7)' do
|
|
19
|
+
let(:inventory_writes) { [] }
|
|
20
|
+
let(:inventory_deletes) { [] }
|
|
21
|
+
|
|
22
|
+
before do
|
|
23
|
+
stub_const('Legion::LLM::Inventory', Module.new)
|
|
24
|
+
allow(Legion::LLM::Inventory).to receive(:write_lane) { |lane:, **| inventory_writes << lane[:id] }
|
|
25
|
+
allow(Legion::LLM::Inventory).to receive(:delete_lane) { |id:, **| inventory_deletes << id }
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
def make_actor(models)
|
|
29
|
+
klass = Class.new do
|
|
30
|
+
include Legion::Extensions::Llm::Inventory::ScopedRefresher
|
|
31
|
+
|
|
32
|
+
def self.every_seconds = 60
|
|
33
|
+
def scope_key = { provider: :test }
|
|
34
|
+
def credential_hash = 'testhash'
|
|
35
|
+
|
|
36
|
+
attr_accessor :models
|
|
37
|
+
|
|
38
|
+
def compute_lanes_for_scope
|
|
39
|
+
@models.map do |m|
|
|
40
|
+
{
|
|
41
|
+
id: Legion::Extensions::Llm::Inventory::ScopedRefresher.compose_id(
|
|
42
|
+
tier: :direct, provider_family: :test, instance_id: :default,
|
|
43
|
+
type: :inference, model: m
|
|
44
|
+
),
|
|
45
|
+
tier: :direct, provider_family: :test, instance_id: :default,
|
|
46
|
+
model: m, type: :inference
|
|
47
|
+
}
|
|
48
|
+
end
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
def log = Logger.new(File::NULL)
|
|
52
|
+
def handle_exception(_err, **) = nil
|
|
53
|
+
end
|
|
54
|
+
actor = klass.new
|
|
55
|
+
actor.models = models
|
|
56
|
+
actor
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
it 'writes new lanes before deleting orphans (zero-results race window eliminated)' do
|
|
60
|
+
actor = make_actor(%w[gemma-12b])
|
|
61
|
+
actor.tick
|
|
62
|
+
expect(inventory_writes).to include('direct:test:default:inference:gemma-12b')
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
it 'deletes orphaned lanes (present on previous tick, absent on current)' do
|
|
66
|
+
actor = make_actor(%w[gemma-12b gemma-31b])
|
|
67
|
+
actor.tick
|
|
68
|
+
actor.models = %w[gemma-31b]
|
|
69
|
+
actor.tick
|
|
70
|
+
expect(inventory_deletes).to include('direct:test:default:inference:gemma-12b')
|
|
71
|
+
expect(inventory_writes.last).to eq('direct:test:default:inference:gemma-31b')
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
it 'writes nothing when compute raises, leaving previous lanes in place' do
|
|
75
|
+
actor = make_actor(%w[gemma-12b])
|
|
76
|
+
actor.tick
|
|
77
|
+
allow(actor).to receive(:compute_lanes_for_scope).and_raise(StandardError, 'net error')
|
|
78
|
+
actor.tick
|
|
79
|
+
expect(inventory_writes.count('direct:test:default:inference:gemma-12b')).to eq(1)
|
|
80
|
+
expect(inventory_deletes).to be_empty
|
|
81
|
+
end
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
describe 'auth-failure cooldown circuit (P2 commit 3)' do
|
|
85
|
+
let(:cache_store) { {} }
|
|
86
|
+
let(:cooldown_key) { 'llm_auth_failed:testhash' }
|
|
87
|
+
|
|
88
|
+
before do
|
|
89
|
+
stub_const('Legion::LLM::Inventory', Module.new)
|
|
90
|
+
allow(Legion::LLM::Inventory).to receive(:write_lane)
|
|
91
|
+
allow(Legion::LLM::Inventory).to receive(:delete_lane)
|
|
92
|
+
|
|
93
|
+
stub_const('Legion::Cache::Local', Module.new)
|
|
94
|
+
allow(Legion::Cache::Local).to receive(:get) { |k| cache_store[k] }
|
|
95
|
+
allow(Legion::Cache::Local).to receive(:set) do |k, v, **|
|
|
96
|
+
cache_store[k] = v
|
|
97
|
+
end
|
|
98
|
+
end
|
|
99
|
+
|
|
100
|
+
def make_auth_fail_actor(**)
|
|
101
|
+
klass = Class.new do
|
|
102
|
+
include Legion::Extensions::Llm::Inventory::ScopedRefresher
|
|
103
|
+
|
|
104
|
+
def self.every_seconds = 60
|
|
105
|
+
def scope_key = { provider: :test }
|
|
106
|
+
def credential_hash = 'testhash'
|
|
107
|
+
|
|
108
|
+
attr_accessor :should_raise, :raise_error
|
|
109
|
+
|
|
110
|
+
def compute_lanes_for_scope
|
|
111
|
+
raise @raise_error if @should_raise
|
|
112
|
+
|
|
113
|
+
[]
|
|
114
|
+
end
|
|
115
|
+
|
|
116
|
+
def log
|
|
117
|
+
@log ||= begin
|
|
118
|
+
l = Object.new
|
|
119
|
+
def l.warn(_msg) = nil
|
|
120
|
+
def l.info(_msg) = nil
|
|
121
|
+
def l.debug(_msg) = nil
|
|
122
|
+
l
|
|
123
|
+
end
|
|
124
|
+
end
|
|
125
|
+
|
|
126
|
+
def handle_exception(_err, **) = nil
|
|
127
|
+
end
|
|
128
|
+
actor = klass.new
|
|
129
|
+
actor.should_raise = false
|
|
130
|
+
actor.raise_error = nil
|
|
131
|
+
actor
|
|
132
|
+
end
|
|
133
|
+
|
|
134
|
+
it 'writes auth cooldown key when compute raises with HTTP 401 status' do
|
|
135
|
+
actor = make_auth_fail_actor
|
|
136
|
+
err = StandardError.new('Unauthorized')
|
|
137
|
+
err.define_singleton_method(:status_code) { 401 }
|
|
138
|
+
actor.raise_error = err
|
|
139
|
+
actor.should_raise = true
|
|
140
|
+
|
|
141
|
+
actor.tick
|
|
142
|
+
|
|
143
|
+
expect(cache_store).to have_key(cooldown_key)
|
|
144
|
+
end
|
|
145
|
+
|
|
146
|
+
it 'writes auth cooldown key when compute raises with HTTP 403 status' do
|
|
147
|
+
actor = make_auth_fail_actor
|
|
148
|
+
err = StandardError.new('Forbidden')
|
|
149
|
+
err.define_singleton_method(:http_status) { 403 }
|
|
150
|
+
actor.raise_error = err
|
|
151
|
+
actor.should_raise = true
|
|
152
|
+
|
|
153
|
+
actor.tick
|
|
154
|
+
|
|
155
|
+
expect(cache_store).to have_key(cooldown_key)
|
|
156
|
+
end
|
|
157
|
+
|
|
158
|
+
it 'writes auth cooldown key when compute raises with unauthorized message' do
|
|
159
|
+
actor = make_auth_fail_actor
|
|
160
|
+
actor.raise_error = StandardError.new('invalid_api_key: bad credentials')
|
|
161
|
+
actor.should_raise = true
|
|
162
|
+
|
|
163
|
+
actor.tick
|
|
164
|
+
|
|
165
|
+
expect(cache_store).to have_key(cooldown_key)
|
|
166
|
+
end
|
|
167
|
+
|
|
168
|
+
it 'skips compute_lanes_for_scope when cooldown key is present' do
|
|
169
|
+
actor = make_auth_fail_actor
|
|
170
|
+
actor.should_raise = false
|
|
171
|
+
cache_store[cooldown_key] = 1 # simulate active cooldown
|
|
172
|
+
|
|
173
|
+
compute_called = false
|
|
174
|
+
actor.define_singleton_method(:compute_lanes_for_scope) do
|
|
175
|
+
compute_called = true
|
|
176
|
+
[]
|
|
177
|
+
end
|
|
178
|
+
|
|
179
|
+
actor.tick
|
|
180
|
+
expect(compute_called).to be false
|
|
181
|
+
end
|
|
182
|
+
|
|
183
|
+
it 'calls compute_lanes_for_scope after cooldown TTL expires' do
|
|
184
|
+
actor = make_auth_fail_actor
|
|
185
|
+
actor.should_raise = false
|
|
186
|
+
# Cooldown expired — key absent
|
|
187
|
+
cache_store.delete(cooldown_key)
|
|
188
|
+
|
|
189
|
+
compute_called = false
|
|
190
|
+
actor.define_singleton_method(:compute_lanes_for_scope) do
|
|
191
|
+
compute_called = true
|
|
192
|
+
[]
|
|
193
|
+
end
|
|
194
|
+
|
|
195
|
+
actor.tick
|
|
196
|
+
expect(compute_called).to be true
|
|
197
|
+
end
|
|
198
|
+
|
|
199
|
+
it 'does NOT write cooldown key for non-auth errors' do
|
|
200
|
+
actor = make_auth_fail_actor
|
|
201
|
+
actor.raise_error = StandardError.new('connection timeout: net unreachable')
|
|
202
|
+
actor.should_raise = true
|
|
203
|
+
|
|
204
|
+
actor.tick
|
|
205
|
+
|
|
206
|
+
expect(cache_store).not_to have_key(cooldown_key)
|
|
207
|
+
end
|
|
208
|
+
end
|
|
209
|
+
end
|
|
@@ -131,7 +131,7 @@ RSpec.describe Legion::Extensions::Llm::Provider::OpenAICompatible do
|
|
|
131
131
|
models = provider.send(:parse_list_models_response, fake_response(models_body), :compatible,
|
|
132
132
|
provider_class.capabilities)
|
|
133
133
|
|
|
134
|
-
expect(models.map(&:capabilities)).to eq([%i[streaming function_calling tools], %i[embeddings]])
|
|
134
|
+
expect(models.map(&:capabilities)).to eq([%i[streaming function_calling tools], %i[embeddings embedding]])
|
|
135
135
|
expect(models.map { |model| model.modalities.to_h }).to eq([
|
|
136
136
|
{ input: %w[text image], output: %w[text] },
|
|
137
137
|
{ input: %w[text], output: %w[embeddings] }
|
|
@@ -175,6 +175,26 @@ RSpec.describe Legion::Extensions::Llm::Provider do
|
|
|
175
175
|
expect(offering.context_window).to eq(8192)
|
|
176
176
|
end
|
|
177
177
|
|
|
178
|
+
it 'publishes every discovered model before policy filtering removes blocked models' do
|
|
179
|
+
blocked_model = Legion::Extensions::Llm::Model::Info.new(
|
|
180
|
+
id: 'blocked-model',
|
|
181
|
+
provider: :contract,
|
|
182
|
+
instance: :primary,
|
|
183
|
+
capabilities: %i[completion],
|
|
184
|
+
context_length: 4096
|
|
185
|
+
)
|
|
186
|
+
registry_publisher = instance_double(Legion::Extensions::Llm::RegistryPublisher)
|
|
187
|
+
allow(registry_publisher).to receive(:publish_models_async)
|
|
188
|
+
allow(provider_class).to receive(:registry_publisher).and_return(registry_publisher)
|
|
189
|
+
allow(provider).to receive_messages(list_models: [model, blocked_model], settings: { model_blacklist: ['blocked'] })
|
|
190
|
+
|
|
191
|
+
offerings = provider.discover_offerings(live: true)
|
|
192
|
+
|
|
193
|
+
expect(offerings.map(&:model)).to eq(['test-model'])
|
|
194
|
+
expect(registry_publisher).to have_received(:publish_models_async).with([model], anything)
|
|
195
|
+
expect(registry_publisher).to have_received(:publish_models_async).with([blocked_model], anything)
|
|
196
|
+
end
|
|
197
|
+
|
|
178
198
|
it 'passes live discovery filters through to list_models' do
|
|
179
199
|
provider.discover_offerings(live: true, capability: :tools, instance: :primary)
|
|
180
200
|
|
|
@@ -96,7 +96,8 @@ RSpec.describe Legion::Extensions::Llm::Routing::ModelOffering do
|
|
|
96
96
|
capabilities: %i[chat function_calling]
|
|
97
97
|
)
|
|
98
98
|
|
|
99
|
-
expect(legacy_tools.capabilities).to include(:
|
|
99
|
+
expect(legacy_tools.capabilities).to include(:tools)
|
|
100
|
+
expect(legacy_tools.capabilities).not_to include(:function_calling)
|
|
100
101
|
expect(legacy_tools.eligible_for?(required_capabilities: [:tools])).to be true
|
|
101
102
|
end
|
|
102
103
|
|
|
@@ -236,7 +237,7 @@ RSpec.describe Legion::Extensions::Llm::Routing::ModelOffering do
|
|
|
236
237
|
|
|
237
238
|
expect(sourced.capabilities).to include(:streaming, :tools)
|
|
238
239
|
expect(sourced.capability_sources[:tools]).to eq(value: true, source: :instance_override)
|
|
239
|
-
expect(sourced.capability_sources[:
|
|
240
|
+
expect(sourced.capability_sources[:embedding]).to eq(value: false, source: :default_false)
|
|
240
241
|
end
|
|
241
242
|
|
|
242
243
|
it 'includes capability_sources in to_h' do
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'spec_helper'
|
|
4
|
+
require 'legion/extensions/llm/taxonomies'
|
|
5
|
+
|
|
6
|
+
RSpec.describe Legion::Extensions::Llm::Taxonomies do
|
|
7
|
+
it 'TIERS includes :fleet as a first-class tier' do
|
|
8
|
+
expect(described_class::TIERS).to include(:fleet)
|
|
9
|
+
end
|
|
10
|
+
|
|
11
|
+
it 'TIERS contains exactly the documented values' do
|
|
12
|
+
expect(described_class::TIERS).to contain_exactly(:direct, :local, :fleet, :cloud, :frontier)
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
it 'TYPES contains documented inference types' do
|
|
16
|
+
expect(described_class::TYPES).to include(:inference, :embedding)
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
it 'CIRCUIT_STATES contains three states' do
|
|
20
|
+
expect(described_class::CIRCUIT_STATES).to contain_exactly(:closed, :half_open, :open)
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
it 'all constants are frozen' do
|
|
24
|
+
expect(described_class::TIERS).to be_frozen
|
|
25
|
+
expect(described_class::TYPES).to be_frozen
|
|
26
|
+
expect(described_class::CIRCUIT_STATES).to be_frozen
|
|
27
|
+
end
|
|
28
|
+
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: lex-llm
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.5.4
|
|
4
|
+
version: 0.6.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- LegionIO
|
|
@@ -272,6 +272,7 @@ files:
|
|
|
272
272
|
- lib/legion/extensions/llm/canonical/tool_definition.rb
|
|
273
273
|
- lib/legion/extensions/llm/canonical/tool_schema.rb
|
|
274
274
|
- lib/legion/extensions/llm/canonical/usage.rb
|
|
275
|
+
- lib/legion/extensions/llm/capabilities.rb
|
|
275
276
|
- lib/legion/extensions/llm/capability_policy.rb
|
|
276
277
|
- lib/legion/extensions/llm/chat.rb
|
|
277
278
|
- lib/legion/extensions/llm/chunk.rb
|
|
@@ -293,6 +294,8 @@ files:
|
|
|
293
294
|
- lib/legion/extensions/llm/fleet/token_validator.rb
|
|
294
295
|
- lib/legion/extensions/llm/fleet/worker_execution.rb
|
|
295
296
|
- lib/legion/extensions/llm/image.rb
|
|
297
|
+
- lib/legion/extensions/llm/inventory/capabilities.rb
|
|
298
|
+
- lib/legion/extensions/llm/inventory/scoped_refresher.rb
|
|
296
299
|
- lib/legion/extensions/llm/message.rb
|
|
297
300
|
- lib/legion/extensions/llm/mime_type.rb
|
|
298
301
|
- lib/legion/extensions/llm/model.rb
|
|
@@ -322,6 +325,7 @@ files:
|
|
|
322
325
|
- lib/legion/extensions/llm/routing/registry_event.rb
|
|
323
326
|
- lib/legion/extensions/llm/stream_accumulator.rb
|
|
324
327
|
- lib/legion/extensions/llm/streaming.rb
|
|
328
|
+
- lib/legion/extensions/llm/taxonomies.rb
|
|
325
329
|
- lib/legion/extensions/llm/thinking.rb
|
|
326
330
|
- lib/legion/extensions/llm/tokens.rb
|
|
327
331
|
- lib/legion/extensions/llm/tool.rb
|
|
@@ -357,6 +361,7 @@ files:
|
|
|
357
361
|
- spec/legion/extensions/llm/canonical/tool_definition_spec.rb
|
|
358
362
|
- spec/legion/extensions/llm/canonical/tool_schema_spec.rb
|
|
359
363
|
- spec/legion/extensions/llm/canonical/usage_spec.rb
|
|
364
|
+
- spec/legion/extensions/llm/capabilities_spec.rb
|
|
360
365
|
- spec/legion/extensions/llm/capability_policy_spec.rb
|
|
361
366
|
- spec/legion/extensions/llm/configuration_spec.rb
|
|
362
367
|
- spec/legion/extensions/llm/conformance/client_translator_examples.rb
|
|
@@ -398,6 +403,8 @@ files:
|
|
|
398
403
|
- spec/legion/extensions/llm/fleet/worker_execution_spec.rb
|
|
399
404
|
- spec/legion/extensions/llm/fleet_messages_spec.rb
|
|
400
405
|
- spec/legion/extensions/llm/gemspec_spec.rb
|
|
406
|
+
- spec/legion/extensions/llm/inventory/capabilities_spec.rb
|
|
407
|
+
- spec/legion/extensions/llm/inventory/scoped_refresher_spec.rb
|
|
401
408
|
- spec/legion/extensions/llm/message_spec.rb
|
|
402
409
|
- spec/legion/extensions/llm/model/info_spec.rb
|
|
403
410
|
- spec/legion/extensions/llm/models_spec.rb
|
|
@@ -415,6 +422,7 @@ files:
|
|
|
415
422
|
- spec/legion/extensions/llm/routing/registry_event_spec.rb
|
|
416
423
|
- spec/legion/extensions/llm/stream_accumulator_spec.rb
|
|
417
424
|
- spec/legion/extensions/llm/streaming_spec.rb
|
|
425
|
+
- spec/legion/extensions/llm/taxonomies_spec.rb
|
|
418
426
|
- spec/legion/extensions/llm/tool_spec.rb
|
|
419
427
|
- spec/legion/extensions/llm/transport/fleet_lane_spec.rb
|
|
420
428
|
- spec/legion/extensions/llm/utils_spec.rb
|