lex-llm 0.4.16 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +13 -2
- data/B1b-conformance-kit.md +79 -0
- data/CHANGELOG.md +33 -0
- data/README.md +349 -153
- data/lex-llm.gemspec +3 -3
- data/lib/legion/extensions/llm/attachment.rb +1 -1
- data/lib/legion/extensions/llm/canonical/chunk.rb +184 -0
- data/lib/legion/extensions/llm/canonical/content_block.rb +126 -0
- data/lib/legion/extensions/llm/canonical/message.rb +125 -0
- data/lib/legion/extensions/llm/canonical/params.rb +61 -0
- data/lib/legion/extensions/llm/canonical/request.rb +117 -0
- data/lib/legion/extensions/llm/canonical/response.rb +124 -0
- data/lib/legion/extensions/llm/canonical/thinking.rb +81 -0
- data/lib/legion/extensions/llm/canonical/tool_call.rb +134 -0
- data/lib/legion/extensions/llm/canonical/tool_definition.rb +73 -0
- data/lib/legion/extensions/llm/canonical/usage.rb +61 -0
- data/lib/legion/extensions/llm/canonical.rb +49 -0
- data/lib/legion/extensions/llm/chat.rb +3 -5
- data/lib/legion/extensions/llm/connection.rb +14 -2
- data/lib/legion/extensions/llm/error.rb +3 -7
- data/lib/legion/extensions/llm/fleet/envelope_validation.rb +1 -3
- data/lib/legion/extensions/llm/fleet/provider_responder.rb +1 -3
- data/lib/legion/extensions/llm/fleet/token_validator.rb +1 -3
- data/lib/legion/extensions/llm/model/info.rb +4 -6
- data/lib/legion/extensions/llm/models.rb +3 -3
- data/lib/legion/extensions/llm/provider/open_ai_compatible.rb +12 -4
- data/lib/legion/extensions/llm/routing/lane_key.rb +1 -3
- data/lib/legion/extensions/llm/stream_accumulator.rb +1 -1
- data/lib/legion/extensions/llm/streaming.rb +6 -4
- data/lib/legion/extensions/llm/tool.rb +1 -3
- data/lib/legion/extensions/llm/version.rb +1 -1
- data/lib/legion/extensions/llm.rb +118 -35
- data/spec/fixtures/ruby.mp3 +0 -0
- data/spec/fixtures/ruby.mp4 +0 -0
- data/spec/fixtures/ruby.png +0 -0
- data/spec/fixtures/ruby.txt +1 -0
- data/spec/fixtures/ruby.wav +0 -0
- data/spec/fixtures/ruby.xml +1 -0
- data/spec/fixtures/sample.pdf +0 -0
- data/spec/legion/extensions/llm/agent_spec.rb +179 -0
- data/spec/legion/extensions/llm/attachment_spec.rb +25 -0
- data/spec/legion/extensions/llm/auto_registration_spec.rb +38 -0
- data/spec/legion/extensions/llm/canonical/chunk_spec.rb +285 -0
- data/spec/legion/extensions/llm/canonical/content_block_spec.rb +179 -0
- data/spec/legion/extensions/llm/canonical/message_spec.rb +203 -0
- data/spec/legion/extensions/llm/canonical/params_spec.rb +159 -0
- data/spec/legion/extensions/llm/canonical/request_spec.rb +174 -0
- data/spec/legion/extensions/llm/canonical/response_spec.rb +234 -0
- data/spec/legion/extensions/llm/canonical/thinking_spec.rb +151 -0
- data/spec/legion/extensions/llm/canonical/tool_call_spec.rb +191 -0
- data/spec/legion/extensions/llm/canonical/tool_definition_spec.rb +174 -0
- data/spec/legion/extensions/llm/canonical/usage_spec.rb +138 -0
- data/spec/legion/extensions/llm/configuration_spec.rb +38 -0
- data/spec/legion/extensions/llm/conformance/client_translator_examples.rb +269 -0
- data/spec/legion/extensions/llm/conformance/conformance.rb +51 -0
- data/spec/legion/extensions/llm/conformance/echo_translator.rb +56 -0
- data/spec/legion/extensions/llm/conformance/echo_translator_spec.rb +13 -0
- data/spec/legion/extensions/llm/conformance/fixtures/canonical_empty_response.json +13 -0
- data/spec/legion/extensions/llm/conformance/fixtures/canonical_error_response.json +19 -0
- data/spec/legion/extensions/llm/conformance/fixtures/canonical_fleet_round_trip.json +81 -0
- data/spec/legion/extensions/llm/conformance/fixtures/canonical_metering_audit_events.json +101 -0
- data/spec/legion/extensions/llm/conformance/fixtures/canonical_params_mapping_request.json +21 -0
- data/spec/legion/extensions/llm/conformance/fixtures/canonical_simple_text_request.json +13 -0
- data/spec/legion/extensions/llm/conformance/fixtures/canonical_simple_text_response.json +13 -0
- data/spec/legion/extensions/llm/conformance/fixtures/canonical_stop_reason_matrix.json +36 -0
- data/spec/legion/extensions/llm/conformance/fixtures/canonical_streaming_accumulated_response.json +20 -0
- data/spec/legion/extensions/llm/conformance/fixtures/canonical_streaming_error_chunks.json +26 -0
- data/spec/legion/extensions/llm/conformance/fixtures/canonical_streaming_text_chunks.json +33 -0
- data/spec/legion/extensions/llm/conformance/fixtures/canonical_streaming_thinking_chunks.json +42 -0
- data/spec/legion/extensions/llm/conformance/fixtures/canonical_streaming_tool_call_chunks.json +41 -0
- data/spec/legion/extensions/llm/conformance/fixtures/canonical_system_prompt_request.json +14 -0
- data/spec/legion/extensions/llm/conformance/fixtures/canonical_thinking_request.json +18 -0
- data/spec/legion/extensions/llm/conformance/fixtures/canonical_thinking_response.json +17 -0
- data/spec/legion/extensions/llm/conformance/fixtures/canonical_tool_results_continuation_request.json +75 -0
- data/spec/legion/extensions/llm/conformance/fixtures/canonical_tool_use_response.json +25 -0
- data/spec/legion/extensions/llm/conformance/fixtures/canonical_tools_request.json +34 -0
- data/spec/legion/extensions/llm/conformance/provider_translator_examples.rb +390 -0
- data/spec/legion/extensions/llm/connection_logging_spec.rb +53 -0
- data/spec/legion/extensions/llm/connection_retry_spec.rb +36 -0
- data/spec/legion/extensions/llm/context_spec.rb +127 -0
- data/spec/legion/extensions/llm/credential_sources_spec.rb +468 -0
- data/spec/legion/extensions/llm/error_middleware_spec.rb +102 -0
- data/spec/legion/extensions/llm/error_spec.rb +87 -0
- data/spec/legion/extensions/llm/fleet/provider_responder_spec.rb +120 -0
- data/spec/legion/extensions/llm/fleet/token_validator_spec.rb +163 -0
- data/spec/legion/extensions/llm/fleet/worker_execution_spec.rb +128 -0
- data/spec/legion/extensions/llm/fleet_messages_spec.rb +402 -0
- data/spec/legion/extensions/llm/gemspec_spec.rb +25 -0
- data/spec/legion/extensions/llm/message_spec.rb +64 -0
- data/spec/legion/extensions/llm/model/info_spec.rb +222 -0
- data/spec/legion/extensions/llm/models_spec.rb +104 -0
- data/spec/legion/extensions/llm/provider/open_ai_compatible_spec.rb +203 -0
- data/spec/legion/extensions/llm/provider_contract_spec.rb +60 -0
- data/spec/legion/extensions/llm/provider_settings_spec.rb +76 -0
- data/spec/legion/extensions/llm/provider_spec.rb +592 -0
- data/spec/legion/extensions/llm/registry_event_builder_spec.rb +68 -0
- data/spec/legion/extensions/llm/registry_publisher_spec.rb +22 -0
- data/spec/legion/extensions/llm/responses/response_objects_spec.rb +75 -0
- data/spec/legion/extensions/llm/responses/thinking_extractor_spec.rb +75 -0
- data/spec/legion/extensions/llm/routing/model_offering_spec.rb +222 -0
- data/spec/legion/extensions/llm/routing/offering_registry_spec.rb +50 -0
- data/spec/legion/extensions/llm/routing/registry_event_spec.rb +120 -0
- data/spec/legion/extensions/llm/stream_accumulator_spec.rb +103 -0
- data/spec/legion/extensions/llm/streaming_spec.rb +108 -0
- data/spec/legion/extensions/llm/tool_spec.rb +94 -0
- data/spec/legion/extensions/llm/transport/fleet_lane_spec.rb +60 -0
- data/spec/legion/extensions/llm/utils_spec.rb +113 -0
- data/spec/legion/extensions/llm_base_contract_spec.rb +110 -0
- data/spec/legion/extensions/llm_extension_spec.rb +78 -0
- data/spec/legion/extensions/llm_root_spec.rb +51 -0
- data/spec/spec_helper.rb +24 -0
- data/spec/support/fake_llm_provider.rb +148 -0
- data/spec/support/llm_configuration.rb +21 -0
- data/spec/support/rspec_configuration.rb +19 -0
- data/spec/support/simplecov_configuration.rb +20 -0
- metadata +110 -15
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'spec_helper'
|
|
4
|
+
|
|
5
|
+
# rubocop:disable RSpec/DescribeClass

# Serialization contract for the normalized response objects: the public
# +to_h+ payload must not carry provider reasoning or raw bodies, while
# +to_internal_h+ still exposes thinking, the original metadata and raw data.
RSpec.describe 'LLM normalized response objects' do
  # Metadata mixing reasoning-bearing keys (symbol keys and hyphenated string
  # keys) with one harmless +vendor+ entry.
  let(:unsafe_metadata) do
    {
      reasoning_content: 'metadata secret',
      reasoning: 'metadata reasoning',
      thinking_text: 'metadata thinking',
      raw: { reasoning_content: 'nested raw secret' },
      'raw-response' => { reasoning_content: 'hyphen raw secret' },
      'provider-body' => { thinking_text: 'hyphen body secret' },
      vendor: 'vllm'
    }
  end

  # Raw provider body whose message content embeds a <think> block.
  let(:unsafe_raw) do
    { 'choices' => [{ 'message' => { 'content' => '<think>raw secret</think>visible' } }] }
  end

  # Content handed to chat responses and stream chunks: a think tag followed by visible text.
  let(:tagged_content) { "<think>tag secret</think>\nvisible" }

  # Substrings that must not show up in the JSON encoding of a public payload.
  let(:forbidden_fragments) do
    [
      'reasoning', 'reasoning_content', 'thinking_text', '<think>', 'raw secret',
      'nested raw secret', 'hyphen raw secret', 'hyphen body secret'
    ]
  end

  # Thinking text expected from +to_internal_h+ for the inputs above.
  let(:merged_thinking) { 'metadata secretmetadata reasoningmetadata thinkingtag secret' }

  it 'serializes chat responses without raw provider thinking fields' do
    chat = Legion::Extensions::Llm::Responses::ChatResponse.new(
      content: tagged_content, metadata: unsafe_metadata, raw: unsafe_raw
    )

    public_payload = chat.to_h
    json = Legion::JSON.dump(public_payload)

    expect(public_payload).to eq(content: 'visible', metadata: { vendor: 'vllm' })
    expect(json).not_to include(*forbidden_fragments)
    expect(chat.to_internal_h).to include(
      thinking: merged_thinking, metadata: unsafe_metadata, raw: unsafe_raw
    )
  end

  it 'serializes stream chunks without raw provider thinking fields' do
    streamed = Legion::Extensions::Llm::Responses::StreamChunk.new(
      content: tagged_content, metadata: unsafe_metadata, raw: unsafe_raw
    )

    public_payload = streamed.to_h
    json = Legion::JSON.dump(public_payload)

    expect(public_payload).to eq(content: 'visible', metadata: { vendor: 'vllm' })
    expect(json).not_to include(*forbidden_fragments)
    expect(streamed.to_internal_h).to include(
      thinking: merged_thinking, metadata: unsafe_metadata, raw: unsafe_raw
    )
  end

  it 'serializes embedding responses without raw provider payloads' do
    embedding = Legion::Extensions::Llm::Responses::EmbeddingResponse.new(
      vectors: [[0.1]], model: 'embed', metadata: unsafe_metadata, raw: unsafe_raw
    )

    public_payload = embedding.to_h

    expect(public_payload).to eq(vectors: [[0.1]], model: 'embed', metadata: { vendor: 'vllm' })
    expect(Legion::JSON.dump(public_payload)).not_to include('raw secret')
    expect(embedding.to_internal_h).to include(metadata: unsafe_metadata, raw: unsafe_raw)
  end
end
# rubocop:enable RSpec/DescribeClass
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'spec_helper'
|
|
4
|
+
|
|
5
|
+
# Exercises ThinkingExtractor.extract: how model output is split into visible
# +content+ and hidden +thinking+ for well-formed tags, malformed tags,
# untagged preambles and provider metadata.
RSpec.describe Legion::Extensions::Llm::Responses::ThinkingExtractor do
  it 'extracts normal think tags' do
    model_output = "<think>hidden</think>\n\nvisible"

    extraction = described_class.extract(model_output)

    expect(extraction.content).to eq('visible')
    expect(extraction.thinking).to eq('hidden')
  end

  it 'extracts normal thinking tags' do
    model_output = "<thinking>hidden</thinking>\n\nvisible"

    extraction = described_class.extract(model_output)

    expect(extraction.content).to eq('visible')
    expect(extraction.thinking).to eq('hidden')
  end

  it 'extracts malformed trailing close tag' do
    # Closing tag with no opening tag: the text before it is expected to be thinking.
    model_output = "hidden only\n</think>\n\nvisible"

    extraction = described_class.extract(model_output)

    expect(extraction.content).to eq('visible')
    expect(extraction.thinking).to eq('hidden only')
  end

  it 'extracts malformed trailing thinking close tag' do
    model_output = "hidden only\n</thinking>\n\nvisible"

    extraction = described_class.extract(model_output)

    expect(extraction.content).to eq('visible')
    expect(extraction.thinking).to eq('hidden only')
  end

  it 'extracts unterminated think blocks instead of leaking them as visible content' do
    # Opening tag that is never closed: the text after it is expected to be thinking.
    model_output = "visible\n<think>hidden only"

    extraction = described_class.extract(model_output)

    expect(extraction.content).to eq('visible')
    expect(extraction.thinking).to eq('hidden only')
  end

  it 'extracts untagged local-model reasoning preambles' do
    preamble = 'The user is just saying "test". Let me respond simply and confirm things are working.'
    answer = 'Hey! Things are working on my end. What can I help you with?'

    extraction = described_class.extract("#{preamble}\n\n#{answer}")

    expect(extraction.content).to eq(answer)
    expect(extraction.thinking).to eq(preamble)
  end

  it 'leaves normal text visible' do
    extraction = described_class.extract('visible only')

    expect(extraction.content).to eq('visible only')
    expect(extraction.thinking).to be_nil
  end

  it 'extracts provider-specific reasoning metadata without exposing reasoning fields as metadata' do
    provider_metadata = {
      'reasoning_content' => 'hidden',
      'thinking_signature' => 'sig-1',
      'reasoning-signature' => 'sig-2',
      'vendor' => 'vllm'
    }

    extraction = described_class.extract('visible', metadata: provider_metadata)

    expect(extraction.content).to eq('visible')
    expect(extraction.thinking).to eq('hidden')
    expect(extraction.signature).to eq('sig-1')
    expect(extraction.metadata).to eq(vendor: 'vllm')
  end
end
|
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'spec_helper'
|
|
4
|
+
|
|
5
|
+
# Contract of ModelOffering as seen by callers: attribute normalization,
# eligibility checks, lane-key generation, eligibility fingerprints and
# hash serialization.
RSpec.describe Legion::Extensions::Llm::Routing::ModelOffering do
  subject(:offering) { described_class.new(**fleet_attributes) }

  # Baseline attributes (ollama family, rabbitmq transport) used by most examples.
  let(:fleet_attributes) do
    {
      provider_family: :ollama,
      instance_id: :'macbook-m4-max',
      transport: :rabbitmq,
      model: 'qwen3.6:27b-q4_K_M',
      capabilities: %i[chat tools thinking],
      limits: { context_window: 32_768, max_output_tokens: 8192 },
      policy_tags: %i[phi_allowed internal_only]
    }
  end

  # Offering id expected for the baseline attributes.
  let(:fleet_offering_id) { 'ollama:macbook-m4-max:inference:qwen3-6-27b-q4-k-m' }

  it 'normalizes provider-neutral offering metadata' do
    expect(offering).to have_attributes(
      offering_id: fleet_offering_id,
      provider_family: :ollama,
      provider_instance: :'macbook-m4-max',
      instance_id: :'macbook-m4-max',
      transport: :rabbitmq,
      tier: :fleet,
      model: 'qwen3.6:27b-q4_K_M',
      canonical_model_alias: 'qwen3.6:27b-q4_K_M',
      usage_type: :inference
    )
    expect(offering.capabilities).to eq(%i[chat tools thinking])
    expect(offering.context_window).to eq(32_768)
  end

  it 'accepts expanded contract fields while preserving instance_id compatibility' do
    azure = described_class.new(
      offering_id: 'azure:gpt4o-prod',
      provider_family: :azure_foundry,
      model_family: :openai,
      provider_instance: :eastus,
      model: 'gpt4o-prod',
      canonical_model_alias: 'gpt-4o',
      routing_metadata: { region: 'eastus', deployment: 'gpt4o-prod' },
      capabilities: %i[chat tools]
    )

    # Fields expected both as reader attributes and in the serialized hash.
    shared_fields = {
      provider_instance: :eastus,
      instance_id: :eastus,
      canonical_model_alias: 'gpt-4o',
      routing_metadata: { region: 'eastus', deployment: 'gpt4o-prod' }
    }

    expect(azure).to have_attributes(
      offering_id: 'azure:gpt4o-prod',
      provider_family: :azure_foundry,
      model_family: :openai,
      model: 'gpt4o-prod',
      **shared_fields
    )
    expect(azure.to_h).to include(shared_fields)
  end

  it 'lifts model family and aliases from legacy metadata' do
    bedrock_model_id = 'anthropic.claude-3-haiku-20240307-v1:0'
    short_alias = 'claude-3-haiku'

    legacy = described_class.new(
      provider_family: :bedrock,
      instance_id: :'us-east-1',
      model: bedrock_model_id,
      metadata: { model_family: :anthropic, alias: 'claude-3-haiku' }
    )

    expect(legacy.model_family).to eq(:anthropic)
    expect(legacy.canonical_model_alias).to eq(short_alias)
    expect(legacy.model_alias?(short_alias)).to be true
    expect(legacy.model_alias?(bedrock_model_id)).to be true
  end

  it 'checks route eligibility without provider-specific code' do
    satisfiable_request = {
      usage_type: :inference,
      required_capabilities: %i[tools thinking],
      min_context_window: 32_000,
      policy_tags: [:phi_allowed]
    }

    expect(offering.eligible_for?(**satisfiable_request)).to be true

    expect(offering.eligible_for?(min_context_window: 65_536)).to be false
    expect(offering.eligible_for?(required_capabilities: [:vision])).to be false
  end

  it 'treats legacy function-calling capability names as tools support' do
    function_caller = described_class.new(
      provider_family: :vllm, model: 'qwen-tools', capabilities: %i[chat function_calling]
    )

    expect(function_caller.capabilities).to include(:function_calling, :tools)
    expect(function_caller.eligible_for?(required_capabilities: [:tools])).to be true
  end

  it 'treats disabled offerings as ineligible' do
    switched_off = described_class.new(
      provider_family: :ollama, instance_id: :local, model: 'qwen', metadata: { enabled: false }
    )

    expect(switched_off).not_to be_enabled
    expect(switched_off.eligible_for?).to be false
  end

  it 'generates clean fleet inference lane keys with context windows' do
    expect(offering.lane_key).to eq('llm.fleet.inference.qwen3-6-27b-q4-k-m.ctx32768')
  end

  it 'uses canonical model aliases for fleet lanes when provider deployments hide the base model' do
    hidden_base_model = described_class.new(
      provider_family: :azure_foundry,
      provider_instance: :default,
      model: 'gpt4o-prod',
      canonical_model_alias: 'gpt-4o',
      limits: { context_window: 128_000 }
    )

    expect(hidden_base_model.lane_key).to eq('llm.fleet.inference.gpt-4o.ctx128000')
  end

  it 'generates embedding lanes without context suffixes' do
    embedder = described_class.new(
      provider_family: :ollama,
      instance_id: :'gpu-01',
      transport: :rabbitmq,
      model: 'nomic-embed-text:latest',
      usage_type: :embed,
      capabilities: [:embedding]
    )

    expect(embedder).to be_embedding
    expect(embedder.lane_key).to eq('llm.fleet.embed.nomic-embed-text-latest')
  end

  it 'can include an eligibility fingerprint when lanes need stricter matching' do
    fingerprinted_lane = offering.lane_key(include_fingerprint: true)

    expect(fingerprinted_lane)
      .to match(/\Allm\.fleet\.inference\.qwen3-6-27b-q4-k-m\.ctx32768\.elig\.[0-9a-f]{10}\z/)
    # The last dot-separated segment of the lane key is the fingerprint itself.
    expect(offering.eligibility_fingerprint).to eq(fingerprinted_lane.split('.').last)
  end

  it 'normalizes string numeric limits from JSON-backed settings' do
    from_strings = described_class.new(
      provider_family: :ollama,
      model: 'qwen',
      limits: { context_window: '32768', max_output_tokens: '8192' }
    )

    expect(from_strings.context_window).to eq(32_768)
    expect(from_strings.max_output_tokens).to eq(8192)
    expect(from_strings.eligible_for?(min_context_window: 32_000)).to be true
  end

  it 'normalizes string-keyed JSON-backed offering fields' do
    json_settings = {
      'provider_family' => 'ollama',
      'instance_id' => 'macbook-m4-max',
      'transport' => 'rabbitmq',
      'model' => 'nomic-embed-text',
      'type' => 'embed',
      'capabilities' => %w[embedding],
      'limits' => { 'context_window' => '8192' },
      'metadata' => { 'enabled' => true }
    }

    from_json = described_class.new(**json_settings)

    expect(from_json.provider_family).to eq(:ollama)
    expect(from_json.instance_id).to eq(:'macbook-m4-max')
    expect(from_json.transport).to eq(:rabbitmq)
    expect(from_json.usage_type).to eq(:embedding)
    expect(from_json.context_window).to eq(8192)
    expect(from_json).to be_enabled
  end

  it 'treats string-keyed disabled metadata as ineligible' do
    json_settings = {
      'provider_family' => 'ollama',
      'model' => 'qwen',
      'metadata' => { 'enabled' => false }
    }

    switched_off = described_class.new(**json_settings)

    expect(switched_off).not_to be_enabled
    expect(switched_off.eligible_for?).to be false
  end

  it 'keeps sensitive metadata out of eligibility fingerprints' do
    # Builds offerings that differ only in the endpoint URL inside eligibility metadata.
    with_endpoint = lambda do |endpoint|
      described_class.new(
        provider_family: :ollama,
        model: 'qwen',
        metadata: { eligibility: { endpoint_url: endpoint, network_boundary: :corp_lan } }
      )
    end

    safe = with_endpoint.call('http://gpu.internal')
    changed_secret = with_endpoint.call('http://other.internal')

    expect(safe.eligibility_fingerprint).to eq(changed_secret.eligibility_fingerprint)
  end

  it 'serializes the normalized shape used by routers and registries' do
    serialized = offering.to_h

    expect(serialized).to include(
      offering_id: fleet_offering_id,
      provider_family: :ollama,
      provider_instance: :'macbook-m4-max',
      instance_id: :'macbook-m4-max',
      tier: :fleet,
      canonical_model_alias: 'qwen3.6:27b-q4_K_M',
      usage_type: :inference,
      limits: { context_window: 32_768, max_output_tokens: 8192 }
    )
  end
end
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'spec_helper'
|
|
4
|
+
|
|
5
|
+
# Covers registration, lookup and filtering of offerings held by an
# OfferingRegistry seeded with one chat and one embedding offering.
RSpec.describe Legion::Extensions::Llm::Routing::OfferingRegistry do
  subject(:registry) { described_class.new([chat, embedding]) }

  let(:offering_class) { Legion::Extensions::Llm::Routing::ModelOffering }

  # Chat offering: azure_foundry family, openai model family, alias gpt-4o.
  let(:chat) do
    offering_class.new(
      provider_family: :azure_foundry,
      model_family: :openai,
      provider_instance: :eastus,
      model: 'gpt4o-prod',
      canonical_model_alias: 'gpt-4o',
      capabilities: %i[chat tools],
      limits: { context_window: 128_000 }
    )
  end

  # Embedding offering: bedrock family, amazon model family.
  let(:embedding) do
    offering_class.new(
      provider_family: :bedrock,
      model_family: :amazon,
      instance_id: :'us-east-1',
      model: 'amazon.titan-embed-text-v2:0',
      canonical_model_alias: 'titan-embed-text-v2',
      usage_type: :embedding,
      capabilities: [:embedding]
    )
  end

  it 'registers hashes and offerings by normalized offering_id' do
    # Registering a hash that resolves to the chat offering's id is expected to
    # replace that entry, leaving the registry at two offerings.
    replacement = registry.register(chat.to_h.merge(capabilities: %i[chat vision]))
    chat_id = chat.offering_id

    expect(registry.find(chat_id)).to eq(replacement)
    expect(registry.find(chat_id).capabilities).to eq(%i[chat vision])
    expect(registry.count).to eq(2)
  end

  it 'finds and filters offerings by the expanded routing contract' do
    expect(registry.find_by_model_alias('gpt-4o')).to eq(chat)

    only_chat = [chat]
    expect(registry.filter(provider_family: :azure_foundry)).to eq(only_chat)
    expect(registry.filter(model_family: :openai)).to eq(only_chat)
    expect(registry.filter(provider_instance: :eastus)).to eq(only_chat)

    only_embedding = [embedding]
    expect(registry.filter(capability: :embedding)).to eq(only_embedding)
    expect(registry.filter(model_alias: 'titan-embed-text-v2', usage_type: :embedding)).to eq(only_embedding)
  end
end
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'spec_helper'
|
|
4
|
+
|
|
5
|
+
# Covers the RegistryEvent envelope: serialization, removal of sensitive
# offering fields, hash-offering normalization, event-type helpers and
# rejection of invalid input.
RSpec.describe Legion::Extensions::Llm::Routing::RegistryEvent do
  subject(:event) do
    described_class.new(
      event_id: 'evt-123',
      event_type: :offering_available,
      occurred_at: observed_at,
      offering: offering,
      runtime: { host_id: 'macbook-m4-max', process: { pid: 12_345 } },
      capacity: { concurrency: 4, queued: 0 },
      health: { ready: true, latency_ms: 180 },
      lane: lane_name,
      metadata: { observed_by: :lex_llm_ollama }
    )
  end

  # Timestamp with microsecond precision so the serialized form can be checked exactly.
  let(:observed_at) { Time.utc(2026, 4, 28, 14, 30, 15, 123_456) }
  let(:lane_name) { 'llm.fleet.inference.qwen3-6.ctx32768' }

  # Offering that carries credentials and an api_key in metadata.
  let(:offering) do
    Legion::Extensions::Llm::Routing::ModelOffering.new(
      provider_family: :ollama,
      provider_instance: :'macbook-m4-max',
      transport: :rabbitmq,
      model: 'qwen3.6',
      capabilities: %i[chat tools],
      limits: { context_window: 32_768 },
      credentials: { api_key: 'secret' },
      metadata: { enabled: true, api_key: 'secret' }
    )
  end

  it 'serializes a provider-neutral registry envelope' do
    expect(event).to have_attributes(
      event_id: 'evt-123',
      event_type: :offering_available,
      occurred_at: observed_at
    )

    expect(event.to_h).to include(
      event_id: 'evt-123',
      event_type: :offering_available,
      occurred_at: '2026-04-28T14:30:15.123456Z',
      runtime: { host_id: 'macbook-m4-max', process: { pid: 12_345 } },
      capacity: { concurrency: 4, queued: 0 },
      health: { ready: true, latency_ms: 180 },
      lane: lane_name,
      metadata: { observed_by: :lex_llm_ollama }
    )
    expect(event.to_h[:offering]).to include(
      offering_id: 'ollama:macbook-m4-max:inference:qwen3-6',
      provider_family: :ollama,
      provider_instance: :'macbook-m4-max',
      model: 'qwen3.6',
      capabilities: %i[chat tools],
      limits: { context_window: 32_768 },
      metadata: { enabled: true }
    )
  end

  it 'omits sensitive offering fields before publishing' do
    published_offering = event.to_h[:offering]

    expect(published_offering).not_to have_key(:credentials)
    expect(published_offering[:metadata]).not_to have_key(:api_key)
  end

  it 'normalizes hash offerings through ModelOffering' do
    string_keyed_offering = {
      'provider_family' => 'bedrock',
      'provider_instance' => 'us-east-1',
      'model' => 'amazon.titan-embed-text-v2:0',
      'usage_type' => 'embedding',
      'capabilities' => ['embedding']
    }

    # Named +heartbeat+ so the local does not shadow the +event+ subject.
    heartbeat = described_class.heartbeat(
      string_keyed_offering,
      event_id: 'evt-heartbeat',
      occurred_at: '2026-04-28T14:31:00Z'
    )

    expect(heartbeat.to_h).to include(
      event_id: 'evt-heartbeat',
      event_type: :offering_heartbeat,
      occurred_at: '2026-04-28T14:31:00.000000Z'
    )
    expect(heartbeat.to_h[:offering]).to include(
      provider_family: :bedrock,
      provider_instance: :'us-east-1',
      usage_type: :embedding,
      capabilities: [:embedding]
    )
  end

  it 'provides event-type helpers' do
    expect(described_class.available(offering).event_type).to eq(:offering_available)
    expect(described_class.unavailable(offering).event_type).to eq(:offering_unavailable)
    expect(described_class.degraded(offering).event_type).to eq(:offering_degraded)
    expect(described_class.heartbeat(offering).event_type).to eq(:offering_heartbeat)
  end

  it 'rejects unknown event types' do
    build_with_bad_type = -> { described_class.new(event_type: :available, offering: offering) }

    expect(&build_with_bad_type).to raise_error(ArgumentError, /unsupported registry event type/)
  end

  it 'rejects sensitive runtime, capacity, health, lane, and metadata keys' do
    %i[runtime capacity health lane metadata].each do |field|
      # Fresh payload per field; the sensitive key sits one level down.
      tainted = { 'nested' => { 'api_key' => 'secret' } }
      attributes = { event_type: :offering_degraded, offering: offering }.merge(field => tainted)

      expect { described_class.new(**attributes) }
        .to raise_error(ArgumentError, /#{field} contains sensitive key: nested.api_key/)
    end
  end
end
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'spec_helper'
|
|
4
|
+
|
|
5
|
+
# Covers StreamAccumulator#add: tolerance of incomplete tool-call arguments
# and separation of thinking text from visible content, both in the final
# message and in the chunks returned by #filtered_chunk.
RSpec.describe Legion::Extensions::Llm::StreamAccumulator do
  subject(:accumulator) { described_class.new }

  describe '#add' do
    # Assistant chunk with no content and a single 'call_1' tool call.
    def tool_call_chunk(arguments)
      call = Legion::Extensions::Llm::ToolCall.new(id: 'call_1', name: 'weather', arguments: arguments)
      Legion::Extensions::Llm::Chunk.new(role: :assistant, content: nil, tool_calls: { 'call_1' => call })
    end

    # Assistant chunk carrying only text content.
    def text_chunk(text)
      Legion::Extensions::Llm::Chunk.new(role: :assistant, content: text)
    end

    # Adds each piece to the accumulator and collects the non-nil results of
    # #filtered_chunk, in stream order.
    def stream_through(pieces)
      pieces.filter_map do |text|
        piece = text_chunk(text)
        accumulator.add(piece)
        accumulator.filtered_chunk(piece)
      end
    end

    it 'handles tool call deltas that omit arguments' do
      delta = tool_call_chunk(nil)

      expect { accumulator.add(delta) }.not_to raise_error

      assembled = accumulator.to_message(nil)
      expect(assembled.tool_calls['call_1'].arguments).to eq({})
    end

    it 'drops malformed accumulated tool arguments instead of raising' do
      # Truncated JSON object as the argument payload.
      delta = tool_call_chunk('{"city"')

      expect { accumulator.add(delta) }.not_to raise_error

      assembled = accumulator.to_message(nil)
      expect(assembled.tool_calls['call_1'].arguments).to eq({})
    end

    it 'treats content before an unmatched closing think tag as thinking' do
      # Only #add is exercised here; #filtered_chunk is deliberately not called.
      accumulator.add(text_chunk("internal\n</think>\n\nHello"))

      assembled = accumulator.to_message(nil)
      expect(assembled.content).to eq('Hello')
      expect(assembled.thinking.text).to eq("internal\n")
    end

    it 'does not leak streamed thinking tag variants split across chunks' do
      emitted = stream_through(['<thinking>', 'internal', '</thinking>Hello'])

      assembled = accumulator.to_message(nil)
      expect(emitted.filter_map(&:content)).to eq(['Hello'])
      expect(emitted.filter_map { |piece| piece.thinking&.text }.join).to eq('internal')
      expect(assembled.content).to eq('Hello')
      expect(assembled.thinking.text).to eq('internal')
    end

    it 'does not stream untagged local-model reasoning preambles as content' do
      reasoning = 'The user is just saying "test". Let me respond simply and confirm things are working.'
      answer = 'Hey! Things are working on my end.'

      emitted = stream_through(
        [
          'The user is just saying "test".',
          ' Let me respond simply and confirm things are working.',
          "\n\nHey! Things are working on my end."
        ]
      )

      assembled = accumulator.to_message(nil)
      expect(emitted.filter_map(&:content)).to eq([answer])
      expect(emitted.filter_map { |piece| piece.thinking&.text }.join).to eq(reasoning)
      expect(assembled.content).to eq(answer)
      expect(assembled.thinking.text).to eq(reasoning)
    end

    it 'releases normal text that starts like a possible reasoning preamble' do
      emitted = stream_through(
        [
          'The user guide covers setup.',
          "\n\nFollow the install section first."
        ]
      )

      assembled = accumulator.to_message(nil)
      full_text = "The user guide covers setup.\n\nFollow the install section first."
      expect(emitted.filter_map(&:content).join).to eq(full_text)
      expect(assembled.content).to eq(full_text)
      expect(assembled.thinking).to be_nil
    end
  end
end
|