lex-llm 0.4.16 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117)
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +13 -2
  3. data/B1b-conformance-kit.md +79 -0
  4. data/CHANGELOG.md +33 -0
  5. data/README.md +349 -153
  6. data/lex-llm.gemspec +3 -3
  7. data/lib/legion/extensions/llm/attachment.rb +1 -1
  8. data/lib/legion/extensions/llm/canonical/chunk.rb +184 -0
  9. data/lib/legion/extensions/llm/canonical/content_block.rb +126 -0
  10. data/lib/legion/extensions/llm/canonical/message.rb +125 -0
  11. data/lib/legion/extensions/llm/canonical/params.rb +61 -0
  12. data/lib/legion/extensions/llm/canonical/request.rb +117 -0
  13. data/lib/legion/extensions/llm/canonical/response.rb +124 -0
  14. data/lib/legion/extensions/llm/canonical/thinking.rb +81 -0
  15. data/lib/legion/extensions/llm/canonical/tool_call.rb +134 -0
  16. data/lib/legion/extensions/llm/canonical/tool_definition.rb +73 -0
  17. data/lib/legion/extensions/llm/canonical/usage.rb +61 -0
  18. data/lib/legion/extensions/llm/canonical.rb +49 -0
  19. data/lib/legion/extensions/llm/chat.rb +3 -5
  20. data/lib/legion/extensions/llm/connection.rb +14 -2
  21. data/lib/legion/extensions/llm/error.rb +3 -7
  22. data/lib/legion/extensions/llm/fleet/envelope_validation.rb +1 -3
  23. data/lib/legion/extensions/llm/fleet/provider_responder.rb +1 -3
  24. data/lib/legion/extensions/llm/fleet/token_validator.rb +1 -3
  25. data/lib/legion/extensions/llm/model/info.rb +4 -6
  26. data/lib/legion/extensions/llm/models.rb +3 -3
  27. data/lib/legion/extensions/llm/provider/open_ai_compatible.rb +12 -4
  28. data/lib/legion/extensions/llm/routing/lane_key.rb +1 -3
  29. data/lib/legion/extensions/llm/stream_accumulator.rb +1 -1
  30. data/lib/legion/extensions/llm/streaming.rb +6 -4
  31. data/lib/legion/extensions/llm/tool.rb +1 -3
  32. data/lib/legion/extensions/llm/version.rb +1 -1
  33. data/lib/legion/extensions/llm.rb +118 -35
  34. data/spec/fixtures/ruby.mp3 +0 -0
  35. data/spec/fixtures/ruby.mp4 +0 -0
  36. data/spec/fixtures/ruby.png +0 -0
  37. data/spec/fixtures/ruby.txt +1 -0
  38. data/spec/fixtures/ruby.wav +0 -0
  39. data/spec/fixtures/ruby.xml +1 -0
  40. data/spec/fixtures/sample.pdf +0 -0
  41. data/spec/legion/extensions/llm/agent_spec.rb +179 -0
  42. data/spec/legion/extensions/llm/attachment_spec.rb +25 -0
  43. data/spec/legion/extensions/llm/auto_registration_spec.rb +38 -0
  44. data/spec/legion/extensions/llm/canonical/chunk_spec.rb +285 -0
  45. data/spec/legion/extensions/llm/canonical/content_block_spec.rb +179 -0
  46. data/spec/legion/extensions/llm/canonical/message_spec.rb +203 -0
  47. data/spec/legion/extensions/llm/canonical/params_spec.rb +159 -0
  48. data/spec/legion/extensions/llm/canonical/request_spec.rb +174 -0
  49. data/spec/legion/extensions/llm/canonical/response_spec.rb +234 -0
  50. data/spec/legion/extensions/llm/canonical/thinking_spec.rb +151 -0
  51. data/spec/legion/extensions/llm/canonical/tool_call_spec.rb +191 -0
  52. data/spec/legion/extensions/llm/canonical/tool_definition_spec.rb +174 -0
  53. data/spec/legion/extensions/llm/canonical/usage_spec.rb +138 -0
  54. data/spec/legion/extensions/llm/configuration_spec.rb +38 -0
  55. data/spec/legion/extensions/llm/conformance/client_translator_examples.rb +269 -0
  56. data/spec/legion/extensions/llm/conformance/conformance.rb +51 -0
  57. data/spec/legion/extensions/llm/conformance/echo_translator.rb +56 -0
  58. data/spec/legion/extensions/llm/conformance/echo_translator_spec.rb +13 -0
  59. data/spec/legion/extensions/llm/conformance/fixtures/canonical_empty_response.json +13 -0
  60. data/spec/legion/extensions/llm/conformance/fixtures/canonical_error_response.json +19 -0
  61. data/spec/legion/extensions/llm/conformance/fixtures/canonical_fleet_round_trip.json +81 -0
  62. data/spec/legion/extensions/llm/conformance/fixtures/canonical_metering_audit_events.json +101 -0
  63. data/spec/legion/extensions/llm/conformance/fixtures/canonical_params_mapping_request.json +21 -0
  64. data/spec/legion/extensions/llm/conformance/fixtures/canonical_simple_text_request.json +13 -0
  65. data/spec/legion/extensions/llm/conformance/fixtures/canonical_simple_text_response.json +13 -0
  66. data/spec/legion/extensions/llm/conformance/fixtures/canonical_stop_reason_matrix.json +36 -0
  67. data/spec/legion/extensions/llm/conformance/fixtures/canonical_streaming_accumulated_response.json +20 -0
  68. data/spec/legion/extensions/llm/conformance/fixtures/canonical_streaming_error_chunks.json +26 -0
  69. data/spec/legion/extensions/llm/conformance/fixtures/canonical_streaming_text_chunks.json +33 -0
  70. data/spec/legion/extensions/llm/conformance/fixtures/canonical_streaming_thinking_chunks.json +42 -0
  71. data/spec/legion/extensions/llm/conformance/fixtures/canonical_streaming_tool_call_chunks.json +41 -0
  72. data/spec/legion/extensions/llm/conformance/fixtures/canonical_system_prompt_request.json +14 -0
  73. data/spec/legion/extensions/llm/conformance/fixtures/canonical_thinking_request.json +18 -0
  74. data/spec/legion/extensions/llm/conformance/fixtures/canonical_thinking_response.json +17 -0
  75. data/spec/legion/extensions/llm/conformance/fixtures/canonical_tool_results_continuation_request.json +75 -0
  76. data/spec/legion/extensions/llm/conformance/fixtures/canonical_tool_use_response.json +25 -0
  77. data/spec/legion/extensions/llm/conformance/fixtures/canonical_tools_request.json +34 -0
  78. data/spec/legion/extensions/llm/conformance/provider_translator_examples.rb +390 -0
  79. data/spec/legion/extensions/llm/connection_logging_spec.rb +53 -0
  80. data/spec/legion/extensions/llm/connection_retry_spec.rb +36 -0
  81. data/spec/legion/extensions/llm/context_spec.rb +127 -0
  82. data/spec/legion/extensions/llm/credential_sources_spec.rb +468 -0
  83. data/spec/legion/extensions/llm/error_middleware_spec.rb +102 -0
  84. data/spec/legion/extensions/llm/error_spec.rb +87 -0
  85. data/spec/legion/extensions/llm/fleet/provider_responder_spec.rb +120 -0
  86. data/spec/legion/extensions/llm/fleet/token_validator_spec.rb +163 -0
  87. data/spec/legion/extensions/llm/fleet/worker_execution_spec.rb +128 -0
  88. data/spec/legion/extensions/llm/fleet_messages_spec.rb +402 -0
  89. data/spec/legion/extensions/llm/gemspec_spec.rb +25 -0
  90. data/spec/legion/extensions/llm/message_spec.rb +64 -0
  91. data/spec/legion/extensions/llm/model/info_spec.rb +222 -0
  92. data/spec/legion/extensions/llm/models_spec.rb +104 -0
  93. data/spec/legion/extensions/llm/provider/open_ai_compatible_spec.rb +203 -0
  94. data/spec/legion/extensions/llm/provider_contract_spec.rb +60 -0
  95. data/spec/legion/extensions/llm/provider_settings_spec.rb +76 -0
  96. data/spec/legion/extensions/llm/provider_spec.rb +592 -0
  97. data/spec/legion/extensions/llm/registry_event_builder_spec.rb +68 -0
  98. data/spec/legion/extensions/llm/registry_publisher_spec.rb +22 -0
  99. data/spec/legion/extensions/llm/responses/response_objects_spec.rb +75 -0
  100. data/spec/legion/extensions/llm/responses/thinking_extractor_spec.rb +75 -0
  101. data/spec/legion/extensions/llm/routing/model_offering_spec.rb +222 -0
  102. data/spec/legion/extensions/llm/routing/offering_registry_spec.rb +50 -0
  103. data/spec/legion/extensions/llm/routing/registry_event_spec.rb +120 -0
  104. data/spec/legion/extensions/llm/stream_accumulator_spec.rb +103 -0
  105. data/spec/legion/extensions/llm/streaming_spec.rb +108 -0
  106. data/spec/legion/extensions/llm/tool_spec.rb +94 -0
  107. data/spec/legion/extensions/llm/transport/fleet_lane_spec.rb +60 -0
  108. data/spec/legion/extensions/llm/utils_spec.rb +113 -0
  109. data/spec/legion/extensions/llm_base_contract_spec.rb +110 -0
  110. data/spec/legion/extensions/llm_extension_spec.rb +78 -0
  111. data/spec/legion/extensions/llm_root_spec.rb +51 -0
  112. data/spec/spec_helper.rb +24 -0
  113. data/spec/support/fake_llm_provider.rb +148 -0
  114. data/spec/support/llm_configuration.rb +21 -0
  115. data/spec/support/rspec_configuration.rb +19 -0
  116. data/spec/support/simplecov_configuration.rb +20 -0
  117. metadata +110 -15
@@ -0,0 +1,75 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'spec_helper'
4
+
5
+ # rubocop:disable RSpec/DescribeClass
6
+ RSpec.describe 'LLM normalized response objects' do
7
+ let(:unsafe_metadata) do
8
+ {
9
+ reasoning_content: 'metadata secret',
10
+ reasoning: 'metadata reasoning',
11
+ thinking_text: 'metadata thinking',
12
+ raw: { reasoning_content: 'nested raw secret' },
13
+ 'raw-response' => { reasoning_content: 'hyphen raw secret' },
14
+ 'provider-body' => { thinking_text: 'hyphen body secret' },
15
+ vendor: 'vllm'
16
+ }
17
+ end
18
+ let(:unsafe_raw) { { 'choices' => [{ 'message' => { 'content' => '<think>raw secret</think>visible' } }] } }
19
+
20
+ it 'serializes chat responses without raw provider thinking fields' do
21
+ response = Legion::Extensions::Llm::Responses::ChatResponse.new(
22
+ content: "<think>tag secret</think>\nvisible",
23
+ metadata: unsafe_metadata,
24
+ raw: unsafe_raw
25
+ )
26
+
27
+ payload = response.to_h
28
+ encoded = Legion::JSON.dump(payload)
29
+
30
+ expect(payload).to eq(content: 'visible', metadata: { vendor: 'vllm' })
31
+ expect(encoded).not_to include('reasoning', 'reasoning_content', 'thinking_text', '<think>', 'raw secret',
32
+ 'nested raw secret', 'hyphen raw secret', 'hyphen body secret')
33
+ expect(response.to_internal_h).to include(
34
+ thinking: 'metadata secretmetadata reasoningmetadata thinkingtag secret',
35
+ metadata: unsafe_metadata,
36
+ raw: unsafe_raw
37
+ )
38
+ end
39
+
40
+ it 'serializes stream chunks without raw provider thinking fields' do
41
+ chunk = Legion::Extensions::Llm::Responses::StreamChunk.new(
42
+ content: "<think>tag secret</think>\nvisible",
43
+ metadata: unsafe_metadata,
44
+ raw: unsafe_raw
45
+ )
46
+
47
+ payload = chunk.to_h
48
+ encoded = Legion::JSON.dump(payload)
49
+
50
+ expect(payload).to eq(content: 'visible', metadata: { vendor: 'vllm' })
51
+ expect(encoded).not_to include('reasoning', 'reasoning_content', 'thinking_text', '<think>', 'raw secret',
52
+ 'nested raw secret', 'hyphen raw secret', 'hyphen body secret')
53
+ expect(chunk.to_internal_h).to include(
54
+ thinking: 'metadata secretmetadata reasoningmetadata thinkingtag secret',
55
+ metadata: unsafe_metadata,
56
+ raw: unsafe_raw
57
+ )
58
+ end
59
+
60
+ it 'serializes embedding responses without raw provider payloads' do
61
+ response = Legion::Extensions::Llm::Responses::EmbeddingResponse.new(
62
+ vectors: [[0.1]],
63
+ model: 'embed',
64
+ metadata: unsafe_metadata,
65
+ raw: unsafe_raw
66
+ )
67
+
68
+ payload = response.to_h
69
+
70
+ expect(payload).to eq(vectors: [[0.1]], model: 'embed', metadata: { vendor: 'vllm' })
71
+ expect(Legion::JSON.dump(payload)).not_to include('raw secret')
72
+ expect(response.to_internal_h).to include(metadata: unsafe_metadata, raw: unsafe_raw)
73
+ end
74
+ end
75
+ # rubocop:enable RSpec/DescribeClass
@@ -0,0 +1,75 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'spec_helper'
4
+
5
+ RSpec.describe Legion::Extensions::Llm::Responses::ThinkingExtractor do
6
+ it 'extracts normal think tags' do
7
+ result = described_class.extract("<think>hidden</think>\n\nvisible")
8
+
9
+ expect(result.content).to eq('visible')
10
+ expect(result.thinking).to eq('hidden')
11
+ end
12
+
13
+ it 'extracts normal thinking tags' do
14
+ result = described_class.extract("<thinking>hidden</thinking>\n\nvisible")
15
+
16
+ expect(result.content).to eq('visible')
17
+ expect(result.thinking).to eq('hidden')
18
+ end
19
+
20
+ it 'extracts malformed trailing close tag' do
21
+ result = described_class.extract("hidden only\n</think>\n\nvisible")
22
+
23
+ expect(result.content).to eq('visible')
24
+ expect(result.thinking).to eq('hidden only')
25
+ end
26
+
27
+ it 'extracts malformed trailing thinking close tag' do
28
+ result = described_class.extract("hidden only\n</thinking>\n\nvisible")
29
+
30
+ expect(result.content).to eq('visible')
31
+ expect(result.thinking).to eq('hidden only')
32
+ end
33
+
34
+ it 'extracts unterminated think blocks instead of leaking them as visible content' do
35
+ result = described_class.extract("visible\n<think>hidden only")
36
+
37
+ expect(result.content).to eq('visible')
38
+ expect(result.thinking).to eq('hidden only')
39
+ end
40
+
41
+ it 'extracts untagged local-model reasoning preambles' do
42
+ result = described_class.extract(
43
+ "The user is just saying \"test\". Let me respond simply and confirm things are working.\n\n" \
44
+ 'Hey! Things are working on my end. What can I help you with?'
45
+ )
46
+
47
+ expect(result.content).to eq('Hey! Things are working on my end. What can I help you with?')
48
+ expect(result.thinking)
49
+ .to eq('The user is just saying "test". Let me respond simply and confirm things are working.')
50
+ end
51
+
52
+ it 'leaves normal text visible' do
53
+ result = described_class.extract('visible only')
54
+
55
+ expect(result.content).to eq('visible only')
56
+ expect(result.thinking).to be_nil
57
+ end
58
+
59
+ it 'extracts provider-specific reasoning metadata without exposing reasoning fields as metadata' do
60
+ result = described_class.extract(
61
+ 'visible',
62
+ metadata: {
63
+ 'reasoning_content' => 'hidden',
64
+ 'thinking_signature' => 'sig-1',
65
+ 'reasoning-signature' => 'sig-2',
66
+ 'vendor' => 'vllm'
67
+ }
68
+ )
69
+
70
+ expect(result.content).to eq('visible')
71
+ expect(result.thinking).to eq('hidden')
72
+ expect(result.signature).to eq('sig-1')
73
+ expect(result.metadata).to eq(vendor: 'vllm')
74
+ end
75
+ end
@@ -0,0 +1,222 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'spec_helper'
4
+
5
+ RSpec.describe Legion::Extensions::Llm::Routing::ModelOffering do
6
+ subject(:offering) do
7
+ described_class.new(
8
+ provider_family: :ollama,
9
+ instance_id: :'macbook-m4-max',
10
+ transport: :rabbitmq,
11
+ model: 'qwen3.6:27b-q4_K_M',
12
+ capabilities: %i[chat tools thinking],
13
+ limits: { context_window: 32_768, max_output_tokens: 8192 },
14
+ policy_tags: %i[phi_allowed internal_only]
15
+ )
16
+ end
17
+
18
+ it 'normalizes provider-neutral offering metadata' do
19
+ expect(offering).to have_attributes(
20
+ offering_id: 'ollama:macbook-m4-max:inference:qwen3-6-27b-q4-k-m',
21
+ provider_family: :ollama,
22
+ provider_instance: :'macbook-m4-max',
23
+ instance_id: :'macbook-m4-max',
24
+ transport: :rabbitmq,
25
+ tier: :fleet,
26
+ model: 'qwen3.6:27b-q4_K_M',
27
+ canonical_model_alias: 'qwen3.6:27b-q4_K_M',
28
+ usage_type: :inference
29
+ )
30
+ expect(offering.capabilities).to eq(%i[chat tools thinking])
31
+ expect(offering.context_window).to eq(32_768)
32
+ end
33
+
34
+ it 'accepts expanded contract fields while preserving instance_id compatibility' do
35
+ expanded = described_class.new(
36
+ offering_id: 'azure:gpt4o-prod',
37
+ provider_family: :azure_foundry,
38
+ model_family: :openai,
39
+ provider_instance: :eastus,
40
+ model: 'gpt4o-prod',
41
+ canonical_model_alias: 'gpt-4o',
42
+ routing_metadata: { region: 'eastus', deployment: 'gpt4o-prod' },
43
+ capabilities: %i[chat tools]
44
+ )
45
+
46
+ expect(expanded).to have_attributes(
47
+ offering_id: 'azure:gpt4o-prod',
48
+ provider_family: :azure_foundry,
49
+ model_family: :openai,
50
+ provider_instance: :eastus,
51
+ instance_id: :eastus,
52
+ model: 'gpt4o-prod',
53
+ canonical_model_alias: 'gpt-4o',
54
+ routing_metadata: { region: 'eastus', deployment: 'gpt4o-prod' }
55
+ )
56
+ expect(expanded.to_h).to include(
57
+ provider_instance: :eastus,
58
+ instance_id: :eastus,
59
+ canonical_model_alias: 'gpt-4o',
60
+ routing_metadata: { region: 'eastus', deployment: 'gpt4o-prod' }
61
+ )
62
+ end
63
+
64
+ it 'lifts model family and aliases from legacy metadata' do
65
+ legacy = described_class.new(
66
+ provider_family: :bedrock,
67
+ instance_id: :'us-east-1',
68
+ model: 'anthropic.claude-3-haiku-20240307-v1:0',
69
+ metadata: { model_family: :anthropic, alias: 'claude-3-haiku' }
70
+ )
71
+
72
+ expect(legacy.model_family).to eq(:anthropic)
73
+ expect(legacy.canonical_model_alias).to eq('claude-3-haiku')
74
+ expect(legacy.model_alias?('claude-3-haiku')).to be true
75
+ expect(legacy.model_alias?('anthropic.claude-3-haiku-20240307-v1:0')).to be true
76
+ end
77
+
78
+ it 'checks route eligibility without provider-specific code' do
79
+ expect(
80
+ offering.eligible_for?(
81
+ usage_type: :inference,
82
+ required_capabilities: %i[tools thinking],
83
+ min_context_window: 32_000,
84
+ policy_tags: [:phi_allowed]
85
+ )
86
+ ).to be true
87
+
88
+ expect(offering.eligible_for?(min_context_window: 65_536)).to be false
89
+ expect(offering.eligible_for?(required_capabilities: [:vision])).to be false
90
+ end
91
+
92
+ it 'treats legacy function-calling capability names as tools support' do
93
+ legacy_tools = described_class.new(
94
+ provider_family: :vllm,
95
+ model: 'qwen-tools',
96
+ capabilities: %i[chat function_calling]
97
+ )
98
+
99
+ expect(legacy_tools.capabilities).to include(:function_calling, :tools)
100
+ expect(legacy_tools.eligible_for?(required_capabilities: [:tools])).to be true
101
+ end
102
+
103
+ it 'treats disabled offerings as ineligible' do
104
+ disabled = described_class.new(
105
+ provider_family: :ollama,
106
+ instance_id: :local,
107
+ model: 'qwen',
108
+ metadata: { enabled: false }
109
+ )
110
+
111
+ expect(disabled).not_to be_enabled
112
+ expect(disabled.eligible_for?).to be false
113
+ end
114
+
115
+ it 'generates clean fleet inference lane keys with context windows' do
116
+ expect(offering.lane_key).to eq('llm.fleet.inference.qwen3-6-27b-q4-k-m.ctx32768')
117
+ end
118
+
119
+ it 'uses canonical model aliases for fleet lanes when provider deployments hide the base model' do
120
+ deployment = described_class.new(
121
+ provider_family: :azure_foundry,
122
+ provider_instance: :default,
123
+ model: 'gpt4o-prod',
124
+ canonical_model_alias: 'gpt-4o',
125
+ limits: { context_window: 128_000 }
126
+ )
127
+
128
+ expect(deployment.lane_key).to eq('llm.fleet.inference.gpt-4o.ctx128000')
129
+ end
130
+
131
+ it 'generates embedding lanes without context suffixes' do
132
+ embedding = described_class.new(
133
+ provider_family: :ollama,
134
+ instance_id: :'gpu-01',
135
+ transport: :rabbitmq,
136
+ model: 'nomic-embed-text:latest',
137
+ usage_type: :embed,
138
+ capabilities: [:embedding]
139
+ )
140
+
141
+ expect(embedding).to be_embedding
142
+ expect(embedding.lane_key).to eq('llm.fleet.embed.nomic-embed-text-latest')
143
+ end
144
+
145
+ it 'can include an eligibility fingerprint when lanes need stricter matching' do
146
+ key = offering.lane_key(include_fingerprint: true)
147
+
148
+ expect(key).to match(/\Allm\.fleet\.inference\.qwen3-6-27b-q4-k-m\.ctx32768\.elig\.[0-9a-f]{10}\z/)
149
+ expect(offering.eligibility_fingerprint).to eq(key.split('.').last)
150
+ end
151
+
152
+ it 'normalizes string numeric limits from JSON-backed settings' do
153
+ string_limits = described_class.new(
154
+ provider_family: :ollama,
155
+ model: 'qwen',
156
+ limits: { context_window: '32768', max_output_tokens: '8192' }
157
+ )
158
+
159
+ expect(string_limits.context_window).to eq(32_768)
160
+ expect(string_limits.max_output_tokens).to eq(8192)
161
+ expect(string_limits.eligible_for?(min_context_window: 32_000)).to be true
162
+ end
163
+
164
+ it 'normalizes string-keyed JSON-backed offering fields' do
165
+ json_offering = described_class.new(
166
+ 'provider_family' => 'ollama',
167
+ 'instance_id' => 'macbook-m4-max',
168
+ 'transport' => 'rabbitmq',
169
+ 'model' => 'nomic-embed-text',
170
+ 'type' => 'embed',
171
+ 'capabilities' => %w[embedding],
172
+ 'limits' => { 'context_window' => '8192' },
173
+ 'metadata' => { 'enabled' => true }
174
+ )
175
+
176
+ expect(json_offering.provider_family).to eq(:ollama)
177
+ expect(json_offering.instance_id).to eq(:'macbook-m4-max')
178
+ expect(json_offering.transport).to eq(:rabbitmq)
179
+ expect(json_offering.usage_type).to eq(:embedding)
180
+ expect(json_offering.context_window).to eq(8192)
181
+ expect(json_offering).to be_enabled
182
+ end
183
+
184
+ it 'treats string-keyed disabled metadata as ineligible' do
185
+ disabled = described_class.new(
186
+ 'provider_family' => 'ollama',
187
+ 'model' => 'qwen',
188
+ 'metadata' => { 'enabled' => false }
189
+ )
190
+
191
+ expect(disabled).not_to be_enabled
192
+ expect(disabled.eligible_for?).to be false
193
+ end
194
+
195
+ it 'keeps sensitive metadata out of eligibility fingerprints' do
196
+ safe = described_class.new(
197
+ provider_family: :ollama,
198
+ model: 'qwen',
199
+ metadata: { eligibility: { endpoint_url: 'http://gpu.internal', network_boundary: :corp_lan } }
200
+ )
201
+ changed_secret = described_class.new(
202
+ provider_family: :ollama,
203
+ model: 'qwen',
204
+ metadata: { eligibility: { endpoint_url: 'http://other.internal', network_boundary: :corp_lan } }
205
+ )
206
+
207
+ expect(safe.eligibility_fingerprint).to eq(changed_secret.eligibility_fingerprint)
208
+ end
209
+
210
+ it 'serializes the normalized shape used by routers and registries' do
211
+ expect(offering.to_h).to include(
212
+ offering_id: 'ollama:macbook-m4-max:inference:qwen3-6-27b-q4-k-m',
213
+ provider_family: :ollama,
214
+ provider_instance: :'macbook-m4-max',
215
+ instance_id: :'macbook-m4-max',
216
+ tier: :fleet,
217
+ canonical_model_alias: 'qwen3.6:27b-q4_K_M',
218
+ usage_type: :inference,
219
+ limits: { context_window: 32_768, max_output_tokens: 8192 }
220
+ )
221
+ end
222
+ end
@@ -0,0 +1,50 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'spec_helper'
4
+
5
+ RSpec.describe Legion::Extensions::Llm::Routing::OfferingRegistry do
6
+ subject(:registry) { described_class.new([chat, embedding]) }
7
+
8
+ let(:chat) do
9
+ Legion::Extensions::Llm::Routing::ModelOffering.new(
10
+ provider_family: :azure_foundry,
11
+ model_family: :openai,
12
+ provider_instance: :eastus,
13
+ model: 'gpt4o-prod',
14
+ canonical_model_alias: 'gpt-4o',
15
+ capabilities: %i[chat tools],
16
+ limits: { context_window: 128_000 }
17
+ )
18
+ end
19
+
20
+ let(:embedding) do
21
+ Legion::Extensions::Llm::Routing::ModelOffering.new(
22
+ provider_family: :bedrock,
23
+ model_family: :amazon,
24
+ instance_id: :'us-east-1',
25
+ model: 'amazon.titan-embed-text-v2:0',
26
+ canonical_model_alias: 'titan-embed-text-v2',
27
+ usage_type: :embedding,
28
+ capabilities: [:embedding]
29
+ )
30
+ end
31
+
32
+ it 'registers hashes and offerings by normalized offering_id' do
33
+ replacement = registry.register(
34
+ chat.to_h.merge(capabilities: %i[chat vision])
35
+ )
36
+
37
+ expect(registry.find(chat.offering_id)).to eq(replacement)
38
+ expect(registry.find(chat.offering_id).capabilities).to eq(%i[chat vision])
39
+ expect(registry.count).to eq(2)
40
+ end
41
+
42
+ it 'finds and filters offerings by the expanded routing contract' do
43
+ expect(registry.find_by_model_alias('gpt-4o')).to eq(chat)
44
+ expect(registry.filter(provider_family: :azure_foundry)).to eq([chat])
45
+ expect(registry.filter(model_family: :openai)).to eq([chat])
46
+ expect(registry.filter(provider_instance: :eastus)).to eq([chat])
47
+ expect(registry.filter(capability: :embedding)).to eq([embedding])
48
+ expect(registry.filter(model_alias: 'titan-embed-text-v2', usage_type: :embedding)).to eq([embedding])
49
+ end
50
+ end
@@ -0,0 +1,120 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'spec_helper'
4
+
5
+ RSpec.describe Legion::Extensions::Llm::Routing::RegistryEvent do
6
+ subject(:event) do
7
+ described_class.new(
8
+ event_id: 'evt-123',
9
+ event_type: :offering_available,
10
+ occurred_at: Time.utc(2026, 4, 28, 14, 30, 15, 123_456),
11
+ offering: offering,
12
+ runtime: { host_id: 'macbook-m4-max', process: { pid: 12_345 } },
13
+ capacity: { concurrency: 4, queued: 0 },
14
+ health: { ready: true, latency_ms: 180 },
15
+ lane: 'llm.fleet.inference.qwen3-6.ctx32768',
16
+ metadata: { observed_by: :lex_llm_ollama }
17
+ )
18
+ end
19
+
20
+ let(:offering) do
21
+ Legion::Extensions::Llm::Routing::ModelOffering.new(
22
+ provider_family: :ollama,
23
+ provider_instance: :'macbook-m4-max',
24
+ transport: :rabbitmq,
25
+ model: 'qwen3.6',
26
+ capabilities: %i[chat tools],
27
+ limits: { context_window: 32_768 },
28
+ credentials: { api_key: 'secret' },
29
+ metadata: { enabled: true, api_key: 'secret' }
30
+ )
31
+ end
32
+
33
+ it 'serializes a provider-neutral registry envelope' do
34
+ expect(event).to have_attributes(
35
+ event_id: 'evt-123',
36
+ event_type: :offering_available,
37
+ occurred_at: Time.utc(2026, 4, 28, 14, 30, 15, 123_456)
38
+ )
39
+
40
+ expect(event.to_h).to include(
41
+ event_id: 'evt-123',
42
+ event_type: :offering_available,
43
+ occurred_at: '2026-04-28T14:30:15.123456Z',
44
+ runtime: { host_id: 'macbook-m4-max', process: { pid: 12_345 } },
45
+ capacity: { concurrency: 4, queued: 0 },
46
+ health: { ready: true, latency_ms: 180 },
47
+ lane: 'llm.fleet.inference.qwen3-6.ctx32768',
48
+ metadata: { observed_by: :lex_llm_ollama }
49
+ )
50
+ expect(event.to_h[:offering]).to include(
51
+ offering_id: 'ollama:macbook-m4-max:inference:qwen3-6',
52
+ provider_family: :ollama,
53
+ provider_instance: :'macbook-m4-max',
54
+ model: 'qwen3.6',
55
+ capabilities: %i[chat tools],
56
+ limits: { context_window: 32_768 },
57
+ metadata: { enabled: true }
58
+ )
59
+ end
60
+
61
+ it 'omits sensitive offering fields before publishing' do
62
+ envelope = event.to_h
63
+
64
+ expect(envelope[:offering]).not_to have_key(:credentials)
65
+ expect(envelope[:offering][:metadata]).not_to have_key(:api_key)
66
+ end
67
+
68
+ it 'normalizes hash offerings through ModelOffering' do
69
+ event = described_class.heartbeat(
70
+ {
71
+ 'provider_family' => 'bedrock',
72
+ 'provider_instance' => 'us-east-1',
73
+ 'model' => 'amazon.titan-embed-text-v2:0',
74
+ 'usage_type' => 'embedding',
75
+ 'capabilities' => ['embedding']
76
+ },
77
+ event_id: 'evt-heartbeat',
78
+ occurred_at: '2026-04-28T14:31:00Z'
79
+ )
80
+
81
+ expect(event.to_h).to include(
82
+ event_id: 'evt-heartbeat',
83
+ event_type: :offering_heartbeat,
84
+ occurred_at: '2026-04-28T14:31:00.000000Z'
85
+ )
86
+ expect(event.to_h[:offering]).to include(
87
+ provider_family: :bedrock,
88
+ provider_instance: :'us-east-1',
89
+ usage_type: :embedding,
90
+ capabilities: [:embedding]
91
+ )
92
+ end
93
+
94
+ it 'provides event-type helpers' do
95
+ expect(described_class.available(offering).event_type).to eq(:offering_available)
96
+ expect(described_class.unavailable(offering).event_type).to eq(:offering_unavailable)
97
+ expect(described_class.degraded(offering).event_type).to eq(:offering_degraded)
98
+ expect(described_class.heartbeat(offering).event_type).to eq(:offering_heartbeat)
99
+ end
100
+
101
+ it 'rejects unknown event types' do
102
+ expect do
103
+ described_class.new(event_type: :available, offering: offering)
104
+ end.to raise_error(ArgumentError, /unsupported registry event type/)
105
+ end
106
+
107
+ it 'rejects sensitive runtime, capacity, health, lane, and metadata keys' do
108
+ %i[runtime capacity health lane metadata].each do |field|
109
+ attributes = {
110
+ event_type: :offering_degraded,
111
+ offering: offering,
112
+ field => { 'nested' => { 'api_key' => 'secret' } }
113
+ }
114
+
115
+ expect do
116
+ described_class.new(**attributes)
117
+ end.to raise_error(ArgumentError, /#{field} contains sensitive key: nested.api_key/)
118
+ end
119
+ end
120
+ end
@@ -0,0 +1,103 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'spec_helper'
4
+
5
+ RSpec.describe Legion::Extensions::Llm::StreamAccumulator do
6
+ describe '#add' do
7
+ it 'handles tool call deltas that omit arguments' do
8
+ accumulator = described_class.new
9
+ tool_call = Legion::Extensions::Llm::ToolCall.new(id: 'call_1', name: 'weather', arguments: nil)
10
+ chunk = Legion::Extensions::Llm::Chunk.new(role: :assistant, content: nil, tool_calls: { 'call_1' => tool_call })
11
+
12
+ expect { accumulator.add(chunk) }.not_to raise_error
13
+
14
+ message = accumulator.to_message(nil)
15
+ expect(message.tool_calls['call_1'].arguments).to eq({})
16
+ end
17
+
18
+ it 'drops malformed accumulated tool arguments instead of raising' do
19
+ accumulator = described_class.new
20
+ tool_call = Legion::Extensions::Llm::ToolCall.new(id: 'call_1', name: 'weather', arguments: '{"city"')
21
+ chunk = Legion::Extensions::Llm::Chunk.new(role: :assistant, content: nil, tool_calls: { 'call_1' => tool_call })
22
+
23
+ expect { accumulator.add(chunk) }.not_to raise_error
24
+
25
+ message = accumulator.to_message(nil)
26
+ expect(message.tool_calls['call_1'].arguments).to eq({})
27
+ end
28
+
29
+ it 'treats content before an unmatched closing think tag as thinking' do
30
+ accumulator = described_class.new
31
+ chunk = Legion::Extensions::Llm::Chunk.new(
32
+ role: :assistant,
33
+ content: "internal\n</think>\n\nHello"
34
+ )
35
+
36
+ accumulator.add(chunk)
37
+
38
+ message = accumulator.to_message(nil)
39
+ expect(message.content).to eq('Hello')
40
+ expect(message.thinking.text).to eq("internal\n")
41
+ end
42
+
43
+ it 'does not leak streamed thinking tag variants split across chunks' do
44
+ accumulator = described_class.new
45
+ stream = ['<thinking>', 'internal', '</thinking>Hello']
46
+
47
+ filtered = stream.filter_map do |content|
48
+ chunk = Legion::Extensions::Llm::Chunk.new(role: :assistant, content: content)
49
+ accumulator.add(chunk)
50
+ accumulator.filtered_chunk(chunk)
51
+ end
52
+
53
+ message = accumulator.to_message(nil)
54
+ expect(filtered.filter_map(&:content)).to eq(['Hello'])
55
+ expect(filtered.filter_map { |chunk| chunk.thinking&.text }.join).to eq('internal')
56
+ expect(message.content).to eq('Hello')
57
+ expect(message.thinking.text).to eq('internal')
58
+ end
59
+
60
+ it 'does not stream untagged local-model reasoning preambles as content' do
61
+ accumulator = described_class.new
62
+ stream = [
63
+ 'The user is just saying "test".',
64
+ ' Let me respond simply and confirm things are working.',
65
+ "\n\nHey! Things are working on my end."
66
+ ]
67
+
68
+ filtered = stream.filter_map do |content|
69
+ chunk = Legion::Extensions::Llm::Chunk.new(role: :assistant, content: content)
70
+ accumulator.add(chunk)
71
+ accumulator.filtered_chunk(chunk)
72
+ end
73
+
74
+ message = accumulator.to_message(nil)
75
+ expect(filtered.filter_map(&:content)).to eq(['Hey! Things are working on my end.'])
76
+ expect(filtered.filter_map { |chunk| chunk.thinking&.text }.join)
77
+ .to eq('The user is just saying "test". Let me respond simply and confirm things are working.')
78
+ expect(message.content).to eq('Hey! Things are working on my end.')
79
+ expect(message.thinking.text)
80
+ .to eq('The user is just saying "test". Let me respond simply and confirm things are working.')
81
+ end
82
+
83
+ it 'releases normal text that starts like a possible reasoning preamble' do
84
+ accumulator = described_class.new
85
+ stream = [
86
+ 'The user guide covers setup.',
87
+ "\n\nFollow the install section first."
88
+ ]
89
+
90
+ filtered = stream.filter_map do |content|
91
+ chunk = Legion::Extensions::Llm::Chunk.new(role: :assistant, content: content)
92
+ accumulator.add(chunk)
93
+ accumulator.filtered_chunk(chunk)
94
+ end
95
+
96
+ message = accumulator.to_message(nil)
97
+ expected = "The user guide covers setup.\n\nFollow the install section first."
98
+ expect(filtered.filter_map(&:content).join).to eq(expected)
99
+ expect(message.content).to eq(expected)
100
+ expect(message.thinking).to be_nil
101
+ end
102
+ end
103
+ end