lex-llm 0.4.18 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +13 -2
- data/B1b-conformance-kit.md +79 -0
- data/CHANGELOG.md +27 -0
- data/lex-llm.gemspec +2 -3
- data/lib/legion/extensions/llm/attachment.rb +1 -1
- data/lib/legion/extensions/llm/canonical/chunk.rb +184 -0
- data/lib/legion/extensions/llm/canonical/content_block.rb +126 -0
- data/lib/legion/extensions/llm/canonical/message.rb +138 -0
- data/lib/legion/extensions/llm/canonical/params.rb +61 -0
- data/lib/legion/extensions/llm/canonical/request.rb +117 -0
- data/lib/legion/extensions/llm/canonical/response.rb +124 -0
- data/lib/legion/extensions/llm/canonical/thinking.rb +81 -0
- data/lib/legion/extensions/llm/canonical/tool_call.rb +134 -0
- data/lib/legion/extensions/llm/canonical/tool_definition.rb +98 -0
- data/lib/legion/extensions/llm/canonical/tool_schema.rb +46 -0
- data/lib/legion/extensions/llm/canonical/usage.rb +74 -0
- data/lib/legion/extensions/llm/canonical.rb +50 -0
- data/lib/legion/extensions/llm/chat.rb +3 -5
- data/lib/legion/extensions/llm/connection.rb +5 -1
- data/lib/legion/extensions/llm/error.rb +5 -7
- data/lib/legion/extensions/llm/fleet/envelope_validation.rb +1 -3
- data/lib/legion/extensions/llm/fleet/provider_responder.rb +1 -3
- data/lib/legion/extensions/llm/fleet/token_validator.rb +1 -3
- data/lib/legion/extensions/llm/model/info.rb +4 -6
- data/lib/legion/extensions/llm/models.rb +3 -3
- data/lib/legion/extensions/llm/provider/open_ai_compatible.rb +9 -4
- data/lib/legion/extensions/llm/provider.rb +21 -4
- data/lib/legion/extensions/llm/provider_contract.rb +10 -1
- data/lib/legion/extensions/llm/routing/lane_key.rb +1 -3
- data/lib/legion/extensions/llm/stream_accumulator.rb +40 -1
- data/lib/legion/extensions/llm/streaming.rb +13 -5
- data/lib/legion/extensions/llm/tool.rb +1 -3
- data/lib/legion/extensions/llm/version.rb +1 -1
- data/lib/legion/extensions/llm.rb +118 -35
- data/spec/fixtures/ruby.mp3 +0 -0
- data/spec/fixtures/ruby.mp4 +0 -0
- data/spec/fixtures/ruby.png +0 -0
- data/spec/fixtures/ruby.txt +1 -0
- data/spec/fixtures/ruby.wav +0 -0
- data/spec/fixtures/ruby.xml +1 -0
- data/spec/fixtures/sample.pdf +0 -0
- data/spec/legion/extensions/llm/agent_spec.rb +179 -0
- data/spec/legion/extensions/llm/attachment_spec.rb +25 -0
- data/spec/legion/extensions/llm/auto_registration_spec.rb +38 -0
- data/spec/legion/extensions/llm/canonical/chunk_spec.rb +285 -0
- data/spec/legion/extensions/llm/canonical/content_block_spec.rb +179 -0
- data/spec/legion/extensions/llm/canonical/message_spec.rb +203 -0
- data/spec/legion/extensions/llm/canonical/params_spec.rb +159 -0
- data/spec/legion/extensions/llm/canonical/request_spec.rb +174 -0
- data/spec/legion/extensions/llm/canonical/response_spec.rb +234 -0
- data/spec/legion/extensions/llm/canonical/thinking_spec.rb +151 -0
- data/spec/legion/extensions/llm/canonical/tool_call_spec.rb +191 -0
- data/spec/legion/extensions/llm/canonical/tool_definition_spec.rb +221 -0
- data/spec/legion/extensions/llm/canonical/tool_schema_spec.rb +83 -0
- data/spec/legion/extensions/llm/canonical/usage_spec.rb +178 -0
- data/spec/legion/extensions/llm/configuration_spec.rb +38 -0
- data/spec/legion/extensions/llm/conformance/client_translator_examples.rb +432 -0
- data/spec/legion/extensions/llm/conformance/conformance.rb +51 -0
- data/spec/legion/extensions/llm/conformance/echo_translator.rb +56 -0
- data/spec/legion/extensions/llm/conformance/echo_translator_spec.rb +13 -0
- data/spec/legion/extensions/llm/conformance/fixtures/canonical_empty_response.json +13 -0
- data/spec/legion/extensions/llm/conformance/fixtures/canonical_error_response.json +19 -0
- data/spec/legion/extensions/llm/conformance/fixtures/canonical_fleet_round_trip.json +81 -0
- data/spec/legion/extensions/llm/conformance/fixtures/canonical_metering_audit_events.json +101 -0
- data/spec/legion/extensions/llm/conformance/fixtures/canonical_params_mapping_request.json +21 -0
- data/spec/legion/extensions/llm/conformance/fixtures/canonical_server_tool_continuation_request.json +43 -0
- data/spec/legion/extensions/llm/conformance/fixtures/canonical_server_tool_use_response.json +29 -0
- data/spec/legion/extensions/llm/conformance/fixtures/canonical_simple_text_request.json +13 -0
- data/spec/legion/extensions/llm/conformance/fixtures/canonical_simple_text_response.json +13 -0
- data/spec/legion/extensions/llm/conformance/fixtures/canonical_stop_reason_matrix.json +36 -0
- data/spec/legion/extensions/llm/conformance/fixtures/canonical_streaming_accumulated_response.json +20 -0
- data/spec/legion/extensions/llm/conformance/fixtures/canonical_streaming_error_chunks.json +26 -0
- data/spec/legion/extensions/llm/conformance/fixtures/canonical_streaming_server_tool_chunks.json +52 -0
- data/spec/legion/extensions/llm/conformance/fixtures/canonical_streaming_text_chunks.json +33 -0
- data/spec/legion/extensions/llm/conformance/fixtures/canonical_streaming_thinking_chunks.json +42 -0
- data/spec/legion/extensions/llm/conformance/fixtures/canonical_streaming_tool_call_chunks.json +41 -0
- data/spec/legion/extensions/llm/conformance/fixtures/canonical_system_prompt_request.json +14 -0
- data/spec/legion/extensions/llm/conformance/fixtures/canonical_thinking_request.json +18 -0
- data/spec/legion/extensions/llm/conformance/fixtures/canonical_thinking_response.json +17 -0
- data/spec/legion/extensions/llm/conformance/fixtures/canonical_tool_results_continuation_request.json +75 -0
- data/spec/legion/extensions/llm/conformance/fixtures/canonical_tool_use_response.json +25 -0
- data/spec/legion/extensions/llm/conformance/fixtures/canonical_tools_request.json +34 -0
- data/spec/legion/extensions/llm/conformance/provider_tool_rendering_examples.rb +77 -0
- data/spec/legion/extensions/llm/conformance/provider_translator_examples.rb +390 -0
- data/spec/legion/extensions/llm/connection_logging_spec.rb +53 -0
- data/spec/legion/extensions/llm/connection_retry_spec.rb +36 -0
- data/spec/legion/extensions/llm/context_spec.rb +127 -0
- data/spec/legion/extensions/llm/credential_sources_spec.rb +468 -0
- data/spec/legion/extensions/llm/error_middleware_spec.rb +102 -0
- data/spec/legion/extensions/llm/error_spec.rb +87 -0
- data/spec/legion/extensions/llm/fleet/provider_responder_spec.rb +120 -0
- data/spec/legion/extensions/llm/fleet/token_validator_spec.rb +163 -0
- data/spec/legion/extensions/llm/fleet/worker_execution_spec.rb +128 -0
- data/spec/legion/extensions/llm/fleet_messages_spec.rb +402 -0
- data/spec/legion/extensions/llm/gemspec_spec.rb +25 -0
- data/spec/legion/extensions/llm/message_spec.rb +64 -0
- data/spec/legion/extensions/llm/model/info_spec.rb +222 -0
- data/spec/legion/extensions/llm/models_spec.rb +104 -0
- data/spec/legion/extensions/llm/provider/open_ai_compatible_spec.rb +203 -0
- data/spec/legion/extensions/llm/provider/open_ai_compatible_tool_calls_array_spec.rb +68 -0
- data/spec/legion/extensions/llm/provider_contract_spec.rb +60 -0
- data/spec/legion/extensions/llm/provider_settings_spec.rb +76 -0
- data/spec/legion/extensions/llm/provider_spec.rb +613 -0
- data/spec/legion/extensions/llm/registry_event_builder_spec.rb +68 -0
- data/spec/legion/extensions/llm/registry_publisher_spec.rb +22 -0
- data/spec/legion/extensions/llm/responses/response_objects_spec.rb +75 -0
- data/spec/legion/extensions/llm/responses/thinking_extractor_spec.rb +75 -0
- data/spec/legion/extensions/llm/routing/model_offering_spec.rb +222 -0
- data/spec/legion/extensions/llm/routing/offering_registry_spec.rb +50 -0
- data/spec/legion/extensions/llm/routing/registry_event_spec.rb +120 -0
- data/spec/legion/extensions/llm/stream_accumulator_spec.rb +155 -0
- data/spec/legion/extensions/llm/streaming_spec.rb +108 -0
- data/spec/legion/extensions/llm/tool_spec.rb +94 -0
- data/spec/legion/extensions/llm/transport/fleet_lane_spec.rb +60 -0
- data/spec/legion/extensions/llm/utils_spec.rb +113 -0
- data/spec/legion/extensions/llm_base_contract_spec.rb +110 -0
- data/spec/legion/extensions/llm_extension_spec.rb +78 -0
- data/spec/legion/extensions/llm_root_spec.rb +51 -0
- data/spec/spec_helper.rb +24 -0
- data/spec/support/fake_llm_provider.rb +148 -0
- data/spec/support/llm_configuration.rb +21 -0
- data/spec/support/rspec_configuration.rb +19 -0
- data/spec/support/simplecov_configuration.rb +20 -0
- metadata +103 -15
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# -- from_hash normalization is intentional
|
|
4
|
+
module Legion
|
|
5
|
+
module Extensions
|
|
6
|
+
module Llm
|
|
7
|
+
module Canonical
|
|
8
|
+
# rubocop:disable Lint/ConstantDefinitionInBlock -- required for Data.define block scope
|
|
9
|
+
# Immutable value object carrying the usage/metering figures reported for a response.
# Field names follow the token vocabulary of lex-llm Tokens and legion-llm Types.
# `units` is the extension point for non-token metering (G20b).
Usage = ::Data.define(
  :input_tokens, :output_tokens, :cache_read_tokens, :cache_write_tokens,
  :thinking_tokens, :units
) do
  USAGE_KNOWN_KEYS = %i[input_tokens output_tokens cache_read_tokens cache_write_tokens
                        thinking_tokens units].freeze

  # Build a Usage from a Hash (raw provider response or deserialized wire payload).
  #
  # Top-level keys may be Strings or Symbols. For every field the canonical key
  # wins; otherwise the first truthy legacy spelling is used. Cache-read and
  # thinking counts additionally fall back to the nested OpenAI-style
  # `*_tokens_details` hashes.
  #
  # @param source [Hash, nil] usage payload
  # @return [Usage, nil] nil when +source+ is nil or empty
  def self.from_hash(source)
    return nil if source.nil? || source.empty?

    raw = source.transform_keys(&:to_sym)
    # First truthy value found under the given keys, checked left to right.
    pick = ->(*keys) { keys.reduce(nil) { |found, key| found || raw[key] } }

    new(
      input_tokens: pick.call(:input_tokens, :input, :prompt_tokens),
      output_tokens: pick.call(:output_tokens, :output, :completion_tokens),
      cache_read_tokens: pick.call(:cache_read_tokens, :cached, :cache_read) ||
                         dig_nested(raw, :prompt_tokens_details, :cached_tokens) ||
                         dig_nested(raw, :input_tokens_details, :cached_tokens),
      cache_write_tokens: pick.call(:cache_write_tokens, :cache_creation, :cache_write),
      thinking_tokens: pick.call(:thinking_tokens, :thinking, :reasoning) ||
                       dig_nested(raw, :completion_tokens_details, :reasoning_tokens) ||
                       dig_nested(raw, :output_tokens_details, :reasoning_tokens),
      # Non-token units default to an empty Hash when the payload has none.
      units: raw[:units] || {}
    )
  end

  # Read +value_key+ out of the Hash stored under +details_key+.
  # Only top-level keys are symbolized by from_hash, so the nested key is
  # tried as given first and then as a String.
  #
  # @return [Object, nil] nil when the details entry is missing or not a Hash
  def self.dig_nested(hash, details_key, value_key)
    nested = hash[details_key]
    nested.is_a?(Hash) ? (nested[value_key] || nested[value_key.to_s]) : nil
  end

  # Serialize to a Hash for AMQP/fleet/wire transport, leaving out nil fields.
  def to_h
    super.reject { |_member, value| value.nil? }
  end

  # Sum of every token category that is set; nil categories are skipped.
  def total_tokens
    counts = [input_tokens, output_tokens, cache_read_tokens, cache_write_tokens,
              thinking_tokens]
    counts.compact.sum
  end
end
|
|
70
|
+
# rubocop:enable Lint/ConstantDefinitionInBlock
|
|
71
|
+
end
|
|
72
|
+
end
|
|
73
|
+
end
|
|
74
|
+
end
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'canonical/thinking'
|
|
4
|
+
require_relative 'canonical/usage'
|
|
5
|
+
require_relative 'canonical/params'
|
|
6
|
+
require_relative 'canonical/content_block'
|
|
7
|
+
require_relative 'canonical/tool_definition'
|
|
8
|
+
require_relative 'canonical/tool_schema'
|
|
9
|
+
require_relative 'canonical/tool_call'
|
|
10
|
+
require_relative 'canonical/message'
|
|
11
|
+
require_relative 'canonical/request'
|
|
12
|
+
require_relative 'canonical/response'
|
|
13
|
+
require_relative 'canonical/chunk'
|
|
14
|
+
|
|
15
|
+
module Legion
|
|
16
|
+
module Extensions
|
|
17
|
+
module Llm
|
|
18
|
+
# Canonical types for the N×N client→provider routing architecture.
|
|
19
|
+
#
|
|
20
|
+
# These Data.define structs form the single contract between client translators
|
|
21
|
+
# and provider translators. Per Amendment A: immutable, strict factories,
|
|
22
|
+
# enum validation, unknown keys → metadata.
|
|
23
|
+
#
|
|
24
|
+
# Contract version: incremented on any breaking change to the canonical shape.
|
|
25
|
+
# Provider registration refuses gems built against a mismatched version (G7).
|
|
26
|
+
module Canonical
  # Version string of the canonical contract.
  CONTRACT_VERSION = '1.0.0'

  # Names of the constants under Canonical that are registered as canonical types.
  TYPES = %i[
    Thinking Usage Params ContentBlock
    ToolDefinition ToolCall Message
    Request Response Chunk
  ].freeze

  class << self
    # Resolve every registered type name to the constant it names.
    #
    # @return [Array] one constant per entry of TYPES, in TYPES order
    # @raise [NameError] if a registered name is not defined under Canonical
    def types
      TYPES.map { |name| const_get(name) }
    end

    # Check whether +name+ is a registered canonical type.
    # The comparison is case-sensitive. Arguments that cannot be turned into
    # a Symbol (nil, numbers, ...) are never registered, so they yield false
    # instead of raising NoMethodError.
    #
    # @param name [String, Symbol, Object] constant name to look up
    # @return [Boolean]
    def type?(name)
      return false unless name.respond_to?(:to_sym)

      TYPES.include?(name.to_sym)
    end
  end
end
|
|
48
|
+
end
|
|
49
|
+
end
|
|
50
|
+
end
|
|
@@ -11,9 +11,7 @@ module Legion
|
|
|
11
11
|
attr_reader :model, :messages, :tools, :tool_prefs, :params, :headers, :schema
|
|
12
12
|
|
|
13
13
|
def initialize(model: nil, provider: nil, assume_model_exists: false, context: nil)
|
|
14
|
-
if assume_model_exists && !provider
|
|
15
|
-
raise ArgumentError, 'Provider must be specified if assume_model_exists is true'
|
|
16
|
-
end
|
|
14
|
+
raise ArgumentError, 'Provider must be specified if assume_model_exists is true' if assume_model_exists && !provider
|
|
17
15
|
|
|
18
16
|
@context = context
|
|
19
17
|
@config = context&.config || Legion::Extensions::Llm.config
|
|
@@ -139,7 +137,7 @@ module Legion
|
|
|
139
137
|
messages.each(&)
|
|
140
138
|
end
|
|
141
139
|
|
|
142
|
-
def complete(&)
|
|
140
|
+
def complete(&)
|
|
143
141
|
response = @provider.complete(
|
|
144
142
|
messages,
|
|
145
143
|
tools: @tools,
|
|
@@ -234,7 +232,7 @@ module Legion
|
|
|
234
232
|
end
|
|
235
233
|
end
|
|
236
234
|
|
|
237
|
-
def handle_tool_calls(response, &)
|
|
235
|
+
def handle_tool_calls(response, &)
|
|
238
236
|
halt_result = nil
|
|
239
237
|
|
|
240
238
|
response.tool_calls.each_value do |tool_call|
|
|
@@ -77,9 +77,13 @@ module Legion
|
|
|
77
77
|
|
|
78
78
|
def setup_logging(faraday)
|
|
79
79
|
logger = faraday_logger
|
|
80
|
+
# Enable request body logging when the logger is at DEBUG level,
|
|
81
|
+
# or when explicitly enabled via fleet request_payload setting.
|
|
82
|
+
request_payload = Legion::Extensions::Llm.default_settings.dig(:fleet, :request, :logger, :request_payload)
|
|
83
|
+
bodies_enabled = request_payload == true || debug_logger?(logger)
|
|
80
84
|
faraday.response :logger,
|
|
81
85
|
logger,
|
|
82
|
-
bodies:
|
|
86
|
+
bodies: bodies_enabled,
|
|
83
87
|
errors: false,
|
|
84
88
|
headers: false,
|
|
85
89
|
log_level: :debug do |logger|
|
|
@@ -54,6 +54,8 @@ module Legion
|
|
|
54
54
|
|
|
55
55
|
# Faraday middleware that maps provider-specific API errors to Legion::Extensions::Llm errors.
|
|
56
56
|
class ErrorMiddleware < Faraday::Middleware
|
|
57
|
+
extend Legion::Logging::Helper
|
|
58
|
+
|
|
57
59
|
STREAM_ERROR_BODY_KEY = :legion_llm_stream_error_body
|
|
58
60
|
|
|
59
61
|
def initialize(app, options = {})
|
|
@@ -80,7 +82,7 @@ module Legion
|
|
|
80
82
|
/reduce the length of messages/i
|
|
81
83
|
].freeze
|
|
82
84
|
|
|
83
|
-
def parse_error(provider:, response:)
|
|
85
|
+
def parse_error(provider:, response:)
|
|
84
86
|
response = response_with_stream_error_body(response)
|
|
85
87
|
message = provider&.parse_error(response)
|
|
86
88
|
|
|
@@ -88,9 +90,7 @@ module Legion
|
|
|
88
90
|
when 200..399
|
|
89
91
|
message
|
|
90
92
|
when 400
|
|
91
|
-
if context_length_exceeded?(message)
|
|
92
|
-
raise ContextLengthExceededError.new(response, message || 'Context length exceeded')
|
|
93
|
-
end
|
|
93
|
+
raise ContextLengthExceededError.new(response, message || 'Context length exceeded') if context_length_exceeded?(message)
|
|
94
94
|
|
|
95
95
|
raise BadRequestError.new(response, message || 'Invalid request - please check your input')
|
|
96
96
|
when 401
|
|
@@ -101,9 +101,7 @@ module Legion
|
|
|
101
101
|
raise ForbiddenError.new(response,
|
|
102
102
|
message || 'Forbidden - you do not have permission to access this resource')
|
|
103
103
|
when 429
|
|
104
|
-
if context_length_exceeded?(message)
|
|
105
|
-
raise ContextLengthExceededError.new(response, message || 'Context length exceeded')
|
|
106
|
-
end
|
|
104
|
+
raise ContextLengthExceededError.new(response, message || 'Context length exceeded') if context_length_exceeded?(message)
|
|
107
105
|
|
|
108
106
|
raise RateLimitError.new(response, message || 'Rate limit exceeded - please wait a moment')
|
|
109
107
|
when 500
|
|
@@ -14,9 +14,7 @@ module Legion
|
|
|
14
14
|
|
|
15
15
|
def reject_legacy_options!
|
|
16
16
|
LEGACY_OPTIONS.each do |key|
|
|
17
|
-
if @options.key?(key) || @options.key?(key.to_s)
|
|
18
|
-
raise ArgumentError, "#{key} is not supported by fleet protocol v2"
|
|
19
|
-
end
|
|
17
|
+
raise ArgumentError, "#{key} is not supported by fleet protocol v2" if @options.key?(key) || @options.key?(key.to_s)
|
|
20
18
|
end
|
|
21
19
|
end
|
|
22
20
|
|
|
@@ -119,9 +119,7 @@ module Legion
|
|
|
119
119
|
raise ConfigurationError,
|
|
120
120
|
"fleet provider instance is not configured: #{instance_id}"
|
|
121
121
|
end
|
|
122
|
-
unless truthy?(dig(instance_settings, :fleet, :respond_to_requests))
|
|
123
|
-
raise ConfigurationError, "fleet responses are disabled for provider instance: #{instance_id}"
|
|
124
|
-
end
|
|
122
|
+
raise ConfigurationError, "fleet responses are disabled for provider instance: #{instance_id}" unless truthy?(dig(instance_settings, :fleet, :respond_to_requests))
|
|
125
123
|
|
|
126
124
|
provider_class.new(deep_symbolize(instance_settings))
|
|
127
125
|
end
|
|
@@ -186,9 +186,7 @@ module Legion
|
|
|
186
186
|
end
|
|
187
187
|
|
|
188
188
|
def signing_key
|
|
189
|
-
if defined?(::Legion::Crypt) && ::Legion::Crypt.respond_to?(:cluster_secret)
|
|
190
|
-
return ::Legion::Crypt.cluster_secret
|
|
191
|
-
end
|
|
189
|
+
return ::Legion::Crypt.cluster_secret if defined?(::Legion::Crypt) && ::Legion::Crypt.respond_to?(:cluster_secret)
|
|
192
190
|
|
|
193
191
|
raise TokenError, 'no signing key available - Legion::Crypt not initialized'
|
|
194
192
|
rescue TokenError
|
|
@@ -19,7 +19,7 @@ module Legion
|
|
|
19
19
|
:parameter_size, :quantization, :size_bytes,
|
|
20
20
|
:modalities_input, :modalities_output, :metadata
|
|
21
21
|
) do
|
|
22
|
-
# rubocop:disable Metrics/ParameterLists
|
|
22
|
+
# rubocop:disable Metrics/ParameterLists
|
|
23
23
|
def initialize(
|
|
24
24
|
id:, name: nil, provider: nil, instance: :default,
|
|
25
25
|
family: nil, capabilities: [], context_length: nil,
|
|
@@ -46,7 +46,7 @@ module Legion
|
|
|
46
46
|
metadata: metadata.is_a?(Hash) ? metadata : {}
|
|
47
47
|
)
|
|
48
48
|
end
|
|
49
|
-
# rubocop:enable Metrics/ParameterLists
|
|
49
|
+
# rubocop:enable Metrics/ParameterLists
|
|
50
50
|
|
|
51
51
|
# ── Capability predicates ─────────────────────────────────────
|
|
52
52
|
|
|
@@ -206,11 +206,9 @@ module Legion
|
|
|
206
206
|
class << self
|
|
207
207
|
private
|
|
208
208
|
|
|
209
|
-
def extract_modalities(data)
|
|
209
|
+
def extract_modalities(data)
|
|
210
210
|
# New-style keys take priority (round-trip from to_h)
|
|
211
|
-
if data.key?(:modalities_input) || data.key?(:modalities_output)
|
|
212
|
-
return [Array(data[:modalities_input]), Array(data[:modalities_output])]
|
|
213
|
-
end
|
|
211
|
+
return [Array(data[:modalities_input]), Array(data[:modalities_output])] if data.key?(:modalities_input) || data.key?(:modalities_output)
|
|
214
212
|
|
|
215
213
|
# Legacy: modalities is a hash or Modalities object
|
|
216
214
|
modalities_data = data[:modalities]
|
|
@@ -123,7 +123,7 @@ module Legion
|
|
|
123
123
|
fetch_provider_models(remote_only: remote_only)[:models]
|
|
124
124
|
end
|
|
125
125
|
|
|
126
|
-
def resolve(model_id, provider: nil, assume_exists: false, config: nil)
|
|
126
|
+
def resolve(model_id, provider: nil, assume_exists: false, config: nil)
|
|
127
127
|
config ||= Legion::Extensions::Llm.config
|
|
128
128
|
provider_class = provider ? resolve_provider_class(provider) : nil
|
|
129
129
|
|
|
@@ -168,7 +168,7 @@ module Legion
|
|
|
168
168
|
instance.respond_to?(method, include_private) || super
|
|
169
169
|
end
|
|
170
170
|
|
|
171
|
-
def fetch_models_dev_models(existing_models)
|
|
171
|
+
def fetch_models_dev_models(existing_models)
|
|
172
172
|
log.info 'Fetching models from models.dev API...'
|
|
173
173
|
|
|
174
174
|
connection = Connection.basic do |f|
|
|
@@ -300,7 +300,7 @@ module Legion
|
|
|
300
300
|
end
|
|
301
301
|
end
|
|
302
302
|
|
|
303
|
-
def add_provider_metadata(models_dev_model, provider_model)
|
|
303
|
+
def add_provider_metadata(models_dev_model, provider_model)
|
|
304
304
|
data = models_dev_model.to_h
|
|
305
305
|
data[:name] = provider_model.name if blank_value?(data[:name])
|
|
306
306
|
data[:family] = provider_model.family if blank_value?(data[:family])
|
|
@@ -76,7 +76,12 @@ module Legion
|
|
|
76
76
|
def format_openai_tool_calls(tool_calls)
|
|
77
77
|
return nil unless tool_calls&.any?
|
|
78
78
|
|
|
79
|
-
|
|
79
|
+
# Array is the canonical shape (per canonical/message.rb); Hash
|
|
80
|
+
# is the legacy lex-llm shape (id => ToolCall). Both flow through
|
|
81
|
+
# this renderer depending on caller.
|
|
82
|
+
calls = tool_calls.is_a?(Hash) ? tool_calls.values : Array(tool_calls)
|
|
83
|
+
|
|
84
|
+
calls.map do |tool_call|
|
|
80
85
|
{
|
|
81
86
|
id: tool_call.id,
|
|
82
87
|
type: 'function',
|
|
@@ -95,9 +100,9 @@ module Legion
|
|
|
95
100
|
{
|
|
96
101
|
type: 'function',
|
|
97
102
|
function: {
|
|
98
|
-
name: tool
|
|
99
|
-
description: tool
|
|
100
|
-
parameters: tool
|
|
103
|
+
name: Canonical::ToolSchema.tool_name(tool),
|
|
104
|
+
description: Canonical::ToolSchema.tool_description(tool),
|
|
105
|
+
parameters: Canonical::ToolSchema.extract(tool)
|
|
101
106
|
}
|
|
102
107
|
}
|
|
103
108
|
end
|
|
@@ -137,7 +137,7 @@ module Legion
|
|
|
137
137
|
parse_list_models_response response, slug, capabilities
|
|
138
138
|
end
|
|
139
139
|
|
|
140
|
-
def discover_offerings(live: false, **filters)
|
|
140
|
+
def discover_offerings(live: false, raise_on_unreachable: false, **filters)
|
|
141
141
|
return filter_cached_offerings(Array(@cached_offerings), filters) unless live
|
|
142
142
|
|
|
143
143
|
provider_health = health(live:)
|
|
@@ -148,8 +148,10 @@ module Legion
|
|
|
148
148
|
offering_from_model(model, health: provider_health)
|
|
149
149
|
end
|
|
150
150
|
@cached_offerings
|
|
151
|
-
rescue Faraday::ConnectionFailed => e
|
|
151
|
+
rescue Faraday::ConnectionFailed, Faraday::TimeoutError => e
|
|
152
152
|
log.warn("[#{slug}] instance=#{provider_instance_id} unreachable: #{e.message}")
|
|
153
|
+
raise if raise_on_unreachable
|
|
154
|
+
|
|
153
155
|
[]
|
|
154
156
|
end
|
|
155
157
|
|
|
@@ -224,9 +226,16 @@ module Legion
|
|
|
224
226
|
end
|
|
225
227
|
|
|
226
228
|
# Decide whether caching is enabled for this provider.
#
# A non-nil `llm_cache_enabled` on the provider config takes precedence and
# counts as enabled only when it is exactly `true`. When the config does not
# expose that setting, or it is nil, the global prompt-caching flag decides.
# The source of the decision is logged at debug level either way.
#
# @return [Boolean]
def cache_enabled?
  per_provider = config.respond_to?(:llm_cache_enabled) ? config.llm_cache_enabled : nil

  if per_provider.nil?
    fallback = global_prompt_caching_enabled?
    log.debug { "[#{slug}] cache_enabled? source=global value=#{fallback}" }
    fallback
  else
    log.debug { "[#{slug}] cache_enabled? source=per_provider value=#{per_provider}" }
    per_provider == true
  end
end
|
|
231
240
|
|
|
232
241
|
def cache_control_prefix_tokens
|
|
@@ -528,6 +537,14 @@ module Legion
|
|
|
528
537
|
|
|
529
538
|
private
|
|
530
539
|
|
|
540
|
+
# Read the global prompt-caching switch from Legion::Settings.
#
# Enabled only when `llm.prompt_caching.enabled` is exactly `true`. The
# answer is false when Legion::Settings is not loaded or when reading the
# setting raises a StandardError.
#
# @return [Boolean]
def global_prompt_caching_enabled?
  settings = defined?(Legion::Settings) ? Legion::Settings : nil
  return false if settings.nil?

  flag = settings.dig(:llm, :prompt_caching, :enabled)
  flag == true
rescue StandardError
  false
end
|
|
547
|
+
|
|
531
548
|
def model_detail_cache_key(model_name)
|
|
532
549
|
tier = offering_tier
|
|
533
550
|
instance_key = cache_instance_key
|
|
@@ -11,10 +11,19 @@ module Legion
|
|
|
11
11
|
embed: [%i[keyreq text], %i[keyreq model]],
|
|
12
12
|
image: [%i[keyreq prompt], %i[keyreq model]],
|
|
13
13
|
list_models: [%i[key live], %i[keyrest filters]],
|
|
14
|
-
discover_offerings: [%i[key live], %i[keyrest filters]],
|
|
14
|
+
discover_offerings: [%i[key live], %i[key raise_on_unreachable], %i[keyrest filters]],
|
|
15
15
|
health: [%i[key live]],
|
|
16
16
|
count_tokens: [%i[keyreq messages], %i[keyreq model], %i[key params]]
|
|
17
17
|
}.freeze
|
|
18
|
+
|
|
19
|
+
# Tools passed to chat/stream_chat must support Canonical::ToolDefinition objects.
|
|
20
|
+
# Providers must not crash on Data.define instances (not Hashes).
|
|
21
|
+
TOOL_SUPPORT_CONTRACT = <<~DOC
|
|
22
|
+
- chat and stream_chat accept keyword `tools:` (Hash<name, tool_object>)
|
|
23
|
+
- tools may be Canonical::ToolDefinition, Hash, or legacy Lex::Llm::Tool
|
|
24
|
+
- Renderers must use Canonical::ToolSchema.extract(tool) for schema access
|
|
25
|
+
- discover_offerings(live: true, raise_on_unreachable: true) raises on transport failure
|
|
26
|
+
DOC
|
|
18
27
|
end
|
|
19
28
|
end
|
|
20
29
|
end
|
|
@@ -10,9 +10,7 @@ module Legion
|
|
|
10
10
|
|
|
11
11
|
def for(offering, prefix: 'llm.fleet', include_context: true, include_fingerprint: false)
|
|
12
12
|
parts = [prefix, lane_kind(offering), model_slug(lane_model(offering))]
|
|
13
|
-
if include_context && offering.inference? && offering.context_window
|
|
14
|
-
parts << "ctx#{offering.context_window}"
|
|
15
|
-
end
|
|
13
|
+
parts << "ctx#{offering.context_window}" if include_context && offering.inference? && offering.context_window
|
|
16
14
|
parts.push('elig', eligibility_fingerprint(offering)) if include_fingerprint
|
|
17
15
|
parts.join('.')
|
|
18
16
|
end
|
|
@@ -39,7 +39,7 @@ module Legion
|
|
|
39
39
|
log.debug { inspect } if Legion::Extensions::Llm.config.log_stream_debug
|
|
40
40
|
end
|
|
41
41
|
|
|
42
|
-
def filtered_chunk(chunk)
|
|
42
|
+
def filtered_chunk(chunk)
|
|
43
43
|
has_content = !@last_content_delta.empty?
|
|
44
44
|
has_thinking = !@last_thinking_delta.empty?
|
|
45
45
|
has_tokens = chunk.input_tokens&.positive? || chunk.output_tokens&.positive?
|
|
@@ -57,6 +57,27 @@ module Legion
|
|
|
57
57
|
)
|
|
58
58
|
end
|
|
59
59
|
|
|
60
|
+
# Emit whatever is still sitting in the untagged-preamble buffer as one last
# streamed chunk. Short responses that match the untagged-reasoning heuristic
# (e.g. starting with "I", "The", "Let me") and never reach a double newline
# would otherwise stay buffered for the whole stream, and the caller's block
# would never receive a delta.
#
# Resets both per-chunk delta accumulators before flushing.
#
# @return [Chunk, nil] nil when the buffer is empty or the flush produced
#   neither content nor thinking text
def flush_pending_chunk
  return if @untagged_preamble_buffer.empty?

  @last_content_delta = +''
  @last_thinking_delta = +''
  flush_pending_untagged_preamble_into_deltas

  has_text = !@last_content_delta.empty?
  has_reasoning = !@last_thinking_delta.empty?
  return unless has_text || has_reasoning

  Chunk.new(
    role: :assistant,
    content: (@last_content_delta if has_text),
    thinking: (Thinking.build(text: @last_thinking_delta) if has_reasoning),
    model_id: model_id
  )
end
|
|
80
|
+
|
|
60
81
|
def to_message(response)
|
|
61
82
|
flush_pending_untagged_preamble
|
|
62
83
|
|
|
@@ -233,6 +254,24 @@ module Legion
|
|
|
233
254
|
@untagged_preamble_pending = false
|
|
234
255
|
end
|
|
235
256
|
|
|
257
|
+
# Variant of flush_pending_untagged_preamble that also appends the flushed
# text to the per-chunk delta accumulators, so flush_pending_chunk can hand
# it to the streaming block.
#
# When the extractor reports thinking text, the extracted content and thinking
# are appended to their running totals and deltas. Otherwise the whole buffer
# is treated as content. The buffer and the pending flag are cleared afterwards.
def flush_pending_untagged_preamble_into_deltas
  buffered = @untagged_preamble_buffer
  visible, reasoning = Responses::ThinkingExtractor.extract_untagged_preamble(buffered)
  # Without extracted thinking the buffer itself is the content.
  visible = buffered unless reasoning

  [@content, @last_content_delta].each { |sink| sink << visible }
  [@thinking_text, @last_thinking_delta].each { |sink| sink << reasoning } if reasoning

  @untagged_preamble_buffer = +''
  @untagged_preamble_pending = false
end
|
|
274
|
+
|
|
236
275
|
def append_thinking_from_chunk(chunk)
|
|
237
276
|
thinking = chunk.thinking
|
|
238
277
|
return unless thinking
|
|
@@ -16,9 +16,7 @@ module Legion
|
|
|
16
16
|
response = connection.post stream_url, payload do |req|
|
|
17
17
|
req.headers = additional_headers.merge(req.headers) unless additional_headers.empty?
|
|
18
18
|
on_chunk = build_stream_callback(accumulator, block)
|
|
19
|
-
if Legion::Extensions::Llm.config.log_stream_debug
|
|
20
|
-
log.debug { "Stream callback prepared: #{on_chunk.inspect}" }
|
|
21
|
-
end
|
|
19
|
+
log.debug { "Stream callback prepared: #{on_chunk.inspect}" } if Legion::Extensions::Llm.config.log_stream_debug
|
|
22
20
|
if faraday_1?
|
|
23
21
|
req.options[:on_data] = handle_stream(&on_chunk)
|
|
24
22
|
else
|
|
@@ -26,6 +24,11 @@ module Legion
|
|
|
26
24
|
end
|
|
27
25
|
end
|
|
28
26
|
|
|
27
|
+
# Release any text held by the untagged-preamble heuristic so short
|
|
28
|
+
# responses still stream at least one delta to the caller.
|
|
29
|
+
final_chunk = accumulator.flush_pending_chunk
|
|
30
|
+
block&.call(final_chunk) if final_chunk
|
|
31
|
+
|
|
29
32
|
message = accumulator.to_message(response)
|
|
30
33
|
log.debug { "Stream completed: #{message.content}" }
|
|
31
34
|
message
|
|
@@ -33,6 +36,8 @@ module Legion
|
|
|
33
36
|
|
|
34
37
|
def build_stream_callback(accumulator, block)
|
|
35
38
|
proc do |chunk|
|
|
39
|
+
next unless chunk
|
|
40
|
+
|
|
36
41
|
accumulator.add chunk
|
|
37
42
|
filtered = accumulator.filtered_chunk(chunk)
|
|
38
43
|
block.call(filtered) if filtered
|
|
@@ -41,7 +46,10 @@ module Legion
|
|
|
41
46
|
|
|
42
47
|
# Build the on-data handler for a streaming request.
#
# Each parsed payload that is a Hash is turned into a chunk via build_chunk.
# Payloads that are not Hashes, and Hashes for which build_chunk yields
# nothing, are ignored. Every resulting chunk is passed to +block+.
#
# @return [Object] whatever build_on_data_handler returns
def handle_stream(&block)
  build_on_data_handler do |data|
    chunk = data.is_a?(Hash) ? build_chunk(data) : nil
    block.call(chunk) if chunk
  end
end
|
|
47
55
|
|
|
@@ -185,7 +193,7 @@ module Legion
|
|
|
185
193
|
def build_stream_error_response(parsed_data, env, status)
|
|
186
194
|
error_status = status || env&.status || 500
|
|
187
195
|
|
|
188
|
-
if faraday_1?
|
|
196
|
+
if faraday_1? || env.nil?
|
|
189
197
|
Struct.new(:body, :status).new(parsed_data, error_status)
|
|
190
198
|
else
|
|
191
199
|
env.merge(body: parsed_data, status: error_status)
|
|
@@ -235,9 +235,7 @@ module Legion
|
|
|
235
235
|
def resolve_direct_schema(schema)
|
|
236
236
|
return extract_schema(schema.to_json_schema) if schema.respond_to?(:to_json_schema)
|
|
237
237
|
return Legion::Extensions::Llm::Utils.deep_dup(schema) if schema.is_a?(Hash)
|
|
238
|
-
if schema.is_a?(Class) && schema.method_defined?(:to_json_schema)
|
|
239
|
-
return extract_schema(schema.new.to_json_schema)
|
|
240
|
-
end
|
|
238
|
+
return extract_schema(schema.new.to_json_schema) if schema.is_a?(Class) && schema.method_defined?(:to_json_schema)
|
|
241
239
|
|
|
242
240
|
nil
|
|
243
241
|
end
|