lex-llm 0.5.0 → 0.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +21 -0
- data/lib/legion/extensions/llm/canonical/message.rb +16 -3
- data/lib/legion/extensions/llm/canonical/tool_definition.rb +26 -1
- data/lib/legion/extensions/llm/canonical/tool_schema.rb +46 -0
- data/lib/legion/extensions/llm/canonical/usage.rb +13 -0
- data/lib/legion/extensions/llm/canonical.rb +1 -0
- data/lib/legion/extensions/llm/capability_policy.rb +107 -0
- data/lib/legion/extensions/llm/configuration.rb +4 -0
- data/lib/legion/extensions/llm/error.rb +2 -0
- data/lib/legion/extensions/llm/provider/open_ai_compatible.rb +9 -8
- data/lib/legion/extensions/llm/provider.rb +33 -5
- data/lib/legion/extensions/llm/provider_contract.rb +10 -1
- data/lib/legion/extensions/llm/routing/model_offering.rb +14 -2
- data/lib/legion/extensions/llm/stream_accumulator.rb +39 -0
- data/lib/legion/extensions/llm/streaming.rb +36 -3
- data/lib/legion/extensions/llm/version.rb +1 -1
- data/lib/legion/extensions/llm.rb +3 -0
- data/spec/legion/extensions/llm/canonical/tool_definition_spec.rb +49 -2
- data/spec/legion/extensions/llm/canonical/tool_schema_spec.rb +83 -0
- data/spec/legion/extensions/llm/canonical/usage_spec.rb +40 -0
- data/spec/legion/extensions/llm/capability_policy_spec.rb +192 -0
- data/spec/legion/extensions/llm/configuration_spec.rb +40 -0
- data/spec/legion/extensions/llm/conformance/client_translator_examples.rb +163 -0
- data/spec/legion/extensions/llm/conformance/fixtures/canonical_server_tool_continuation_request.json +43 -0
- data/spec/legion/extensions/llm/conformance/fixtures/canonical_server_tool_use_response.json +29 -0
- data/spec/legion/extensions/llm/conformance/fixtures/canonical_streaming_server_tool_chunks.json +52 -0
- data/spec/legion/extensions/llm/conformance/provider_tool_rendering_examples.rb +77 -0
- data/spec/legion/extensions/llm/provider/open_ai_compatible_tool_calls_array_spec.rb +68 -0
- data/spec/legion/extensions/llm/provider_spec.rb +55 -3
- data/spec/legion/extensions/llm/routing/model_offering_spec.rb +58 -0
- data/spec/legion/extensions/llm/stream_accumulator_spec.rb +52 -0
- data/spec/legion/extensions/llm/streaming_spec.rb +9 -0
- metadata +10 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 97ce32819eeea5c69b1278c3bab36876fc420f7092d9428da4e801fe26073601
|
|
4
|
+
data.tar.gz: c91d281b0994aea741558c7ffacced69626da499c2bc7876a58c591b50f56137
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 931eb07b958e676e014804e044c8da11dfc42c866345b6a187c08294da0070e88b2fb30dc569c577a99ee2de95a5f4d55572c8b05ce6d79f327f3ec4a48a8350
|
|
7
|
+
data.tar.gz: 78450fa24b76f759218776b30b80a4d693b0395f58828d79593cb1ce4e640c8ca281f423dcc83144220707e837fd00c76bfc9c600c6382e209e18966d92fc5e6
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,26 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.5.3 - 2026-06-16
|
|
4
|
+
|
|
5
|
+
### Fixed
|
|
6
|
+
- **Streaming error classification** — Partial non-2xx streaming responses now raise status-specific errors (`UnauthorizedError`, `ForbiddenError`, `RateLimitError`, `ServiceUnavailableError`, etc.) instead of always raising `ServerError`. This preserves auth failures for downstream escalation and circuit handling.
|
|
7
|
+
|
|
8
|
+
## 0.5.2 - 2026-06-15
|
|
9
|
+
|
|
10
|
+
### Added
|
|
11
|
+
- **CapabilityPolicy module** — Shared capability resolution with a 7-layer precedence chain (model_override > instance_override > provider_override > model_metadata > provider_catalog > probe > provider_envelope), falling back to `default_false` when no layer decides. All optional capabilities default to false.
|
|
12
|
+
- **Boolean aliases** — `enable_thinking`, `tools_flag`, `embedding_flag`, etc. map to canonical capability keys at any settings level.
|
|
13
|
+
- **ModelOffering#capability_sources** — Per-capability source metadata preserved through offering serialization.
|
|
14
|
+
- **Provider#offering_from_model** — Base class now generates `:model_metadata` source tags for capabilities from provider API responses.
|
|
15
|
+
|
|
16
|
+
## 0.5.1 - 2026-06-12
|
|
17
|
+
|
|
18
|
+
### Fixed
|
|
19
|
+
- **ToolDefinition constants** — Move `OBJECT_SCHEMA_KEYWORDS` and `COMPOSITE_SCHEMA_KEYWORDS` out of `Data.define` block to satisfy `Lint/ConstantDefinitionInBlock`.
|
|
20
|
+
- **ToolSchema documentation** — Add top-level module documentation comment.
|
|
21
|
+
- **Conformance spec cleanup** — Remove unused block argument from shared examples, fix duplicate describe block and context wording in tool_definition_spec.
|
|
22
|
+
- **RuboCop clean** — Zero offenses across 140 files.
|
|
23
|
+
|
|
3
24
|
## 0.5.0 - 2026-06-10
|
|
4
25
|
|
|
5
26
|
### Added
|
|
@@ -72,11 +72,24 @@ module Legion
|
|
|
72
72
|
h[:content] = ContentBlock.from_hash(content)
|
|
73
73
|
end
|
|
74
74
|
|
|
75
|
-
# Parse tool calls
|
|
75
|
+
# Parse tool calls — Array is canonical; Hash is legacy lex-llm format (name → ToolCall)
|
|
76
76
|
tool_calls = h[:tool_calls]
|
|
77
|
-
if tool_calls.is_a?(
|
|
77
|
+
if tool_calls.is_a?(Hash)
|
|
78
|
+
h[:tool_calls] = tool_calls.values.map do |tc|
|
|
79
|
+
next tc if tc.is_a?(ToolCall)
|
|
80
|
+
|
|
81
|
+
raw = tc.respond_to?(:to_h) ? tc.to_h : tc
|
|
82
|
+
ToolCall.from_hash(raw)
|
|
83
|
+
end
|
|
84
|
+
elsif tool_calls.is_a?(Array)
|
|
78
85
|
h[:tool_calls] = tool_calls.map do |tc|
|
|
79
|
-
tc.is_a?(ToolCall)
|
|
86
|
+
next tc if tc.is_a?(ToolCall)
|
|
87
|
+
|
|
88
|
+
if tc.is_a?(Hash)
|
|
89
|
+
ToolCall.from_hash(tc)
|
|
90
|
+
else
|
|
91
|
+
ToolCall.from_hash(tc.respond_to?(:to_h) ? tc.to_h : tc)
|
|
92
|
+
end
|
|
80
93
|
end
|
|
81
94
|
end
|
|
82
95
|
|
|
@@ -5,16 +5,33 @@ module Legion
|
|
|
5
5
|
module Llm
|
|
6
6
|
module Canonical
|
|
7
7
|
TOOL_NAME_MAX_LENGTH = 64
|
|
8
|
+
OBJECT_SCHEMA_KEYWORDS = %i[properties required additionalProperties].freeze
|
|
9
|
+
COMPOSITE_SCHEMA_KEYWORDS = %i[oneOf anyOf allOf enum $ref $defs definitions].freeze
|
|
8
10
|
|
|
9
11
|
# Canonical tool definition.
|
|
10
12
|
# Ports field vocabulary from Legion::LLM::Types::ToolDefinition.
|
|
11
13
|
ToolDefinition = ::Data.define(:name, :description, :parameters, :source) do
|
|
14
|
+
# Coerce a tool's parameter schema into a Hash with symbolized top-level keys.
#
# - nil, anything that does not respond to +transform_keys+, or an empty
#   schema yields the empty object schema <tt>{ type: 'object', properties: {} }</tt>.
# - A schema that already has +:type+ is returned as-is (keys symbolized).
# - A schema with any OBJECT_SCHEMA_KEYWORDS key gets <tt>type: 'object'</tt> merged in.
# - A schema with any COMPOSITE_SCHEMA_KEYWORDS key is returned as-is.
# - Anything else is treated as a bare property map and wrapped.
#
# Only top-level keys are symbolized; nested hashes are passed through untouched.
def self.normalize_parameters(parameters)
  fallback = { type: 'object', properties: {} }
  return fallback unless parameters.respond_to?(:transform_keys)

  symbolized = parameters.transform_keys { |key| key.respond_to?(:to_sym) ? key.to_sym : key }
  return fallback if symbolized.empty?

  if symbolized.key?(:type)
    symbolized
  elsif OBJECT_SCHEMA_KEYWORDS.any? { |keyword| symbolized.key?(keyword) }
    symbolized.merge(type: 'object')
  elsif COMPOSITE_SCHEMA_KEYWORDS.any? { |keyword| symbolized.key?(keyword) }
    symbolized
  else
    { type: 'object', properties: symbolized }
  end
end
|
|
28
|
+
|
|
12
29
|
# Build from keyword args (primary constructor).
|
|
13
30
|
# Primary constructor. Sanitizes the name, stringifies the description,
# normalizes the parameter schema, and defaults the source to
# <tt>{ type: :builtin }</tt> when none is given.
def self.build(name:, description: '', parameters: nil, source: nil)
  new(
    name: sanitize_tool_name(name),
    description: description.to_s,
    parameters: normalize_parameters(parameters),
    source: source || { type: :builtin }
  )
end
|
|
@@ -58,6 +75,14 @@ module Legion
|
|
|
58
75
|
name.empty? ? 'tool' : name
|
|
59
76
|
end
|
|
60
77
|
|
|
78
|
+
# Alias reader: returns the same object as +parameters+.
def params_schema = parameters
|
|
81
|
+
|
|
82
|
+
# Alias reader: returns the same object as +parameters+.
def input_schema = parameters
|
|
85
|
+
|
|
61
86
|
# Serialize to a Hash for AMQP/fleet/wire transport.
|
|
62
87
|
def to_h
|
|
63
88
|
{
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Legion
  module Extensions
    module Llm
      module Canonical
        # Extracts and normalizes tool schemas from heterogeneous sources.
        #
        # A "tool" may be an object exposing reader methods (+name+,
        # +description+, +params_schema+ / +parameters+) or a Hash-like value
        # keyed by symbols or strings. Every helper accepts either shape.
        module ToolSchema
          EMPTY_OBJECT = { type: 'object', properties: {} }.freeze

          module_function

          # Normalized parameter schema for +tool+, as produced by
          # ToolDefinition.normalize_parameters on the raw schema.
          def extract(tool)
            ToolDefinition.normalize_parameters(raw_schema(tool))
          end

          # Raw (un-normalized) schema for +tool+, or nil when none is found.
          #
          # Lookup order: a truthy +params_schema+ reader, a truthy +parameters+
          # reader, then the subscript keys :parameters, 'parameters',
          # :input_schema, 'input_schema', :params_schema, 'params_schema'.
          def raw_schema(tool)
            return nil if tool.nil?

            %i[params_schema parameters].each do |reader|
              next unless tool.respond_to?(reader)

              schema = tool.public_send(reader)
              return schema if schema
            end
            return unless tool.respond_to?(:[])

            tool[:parameters] || tool['parameters'] || tool[:input_schema] || tool['input_schema'] ||
              tool[:params_schema] || tool['params_schema']
          end

          # Name of +tool+. Non-Hash objects are asked via +name+; Hash-like
          # values are looked up under :name then 'name'. Falls back to
          # 'unknown' whenever no name is found, so callers never receive nil.
          def tool_name(tool)
            name = if tool.respond_to?(:name) && !tool.is_a?(Hash)
                     tool.name
                   elsif tool.respond_to?(:[])
                     tool[:name] || tool['name']
                   end
            name || 'unknown'
          end

          # Description of +tool+, always as a String ('' when absent).
          # Non-Hash objects are asked via +description+; Hash-like values are
          # looked up under :description then 'description'.
          def tool_description(tool)
            description = if tool.respond_to?(:description) && !tool.is_a?(Hash)
                            tool.description
                          elsif tool.respond_to?(:[])
                            tool[:description] || tool['description']
                          end
            description.to_s
          end
        end
      end
    end
  end
end
|
|
@@ -30,6 +30,12 @@ module Legion
|
|
|
30
30
|
h[:cache_write_tokens] ||= h.delete(:cache_creation) || h.delete(:cache_write)
|
|
31
31
|
h[:thinking_tokens] ||= h.delete(:thinking) || h.delete(:reasoning)
|
|
32
32
|
|
|
33
|
+
# Extract nested details (OpenAI prompt_tokens_details / input_tokens_details)
|
|
34
|
+
h[:cache_read_tokens] ||= dig_nested(h, :prompt_tokens_details, :cached_tokens) ||
|
|
35
|
+
dig_nested(h, :input_tokens_details, :cached_tokens)
|
|
36
|
+
h[:thinking_tokens] ||= dig_nested(h, :completion_tokens_details, :reasoning_tokens) ||
|
|
37
|
+
dig_nested(h, :output_tokens_details, :reasoning_tokens)
|
|
38
|
+
|
|
33
39
|
# Extract units (non-token extension point — G20b)
|
|
34
40
|
units = h.delete(:units) || {}
|
|
35
41
|
|
|
@@ -43,6 +49,13 @@ module Legion
|
|
|
43
49
|
)
|
|
44
50
|
end
|
|
45
51
|
|
|
52
|
+
# Read +value_key+ out of the nested Hash stored at +hash[details_key]+.
# The nested lookup tries +value_key+ as given, then its String form.
# Returns nil when the details entry is not a Hash.
def self.dig_nested(hash, details_key, value_key)
  nested = hash[details_key]
  nested.is_a?(Hash) ? (nested[value_key] || nested[value_key.to_s]) : nil
end
|
|
58
|
+
|
|
46
59
|
# Serialize to a Hash for AMQP/fleet/wire transport.
|
|
47
60
|
def to_h
|
|
48
61
|
super.compact
|
|
@@ -5,6 +5,7 @@ require_relative 'canonical/usage'
|
|
|
5
5
|
require_relative 'canonical/params'
|
|
6
6
|
require_relative 'canonical/content_block'
|
|
7
7
|
require_relative 'canonical/tool_definition'
|
|
8
|
+
require_relative 'canonical/tool_schema'
|
|
8
9
|
require_relative 'canonical/tool_call'
|
|
9
10
|
require_relative 'canonical/message'
|
|
10
11
|
require_relative 'canonical/request'
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Legion
  module Extensions
    module Llm
      # Resolves capability truth from multiple sources with explicit precedence.
      # Returns both a flat capability list and per-capability source metadata.
      #
      # Each capability in OPTIONAL_CAPABILITIES is decided by the first layer,
      # in SOURCE_INPUTS order, that carries an explicit +true+/+false+ for it.
      # When no layer decides, the capability resolves to +false+ with source
      # +:default_false+. Non-boolean values are ignored at every layer.
      module CapabilityPolicy
        OPTIONAL_CAPABILITIES = %i[
          streaming tools vision embeddings thinking structured_output image audio_transcription audio_speech
        ].freeze

        # Config-level boolean flags that map onto a canonical capability key.
        BOOLEAN_ALIASES = {
          enable_streaming: :streaming,
          enable_tools: :tools,
          enable_thinking: :thinking,
          enable_vision: :vision,
          enable_embeddings: :embeddings,
          enable_images: :image,
          streaming_flag: :streaming,
          tool_flag: :tools,
          tools_flag: :tools,
          thinking_flag: :thinking,
          vision_flag: :vision,
          embedding_flag: :embeddings,
          embeddings_flag: :embeddings,
          image_flag: :image,
          images_flag: :image
        }.freeze

        # Source tag => keyword argument that feeds it, highest precedence first.
        SOURCE_INPUTS = {
          model_override: :model_config,
          instance_override: :instance_config,
          provider_override: :provider_config,
          model_metadata: :real,
          provider_catalog: :provider_catalog,
          probe: :probe,
          provider_envelope: :provider_envelope
        }.freeze

        # Sources whose input is a config Hash (capabilities: {...} plus aliases)
        # rather than a plain capability => boolean map.
        OVERRIDE_SOURCES = %i[model_override instance_override provider_override].freeze

        module_function

        # Resolve every optional capability.
        #
        # @return [Hash] +capabilities:+ the capabilities whose value is exactly
        #   +true+ (in OPTIONAL_CAPABILITIES order) and +sources:+ a Hash of
        #   capability => <tt>{ value:, source: }</tt>.
        def resolve(real:, provider_catalog:, probe:, provider_envelope:, provider_config:, instance_config:, model_config:) # rubocop:disable Metrics/ParameterLists
          # One shared cache: each input is normalized at most once for the whole
          # run instead of once per capability.
          layers = layer_cache(
            real:, provider_catalog:, probe:, provider_envelope:,
            provider_config:, instance_config:, model_config:
          )
          sources = OPTIONAL_CAPABILITIES.to_h { |capability| [capability, lookup(capability, layers)] }

          {
            capabilities: sources.filter_map { |capability, data| capability if data[:value] == true },
            sources: sources
          }
        end

        # Resolve a single capability to <tt>{ value:, source: }</tt>.
        def resolve_one(capability, real:, provider_catalog:, probe:, provider_envelope:, provider_config:, instance_config:, model_config:) # rubocop:disable Metrics/ParameterLists
          layers = layer_cache(
            real:, provider_catalog:, probe:, provider_envelope:,
            provider_config:, instance_config:, model_config:
          )
          lookup(capability, layers)
        end

        # Lazily-populated Hash of source tag => normalized capability map.
        # A layer is normalized the first time it is consulted and never again,
        # so layers below the deciding one are not touched.
        def layer_cache(**inputs)
          Hash.new do |cache, source|
            raw = inputs.fetch(SOURCE_INPUTS.fetch(source))
            cache[source] = OVERRIDE_SOURCES.include?(source) ? normalized_overrides(raw) : normalized_booleans(raw)
          end
        end

        # Walk the layers in precedence order and return the first decision.
        def lookup(capability, layers)
          SOURCE_INPUTS.each_key do |source|
            values = layers[source]
            return { value: values[capability], source: source } if values.key?(capability)
          end

          { value: false, source: :default_false }
        end

        # Capability overrides carried by a config Hash: its +capabilities+ map
        # first, then any BOOLEAN_ALIASES flag for a capability not already set
        # (earlier aliases win over later ones).
        def normalized_overrides(config)
          config = normalize_hash(config) # keys are symbols from here on
          overrides = normalized_booleans(config[:capabilities])
          BOOLEAN_ALIASES.each do |key, capability|
            value = config[key]
            next unless [true, false].include?(value)
            next if overrides.key?(capability)

            overrides[capability] = value
          end
          overrides
        end

        # Keep only known capabilities with strict boolean values. Keys are
        # matched case-insensitively with '-' treated as '_'.
        def normalized_booleans(value)
          normalize_hash(value).each_with_object({}) do |(key, raw), result|
            capability = key.to_s.downcase.tr('-', '_').to_sym
            next unless OPTIONAL_CAPABILITIES.include?(capability)
            next unless [true, false].include?(raw)

            result[capability] = raw
          end
        end

        # Hash view of +value+ with symbolizable keys turned into Symbols;
        # +{}+ when +value+ does not respond to +to_h+.
        def normalize_hash(value)
          return {} unless value.respond_to?(:to_h)

          value.to_h.transform_keys { |key| key.respond_to?(:to_sym) ? key.to_sym : key }
        end
      end
    end
  end
end
|
|
@@ -54,6 +54,8 @@ module Legion
|
|
|
54
54
|
|
|
55
55
|
# Faraday middleware that maps provider-specific API errors to Legion::Extensions::Llm errors.
|
|
56
56
|
class ErrorMiddleware < Faraday::Middleware
|
|
57
|
+
extend Legion::Logging::Helper
|
|
58
|
+
|
|
57
59
|
STREAM_ERROR_BODY_KEY = :legion_llm_stream_error_body
|
|
58
60
|
|
|
59
61
|
def initialize(app, options = {})
|
|
@@ -76,7 +76,12 @@ module Legion
|
|
|
76
76
|
def format_openai_tool_calls(tool_calls)
|
|
77
77
|
return nil unless tool_calls&.any?
|
|
78
78
|
|
|
79
|
-
|
|
79
|
+
# Array is the canonical shape (per canonical/message.rb); Hash
|
|
80
|
+
# is the legacy lex-llm shape (id => ToolCall). Both flow through
|
|
81
|
+
# this renderer depending on caller.
|
|
82
|
+
calls = tool_calls.is_a?(Hash) ? tool_calls.values : Array(tool_calls)
|
|
83
|
+
|
|
84
|
+
calls.map do |tool_call|
|
|
80
85
|
{
|
|
81
86
|
id: tool_call.id,
|
|
82
87
|
type: 'function',
|
|
@@ -92,16 +97,12 @@ module Legion
|
|
|
92
97
|
return nil if tools.empty?
|
|
93
98
|
|
|
94
99
|
tools.values.map do |tool|
|
|
95
|
-
# Tools can be ToolDefinition objects or plain Hashes from native_dispatch.
|
|
96
|
-
tool_name = tool.respond_to?(:name) ? tool.name : (tool[:name] || tool['name'])
|
|
97
|
-
tool_desc = tool.respond_to?(:description) ? tool.description : (tool[:description] || tool['description'] || '')
|
|
98
|
-
tool_params = tool.respond_to?(:params_schema) ? tool.params_schema : (tool[:parameters] || tool['parameters'] || {})
|
|
99
100
|
{
|
|
100
101
|
type: 'function',
|
|
101
102
|
function: {
|
|
102
|
-
name: tool_name,
|
|
103
|
-
description:
|
|
104
|
-
parameters:
|
|
103
|
+
name: Canonical::ToolSchema.tool_name(tool),
|
|
104
|
+
description: Canonical::ToolSchema.tool_description(tool),
|
|
105
|
+
parameters: Canonical::ToolSchema.extract(tool)
|
|
105
106
|
}
|
|
106
107
|
}
|
|
107
108
|
end
|
|
@@ -30,6 +30,8 @@ module Legion
|
|
|
30
30
|
include Legion::Logging::Helper
|
|
31
31
|
include Legion::Cache::Helper
|
|
32
32
|
|
|
33
|
+
MODEL_DETAIL_CACHE_SCHEMA_VERSION = 2
|
|
34
|
+
|
|
33
35
|
attr_reader :config, :connection
|
|
34
36
|
|
|
35
37
|
def initialize(config)
|
|
@@ -137,7 +139,7 @@ module Legion
|
|
|
137
139
|
parse_list_models_response response, slug, capabilities
|
|
138
140
|
end
|
|
139
141
|
|
|
140
|
-
def discover_offerings(live: false, **filters)
|
|
142
|
+
def discover_offerings(live: false, raise_on_unreachable: false, **filters)
|
|
141
143
|
return filter_cached_offerings(Array(@cached_offerings), filters) unless live
|
|
142
144
|
|
|
143
145
|
provider_health = health(live:)
|
|
@@ -148,8 +150,10 @@ module Legion
|
|
|
148
150
|
offering_from_model(model, health: provider_health)
|
|
149
151
|
end
|
|
150
152
|
@cached_offerings
|
|
151
|
-
rescue Faraday::ConnectionFailed => e
|
|
153
|
+
rescue Faraday::ConnectionFailed, Faraday::TimeoutError => e
|
|
152
154
|
log.warn("[#{slug}] instance=#{provider_instance_id} unreachable: #{e.message}")
|
|
155
|
+
raise if raise_on_unreachable
|
|
156
|
+
|
|
153
157
|
[]
|
|
154
158
|
end
|
|
155
159
|
|
|
@@ -224,9 +228,16 @@ module Legion
|
|
|
224
228
|
end
|
|
225
229
|
|
|
226
230
|
# Whether caching is enabled for this provider.
#
# A non-nil +config.llm_cache_enabled+ (when the config exposes it) decides
# on its own and counts as enabled only when it is exactly +true+. Otherwise
# the global prompt-caching setting decides. The deciding source is logged
# at debug level.
def cache_enabled?
  per_provider = config.respond_to?(:llm_cache_enabled) ? config.llm_cache_enabled : nil

  if per_provider.nil?
    enabled = global_prompt_caching_enabled?
    log.debug { "[#{slug}] cache_enabled? source=global value=#{enabled}" }
    enabled
  else
    log.debug { "[#{slug}] cache_enabled? source=per_provider value=#{per_provider}" }
    per_provider == true
  end
end
|
|
231
242
|
|
|
232
243
|
def cache_control_prefix_tokens
|
|
@@ -528,11 +539,23 @@ module Legion
|
|
|
528
539
|
|
|
529
540
|
private
|
|
530
541
|
|
|
542
|
+
# True only when Legion::Settings is loaded and its
# llm -> prompt_caching -> enabled value is exactly +true+.
# Any StandardError raised while reading settings counts as "disabled".
def global_prompt_caching_enabled?
  setting = defined?(Legion::Settings) && Legion::Settings.dig(:llm, :prompt_caching, :enabled)
  setting == true
rescue StandardError
  false
end
|
|
549
|
+
|
|
531
550
|
# Dot-joined cache key for one model's detail record:
# "model_info", the schema version tag, tier, provider slug, instance key,
# credential fragment and model name. Nil parts are omitted.
def model_detail_cache_key(model_name)
  tier_part = offering_tier
  instance_part = cache_instance_key
  credential_part = credential_cache_fragment
  schema_part = "schema#{MODEL_DETAIL_CACHE_SCHEMA_VERSION}"

  ['model_info', schema_part, tier_part, slug, instance_part, credential_part, model_name]
    .reject(&:nil?)
    .join('.')
end
|
|
538
561
|
|
|
@@ -555,6 +578,10 @@ module Legion
|
|
|
555
578
|
end
|
|
556
579
|
|
|
557
580
|
def offering_from_model(model, health: {})
|
|
581
|
+
capability_sources = Array(model.capabilities).to_h do |cap|
|
|
582
|
+
[cap.to_sym, { value: true, source: :model_metadata }]
|
|
583
|
+
end
|
|
584
|
+
|
|
558
585
|
Routing::ModelOffering.new(
|
|
559
586
|
provider_family: slug.to_sym,
|
|
560
587
|
provider_instance: model.instance || provider_instance_id,
|
|
@@ -565,6 +592,7 @@ module Legion
|
|
|
565
592
|
model_family: model.family,
|
|
566
593
|
usage_type: offering_usage_type(model),
|
|
567
594
|
capabilities: model.capabilities,
|
|
595
|
+
capability_sources: capability_sources,
|
|
568
596
|
limits: offering_limits(model),
|
|
569
597
|
health:,
|
|
570
598
|
metadata: offering_metadata(model)
|
|
@@ -11,10 +11,19 @@ module Legion
|
|
|
11
11
|
embed: [%i[keyreq text], %i[keyreq model]],
|
|
12
12
|
image: [%i[keyreq prompt], %i[keyreq model]],
|
|
13
13
|
list_models: [%i[key live], %i[keyrest filters]],
|
|
14
|
-
discover_offerings: [%i[key live], %i[keyrest filters]],
|
|
14
|
+
discover_offerings: [%i[key live], %i[key raise_on_unreachable], %i[keyrest filters]],
|
|
15
15
|
health: [%i[key live]],
|
|
16
16
|
count_tokens: [%i[keyreq messages], %i[keyreq model], %i[key params]]
|
|
17
17
|
}.freeze
|
|
18
|
+
|
|
19
|
+
# Tools passed to chat/stream_chat must support Canonical::ToolDefinition objects.
|
|
20
|
+
# Providers must not crash on Data.define instances (not Hashes).
|
|
21
|
+
TOOL_SUPPORT_CONTRACT = <<~DOC
|
|
22
|
+
- chat and stream_chat accept keyword `tools:` (Hash<name, tool_object>)
|
|
23
|
+
- tools may be Canonical::ToolDefinition, Hash, or legacy Lex::Llm::Tool
|
|
24
|
+
- Renderers must use Canonical::ToolSchema.extract(tool) for schema access
|
|
25
|
+
- discover_offerings(live: true, raise_on_unreachable: true) raises on transport failure
|
|
26
|
+
DOC
|
|
18
27
|
end
|
|
19
28
|
end
|
|
20
29
|
end
|
|
@@ -16,8 +16,8 @@ module Legion
|
|
|
16
16
|
}.freeze
|
|
17
17
|
|
|
18
18
|
attr_reader :offering_id, :provider_family, :model_family, :provider_instance, :instance_id, :transport,
|
|
19
|
-
:tier, :model, :canonical_model_alias, :routing_metadata, :usage_type, :capabilities,
|
|
20
|
-
:credentials, :health, :cost, :policy_tags, :metadata
|
|
19
|
+
:tier, :model, :canonical_model_alias, :routing_metadata, :usage_type, :capabilities,
|
|
20
|
+
:capability_sources, :limits, :credentials, :health, :cost, :policy_tags, :metadata
|
|
21
21
|
|
|
22
22
|
def initialize(data)
|
|
23
23
|
@metadata = normalize_hash(fetch_value(data, :metadata))
|
|
@@ -37,6 +37,7 @@ module Legion
|
|
|
37
37
|
fetch_value(data, :kind) ||
|
|
38
38
|
infer_usage_type(data)))
|
|
39
39
|
@capabilities = normalize_capabilities(fetch_value(data, :capabilities))
|
|
40
|
+
@capability_sources = normalize_capability_sources(fetch_value(data, :capability_sources))
|
|
40
41
|
@limits = normalize_hash(fetch_value(data, :limits))
|
|
41
42
|
@credentials = fetch_value(data, :credentials)
|
|
42
43
|
@health = normalize_hash(fetch_value(data, :health))
|
|
@@ -106,6 +107,7 @@ module Legion
|
|
|
106
107
|
routing_metadata: routing_metadata,
|
|
107
108
|
usage_type: usage_type,
|
|
108
109
|
capabilities: capabilities,
|
|
110
|
+
capability_sources: capability_sources,
|
|
109
111
|
limits: limits,
|
|
110
112
|
credentials: credentials,
|
|
111
113
|
health: health,
|
|
@@ -167,6 +169,16 @@ module Legion
|
|
|
167
169
|
end.uniq
|
|
168
170
|
end
|
|
169
171
|
|
|
172
|
+
# Normalize per-capability source metadata.
#
# Capability names are lower-cased, '-' becomes '_', and the result is a
# Symbol. Each entry keeps only +:value+ and +:source+ (source symbolized);
# nil fields are dropped, so an entry with neither becomes +{}+.
def normalize_capability_sources(value)
  normalize_hash(value).each_with_object({}) do |(capability, source_data), normalized|
    details = normalize_hash(source_data)
    entry = { value: details[:value], source: details[:source]&.to_sym }
    normalized[capability.to_s.downcase.tr('-', '_').to_sym] = entry.compact
  end
end
|
|
181
|
+
|
|
170
182
|
# Hash copy of +value+ with every key converted via +to_sym+.
# nil and false are treated as an empty Hash.
def normalize_hash(value)
  source = value || {}
  source.to_h.transform_keys { |key| key.to_sym }
end
|
|
@@ -57,6 +57,27 @@ module Legion
|
|
|
57
57
|
)
|
|
58
58
|
end
|
|
59
59
|
|
|
60
|
+
# Flush any text still held in the untagged-preamble buffer as a final
|
|
61
|
+
# streamed chunk. Without this, short responses that match the
|
|
62
|
+
# untagged-reasoning heuristic (e.g. starting with "I", "The", "Let me")
|
|
63
|
+
# and never hit a double newline are buffered for the entire stream and
|
|
64
|
+
# the caller's block never receives a single delta.
|
|
65
|
+
# Drain the untagged-preamble buffer and wrap whatever it released in a
# final assistant Chunk. Returns nil when the buffer is empty or when the
# flush produced neither content nor thinking text.
def flush_pending_chunk
  return if @untagged_preamble_buffer.empty?

  # Start from clean per-chunk deltas so the Chunk holds only the flushed text.
  @last_content_delta = +''
  @last_thinking_delta = +''
  flush_pending_untagged_preamble_into_deltas

  text = @last_content_delta unless @last_content_delta.empty?
  reasoning = @last_thinking_delta unless @last_thinking_delta.empty?
  return unless text || reasoning

  Chunk.new(
    role: :assistant,
    content: text,
    thinking: reasoning && Thinking.build(text: reasoning),
    model_id: model_id
  )
end
|
|
80
|
+
|
|
60
81
|
def to_message(response)
|
|
61
82
|
flush_pending_untagged_preamble
|
|
62
83
|
|
|
@@ -233,6 +254,24 @@ module Legion
|
|
|
233
254
|
@untagged_preamble_pending = false
|
|
234
255
|
end
|
|
235
256
|
|
|
257
|
+
# Same as flush_pending_untagged_preamble, but also records the flushed
|
|
258
|
+
# text in the per-chunk delta accumulators so flush_pending_chunk can
|
|
259
|
+
# surface it to the streaming block.
|
|
260
|
+
# Empty the untagged-preamble buffer into the running totals and the
# per-chunk deltas. When the extractor reports thinking text, its content
# and thinking parts are appended separately; otherwise the whole buffer is
# appended as content. The buffer and its pending flag are then reset.
def flush_pending_untagged_preamble_into_deltas
  content, thinking = Responses::ThinkingExtractor.extract_untagged_preamble(@untagged_preamble_buffer)
  visible = thinking ? content : @untagged_preamble_buffer

  [@content, @last_content_delta].each { |sink| sink << visible }
  [@thinking_text, @last_thinking_delta].each { |sink| sink << thinking } if thinking

  @untagged_preamble_buffer = +''
  @untagged_preamble_pending = false
end
|
|
274
|
+
|
|
236
275
|
def append_thinking_from_chunk(chunk)
|
|
237
276
|
thinking = chunk.thinking
|
|
238
277
|
return unless thinking
|
|
@@ -24,6 +24,11 @@ module Legion
|
|
|
24
24
|
end
|
|
25
25
|
end
|
|
26
26
|
|
|
27
|
+
# Release any text held by the untagged-preamble heuristic so short
|
|
28
|
+
# responses still stream at least one delta to the caller.
|
|
29
|
+
final_chunk = accumulator.flush_pending_chunk
|
|
30
|
+
block&.call(final_chunk) if final_chunk
|
|
31
|
+
|
|
27
32
|
message = accumulator.to_message(response)
|
|
28
33
|
log.debug { "Stream completed: #{message.content}" }
|
|
29
34
|
message
|
|
@@ -31,6 +36,8 @@ module Legion
|
|
|
31
36
|
|
|
32
37
|
def build_stream_callback(accumulator, block)
|
|
33
38
|
proc do |chunk|
|
|
39
|
+
next unless chunk
|
|
40
|
+
|
|
34
41
|
accumulator.add chunk
|
|
35
42
|
filtered = accumulator.filtered_chunk(chunk)
|
|
36
43
|
block.call(filtered) if filtered
|
|
@@ -39,7 +46,10 @@ module Legion
|
|
|
39
46
|
|
|
40
47
|
# Build the on-data handler for a stream. Non-Hash payloads are ignored;
# Hash payloads are turned into a chunk via +build_chunk+ and handed to
# +block+ only when a chunk was produced.
def handle_stream(&block)
  build_on_data_handler do |data|
    if data.is_a?(Hash)
      parsed = build_chunk(data)
      block.call(parsed) if parsed
    end
  end
end
|
|
45
55
|
|
|
@@ -132,7 +142,30 @@ module Legion
|
|
|
132
142
|
end
|
|
133
143
|
log.warn "[llm][streaming] action=handle_failed_response status=#{status} " \
|
|
134
144
|
"partial_body=#{buffer.length}b msg=#{partial.inspect}"
|
|
135
|
-
|
|
145
|
+
raise_streaming_status_error(status, msg)
|
|
146
|
+
end
|
|
147
|
+
|
|
148
|
+
# Raise the error class that matches a failed streaming response's status.
#
# A minimal response stand-in (body + status) carrying +message+ is passed
# to the error. Statuses without a dedicated class are handed to
# ErrorMiddleware.parse_error, with this object as provider only when it
# responds to +parse_error+.
# NOTE(review): parse_error presumably raises for those statuses; that is
# not visible from here — confirm.
def raise_streaming_status_error(status, message)
  response = Struct.new(:body, :status).new({ 'error' => { 'message' => message } }, status)

  error_class =
    case status
    when 400 then Legion::Extensions::Llm::BadRequestError
    when 401 then Legion::Extensions::Llm::UnauthorizedError
    when 403 then Legion::Extensions::Llm::ForbiddenError
    when 429 then Legion::Extensions::Llm::RateLimitError
    when 500 then Legion::Extensions::Llm::ServerError
    when 502..504 then Legion::Extensions::Llm::ServiceUnavailableError
    when 529 then Legion::Extensions::Llm::OverloadedError
    end
  raise error_class.new(response, message) if error_class

  Legion::Extensions::Llm::ErrorMiddleware.parse_error(
    provider: (self if respond_to?(:parse_error)),
    response: response
  )
end
|
|
137
170
|
|
|
138
171
|
def handle_sse(chunk, parser, env, &)
|
|
@@ -183,7 +216,7 @@ module Legion
|
|
|
183
216
|
def build_stream_error_response(parsed_data, env, status)
|
|
184
217
|
error_status = status || env&.status || 500
|
|
185
218
|
|
|
186
|
-
if faraday_1?
|
|
219
|
+
if faraday_1? || env.nil?
|
|
187
220
|
Struct.new(:body, :status).new(parsed_data, error_status)
|
|
188
221
|
else
|
|
189
222
|
env.merge(body: parsed_data, status: error_status)
|
|
@@ -36,6 +36,9 @@ module Legion
|
|
|
36
36
|
# unqualified constant lookups resolve via Ruby scope. #
|
|
37
37
|
# ------------------------------------------------------------------ #
|
|
38
38
|
|
|
39
|
+
# --- Capability resolution policy (no internal deps) ---
|
|
40
|
+
require_relative 'llm/capability_policy'
|
|
41
|
+
|
|
39
42
|
# --- Base value objects (no internal deps) ---
|
|
40
43
|
require_relative 'llm/mime_type'
|
|
41
44
|
require_relative 'llm/model/info'
|