lex-llm 0.5.0 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +21 -0
  3. data/lib/legion/extensions/llm/canonical/message.rb +16 -3
  4. data/lib/legion/extensions/llm/canonical/tool_definition.rb +26 -1
  5. data/lib/legion/extensions/llm/canonical/tool_schema.rb +46 -0
  6. data/lib/legion/extensions/llm/canonical/usage.rb +13 -0
  7. data/lib/legion/extensions/llm/canonical.rb +1 -0
  8. data/lib/legion/extensions/llm/capability_policy.rb +107 -0
  9. data/lib/legion/extensions/llm/configuration.rb +4 -0
  10. data/lib/legion/extensions/llm/error.rb +2 -0
  11. data/lib/legion/extensions/llm/provider/open_ai_compatible.rb +9 -8
  12. data/lib/legion/extensions/llm/provider.rb +33 -5
  13. data/lib/legion/extensions/llm/provider_contract.rb +10 -1
  14. data/lib/legion/extensions/llm/routing/model_offering.rb +14 -2
  15. data/lib/legion/extensions/llm/stream_accumulator.rb +39 -0
  16. data/lib/legion/extensions/llm/streaming.rb +36 -3
  17. data/lib/legion/extensions/llm/version.rb +1 -1
  18. data/lib/legion/extensions/llm.rb +3 -0
  19. data/spec/legion/extensions/llm/canonical/tool_definition_spec.rb +49 -2
  20. data/spec/legion/extensions/llm/canonical/tool_schema_spec.rb +83 -0
  21. data/spec/legion/extensions/llm/canonical/usage_spec.rb +40 -0
  22. data/spec/legion/extensions/llm/capability_policy_spec.rb +192 -0
  23. data/spec/legion/extensions/llm/configuration_spec.rb +40 -0
  24. data/spec/legion/extensions/llm/conformance/client_translator_examples.rb +163 -0
  25. data/spec/legion/extensions/llm/conformance/fixtures/canonical_server_tool_continuation_request.json +43 -0
  26. data/spec/legion/extensions/llm/conformance/fixtures/canonical_server_tool_use_response.json +29 -0
  27. data/spec/legion/extensions/llm/conformance/fixtures/canonical_streaming_server_tool_chunks.json +52 -0
  28. data/spec/legion/extensions/llm/conformance/provider_tool_rendering_examples.rb +77 -0
  29. data/spec/legion/extensions/llm/provider/open_ai_compatible_tool_calls_array_spec.rb +68 -0
  30. data/spec/legion/extensions/llm/provider_spec.rb +55 -3
  31. data/spec/legion/extensions/llm/routing/model_offering_spec.rb +58 -0
  32. data/spec/legion/extensions/llm/stream_accumulator_spec.rb +52 -0
  33. data/spec/legion/extensions/llm/streaming_spec.rb +9 -0
  34. metadata +10 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9ab0a51687719efbd7f048be70ff4ba57b6d81fafddfcaefb2f0d78f5f3721ae
4
- data.tar.gz: 49e57ce1c99330d956cc92b6c6a75a283f078f5e0b8fb036ae7aad9579caccd0
3
+ metadata.gz: 97ce32819eeea5c69b1278c3bab36876fc420f7092d9428da4e801fe26073601
4
+ data.tar.gz: c91d281b0994aea741558c7ffacced69626da499c2bc7876a58c591b50f56137
5
5
  SHA512:
6
- metadata.gz: 8d8fcea6f732dd4c5dfcd713024796a640941d18c8ef90f3c5b5ca0b3fcbc0094942f2cadabbdd78669ecabce8cd76f893488a92f15ec70c21ebec865fb0a531
7
- data.tar.gz: 4a062635e491cb84b60117b1c28520a8fc5bb4d0775609ed1104639e549114c2a373e68461833008a234e33d7ac14a5fe1292cc2f37fe37b0f00818387cc4851
6
+ metadata.gz: 931eb07b958e676e014804e044c8da11dfc42c866345b6a187c08294da0070e88b2fb30dc569c577a99ee2de95a5f4d55572c8b05ce6d79f327f3ec4a48a8350
7
+ data.tar.gz: 78450fa24b76f759218776b30b80a4d693b0395f58828d79593cb1ce4e640c8ca281f423dcc83144220707e837fd00c76bfc9c600c6382e209e18966d92fc5e6
data/CHANGELOG.md CHANGED
@@ -1,5 +1,26 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.5.3 - 2026-06-16
4
+
5
+ ### Fixed
6
+ - **Streaming error classification** — Partial non-2xx streaming responses now raise status-specific errors (`UnauthorizedError`, `ForbiddenError`, `RateLimitError`, `ServiceUnavailableError`, etc.) instead of always raising `ServerError`. This preserves auth failures for downstream escalation and circuit handling.
7
+
8
+ ## 0.5.2 - 2026-06-15
9
+
10
+ ### Added
11
+ - **CapabilityPolicy module** — Shared capability resolution with a 7-layer precedence chain (model_override > instance_override > provider_override > model_metadata > provider_catalog > probe > provider_envelope), falling back to default_false when no layer specifies a value. All optional capabilities default to false.
12
+ - **Boolean aliases** — `enable_thinking`, `tools_flag`, `embedding_flag`, etc. map to canonical capability keys at any settings level.
13
+ - **ModelOffering#capability_sources** — Per-capability source metadata preserved through offering serialization.
14
+ - **Provider#offering_from_model** — Base class now generates `:model_metadata` source tags for capabilities from provider API responses.
15
+
16
+ ## 0.5.1 - 2026-06-12
17
+
18
+ ### Fixed
19
+ - **ToolDefinition constants** — Move `OBJECT_SCHEMA_KEYWORDS` and `COMPOSITE_SCHEMA_KEYWORDS` out of `Data.define` block to satisfy `Lint/ConstantDefinitionInBlock`.
20
+ - **ToolSchema documentation** — Add top-level module documentation comment.
21
+ - **Conformance spec cleanup** — Remove unused block argument from shared examples, fix duplicate describe block and context wording in tool_definition_spec.
22
+ - **RuboCop clean** — Zero offenses across 140 files.
23
+
3
24
  ## 0.5.0 - 2026-06-10
4
25
 
5
26
  ### Added
@@ -72,11 +72,24 @@ module Legion
72
72
  h[:content] = ContentBlock.from_hash(content)
73
73
  end
74
74
 
75
- # Parse tool calls if they're an array of hashes
75
+ # Parse tool calls: Array is canonical; Hash is legacy lex-llm format (name → ToolCall)
76
76
  tool_calls = h[:tool_calls]
77
- if tool_calls.is_a?(Array)
77
+ if tool_calls.is_a?(Hash)
78
+ h[:tool_calls] = tool_calls.values.map do |tc|
79
+ next tc if tc.is_a?(ToolCall)
80
+
81
+ raw = tc.respond_to?(:to_h) ? tc.to_h : tc
82
+ ToolCall.from_hash(raw)
83
+ end
84
+ elsif tool_calls.is_a?(Array)
78
85
  h[:tool_calls] = tool_calls.map do |tc|
79
- tc.is_a?(ToolCall) ? tc : ToolCall.from_hash(tc)
86
+ next tc if tc.is_a?(ToolCall)
87
+
88
+ if tc.is_a?(Hash)
89
+ ToolCall.from_hash(tc)
90
+ else
91
+ ToolCall.from_hash(tc.respond_to?(:to_h) ? tc.to_h : tc)
92
+ end
80
93
  end
81
94
  end
82
95
 
@@ -5,16 +5,33 @@ module Legion
5
5
  module Llm
6
6
  module Canonical
7
7
  TOOL_NAME_MAX_LENGTH = 64
8
+ OBJECT_SCHEMA_KEYWORDS = %i[properties required additionalProperties].freeze
9
+ COMPOSITE_SCHEMA_KEYWORDS = %i[oneOf anyOf allOf enum $ref $defs definitions].freeze
8
10
 
9
11
  # Canonical tool definition.
10
12
  # Ports field vocabulary from Legion::LLM::Types::ToolDefinition.
11
13
  ToolDefinition = ::Data.define(:name, :description, :parameters, :source) do
14
+ def self.normalize_parameters(parameters)
15
+ empty = { type: 'object', properties: {} }
16
+ return empty if parameters.nil?
17
+
18
+ schema = if parameters.respond_to?(:transform_keys)
19
+ parameters.transform_keys { |k| k.respond_to?(:to_sym) ? k.to_sym : k }
20
+ end
21
+ return empty if schema.nil? || schema.empty?
22
+ return schema if schema.key?(:type)
23
+ return schema.merge(type: 'object') if OBJECT_SCHEMA_KEYWORDS.any? { |k| schema.key?(k) }
24
+ return schema if COMPOSITE_SCHEMA_KEYWORDS.any? { |k| schema.key?(k) }
25
+
26
+ { type: 'object', properties: schema }
27
+ end
28
+
12
29
  # Build from keyword args (primary constructor).
13
30
  def self.build(name:, description: '', parameters: nil, source: nil)
14
31
  new(
15
32
  sanitize_tool_name(name),
16
33
  description.to_s,
17
- parameters || {},
34
+ normalize_parameters(parameters),
18
35
  source || { type: :builtin }
19
36
  )
20
37
  end
@@ -58,6 +75,14 @@ module Legion
58
75
  name.empty? ? 'tool' : name
59
76
  end
60
77
 
78
+ def params_schema
79
+ parameters
80
+ end
81
+
82
+ def input_schema
83
+ parameters
84
+ end
85
+
61
86
  # Serialize to a Hash for AMQP/fleet/wire transport.
62
87
  def to_h
63
88
  {
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Legion
4
+ module Extensions
5
+ module Llm
6
+ module Canonical
7
+ # Extracts and normalizes tool schemas from heterogeneous sources.
8
+ module ToolSchema
9
+ EMPTY_OBJECT = { type: 'object', properties: {} }.freeze
10
+
11
+ module_function
12
+
13
+ def extract(tool)
14
+ raw = raw_schema(tool)
15
+ ToolDefinition.normalize_parameters(raw)
16
+ end
17
+
18
+ def raw_schema(tool)
19
+ return nil if tool.nil?
20
+ return tool.params_schema if tool.respond_to?(:params_schema) && tool.params_schema
21
+ return tool.parameters if tool.respond_to?(:parameters) && tool.parameters
22
+
23
+ return unless tool.respond_to?(:[])
24
+
25
+ tool[:parameters] || tool['parameters'] || tool[:input_schema] || tool['input_schema'] ||
26
+ tool[:params_schema] || tool['params_schema']
27
+ end
28
+
29
+ def tool_name(tool)
30
+ return tool.name if tool.respond_to?(:name) && !tool.is_a?(Hash)
31
+ return tool[:name] || tool['name'] if tool.respond_to?(:[])
32
+
33
+ 'unknown'
34
+ end
35
+
36
+ def tool_description(tool)
37
+ return tool.description if tool.respond_to?(:description) && !tool.is_a?(Hash)
38
+ return (tool[:description] || tool['description'] || '').to_s if tool.respond_to?(:[])
39
+
40
+ ''
41
+ end
42
+ end
43
+ end
44
+ end
45
+ end
46
+ end
@@ -30,6 +30,12 @@ module Legion
30
30
  h[:cache_write_tokens] ||= h.delete(:cache_creation) || h.delete(:cache_write)
31
31
  h[:thinking_tokens] ||= h.delete(:thinking) || h.delete(:reasoning)
32
32
 
33
+ # Extract nested details (OpenAI prompt_tokens_details / input_tokens_details)
34
+ h[:cache_read_tokens] ||= dig_nested(h, :prompt_tokens_details, :cached_tokens) ||
35
+ dig_nested(h, :input_tokens_details, :cached_tokens)
36
+ h[:thinking_tokens] ||= dig_nested(h, :completion_tokens_details, :reasoning_tokens) ||
37
+ dig_nested(h, :output_tokens_details, :reasoning_tokens)
38
+
33
39
  # Extract units (non-token extension point — G20b)
34
40
  units = h.delete(:units) || {}
35
41
 
@@ -43,6 +49,13 @@ module Legion
43
49
  )
44
50
  end
45
51
 
52
+ def self.dig_nested(hash, details_key, value_key)
53
+ details = hash[details_key]
54
+ return nil unless details.is_a?(Hash)
55
+
56
+ details[value_key] || details[value_key.to_s]
57
+ end
58
+
46
59
  # Serialize to a Hash for AMQP/fleet/wire transport.
47
60
  def to_h
48
61
  super.compact
@@ -5,6 +5,7 @@ require_relative 'canonical/usage'
5
5
  require_relative 'canonical/params'
6
6
  require_relative 'canonical/content_block'
7
7
  require_relative 'canonical/tool_definition'
8
+ require_relative 'canonical/tool_schema'
8
9
  require_relative 'canonical/tool_call'
9
10
  require_relative 'canonical/message'
10
11
  require_relative 'canonical/request'
@@ -0,0 +1,107 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Legion
4
+ module Extensions
5
+ module Llm
6
+ # Resolves capability truth from multiple sources with explicit precedence.
7
+ # Returns both a flat capability list and per-capability source metadata.
8
+ module CapabilityPolicy
9
+ OPTIONAL_CAPABILITIES = %i[
10
+ streaming tools vision embeddings thinking structured_output image audio_transcription audio_speech
11
+ ].freeze
12
+
13
+ BOOLEAN_ALIASES = {
14
+ enable_streaming: :streaming,
15
+ enable_tools: :tools,
16
+ enable_thinking: :thinking,
17
+ enable_vision: :vision,
18
+ enable_embeddings: :embeddings,
19
+ enable_images: :image,
20
+ streaming_flag: :streaming,
21
+ tool_flag: :tools,
22
+ tools_flag: :tools,
23
+ thinking_flag: :thinking,
24
+ vision_flag: :vision,
25
+ embedding_flag: :embeddings,
26
+ embeddings_flag: :embeddings,
27
+ image_flag: :image,
28
+ images_flag: :image
29
+ }.freeze
30
+
31
+ module_function
32
+
33
+ def resolve(real:, provider_catalog:, probe:, provider_envelope:, provider_config:, instance_config:, model_config:) # rubocop:disable Metrics/ParameterLists
34
+ sources = {}
35
+ OPTIONAL_CAPABILITIES.each do |capability|
36
+ sources[capability] = resolve_one(
37
+ capability,
38
+ real:, provider_catalog:, probe:, provider_envelope:,
39
+ provider_config:, instance_config:, model_config:
40
+ )
41
+ end
42
+
43
+ {
44
+ capabilities: sources.filter_map { |capability, data| capability if data[:value] == true },
45
+ sources: sources
46
+ }
47
+ end
48
+
49
+ def resolve_one(capability, real:, provider_catalog:, probe:, provider_envelope:, provider_config:, instance_config:, model_config:) # rubocop:disable Metrics/ParameterLists
50
+ model_overrides = normalized_overrides(model_config)
51
+ return { value: model_overrides[capability], source: :model_override } if model_overrides.key?(capability)
52
+
53
+ instance_overrides = normalized_overrides(instance_config)
54
+ return { value: instance_overrides[capability], source: :instance_override } if instance_overrides.key?(capability)
55
+
56
+ provider_overrides = normalized_overrides(provider_config)
57
+ return { value: provider_overrides[capability], source: :provider_override } if provider_overrides.key?(capability)
58
+
59
+ real_caps = normalized_booleans(real)
60
+ return { value: real_caps[capability], source: :model_metadata } if real_caps.key?(capability)
61
+
62
+ catalog_caps = normalized_booleans(provider_catalog)
63
+ return { value: catalog_caps[capability], source: :provider_catalog } if catalog_caps.key?(capability)
64
+
65
+ probe_caps = normalized_booleans(probe)
66
+ return { value: probe_caps[capability], source: :probe } if probe_caps.key?(capability)
67
+
68
+ provider_caps = normalized_booleans(provider_envelope)
69
+ return { value: provider_caps[capability], source: :provider_envelope } if provider_caps.key?(capability)
70
+
71
+ { value: false, source: :default_false }
72
+ end
73
+
74
+ def normalized_overrides(config)
75
+ config = normalize_hash(config)
76
+ caps_key = config.key?(:capabilities) ? :capabilities : 'capabilities'
77
+ overrides = normalized_booleans(config[caps_key])
78
+ BOOLEAN_ALIASES.each do |key, capability|
79
+ value = config[key]
80
+ value = config[key.to_s] if value.nil?
81
+ next unless [true, false].include?(value)
82
+ next if overrides.key?(capability)
83
+
84
+ overrides[capability] = value
85
+ end
86
+ overrides
87
+ end
88
+
89
+ def normalized_booleans(value)
90
+ normalize_hash(value).each_with_object({}) do |(key, raw), result|
91
+ capability = key.to_s.downcase.tr('-', '_').to_sym
92
+ next unless OPTIONAL_CAPABILITIES.include?(capability)
93
+ next unless [true, false].include?(raw)
94
+
95
+ result[capability] = raw
96
+ end
97
+ end
98
+
99
+ def normalize_hash(value)
100
+ return {} unless value.respond_to?(:to_h)
101
+
102
+ value.to_h.transform_keys { |key| key.respond_to?(:to_sym) ? key.to_sym : key }
103
+ end
104
+ end
105
+ end
106
+ end
107
+ end
@@ -22,6 +22,10 @@ module Legion
22
22
  option_keys.dup
23
23
  end
24
24
 
25
+ def register_provider_options(keys)
26
+ Array(keys).each { |key| option(key.to_sym) }
27
+ end
28
+
25
29
  private
26
30
 
27
31
  def option_keys = @option_keys ||= []
@@ -54,6 +54,8 @@ module Legion
54
54
 
55
55
  # Faraday middleware that maps provider-specific API errors to Legion::Extensions::Llm errors.
56
56
  class ErrorMiddleware < Faraday::Middleware
57
+ extend Legion::Logging::Helper
58
+
57
59
  STREAM_ERROR_BODY_KEY = :legion_llm_stream_error_body
58
60
 
59
61
  def initialize(app, options = {})
@@ -76,7 +76,12 @@ module Legion
76
76
  def format_openai_tool_calls(tool_calls)
77
77
  return nil unless tool_calls&.any?
78
78
 
79
- tool_calls.values.map do |tool_call|
79
+ # Array is the canonical shape (per canonical/message.rb); Hash
80
+ # is the legacy lex-llm shape (id => ToolCall). Both flow through
81
+ # this renderer depending on caller.
82
+ calls = tool_calls.is_a?(Hash) ? tool_calls.values : Array(tool_calls)
83
+
84
+ calls.map do |tool_call|
80
85
  {
81
86
  id: tool_call.id,
82
87
  type: 'function',
@@ -92,16 +97,12 @@ module Legion
92
97
  return nil if tools.empty?
93
98
 
94
99
  tools.values.map do |tool|
95
- # Tools can be ToolDefinition objects or plain Hashes from native_dispatch.
96
- tool_name = tool.respond_to?(:name) ? tool.name : (tool[:name] || tool['name'])
97
- tool_desc = tool.respond_to?(:description) ? tool.description : (tool[:description] || tool['description'] || '')
98
- tool_params = tool.respond_to?(:params_schema) ? tool.params_schema : (tool[:parameters] || tool['parameters'] || {})
99
100
  {
100
101
  type: 'function',
101
102
  function: {
102
- name: tool_name,
103
- description: tool_desc,
104
- parameters: tool_params || { type: 'object', properties: {} }
103
+ name: Canonical::ToolSchema.tool_name(tool),
104
+ description: Canonical::ToolSchema.tool_description(tool),
105
+ parameters: Canonical::ToolSchema.extract(tool)
105
106
  }
106
107
  }
107
108
  end
@@ -30,6 +30,8 @@ module Legion
30
30
  include Legion::Logging::Helper
31
31
  include Legion::Cache::Helper
32
32
 
33
+ MODEL_DETAIL_CACHE_SCHEMA_VERSION = 2
34
+
33
35
  attr_reader :config, :connection
34
36
 
35
37
  def initialize(config)
@@ -137,7 +139,7 @@ module Legion
137
139
  parse_list_models_response response, slug, capabilities
138
140
  end
139
141
 
140
- def discover_offerings(live: false, **filters)
142
+ def discover_offerings(live: false, raise_on_unreachable: false, **filters)
141
143
  return filter_cached_offerings(Array(@cached_offerings), filters) unless live
142
144
 
143
145
  provider_health = health(live:)
@@ -148,8 +150,10 @@ module Legion
148
150
  offering_from_model(model, health: provider_health)
149
151
  end
150
152
  @cached_offerings
151
- rescue Faraday::ConnectionFailed => e
153
+ rescue Faraday::ConnectionFailed, Faraday::TimeoutError => e
152
154
  log.warn("[#{slug}] instance=#{provider_instance_id} unreachable: #{e.message}")
155
+ raise if raise_on_unreachable
156
+
153
157
  []
154
158
  end
155
159
 
@@ -224,9 +228,16 @@ module Legion
224
228
  end
225
229
 
226
230
  def cache_enabled?
227
- return false unless config.respond_to?(:llm_cache_enabled)
231
+ explicit = config.llm_cache_enabled if config.respond_to?(:llm_cache_enabled)
232
+
233
+ unless explicit.nil?
234
+ log.debug { "[#{slug}] cache_enabled? source=per_provider value=#{explicit}" }
235
+ return explicit == true
236
+ end
228
237
 
229
- config.llm_cache_enabled == true
238
+ global = global_prompt_caching_enabled?
239
+ log.debug { "[#{slug}] cache_enabled? source=global value=#{global}" }
240
+ global
230
241
  end
231
242
 
232
243
  def cache_control_prefix_tokens
@@ -528,11 +539,23 @@ module Legion
528
539
 
529
540
  private
530
541
 
542
+ def global_prompt_caching_enabled?
543
+ return false unless defined?(Legion::Settings)
544
+
545
+ Legion::Settings.dig(:llm, :prompt_caching, :enabled) == true
546
+ rescue StandardError
547
+ false
548
+ end
549
+
531
550
  def model_detail_cache_key(model_name)
532
551
  tier = offering_tier
533
552
  instance_key = cache_instance_key
534
553
  cred_fp = credential_cache_fragment
535
- key_parts = ['model_info', tier, slug, instance_key, cred_fp, model_name].compact
554
+ key_parts = [
555
+ 'model_info',
556
+ "schema#{MODEL_DETAIL_CACHE_SCHEMA_VERSION}",
557
+ tier, slug, instance_key, cred_fp, model_name
558
+ ].compact
536
559
  key_parts.join('.')
537
560
  end
538
561
 
@@ -555,6 +578,10 @@ module Legion
555
578
  end
556
579
 
557
580
  def offering_from_model(model, health: {})
581
+ capability_sources = Array(model.capabilities).to_h do |cap|
582
+ [cap.to_sym, { value: true, source: :model_metadata }]
583
+ end
584
+
558
585
  Routing::ModelOffering.new(
559
586
  provider_family: slug.to_sym,
560
587
  provider_instance: model.instance || provider_instance_id,
@@ -565,6 +592,7 @@ module Legion
565
592
  model_family: model.family,
566
593
  usage_type: offering_usage_type(model),
567
594
  capabilities: model.capabilities,
595
+ capability_sources: capability_sources,
568
596
  limits: offering_limits(model),
569
597
  health:,
570
598
  metadata: offering_metadata(model)
@@ -11,10 +11,19 @@ module Legion
11
11
  embed: [%i[keyreq text], %i[keyreq model]],
12
12
  image: [%i[keyreq prompt], %i[keyreq model]],
13
13
  list_models: [%i[key live], %i[keyrest filters]],
14
- discover_offerings: [%i[key live], %i[keyrest filters]],
14
+ discover_offerings: [%i[key live], %i[key raise_on_unreachable], %i[keyrest filters]],
15
15
  health: [%i[key live]],
16
16
  count_tokens: [%i[keyreq messages], %i[keyreq model], %i[key params]]
17
17
  }.freeze
18
+
19
+ # Providers must accept Canonical::ToolDefinition objects as tools passed to chat/stream_chat.
20
+ # Providers must not crash on Data.define instances (not Hashes).
21
+ TOOL_SUPPORT_CONTRACT = <<~DOC
22
+ - chat and stream_chat accept keyword `tools:` (Hash<name, tool_object>)
23
+ - tools may be Canonical::ToolDefinition, Hash, or legacy Lex::Llm::Tool
24
+ - Renderers must use Canonical::ToolSchema.extract(tool) for schema access
25
+ - discover_offerings(live: true, raise_on_unreachable: true) raises on transport failure
26
+ DOC
18
27
  end
19
28
  end
20
29
  end
@@ -16,8 +16,8 @@ module Legion
16
16
  }.freeze
17
17
 
18
18
  attr_reader :offering_id, :provider_family, :model_family, :provider_instance, :instance_id, :transport,
19
- :tier, :model, :canonical_model_alias, :routing_metadata, :usage_type, :capabilities, :limits,
20
- :credentials, :health, :cost, :policy_tags, :metadata
19
+ :tier, :model, :canonical_model_alias, :routing_metadata, :usage_type, :capabilities,
20
+ :capability_sources, :limits, :credentials, :health, :cost, :policy_tags, :metadata
21
21
 
22
22
  def initialize(data)
23
23
  @metadata = normalize_hash(fetch_value(data, :metadata))
@@ -37,6 +37,7 @@ module Legion
37
37
  fetch_value(data, :kind) ||
38
38
  infer_usage_type(data)))
39
39
  @capabilities = normalize_capabilities(fetch_value(data, :capabilities))
40
+ @capability_sources = normalize_capability_sources(fetch_value(data, :capability_sources))
40
41
  @limits = normalize_hash(fetch_value(data, :limits))
41
42
  @credentials = fetch_value(data, :credentials)
42
43
  @health = normalize_hash(fetch_value(data, :health))
@@ -106,6 +107,7 @@ module Legion
106
107
  routing_metadata: routing_metadata,
107
108
  usage_type: usage_type,
108
109
  capabilities: capabilities,
110
+ capability_sources: capability_sources,
109
111
  limits: limits,
110
112
  credentials: credentials,
111
113
  health: health,
@@ -167,6 +169,16 @@ module Legion
167
169
  end.uniq
168
170
  end
169
171
 
172
+ def normalize_capability_sources(value)
173
+ normalize_hash(value).to_h do |capability, source_data|
174
+ normalized_source = normalize_hash(source_data)
175
+ [
176
+ capability.to_s.downcase.tr('-', '_').to_sym,
177
+ { value: normalized_source[:value], source: normalized_source[:source]&.to_sym }.compact
178
+ ]
179
+ end
180
+ end
181
+
170
182
  def normalize_hash(value)
171
183
  (value || {}).to_h.transform_keys(&:to_sym)
172
184
  end
@@ -57,6 +57,27 @@ module Legion
57
57
  )
58
58
  end
59
59
 
60
+ # Flush any text still held in the untagged-preamble buffer as a final
61
+ # streamed chunk. Without this, short responses that match the
62
+ # untagged-reasoning heuristic (e.g. starting with "I", "The", "Let me")
63
+ # and never hit a double newline are buffered for the entire stream and
64
+ # the caller's block never receives a single delta.
65
+ def flush_pending_chunk
66
+ return nil if @untagged_preamble_buffer.empty?
67
+
68
+ @last_content_delta = +''
69
+ @last_thinking_delta = +''
70
+ flush_pending_untagged_preamble_into_deltas
71
+ return nil if @last_content_delta.empty? && @last_thinking_delta.empty?
72
+
73
+ Chunk.new(
74
+ role: :assistant,
75
+ content: @last_content_delta.empty? ? nil : @last_content_delta,
76
+ thinking: @last_thinking_delta.empty? ? nil : Thinking.build(text: @last_thinking_delta),
77
+ model_id: model_id
78
+ )
79
+ end
80
+
60
81
  def to_message(response)
61
82
  flush_pending_untagged_preamble
62
83
 
@@ -233,6 +254,24 @@ module Legion
233
254
  @untagged_preamble_pending = false
234
255
  end
235
256
 
257
+ # Same as flush_pending_untagged_preamble, but also records the flushed
258
+ # text in the per-chunk delta accumulators so flush_pending_chunk can
259
+ # surface it to the streaming block.
260
+ def flush_pending_untagged_preamble_into_deltas
261
+ content, thinking = Responses::ThinkingExtractor.extract_untagged_preamble(@untagged_preamble_buffer)
262
+ if thinking
263
+ @content << content
264
+ @last_content_delta << content
265
+ @thinking_text << thinking
266
+ @last_thinking_delta << thinking
267
+ else
268
+ @content << @untagged_preamble_buffer
269
+ @last_content_delta << @untagged_preamble_buffer
270
+ end
271
+ @untagged_preamble_buffer = +''
272
+ @untagged_preamble_pending = false
273
+ end
274
+
236
275
  def append_thinking_from_chunk(chunk)
237
276
  thinking = chunk.thinking
238
277
  return unless thinking
@@ -24,6 +24,11 @@ module Legion
24
24
  end
25
25
  end
26
26
 
27
+ # Release any text held by the untagged-preamble heuristic so short
28
+ # responses still stream at least one delta to the caller.
29
+ final_chunk = accumulator.flush_pending_chunk
30
+ block&.call(final_chunk) if final_chunk
31
+
27
32
  message = accumulator.to_message(response)
28
33
  log.debug { "Stream completed: #{message.content}" }
29
34
  message
@@ -31,6 +36,8 @@ module Legion
31
36
 
32
37
  def build_stream_callback(accumulator, block)
33
38
  proc do |chunk|
39
+ next unless chunk
40
+
34
41
  accumulator.add chunk
35
42
  filtered = accumulator.filtered_chunk(chunk)
36
43
  block.call(filtered) if filtered
@@ -39,7 +46,10 @@ module Legion
39
46
 
40
47
  def handle_stream(&block)
41
48
  build_on_data_handler do |data|
42
- block.call(build_chunk(data)) if data.is_a?(Hash)
49
+ next unless data.is_a?(Hash)
50
+
51
+ chunk = build_chunk(data)
52
+ block.call(chunk) if chunk
43
53
  end
44
54
  end
45
55
 
@@ -132,7 +142,30 @@ module Legion
132
142
  end
133
143
  log.warn "[llm][streaming] action=handle_failed_response status=#{status} " \
134
144
  "partial_body=#{buffer.length}b msg=#{partial.inspect}"
135
- raise Legion::Extensions::Llm::ServerError, msg
145
+ raise_streaming_status_error(status, msg)
146
+ end
147
+
148
+ def raise_streaming_status_error(status, message)
149
+ response = Struct.new(:body, :status).new({ 'error' => { 'message' => message } }, status)
150
+ case status
151
+ when 400
152
+ raise Legion::Extensions::Llm::BadRequestError.new(response, message)
153
+ when 401
154
+ raise Legion::Extensions::Llm::UnauthorizedError.new(response, message)
155
+ when 403
156
+ raise Legion::Extensions::Llm::ForbiddenError.new(response, message)
157
+ when 429
158
+ raise Legion::Extensions::Llm::RateLimitError.new(response, message)
159
+ when 500
160
+ raise Legion::Extensions::Llm::ServerError.new(response, message)
161
+ when 502..504
162
+ raise Legion::Extensions::Llm::ServiceUnavailableError.new(response, message)
163
+ when 529
164
+ raise Legion::Extensions::Llm::OverloadedError.new(response, message)
165
+ else
166
+ provider = respond_to?(:parse_error) ? self : nil
167
+ Legion::Extensions::Llm::ErrorMiddleware.parse_error(provider: provider, response: response)
168
+ end
136
169
  end
137
170
 
138
171
  def handle_sse(chunk, parser, env, &)
@@ -183,7 +216,7 @@ module Legion
183
216
  def build_stream_error_response(parsed_data, env, status)
184
217
  error_status = status || env&.status || 500
185
218
 
186
- if faraday_1?
219
+ if faraday_1? || env.nil?
187
220
  Struct.new(:body, :status).new(parsed_data, error_status)
188
221
  else
189
222
  env.merge(body: parsed_data, status: error_status)
@@ -3,7 +3,7 @@
3
3
  module Legion
4
4
  module Extensions
5
5
  module Llm
6
- VERSION = '0.5.0'
6
+ VERSION = '0.5.3'
7
7
  end
8
8
  end
9
9
  end
@@ -36,6 +36,9 @@ module Legion
36
36
  # unqualified constant lookups resolve via Ruby scope. #
37
37
  # ------------------------------------------------------------------ #
38
38
 
39
+ # --- Capability resolution policy (no internal deps) ---
40
+ require_relative 'llm/capability_policy'
41
+
39
42
  # --- Base value objects (no internal deps) ---
40
43
  require_relative 'llm/mime_type'
41
44
  require_relative 'llm/model/info'