lex-llm 0.4.18 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (125) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +13 -2
  3. data/B1b-conformance-kit.md +79 -0
  4. data/CHANGELOG.md +27 -0
  5. data/lex-llm.gemspec +2 -3
  6. data/lib/legion/extensions/llm/attachment.rb +1 -1
  7. data/lib/legion/extensions/llm/canonical/chunk.rb +184 -0
  8. data/lib/legion/extensions/llm/canonical/content_block.rb +126 -0
  9. data/lib/legion/extensions/llm/canonical/message.rb +138 -0
  10. data/lib/legion/extensions/llm/canonical/params.rb +61 -0
  11. data/lib/legion/extensions/llm/canonical/request.rb +117 -0
  12. data/lib/legion/extensions/llm/canonical/response.rb +124 -0
  13. data/lib/legion/extensions/llm/canonical/thinking.rb +81 -0
  14. data/lib/legion/extensions/llm/canonical/tool_call.rb +134 -0
  15. data/lib/legion/extensions/llm/canonical/tool_definition.rb +98 -0
  16. data/lib/legion/extensions/llm/canonical/tool_schema.rb +46 -0
  17. data/lib/legion/extensions/llm/canonical/usage.rb +74 -0
  18. data/lib/legion/extensions/llm/canonical.rb +50 -0
  19. data/lib/legion/extensions/llm/chat.rb +3 -5
  20. data/lib/legion/extensions/llm/connection.rb +5 -1
  21. data/lib/legion/extensions/llm/error.rb +5 -7
  22. data/lib/legion/extensions/llm/fleet/envelope_validation.rb +1 -3
  23. data/lib/legion/extensions/llm/fleet/provider_responder.rb +1 -3
  24. data/lib/legion/extensions/llm/fleet/token_validator.rb +1 -3
  25. data/lib/legion/extensions/llm/model/info.rb +4 -6
  26. data/lib/legion/extensions/llm/models.rb +3 -3
  27. data/lib/legion/extensions/llm/provider/open_ai_compatible.rb +9 -4
  28. data/lib/legion/extensions/llm/provider.rb +21 -4
  29. data/lib/legion/extensions/llm/provider_contract.rb +10 -1
  30. data/lib/legion/extensions/llm/routing/lane_key.rb +1 -3
  31. data/lib/legion/extensions/llm/stream_accumulator.rb +40 -1
  32. data/lib/legion/extensions/llm/streaming.rb +13 -5
  33. data/lib/legion/extensions/llm/tool.rb +1 -3
  34. data/lib/legion/extensions/llm/version.rb +1 -1
  35. data/lib/legion/extensions/llm.rb +118 -35
  36. data/spec/fixtures/ruby.mp3 +0 -0
  37. data/spec/fixtures/ruby.mp4 +0 -0
  38. data/spec/fixtures/ruby.png +0 -0
  39. data/spec/fixtures/ruby.txt +1 -0
  40. data/spec/fixtures/ruby.wav +0 -0
  41. data/spec/fixtures/ruby.xml +1 -0
  42. data/spec/fixtures/sample.pdf +0 -0
  43. data/spec/legion/extensions/llm/agent_spec.rb +179 -0
  44. data/spec/legion/extensions/llm/attachment_spec.rb +25 -0
  45. data/spec/legion/extensions/llm/auto_registration_spec.rb +38 -0
  46. data/spec/legion/extensions/llm/canonical/chunk_spec.rb +285 -0
  47. data/spec/legion/extensions/llm/canonical/content_block_spec.rb +179 -0
  48. data/spec/legion/extensions/llm/canonical/message_spec.rb +203 -0
  49. data/spec/legion/extensions/llm/canonical/params_spec.rb +159 -0
  50. data/spec/legion/extensions/llm/canonical/request_spec.rb +174 -0
  51. data/spec/legion/extensions/llm/canonical/response_spec.rb +234 -0
  52. data/spec/legion/extensions/llm/canonical/thinking_spec.rb +151 -0
  53. data/spec/legion/extensions/llm/canonical/tool_call_spec.rb +191 -0
  54. data/spec/legion/extensions/llm/canonical/tool_definition_spec.rb +221 -0
  55. data/spec/legion/extensions/llm/canonical/tool_schema_spec.rb +83 -0
  56. data/spec/legion/extensions/llm/canonical/usage_spec.rb +178 -0
  57. data/spec/legion/extensions/llm/configuration_spec.rb +38 -0
  58. data/spec/legion/extensions/llm/conformance/client_translator_examples.rb +432 -0
  59. data/spec/legion/extensions/llm/conformance/conformance.rb +51 -0
  60. data/spec/legion/extensions/llm/conformance/echo_translator.rb +56 -0
  61. data/spec/legion/extensions/llm/conformance/echo_translator_spec.rb +13 -0
  62. data/spec/legion/extensions/llm/conformance/fixtures/canonical_empty_response.json +13 -0
  63. data/spec/legion/extensions/llm/conformance/fixtures/canonical_error_response.json +19 -0
  64. data/spec/legion/extensions/llm/conformance/fixtures/canonical_fleet_round_trip.json +81 -0
  65. data/spec/legion/extensions/llm/conformance/fixtures/canonical_metering_audit_events.json +101 -0
  66. data/spec/legion/extensions/llm/conformance/fixtures/canonical_params_mapping_request.json +21 -0
  67. data/spec/legion/extensions/llm/conformance/fixtures/canonical_server_tool_continuation_request.json +43 -0
  68. data/spec/legion/extensions/llm/conformance/fixtures/canonical_server_tool_use_response.json +29 -0
  69. data/spec/legion/extensions/llm/conformance/fixtures/canonical_simple_text_request.json +13 -0
  70. data/spec/legion/extensions/llm/conformance/fixtures/canonical_simple_text_response.json +13 -0
  71. data/spec/legion/extensions/llm/conformance/fixtures/canonical_stop_reason_matrix.json +36 -0
  72. data/spec/legion/extensions/llm/conformance/fixtures/canonical_streaming_accumulated_response.json +20 -0
  73. data/spec/legion/extensions/llm/conformance/fixtures/canonical_streaming_error_chunks.json +26 -0
  74. data/spec/legion/extensions/llm/conformance/fixtures/canonical_streaming_server_tool_chunks.json +52 -0
  75. data/spec/legion/extensions/llm/conformance/fixtures/canonical_streaming_text_chunks.json +33 -0
  76. data/spec/legion/extensions/llm/conformance/fixtures/canonical_streaming_thinking_chunks.json +42 -0
  77. data/spec/legion/extensions/llm/conformance/fixtures/canonical_streaming_tool_call_chunks.json +41 -0
  78. data/spec/legion/extensions/llm/conformance/fixtures/canonical_system_prompt_request.json +14 -0
  79. data/spec/legion/extensions/llm/conformance/fixtures/canonical_thinking_request.json +18 -0
  80. data/spec/legion/extensions/llm/conformance/fixtures/canonical_thinking_response.json +17 -0
  81. data/spec/legion/extensions/llm/conformance/fixtures/canonical_tool_results_continuation_request.json +75 -0
  82. data/spec/legion/extensions/llm/conformance/fixtures/canonical_tool_use_response.json +25 -0
  83. data/spec/legion/extensions/llm/conformance/fixtures/canonical_tools_request.json +34 -0
  84. data/spec/legion/extensions/llm/conformance/provider_tool_rendering_examples.rb +77 -0
  85. data/spec/legion/extensions/llm/conformance/provider_translator_examples.rb +390 -0
  86. data/spec/legion/extensions/llm/connection_logging_spec.rb +53 -0
  87. data/spec/legion/extensions/llm/connection_retry_spec.rb +36 -0
  88. data/spec/legion/extensions/llm/context_spec.rb +127 -0
  89. data/spec/legion/extensions/llm/credential_sources_spec.rb +468 -0
  90. data/spec/legion/extensions/llm/error_middleware_spec.rb +102 -0
  91. data/spec/legion/extensions/llm/error_spec.rb +87 -0
  92. data/spec/legion/extensions/llm/fleet/provider_responder_spec.rb +120 -0
  93. data/spec/legion/extensions/llm/fleet/token_validator_spec.rb +163 -0
  94. data/spec/legion/extensions/llm/fleet/worker_execution_spec.rb +128 -0
  95. data/spec/legion/extensions/llm/fleet_messages_spec.rb +402 -0
  96. data/spec/legion/extensions/llm/gemspec_spec.rb +25 -0
  97. data/spec/legion/extensions/llm/message_spec.rb +64 -0
  98. data/spec/legion/extensions/llm/model/info_spec.rb +222 -0
  99. data/spec/legion/extensions/llm/models_spec.rb +104 -0
  100. data/spec/legion/extensions/llm/provider/open_ai_compatible_spec.rb +203 -0
  101. data/spec/legion/extensions/llm/provider/open_ai_compatible_tool_calls_array_spec.rb +68 -0
  102. data/spec/legion/extensions/llm/provider_contract_spec.rb +60 -0
  103. data/spec/legion/extensions/llm/provider_settings_spec.rb +76 -0
  104. data/spec/legion/extensions/llm/provider_spec.rb +613 -0
  105. data/spec/legion/extensions/llm/registry_event_builder_spec.rb +68 -0
  106. data/spec/legion/extensions/llm/registry_publisher_spec.rb +22 -0
  107. data/spec/legion/extensions/llm/responses/response_objects_spec.rb +75 -0
  108. data/spec/legion/extensions/llm/responses/thinking_extractor_spec.rb +75 -0
  109. data/spec/legion/extensions/llm/routing/model_offering_spec.rb +222 -0
  110. data/spec/legion/extensions/llm/routing/offering_registry_spec.rb +50 -0
  111. data/spec/legion/extensions/llm/routing/registry_event_spec.rb +120 -0
  112. data/spec/legion/extensions/llm/stream_accumulator_spec.rb +155 -0
  113. data/spec/legion/extensions/llm/streaming_spec.rb +108 -0
  114. data/spec/legion/extensions/llm/tool_spec.rb +94 -0
  115. data/spec/legion/extensions/llm/transport/fleet_lane_spec.rb +60 -0
  116. data/spec/legion/extensions/llm/utils_spec.rb +113 -0
  117. data/spec/legion/extensions/llm_base_contract_spec.rb +110 -0
  118. data/spec/legion/extensions/llm_extension_spec.rb +78 -0
  119. data/spec/legion/extensions/llm_root_spec.rb +51 -0
  120. data/spec/spec_helper.rb +24 -0
  121. data/spec/support/fake_llm_provider.rb +148 -0
  122. data/spec/support/llm_configuration.rb +21 -0
  123. data/spec/support/rspec_configuration.rb +19 -0
  124. data/spec/support/simplecov_configuration.rb +20 -0
  125. metadata +103 -15
@@ -0,0 +1,74 @@
1
+ # frozen_string_literal: true
2
+
3
+ # -- from_hash normalization is intentional
4
module Legion
  module Extensions
    module Llm
      module Canonical
        # rubocop:disable Lint/ConstantDefinitionInBlock -- required for Data.define block scope
        # Canonical usage/metering data for a response.
        # Ports field vocabulary from lex-llm Tokens and legion-llm Types.
        # Includes non-token units extension point per G20b.
        #
        # Members:
        #   input_tokens, output_tokens, cache_read_tokens, cache_write_tokens,
        #   thinking_tokens - token counts (nil when the source did not report them)
        #   units           - Hash of non-token units; defaults to {} in from_hash
        Usage = ::Data.define(
          :input_tokens, :output_tokens, :cache_read_tokens, :cache_write_tokens,
          :thinking_tokens, :units
        ) do
          USAGE_KNOWN_KEYS = %i[input_tokens output_tokens cache_read_tokens cache_write_tokens
                                thinking_tokens units].freeze

          # Build from a Hash (raw provider response or deserialized wire payload).
          # Accepts both canonical key names and legacy provider spellings; keys may
          # be Strings or Symbols. A canonical key that is already set always wins
          # over its aliases.
          #
          # @param source [Hash, Usage, nil] usage payload, or an already-built Usage
          # @return [Usage, nil] nil when source is nil or empty; source itself when
          #   it is already a Usage
          def self.from_hash(source)
            # An already-canonical value passes through untouched (it has no #empty?).
            return source if source.is_a?(self)
            return nil if source.nil? || source.empty?

            h = source.transform_keys(&:to_sym)

            # Normalize legacy key names (key-name normalization here is intentional).
            h[:input_tokens]  ||= h.delete(:input) || h.delete(:prompt_tokens)
            h[:output_tokens] ||= h.delete(:output) || h.delete(:completion_tokens)
            # The explicit *_input_tokens spellings are tried first.
            # NOTE(review): Anthropic-style payloads are believed to also carry a
            # `cache_creation` breakdown object; reading that alias first would put
            # a Hash into cache_write_tokens and break total_tokens -- confirm
            # against the provider translators.
            h[:cache_read_tokens]  ||= h.delete(:cache_read_input_tokens) ||
                                       h.delete(:cached) || h.delete(:cache_read)
            h[:cache_write_tokens] ||= h.delete(:cache_creation_input_tokens) ||
                                       h.delete(:cache_creation) || h.delete(:cache_write)
            h[:thinking_tokens] ||= h.delete(:thinking) || h.delete(:reasoning)

            # Extract nested details (OpenAI prompt_tokens_details / input_tokens_details)
            h[:cache_read_tokens] ||= dig_nested(h, :prompt_tokens_details, :cached_tokens) ||
                                      dig_nested(h, :input_tokens_details, :cached_tokens)
            h[:thinking_tokens] ||= dig_nested(h, :completion_tokens_details, :reasoning_tokens) ||
                                    dig_nested(h, :output_tokens_details, :reasoning_tokens)

            # Extract units (non-token extension point -- G20b)
            units = h.delete(:units) || {}

            new(
              input_tokens: h[:input_tokens],
              output_tokens: h[:output_tokens],
              cache_read_tokens: h[:cache_read_tokens],
              cache_write_tokens: h[:cache_write_tokens],
              thinking_tokens: h[:thinking_tokens],
              units: units
            )
          end

          # Read value_key out of the nested Hash stored under details_key.
          # Only the top level was symbolized, so the nested Hash is probed with
          # both the Symbol and the String form of value_key.
          #
          # @return [Object, nil] nil when the details entry is missing or not a Hash
          def self.dig_nested(hash, details_key, value_key)
            details = hash[details_key]
            return nil unless details.is_a?(Hash)

            details[value_key] || details[value_key.to_s]
          end

          # Serialize to a Hash for AMQP/fleet/wire transport.
          # Members that are nil are omitted.
          def to_h
            super.compact
          end

          # Total tokens across all categories (nil members are skipped).
          def total_tokens
            [input_tokens, output_tokens, cache_read_tokens, cache_write_tokens,
             thinking_tokens].compact.sum
          end
        end
        # rubocop:enable Lint/ConstantDefinitionInBlock
      end
    end
  end
end
@@ -0,0 +1,50 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'canonical/thinking'
4
+ require_relative 'canonical/usage'
5
+ require_relative 'canonical/params'
6
+ require_relative 'canonical/content_block'
7
+ require_relative 'canonical/tool_definition'
8
+ require_relative 'canonical/tool_schema'
9
+ require_relative 'canonical/tool_call'
10
+ require_relative 'canonical/message'
11
+ require_relative 'canonical/request'
12
+ require_relative 'canonical/response'
13
+ require_relative 'canonical/chunk'
14
+
15
module Legion
  module Extensions
    module Llm
      # Canonical types for the N×N client→provider routing architecture.
      #
      # These Data.define structs form the single contract between client translators
      # and provider translators. Per Amendment A: immutable, strict factories,
      # enum validation, unknown keys → metadata.
      #
      # Contract version: incremented on any breaking change to the canonical shape.
      # Provider registration refuses gems built against a mismatched version (G7).
      module Canonical
        CONTRACT_VERSION = '1.0.0'

        # Available canonical types, as constant names under this module.
        # NOTE(review): ToolSchema is loaded by this file's requires but is not
        # listed here -- presumably because it is a helper rather than a value
        # type; confirm.
        TYPES = %i[
          Thinking Usage Params ContentBlock
          ToolDefinition ToolCall Message
          Request Response Chunk
        ].freeze

        class << self
          # List all canonical type classes.
          #
          # @return [Array] the constants named in TYPES, in the same order
          # @raise [NameError] if a listed constant has not been loaded
          def types
            TYPES.map { |name| const_get(name) }
          end

          # Check if a given constant name is a registered canonical type.
          #
          # @param name [String, Symbol, Object] candidate constant name
          # @return [Boolean] false for anything that cannot be turned into a
          #   Symbol (nil, Integer, ...) rather than raising NoMethodError
          def type?(name)
            name.respond_to?(:to_sym) && TYPES.include?(name.to_sym)
          end
        end
      end
    end
  end
end
@@ -11,9 +11,7 @@ module Legion
11
11
  attr_reader :model, :messages, :tools, :tool_prefs, :params, :headers, :schema
12
12
 
13
13
  def initialize(model: nil, provider: nil, assume_model_exists: false, context: nil)
14
- if assume_model_exists && !provider
15
- raise ArgumentError, 'Provider must be specified if assume_model_exists is true'
16
- end
14
+ raise ArgumentError, 'Provider must be specified if assume_model_exists is true' if assume_model_exists && !provider
17
15
 
18
16
  @context = context
19
17
  @config = context&.config || Legion::Extensions::Llm.config
@@ -139,7 +137,7 @@ module Legion
139
137
  messages.each(&)
140
138
  end
141
139
 
142
- def complete(&) # rubocop:disable Metrics/PerceivedComplexity
140
+ def complete(&)
143
141
  response = @provider.complete(
144
142
  messages,
145
143
  tools: @tools,
@@ -234,7 +232,7 @@ module Legion
234
232
  end
235
233
  end
236
234
 
237
- def handle_tool_calls(response, &) # rubocop:disable Metrics/PerceivedComplexity
235
+ def handle_tool_calls(response, &)
238
236
  halt_result = nil
239
237
 
240
238
  response.tool_calls.each_value do |tool_call|
@@ -77,9 +77,13 @@ module Legion
77
77
 
78
78
  def setup_logging(faraday)
79
79
  logger = faraday_logger
80
+ # Enable request body logging when the logger is at DEBUG level,
81
+ # or when explicitly enabled via fleet request_payload setting.
82
+ request_payload = Legion::Extensions::Llm.default_settings.dig(:fleet, :request, :logger, :request_payload)
83
+ bodies_enabled = request_payload == true || debug_logger?(logger)
80
84
  faraday.response :logger,
81
85
  logger,
82
- bodies: debug_logger?(logger),
86
+ bodies: bodies_enabled,
83
87
  errors: false,
84
88
  headers: false,
85
89
  log_level: :debug do |logger|
@@ -54,6 +54,8 @@ module Legion
54
54
 
55
55
  # Faraday middleware that maps provider-specific API errors to Legion::Extensions::Llm errors.
56
56
  class ErrorMiddleware < Faraday::Middleware
57
+ extend Legion::Logging::Helper
58
+
57
59
  STREAM_ERROR_BODY_KEY = :legion_llm_stream_error_body
58
60
 
59
61
  def initialize(app, options = {})
@@ -80,7 +82,7 @@ module Legion
80
82
  /reduce the length of messages/i
81
83
  ].freeze
82
84
 
83
- def parse_error(provider:, response:) # rubocop:disable Metrics/PerceivedComplexity
85
+ def parse_error(provider:, response:)
84
86
  response = response_with_stream_error_body(response)
85
87
  message = provider&.parse_error(response)
86
88
 
@@ -88,9 +90,7 @@ module Legion
88
90
  when 200..399
89
91
  message
90
92
  when 400
91
- if context_length_exceeded?(message)
92
- raise ContextLengthExceededError.new(response, message || 'Context length exceeded')
93
- end
93
+ raise ContextLengthExceededError.new(response, message || 'Context length exceeded') if context_length_exceeded?(message)
94
94
 
95
95
  raise BadRequestError.new(response, message || 'Invalid request - please check your input')
96
96
  when 401
@@ -101,9 +101,7 @@ module Legion
101
101
  raise ForbiddenError.new(response,
102
102
  message || 'Forbidden - you do not have permission to access this resource')
103
103
  when 429
104
- if context_length_exceeded?(message)
105
- raise ContextLengthExceededError.new(response, message || 'Context length exceeded')
106
- end
104
+ raise ContextLengthExceededError.new(response, message || 'Context length exceeded') if context_length_exceeded?(message)
107
105
 
108
106
  raise RateLimitError.new(response, message || 'Rate limit exceeded - please wait a moment')
109
107
  when 500
@@ -14,9 +14,7 @@ module Legion
14
14
 
15
15
  def reject_legacy_options!
16
16
  LEGACY_OPTIONS.each do |key|
17
- if @options.key?(key) || @options.key?(key.to_s)
18
- raise ArgumentError, "#{key} is not supported by fleet protocol v2"
19
- end
17
+ raise ArgumentError, "#{key} is not supported by fleet protocol v2" if @options.key?(key) || @options.key?(key.to_s)
20
18
  end
21
19
  end
22
20
 
@@ -119,9 +119,7 @@ module Legion
119
119
  raise ConfigurationError,
120
120
  "fleet provider instance is not configured: #{instance_id}"
121
121
  end
122
- unless truthy?(dig(instance_settings, :fleet, :respond_to_requests))
123
- raise ConfigurationError, "fleet responses are disabled for provider instance: #{instance_id}"
124
- end
122
+ raise ConfigurationError, "fleet responses are disabled for provider instance: #{instance_id}" unless truthy?(dig(instance_settings, :fleet, :respond_to_requests))
125
123
 
126
124
  provider_class.new(deep_symbolize(instance_settings))
127
125
  end
@@ -186,9 +186,7 @@ module Legion
186
186
  end
187
187
 
188
188
  def signing_key
189
- if defined?(::Legion::Crypt) && ::Legion::Crypt.respond_to?(:cluster_secret)
190
- return ::Legion::Crypt.cluster_secret
191
- end
189
+ return ::Legion::Crypt.cluster_secret if defined?(::Legion::Crypt) && ::Legion::Crypt.respond_to?(:cluster_secret)
192
190
 
193
191
  raise TokenError, 'no signing key available - Legion::Crypt not initialized'
194
192
  rescue TokenError
@@ -19,7 +19,7 @@ module Legion
19
19
  :parameter_size, :quantization, :size_bytes,
20
20
  :modalities_input, :modalities_output, :metadata
21
21
  ) do
22
- # rubocop:disable Metrics/ParameterLists, Metrics/PerceivedComplexity
22
+ # rubocop:disable Metrics/ParameterLists
23
23
  def initialize(
24
24
  id:, name: nil, provider: nil, instance: :default,
25
25
  family: nil, capabilities: [], context_length: nil,
@@ -46,7 +46,7 @@ module Legion
46
46
  metadata: metadata.is_a?(Hash) ? metadata : {}
47
47
  )
48
48
  end
49
- # rubocop:enable Metrics/ParameterLists, Metrics/PerceivedComplexity
49
+ # rubocop:enable Metrics/ParameterLists
50
50
 
51
51
  # ── Capability predicates ─────────────────────────────────────
52
52
 
@@ -206,11 +206,9 @@ module Legion
206
206
  class << self
207
207
  private
208
208
 
209
- def extract_modalities(data) # rubocop:disable Metrics/PerceivedComplexity
209
+ def extract_modalities(data)
210
210
  # New-style keys take priority (round-trip from to_h)
211
- if data.key?(:modalities_input) || data.key?(:modalities_output)
212
- return [Array(data[:modalities_input]), Array(data[:modalities_output])]
213
- end
211
+ return [Array(data[:modalities_input]), Array(data[:modalities_output])] if data.key?(:modalities_input) || data.key?(:modalities_output)
214
212
 
215
213
  # Legacy: modalities is a hash or Modalities object
216
214
  modalities_data = data[:modalities]
@@ -123,7 +123,7 @@ module Legion
123
123
  fetch_provider_models(remote_only: remote_only)[:models]
124
124
  end
125
125
 
126
- def resolve(model_id, provider: nil, assume_exists: false, config: nil) # rubocop:disable Metrics/PerceivedComplexity
126
+ def resolve(model_id, provider: nil, assume_exists: false, config: nil)
127
127
  config ||= Legion::Extensions::Llm.config
128
128
  provider_class = provider ? resolve_provider_class(provider) : nil
129
129
 
@@ -168,7 +168,7 @@ module Legion
168
168
  instance.respond_to?(method, include_private) || super
169
169
  end
170
170
 
171
- def fetch_models_dev_models(existing_models) # rubocop:disable Metrics/PerceivedComplexity
171
+ def fetch_models_dev_models(existing_models)
172
172
  log.info 'Fetching models from models.dev API...'
173
173
 
174
174
  connection = Connection.basic do |f|
@@ -300,7 +300,7 @@ module Legion
300
300
  end
301
301
  end
302
302
 
303
- def add_provider_metadata(models_dev_model, provider_model) # rubocop:disable Metrics/PerceivedComplexity
303
+ def add_provider_metadata(models_dev_model, provider_model)
304
304
  data = models_dev_model.to_h
305
305
  data[:name] = provider_model.name if blank_value?(data[:name])
306
306
  data[:family] = provider_model.family if blank_value?(data[:family])
@@ -76,7 +76,12 @@ module Legion
76
76
  def format_openai_tool_calls(tool_calls)
77
77
  return nil unless tool_calls&.any?
78
78
 
79
- tool_calls.values.map do |tool_call|
79
+ # Array is the canonical shape (per canonical/message.rb); Hash
80
+ # is the legacy lex-llm shape (id => ToolCall). Both flow through
81
+ # this renderer depending on caller.
82
+ calls = tool_calls.is_a?(Hash) ? tool_calls.values : Array(tool_calls)
83
+
84
+ calls.map do |tool_call|
80
85
  {
81
86
  id: tool_call.id,
82
87
  type: 'function',
@@ -95,9 +100,9 @@ module Legion
95
100
  {
96
101
  type: 'function',
97
102
  function: {
98
- name: tool.name,
99
- description: tool.description,
100
- parameters: tool.params_schema || { type: 'object', properties: {} }
103
+ name: Canonical::ToolSchema.tool_name(tool),
104
+ description: Canonical::ToolSchema.tool_description(tool),
105
+ parameters: Canonical::ToolSchema.extract(tool)
101
106
  }
102
107
  }
103
108
  end
@@ -137,7 +137,7 @@ module Legion
137
137
  parse_list_models_response response, slug, capabilities
138
138
  end
139
139
 
140
- def discover_offerings(live: false, **filters)
140
+ def discover_offerings(live: false, raise_on_unreachable: false, **filters)
141
141
  return filter_cached_offerings(Array(@cached_offerings), filters) unless live
142
142
 
143
143
  provider_health = health(live:)
@@ -148,8 +148,10 @@ module Legion
148
148
  offering_from_model(model, health: provider_health)
149
149
  end
150
150
  @cached_offerings
151
- rescue Faraday::ConnectionFailed => e
151
+ rescue Faraday::ConnectionFailed, Faraday::TimeoutError => e
152
152
  log.warn("[#{slug}] instance=#{provider_instance_id} unreachable: #{e.message}")
153
+ raise if raise_on_unreachable
154
+
153
155
  []
154
156
  end
155
157
 
@@ -224,9 +226,16 @@ module Legion
224
226
  end
225
227
 
226
228
  def cache_enabled?
227
- return false unless config.respond_to?(:llm_cache_enabled)
229
+ explicit = config.llm_cache_enabled if config.respond_to?(:llm_cache_enabled)
230
+
231
+ unless explicit.nil?
232
+ log.debug { "[#{slug}] cache_enabled? source=per_provider value=#{explicit}" }
233
+ return explicit == true
234
+ end
228
235
 
229
- config.llm_cache_enabled == true
236
+ global = global_prompt_caching_enabled?
237
+ log.debug { "[#{slug}] cache_enabled? source=global value=#{global}" }
238
+ global
230
239
  end
231
240
 
232
241
  def cache_control_prefix_tokens
@@ -528,6 +537,14 @@ module Legion
528
537
 
529
538
  private
530
539
 
540
# Whether prompt caching is switched on globally.
#
# Reads Legion::Settings at [:llm, :prompt_caching, :enabled] and treats only
# a literal `true` as enabled. Returns false when Legion::Settings is not
# defined, and also when the lookup raises any StandardError.
#
# @return [Boolean]
def global_prompt_caching_enabled?
  if defined?(Legion::Settings)
    Legion::Settings.dig(:llm, :prompt_caching, :enabled) == true
  else
    false
  end
rescue StandardError
  false
end
547
+
531
548
  def model_detail_cache_key(model_name)
532
549
  tier = offering_tier
533
550
  instance_key = cache_instance_key
@@ -11,10 +11,19 @@ module Legion
11
11
  embed: [%i[keyreq text], %i[keyreq model]],
12
12
  image: [%i[keyreq prompt], %i[keyreq model]],
13
13
  list_models: [%i[key live], %i[keyrest filters]],
14
- discover_offerings: [%i[key live], %i[keyrest filters]],
14
+ discover_offerings: [%i[key live], %i[key raise_on_unreachable], %i[keyrest filters]],
15
15
  health: [%i[key live]],
16
16
  count_tokens: [%i[keyreq messages], %i[keyreq model], %i[key params]]
17
17
  }.freeze
18
+
19
+ # Tools passed to chat/stream_chat must support Canonical::ToolDefinition objects.
20
+ # Providers must not crash on Data.define instances (not Hashes).
21
+ TOOL_SUPPORT_CONTRACT = <<~DOC
22
+ - chat and stream_chat accept keyword `tools:` (Hash<name, tool_object>)
23
+ - tools may be Canonical::ToolDefinition, Hash, or legacy Lex::Llm::Tool
24
+ - Renderers must use Canonical::ToolSchema.extract(tool) for schema access
25
+ - discover_offerings(live: true, raise_on_unreachable: true) raises on transport failure
26
+ DOC
18
27
  end
19
28
  end
20
29
  end
@@ -10,9 +10,7 @@ module Legion
10
10
 
11
11
  def for(offering, prefix: 'llm.fleet', include_context: true, include_fingerprint: false)
12
12
  parts = [prefix, lane_kind(offering), model_slug(lane_model(offering))]
13
- if include_context && offering.inference? && offering.context_window
14
- parts << "ctx#{offering.context_window}"
15
- end
13
+ parts << "ctx#{offering.context_window}" if include_context && offering.inference? && offering.context_window
16
14
  parts.push('elig', eligibility_fingerprint(offering)) if include_fingerprint
17
15
  parts.join('.')
18
16
  end
@@ -39,7 +39,7 @@ module Legion
39
39
  log.debug { inspect } if Legion::Extensions::Llm.config.log_stream_debug
40
40
  end
41
41
 
42
- def filtered_chunk(chunk) # rubocop:disable Metrics/PerceivedComplexity
42
+ def filtered_chunk(chunk)
43
43
  has_content = !@last_content_delta.empty?
44
44
  has_thinking = !@last_thinking_delta.empty?
45
45
  has_tokens = chunk.input_tokens&.positive? || chunk.output_tokens&.positive?
@@ -57,6 +57,27 @@ module Legion
57
57
  )
58
58
  end
59
59
 
60
# Emit whatever is still parked in the untagged-preamble buffer as one last
# streamed chunk. Short responses that match the untagged-reasoning heuristic
# (e.g. starting with "I", "The", "Let me") and never reach a double newline
# stay buffered for the whole stream; without this flush the caller's block
# would never receive a single delta.
#
# Resets both per-chunk delta accumulators before flushing.
#
# @return [Chunk, nil] nil when the buffer is empty or the flush produced
#   neither content nor thinking text
def flush_pending_chunk
  return if @untagged_preamble_buffer.empty?

  @last_content_delta = +''
  @last_thinking_delta = +''
  flush_pending_untagged_preamble_into_deltas

  content_delta = @last_content_delta.empty? ? nil : @last_content_delta
  thinking_delta = @last_thinking_delta.empty? ? nil : @last_thinking_delta
  return unless content_delta || thinking_delta

  Chunk.new(
    role: :assistant,
    content: content_delta,
    thinking: thinking_delta && Thinking.build(text: thinking_delta),
    model_id: model_id
  )
end
80
+
60
81
  def to_message(response)
61
82
  flush_pending_untagged_preamble
62
83
 
@@ -233,6 +254,24 @@ module Legion
233
254
  @untagged_preamble_pending = false
234
255
  end
235
256
 
257
# Variant of flush_pending_untagged_preamble that additionally records the
# flushed text in the per-chunk delta accumulators, so flush_pending_chunk
# can surface it to the streaming block.
#
# The buffer is split by Responses::ThinkingExtractor.extract_untagged_preamble.
# When that yields thinking text, the content and thinking parts are appended
# to their respective accumulators. Otherwise the whole buffer is appended as
# content. The buffer and its pending flag are then cleared.
def flush_pending_untagged_preamble_into_deltas
  buffered = @untagged_preamble_buffer
  visible, reasoning = Responses::ThinkingExtractor.extract_untagged_preamble(buffered)
  # No reasoning detected: the raw buffer is treated as plain content.
  visible = buffered unless reasoning

  [@content, @last_content_delta].each { |sink| sink << visible }
  [@thinking_text, @last_thinking_delta].each { |sink| sink << reasoning } if reasoning

  @untagged_preamble_buffer = +''
  @untagged_preamble_pending = false
end
274
+
236
275
  def append_thinking_from_chunk(chunk)
237
276
  thinking = chunk.thinking
238
277
  return unless thinking
@@ -16,9 +16,7 @@ module Legion
16
16
  response = connection.post stream_url, payload do |req|
17
17
  req.headers = additional_headers.merge(req.headers) unless additional_headers.empty?
18
18
  on_chunk = build_stream_callback(accumulator, block)
19
- if Legion::Extensions::Llm.config.log_stream_debug
20
- log.debug { "Stream callback prepared: #{on_chunk.inspect}" }
21
- end
19
+ log.debug { "Stream callback prepared: #{on_chunk.inspect}" } if Legion::Extensions::Llm.config.log_stream_debug
22
20
  if faraday_1?
23
21
  req.options[:on_data] = handle_stream(&on_chunk)
24
22
  else
@@ -26,6 +24,11 @@ module Legion
26
24
  end
27
25
  end
28
26
 
27
+ # Release any text held by the untagged-preamble heuristic so short
28
+ # responses still stream at least one delta to the caller.
29
+ final_chunk = accumulator.flush_pending_chunk
30
+ block&.call(final_chunk) if final_chunk
31
+
29
32
  message = accumulator.to_message(response)
30
33
  log.debug { "Stream completed: #{message.content}" }
31
34
  message
@@ -33,6 +36,8 @@ module Legion
33
36
 
34
37
  def build_stream_callback(accumulator, block)
35
38
  proc do |chunk|
39
+ next unless chunk
40
+
36
41
  accumulator.add chunk
37
42
  filtered = accumulator.filtered_chunk(chunk)
38
43
  block.call(filtered) if filtered
@@ -41,7 +46,10 @@ module Legion
41
46
 
42
47
  def handle_stream(&block)
43
48
  build_on_data_handler do |data|
44
- block.call(build_chunk(data)) if data.is_a?(Hash)
49
+ next unless data.is_a?(Hash)
50
+
51
+ chunk = build_chunk(data)
52
+ block.call(chunk) if chunk
45
53
  end
46
54
  end
47
55
 
@@ -185,7 +193,7 @@ module Legion
185
193
  def build_stream_error_response(parsed_data, env, status)
186
194
  error_status = status || env&.status || 500
187
195
 
188
- if faraday_1?
196
+ if faraday_1? || env.nil?
189
197
  Struct.new(:body, :status).new(parsed_data, error_status)
190
198
  else
191
199
  env.merge(body: parsed_data, status: error_status)
@@ -235,9 +235,7 @@ module Legion
235
235
  def resolve_direct_schema(schema)
236
236
  return extract_schema(schema.to_json_schema) if schema.respond_to?(:to_json_schema)
237
237
  return Legion::Extensions::Llm::Utils.deep_dup(schema) if schema.is_a?(Hash)
238
- if schema.is_a?(Class) && schema.method_defined?(:to_json_schema)
239
- return extract_schema(schema.new.to_json_schema)
240
- end
238
+ return extract_schema(schema.new.to_json_schema) if schema.is_a?(Class) && schema.method_defined?(:to_json_schema)
241
239
 
242
240
  nil
243
241
  end
@@ -3,7 +3,7 @@
3
3
  module Legion
4
4
  module Extensions
5
5
  module Llm
6
- VERSION = '0.4.18'
6
+ VERSION = '0.5.1'
7
7
  end
8
8
  end
9
9
  end