llm_gateway 0.4.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. checksums.yaml +4 -4
  2. data/.pi/skills/live-provider-testing/SKILL.md +183 -0
  3. data/.pi/skills/options-development/SKILL.md +131 -0
  4. data/CHANGELOG.md +43 -0
  5. data/README.md +110 -41
  6. data/Rakefile +1 -0
  7. data/docs/migration_guide_0.6.0.md +386 -0
  8. data/lib/llm_gateway/adapters/adapter.rb +8 -44
  9. data/lib/llm_gateway/adapters/anthropic/acts_like_messages.rb +0 -2
  10. data/lib/llm_gateway/adapters/anthropic/input_mapper.rb +106 -27
  11. data/lib/llm_gateway/adapters/anthropic/output_mapper.rb +0 -33
  12. data/lib/llm_gateway/adapters/anthropic/stream_mapper.rb +59 -47
  13. data/lib/llm_gateway/adapters/anthropic_option_mapper.rb +48 -6
  14. data/lib/llm_gateway/adapters/groq/chat_completions_adapter.rb +3 -2
  15. data/lib/llm_gateway/adapters/groq/input_mapper.rb +44 -0
  16. data/lib/llm_gateway/adapters/groq/option_mapper.rb +89 -4
  17. data/lib/llm_gateway/adapters/normalized_stream_accumulator.rb +336 -0
  18. data/lib/llm_gateway/adapters/openai/acts_like_chat_completions.rb +0 -2
  19. data/lib/llm_gateway/adapters/openai/acts_like_responses.rb +0 -6
  20. data/lib/llm_gateway/adapters/openai/chat_completions/input_mapper.rb +135 -72
  21. data/lib/llm_gateway/adapters/openai/chat_completions/option_mapper.rb +100 -10
  22. data/lib/llm_gateway/adapters/openai/chat_completions/stream_mapper.rb +193 -170
  23. data/lib/llm_gateway/adapters/openai/chat_completions_adapter.rb +0 -1
  24. data/lib/llm_gateway/adapters/openai/responses/input_mapper.rb +128 -68
  25. data/lib/llm_gateway/adapters/openai/responses/option_mapper.rb +99 -10
  26. data/lib/llm_gateway/adapters/openai/responses/stream_mapper.rb +106 -275
  27. data/lib/llm_gateway/adapters/openai/responses_adapter.rb +0 -1
  28. data/lib/llm_gateway/adapters/openai_codex/input_mapper.rb +3 -3
  29. data/lib/llm_gateway/adapters/openai_codex/responses_adapter.rb +0 -5
  30. data/lib/llm_gateway/adapters/stream_mapper.rb +57 -0
  31. data/lib/llm_gateway/adapters/structs.rb +102 -52
  32. data/lib/llm_gateway/base_client.rb +2 -4
  33. data/lib/llm_gateway/client.rb +10 -66
  34. data/lib/llm_gateway/clients/anthropic.rb +5 -4
  35. data/lib/llm_gateway/clients/groq.rb +18 -4
  36. data/lib/llm_gateway/clients/openai.rb +20 -18
  37. data/lib/llm_gateway/prompt.rb +35 -17
  38. data/lib/llm_gateway/version.rb +1 -1
  39. data/lib/llm_gateway.rb +5 -29
  40. metadata +8 -10
  41. data/lib/llm_gateway/adapters/anthropic/bidirectional_message_mapper.rb +0 -111
  42. data/lib/llm_gateway/adapters/openai/chat_completions/bidirectional_message_mapper.rb +0 -110
  43. data/lib/llm_gateway/adapters/openai/chat_completions/output_mapper.rb +0 -40
  44. data/lib/llm_gateway/adapters/openai/responses/bidirectional_message_mapper.rb +0 -120
  45. data/lib/llm_gateway/adapters/openai/responses/output_mapper.rb +0 -47
  46. data/lib/llm_gateway/adapters/stream_accumulator.rb +0 -91
  47. data/scripts/generate_handoff_live_fixture.rb +0 -169
  48. data/scripts/generate_handoff_media_fixture.rb +0 -167
@@ -5,25 +5,115 @@ module LlmGateway
5
5
  module OpenAI
6
6
  module ChatCompletions
7
7
  module OptionMapper
8
- include LlmGateway::Adapters::OpenAI::PromptCacheOptionMapper
9
-
8
+ DEFAULT_MAX_COMPLETION_TOKENS = 20_480
10
9
  VALID_REASONING_LEVELS = %w[low medium high xhigh].freeze
11
10
 
11
+ # Source: https://developers.openai.com/api/reference/resources/chat/subresources/completions/methods/create/index.md
12
+ # API: OpenAI Chat Completions Create; accessed 2026-05-18.
13
+ # Body parameters listed by the API reference: messages, model, audio,
14
+ # frequency_penalty, function_call, functions, logit_bias, logprobs,
15
+ # max_completion_tokens, max_tokens, metadata, modalities, n,
16
+ # parallel_tool_calls, prediction, presence_penalty, prompt_cache_key,
17
+ # prompt_cache_retention, reasoning_effort, response_format,
18
+ # safety_identifier, seed, service_tier, stop, store, stream,
19
+ # stream_options, temperature, tool_choice, tools, top_logprobs, top_p,
20
+ # user, verbosity, web_search_options.
21
+ # This mapper intentionally excludes transcript/tool structural fields
22
+ # (messages, tools) from option handling.
23
+
24
+ VALID_OPTIONS = %i[
25
+ model
26
+ audio
27
+ frequency_penalty
28
+ function_call
29
+ functions
30
+ logit_bias
31
+ logprobs
32
+ max_completion_tokens
33
+ max_tokens
34
+ metadata
35
+ modalities
36
+ n
37
+ parallel_tool_calls
38
+ prediction
39
+ presence_penalty
40
+ prompt_cache_key
41
+ prompt_cache_retention
42
+ reasoning_effort
43
+ response_format
44
+ safety_identifier
45
+ seed
46
+ service_tier
47
+ stop
48
+ store
49
+ stream
50
+ stream_options
51
+ temperature
52
+ tool_choice
53
+ top_logprobs
54
+ top_p
55
+ user
56
+ verbosity
57
+ web_search_options
58
+ ].freeze
59
+
60
+ MANAGED_OPTIONS = %i[
61
+ reasoning
62
+ cache_key
63
+ cache_retention
64
+ ].freeze
65
+
12
66
  module_function
13
67
 
14
68
  def map(options)
15
- mapped_options = options.dup
16
- mapped_options[:max_completion_tokens] ||= 20_480
69
+ mapped_options = options.reject { |key, _| MANAGED_OPTIONS.include?(key) }
70
+ mapped_options[:max_completion_tokens] = options[:max_completion_tokens] || DEFAULT_MAX_COMPLETION_TOKENS
71
+
72
+ cache_key = options[:cache_key]
73
+ mapped_options[:prompt_cache_key] = cache_key unless cache_key.nil?
74
+
75
+ cache_retention = options[:cache_retention]
76
+ mapped_options[:prompt_cache_retention] = normalize_cache_retention(cache_retention) \
77
+ unless cache_retention.nil?
17
78
 
18
- map_cache_key!(mapped_options)
19
- map_prompt_cache_retention!(mapped_options)
79
+ if mapped_options[:prompt_cache_key] && !mapped_options[:prompt_cache_retention]
80
+ mapped_options[:prompt_cache_retention] = normalize_cache_retention("short")
81
+ end
20
82
 
21
- return mapped_options unless mapped_options.key?(:reasoning)
83
+ if cache_retention.to_s == "none"
84
+ mapped_options.delete(:prompt_cache_key)
85
+ mapped_options.delete(:prompt_cache_retention)
86
+ end
22
87
 
23
- reasoning = mapped_options.delete(:reasoning)
24
- return mapped_options if reasoning.nil? || reasoning.to_s == "none"
88
+ reasoning = options[:reasoning]
89
+ mapped_options[:reasoning_effort] = normalize_reasoning_effort(reasoning) \
90
+ unless reasoning.nil? || reasoning.to_s == "none"
91
+
92
+ validate_options!(mapped_options)
93
+ mapped_options
94
+ end
95
+
96
+ def validate_options!(mapped_options)
97
+ unknown_options = mapped_options.keys - VALID_OPTIONS
98
+ return if unknown_options.empty?
99
+
100
+ raise ArgumentError,
101
+ "Unknown OpenAI Chat Completions options: #{unknown_options.join(', ')}. " \
102
+ "Valid options: #{VALID_OPTIONS.join(', ')}."
103
+ end
25
104
 
26
- mapped_options.merge(reasoning_effort: normalize_reasoning_effort(reasoning))
105
+ def normalize_cache_retention(cache_retention)
106
+ case cache_retention.to_s
107
+ when "short"
108
+ "in_memory"
109
+ when "long"
110
+ "24h"
111
+ when "none"
112
+ nil
113
+ else
114
+ raise ArgumentError,
115
+ "Invalid cache_retention '#{cache_retention}'. Use 'short', 'long', or 'none'."
116
+ end
27
117
  end
28
118
 
29
119
  def normalize_reasoning_effort(reasoning)
@@ -1,188 +1,255 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "../../structs"
3
+ require_relative "../../stream_mapper"
4
4
 
5
5
  module LlmGateway
6
6
  module Adapters
7
7
  module OpenAI
8
8
  module ChatCompletions
9
- class StreamMapper
10
- def map(chunk)
11
- queued_event = shift_queued_event
12
- return queued_event if queued_event
13
-
9
+ class StreamMapper < LlmGateway::Adapters::StreamMapper
10
+ def map(chunk, &block)
14
11
  data = chunk[:data] || {}
15
12
  raise_stream_error!(data) if chunk[:event] == "error" || data[:error] || data[:type] == "error"
16
13
 
17
- choices = data[:choices] || []
14
+ push_patches(patches_for(data), &block)
15
+ end
18
16
 
19
- if choices.empty?
20
- return message_event(
21
- delta: pending_finish_delta,
22
- usage_increment: usage_increment(data)
23
- )
24
- end
17
+ private
18
+
19
+ def patches_for(data)
20
+ choices = data[:choices] || []
21
+ return final_usage_patches(data) if choices.empty?
25
22
 
26
23
  choice = choices.first || {}
27
24
  delta = choice[:delta] || {}
28
- finish_reason = choice[:finish_reason]
25
+ patches = []
26
+ active_block_type = accumulator.active_block_type
27
+ active_tool = active_tool_block
28
+
29
+ append_patches(patches, message_start_patches(data, delta))
30
+
31
+ active_block_type, active_tool = append_patches(
32
+ patches,
33
+ reasoning_patches(delta[:reasoning], active_block_type:),
34
+ active_block_type,
35
+ active_tool
36
+ )
37
+ active_block_type, active_tool = append_patches(
38
+ patches,
39
+ text_patches(delta[:content], active_block_type:),
40
+ active_block_type,
41
+ active_tool
42
+ )
43
+ delta.fetch(:tool_calls, []).each do |tool_call|
44
+ active_block_type, active_tool = append_patches(
45
+ patches,
46
+ patches_for_tool_call(tool_call, active_block_type:, active_tool:),
47
+ active_block_type,
48
+ active_tool
49
+ )
50
+ end
51
+ append_patches(patches, finish_patches(choice[:finish_reason], active_block_type:))
29
52
 
30
- event = map_choice_delta(data, choice, delta)
31
- return event if event
53
+ patches
54
+ end
32
55
 
33
- return finish_event_for(finish_reason) if finish_reason
56
+ def append_patches(patches, new_patches, active_block_type = nil, active_tool = nil)
57
+ patches.concat(new_patches)
58
+
59
+ new_patches.each do |patch|
60
+ case patch[:type]
61
+ when :text_start
62
+ active_block_type = :text
63
+ active_tool = nil
64
+ when :reasoning_start
65
+ active_block_type = :reasoning
66
+ active_tool = nil
67
+ when :tool_start
68
+ active_block_type = :tool
69
+ active_tool = { id: patch[:id], name: patch[:name] }
70
+ when :text_end, :reasoning_end, :tool_end
71
+ active_block_type = nil
72
+ active_tool = nil
73
+ end
74
+ end
34
75
 
35
- nil
76
+ [ active_block_type, active_tool ]
36
77
  end
37
78
 
38
- private
79
+ def message_start_patches(data, delta)
80
+ return [] unless accumulator.message_hash.empty?
39
81
 
40
- def map_choice_delta(data, choice, delta)
41
- if !message_started? && delta[:tool_calls]&.any?
42
- @message_started = true
43
- stash_message_attributes(data, delta)
44
- return tool_event(delta[:tool_calls].first)
45
- end
82
+ return [] unless delta.key?(:role) ||
83
+ data[:id] ||
84
+ data[:model] ||
85
+ delta[:content] ||
86
+ delta[:reasoning] ||
87
+ delta[:tool_calls]&.any?
46
88
 
47
- if !message_started? && (delta.key?(:role) || data[:id] || data[:model])
48
- @message_started = true
49
- return AssistantStreamMessageEvent.new(
89
+ [
90
+ {
50
91
  type: :message_start,
51
92
  delta: {
52
93
  id: data[:id],
53
94
  model: data[:model],
54
- role: delta[:role]
55
- }.compact,
56
- usage_increment: {}
57
- )
58
- end
59
-
60
- if (content = delta[:content]) && !content.empty?
61
- return text_event(content, choice[:index] || 0)
62
- end
63
-
64
- return tool_event(delta[:tool_calls].first) if delta[:tool_calls]&.any?
95
+ role: delta[:role] || "assistant",
96
+ timestamp: timestamp_milliseconds(data[:created])
97
+ }.compact
98
+ }
99
+ ]
100
+ end
65
101
 
66
- nil
102
+ # Groq exposes OpenAI-compatible chat completion chunks, but may include
103
+ # `delta.reasoning` before normal `delta.content`.
104
+ def reasoning_patches(reasoning, active_block_type: accumulator.active_block_type)
105
+ return [] if reasoning.to_s.empty?
106
+
107
+ [
108
+ *close_active_non_reasoning_patches(active_block_type:),
109
+ {
110
+ type: active_block_type == :reasoning ? :reasoning_delta : :reasoning_start,
111
+ delta: reasoning,
112
+ signature: ""
113
+ }
114
+ ]
67
115
  end
68
116
 
69
- def finish_event_for(finish_reason)
70
- normalized = normalize_stop_reason(finish_reason)
71
- stash_pending_finish_delta(stop_reason: normalized)
117
+ def text_patches(content, active_block_type: accumulator.active_block_type)
118
+ return [] if content.to_s.empty?
72
119
 
73
- case normalized
74
- when "tool_use"
75
- AssistantStreamEvent.new(type: :tool_end, content_index: last_started_tool_index || 0, delta: "")
76
- else
77
- AssistantStreamEvent.new(type: :text_end, content_index: last_started_text_index || 0, delta: "")
78
- end
120
+ [
121
+ *close_active_non_text_patches(active_block_type:),
122
+ {
123
+ type: active_block_type == :text ? :text_delta : :text_start,
124
+ delta: content
125
+ }
126
+ ]
79
127
  end
80
128
 
81
- def message_event(delta:, usage_increment: {})
82
- AssistantStreamMessageEvent.new(
83
- type: pending_message_attributes.empty? ? :message_delta : :message_start,
84
- delta: pending_message_attributes.merge(delta),
85
- usage_increment:
86
- ).tap do
87
- clear_pending_message_attributes
88
- clear_pending_finish_delta
129
+ def patches_for_tool_call(tool_call, active_block_type: accumulator.active_block_type, active_tool: active_tool_block)
130
+ id = tool_call[:id]
131
+ name = tool_call.dig(:function, :name)
132
+ arguments = tool_call.dig(:function, :arguments).to_s
133
+
134
+ patches = []
135
+
136
+ if id || name
137
+ if active_block_type == :tool
138
+ patches.concat(close_active_block_patches(active_block_type:)) if new_active_tool?(id, name, active_tool:)
139
+ else
140
+ patches.concat(close_active_non_tool_patches(active_block_type:))
141
+ end
142
+
143
+ unless active_block_type == :tool && patches.empty?
144
+ patches << {
145
+ type: :tool_start,
146
+ delta: "",
147
+ id: id,
148
+ name: name
149
+ }
150
+ end
89
151
  end
90
- end
91
152
 
92
- def usage_increment(data)
93
- usage = data[:usage] || {}
94
-
95
- {
96
- input_tokens: usage[:prompt_tokens] || 0,
97
- cache_creation_input_tokens: 0,
98
- cache_read_input_tokens: usage.dig(:prompt_tokens_details, :cached_tokens) || 0,
99
- output_tokens: usage[:completion_tokens] || 0,
100
- reasoning_tokens: usage.dig(:completion_tokens_details, :reasoning_tokens) || 0
101
- }
153
+ patches << { type: :tool_delta, delta: arguments } unless arguments.empty?
154
+ patches
102
155
  end
103
156
 
104
- def text_event(content, content_index)
105
- @last_started_text_index = content_index
157
+ def new_active_tool?(id, name, active_tool: active_tool_block)
158
+ return true unless active_tool
106
159
 
107
- if started_text_blocks.include?(content_index)
108
- AssistantStreamEvent.new(type: :text_delta, content_index:, delta: content)
109
- else
110
- started_text_blocks << content_index
111
- AssistantStreamEvent.new(type: :text_start, content_index:, delta: content)
112
- end
160
+ (id && active_tool[:id] != id) || (name && active_tool[:name] != name)
113
161
  end
114
162
 
115
- def tool_event(tool_call)
116
- tool_index = tool_call[:index] || 0
117
- @last_started_tool_index = tool_index
118
- function = tool_call[:function] || {}
119
- arguments = function[:arguments] || ""
120
-
121
- unless started_tool_blocks.include?(tool_index)
122
- pending_tool_calls[tool_index] = merge_tool_call(pending_tool_calls[tool_index], tool_call)
123
- pending = pending_tool_calls[tool_index]
163
+ def active_tool_block
164
+ return nil unless accumulator.active_tool?
124
165
 
125
- return nil unless pending[:id] && pending.dig(:function, :name)
166
+ accumulator.blocks.reverse.find { |block| block&.fetch(:type, nil) == "tool_use" }
167
+ end
126
168
 
127
- started_tool_blocks << tool_index
128
- return AssistantToolStartEvent.new(
129
- type: :tool_start,
130
- content_index: tool_index,
131
- delta: "",
132
- id: pending[:id],
133
- name: pending.dig(:function, :name)
134
- )
169
+ def close_active_block_patches(active_block_type: accumulator.active_block_type)
170
+ case active_block_type
171
+ when :text
172
+ [ { type: :text_end, delta: "" } ]
173
+ when :reasoning
174
+ [ { type: :reasoning_end, delta: "", signature: "" } ]
175
+ when :tool
176
+ [ { type: :tool_end, delta: "" } ]
177
+ else
178
+ []
135
179
  end
136
-
137
- AssistantStreamEvent.new(type: :tool_delta, content_index: tool_index, delta: arguments)
138
180
  end
139
181
 
140
- def stash_message_attributes(data, delta)
141
- @pending_message_attributes = {
142
- id: data[:id],
143
- model: data[:model],
144
- role: delta[:role]
145
- }.compact
182
+ def close_active_non_text_patches(active_block_type: accumulator.active_block_type)
183
+ active_block_type == :text ? [] : close_active_block_patches(active_block_type:)
146
184
  end
147
185
 
148
- def pending_message_attributes
149
- @pending_message_attributes ||= {}
186
+ def close_active_non_reasoning_patches(active_block_type: accumulator.active_block_type)
187
+ active_block_type == :reasoning ? [] : close_active_block_patches(active_block_type:)
150
188
  end
151
189
 
152
- def clear_pending_message_attributes
153
- @pending_message_attributes = {}
190
+ def close_active_non_tool_patches(active_block_type: accumulator.active_block_type)
191
+ active_block_type == :tool ? [] : close_active_block_patches(active_block_type:)
154
192
  end
155
193
 
156
- def stash_pending_finish_delta(delta)
157
- @pending_finish_delta = pending_finish_delta.merge(delta)
158
- end
194
+ def finish_patches(finish_reason, active_block_type: accumulator.active_block_type)
195
+ return [] unless finish_reason
159
196
 
160
- def pending_finish_delta
161
- @pending_finish_delta ||= {}
197
+ [
198
+ *close_active_block_patches(active_block_type:),
199
+ {
200
+ type: :message_delta,
201
+ delta: { stop_reason: normalize_stop_reason(finish_reason) }
202
+ }
203
+ ]
162
204
  end
163
205
 
164
- def clear_pending_finish_delta
165
- @pending_finish_delta = {}
166
- end
206
+ def final_usage_patches(data)
207
+ patch = {
208
+ type: :message_delta,
209
+ delta: {}
210
+ }
211
+ patch[:usage] = usage(data) if data.key?(:usage)
167
212
 
168
- def merge_tool_call(existing, incoming)
169
- existing ||= {}
170
- incoming ||= {}
213
+ [
214
+ patch,
215
+ { type: :message_end }
216
+ ]
217
+ end
171
218
 
172
- existing_function = existing[:function] || {}
173
- incoming_function = incoming[:function] || {}
219
+ def usage(data)
220
+ usage = data[:usage] || {}
221
+ cache_read = token_count(
222
+ usage.dig(:prompt_tokens_details, :cached_tokens),
223
+ usage[:prompt_cache_hit_tokens]
224
+ )
225
+ cache_write = token_count(
226
+ usage.dig(:prompt_tokens_details, :cache_write_tokens),
227
+ usage[:cache_write_tokens]
228
+ )
229
+ prompt_tokens = token_count(usage[:prompt_tokens])
230
+ input = [ prompt_tokens - cache_read - cache_write, 0 ].max
231
+ output = token_count(usage[:completion_tokens])
174
232
 
175
233
  {
176
- index: incoming[:index] || existing[:index],
177
- id: incoming[:id] || existing[:id],
178
- type: incoming[:type] || existing[:type],
179
- function: {
180
- name: incoming_function[:name] || existing_function[:name],
181
- arguments: "#{existing_function[:arguments]}#{incoming_function[:arguments]}"
182
- }
234
+ input:,
235
+ cache_write:,
236
+ cache_read:,
237
+ output:,
238
+ total: input + cache_write + cache_read + output,
239
+ raw: usage
183
240
  }
184
241
  end
185
242
 
243
+ def token_count(*values)
244
+ values.compact.first.to_i
245
+ end
246
+
247
+ def timestamp_milliseconds(unix_seconds)
248
+ return nil if unix_seconds.nil?
249
+
250
+ (unix_seconds.to_f * 1000).to_i
251
+ end
252
+
186
253
  def normalize_stop_reason(finish_reason)
187
254
  case finish_reason
188
255
  when "tool_calls"
@@ -191,50 +258,6 @@ module LlmGateway
191
258
  finish_reason
192
259
  end
193
260
  end
194
-
195
- def message_started?
196
- @message_started ||= false
197
- end
198
-
199
- def started_text_blocks
200
- @started_text_blocks ||= []
201
- end
202
-
203
- def started_tool_blocks
204
- @started_tool_blocks ||= []
205
- end
206
-
207
- def pending_tool_calls
208
- @pending_tool_calls ||= {}
209
- end
210
-
211
- def last_started_text_index
212
- @last_started_text_index
213
- end
214
-
215
- def last_started_tool_index
216
- @last_started_tool_index
217
- end
218
-
219
- def shift_queued_event
220
- queued_events.shift
221
- end
222
-
223
- def queued_events
224
- @queued_events ||= []
225
- end
226
-
227
- def raise_stream_error!(data)
228
- error = data[:error].is_a?(Hash) ? data[:error] : data
229
- message = error[:message] || "Stream error"
230
- code = error[:code] || error[:type]
231
-
232
- if LlmGateway::Errors.context_overflow_message?(message)
233
- raise LlmGateway::Errors::PromptTooLong.new(message, code)
234
- end
235
-
236
- raise LlmGateway::Errors::APIStatusError.new(message, code)
237
- end
238
261
  end
239
262
  end
240
263
  end
@@ -4,7 +4,6 @@ require_relative "../adapter"
4
4
  require_relative "acts_like_chat_completions"
5
5
  require_relative "chat_completions/input_mapper"
6
6
  require_relative "chat_completions/input_message_sanitizer"
7
- require_relative "chat_completions/output_mapper"
8
7
  require_relative "chat_completions/option_mapper"
9
8
  require_relative "file_output_mapper"
10
9
  require_relative "chat_completions/stream_mapper"