llm_gateway 0.4.0 → 0.6.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. checksums.yaml +4 -4
  2. data/.pi/skills/live-provider-testing/SKILL.md +183 -0
  3. data/.pi/skills/options-development/SKILL.md +131 -0
  4. data/CHANGELOG.md +43 -0
  5. data/README.md +110 -41
  6. data/Rakefile +1 -0
  7. data/docs/migration_guide_0.6.0.md +386 -0
  8. data/lib/llm_gateway/adapters/adapter.rb +8 -44
  9. data/lib/llm_gateway/adapters/anthropic/acts_like_messages.rb +0 -2
  10. data/lib/llm_gateway/adapters/anthropic/input_mapper.rb +106 -27
  11. data/lib/llm_gateway/adapters/anthropic/output_mapper.rb +0 -33
  12. data/lib/llm_gateway/adapters/anthropic/stream_mapper.rb +59 -47
  13. data/lib/llm_gateway/adapters/anthropic_option_mapper.rb +48 -6
  14. data/lib/llm_gateway/adapters/groq/chat_completions_adapter.rb +3 -2
  15. data/lib/llm_gateway/adapters/groq/input_mapper.rb +44 -0
  16. data/lib/llm_gateway/adapters/groq/option_mapper.rb +89 -4
  17. data/lib/llm_gateway/adapters/normalized_stream_accumulator.rb +336 -0
  18. data/lib/llm_gateway/adapters/openai/acts_like_chat_completions.rb +0 -2
  19. data/lib/llm_gateway/adapters/openai/acts_like_responses.rb +0 -6
  20. data/lib/llm_gateway/adapters/openai/chat_completions/input_mapper.rb +135 -72
  21. data/lib/llm_gateway/adapters/openai/chat_completions/option_mapper.rb +100 -10
  22. data/lib/llm_gateway/adapters/openai/chat_completions/stream_mapper.rb +193 -170
  23. data/lib/llm_gateway/adapters/openai/chat_completions_adapter.rb +0 -1
  24. data/lib/llm_gateway/adapters/openai/responses/input_mapper.rb +128 -68
  25. data/lib/llm_gateway/adapters/openai/responses/option_mapper.rb +99 -10
  26. data/lib/llm_gateway/adapters/openai/responses/stream_mapper.rb +106 -275
  27. data/lib/llm_gateway/adapters/openai/responses_adapter.rb +0 -1
  28. data/lib/llm_gateway/adapters/openai_codex/input_mapper.rb +3 -3
  29. data/lib/llm_gateway/adapters/openai_codex/responses_adapter.rb +0 -5
  30. data/lib/llm_gateway/adapters/stream_mapper.rb +57 -0
  31. data/lib/llm_gateway/adapters/structs.rb +102 -52
  32. data/lib/llm_gateway/base_client.rb +2 -4
  33. data/lib/llm_gateway/client.rb +10 -66
  34. data/lib/llm_gateway/clients/anthropic.rb +5 -4
  35. data/lib/llm_gateway/clients/groq.rb +18 -4
  36. data/lib/llm_gateway/clients/openai.rb +20 -18
  37. data/lib/llm_gateway/prompt.rb +35 -17
  38. data/lib/llm_gateway/version.rb +1 -1
  39. data/lib/llm_gateway.rb +5 -29
  40. metadata +8 -10
  41. data/lib/llm_gateway/adapters/anthropic/bidirectional_message_mapper.rb +0 -111
  42. data/lib/llm_gateway/adapters/openai/chat_completions/bidirectional_message_mapper.rb +0 -110
  43. data/lib/llm_gateway/adapters/openai/chat_completions/output_mapper.rb +0 -40
  44. data/lib/llm_gateway/adapters/openai/responses/bidirectional_message_mapper.rb +0 -120
  45. data/lib/llm_gateway/adapters/openai/responses/output_mapper.rb +0 -47
  46. data/lib/llm_gateway/adapters/stream_accumulator.rb +0 -91
  47. data/scripts/generate_handoff_live_fixture.rb +0 -169
  48. data/scripts/generate_handoff_media_fixture.rb +0 -167
@@ -1,337 +1,168 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "../../structs"
3
+ require_relative "../../stream_mapper"
4
4
 
5
5
  module LlmGateway
6
6
  module Adapters
7
7
  module OpenAI
8
8
  module Responses
9
- class StreamMapper
10
- def map(chunk)
11
- queued_event = shift_queued_event
12
- return queued_event if queued_event
13
-
9
+ class StreamMapper < LlmGateway::Adapters::StreamMapper
10
+ def map(chunk, &block)
14
11
  event_type = chunk[:event]
15
12
  data = chunk[:data] || {}
16
13
  raise_stream_error!(data) if event_type == "error" || data[:error] || data[:type] == "error"
17
14
 
15
+ push_patches(patches_for(event_type, data), &block)
16
+ end
17
+
18
+ private
19
+
20
+ def patches_for(event_type, data)
18
21
  case event_type
19
22
  when "response.created"
20
- stash_response(data[:response])
21
- nil
23
+ response_created_patches(data[:response])
22
24
  when "response.output_item.added"
23
- map_output_item_added(data)
24
- when "response.output_item.done"
25
- map_output_item_done(data)
25
+ output_item_added_patches(data)
26
26
  when "response.content_part.added"
27
- map_content_part_added(data)
28
- when "response.content_part.done", "response.output_text.done"
29
- map_text_done(data)
27
+ content_part_added_patches(data)
28
+ when "response.content_part.done"
29
+ content_part_done_patches(data)
30
30
  when "response.output_text.delta"
31
- AssistantStreamEvent.new(
32
- type: :text_delta,
33
- content_index: content_index_for(data[:output_index] || 0),
34
- delta: data[:delta] || ""
35
- )
31
+ [ { type: :text_delta, delta: data[:delta] || "" } ]
36
32
  when "response.function_call_arguments.delta"
37
- AssistantStreamEvent.new(
38
- type: :tool_delta,
39
- content_index: content_index_for(data[:output_index] || 0),
40
- delta: data[:delta] || ""
41
- )
33
+ [ { type: :tool_delta, delta: data[:delta] || "" } ]
42
34
  when "response.function_call_arguments.done"
43
- map_tool_done(data)
35
+ [ { type: :tool_end, delta: "" } ]
36
+ when "response.reasoning_summary_part.added"
37
+ [ { type: :reasoning_start, delta: "", signature: "" } ]
44
38
  when "response.reasoning_summary_text.delta"
45
- output_index = data[:output_index] || 0
46
- mark_reasoning_has_content(output_index)
47
- AssistantStreamReasoningEvent.new(
48
- type: :reasoning_delta,
49
- content_index: content_index_for(output_index),
50
- delta: data[:delta] || "",
51
- signature: ""
52
- )
39
+ [ { type: :reasoning_delta, delta: data[:delta] || "", signature: "" } ]
40
+ when "response.reasoning_summary_part.done"
41
+ [ { type: :reasoning_end, delta: "", signature: "" } ]
53
42
  when "response.completed"
54
- map_response_completed(data[:response])
43
+ response_completed_patches(data[:response])
55
44
  else
56
- nil
45
+ []
57
46
  end
58
47
  end
59
48
 
60
- private
61
-
62
- def map_output_item_added(data)
63
- item = data[:item] || {}
64
- output_index = data[:output_index] || 0
49
+ def response_created_patches(response)
50
+ response ||= {}
65
51
 
66
- case item[:type]
67
- when "reasoning"
68
- mark_reasoning_started(output_index)
69
- AssistantStreamReasoningEvent.new(
70
- type: :reasoning_start,
71
- content_index: register_content_index(output_index),
72
- delta: "",
73
- signature: ""
74
- )
75
- when "message"
76
- register_content_index(output_index)
77
- ensure_message_started(role: item[:role] || "assistant")
78
- when "function_call"
79
- stash_role("assistant")
80
- mark_tool_started(output_index)
81
- AssistantToolStartEvent.new(
82
- type: :tool_start,
83
- content_index: register_content_index(output_index),
84
- delta: "",
85
- id: item[:call_id] || item[:id],
86
- name: item[:name]
87
- )
88
- else
89
- nil
90
- end
52
+ [
53
+ {
54
+ type: :message_start,
55
+ delta: {
56
+ id: response[:id],
57
+ model: response[:model],
58
+ role: "assistant",
59
+ timestamp: timestamp_milliseconds(response[:created_at])
60
+ }.compact
61
+ }
62
+ ]
91
63
  end
92
64
 
93
- def map_output_item_done(data)
65
+ def output_item_added_patches(data)
94
66
  item = data[:item] || {}
95
- output_index = data[:output_index] || 0
96
67
 
97
68
  case item[:type]
98
- when "reasoning"
99
- map_reasoning_done(output_index, item)
69
+ when "message"
70
+ return [] unless accumulator.message_hash.empty?
71
+
72
+ [
73
+ {
74
+ type: :message_start,
75
+ delta: { role: item[:role] || "assistant" }
76
+ }
77
+ ]
100
78
  when "function_call"
101
- map_function_call_done(output_index, item)
102
- else
103
- nil
104
- end
105
- end
106
-
107
- def map_reasoning_done(output_index, item)
108
- content_index = content_index_for(output_index)
109
- summary_text = extract_reasoning_summary_text(item)
110
-
111
- if reasoning_started_without_content?(output_index) && !summary_text.empty?
112
- queue_event(
113
- AssistantStreamReasoningEvent.new(
114
- type: :reasoning_end,
115
- content_index:,
79
+ [
80
+ {
81
+ type: :tool_start,
116
82
  delta: "",
117
- signature: ""
118
- )
119
- )
120
- mark_reasoning_completed(output_index)
121
- return AssistantStreamReasoningEvent.new(
122
- type: :reasoning_delta,
123
- content_index:,
124
- delta: summary_text,
125
- signature: ""
126
- )
83
+ id: item[:call_id] || item[:id],
84
+ name: item[:name]
85
+ }
86
+ ]
87
+ else
88
+ []
127
89
  end
128
-
129
- mark_reasoning_completed(output_index)
130
- AssistantStreamReasoningEvent.new(
131
- type: :reasoning_end,
132
- content_index:,
133
- delta: "",
134
- signature: ""
135
- )
136
90
  end
137
91
 
138
- def map_function_call_done(output_index, item)
139
- return nil if tool_started?(output_index)
140
-
141
- mark_tool_started(output_index)
142
- queue_event(
143
- AssistantStreamEvent.new(
144
- type: :tool_end,
145
- content_index: content_index_for(output_index),
146
- delta: ""
147
- )
148
- )
92
+ def content_part_added_patches(data)
93
+ part = data[:part] || {}
94
+ return [] unless part[:type] == "output_text"
149
95
 
150
- AssistantToolStartEvent.new(
151
- type: :tool_start,
152
- content_index: register_content_index(output_index),
153
- delta: "",
154
- id: item[:call_id] || item[:id],
155
- name: item[:name]
156
- )
96
+ [ { type: :text_start, delta: "" } ]
157
97
  end
158
98
 
159
- def map_content_part_added(data)
99
+ def content_part_done_patches(data)
160
100
  part = data[:part] || {}
161
- return nil unless part[:type] == "output_text"
101
+ return [] unless part.empty? || part[:type] == "output_text"
162
102
 
163
- AssistantStreamEvent.new(
164
- type: :text_start,
165
- content_index: content_index_for(data[:output_index] || 0),
166
- delta: ""
167
- )
103
+ [ { type: :text_end, delta: "" } ]
168
104
  end
169
105
 
170
- def map_text_done(data)
171
- AssistantStreamEvent.new(
172
- type: :text_end,
173
- content_index: content_index_for(data[:output_index] || 0),
174
- delta: ""
175
- )
176
- end
177
-
178
- def map_tool_done(data)
179
- AssistantStreamEvent.new(
180
- type: :tool_end,
181
- content_index: content_index_for(data[:output_index] || 0),
182
- delta: ""
183
- )
184
- end
106
+ def response_completed_patches(response)
107
+ response ||= {}
108
+ patch = {
109
+ type: :message_delta,
110
+ delta: {
111
+ id: response[:id],
112
+ model: response[:model],
113
+ role: "assistant",
114
+ timestamp: timestamp_milliseconds(response[:created_at]),
115
+ stop_reason: stop_reason_for(response)
116
+ }.compact
117
+ }
118
+ patch[:usage] = usage(response) if response.key?(:usage)
185
119
 
186
- def map_response_completed(response)
187
- stash_response(response)
188
- AssistantStreamMessageEvent.new(
189
- type: message_started? ? :message_delta : :message_start,
190
- delta: pending_message_attributes.merge(role: pending_message_attributes[:role] || "assistant", stop_reason: stop_reason_for(response)),
191
- usage_increment: usage_increment(response)
192
- ).tap do
193
- @message_started = true
194
- clear_pending_message_attributes
195
- end
120
+ [
121
+ patch,
122
+ { type: :message_end }
123
+ ]
196
124
  end
197
125
 
198
- def usage_increment(response)
126
+ def usage(response)
199
127
  usage = response[:usage] || {}
128
+ cache_read = token_count(usage.dig(:input_tokens_details, :cached_tokens))
129
+ cache_write = token_count(
130
+ usage.dig(:input_tokens_details, :cache_write_tokens),
131
+ usage[:cache_write_tokens]
132
+ )
133
+ input_tokens = token_count(usage[:input_tokens])
134
+ input = [ input_tokens - cache_read - cache_write, 0 ].max
135
+ output = token_count(usage[:output_tokens])
200
136
 
201
137
  {
202
- input_tokens: usage[:input_tokens] || 0,
203
- cache_creation_input_tokens: 0,
204
- cache_read_input_tokens: usage.dig(:input_tokens_details, :cached_tokens) || 0,
205
- output_tokens: usage[:output_tokens] || 0,
206
- reasoning_tokens: usage.dig(:output_tokens_details, :reasoning_tokens) || 0
138
+ input:,
139
+ cache_write:,
140
+ cache_read:,
141
+ output:,
142
+ total: input + cache_write + cache_read + output,
143
+ raw: usage
207
144
  }
208
145
  end
209
146
 
210
- def stop_reason_for(response)
211
- output = response[:output] || []
212
- last_item = output.last || {}
213
-
214
- tool_state.any? || last_item[:type] == "function_call" ? "tool_use" : "stop"
215
- end
216
-
217
- def ensure_message_started(role: "assistant")
218
- return nil if message_started?
219
-
220
- @message_started = true
221
- AssistantStreamMessageEvent.new(
222
- type: :message_start,
223
- delta: pending_message_attributes.merge(role: role).compact,
224
- usage_increment: {}
225
- ).tap do
226
- clear_pending_message_attributes
227
- end
228
- end
229
-
230
- def extract_reasoning_summary_text(item)
231
- Array(item[:summary]).filter_map do |summary|
232
- next summary[:text] if summary.is_a?(Hash) && summary[:text]
233
- next summary[:summary] if summary.is_a?(Hash) && summary[:summary]
234
- next summary if summary.is_a?(String)
235
- end.join
236
- end
237
-
238
- def mark_reasoning_started(output_index)
239
- reasoning_state[output_index] = :started
240
- end
241
-
242
- def mark_reasoning_has_content(output_index)
243
- reasoning_state[output_index] = :has_content
244
- end
245
-
246
- def mark_reasoning_completed(output_index)
247
- reasoning_state[output_index] = :completed
248
- end
249
-
250
- def reasoning_started_without_content?(output_index)
251
- reasoning_state[output_index] == :started
252
- end
253
-
254
- def reasoning_state
255
- @reasoning_state ||= {}
256
- end
257
-
258
- def mark_tool_started(output_index)
259
- tool_state[output_index] = :started
260
- end
261
-
262
- def tool_started?(output_index)
263
- tool_state[output_index] == :started
264
- end
265
-
266
- def tool_state
267
- @tool_state ||= {}
268
- end
269
-
270
- def stash_response(response)
271
- response ||= {}
272
- @pending_message_attributes = pending_message_attributes.merge(
273
- id: response[:id],
274
- model: response[:model]
275
- ).compact
276
- end
277
-
278
- def stash_role(role)
279
- @pending_message_attributes = pending_message_attributes.merge(role:)
280
- end
281
-
282
- def pending_message_attributes
283
- @pending_message_attributes ||= {}
284
- end
285
-
286
- def clear_pending_message_attributes
287
- @pending_message_attributes = {}
288
- end
289
-
290
- def register_content_index(output_index)
291
- content_index_map[output_index] ||= next_content_index!
292
- end
293
-
294
- def content_index_for(output_index)
295
- content_index_map.fetch(output_index) { register_content_index(output_index) }
296
- end
297
-
298
- def next_content_index!
299
- @next_content_index ||= 0
300
- current = @next_content_index
301
- @next_content_index += 1
302
- current
303
- end
304
-
305
- def content_index_map
306
- @content_index_map ||= {}
147
+ def token_count(*values)
148
+ values.compact.first.to_i
307
149
  end
308
150
 
309
- def message_started?
310
- @message_started ||= false
311
- end
151
+ def timestamp_milliseconds(unix_seconds)
152
+ return nil if unix_seconds.nil?
312
153
 
313
- def queue_event(event)
314
- queued_events << event
154
+ (unix_seconds.to_f * 1000).to_i
315
155
  end
316
156
 
317
- def shift_queued_event
318
- queued_events.shift
319
- end
157
+ def stop_reason_for(response)
158
+ output = response[:output] || []
159
+ last_item = output.last || {}
320
160
 
321
- def queued_events
322
- @queued_events ||= []
161
+ tool_seen? || last_item[:type] == "function_call" ? "tool_use" : "stop"
323
162
  end
324
163
 
325
- def raise_stream_error!(data)
326
- error = data[:error].is_a?(Hash) ? data[:error] : data
327
- message = error[:message] || "Stream error"
328
- code = error[:code] || error[:type]
329
-
330
- if LlmGateway::Errors.context_overflow_message?(message)
331
- raise LlmGateway::Errors::PromptTooLong.new(message, code)
332
- end
333
-
334
- raise LlmGateway::Errors::APIStatusError.new(message, code)
164
+ def tool_seen?
165
+ accumulator.blocks.any? { |content_block| content_block && content_block[:type] == "tool_use" }
335
166
  end
336
167
  end
337
168
  end
@@ -4,7 +4,6 @@ require_relative "../adapter"
4
4
  require_relative "acts_like_responses"
5
5
  require_relative "../input_message_sanitizer"
6
6
  require_relative "responses/input_mapper"
7
- require_relative "responses/output_mapper"
8
7
  require_relative "responses/option_mapper"
9
8
  require_relative "file_output_mapper"
10
9
  require_relative "responses/stream_mapper"
@@ -26,7 +26,7 @@ module LlmGateway
26
26
  def self.map_messages(messages)
27
27
  return messages unless messages.is_a?(Array)
28
28
 
29
- mapper = message_mapper
29
+ mapper = self
30
30
  stripped = strip_reasoning_blocks(messages)
31
31
 
32
32
  mapped = stripped.each_with_object([]) do |msg, acc|
@@ -85,7 +85,7 @@ module LlmGateway
85
85
  end
86
86
 
87
87
  # Ensure assistant messages carry "output_text" rather than "input_text".
88
- # The BidirectionalMessageMapper maps plain text blocks to "input_text";
88
+ # The base Responses input mapper maps plain text blocks to "input_text";
89
89
  # Codex is strict about directionality and rejects "input_text" on the
90
90
  # assistant side.
91
91
  def self.normalize_assistant_content_types(messages)
@@ -114,7 +114,7 @@ module LlmGateway
114
114
  # signature *is* the serialised item)
115
115
  # - tool_use / function_call → top-level function_call item
116
116
  # - text / *_text variants → output_text inside an assistant content block
117
- # - anything else → delegated to the BidirectionalMessageMapper
117
+ # - anything else → delegated to the Responses input mapper
118
118
  def self.map_assistant_content(content, mapper)
119
119
  text_parts = []
120
120
  items = []
@@ -2,7 +2,6 @@
2
2
 
3
3
  require_relative "../adapter"
4
4
  require_relative "../openai/acts_like_responses"
5
- require_relative "../openai/responses/output_mapper"
6
5
  require_relative "option_mapper"
7
6
  require_relative "../openai/responses/stream_mapper"
8
7
  require_relative "../openai/file_output_mapper"
@@ -25,10 +24,6 @@ module LlmGateway
25
24
  OptionMapper
26
25
  end
27
26
 
28
- def perform_chat(messages, tools:, system:, **options)
29
- client.chat_codex(messages, tools: tools, system: system, **options)
30
- end
31
-
32
27
  def perform_stream(messages, tools:, system:, **options, &block)
33
28
  client.stream_codex(messages, tools: tools, system: system, **options, &block)
34
29
  end
@@ -0,0 +1,57 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "normalized_stream_accumulator"
4
+
5
+ module LlmGateway
6
+ module Adapters
7
+ class StreamMapper
8
+ def initialize(provider:, api:)
9
+ @provider = provider
10
+ @api = api
11
+ end
12
+
13
+ def result
14
+ accumulator.final_message
15
+ end
16
+
17
+ private
18
+
19
+ attr_reader :provider, :api
20
+
21
+ def accumulator
22
+ @accumulator ||= LlmGateway::Adapters::NormalizedStreamAccumulator.new(provider:, api:)
23
+ end
24
+
25
+ def push_patches(patches, &block)
26
+ patches.each do |patch|
27
+ accumulator.push(patch, &block)
28
+ end
29
+
30
+ nil
31
+ end
32
+
33
+ def raise_stream_error!(data, overload_codes: [])
34
+ error = stream_error_payload(data)
35
+ message = error[:message] || error["message"] || "Stream error"
36
+ code = error[:code] || error["code"] || error[:type] || error["type"]
37
+
38
+ if LlmGateway::Errors.context_overflow_message?(message)
39
+ raise LlmGateway::Errors::PromptTooLong.new(message, code)
40
+ end
41
+
42
+ if Array(overload_codes).any? { |overload_code| overload_code.to_s == code.to_s }
43
+ raise LlmGateway::Errors::OverloadError.new(message, code)
44
+ end
45
+
46
+ raise LlmGateway::Errors::APIStatusError.new(message, code)
47
+ end
48
+
49
+ def stream_error_payload(data)
50
+ data ||= {}
51
+ error = data[:error] || data["error"]
52
+
53
+ error.is_a?(Hash) ? error : data
54
+ end
55
+ end
56
+ end
57
+ end