llm_gateway 0.5.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +38 -0
  3. data/README.md +350 -43
  4. data/docs/migration_guide_0.6.0.md +386 -0
  5. data/docs/migration_guide_0.7.0.md +193 -0
  6. data/lib/llm_gateway/adapters/adapter.rb +8 -11
  7. data/lib/llm_gateway/adapters/anthropic/input_mapper.rb +24 -0
  8. data/lib/llm_gateway/adapters/anthropic/stream_mapper.rb +61 -11
  9. data/lib/llm_gateway/adapters/anthropic_option_mapper.rb +1 -1
  10. data/lib/llm_gateway/adapters/groq/option_mapper.rb +1 -1
  11. data/lib/llm_gateway/adapters/input_message_sanitizer.rb +98 -7
  12. data/lib/llm_gateway/adapters/normalized_stream_accumulator.rb +132 -39
  13. data/lib/llm_gateway/adapters/openai/chat_completions/option_mapper.rb +1 -1
  14. data/lib/llm_gateway/adapters/openai/chat_completions/stream_mapper.rb +40 -16
  15. data/lib/llm_gateway/adapters/openai/responses/input_mapper.rb +47 -31
  16. data/lib/llm_gateway/adapters/openai/responses/option_mapper.rb +1 -1
  17. data/lib/llm_gateway/adapters/openai/responses/stream_mapper.rb +173 -24
  18. data/lib/llm_gateway/adapters/stream_mapper.rb +9 -2
  19. data/lib/llm_gateway/adapters/structs.rb +140 -55
  20. data/lib/llm_gateway/agents/event.rb +105 -0
  21. data/lib/llm_gateway/agents/file_session_manager.rb +100 -0
  22. data/lib/llm_gateway/agents/harness.rb +176 -0
  23. data/lib/llm_gateway/agents/in_memory_session_manager.rb +222 -0
  24. data/lib/llm_gateway/agents/tools/bash_tool.rb +132 -0
  25. data/lib/llm_gateway/agents/tools/edit_tool.rb +215 -0
  26. data/lib/llm_gateway/agents/tools/read_tool.rb +143 -0
  27. data/lib/llm_gateway/agents/tools/tool_utils.rb +164 -0
  28. data/lib/llm_gateway/agents/tools/write_tool.rb +34 -0
  29. data/lib/llm_gateway/base_client.rb +5 -7
  30. data/lib/llm_gateway/clients/anthropic.rb +10 -9
  31. data/lib/llm_gateway/clients/claude_code/oauth_flow.rb +2 -2
  32. data/lib/llm_gateway/clients/groq.rb +8 -6
  33. data/lib/llm_gateway/clients/openai.rb +22 -20
  34. data/lib/llm_gateway/clients/openai_codex/oauth_flow.rb +4 -4
  35. data/lib/llm_gateway/prompt.rb +107 -52
  36. data/lib/llm_gateway/utils.rb +116 -13
  37. data/lib/llm_gateway/version.rb +1 -1
  38. data/lib/llm_gateway.rb +7 -21
  39. metadata +13 -2
@@ -92,9 +92,9 @@ module LlmGateway
92
92
  delta: {
93
93
  id: data[:id],
94
94
  model: data[:model],
95
- role: delta[:role] || "assistant"
96
- }.compact,
97
- usage_increment: {}
95
+ role: delta[:role] || "assistant",
96
+ timestamp: timestamp_milliseconds(data[:created])
97
+ }.compact
98
98
  }
99
99
  ]
100
100
  end
@@ -198,34 +198,58 @@ module LlmGateway
198
198
  *close_active_block_patches(active_block_type:),
199
199
  {
200
200
  type: :message_delta,
201
- delta: { stop_reason: normalize_stop_reason(finish_reason) },
202
- usage_increment: {}
201
+ delta: { stop_reason: normalize_stop_reason(finish_reason) }
203
202
  }
204
203
  ]
205
204
  end
206
205
 
207
206
  def final_usage_patches(data)
207
+ patch = {
208
+ type: :message_delta,
209
+ delta: {}
210
+ }
211
+ patch[:usage] = usage(data) if data.key?(:usage)
212
+
208
213
  [
209
- {
210
- type: accumulator.message_hash.empty? ? :message_start : :message_delta,
211
- delta: {},
212
- usage_increment: usage_increment(data)
213
- }
214
+ patch,
215
+ { type: :message_end }
214
216
  ]
215
217
  end
216
218
 
217
- def usage_increment(data)
219
+ def usage(data)
218
220
  usage = data[:usage] || {}
221
+ cache_read = token_count(
222
+ usage.dig(:prompt_tokens_details, :cached_tokens),
223
+ usage[:prompt_cache_hit_tokens]
224
+ )
225
+ cache_write = token_count(
226
+ usage.dig(:prompt_tokens_details, :cache_write_tokens),
227
+ usage[:cache_write_tokens]
228
+ )
229
+ prompt_tokens = token_count(usage[:prompt_tokens])
230
+ input = [ prompt_tokens - cache_read - cache_write, 0 ].max
231
+ output = token_count(usage[:completion_tokens])
219
232
 
220
233
  {
221
- input_tokens: usage[:prompt_tokens] || 0,
222
- cache_creation_input_tokens: 0,
223
- cache_read_input_tokens: usage.dig(:prompt_tokens_details, :cached_tokens) || 0,
224
- output_tokens: usage[:completion_tokens] || 0,
225
- reasoning_tokens: usage.dig(:completion_tokens_details, :reasoning_tokens) || 0
234
+ input:,
235
+ cache_write:,
236
+ cache_read:,
237
+ output:,
238
+ total: input + cache_write + cache_read + output,
239
+ raw: usage
226
240
  }
227
241
  end
228
242
 
243
+ def token_count(*values)
244
+ values.compact.first.to_i
245
+ end
246
+
247
+ def timestamp_milliseconds(unix_seconds)
248
+ return nil if unix_seconds.nil?
249
+
250
+ (unix_seconds.to_f * 1000).to_i
251
+ end
252
+
229
253
  def normalize_stop_reason(finish_reason)
230
254
  case finish_reason
231
255
  when "tool_calls"
@@ -37,22 +37,28 @@ module LlmGateway
37
37
  return tools unless tools
38
38
 
39
39
  tools.map do |tool|
40
- mapped_tool = {
41
- type: "function",
42
- name: tool[:name],
43
- description: tool[:description],
44
- parameters: tool[:input_schema]
45
- }
46
-
47
- [ :contents, :content ].each do |key|
48
- next unless tool[key].is_a?(Array)
49
-
50
- mapped_tool[key] = tool[key].map do |entry|
51
- entry.is_a?(Hash) ? map_content(entry.transform_keys(&:to_sym)) : entry
40
+ tool = tool.transform_keys(&:to_sym)
41
+
42
+ if tool[:name].nil?
43
+ tool
44
+ else
45
+ mapped_tool = {
46
+ type: "function",
47
+ name: tool[:name],
48
+ description: tool[:description],
49
+ parameters: tool[:input_schema]
50
+ }
51
+
52
+ [ :contents, :content ].each do |key|
53
+ next unless tool[key].is_a?(Array)
54
+
55
+ mapped_tool[key] = tool[key].map do |entry|
56
+ entry.is_a?(Hash) ? map_content(entry.transform_keys(&:to_sym)) : entry
57
+ end
52
58
  end
53
- end
54
59
 
55
- mapped_tool
60
+ mapped_tool
61
+ end
56
62
  end
57
63
  end
58
64
 
@@ -85,30 +91,40 @@ module LlmGateway
85
91
  def map_assistant_history_message(msg)
86
92
  blocks = (msg[:content] || []).map { |b| b.transform_keys(&:to_sym) }
87
93
 
88
- text_blocks = blocks.select { |b| b[:type] == "text" }
89
- tool_use_blocks = blocks.select { |b| b[:type] == "tool_use" }
90
-
91
94
  result = []
92
95
 
93
- if text_blocks.any?
94
- result << {
95
- role: "assistant",
96
- content: text_blocks.map { |b| { type: "output_text", text: b[:text] } }
97
- }
98
- end
99
-
100
- tool_use_blocks.each do |b|
101
- result << {
102
- type: "function_call",
103
- call_id: b[:id],
104
- name: b[:name],
105
- arguments: b[:input].is_a?(Hash) ? b[:input].to_json : (b[:input] || {}).to_json
106
- }
96
+ blocks.each do |block|
97
+ case block[:type]
98
+ when "text"
99
+ result << {
100
+ role: "assistant",
101
+ content: [ { type: "output_text", text: block[:text] } ]
102
+ }
103
+ when "tool_use"
104
+ result << {
105
+ type: "function_call",
106
+ call_id: block[:id],
107
+ name: block[:name],
108
+ arguments: block[:input].is_a?(Hash) ? block[:input].to_json : (block[:input] || {}).to_json
109
+ }
110
+ when "server_tool_use"
111
+ result << map_server_tool_use_history_item(block)
112
+ end
107
113
  end
108
114
 
109
115
  result
110
116
  end
111
117
 
118
+ def map_server_tool_use_history_item(block)
119
+ input = block[:input].is_a?(Hash) ? block[:input] : {}
120
+
121
+ {
122
+ id: block[:id],
123
+ type: block[:name],
124
+ status: "completed"
125
+ }.merge(input)
126
+ end
127
+
112
128
  def map_messages_content(message)
113
129
  message[:content].map { |content| map_content(content) }
114
130
  end
@@ -58,7 +58,7 @@ module LlmGateway
58
58
  module_function
59
59
 
60
60
  def map(options)
61
- mapped_options = options.reject { |key, _| MANAGED_OPTIONS.include?(key) }
61
+ mapped_options = options.except(*MANAGED_OPTIONS)
62
62
  mapped_options[:max_output_tokens] = options[:max_completion_tokens] || options[:max_output_tokens] || DEFAULT_MAX_OUTPUT_TOKENS
63
63
 
64
64
  cache_key = options[:cache_key]
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "json"
4
+
3
5
  require_relative "../../stream_mapper"
4
6
 
5
7
  module LlmGateway
@@ -23,10 +25,16 @@ module LlmGateway
23
25
  response_created_patches(data[:response])
24
26
  when "response.output_item.added"
25
27
  output_item_added_patches(data)
28
+ when "response.output_item.done"
29
+ output_item_done_patches(data)
26
30
  when "response.content_part.added"
27
31
  content_part_added_patches(data)
28
- when "response.content_part.done"
32
+ when "response.content_part.done", "response.output_text.done"
29
33
  content_part_done_patches(data)
34
+ when "response.code_interpreter_call_code.delta"
35
+ code_interpreter_code_delta_patches(data)
36
+ when "response.code_interpreter_call.in_progress", "response.code_interpreter_call.interpreting", "response.code_interpreter_call.completed", "response.code_interpreter_call_code.done"
37
+ []
30
38
  when "response.output_text.delta"
31
39
  [ { type: :text_delta, delta: data[:delta] || "" } ]
32
40
  when "response.function_call_arguments.delta"
@@ -55,9 +63,9 @@ module LlmGateway
55
63
  delta: {
56
64
  id: response[:id],
57
65
  model: response[:model],
58
- role: "assistant"
59
- }.compact,
60
- usage_increment: {}
66
+ role: "assistant",
67
+ timestamp: timestamp_milliseconds(response[:created_at])
68
+ }.compact
61
69
  }
62
70
  ]
63
71
  end
@@ -72,8 +80,7 @@ module LlmGateway
72
80
  [
73
81
  {
74
82
  type: :message_start,
75
- delta: { role: item[:role] || "assistant" },
76
- usage_increment: {}
83
+ delta: { role: item[:role] || "assistant" }
77
84
  }
78
85
  ]
79
86
  when "function_call"
@@ -85,6 +92,38 @@ module LlmGateway
85
92
  name: item[:name]
86
93
  }
87
94
  ]
95
+ when "code_interpreter_call"
96
+ state = code_interpreter_state[data[:output_index] || 0] = {
97
+ id: item[:id],
98
+ container_id: item[:container_id],
99
+ outputs: item[:outputs],
100
+ input_opened: false,
101
+ input_closed: false
102
+ }
103
+ container_id_to_tool_id[state[:container_id]] = state[:id] if state[:container_id]
104
+
105
+ [
106
+ {
107
+ type: :tool_start,
108
+ delta: "",
109
+ id: item[:id],
110
+ name: "code_interpreter_call",
111
+ tool_type: "server_tool_use"
112
+ }
113
+ ]
114
+ else
115
+ []
116
+ end
117
+ end
118
+
119
+ def output_item_done_patches(data)
120
+ item = data[:item] || {}
121
+
122
+ case item[:type]
123
+ when "code_interpreter_call"
124
+ code_interpreter_done_patches(data[:output_index] || 0, item)
125
+ when "message"
126
+ container_file_citation_patches(item)
88
127
  else
89
128
  []
90
129
  end
@@ -101,38 +140,136 @@ module LlmGateway
101
140
  part = data[:part] || {}
102
141
  return [] unless part.empty? || part[:type] == "output_text"
103
142
 
104
- [ { type: :text_end, delta: "" } ]
143
+ citations = container_file_citation_patches(data)
144
+ return citations unless accumulator.active_block_type == :text
145
+
146
+ [ { type: :text_end, delta: "" } ] + citations
105
147
  end
106
148
 
107
- def response_completed_patches(response)
108
- response ||= {}
149
+ def code_interpreter_code_delta_patches(data)
150
+ output_index = data[:output_index] || 0
151
+ state = code_interpreter_state[output_index] ||= {
152
+ id: nil,
153
+ container_id: nil,
154
+ outputs: nil,
155
+ input_opened: false,
156
+ input_closed: false
157
+ }
158
+ delta = escape_json_string_fragment(data[:delta] || "")
159
+ delta = "{\"code\":\"#{delta}" unless state[:input_opened]
160
+ state[:input_opened] = true
161
+
162
+ [ { type: :tool_delta, delta: } ]
163
+ end
164
+
165
+ def code_interpreter_done_patches(output_index, item)
166
+ state = code_interpreter_state[output_index] ||= {}
167
+ state[:id] ||= item[:id]
168
+ state[:container_id] = item[:container_id] if item.key?(:container_id)
169
+ state[:outputs] = item[:outputs] if item.key?(:outputs)
170
+ container_id_to_tool_id[state[:container_id]] = state[:id] if state[:container_id] && state[:id]
171
+ return [] if state[:input_closed]
172
+
173
+ opening = state[:input_opened] ? "" : "{\"code\":\""
174
+ state[:input_opened] = true
175
+ closing = "\"," + JSON.generate(container_id: state[:container_id], outputs: state[:outputs])[1..]
176
+ state[:input_closed] = true
109
177
 
110
178
  [
179
+ { type: :tool_delta, delta: opening + closing },
180
+ { type: :tool_end, delta: "" }
181
+ ]
182
+ end
183
+
184
+ def container_file_citation_patches(data)
185
+ extract_annotations(data).filter_map do |annotation|
186
+ next unless annotation[:type] == "container_file_citation"
187
+
188
+ container_id = annotation[:container_id]
189
+ file_id = annotation[:file_id]
190
+ filename = annotation[:filename]
191
+ tool_id = container_id_to_tool_id[container_id]
192
+ next unless tool_id
193
+
194
+ key = [ tool_id, container_id, file_id, filename ]
195
+ next if emitted_citation_keys[key]
196
+
197
+ emitted_citation_keys[key] = true
111
198
  {
112
- type: accumulator.message_hash.empty? ? :message_start : :message_delta,
113
- delta: {
114
- id: response[:id],
115
- model: response[:model],
116
- role: "assistant",
117
- stop_reason: stop_reason_for(response)
118
- }.compact,
119
- usage_increment: usage_increment(response)
199
+ type: :tool_result_start,
200
+ delta: JSON.generate(container_id:, file_id:, filename:),
201
+ tool_use_id: tool_id,
202
+ name: "container_file_citation_tool_result"
120
203
  }
204
+ end.flat_map { |start| [ start, { type: :tool_result_end, delta: "" } ] }
205
+ end
206
+
207
+ def extract_annotations(data)
208
+ annotations = []
209
+ annotations.concat(Array(data[:annotations]))
210
+ annotations.concat(Array(data.dig(:part, :annotations)))
211
+ annotations.concat(Array(data.dig(:item, :annotations)))
212
+ Array(data.dig(:item, :content)).each do |content_part|
213
+ annotations.concat(Array(content_part[:annotations])) if content_part.is_a?(Hash)
214
+ end
215
+ annotations
216
+ end
217
+
218
+ def escape_json_string_fragment(value)
219
+ JSON.generate(value)[1...-1]
220
+ end
221
+
222
+ def response_completed_patches(response)
223
+ response ||= {}
224
+ patch = {
225
+ type: :message_delta,
226
+ delta: {
227
+ id: response[:id],
228
+ model: response[:model],
229
+ role: "assistant",
230
+ timestamp: timestamp_milliseconds(response[:created_at]),
231
+ stop_reason: stop_reason_for(response)
232
+ }.compact
233
+ }
234
+ patch[:usage] = usage(response) if response.key?(:usage)
235
+
236
+ [
237
+ patch,
238
+ { type: :message_end }
121
239
  ]
122
240
  end
123
241
 
124
- def usage_increment(response)
242
+ def usage(response)
125
243
  usage = response[:usage] || {}
244
+ cache_read = token_count(usage.dig(:input_tokens_details, :cached_tokens))
245
+ cache_write = token_count(
246
+ usage.dig(:input_tokens_details, :cache_write_tokens),
247
+ usage[:cache_write_tokens]
248
+ )
249
+ input_tokens = token_count(usage[:input_tokens])
250
+ input = [ input_tokens - cache_read - cache_write, 0 ].max
251
+ output = token_count(usage[:output_tokens])
126
252
 
127
253
  {
128
- input_tokens: usage[:input_tokens] || 0,
129
- cache_creation_input_tokens: 0,
130
- cache_read_input_tokens: usage.dig(:input_tokens_details, :cached_tokens) || 0,
131
- output_tokens: usage[:output_tokens] || 0,
132
- reasoning_tokens: usage.dig(:output_tokens_details, :reasoning_tokens) || 0
254
+ input:,
255
+ cache_write:,
256
+ cache_read:,
257
+ output:,
258
+ total: input + cache_write + cache_read + output,
259
+ raw: usage
133
260
  }
134
261
  end
135
262
 
263
+ def token_count(*values)
264
+ values.compact.first.to_i
265
+ end
266
+
267
+ def timestamp_milliseconds(unix_seconds)
268
+ return nil if unix_seconds.nil?
269
+
270
+ (unix_seconds.to_f * 1000).to_i
271
+ end
272
+
136
273
  def stop_reason_for(response)
137
274
  output = response[:output] || []
138
275
  last_item = output.last || {}
@@ -141,7 +278,19 @@ module LlmGateway
141
278
  end
142
279
 
143
280
  def tool_seen?
144
- accumulator.blocks.any? { |content_block| content_block && content_block[:type] == "tool_use" }
281
+ accumulator.blocks.any? { |content_block| content_block && [ "tool_use", "server_tool_use" ].include?(content_block[:type]) }
282
+ end
283
+
284
+ def code_interpreter_state
285
+ @code_interpreter_state ||= {}
286
+ end
287
+
288
+ def container_id_to_tool_id
289
+ @container_id_to_tool_id ||= {}
290
+ end
291
+
292
+ def emitted_citation_keys
293
+ @emitted_citation_keys ||= {}
145
294
  end
146
295
  end
147
296
  end
@@ -5,14 +5,21 @@ require_relative "normalized_stream_accumulator"
5
5
  module LlmGateway
6
6
  module Adapters
7
7
  class StreamMapper
8
+ def initialize(provider:, api:)
9
+ @provider = provider
10
+ @api = api
11
+ end
12
+
8
13
  def result
9
- accumulator.result
14
+ accumulator.final_message
10
15
  end
11
16
 
12
17
  private
13
18
 
19
+ attr_reader :provider, :api
20
+
14
21
  def accumulator
15
- @accumulator ||= LlmGateway::Adapters::NormalizedStreamAccumulator.new
22
+ @accumulator ||= LlmGateway::Adapters::NormalizedStreamAccumulator.new(provider:, api:)
16
23
  end
17
24
 
18
25
  def push_patches(patches, &block)