llm_gateway 0.5.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +38 -0
- data/README.md +350 -43
- data/docs/migration_guide_0.6.0.md +386 -0
- data/docs/migration_guide_0.7.0.md +193 -0
- data/lib/llm_gateway/adapters/adapter.rb +8 -11
- data/lib/llm_gateway/adapters/anthropic/input_mapper.rb +24 -0
- data/lib/llm_gateway/adapters/anthropic/stream_mapper.rb +61 -11
- data/lib/llm_gateway/adapters/anthropic_option_mapper.rb +1 -1
- data/lib/llm_gateway/adapters/groq/option_mapper.rb +1 -1
- data/lib/llm_gateway/adapters/input_message_sanitizer.rb +98 -7
- data/lib/llm_gateway/adapters/normalized_stream_accumulator.rb +132 -39
- data/lib/llm_gateway/adapters/openai/chat_completions/option_mapper.rb +1 -1
- data/lib/llm_gateway/adapters/openai/chat_completions/stream_mapper.rb +40 -16
- data/lib/llm_gateway/adapters/openai/responses/input_mapper.rb +47 -31
- data/lib/llm_gateway/adapters/openai/responses/option_mapper.rb +1 -1
- data/lib/llm_gateway/adapters/openai/responses/stream_mapper.rb +173 -24
- data/lib/llm_gateway/adapters/stream_mapper.rb +9 -2
- data/lib/llm_gateway/adapters/structs.rb +140 -55
- data/lib/llm_gateway/agents/event.rb +105 -0
- data/lib/llm_gateway/agents/file_session_manager.rb +100 -0
- data/lib/llm_gateway/agents/harness.rb +176 -0
- data/lib/llm_gateway/agents/in_memory_session_manager.rb +222 -0
- data/lib/llm_gateway/agents/tools/bash_tool.rb +132 -0
- data/lib/llm_gateway/agents/tools/edit_tool.rb +215 -0
- data/lib/llm_gateway/agents/tools/read_tool.rb +143 -0
- data/lib/llm_gateway/agents/tools/tool_utils.rb +164 -0
- data/lib/llm_gateway/agents/tools/write_tool.rb +34 -0
- data/lib/llm_gateway/base_client.rb +5 -7
- data/lib/llm_gateway/clients/anthropic.rb +10 -9
- data/lib/llm_gateway/clients/claude_code/oauth_flow.rb +2 -2
- data/lib/llm_gateway/clients/groq.rb +8 -6
- data/lib/llm_gateway/clients/openai.rb +22 -20
- data/lib/llm_gateway/clients/openai_codex/oauth_flow.rb +4 -4
- data/lib/llm_gateway/prompt.rb +107 -52
- data/lib/llm_gateway/utils.rb +116 -13
- data/lib/llm_gateway/version.rb +1 -1
- data/lib/llm_gateway.rb +7 -21
- metadata +13 -2
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require "json"
|
|
4
|
+
|
|
3
5
|
require_relative "../stream_mapper"
|
|
4
6
|
|
|
5
7
|
module LlmGateway
|
|
@@ -14,25 +16,36 @@ module LlmGateway
|
|
|
14
16
|
model: chunk.dig(:data, :message, :model),
|
|
15
17
|
role: chunk.dig(:data, :message, :role)
|
|
16
18
|
}
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
accumulator.push({ type: :message_start, usage_increment:, delta: }, &block)
|
|
19
|
+
accumulator.push({ type: :message_start, delta: }, &block)
|
|
20
20
|
when "content_block_start"
|
|
21
21
|
content_block = chunk.dig(:data, :content_block) || {}
|
|
22
|
-
@current_content_block_type = content_block[:type]
|
|
22
|
+
@current_content_block_type = normalize_content_block_type(content_block[:type])
|
|
23
23
|
|
|
24
24
|
case @current_content_block_type
|
|
25
25
|
when "thinking"
|
|
26
26
|
accumulator.push({ type: :reasoning_start, delta: content_block[:thinking], signature: "" }, &block)
|
|
27
27
|
when "text"
|
|
28
28
|
accumulator.push({ type: :text_start, delta: content_block[:text] }, &block)
|
|
29
|
-
when "tool_use"
|
|
29
|
+
when "tool_use", "server_tool_use"
|
|
30
30
|
accumulator.push(
|
|
31
31
|
{
|
|
32
32
|
type: :tool_start,
|
|
33
33
|
delta: "",
|
|
34
34
|
id: content_block[:id],
|
|
35
|
-
name: content_block[:name]
|
|
35
|
+
name: content_block[:name],
|
|
36
|
+
tool_type: @current_content_block_type
|
|
37
|
+
},
|
|
38
|
+
&block
|
|
39
|
+
)
|
|
40
|
+
when "server_tool_result"
|
|
41
|
+
content = content_block[:content]
|
|
42
|
+
result_delta = content.nil? ? "" : JSON.generate(content)
|
|
43
|
+
accumulator.push(
|
|
44
|
+
{
|
|
45
|
+
type: :tool_result_start,
|
|
46
|
+
delta: result_delta,
|
|
47
|
+
tool_use_id: content_block[:tool_use_id],
|
|
48
|
+
name: content_block[:type]
|
|
36
49
|
},
|
|
37
50
|
&block
|
|
38
51
|
)
|
|
@@ -46,9 +59,13 @@ module LlmGateway
|
|
|
46
59
|
when "text"
|
|
47
60
|
delta = chunk.dig(:data, :delta, :text)
|
|
48
61
|
accumulator.push({ type: :text_delta, delta: }, &block)
|
|
49
|
-
when "tool_use"
|
|
62
|
+
when "tool_use", "server_tool_use"
|
|
50
63
|
delta = chunk.dig(:data, :delta, :partial_json)
|
|
51
64
|
accumulator.push({ type: :tool_delta, delta: }, &block)
|
|
65
|
+
when "server_tool_result"
|
|
66
|
+
content = chunk.dig(:data, :delta, :content)
|
|
67
|
+
result_delta = content.nil? ? "" : JSON.generate(content)
|
|
68
|
+
accumulator.push({ type: :tool_result_delta, delta: result_delta }, &block)
|
|
52
69
|
end
|
|
53
70
|
when "content_block_stop"
|
|
54
71
|
case @current_content_block_type
|
|
@@ -56,16 +73,21 @@ module LlmGateway
|
|
|
56
73
|
accumulator.push({ type: :reasoning_end, delta: "", signature: "" }, &block)
|
|
57
74
|
when "text"
|
|
58
75
|
accumulator.push({ type: :text_end, delta: "" }, &block)
|
|
59
|
-
when "tool_use"
|
|
76
|
+
when "tool_use", "server_tool_use"
|
|
60
77
|
accumulator.push({ type: :tool_end, delta: "" }, &block)
|
|
78
|
+
when "server_tool_result"
|
|
79
|
+
accumulator.push({ type: :tool_result_end, delta: "" }, &block)
|
|
61
80
|
end
|
|
62
81
|
@current_content_block_type = nil
|
|
63
82
|
when "message_delta"
|
|
64
|
-
|
|
65
|
-
|
|
83
|
+
data = chunk[:data] || {}
|
|
84
|
+
delta = normalize_message_delta(data[:delta] || {})
|
|
85
|
+
patch = { type: :message_delta, delta: }
|
|
86
|
+
patch[:usage] = normalized_usage(data[:usage]) if data.key?(:usage)
|
|
66
87
|
|
|
67
|
-
accumulator.push(
|
|
88
|
+
accumulator.push(patch, &block)
|
|
68
89
|
when "message_stop"
|
|
90
|
+
|
|
69
91
|
accumulator.push({ type: :message_end }, &block)
|
|
70
92
|
when "ping"
|
|
71
93
|
nil
|
|
@@ -76,6 +98,34 @@ module LlmGateway
|
|
|
76
98
|
|
|
77
99
|
private
|
|
78
100
|
|
|
101
|
+
def normalized_usage(usage)
|
|
102
|
+
usage = usage.to_h.symbolize_keys
|
|
103
|
+
|
|
104
|
+
input = token_count(usage[:input_tokens])
|
|
105
|
+
cache_write = token_count(usage[:cache_creation_input_tokens])
|
|
106
|
+
cache_read = token_count(usage[:cache_read_input_tokens])
|
|
107
|
+
output = token_count(usage[:output_tokens])
|
|
108
|
+
|
|
109
|
+
{
|
|
110
|
+
input:,
|
|
111
|
+
cache_write:,
|
|
112
|
+
cache_read:,
|
|
113
|
+
output:,
|
|
114
|
+
total: input + cache_write + cache_read + output,
|
|
115
|
+
raw: usage
|
|
116
|
+
}
|
|
117
|
+
end
|
|
118
|
+
|
|
119
|
+
def token_count(value)
|
|
120
|
+
value.to_i
|
|
121
|
+
end
|
|
122
|
+
|
|
123
|
+
def normalize_content_block_type(type)
|
|
124
|
+
return type unless type&.end_with?("_tool_result")
|
|
125
|
+
|
|
126
|
+
"server_tool_result"
|
|
127
|
+
end
|
|
128
|
+
|
|
79
129
|
def normalize_message_delta(delta)
|
|
80
130
|
return delta unless delta[:stop_reason] || delta["stop_reason"]
|
|
81
131
|
|
|
@@ -50,7 +50,7 @@ module LlmGateway
|
|
|
50
50
|
module_function
|
|
51
51
|
|
|
52
52
|
def map(options)
|
|
53
|
-
mapped_options = options.
|
|
53
|
+
mapped_options = options.except(*MANAGED_OPTIONS)
|
|
54
54
|
mapped_options[:max_tokens] = options[:max_completion_tokens] || DEFAULT_MAX_TOKENS
|
|
55
55
|
|
|
56
56
|
response_format = options[:response_format]
|
|
@@ -68,7 +68,7 @@ module LlmGateway
|
|
|
68
68
|
module_function
|
|
69
69
|
|
|
70
70
|
def map(options)
|
|
71
|
-
mapped_options = options.
|
|
71
|
+
mapped_options = options.except(*MANAGED_OPTIONS)
|
|
72
72
|
mapped_options[:temperature] = options.key?(:temperature) ? options[:temperature] : DEFAULT_TEMPERATURE
|
|
73
73
|
mapped_options[:max_completion_tokens] = options[:max_completion_tokens] || DEFAULT_MAX_COMPLETION_TOKENS
|
|
74
74
|
mapped_options[:response_format] = normalize_response_format(options[:response_format] || "text")
|
|
@@ -1,12 +1,14 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require "json"
|
|
4
|
+
|
|
3
5
|
module LlmGateway
|
|
4
6
|
module Adapters
|
|
5
7
|
class InputMessageSanitizer
|
|
6
8
|
def self.sanitize(messages, target_provider:, target_api:, target_model:)
|
|
7
9
|
return messages unless messages.is_a?(Array)
|
|
8
10
|
|
|
9
|
-
messages.map do |message|
|
|
11
|
+
sanitized = messages.map do |message|
|
|
10
12
|
sanitize_message(
|
|
11
13
|
message,
|
|
12
14
|
target_provider: target_provider,
|
|
@@ -14,6 +16,8 @@ module LlmGateway
|
|
|
14
16
|
target_model: target_model
|
|
15
17
|
)
|
|
16
18
|
end
|
|
19
|
+
|
|
20
|
+
relocate_assistant_tool_results(sanitized)
|
|
17
21
|
end
|
|
18
22
|
|
|
19
23
|
def self.sanitize_message(message, target_provider:, target_api:, target_model:)
|
|
@@ -25,9 +29,14 @@ module LlmGateway
|
|
|
25
29
|
return message unless message_metadata_present?(message)
|
|
26
30
|
|
|
27
31
|
same_model_replay = same_model_replay?(message, target_provider:, target_api:, target_model:)
|
|
32
|
+
same_provider_api_replay = same_provider_api_replay?(message, target_provider:, target_api:)
|
|
28
33
|
|
|
29
34
|
sanitized_content = content.each_with_object([]) do |block, acc|
|
|
30
|
-
sanitized = sanitize_content_block(
|
|
35
|
+
sanitized = sanitize_content_block(
|
|
36
|
+
block,
|
|
37
|
+
same_model_replay: same_model_replay,
|
|
38
|
+
same_provider_api_replay: same_provider_api_replay
|
|
39
|
+
)
|
|
31
40
|
next if sanitized.nil?
|
|
32
41
|
|
|
33
42
|
if sanitized.is_a?(Array)
|
|
@@ -40,19 +49,91 @@ module LlmGateway
|
|
|
40
49
|
message.merge(content: sanitized_content)
|
|
41
50
|
end
|
|
42
51
|
|
|
43
|
-
def self.sanitize_content_block(block, same_model_replay:)
|
|
52
|
+
def self.sanitize_content_block(block, same_model_replay:, same_provider_api_replay:)
|
|
44
53
|
return block unless block.is_a?(Hash)
|
|
45
54
|
|
|
46
55
|
type = block[:type] || block["type"]
|
|
56
|
+
|
|
57
|
+
if type == "server_tool_use"
|
|
58
|
+
return normalize_server_tool_use_for_replay(block) if same_provider_api_replay
|
|
59
|
+
|
|
60
|
+
return convert_server_tool_use_to_tool_use(block)
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
if type == "server_tool_result"
|
|
64
|
+
return block if same_provider_api_replay
|
|
65
|
+
|
|
66
|
+
return convert_server_tool_result_to_tool_result(block)
|
|
67
|
+
end
|
|
68
|
+
|
|
47
69
|
return block unless %w[thinking reasoning].include?(type)
|
|
48
70
|
return block if same_model_replay
|
|
49
71
|
|
|
50
72
|
text = extract_reasoning_text(block)
|
|
51
|
-
return nil if text.
|
|
73
|
+
return nil if text.blank?
|
|
52
74
|
|
|
53
75
|
{ type: "text", text: text }
|
|
54
76
|
end
|
|
55
77
|
|
|
78
|
+
def self.normalize_server_tool_use_for_replay(block)
|
|
79
|
+
input = block[:input] || block["input"]
|
|
80
|
+
return block unless input.is_a?(Hash)
|
|
81
|
+
|
|
82
|
+
outputs = input[:outputs] || input["outputs"]
|
|
83
|
+
return block unless outputs.is_a?(Hash)
|
|
84
|
+
|
|
85
|
+
normalized_input = input.merge(outputs: outputs.values)
|
|
86
|
+
normalized_input.delete(:outputs) if input.key?("outputs") && !input.key?(:outputs)
|
|
87
|
+
normalized_input["outputs"] = outputs.values if input.key?("outputs")
|
|
88
|
+
|
|
89
|
+
normalized = block.merge(input: normalized_input)
|
|
90
|
+
normalized.delete(:input) if block.key?("input") && !block.key?(:input)
|
|
91
|
+
normalized["input"] = normalized_input if block.key?("input")
|
|
92
|
+
normalized
|
|
93
|
+
end
|
|
94
|
+
|
|
95
|
+
def self.convert_server_tool_use_to_tool_use(block)
|
|
96
|
+
converted = block.merge(type: "tool_use")
|
|
97
|
+
converted.delete(:type) if block.key?("type") && !block.key?(:type)
|
|
98
|
+
converted["type"] = "tool_use" if block.key?("type")
|
|
99
|
+
converted
|
|
100
|
+
end
|
|
101
|
+
|
|
102
|
+
def self.convert_server_tool_result_to_tool_result(block)
|
|
103
|
+
converted = block.merge(type: "tool_result")
|
|
104
|
+
converted.delete(:type) if block.key?("type") && !block.key?(:type)
|
|
105
|
+
converted["type"] = "tool_result" if block.key?("type")
|
|
106
|
+
|
|
107
|
+
content = converted[:content] || converted["content"]
|
|
108
|
+
if content.is_a?(Hash)
|
|
109
|
+
converted = converted.merge(content: JSON.generate(content))
|
|
110
|
+
converted.delete(:content) if block.key?("content") && !block.key?(:content)
|
|
111
|
+
converted["content"] = JSON.generate(content) if block.key?("content")
|
|
112
|
+
end
|
|
113
|
+
|
|
114
|
+
converted
|
|
115
|
+
end
|
|
116
|
+
|
|
117
|
+
def self.relocate_assistant_tool_results(messages)
|
|
118
|
+
messages.flat_map do |message|
|
|
119
|
+
next message unless message.is_a?(Hash)
|
|
120
|
+
|
|
121
|
+
role = message[:role] || message["role"]
|
|
122
|
+
content = message[:content] || message["content"]
|
|
123
|
+
next message unless role == "assistant" && content.is_a?(Array)
|
|
124
|
+
|
|
125
|
+
tool_results, assistant_content = content.partition do |block|
|
|
126
|
+
block.is_a?(Hash) && (block[:type] || block["type"]) == "tool_result"
|
|
127
|
+
end
|
|
128
|
+
next message if tool_results.empty?
|
|
129
|
+
|
|
130
|
+
relocated = []
|
|
131
|
+
relocated << message.merge(content: assistant_content) unless assistant_content.empty?
|
|
132
|
+
relocated << { role: "user", content: tool_results }
|
|
133
|
+
relocated
|
|
134
|
+
end
|
|
135
|
+
end
|
|
136
|
+
|
|
56
137
|
def self.extract_reasoning_text(block)
|
|
57
138
|
return block[:thinking] if block[:thinking].is_a?(String)
|
|
58
139
|
return block[:reasoning] if block[:reasoning].is_a?(String)
|
|
@@ -65,7 +146,7 @@ module LlmGateway
|
|
|
65
146
|
|
|
66
147
|
item[:text] || item[:summary_text] || item[:reasoning]
|
|
67
148
|
end.join("\n")
|
|
68
|
-
return text
|
|
149
|
+
return text if text.present?
|
|
69
150
|
end
|
|
70
151
|
|
|
71
152
|
nil
|
|
@@ -79,15 +160,25 @@ module LlmGateway
|
|
|
79
160
|
provider == target_provider && api == target_api && model == target_model
|
|
80
161
|
end
|
|
81
162
|
|
|
163
|
+
def self.same_provider_api_replay?(message, target_provider:, target_api:)
|
|
164
|
+
provider = message[:provider] || message["provider"]
|
|
165
|
+
api = message[:api] || message["api"]
|
|
166
|
+
|
|
167
|
+
provider == target_provider && api == target_api
|
|
168
|
+
end
|
|
169
|
+
|
|
82
170
|
def self.message_metadata_present?(message)
|
|
83
171
|
provider = message[:provider] || message["provider"]
|
|
84
172
|
api = message[:api] || message["api"]
|
|
85
173
|
model = message[:model] || message["model"]
|
|
86
174
|
|
|
87
|
-
|
|
175
|
+
provider.present? && api.present? && model.present?
|
|
88
176
|
end
|
|
89
177
|
|
|
90
|
-
private_class_method :sanitize_message, :sanitize_content_block, :
|
|
178
|
+
private_class_method :sanitize_message, :sanitize_content_block, :normalize_server_tool_use_for_replay,
|
|
179
|
+
:convert_server_tool_use_to_tool_use, :convert_server_tool_result_to_tool_result,
|
|
180
|
+
:relocate_assistant_tool_results, :extract_reasoning_text, :same_model_replay?,
|
|
181
|
+
:same_provider_api_replay?, :message_metadata_present?
|
|
91
182
|
end
|
|
92
183
|
end
|
|
93
184
|
end
|
|
@@ -22,8 +22,8 @@ module LlmGateway
|
|
|
22
22
|
#
|
|
23
23
|
# Accepted event shapes:
|
|
24
24
|
#
|
|
25
|
-
# { type: :message_start, delta: { id: "...", model: "...", role: "assistant"
|
|
26
|
-
# { type: :message_delta, delta: { stop_reason: "stop" },
|
|
25
|
+
# { type: :message_start, delta: { id: "...", model: "...", role: "assistant", timestamp: 1716650000000 } }
|
|
26
|
+
# { type: :message_delta, delta: { stop_reason: "stop" }, usage: { output: 2 } }
|
|
27
27
|
# { type: :message_end }
|
|
28
28
|
#
|
|
29
29
|
# { type: :text_start, delta: "hi" }
|
|
@@ -34,10 +34,14 @@ module LlmGateway
|
|
|
34
34
|
# { type: :reasoning_delta, delta: "...", signature: "" }
|
|
35
35
|
# { type: :reasoning_end, delta: "", signature: "" }
|
|
36
36
|
#
|
|
37
|
-
# { type: :tool_start, id: "...", name: "tool_name", delta: "" }
|
|
37
|
+
# { type: :tool_start, id: "...", name: "tool_name", tool_type: "tool_use", delta: "" }
|
|
38
38
|
# { type: :tool_delta, delta: "{\"a\":" }
|
|
39
39
|
# { type: :tool_end, delta: "" }
|
|
40
40
|
#
|
|
41
|
+
# { type: :tool_result_start, tool_use_id: "...", name: "server_tool_result", delta: "..." }
|
|
42
|
+
# { type: :tool_result_delta, delta: "..." }
|
|
43
|
+
# { type: :tool_result_end, delta: "" }
|
|
44
|
+
#
|
|
41
45
|
# Mappers do not provide `content_index`. The accumulator assigns the next
|
|
42
46
|
# public content index when a block starts and reuses the active content
|
|
43
47
|
# index for that block's deltas and end event.
|
|
@@ -50,7 +54,16 @@ module LlmGateway
|
|
|
50
54
|
# The accumulator creates the public Assistant* event structs, updates its
|
|
51
55
|
# accumulated message state, then yields the created event to the callback.
|
|
52
56
|
attr_accessor :blocks, :message_hash, :usage_hash
|
|
53
|
-
attr_reader :active_block_type
|
|
57
|
+
attr_reader :active_block_type, :final_message
|
|
58
|
+
|
|
59
|
+
DEFAULT_USAGE = {
|
|
60
|
+
input: 0,
|
|
61
|
+
cache_write: 0,
|
|
62
|
+
cache_read: 0,
|
|
63
|
+
output: 0,
|
|
64
|
+
total: 0,
|
|
65
|
+
raw: {}
|
|
66
|
+
}.freeze
|
|
54
67
|
|
|
55
68
|
BLOCK_EVENT_TRANSITIONS = {
|
|
56
69
|
text_start: { block_type: :text, phase: :start },
|
|
@@ -59,33 +72,40 @@ module LlmGateway
|
|
|
59
72
|
tool_start: { block_type: :tool, phase: :start },
|
|
60
73
|
tool_delta: { block_type: :tool, phase: :delta },
|
|
61
74
|
tool_end: { block_type: :tool, phase: :end },
|
|
75
|
+
tool_result_start: { block_type: :tool_result, phase: :start },
|
|
76
|
+
tool_result_delta: { block_type: :tool_result, phase: :delta },
|
|
77
|
+
tool_result_end: { block_type: :tool_result, phase: :end },
|
|
62
78
|
reasoning_start: { block_type: :reasoning, phase: :start },
|
|
63
79
|
reasoning_delta: { block_type: :reasoning, phase: :delta },
|
|
64
80
|
reasoning_end: { block_type: :reasoning, phase: :end }
|
|
65
81
|
}.freeze
|
|
66
82
|
|
|
67
|
-
def initialize
|
|
83
|
+
def initialize(provider: nil, api: nil)
|
|
84
|
+
@provider = provider
|
|
85
|
+
@api = api
|
|
68
86
|
@message_hash = {}
|
|
69
|
-
@usage_hash =
|
|
70
|
-
input_tokens: 0,
|
|
71
|
-
cache_creation_input_tokens: 0,
|
|
72
|
-
cache_read_input_tokens: 0,
|
|
73
|
-
output_tokens: 0,
|
|
74
|
-
reasoning_tokens: 0
|
|
75
|
-
}
|
|
87
|
+
@usage_hash = default_usage
|
|
76
88
|
@blocks = []
|
|
77
89
|
@next_content_index = 0
|
|
78
90
|
@active_block_type = nil
|
|
79
91
|
@active_content_index = nil
|
|
92
|
+
@timestamp = nil
|
|
80
93
|
end
|
|
81
94
|
|
|
82
95
|
def result
|
|
96
|
+
ensure_timestamp!
|
|
97
|
+
|
|
83
98
|
message_hash.merge(
|
|
99
|
+
timestamp: @timestamp,
|
|
84
100
|
usage: usage_hash,
|
|
85
101
|
content: serialized_blocks
|
|
86
102
|
)
|
|
87
103
|
end
|
|
88
104
|
|
|
105
|
+
def final_result
|
|
106
|
+
result.merge(provider: @provider, api: @api)
|
|
107
|
+
end
|
|
108
|
+
|
|
89
109
|
def active_tool?
|
|
90
110
|
active_block_type == :tool
|
|
91
111
|
end
|
|
@@ -93,14 +113,22 @@ module LlmGateway
|
|
|
93
113
|
def push(event_patch, &block)
|
|
94
114
|
raise ArgumentError, "Normalized stream event patch must be a Hash" unless event_patch.is_a?(Hash)
|
|
95
115
|
|
|
96
|
-
event_patch = symbolize_keys
|
|
116
|
+
event_patch = event_patch.symbolize_keys
|
|
97
117
|
type = event_patch.fetch(:type).to_sym
|
|
98
118
|
event_patch = prepare_event_patch(event_patch.merge(type:), type)
|
|
119
|
+
ensure_timestamp!
|
|
99
120
|
|
|
100
|
-
|
|
121
|
+
if type == :message_end
|
|
122
|
+
@final_message = AssistantMessage.new(final_result)
|
|
123
|
+
block.call(AssistantStreamMessageEndEvent.new(type:, message: final_message)) if block
|
|
124
|
+
return nil
|
|
125
|
+
end
|
|
126
|
+
|
|
127
|
+
event = build_event(event_patch, partial: empty_partial)
|
|
101
128
|
accumulate(event)
|
|
102
129
|
content_index = event.content_index if event.respond_to?(:content_index)
|
|
103
130
|
commit_block_transition(type, content_index)
|
|
131
|
+
event = build_event(event_patch, partial: partial_message)
|
|
104
132
|
block.call(event) if block
|
|
105
133
|
|
|
106
134
|
nil
|
|
@@ -166,16 +194,21 @@ module LlmGateway
|
|
|
166
194
|
end
|
|
167
195
|
end
|
|
168
196
|
|
|
169
|
-
def build_event(event_patch)
|
|
170
|
-
event_patch = symbolize_keys
|
|
197
|
+
def build_event(event_patch, partial:)
|
|
198
|
+
event_patch = event_patch.symbolize_keys
|
|
171
199
|
type = event_patch.fetch(:type).to_sym
|
|
172
200
|
|
|
173
201
|
case type
|
|
174
|
-
when :message_start, :message_delta
|
|
202
|
+
when :message_start, :message_delta
|
|
203
|
+
delta = (event_patch[:delta] || {}).symbolize_keys
|
|
204
|
+
raw_usage = event_patch[:usage] || delta.delete(:usage) || {}
|
|
205
|
+
usage = raw_usage.empty? ? {} : normalized_usage(raw_usage)
|
|
206
|
+
|
|
175
207
|
AssistantStreamMessageEvent.new(
|
|
176
208
|
type:,
|
|
177
|
-
delta
|
|
178
|
-
|
|
209
|
+
delta:,
|
|
210
|
+
usage:,
|
|
211
|
+
partial:
|
|
179
212
|
)
|
|
180
213
|
when :tool_start
|
|
181
214
|
AssistantToolStartEvent.new(
|
|
@@ -183,20 +216,33 @@ module LlmGateway
|
|
|
183
216
|
content_index: event_patch.fetch(:content_index),
|
|
184
217
|
delta: string_value(event_patch[:delta]),
|
|
185
218
|
id: event_patch[:id],
|
|
186
|
-
name: event_patch[:name]
|
|
219
|
+
name: event_patch[:name],
|
|
220
|
+
partial:,
|
|
221
|
+
tool_type: event_patch[:tool_type] || "tool_use"
|
|
222
|
+
)
|
|
223
|
+
when :tool_result_start
|
|
224
|
+
AssistantToolResultStartEvent.new(
|
|
225
|
+
type:,
|
|
226
|
+
content_index: event_patch.fetch(:content_index),
|
|
227
|
+
delta: string_value(event_patch[:delta]),
|
|
228
|
+
tool_use_id: event_patch[:tool_use_id],
|
|
229
|
+
name: event_patch[:name],
|
|
230
|
+
partial:
|
|
187
231
|
)
|
|
188
232
|
when :reasoning_start, :reasoning_delta, :reasoning_end
|
|
189
233
|
AssistantStreamReasoningEvent.new(
|
|
190
234
|
type:,
|
|
191
235
|
content_index: event_patch.fetch(:content_index),
|
|
192
236
|
delta: string_value(event_patch[:delta]),
|
|
193
|
-
signature: string_value(event_patch[:signature])
|
|
237
|
+
signature: string_value(event_patch[:signature]),
|
|
238
|
+
partial:
|
|
194
239
|
)
|
|
195
|
-
when :text_start, :text_delta, :text_end, :tool_delta, :tool_end
|
|
240
|
+
when :text_start, :text_delta, :text_end, :tool_delta, :tool_end, :tool_result_delta, :tool_result_end
|
|
196
241
|
AssistantStreamEvent.new(
|
|
197
242
|
type:,
|
|
198
243
|
content_index: event_patch.fetch(:content_index),
|
|
199
|
-
delta: string_value(event_patch[:delta])
|
|
244
|
+
delta: string_value(event_patch[:delta]),
|
|
245
|
+
partial:
|
|
200
246
|
)
|
|
201
247
|
else
|
|
202
248
|
raise ArgumentError, "Unsupported normalized stream event type: #{type.inspect}"
|
|
@@ -204,6 +250,8 @@ module LlmGateway
|
|
|
204
250
|
end
|
|
205
251
|
|
|
206
252
|
def accumulate(event)
|
|
253
|
+
@timestamp = event.delta[:timestamp] if event.respond_to?(:delta) && event.delta.is_a?(Hash) && event.delta[:timestamp]
|
|
254
|
+
|
|
207
255
|
case event.type
|
|
208
256
|
when :text_start
|
|
209
257
|
blocks[event.content_index] = {
|
|
@@ -215,18 +263,23 @@ module LlmGateway
|
|
|
215
263
|
blocks[event.content_index][:text] += event.delta
|
|
216
264
|
when :tool_start
|
|
217
265
|
blocks[event.content_index] = {
|
|
218
|
-
type:
|
|
266
|
+
type: event.tool_type,
|
|
219
267
|
id: event.id,
|
|
220
268
|
name: event.name,
|
|
221
269
|
input: event.delta.to_s
|
|
222
270
|
}
|
|
223
271
|
when :tool_delta, :tool_end
|
|
224
272
|
blocks[event.content_index][:input] += event.delta
|
|
273
|
+
when :tool_result_start
|
|
274
|
+
blocks[event.content_index] = {
|
|
275
|
+
type: event.name,
|
|
276
|
+
tool_use_id: event.tool_use_id,
|
|
277
|
+
content: event.delta.to_s
|
|
278
|
+
}
|
|
279
|
+
when :tool_result_delta, :tool_result_end
|
|
280
|
+
blocks[event.content_index][:content] += event.delta
|
|
225
281
|
when :message_start
|
|
226
282
|
message_hash.merge!(event.delta)
|
|
227
|
-
usage_hash.each_key do |key|
|
|
228
|
-
usage_hash[key] += event.usage_increment.fetch(key, 0)
|
|
229
|
-
end
|
|
230
283
|
when :reasoning_start
|
|
231
284
|
blocks[event.content_index] = {
|
|
232
285
|
type: "reasoning",
|
|
@@ -240,36 +293,76 @@ module LlmGateway
|
|
|
240
293
|
blocks[event.content_index][:signature] += event.signature
|
|
241
294
|
when :message_delta
|
|
242
295
|
message_hash.merge!(event.delta)
|
|
243
|
-
|
|
244
|
-
usage_hash[key] += event.usage_increment.fetch(key, 0)
|
|
245
|
-
end
|
|
246
|
-
when :message_end
|
|
296
|
+
assign_usage(event.usage) unless event.usage.empty?
|
|
247
297
|
end
|
|
248
298
|
end
|
|
249
299
|
|
|
300
|
+
def empty_partial
|
|
301
|
+
PartialAssistantMessage.new(timestamp: @timestamp)
|
|
302
|
+
end
|
|
303
|
+
|
|
304
|
+
def partial_message
|
|
305
|
+
PartialAssistantMessage.new(partial_result)
|
|
306
|
+
end
|
|
307
|
+
|
|
308
|
+
def partial_result
|
|
309
|
+
ensure_timestamp!
|
|
310
|
+
|
|
311
|
+
message_hash.merge(
|
|
312
|
+
timestamp: @timestamp,
|
|
313
|
+
content: serialized_blocks
|
|
314
|
+
)
|
|
315
|
+
end
|
|
316
|
+
|
|
317
|
+
def assign_usage(usage)
|
|
318
|
+
@usage_hash = normalized_usage(usage)
|
|
319
|
+
end
|
|
320
|
+
|
|
321
|
+
def normalized_usage(usage)
|
|
322
|
+
usage = default_usage.merge(usage.to_h.symbolize_keys.slice(*DEFAULT_USAGE.keys))
|
|
323
|
+
usage[:total] = usage[:input] + usage[:cache_write] + usage[:cache_read] + usage[:output]
|
|
324
|
+
usage[:raw] ||= {}
|
|
325
|
+
usage
|
|
326
|
+
end
|
|
327
|
+
|
|
328
|
+
def default_usage
|
|
329
|
+
DEFAULT_USAGE.merge(raw: {})
|
|
330
|
+
end
|
|
331
|
+
|
|
250
332
|
def serialized_blocks
|
|
251
|
-
blocks.map do |content_block|
|
|
252
|
-
|
|
333
|
+
blocks.compact.map do |content_block|
|
|
334
|
+
if [ "tool_use", "server_tool_use" ].include?(content_block[:type])
|
|
335
|
+
next content_block.merge(input: parse_tool_input(content_block[:input]).deep_symbolize_keys)
|
|
336
|
+
end
|
|
337
|
+
|
|
338
|
+
if content_block[:type]&.end_with?("_tool_result")
|
|
339
|
+
next {
|
|
340
|
+
type: "server_tool_result",
|
|
341
|
+
tool_use_id: content_block[:tool_use_id],
|
|
342
|
+
name: content_block[:type],
|
|
343
|
+
content: parse_tool_input(content_block[:content]).deep_symbolize_keys
|
|
344
|
+
}
|
|
345
|
+
end
|
|
253
346
|
|
|
254
|
-
content_block
|
|
347
|
+
content_block
|
|
255
348
|
end
|
|
256
349
|
end
|
|
257
350
|
|
|
258
351
|
def parse_tool_input(input)
|
|
259
|
-
return {} if input.
|
|
352
|
+
return {} if input.blank?
|
|
260
353
|
|
|
261
354
|
JSON.parse(input)
|
|
262
355
|
rescue JSON::ParserError
|
|
263
356
|
{}
|
|
264
357
|
end
|
|
265
358
|
|
|
266
|
-
def symbolize_keys(hash)
|
|
267
|
-
hash.to_h.transform_keys { |key| key.respond_to?(:to_sym) ? key.to_sym : key }
|
|
268
|
-
end
|
|
269
|
-
|
|
270
359
|
def string_value(value)
|
|
271
360
|
value.nil? ? "" : value.to_s
|
|
272
361
|
end
|
|
362
|
+
|
|
363
|
+
def ensure_timestamp!
|
|
364
|
+
@timestamp ||= (Time.now.to_f * 1000).to_i
|
|
365
|
+
end
|
|
273
366
|
end
|
|
274
367
|
end
|
|
275
368
|
end
|
|
@@ -66,7 +66,7 @@ module LlmGateway
|
|
|
66
66
|
module_function
|
|
67
67
|
|
|
68
68
|
def map(options)
|
|
69
|
-
mapped_options = options.
|
|
69
|
+
mapped_options = options.except(*MANAGED_OPTIONS)
|
|
70
70
|
mapped_options[:max_completion_tokens] = options[:max_completion_tokens] || DEFAULT_MAX_COMPLETION_TOKENS
|
|
71
71
|
|
|
72
72
|
cache_key = options[:cache_key]
|