llm_gateway 0.4.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.pi/skills/live-provider-testing/SKILL.md +183 -0
- data/.pi/skills/options-development/SKILL.md +131 -0
- data/CHANGELOG.md +43 -0
- data/README.md +110 -41
- data/Rakefile +1 -0
- data/docs/migration_guide_0.6.0.md +386 -0
- data/lib/llm_gateway/adapters/adapter.rb +8 -44
- data/lib/llm_gateway/adapters/anthropic/acts_like_messages.rb +0 -2
- data/lib/llm_gateway/adapters/anthropic/input_mapper.rb +106 -27
- data/lib/llm_gateway/adapters/anthropic/output_mapper.rb +0 -33
- data/lib/llm_gateway/adapters/anthropic/stream_mapper.rb +59 -47
- data/lib/llm_gateway/adapters/anthropic_option_mapper.rb +48 -6
- data/lib/llm_gateway/adapters/groq/chat_completions_adapter.rb +3 -2
- data/lib/llm_gateway/adapters/groq/input_mapper.rb +44 -0
- data/lib/llm_gateway/adapters/groq/option_mapper.rb +89 -4
- data/lib/llm_gateway/adapters/normalized_stream_accumulator.rb +336 -0
- data/lib/llm_gateway/adapters/openai/acts_like_chat_completions.rb +0 -2
- data/lib/llm_gateway/adapters/openai/acts_like_responses.rb +0 -6
- data/lib/llm_gateway/adapters/openai/chat_completions/input_mapper.rb +135 -72
- data/lib/llm_gateway/adapters/openai/chat_completions/option_mapper.rb +100 -10
- data/lib/llm_gateway/adapters/openai/chat_completions/stream_mapper.rb +193 -170
- data/lib/llm_gateway/adapters/openai/chat_completions_adapter.rb +0 -1
- data/lib/llm_gateway/adapters/openai/responses/input_mapper.rb +128 -68
- data/lib/llm_gateway/adapters/openai/responses/option_mapper.rb +99 -10
- data/lib/llm_gateway/adapters/openai/responses/stream_mapper.rb +106 -275
- data/lib/llm_gateway/adapters/openai/responses_adapter.rb +0 -1
- data/lib/llm_gateway/adapters/openai_codex/input_mapper.rb +3 -3
- data/lib/llm_gateway/adapters/openai_codex/responses_adapter.rb +0 -5
- data/lib/llm_gateway/adapters/stream_mapper.rb +57 -0
- data/lib/llm_gateway/adapters/structs.rb +102 -52
- data/lib/llm_gateway/base_client.rb +2 -4
- data/lib/llm_gateway/client.rb +10 -66
- data/lib/llm_gateway/clients/anthropic.rb +5 -4
- data/lib/llm_gateway/clients/groq.rb +18 -4
- data/lib/llm_gateway/clients/openai.rb +20 -18
- data/lib/llm_gateway/prompt.rb +35 -17
- data/lib/llm_gateway/version.rb +1 -1
- data/lib/llm_gateway.rb +5 -29
- metadata +8 -10
- data/lib/llm_gateway/adapters/anthropic/bidirectional_message_mapper.rb +0 -111
- data/lib/llm_gateway/adapters/openai/chat_completions/bidirectional_message_mapper.rb +0 -110
- data/lib/llm_gateway/adapters/openai/chat_completions/output_mapper.rb +0 -40
- data/lib/llm_gateway/adapters/openai/responses/bidirectional_message_mapper.rb +0 -120
- data/lib/llm_gateway/adapters/openai/responses/output_mapper.rb +0 -47
- data/lib/llm_gateway/adapters/stream_accumulator.rb +0 -91
- data/scripts/generate_handoff_live_fixture.rb +0 -169
- data/scripts/generate_handoff_media_fixture.rb +0 -167
|
@@ -5,25 +5,115 @@ module LlmGateway
|
|
|
5
5
|
module OpenAI
|
|
6
6
|
module ChatCompletions
|
|
7
7
|
module OptionMapper
|
|
8
|
-
|
|
9
|
-
|
|
8
|
+
DEFAULT_MAX_COMPLETION_TOKENS = 20_480
|
|
10
9
|
VALID_REASONING_LEVELS = %w[low medium high xhigh].freeze
|
|
11
10
|
|
|
11
|
+
# Source: https://developers.openai.com/api/reference/resources/chat/subresources/completions/methods/create/index.md
|
|
12
|
+
# API: OpenAI Chat Completions Create; accessed 2026-05-18.
|
|
13
|
+
# Body parameters listed by the API reference: messages, model, audio,
|
|
14
|
+
# frequency_penalty, function_call, functions, logit_bias, logprobs,
|
|
15
|
+
# max_completion_tokens, max_tokens, metadata, modalities, n,
|
|
16
|
+
# parallel_tool_calls, prediction, presence_penalty, prompt_cache_key,
|
|
17
|
+
# prompt_cache_retention, reasoning_effort, response_format,
|
|
18
|
+
# safety_identifier, seed, service_tier, stop, store, stream,
|
|
19
|
+
# stream_options, temperature, tool_choice, tools, top_logprobs, top_p,
|
|
20
|
+
# user, verbosity, web_search_options.
|
|
21
|
+
# This mapper intentionally excludes transcript/tool structural fields
|
|
22
|
+
# (messages, tools) from option handling.
|
|
23
|
+
|
|
24
|
+
VALID_OPTIONS = %i[
|
|
25
|
+
model
|
|
26
|
+
audio
|
|
27
|
+
frequency_penalty
|
|
28
|
+
function_call
|
|
29
|
+
functions
|
|
30
|
+
logit_bias
|
|
31
|
+
logprobs
|
|
32
|
+
max_completion_tokens
|
|
33
|
+
max_tokens
|
|
34
|
+
metadata
|
|
35
|
+
modalities
|
|
36
|
+
n
|
|
37
|
+
parallel_tool_calls
|
|
38
|
+
prediction
|
|
39
|
+
presence_penalty
|
|
40
|
+
prompt_cache_key
|
|
41
|
+
prompt_cache_retention
|
|
42
|
+
reasoning_effort
|
|
43
|
+
response_format
|
|
44
|
+
safety_identifier
|
|
45
|
+
seed
|
|
46
|
+
service_tier
|
|
47
|
+
stop
|
|
48
|
+
store
|
|
49
|
+
stream
|
|
50
|
+
stream_options
|
|
51
|
+
temperature
|
|
52
|
+
tool_choice
|
|
53
|
+
top_logprobs
|
|
54
|
+
top_p
|
|
55
|
+
user
|
|
56
|
+
verbosity
|
|
57
|
+
web_search_options
|
|
58
|
+
].freeze
|
|
59
|
+
|
|
60
|
+
MANAGED_OPTIONS = %i[
|
|
61
|
+
reasoning
|
|
62
|
+
cache_key
|
|
63
|
+
cache_retention
|
|
64
|
+
].freeze
|
|
65
|
+
|
|
12
66
|
module_function
|
|
13
67
|
|
|
14
68
|
def map(options)
|
|
15
|
-
mapped_options = options.
|
|
16
|
-
mapped_options[:max_completion_tokens]
|
|
69
|
+
mapped_options = options.reject { |key, _| MANAGED_OPTIONS.include?(key) }
|
|
70
|
+
mapped_options[:max_completion_tokens] = options[:max_completion_tokens] || DEFAULT_MAX_COMPLETION_TOKENS
|
|
71
|
+
|
|
72
|
+
cache_key = options[:cache_key]
|
|
73
|
+
mapped_options[:prompt_cache_key] = cache_key unless cache_key.nil?
|
|
74
|
+
|
|
75
|
+
cache_retention = options[:cache_retention]
|
|
76
|
+
mapped_options[:prompt_cache_retention] = normalize_cache_retention(cache_retention) \
|
|
77
|
+
unless cache_retention.nil?
|
|
17
78
|
|
|
18
|
-
|
|
19
|
-
|
|
79
|
+
if mapped_options[:prompt_cache_key] && !mapped_options[:prompt_cache_retention]
|
|
80
|
+
mapped_options[:prompt_cache_retention] = normalize_cache_retention("short")
|
|
81
|
+
end
|
|
20
82
|
|
|
21
|
-
|
|
83
|
+
if cache_retention.to_s == "none"
|
|
84
|
+
mapped_options.delete(:prompt_cache_key)
|
|
85
|
+
mapped_options.delete(:prompt_cache_retention)
|
|
86
|
+
end
|
|
22
87
|
|
|
23
|
-
reasoning =
|
|
24
|
-
|
|
88
|
+
reasoning = options[:reasoning]
|
|
89
|
+
mapped_options[:reasoning_effort] = normalize_reasoning_effort(reasoning) \
|
|
90
|
+
unless reasoning.nil? || reasoning.to_s == "none"
|
|
91
|
+
|
|
92
|
+
validate_options!(mapped_options)
|
|
93
|
+
mapped_options
|
|
94
|
+
end
|
|
95
|
+
|
|
96
|
+
def validate_options!(mapped_options)
|
|
97
|
+
unknown_options = mapped_options.keys - VALID_OPTIONS
|
|
98
|
+
return if unknown_options.empty?
|
|
99
|
+
|
|
100
|
+
raise ArgumentError,
|
|
101
|
+
"Unknown OpenAI Chat Completions options: #{unknown_options.join(', ')}. " \
|
|
102
|
+
"Valid options: #{VALID_OPTIONS.join(', ')}."
|
|
103
|
+
end
|
|
25
104
|
|
|
26
|
-
|
|
105
|
+
def normalize_cache_retention(cache_retention)
|
|
106
|
+
case cache_retention.to_s
|
|
107
|
+
when "short"
|
|
108
|
+
"in_memory"
|
|
109
|
+
when "long"
|
|
110
|
+
"24h"
|
|
111
|
+
when "none"
|
|
112
|
+
nil
|
|
113
|
+
else
|
|
114
|
+
raise ArgumentError,
|
|
115
|
+
"Invalid cache_retention '#{cache_retention}'. Use 'short', 'long', or 'none'."
|
|
116
|
+
end
|
|
27
117
|
end
|
|
28
118
|
|
|
29
119
|
def normalize_reasoning_effort(reasoning)
|
|
@@ -1,188 +1,255 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require_relative "../../
|
|
3
|
+
require_relative "../../stream_mapper"
|
|
4
4
|
|
|
5
5
|
module LlmGateway
|
|
6
6
|
module Adapters
|
|
7
7
|
module OpenAI
|
|
8
8
|
module ChatCompletions
|
|
9
|
-
class StreamMapper
|
|
10
|
-
def map(chunk)
|
|
11
|
-
queued_event = shift_queued_event
|
|
12
|
-
return queued_event if queued_event
|
|
13
|
-
|
|
9
|
+
class StreamMapper < LlmGateway::Adapters::StreamMapper
|
|
10
|
+
def map(chunk, &block)
|
|
14
11
|
data = chunk[:data] || {}
|
|
15
12
|
raise_stream_error!(data) if chunk[:event] == "error" || data[:error] || data[:type] == "error"
|
|
16
13
|
|
|
17
|
-
|
|
14
|
+
push_patches(patches_for(data), &block)
|
|
15
|
+
end
|
|
18
16
|
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
end
|
|
17
|
+
private
|
|
18
|
+
|
|
19
|
+
def patches_for(data)
|
|
20
|
+
choices = data[:choices] || []
|
|
21
|
+
return final_usage_patches(data) if choices.empty?
|
|
25
22
|
|
|
26
23
|
choice = choices.first || {}
|
|
27
24
|
delta = choice[:delta] || {}
|
|
28
|
-
|
|
25
|
+
patches = []
|
|
26
|
+
active_block_type = accumulator.active_block_type
|
|
27
|
+
active_tool = active_tool_block
|
|
28
|
+
|
|
29
|
+
append_patches(patches, message_start_patches(data, delta))
|
|
30
|
+
|
|
31
|
+
active_block_type, active_tool = append_patches(
|
|
32
|
+
patches,
|
|
33
|
+
reasoning_patches(delta[:reasoning], active_block_type:),
|
|
34
|
+
active_block_type,
|
|
35
|
+
active_tool
|
|
36
|
+
)
|
|
37
|
+
active_block_type, active_tool = append_patches(
|
|
38
|
+
patches,
|
|
39
|
+
text_patches(delta[:content], active_block_type:),
|
|
40
|
+
active_block_type,
|
|
41
|
+
active_tool
|
|
42
|
+
)
|
|
43
|
+
delta.fetch(:tool_calls, []).each do |tool_call|
|
|
44
|
+
active_block_type, active_tool = append_patches(
|
|
45
|
+
patches,
|
|
46
|
+
patches_for_tool_call(tool_call, active_block_type:, active_tool:),
|
|
47
|
+
active_block_type,
|
|
48
|
+
active_tool
|
|
49
|
+
)
|
|
50
|
+
end
|
|
51
|
+
append_patches(patches, finish_patches(choice[:finish_reason], active_block_type:))
|
|
29
52
|
|
|
30
|
-
|
|
31
|
-
|
|
53
|
+
patches
|
|
54
|
+
end
|
|
32
55
|
|
|
33
|
-
|
|
56
|
+
def append_patches(patches, new_patches, active_block_type = nil, active_tool = nil)
|
|
57
|
+
patches.concat(new_patches)
|
|
58
|
+
|
|
59
|
+
new_patches.each do |patch|
|
|
60
|
+
case patch[:type]
|
|
61
|
+
when :text_start
|
|
62
|
+
active_block_type = :text
|
|
63
|
+
active_tool = nil
|
|
64
|
+
when :reasoning_start
|
|
65
|
+
active_block_type = :reasoning
|
|
66
|
+
active_tool = nil
|
|
67
|
+
when :tool_start
|
|
68
|
+
active_block_type = :tool
|
|
69
|
+
active_tool = { id: patch[:id], name: patch[:name] }
|
|
70
|
+
when :text_end, :reasoning_end, :tool_end
|
|
71
|
+
active_block_type = nil
|
|
72
|
+
active_tool = nil
|
|
73
|
+
end
|
|
74
|
+
end
|
|
34
75
|
|
|
35
|
-
|
|
76
|
+
[ active_block_type, active_tool ]
|
|
36
77
|
end
|
|
37
78
|
|
|
38
|
-
|
|
79
|
+
def message_start_patches(data, delta)
|
|
80
|
+
return [] unless accumulator.message_hash.empty?
|
|
39
81
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
82
|
+
return [] unless delta.key?(:role) ||
|
|
83
|
+
data[:id] ||
|
|
84
|
+
data[:model] ||
|
|
85
|
+
delta[:content] ||
|
|
86
|
+
delta[:reasoning] ||
|
|
87
|
+
delta[:tool_calls]&.any?
|
|
46
88
|
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
return AssistantStreamMessageEvent.new(
|
|
89
|
+
[
|
|
90
|
+
{
|
|
50
91
|
type: :message_start,
|
|
51
92
|
delta: {
|
|
52
93
|
id: data[:id],
|
|
53
94
|
model: data[:model],
|
|
54
|
-
role: delta[:role]
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
if (content = delta[:content]) && !content.empty?
|
|
61
|
-
return text_event(content, choice[:index] || 0)
|
|
62
|
-
end
|
|
63
|
-
|
|
64
|
-
return tool_event(delta[:tool_calls].first) if delta[:tool_calls]&.any?
|
|
95
|
+
role: delta[:role] || "assistant",
|
|
96
|
+
timestamp: timestamp_milliseconds(data[:created])
|
|
97
|
+
}.compact
|
|
98
|
+
}
|
|
99
|
+
]
|
|
100
|
+
end
|
|
65
101
|
|
|
66
|
-
|
|
102
|
+
# Groq exposes OpenAI-compatible chat completion chunks, but may include
|
|
103
|
+
# `delta.reasoning` before normal `delta.content`.
|
|
104
|
+
def reasoning_patches(reasoning, active_block_type: accumulator.active_block_type)
|
|
105
|
+
return [] if reasoning.to_s.empty?
|
|
106
|
+
|
|
107
|
+
[
|
|
108
|
+
*close_active_non_reasoning_patches(active_block_type:),
|
|
109
|
+
{
|
|
110
|
+
type: active_block_type == :reasoning ? :reasoning_delta : :reasoning_start,
|
|
111
|
+
delta: reasoning,
|
|
112
|
+
signature: ""
|
|
113
|
+
}
|
|
114
|
+
]
|
|
67
115
|
end
|
|
68
116
|
|
|
69
|
-
def
|
|
70
|
-
|
|
71
|
-
stash_pending_finish_delta(stop_reason: normalized)
|
|
117
|
+
def text_patches(content, active_block_type: accumulator.active_block_type)
|
|
118
|
+
return [] if content.to_s.empty?
|
|
72
119
|
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
120
|
+
[
|
|
121
|
+
*close_active_non_text_patches(active_block_type:),
|
|
122
|
+
{
|
|
123
|
+
type: active_block_type == :text ? :text_delta : :text_start,
|
|
124
|
+
delta: content
|
|
125
|
+
}
|
|
126
|
+
]
|
|
79
127
|
end
|
|
80
128
|
|
|
81
|
-
def
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
129
|
+
def patches_for_tool_call(tool_call, active_block_type: accumulator.active_block_type, active_tool: active_tool_block)
|
|
130
|
+
id = tool_call[:id]
|
|
131
|
+
name = tool_call.dig(:function, :name)
|
|
132
|
+
arguments = tool_call.dig(:function, :arguments).to_s
|
|
133
|
+
|
|
134
|
+
patches = []
|
|
135
|
+
|
|
136
|
+
if id || name
|
|
137
|
+
if active_block_type == :tool
|
|
138
|
+
patches.concat(close_active_block_patches(active_block_type:)) if new_active_tool?(id, name, active_tool:)
|
|
139
|
+
else
|
|
140
|
+
patches.concat(close_active_non_tool_patches(active_block_type:))
|
|
141
|
+
end
|
|
142
|
+
|
|
143
|
+
unless active_block_type == :tool && patches.empty?
|
|
144
|
+
patches << {
|
|
145
|
+
type: :tool_start,
|
|
146
|
+
delta: "",
|
|
147
|
+
id: id,
|
|
148
|
+
name: name
|
|
149
|
+
}
|
|
150
|
+
end
|
|
89
151
|
end
|
|
90
|
-
end
|
|
91
152
|
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
{
|
|
96
|
-
input_tokens: usage[:prompt_tokens] || 0,
|
|
97
|
-
cache_creation_input_tokens: 0,
|
|
98
|
-
cache_read_input_tokens: usage.dig(:prompt_tokens_details, :cached_tokens) || 0,
|
|
99
|
-
output_tokens: usage[:completion_tokens] || 0,
|
|
100
|
-
reasoning_tokens: usage.dig(:completion_tokens_details, :reasoning_tokens) || 0
|
|
101
|
-
}
|
|
153
|
+
patches << { type: :tool_delta, delta: arguments } unless arguments.empty?
|
|
154
|
+
patches
|
|
102
155
|
end
|
|
103
156
|
|
|
104
|
-
def
|
|
105
|
-
|
|
157
|
+
def new_active_tool?(id, name, active_tool: active_tool_block)
|
|
158
|
+
return true unless active_tool
|
|
106
159
|
|
|
107
|
-
|
|
108
|
-
AssistantStreamEvent.new(type: :text_delta, content_index:, delta: content)
|
|
109
|
-
else
|
|
110
|
-
started_text_blocks << content_index
|
|
111
|
-
AssistantStreamEvent.new(type: :text_start, content_index:, delta: content)
|
|
112
|
-
end
|
|
160
|
+
(id && active_tool[:id] != id) || (name && active_tool[:name] != name)
|
|
113
161
|
end
|
|
114
162
|
|
|
115
|
-
def
|
|
116
|
-
|
|
117
|
-
@last_started_tool_index = tool_index
|
|
118
|
-
function = tool_call[:function] || {}
|
|
119
|
-
arguments = function[:arguments] || ""
|
|
120
|
-
|
|
121
|
-
unless started_tool_blocks.include?(tool_index)
|
|
122
|
-
pending_tool_calls[tool_index] = merge_tool_call(pending_tool_calls[tool_index], tool_call)
|
|
123
|
-
pending = pending_tool_calls[tool_index]
|
|
163
|
+
def active_tool_block
|
|
164
|
+
return nil unless accumulator.active_tool?
|
|
124
165
|
|
|
125
|
-
|
|
166
|
+
accumulator.blocks.reverse.find { |block| block&.fetch(:type, nil) == "tool_use" }
|
|
167
|
+
end
|
|
126
168
|
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
169
|
+
def close_active_block_patches(active_block_type: accumulator.active_block_type)
|
|
170
|
+
case active_block_type
|
|
171
|
+
when :text
|
|
172
|
+
[ { type: :text_end, delta: "" } ]
|
|
173
|
+
when :reasoning
|
|
174
|
+
[ { type: :reasoning_end, delta: "", signature: "" } ]
|
|
175
|
+
when :tool
|
|
176
|
+
[ { type: :tool_end, delta: "" } ]
|
|
177
|
+
else
|
|
178
|
+
[]
|
|
135
179
|
end
|
|
136
|
-
|
|
137
|
-
AssistantStreamEvent.new(type: :tool_delta, content_index: tool_index, delta: arguments)
|
|
138
180
|
end
|
|
139
181
|
|
|
140
|
-
def
|
|
141
|
-
|
|
142
|
-
id: data[:id],
|
|
143
|
-
model: data[:model],
|
|
144
|
-
role: delta[:role]
|
|
145
|
-
}.compact
|
|
182
|
+
def close_active_non_text_patches(active_block_type: accumulator.active_block_type)
|
|
183
|
+
active_block_type == :text ? [] : close_active_block_patches(active_block_type:)
|
|
146
184
|
end
|
|
147
185
|
|
|
148
|
-
def
|
|
149
|
-
|
|
186
|
+
def close_active_non_reasoning_patches(active_block_type: accumulator.active_block_type)
|
|
187
|
+
active_block_type == :reasoning ? [] : close_active_block_patches(active_block_type:)
|
|
150
188
|
end
|
|
151
189
|
|
|
152
|
-
def
|
|
153
|
-
|
|
190
|
+
def close_active_non_tool_patches(active_block_type: accumulator.active_block_type)
|
|
191
|
+
active_block_type == :tool ? [] : close_active_block_patches(active_block_type:)
|
|
154
192
|
end
|
|
155
193
|
|
|
156
|
-
def
|
|
157
|
-
|
|
158
|
-
end
|
|
194
|
+
def finish_patches(finish_reason, active_block_type: accumulator.active_block_type)
|
|
195
|
+
return [] unless finish_reason
|
|
159
196
|
|
|
160
|
-
|
|
161
|
-
|
|
197
|
+
[
|
|
198
|
+
*close_active_block_patches(active_block_type:),
|
|
199
|
+
{
|
|
200
|
+
type: :message_delta,
|
|
201
|
+
delta: { stop_reason: normalize_stop_reason(finish_reason) }
|
|
202
|
+
}
|
|
203
|
+
]
|
|
162
204
|
end
|
|
163
205
|
|
|
164
|
-
def
|
|
165
|
-
|
|
166
|
-
|
|
206
|
+
def final_usage_patches(data)
|
|
207
|
+
patch = {
|
|
208
|
+
type: :message_delta,
|
|
209
|
+
delta: {}
|
|
210
|
+
}
|
|
211
|
+
patch[:usage] = usage(data) if data.key?(:usage)
|
|
167
212
|
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
213
|
+
[
|
|
214
|
+
patch,
|
|
215
|
+
{ type: :message_end }
|
|
216
|
+
]
|
|
217
|
+
end
|
|
171
218
|
|
|
172
|
-
|
|
173
|
-
|
|
219
|
+
def usage(data)
|
|
220
|
+
usage = data[:usage] || {}
|
|
221
|
+
cache_read = token_count(
|
|
222
|
+
usage.dig(:prompt_tokens_details, :cached_tokens),
|
|
223
|
+
usage[:prompt_cache_hit_tokens]
|
|
224
|
+
)
|
|
225
|
+
cache_write = token_count(
|
|
226
|
+
usage.dig(:prompt_tokens_details, :cache_write_tokens),
|
|
227
|
+
usage[:cache_write_tokens]
|
|
228
|
+
)
|
|
229
|
+
prompt_tokens = token_count(usage[:prompt_tokens])
|
|
230
|
+
input = [ prompt_tokens - cache_read - cache_write, 0 ].max
|
|
231
|
+
output = token_count(usage[:completion_tokens])
|
|
174
232
|
|
|
175
233
|
{
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
}
|
|
234
|
+
input:,
|
|
235
|
+
cache_write:,
|
|
236
|
+
cache_read:,
|
|
237
|
+
output:,
|
|
238
|
+
total: input + cache_write + cache_read + output,
|
|
239
|
+
raw: usage
|
|
183
240
|
}
|
|
184
241
|
end
|
|
185
242
|
|
|
243
|
+
def token_count(*values)
|
|
244
|
+
values.compact.first.to_i
|
|
245
|
+
end
|
|
246
|
+
|
|
247
|
+
def timestamp_milliseconds(unix_seconds)
|
|
248
|
+
return nil if unix_seconds.nil?
|
|
249
|
+
|
|
250
|
+
(unix_seconds.to_f * 1000).to_i
|
|
251
|
+
end
|
|
252
|
+
|
|
186
253
|
def normalize_stop_reason(finish_reason)
|
|
187
254
|
case finish_reason
|
|
188
255
|
when "tool_calls"
|
|
@@ -191,50 +258,6 @@ module LlmGateway
|
|
|
191
258
|
finish_reason
|
|
192
259
|
end
|
|
193
260
|
end
|
|
194
|
-
|
|
195
|
-
def message_started?
|
|
196
|
-
@message_started ||= false
|
|
197
|
-
end
|
|
198
|
-
|
|
199
|
-
def started_text_blocks
|
|
200
|
-
@started_text_blocks ||= []
|
|
201
|
-
end
|
|
202
|
-
|
|
203
|
-
def started_tool_blocks
|
|
204
|
-
@started_tool_blocks ||= []
|
|
205
|
-
end
|
|
206
|
-
|
|
207
|
-
def pending_tool_calls
|
|
208
|
-
@pending_tool_calls ||= {}
|
|
209
|
-
end
|
|
210
|
-
|
|
211
|
-
def last_started_text_index
|
|
212
|
-
@last_started_text_index
|
|
213
|
-
end
|
|
214
|
-
|
|
215
|
-
def last_started_tool_index
|
|
216
|
-
@last_started_tool_index
|
|
217
|
-
end
|
|
218
|
-
|
|
219
|
-
def shift_queued_event
|
|
220
|
-
queued_events.shift
|
|
221
|
-
end
|
|
222
|
-
|
|
223
|
-
def queued_events
|
|
224
|
-
@queued_events ||= []
|
|
225
|
-
end
|
|
226
|
-
|
|
227
|
-
def raise_stream_error!(data)
|
|
228
|
-
error = data[:error].is_a?(Hash) ? data[:error] : data
|
|
229
|
-
message = error[:message] || "Stream error"
|
|
230
|
-
code = error[:code] || error[:type]
|
|
231
|
-
|
|
232
|
-
if LlmGateway::Errors.context_overflow_message?(message)
|
|
233
|
-
raise LlmGateway::Errors::PromptTooLong.new(message, code)
|
|
234
|
-
end
|
|
235
|
-
|
|
236
|
-
raise LlmGateway::Errors::APIStatusError.new(message, code)
|
|
237
|
-
end
|
|
238
261
|
end
|
|
239
262
|
end
|
|
240
263
|
end
|
|
@@ -4,7 +4,6 @@ require_relative "../adapter"
|
|
|
4
4
|
require_relative "acts_like_chat_completions"
|
|
5
5
|
require_relative "chat_completions/input_mapper"
|
|
6
6
|
require_relative "chat_completions/input_message_sanitizer"
|
|
7
|
-
require_relative "chat_completions/output_mapper"
|
|
8
7
|
require_relative "chat_completions/option_mapper"
|
|
9
8
|
require_relative "file_output_mapper"
|
|
10
9
|
require_relative "chat_completions/stream_mapper"
|