llm_gateway 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.pi/skills/live-provider-testing/SKILL.md +183 -0
- data/.pi/skills/options-development/SKILL.md +131 -0
- data/CHANGELOG.md +17 -0
- data/README.md +16 -0
- data/Rakefile +1 -0
- data/lib/llm_gateway/adapters/adapter.rb +2 -35
- data/lib/llm_gateway/adapters/anthropic/acts_like_messages.rb +0 -2
- data/lib/llm_gateway/adapters/anthropic/input_mapper.rb +106 -27
- data/lib/llm_gateway/adapters/anthropic/output_mapper.rb +0 -33
- data/lib/llm_gateway/adapters/anthropic/stream_mapper.rb +31 -46
- data/lib/llm_gateway/adapters/anthropic_option_mapper.rb +48 -6
- data/lib/llm_gateway/adapters/groq/chat_completions_adapter.rb +3 -2
- data/lib/llm_gateway/adapters/groq/input_mapper.rb +44 -0
- data/lib/llm_gateway/adapters/groq/option_mapper.rb +89 -4
- data/lib/llm_gateway/adapters/normalized_stream_accumulator.rb +275 -0
- data/lib/llm_gateway/adapters/openai/acts_like_chat_completions.rb +0 -2
- data/lib/llm_gateway/adapters/openai/acts_like_responses.rb +0 -6
- data/lib/llm_gateway/adapters/openai/chat_completions/input_mapper.rb +135 -72
- data/lib/llm_gateway/adapters/openai/chat_completions/option_mapper.rb +100 -10
- data/lib/llm_gateway/adapters/openai/chat_completions/stream_mapper.rb +169 -170
- data/lib/llm_gateway/adapters/openai/chat_completions_adapter.rb +0 -1
- data/lib/llm_gateway/adapters/openai/responses/input_mapper.rb +128 -68
- data/lib/llm_gateway/adapters/openai/responses/option_mapper.rb +99 -10
- data/lib/llm_gateway/adapters/openai/responses/stream_mapper.rb +81 -271
- data/lib/llm_gateway/adapters/openai/responses_adapter.rb +0 -1
- data/lib/llm_gateway/adapters/openai_codex/input_mapper.rb +3 -3
- data/lib/llm_gateway/adapters/openai_codex/responses_adapter.rb +0 -5
- data/lib/llm_gateway/adapters/stream_mapper.rb +50 -0
- data/lib/llm_gateway/client.rb +10 -66
- data/lib/llm_gateway/clients/groq.rb +13 -1
- data/lib/llm_gateway/version.rb +1 -1
- data/lib/llm_gateway.rb +2 -8
- metadata +7 -10
- data/lib/llm_gateway/adapters/anthropic/bidirectional_message_mapper.rb +0 -111
- data/lib/llm_gateway/adapters/openai/chat_completions/bidirectional_message_mapper.rb +0 -110
- data/lib/llm_gateway/adapters/openai/chat_completions/output_mapper.rb +0 -40
- data/lib/llm_gateway/adapters/openai/responses/bidirectional_message_mapper.rb +0 -120
- data/lib/llm_gateway/adapters/openai/responses/output_mapper.rb +0 -47
- data/lib/llm_gateway/adapters/stream_accumulator.rb +0 -91
- data/scripts/generate_handoff_live_fixture.rb +0 -169
- data/scripts/generate_handoff_media_fixture.rb +0 -167
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require_relative "../
|
|
3
|
+
require_relative "../stream_mapper"
|
|
4
4
|
|
|
5
5
|
module LlmGateway
|
|
6
6
|
module Adapters
|
|
7
7
|
module Anthropic
|
|
8
|
-
class StreamMapper
|
|
9
|
-
def map(chunk)
|
|
8
|
+
class StreamMapper < LlmGateway::Adapters::StreamMapper
|
|
9
|
+
def map(chunk, &block)
|
|
10
10
|
case chunk[:event]
|
|
11
11
|
when "message_start"
|
|
12
12
|
delta = {
|
|
@@ -16,81 +16,66 @@ module LlmGateway
|
|
|
16
16
|
}
|
|
17
17
|
usage_increment = chunk.dig(:data, :message, :usage) || {}
|
|
18
18
|
|
|
19
|
-
|
|
19
|
+
accumulator.push({ type: :message_start, usage_increment:, delta: }, &block)
|
|
20
20
|
when "content_block_start"
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
current_type = chunk.dig(:data, :content_block, :type)
|
|
24
|
-
content_block_types[content_index] = current_type
|
|
21
|
+
content_block = chunk.dig(:data, :content_block) || {}
|
|
22
|
+
@current_content_block_type = content_block[:type]
|
|
25
23
|
|
|
26
|
-
case
|
|
24
|
+
case @current_content_block_type
|
|
27
25
|
when "thinking"
|
|
28
|
-
|
|
26
|
+
accumulator.push({ type: :reasoning_start, delta: content_block[:thinking], signature: "" }, &block)
|
|
29
27
|
when "text"
|
|
30
|
-
|
|
28
|
+
accumulator.push({ type: :text_start, delta: content_block[:text] }, &block)
|
|
31
29
|
when "tool_use"
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
30
|
+
accumulator.push(
|
|
31
|
+
{
|
|
32
|
+
type: :tool_start,
|
|
33
|
+
delta: "",
|
|
34
|
+
id: content_block[:id],
|
|
35
|
+
name: content_block[:name]
|
|
36
|
+
},
|
|
37
|
+
&block
|
|
38
|
+
)
|
|
35
39
|
end
|
|
36
40
|
when "content_block_delta"
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
case content_block_types[content_index]
|
|
41
|
+
case @current_content_block_type
|
|
40
42
|
when "thinking"
|
|
41
43
|
delta = chunk.dig(:data, :delta, :thinking)
|
|
42
|
-
signature = chunk.dig(:data, :delta, :signature)
|
|
43
|
-
|
|
44
|
+
signature = chunk.dig(:data, :delta, :signature) || ""
|
|
45
|
+
accumulator.push({ type: :reasoning_delta, signature:, delta: }, &block)
|
|
44
46
|
when "text"
|
|
45
47
|
delta = chunk.dig(:data, :delta, :text)
|
|
46
|
-
|
|
48
|
+
accumulator.push({ type: :text_delta, delta: }, &block)
|
|
47
49
|
when "tool_use"
|
|
48
50
|
delta = chunk.dig(:data, :delta, :partial_json)
|
|
49
|
-
|
|
51
|
+
accumulator.push({ type: :tool_delta, delta: }, &block)
|
|
50
52
|
end
|
|
51
53
|
when "content_block_stop"
|
|
52
|
-
|
|
53
|
-
type = case content_block_types[content_index]
|
|
54
|
+
case @current_content_block_type
|
|
54
55
|
when "thinking"
|
|
55
|
-
:reasoning_end
|
|
56
|
+
accumulator.push({ type: :reasoning_end, delta: "", signature: "" }, &block)
|
|
56
57
|
when "text"
|
|
57
|
-
:text_end
|
|
58
|
+
accumulator.push({ type: :text_end, delta: "" }, &block)
|
|
58
59
|
when "tool_use"
|
|
59
|
-
:tool_end
|
|
60
|
+
accumulator.push({ type: :tool_end, delta: "" }, &block)
|
|
60
61
|
end
|
|
61
|
-
|
|
62
|
+
@current_content_block_type = nil
|
|
62
63
|
when "message_delta"
|
|
63
64
|
delta = normalize_message_delta(chunk.dig(:data, :delta) || {})
|
|
64
65
|
usage_increment = chunk.dig(:data, :usage) || {}
|
|
65
66
|
|
|
66
|
-
|
|
67
|
+
accumulator.push({ type: :message_delta, usage_increment:, delta: }, &block)
|
|
67
68
|
when "message_stop"
|
|
68
|
-
|
|
69
|
+
accumulator.push({ type: :message_end }, &block)
|
|
69
70
|
when "ping"
|
|
70
71
|
nil
|
|
71
72
|
when "error"
|
|
72
|
-
|
|
73
|
-
message = error[:message] || "Stream error"
|
|
74
|
-
code = error[:type]
|
|
75
|
-
|
|
76
|
-
if LlmGateway::Errors.context_overflow_message?(message)
|
|
77
|
-
raise LlmGateway::Errors::PromptTooLong.new(message, code)
|
|
78
|
-
end
|
|
79
|
-
|
|
80
|
-
if code == "overloaded_error"
|
|
81
|
-
raise LlmGateway::Errors::OverloadError.new(message, code)
|
|
82
|
-
end
|
|
83
|
-
|
|
84
|
-
raise LlmGateway::Errors::APIStatusError.new(message, code)
|
|
73
|
+
raise_stream_error!(chunk.dig(:data, :error) || {}, overload_codes: [ "overloaded_error" ])
|
|
85
74
|
end
|
|
86
75
|
end
|
|
87
76
|
|
|
88
77
|
private
|
|
89
78
|
|
|
90
|
-
def content_block_types
|
|
91
|
-
@content_block_types ||= {}
|
|
92
|
-
end
|
|
93
|
-
|
|
94
79
|
def normalize_message_delta(delta)
|
|
95
80
|
return delta unless delta[:stop_reason] || delta["stop_reason"]
|
|
96
81
|
|
|
@@ -11,25 +11,67 @@ module LlmGateway
|
|
|
11
11
|
"xhigh" => 20 * 1024
|
|
12
12
|
}.freeze
|
|
13
13
|
|
|
14
|
+
# Source: https://platform.claude.com/docs/en/api/messages/create.md
|
|
15
|
+
# API: Anthropic Messages Create; accessed 2026-05-18.
|
|
16
|
+
# Body parameters listed by the API reference: max_tokens, messages, model,
|
|
17
|
+
# cache_control, container, inference_geo, metadata, output_config,
|
|
18
|
+
# service_tier, stop_sequences, stream, system, temperature, thinking,
|
|
19
|
+
# tool_choice, tools, top_k, top_p.
|
|
20
|
+
# This mapper intentionally excludes transcript/tool/system structural fields
|
|
21
|
+
# (messages, system, tool_choice, tools) from option handling.
|
|
22
|
+
|
|
23
|
+
VALID_OPTIONS = %i[
|
|
24
|
+
max_tokens
|
|
25
|
+
model
|
|
26
|
+
cache_control
|
|
27
|
+
cache_retention
|
|
28
|
+
container
|
|
29
|
+
inference_geo
|
|
30
|
+
metadata
|
|
31
|
+
output_config
|
|
32
|
+
service_tier
|
|
33
|
+
stop_sequences
|
|
34
|
+
stream
|
|
35
|
+
temperature
|
|
36
|
+
thinking
|
|
37
|
+
top_k
|
|
38
|
+
top_p
|
|
39
|
+
].freeze
|
|
40
|
+
|
|
41
|
+
MANAGED_OPTIONS = %i[
|
|
42
|
+
reasoning
|
|
43
|
+
max_completion_tokens
|
|
44
|
+
response_format
|
|
45
|
+
cache_key
|
|
46
|
+
prompt_cache_key
|
|
47
|
+
prompt_cache_retention
|
|
48
|
+
].freeze
|
|
49
|
+
|
|
14
50
|
module_function
|
|
15
51
|
|
|
16
52
|
def map(options)
|
|
17
|
-
mapped_options = options.reject { |key, _|
|
|
53
|
+
mapped_options = options.reject { |key, _| MANAGED_OPTIONS.include?(key) }
|
|
18
54
|
mapped_options[:max_tokens] = options[:max_completion_tokens] || DEFAULT_MAX_TOKENS
|
|
19
55
|
|
|
20
|
-
retention = options[:cache_retention]
|
|
21
|
-
mapped_options[:cache_retention] = retention unless retention.nil?
|
|
22
|
-
|
|
23
56
|
response_format = options[:response_format]
|
|
24
57
|
mapped_options[:output_config] = normalize_output_config(response_format) unless response_format.nil?
|
|
25
58
|
|
|
26
59
|
reasoning = options[:reasoning]
|
|
27
|
-
|
|
60
|
+
mapped_options[:thinking] = normalize_reasoning(reasoning) unless reasoning.nil? || reasoning.to_s == "none"
|
|
28
61
|
|
|
29
|
-
mapped_options
|
|
62
|
+
validate_options!(mapped_options)
|
|
30
63
|
mapped_options
|
|
31
64
|
end
|
|
32
65
|
|
|
66
|
+
def validate_options!(mapped_options)
|
|
67
|
+
unknown_options = mapped_options.keys - VALID_OPTIONS
|
|
68
|
+
return if unknown_options.empty?
|
|
69
|
+
|
|
70
|
+
raise ArgumentError,
|
|
71
|
+
"Unknown Anthropic Messages options: #{unknown_options.join(', ')}. " \
|
|
72
|
+
"Valid options: #{VALID_OPTIONS.join(', ')}."
|
|
73
|
+
end
|
|
74
|
+
|
|
33
75
|
def normalize_output_config(response_format)
|
|
34
76
|
format_type = response_format.is_a?(Hash) ? response_format[:type] || response_format["type"] : response_format
|
|
35
77
|
|
|
@@ -3,7 +3,8 @@
|
|
|
3
3
|
require_relative "../adapter"
|
|
4
4
|
require_relative "../openai/acts_like_chat_completions"
|
|
5
5
|
require_relative "../input_message_sanitizer"
|
|
6
|
-
require_relative "../openai/chat_completions/
|
|
6
|
+
require_relative "../openai/chat_completions/stream_mapper"
|
|
7
|
+
require_relative "input_mapper"
|
|
7
8
|
require_relative "option_mapper"
|
|
8
9
|
|
|
9
10
|
module LlmGateway
|
|
@@ -15,7 +16,7 @@ module LlmGateway
|
|
|
15
16
|
private
|
|
16
17
|
|
|
17
18
|
def file_output_mapper = nil
|
|
18
|
-
def
|
|
19
|
+
def input_mapper = Groq::InputMapper
|
|
19
20
|
def option_mapper = Groq::OptionMapper
|
|
20
21
|
|
|
21
22
|
def map_input(input)
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../openai/chat_completions/input_mapper"
|
|
4
|
+
|
|
5
|
+
module LlmGateway
|
|
6
|
+
module Adapters
|
|
7
|
+
module Groq
|
|
8
|
+
class InputMapper < OpenAI::ChatCompletions::InputMapper
|
|
9
|
+
def self.map(data)
|
|
10
|
+
mapped = super
|
|
11
|
+
mapped.merge(messages: map_groq_messages(mapped[:messages]))
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
def self.map_groq_messages(messages)
|
|
15
|
+
return messages unless messages.is_a?(Array)
|
|
16
|
+
|
|
17
|
+
messages.map { |message| map_groq_message(message) }
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
def self.map_groq_message(message)
|
|
21
|
+
return message unless message.is_a?(Hash) && message[:role] == "assistant"
|
|
22
|
+
return message unless message[:content].is_a?(Array)
|
|
23
|
+
|
|
24
|
+
reasoning_blocks, content_blocks = message[:content].partition do |block|
|
|
25
|
+
block.is_a?(Hash) && %w[reasoning thinking].include?(block[:type] || block["type"])
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
return message if reasoning_blocks.empty?
|
|
29
|
+
|
|
30
|
+
mapped = message.merge(content: content_blocks.empty? ? nil : content_blocks)
|
|
31
|
+
reasoning = reasoning_blocks.filter_map { |block| reasoning_text(block) }.join("\n")
|
|
32
|
+
mapped[:reasoning] = reasoning unless reasoning.empty?
|
|
33
|
+
mapped
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
def self.reasoning_text(block)
|
|
37
|
+
block[:reasoning] || block["reasoning"] || block[:thinking] || block["thinking"]
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
private_class_method :map_groq_messages, :map_groq_message, :reasoning_text
|
|
41
|
+
end
|
|
42
|
+
end
|
|
43
|
+
end
|
|
44
|
+
end
|
|
@@ -4,16 +4,94 @@ module LlmGateway
|
|
|
4
4
|
module Adapters
|
|
5
5
|
module Groq
|
|
6
6
|
module OptionMapper
|
|
7
|
+
DEFAULT_TEMPERATURE = 0
|
|
8
|
+
DEFAULT_MAX_COMPLETION_TOKENS = 20_480
|
|
9
|
+
VALID_REASONING_LEVELS = %w[default low medium high].freeze
|
|
10
|
+
|
|
11
|
+
# Source: https://console.groq.com/docs/text-chat.md and
|
|
12
|
+
# https://console.groq.com/docs/api-reference.md#chat-create
|
|
13
|
+
# API: Groq Chat Completions Create; accessed 2026-05-19.
|
|
14
|
+
# Body parameters listed by the API reference: messages, model,
|
|
15
|
+
# citation_options, compound_custom, disable_tool_validation, documents,
|
|
16
|
+
# exclude_domains, frequency_penalty, function_call, functions,
|
|
17
|
+
# include_domains, include_reasoning, logit_bias, logprobs,
|
|
18
|
+
# max_completion_tokens, max_tokens, metadata, n, parallel_tool_calls,
|
|
19
|
+
# presence_penalty, reasoning_effort, reasoning_format, response_format,
|
|
20
|
+
# search_settings, seed, service_tier, stop, store, stream,
|
|
21
|
+
# stream_options, temperature, tool_choice, tools, top_logprobs, top_p,
|
|
22
|
+
# user.
|
|
23
|
+
# This mapper intentionally excludes transcript/tool structural fields
|
|
24
|
+
# (messages, tools) from option handling.
|
|
25
|
+
VALID_OPTIONS = %i[
|
|
26
|
+
model
|
|
27
|
+
citation_options
|
|
28
|
+
compound_custom
|
|
29
|
+
disable_tool_validation
|
|
30
|
+
documents
|
|
31
|
+
exclude_domains
|
|
32
|
+
frequency_penalty
|
|
33
|
+
function_call
|
|
34
|
+
functions
|
|
35
|
+
include_domains
|
|
36
|
+
include_reasoning
|
|
37
|
+
logit_bias
|
|
38
|
+
logprobs
|
|
39
|
+
max_completion_tokens
|
|
40
|
+
max_tokens
|
|
41
|
+
metadata
|
|
42
|
+
n
|
|
43
|
+
parallel_tool_calls
|
|
44
|
+
presence_penalty
|
|
45
|
+
reasoning_effort
|
|
46
|
+
reasoning_format
|
|
47
|
+
response_format
|
|
48
|
+
search_settings
|
|
49
|
+
seed
|
|
50
|
+
service_tier
|
|
51
|
+
stop
|
|
52
|
+
store
|
|
53
|
+
stream
|
|
54
|
+
stream_options
|
|
55
|
+
temperature
|
|
56
|
+
tool_choice
|
|
57
|
+
top_logprobs
|
|
58
|
+
top_p
|
|
59
|
+
user
|
|
60
|
+
].freeze
|
|
61
|
+
|
|
62
|
+
MANAGED_OPTIONS = %i[
|
|
63
|
+
reasoning
|
|
64
|
+
cache_key
|
|
65
|
+
cache_retention
|
|
66
|
+
].freeze
|
|
67
|
+
|
|
7
68
|
module_function
|
|
8
69
|
|
|
9
70
|
def map(options)
|
|
10
|
-
mapped_options = options.
|
|
11
|
-
mapped_options[:temperature]
|
|
12
|
-
mapped_options[:max_completion_tokens]
|
|
13
|
-
mapped_options[:response_format] = normalize_response_format(
|
|
71
|
+
mapped_options = options.reject { |key, _| MANAGED_OPTIONS.include?(key) }
|
|
72
|
+
mapped_options[:temperature] = options.key?(:temperature) ? options[:temperature] : DEFAULT_TEMPERATURE
|
|
73
|
+
mapped_options[:max_completion_tokens] = options[:max_completion_tokens] || DEFAULT_MAX_COMPLETION_TOKENS
|
|
74
|
+
mapped_options[:response_format] = normalize_response_format(options[:response_format] || "text")
|
|
75
|
+
|
|
76
|
+
reasoning = options[:reasoning]
|
|
77
|
+
unless reasoning.nil? || reasoning.to_s == "none"
|
|
78
|
+
mapped_options[:reasoning_effort] = normalize_reasoning_effort(reasoning)
|
|
79
|
+
mapped_options[:reasoning_format] = "parsed"
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
validate_options!(mapped_options)
|
|
14
83
|
mapped_options
|
|
15
84
|
end
|
|
16
85
|
|
|
86
|
+
def validate_options!(mapped_options)
|
|
87
|
+
unknown_options = mapped_options.keys - VALID_OPTIONS
|
|
88
|
+
return if unknown_options.empty?
|
|
89
|
+
|
|
90
|
+
raise ArgumentError,
|
|
91
|
+
"Unknown Groq Chat Completions options: #{unknown_options.join(', ')}. " \
|
|
92
|
+
"Valid options: #{VALID_OPTIONS.join(', ')}."
|
|
93
|
+
end
|
|
94
|
+
|
|
17
95
|
def normalize_response_format(response_format)
|
|
18
96
|
if response_format.is_a?(String)
|
|
19
97
|
{ type: response_format }
|
|
@@ -21,6 +99,13 @@ module LlmGateway
|
|
|
21
99
|
response_format
|
|
22
100
|
end
|
|
23
101
|
end
|
|
102
|
+
|
|
103
|
+
def normalize_reasoning_effort(reasoning)
|
|
104
|
+
effort = reasoning.to_s
|
|
105
|
+
return effort if VALID_REASONING_LEVELS.include?(effort)
|
|
106
|
+
|
|
107
|
+
raise ArgumentError, "Invalid reasoning '#{reasoning}'. Use 'none', 'default', 'low', 'medium', or 'high'."
|
|
108
|
+
end
|
|
24
109
|
end
|
|
25
110
|
end
|
|
26
111
|
end
|
|
@@ -0,0 +1,275 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
|
|
5
|
+
require_relative "../utils"
|
|
6
|
+
require_relative "structs"
|
|
7
|
+
|
|
8
|
+
module LlmGateway
|
|
9
|
+
module Adapters
|
|
10
|
+
class NormalizedStreamAccumulator
|
|
11
|
+
# Contract:
|
|
12
|
+
#
|
|
13
|
+
# `push` accepts a single provider-independent, normalized stream event
|
|
14
|
+
# patch hash. Event patches are never arrays; mappers call `push` once per
|
|
15
|
+
# patch.
|
|
16
|
+
#
|
|
17
|
+
# Provider wire events such as Anthropic `message_start` /
|
|
18
|
+
# `content_block_start`, OpenAI `response.output_text.delta`, etc. must be
|
|
19
|
+
# translated by the mapper before calling this accumulator. The normalized
|
|
20
|
+
# symbol `:message_start` below is allowed; the raw provider event string is
|
|
21
|
+
# not.
|
|
22
|
+
#
|
|
23
|
+
# Accepted event shapes:
|
|
24
|
+
#
|
|
25
|
+
# { type: :message_start, delta: { id: "...", model: "...", role: "assistant" }, usage_increment: { ... } }
|
|
26
|
+
# { type: :message_delta, delta: { stop_reason: "stop" }, usage_increment: { ... } }
|
|
27
|
+
# { type: :message_end }
|
|
28
|
+
#
|
|
29
|
+
# { type: :text_start, delta: "hi" }
|
|
30
|
+
# { type: :text_delta, delta: " there" }
|
|
31
|
+
# { type: :text_end, delta: "" }
|
|
32
|
+
#
|
|
33
|
+
# { type: :reasoning_start, delta: "thinking", signature: "" }
|
|
34
|
+
# { type: :reasoning_delta, delta: "...", signature: "" }
|
|
35
|
+
# { type: :reasoning_end, delta: "", signature: "" }
|
|
36
|
+
#
|
|
37
|
+
# { type: :tool_start, id: "...", name: "tool_name", delta: "" }
|
|
38
|
+
# { type: :tool_delta, delta: "{\"a\":" }
|
|
39
|
+
# { type: :tool_end, delta: "" }
|
|
40
|
+
#
|
|
41
|
+
# Mappers do not provide `content_index`. The accumulator assigns the next
|
|
42
|
+
# public content index when a block starts and reuses the active content
|
|
43
|
+
# index for that block's deltas and end event.
|
|
44
|
+
#
|
|
45
|
+
# Without source indexes, the accumulator cannot detect two interleaved
|
|
46
|
+
# blocks of the same type. Providers that can interleave same-type blocks
|
|
47
|
+
# must buffer or serialize them in the mapper before pushing normalized
|
|
48
|
+
# events.
|
|
49
|
+
#
|
|
50
|
+
# The accumulator creates the public Assistant* event structs, updates its
|
|
51
|
+
# accumulated message state, then yields the created event to the callback.
|
|
52
|
+
attr_accessor :blocks, :message_hash, :usage_hash
|
|
53
|
+
attr_reader :active_block_type
|
|
54
|
+
|
|
55
|
+
BLOCK_EVENT_TRANSITIONS = {
|
|
56
|
+
text_start: { block_type: :text, phase: :start },
|
|
57
|
+
text_delta: { block_type: :text, phase: :delta },
|
|
58
|
+
text_end: { block_type: :text, phase: :end },
|
|
59
|
+
tool_start: { block_type: :tool, phase: :start },
|
|
60
|
+
tool_delta: { block_type: :tool, phase: :delta },
|
|
61
|
+
tool_end: { block_type: :tool, phase: :end },
|
|
62
|
+
reasoning_start: { block_type: :reasoning, phase: :start },
|
|
63
|
+
reasoning_delta: { block_type: :reasoning, phase: :delta },
|
|
64
|
+
reasoning_end: { block_type: :reasoning, phase: :end }
|
|
65
|
+
}.freeze
|
|
66
|
+
|
|
67
|
+
def initialize
|
|
68
|
+
@message_hash = {}
|
|
69
|
+
@usage_hash = {
|
|
70
|
+
input_tokens: 0,
|
|
71
|
+
cache_creation_input_tokens: 0,
|
|
72
|
+
cache_read_input_tokens: 0,
|
|
73
|
+
output_tokens: 0,
|
|
74
|
+
reasoning_tokens: 0
|
|
75
|
+
}
|
|
76
|
+
@blocks = []
|
|
77
|
+
@next_content_index = 0
|
|
78
|
+
@active_block_type = nil
|
|
79
|
+
@active_content_index = nil
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
def result
|
|
83
|
+
message_hash.merge(
|
|
84
|
+
usage: usage_hash,
|
|
85
|
+
content: serialized_blocks
|
|
86
|
+
)
|
|
87
|
+
end
|
|
88
|
+
|
|
89
|
+
def active_tool?
|
|
90
|
+
active_block_type == :tool
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
def push(event_patch, &block)
|
|
94
|
+
raise ArgumentError, "Normalized stream event patch must be a Hash" unless event_patch.is_a?(Hash)
|
|
95
|
+
|
|
96
|
+
event_patch = symbolize_keys(event_patch)
|
|
97
|
+
type = event_patch.fetch(:type).to_sym
|
|
98
|
+
event_patch = prepare_event_patch(event_patch.merge(type:), type)
|
|
99
|
+
|
|
100
|
+
event = build_event(event_patch)
|
|
101
|
+
accumulate(event)
|
|
102
|
+
content_index = event.content_index if event.respond_to?(:content_index)
|
|
103
|
+
commit_block_transition(type, content_index)
|
|
104
|
+
block.call(event) if block
|
|
105
|
+
|
|
106
|
+
nil
|
|
107
|
+
end
|
|
108
|
+
|
|
109
|
+
private
|
|
110
|
+
|
|
111
|
+
def prepare_event_patch(event_patch, type)
|
|
112
|
+
transition = BLOCK_EVENT_TRANSITIONS[type]
|
|
113
|
+
return event_patch unless transition
|
|
114
|
+
|
|
115
|
+
block_type = transition[:block_type]
|
|
116
|
+
|
|
117
|
+
case transition[:phase]
|
|
118
|
+
when :start
|
|
119
|
+
validate_start!(block_type)
|
|
120
|
+
event_patch.merge(content_index: @next_content_index)
|
|
121
|
+
when :delta
|
|
122
|
+
validate_delta!(type, block_type)
|
|
123
|
+
event_patch.merge(content_index: @active_content_index)
|
|
124
|
+
when :end
|
|
125
|
+
validate_end!(block_type)
|
|
126
|
+
event_patch.merge(content_index: @active_content_index)
|
|
127
|
+
end
|
|
128
|
+
end
|
|
129
|
+
|
|
130
|
+
def validate_start!(block_type)
|
|
131
|
+
return unless @active_block_type
|
|
132
|
+
|
|
133
|
+
raise ArgumentError, "Cannot start #{block_type} block while #{@active_block_type} block is active"
|
|
134
|
+
end
|
|
135
|
+
|
|
136
|
+
def validate_delta!(type, block_type)
|
|
137
|
+
unless @active_block_type
|
|
138
|
+
raise ArgumentError, "Cannot apply #{type} without an active #{block_type} block"
|
|
139
|
+
end
|
|
140
|
+
return if @active_block_type == block_type
|
|
141
|
+
|
|
142
|
+
raise ArgumentError, "Cannot apply #{type} while #{@active_block_type} block is active"
|
|
143
|
+
end
|
|
144
|
+
|
|
145
|
+
def validate_end!(block_type)
|
|
146
|
+
unless @active_block_type
|
|
147
|
+
raise ArgumentError, "Cannot end #{block_type} block without an active #{block_type} block"
|
|
148
|
+
end
|
|
149
|
+
return if @active_block_type == block_type
|
|
150
|
+
|
|
151
|
+
raise ArgumentError, "Cannot end #{block_type} block while #{@active_block_type} block is active"
|
|
152
|
+
end
|
|
153
|
+
|
|
154
|
+
def commit_block_transition(type, content_index)
|
|
155
|
+
transition = BLOCK_EVENT_TRANSITIONS[type]
|
|
156
|
+
return unless transition
|
|
157
|
+
|
|
158
|
+
case transition[:phase]
|
|
159
|
+
when :start
|
|
160
|
+
@active_block_type = transition[:block_type]
|
|
161
|
+
@active_content_index = content_index
|
|
162
|
+
@next_content_index += 1
|
|
163
|
+
when :end
|
|
164
|
+
@active_block_type = nil
|
|
165
|
+
@active_content_index = nil
|
|
166
|
+
end
|
|
167
|
+
end
|
|
168
|
+
|
|
169
|
+
def build_event(event_patch)
|
|
170
|
+
event_patch = symbolize_keys(event_patch)
|
|
171
|
+
type = event_patch.fetch(:type).to_sym
|
|
172
|
+
|
|
173
|
+
case type
|
|
174
|
+
when :message_start, :message_delta, :message_end
|
|
175
|
+
AssistantStreamMessageEvent.new(
|
|
176
|
+
type:,
|
|
177
|
+
delta: symbolize_keys(event_patch[:delta] || {}),
|
|
178
|
+
usage_increment: symbolize_keys(event_patch[:usage_increment] || {})
|
|
179
|
+
)
|
|
180
|
+
when :tool_start
|
|
181
|
+
AssistantToolStartEvent.new(
|
|
182
|
+
type:,
|
|
183
|
+
content_index: event_patch.fetch(:content_index),
|
|
184
|
+
delta: string_value(event_patch[:delta]),
|
|
185
|
+
id: event_patch[:id],
|
|
186
|
+
name: event_patch[:name]
|
|
187
|
+
)
|
|
188
|
+
when :reasoning_start, :reasoning_delta, :reasoning_end
|
|
189
|
+
AssistantStreamReasoningEvent.new(
|
|
190
|
+
type:,
|
|
191
|
+
content_index: event_patch.fetch(:content_index),
|
|
192
|
+
delta: string_value(event_patch[:delta]),
|
|
193
|
+
signature: string_value(event_patch[:signature])
|
|
194
|
+
)
|
|
195
|
+
when :text_start, :text_delta, :text_end, :tool_delta, :tool_end
|
|
196
|
+
AssistantStreamEvent.new(
|
|
197
|
+
type:,
|
|
198
|
+
content_index: event_patch.fetch(:content_index),
|
|
199
|
+
delta: string_value(event_patch[:delta])
|
|
200
|
+
)
|
|
201
|
+
else
|
|
202
|
+
raise ArgumentError, "Unsupported normalized stream event type: #{type.inspect}"
|
|
203
|
+
end
|
|
204
|
+
end
|
|
205
|
+
|
|
206
|
+
def accumulate(event)
|
|
207
|
+
case event.type
|
|
208
|
+
when :text_start
|
|
209
|
+
blocks[event.content_index] = {
|
|
210
|
+
type: "text",
|
|
211
|
+
text: ""
|
|
212
|
+
}
|
|
213
|
+
blocks[event.content_index][:text] += event.delta
|
|
214
|
+
when :text_delta, :text_end
|
|
215
|
+
blocks[event.content_index][:text] += event.delta
|
|
216
|
+
when :tool_start
|
|
217
|
+
blocks[event.content_index] = {
|
|
218
|
+
type: "tool_use",
|
|
219
|
+
id: event.id,
|
|
220
|
+
name: event.name,
|
|
221
|
+
input: event.delta.to_s
|
|
222
|
+
}
|
|
223
|
+
when :tool_delta, :tool_end
|
|
224
|
+
blocks[event.content_index][:input] += event.delta
|
|
225
|
+
when :message_start
|
|
226
|
+
message_hash.merge!(event.delta)
|
|
227
|
+
usage_hash.each_key do |key|
|
|
228
|
+
usage_hash[key] += event.usage_increment.fetch(key, 0)
|
|
229
|
+
end
|
|
230
|
+
when :reasoning_start
|
|
231
|
+
blocks[event.content_index] = {
|
|
232
|
+
type: "reasoning",
|
|
233
|
+
reasoning: "",
|
|
234
|
+
signature: ""
|
|
235
|
+
}
|
|
236
|
+
blocks[event.content_index][:reasoning] += event.delta
|
|
237
|
+
blocks[event.content_index][:signature] += event.signature
|
|
238
|
+
when :reasoning_delta, :reasoning_end
|
|
239
|
+
blocks[event.content_index][:reasoning] += event.delta
|
|
240
|
+
blocks[event.content_index][:signature] += event.signature
|
|
241
|
+
when :message_delta
|
|
242
|
+
message_hash.merge!(event.delta)
|
|
243
|
+
usage_hash.each_key do |key|
|
|
244
|
+
usage_hash[key] += event.usage_increment.fetch(key, 0)
|
|
245
|
+
end
|
|
246
|
+
when :message_end
|
|
247
|
+
end
|
|
248
|
+
end
|
|
249
|
+
|
|
250
|
+
def serialized_blocks
|
|
251
|
+
blocks.map do |content_block|
|
|
252
|
+
next content_block unless content_block[:type] == "tool_use"
|
|
253
|
+
|
|
254
|
+
content_block.merge(input: LlmGateway::Utils.deep_symbolize_keys(parse_tool_input(content_block[:input])))
|
|
255
|
+
end
|
|
256
|
+
end
|
|
257
|
+
|
|
258
|
+
def parse_tool_input(input)
|
|
259
|
+
return {} if input.nil? || input.empty?
|
|
260
|
+
|
|
261
|
+
JSON.parse(input)
|
|
262
|
+
rescue JSON::ParserError
|
|
263
|
+
{}
|
|
264
|
+
end
|
|
265
|
+
|
|
266
|
+
def symbolize_keys(hash)
|
|
267
|
+
hash.to_h.transform_keys { |key| key.respond_to?(:to_sym) ? key.to_sym : key }
|
|
268
|
+
end
|
|
269
|
+
|
|
270
|
+
def string_value(value)
|
|
271
|
+
value.nil? ? "" : value.to_s
|
|
272
|
+
end
|
|
273
|
+
end
|
|
274
|
+
end
|
|
275
|
+
end
|
|
@@ -10,8 +10,6 @@ module LlmGateway
|
|
|
10
10
|
|
|
11
11
|
def input_sanitizer = OpenAI::ChatCompletions::InputMessageSanitizer
|
|
12
12
|
|
|
13
|
-
def output_mapper = OpenAI::ChatCompletions::OutputMapper
|
|
14
|
-
|
|
15
13
|
def file_output_mapper = OpenAI::FileOutputMapper
|
|
16
14
|
|
|
17
15
|
def option_mapper = OpenAI::ChatCompletions::OptionMapper
|
|
@@ -11,18 +11,12 @@ module LlmGateway
|
|
|
11
11
|
|
|
12
12
|
def input_sanitizer = InputMessageSanitizer
|
|
13
13
|
|
|
14
|
-
def output_mapper = OpenAI::Responses::OutputMapper
|
|
15
|
-
|
|
16
14
|
def file_output_mapper = OpenAI::FileOutputMapper
|
|
17
15
|
|
|
18
16
|
def option_mapper = OpenAI::Responses::OptionMapper
|
|
19
17
|
|
|
20
18
|
def stream_mapper = OpenAI::Responses::StreamMapper
|
|
21
19
|
|
|
22
|
-
def perform_chat(messages, tools:, system:, **options)
|
|
23
|
-
client.responses(messages, tools: tools, system: system, **options)
|
|
24
|
-
end
|
|
25
|
-
|
|
26
20
|
def perform_stream(messages, tools:, system:, **options, &block)
|
|
27
21
|
client.stream_responses(messages, tools: tools, system: system, **options, &block)
|
|
28
22
|
end
|