llm_gateway 0.4.0 → 0.6.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. checksums.yaml +4 -4
  2. data/.pi/skills/live-provider-testing/SKILL.md +183 -0
  3. data/.pi/skills/options-development/SKILL.md +131 -0
  4. data/CHANGELOG.md +43 -0
  5. data/README.md +110 -41
  6. data/Rakefile +1 -0
  7. data/docs/migration_guide_0.6.0.md +386 -0
  8. data/lib/llm_gateway/adapters/adapter.rb +8 -44
  9. data/lib/llm_gateway/adapters/anthropic/acts_like_messages.rb +0 -2
  10. data/lib/llm_gateway/adapters/anthropic/input_mapper.rb +106 -27
  11. data/lib/llm_gateway/adapters/anthropic/output_mapper.rb +0 -33
  12. data/lib/llm_gateway/adapters/anthropic/stream_mapper.rb +59 -47
  13. data/lib/llm_gateway/adapters/anthropic_option_mapper.rb +48 -6
  14. data/lib/llm_gateway/adapters/groq/chat_completions_adapter.rb +3 -2
  15. data/lib/llm_gateway/adapters/groq/input_mapper.rb +44 -0
  16. data/lib/llm_gateway/adapters/groq/option_mapper.rb +89 -4
  17. data/lib/llm_gateway/adapters/normalized_stream_accumulator.rb +336 -0
  18. data/lib/llm_gateway/adapters/openai/acts_like_chat_completions.rb +0 -2
  19. data/lib/llm_gateway/adapters/openai/acts_like_responses.rb +0 -6
  20. data/lib/llm_gateway/adapters/openai/chat_completions/input_mapper.rb +135 -72
  21. data/lib/llm_gateway/adapters/openai/chat_completions/option_mapper.rb +100 -10
  22. data/lib/llm_gateway/adapters/openai/chat_completions/stream_mapper.rb +193 -170
  23. data/lib/llm_gateway/adapters/openai/chat_completions_adapter.rb +0 -1
  24. data/lib/llm_gateway/adapters/openai/responses/input_mapper.rb +128 -68
  25. data/lib/llm_gateway/adapters/openai/responses/option_mapper.rb +99 -10
  26. data/lib/llm_gateway/adapters/openai/responses/stream_mapper.rb +106 -275
  27. data/lib/llm_gateway/adapters/openai/responses_adapter.rb +0 -1
  28. data/lib/llm_gateway/adapters/openai_codex/input_mapper.rb +3 -3
  29. data/lib/llm_gateway/adapters/openai_codex/responses_adapter.rb +0 -5
  30. data/lib/llm_gateway/adapters/stream_mapper.rb +57 -0
  31. data/lib/llm_gateway/adapters/structs.rb +102 -52
  32. data/lib/llm_gateway/base_client.rb +2 -4
  33. data/lib/llm_gateway/client.rb +10 -66
  34. data/lib/llm_gateway/clients/anthropic.rb +5 -4
  35. data/lib/llm_gateway/clients/groq.rb +18 -4
  36. data/lib/llm_gateway/clients/openai.rb +20 -18
  37. data/lib/llm_gateway/prompt.rb +35 -17
  38. data/lib/llm_gateway/version.rb +1 -1
  39. data/lib/llm_gateway.rb +5 -29
  40. metadata +8 -10
  41. data/lib/llm_gateway/adapters/anthropic/bidirectional_message_mapper.rb +0 -111
  42. data/lib/llm_gateway/adapters/openai/chat_completions/bidirectional_message_mapper.rb +0 -110
  43. data/lib/llm_gateway/adapters/openai/chat_completions/output_mapper.rb +0 -40
  44. data/lib/llm_gateway/adapters/openai/responses/bidirectional_message_mapper.rb +0 -120
  45. data/lib/llm_gateway/adapters/openai/responses/output_mapper.rb +0 -47
  46. data/lib/llm_gateway/adapters/stream_accumulator.rb +0 -91
  47. data/scripts/generate_handoff_live_fixture.rb +0 -169
  48. data/scripts/generate_handoff_media_fixture.rb +0 -167
@@ -1,12 +1,12 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "../structs.rb"
3
+ require_relative "../stream_mapper"
4
4
 
5
5
  module LlmGateway
6
6
  module Adapters
7
7
  module Anthropic
8
- class StreamMapper
9
- def map(chunk)
8
+ class StreamMapper < LlmGateway::Adapters::StreamMapper
9
+ def map(chunk, &block)
10
10
  case chunk[:event]
11
11
  when "message_start"
12
12
  delta = {
@@ -14,81 +14,93 @@ module LlmGateway
14
14
  model: chunk.dig(:data, :message, :model),
15
15
  role: chunk.dig(:data, :message, :role)
16
16
  }
17
- usage_increment = chunk.dig(:data, :message, :usage) || {}
18
-
19
- AssistantStreamMessageEvent.new(type: :message_start, usage_increment:, delta:)
17
+ accumulator.push({ type: :message_start, delta: }, &block)
20
18
  when "content_block_start"
21
- content_index = chunk.dig(:data, :index)
22
- delta = chunk.dig(:data, :content_block, :text)
23
- current_type = chunk.dig(:data, :content_block, :type)
24
- content_block_types[content_index] = current_type
19
+ content_block = chunk.dig(:data, :content_block) || {}
20
+ @current_content_block_type = content_block[:type]
25
21
 
26
- case current_type
22
+ case @current_content_block_type
27
23
  when "thinking"
28
- AssistantStreamEvent.new(type: :reasoning_start, content_index:, delta:)
24
+ accumulator.push({ type: :reasoning_start, delta: content_block[:thinking], signature: "" }, &block)
29
25
  when "text"
30
- AssistantStreamEvent.new(type: :text_start, content_index:, delta:)
26
+ accumulator.push({ type: :text_start, delta: content_block[:text] }, &block)
31
27
  when "tool_use"
32
- id = chunk.dig(:data, :content_block, :id)
33
- name = chunk.dig(:data, :content_block, :name)
34
- AssistantToolStartEvent.new(type: :tool_start, content_index:, delta:, id:, name:)
28
+ accumulator.push(
29
+ {
30
+ type: :tool_start,
31
+ delta: "",
32
+ id: content_block[:id],
33
+ name: content_block[:name]
34
+ },
35
+ &block
36
+ )
35
37
  end
36
38
  when "content_block_delta"
37
- content_index = chunk.dig(:data, :index)
38
-
39
- case content_block_types[content_index]
39
+ case @current_content_block_type
40
40
  when "thinking"
41
41
  delta = chunk.dig(:data, :delta, :thinking)
42
- signature = chunk.dig(:data, :delta, :signature)
43
- AssistantStreamReasoningEvent.new(type: :reasoning_delta, signature:, delta:, content_index:)
42
+ signature = chunk.dig(:data, :delta, :signature) || ""
43
+ accumulator.push({ type: :reasoning_delta, signature:, delta: }, &block)
44
44
  when "text"
45
45
  delta = chunk.dig(:data, :delta, :text)
46
- AssistantStreamEvent.new(type: :text_delta, content_index:, delta:)
46
+ accumulator.push({ type: :text_delta, delta: }, &block)
47
47
  when "tool_use"
48
48
  delta = chunk.dig(:data, :delta, :partial_json)
49
- AssistantStreamEvent.new(type: :tool_delta, content_index:, delta:)
49
+ accumulator.push({ type: :tool_delta, delta: }, &block)
50
50
  end
51
51
  when "content_block_stop"
52
- content_index = chunk.dig(:data, :index)
53
- type = case content_block_types[content_index]
52
+ case @current_content_block_type
54
53
  when "thinking"
55
- :reasoning_end
54
+ accumulator.push({ type: :reasoning_end, delta: "", signature: "" }, &block)
56
55
  when "text"
57
- :text_end
56
+ accumulator.push({ type: :text_end, delta: "" }, &block)
58
57
  when "tool_use"
59
- :tool_end
58
+ accumulator.push({ type: :tool_end, delta: "" }, &block)
60
59
  end
61
- AssistantStreamEvent.new(type: type, content_index:, delta: "")
60
+ @current_content_block_type = nil
62
61
  when "message_delta"
63
- delta = normalize_message_delta(chunk.dig(:data, :delta) || {})
64
- usage_increment = chunk.dig(:data, :usage) || {}
62
+ data = chunk[:data] || {}
63
+ delta = normalize_message_delta(data[:delta] || {})
64
+ patch = { type: :message_delta, delta: }
65
+ patch[:usage] = normalized_usage(data[:usage]) if data.key?(:usage)
65
66
 
66
- AssistantStreamMessageEvent.new(type: :message_delta, usage_increment:, delta:)
67
+ accumulator.push(patch, &block)
67
68
  when "message_stop"
68
- AssistantStreamMessageEvent.new(type: :message_end, usage_increment: {}, delta: {})
69
+
70
+ accumulator.push({ type: :message_end }, &block)
69
71
  when "ping"
70
72
  nil
71
73
  when "error"
72
- error = chunk.dig(:data, :error) || {}
73
- message = error[:message] || "Stream error"
74
- code = error[:type]
74
+ raise_stream_error!(chunk.dig(:data, :error) || {}, overload_codes: [ "overloaded_error" ])
75
+ end
76
+ end
75
77
 
76
- if LlmGateway::Errors.context_overflow_message?(message)
77
- raise LlmGateway::Errors::PromptTooLong.new(message, code)
78
- end
78
+ private
79
79
 
80
- if code == "overloaded_error"
81
- raise LlmGateway::Errors::OverloadError.new(message, code)
82
- end
80
+ def normalized_usage(usage)
81
+ usage = symbolize_keys(usage)
83
82
 
84
- raise LlmGateway::Errors::APIStatusError.new(message, code)
85
- end
83
+ input = token_count(usage[:input_tokens])
84
+ cache_write = token_count(usage[:cache_creation_input_tokens])
85
+ cache_read = token_count(usage[:cache_read_input_tokens])
86
+ output = token_count(usage[:output_tokens])
87
+
88
+ {
89
+ input:,
90
+ cache_write:,
91
+ cache_read:,
92
+ output:,
93
+ total: input + cache_write + cache_read + output,
94
+ raw: usage
95
+ }
86
96
  end
87
97
 
88
- private
98
+ def token_count(value)
99
+ value.to_i
100
+ end
89
101
 
90
- def content_block_types
91
- @content_block_types ||= {}
102
+ def symbolize_keys(hash)
103
+ hash.to_h.transform_keys { |key| key.respond_to?(:to_sym) ? key.to_sym : key }
92
104
  end
93
105
 
94
106
  def normalize_message_delta(delta)
@@ -11,25 +11,67 @@ module LlmGateway
11
11
  "xhigh" => 20 * 1024
12
12
  }.freeze
13
13
 
14
+ # Source: https://platform.claude.com/docs/en/api/messages/create.md
15
+ # API: Anthropic Messages Create; accessed 2026-05-18.
16
+ # Body parameters listed by the API reference: max_tokens, messages, model,
17
+ # cache_control, container, inference_geo, metadata, output_config,
18
+ # service_tier, stop_sequences, stream, system, temperature, thinking,
19
+ # tool_choice, tools, top_k, top_p.
20
+ # This mapper intentionally excludes transcript/tool/system structural fields
21
+ # (messages, system, tool_choice, tools) from option handling.
22
+
23
+ VALID_OPTIONS = %i[
24
+ max_tokens
25
+ model
26
+ cache_control
27
+ cache_retention
28
+ container
29
+ inference_geo
30
+ metadata
31
+ output_config
32
+ service_tier
33
+ stop_sequences
34
+ stream
35
+ temperature
36
+ thinking
37
+ top_k
38
+ top_p
39
+ ].freeze
40
+
41
+ MANAGED_OPTIONS = %i[
42
+ reasoning
43
+ max_completion_tokens
44
+ response_format
45
+ cache_key
46
+ prompt_cache_key
47
+ prompt_cache_retention
48
+ ].freeze
49
+
14
50
  module_function
15
51
 
16
52
  def map(options)
17
- mapped_options = options.reject { |key, _| %i[reasoning max_completion_tokens response_format prompt_cache_retention cache_key prompt_cache_key].include?(key) }
53
+ mapped_options = options.reject { |key, _| MANAGED_OPTIONS.include?(key) }
18
54
  mapped_options[:max_tokens] = options[:max_completion_tokens] || DEFAULT_MAX_TOKENS
19
55
 
20
- retention = options[:cache_retention]
21
- mapped_options[:cache_retention] = retention unless retention.nil?
22
-
23
56
  response_format = options[:response_format]
24
57
  mapped_options[:output_config] = normalize_output_config(response_format) unless response_format.nil?
25
58
 
26
59
  reasoning = options[:reasoning]
27
- return mapped_options if reasoning.nil? || reasoning.to_s == "none"
60
+ mapped_options[:thinking] = normalize_reasoning(reasoning) unless reasoning.nil? || reasoning.to_s == "none"
28
61
 
29
- mapped_options[:thinking] = normalize_reasoning(reasoning)
62
+ validate_options!(mapped_options)
30
63
  mapped_options
31
64
  end
32
65
 
66
+ def validate_options!(mapped_options)
67
+ unknown_options = mapped_options.keys - VALID_OPTIONS
68
+ return if unknown_options.empty?
69
+
70
+ raise ArgumentError,
71
+ "Unknown Anthropic Messages options: #{unknown_options.join(', ')}. " \
72
+ "Valid options: #{VALID_OPTIONS.join(', ')}."
73
+ end
74
+
33
75
  def normalize_output_config(response_format)
34
76
  format_type = response_format.is_a?(Hash) ? response_format[:type] || response_format["type"] : response_format
35
77
 
@@ -3,7 +3,8 @@
3
3
  require_relative "../adapter"
4
4
  require_relative "../openai/acts_like_chat_completions"
5
5
  require_relative "../input_message_sanitizer"
6
- require_relative "../openai/chat_completions/input_mapper"
6
+ require_relative "../openai/chat_completions/stream_mapper"
7
+ require_relative "input_mapper"
7
8
  require_relative "option_mapper"
8
9
 
9
10
  module LlmGateway
@@ -15,7 +16,7 @@ module LlmGateway
15
16
  private
16
17
 
17
18
  def file_output_mapper = nil
18
- def stream_mapper = nil
19
+ def input_mapper = Groq::InputMapper
19
20
  def option_mapper = Groq::OptionMapper
20
21
 
21
22
  def map_input(input)
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../openai/chat_completions/input_mapper"
4
+
5
+ module LlmGateway
6
+ module Adapters
7
+ module Groq
8
+ class InputMapper < OpenAI::ChatCompletions::InputMapper
9
+ def self.map(data)
10
+ mapped = super
11
+ mapped.merge(messages: map_groq_messages(mapped[:messages]))
12
+ end
13
+
14
+ def self.map_groq_messages(messages)
15
+ return messages unless messages.is_a?(Array)
16
+
17
+ messages.map { |message| map_groq_message(message) }
18
+ end
19
+
20
+ def self.map_groq_message(message)
21
+ return message unless message.is_a?(Hash) && message[:role] == "assistant"
22
+ return message unless message[:content].is_a?(Array)
23
+
24
+ reasoning_blocks, content_blocks = message[:content].partition do |block|
25
+ block.is_a?(Hash) && %w[reasoning thinking].include?(block[:type] || block["type"])
26
+ end
27
+
28
+ return message if reasoning_blocks.empty?
29
+
30
+ mapped = message.merge(content: content_blocks.empty? ? nil : content_blocks)
31
+ reasoning = reasoning_blocks.filter_map { |block| reasoning_text(block) }.join("\n")
32
+ mapped[:reasoning] = reasoning unless reasoning.empty?
33
+ mapped
34
+ end
35
+
36
+ def self.reasoning_text(block)
37
+ block[:reasoning] || block["reasoning"] || block[:thinking] || block["thinking"]
38
+ end
39
+
40
+ private_class_method :map_groq_messages, :map_groq_message, :reasoning_text
41
+ end
42
+ end
43
+ end
44
+ end
@@ -4,16 +4,94 @@ module LlmGateway
4
4
  module Adapters
5
5
  module Groq
6
6
  module OptionMapper
7
+ DEFAULT_TEMPERATURE = 0
8
+ DEFAULT_MAX_COMPLETION_TOKENS = 20_480
9
+ VALID_REASONING_LEVELS = %w[default low medium high].freeze
10
+
11
+ # Source: https://console.groq.com/docs/text-chat.md and
12
+ # https://console.groq.com/docs/api-reference.md#chat-create
13
+ # API: Groq Chat Completions Create; accessed 2026-05-19.
14
+ # Body parameters listed by the API reference: messages, model,
15
+ # citation_options, compound_custom, disable_tool_validation, documents,
16
+ # exclude_domains, frequency_penalty, function_call, functions,
17
+ # include_domains, include_reasoning, logit_bias, logprobs,
18
+ # max_completion_tokens, max_tokens, metadata, n, parallel_tool_calls,
19
+ # presence_penalty, reasoning_effort, reasoning_format, response_format,
20
+ # search_settings, seed, service_tier, stop, store, stream,
21
+ # stream_options, temperature, tool_choice, tools, top_logprobs, top_p,
22
+ # user.
23
+ # This mapper intentionally excludes transcript/tool structural fields
24
+ # (messages, tools) from option handling.
25
+ VALID_OPTIONS = %i[
26
+ model
27
+ citation_options
28
+ compound_custom
29
+ disable_tool_validation
30
+ documents
31
+ exclude_domains
32
+ frequency_penalty
33
+ function_call
34
+ functions
35
+ include_domains
36
+ include_reasoning
37
+ logit_bias
38
+ logprobs
39
+ max_completion_tokens
40
+ max_tokens
41
+ metadata
42
+ n
43
+ parallel_tool_calls
44
+ presence_penalty
45
+ reasoning_effort
46
+ reasoning_format
47
+ response_format
48
+ search_settings
49
+ seed
50
+ service_tier
51
+ stop
52
+ store
53
+ stream
54
+ stream_options
55
+ temperature
56
+ tool_choice
57
+ top_logprobs
58
+ top_p
59
+ user
60
+ ].freeze
61
+
62
+ MANAGED_OPTIONS = %i[
63
+ reasoning
64
+ cache_key
65
+ cache_retention
66
+ ].freeze
67
+
7
68
  module_function
8
69
 
9
70
  def map(options)
10
- mapped_options = options.dup
11
- mapped_options[:temperature] ||= 0
12
- mapped_options[:max_completion_tokens] ||= 20480
13
- mapped_options[:response_format] = normalize_response_format(mapped_options[:response_format] || "text")
71
+ mapped_options = options.reject { |key, _| MANAGED_OPTIONS.include?(key) }
72
+ mapped_options[:temperature] = options.key?(:temperature) ? options[:temperature] : DEFAULT_TEMPERATURE
73
+ mapped_options[:max_completion_tokens] = options[:max_completion_tokens] || DEFAULT_MAX_COMPLETION_TOKENS
74
+ mapped_options[:response_format] = normalize_response_format(options[:response_format] || "text")
75
+
76
+ reasoning = options[:reasoning]
77
+ unless reasoning.nil? || reasoning.to_s == "none"
78
+ mapped_options[:reasoning_effort] = normalize_reasoning_effort(reasoning)
79
+ mapped_options[:reasoning_format] = "parsed"
80
+ end
81
+
82
+ validate_options!(mapped_options)
14
83
  mapped_options
15
84
  end
16
85
 
86
+ def validate_options!(mapped_options)
87
+ unknown_options = mapped_options.keys - VALID_OPTIONS
88
+ return if unknown_options.empty?
89
+
90
+ raise ArgumentError,
91
+ "Unknown Groq Chat Completions options: #{unknown_options.join(', ')}. " \
92
+ "Valid options: #{VALID_OPTIONS.join(', ')}."
93
+ end
94
+
17
95
  def normalize_response_format(response_format)
18
96
  if response_format.is_a?(String)
19
97
  { type: response_format }
@@ -21,6 +99,13 @@ module LlmGateway
21
99
  response_format
22
100
  end
23
101
  end
102
+
103
+ def normalize_reasoning_effort(reasoning)
104
+ effort = reasoning.to_s
105
+ return effort if VALID_REASONING_LEVELS.include?(effort)
106
+
107
+ raise ArgumentError, "Invalid reasoning '#{reasoning}'. Use 'none', 'default', 'low', 'medium', or 'high'."
108
+ end
24
109
  end
25
110
  end
26
111
  end