llm_gateway 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +26 -0
  3. data/README.md +544 -186
  4. data/Rakefile +1 -2
  5. data/docs/migration-guide.md +135 -0
  6. data/lib/llm_gateway/adapters/adapter.rb +173 -0
  7. data/lib/llm_gateway/adapters/anthropic/acts_like_messages.rb +23 -0
  8. data/lib/llm_gateway/adapters/{claude → anthropic}/bidirectional_message_mapper.rb +31 -3
  9. data/lib/llm_gateway/adapters/{claude → anthropic}/input_mapper.rb +4 -3
  10. data/lib/llm_gateway/adapters/anthropic/messages_adapter.rb +19 -0
  11. data/lib/llm_gateway/adapters/{claude → anthropic}/output_mapper.rb +1 -1
  12. data/lib/llm_gateway/adapters/anthropic/stream_mapper.rb +110 -0
  13. data/lib/llm_gateway/adapters/anthropic_option_mapper.rb +53 -0
  14. data/lib/llm_gateway/adapters/groq/chat_completions_adapter.rb +47 -0
  15. data/lib/llm_gateway/adapters/groq/option_mapper.rb +27 -0
  16. data/lib/llm_gateway/adapters/input_message_sanitizer.rb +93 -0
  17. data/lib/llm_gateway/adapters/openai/acts_like_chat_completions.rb +22 -0
  18. data/lib/llm_gateway/adapters/openai/acts_like_responses.rb +31 -0
  19. data/lib/llm_gateway/adapters/{open_ai → openai}/chat_completions/bidirectional_message_mapper.rb +9 -2
  20. data/lib/llm_gateway/adapters/{open_ai → openai}/chat_completions/input_mapper.rb +1 -6
  21. data/lib/llm_gateway/adapters/openai/chat_completions/input_message_sanitizer.rb +65 -0
  22. data/lib/llm_gateway/adapters/openai/chat_completions/option_mapper.rb +39 -0
  23. data/lib/llm_gateway/adapters/{open_ai → openai}/chat_completions/output_mapper.rb +1 -1
  24. data/lib/llm_gateway/adapters/openai/chat_completions/stream_mapper.rb +242 -0
  25. data/lib/llm_gateway/adapters/openai/chat_completions_adapter.rb +20 -0
  26. data/lib/llm_gateway/adapters/{open_ai → openai}/file_output_mapper.rb +1 -1
  27. data/lib/llm_gateway/adapters/openai/prompt_cache_option_mapper.rb +39 -0
  28. data/lib/llm_gateway/adapters/{open_ai → openai}/responses/bidirectional_message_mapper.rb +52 -4
  29. data/lib/llm_gateway/adapters/openai/responses/input_mapper.rb +106 -0
  30. data/lib/llm_gateway/adapters/openai/responses/option_mapper.rb +41 -0
  31. data/lib/llm_gateway/adapters/{open_ai → openai}/responses/output_mapper.rb +1 -1
  32. data/lib/llm_gateway/adapters/openai/responses/stream_mapper.rb +340 -0
  33. data/lib/llm_gateway/adapters/openai/responses_adapter.rb +20 -0
  34. data/lib/llm_gateway/adapters/openai_codex/input_mapper.rb +206 -0
  35. data/lib/llm_gateway/adapters/openai_codex/option_mapper.rb +28 -0
  36. data/lib/llm_gateway/adapters/openai_codex/responses_adapter.rb +38 -0
  37. data/lib/llm_gateway/adapters/option_mapper.rb +13 -0
  38. data/lib/llm_gateway/adapters/stream_accumulator.rb +91 -0
  39. data/lib/llm_gateway/adapters/structs.rb +145 -0
  40. data/lib/llm_gateway/base_client.rb +62 -1
  41. data/lib/llm_gateway/client.rb +45 -129
  42. data/lib/llm_gateway/clients/anthropic.rb +167 -0
  43. data/lib/llm_gateway/clients/claude_code/oauth_flow.rb +162 -0
  44. data/lib/llm_gateway/clients/claude_code/token_manager.rb +112 -0
  45. data/lib/llm_gateway/clients/groq.rb +54 -0
  46. data/lib/llm_gateway/clients/openai.rb +208 -0
  47. data/lib/llm_gateway/clients/openai_codex/oauth_flow.rb +258 -0
  48. data/lib/llm_gateway/clients/openai_codex/token_manager.rb +71 -0
  49. data/lib/llm_gateway/errors.rb +21 -0
  50. data/lib/llm_gateway/prompt.rb +12 -1
  51. data/lib/llm_gateway/provider_registry.rb +37 -0
  52. data/lib/llm_gateway/version.rb +1 -1
  53. data/lib/llm_gateway.rb +165 -14
  54. data/scripts/create_anthropic_credentials.rb +106 -0
  55. data/scripts/create_openai_codex_credentials.rb +116 -0
  56. data/scripts/generate_handoff_live_fixture.rb +169 -0
  57. data/scripts/generate_handoff_media_fixture.rb +167 -0
  58. metadata +64 -28
  59. data/lib/llm_gateway/adapters/claude/client.rb +0 -60
  60. data/lib/llm_gateway/adapters/groq/bidirectional_message_mapper.rb +0 -18
  61. data/lib/llm_gateway/adapters/groq/client.rb +0 -58
  62. data/lib/llm_gateway/adapters/groq/input_mapper.rb +0 -18
  63. data/lib/llm_gateway/adapters/groq/output_mapper.rb +0 -10
  64. data/lib/llm_gateway/adapters/open_ai/client.rb +0 -80
  65. data/lib/llm_gateway/adapters/open_ai/responses/input_mapper.rb +0 -62
  66. data/sample/claude_code_clone/agent.rb +0 -65
  67. data/sample/claude_code_clone/claude_code_clone.rb +0 -40
  68. data/sample/claude_code_clone/prompt.rb +0 -79
  69. data/sample/claude_code_clone/run.rb +0 -47
  70. data/sample/claude_code_clone/tools/bash_tool.rb +0 -54
  71. data/sample/claude_code_clone/tools/edit_tool.rb +0 -61
  72. data/sample/claude_code_clone/tools/grep_tool.rb +0 -113
  73. data/sample/claude_code_clone/tools/read_tool.rb +0 -61
  74. data/sample/claude_code_clone/tools/todowrite_tool.rb +0 -98
@@ -0,0 +1,206 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+ require_relative "../openai/responses/input_mapper"
5
+
6
module LlmGateway
  module Adapters
    module OpenAICodex
      # Custom input mapper for the Codex backend.
      #
      # The Codex Responses endpoint rejects several content block types that
      # the standard OpenAI Responses InputMapper passes through:
      #   - "reasoning" and "summary_text" blocks are never accepted as input.
      #   - "thinking" blocks are only valid when they carry an encrypted
      #     `signature`; unsigned thinking blocks must be dropped.
      #
      # Additional normalisation:
      #   - Tool-result output is coerced to recognised Responses input types
      #     (input_text / input_image).
      #   - Assistant text content is always sent as "output_text" (not
      #     "input_text") because Codex is strict about directionality.
      #   - function_call / tool_use blocks inside an assistant turn are
      #     promoted to top-level function_call items so that Codex can match
      #     them against the subsequent function_call_output items.
      class InputMapper < OpenAI::Responses::InputMapper
        # Convert gateway messages into Codex Responses input items.
        #
        # @param messages [Array<Hash>, Object] symbol-keyed message hashes;
        #   a non-Array value is returned unchanged
        # @return [Array<Hash>, Object] flat list of input items
        def self.map_messages(messages)
          return messages unless messages.is_a?(Array)

          # `message_mapper` is not defined in this class; presumably it is
          # inherited from OpenAI::Responses::InputMapper.
          mapper = message_mapper
          stripped = strip_reasoning_blocks(messages)

          mapped = stripped.each_with_object([]) do |msg, acc|
            next unless msg.is_a?(Hash)

            role = msg[:role]
            content = msg[:content]

            if %w[user developer].include?(role) && tool_result_message?(content)
              # Responses API expects tool results as top-level input items.
              # Also normalise nested tool_result output blocks to Responses
              # input types (text → input_text, image → input_image).
              content.each { |part| acc << map_tool_result_for_responses(part, mapper) }
              next
            end

            if role == "assistant" && content.is_a?(Array)
              acc.concat(map_assistant_content(content, mapper))
              next
            end

            mapped_content =
              if content.is_a?(Array)
                content.map { |part| mapper.map_content(part) }
              else
                [ mapper.map_content(content) ]
              end

            acc << { role: role, content: mapped_content }
          end

          normalize_assistant_content_types(mapped)
        end

        # Recursively strip Codex-incompatible content blocks from a message tree.
        #
        #   "reasoning"    → always removed
        #   "summary_text" → always removed
        #   "thinking"     → removed unless :signature is present
        #
        # Hash entries whose (stripped) value is nil are omitted from the copy.
        def self.strip_reasoning_blocks(obj)
          case obj
          when Array
            obj.map { |item| strip_reasoning_blocks(item) }.compact
          when Hash
            type = obj[:type]
            return nil if %w[reasoning summary_text].include?(type)
            return nil if type == "thinking" && obj[:signature].nil?

            obj.each_with_object({}) do |(k, v), acc|
              result = strip_reasoning_blocks(v)
              acc[k] = result unless result.nil?
            end
          else
            obj
          end
        end

        # Ensure assistant messages carry "output_text" rather than "input_text".
        # Only hashes with role "assistant" and Array content are rewritten;
        # every other element is returned as-is.
        def self.normalize_assistant_content_types(messages)
          return messages unless messages.is_a?(Array)

          messages.map do |msg|
            next msg unless msg.is_a?(Hash) && msg[:role] == "assistant" && msg[:content].is_a?(Array)

            msg.merge(
              content: msg[:content].map do |part|
                part.is_a?(Hash) && part[:type] == "input_text" ? part.merge(type: "output_text") : part
              end
            )
          end
        end

        # True when content is an Array whose first element is a tool_result hash.
        def self.tool_result_message?(content)
          content.is_a?(Array) &&
            content.first.is_a?(Hash) &&
            content.first[:type] == "tool_result"
        end

        # Map assistant content blocks into Codex-compatible top-level items.
        #
        #   - thinking with signature → parsed JSON reasoning item
        #   - tool_use / function_call → top-level function_call item
        #   - text / *_text variants   → output_text inside an assistant message
        #   - anything else            → delegated to mapper.map_content
        #
        # @return [Array<Hash>] the assistant text message first (when any text
        #   exists), followed by tool/reasoning items in their original order
        def self.map_assistant_content(content, mapper)
          text_parts = []
          items = []

          content.each do |part|
            next unless part.is_a?(Hash)

            case part[:type]
            when "tool_use", "function_call"
              call_id = part[:id] || part[:call_id]
              arguments = part[:input] || part[:arguments] || {}
              arguments = JSON.generate(arguments) unless arguments.is_a?(String)

              items << {
                type: "function_call",
                call_id: call_id,
                name: part[:name],
                arguments: arguments
              }.compact

            when "thinking"
              # Only signed thinking blocks survive strip_reasoning_blocks;
              # the signature payload is the full reasoning item JSON.
              reasoning_item = parse_reasoning_signature(part[:signature])
              items << reasoning_item if reasoning_item

            when "text", "input_text", "output_text"
              text_parts << { type: "output_text", text: part[:text].to_s }

            else
              mapped = mapper.map_content(part)
              text_parts << mapped if mapped
            end
          end

          # Text parts form a single assistant message; tool/reasoning items follow.
          items.unshift({ role: "assistant", content: text_parts }) if text_parts.any?
          items
        end

        # Decode a thinking block's signature into a reasoning item.
        #
        # Returns nil (block is dropped) when the signature is not a String,
        # is not valid JSON, or decodes to something other than a Hash, so a
        # malformed signature can neither raise nor inject a scalar input item.
        def self.parse_reasoning_signature(signature)
          return nil unless signature.is_a?(String)

          parsed = JSON.parse(signature, symbolize_names: true)
          parsed.is_a?(Hash) ? parsed : nil
        rescue JSON::ParserError
          nil
        end

        # Wrap a tool_result part in the Responses wire format, normalising the
        # nested output content types along the way. Parts that are not
        # tool_result hashes go straight to mapper.map_content.
        def self.map_tool_result_for_responses(part, mapper)
          return mapper.map_content(part) unless part.is_a?(Hash) && part[:type] == "tool_result"

          mapper.map_content(part.merge(content: normalize_tool_result_output(part[:content])))
        end

        # Coerce each element of a tool result's output to a Responses input
        # type (input_text or input_image). Accepts symbol or string keys.
        #
        # @param output [Array, Hash, String, nil] nil becomes an empty list
        # @return [Array<Hash>]
        def self.normalize_tool_result_output(output)
          # Array(hash) would explode a Hash into [key, value] pairs, so a
          # single block is wrapped explicitly.
          entries = output.is_a?(Hash) ? [ output ] : Array(output)

          entries.map do |item|
            case item
            when String
              { type: "input_text", text: item }
            when Hash
              type = item[:type] || item["type"]
              case type
              when "text", "input_text", "output_text"
                { type: "input_text", text: (item[:text] || item["text"]).to_s }
              when "image", "input_image"
                data = item[:data] || item["data"]
                mime = item[:mimeType] || item["mimeType"] ||
                       item[:media_type] || item["media_type"] || "image/png"
                image_url = item[:image_url] || item["image_url"] ||
                            "data:#{mime};base64,#{data}"
                { type: "input_image", image_url: image_url }
              else
                item
              end
            else
              { type: "input_text", text: item.to_s }
            end
          end
        end

        private_class_method :strip_reasoning_blocks, :normalize_assistant_content_types,
                             :tool_result_message?, :map_assistant_content,
                             :parse_reasoning_signature,
                             :map_tool_result_for_responses, :normalize_tool_result_output
      end
    end
  end
end
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../openai/responses/option_mapper"
4
+
5
module LlmGateway
  module Adapters
    module OpenAICodex
      # Option mapper for the Codex endpoint: applies the standard OpenAI
      # Responses option mapping, then removes options Codex does not take.
      module OptionMapper
        # Keys removed after the Responses mapping.
        #   - token limits: the Codex endpoint currently rejects them
        #   - cache retention flags: not used in the Codex request body
        UNSUPPORTED_KEYS = %i[
          max_output_tokens
          max_completion_tokens
          prompt_cache_retention
          cacheRetention
          cache_retention
        ].freeze

        module_function

        # @param options [Hash] gateway options
        # @return [Hash] a new hash without the unsupported keys; the hash
        #   returned by the Responses mapper is not modified, in case that
        #   mapper hands back the caller's own options object
        def map(options)
          mapped_options = OpenAI::Responses::OptionMapper.map(options)
          mapped_options.reject { |key, _value| UNSUPPORTED_KEYS.include?(key) }
        end
      end
    end
  end
end
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../adapter"
4
+ require_relative "../openai/acts_like_responses"
5
+ require_relative "../openai/responses/output_mapper"
6
+ require_relative "option_mapper"
7
+ require_relative "../openai/responses/stream_mapper"
8
+ require_relative "../openai/file_output_mapper"
9
+ require_relative "input_mapper"
10
+ require_relative "../input_message_sanitizer"
11
+
12
module LlmGateway
  module Adapters
    module OpenAICodex
      # Adapter for the Codex flavour of the Responses API. It mixes in
      # ActsLikeOpenAIResponses and swaps in the Codex input/option mappers
      # and the Codex-specific client calls.
      class ResponsesAdapter < Adapter
        include ActsLikeOpenAIResponses

        private

        # Mapper that turns gateway messages into Codex input items.
        def input_mapper
          OpenAICodex::InputMapper
        end

        # Mapper that filters options for Codex. Fully qualified (like
        # input_mapper) so it cannot resolve to the generic
        # Adapters::OptionMapper through lexical constant lookup.
        def option_mapper
          OpenAICodex::OptionMapper
        end

        # Non-streaming request; forwards everything to client.chat_codex.
        def perform_chat(messages, tools:, system:, **options)
          client.chat_codex(messages, tools: tools, system: system, **options)
        end

        # Streaming request; forwards everything, including the block, to
        # client.stream_codex.
        def perform_stream(messages, tools:, system:, **options, &block)
          client.stream_codex(messages, tools: tools, system: system, **options, &block)
        end
      end
    end
  end
end
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
module LlmGateway
  module Adapters
    # Pass-through option mapper: hands the options back untouched.
    module OptionMapper
      module_function

      # @param options [Object] options supplied by the caller
      # @return [Object] the very same object, unmodified
      def map(options)
        options
      end
    end
  end
end
@@ -0,0 +1,91 @@
1
+ require "json"
2
+
3
# Folds a sequence of stream events into one message hash.
#
# Content events (text / tool / reasoning) build up `blocks`, indexed by the
# event's `content_index`. Message events merge their `delta` into
# `message_hash` and add their `usage_increment` to `usage_hash`.
class StreamAccumulator
  attr_accessor :blocks, :message_hash, :usage_hash

  def initialize
    @message_hash = {}
    @usage_hash = {
      input_tokens: 0,
      cache_creation_input_tokens: 0,
      cache_read_input_tokens: 0,
      output_tokens: 0,
      reasoning_tokens: 0
    }
    @blocks = []
  end

  # @return [Hash] message_hash plus :usage and :content (the accumulated
  #   blocks, with each tool_use input parsed from JSON)
  def result
    message_hash.merge(
      usage: usage_hash,
      content: serialized_blocks
    )
  end

  # Apply one event to the accumulated state. A nil event is ignored.
  #
  # @param event [#type] responds to `type` plus the readers used for that
  #   type (`delta`, `content_index`, `id`, `name`, `usage_increment`,
  #   and optionally `signature`)
  def push(event)
    return unless event

    case event.type
    when :text_start
      blocks[event.content_index] = { type: "text", text: "" }
      append_to_block(event, :text, event.delta)
    when :text_delta, :text_end
      append_to_block(event, :text, event.delta)
    when :tool_start
      blocks[event.content_index] = {
        type: "tool_use",
        id: event.id,
        name: event.name,
        input: ""
      }
    when :tool_delta, :tool_end
      append_to_block(event, :input, event.delta)
    when :reasoning_start
      blocks[event.content_index] = { type: "reasoning", reasoning: "", signature: "" }
      append_reasoning(event)
    when :reasoning_delta, :reasoning_end
      append_reasoning(event)
    when :message_start, :message_delta
      merge_message(event)
    when :message_end
      # Nothing to accumulate.
    end
  end

  private

  # Append text to one string field of the block at the event's index.
  def append_to_block(event, key, piece)
    blocks[event.content_index][key] += piece.to_s
  end

  # Append reasoning text and, when the event exposes one, its signature.
  # The respond_to? guard applies to start, delta and end alike, so an event
  # object without a `signature` reader cannot raise NoMethodError.
  def append_reasoning(event)
    append_to_block(event, :reasoning, event.delta)
    append_to_block(event, :signature, event.signature) if event.respond_to?(:signature)
  end

  # Merge message-level fields and add usage counters. Keys missing from the
  # increment, or present with a nil value, count as zero.
  def merge_message(event)
    message_hash.merge!(event.delta)
    increment = event.usage_increment
    usage_hash.each_key do |key|
      usage_hash[key] += increment[key] || 0
    end
  end

  # Blocks ready for output. Holes left by non-contiguous content_index
  # values are skipped; tool_use input strings are parsed into hashes.
  def serialized_blocks
    blocks.compact.map do |block|
      next block unless block[:type] == "tool_use"

      block.merge(input: LlmGateway::Utils.deep_symbolize_keys(parse_tool_input(block[:input])))
    end
  end

  # Parse accumulated tool input JSON; empty or invalid input becomes {}.
  def parse_tool_input(input)
    return {} if input.nil? || input.empty?

    JSON.parse(input)
  rescue JSON::ParserError
    {}
  end
end
@@ -0,0 +1,145 @@
1
+ require "dry-struct"
2
+ require "dry-types"
3
+
4
# Namespace exposing the dry-types type constants used by the structs below.
module Types
  include Dry.Types()
end
7
+
8
# Common base for the gateway structs: attribute keys are converted to
# symbols, so string-keyed hashes are accepted on construction.
class BaseStruct < Dry::Struct
  transform_keys(&:to_sym)
end
11
+
12
# Content-level stream event (text, tool or reasoning) targeting the content
# block at `content_index`.
class AssistantStreamEvent < BaseStruct
  EventType = Types::Coercible::Symbol.enum(
    :text_start, :text_delta, :text_end,
    :tool_start, :tool_delta, :tool_end,
    :reasoning_start, :reasoning_delta, :reasoning_end
  )

  attribute :type, EventType
  # Defaults to an empty string when the event carries no delta.
  attribute :delta, Types::Coercible::String.default { "" }
  attribute :content_index, Types::Integer
end
19
+
20
+
21
# Stream event that additionally carries the tool call's id and name.
class AssistantToolStartEvent < AssistantStreamEvent
  attribute :id, Types::String
  attribute :name, Types::String
  attribute :content_index, Types::Integer
end
26
+
27
+
28
# Stream event that additionally carries a signature string, which defaults
# to an empty string.
class AssistantStreamReasoningEvent < AssistantStreamEvent
  attribute :signature, Types::Coercible::String.default { "" }
  attribute :content_index, Types::Integer
end
32
+
33
# Message-level stream event: `delta` holds message fields and
# `usage_increment` holds usage counters; both default to an empty hash.
class AssistantStreamMessageEvent < BaseStruct
  EventType = Types::Coercible::Symbol.enum(:message_start, :message_delta, :message_end)

  attribute :type, EventType
  attribute :delta, Types::Coercible::Hash.default { {} }
  attribute :usage_increment, Types::Coercible::Hash.default { {} }
end
40
+
41
# Plain text content block (type is always "text").
class TextContent < BaseStruct
  attribute :type, Types::String.enum("text")
  attribute :text, Types::String

  # @return [Hash] symbol-keyed hash with :type and :text
  def to_h
    {
      type: type,
      text: text
    }
  end
end
52
+
53
# Reasoning content block (type is always "reasoning") with an optional
# signature.
class ReasoningContent < BaseStruct
  attribute :type, Types::String.enum("reasoning")
  attribute :reasoning, Types::String
  attribute? :signature, Types::String.optional

  # @return [Hash] :type and :reasoning, plus :signature only when it is set
  def to_h
    result = {
      type: type,
      reasoning: reasoning
    }
    result[:signature] = signature unless signature.nil?
    result
  end
end
67
+
68
# Tool invocation content block (type is always "tool_use").
class ToolCall < BaseStruct
  attribute :id, Types::String
  attribute :type, Types::String.enum("tool_use")
  attribute :name, Types::String
  attribute :input, Types::Hash

  # @return [Hash] symbol-keyed hash with :id, :type, :name and :input
  def to_h
    {
      id: id,
      type: type,
      name: name,
      input: input
    }
  end
end
83
+
84
# Tool result block (type is always "tool_result") linked to a tool call
# through `tool_use_id`; content is a String.
class ToolResult < BaseStruct
  attribute :type, Types::String.enum("tool_result")
  attribute :tool_use_id, Types::String
  attribute :content, Types::String
end
89
+
90
# Complete assistant message. Raw content blocks (hashes) passed to `new` are
# converted into TextContent / ReasoningContent / ToolCall instances.
class AssistantMessage < BaseStruct
  ContentBlock =
    Types.Instance(TextContent) |
    Types.Instance(ReasoningContent) |
    Types.Instance(ToolCall)

  attribute :id, Types::String
  attribute :model, Types::String
  attribute :usage, Types::Hash
  attribute :role, Types::String.enum("assistant")
  attribute :stop_reason, Types::String.enum("stop", "length", "tool_use", "toolUse", "error", "aborted")
  attribute :provider, Types::String
  attribute :api, Types::String
  attribute? :error_message, Types::String.optional
  attribute :content, Types::Array.of(ContentBlock)

  # Build a message, symbolizing keys and wrapping each content block.
  #
  # Extra positional arguments and a block are forwarded to the parent
  # constructor unchanged; NOTE(review): dry-struct's own `new` appears to
  # accept an optional second argument, confirm against the pinned version.
  #
  # @param attributes [#to_h] message attributes with symbol or string keys
  # @raise [ArgumentError] when a content block has an unsupported type
  def self.new(attributes = {}, *rest, &block)
    attrs = attributes.to_h.transform_keys(&:to_sym)
    raw_content = attrs[:content]
    # Array(hash) would explode a Hash into [key, value] pairs, so a single
    # block hash is wrapped explicitly.
    raw_blocks = raw_content.is_a?(Hash) ? [ raw_content ] : Array(raw_content)
    attrs[:content] = raw_blocks.map { |raw_block| build_content_block(raw_block) }
    super(attrs, *rest, &block)
  end

  # @return [Hash] symbol-keyed hash; :error_message is included only when set
  def to_h
    result = {
      id: id,
      model: model,
      usage: usage,
      role: role,
      stop_reason: stop_reason,
      provider: provider,
      api: api,
      content: content.map(&:to_h)
    }
    result[:error_message] = error_message unless error_message.nil?
    result
  end

  # Turn one raw block into its struct; existing struct instances pass
  # through. "thinking" blocks become ReasoningContent, reading the text from
  # :thinking or, failing that, :reasoning.
  def self.build_content_block(block)
    return block if block.is_a?(TextContent) || block.is_a?(ReasoningContent) || block.is_a?(ToolCall)

    case block[:type] || block["type"]
    when "text"
      TextContent.new(block)
    when "reasoning"
      ReasoningContent.new(block)
    when "thinking"
      ReasoningContent.new(
        type: "reasoning",
        reasoning: block[:thinking] || block["thinking"] || block[:reasoning] || block["reasoning"],
        signature: block[:signature] || block["signature"]
      )
    when "tool_use"
      ToolCall.new(block)
    else
      raise ArgumentError, "Unsupported content block type: #{block[:type] || block['type']}"
    end
  end

  private_class_method :build_content_block
end
@@ -60,8 +60,70 @@ module LlmGateway
60
60
  process_response(response)
61
61
  end
62
62
 
63
# POST a JSON body with `stream: true` and yield each parsed server-sent
# event to the block.
#
# @param url_part [String] path appended to base_endpoint (a leading "/" is
#   dropped)
# @param body [Hash, nil] request payload; nil is treated as an empty payload
#   and the caller's hash is never modified
# @param extra_headers [Hash] headers merged over build_headers
# @yield [Hash] events as produced by parse_sse_stream
def post_stream(url_part, body = nil, extra_headers = {}, &block)
  endpoint = "#{base_endpoint}/#{url_part.sub(%r{^/}, "")}"
  uri = URI(endpoint)
  http = Net::HTTP.new(uri.host, uri.port)
  http.use_ssl = true
  http.read_timeout = 480
  http.open_timeout = 10

  # Build a fresh payload: merge! would raise on the default nil body and
  # would write :stream into the caller's hash.
  payload = (body || {}).merge(stream: true)

  request = Net::HTTP::Post.new(uri)
  headers = build_headers.merge(extra_headers)
  headers.each { |key, value| request[key] = value }
  request.body = payload.to_json

  http.request(request) do |response|
    unless response.code.to_i == 200
      # Collect the full body so handle_error can inspect it.
      full_body = +""
      response.read_body { |chunk| full_body << chunk }
      # Mark the response as already read, with the collected body attached.
      response.instance_variable_set(:@body, full_body)
      response.instance_variable_set(:@read, true)
      handle_error(response)
      # The body is consumed; if handle_error did not raise, do not try to
      # read it again as an event stream.
      next
    end

    parse_sse_stream(response, &block)
  end
end
90
+
63
91
  protected
64
92
 
93
# Read a server-sent-event body chunk by chunk and yield one hash per event.
#
# Events are separated by a blank line; both LF and CRLF line endings are
# recognised. Events without `data:` lines and the "[DONE]" sentinel are
# skipped. Data that is not valid JSON is yielded as `{ raw: <string> }`.
#
# @param response [#read_body] yields body chunks to a block
# @yield [Hash] `{ event: <String, nil>, data: <Hash> }`
def parse_sse_stream(response)
  buffer = +""
  response.read_body do |chunk|
    buffer << chunk
    # An event is complete once a blank line ("\n\n" or "\r\n\r\n") is seen.
    while (boundary = buffer.match(/\r?\n\r?\n/))
      raw_event = buffer.slice!(0, boundary.end(0))
      event_type = nil
      data_lines = []

      raw_event.each_line do |line|
        line = line.chomp
        if line.start_with?("event:")
          event_type = line.sub(/^event:\s*/, "")
        elsif line.start_with?("data:")
          data_lines << line.sub(/^data:\s*/, "")
        end
      end

      next if data_lines.empty?

      data_str = data_lines.join("\n")
      next if data_str == "[DONE]"

      data = begin
        LlmGateway::Utils.deep_symbolize_keys(JSON.parse(data_str))
      rescue JSON::ParserError
        { raw: data_str }
      end

      yield({ event: event_type, data: data })
    end
  end
end
126
+
65
127
  def make_request(endpoint, method, params = nil, extra_headers = {})
66
128
  uri = URI(endpoint)
67
129
  http = Net::HTTP.new(uri.host, uri.port)
@@ -73,7 +135,6 @@ module LlmGateway
73
135
  headers = build_headers.merge(extra_headers)
74
136
  headers.each { |key, value| request[key] = value }
75
137
  request.body = params.to_json if params
76
-
77
138
  http.request(request)
78
139
  end
79
140