ruby_llm 1.9.2 → 1.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. checksums.yaml +4 -4
  2. data/README.md +3 -2
  3. data/lib/generators/ruby_llm/install/templates/create_messages_migration.rb.tt +3 -0
  4. data/lib/generators/ruby_llm/install/templates/create_tool_calls_migration.rb.tt +1 -0
  5. data/lib/generators/ruby_llm/upgrade_to_v1_10/templates/add_v1_10_message_columns.rb.tt +19 -0
  6. data/lib/generators/ruby_llm/upgrade_to_v1_10/upgrade_to_v1_10_generator.rb +50 -0
  7. data/lib/ruby_llm/active_record/acts_as_legacy.rb +5 -1
  8. data/lib/ruby_llm/active_record/chat_methods.rb +12 -0
  9. data/lib/ruby_llm/active_record/message_methods.rb +41 -8
  10. data/lib/ruby_llm/aliases.json +0 -12
  11. data/lib/ruby_llm/chat.rb +10 -7
  12. data/lib/ruby_llm/configuration.rb +1 -1
  13. data/lib/ruby_llm/message.rb +37 -11
  14. data/lib/ruby_llm/models.json +1059 -857
  15. data/lib/ruby_llm/models.rb +134 -12
  16. data/lib/ruby_llm/provider.rb +4 -3
  17. data/lib/ruby_llm/providers/anthropic/chat.rb +128 -13
  18. data/lib/ruby_llm/providers/anthropic/streaming.rb +25 -1
  19. data/lib/ruby_llm/providers/bedrock/chat.rb +58 -15
  20. data/lib/ruby_llm/providers/bedrock/streaming/content_extraction.rb +59 -2
  21. data/lib/ruby_llm/providers/bedrock/streaming/payload_processing.rb +5 -0
  22. data/lib/ruby_llm/providers/gemini/chat.rb +69 -3
  23. data/lib/ruby_llm/providers/gemini/streaming.rb +32 -1
  24. data/lib/ruby_llm/providers/gemini/tools.rb +16 -3
  25. data/lib/ruby_llm/providers/gpustack/chat.rb +1 -1
  26. data/lib/ruby_llm/providers/mistral/chat.rb +58 -1
  27. data/lib/ruby_llm/providers/ollama/chat.rb +1 -1
  28. data/lib/ruby_llm/providers/openai/capabilities.rb +6 -2
  29. data/lib/ruby_llm/providers/openai/chat.rb +87 -3
  30. data/lib/ruby_llm/providers/openai/streaming.rb +11 -3
  31. data/lib/ruby_llm/providers/openai/temperature.rb +28 -0
  32. data/lib/ruby_llm/providers/openai.rb +1 -1
  33. data/lib/ruby_llm/providers/openrouter/chat.rb +154 -0
  34. data/lib/ruby_llm/providers/openrouter/streaming.rb +74 -0
  35. data/lib/ruby_llm/providers/openrouter.rb +2 -0
  36. data/lib/ruby_llm/providers/vertexai.rb +5 -1
  37. data/lib/ruby_llm/stream_accumulator.rb +111 -14
  38. data/lib/ruby_llm/streaming.rb +54 -51
  39. data/lib/ruby_llm/thinking.rb +49 -0
  40. data/lib/ruby_llm/tokens.rb +47 -0
  41. data/lib/ruby_llm/tool_call.rb +6 -3
  42. data/lib/ruby_llm/version.rb +1 -1
  43. data/lib/tasks/models.rake +19 -12
  44. metadata +12 -5
data/lib/ruby_llm/providers/openrouter/chat.rb
@@ -0,0 +1,154 @@
+ # frozen_string_literal: true
+
+ module RubyLLM
+   module Providers
+     class OpenRouter
+       # Chat methods of the OpenRouter API integration
+       module Chat
+         module_function
+
+         def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil, thinking: nil) # rubocop:disable Metrics/ParameterLists
+           payload = {
+             model: model.id,
+             messages: format_messages(messages),
+             stream: stream
+           }
+
+           payload[:temperature] = temperature unless temperature.nil?
+           payload[:tools] = tools.map { |_, tool| OpenAI::Tools.tool_for(tool) } if tools.any?
+
+           if schema
+             strict = schema[:strict] != false
+             payload[:response_format] = {
+               type: 'json_schema',
+               json_schema: {
+                 name: 'response',
+                 schema: schema,
+                 strict: strict
+               }
+             }
+           end
+
+           reasoning = build_reasoning(thinking)
+           payload[:reasoning] = reasoning if reasoning
+
+           payload[:stream_options] = { include_usage: true } if stream
+           payload
+         end
+
+         def parse_completion_response(response)
+           data = response.body
+           return if data.empty?
+
+           raise Error.new(response, data.dig('error', 'message')) if data.dig('error', 'message')
+
+           message_data = data.dig('choices', 0, 'message')
+           return unless message_data
+
+           usage = data['usage'] || {}
+           cached_tokens = usage.dig('prompt_tokens_details', 'cached_tokens')
+           thinking_tokens = usage.dig('completion_tokens_details', 'reasoning_tokens')
+           thinking_text = extract_thinking_text(message_data)
+           thinking_signature = extract_thinking_signature(message_data)
+
+           Message.new(
+             role: :assistant,
+             content: message_data['content'],
+             thinking: Thinking.build(text: thinking_text, signature: thinking_signature),
+             tool_calls: OpenAI::Tools.parse_tool_calls(message_data['tool_calls']),
+             input_tokens: usage['prompt_tokens'],
+             output_tokens: usage['completion_tokens'],
+             cached_tokens: cached_tokens,
+             cache_creation_tokens: 0,
+             thinking_tokens: thinking_tokens,
+             model_id: data['model'],
+             raw: response
+           )
+         end
+
+         def format_messages(messages)
+           messages.map do |msg|
+             {
+               role: format_role(msg.role),
+               content: OpenAI::Media.format_content(msg.content),
+               tool_calls: OpenAI::Tools.format_tool_calls(msg.tool_calls),
+               tool_call_id: msg.tool_call_id
+             }.compact.merge(format_thinking(msg))
+           end
+         end
+
+         def format_role(role)
+           case role
+           when :system
+             @config.openai_use_system_role ? 'system' : 'developer'
+           else
+             role.to_s
+           end
+         end
+
+         def build_reasoning(thinking)
+           return nil unless thinking&.enabled?
+
+           reasoning = {}
+           reasoning[:effort] = thinking.effort if thinking.respond_to?(:effort) && thinking.effort
+           reasoning[:max_tokens] = thinking.budget if thinking.respond_to?(:budget) && thinking.budget
+           reasoning[:enabled] = true if reasoning.empty?
+           reasoning
+         end
+
+         def format_thinking(msg)
+           thinking = msg.thinking
+           return {} unless thinking && msg.role == :assistant
+
+           details = []
+           if thinking.text
+             details << {
+               type: 'reasoning.text',
+               text: thinking.text,
+               signature: thinking.signature
+             }.compact
+           elsif thinking.signature
+             details << {
+               type: 'reasoning.encrypted',
+               data: thinking.signature
+             }
+           end
+
+           details.empty? ? {} : { reasoning_details: details }
+         end
+
+         def extract_thinking_text(message_data)
+           candidate = message_data['reasoning']
+           return candidate if candidate.is_a?(String)
+
+           details = message_data['reasoning_details']
+           return nil unless details.is_a?(Array)
+
+           text = details.filter_map do |detail|
+             case detail['type']
+             when 'reasoning.text'
+               detail['text']
+             when 'reasoning.summary'
+               detail['summary']
+             end
+           end.join
+
+           text.empty? ? nil : text
+         end
+
+         def extract_thinking_signature(message_data)
+           details = message_data['reasoning_details']
+           return nil unless details.is_a?(Array)
+
+           signature = details.filter_map do |detail|
+             detail['signature'] if detail['signature'].is_a?(String)
+           end.first
+           return signature if signature
+
+           encrypted = details.find { |detail| detail['type'] == 'reasoning.encrypted' && detail['data'].is_a?(String) }
+           encrypted&.dig('data')
+         end
+       end
+     end
+   end
+ end
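For orientation (not part of the diff), a minimal Ruby sketch of the reasoning payload shape this module builds; the model id and token values below are invented.

require 'ruby_llm'

# Illustrative only: mirrors build_reasoning/render_payload above.
thinking = RubyLLM::Thinking::Config.new(effort: :high, budget: 2_048)
thinking.enabled? # => true

# A thinking-enabled request would carry roughly this payload:
payload = {
  model: 'anthropic/claude-sonnet-4',              # hypothetical OpenRouter model id
  messages: [{ role: 'user', content: 'Prove that 17 is prime.' }],
  stream: false,
  reasoning: { effort: 'high', max_tokens: 2_048 } # from build_reasoning(thinking)
}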
data/lib/ruby_llm/providers/openrouter/streaming.rb
@@ -0,0 +1,74 @@
+ # frozen_string_literal: true
+
+ module RubyLLM
+   module Providers
+     class OpenRouter
+       # Streaming methods of the OpenRouter API integration
+       module Streaming
+         module_function
+
+         def stream_url
+           completion_url
+         end
+
+         def build_chunk(data)
+           usage = data['usage'] || {}
+           cached_tokens = usage.dig('prompt_tokens_details', 'cached_tokens')
+           delta = data.dig('choices', 0, 'delta') || {}
+
+           Chunk.new(
+             role: :assistant,
+             model_id: data['model'],
+             content: delta['content'],
+             thinking: Thinking.build(
+               text: extract_thinking_text(delta),
+               signature: extract_thinking_signature(delta)
+             ),
+             tool_calls: OpenAI::Tools.parse_tool_calls(delta['tool_calls'], parse_arguments: false),
+             input_tokens: usage['prompt_tokens'],
+             output_tokens: usage['completion_tokens'],
+             cached_tokens: cached_tokens,
+             cache_creation_tokens: 0,
+             thinking_tokens: usage.dig('completion_tokens_details', 'reasoning_tokens')
+           )
+         end
+
+         def parse_streaming_error(data)
+           OpenAI::Streaming.parse_streaming_error(data)
+         end
+
+         def extract_thinking_text(delta)
+           candidate = delta['reasoning']
+           return candidate if candidate.is_a?(String)
+
+           details = delta['reasoning_details']
+           return nil unless details.is_a?(Array)
+
+           text = details.filter_map do |detail|
+             case detail['type']
+             when 'reasoning.text'
+               detail['text']
+             when 'reasoning.summary'
+               detail['summary']
+             end
+           end.join
+
+           text.empty? ? nil : text
+         end
+
+         def extract_thinking_signature(delta)
+           details = delta['reasoning_details']
+           return nil unless details.is_a?(Array)
+
+           signature = details.filter_map do |detail|
+             detail['signature'] if detail['signature'].is_a?(String)
+           end.first
+           return signature if signature
+
+           encrypted = details.find { |detail| detail['type'] == 'reasoning.encrypted' && detail['data'].is_a?(String) }
+           encrypted&.dig('data')
+         end
+       end
+     end
+   end
+ end
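As a hedged sketch of the reasoning_details shape these extractors expect (the delta hash is invented, and the calls assume the provider files are loaded by the gem's autoloader):

require 'ruby_llm'

# Hypothetical streaming delta in OpenRouter's reasoning_details format.
delta = {
  'reasoning_details' => [
    { 'type' => 'reasoning.text', 'text' => 'Check the base case. ', 'signature' => 'sig-abc' },
    { 'type' => 'reasoning.summary', 'summary' => 'Induction holds.' }
  ]
}

streaming = RubyLLM::Providers::OpenRouter::Streaming
streaming.extract_thinking_text(delta)      # => "Check the base case. Induction holds."
streaming.extract_thinking_signature(delta) # => "sig-abc"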
data/lib/ruby_llm/providers/openrouter.rb
@@ -4,7 +4,9 @@ module RubyLLM
    module Providers
      # OpenRouter API integration.
      class OpenRouter < OpenAI
+       include OpenRouter::Chat
        include OpenRouter::Models
+       include OpenRouter::Streaming

        def api_base
          'https://openrouter.ai/api/v1'
data/lib/ruby_llm/providers/vertexai.rb
@@ -16,7 +16,11 @@ module RubyLLM
        end

        def api_base
-         "https://#{@config.vertexai_location}-aiplatform.googleapis.com/v1beta1"
+         if @config.vertexai_location.to_s == 'global'
+           'https://aiplatform.googleapis.com/v1beta1'
+         else
+           "https://#{@config.vertexai_location}-aiplatform.googleapis.com/v1beta1"
+         end
        end

        def headers
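A small sketch of the endpoint selection above; the location strings are examples, not values from the release.

['global', 'us-central1'].each do |location|
  base = if location == 'global'
           'https://aiplatform.googleapis.com/v1beta1'
         else
           "https://#{location}-aiplatform.googleapis.com/v1beta1"
         end
  puts "#{location}: #{base}"
end
# global: https://aiplatform.googleapis.com/v1beta1
# us-central1: https://us-central1-aiplatform.googleapis.com/v1beta1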
data/lib/ruby_llm/stream_accumulator.rb
@@ -7,11 +7,16 @@ module RubyLLM

      def initialize
        @content = +''
+       @thinking_text = +''
+       @thinking_signature = nil
        @tool_calls = {}
        @input_tokens = nil
        @output_tokens = nil
        @cached_tokens = nil
        @cache_creation_tokens = nil
+       @thinking_tokens = nil
+       @inside_think_tag = false
+       @pending_think_tag = +''
        @latest_tool_call_id = nil
      end

@@ -19,12 +24,8 @@ module RubyLLM
        RubyLLM.logger.debug chunk.inspect if RubyLLM.config.log_stream_debug
        @model_id ||= chunk.model_id

-       if chunk.tool_call?
-         accumulate_tool_calls chunk.tool_calls
-       else
-         @content << (chunk.content || '')
-       end
-
+       handle_chunk_content(chunk)
+       append_thinking_from_chunk(chunk)
        count_tokens chunk
        RubyLLM.logger.debug inspect if RubyLLM.config.log_stream_debug
      end
@@ -33,12 +34,19 @@ module RubyLLM
        Message.new(
          role: :assistant,
          content: content.empty? ? nil : content,
+         thinking: Thinking.build(
+           text: @thinking_text.empty? ? nil : @thinking_text,
+           signature: @thinking_signature
+         ),
+         tokens: Tokens.build(
+           input: @input_tokens,
+           output: @output_tokens,
+           cached: @cached_tokens,
+           cache_creation: @cache_creation_tokens,
+           thinking: @thinking_tokens
+         ),
          model_id: model_id,
          tool_calls: tool_calls_from_stream,
-         input_tokens: @input_tokens,
-         output_tokens: @output_tokens,
-         cached_tokens: @cached_tokens,
-         cache_creation_tokens: @cache_creation_tokens,
          raw: response
        )
      end
@@ -58,12 +66,13 @@ module RubyLLM
          ToolCall.new(
            id: tc.id,
            name: tc.name,
-           arguments: arguments
+           arguments: arguments,
+           thought_signature: tc.thought_signature
          )
        end
      end

-     def accumulate_tool_calls(new_tool_calls)
+     def accumulate_tool_calls(new_tool_calls) # rubocop:disable Metrics/PerceivedComplexity
        RubyLLM.logger.debug "Accumulating tool calls: #{new_tool_calls}" if RubyLLM.config.log_stream_debug
        new_tool_calls.each_value do |tool_call|
          if tool_call.id
@@ -72,12 +81,18 @@ module RubyLLM
            @tool_calls[tool_call.id] = ToolCall.new(
              id: tool_call_id,
              name: tool_call.name,
-             arguments: tool_call_arguments
+             arguments: tool_call_arguments,
+             thought_signature: tool_call.thought_signature
            )
            @latest_tool_call_id = tool_call.id
          else
            existing = @tool_calls[@latest_tool_call_id]
-           existing.arguments << tool_call.arguments if existing
+           if existing
+             existing.arguments << tool_call.arguments
+             if tool_call.thought_signature && existing.thought_signature.nil?
+               existing.thought_signature = tool_call.thought_signature
+             end
+           end
          end
        end
      end
@@ -96,6 +111,88 @@ module RubyLLM
        @output_tokens = chunk.output_tokens if chunk.output_tokens
        @cached_tokens = chunk.cached_tokens if chunk.cached_tokens
        @cache_creation_tokens = chunk.cache_creation_tokens if chunk.cache_creation_tokens
+       @thinking_tokens = chunk.thinking_tokens if chunk.thinking_tokens
+     end
+
+     def handle_chunk_content(chunk)
+       return accumulate_tool_calls(chunk.tool_calls) if chunk.tool_call?
+
+       content_text = chunk.content || ''
+       if content_text.is_a?(String)
+         append_text_with_thinking(content_text)
+       else
+         @content << content_text.to_s
+       end
+     end
+
+     def append_text_with_thinking(text)
+       content_chunk, thinking_chunk = extract_think_tags(text)
+       @content << content_chunk
+       @thinking_text << thinking_chunk if thinking_chunk
+     end
+
+     def append_thinking_from_chunk(chunk)
+       thinking = chunk.thinking
+       return unless thinking
+
+       @thinking_text << thinking.text.to_s if thinking.text
+       @thinking_signature ||= thinking.signature # rubocop:disable Naming/MemoizedInstanceVariableName
+     end
+
+     def extract_think_tags(text)
+       start_tag = '<think>'
+       end_tag = '</think>'
+       remaining = @pending_think_tag + text
+       @pending_think_tag = +''
+
+       output = +''
+       thinking = +''
+
+       until remaining.empty?
+         remaining = if @inside_think_tag
+                       consume_think_content(remaining, end_tag, thinking)
+                     else
+                       consume_non_think_content(remaining, start_tag, output)
+                     end
+       end
+
+       [output, thinking.empty? ? nil : thinking]
+     end
+
+     def consume_think_content(remaining, end_tag, thinking)
+       end_index = remaining.index(end_tag)
+       if end_index
+         thinking << remaining.slice(0, end_index)
+         @inside_think_tag = false
+         remaining.slice((end_index + end_tag.length)..) || +''
+       else
+         suffix_len = longest_suffix_prefix(remaining, end_tag)
+         thinking << remaining.slice(0, remaining.length - suffix_len)
+         @pending_think_tag = remaining.slice(-suffix_len, suffix_len)
+         +''
+       end
+     end
+
+     def consume_non_think_content(remaining, start_tag, output)
+       start_index = remaining.index(start_tag)
+       if start_index
+         output << remaining.slice(0, start_index)
+         @inside_think_tag = true
+         remaining.slice((start_index + start_tag.length)..) || +''
+       else
+         suffix_len = longest_suffix_prefix(remaining, start_tag)
+         output << remaining.slice(0, remaining.length - suffix_len)
+         @pending_think_tag = remaining.slice(-suffix_len, suffix_len)
+         +''
+       end
+     end
+
+     def longest_suffix_prefix(text, tag)
+       max = [text.length, tag.length - 1].min
+       max.downto(1) do |len|
+         return len if text.end_with?(tag[0, len])
+       end
+       0
      end
    end
  end
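A toy standalone re-implementation (not the gem's API) of the <think> splitting idea above: when a streamed chunk ends mid-tag, the longest suffix that is also a prefix of the tag is held back in a buffer, so a tag split across chunks is still routed to thinking rather than content.

# Illustrative only; class and method names here are made up for the sketch.
class ThinkSplitter
  START_TAG = '<think>'
  END_TAG = '</think>'

  def initialize
    @inside = false
    @pending = +''
  end

  # Returns [visible_content, thinking_content] for one streamed chunk.
  def feed(text)
    remaining = @pending + text
    @pending = +''
    out = +''
    thought = +''
    until remaining.empty?
      tag = @inside ? END_TAG : START_TAG
      idx = remaining.index(tag)
      if idx
        (@inside ? thought : out) << remaining.slice(0, idx)
        @inside = !@inside
        remaining = remaining.slice((idx + tag.length)..) || +''
      else
        keep = longest_suffix_prefix(remaining, tag)
        (@inside ? thought : out) << remaining.slice(0, remaining.length - keep)
        @pending = keep.zero? ? +'' : remaining.slice(-keep, keep)
        remaining = +''
      end
    end
    [out, thought]
  end

  def longest_suffix_prefix(text, tag)
    [text.length, tag.length - 1].min.downto(1) { |len| return len if text.end_with?(tag[0, len]) }
    0
  end
end

splitter = ThinkSplitter.new
splitter.feed('Hello <thi')   # => ["Hello ", ""]   ("<thi" stays buffered)
splitter.feed('nk>plan')      # => ["", "plan"]
splitter.feed('</think> bye') # => [" bye", ""]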
data/lib/ruby_llm/streaming.rb
@@ -29,7 +29,7 @@ module RubyLLM
      end

      def handle_stream(&block)
-       to_json_stream do |data|
+       build_on_data_handler do |data|
          block.call(build_chunk(data)) if data
        end
      end
@@ -40,19 +40,15 @@ module RubyLLM
        Faraday::VERSION.start_with?('1')
      end

-     def to_json_stream(&)
+     def build_on_data_handler(&handler)
        buffer = +''
        parser = EventStreamParser::Parser.new

-       create_stream_processor(parser, buffer, &)
-     end
-
-     def create_stream_processor(parser, buffer, &)
-       if faraday_1?
-         legacy_stream_processor(parser, &)
-       else
-         stream_processor(parser, buffer, &)
-       end
+       FaradayHandlers.build(
+         faraday_v1: faraday_1?,
+         on_chunk: ->(chunk, env) { process_stream_chunk(chunk, parser, env, &handler) },
+         on_failed_response: ->(chunk, env) { handle_failed_response(chunk, buffer, env) }
+       )
      end

      def process_stream_chunk(chunk, parser, env, &)
@@ -67,22 +63,6 @@ module RubyLLM
        end
      end

-     def legacy_stream_processor(parser, &block)
-       proc do |chunk, _size|
-         process_stream_chunk(chunk, parser, nil, &block)
-       end
-     end
-
-     def stream_processor(parser, buffer, &block)
-       proc do |chunk, _bytes, env|
-         if env&.status == 200
-           process_stream_chunk(chunk, parser, env, &block)
-         else
-           handle_failed_response(chunk, buffer, env)
-         end
-       end
-     end
-
      def error_chunk?(chunk)
        chunk.start_with?('event: error')
      end
@@ -92,30 +72,18 @@ module RubyLLM
      end

      def handle_json_error_chunk(chunk, env)
-       parsed_data = JSON.parse(chunk)
-       status, _message = parse_streaming_error(parsed_data.to_json)
-       error_response = build_stream_error_response(parsed_data, env, status)
-       ErrorMiddleware.parse_error(provider: self, response: error_response)
-     rescue JSON::ParserError => e
-       RubyLLM.logger.debug "Failed to parse JSON error chunk: #{e.message}"
+       parse_error_from_json(chunk, env, 'Failed to parse JSON error chunk')
      end

      def handle_error_chunk(chunk, env)
        error_data = chunk.split("\n")[1].delete_prefix('data: ')
-       parsed_data = JSON.parse(error_data)
-       status, _message = parse_streaming_error(parsed_data.to_json)
-       error_response = build_stream_error_response(parsed_data, env, status)
-       ErrorMiddleware.parse_error(provider: self, response: error_response)
-     rescue JSON::ParserError => e
-       RubyLLM.logger.debug "Failed to parse error chunk: #{e.message}"
+       parse_error_from_json(error_data, env, 'Failed to parse error chunk')
      end

      def handle_failed_response(chunk, buffer, env)
        buffer << chunk
        error_data = JSON.parse(buffer)
-       status, _message = parse_streaming_error(error_data.to_json)
-       error_response = env.merge(body: error_data, status: status || env.status)
-       ErrorMiddleware.parse_error(provider: self, response: error_response)
+       handle_parsed_error(error_data, env)
      rescue JSON::ParserError
        RubyLLM.logger.debug "Accumulating error chunk: #{chunk}"
      end
@@ -135,20 +103,13 @@ module RubyLLM
        parsed = JSON.parse(data)
        return parsed unless parsed.is_a?(Hash) && parsed.key?('error')

-       status, _message = parse_streaming_error(parsed.to_json)
-       error_response = build_stream_error_response(parsed, env, status)
-       ErrorMiddleware.parse_error(provider: self, response: error_response)
+       handle_parsed_error(parsed, env)
      rescue JSON::ParserError => e
        RubyLLM.logger.debug "Failed to parse data chunk: #{e.message}"
      end

      def handle_error_event(data, env)
-       parsed_data = JSON.parse(data)
-       status, _message = parse_streaming_error(parsed_data.to_json)
-       error_response = build_stream_error_response(parsed_data, env, status)
-       ErrorMiddleware.parse_error(provider: self, response: error_response)
-     rescue JSON::ParserError => e
-       RubyLLM.logger.debug "Failed to parse error event: #{e.message}"
+       parse_error_from_json(data, env, 'Failed to parse error event')
      end

      def parse_streaming_error(data)
@@ -159,6 +120,19 @@ module RubyLLM
        [500, "Failed to parse error: #{data}"]
      end

+     def handle_parsed_error(parsed_data, env)
+       status, _message = parse_streaming_error(parsed_data.to_json)
+       error_response = build_stream_error_response(parsed_data, env, status)
+       ErrorMiddleware.parse_error(provider: self, response: error_response)
+     end
+
+     def parse_error_from_json(data, env, error_message)
+       parsed_data = JSON.parse(data)
+       handle_parsed_error(parsed_data, env)
+     rescue JSON::ParserError => e
+       RubyLLM.logger.debug "#{error_message}: #{e.message}"
+     end
+
      def build_stream_error_response(parsed_data, env, status)
        error_status = status || env&.status || 500

@@ -168,5 +142,34 @@ module RubyLLM
          env.merge(body: parsed_data, status: error_status)
        end
      end
+
+     # Builds Faraday on_data handlers for different major versions.
+     module FaradayHandlers
+       module_function
+
+       def build(faraday_v1:, on_chunk:, on_failed_response:)
+         if faraday_v1
+           v1_on_data(on_chunk)
+         else
+           v2_on_data(on_chunk, on_failed_response)
+         end
+       end
+
+       def v1_on_data(on_chunk)
+         proc do |chunk, _size|
+           on_chunk.call(chunk, nil)
+         end
+       end
+
+       def v2_on_data(on_chunk, on_failed_response)
+         proc do |chunk, _bytes, env|
+           if env&.status == 200
+             on_chunk.call(chunk, env)
+           else
+             on_failed_response.call(chunk, env)
+           end
+         end
+       end
+     end
    end
  end
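A toy illustration with plain procs (not the gem's constants) of why the two builders above differ: Faraday 1.x yields (chunk, size) to on_data, while Faraday 2.x yields (chunk, bytes_received, env), which is what lets the v2 handler branch on env.status.

on_chunk  = ->(chunk, env) { puts "#{chunk.bytesize} bytes (status #{env ? env.status : 'unknown'})" }
on_failed = ->(_chunk, env) { warn "upstream error: HTTP #{env.status}" }

# Faraday 1.x signature: on_data.call(chunk, size)
v1_handler = proc { |chunk, _size| on_chunk.call(chunk, nil) }

# Faraday 2.x signature: on_data.call(chunk, bytes_received, env)
v2_handler = proc do |chunk, _bytes, env|
  env&.status == 200 ? on_chunk.call(chunk, env) : on_failed.call(chunk, env)
end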
data/lib/ruby_llm/thinking.rb
@@ -0,0 +1,49 @@
+ # frozen_string_literal: true
+
+ module RubyLLM
+   # Represents provider thinking output.
+   class Thinking
+     attr_reader :text, :signature
+
+     def initialize(text: nil, signature: nil)
+       @text = text
+       @signature = signature
+     end
+
+     def self.build(text: nil, signature: nil)
+       text = nil if text.is_a?(String) && text.empty?
+       signature = nil if signature.is_a?(String) && signature.empty?
+
+       return nil if text.nil? && signature.nil?
+
+       new(text: text, signature: signature)
+     end
+
+     def pretty_print(printer)
+       printer.object_group(self) do
+         printer.breakable
+         printer.text 'text='
+         printer.pp text
+         printer.comma_breakable
+         printer.text 'signature='
+         printer.pp(signature ? '[REDACTED]' : nil)
+       end
+     end
+   end
+
+   class Thinking
+     # Normalized config for thinking across providers.
+     class Config
+       attr_reader :effort, :budget
+
+       def initialize(effort: nil, budget: nil)
+         @effort = effort.is_a?(Symbol) ? effort.to_s : effort
+         @budget = budget
+       end
+
+       def enabled?
+         !effort.nil? || !budget.nil?
+       end
+     end
+   end
+ end
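A minimal usage sketch of the new value objects (assuming the gem's autoloading picks up the new file):

require 'ruby_llm'

RubyLLM::Thinking.build(text: '', signature: nil) # => nil (empty strings collapse to nil)
t = RubyLLM::Thinking.build(text: 'step-by-step plan', signature: 'sig-123')
t.text # => "step-by-step plan"

config = RubyLLM::Thinking::Config.new(effort: :medium, budget: 1_024)
config.effort   # => "medium" (symbols are normalized to strings)
config.enabled? # => true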
data/lib/ruby_llm/tokens.rb
@@ -0,0 +1,47 @@
+ # frozen_string_literal: true
+
+ module RubyLLM
+   # Represents token usage for a response.
+   class Tokens
+     attr_reader :input, :output, :cached, :cache_creation, :thinking
+
+     # rubocop:disable Metrics/ParameterLists
+     def initialize(input: nil, output: nil, cached: nil, cache_creation: nil, thinking: nil, reasoning: nil)
+       @input = input
+       @output = output
+       @cached = cached
+       @cache_creation = cache_creation
+       @thinking = thinking || reasoning
+     end
+     # rubocop:enable Metrics/ParameterLists
+
+     # rubocop:disable Metrics/ParameterLists
+     def self.build(input: nil, output: nil, cached: nil, cache_creation: nil, thinking: nil, reasoning: nil)
+       return nil if [input, output, cached, cache_creation, thinking, reasoning].all?(&:nil?)
+
+       new(
+         input: input,
+         output: output,
+         cached: cached,
+         cache_creation: cache_creation,
+         thinking: thinking,
+         reasoning: reasoning
+       )
+     end
+     # rubocop:enable Metrics/ParameterLists
+
+     def to_h
+       {
+         input_tokens: input,
+         output_tokens: output,
+         cached_tokens: cached,
+         cache_creation_tokens: cache_creation,
+         thinking_tokens: thinking
+       }.compact
+     end
+
+     def reasoning
+       thinking
+     end
+   end
+ end
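And a similar sketch for Tokens: build returns nil when every count is nil, to_h drops nil counts, and #reasoning aliases #thinking. The numbers below are invented.

require 'ruby_llm'

RubyLLM::Tokens.build # => nil
tokens = RubyLLM::Tokens.build(input: 120, output: 45, thinking: 30)
tokens.to_h      # => { input_tokens: 120, output_tokens: 45, thinking_tokens: 30 }
tokens.reasoning # => 30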