ruby_llm 1.9.2 → 1.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. checksums.yaml +4 -4
  2. data/README.md +5 -4
  3. data/lib/generators/ruby_llm/install/templates/create_messages_migration.rb.tt +3 -0
  4. data/lib/generators/ruby_llm/install/templates/create_tool_calls_migration.rb.tt +1 -0
  5. data/lib/generators/ruby_llm/upgrade_to_v1_10/templates/add_v1_10_message_columns.rb.tt +19 -0
  6. data/lib/generators/ruby_llm/upgrade_to_v1_10/upgrade_to_v1_10_generator.rb +50 -0
  7. data/lib/ruby_llm/active_record/acts_as_legacy.rb +5 -1
  8. data/lib/ruby_llm/active_record/chat_methods.rb +12 -0
  9. data/lib/ruby_llm/active_record/message_methods.rb +41 -8
  10. data/lib/ruby_llm/aliases.json +4 -16
  11. data/lib/ruby_llm/chat.rb +10 -7
  12. data/lib/ruby_llm/configuration.rb +2 -1
  13. data/lib/ruby_llm/message.rb +37 -11
  14. data/lib/ruby_llm/models.json +1902 -1785
  15. data/lib/ruby_llm/models.rb +134 -12
  16. data/lib/ruby_llm/provider.rb +9 -4
  17. data/lib/ruby_llm/providers/anthropic/chat.rb +128 -13
  18. data/lib/ruby_llm/providers/anthropic/media.rb +2 -2
  19. data/lib/ruby_llm/providers/anthropic/streaming.rb +25 -1
  20. data/lib/ruby_llm/providers/bedrock/chat.rb +67 -15
  21. data/lib/ruby_llm/providers/bedrock/streaming/content_extraction.rb +59 -2
  22. data/lib/ruby_llm/providers/bedrock/streaming/payload_processing.rb +5 -0
  23. data/lib/ruby_llm/providers/gemini/chat.rb +69 -3
  24. data/lib/ruby_llm/providers/gemini/streaming.rb +32 -1
  25. data/lib/ruby_llm/providers/gemini/tools.rb +16 -3
  26. data/lib/ruby_llm/providers/gpustack/chat.rb +1 -1
  27. data/lib/ruby_llm/providers/mistral/chat.rb +58 -1
  28. data/lib/ruby_llm/providers/ollama/chat.rb +1 -1
  29. data/lib/ruby_llm/providers/openai/capabilities.rb +6 -2
  30. data/lib/ruby_llm/providers/openai/chat.rb +87 -3
  31. data/lib/ruby_llm/providers/openai/media.rb +1 -1
  32. data/lib/ruby_llm/providers/openai/streaming.rb +11 -3
  33. data/lib/ruby_llm/providers/openai/temperature.rb +28 -0
  34. data/lib/ruby_llm/providers/openai.rb +1 -1
  35. data/lib/ruby_llm/providers/openrouter/chat.rb +154 -0
  36. data/lib/ruby_llm/providers/openrouter/streaming.rb +74 -0
  37. data/lib/ruby_llm/providers/openrouter.rb +2 -0
  38. data/lib/ruby_llm/providers/vertexai.rb +5 -1
  39. data/lib/ruby_llm/providers/xai/chat.rb +15 -0
  40. data/lib/ruby_llm/providers/xai/models.rb +75 -0
  41. data/lib/ruby_llm/providers/xai.rb +28 -0
  42. data/lib/ruby_llm/stream_accumulator.rb +111 -14
  43. data/lib/ruby_llm/streaming.rb +54 -51
  44. data/lib/ruby_llm/thinking.rb +49 -0
  45. data/lib/ruby_llm/tokens.rb +47 -0
  46. data/lib/ruby_llm/tool_call.rb +6 -3
  47. data/lib/ruby_llm/version.rb +1 -1
  48. data/lib/ruby_llm.rb +10 -8
  49. data/lib/tasks/models.rake +20 -12
  50. metadata +15 -5
data/lib/ruby_llm/providers/openrouter/chat.rb
@@ -0,0 +1,154 @@
+ # frozen_string_literal: true
+
+ module RubyLLM
+   module Providers
+     class OpenRouter
+       # Chat methods of the OpenRouter API integration
+       module Chat
+         module_function
+
+         def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil, thinking: nil) # rubocop:disable Metrics/ParameterLists
+           payload = {
+             model: model.id,
+             messages: format_messages(messages),
+             stream: stream
+           }
+
+           payload[:temperature] = temperature unless temperature.nil?
+           payload[:tools] = tools.map { |_, tool| OpenAI::Tools.tool_for(tool) } if tools.any?
+
+           if schema
+             strict = schema[:strict] != false
+             payload[:response_format] = {
+               type: 'json_schema',
+               json_schema: {
+                 name: 'response',
+                 schema: schema,
+                 strict: strict
+               }
+             }
+           end
+
+           reasoning = build_reasoning(thinking)
+           payload[:reasoning] = reasoning if reasoning
+
+           payload[:stream_options] = { include_usage: true } if stream
+           payload
+         end
+
+         def parse_completion_response(response)
+           data = response.body
+           return if data.empty?
+
+           raise Error.new(response, data.dig('error', 'message')) if data.dig('error', 'message')
+
+           message_data = data.dig('choices', 0, 'message')
+           return unless message_data
+
+           usage = data['usage'] || {}
+           cached_tokens = usage.dig('prompt_tokens_details', 'cached_tokens')
+           thinking_tokens = usage.dig('completion_tokens_details', 'reasoning_tokens')
+           thinking_text = extract_thinking_text(message_data)
+           thinking_signature = extract_thinking_signature(message_data)
+
+           Message.new(
+             role: :assistant,
+             content: message_data['content'],
+             thinking: Thinking.build(text: thinking_text, signature: thinking_signature),
+             tool_calls: OpenAI::Tools.parse_tool_calls(message_data['tool_calls']),
+             input_tokens: usage['prompt_tokens'],
+             output_tokens: usage['completion_tokens'],
+             cached_tokens: cached_tokens,
+             cache_creation_tokens: 0,
+             thinking_tokens: thinking_tokens,
+             model_id: data['model'],
+             raw: response
+           )
+         end
+
+         def format_messages(messages)
+           messages.map do |msg|
+             {
+               role: format_role(msg.role),
+               content: OpenAI::Media.format_content(msg.content),
+               tool_calls: OpenAI::Tools.format_tool_calls(msg.tool_calls),
+               tool_call_id: msg.tool_call_id
+             }.compact.merge(format_thinking(msg))
+           end
+         end
+
+         def format_role(role)
+           case role
+           when :system
+             @config.openai_use_system_role ? 'system' : 'developer'
+           else
+             role.to_s
+           end
+         end
+
+         def build_reasoning(thinking)
+           return nil unless thinking&.enabled?
+
+           reasoning = {}
+           reasoning[:effort] = thinking.effort if thinking.respond_to?(:effort) && thinking.effort
+           reasoning[:max_tokens] = thinking.budget if thinking.respond_to?(:budget) && thinking.budget
+           reasoning[:enabled] = true if reasoning.empty?
+           reasoning
+         end
+
+         def format_thinking(msg)
+           thinking = msg.thinking
+           return {} unless thinking && msg.role == :assistant
+
+           details = []
+           if thinking.text
+             details << {
+               type: 'reasoning.text',
+               text: thinking.text,
+               signature: thinking.signature
+             }.compact
+           elsif thinking.signature
+             details << {
+               type: 'reasoning.encrypted',
+               data: thinking.signature
+             }
+           end
+
+           details.empty? ? {} : { reasoning_details: details }
+         end
+
+         def extract_thinking_text(message_data)
+           candidate = message_data['reasoning']
+           return candidate if candidate.is_a?(String)
+
+           details = message_data['reasoning_details']
+           return nil unless details.is_a?(Array)
+
+           text = details.filter_map do |detail|
+             case detail['type']
+             when 'reasoning.text'
+               detail['text']
+             when 'reasoning.summary'
+               detail['summary']
+             end
+           end.join
+
+           text.empty? ? nil : text
+         end
+
+         def extract_thinking_signature(message_data)
+           details = message_data['reasoning_details']
+           return nil unless details.is_a?(Array)
+
+           signature = details.filter_map do |detail|
+             detail['signature'] if detail['signature'].is_a?(String)
+           end.first
+           return signature if signature
+
+           encrypted = details.find { |detail| detail['type'] == 'reasoning.encrypted' && detail['data'].is_a?(String) }
+           encrypted&.dig('data')
+         end
+       end
+     end
+   end
+ end
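For orientation, a minimal sketch of the request payload render_payload assembles when thinking is enabled. The model id and effort value here are hypothetical, not taken from the diff:

    # Hypothetical payload shape; compare with render_payload above.
    payload = {
      model: 'openai/o3-mini',                     # assumed model id
      messages: [{ role: 'user', content: 'Hi' }],
      stream: false,
      reasoning: { effort: :high }                 # produced by build_reasoning
    }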
data/lib/ruby_llm/providers/openrouter/streaming.rb
@@ -0,0 +1,74 @@
+ # frozen_string_literal: true
+
+ module RubyLLM
+   module Providers
+     class OpenRouter
+       # Streaming methods of the OpenRouter API integration
+       module Streaming
+         module_function
+
+         def stream_url
+           completion_url
+         end
+
+         def build_chunk(data)
+           usage = data['usage'] || {}
+           cached_tokens = usage.dig('prompt_tokens_details', 'cached_tokens')
+           delta = data.dig('choices', 0, 'delta') || {}
+
+           Chunk.new(
+             role: :assistant,
+             model_id: data['model'],
+             content: delta['content'],
+             thinking: Thinking.build(
+               text: extract_thinking_text(delta),
+               signature: extract_thinking_signature(delta)
+             ),
+             tool_calls: OpenAI::Tools.parse_tool_calls(delta['tool_calls'], parse_arguments: false),
+             input_tokens: usage['prompt_tokens'],
+             output_tokens: usage['completion_tokens'],
+             cached_tokens: cached_tokens,
+             cache_creation_tokens: 0,
+             thinking_tokens: usage.dig('completion_tokens_details', 'reasoning_tokens')
+           )
+         end
+
+         def parse_streaming_error(data)
+           OpenAI::Streaming.parse_streaming_error(data)
+         end
+
+         def extract_thinking_text(delta)
+           candidate = delta['reasoning']
+           return candidate if candidate.is_a?(String)
+
+           details = delta['reasoning_details']
+           return nil unless details.is_a?(Array)
+
+           text = details.filter_map do |detail|
+             case detail['type']
+             when 'reasoning.text'
+               detail['text']
+             when 'reasoning.summary'
+               detail['summary']
+             end
+           end.join
+
+           text.empty? ? nil : text
+         end
+
+         def extract_thinking_signature(delta)
+           details = delta['reasoning_details']
+           return nil unless details.is_a?(Array)
+
+           signature = details.filter_map do |detail|
+             detail['signature'] if detail['signature'].is_a?(String)
+           end.first
+           return signature if signature
+
+           encrypted = details.find { |detail| detail['type'] == 'reasoning.encrypted' && detail['data'].is_a?(String) }
+           encrypted&.dig('data')
+         end
+       end
+     end
+   end
+ end
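A sketch of the delta shape these extractors expect. The field names follow the reasoning_details handling above; the values are made up:

    # Hypothetical streamed delta. extract_thinking_text(delta) would return
    # 'Checking the docs...' and extract_thinking_signature(delta) 'sig-123'.
    delta = {
      'reasoning_details' => [
        { 'type' => 'reasoning.text', 'text' => 'Checking the docs...', 'signature' => 'sig-123' }
      ]
    }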
data/lib/ruby_llm/providers/openrouter.rb
@@ -4,7 +4,9 @@ module RubyLLM
  module Providers
    # OpenRouter API integration.
    class OpenRouter < OpenAI
+     include OpenRouter::Chat
      include OpenRouter::Models
+     include OpenRouter::Streaming

      def api_base
        'https://openrouter.ai/api/v1'
data/lib/ruby_llm/providers/vertexai.rb
@@ -16,7 +16,11 @@ module RubyLLM
    end

    def api_base
-     "https://#{@config.vertexai_location}-aiplatform.googleapis.com/v1beta1"
+     if @config.vertexai_location.to_s == 'global'
+       'https://aiplatform.googleapis.com/v1beta1'
+     else
+       "https://#{@config.vertexai_location}-aiplatform.googleapis.com/v1beta1"
+     end
    end

    def headers
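A quick check of the new branch, with assumed configuration values:

    # vertexai_location = 'global'      => 'https://aiplatform.googleapis.com/v1beta1'
    # vertexai_location = 'us-central1' => 'https://us-central1-aiplatform.googleapis.com/v1beta1'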
data/lib/ruby_llm/providers/xai/chat.rb
@@ -0,0 +1,15 @@
+ # frozen_string_literal: true
+
+ module RubyLLM
+   module Providers
+     class XAI
+       # Chat implementation for xAI
+       # https://docs.x.ai/docs/api-reference#chat-completions
+       module Chat
+         def format_role(role)
+           role.to_s
+         end
+       end
+     end
+   end
+ end
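This override bypasses the OpenAI-style behavior, where format_role can map :system to 'developer' (see the OpenRouter chat module above); xAI receives role names verbatim:

    format_role(:system) # => 'system'
    format_role(:user)   # => 'user'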
data/lib/ruby_llm/providers/xai/models.rb
@@ -0,0 +1,75 @@
+ # frozen_string_literal: true
+
+ module RubyLLM
+   module Providers
+     class XAI
+       # Models metadata for xAI list models.
+       module Models
+         module_function
+
+         IMAGE_MODELS = %w[grok-2-image-1212].freeze
+         VISION_MODELS = %w[
+           grok-2-vision-1212
+           grok-4-0709
+           grok-4-fast-non-reasoning
+           grok-4-fast-reasoning
+           grok-4-1-fast-non-reasoning
+           grok-4-1-fast-reasoning
+         ].freeze
+         REASONING_MODELS = %w[
+           grok-3-mini
+           grok-4-0709
+           grok-4-fast-reasoning
+           grok-4-1-fast-reasoning
+           grok-code-fast-1
+         ].freeze
+
+         def parse_list_models_response(response, slug, _capabilities)
+           Array(response.body['data']).map do |model_data|
+             model_id = model_data['id']
+
+             Model::Info.new(
+               id: model_id,
+               name: format_display_name(model_id),
+               provider: slug,
+               family: 'grok',
+               created_at: model_data['created'] ? Time.at(model_data['created']) : nil,
+               context_window: nil,
+               max_output_tokens: nil,
+               modalities: modalities_for(model_id),
+               capabilities: capabilities_for(model_id),
+               pricing: {},
+               metadata: {
+                 object: model_data['object'],
+                 owned_by: model_data['owned_by']
+               }.compact
+             )
+           end
+         end
+
+         def modalities_for(model_id)
+           if IMAGE_MODELS.include?(model_id)
+             { input: ['text'], output: ['image'] }
+           else
+             input = ['text']
+             input << 'image' if VISION_MODELS.include?(model_id)
+             { input: input, output: ['text'] }
+           end
+         end
+
+         def capabilities_for(model_id)
+           return [] if IMAGE_MODELS.include?(model_id)
+
+           capabilities = %w[streaming function_calling structured_output]
+           capabilities << 'reasoning' if REASONING_MODELS.include?(model_id)
+           capabilities << 'vision' if VISION_MODELS.include?(model_id)
+           capabilities
+         end
+
+         def format_display_name(model_id)
+           model_id.tr('-', ' ').split.map(&:capitalize).join(' ')
+         end
+       end
+     end
+   end
+ end
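The helpers above are easy to verify against the constant lists; for example:

    format_display_name('grok-4-fast-reasoning')
    # => 'Grok 4 Fast Reasoning'
    capabilities_for('grok-4-fast-reasoning')
    # => ['streaming', 'function_calling', 'structured_output', 'reasoning', 'vision']
    capabilities_for('grok-2-image-1212')
    # => []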
data/lib/ruby_llm/providers/xai.rb
@@ -0,0 +1,28 @@
+ # frozen_string_literal: true
+
+ module RubyLLM
+   module Providers
+     # xAI API integration
+     class XAI < OpenAI
+       include XAI::Chat
+       include XAI::Models
+
+       def api_base
+         'https://api.x.ai/v1'
+       end
+
+       def headers
+         {
+           'Authorization' => "Bearer #{@config.xai_api_key}",
+           'Content-Type' => 'application/json'
+         }
+       end
+
+       class << self
+         def configuration_requirements
+           %i[xai_api_key]
+         end
+       end
+     end
+   end
+ end
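Wiring this up should follow the gem's usual configuration pattern; a sketch, assuming the standard RubyLLM.configure block and a model id taken from the Models constants above:

    RubyLLM.configure do |config|
      config.xai_api_key = ENV['XAI_API_KEY'] # required per configuration_requirements
    end

    chat = RubyLLM.chat(model: 'grok-3-mini', provider: :xai)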
data/lib/ruby_llm/stream_accumulator.rb
@@ -7,11 +7,16 @@ module RubyLLM

    def initialize
      @content = +''
+     @thinking_text = +''
+     @thinking_signature = nil
      @tool_calls = {}
      @input_tokens = nil
      @output_tokens = nil
      @cached_tokens = nil
      @cache_creation_tokens = nil
+     @thinking_tokens = nil
+     @inside_think_tag = false
+     @pending_think_tag = +''
      @latest_tool_call_id = nil
    end

@@ -19,12 +24,8 @@
      RubyLLM.logger.debug chunk.inspect if RubyLLM.config.log_stream_debug
      @model_id ||= chunk.model_id

-     if chunk.tool_call?
-       accumulate_tool_calls chunk.tool_calls
-     else
-       @content << (chunk.content || '')
-     end
-
+     handle_chunk_content(chunk)
+     append_thinking_from_chunk(chunk)
      count_tokens chunk
      RubyLLM.logger.debug inspect if RubyLLM.config.log_stream_debug
    end
@@ -33,12 +34,19 @@
      Message.new(
        role: :assistant,
        content: content.empty? ? nil : content,
+       thinking: Thinking.build(
+         text: @thinking_text.empty? ? nil : @thinking_text,
+         signature: @thinking_signature
+       ),
+       tokens: Tokens.build(
+         input: @input_tokens,
+         output: @output_tokens,
+         cached: @cached_tokens,
+         cache_creation: @cache_creation_tokens,
+         thinking: @thinking_tokens
+       ),
        model_id: model_id,
        tool_calls: tool_calls_from_stream,
-       input_tokens: @input_tokens,
-       output_tokens: @output_tokens,
-       cached_tokens: @cached_tokens,
-       cache_creation_tokens: @cache_creation_tokens,
        raw: response
      )
    end
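The removed per-keyword token arguments are now grouped into a single value object (see the new data/lib/ruby_llm/tokens.rb, +47 lines, in the file list above); a sketch of the equivalent call with made-up counts:

    Tokens.build(input: 120, output: 48, cached: 0, cache_creation: 0, thinking: 16)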
@@ -58,12 +66,13 @@
        ToolCall.new(
          id: tc.id,
          name: tc.name,
-         arguments: arguments
+         arguments: arguments,
+         thought_signature: tc.thought_signature
        )
      end
    end

-   def accumulate_tool_calls(new_tool_calls)
+   def accumulate_tool_calls(new_tool_calls) # rubocop:disable Metrics/PerceivedComplexity
      RubyLLM.logger.debug "Accumulating tool calls: #{new_tool_calls}" if RubyLLM.config.log_stream_debug
      new_tool_calls.each_value do |tool_call|
        if tool_call.id
@@ -72,12 +81,18 @@
          @tool_calls[tool_call.id] = ToolCall.new(
            id: tool_call_id,
            name: tool_call.name,
-           arguments: tool_call_arguments
+           arguments: tool_call_arguments,
+           thought_signature: tool_call.thought_signature
          )
          @latest_tool_call_id = tool_call.id
        else
          existing = @tool_calls[@latest_tool_call_id]
-         existing.arguments << tool_call.arguments if existing
+         if existing
+           existing.arguments << tool_call.arguments
+           if tool_call.thought_signature && existing.thought_signature.nil?
+             existing.thought_signature = tool_call.thought_signature
+           end
+         end
        end
      end
    end
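Streaming providers typically send a tool call's id and name once, then argument fragments without an id; the branches above stitch these back together. A hypothetical fragment sequence:

    # chunk 1: id 'call_1', name 'get_weather', arguments ''
    # chunk 2: id nil, arguments '{"city":'
    # chunk 3: id nil, arguments '"Berlin"}'
    # A late-arriving thought_signature is backfilled once, guarded by
    # existing.thought_signature.nil?.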
@@ -96,6 +111,88 @@
      @output_tokens = chunk.output_tokens if chunk.output_tokens
      @cached_tokens = chunk.cached_tokens if chunk.cached_tokens
      @cache_creation_tokens = chunk.cache_creation_tokens if chunk.cache_creation_tokens
+     @thinking_tokens = chunk.thinking_tokens if chunk.thinking_tokens
    end
+
+   def handle_chunk_content(chunk)
+     return accumulate_tool_calls(chunk.tool_calls) if chunk.tool_call?
+
+     content_text = chunk.content || ''
+     if content_text.is_a?(String)
+       append_text_with_thinking(content_text)
+     else
+       @content << content_text.to_s
+     end
+   end
+
+   def append_text_with_thinking(text)
+     content_chunk, thinking_chunk = extract_think_tags(text)
+     @content << content_chunk
+     @thinking_text << thinking_chunk if thinking_chunk
+   end
+
+   def append_thinking_from_chunk(chunk)
+     thinking = chunk.thinking
+     return unless thinking
+
+     @thinking_text << thinking.text.to_s if thinking.text
+     @thinking_signature ||= thinking.signature # rubocop:disable Naming/MemoizedInstanceVariableName
+   end
+
+   def extract_think_tags(text)
+     start_tag = '<think>'
+     end_tag = '</think>'
+     remaining = @pending_think_tag + text
+     @pending_think_tag = +''
+
+     output = +''
+     thinking = +''
+
+     until remaining.empty?
+       remaining = if @inside_think_tag
+                     consume_think_content(remaining, end_tag, thinking)
+                   else
+                     consume_non_think_content(remaining, start_tag, output)
+                   end
+     end
+
+     [output, thinking.empty? ? nil : thinking]
+   end
+
+   def consume_think_content(remaining, end_tag, thinking)
+     end_index = remaining.index(end_tag)
+     if end_index
+       thinking << remaining.slice(0, end_index)
+       @inside_think_tag = false
+       remaining.slice((end_index + end_tag.length)..) || +''
+     else
+       suffix_len = longest_suffix_prefix(remaining, end_tag)
+       thinking << remaining.slice(0, remaining.length - suffix_len)
+       @pending_think_tag = remaining.slice(-suffix_len, suffix_len)
+       +''
+     end
+   end
+
+   def consume_non_think_content(remaining, start_tag, output)
+     start_index = remaining.index(start_tag)
+     if start_index
+       output << remaining.slice(0, start_index)
+       @inside_think_tag = true
+       remaining.slice((start_index + start_tag.length)..) || +''
+     else
+       suffix_len = longest_suffix_prefix(remaining, start_tag)
+       output << remaining.slice(0, remaining.length - suffix_len)
+       @pending_think_tag = remaining.slice(-suffix_len, suffix_len)
+       +''
+     end
+   end
+
+   def longest_suffix_prefix(text, tag)
+     max = [text.length, tag.length - 1].min
+     max.downto(1) do |len|
+       return len if text.end_with?(tag[0, len])
+     end
+     0
+   end
  end
end
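The scanner has to cope with a <think> tag split across streaming chunks: when a chunk ends mid-tag, longest_suffix_prefix measures the partial match and extract_think_tags parks it in @pending_think_tag until the next chunk arrives. A worked example with assumed chunks:

    # chunk 1: 'Hello <thi'           => output 'Hello ', pending '<thi'
    # chunk 2: 'nk>plan</think>world' => '<thi' + chunk = '<think>plan</think>world'
    #                                    => thinking 'plan', output 'world'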