ruby_llm 1.6.2 → 1.6.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. checksums.yaml +4 -4
  2. data/README.md +73 -91
  3. data/lib/ruby_llm/active_record/acts_as.rb +14 -13
  4. data/lib/ruby_llm/aliases.json +8 -0
  5. data/lib/ruby_llm/aliases.rb +7 -25
  6. data/lib/ruby_llm/chat.rb +5 -12
  7. data/lib/ruby_llm/configuration.rb +1 -12
  8. data/lib/ruby_llm/content.rb +0 -2
  9. data/lib/ruby_llm/embedding.rb +1 -2
  10. data/lib/ruby_llm/error.rb +0 -8
  11. data/lib/ruby_llm/image.rb +0 -4
  12. data/lib/ruby_llm/message.rb +2 -4
  13. data/lib/ruby_llm/model/info.rb +0 -10
  14. data/lib/ruby_llm/model/pricing.rb +0 -3
  15. data/lib/ruby_llm/model/pricing_category.rb +0 -2
  16. data/lib/ruby_llm/model/pricing_tier.rb +0 -1
  17. data/lib/ruby_llm/models.json +623 -452
  18. data/lib/ruby_llm/models.rb +5 -13
  19. data/lib/ruby_llm/provider.rb +1 -5
  20. data/lib/ruby_llm/providers/anthropic/capabilities.rb +1 -46
  21. data/lib/ruby_llm/providers/anthropic/media.rb +0 -1
  22. data/lib/ruby_llm/providers/anthropic/tools.rb +1 -2
  23. data/lib/ruby_llm/providers/anthropic.rb +1 -2
  24. data/lib/ruby_llm/providers/bedrock/chat.rb +0 -2
  25. data/lib/ruby_llm/providers/bedrock/media.rb +0 -1
  26. data/lib/ruby_llm/providers/bedrock/models.rb +0 -2
  27. data/lib/ruby_llm/providers/bedrock/streaming/base.rb +0 -12
  28. data/lib/ruby_llm/providers/bedrock/streaming/content_extraction.rb +0 -7
  29. data/lib/ruby_llm/providers/bedrock/streaming/message_processing.rb +0 -12
  30. data/lib/ruby_llm/providers/bedrock/streaming/payload_processing.rb +0 -12
  31. data/lib/ruby_llm/providers/bedrock/streaming/prelude_handling.rb +0 -13
  32. data/lib/ruby_llm/providers/bedrock/streaming.rb +0 -18
  33. data/lib/ruby_llm/providers/bedrock.rb +1 -2
  34. data/lib/ruby_llm/providers/deepseek/capabilities.rb +1 -2
  35. data/lib/ruby_llm/providers/deepseek/chat.rb +0 -1
  36. data/lib/ruby_llm/providers/gemini/capabilities.rb +26 -101
  37. data/lib/ruby_llm/providers/gemini/chat.rb +57 -31
  38. data/lib/ruby_llm/providers/gemini/embeddings.rb +0 -2
  39. data/lib/ruby_llm/providers/gemini/images.rb +0 -1
  40. data/lib/ruby_llm/providers/gemini/media.rb +0 -1
  41. data/lib/ruby_llm/providers/gemini/models.rb +1 -2
  42. data/lib/ruby_llm/providers/gemini/tools.rb +0 -5
  43. data/lib/ruby_llm/providers/gpustack/chat.rb +0 -1
  44. data/lib/ruby_llm/providers/gpustack/models.rb +3 -4
  45. data/lib/ruby_llm/providers/mistral/capabilities.rb +2 -10
  46. data/lib/ruby_llm/providers/mistral/chat.rb +0 -2
  47. data/lib/ruby_llm/providers/mistral/embeddings.rb +0 -3
  48. data/lib/ruby_llm/providers/mistral/models.rb +0 -1
  49. data/lib/ruby_llm/providers/ollama/chat.rb +0 -1
  50. data/lib/ruby_llm/providers/ollama/media.rb +0 -1
  51. data/lib/ruby_llm/providers/openai/capabilities.rb +0 -15
  52. data/lib/ruby_llm/providers/openai/chat.rb +0 -3
  53. data/lib/ruby_llm/providers/openai/embeddings.rb +0 -3
  54. data/lib/ruby_llm/providers/openai/media.rb +0 -1
  55. data/lib/ruby_llm/providers/openai.rb +1 -3
  56. data/lib/ruby_llm/providers/openrouter/models.rb +1 -16
  57. data/lib/ruby_llm/providers/perplexity/capabilities.rb +0 -1
  58. data/lib/ruby_llm/providers/perplexity/chat.rb +0 -1
  59. data/lib/ruby_llm/providers/perplexity.rb +1 -5
  60. data/lib/ruby_llm/railtie.rb +0 -1
  61. data/lib/ruby_llm/stream_accumulator.rb +1 -3
  62. data/lib/ruby_llm/streaming.rb +15 -24
  63. data/lib/ruby_llm/tool.rb +2 -19
  64. data/lib/ruby_llm/tool_call.rb +0 -9
  65. data/lib/ruby_llm/version.rb +1 -1
  66. data/lib/ruby_llm.rb +0 -2
  67. data/lib/tasks/models.rake +514 -0
  68. data/lib/tasks/release.rake +37 -2
  69. data/lib/tasks/vcr.rake +0 -7
  70. metadata +2 -4
  71. data/lib/tasks/aliases.rake +0 -235
  72. data/lib/tasks/models_docs.rake +0 -224
  73. data/lib/tasks/models_update.rake +0 -108
data/lib/ruby_llm/providers/ollama/chat.rb CHANGED
@@ -19,7 +19,6 @@ module RubyLLM
         end
 
         def format_role(role)
-          # Ollama doesn't use the new OpenAI convention for system prompts
           role.to_s
         end
       end
data/lib/ruby_llm/providers/ollama/media.rb CHANGED
@@ -10,7 +10,6 @@ module RubyLLM
         module_function
 
         def format_content(content)
-          # Convert Hash/Array back to JSON string for API
           return content.to_json if content.is_a?(Hash) || content.is_a?(Array)
           return content unless content.is_a?(Content)
 
data/lib/ruby_llm/providers/openai/capabilities.rb CHANGED
@@ -235,20 +235,11 @@ module RubyLLM
 
           # Vision support
           modalities[:input] << 'image' if supports_vision?(model_id)
-
-          # Audio support
           modalities[:input] << 'audio' if model_id.match?(/whisper|audio|tts|transcribe/)
-
-          # PDF support
           modalities[:input] << 'pdf' if supports_vision?(model_id)
-
-          # Output modalities
           modalities[:output] << 'audio' if model_id.match?(/tts|audio/)
-
           modalities[:output] << 'image' if model_id.match?(/dall-e|image/)
-
           modalities[:output] << 'embeddings' if model_id.match?(/embedding/)
-
           modalities[:output] << 'moderation' if model_id.match?(/moderation/)
 
           modalities
@@ -257,13 +248,10 @@ module RubyLLM
         def capabilities_for(model_id) # rubocop:disable Metrics/PerceivedComplexity
           capabilities = []
 
-          # Common capabilities
           capabilities << 'streaming' unless model_id.match?(/moderation|embedding/)
           capabilities << 'function_calling' if supports_functions?(model_id)
           capabilities << 'structured_output' if supports_json_mode?(model_id)
           capabilities << 'batch' if model_id.match?(/embedding|batch/)
-
-          # Advanced capabilities
           capabilities << 'reasoning' if model_id.match?(/o\d|gpt-5|codex/)
 
           if model_id.match?(/gpt-4-turbo|gpt-4o/)
@@ -281,16 +269,13 @@ module RubyLLM
             output_per_million: output_price_for(model_id)
           }
 
-          # Add cached pricing if available
           if respond_to?(:cached_input_price_for)
             cached_price = cached_input_price_for(model_id)
             standard_pricing[:cached_input_per_million] = cached_price if cached_price
           end
 
-          # Pricing structure
           pricing = { text_tokens: { standard: standard_pricing } }
 
-          # Add batch pricing if applicable
           if model_id.match?(/embedding|batch/)
             pricing[:text_tokens][:batch] = {
               input_per_million: standard_pricing[:input_per_million] * 0.5,
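The batch pricing block retained above derives batch rates as a flat 50% discount off the standard rates (the hunk is truncated after the input line; presumably the output rate follows the same rule). A minimal sketch of that derivation, with made-up prices:

```ruby
# Hypothetical standard pricing, in USD per million tokens.
standard_pricing = { input_per_million: 0.10, output_per_million: 0.40 }

# Batch pricing at half the standard rate, as in the hunk above.
batch_pricing = {
  input_per_million: standard_pricing[:input_per_million] * 0.5,
  output_per_million: standard_pricing[:output_per_million] * 0.5
}

batch_pricing # => {:input_per_million=>0.05, :output_per_million=>0.2}
```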
data/lib/ruby_llm/providers/openai/chat.rb CHANGED
@@ -18,13 +18,10 @@ module RubyLLM
           stream: stream
         }
 
-        # Only include temperature if it's not nil (some models don't accept it)
         payload[:temperature] = temperature unless temperature.nil?
-
         payload[:tools] = tools.map { |_, tool| tool_for(tool) } if tools.any?
 
         if schema
-          # Use strict mode from schema if specified, default to true
           strict = schema[:strict] != false
 
           payload[:response_format] = {
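The surviving `strict = schema[:strict] != false` line implements default-true semantics: strict mode stays on unless the schema explicitly opts out. A standalone illustration (the `strict?` helper is hypothetical, the predicate is verbatim from the diff):

```ruby
# Strict mode defaults to true; only an explicit `strict: false` disables it.
def strict?(schema)
  schema[:strict] != false
end

strict?({})              # => true  (key absent, default applies)
strict?(strict: true)    # => true
strict?(strict: nil)     # => true  (nil != false)
strict?(strict: false)   # => false
```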
data/lib/ruby_llm/providers/openai/embeddings.rb CHANGED
@@ -23,9 +23,6 @@ module RubyLLM
           data = response.body
           input_tokens = data.dig('usage', 'prompt_tokens') || 0
           vectors = data['data'].map { |d| d['embedding'] }
-
-          # If we only got one embedding AND the input was a single string (not an array),
-          # return it as a single vector
           vectors = vectors.first if vectors.length == 1 && !text.is_a?(Array)
 
           Embedding.new(vectors:, model:, input_tokens:)
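The deleted comment documented behavior the code keeps: embedding a single string returns one vector, while embedding an array (even of length one) returns an array of vectors. A sketch of the unwrapping rule, with made-up values:

```ruby
vectors = [[0.1, 0.2, 0.3]] # one embedding back from the API (values made up)
text = 'hello'
vectors = vectors.first if vectors.length == 1 && !text.is_a?(Array)
vectors # => [0.1, 0.2, 0.3] -- single string in, single vector out

vectors = [[0.1, 0.2, 0.3]]
text = ['hello']
vectors = vectors.first if vectors.length == 1 && !text.is_a?(Array)
vectors # => [[0.1, 0.2, 0.3]] -- array in, array of vectors out
```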
data/lib/ruby_llm/providers/openai/media.rb CHANGED
@@ -8,7 +8,6 @@ module RubyLLM
         module_function
 
         def format_content(content)
-          # Convert Hash/Array back to JSON string for API
           return content.to_json if content.is_a?(Hash) || content.is_a?(Array)
           return content unless content.is_a?(Content)
 
data/lib/ruby_llm/providers/openai.rb CHANGED
@@ -2,9 +2,7 @@
 
 module RubyLLM
   module Providers
-    # OpenAI API integration. Handles chat completion, function calling,
-    # and OpenAI's unique streaming format. Supports GPT-4, GPT-3.5,
-    # and other OpenAI models.
+    # OpenAI API integration.
     class OpenAI < Provider
       include OpenAI::Chat
       include OpenAI::Embeddings
data/lib/ruby_llm/providers/openrouter/models.rb CHANGED
@@ -13,13 +13,11 @@ module RubyLLM
 
         def parse_list_models_response(response, slug, _capabilities)
           Array(response.body['data']).map do |model_data| # rubocop:disable Metrics/BlockLength
-            # Extract modalities directly from architecture
             modalities = {
               input: Array(model_data.dig('architecture', 'input_modalities')),
               output: Array(model_data.dig('architecture', 'output_modalities'))
             }
 
-            # Construct pricing from API data, only adding non-zero values
             pricing = { text_tokens: { standard: {} } }
 
             pricing_types = {
@@ -34,7 +32,6 @@ module RubyLLM
             pricing[:text_tokens][:standard][target_key] = value * 1_000_000 if value.positive?
           end
 
-          # Convert OpenRouter's supported parameters to our capability format
           capabilities = supported_parameters_to_capabilities(model_data['supported_parameters'])
 
           Model::Info.new(
@@ -63,23 +60,11 @@ module RubyLLM
           return [] unless params
 
           capabilities = []
-
-          # Standard capabilities mapping
-          capabilities << 'streaming' # Assume all OpenRouter models support streaming
-
-          # Function calling capability
+          capabilities << 'streaming'
           capabilities << 'function_calling' if params.include?('tools') || params.include?('tool_choice')
-
-          # Structured output capability
           capabilities << 'structured_output' if params.include?('response_format')
-
-          # Batch capability
           capabilities << 'batch' if params.include?('batch')
-
-          # Additional mappings based on params
-          # Handles advanced model capabilities that might be inferred from supported params
           capabilities << 'predicted_outputs' if params.include?('logit_bias') && params.include?('top_k')
-
           capabilities
         end
       end
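After this change the method reads as a straight translation from OpenRouter's `supported_parameters` list to RubyLLM capability names, with streaming still assumed for every model. For example, feeding it a typical parameter list (the payload values are hypothetical):

```ruby
# Hypothetical supported_parameters payload from the OpenRouter models API.
params = %w[tools tool_choice response_format logit_bias top_k]

supported_parameters_to_capabilities(params)
# => ["streaming", "function_calling", "structured_output", "predicted_outputs"]

supported_parameters_to_capabilities(nil)
# => []
```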
data/lib/ruby_llm/providers/perplexity/capabilities.rb CHANGED
@@ -106,7 +106,6 @@ module RubyLLM
           }
         end
 
-        # Pricing information for Perplexity models (USD per 1M tokens)
         PRICES = {
           sonar: {
             input: 1.0,
data/lib/ruby_llm/providers/perplexity/chat.rb CHANGED
@@ -8,7 +8,6 @@ module RubyLLM
         module_function
 
         def format_role(role)
-          # Perplexity doesn't use the new OpenAI convention for system prompts
           role.to_s
         end
       end
data/lib/ruby_llm/providers/perplexity.rb CHANGED
@@ -34,17 +34,13 @@ module RubyLLM
 
         # If response is HTML (Perplexity returns HTML for auth errors)
         if body.include?('<html>') && body.include?('<title>')
-          # Extract title content
           title_match = body.match(%r{<title>(.+?)</title>})
           if title_match
-            # Clean up the title - remove status code if present
             message = title_match[1]
-            message = message.sub(/^\d+\s+/, '') # Remove leading digits and space
+            message = message.sub(/^\d+\s+/, '')
             return message
           end
         end
-
-        # Fall back to parent's implementation
         super
       end
     end
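The trimmed method still extracts a human-readable message from Perplexity's HTML auth-error pages by grabbing the `<title>` text and stripping a leading status code, falling back to `super` otherwise. A self-contained sketch of that flow (the method name `html_error_message` is hypothetical):

```ruby
# Pulls an error message out of an HTML error page, as in the diff above.
def html_error_message(body)
  return unless body.include?('<html>') && body.include?('<title>')

  title_match = body.match(%r{<title>(.+?)</title>})
  title_match[1].sub(/^\d+\s+/, '') if title_match
end

html_error_message('<html><head><title>401 Authorization Required</title></head></html>')
# => "Authorization Required"
```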
data/lib/ruby_llm/railtie.rb CHANGED
@@ -9,7 +9,6 @@ module RubyLLM
       end
     end
 
-    # Register generators
     generators do
       require 'generators/ruby_llm/install_generator'
     end
data/lib/ruby_llm/stream_accumulator.rb CHANGED
@@ -2,8 +2,6 @@
 
 module RubyLLM
   # Assembles streaming responses from LLMs into complete messages.
-  # Handles the complexities of accumulating content and tool calls
-  # from partial chunks while tracking token usage.
   class StreamAccumulator
     attr_reader :content, :model_id, :tool_calls
 
@@ -48,7 +46,7 @@ module RubyLLM
       arguments = if tc.arguments.is_a?(String) && !tc.arguments.empty?
                     JSON.parse(tc.arguments)
                   elsif tc.arguments.is_a?(String)
-                    {} # Return empty hash for empty string arguments
+                    {}
                   else
                     tc.arguments
                   end
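The branch above normalizes accumulated tool-call arguments, which arrive either as a JSON string (possibly empty while chunks are still streaming in) or as an already-parsed value. Extracted as a standalone sketch (the helper name is hypothetical):

```ruby
require 'json'

# Normalizes tool-call arguments, mirroring the branch above.
def normalize_arguments(raw)
  if raw.is_a?(String) && !raw.empty?
    JSON.parse(raw)   # complete JSON string => Hash
  elsif raw.is_a?(String)
    {}                # empty string => no arguments yet
  else
    raw               # already parsed (e.g. a Hash)
  end
end

normalize_arguments('{"expression":"2 + 2"}') # => {"expression"=>"2 + 2"}
normalize_arguments('')                       # => {}
normalize_arguments({ a: 1 })                 # => {:a=>1}
```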
data/lib/ruby_llm/streaming.rb CHANGED
@@ -1,10 +1,7 @@
 # frozen_string_literal: true
 
 module RubyLLM
-  # Handles streaming responses from AI providers. Provides a unified way to process
-  # chunked responses, accumulate content, and handle provider-specific streaming formats.
-  # Each provider implements provider-specific parsing while sharing common stream handling
-  # patterns.
+  # Handles streaming responses from AI providers.
   module Streaming
     module_function
 
@@ -12,17 +9,14 @@ module RubyLLM
     accumulator = StreamAccumulator.new
 
     response = connection.post stream_url, payload do |req|
-      # Merge additional headers, with existing headers taking precedence
       req.headers = additional_headers.merge(req.headers) unless additional_headers.empty?
-      if req.options.respond_to?(:on_data)
-        # Handle Faraday 2.x streaming with on_data method
-        req.options.on_data = handle_stream do |chunk|
+      if faraday_1?
+        req.options[:on_data] = handle_stream do |chunk|
          accumulator.add chunk
          block.call chunk
        end
      else
-        # Handle Faraday 1.x streaming with :on_data key
-        req.options[:on_data] = handle_stream do |chunk|
+        req.options.on_data = handle_stream do |chunk|
          accumulator.add chunk
          block.call chunk
        end
@@ -42,6 +36,10 @@ module RubyLLM
 
     private
 
+    def faraday_1?
+      Faraday::VERSION.start_with?('1')
+    end
+
     def to_json_stream(&)
       buffer = +''
       parser = EventStreamParser::Parser.new
@@ -50,11 +48,9 @@ module RubyLLM
     end
 
     def create_stream_processor(parser, buffer, &)
-      if Faraday::VERSION.start_with?('1')
-        # Faraday 1.x: on_data receives (chunk, size)
+      if faraday_1?
         legacy_stream_processor(parser, &)
       else
-        # Faraday 2.x: on_data receives (chunk, bytes, env)
         stream_processor(parser, buffer, &)
       end
     end
@@ -94,12 +90,10 @@ module RubyLLM
       status, _message = parse_streaming_error(error_data)
       parsed_data = JSON.parse(error_data)
 
-      # Create a response-like object that works for both Faraday v1 and v2
-      error_response = if env
-                         env.merge(body: parsed_data, status: status)
-                       else
-                         # For Faraday v1, create a simple object that responds to .status and .body
+      error_response = if faraday_1?
                          Struct.new(:body, :status).new(parsed_data, status)
+                       else
+                         env.merge(body: parsed_data, status: status)
                        end
 
       ErrorMiddleware.parse_error(provider: self, response: error_response)
@@ -137,12 +131,10 @@ module RubyLLM
       status, _message = parse_streaming_error(data)
       parsed_data = JSON.parse(data)
 
-      # Create a response-like object that works for both Faraday v1 and v2
-      error_response = if env
-                         env.merge(body: parsed_data, status: status)
-                       else
-                         # For Faraday v1, create a simple object that responds to .status and .body
+      error_response = if faraday_1?
                          Struct.new(:body, :status).new(parsed_data, status)
+                       else
+                         env.merge(body: parsed_data, status: status)
                        end
 
       ErrorMiddleware.parse_error(provider: self, response: error_response)
@@ -150,7 +142,6 @@ module RubyLLM
       RubyLLM.logger.debug "Failed to parse error event: #{e.message}"
     end
 
-    # Default implementation - providers should override this method
    def parse_streaming_error(data)
      error_data = JSON.parse(data)
      [500, error_data['message'] || 'Unknown streaming error']
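The refactor replaces two ad-hoc version checks (a `req.options.respond_to?(:on_data)` probe and a `Faraday::VERSION` string test) with one `faraday_1?` helper, and reuses it when building error responses, since Faraday 1.x callbacks lack the `env` that 2.x provides. A condensed sketch of the dispatch pattern (the `attach_handler` wrapper is hypothetical; the callback arities come from the comments removed in the diff):

```ruby
require 'faraday'

def faraday_1?
  Faraday::VERSION.start_with?('1')
end

# Faraday 1.x takes stream callbacks via a hash-style option;
# Faraday 2.x exposes an attribute writer on request options.
def attach_handler(req, &handler)
  if faraday_1?
    req.options[:on_data] = handler  # 1.x: on_data receives (chunk, size)
  else
    req.options.on_data = handler    # 2.x: on_data receives (chunk, bytes, env)
  end
end
```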
data/lib/ruby_llm/tool.rb CHANGED
@@ -1,8 +1,7 @@
 # frozen_string_literal: true
 
 module RubyLLM
-  # Parameter definition for Tool methods. Specifies type constraints,
-  # descriptions, and whether parameters are required.
+  # Parameter definition for Tool methods.
   class Parameter
     attr_reader :name, :type, :description, :required
 
@@ -14,23 +13,7 @@ module RubyLLM
     end
   end
 
-  # Base class for creating tools that AI models can use. Provides a simple
-  # interface for defining parameters and implementing tool behavior.
-  #
-  # Example:
-  #   require 'tzinfo'
-  #
-  #   class TimeInfo < RubyLLM::Tool
-  #     description 'Gets the current time in various timezones'
-  #     param :timezone, desc: "Timezone name (e.g., 'UTC', 'America/New_York')"
-  #
-  #     def execute(timezone:)
-  #       time = TZInfo::Timezone.get(timezone).now.strftime('%Y-%m-%d %H:%M:%S')
-  #       "Current time in #{timezone}: #{time}"
-  #     rescue StandardError => e
-  #       { error: e.message }
-  #     end
-  #   end
+  # Base class for creating tools that AI models can use
   class Tool
     # Stops conversation continuation after tool execution
     class Halt
data/lib/ruby_llm/tool_call.rb CHANGED
@@ -2,15 +2,6 @@
 
 module RubyLLM
   # Represents a function call from an AI model to a Tool.
-  # Encapsulates the function name, arguments, and execution results
-  # in a clean Ruby interface.
-  #
-  # Example:
-  #   tool_call = ToolCall.new(
-  #     id: "call_123",
-  #     name: "calculator",
-  #     arguments: { expression: "2 + 2" }
-  #   )
   class ToolCall
     attr_reader :id, :name, :arguments
 
data/lib/ruby_llm/version.rb CHANGED
@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 
 module RubyLLM
-  VERSION = '1.6.2'
+  VERSION = '1.6.4'
 end
data/lib/ruby_llm.rb CHANGED
@@ -30,8 +30,6 @@ loader.ignore("#{__dir__}/generators")
 loader.setup
 
 # A delightful Ruby interface to modern AI language models.
-# Provides a unified way to interact with models from OpenAI, Anthropic and others
-# with a focus on developer happiness and convention over configuration.
 module RubyLLM
   class Error < StandardError; end