ruby_llm_community 0.0.6 → 1.1.0

This diff shows the changes between publicly available package versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (134)
  1. checksums.yaml +4 -4
  2. data/README.md +20 -3
  3. data/lib/generators/ruby_llm/chat_ui/chat_ui_generator.rb +127 -0
  4. data/lib/generators/ruby_llm/chat_ui/templates/controllers/chats_controller.rb.tt +39 -0
  5. data/lib/generators/ruby_llm/chat_ui/templates/controllers/messages_controller.rb.tt +24 -0
  6. data/lib/generators/ruby_llm/chat_ui/templates/controllers/models_controller.rb.tt +14 -0
  7. data/lib/generators/ruby_llm/chat_ui/templates/jobs/chat_response_job.rb.tt +12 -0
  8. data/lib/generators/ruby_llm/chat_ui/templates/views/chats/_chat.html.erb.tt +16 -0
  9. data/lib/generators/ruby_llm/chat_ui/templates/views/chats/_form.html.erb.tt +29 -0
  10. data/lib/generators/ruby_llm/chat_ui/templates/views/chats/index.html.erb.tt +16 -0
  11. data/lib/generators/ruby_llm/chat_ui/templates/views/chats/new.html.erb.tt +11 -0
  12. data/lib/generators/ruby_llm/chat_ui/templates/views/chats/show.html.erb.tt +23 -0
  13. data/lib/generators/ruby_llm/chat_ui/templates/views/messages/_form.html.erb.tt +21 -0
  14. data/lib/generators/ruby_llm/chat_ui/templates/views/messages/_message.html.erb.tt +10 -0
  15. data/lib/generators/ruby_llm/chat_ui/templates/views/messages/create.turbo_stream.erb.tt +9 -0
  16. data/lib/generators/ruby_llm/chat_ui/templates/views/models/_model.html.erb.tt +16 -0
  17. data/lib/generators/ruby_llm/chat_ui/templates/views/models/index.html.erb.tt +30 -0
  18. data/lib/generators/ruby_llm/chat_ui/templates/views/models/show.html.erb.tt +18 -0
  19. data/lib/generators/ruby_llm/install/install_generator.rb +227 -0
  20. data/lib/generators/ruby_llm/install/templates/chat_model.rb.tt +2 -2
  21. data/lib/generators/ruby_llm/install/templates/create_chats_migration.rb.tt +4 -4
  22. data/lib/generators/ruby_llm/install/templates/create_messages_migration.rb.tt +8 -7
  23. data/lib/generators/ruby_llm/install/templates/create_models_migration.rb.tt +43 -0
  24. data/lib/generators/ruby_llm/install/templates/create_tool_calls_migration.rb.tt +6 -5
  25. data/lib/generators/ruby_llm/install/templates/initializer.rb.tt +10 -4
  26. data/lib/generators/ruby_llm/install/templates/message_model.rb.tt +4 -3
  27. data/lib/generators/ruby_llm/install/templates/model_model.rb.tt +3 -0
  28. data/lib/generators/ruby_llm/install/templates/tool_call_model.rb.tt +2 -2
  29. data/lib/generators/ruby_llm/upgrade_to_v1_7/templates/migration.rb.tt +137 -0
  30. data/lib/generators/ruby_llm/upgrade_to_v1_7/upgrade_to_v1_7_generator.rb +170 -0
  31. data/lib/ruby_llm/active_record/acts_as.rb +112 -332
  32. data/lib/ruby_llm/active_record/acts_as_legacy.rb +403 -0
  33. data/lib/ruby_llm/active_record/chat_methods.rb +336 -0
  34. data/lib/ruby_llm/active_record/message_methods.rb +72 -0
  35. data/lib/ruby_llm/active_record/model_methods.rb +84 -0
  36. data/lib/ruby_llm/aliases.json +130 -11
  37. data/lib/ruby_llm/aliases.rb +7 -25
  38. data/lib/ruby_llm/attachment.rb +22 -0
  39. data/lib/ruby_llm/chat.rb +10 -17
  40. data/lib/ruby_llm/configuration.rb +11 -12
  41. data/lib/ruby_llm/connection.rb +4 -4
  42. data/lib/ruby_llm/connection_multipart.rb +19 -0
  43. data/lib/ruby_llm/content.rb +5 -2
  44. data/lib/ruby_llm/embedding.rb +1 -2
  45. data/lib/ruby_llm/error.rb +0 -8
  46. data/lib/ruby_llm/image.rb +23 -8
  47. data/lib/ruby_llm/image_attachment.rb +30 -0
  48. data/lib/ruby_llm/message.rb +7 -7
  49. data/lib/ruby_llm/model/info.rb +12 -10
  50. data/lib/ruby_llm/model/pricing.rb +0 -3
  51. data/lib/ruby_llm/model/pricing_category.rb +0 -2
  52. data/lib/ruby_llm/model/pricing_tier.rb +0 -1
  53. data/lib/ruby_llm/models.json +4705 -2144
  54. data/lib/ruby_llm/models.rb +56 -35
  55. data/lib/ruby_llm/provider.rb +14 -12
  56. data/lib/ruby_llm/providers/anthropic/capabilities.rb +1 -46
  57. data/lib/ruby_llm/providers/anthropic/chat.rb +2 -2
  58. data/lib/ruby_llm/providers/anthropic/media.rb +1 -2
  59. data/lib/ruby_llm/providers/anthropic/tools.rb +1 -2
  60. data/lib/ruby_llm/providers/anthropic.rb +1 -2
  61. data/lib/ruby_llm/providers/bedrock/chat.rb +2 -4
  62. data/lib/ruby_llm/providers/bedrock/media.rb +0 -1
  63. data/lib/ruby_llm/providers/bedrock/models.rb +19 -3
  64. data/lib/ruby_llm/providers/bedrock/streaming/base.rb +0 -12
  65. data/lib/ruby_llm/providers/bedrock/streaming/content_extraction.rb +0 -7
  66. data/lib/ruby_llm/providers/bedrock/streaming/message_processing.rb +0 -12
  67. data/lib/ruby_llm/providers/bedrock/streaming/payload_processing.rb +0 -12
  68. data/lib/ruby_llm/providers/bedrock/streaming/prelude_handling.rb +0 -13
  69. data/lib/ruby_llm/providers/bedrock/streaming.rb +0 -18
  70. data/lib/ruby_llm/providers/bedrock.rb +1 -2
  71. data/lib/ruby_llm/providers/deepseek/capabilities.rb +1 -2
  72. data/lib/ruby_llm/providers/deepseek/chat.rb +0 -1
  73. data/lib/ruby_llm/providers/gemini/capabilities.rb +28 -100
  74. data/lib/ruby_llm/providers/gemini/chat.rb +57 -29
  75. data/lib/ruby_llm/providers/gemini/embeddings.rb +0 -2
  76. data/lib/ruby_llm/providers/gemini/images.rb +1 -2
  77. data/lib/ruby_llm/providers/gemini/media.rb +1 -2
  78. data/lib/ruby_llm/providers/gemini/models.rb +1 -2
  79. data/lib/ruby_llm/providers/gemini/streaming.rb +15 -1
  80. data/lib/ruby_llm/providers/gemini/tools.rb +0 -5
  81. data/lib/ruby_llm/providers/gpustack/chat.rb +11 -1
  82. data/lib/ruby_llm/providers/gpustack/media.rb +45 -0
  83. data/lib/ruby_llm/providers/gpustack/models.rb +44 -9
  84. data/lib/ruby_llm/providers/gpustack.rb +1 -0
  85. data/lib/ruby_llm/providers/mistral/capabilities.rb +2 -10
  86. data/lib/ruby_llm/providers/mistral/chat.rb +0 -2
  87. data/lib/ruby_llm/providers/mistral/embeddings.rb +0 -3
  88. data/lib/ruby_llm/providers/mistral/models.rb +0 -1
  89. data/lib/ruby_llm/providers/ollama/chat.rb +0 -1
  90. data/lib/ruby_llm/providers/ollama/media.rb +2 -7
  91. data/lib/ruby_llm/providers/ollama/models.rb +36 -0
  92. data/lib/ruby_llm/providers/ollama.rb +1 -0
  93. data/lib/ruby_llm/providers/openai/capabilities.rb +3 -16
  94. data/lib/ruby_llm/providers/openai/chat.rb +1 -3
  95. data/lib/ruby_llm/providers/openai/embeddings.rb +0 -3
  96. data/lib/ruby_llm/providers/openai/images.rb +73 -3
  97. data/lib/ruby_llm/providers/openai/media.rb +4 -5
  98. data/lib/ruby_llm/providers/openai/response.rb +121 -29
  99. data/lib/ruby_llm/providers/openai/response_media.rb +3 -3
  100. data/lib/ruby_llm/providers/openai/streaming.rb +110 -47
  101. data/lib/ruby_llm/providers/openai/tools.rb +12 -7
  102. data/lib/ruby_llm/providers/openai.rb +1 -3
  103. data/lib/ruby_llm/providers/openai_base.rb +2 -2
  104. data/lib/ruby_llm/providers/openrouter/models.rb +1 -16
  105. data/lib/ruby_llm/providers/perplexity/capabilities.rb +0 -1
  106. data/lib/ruby_llm/providers/perplexity/chat.rb +0 -1
  107. data/lib/ruby_llm/providers/perplexity.rb +1 -5
  108. data/lib/ruby_llm/providers/vertexai/chat.rb +14 -0
  109. data/lib/ruby_llm/providers/vertexai/embeddings.rb +32 -0
  110. data/lib/ruby_llm/providers/vertexai/models.rb +130 -0
  111. data/lib/ruby_llm/providers/vertexai/streaming.rb +14 -0
  112. data/lib/ruby_llm/providers/vertexai.rb +55 -0
  113. data/lib/ruby_llm/providers/xai/capabilities.rb +166 -0
  114. data/lib/ruby_llm/providers/xai/chat.rb +15 -0
  115. data/lib/ruby_llm/providers/xai/models.rb +48 -0
  116. data/lib/ruby_llm/providers/xai.rb +46 -0
  117. data/lib/ruby_llm/railtie.rb +20 -4
  118. data/lib/ruby_llm/stream_accumulator.rb +68 -10
  119. data/lib/ruby_llm/streaming.rb +16 -25
  120. data/lib/ruby_llm/tool.rb +2 -19
  121. data/lib/ruby_llm/tool_call.rb +0 -9
  122. data/lib/ruby_llm/utils.rb +5 -9
  123. data/lib/ruby_llm/version.rb +1 -1
  124. data/lib/ruby_llm_community.rb +8 -5
  125. data/lib/tasks/models.rake +549 -0
  126. data/lib/tasks/release.rake +37 -2
  127. data/lib/tasks/ruby_llm.rake +15 -0
  128. data/lib/tasks/vcr.rake +2 -9
  129. metadata +44 -6
  130. data/lib/generators/ruby_llm/install/templates/INSTALL_INFO.md.tt +0 -108
  131. data/lib/generators/ruby_llm/install_generator.rb +0 -121
  132. data/lib/tasks/aliases.rake +0 -235
  133. data/lib/tasks/models_docs.rake +0 -224
  134. data/lib/tasks/models_update.rake +0 -108

data/lib/ruby_llm/models.rb

@@ -1,14 +1,7 @@
 # frozen_string_literal: true
 
 module RubyLLM
-  # Registry of available AI models and their capabilities. Provides a clean interface
-  # to discover and work with models from different providers.
-  #
-  # Example:
-  #   RubyLLM.models.all # All available models
-  #   RubyLLM.models.chat_models # Models that support chat
-  #   RubyLLM.models.by_provider('openai').chat_models # OpenAI chat models
-  #   RubyLLM.models.find('claude-3') # Get info about a specific model
+  # Registry of available AI models and their capabilities.
   class Models
     include Enumerable
 
@@ -25,20 +18,24 @@ module RubyLLM
         File.expand_path('models.json', __dir__)
       end
 
-      def refresh!
-        # Collect models from both sources
-        provider_models = fetch_from_providers
-        parsera_models = fetch_from_parsera
+      def schema_file
+        File.expand_path('models_schema.json', __dir__)
+      end
 
-        # Merge with parsera data taking precedence
+      def refresh!(remote_only: false)
+        provider_models = fetch_from_providers(remote_only: remote_only)
+        parsera_models = fetch_from_parsera
         merged_models = merge_models(provider_models, parsera_models)
-
        @instance = new(merged_models)
       end
 
-      def fetch_from_providers
+      def fetch_from_providers(remote_only: true)
         config = RubyLLM.config
-        configured_classes = Provider.configured_remote_providers(config)
+        configured_classes = if remote_only
+                               Provider.configured_remote_providers(config)
+                             else
+                               Provider.configured_providers(config)
+                             end
         configured = configured_classes.map { |klass| klass.new(config) }
 
         RubyLLM.logger.info "Fetching models from providers: #{configured.map(&:name).join(', ')}"
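
The new `remote_only:` flag lets callers decide whether a refresh should also query local providers. A minimal usage sketch, assuming the registry is reached through `RubyLLM.models` as elsewhere in this gem; the comments describe the intent implied by the diff:

    RubyLLM.models.refresh!                    # queries every configured provider, local ones included
    RubyLLM.models.refresh!(remote_only: true) # remote providers only, presumably skipping local ones such as Ollama/GPUStack
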
@@ -50,7 +47,6 @@ module RubyLLM
         config ||= RubyLLM.config
         provider_class = provider ? Provider.providers[provider.to_sym] : nil
 
-        # Check if provider is local
         if provider_class
           temp_instance = provider_class.new(config)
           assume_exists = true if temp_instance.local?
@@ -62,14 +58,15 @@ module RubyLLM
          provider_class ||= raise(Error, "Unknown provider: #{provider.to_sym}")
          provider_instance = provider_class.new(config)
 
-          model = Model::Info.new(
-            id: model_id,
-            name: model_id.tr('-', ' ').capitalize,
-            provider: provider_instance.slug,
-            capabilities: %w[function_calling streaming],
-            modalities: { input: %w[text image], output: %w[text] },
-            metadata: { warning: 'Assuming model exists, capabilities may not be accurate' }
-          )
+          model = if provider_instance.local?
+                    begin
+                      Models.find(model_id, provider)
+                    rescue ModelNotFoundError
+                      nil
+                    end
+                  end
+
+          model ||= Model::Info.default(model_id, provider_instance.slug)
         else
           model = Models.find model_id, provider
           provider_class = Provider.providers[model.provider.to_sym] || raise(Error,
@@ -110,20 +107,36 @@ module RubyLLM
         all_keys = parsera_by_key.keys | provider_by_key.keys
 
         models = all_keys.map do |key|
-          if (parsera_model = parsera_by_key[key])
-            if (provider_model = provider_by_key[key])
-              add_provider_metadata(parsera_model, provider_model)
-            else
-              parsera_model
-            end
+          parsera_model = find_parsera_model(key, parsera_by_key)
+          provider_model = provider_by_key[key]
+
+          if parsera_model && provider_model
+            add_provider_metadata(parsera_model, provider_model)
+          elsif parsera_model
+            parsera_model
           else
-            provider_by_key[key]
+            provider_model
           end
         end
 
         models.sort_by { |m| [m.provider, m.id] }
       end
 
+      def find_parsera_model(key, parsera_by_key)
+        # Direct match
+        return parsera_by_key[key] if parsera_by_key[key]
+
+        # VertexAI uses same models as Gemini
+        provider, model_id = key.split(':', 2)
+        return unless provider == 'vertexai'
+
+        gemini_model = parsera_by_key["gemini:#{model_id}"]
+        return unless gemini_model
+
+        # Return Gemini's Parsera data but with VertexAI as provider
+        Model::Info.new(gemini_model.to_h.merge(provider: 'vertexai'))
+      end
+
       def index_by_key(models)
         models.each_with_object({}) do |model, hash|
           hash["#{model.provider}:#{model.id}"] = model
@@ -142,13 +155,21 @@ module RubyLLM
     end
 
     def load_models
+      read_from_json
+    end
+
+    def load_from_json!
+      @models = read_from_json
+    end
+
+    def read_from_json
       data = File.exist?(self.class.models_file) ? File.read(self.class.models_file) : '[]'
       JSON.parse(data, symbolize_names: true).map { |model| Model::Info.new(model) }
     rescue JSON::ParserError
       []
     end
 
-    def save_models
+    def save_to_json
       File.write(self.class.models_file, JSON.pretty_generate(all.map(&:to_h)))
     end
 
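
`save_models` is renamed `save_to_json`, and the new `load_from_json!` forces the in-memory registry to re-read `models.json`. A hedged sketch of the round trip, assuming these are called on the registry instance as the method bodies suggest:

    models = RubyLLM.models
    models.refresh!        # repopulate from providers and Parsera
    models.save_to_json    # persist the merged registry to models.json
    models.load_from_json! # discard @models and reload from disk
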
@@ -192,8 +213,8 @@ module RubyLLM
       self.class.new(all.select { |m| m.provider == provider.to_s })
     end
 
-    def refresh!
-      self.class.refresh!
+    def refresh!(remote_only: false)
+      self.class.refresh!(remote_only: remote_only)
     end
 
     private

data/lib/ruby_llm/provider.rb

@@ -1,10 +1,7 @@
 # frozen_string_literal: true
 
 module RubyLLM
-  # Base class for LLM providers like OpenAI and Anthropic.
-  # Handles the complexities of API communication, streaming responses,
-  # and error handling so individual providers can focus on their unique features.
-  # Encapsulates configuration and connection to eliminate parameter threading.
+  # Base class for LLM providers.
   class Provider
     include Streaming
 
@@ -45,7 +42,6 @@ module RubyLLM
       normalized_temperature = maybe_normalize_temperature(temperature, model)
 
       payload = Utils.deep_merge(
-        params,
         render_payload(
           messages,
           tools: tools,
@@ -54,7 +50,8 @@ module RubyLLM
           cache_prompts: cache_prompts,
           stream: block_given?,
           schema: schema
-        )
+        ),
+        params
       )
 
       if block_given?
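
Swapping the argument order to `Utils.deep_merge` changes which side wins on conflicting keys: caller-supplied `params` now override the rendered payload instead of being overridden by it. A worked illustration, assuming deep_merge follows Hash#merge semantics where the second argument takes precedence (values invented for the example):

    rendered = { model: 'claude-3-7-sonnet', max_tokens: 4096 }
    params   = { max_tokens: 1024 }

    Utils.deep_merge(params, rendered) # before: { model: ..., max_tokens: 4096 }
    Utils.deep_merge(rendered, params) # after:  { model: ..., max_tokens: 1024 }
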
@@ -65,8 +62,10 @@ module RubyLLM
     end
 
     def list_models
-      response = @connection.get models_url
-      parse_list_models_response response, slug, capabilities
+      Array(models_url).flat_map do |url|
+        response = @connection.get(url)
+        parse_list_models_response(response, slug, capabilities)
+      end
     end
 
     def embed(text, model:, dimensions:)
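
`models_url` may now return either a single path or an array of paths; `Array(...)` normalizes both cases and `flat_map` concatenates the parsed results. A hypothetical subclass using the new contract (both endpoint paths are invented for illustration):

    class MyProvider < RubyLLM::Provider
      def models_url
        ['v1/models', 'v1/custom_models'] # each URL is fetched and parsed in turn
      end
    end
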
@@ -75,8 +74,8 @@ module RubyLLM
       parse_embedding_response(response, model:, text:)
     end
 
-    def paint(prompt, model:, size:)
-      payload = render_image_payload(prompt, model:, size:)
+    def paint(prompt, model:, size:, with:, params:)
+      payload = render_image_payload(prompt, model:, size:, with:, params:)
       response = @connection.post images_url, payload
       parse_image_response(response, model:)
     end
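
`paint` now threads `with:` (attachments, e.g. a reference image for editing) and `params:` (provider passthrough options) down to `render_image_payload`. A hedged sketch through the top-level helper, assuming it forwards these keywords; the model name and option values are illustrative, not taken from this diff:

    RubyLLM.paint(
      'add a lighthouse to this scene',
      model: 'gpt-image-1',
      size: '1024x1024',
      with: { image: 'scene.png' },         # assumed attachment shape
      params: { background: 'transparent' } # assumed passthrough option
    )
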
@@ -126,6 +125,10 @@ module RubyLLM
       nil
     end
 
+    def connection_multipart(config)
+      @connection_multipart ||= ConnectionMultipart.new(self, config)
+    end
+
     class << self
       def name
         to_s.split('::').last
@@ -206,13 +209,12 @@ module RubyLLM
       raise ConfigurationError, "Missing configuration for #{name}: #{missing.join(', ')}"
     end
 
-    def maybe_normalize_temperature(temperature, _model_id)
+    def maybe_normalize_temperature(temperature, _model)
      temperature
     end
 
     def sync_response(connection, payload, additional_headers = {})
       response = connection.post completion_url, payload do |req|
-        # Merge additional headers, with existing headers taking precedence
         req.headers = additional_headers.merge(req.headers) unless additional_headers.empty?
       end
       parse_completion_response response

data/lib/ruby_llm/providers/anthropic/capabilities.rb

@@ -7,17 +7,10 @@ module RubyLLM
       module Capabilities
         module_function
 
-        # Determines the context window size for a given model
-        # @param model_id [String] the model identifier
-        # @return [Integer] the context window size in tokens
         def determine_context_window(_model_id)
-          # All Claude 3 and 3.5 and 3.7 models have 200K token context windows
           200_000
         end
 
-        # Determines the maximum output tokens for a given model
-        # @param model_id [String] the model identifier
-        # @return [Integer] the maximum output tokens
         def determine_max_tokens(model_id)
           case model_id
           when /claude-3-7-sonnet/, /claude-3-5/ then 8_192
@@ -25,52 +18,30 @@ module RubyLLM
           end
         end
 
-        # Gets the input price per million tokens for a given model
-        # @param model_id [String] the model identifier
-        # @return [Float] the price per million tokens for input
         def get_input_price(model_id)
           PRICES.dig(model_family(model_id), :input) || default_input_price
         end
 
-        # Gets the output price per million tokens for a given model
-        # @param model_id [String] the model identifier
-        # @return [Float] the price per million tokens for output
         def get_output_price(model_id)
           PRICES.dig(model_family(model_id), :output) || default_output_price
         end
 
-        # Determines if a model supports vision capabilities
-        # @param model_id [String] the model identifier
-        # @return [Boolean] true if the model supports vision
         def supports_vision?(model_id)
-          # All Claude 3, 3.5, and 3.7 models support vision
           !model_id.match?(/claude-[12]/)
         end
 
-        # Determines if a model supports function calling
-        # @param model_id [String] the model identifier
-        # @return [Boolean] true if the model supports functions
         def supports_functions?(model_id)
           model_id.match?(/claude-3/)
         end
 
-        # Determines if a model supports JSON mode
-        # @param model_id [String] the model identifier
-        # @return [Boolean] true if the model supports JSON mode
         def supports_json_mode?(model_id)
           model_id.match?(/claude-3/)
         end
 
-        # Determines if a model supports extended thinking
-        # @param model_id [String] the model identifier
-        # @return [Boolean] true if the model supports extended thinking
         def supports_extended_thinking?(model_id)
           model_id.match?(/claude-3-7-sonnet/)
         end
 
-        # Determines the model family for a given model ID
-        # @param model_id [String] the model identifier
-        # @return [Symbol] the model family identifier
         def model_family(model_id)
           case model_id
           when /claude-3-7-sonnet/ then 'claude-3-7-sonnet'
@@ -83,14 +54,10 @@ module RubyLLM
           end
         end
 
-        # Returns the model type
-        # @param model_id [String] the model identifier (unused but kept for API consistency)
-        # @return [String] the model type, always 'chat' for Anthropic models
         def model_type(_)
           'chat'
         end
 
-        # Pricing information for Anthropic models (per million tokens)
         PRICES = {
           'claude-3-7-sonnet': { input: 3.0, output: 15.0 },
           'claude-3-5-sonnet': { input: 3.0, output: 15.0 },
@@ -100,14 +67,10 @@ module RubyLLM
           'claude-2': { input: 3.0, output: 15.0 }
         }.freeze
 
-        # Default input price if model not found in PRICES
-        # @return [Float] default price per million tokens for input
         def default_input_price
           3.0
         end
 
-        # Default output price if model not found in PRICES
-        # @return [Float] default price per million tokens for output
         def default_output_price
           15.0
         end
@@ -118,7 +81,6 @@ module RubyLLM
             output: ['text']
           }
 
-          # All Claude 3+ models support vision
           unless model_id.match?(/claude-[12]/)
             modalities[:input] << 'image'
             modalities[:input] << 'pdf'
@@ -130,18 +92,13 @@ module RubyLLM
         def capabilities_for(model_id)
           capabilities = ['streaming']
 
-          # Function calling for Claude 3+
           if model_id.match?(/claude-3/)
             capabilities << 'function_calling'
             capabilities << 'batch'
           end
 
-          # Extended thinking (reasoning) for Claude 3.7
-          capabilities << 'reasoning' if model_id.match?(/claude-3-7/)
-
-          # Citations
+          capabilities << 'reasoning' if model_id.match?(/claude-3-7|-4/)
           capabilities << 'citations' if model_id.match?(/claude-3\.5|claude-3-7/)
-
           capabilities
         end
 
@@ -154,13 +111,11 @@ module RubyLLM
             output_per_million: prices[:output]
           }
 
-          # Batch is typically half the price
           batch_pricing = {
             input_per_million: prices[:input] * 0.5,
             output_per_million: prices[:output] * 0.5
           }
 
-          # Add reasoning output pricing for 3.7 models
          if model_id.match?(/claude-3-7/)
            standard_pricing[:reasoning_output_per_million] = prices[:output] * 2.5
            batch_pricing[:reasoning_output_per_million] = prices[:output] * 1.25
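
Concretely, with the PRICES entry above for claude-3-7-sonnet (input 3.0, output 15.0 USD per million tokens), the multipliers work out to:

    # batch input / output:      3.0 * 0.5   => $1.50 / 15.0 * 0.5 => $7.50 per 1M tokens
    # standard reasoning output: 15.0 * 2.5  => $37.50 per 1M tokens
    # batch reasoning output:    15.0 * 1.25 => $18.75 per 1M tokens
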

data/lib/ruby_llm/providers/anthropic/chat.rb

@@ -40,10 +40,10 @@ module RubyLLM
         end
 
         {
-          model: model,
+          model: model.id,
           messages:,
           stream: stream,
-          max_tokens: RubyLLM.models.find(model)&.max_tokens || 4096
+          max_tokens: model.max_tokens || 4096
         }
       end
 

data/lib/ruby_llm/providers/anthropic/media.rb

@@ -8,7 +8,6 @@ module RubyLLM
         module_function
 
         def format_content(content, cache: false)
-          # Convert Hash/Array back to JSON string for API
           return [format_text(content.to_json, cache:)] if content.is_a?(Hash) || content.is_a?(Array)
           return [format_text(content, cache:)] unless content.is_a?(Content)
 
@@ -99,7 +98,7 @@ module RubyLLM
           with_cache_control(
             {
               type: 'text',
-              text: Utils.format_text_file_for_llm(text_file)
+              text: text_file.for_llm
             },
             cache:
           )

data/lib/ruby_llm/providers/anthropic/tools.rb

@@ -46,7 +46,7 @@ module RubyLLM
           {
             type: 'tool_result',
             tool_use_id: msg.tool_call_id,
-            content: msg.content
+            content: Media.format_content(msg.content)
           }
         end
 
@@ -73,7 +73,6 @@ module RubyLLM
         def parse_tool_calls(content_blocks)
           return nil if content_blocks.nil?
 
-          # Handle single content block (backward compatibility)
           content_blocks = [content_blocks] unless content_blocks.is_a?(Array)
 
           tool_calls = {}

data/lib/ruby_llm/providers/anthropic.rb

@@ -2,8 +2,7 @@
 
 module RubyLLM
   module Providers
-    # Anthropic Claude API integration. Handles the complexities of
-    # Claude's unique message format and tool calling conventions.
+    # Anthropic Claude API integration.
     class Anthropic < Provider
       include Anthropic::Chat
       include Anthropic::Embeddings

data/lib/ruby_llm/providers/bedrock/chat.rb

@@ -11,7 +11,6 @@ module RubyLLM
           signature = sign_request("#{connection.connection.url_prefix}#{completion_url}", payload:)
           response = connection.post completion_url, payload do |req|
             req.headers.merge! build_headers(signature.headers, streaming: block_given?)
-            # Merge additional headers, with existing headers taking precedence
             req.headers = additional_headers.merge(req.headers) unless additional_headers.empty?
           end
           Anthropic::Chat.parse_completion_response response
@@ -42,8 +41,7 @@ module RubyLLM
 
         def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil, # rubocop:disable Lint/UnusedMethodArgument,Metrics/ParameterLists
                            cache_prompts: { system: false, user: false, tools: false })
-          # Hold model_id in instance variable for use in completion_url and stream_url
-          @model_id = model
+          @model_id = model.id
 
           system_messages, chat_messages = Anthropic::Chat.separate_messages(messages)
           system_content = Anthropic::Chat.build_system_content(system_messages, cache: cache_prompts[:system])
@@ -67,7 +65,7 @@ module RubyLLM
           {
             anthropic_version: 'bedrock-2023-05-31',
             messages: messages,
-            max_tokens: RubyLLM.models.find(model)&.max_tokens || 4096
+            max_tokens: model.max_tokens || 4096
           }
         end
       end

data/lib/ruby_llm/providers/bedrock/media.rb

@@ -11,7 +11,6 @@ module RubyLLM
         module_function
 
         def format_content(content, cache: false)
-          # Convert Hash/Array back to JSON string for API
           return [Anthropic::Media.format_text(content.to_json, cache:)] if content.is_a?(Hash) || content.is_a?(Array)
           return [Anthropic::Media.format_text(content, cache:)] unless content.is_a?(Content)
 

data/lib/ruby_llm/providers/bedrock/models.rb

@@ -25,7 +25,6 @@ module RubyLLM
         def parse_list_models_response(response, slug, capabilities)
           models = Array(response.body['modelSummaries'])
 
-          # Filter to include only models we care about
           models.select { |m| m['modelId'].include?('claude') }.map do |model_data|
             model_id = model_data['modelId']
 
@@ -51,7 +50,6 @@ module RubyLLM
           end
         end
 
-        # Simple test-friendly method that only sets the ID
         def create_model_info(model_data, slug, _capabilities)
           model_id = model_data['modelId']
 
@@ -74,7 +72,25 @@ module RubyLLM
           return model_id unless model_data['inferenceTypesSupported']&.include?('INFERENCE_PROFILE')
           return model_id if model_data['inferenceTypesSupported']&.include?('ON_DEMAND')
 
-          "us.#{model_id}"
+          desired_region_prefix = inference_profile_region_prefix
+
+          # Return unchanged if model already has the correct region prefix
+          return model_id if model_id.start_with?("#{desired_region_prefix}.")
+
+          # Remove any existing region prefix (e.g., "us.", "eu.", "ap.")
+          clean_model_id = model_id.sub(/^[a-z]{2}\./, '')
+
+          # Apply the desired region prefix
+          "#{desired_region_prefix}.#{clean_model_id}"
+        end
+
+        def inference_profile_region_prefix
+          # Extract region prefix from bedrock_region (e.g., "eu-west-3" -> "eu")
+          region = @config.bedrock_region.to_s
+          return 'us' if region.empty? # Default fallback
+
+          # Take first two characters as the region prefix
+          region[0, 2]
         end
       end
     end
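
Worked examples for the region-prefix rewrite above (model IDs shortened for illustration):

    # bedrock_region 'us-west-2': 'anthropic.claude-3-7-sonnet-...'    => 'us.anthropic.claude-3-7-sonnet-...'
    # bedrock_region 'eu-west-3': 'us.anthropic.claude-3-7-sonnet-...' => 'eu.anthropic.claude-3-7-sonnet-...'
    # bedrock_region unset:       defaults to the previous hard-coded 'us.' prefix
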

data/lib/ruby_llm/providers/bedrock/streaming/base.rb

@@ -5,18 +5,6 @@ module RubyLLM
     class Bedrock
       module Streaming
         # Base module for AWS Bedrock streaming functionality.
-        # Serves as the core module that includes all other streaming-related modules
-        # and provides fundamental streaming operations.
-        #
-        # Responsibilities:
-        # - Stream URL management
-        # - Stream handling and error processing
-        # - Coordinating the functionality of other streaming modules
-        #
-        # @example
-        #   module MyStreamingImplementation
-        #     include RubyLLM::Providers::Bedrock::Streaming::Base
-        #   end
         module Base
           def self.included(base)
             base.include ContentExtraction

data/lib/ruby_llm/providers/bedrock/streaming/content_extraction.rb

@@ -5,13 +5,6 @@ module RubyLLM
     class Bedrock
       module Streaming
         # Module for handling content extraction from AWS Bedrock streaming responses.
-        # Provides methods to extract and process various types of content from the response data.
-        #
-        # Responsibilities:
-        # - Extracting content from different response formats
-        # - Processing JSON deltas and content blocks
-        # - Extracting metadata (tokens, model IDs, tool calls)
-        # - Handling different content structures (arrays, blocks, completions)
         module ContentExtraction
           def json_delta?(data)
             data['type'] == 'content_block_delta' && data.dig('delta', 'type') == 'input_json_delta'

data/lib/ruby_llm/providers/bedrock/streaming/message_processing.rb

@@ -5,18 +5,6 @@ module RubyLLM
     class Bedrock
       module Streaming
         # Module for processing streaming messages from AWS Bedrock.
-        # Handles the core message processing logic, including validation and chunking.
-        #
-        # Responsibilities:
-        # - Processing incoming message chunks
-        # - Validating message structure and content
-        # - Managing message offsets and boundaries
-        # - Error handling during message processing
-        #
-        # @example Processing a message chunk
-        #   offset = process_message(chunk, current_offset) do |processed_chunk|
-        #     handle_processed_chunk(processed_chunk)
-        #   end
         module MessageProcessing
           def process_chunk(chunk, &)
             offset = 0

data/lib/ruby_llm/providers/bedrock/streaming/payload_processing.rb

@@ -7,18 +7,6 @@ module RubyLLM
     class Bedrock
       module Streaming
         # Module for processing payloads from AWS Bedrock streaming responses.
-        # Handles JSON payload extraction, decoding, and chunk creation.
-        #
-        # Responsibilities:
-        # - Extracting and validating JSON payloads
-        # - Decoding Base64-encoded response data
-        # - Creating response chunks from processed data
-        # - Error handling for JSON parsing and processing
-        #
-        # @example Processing a payload
-        #   process_payload(raw_payload) do |chunk|
-        #     yield_chunk_to_client(chunk)
-        #   end
         module PayloadProcessing
           def process_payload(payload, &)
             json_payload = extract_json_payload(payload)

data/lib/ruby_llm/providers/bedrock/streaming/prelude_handling.rb

@@ -5,19 +5,6 @@ module RubyLLM
     class Bedrock
       module Streaming
         # Module for handling message preludes in AWS Bedrock streaming responses.
-        # Manages the parsing and validation of message headers and prelude data.
-        #
-        # Responsibilities:
-        # - Reading and validating message preludes
-        # - Calculating message positions and boundaries
-        # - Finding and validating prelude positions in chunks
-        # - Ensuring message integrity through length validation
-        #
-        # @example Reading a prelude
-        #   if can_read_prelude?(chunk, offset)
-        #     total_length, headers_length = read_prelude(chunk, offset)
-        #     process_message_with_lengths(total_length, headers_length)
-        #   end
         module PreludeHandling
           def can_read_prelude?(chunk, offset)
             chunk.bytesize - offset >= 12

data/lib/ruby_llm/providers/bedrock/streaming.rb

@@ -10,24 +10,6 @@ module RubyLLM
   module Providers
     class Bedrock
       # Streaming implementation for the AWS Bedrock API.
-      # This module provides functionality for handling streaming responses from AWS Bedrock,
-      # including message processing, content extraction, and error handling.
-      #
-      # The implementation is split into several focused modules:
-      # - Base: Core streaming functionality and module coordination
-      # - ContentExtraction: Extracting content from response data
-      # - MessageProcessing: Processing streaming message chunks
-      # - PayloadProcessing: Handling JSON payloads and chunk creation
-      # - PreludeHandling: Managing message preludes and headers
-      #
-      # @example Using the streaming module
-      #   class BedrockClient
-      #     include RubyLLM::Providers::Bedrock::Streaming
-      #
-      #     def stream_response(&block)
-      #       handle_stream(&block)
-      #     end
-      #   end
       module Streaming
         include Base
       end

data/lib/ruby_llm/providers/bedrock.rb

@@ -5,8 +5,7 @@ require 'time'
 
 module RubyLLM
   module Providers
-    # AWS Bedrock API integration. Handles chat completion and streaming
-    # for Claude models.
+    # AWS Bedrock API integration.
     class Bedrock < Provider
       include Bedrock::Chat
       include Bedrock::Streaming

data/lib/ruby_llm/providers/deepseek/capabilities.rb

@@ -10,7 +10,7 @@ module RubyLLM
         def context_window_for(model_id)
           case model_id
           when /deepseek-(?:chat|reasoner)/ then 64_000
-          else 32_768 # Sensible default
+          else 32_768
           end
         end
 
@@ -67,7 +67,6 @@ module RubyLLM
           end
         end
 
-        # Pricing information for DeepSeek models (USD per 1M tokens)
         PRICES = {
           chat: {
             input_hit: 0.07,

data/lib/ruby_llm/providers/deepseek/chat.rb

@@ -8,7 +8,6 @@ module RubyLLM
         module_function
 
         def format_role(role)
-          # DeepSeek doesn't use the new OpenAI convention for system prompts
           role.to_s
         end
       end