ruby_llm_community 1.2.0 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. checksums.yaml +4 -4
  2. data/README.md +13 -9
  3. data/lib/generators/ruby_llm/chat_ui/chat_ui_generator.rb +127 -67
  4. data/lib/generators/ruby_llm/chat_ui/templates/controllers/chats_controller.rb.tt +12 -12
  5. data/lib/generators/ruby_llm/chat_ui/templates/controllers/messages_controller.rb.tt +7 -7
  6. data/lib/generators/ruby_llm/chat_ui/templates/controllers/models_controller.rb.tt +4 -4
  7. data/lib/generators/ruby_llm/chat_ui/templates/jobs/chat_response_job.rb.tt +6 -6
  8. data/lib/generators/ruby_llm/chat_ui/templates/views/chats/_chat.html.erb.tt +4 -4
  9. data/lib/generators/ruby_llm/chat_ui/templates/views/chats/_form.html.erb.tt +5 -5
  10. data/lib/generators/ruby_llm/chat_ui/templates/views/chats/index.html.erb.tt +5 -5
  11. data/lib/generators/ruby_llm/chat_ui/templates/views/chats/new.html.erb.tt +4 -4
  12. data/lib/generators/ruby_llm/chat_ui/templates/views/chats/show.html.erb.tt +8 -8
  13. data/lib/generators/ruby_llm/chat_ui/templates/views/messages/_content.html.erb.tt +1 -0
  14. data/lib/generators/ruby_llm/chat_ui/templates/views/messages/_form.html.erb.tt +5 -5
  15. data/lib/generators/ruby_llm/chat_ui/templates/views/messages/_message.html.erb.tt +9 -6
  16. data/lib/generators/ruby_llm/chat_ui/templates/views/messages/_tool_calls.html.erb.tt +7 -0
  17. data/lib/generators/ruby_llm/chat_ui/templates/views/messages/create.turbo_stream.erb.tt +5 -5
  18. data/lib/generators/ruby_llm/chat_ui/templates/views/models/_model.html.erb.tt +9 -9
  19. data/lib/generators/ruby_llm/chat_ui/templates/views/models/index.html.erb.tt +4 -6
  20. data/lib/generators/ruby_llm/chat_ui/templates/views/models/show.html.erb.tt +11 -11
  21. data/lib/generators/ruby_llm/generator_helpers.rb +152 -87
  22. data/lib/generators/ruby_llm/install/install_generator.rb +75 -79
  23. data/lib/generators/ruby_llm/install/templates/create_messages_migration.rb.tt +3 -0
  24. data/lib/generators/ruby_llm/install/templates/create_models_migration.rb.tt +5 -0
  25. data/lib/generators/ruby_llm/install/templates/create_tool_calls_migration.rb.tt +7 -1
  26. data/lib/generators/ruby_llm/install/templates/initializer.rb.tt +1 -1
  27. data/lib/generators/ruby_llm/upgrade_to_v1_7/upgrade_to_v1_7_generator.rb +88 -85
  28. data/lib/generators/ruby_llm/upgrade_to_v1_9/templates/add_v1_9_message_columns.rb.tt +15 -0
  29. data/lib/generators/ruby_llm/upgrade_to_v1_9/upgrade_to_v1_9_generator.rb +49 -0
  30. data/lib/ruby_llm/active_record/acts_as.rb +23 -16
  31. data/lib/ruby_llm/active_record/chat_methods.rb +41 -13
  32. data/lib/ruby_llm/active_record/message_methods.rb +11 -2
  33. data/lib/ruby_llm/active_record/model_methods.rb +1 -1
  34. data/lib/ruby_llm/aliases.json +61 -32
  35. data/lib/ruby_llm/attachment.rb +42 -11
  36. data/lib/ruby_llm/chat.rb +13 -2
  37. data/lib/ruby_llm/configuration.rb +6 -1
  38. data/lib/ruby_llm/connection.rb +4 -4
  39. data/lib/ruby_llm/content.rb +23 -0
  40. data/lib/ruby_llm/message.rb +17 -9
  41. data/lib/ruby_llm/model/info.rb +4 -0
  42. data/lib/ruby_llm/models.json +7157 -6089
  43. data/lib/ruby_llm/models.rb +14 -22
  44. data/lib/ruby_llm/provider.rb +27 -5
  45. data/lib/ruby_llm/providers/anthropic/chat.rb +18 -5
  46. data/lib/ruby_llm/providers/anthropic/content.rb +44 -0
  47. data/lib/ruby_llm/providers/anthropic/media.rb +6 -5
  48. data/lib/ruby_llm/providers/anthropic/models.rb +9 -2
  49. data/lib/ruby_llm/providers/anthropic/tools.rb +20 -18
  50. data/lib/ruby_llm/providers/bedrock/media.rb +2 -1
  51. data/lib/ruby_llm/providers/bedrock/streaming/content_extraction.rb +9 -2
  52. data/lib/ruby_llm/providers/gemini/chat.rb +353 -72
  53. data/lib/ruby_llm/providers/gemini/media.rb +59 -1
  54. data/lib/ruby_llm/providers/gemini/tools.rb +146 -25
  55. data/lib/ruby_llm/providers/gemini/transcription.rb +116 -0
  56. data/lib/ruby_llm/providers/gemini.rb +2 -1
  57. data/lib/ruby_llm/providers/gpustack/media.rb +1 -0
  58. data/lib/ruby_llm/providers/ollama/media.rb +1 -0
  59. data/lib/ruby_llm/providers/openai/capabilities.rb +15 -7
  60. data/lib/ruby_llm/providers/openai/chat.rb +7 -3
  61. data/lib/ruby_llm/providers/openai/media.rb +2 -1
  62. data/lib/ruby_llm/providers/openai/streaming.rb +7 -3
  63. data/lib/ruby_llm/providers/openai/tools.rb +34 -12
  64. data/lib/ruby_llm/providers/openai/transcription.rb +70 -0
  65. data/lib/ruby_llm/providers/openai_base.rb +1 -0
  66. data/lib/ruby_llm/providers/vertexai/transcription.rb +16 -0
  67. data/lib/ruby_llm/providers/vertexai.rb +11 -11
  68. data/lib/ruby_llm/railtie.rb +24 -22
  69. data/lib/ruby_llm/stream_accumulator.rb +8 -12
  70. data/lib/ruby_llm/tool.rb +126 -0
  71. data/lib/ruby_llm/transcription.rb +35 -0
  72. data/lib/ruby_llm/utils.rb +46 -0
  73. data/lib/ruby_llm/version.rb +1 -1
  74. data/lib/ruby_llm_community.rb +7 -1
  75. metadata +27 -3
data/lib/ruby_llm/providers/gemini/tools.rb

@@ -13,7 +13,41 @@ module RubyLLM
           }]
         end
 
-        def extract_tool_calls(data)
+        def format_tool_call(msg)
+          parts = []
+
+          if msg.content && !(msg.content.respond_to?(:empty?) && msg.content.empty?)
+            formatted_content = Media.format_content(msg.content)
+            parts.concat(formatted_content.is_a?(Array) ? formatted_content : [formatted_content])
+          end
+
+          msg.tool_calls.each_value do |tool_call|
+            parts << {
+              functionCall: {
+                name: tool_call.name,
+                args: tool_call.arguments
+              }
+            }
+          end
+
+          parts
+        end
+
+        def format_tool_result(msg, function_name = nil)
+          function_name ||= msg.tool_call_id
+
+          [{
+            functionResponse: {
+              name: function_name,
+              response: {
+                name: function_name,
+                content: Media.format_content(msg.content)
+              }
+            }
+          }]
+        end
+
+        def extract_tool_calls(data) # rubocop:disable Metrics/PerceivedComplexity
           return nil unless data
 
           candidate = data.is_a?(Hash) ? data.dig('candidates', 0) : nil
@@ -22,49 +56,136 @@ module RubyLLM
          parts = candidate.dig('content', 'parts')
          return nil unless parts.is_a?(Array)
 
-          function_call_part = parts.find { |p| p['functionCall'] }
-          return nil unless function_call_part
-
-          function_data = function_call_part['functionCall']
-          return nil unless function_data
+          tool_calls = parts.each_with_object({}) do |part, result|
+            function_data = part['functionCall']
+            next unless function_data
 
-          id = SecureRandom.uuid
+            id = SecureRandom.uuid
 
-          {
-            id => ToolCall.new(
-              id: id,
+            result[id] = ToolCall.new(
+              id:,
              name: function_data['name'],
-              arguments: function_data['args']
+              arguments: function_data['args'] || {}
            )
-          }
+          end
+
+          tool_calls.empty? ? nil : tool_calls
        end
 
        private
 
        def function_declaration_for(tool)
-          {
+          parameters_schema = tool.params_schema ||
+                              RubyLLM::Tool::SchemaDefinition.from_parameters(tool.parameters)&.json_schema
+
+          declaration = {
            name: tool.name,
-            description: tool.description,
-            parameters: tool.parameters.any? ? format_parameters(tool.parameters) : nil
-          }.compact
+            description: tool.description
+          }
+
+          declaration[:parameters] = convert_tool_schema_to_gemini(parameters_schema) if parameters_schema
+
+          return declaration if tool.provider_params.empty?
+
+          RubyLLM::Utils.deep_merge(declaration, tool.provider_params)
        end
 
-        def format_parameters(parameters)
+        def convert_tool_schema_to_gemini(schema)
+          return nil unless schema
+
+          schema = RubyLLM::Utils.deep_stringify_keys(schema)
+
+          raise ArgumentError, 'Gemini tool parameters must be objects' unless schema['type'] == 'object'
+
          {
            type: 'OBJECT',
-            properties: parameters.transform_values do |param|
-              {
-                type: param_type_for_gemini(param.type),
-                description: param.description
-              }.compact
-            end,
-            required: parameters.select { |_, p| p.required }.keys.map(&:to_s)
+            properties: schema.fetch('properties', {}).transform_values { |property| convert_property(property) },
+            required: (schema['required'] || []).map(&:to_s)
          }
        end
 
+        def convert_property(property_schema) # rubocop:disable Metrics/PerceivedComplexity
+          normalized_schema = normalize_any_of_schema(property_schema)
+          working_schema = normalized_schema || property_schema
+
+          type = param_type_for_gemini(working_schema['type'])
+
+          property = {
+            type: type
+          }
+
+          copy_common_attributes(property, property_schema)
+          copy_common_attributes(property, working_schema)
+
+          case type
+          when 'ARRAY'
+            items_schema = working_schema['items'] || property_schema['items'] || { 'type' => 'string' }
+            property[:items] = convert_property(items_schema)
+            copy_tool_attributes(property, working_schema, %w[minItems maxItems])
+            copy_tool_attributes(property, property_schema, %w[minItems maxItems])
+          when 'OBJECT'
+            nested_properties = working_schema.fetch('properties', {}).transform_values do |child|
+              convert_property(child)
+            end
+            property[:properties] = nested_properties
+            required = working_schema['required'] || property_schema['required']
+            property[:required] = required.map(&:to_s) if required
+          end
+
+          property
+        end
+
+        def copy_common_attributes(target, source)
+          copy_tool_attributes(target, source, %w[description enum format nullable maximum minimum multipleOf])
+        end
+
+        def copy_tool_attributes(target, source, attributes)
+          attributes.each do |attribute|
+            value = schema_value(source, attribute)
+            next if value.nil?
+
+            target[attribute.to_sym] = value
+          end
+        end
+
+        def normalize_any_of_schema(schema) # rubocop:disable Metrics/PerceivedComplexity
+          any_of = schema['anyOf'] || schema[:anyOf]
+          return nil unless any_of.is_a?(Array) && any_of.any?
+
+          null_entries, non_null_entries = any_of.partition { |entry| schema_type(entry).to_s == 'null' }
+
+          if non_null_entries.size == 1 && null_entries.any?
+            normalized = RubyLLM::Utils.deep_dup(non_null_entries.first)
+            normalized['nullable'] = true
+            normalized
+          elsif non_null_entries.any?
+            RubyLLM::Utils.deep_dup(non_null_entries.first)
+          else
+            { 'type' => 'string', 'nullable' => true }
+          end
+        end
+
+        def schema_type(schema)
+          schema['type'] || schema[:type]
+        end
+
+        def schema_value(source, attribute) # rubocop:disable Metrics/PerceivedComplexity
+          case attribute
+          when 'multipleOf'
+            source['multipleOf'] || source[:multipleOf] || source['multiple_of'] || source[:multiple_of]
+          when 'minItems'
+            source['minItems'] || source[:minItems] || source['min_items'] || source[:min_items]
+          when 'maxItems'
+            source['maxItems'] || source[:maxItems] || source['max_items'] || source[:max_items]
+          else
+            source[attribute] || source[attribute.to_sym]
+          end
+        end
+
        def param_type_for_gemini(type)
          case type.to_s.downcase
-          when 'integer', 'number', 'float' then 'NUMBER'
+          when 'integer' then 'INTEGER'
+          when 'number', 'float', 'double' then 'NUMBER'
          when 'boolean' then 'BOOLEAN'
          when 'array' then 'ARRAY'
          when 'object' then 'OBJECT'
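
Note on the new schema conversion: convert_tool_schema_to_gemini walks an ordinary JSON Schema and emits Gemini's uppercase typed schema, folding anyOf-with-null into a nullable flag along the way. A worked sketch on a hypothetical input (it assumes the unchanged else branch of param_type_for_gemini, not visible in this hunk, maps remaining types such as 'string' to 'STRING'):

    input = {
      'type' => 'object',
      'properties' => {
        'tags'  => { 'type' => 'array', 'items' => { 'type' => 'string' }, 'minItems' => 1 },
        'limit' => { 'anyOf' => [{ 'type' => 'integer' }, { 'type' => 'null' }] }
      },
      'required' => ['tags']
    }

    convert_tool_schema_to_gemini(input)
    # => {
    #      type: 'OBJECT',
    #      properties: {
    #        'tags'  => { type: 'ARRAY', items: { type: 'STRING' }, minItems: 1 },
    #        'limit' => { type: 'INTEGER', nullable: true }   # anyOf [integer, null] normalized
    #      },
    #      required: ['tags']
    #    }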
data/lib/ruby_llm/providers/gemini/transcription.rb (new file)

@@ -0,0 +1,116 @@
+# frozen_string_literal: true
+
+module RubyLLM
+  module Providers
+    class Gemini
+      # Audio transcription helpers for the Gemini API implementation
+      module Transcription
+        DEFAULT_PROMPT = 'Transcribe the provided audio and respond with only the transcript text.'
+
+        def transcribe(audio_file, model:, language:, **options)
+          attachment = Attachment.new(audio_file)
+          payload = render_transcription_payload(attachment, language:, **options)
+          response = @connection.post(transcription_url(model), payload)
+          parse_transcription_response(response, model:)
+        end
+
+        private
+
+        def transcription_url(model)
+          "models/#{model}:generateContent"
+        end
+
+        def render_transcription_payload(attachment, language:, **options)
+          prompt = build_prompt(options[:prompt], language)
+          audio_part = format_audio_part(attachment)
+
+          raise UnsupportedAttachmentError, attachment.mime_type unless attachment.audio?
+
+          payload = {
+            contents: [
+              {
+                role: 'user',
+                parts: [
+                  { text: prompt },
+                  audio_part
+                ]
+              }
+            ]
+          }
+
+          generation_config = build_generation_config(options)
+          payload[:generationConfig] = generation_config unless generation_config.empty?
+          payload[:safetySettings] = options[:safety_settings] if options[:safety_settings]
+
+          payload
+        end
+
+        def build_generation_config(options)
+          config = {}
+          response_mime_type = options.fetch(:response_mime_type, 'text/plain')
+
+          config[:responseMimeType] = response_mime_type if response_mime_type
+          config[:temperature] = options[:temperature] if options.key?(:temperature)
+          config[:maxOutputTokens] = options[:max_output_tokens] if options[:max_output_tokens]
+
+          config
+        end
+
+        def build_prompt(custom_prompt, language)
+          prompt = DEFAULT_PROMPT
+          prompt += " Respond in the #{language} language." if language
+          prompt += " #{custom_prompt}" if custom_prompt
+          prompt
+        end
+
+        def format_audio_part(attachment)
+          {
+            inline_data: {
+              mime_type: attachment.mime_type,
+              data: attachment.encoded
+            }
+          }
+        end
+
+        def parse_transcription_response(response, model:)
+          data = response.body
+          text = extract_text(data)
+
+          usage = extract_usage(data)
+
+          RubyLLM::Transcription.new(
+            text: text,
+            model: model,
+            input_tokens: usage[:input_tokens],
+            output_tokens: usage[:output_tokens]
+          )
+        end
+
+        def extract_text(data)
+          candidate = data.is_a?(Hash) ? data.dig('candidates', 0) : nil
+          return unless candidate
+
+          parts = candidate.dig('content', 'parts') || []
+          texts = parts.filter_map { |part| part['text'] }
+          texts.join if texts.any?
+        end
+
+        def extract_usage(data)
+          metadata = data.is_a?(Hash) ? data['usageMetadata'] : nil
+          return { input_tokens: nil, output_tokens: nil } unless metadata
+
+          {
+            input_tokens: metadata['promptTokenCount'],
+            output_tokens: sum_output_tokens(metadata)
+          }
+        end
+
+        def sum_output_tokens(metadata)
+          candidates = metadata['candidatesTokenCount'] || 0
+          thoughts = metadata['thoughtsTokenCount'] || 0
+          candidates + thoughts
+        end
+      end
+    end
+  end
+end
data/lib/ruby_llm/providers/gemini.rb

@@ -8,12 +8,13 @@ module RubyLLM
      include Gemini::Embeddings
      include Gemini::Images
      include Gemini::Models
+      include Gemini::Transcription
      include Gemini::Streaming
      include Gemini::Tools
      include Gemini::Media
 
      def api_base
-        'https://generativelanguage.googleapis.com/v1beta'
+        @config.gemini_api_base || 'https://generativelanguage.googleapis.com/v1beta'
      end
 
      def headers
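
The api_base change makes the Gemini endpoint overridable, e.g. for proxies or regional mirrors. A sketch, assuming the standard RubyLLM.configure block (the gemini_api_base setting is added to configuration.rb in this same release; the proxy URL is hypothetical):

    RubyLLM.configure do |config|
      config.gemini_api_key  = ENV['GEMINI_API_KEY']
      config.gemini_api_base = 'https://llm-proxy.internal/v1beta' # nil falls back to the Google endpoint
    end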
data/lib/ruby_llm/providers/gpustack/media.rb

@@ -10,6 +10,7 @@ module RubyLLM
        module_function
 
        def format_content(content)
+          return content.value if content.is_a?(RubyLLM::Content::Raw)
          return content.to_json if content.is_a?(Hash) || content.is_a?(Array)
          return content unless content.is_a?(Content)
 
data/lib/ruby_llm/providers/ollama/media.rb

@@ -10,6 +10,7 @@ module RubyLLM
        module_function
 
        def format_content(content)
+          return content.value if content.is_a?(RubyLLM::Content::Raw)
          return content.to_json if content.is_a?(Hash) || content.is_a?(Array)
          return content unless content.is_a?(Content)
 
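
Both media hunks add the same escape hatch: a RubyLLM::Content::Raw value bypasses all formatting and is sent verbatim. The Raw class itself is added in data/lib/ruby_llm/content.rb in this release and is not shown here, so the constructor below is an assumption:

    raw = RubyLLM::Content::Raw.new([{ 'type' => 'text', 'text' => 'hi' }]) # assumed constructor
    RubyLLM::Providers::Ollama::Media.format_content(raw)
    # => [{ 'type' => 'text', 'text' => 'hi' }]   -- returned untouched via raw.value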
data/lib/ruby_llm/providers/openai/capabilities.rb

@@ -27,6 +27,9 @@ module RubyLLM
        gpt4o_realtime: /^gpt-4o-realtime/,
        gpt4o_search: /^gpt-4o-search/,
        gpt4o_transcribe: /^gpt-4o-transcribe/,
+        gpt5: /^gpt-5/,
+        gpt5_mini: /^gpt-5-mini/,
+        gpt5_nano: /^gpt-5-nano/,
        o1: /^o1(?!-(?:mini|pro))/,
        o1_mini: /^o1-mini/,
        o1_pro: /^o1-pro/,
@@ -45,7 +48,7 @@ module RubyLLM
      def context_window_for(model_id)
        case model_family(model_id)
        when 'gpt41', 'gpt41_mini', 'gpt41_nano' then 1_047_576
-        when 'chatgpt4o', 'gpt4_turbo', 'gpt4o', 'gpt4o_audio', 'gpt4o_mini',
+        when 'gpt5', 'gpt5_mini', 'gpt5_nano', 'chatgpt4o', 'gpt4_turbo', 'gpt4o', 'gpt4o_audio', 'gpt4o_mini',
             'gpt4o_mini_audio', 'gpt4o_mini_realtime', 'gpt4o_realtime',
             'gpt4o_search', 'gpt4o_transcribe', 'gpt4o_mini_search', 'o1_mini' then 128_000
        when 'gpt4' then 8_192
@@ -60,6 +63,7 @@ module RubyLLM
 
      def max_tokens_for(model_id)
        case model_family(model_id)
+        when 'gpt5', 'gpt5_mini', 'gpt5_nano' then 400_000
        when 'gpt41', 'gpt41_mini', 'gpt41_nano' then 32_768
        when 'chatgpt4o', 'gpt4o', 'gpt4o_mini', 'gpt4o_mini_search' then 16_384
        when 'babbage', 'davinci' then 16_384 # rubocop:disable Lint/DuplicateBranch
@@ -77,16 +81,17 @@ module RubyLLM
 
      def supports_vision?(model_id)
        case model_family(model_id)
-        when 'gpt41', 'gpt41_mini', 'gpt41_nano', 'chatgpt4o', 'gpt4', 'gpt4_turbo', 'gpt4o', 'gpt4o_mini', 'o1',
-             'o1_pro', 'moderation', 'gpt4o_search', 'gpt4o_mini_search' then true
+        when 'gpt5', 'gpt5_mini', 'gpt5_nano', 'gpt41', 'gpt41_mini', 'gpt41_nano', 'chatgpt4o', 'gpt4',
+             'gpt4_turbo', 'gpt4o', 'gpt4o_mini', 'o1', 'o1_pro', 'moderation', 'gpt4o_search',
+             'gpt4o_mini_search' then true
        else false
        end
      end
 
      def supports_functions?(model_id)
        case model_family(model_id)
-        when 'gpt41', 'gpt41_mini', 'gpt41_nano', 'gpt4', 'gpt4_turbo', 'gpt4o', 'gpt4o_mini', 'o1', 'o1_pro',
-             'o3_mini' then true
+        when 'gpt5', 'gpt5_mini', 'gpt5_nano', 'gpt41', 'gpt41_mini', 'gpt41_nano', 'gpt4', 'gpt4_turbo', 'gpt4o',
+             'gpt4o_mini', 'o1', 'o1_pro', 'o3_mini' then true
        when 'chatgpt4o', 'gpt35_turbo', 'o1_mini', 'gpt4o_mini_tts',
             'gpt4o_transcribe', 'gpt4o_search', 'gpt4o_mini_search' then false
        else false # rubocop:disable Lint/DuplicateBranch
@@ -95,8 +100,8 @@ module RubyLLM
 
      def supports_structured_output?(model_id)
        case model_family(model_id)
-        when 'gpt41', 'gpt41_mini', 'gpt41_nano', 'chatgpt4o', 'gpt4o', 'gpt4o_mini', 'o1', 'o1_pro',
-             'o3_mini' then true
+        when 'gpt5', 'gpt5_mini', 'gpt5_nano', 'gpt41', 'gpt41_mini', 'gpt41_nano', 'chatgpt4o', 'gpt4o',
+             'gpt4o_mini', 'o1', 'o1_pro', 'o3_mini' then true
        else false
        end
      end
@@ -107,6 +112,9 @@ module RubyLLM
 
      PRICES = {
        gpt_image_1: { input_text: 5.0, input_image: 10.0, output: 8.0, cached_input: 0.5 }, # rubocop:disable Naming/VariableNumber
+        gpt5: { input: 1.25, output: 10.0, cached_input: 0.125 },
+        gpt5_mini: { input: 0.25, output: 2.0, cached_input: 0.025 },
+        gpt5_nano: { input: 0.05, output: 0.4, cached_input: 0.005 },
        gpt41: { input: 2.0, output: 8.0, cached_input: 0.5 },
        gpt41_mini: { input: 0.4, output: 1.6, cached_input: 0.1 },
        gpt41_nano: { input: 0.1, output: 0.4 },
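
For scale, a rough cost sketch from the new gpt5 row, assuming (as with the neighbouring entries) prices are USD per million tokens and that cached prompt tokens bill at the cached_input rate instead of the full input rate:

    prices = { input: 1.25, output: 10.0, cached_input: 0.125 } # gpt5 row above
    input_tokens, cached_tokens, output_tokens = 50_000, 20_000, 4_000
    cost = ((input_tokens - cached_tokens) * prices[:input] +
            cached_tokens * prices[:cached_input] +
            output_tokens * prices[:output]) / 1_000_000.0
    # => 0.08 (USD) for this hypothetical request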
data/lib/ruby_llm/providers/openai/chat.rb

@@ -48,13 +48,17 @@ module RubyLLM
          message_data = data.dig('choices', 0, 'message')
          return unless message_data
 
+          usage = data['usage'] || {}
+          cached_tokens = usage.dig('prompt_tokens_details', 'cached_tokens')
+
          Message.new(
            role: :assistant,
            content: message_data['content'],
            tool_calls: parse_tool_calls(message_data['tool_calls']),
-            input_tokens: data['usage']['prompt_tokens'],
-            output_tokens: data['usage']['completion_tokens'],
-            cached_tokens: data.dig('usage', 'prompt_tokens_details', 'cached_tokens'),
+            input_tokens: usage['prompt_tokens'],
+            output_tokens: usage['completion_tokens'],
+            cached_tokens: cached_tokens,
+            cache_creation_tokens: 0,
            model_id: data['model'],
            raw: response
          )
data/lib/ruby_llm/providers/openai/media.rb

@@ -7,7 +7,8 @@ module RubyLLM
      module Media
        module_function
 
-        def format_content(content)
+        def format_content(content) # rubocop:disable Metrics/PerceivedComplexity
+          return content.value if content.is_a?(RubyLLM::Content::Raw)
          return content.to_json if content.is_a?(Hash) || content.is_a?(Array)
          return content unless content.is_a?(Content)
 
data/lib/ruby_llm/providers/openai/streaming.rb

@@ -86,14 +86,18 @@ module RubyLLM
        end
 
        def build_chat_completions_chunk(data)
+          usage = data['usage'] || {}
+          cached_tokens = usage.dig('prompt_tokens_details', 'cached_tokens')
+
          Chunk.new(
            role: :assistant,
            model_id: data['model'],
            content: data.dig('choices', 0, 'delta', 'content'),
            tool_calls: parse_tool_calls(data.dig('choices', 0, 'delta', 'tool_calls'), parse_arguments: false),
-            input_tokens: data.dig('usage', 'prompt_tokens'),
-            output_tokens: data.dig('usage', 'completion_tokens'),
-            cached_tokens: data.dig('usage', 'cached_tokens')
+            input_tokens: usage['prompt_tokens'],
+            output_tokens: usage['completion_tokens'],
+            cached_tokens: cached_tokens,
+            cache_creation_tokens: 0
          )
        end
 
data/lib/ruby_llm/providers/openai/tools.rb

@@ -7,24 +7,54 @@ module RubyLLM
      module Tools
        module_function
 
+        EMPTY_PARAMETERS_SCHEMA = {
+          'type' => 'object',
+          'properties' => {},
+          'required' => [],
+          'additionalProperties' => false,
+          'strict' => true
+        }.freeze
+
+        def parameters_schema_for(tool)
+          tool.params_schema ||
+            schema_from_parameters(tool.parameters)
+        end
+
+        def schema_from_parameters(parameters)
+          schema_definition = RubyLLM::Tool::SchemaDefinition.from_parameters(parameters)
+          schema_definition&.json_schema || EMPTY_PARAMETERS_SCHEMA
+        end
+
        def chat_tool_for(tool)
-          {
+          parameters_schema = parameters_schema_for(tool)
+
+          definition = {
            type: 'function',
            function: {
              name: tool.name,
              description: tool.description,
-              parameters: tool_parameters_for(tool)
+              parameters: parameters_schema
            }
          }
+
+          return definition if tool.provider_params.empty?
+
+          RubyLLM::Utils.deep_merge(definition, tool.provider_params)
        end
 
        def response_tool_for(tool)
-          {
+          parameters_schema = parameters_schema_for(tool)
+
+          definition = {
            type: 'function',
            name: tool.name,
            description: tool.description,
-            parameters: tool_parameters_for(tool)
+            parameters: parameters_schema
          }
+
+          return definition if tool.provider_params.empty?
+
+          RubyLLM::Utils.deep_merge(definition, tool.provider_params)
        end
 
        def param_schema(param)
@@ -34,14 +64,6 @@ module RubyLLM
          }.compact
        end
 
-        def tool_parameters_for(tool)
-          {
-            type: 'object',
-            properties: tool.parameters.transform_values { |param| param_schema(param) },
-            required: tool.parameters.select { |_, p| p.required }.keys
-          }
-        end
-
        def format_tool_calls(tool_calls)
          return nil unless tool_calls&.any?
 
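
The net effect of chat_tool_for is easiest to see on a tool with no declared parameters and a provider_params override (the params_schema and provider_params accessors live in data/lib/ruby_llm/tool.rb, which this section does not show). A hypothetical result:

    # Given tool.name == 'ping', tool.description == 'Health check',
    # no parameters, and tool.provider_params == { function: { strict: true } }:
    chat_tool_for(tool)
    # => {
    #      type: 'function',
    #      function: {
    #        name: 'ping',
    #        description: 'Health check',
    #        parameters: EMPTY_PARAMETERS_SCHEMA,
    #        strict: true   # injected by RubyLLM::Utils.deep_merge
    #      }
    #    }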
data/lib/ruby_llm/providers/openai/transcription.rb (new file)

@@ -0,0 +1,70 @@
+# frozen_string_literal: true
+
+module RubyLLM
+  module Providers
+    class OpenAI
+      # Audio transcription methods for the OpenAI API integration
+      module Transcription
+        module_function
+
+        def transcription_url
+          'audio/transcriptions'
+        end
+
+        def render_transcription_payload(file_part, model:, language:, **options)
+          {
+            model: model,
+            file: file_part,
+            language: language,
+            chunking_strategy: (options[:chunking_strategy] || 'auto' if supports_chunking_strategy?(model, options)),
+            response_format: response_format_for(model, options),
+            prompt: options[:prompt],
+            temperature: options[:temperature],
+            timestamp_granularities: options[:timestamp_granularities],
+            known_speaker_names: options[:speaker_names],
+            known_speaker_references: encode_speaker_references(options[:speaker_references])
+          }.compact
+        end
+
+        def encode_speaker_references(references)
+          return nil unless references
+
+          references.map do |ref|
+            Attachment.new(ref).for_llm
+          end
+        end
+
+        def response_format_for(model, options)
+          return options[:response_format] if options.key?(:response_format)
+
+          'diarized_json' if model.include?('diarize')
+        end
+
+        def supports_chunking_strategy?(model, options)
+          return false if model.start_with?('whisper')
+          return true if options.key?(:chunking_strategy)
+
+          model.include?('diarize')
+        end
+
+        def parse_transcription_response(response, model:)
+          data = response.body
+
+          return RubyLLM::Transcription.new(text: data, model: model) if data.is_a?(String)
+
+          usage = data['usage'] || {}
+
+          RubyLLM::Transcription.new(
+            text: data['text'],
+            model: model,
+            language: data['language'],
+            duration: data['duration'],
+            segments: data['segments'],
+            input_tokens: usage['input_tokens'] || usage['prompt_tokens'],
+            output_tokens: usage['output_tokens'] || usage['completion_tokens']
+          )
+        end
+      end
+    end
+  end
+end
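
Because the module uses module_function, the model-dependent rules above can be exercised directly. A sketch (the model ids are illustrative):

    t = RubyLLM::Providers::OpenAI::Transcription
    t.supports_chunking_strategy?('whisper-1', {})                 # => false, whisper models never chunk
    t.supports_chunking_strategy?('gpt-4o-transcribe-diarize', {}) # => true
    t.response_format_for('gpt-4o-transcribe-diarize', {})         # => 'diarized_json'
    t.response_format_for('gpt-4o-transcribe', {})                 # => nil, server default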
data/lib/ruby_llm/providers/openai_base.rb

@@ -14,6 +14,7 @@ module RubyLLM
      include OpenAI::Tools
      include OpenAI::Images
      include OpenAI::Media
+      include OpenAI::Transcription
 
      def api_base
        @config.openai_api_base || 'https://api.openai.com/v1'
data/lib/ruby_llm/providers/vertexai/transcription.rb (new file)

@@ -0,0 +1,16 @@
+# frozen_string_literal: true
+
+module RubyLLM
+  module Providers
+    class VertexAI
+      # Vertex AI specific helpers for audio transcription
+      module Transcription
+        private
+
+        def transcription_url(model)
+          "projects/#{@config.vertexai_project_id}/locations/#{@config.vertexai_location}/publishers/google/models/#{model}:generateContent" # rubocop:disable Layout/LineLength
+        end
+      end
+    end
+  end
+end
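
The Vertex AI variant only swaps the request path; the shared Gemini transcription flow does the rest. With hypothetical configuration values, the private helper resolves to:

    project, location, model = 'my-project', 'us-central1', 'gemini-2.5-flash'
    "projects/#{project}/locations/#{location}/publishers/google/models/#{model}:generateContent"
    # => "projects/my-project/locations/us-central1/publishers/google/models/gemini-2.5-flash:generateContent"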