dify_llm 1.8.1 → 1.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. checksums.yaml +4 -4
  2. data/README.md +12 -7
  3. data/lib/generators/ruby_llm/chat_ui/chat_ui_generator.rb +117 -69
  4. data/lib/generators/ruby_llm/chat_ui/templates/controllers/chats_controller.rb.tt +12 -12
  5. data/lib/generators/ruby_llm/chat_ui/templates/controllers/messages_controller.rb.tt +7 -7
  6. data/lib/generators/ruby_llm/chat_ui/templates/controllers/models_controller.rb.tt +4 -4
  7. data/lib/generators/ruby_llm/chat_ui/templates/jobs/chat_response_job.rb.tt +6 -6
  8. data/lib/generators/ruby_llm/chat_ui/templates/views/chats/_chat.html.erb.tt +4 -4
  9. data/lib/generators/ruby_llm/chat_ui/templates/views/chats/_form.html.erb.tt +5 -5
  10. data/lib/generators/ruby_llm/chat_ui/templates/views/chats/index.html.erb.tt +5 -5
  11. data/lib/generators/ruby_llm/chat_ui/templates/views/chats/new.html.erb.tt +4 -4
  12. data/lib/generators/ruby_llm/chat_ui/templates/views/chats/show.html.erb.tt +8 -8
  13. data/lib/generators/ruby_llm/chat_ui/templates/views/messages/_form.html.erb.tt +5 -5
  14. data/lib/generators/ruby_llm/chat_ui/templates/views/messages/_message.html.erb.tt +9 -6
  15. data/lib/generators/ruby_llm/chat_ui/templates/views/messages/_tool_calls.html.erb.tt +7 -0
  16. data/lib/generators/ruby_llm/chat_ui/templates/views/messages/create.turbo_stream.erb.tt +5 -5
  17. data/lib/generators/ruby_llm/chat_ui/templates/views/models/_model.html.erb.tt +9 -9
  18. data/lib/generators/ruby_llm/chat_ui/templates/views/models/index.html.erb.tt +4 -6
  19. data/lib/generators/ruby_llm/chat_ui/templates/views/models/show.html.erb.tt +11 -11
  20. data/lib/generators/ruby_llm/generator_helpers.rb +131 -87
  21. data/lib/generators/ruby_llm/install/install_generator.rb +75 -79
  22. data/lib/generators/ruby_llm/install/templates/create_messages_migration.rb.tt +3 -0
  23. data/lib/generators/ruby_llm/install/templates/initializer.rb.tt +1 -1
  24. data/lib/generators/ruby_llm/upgrade_to_v1_7/upgrade_to_v1_7_generator.rb +88 -85
  25. data/lib/generators/ruby_llm/upgrade_to_v1_9/templates/add_v1_9_message_columns.rb.tt +15 -0
  26. data/lib/generators/ruby_llm/upgrade_to_v1_9/upgrade_to_v1_9_generator.rb +49 -0
  27. data/lib/ruby_llm/active_record/acts_as.rb +17 -8
  28. data/lib/ruby_llm/active_record/chat_methods.rb +41 -13
  29. data/lib/ruby_llm/active_record/message_methods.rb +11 -2
  30. data/lib/ruby_llm/active_record/model_methods.rb +1 -1
  31. data/lib/ruby_llm/aliases.json +62 -20
  32. data/lib/ruby_llm/attachment.rb +8 -0
  33. data/lib/ruby_llm/chat.rb +13 -2
  34. data/lib/ruby_llm/configuration.rb +6 -1
  35. data/lib/ruby_llm/connection.rb +4 -4
  36. data/lib/ruby_llm/content.rb +23 -0
  37. data/lib/ruby_llm/message.rb +11 -6
  38. data/lib/ruby_llm/model/info.rb +4 -0
  39. data/lib/ruby_llm/models.json +9410 -7793
  40. data/lib/ruby_llm/models.rb +14 -22
  41. data/lib/ruby_llm/provider.rb +23 -1
  42. data/lib/ruby_llm/providers/anthropic/chat.rb +22 -3
  43. data/lib/ruby_llm/providers/anthropic/content.rb +44 -0
  44. data/lib/ruby_llm/providers/anthropic/media.rb +2 -1
  45. data/lib/ruby_llm/providers/anthropic/models.rb +15 -0
  46. data/lib/ruby_llm/providers/anthropic/streaming.rb +2 -0
  47. data/lib/ruby_llm/providers/anthropic/tools.rb +20 -18
  48. data/lib/ruby_llm/providers/bedrock/media.rb +2 -1
  49. data/lib/ruby_llm/providers/bedrock/streaming/content_extraction.rb +15 -0
  50. data/lib/ruby_llm/providers/bedrock/streaming/payload_processing.rb +2 -0
  51. data/lib/ruby_llm/providers/dify/chat.rb +16 -5
  52. data/lib/ruby_llm/providers/gemini/chat.rb +352 -69
  53. data/lib/ruby_llm/providers/gemini/media.rb +59 -1
  54. data/lib/ruby_llm/providers/gemini/tools.rb +146 -25
  55. data/lib/ruby_llm/providers/gemini/transcription.rb +116 -0
  56. data/lib/ruby_llm/providers/gemini.rb +2 -1
  57. data/lib/ruby_llm/providers/gpustack/media.rb +1 -0
  58. data/lib/ruby_llm/providers/ollama/media.rb +1 -0
  59. data/lib/ruby_llm/providers/openai/chat.rb +7 -2
  60. data/lib/ruby_llm/providers/openai/media.rb +2 -1
  61. data/lib/ruby_llm/providers/openai/streaming.rb +7 -2
  62. data/lib/ruby_llm/providers/openai/tools.rb +26 -6
  63. data/lib/ruby_llm/providers/openai/transcription.rb +70 -0
  64. data/lib/ruby_llm/providers/openai.rb +1 -0
  65. data/lib/ruby_llm/providers/vertexai/transcription.rb +16 -0
  66. data/lib/ruby_llm/providers/vertexai.rb +3 -0
  67. data/lib/ruby_llm/stream_accumulator.rb +10 -4
  68. data/lib/ruby_llm/tool.rb +126 -0
  69. data/lib/ruby_llm/transcription.rb +35 -0
  70. data/lib/ruby_llm/utils.rb +46 -0
  71. data/lib/ruby_llm/version.rb +1 -1
  72. data/lib/ruby_llm.rb +6 -0
  73. metadata +25 -3
data/lib/ruby_llm/providers/gemini/tools.rb
@@ -13,7 +13,41 @@ module RubyLLM
         }]
       end
 
-      def extract_tool_calls(data)
+      def format_tool_call(msg)
+        parts = []
+
+        if msg.content && !(msg.content.respond_to?(:empty?) && msg.content.empty?)
+          formatted_content = Media.format_content(msg.content)
+          parts.concat(formatted_content.is_a?(Array) ? formatted_content : [formatted_content])
+        end
+
+        msg.tool_calls.each_value do |tool_call|
+          parts << {
+            functionCall: {
+              name: tool_call.name,
+              args: tool_call.arguments
+            }
+          }
+        end
+
+        parts
+      end
+
+      def format_tool_result(msg, function_name = nil)
+        function_name ||= msg.tool_call_id
+
+        [{
+          functionResponse: {
+            name: function_name,
+            response: {
+              name: function_name,
+              content: Media.format_content(msg.content)
+            }
+          }
+        }]
+      end
+
+      def extract_tool_calls(data) # rubocop:disable Metrics/PerceivedComplexity
         return nil unless data
 
         candidate = data.is_a?(Hash) ? data.dig('candidates', 0) : nil
@@ -22,49 +56,136 @@ module RubyLLM
         parts = candidate.dig('content', 'parts')
         return nil unless parts.is_a?(Array)
 
-        function_call_part = parts.find { |p| p['functionCall'] }
-        return nil unless function_call_part
-
-        function_data = function_call_part['functionCall']
-        return nil unless function_data
+        tool_calls = parts.each_with_object({}) do |part, result|
+          function_data = part['functionCall']
+          next unless function_data
 
-        id = SecureRandom.uuid
+          id = SecureRandom.uuid
 
-        {
-          id => ToolCall.new(
-            id: id,
+          result[id] = ToolCall.new(
+            id:,
             name: function_data['name'],
-            arguments: function_data['args']
+            arguments: function_data['args'] || {}
           )
-        }
+        end
+
+        tool_calls.empty? ? nil : tool_calls
       end
 
       private
 
       def function_declaration_for(tool)
-        {
+        parameters_schema = tool.params_schema ||
+                            RubyLLM::Tool::SchemaDefinition.from_parameters(tool.parameters)&.json_schema
+
+        declaration = {
           name: tool.name,
-          description: tool.description,
-          parameters: tool.parameters.any? ? format_parameters(tool.parameters) : nil
-        }.compact
+          description: tool.description
+        }
+
+        declaration[:parameters] = convert_tool_schema_to_gemini(parameters_schema) if parameters_schema
+
+        return declaration if tool.provider_params.empty?
+
+        RubyLLM::Utils.deep_merge(declaration, tool.provider_params)
      end
 
-      def format_parameters(parameters)
+      def convert_tool_schema_to_gemini(schema)
+        return nil unless schema
+
+        schema = RubyLLM::Utils.deep_stringify_keys(schema)
+
+        raise ArgumentError, 'Gemini tool parameters must be objects' unless schema['type'] == 'object'
+
         {
           type: 'OBJECT',
-          properties: parameters.transform_values do |param|
-            {
-              type: param_type_for_gemini(param.type),
-              description: param.description
-            }.compact
-          end,
-          required: parameters.select { |_, p| p.required }.keys.map(&:to_s)
+          properties: schema.fetch('properties', {}).transform_values { |property| convert_property(property) },
+          required: (schema['required'] || []).map(&:to_s)
         }
       end
 
+      def convert_property(property_schema) # rubocop:disable Metrics/PerceivedComplexity
+        normalized_schema = normalize_any_of_schema(property_schema)
+        working_schema = normalized_schema || property_schema
+
+        type = param_type_for_gemini(working_schema['type'])
+
+        property = {
+          type: type
+        }
+
+        copy_common_attributes(property, property_schema)
+        copy_common_attributes(property, working_schema)
+
+        case type
+        when 'ARRAY'
+          items_schema = working_schema['items'] || property_schema['items'] || { 'type' => 'string' }
+          property[:items] = convert_property(items_schema)
+          copy_tool_attributes(property, working_schema, %w[minItems maxItems])
+          copy_tool_attributes(property, property_schema, %w[minItems maxItems])
+        when 'OBJECT'
+          nested_properties = working_schema.fetch('properties', {}).transform_values do |child|
+            convert_property(child)
+          end
+          property[:properties] = nested_properties
+          required = working_schema['required'] || property_schema['required']
+          property[:required] = required.map(&:to_s) if required
+        end
+
+        property
+      end
+
+      def copy_common_attributes(target, source)
+        copy_tool_attributes(target, source, %w[description enum format nullable maximum minimum multipleOf])
+      end
+
+      def copy_tool_attributes(target, source, attributes)
+        attributes.each do |attribute|
+          value = schema_value(source, attribute)
+          next if value.nil?
+
+          target[attribute.to_sym] = value
+        end
+      end
+
+      def normalize_any_of_schema(schema) # rubocop:disable Metrics/PerceivedComplexity
+        any_of = schema['anyOf'] || schema[:anyOf]
+        return nil unless any_of.is_a?(Array) && any_of.any?
+
+        null_entries, non_null_entries = any_of.partition { |entry| schema_type(entry).to_s == 'null' }
+
+        if non_null_entries.size == 1 && null_entries.any?
+          normalized = RubyLLM::Utils.deep_dup(non_null_entries.first)
+          normalized['nullable'] = true
+          normalized
+        elsif non_null_entries.any?
+          RubyLLM::Utils.deep_dup(non_null_entries.first)
+        else
+          { 'type' => 'string', 'nullable' => true }
+        end
+      end
+
+      def schema_type(schema)
+        schema['type'] || schema[:type]
+      end
+
+      def schema_value(source, attribute) # rubocop:disable Metrics/PerceivedComplexity
+        case attribute
+        when 'multipleOf'
+          source['multipleOf'] || source[:multipleOf] || source['multiple_of'] || source[:multiple_of]
+        when 'minItems'
+          source['minItems'] || source[:minItems] || source['min_items'] || source[:min_items]
+        when 'maxItems'
+          source['maxItems'] || source[:maxItems] || source['max_items'] || source[:max_items]
+        else
+          source[attribute] || source[attribute.to_sym]
+        end
+      end
+
       def param_type_for_gemini(type)
         case type.to_s.downcase
-        when 'integer', 'number', 'float' then 'NUMBER'
+        when 'integer' then 'INTEGER'
+        when 'number', 'float', 'double' then 'NUMBER'
         when 'boolean' then 'BOOLEAN'
         when 'array' then 'ARRAY'
         when 'object' then 'OBJECT'
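
To make the new schema path concrete, here is a hedged sketch of what convert_tool_schema_to_gemini produces for a small JSON Schema. The sample tool schema is invented, but the transformation follows the converters in the hunk above: anyOf-with-null collapses to nullable, types are upcased, and integer now maps to INTEGER rather than NUMBER.

    # Invented example schema for a hypothetical tool's parameter object.
    schema = {
      'type' => 'object',
      'properties' => {
        'count' => { 'type' => 'integer', 'description' => 'How many results to return' },
        'tag' => { 'anyOf' => [{ 'type' => 'string' }, { 'type' => 'null' }] }
      },
      'required' => ['count']
    }

    convert_tool_schema_to_gemini(schema)
    # => {
    #      type: 'OBJECT',
    #      properties: {
    #        'count' => { type: 'INTEGER', description: 'How many results to return' },
    #        'tag'   => { type: 'STRING', nullable: true }
    #      },
    #      required: ['count']
    #    }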
data/lib/ruby_llm/providers/gemini/transcription.rb
@@ -0,0 +1,116 @@
+# frozen_string_literal: true
+
+module RubyLLM
+  module Providers
+    class Gemini
+      # Audio transcription helpers for the Gemini API implementation
+      module Transcription
+        DEFAULT_PROMPT = 'Transcribe the provided audio and respond with only the transcript text.'
+
+        def transcribe(audio_file, model:, language:, **options)
+          attachment = Attachment.new(audio_file)
+          payload = render_transcription_payload(attachment, language:, **options)
+          response = @connection.post(transcription_url(model), payload)
+          parse_transcription_response(response, model:)
+        end
+
+        private
+
+        def transcription_url(model)
+          "models/#{model}:generateContent"
+        end
+
+        def render_transcription_payload(attachment, language:, **options)
+          prompt = build_prompt(options[:prompt], language)
+          audio_part = format_audio_part(attachment)
+
+          raise UnsupportedAttachmentError, attachment.mime_type unless attachment.audio?
+
+          payload = {
+            contents: [
+              {
+                role: 'user',
+                parts: [
+                  { text: prompt },
+                  audio_part
+                ]
+              }
+            ]
+          }
+
+          generation_config = build_generation_config(options)
+          payload[:generationConfig] = generation_config unless generation_config.empty?
+          payload[:safetySettings] = options[:safety_settings] if options[:safety_settings]
+
+          payload
+        end
+
+        def build_generation_config(options)
+          config = {}
+          response_mime_type = options.fetch(:response_mime_type, 'text/plain')
+
+          config[:responseMimeType] = response_mime_type if response_mime_type
+          config[:temperature] = options[:temperature] if options.key?(:temperature)
+          config[:maxOutputTokens] = options[:max_output_tokens] if options[:max_output_tokens]
+
+          config
+        end
+
+        def build_prompt(custom_prompt, language)
+          prompt = DEFAULT_PROMPT
+          prompt += " Respond in the #{language} language." if language
+          prompt += " #{custom_prompt}" if custom_prompt
+          prompt
+        end
+
+        def format_audio_part(attachment)
+          {
+            inline_data: {
+              mime_type: attachment.mime_type,
+              data: attachment.encoded
+            }
+          }
+        end
+
+        def parse_transcription_response(response, model:)
+          data = response.body
+          text = extract_text(data)
+
+          usage = extract_usage(data)
+
+          RubyLLM::Transcription.new(
+            text: text,
+            model: model,
+            input_tokens: usage[:input_tokens],
+            output_tokens: usage[:output_tokens]
+          )
+        end
+
+        def extract_text(data)
+          candidate = data.is_a?(Hash) ? data.dig('candidates', 0) : nil
+          return unless candidate
+
+          parts = candidate.dig('content', 'parts') || []
+          texts = parts.filter_map { |part| part['text'] }
+          texts.join if texts.any?
+        end
+
+        def extract_usage(data)
+          metadata = data.is_a?(Hash) ? data['usageMetadata'] : nil
+          return { input_tokens: nil, output_tokens: nil } unless metadata
+
+          {
+            input_tokens: metadata['promptTokenCount'],
+            output_tokens: sum_output_tokens(metadata)
+          }
+        end
+
+        def sum_output_tokens(metadata)
+          candidates = metadata['candidatesTokenCount'] || 0
+          thoughts = metadata['thoughtsTokenCount'] || 0
+          candidates + thoughts
+        end
+      end
+    end
+  end
+end
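
Tracing render_transcription_payload above, a call such as transcribe('meeting.ogg', model: 'gemini-2.5-flash', language: 'fr') would post roughly the following generateContent body. The file name, model, and language are illustrative, and the base64 audio is elided:

    {
      contents: [
        {
          role: 'user',
          parts: [
            { text: 'Transcribe the provided audio and respond with only the transcript text. Respond in the fr language.' },
            { inline_data: { mime_type: 'audio/ogg', data: '<base64-encoded audio>' } }
          ]
        }
      ],
      generationConfig: { responseMimeType: 'text/plain' } # default unless response_mime_type is overridden
    }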
data/lib/ruby_llm/providers/gemini.rb
@@ -8,12 +8,13 @@ module RubyLLM
      include Gemini::Embeddings
      include Gemini::Images
      include Gemini::Models
+     include Gemini::Transcription
      include Gemini::Streaming
      include Gemini::Tools
      include Gemini::Media
 
      def api_base
-       'https://generativelanguage.googleapis.com/v1beta'
+       @config.gemini_api_base || 'https://generativelanguage.googleapis.com/v1beta'
      end
 
      def headers
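
Making api_base read @config.gemini_api_base first means the Gemini endpoint can now be pointed at a proxy or regional mirror. A minimal sketch, assuming the gem's usual RubyLLM.configure block; the proxy URL is an example:

    RubyLLM.configure do |config|
      config.gemini_api_key  = ENV['GEMINI_API_KEY']
      # New in 1.9.0: overrides https://generativelanguage.googleapis.com/v1beta when set.
      config.gemini_api_base = 'https://llm-proxy.internal.example.com/v1beta'
    end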
data/lib/ruby_llm/providers/gpustack/media.rb
@@ -10,6 +10,7 @@ module RubyLLM
        module_function
 
        def format_content(content)
+         return content.value if content.is_a?(RubyLLM::Content::Raw)
          return content.to_json if content.is_a?(Hash) || content.is_a?(Array)
          return content unless content.is_a?(Content)
 
data/lib/ruby_llm/providers/ollama/media.rb
@@ -10,6 +10,7 @@ module RubyLLM
        module_function
 
        def format_content(content)
+         return content.value if content.is_a?(RubyLLM::Content::Raw)
          return content.to_json if content.is_a?(Hash) || content.is_a?(Array)
          return content unless content.is_a?(Content)
 
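Both media hunks add the same early return for RubyLLM::Content::Raw, introduced in data/lib/ruby_llm/content.rb in this release. A hedged sketch of the intent, assuming Raw simply wraps the provider-native payload it later exposes as #value:

    # Assumed constructor: Raw wraps an already provider-formatted parts array.
    raw = RubyLLM::Content::Raw.new([{ 'type' => 'text', 'text' => 'pre-formatted part' }])

    # format_content now passes the wrapped value through untouched instead of
    # serializing it to JSON or treating it as a plain Content object.
    Media.format_content(raw) # => [{ 'type' => 'text', 'text' => 'pre-formatted part' }]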
data/lib/ruby_llm/providers/openai/chat.rb
@@ -47,12 +47,17 @@ module RubyLLM
        message_data = data.dig('choices', 0, 'message')
        return unless message_data
 
+       usage = data['usage'] || {}
+       cached_tokens = usage.dig('prompt_tokens_details', 'cached_tokens')
+
        Message.new(
          role: :assistant,
          content: message_data['content'],
          tool_calls: parse_tool_calls(message_data['tool_calls']),
-         input_tokens: data['usage']['prompt_tokens'],
-         output_tokens: data['usage']['completion_tokens'],
+         input_tokens: usage['prompt_tokens'],
+         output_tokens: usage['completion_tokens'],
+         cached_tokens: cached_tokens,
+         cache_creation_tokens: 0,
          model_id: data['model'],
          raw: response
        )
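
Besides surfacing cached tokens, the usage = data['usage'] || {} guard fixes a latent NoMethodError when a response carries no usage key. The lookups behave like this (sample numbers invented):

    usage = { 'prompt_tokens' => 1200, 'completion_tokens' => 80,
              'prompt_tokens_details' => { 'cached_tokens' => 1024 } }
    usage.dig('prompt_tokens_details', 'cached_tokens') # => 1024

    usage = {} # response without a usage object
    usage['prompt_tokens']                              # => nil (the old code raised NoMethodError)
    usage.dig('prompt_tokens_details', 'cached_tokens') # => nil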
data/lib/ruby_llm/providers/openai/media.rb
@@ -7,7 +7,8 @@ module RubyLLM
      module Media
        module_function
 
-       def format_content(content)
+       def format_content(content) # rubocop:disable Metrics/PerceivedComplexity
+         return content.value if content.is_a?(RubyLLM::Content::Raw)
          return content.to_json if content.is_a?(Hash) || content.is_a?(Array)
          return content unless content.is_a?(Content)
 
data/lib/ruby_llm/providers/openai/streaming.rb
@@ -12,13 +12,18 @@ module RubyLLM
        end
 
        def build_chunk(data)
+         usage = data['usage'] || {}
+         cached_tokens = usage.dig('prompt_tokens_details', 'cached_tokens')
+
          Chunk.new(
            role: :assistant,
            model_id: data['model'],
            content: data.dig('choices', 0, 'delta', 'content'),
            tool_calls: parse_tool_calls(data.dig('choices', 0, 'delta', 'tool_calls'), parse_arguments: false),
-           input_tokens: data.dig('usage', 'prompt_tokens'),
-           output_tokens: data.dig('usage', 'completion_tokens')
+           input_tokens: usage['prompt_tokens'],
+           output_tokens: usage['completion_tokens'],
+           cached_tokens: cached_tokens,
+           cache_creation_tokens: 0
          )
        end
 
data/lib/ruby_llm/providers/openai/tools.rb
@@ -7,19 +7,39 @@ module RubyLLM
      module Tools
        module_function
 
+       EMPTY_PARAMETERS_SCHEMA = {
+         'type' => 'object',
+         'properties' => {},
+         'required' => [],
+         'additionalProperties' => false,
+         'strict' => true
+       }.freeze
+
+       def parameters_schema_for(tool)
+         tool.params_schema ||
+           schema_from_parameters(tool.parameters)
+       end
+
+       def schema_from_parameters(parameters)
+         schema_definition = RubyLLM::Tool::SchemaDefinition.from_parameters(parameters)
+         schema_definition&.json_schema || EMPTY_PARAMETERS_SCHEMA
+       end
+
        def tool_for(tool)
-         {
+         parameters_schema = parameters_schema_for(tool)
+
+         definition = {
            type: 'function',
            function: {
              name: tool.name,
              description: tool.description,
-             parameters: {
-               type: 'object',
-               properties: tool.parameters.transform_values { |param| param_schema(param) },
-               required: tool.parameters.select { |_, p| p.required }.keys
-             }
+             parameters: parameters_schema
            }
          }
+
+         return definition if tool.provider_params.empty?
+
+         RubyLLM::Utils.deep_merge(definition, tool.provider_params)
        end
 
        def param_schema(param)
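
tool_for now layers tool.provider_params over the generated definition with RubyLLM::Utils.deep_merge (added to data/lib/ruby_llm/utils.rb in this release). A hedged sketch of the effect; the strict flag is an invented example of a provider-specific override:

    definition = {
      type: 'function',
      function: {
        name: 'lookup_order',
        description: 'Find an order by id',
        parameters: { 'type' => 'object', 'properties' => {}, 'required' => [] }
      }
    }

    # Nested hashes merge recursively, so only function[:strict] is added;
    # name, description, and parameters are preserved.
    RubyLLM::Utils.deep_merge(definition, { function: { strict: true } })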
data/lib/ruby_llm/providers/openai/transcription.rb
@@ -0,0 +1,70 @@
+# frozen_string_literal: true
+
+module RubyLLM
+  module Providers
+    class OpenAI
+      # Audio transcription methods for the OpenAI API integration
+      module Transcription
+        module_function
+
+        def transcription_url
+          'audio/transcriptions'
+        end
+
+        def render_transcription_payload(file_part, model:, language:, **options)
+          {
+            model: model,
+            file: file_part,
+            language: language,
+            chunking_strategy: (options[:chunking_strategy] || 'auto' if supports_chunking_strategy?(model, options)),
+            response_format: response_format_for(model, options),
+            prompt: options[:prompt],
+            temperature: options[:temperature],
+            timestamp_granularities: options[:timestamp_granularities],
+            known_speaker_names: options[:speaker_names],
+            known_speaker_references: encode_speaker_references(options[:speaker_references])
+          }.compact
+        end
+
+        def encode_speaker_references(references)
+          return nil unless references
+
+          references.map do |ref|
+            Attachment.new(ref).for_llm
+          end
+        end
+
+        def response_format_for(model, options)
+          return options[:response_format] if options.key?(:response_format)
+
+          'diarized_json' if model.include?('diarize')
+        end
+
+        def supports_chunking_strategy?(model, options)
+          return false if model.start_with?('whisper')
+          return true if options.key?(:chunking_strategy)
+
+          model.include?('diarize')
+        end
+
+        def parse_transcription_response(response, model:)
+          data = response.body
+
+          return RubyLLM::Transcription.new(text: data, model: model) if data.is_a?(String)
+
+          usage = data['usage'] || {}
+
+          RubyLLM::Transcription.new(
+            text: data['text'],
+            model: model,
+            language: data['language'],
+            duration: data['duration'],
+            segments: data['segments'],
+            input_tokens: usage['input_tokens'] || usage['prompt_tokens'],
+            output_tokens: usage['output_tokens'] || usage['completion_tokens']
+          )
+        end
+      end
+    end
+  end
+end
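
The model-name checks gate the newer diarization parameters. Under supports_chunking_strategy? and response_format_for above, the defaults work out as follows; the model strings are just examples of the substrings being matched:

    supports_chunking_strategy?('whisper-1', {})                 # => false (whisper models opt out)
    response_format_for('whisper-1', {})                         # => nil   (API default format)

    supports_chunking_strategy?('gpt-4o-transcribe-diarize', {}) # => true  ('diarize' in the name)
    response_format_for('gpt-4o-transcribe-diarize', {})         # => 'diarized_json'

    # An explicit option always wins:
    response_format_for('whisper-1', { response_format: 'verbose_json' }) # => 'verbose_json'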
data/lib/ruby_llm/providers/openai.rb
@@ -12,6 +12,7 @@ module RubyLLM
      include OpenAI::Tools
      include OpenAI::Images
      include OpenAI::Media
+     include OpenAI::Transcription
 
      def api_base
        @config.openai_api_base || 'https://api.openai.com/v1'
data/lib/ruby_llm/providers/vertexai/transcription.rb
@@ -0,0 +1,16 @@
+# frozen_string_literal: true
+
+module RubyLLM
+  module Providers
+    class VertexAI
+      # Vertex AI specific helpers for audio transcription
+      module Transcription
+        private
+
+        def transcription_url(model)
+          "projects/#{@config.vertexai_project_id}/locations/#{@config.vertexai_location}/publishers/google/models/#{model}:generateContent" # rubocop:disable Layout/LineLength
+        end
+      end
+    end
+  end
+end
data/lib/ruby_llm/providers/vertexai.rb
@@ -8,6 +8,7 @@ module RubyLLM
      include VertexAI::Streaming
      include VertexAI::Embeddings
      include VertexAI::Models
+     include VertexAI::Transcription
 
      def initialize(config)
        super
@@ -37,6 +38,8 @@ module RubyLLM
 
        initialize_authorizer unless @authorizer
        @authorizer.fetch_access_token!['access_token']
+     rescue Google::Auth::AuthorizationError => e
+       raise UnauthorizedError.new(nil, "Invalid Google Cloud credentials for Vertex AI: #{e.message}")
      end
 
      def initialize_authorizer
data/lib/ruby_llm/stream_accumulator.rb
@@ -8,8 +8,10 @@ module RubyLLM
    def initialize
      @content = +''
      @tool_calls = {}
-     @input_tokens = 0
-     @output_tokens = 0
+     @input_tokens = nil
+     @output_tokens = nil
+     @cached_tokens = nil
+     @cache_creation_tokens = nil
      @latest_tool_call_id = nil
    end
 
@@ -35,8 +37,10 @@ module RubyLLM
        model_id: model_id,
        conversation_id: conversation_id,
        tool_calls: tool_calls_from_stream,
-       input_tokens: @input_tokens.positive? ? @input_tokens : nil,
-       output_tokens: @output_tokens.positive? ? @output_tokens : nil,
+       input_tokens: @input_tokens,
+       output_tokens: @output_tokens,
+       cached_tokens: @cached_tokens,
+       cache_creation_tokens: @cache_creation_tokens,
        raw: response
      )
    end
@@ -92,6 +96,8 @@ module RubyLLM
    def count_tokens(chunk)
      @input_tokens = chunk.input_tokens if chunk.input_tokens
      @output_tokens = chunk.output_tokens if chunk.output_tokens
+     @cached_tokens = chunk.cached_tokens if chunk.cached_tokens
+     @cache_creation_tokens = chunk.cache_creation_tokens if chunk.cache_creation_tokens
    end
  end
end
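
Initializing the counters to nil instead of 0 lets the accumulator distinguish "provider reported zero tokens" from "provider never sent usage"; the old .positive? guards collapsed both cases to nil. A small illustration:

    # Before: the .positive? guard discarded a genuine zero.
    input_tokens = 0
    input_tokens.positive? ? input_tokens : nil # => nil, the reported 0 is lost

    # After: counters start at nil ("no usage seen") and keep any reported
    # value, zero included, since assignment is guarded only by presence.
    input_tokens = nil
    reported = 0
    input_tokens = reported if reported # 0 is truthy in Ruby
    input_tokens # => 0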