dify_llm 1.8.1 → 1.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +12 -7
- data/lib/generators/ruby_llm/chat_ui/chat_ui_generator.rb +117 -69
- data/lib/generators/ruby_llm/chat_ui/templates/controllers/chats_controller.rb.tt +12 -12
- data/lib/generators/ruby_llm/chat_ui/templates/controllers/messages_controller.rb.tt +7 -7
- data/lib/generators/ruby_llm/chat_ui/templates/controllers/models_controller.rb.tt +4 -4
- data/lib/generators/ruby_llm/chat_ui/templates/jobs/chat_response_job.rb.tt +6 -6
- data/lib/generators/ruby_llm/chat_ui/templates/views/chats/_chat.html.erb.tt +4 -4
- data/lib/generators/ruby_llm/chat_ui/templates/views/chats/_form.html.erb.tt +5 -5
- data/lib/generators/ruby_llm/chat_ui/templates/views/chats/index.html.erb.tt +5 -5
- data/lib/generators/ruby_llm/chat_ui/templates/views/chats/new.html.erb.tt +4 -4
- data/lib/generators/ruby_llm/chat_ui/templates/views/chats/show.html.erb.tt +8 -8
- data/lib/generators/ruby_llm/chat_ui/templates/views/messages/_form.html.erb.tt +5 -5
- data/lib/generators/ruby_llm/chat_ui/templates/views/messages/_message.html.erb.tt +9 -6
- data/lib/generators/ruby_llm/chat_ui/templates/views/messages/_tool_calls.html.erb.tt +7 -0
- data/lib/generators/ruby_llm/chat_ui/templates/views/messages/create.turbo_stream.erb.tt +5 -5
- data/lib/generators/ruby_llm/chat_ui/templates/views/models/_model.html.erb.tt +9 -9
- data/lib/generators/ruby_llm/chat_ui/templates/views/models/index.html.erb.tt +4 -6
- data/lib/generators/ruby_llm/chat_ui/templates/views/models/show.html.erb.tt +11 -11
- data/lib/generators/ruby_llm/generator_helpers.rb +131 -87
- data/lib/generators/ruby_llm/install/install_generator.rb +75 -79
- data/lib/generators/ruby_llm/install/templates/create_messages_migration.rb.tt +3 -0
- data/lib/generators/ruby_llm/install/templates/initializer.rb.tt +1 -1
- data/lib/generators/ruby_llm/upgrade_to_v1_7/upgrade_to_v1_7_generator.rb +88 -85
- data/lib/generators/ruby_llm/upgrade_to_v1_9/templates/add_v1_9_message_columns.rb.tt +15 -0
- data/lib/generators/ruby_llm/upgrade_to_v1_9/upgrade_to_v1_9_generator.rb +49 -0
- data/lib/ruby_llm/active_record/acts_as.rb +17 -8
- data/lib/ruby_llm/active_record/chat_methods.rb +41 -13
- data/lib/ruby_llm/active_record/message_methods.rb +11 -2
- data/lib/ruby_llm/active_record/model_methods.rb +1 -1
- data/lib/ruby_llm/aliases.json +62 -20
- data/lib/ruby_llm/attachment.rb +8 -0
- data/lib/ruby_llm/chat.rb +13 -2
- data/lib/ruby_llm/configuration.rb +6 -1
- data/lib/ruby_llm/connection.rb +4 -4
- data/lib/ruby_llm/content.rb +23 -0
- data/lib/ruby_llm/message.rb +11 -6
- data/lib/ruby_llm/model/info.rb +4 -0
- data/lib/ruby_llm/models.json +9410 -7793
- data/lib/ruby_llm/models.rb +14 -22
- data/lib/ruby_llm/provider.rb +23 -1
- data/lib/ruby_llm/providers/anthropic/chat.rb +22 -3
- data/lib/ruby_llm/providers/anthropic/content.rb +44 -0
- data/lib/ruby_llm/providers/anthropic/media.rb +2 -1
- data/lib/ruby_llm/providers/anthropic/models.rb +15 -0
- data/lib/ruby_llm/providers/anthropic/streaming.rb +2 -0
- data/lib/ruby_llm/providers/anthropic/tools.rb +20 -18
- data/lib/ruby_llm/providers/bedrock/media.rb +2 -1
- data/lib/ruby_llm/providers/bedrock/streaming/content_extraction.rb +15 -0
- data/lib/ruby_llm/providers/bedrock/streaming/payload_processing.rb +2 -0
- data/lib/ruby_llm/providers/dify/chat.rb +16 -5
- data/lib/ruby_llm/providers/gemini/chat.rb +352 -69
- data/lib/ruby_llm/providers/gemini/media.rb +59 -1
- data/lib/ruby_llm/providers/gemini/tools.rb +146 -25
- data/lib/ruby_llm/providers/gemini/transcription.rb +116 -0
- data/lib/ruby_llm/providers/gemini.rb +2 -1
- data/lib/ruby_llm/providers/gpustack/media.rb +1 -0
- data/lib/ruby_llm/providers/ollama/media.rb +1 -0
- data/lib/ruby_llm/providers/openai/chat.rb +7 -2
- data/lib/ruby_llm/providers/openai/media.rb +2 -1
- data/lib/ruby_llm/providers/openai/streaming.rb +7 -2
- data/lib/ruby_llm/providers/openai/tools.rb +26 -6
- data/lib/ruby_llm/providers/openai/transcription.rb +70 -0
- data/lib/ruby_llm/providers/openai.rb +1 -0
- data/lib/ruby_llm/providers/vertexai/transcription.rb +16 -0
- data/lib/ruby_llm/providers/vertexai.rb +3 -0
- data/lib/ruby_llm/stream_accumulator.rb +10 -4
- data/lib/ruby_llm/tool.rb +126 -0
- data/lib/ruby_llm/transcription.rb +35 -0
- data/lib/ruby_llm/utils.rb +46 -0
- data/lib/ruby_llm/version.rb +1 -1
- data/lib/ruby_llm.rb +6 -0
- metadata +25 -3
data/lib/ruby_llm/providers/gemini/tools.rb

```diff
@@ -13,7 +13,41 @@ module RubyLLM
           }]
         end
 
-        def
+        def format_tool_call(msg)
+          parts = []
+
+          if msg.content && !(msg.content.respond_to?(:empty?) && msg.content.empty?)
+            formatted_content = Media.format_content(msg.content)
+            parts.concat(formatted_content.is_a?(Array) ? formatted_content : [formatted_content])
+          end
+
+          msg.tool_calls.each_value do |tool_call|
+            parts << {
+              functionCall: {
+                name: tool_call.name,
+                args: tool_call.arguments
+              }
+            }
+          end
+
+          parts
+        end
+
+        def format_tool_result(msg, function_name = nil)
+          function_name ||= msg.tool_call_id
+
+          [{
+            functionResponse: {
+              name: function_name,
+              response: {
+                name: function_name,
+                content: Media.format_content(msg.content)
+              }
+            }
+          }]
+        end
+
+        def extract_tool_calls(data) # rubocop:disable Metrics/PerceivedComplexity
           return nil unless data
 
           candidate = data.is_a?(Hash) ? data.dig('candidates', 0) : nil
```
```diff
@@ -22,49 +56,136 @@ module RubyLLM
           parts = candidate.dig('content', 'parts')
           return nil unless parts.is_a?(Array)
 
-
-
-
-          function_data = function_call_part['functionCall']
-          return nil unless function_data
+          tool_calls = parts.each_with_object({}) do |part, result|
+            function_data = part['functionCall']
+            next unless function_data
 
-
+            id = SecureRandom.uuid
 
-
-
-            id: id,
+            result[id] = ToolCall.new(
+              id:,
               name: function_data['name'],
-            arguments: function_data['args']
+              arguments: function_data['args'] || {}
             )
-
+          end
+
+          tool_calls.empty? ? nil : tool_calls
         end
 
         private
 
         def function_declaration_for(tool)
-
+          parameters_schema = tool.params_schema ||
+                              RubyLLM::Tool::SchemaDefinition.from_parameters(tool.parameters)&.json_schema
+
+          declaration = {
             name: tool.name,
-            description: tool.description
-
-
+            description: tool.description
+          }
+
+          declaration[:parameters] = convert_tool_schema_to_gemini(parameters_schema) if parameters_schema
+
+          return declaration if tool.provider_params.empty?
+
+          RubyLLM::Utils.deep_merge(declaration, tool.provider_params)
         end
 
-        def
+        def convert_tool_schema_to_gemini(schema)
+          return nil unless schema
+
+          schema = RubyLLM::Utils.deep_stringify_keys(schema)
+
+          raise ArgumentError, 'Gemini tool parameters must be objects' unless schema['type'] == 'object'
+
           {
             type: 'OBJECT',
-            properties:
-
-              type: param_type_for_gemini(param.type),
-              description: param.description
-            }.compact
-            end,
-            required: parameters.select { |_, p| p.required }.keys.map(&:to_s)
+            properties: schema.fetch('properties', {}).transform_values { |property| convert_property(property) },
+            required: (schema['required'] || []).map(&:to_s)
           }
         end
 
+        def convert_property(property_schema) # rubocop:disable Metrics/PerceivedComplexity
+          normalized_schema = normalize_any_of_schema(property_schema)
+          working_schema = normalized_schema || property_schema
+
+          type = param_type_for_gemini(working_schema['type'])
+
+          property = {
+            type: type
+          }
+
+          copy_common_attributes(property, property_schema)
+          copy_common_attributes(property, working_schema)
+
+          case type
+          when 'ARRAY'
+            items_schema = working_schema['items'] || property_schema['items'] || { 'type' => 'string' }
+            property[:items] = convert_property(items_schema)
+            copy_tool_attributes(property, working_schema, %w[minItems maxItems])
+            copy_tool_attributes(property, property_schema, %w[minItems maxItems])
+          when 'OBJECT'
+            nested_properties = working_schema.fetch('properties', {}).transform_values do |child|
+              convert_property(child)
+            end
+            property[:properties] = nested_properties
+            required = working_schema['required'] || property_schema['required']
+            property[:required] = required.map(&:to_s) if required
+          end
+
+          property
+        end
+
+        def copy_common_attributes(target, source)
+          copy_tool_attributes(target, source, %w[description enum format nullable maximum minimum multipleOf])
+        end
+
+        def copy_tool_attributes(target, source, attributes)
+          attributes.each do |attribute|
+            value = schema_value(source, attribute)
+            next if value.nil?
+
+            target[attribute.to_sym] = value
+          end
+        end
+
+        def normalize_any_of_schema(schema) # rubocop:disable Metrics/PerceivedComplexity
+          any_of = schema['anyOf'] || schema[:anyOf]
+          return nil unless any_of.is_a?(Array) && any_of.any?
+
+          null_entries, non_null_entries = any_of.partition { |entry| schema_type(entry).to_s == 'null' }
+
+          if non_null_entries.size == 1 && null_entries.any?
+            normalized = RubyLLM::Utils.deep_dup(non_null_entries.first)
+            normalized['nullable'] = true
+            normalized
+          elsif non_null_entries.any?
+            RubyLLM::Utils.deep_dup(non_null_entries.first)
+          else
+            { 'type' => 'string', 'nullable' => true }
+          end
+        end
+
+        def schema_type(schema)
+          schema['type'] || schema[:type]
+        end
+
+        def schema_value(source, attribute) # rubocop:disable Metrics/PerceivedComplexity
+          case attribute
+          when 'multipleOf'
+            source['multipleOf'] || source[:multipleOf] || source['multiple_of'] || source[:multiple_of]
+          when 'minItems'
+            source['minItems'] || source[:minItems] || source['min_items'] || source[:min_items]
+          when 'maxItems'
+            source['maxItems'] || source[:maxItems] || source['max_items'] || source[:max_items]
+          else
+            source[attribute] || source[attribute.to_sym]
+          end
+        end
+
         def param_type_for_gemini(type)
           case type.to_s.downcase
-          when 'integer'
+          when 'integer' then 'INTEGER'
+          when 'number', 'float', 'double' then 'NUMBER'
           when 'boolean' then 'BOOLEAN'
           when 'array' then 'ARRAY'
           when 'object' then 'OBJECT'
```
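Worth tracing by hand: the new converter walks arbitrary JSON Schema property trees, where the old code only handled the flat `parameters` hash. A minimal sketch of what `convert_tool_schema_to_gemini` produces for an `anyOf`-with-null property, using a hypothetical input schema (not taken from the gem):

```ruby
# Hypothetical input: one optional integer parameter expressed as anyOf.
schema = {
  'type' => 'object',
  'properties' => {
    'limit' => {
      'anyOf' => [
        { 'type' => 'integer', 'description' => 'Max results' },
        { 'type' => 'null' }
      ]
    }
  },
  'required' => []
}

# normalize_any_of_schema keeps the single non-null branch and marks it
# nullable, so convert_tool_schema_to_gemini(schema) should come out as:
# {
#   type: 'OBJECT',
#   properties: {
#     'limit' => { type: 'INTEGER', description: 'Max results', nullable: true }
#   },
#   required: []
# }
```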
data/lib/ruby_llm/providers/gemini/transcription.rb (new file)

```diff
@@ -0,0 +1,116 @@
+# frozen_string_literal: true
+
+module RubyLLM
+  module Providers
+    class Gemini
+      # Audio transcription helpers for the Gemini API implementation
+      module Transcription
+        DEFAULT_PROMPT = 'Transcribe the provided audio and respond with only the transcript text.'
+
+        def transcribe(audio_file, model:, language:, **options)
+          attachment = Attachment.new(audio_file)
+          payload = render_transcription_payload(attachment, language:, **options)
+          response = @connection.post(transcription_url(model), payload)
+          parse_transcription_response(response, model:)
+        end
+
+        private
+
+        def transcription_url(model)
+          "models/#{model}:generateContent"
+        end
+
+        def render_transcription_payload(attachment, language:, **options)
+          prompt = build_prompt(options[:prompt], language)
+          audio_part = format_audio_part(attachment)
+
+          raise UnsupportedAttachmentError, attachment.mime_type unless attachment.audio?
+
+          payload = {
+            contents: [
+              {
+                role: 'user',
+                parts: [
+                  { text: prompt },
+                  audio_part
+                ]
+              }
+            ]
+          }
+
+          generation_config = build_generation_config(options)
+          payload[:generationConfig] = generation_config unless generation_config.empty?
+          payload[:safetySettings] = options[:safety_settings] if options[:safety_settings]
+
+          payload
+        end
+
+        def build_generation_config(options)
+          config = {}
+          response_mime_type = options.fetch(:response_mime_type, 'text/plain')
+
+          config[:responseMimeType] = response_mime_type if response_mime_type
+          config[:temperature] = options[:temperature] if options.key?(:temperature)
+          config[:maxOutputTokens] = options[:max_output_tokens] if options[:max_output_tokens]
+
+          config
+        end
+
+        def build_prompt(custom_prompt, language)
+          prompt = DEFAULT_PROMPT
+          prompt += " Respond in the #{language} language." if language
+          prompt += " #{custom_prompt}" if custom_prompt
+          prompt
+        end
+
+        def format_audio_part(attachment)
+          {
+            inline_data: {
+              mime_type: attachment.mime_type,
+              data: attachment.encoded
+            }
+          }
+        end
+
+        def parse_transcription_response(response, model:)
+          data = response.body
+          text = extract_text(data)
+
+          usage = extract_usage(data)
+
+          RubyLLM::Transcription.new(
+            text: text,
+            model: model,
+            input_tokens: usage[:input_tokens],
+            output_tokens: usage[:output_tokens]
+          )
+        end
+
+        def extract_text(data)
+          candidate = data.is_a?(Hash) ? data.dig('candidates', 0) : nil
+          return unless candidate
+
+          parts = candidate.dig('content', 'parts') || []
+          texts = parts.filter_map { |part| part['text'] }
+          texts.join if texts.any?
+        end
+
+        def extract_usage(data)
+          metadata = data.is_a?(Hash) ? data['usageMetadata'] : nil
+          return { input_tokens: nil, output_tokens: nil } unless metadata
+
+          {
+            input_tokens: metadata['promptTokenCount'],
+            output_tokens: sum_output_tokens(metadata)
+          }
+        end
+
+        def sum_output_tokens(metadata)
+          candidates = metadata['candidatesTokenCount'] || 0
+          thoughts = metadata['thoughtsTokenCount'] || 0
+          candidates + thoughts
+        end
+      end
+    end
+  end
+end
```
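A hedged usage sketch for the new module. The file list also adds a top-level `lib/ruby_llm/transcription.rb`, so the call below assumes a `RubyLLM.transcribe` entry point that routes to this provider; the file path and model id are placeholders:

```ruby
# Sketch, not confirmed API: transcribe audio with a Gemini model.
transcription = RubyLLM.transcribe(
  'meeting.mp3',             # placeholder audio file
  model: 'gemini-2.5-flash', # placeholder model id
  language: 'en',            # appended to the prompt: "Respond in the en language."
  temperature: 0             # forwarded into generationConfig
)
puts transcription.text
```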
data/lib/ruby_llm/providers/gemini.rb

```diff
@@ -8,12 +8,13 @@ module RubyLLM
       include Gemini::Embeddings
       include Gemini::Images
       include Gemini::Models
+      include Gemini::Transcription
       include Gemini::Streaming
       include Gemini::Tools
       include Gemini::Media
 
       def api_base
-        'https://generativelanguage.googleapis.com/v1beta'
+        @config.gemini_api_base || 'https://generativelanguage.googleapis.com/v1beta'
       end
 
       def headers
```
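The `api_base` change makes the Gemini endpoint overridable, e.g. for gateways and proxies. A sketch of the matching configuration; `gemini_api_base` appears to be one of the accessors added in `configuration.rb` (+6):

```ruby
RubyLLM.configure do |config|
  config.gemini_api_key = ENV['GEMINI_API_KEY']
  # New in 1.9.0: route Gemini traffic through a proxy or compatible gateway.
  config.gemini_api_base = 'https://llm-gateway.example.com/v1beta' # placeholder URL
end
```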
data/lib/ruby_llm/providers/openai/chat.rb

```diff
@@ -47,12 +47,17 @@ module RubyLLM
         message_data = data.dig('choices', 0, 'message')
         return unless message_data
 
+        usage = data['usage'] || {}
+        cached_tokens = usage.dig('prompt_tokens_details', 'cached_tokens')
+
         Message.new(
           role: :assistant,
           content: message_data['content'],
           tool_calls: parse_tool_calls(message_data['tool_calls']),
-          input_tokens:
-          output_tokens:
+          input_tokens: usage['prompt_tokens'],
+          output_tokens: usage['completion_tokens'],
+          cached_tokens: cached_tokens,
+          cache_creation_tokens: 0,
           model_id: data['model'],
           raw: response
         )
```
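Prompt-cache usage is now read from OpenAI's `prompt_tokens_details` and carried on the message. A sketch of reading it back, assuming `Message` exposes readers matching the keyword arguments above (the new v1.9 migration template adds matching message columns for persisted chats):

```ruby
chat = RubyLLM.chat(model: 'gpt-4o') # placeholder model id
response = chat.ask('Summarize our discussion so far.')

response.input_tokens  # total prompt tokens
response.cached_tokens # portion served from the prompt cache; nil when absent
```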
data/lib/ruby_llm/providers/openai/media.rb

```diff
@@ -7,7 +7,8 @@ module RubyLLM
       module Media
         module_function
 
-        def format_content(content)
+        def format_content(content) # rubocop:disable Metrics/PerceivedComplexity
+          return content.value if content.is_a?(RubyLLM::Content::Raw)
           return content.to_json if content.is_a?(Hash) || content.is_a?(Array)
           return content unless content.is_a?(Content)
 
```
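`RubyLLM::Content::Raw` is new in `content.rb` (+23): it wraps a provider-native payload that `format_content` now passes through untouched via `#value`. A sketch, assuming `Content::Raw.new` takes the payload directly and that `Chat#add_message` accepts it as content:

```ruby
# Bypass RubyLLM's content formatting and send OpenAI-native parts as-is.
raw_parts = [
  { type: 'text', text: 'Describe this image.' },
  { type: 'image_url', image_url: { url: 'https://example.com/cat.png' } }
]

chat.add_message(role: :user, content: RubyLLM::Content::Raw.new(raw_parts))
```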
data/lib/ruby_llm/providers/openai/streaming.rb

```diff
@@ -12,13 +12,18 @@ module RubyLLM
         end
 
         def build_chunk(data)
+          usage = data['usage'] || {}
+          cached_tokens = usage.dig('prompt_tokens_details', 'cached_tokens')
+
           Chunk.new(
             role: :assistant,
             model_id: data['model'],
             content: data.dig('choices', 0, 'delta', 'content'),
             tool_calls: parse_tool_calls(data.dig('choices', 0, 'delta', 'tool_calls'), parse_arguments: false),
-            input_tokens:
-            output_tokens:
+            input_tokens: usage['prompt_tokens'],
+            output_tokens: usage['completion_tokens'],
+            cached_tokens: cached_tokens,
+            cache_creation_tokens: 0
           )
         end
 
```
data/lib/ruby_llm/providers/openai/tools.rb

```diff
@@ -7,19 +7,39 @@ module RubyLLM
       module Tools
         module_function
 
+        EMPTY_PARAMETERS_SCHEMA = {
+          'type' => 'object',
+          'properties' => {},
+          'required' => [],
+          'additionalProperties' => false,
+          'strict' => true
+        }.freeze
+
+        def parameters_schema_for(tool)
+          tool.params_schema ||
+            schema_from_parameters(tool.parameters)
+        end
+
+        def schema_from_parameters(parameters)
+          schema_definition = RubyLLM::Tool::SchemaDefinition.from_parameters(parameters)
+          schema_definition&.json_schema || EMPTY_PARAMETERS_SCHEMA
+        end
+
         def tool_for(tool)
-
+          parameters_schema = parameters_schema_for(tool)
+
+          definition = {
             type: 'function',
             function: {
               name: tool.name,
               description: tool.description,
-              parameters:
-                type: 'object',
-                properties: tool.parameters.transform_values { |param| param_schema(param) },
-                required: tool.parameters.select { |_, p| p.required }.keys
-              }
+              parameters: parameters_schema
             }
           }
+
+          return definition if tool.provider_params.empty?
+
+          RubyLLM::Utils.deep_merge(definition, tool.provider_params)
         end
 
         def param_schema(param)
```
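The formatter now leans on two tool hooks: `params_schema` (a raw JSON schema that wins over the legacy `parameters` DSL) and `provider_params` (deep-merged over the finished definition). A sketch that exercises both by overriding the methods directly, which sidesteps any assumptions about the expanded `Tool` DSL (`tool.rb` +126):

```ruby
class WeatherTool < RubyLLM::Tool
  description 'Look up current weather for a city'

  # Raw JSON schema; tool_for uses this instead of building one from `param`.
  def params_schema
    {
      'type' => 'object',
      'properties' => { 'city' => { 'type' => 'string' } },
      'required' => ['city'],
      'additionalProperties' => false,
      'strict' => true
    }
  end

  # Deep-merged into the OpenAI definition hash as the last step of tool_for.
  def provider_params
    { function: { strict: true } }
  end

  def execute(city:)
    "Sunny in #{city}" # placeholder implementation
  end
end
```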
data/lib/ruby_llm/providers/openai/transcription.rb (new file)

```diff
@@ -0,0 +1,70 @@
+# frozen_string_literal: true
+
+module RubyLLM
+  module Providers
+    class OpenAI
+      # Audio transcription methods for the OpenAI API integration
+      module Transcription
+        module_function
+
+        def transcription_url
+          'audio/transcriptions'
+        end
+
+        def render_transcription_payload(file_part, model:, language:, **options)
+          {
+            model: model,
+            file: file_part,
+            language: language,
+            chunking_strategy: (options[:chunking_strategy] || 'auto' if supports_chunking_strategy?(model, options)),
+            response_format: response_format_for(model, options),
+            prompt: options[:prompt],
+            temperature: options[:temperature],
+            timestamp_granularities: options[:timestamp_granularities],
+            known_speaker_names: options[:speaker_names],
+            known_speaker_references: encode_speaker_references(options[:speaker_references])
+          }.compact
+        end
+
+        def encode_speaker_references(references)
+          return nil unless references
+
+          references.map do |ref|
+            Attachment.new(ref).for_llm
+          end
+        end
+
+        def response_format_for(model, options)
+          return options[:response_format] if options.key?(:response_format)
+
+          'diarized_json' if model.include?('diarize')
+        end
+
+        def supports_chunking_strategy?(model, options)
+          return false if model.start_with?('whisper')
+          return true if options.key?(:chunking_strategy)
+
+          model.include?('diarize')
+        end
+
+        def parse_transcription_response(response, model:)
+          data = response.body
+
+          return RubyLLM::Transcription.new(text: data, model: model) if data.is_a?(String)
+
+          usage = data['usage'] || {}
+
+          RubyLLM::Transcription.new(
+            text: data['text'],
+            model: model,
+            language: data['language'],
+            duration: data['duration'],
+            segments: data['segments'],
+            input_tokens: usage['input_tokens'] || usage['prompt_tokens'],
+            output_tokens: usage['output_tokens'] || usage['completion_tokens']
+          )
+        end
+      end
+    end
+  end
+end
```
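End to end, the option mapping is easiest to see from the caller's side. A hedged sketch of a diarized call, assuming the same top-level `transcribe` entry point as in the Gemini example; the model id is a placeholder for whatever diarization-capable id OpenAI ships:

```ruby
transcription = RubyLLM.transcribe(
  'panel.wav',                                 # placeholder audio file
  model: 'gpt-4o-transcribe-diarize',          # placeholder diarization model id
  speaker_names: %w[Alice Bob],                # sent as known_speaker_names
  speaker_references: ['alice.wav', 'bob.wav'] # each encoded via Attachment#for_llm
)

# Diarization models default to response_format 'diarized_json' plus
# chunking_strategy 'auto'; whisper-* models send neither field.
transcription.segments&.each { |segment| p segment }
```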
data/lib/ruby_llm/providers/vertexai/transcription.rb (new file)

```diff
@@ -0,0 +1,16 @@
+# frozen_string_literal: true
+
+module RubyLLM
+  module Providers
+    class VertexAI
+      # Vertex AI specific helpers for audio transcription
+      module Transcription
+        private
+
+        def transcription_url(model)
+          "projects/#{@config.vertexai_project_id}/locations/#{@config.vertexai_location}/publishers/google/models/#{model}:generateContent" # rubocop:disable Layout/LineLength
+        end
+      end
+    end
+  end
+end
```
data/lib/ruby_llm/providers/vertexai.rb

```diff
@@ -8,6 +8,7 @@ module RubyLLM
       include VertexAI::Streaming
       include VertexAI::Embeddings
       include VertexAI::Models
+      include VertexAI::Transcription
 
       def initialize(config)
         super
```
```diff
@@ -37,6 +38,8 @@ module RubyLLM
 
         initialize_authorizer unless @authorizer
         @authorizer.fetch_access_token!['access_token']
+      rescue Google::Auth::AuthorizationError => e
+        raise UnauthorizedError.new(nil, "Invalid Google Cloud credentials for Vertex AI: #{e.message}")
       end
 
       def initialize_authorizer
```
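For callers this means credential failures now surface as RubyLLM's own error class rather than a raw `Google::Auth::AuthorizationError`. A sketch:

```ruby
begin
  chat = RubyLLM.chat(model: 'gemini-2.5-flash', provider: :vertexai) # placeholder model id
  chat.ask('Hello')
rescue RubyLLM::UnauthorizedError => e
  warn "Check GOOGLE_APPLICATION_CREDENTIALS: #{e.message}"
end
```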
data/lib/ruby_llm/stream_accumulator.rb

```diff
@@ -8,8 +8,10 @@ module RubyLLM
     def initialize
       @content = +''
       @tool_calls = {}
-      @input_tokens =
-      @output_tokens =
+      @input_tokens = nil
+      @output_tokens = nil
+      @cached_tokens = nil
+      @cache_creation_tokens = nil
       @latest_tool_call_id = nil
     end
 
```
```diff
@@ -35,8 +37,10 @@ module RubyLLM
         model_id: model_id,
         conversation_id: conversation_id,
         tool_calls: tool_calls_from_stream,
-        input_tokens: @input_tokens
-        output_tokens: @output_tokens
+        input_tokens: @input_tokens,
+        output_tokens: @output_tokens,
+        cached_tokens: @cached_tokens,
+        cache_creation_tokens: @cache_creation_tokens,
         raw: response
       )
     end
```
```diff
@@ -92,6 +96,8 @@ module RubyLLM
     def count_tokens(chunk)
       @input_tokens = chunk.input_tokens if chunk.input_tokens
       @output_tokens = chunk.output_tokens if chunk.output_tokens
+      @cached_tokens = chunk.cached_tokens if chunk.cached_tokens
+      @cache_creation_tokens = chunk.cache_creation_tokens if chunk.cache_creation_tokens
     end
   end
 end
```
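Taken together with the streaming changes above, cache counters picked up from any chunk survive into the final message a streamed call returns. A short sketch, assuming the same token readers as in the OpenAI chat hunk:

```ruby
message = chat.ask('Stream a long answer.') do |chunk|
  print chunk.content
end

# After the stream completes, accumulated usage sits on the final message.
puts "cached: #{message.cached_tokens.inspect}"
puts "cache writes: #{message.cache_creation_tokens.inspect}"
```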