dify_llm 1.8.2 → 1.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. checksums.yaml +4 -4
  2. data/README.md +8 -3
  3. data/lib/generators/ruby_llm/generator_helpers.rb +31 -10
  4. data/lib/generators/ruby_llm/install/templates/create_messages_migration.rb.tt +3 -0
  5. data/lib/generators/ruby_llm/install/templates/create_models_migration.rb.tt +5 -0
  6. data/lib/generators/ruby_llm/install/templates/create_tool_calls_migration.rb.tt +7 -1
  7. data/lib/generators/ruby_llm/upgrade_to_v1_7/upgrade_to_v1_7_generator.rb +1 -1
  8. data/lib/generators/ruby_llm/upgrade_to_v1_9/templates/add_v1_9_message_columns.rb.tt +15 -0
  9. data/lib/generators/ruby_llm/upgrade_to_v1_9/upgrade_to_v1_9_generator.rb +49 -0
  10. data/lib/ruby_llm/active_record/acts_as.rb +22 -24
  11. data/lib/ruby_llm/active_record/chat_methods.rb +41 -13
  12. data/lib/ruby_llm/active_record/message_methods.rb +11 -2
  13. data/lib/ruby_llm/active_record/model_methods.rb +1 -1
  14. data/lib/ruby_llm/aliases.json +61 -32
  15. data/lib/ruby_llm/attachment.rb +44 -13
  16. data/lib/ruby_llm/chat.rb +13 -2
  17. data/lib/ruby_llm/configuration.rb +6 -1
  18. data/lib/ruby_llm/connection.rb +3 -3
  19. data/lib/ruby_llm/content.rb +23 -0
  20. data/lib/ruby_llm/message.rb +11 -6
  21. data/lib/ruby_llm/model/info.rb +4 -0
  22. data/lib/ruby_llm/models.json +9649 -8211
  23. data/lib/ruby_llm/models.rb +14 -22
  24. data/lib/ruby_llm/provider.rb +23 -1
  25. data/lib/ruby_llm/providers/anthropic/chat.rb +22 -3
  26. data/lib/ruby_llm/providers/anthropic/content.rb +44 -0
  27. data/lib/ruby_llm/providers/anthropic/media.rb +3 -2
  28. data/lib/ruby_llm/providers/anthropic/models.rb +15 -0
  29. data/lib/ruby_llm/providers/anthropic/streaming.rb +2 -0
  30. data/lib/ruby_llm/providers/anthropic/tools.rb +20 -18
  31. data/lib/ruby_llm/providers/bedrock/media.rb +2 -1
  32. data/lib/ruby_llm/providers/bedrock/streaming/content_extraction.rb +15 -0
  33. data/lib/ruby_llm/providers/bedrock/streaming/payload_processing.rb +2 -0
  34. data/lib/ruby_llm/providers/dify/chat.rb +16 -5
  35. data/lib/ruby_llm/providers/gemini/chat.rb +352 -69
  36. data/lib/ruby_llm/providers/gemini/media.rb +59 -1
  37. data/lib/ruby_llm/providers/gemini/tools.rb +146 -25
  38. data/lib/ruby_llm/providers/gemini/transcription.rb +116 -0
  39. data/lib/ruby_llm/providers/gemini.rb +2 -1
  40. data/lib/ruby_llm/providers/gpustack/media.rb +1 -0
  41. data/lib/ruby_llm/providers/ollama/media.rb +1 -0
  42. data/lib/ruby_llm/providers/openai/chat.rb +7 -2
  43. data/lib/ruby_llm/providers/openai/media.rb +2 -1
  44. data/lib/ruby_llm/providers/openai/streaming.rb +7 -2
  45. data/lib/ruby_llm/providers/openai/tools.rb +26 -6
  46. data/lib/ruby_llm/providers/openai/transcription.rb +70 -0
  47. data/lib/ruby_llm/providers/openai.rb +1 -0
  48. data/lib/ruby_llm/providers/vertexai/transcription.rb +16 -0
  49. data/lib/ruby_llm/providers/vertexai.rb +11 -11
  50. data/lib/ruby_llm/railtie.rb +24 -22
  51. data/lib/ruby_llm/stream_accumulator.rb +10 -4
  52. data/lib/ruby_llm/tool.rb +126 -0
  53. data/lib/ruby_llm/transcription.rb +35 -0
  54. data/lib/ruby_llm/utils.rb +46 -0
  55. data/lib/ruby_llm/version.rb +1 -1
  56. data/lib/ruby_llm.rb +7 -0
  57. metadata +24 -3
@@ -1,5 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'set'
4
+ require 'rubygems/version'
5
+
3
6
  module RubyLLM
4
7
  module Providers
5
8
  class Gemini
@@ -20,10 +23,7 @@ module RubyLLM
20
23
 
21
24
  payload[:generationConfig][:temperature] = temperature unless temperature.nil?
22
25
 
23
- if schema
24
- payload[:generationConfig][:responseMimeType] = 'application/json'
25
- payload[:generationConfig][:responseSchema] = convert_schema_to_gemini(schema)
26
- end
26
+ payload[:generationConfig].merge!(structured_output_config(schema, model)) if schema
27
27
 
28
28
  payload[:tools] = format_tools(tools) if tools.any?
29
29
  payload
@@ -32,40 +32,29 @@ module RubyLLM
32
32
  private
33
33
 
34
34
  def format_messages(messages)
35
- messages.map do |msg|
36
- {
37
- role: format_role(msg.role),
38
- parts: format_parts(msg)
39
- }
40
- end
35
+ formatter = MessageFormatter.new(
36
+ messages,
37
+ format_role: method(:format_role),
38
+ format_parts: method(:format_parts),
39
+ format_tool_result: method(:format_tool_result)
40
+ )
41
+ formatter.format
41
42
  end
42
43
 
43
44
  def format_role(role)
44
45
  case role
45
46
  when :assistant then 'model'
46
- when :system, :tool then 'user'
47
+ when :system then 'user'
48
+ when :tool then 'function'
47
49
  else role.to_s
48
50
  end
49
51
  end
50
52
 
51
53
  def format_parts(msg)
52
54
  if msg.tool_call?
53
- [{
54
- functionCall: {
55
- name: msg.tool_calls.values.first.name,
56
- args: msg.tool_calls.values.first.arguments
57
- }
58
- }]
55
+ format_tool_call(msg)
59
56
  elsif msg.tool_result?
60
- [{
61
- functionResponse: {
62
- name: msg.tool_call_id,
63
- response: {
64
- name: msg.tool_call_id,
65
- content: Media.format_content(msg.content)
66
- }
67
- }
68
- }]
57
+ format_tool_result(msg)
69
58
  else
70
59
  Media.format_content(msg.content)
71
60
  end
@@ -77,7 +66,7 @@ module RubyLLM
77
66
 
78
67
  Message.new(
79
68
  role: :assistant,
80
- content: extract_content(data),
69
+ content: parse_content(data),
81
70
  tool_calls: tool_calls,
82
71
  input_tokens: data.dig('usageMetadata', 'promptTokenCount'),
83
72
  output_tokens: calculate_output_tokens(data),
@@ -89,23 +78,19 @@ module RubyLLM
89
78
  def convert_schema_to_gemini(schema)
90
79
  return nil unless schema
91
80
 
92
- build_base_schema(schema).tap do |result|
93
- result[:description] = schema[:description] if schema[:description]
94
- apply_type_specific_attributes(result, schema)
95
- end
81
+ GeminiSchema.new(schema).to_h
96
82
  end
97
83
 
98
- def extract_content(data)
84
+ def parse_content(data)
99
85
  candidate = data.dig('candidates', 0)
100
86
  return '' unless candidate
101
87
 
102
88
  return '' if function_call?(candidate)
103
89
 
104
90
  parts = candidate.dig('content', 'parts')
105
- text_parts = parts&.select { |p| p['text'] }
106
- return '' unless text_parts&.any?
91
+ return '' unless parts&.any?
107
92
 
108
- text_parts.map { |p| p['text'] }.join
93
+ build_response_content(parts)
109
94
  end
110
95
 
111
96
  def function_call?(candidate)
@@ -119,50 +104,348 @@ module RubyLLM
119
104
  candidates + thoughts
120
105
  end
121
106
 
122
- def build_base_schema(schema)
123
- case schema[:type]
124
- when 'object'
125
- build_object_schema(schema)
126
- when 'array'
127
- { type: 'ARRAY', items: schema[:items] ? convert_schema_to_gemini(schema[:items]) : { type: 'STRING' } }
128
- when 'number'
129
- { type: 'NUMBER' }
130
- when 'integer'
131
- { type: 'INTEGER' }
132
- when 'boolean'
133
- { type: 'BOOLEAN' }
134
- else
135
- { type: 'STRING' }
107
+ def response_json_schema_supported?(model)
108
+ version = gemini_version(model)
109
+ version && version >= Gem::Version.new('2.5')
110
+ end
111
+
112
+ def build_json_schema(schema)
113
+ normalized = RubyLLM::Utils.deep_dup(schema)
114
+ normalized.delete(:strict)
115
+ normalized.delete('strict')
116
+ RubyLLM::Utils.deep_stringify_keys(normalized)
117
+ end
118
+
119
+ def gemini_version(model)
120
+ return nil unless model
121
+
122
+ candidates = [
123
+ safe_string(model.id),
124
+ safe_string(model.respond_to?(:family) ? model.family : nil),
125
+ safe_string(model_metadata_value(model, :version)),
126
+ safe_string(model_metadata_value(model, 'version')),
127
+ safe_string(model_metadata_value(model, :description))
128
+ ].compact
129
+
130
+ candidates.each do |candidate|
131
+ version = extract_version(candidate)
132
+ return version if version
136
133
  end
134
+
135
+ nil
137
136
  end
138
137
 
139
- def build_object_schema(schema)
138
+ def model_metadata_value(model, key)
139
+ return unless model.respond_to?(:metadata)
140
+
141
+ metadata = model.metadata
142
+ return unless metadata.is_a?(Hash)
143
+
144
+ metadata[key] || metadata[key.to_s]
145
+ end
146
+
147
+ def safe_string(value)
148
+ value&.to_s
149
+ end
150
+
151
+ def extract_version(text)
152
+ return nil unless text
153
+
154
+ match = text.match(/(\d+\.\d+|\d+)/)
155
+ return nil unless match
156
+
157
+ Gem::Version.new(match[1])
158
+ rescue ArgumentError
159
+ nil
160
+ end
161
+
162
+ def structured_output_config(schema, model)
140
163
  {
141
- type: 'OBJECT',
142
- properties: (schema[:properties] || {}).transform_values { |prop| convert_schema_to_gemini(prop) },
143
- required: schema[:required] || []
144
- }.tap do |object|
145
- object[:propertyOrdering] = schema[:propertyOrdering] if schema[:propertyOrdering]
146
- object[:nullable] = schema[:nullable] if schema.key?(:nullable)
164
+ responseMimeType: 'application/json'
165
+ }.tap do |config|
166
+ if response_json_schema_supported?(model)
167
+ config[:responseJsonSchema] = build_json_schema(schema)
168
+ else
169
+ config[:responseSchema] = convert_schema_to_gemini(schema)
170
+ end
147
171
  end
148
172
  end
149
173
 
150
- def apply_type_specific_attributes(result, schema)
151
- case schema[:type]
152
- when 'string'
153
- copy_attributes(result, schema, :enum, :format, :nullable)
154
- when 'number', 'integer'
155
- copy_attributes(result, schema, :format, :minimum, :maximum, :enum, :nullable)
156
- when 'array'
157
- copy_attributes(result, schema, :minItems, :maxItems, :nullable)
158
- when 'boolean'
159
- copy_attributes(result, schema, :nullable)
174
+ # formats a message
175
+ class MessageFormatter
176
+ def initialize(messages, format_role:, format_parts:, format_tool_result:)
177
+ @messages = messages
178
+ @index = 0
179
+ @tool_call_names = {}
180
+ @format_role = format_role
181
+ @format_parts = format_parts
182
+ @format_tool_result = format_tool_result
183
+ end
184
+
185
+ def format
186
+ formatted = []
187
+
188
+ while current_message
189
+ if tool_message?(current_message)
190
+ tool_parts, next_index = collect_tool_parts
191
+ formatted << build_tool_response(tool_parts)
192
+ @index = next_index
193
+ else
194
+ remember_tool_calls if current_message.tool_call?
195
+ formatted << build_standard_message(current_message)
196
+ @index += 1
197
+ end
198
+ end
199
+
200
+ formatted
201
+ end
202
+
203
+ private
204
+
205
+ def current_message
206
+ @messages[@index]
207
+ end
208
+
209
+ def tool_message?(message)
210
+ message&.role == :tool
211
+ end
212
+
213
+ def collect_tool_parts
214
+ parts = []
215
+ index = @index
216
+
217
+ while tool_message?(@messages[index])
218
+ tool_message = @messages[index]
219
+ tool_name = @tool_call_names.delete(tool_message.tool_call_id)
220
+ parts.concat(format_tool_result(tool_message, tool_name))
221
+ index += 1
222
+ end
223
+
224
+ [parts, index]
225
+ end
226
+
227
+ def build_tool_response(parts)
228
+ { role: 'function', parts: parts }
229
+ end
230
+
231
+ def remember_tool_calls
232
+ current_message.tool_calls.each do |tool_call_id, tool_call|
233
+ @tool_call_names[tool_call_id] = tool_call.name
234
+ end
235
+ end
236
+
237
+ def build_standard_message(message)
238
+ {
239
+ role: @format_role.call(message.role),
240
+ parts: @format_parts.call(message)
241
+ }
242
+ end
243
+
244
+ def format_tool_result(message, tool_name)
245
+ @format_tool_result.call(message, tool_name)
160
246
  end
161
247
  end
162
248
 
163
- def copy_attributes(target, source, *attributes)
164
- attributes.each do |attr|
165
- target[attr] = source[attr] if attr == :nullable ? source.key?(attr) : source[attr]
249
+ # converts json schema to gemini
250
+ class GeminiSchema
251
+ def initialize(schema)
252
+ @raw_schema = RubyLLM::Utils.deep_dup(schema)
253
+ @definitions = {}
254
+ end
255
+
256
+ def to_h
257
+ return nil unless @raw_schema
258
+
259
+ symbolized = symbolize_and_extract_definitions(@raw_schema)
260
+ convert(symbolized, Set.new)
261
+ end
262
+
263
+ private
264
+
265
+ attr_reader :definitions
266
+
267
+ def symbolize_and_extract_definitions(value)
268
+ case value
269
+ when Hash
270
+ value.each_with_object({}) do |(key, val), hash|
271
+ key_sym = begin
272
+ key.to_sym
273
+ rescue StandardError
274
+ key
275
+ end
276
+
277
+ if definition_key?(key_sym)
278
+ merge_definitions(val)
279
+ else
280
+ hash[key_sym] = symbolize_and_extract_definitions(val)
281
+ end
282
+ end
283
+ when Array
284
+ value.map { |item| symbolize_and_extract_definitions(item) }
285
+ else
286
+ value
287
+ end
288
+ end
289
+
290
+ def definition_key?(key)
291
+ %i[$defs definitions].include?(key)
292
+ end
293
+
294
+ def merge_definitions(raw_defs)
295
+ return unless raw_defs
296
+
297
+ symbolized = symbolize_and_extract_definitions(raw_defs)
298
+ @definitions = if definitions.empty?
299
+ symbolized
300
+ else
301
+ RubyLLM::Utils.deep_merge(definitions, symbolized)
302
+ end
303
+ end
304
+
305
+ def convert(schema, visited_refs)
306
+ return default_string_schema unless schema.is_a?(Hash)
307
+
308
+ schema = strip_unsupported_keys(schema)
309
+
310
+ if schema[:$ref]
311
+ resolved = resolve_reference(schema, visited_refs)
312
+ return resolved if resolved
313
+ end
314
+
315
+ schema = normalize_any_of(schema)
316
+
317
+ result = case schema[:type].to_s
318
+ when 'object'
319
+ build_object(schema, visited_refs)
320
+ when 'array'
321
+ build_array(schema, visited_refs)
322
+ when 'number'
323
+ build_scalar('NUMBER', schema, %i[format minimum maximum enum nullable multipleOf])
324
+ when 'integer'
325
+ build_scalar('INTEGER', schema, %i[format minimum maximum enum nullable multipleOf])
326
+ when 'boolean'
327
+ build_scalar('BOOLEAN', schema, %i[nullable])
328
+ else
329
+ build_scalar('STRING', schema, %i[enum format nullable])
330
+ end
331
+
332
+ apply_description(result, schema)
333
+ result
334
+ end
335
+
336
+ def strip_unsupported_keys(schema)
337
+ schema.dup.tap do |copy|
338
+ copy.delete(:strict)
339
+ copy.delete(:additionalProperties)
340
+ end
341
+ end
342
+
343
+ def resolve_reference(schema, visited_refs)
344
+ ref = schema[:$ref]
345
+ return unless ref
346
+ return if visited_refs.include?(ref)
347
+
348
+ referenced = lookup_definition(ref)
349
+ return unless referenced
350
+
351
+ overrides = schema.except(:$ref)
352
+ visited_refs.add(ref)
353
+ merged = RubyLLM::Utils.deep_merge(referenced, overrides)
354
+ convert(merged, visited_refs)
355
+ ensure
356
+ visited_refs.delete(ref)
357
+ end
358
+
359
+ def lookup_definition(ref) # rubocop:disable Metrics/PerceivedComplexity
360
+ segments = ref.to_s.split('/').reject(&:empty?)
361
+ return nil if segments.empty?
362
+
363
+ segments.shift if segments.first == '#'
364
+ segments.shift if %w[$defs definitions].include?(segments.first)
365
+
366
+ current = definitions
367
+
368
+ segments.each do |segment|
369
+ break current = nil unless current.is_a?(Hash)
370
+
371
+ key = begin
372
+ segment.to_sym
373
+ rescue StandardError
374
+ segment
375
+ end
376
+ current = current[key]
377
+ end
378
+
379
+ current ? RubyLLM::Utils.deep_dup(current) : nil
380
+ end
381
+
382
+ def normalize_any_of(schema)
383
+ any_of = schema[:anyOf]
384
+ return schema unless any_of
385
+
386
+ options = Array(any_of).map { |option| RubyLLM::Utils.deep_symbolize_keys(option) }
387
+ nullables, non_null = options.partition { |option| schema_type(option) == 'null' }
388
+
389
+ base = RubyLLM::Utils.deep_symbolize_keys(non_null.first || { type: 'string' })
390
+ base[:nullable] = true if nullables.any?
391
+
392
+ without_any_of = schema.each_with_object({}) do |(key, value), result|
393
+ result[key] = value unless key == :anyOf
394
+ end
395
+
396
+ without_any_of.merge(base)
397
+ end
398
+
399
+ def schema_type(option)
400
+ (option[:type] || option['type']).to_s.downcase
401
+ end
402
+
403
+ def build_object(schema, visited_refs)
404
+ properties = schema.fetch(:properties, {}).transform_values do |child|
405
+ convert(child, visited_refs)
406
+ end
407
+
408
+ {
409
+ type: 'OBJECT',
410
+ properties: properties
411
+ }.tap do |object|
412
+ required = Array(schema[:required]).map(&:to_s).uniq
413
+ object[:required] = required if required.any?
414
+ object[:propertyOrdering] = schema[:propertyOrdering] if schema[:propertyOrdering]
415
+ copy_attribute(object, schema, :nullable)
416
+ end
417
+ end
418
+
419
+ def build_array(schema, visited_refs)
420
+ items_schema = schema[:items] ? convert(schema[:items], visited_refs) : default_string_schema
421
+
422
+ {
423
+ type: 'ARRAY',
424
+ items: items_schema
425
+ }.tap do |array|
426
+ copy_attribute(array, schema, :minItems)
427
+ copy_attribute(array, schema, :maxItems)
428
+ copy_attribute(array, schema, :nullable)
429
+ end
430
+ end
431
+
432
+ def build_scalar(type, schema, allowed_keys)
433
+ { type: type }.tap do |result|
434
+ allowed_keys.each { |key| copy_attribute(result, schema, key) }
435
+ end
436
+ end
437
+
438
+ def apply_description(target, schema)
439
+ description = schema[:description]
440
+ target[:description] = description if description
441
+ end
442
+
443
+ def copy_attribute(target, source, key)
444
+ target[key] = source[key] if source.key?(key)
445
+ end
446
+
447
+ def default_string_schema
448
+ { type: 'STRING' }
166
449
  end
167
450
  end
168
451
  end
@@ -2,12 +2,13 @@
2
2
 
3
3
  module RubyLLM
4
4
  module Providers
5
- class Gemini
5
+ class Gemini # rubocop:disable Style/Documentation
6
6
  # Media handling methods for the Gemini API integration
7
7
  module Media
8
8
  module_function
9
9
 
10
10
  def format_content(content)
11
+ return content.value if content.is_a?(RubyLLM::Content::Raw)
11
12
  return [format_text(content.to_json)] if content.is_a?(Hash) || content.is_a?(Array)
12
13
  return [format_text(content)] unless content.is_a?(Content)
13
14
 
@@ -49,6 +50,63 @@ module RubyLLM
49
50
  }
50
51
  end
51
52
  end
53
+
54
+ def build_response_content(parts) # rubocop:disable Metrics/PerceivedComplexity
55
+ text = []
56
+ attachments = []
57
+
58
+ parts.each_with_index do |part, index|
59
+ if part['text']
60
+ text << part['text']
61
+ elsif part['inlineData']
62
+ attachment = build_inline_attachment(part['inlineData'], index)
63
+ attachments << attachment if attachment
64
+ elsif part['fileData']
65
+ attachment = build_file_attachment(part['fileData'], index)
66
+ attachments << attachment if attachment
67
+ end
68
+ end
69
+
70
+ text = text.join
71
+ text = nil if text.empty?
72
+ return text if attachments.empty?
73
+
74
+ Content.new(text:, attachments:)
75
+ end
76
+
77
+ def build_inline_attachment(inline_data, index)
78
+ encoded = inline_data['data']
79
+ return unless encoded
80
+
81
+ mime_type = inline_data['mimeType']
82
+ decoded = Base64.decode64(encoded)
83
+ io = StringIO.new(decoded)
84
+ io.set_encoding(Encoding::BINARY) if io.respond_to?(:set_encoding)
85
+
86
+ filename = attachment_filename(mime_type, index)
87
+ RubyLLM::Attachment.new(io, filename:)
88
+ rescue ArgumentError => e
89
+ RubyLLM.logger.warn "Failed to decode Gemini inline data attachment: #{e.message}"
90
+ nil
91
+ end
92
+
93
+ def build_file_attachment(file_data, index)
94
+ uri = file_data['fileUri']
95
+ return unless uri
96
+
97
+ filename = file_data['filename'] || attachment_filename(file_data['mimeType'], index)
98
+ RubyLLM::Attachment.new(uri, filename:)
99
+ end
100
+
101
+ def attachment_filename(mime_type, index)
102
+ return "gemini_attachment_#{index + 1}" unless mime_type
103
+
104
+ extension = mime_type.split('/').last.to_s
105
+ extension = 'jpg' if extension == 'jpeg'
106
+ extension = 'txt' if extension == 'plain'
107
+ extension = extension.tr('+', '.')
108
+ "gemini_attachment_#{index + 1}.#{extension}"
109
+ end
52
110
  end
53
111
  end
54
112
  end