ruby_llm 0.1.0.pre40 → 0.1.0.pre42

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -106,7 +106,7 @@ module RubyLLM
106
106
  end
107
107
  end
108
108
 
109
- def to_json_stream(&block) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
109
+ def to_json_stream(&block) # rubocop:disable Metrics/MethodLength
110
110
  buffer = String.new
111
111
  parser = EventStreamParser::Parser.new
112
112
 
@@ -126,7 +126,6 @@ module RubyLLM
126
126
  parser.feed(chunk) do |_type, data|
127
127
  unless data == '[DONE]'
128
128
  parsed_data = JSON.parse(data)
129
- RubyLLM.logger.debug "chunk: #{parsed_data}"
130
129
  block.call(parsed_data)
131
130
  end
132
131
  end
@@ -62,7 +62,7 @@ module RubyLLM
62
62
  def format_basic_message(msg)
63
63
  {
64
64
  role: convert_role(msg.role),
65
- content: msg.content
65
+ content: Media.format_content(msg.content)
66
66
  }
67
67
  end
68
68
 
@@ -0,0 +1,66 @@
1
+ # frozen_string_literal: true
2
+
3
module RubyLLM
  module Providers
    module Anthropic
      # Handles formatting of media content parts (text, images, PDFs) for Anthropic
      module Media
        module_function

        # Converts message content into Anthropic content blocks.
        #
        # @param content [Object] message content; only Arrays are transformed
        # @return [Object] +content+ itself when it is not an Array, otherwise
        #   a new Array with one formatted entry per part. Parts whose :type is
        #   not 'image', 'pdf' or 'text' are passed through unchanged.
        def format_content(content)
          return content unless content.is_a?(Array)

          content.map do |part|
            case part[:type]
            when 'image' then format_image(part)
            when 'pdf' then format_pdf(part)
            when 'text' then format_text_block(part[:text])
            else part
            end
          end
        end

        # Placeholder: image parts are currently returned unchanged.
        #
        # @param part [Hash] image content part
        # @return [Hash] the same part
        def format_image(part)
          part
        end

        # Builds a 'document' block for a PDF part.
        #
        # A String source starting with 'http' is sent as a URL source.
        # Anything else is sent as base64 data taken from part[:content], or
        # read from the file at part[:source] when no content is given.
        #
        # @param part [Hash] PDF content part with :source and optionally :content
        # @return [Hash] document block
        def format_pdf(part)
          source = part[:source]

          if source.is_a?(String) && source.start_with?('http')
            {
              type: 'document',
              # The source needs its own type discriminator, just like the
              # base64 variant below.
              source: { type: 'url', url: source }
            }
          else
            # Binary read so the PDF bytes are never subject to text-mode
            # newline or encoding translation.
            raw = part[:content] || File.binread(source)

            {
              type: 'document',
              source: {
                type: 'base64',
                media_type: 'application/pdf',
                data: Base64.strict_encode64(raw)
              }
            }
          end
        end

        # Wraps plain text in a 'text' block.
        #
        # @param text [String] text to send
        # @return [Hash] text block
        def format_text_block(text)
          {
            type: 'text',
            text: text
          }
        end
      end
    end
  end
end
@@ -8,6 +8,7 @@ module RubyLLM
8
8
  extend Provider
9
9
  extend Anthropic::Chat
10
10
  extend Anthropic::Embeddings
11
+ extend Anthropic::Media
11
12
  extend Anthropic::Models
12
13
  extend Anthropic::Streaming
13
14
  extend Anthropic::Tools
@@ -0,0 +1,140 @@
1
+ # frozen_string_literal: true
2
+
3
module RubyLLM
  module Providers
    module Gemini
      # Chat methods for the Gemini API implementation
      module Chat # rubocop:disable Metrics/ModuleLength
        # Must be public for Provider to use.
        #
        # Builds the request payload and either streams the completion (when a
        # block is given) or performs a single generateContent call.
        #
        # @param messages [Array] messages to send
        # @param tools [#any?] tools to advertise; omitted from the payload when empty
        # @param temperature [Numeric] sampling temperature
        # @param model [String] model id used in the request path
        # @return [Object] whatever stream_completion / generate_completion returns
        def complete(messages, tools:, temperature:, model:, &block) # rubocop:disable Metrics/MethodLength
          payload = {
            contents: format_messages(messages),
            generationConfig: {
              temperature: temperature
            }
          }

          payload[:tools] = format_tools(tools) if tools.any?

          # Keep the tools on the receiver for later lookup; nothing in this
          # module reads @tools itself.
          @tools = tools

          if block_given?
            stream_completion(model, payload, &block)
          else
            generate_completion(model, payload)
          end
        end

        # Format methods can be private
        private

        # Posts the payload to the generateContent endpoint and parses the reply.
        def generate_completion(model, payload)
          response = post("models/#{model}:generateContent", payload)
          parse_completion_response(response)
        end

        # Maps each message to a { role:, parts: } hash.
        def format_messages(messages)
          messages.map do |msg|
            {
              role: format_role(msg.role),
              parts: format_parts(msg)
            }
          end
        end

        # Translates a message role into the role name sent to Gemini.
        def format_role(role)
          case role
          when :assistant then 'model'
          # Gemini doesn't have a system role, and function responses are
          # sent with the user role as well.
          when :system, :tool then 'user'
          else role.to_s
          end
        end

        # Builds the parts array for one message.
        #
        # Tool-call messages produce one functionCall part per tool call, so a
        # message carrying several calls is sent completely instead of being
        # truncated to the first one.
        def format_parts(msg) # rubocop:disable Metrics/MethodLength
          if msg.tool_call?
            msg.tool_calls.values.map do |tool_call|
              { functionCall: { name: tool_call.name, args: tool_call.arguments } }
            end
          elsif msg.tool_result?
            # Handle function responses
            [{
              functionResponse: {
                name: msg.tool_call_id,
                response: {
                  name: msg.tool_call_id,
                  content: msg.content
                }
              }
            }]
          elsif msg.content.is_a?(Array)
            # Handle multi-part content (text, images, etc.)
            msg.content.map { |part| format_part(part) }
          else
            # Simple text content
            [{ text: msg.content.to_s }]
          end
        end

        # Formats a single content part; unknown types are sent as their
        # string representation.
        def format_part(part)
          case part[:type]
          when 'text' then { text: part[:text] }
          when 'image' then Media.format_image(part)
          when 'pdf' then Media.format_pdf(part)
          when 'audio' then Media.format_audio(part)
          else { text: part.to_s }
          end
        end

        # Turns a generateContent response into an assistant Message.
        def parse_completion_response(response)
          data = response.body
          tool_calls = extract_tool_calls(data)

          Message.new(
            role: :assistant,
            content: extract_content(data),
            tool_calls: tool_calls,
            input_tokens: data.dig('usageMetadata', 'promptTokenCount'),
            output_tokens: data.dig('usageMetadata', 'candidatesTokenCount'),
            model_id: data['modelVersion'] || model_id_from_path(response.env.url.path)
          )
        end

        # Extracts the model id from a request path such as
        # "/v1beta/models/gemini-pro:generateContent". Matching on the
        # "models/" segment works regardless of how many path segments
        # precede it; returns nil when the path has no such segment.
        def model_id_from_path(path)
          path.to_s[%r{models/([^/:]+)}, 1]
        end

        # Joins the text of all text parts of the first candidate.
        # Returns '' when there is no candidate, when the candidate contains a
        # function call, or when it has no text parts.
        def extract_content(data)
          candidate = data.dig('candidates', 0)
          return '' unless candidate
          return '' if function_call?(candidate)

          parts = candidate.dig('content', 'parts') || []
          parts.filter_map { |part| part['text'] }.join
        end

        # True when any part of the candidate carries a functionCall
        # (nil when the candidate has no parts).
        def function_call?(candidate)
          parts = candidate.dig('content', 'parts')
          parts&.any? { |part| part['functionCall'] }
        end
      end
    end
  end
end
@@ -0,0 +1,53 @@
1
+ # frozen_string_literal: true
2
+
3
module RubyLLM
  module Providers
    module Gemini
      # Embeddings methods for the Gemini API integration
      module Embeddings
        # Must be public for Provider module.
        #
        # Embeds +text+ with the embedContent endpoint. An Array is embedded
        # with one request per element (nothing else is sent, so no request
        # result is thrown away); any other value is embedded with a single
        # request.
        #
        # @param text [Array, #to_s] one text or a list of texts
        # @param model [String] model id used in the request path
        # @return [Embedding] for Array input +vectors+ holds one vector per
        #   element and +input_tokens+ is the sum of the per-request prompt
        #   token counts (0 when the API reports none)
        def embed(text, model:) # rubocop:disable Metrics/MethodLength
          url = "models/#{model}:embedContent"

          if text.is_a?(Array)
            responses = text.map { |entry| post(url, embedding_payload(entry.to_s)) }

            Embedding.new(
              vectors: responses.map { |response| response.body.dig('embedding', 'values') },
              model: model,
              input_tokens: responses.sum { |response| prompt_token_count(response) }
            )
          else
            response = post(url, embedding_payload(text))

            Embedding.new(
              vectors: response.body.dig('embedding', 'values'),
              model: model,
              input_tokens: prompt_token_count(response)
            )
          end
        end

        private

        # Wraps the text parts in the embedContent request body.
        def embedding_payload(text)
          { content: { parts: format_text_for_embedding(text) } }
        end

        # Prompt token count reported in the response, or 0 when absent.
        def prompt_token_count(response)
          response.body.dig('usageMetadata', 'promptTokenCount') || 0
        end

        # Converts one text or a list of texts into an array of { text: } parts.
        def format_text_for_embedding(text)
          if text.is_a?(Array)
            text.map { |t| { text: t.to_s } }
          else
            [{ text: text.to_s }]
          end
        end
      end
    end
  end
end
@@ -0,0 +1,51 @@
1
+ # frozen_string_literal: true
2
+
3
module RubyLLM
  module Providers
    module Gemini
      # Image generation methods for the Gemini API implementation
      module Images
        # Relative path of the predict endpoint for +model+.
        def images_url(model:)
          "models/#{model}:predict"
        end

        # Requests one image for +prompt+ and returns it as an Image.
        # +size+ is accepted for interface compatibility but not sent.
        #
        # @param prompt [String] image prompt
        # @param model [String] model id used in the request path
        # @param size [Object] unused
        # @return [Image]
        # @raise [Error] when the response has no base64 image data
        def paint(prompt, model:, size:) # rubocop:disable Lint/UnusedMethodArgument
          payload = render_image_payload(prompt)

          response = post(images_url(model: model), payload)
          parse_image_response(response, model: model)
        end

        # Request body asking for a single sample for +prompt+.
        def render_image_payload(prompt)
          {
            instances: [
              {
                prompt: prompt
              }
            ],
            parameters: {
              sampleCount: 1
            }
          }
        end

        # Builds an Image with a data: URL from the first prediction.
        #
        # @param response [#body] response whose body has a 'predictions' list
        # @param model [String] model id recorded on the Image; defaults to ''
        #   so existing one-argument callers keep their previous result
        # @return [Image]
        # @raise [Error] when the first prediction lacks 'bytesBase64Encoded'
        def parse_image_response(response, model: '') # rubocop:disable Metrics/MethodLength
          prediction = response.body['predictions']&.first

          unless prediction&.key?('bytesBase64Encoded')
            raise Error, 'Unexpected response format from Gemini image generation API'
          end

          # Fall back to PNG when the prediction does not name a MIME type.
          mime_type = prediction['mimeType'] || 'image/png'

          Image.new(
            url: "data:#{mime_type};base64,#{prediction['bytesBase64Encoded']}",
            revised_prompt: '', # Imagen doesn't return revised prompts
            model_id: model
          )
        end
      end
    end
  end
end
@@ -0,0 +1,136 @@
1
+ # frozen_string_literal: true
2
+
3
module RubyLLM
  module Providers
    module Gemini
      # Media handling methods for the Gemini API integration
      module Media # rubocop:disable Metrics/ModuleLength
        module_function

        # Builds an inline_data part for an image.
        #
        # part[:source] may be a String (URL starting with 'http', otherwise a
        # file path) or a Hash with either :url or :data, plus an optional
        # :media_type.
        #
        # @param part [Hash] image content part
        # @return [Hash] { inline_data: { mime_type:, data: } }
        # @raise [ArgumentError] when the source is neither a String nor a Hash
        def format_image(part) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
          source = part[:source]

          case source
          when String
            data = remote?(source) ? fetch_and_encode_image(source) : encode_image_file(source)
            inline_data(mime_type_for_image(source), data)
          when Hash
            if source[:url]
              inline_data(source[:media_type] || mime_type_for_image(source[:url]),
                          fetch_and_encode_image(source[:url]))
            else
              inline_data(source[:media_type] || 'image/jpeg', source[:data])
            end
          else
            # Fail loudly instead of returning nil, which would end up as a
            # null entry in the request parts.
            raise ArgumentError, "Unsupported image source: #{source.class}"
          end
        end

        # Builds an inline_data part for a PDF from a URL, from part[:content],
        # or from the file at part[:source] (checked in that order).
        #
        # @param part [Hash] PDF content part
        # @return [Hash] { inline_data: { mime_type: 'application/pdf', data: } }
        def format_pdf(part)
          source = part[:source]

          data =
            if remote?(source)
              fetch_and_encode_pdf(source)
            elsif part[:content]
              Base64.strict_encode64(part[:content])
            else
              encode_pdf_file(source)
            end

          inline_data('application/pdf', data)
        end

        # Builds an audio part: a file_data reference for URLs, otherwise
        # inline_data from part[:content] or the file at part[:source].
        #
        # @param part [Hash] audio content part
        # @return [Hash] file_data or inline_data part
        def format_audio(part)
          source = part[:source]
          mime_type = mime_type_for_audio(source)

          if remote?(source)
            { file_data: { mime_type: mime_type, file_uri: source } }
          else
            content = part[:content] || File.binread(source)
            inline_data(mime_type, Base64.strict_encode64(content))
          end
        end

        # MIME type for an image path or URL, chosen by file extension;
        # unknown extensions map to 'image/jpeg'.
        def mime_type_for_image(path)
          case file_extension(path)
          when 'png' then 'image/png'
          when 'gif' then 'image/gif'
          when 'webp' then 'image/webp'
          else 'image/jpeg'
          end
        end

        # MIME type for an audio path or URL, chosen by file extension;
        # unknown extensions (and a missing path) map to 'audio/wav'.
        def mime_type_for_audio(path)
          case file_extension(path)
          when 'mp3' then 'audio/mpeg'
          when 'ogg' then 'audio/ogg'
          when 'flac' then 'audio/flac'
          when 'aac' then 'audio/aac'
          when 'aiff', 'aif' then 'audio/aiff'
          else 'audio/wav'
          end
        end

        # Downloads +url+ and returns the body base64-encoded.
        def fetch_and_encode_image(url)
          fetch_and_encode(url)
        end

        # Downloads +url+ and returns the body base64-encoded.
        def fetch_and_encode_pdf(url)
          fetch_and_encode(url)
        end

        # Reads the file at +path+ and returns its bytes base64-encoded.
        def encode_image_file(path)
          encode_file(path)
        end

        # Reads the file at +path+ and returns its bytes base64-encoded.
        def encode_pdf_file(path)
          encode_file(path)
        end

        # True when +source+ is a String starting with 'http'.
        def remote?(source)
          source.is_a?(String) && source.start_with?('http')
        end

        # Wraps a MIME type and base64 payload in an inline_data part.
        def inline_data(mime_type, data)
          { inline_data: { mime_type: mime_type, data: data } }
        end

        # Lower-cased extension without the dot. For URLs the query string
        # and fragment are dropped first so "a.png?w=10" still yields "png".
        def file_extension(path)
          location = path.to_s
          location = location.sub(/[?#].*\z/m, '') if location.start_with?('http')
          File.extname(location).downcase.delete('.')
        end

        # Shared download helper behind the fetch_and_encode_* methods.
        def fetch_and_encode(url)
          response = Faraday.get(url)
          Base64.strict_encode64(response.body)
        end

        # Shared file helper behind the encode_*_file methods. Binary read so
        # the bytes are never subject to text-mode translation.
        def encode_file(path)
          Base64.strict_encode64(File.binread(path))
        end
      end
    end
  end
end
@@ -3,16 +3,51 @@
3
3
  module RubyLLM
4
4
  module Providers
5
5
  module Gemini
6
- # Models methods of the Gemini API integration
6
+ # Models methods for the Gemini API integration
7
7
  module Models
8
- module_function
8
+ # Methods needed by Provider - must be public
9
# Relative path of the models listing endpoint.
def models_url
  'models'
end

# Fetches the model list and hands the response to
# parse_list_models_response together with slug and capabilities.
#
# The API key is sent as the "key" query parameter through the request's
# params, so it is URL-escaped by the HTTP client rather than interpolated
# raw into the path.
#
# @return [Object] the result of parse_list_models_response
def list_models
  response = connection.get(models_url) do |req|
    req.params['key'] = RubyLLM.config.gemini_api_key
    req.headers.merge! headers
  end

  parse_list_models_response(response, slug, capabilities)
end
20
+
21
+ private
22
+
23
# Converts the 'models' list of a models response into ModelInfo objects.
#
# Token limits come from the response when present and fall back to the
# capabilities lookup otherwise; type, family, feature flags and prices
# always come from +capabilities+.
#
# @param response [#body] response whose body may contain a 'models' array
# @param slug [String] provider identifier stored on each ModelInfo
# @param capabilities [Object] lookup object queried by model id
# @return [Array<ModelInfo>] empty when the body has no 'models' key
def parse_list_models_response(response, slug, capabilities) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
  (response.body['models'] || []).map do |model|
    # Strip only the leading "models/" so ids that contain that text
    # further in are left intact.
    model_id = model['name'].delete_prefix('models/')

    ModelInfo.new(
      id: model_id,
      created_at: nil,
      display_name: model['displayName'],
      provider: slug,
      type: capabilities.model_type(model_id),
      family: capabilities.model_family(model_id),
      metadata: {
        version: model['version'],
        description: model['description'],
        input_token_limit: model['inputTokenLimit'],
        output_token_limit: model['outputTokenLimit'],
        supported_generation_methods: model['supportedGenerationMethods']
      },
      context_window: model['inputTokenLimit'] || capabilities.context_window_for(model_id),
      max_tokens: model['outputTokenLimit'] || capabilities.max_tokens_for(model_id),
      supports_vision: capabilities.supports_vision?(model_id),
      supports_functions: capabilities.supports_functions?(model_id),
      supports_json_mode: capabilities.supports_json_mode?(model_id),
      input_price_per_million: capabilities.input_price_for(model_id),
      output_price_per_million: capabilities.output_price_for(model_id)
    )
  end
end
17
52
  end
18
53
  end
@@ -0,0 +1,99 @@
1
+ # frozen_string_literal: true
2
+
3
module RubyLLM
  module Providers
    module Gemini
      # Streaming methods for the Gemini API implementation
      module Streaming
        # Public so that code outside this module can start a stream.
        #
        # Posts +payload+ to the streamGenerateContent endpoint, feeding every
        # parsed chunk to the accumulator and to +block+. If the accumulated
        # message is a tool call without text and a matching tool is found in
        # @tools, the tool is run and its result is emitted as one more chunk.
        # Returns the accumulated message.
        def stream_completion(model, payload, &block)
          url = "models/#{model}:streamGenerateContent?alt=sse"
          accumulator = StreamAccumulator.new

          post(url, payload) do |req|
            req.options.on_data = stream_handler(accumulator, &block)
          end

          emit_tool_result(accumulator, &block)
          accumulator.to_message
        end

        private

        # Runs the first tool call of the accumulated message (when there is
        # one, the message has no text, and @tools has an entry for it) and
        # pushes a chunk describing the result to the accumulator and block.
        def emit_tool_result(accumulator, &block) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
          message = accumulator.to_message
          return unless message.tool_call? && message.content.to_s.empty?
          return unless @tools && !@tools.empty?

          tool_call = message.tool_calls.values.first
          tool = @tools[tool_call.name.to_sym]
          return unless tool

          tool_result = tool.call(tool_call.arguments)
          result_chunk = Chunk.new(
            role: :assistant,
            content: "The result is #{tool_result}",
            model_id: message.model_id,
            input_tokens: message.input_tokens,
            output_tokens: message.output_tokens,
            tool_calls: message.tool_calls
          )

          accumulator.add(result_chunk)
          block.call(result_chunk)
        end

        # Returns the to_json_stream handler that converts each parsed event
        # with candidates into a Chunk, adds it to the accumulator and yields
        # it to the block. Events without candidates are skipped.
        def stream_handler(accumulator, &block) # rubocop:disable Metrics/MethodLength
          to_json_stream do |data|
            next unless data['candidates']&.any?

            candidate = data['candidates'][0]
            parts = candidate.dig('content', 'parts')
            model_id = data['modelVersion']

            tool_calls = stream_tool_calls(parts)
            text = stream_text(parts)

            chunk = Chunk.new(
              role: :assistant,
              content: text,
              model_id: model_id,
              input_tokens: data.dig('usageMetadata', 'promptTokenCount'),
              output_tokens: data.dig('usageMetadata', 'candidatesTokenCount'),
              tool_calls: tool_calls
            )

            accumulator.add(chunk)
            block.call(chunk)
          end
        end

        # Builds a one-entry { id => ToolCall } hash from the first part that
        # carries a named functionCall; nil when there is none. The id is a
        # fresh UUID and the arguments are serialized to a JSON string.
        def stream_tool_calls(parts)
          function_part = parts&.find { |part| part['functionCall'] }
          return unless function_part

          function_data = function_part['functionCall']
          return unless function_data['name']

          id = SecureRandom.uuid
          {
            id => ToolCall.new(
              id: id,
              name: function_data['name'],
              arguments: JSON.generate(function_data['args'])
            )
          }
        end

        # Concatenates the text of all text parts; nil when +parts+ is nil or
        # contains no text part.
        def stream_text(parts)
          return unless parts

          fragments = parts.filter_map { |part| part['text'] }
          fragments.join unless fragments.empty?
        end
      end
    end
  end
end