ruby_llm 0.1.0.pre41 → 0.1.0.pre42

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -106,7 +106,7 @@ module RubyLLM
106
106
  end
107
107
  end
108
108
 
109
- def to_json_stream(&block) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
109
+ def to_json_stream(&block) # rubocop:disable Metrics/MethodLength
110
110
  buffer = String.new
111
111
  parser = EventStreamParser::Parser.new
112
112
 
@@ -126,7 +126,6 @@ module RubyLLM
126
126
  parser.feed(chunk) do |_type, data|
127
127
  unless data == '[DONE]'
128
128
  parsed_data = JSON.parse(data)
129
- RubyLLM.logger.debug "chunk: #{parsed_data}"
130
129
  block.call(parsed_data)
131
130
  end
132
131
  end
@@ -0,0 +1,140 @@
1
+ # frozen_string_literal: true
2
+
3
+ module RubyLLM
4
+ module Providers
5
+ module Gemini
6
+ # Chat methods for the Gemini API implementation
7
+ module Chat # rubocop:disable Metrics/ModuleLength
8
+ # Must be public for Provider to use
9
# Builds the request payload for a chat completion and dispatches it.
#
# When a block is given the request is streamed through stream_completion;
# otherwise a single blocking request is made through generate_completion.
# The tool set is also kept in @tools for later lookup by tool name.
def complete(messages, tools:, temperature:, model:, &block) # rubocop:disable Metrics/MethodLength
  payload = {
    contents: format_messages(messages),
    generationConfig: { temperature: temperature }
  }
  # Only advertise tools when there are any
  payload[:tools] = format_tools(tools) if tools.any?

  # Remember the tools so they can be looked up by name after the request
  @tools = tools

  return generate_completion(model, payload) unless block_given?

  stream_completion(model, payload, &block)
end
28
+
29
+ # Format methods can be private
30
+ private
31
+
32
# Performs a blocking (non-streaming) generateContent request.
#
# model   - model id interpolated into the endpoint path
# payload - request body hash
#
# Returns whatever parse_completion_response builds from the HTTP response.
def generate_completion(model, payload)
  response = post("models/#{model}:generateContent", payload)
  parse_completion_response(response)
end
42
+
43
# Converts each message into a Gemini `contents` entry made of a role and
# its parts.
def format_messages(messages)
  messages.map do |message|
    { role: format_role(message.role), parts: format_parts(message) }
  end
end
51
+
52
# Maps a RubyLLM role onto the role name sent to Gemini.
#
# :assistant becomes 'model'; :system and :tool are both sent as 'user'
# (Gemini has no system role, and function responses use the user role).
# Any other role is passed through as a string.
def format_role(role)
  return 'model' if role == :assistant
  return 'user' if %i[system tool].include?(role)

  role.to_s
end
59
+
60
# Builds the `parts` array for a single message.
#
# - tool-call messages produce one functionCall part per tool call, so no
#   call is dropped when a message carries several
# - tool-result messages produce a single functionResponse part
# - Array content is formatted element by element through format_part
# - anything else is sent as one text part
def format_parts(msg) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
  if msg.tool_call?
    msg.tool_calls.values.map do |tool_call|
      { functionCall: { name: tool_call.name, args: tool_call.arguments } }
    end
  elsif msg.tool_result?
    [{
      functionResponse: {
        name: msg.tool_call_id,
        response: { name: msg.tool_call_id, content: msg.content }
      }
    }]
  elsif msg.content.is_a?(Array)
    # Multi-part content (text, images, etc.)
    msg.content.map { |part| format_part(part) }
  else
    [{ text: msg.content.to_s }]
  end
end
88
+
89
# Formats one element of multi-part content according to its :type.
#
# 'text' yields a text part; 'image', 'pdf' and 'audio' are delegated to
# Media; any other type falls back to the part's string representation.
def format_part(part) # rubocop:disable Metrics/MethodLength
  type = part[:type]
  return { text: part[:text] } if type == 'text'
  return Media.format_image(part) if type == 'image'
  return Media.format_pdf(part) if type == 'pdf'
  return Media.format_audio(part) if type == 'audio'

  { text: part.to_s }
end
103
+
104
# Builds an assistant Message from a non-streaming Gemini response.
#
# Token counts come from `usageMetadata`. The model id comes from
# `modelVersion` in the body; when that is absent it is taken from the
# `models/<id>:...` segment of the request path, and is nil if the path
# has no such segment.
def parse_completion_response(response)
  data = response.body
  tool_calls = extract_tool_calls(data)

  Message.new(
    role: :assistant,
    content: extract_content(data),
    tool_calls: tool_calls,
    input_tokens: data.dig('usageMetadata', 'promptTokenCount'),
    output_tokens: data.dig('usageMetadata', 'candidatesTokenCount'),
    # Capture the id between "models/" and the next "/" or ":" instead of
    # indexing a fixed path segment, which raised on unexpected path shapes
    model_id: data['modelVersion'] || response.env.url.path[%r{models/([^/:]+)}, 1]
  )
end
117
+
118
# Returns the concatenated text of the first candidate.
#
# Yields an empty string when there is no candidate, when the candidate is
# a function call, or when none of its parts carries text.
def extract_content(data) # rubocop:disable Metrics/CyclomaticComplexity
  candidate = data.dig('candidates', 0)
  return '' if !candidate || function_call?(candidate)

  texts = (candidate.dig('content', 'parts') || []).filter_map { |part| part['text'] }
  return '' if texts.empty?

  texts.join
end
132
+
133
# Tells whether any part of the candidate carries a functionCall.
# Returns nil (not false) when the candidate has no parts.
def function_call?(candidate)
  candidate.dig('content', 'parts')&.any? { |part| part['functionCall'] }
end
137
+ end
138
+ end
139
+ end
140
+ end
@@ -0,0 +1,53 @@
1
+ # frozen_string_literal: true
2
+
3
+ module RubyLLM
4
+ module Providers
5
+ module Gemini
6
+ # Embeddings methods for the Gemini API integration
7
+ module Embeddings
8
+ # Must be public for Provider module
9
# Embeds a single text or an Array of texts via the embedContent endpoint.
#
# For an Array, one request is issued per text and the vectors are returned
# in input order. No combined request is sent, because its embedding was
# never used. input_tokens is the sum of `promptTokenCount` over the
# responses, counting 0 for any response that omits it.
#
# text  - a value or an Array of values; each is converted with #to_s
# model - model id interpolated into the endpoint path
#
# Returns an Embedding.
def embed(text, model:) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
  url = "models/#{model}:embedContent"

  if text.is_a?(Array)
    responses = text.map do |item|
      post(url, { content: { parts: [{ text: item.to_s }] } })
    end

    Embedding.new(
      vectors: responses.map { |response| response.body.dig('embedding', 'values') },
      model: model,
      input_tokens: responses.sum { |response| response.body.dig('usageMetadata', 'promptTokenCount') || 0 }
    )
  else
    response = post(url, { content: { parts: format_text_for_embedding(text) } })

    Embedding.new(
      vectors: response.body.dig('embedding', 'values'),
      model: model,
      input_tokens: response.body.dig('usageMetadata', 'promptTokenCount') || 0
    )
  end
end
40
+
41
+ private
42
+
43
# Wraps the input as an array of `{ text: ... }` parts: one part per
# element for an Array, a single part otherwise. Values go through #to_s.
def format_text_for_embedding(text)
  items = text.is_a?(Array) ? text : [text]
  items.map { |item| { text: item.to_s } }
end
50
+ end
51
+ end
52
+ end
53
+ end
@@ -0,0 +1,51 @@
1
+ # frozen_string_literal: true
2
+
3
+ module RubyLLM
4
+ module Providers
5
+ module Gemini
6
+ # Image generation methods for the Gemini API implementation
7
+ module Images
8
# Relative endpoint path for image generation with the given model.
def images_url(model:)
  format('models/%<model>s:predict', model: model)
end
11
+
12
# Generates an image for the prompt with the given model.
# The size argument is accepted but not used when building the request.
def paint(prompt, model:, size:) # rubocop:disable Lint/UnusedMethodArgument
  body = render_image_payload(prompt)
  parse_image_response(post(images_url(model: model), body))
end
18
+
19
# Request body for the predict endpoint: one instance holding the prompt,
# asking for a single sample.
def render_image_payload(prompt)
  instance = { prompt: prompt }
  { instances: [instance], parameters: { sampleCount: 1 } }
end
31
+
32
# Turns a predict response into an Image whose url is a base64 data URI.
#
# Raises Error when the first prediction is missing or has no
# `bytesBase64Encoded` key. The mime type defaults to image/png.
def parse_image_response(response) # rubocop:disable Metrics/MethodLength
  prediction = response.body['predictions']&.first
  unless prediction&.key?('bytesBase64Encoded')
    raise Error, 'Unexpected response format from Gemini image generation API'
  end

  mime_type = prediction['mimeType'] || 'image/png'
  Image.new(
    url: "data:#{mime_type};base64,#{prediction['bytesBase64Encoded']}",
    revised_prompt: '', # Imagen doesn't return revised prompts
    model_id: ''
  )
end
48
+ end
49
+ end
50
+ end
51
+ end
@@ -0,0 +1,136 @@
1
+ # frozen_string_literal: true
2
+
3
+ module RubyLLM
4
+ module Providers
5
+ module Gemini
6
+ # Media handling methods for the Gemini API integration
7
+ module Media # rubocop:disable Metrics/ModuleLength
8
+ module_function
9
+
10
# Builds an `inline_data` part for an image.
#
# part[:source] may be:
# - a String starting with 'http': fetched and base64-encoded
# - any other String: treated as a file path and base64-encoded
# - a Hash with :url: fetched; :media_type overrides the guessed mime type
# - a Hash without :url: :data is used as-is, mime type defaults to image/jpeg
#
# Returns nil for any other kind of source.
def format_image(part) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength,Metrics/PerceivedComplexity
  source = part[:source]

  case source
  when String
    mime_type = mime_type_for_image(source)
    data = source.start_with?('http') ? fetch_and_encode_image(source) : encode_image_file(source)
  when Hash
    if source[:url]
      mime_type = source[:media_type] || mime_type_for_image(source[:url])
      data = fetch_and_encode_image(source[:url])
    else
      mime_type = source[:media_type] || 'image/jpeg'
      data = source[:data]
    end
  else
    return nil
  end

  { inline_data: { mime_type: mime_type, data: data } }
end
51
+
52
# Builds an `inline_data` part for a PDF.
#
# A String source starting with 'http' is fetched; otherwise part[:content]
# is base64-encoded when present, else the source is read as a file path.
def format_pdf(part) # rubocop:disable Metrics/MethodLength
  source = part[:source]
  remote = source.is_a?(String) && source.start_with?('http')

  data =
    if remote
      fetch_and_encode_pdf(source)
    elsif part[:content]
      Base64.strict_encode64(part[:content])
    else
      encode_pdf_file(source)
    end

  { inline_data: { mime_type: 'application/pdf', data: data } }
end
73
+
74
# Builds the part for an audio input.
#
# A String source starting with 'http' is referenced through `file_data`.
# Otherwise the bytes come from part[:content] or, failing that, from the
# file at part[:source], and are sent base64-encoded as `inline_data`.
def format_audio(part) # rubocop:disable Metrics/MethodLength
  source = part[:source]
  # source may be absent when raw bytes are supplied via :content, so
  # coerce it to a string before guessing the mime type from its extension
  mime_type = mime_type_for_audio(source.to_s)

  if source.is_a?(String) && source.start_with?('http')
    { file_data: { mime_type: mime_type, file_uri: source } }
  else
    # Audio is binary data: read it in binary mode
    content = part[:content] || File.binread(source)
    { inline_data: { mime_type: mime_type, data: Base64.strict_encode64(content) } }
  end
end
96
+
97
# Guesses an image mime type from the extension of a path or URL.
#
# For http(s) URLs any query string or fragment is ignored, so
# "https://host/a.png?w=100" is recognised as PNG. Unknown or missing
# extensions (including a nil path) fall back to image/jpeg.
def mime_type_for_image(path)
  location = path.to_s
  # Only URLs are trimmed, so local file names containing '?' or '#' keep working
  location = location.sub(/[?#].*\z/m, '') if location.match?(%r{\Ahttps?://}i)

  case File.extname(location).downcase.delete('.')
  when 'png' then 'image/png'
  when 'gif' then 'image/gif'
  when 'webp' then 'image/webp'
  else 'image/jpeg'
  end
end
106
+
107
# Guesses an audio mime type from the extension of a path or URL.
#
# For http(s) URLs any query string or fragment is ignored. Unknown or
# missing extensions (including a nil path) fall back to audio/wav.
def mime_type_for_audio(path)
  location = path.to_s
  # Only URLs are trimmed, so local file names containing '?' or '#' keep working
  location = location.sub(/[?#].*\z/m, '') if location.match?(%r{\Ahttps?://}i)

  case File.extname(location).downcase.delete('.')
  when 'mp3' then 'audio/mpeg'
  when 'ogg' then 'audio/ogg'
  else 'audio/wav'
  end
end
115
+
116
# Downloads the URL with Faraday and returns the response body as strict
# base64. NOTE(review): the response status is not inspected here.
def fetch_and_encode_image(url)
  Base64.strict_encode64(Faraday.get(url).body)
end
120
+
121
# Downloads the URL with Faraday and returns the response body as strict
# base64. NOTE(review): the response status is not inspected here.
def fetch_and_encode_pdf(url)
  body = Faraday.get(url).body
  Base64.strict_encode64(body)
end
125
+
126
# Returns the strict base64 encoding of the file at path.
# The file is read in binary mode because image data is not text.
def encode_image_file(path)
  Base64.strict_encode64(File.binread(path))
end
129
+
130
# Returns the strict base64 encoding of the file at path.
# The file is read in binary mode because PDF data is not text.
def encode_pdf_file(path)
  Base64.strict_encode64(File.binread(path))
end
133
+ end
134
+ end
135
+ end
136
+ end
@@ -3,16 +3,51 @@
3
3
  module RubyLLM
4
4
  module Providers
5
5
  module Gemini
6
- # Models methods of the Gemini API integration
6
+ # Models methods for the Gemini API integration
7
7
  module Models
8
- module_function
8
+ # Methods needed by Provider - must be public
9
# Relative endpoint path for listing models.
def models_url = 'models'
9
12
 
10
- def parse_list_models_response(response, slug, capabilities)
11
- response.body['data']&.each do |model|
12
- model['id'] = model['id'].delete_prefix('models/')
13
+ def list_models
14
+ response = connection.get("models?key=#{RubyLLM.config.gemini_api_key}") do |req|
15
+ req.headers.merge! headers
13
16
  end
14
17
 
15
- OpenAI::Models.parse_list_models_response(response, slug, capabilities)
18
+ parse_list_models_response(response, slug, capabilities)
19
+ end
20
+
21
+ private
22
+
23
# Converts the `models` array of a list-models response into ModelInfo
# objects. Returns an empty array when the body has no `models` key.
#
# response     - HTTP response whose body is the parsed JSON hash
# slug         - provider identifier stored on every ModelInfo
# capabilities - object queried per model id for type, family, limits,
#                feature flags and prices
def parse_list_models_response(response, slug, capabilities) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
  (response.body['models'] || []).map do |model|
    # Strip only the leading "models/" prefix; gsub would also remove the
    # text if it appeared later in the name
    model_id = model['name'].delete_prefix('models/')

    ModelInfo.new(
      id: model_id,
      created_at: nil,
      display_name: model['displayName'],
      provider: slug,
      type: capabilities.model_type(model_id),
      family: capabilities.model_family(model_id),
      metadata: {
        version: model['version'],
        description: model['description'],
        input_token_limit: model['inputTokenLimit'],
        output_token_limit: model['outputTokenLimit'],
        supported_generation_methods: model['supportedGenerationMethods']
      },
      # Prefer limits reported by the API; fall back to the capabilities table
      context_window: model['inputTokenLimit'] || capabilities.context_window_for(model_id),
      max_tokens: model['outputTokenLimit'] || capabilities.max_tokens_for(model_id),
      supports_vision: capabilities.supports_vision?(model_id),
      supports_functions: capabilities.supports_functions?(model_id),
      supports_json_mode: capabilities.supports_json_mode?(model_id),
      input_price_per_million: capabilities.input_price_for(model_id),
      output_price_per_million: capabilities.output_price_for(model_id)
    )
  end
end
17
52
  end
18
53
  end
@@ -0,0 +1,99 @@
1
+ # frozen_string_literal: true
2
+
3
+ module RubyLLM
4
+ module Providers
5
+ module Gemini
6
+ # Streaming methods for the Gemini API implementation
7
+ module Streaming
8
+ # Need to make stream_completion public for chat.rb to access
9
# Streams a completion, feeding every chunk to the block and to a
# StreamAccumulator, and returns the accumulated message.
#
# If the accumulated message is a tool call with empty content and a
# matching tool exists in @tools, the tool is invoked right here. Its
# result is wrapped in one extra assistant chunk ("The result is ..."),
# which is added to the accumulator and also passed to the block.
def stream_completion(model, payload, &block) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
  accumulator = StreamAccumulator.new
  endpoint = "models/#{model}:streamGenerateContent?alt=sse"

  post(endpoint, payload) { |req| req.options.on_data = stream_handler(accumulator, &block) }

  message = accumulator.to_message
  if message.tool_call? && message.content.to_s.empty? && @tools && !@tools.empty?
    call = message.tool_calls.values.first

    if (tool = @tools[call.name.to_sym])
      outcome = tool.call(call.arguments)
      result_chunk = Chunk.new(
        role: :assistant,
        content: "The result is #{outcome}",
        model_id: message.model_id,
        input_tokens: message.input_tokens,
        output_tokens: message.output_tokens,
        tool_calls: message.tool_calls
      )

      accumulator.add(result_chunk)
      block.call(result_chunk)
    end
  end

  # Re-read the accumulator so the synthetic chunk (if any) is included
  accumulator.to_message
end
43
+
44
+ private
45
+
46
+ # Handle streaming
47
# Builds the on_data handler for streaming responses.
#
# Events without candidates are skipped. For every other event, the first
# candidate's parts are turned into a Chunk that is added to the
# accumulator and passed to the block. The first functionCall part with a
# name becomes a single ToolCall under a fresh UUID, with its args
# serialized as a JSON string. Text parts are concatenated, and content is
# nil when there are none.
def stream_handler(accumulator, &block) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity
  to_json_stream do |data| # rubocop:disable Metrics/BlockLength
    next unless data['candidates']&.any?

    parts = data['candidates'][0].dig('content', 'parts')

    function_part = parts&.find { |part| part['functionCall'] }
    function_data = function_part && function_part['functionCall']

    tool_calls = nil
    if function_data && function_data['name']
      call_id = SecureRandom.uuid
      tool_calls = {
        call_id => ToolCall.new(
          id: call_id,
          name: function_data['name'],
          arguments: JSON.generate(function_data['args']) # Hash -> JSON string
        )
      }
    end

    texts = (parts || []).filter_map { |part| part['text'] }
    text = texts.empty? ? nil : texts.join

    chunk = Chunk.new(
      role: :assistant,
      content: text,
      model_id: data['modelVersion'],
      input_tokens: data.dig('usageMetadata', 'promptTokenCount'),
      output_tokens: data.dig('usageMetadata', 'candidatesTokenCount'),
      tool_calls: tool_calls
    )

    accumulator.add(chunk)
    block.call(chunk)
  end
end
96
+ end
97
+ end
98
+ end
99
+ end
@@ -0,0 +1,88 @@
1
+ # frozen_string_literal: true
2
+
3
+ module RubyLLM
4
+ module Providers
5
+ module Gemini
6
+ # Tools methods for the Gemini API implementation
7
+ module Tools
8
+ # Format tools for Gemini API
9
# Wraps the declarations of all tools in the single-element array used for
# the payload's :tools entry. Returns [] when there are no tools.
def format_tools(tools)
  return [] if tools.empty?

  declarations = tools.each_value.map { |tool| function_declaration_for(tool) }
  [{ functionDeclarations: declarations }]
end
16
+
17
+ # Extract tool calls from response data
18
# Extracts the first function call of the first candidate.
#
# Returns nil when data is not a Hash, has no first candidate, has no
# parts array, or none of the parts carries a functionCall. Otherwise
# returns a one-entry hash mapping a freshly generated UUID to a ToolCall
# built from the call's name and args.
def extract_tool_calls(data) # rubocop:disable Metrics/CyclomaticComplexity,Metrics/MethodLength
  return nil unless data.is_a?(Hash)

  candidate = data.dig('candidates', 0)
  parts = candidate.dig('content', 'parts') if candidate
  return nil unless parts.is_a?(Array)

  function_part = parts.find { |part| part['functionCall'] }
  function_data = function_part && function_part['functionCall']
  return nil unless function_data

  # Gemini supplies no call id, so one is generated locally
  call_id = SecureRandom.uuid
  {
    call_id => ToolCall.new(
      id: call_id,
      name: function_data['name'],
      arguments: function_data['args']
    )
  }
end
49
+
50
+ private
51
+
52
+ # Format a single tool for Gemini API
53
# Builds the function declaration for one tool: its name, description and
# an OBJECT schema whose `required` list holds the names (as strings) of
# the parameters flagged as required.
def function_declaration_for(tool)
  properties = format_parameters(tool.parameters)
  required_names = tool.parameters.filter_map { |name, param| name.to_s if param.required }

  {
    name: tool.name,
    description: tool.description,
    parameters: { type: 'OBJECT', properties: properties, required: required_names }
  }
end
64
+
65
+ # Format tool parameters for Gemini API
66
# Maps every parameter to its Gemini schema entry (type and description),
# leaving out keys whose value is nil.
def format_parameters(parameters)
  parameters.each_with_object({}) do |(name, param), schema|
    entry = { type: param_type_for_gemini(param.type), description: param.description }
    schema[name] = entry.compact
  end
end
74
+
75
+ # Convert RubyLLM param types to Gemini API types
76
# Translates a RubyLLM parameter type into the Gemini schema type name.
# Matching is case-insensitive; unrecognised types are sent as STRING.
def param_type_for_gemini(type)
  {
    'integer' => 'NUMBER',
    'number' => 'NUMBER',
    'float' => 'NUMBER',
    'boolean' => 'BOOLEAN',
    'array' => 'ARRAY',
    'object' => 'OBJECT'
  }.fetch(type.to_s.downcase, 'STRING')
end
85
+ end
86
+ end
87
+ end
88
+ end
@@ -2,20 +2,26 @@
2
2
 
3
3
  module RubyLLM
4
4
  module Providers
5
- # Gemini API integration.
5
+ # Native Gemini API implementation
6
6
  module Gemini
7
- extend OpenAI
7
+ extend Provider
8
+ extend Gemini::Chat
9
+ extend Gemini::Embeddings
10
+ extend Gemini::Images
8
11
  extend Gemini::Models
12
+ extend Gemini::Streaming
13
+ extend Gemini::Tools
14
+ extend Gemini::Media
9
15
 
10
16
  module_function
11
17
 
12
18
  def api_base
13
- 'https://generativelanguage.googleapis.com/v1beta/openai'
19
+ 'https://generativelanguage.googleapis.com/v1beta'
14
20
  end
15
21
 
16
22
  def headers
17
23
  {
18
- 'Authorization' => "Bearer #{RubyLLM.config.gemini_api_key}"
24
+ 'x-goog-api-key' => RubyLLM.config.gemini_api_key
19
25
  }
20
26
  end
21
27
 
@@ -20,8 +20,6 @@ module RubyLLM
20
20
  }
21
21
  end
22
22
 
23
- private
24
-
25
23
  def parse_image_response(response)
26
24
  data = response.body
27
25
  image_data = data['data'].first
@@ -47,7 +47,7 @@ module RubyLLM
47
47
  ToolCall.new(
48
48
  id: tc.id,
49
49
  name: tc.name,
50
- arguments: JSON.parse(tc.arguments)
50
+ arguments: tc.arguments.is_a?(String) ? JSON.parse(tc.arguments) : tc.arguments
51
51
  )
52
52
  end
53
53
  end