ruby-gemini-api 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -1
- data/README.md +196 -31
- data/lib/gemini/images.rb +263 -27
- data/lib/gemini/response.rb +21 -0
- data/lib/gemini/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: c80fbf2cb7142ab3ff7d6a17d8b5f1e43960ec8ed398ee7f4c35286ed72ce962
|
|
4
|
+
data.tar.gz: 261f1a1e04757b93aac9c8a42263e355758bba4ec512f6c9dd408b63146e17fe
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 780a9684677d9bfdd8945727c9cd00e1e0ecc9e43a63a038da17a18aa8e524e43f601946bd1fbc00c9406e4a45b80d59fedfdfc85e190b973cedd9c2340a5be4
|
|
7
|
+
data.tar.gz: 399dff8bc6f6693b6267412b2fee067269125ea9b16ffd105d94b4ac9154ca1ade4246b6d3ca9aa9e7cb689e3f2d4a12ea771d4fc688de24e36688205675ab0f
|
data/CHANGELOG.md
CHANGED
|
@@ -7,4 +7,10 @@
|
|
|
7
7
|
- Changed generate_contents to accept temperature parameter
|
|
8
8
|
|
|
9
9
|
## [0.1.2] - 2025-07-10
|
|
10
|
-
- Add function calling
|
|
10
|
+
- Add function calling
|
|
11
|
+
|
|
12
|
+
## [0.1.3] - 2025-10-09
|
|
13
|
+
- Add support for multi-image input
|
|
14
|
+
|
|
15
|
+
## [0.1.4] - 2025-11-08
|
|
16
|
+
- Add support for grounding search
|
data/README.md
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
[README ‐ 日本語](https://github.com/rira100000000/ruby-gemini-api/
|
|
1
|
+
[README ‐ 日本語](https://github.com/rira100000000/ruby-gemini-api/blob/main/README_ja.md)
|
|
2
2
|
# Ruby-Gemini-API
|
|
3
3
|
|
|
4
4
|
A Ruby client library for Google's Gemini API. This gem provides a simple, intuitive interface for interacting with Gemini's generative AI capabilities, following patterns similar to other AI client libraries.
|
|
@@ -18,7 +18,11 @@ This project is inspired by and pays homage to [ruby-openai](https://github.com/
|
|
|
18
18
|
- Document processing (PDFs and other formats)
|
|
19
19
|
- Context caching for efficient processing
|
|
20
20
|
|
|
21
|
-
###
|
|
21
|
+
### Function Calling
|
|
22
|
+
|
|
23
|
+
This library provides an intuitive DSL to define tools for function calling, making it easy to describe your functions to the Gemini model.
|
|
24
|
+
|
|
25
|
+
#### Basic Usage
|
|
22
26
|
|
|
23
27
|
```ruby
|
|
24
28
|
require 'gemini'
|
|
@@ -26,45 +30,67 @@ require 'gemini'
|
|
|
26
30
|
# Initialize Gemini client
|
|
27
31
|
client = Gemini::Client.new(ENV['GEMINI_API_KEY'])
|
|
28
32
|
|
|
29
|
-
# Define
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
name: "get_current_weather",
|
|
36
|
-
description: "Get the current weather information",
|
|
37
|
-
parameters: {
|
|
38
|
-
type: "object",
|
|
39
|
-
properties: {
|
|
40
|
-
location: {
|
|
41
|
-
type: "string",
|
|
42
|
-
description: "City name, e.g., Tokyo"
|
|
43
|
-
}
|
|
44
|
-
},
|
|
45
|
-
required: ["location"]
|
|
46
|
-
}
|
|
47
|
-
}
|
|
48
|
-
]
|
|
49
|
-
}
|
|
50
|
-
]
|
|
33
|
+
# Define tools using the ToolDefinition DSL
|
|
34
|
+
tools = Gemini::ToolDefinition.new do
|
|
35
|
+
function :get_current_weather, description: "Get the current weather information" do
|
|
36
|
+
property :location, type: :string, description: "City name, e.g., Tokyo", required: true
|
|
37
|
+
end
|
|
38
|
+
end
|
|
51
39
|
|
|
52
40
|
# User prompt
|
|
53
41
|
user_prompt = "Tell me the current weather in Tokyo."
|
|
54
42
|
|
|
55
|
-
# Send request with
|
|
43
|
+
# Send request with the defined tools
|
|
56
44
|
response = client.generate_content(
|
|
57
45
|
user_prompt,
|
|
58
|
-
model: "gemini-
|
|
46
|
+
model: "gemini-1.5-flash", # Or any model that supports function calling
|
|
59
47
|
tools: tools
|
|
60
48
|
)
|
|
61
49
|
|
|
62
50
|
# Parse function call from the response
|
|
63
51
|
unless response.function_calls.empty?
|
|
64
52
|
function_call = response.function_calls.first
|
|
65
|
-
puts "Function
|
|
66
|
-
puts "
|
|
53
|
+
puts "Function to call: #{function_call['name']}"
|
|
54
|
+
puts "Arguments: #{function_call['args']}"
|
|
55
|
+
end
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
#### Advanced Tool Management
|
|
59
|
+
|
|
60
|
+
You can define multiple functions, add them dynamically, combine tool sets, and manage them easily.
|
|
61
|
+
|
|
62
|
+
```ruby
|
|
63
|
+
# Define a set of weather tools
|
|
64
|
+
weather_tools = Gemini::ToolDefinition.new do
|
|
65
|
+
function :get_current_weather, description: "Get the current weather" do
|
|
66
|
+
property :location, type: :string, description: "City name", required: true
|
|
67
|
+
end
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
# Define another set of stock-related tools
|
|
71
|
+
stock_tools = Gemini::ToolDefinition.new do
|
|
72
|
+
function :get_stock_price, description: "Get the stock price for a symbol" do
|
|
73
|
+
property :ticker, type: :string, description: "Stock ticker symbol", required: true
|
|
74
|
+
end
|
|
67
75
|
end
|
|
76
|
+
|
|
77
|
+
# Combine tool sets using the + operator
|
|
78
|
+
all_tools = weather_tools + stock_tools
|
|
79
|
+
puts "Combined functions: #{all_tools.list_functions}"
|
|
80
|
+
# => Combined functions: [:get_current_weather, :get_stock_price]
|
|
81
|
+
|
|
82
|
+
# Add a new function later
|
|
83
|
+
all_tools.add_function :send_email, description: "Send an email" do
|
|
84
|
+
property :to, type: :string, required: true
|
|
85
|
+
property :body, type: :string, required: true
|
|
86
|
+
end
|
|
87
|
+
puts "After adding a function: #{all_tools.list_functions}"
|
|
88
|
+
# => After adding a function: [:get_current_weather, :get_stock_price, :send_email]
|
|
89
|
+
|
|
90
|
+
# Delete a function
|
|
91
|
+
all_tools.delete_function(:get_stock_price)
|
|
92
|
+
puts "After deleting a function: #{all_tools.list_functions}"
|
|
93
|
+
# => After deleting a function: [:get_current_weather, :send_email]
|
|
68
94
|
```
|
|
69
95
|
|
|
70
96
|
## Installation
|
|
@@ -246,6 +272,79 @@ client.files.delete(name: file_name)
|
|
|
246
272
|
|
|
247
273
|
For more examples, check out the `demo/vision_demo.rb` and `demo/file_vision_demo.rb` files included with the gem.
|
|
248
274
|
|
|
275
|
+
### Grounding with Google Search
|
|
276
|
+
|
|
277
|
+
You can use Gemini API's Google Search grounding feature to retrieve real-time information.
|
|
278
|
+
|
|
279
|
+
#### Basic Usage
|
|
280
|
+
|
|
281
|
+
```ruby
|
|
282
|
+
require 'gemini'
|
|
283
|
+
|
|
284
|
+
client = Gemini::Client.new(ENV['GEMINI_API_KEY'])
|
|
285
|
+
|
|
286
|
+
# Use Google Search to get real-time information
|
|
287
|
+
response = client.generate_content(
|
|
288
|
+
"Who won the euro 2024?",
|
|
289
|
+
model: "gemini-2.0-flash-lite",
|
|
290
|
+
tools: [{ google_search: {} }]
|
|
291
|
+
)
|
|
292
|
+
|
|
293
|
+
if response.success?
|
|
294
|
+
puts response.text
|
|
295
|
+
|
|
296
|
+
# Check grounding information
|
|
297
|
+
if response.grounded?
|
|
298
|
+
puts "\nSource references:"
|
|
299
|
+
response.grounding_chunks.each do |chunk|
|
|
300
|
+
if chunk['web']
|
|
301
|
+
puts "- #{chunk['web']['title']}"
|
|
302
|
+
puts " #{chunk['web']['uri']}"
|
|
303
|
+
end
|
|
304
|
+
end
|
|
305
|
+
end
|
|
306
|
+
end
|
|
307
|
+
```
|
|
308
|
+
|
|
309
|
+
#### Checking Grounding Metadata
|
|
310
|
+
|
|
311
|
+
```ruby
|
|
312
|
+
# Check if response is grounded
|
|
313
|
+
if response.grounded?
|
|
314
|
+
# Get full grounding metadata
|
|
315
|
+
metadata = response.grounding_metadata
|
|
316
|
+
|
|
317
|
+
# Get source chunks (references)
|
|
318
|
+
chunks = response.grounding_chunks
|
|
319
|
+
|
|
320
|
+
# Get search entry point
|
|
321
|
+
entry_point = response.search_entry_point
|
|
322
|
+
end
|
|
323
|
+
```
|
|
324
|
+
|
|
325
|
+
#### Example with Different Topics
|
|
326
|
+
|
|
327
|
+
```ruby
|
|
328
|
+
response = client.generate_content(
|
|
329
|
+
"What are the latest AI developments in 2024?",
|
|
330
|
+
model: "gemini-2.0-flash-lite",
|
|
331
|
+
tools: [{ google_search: {} }]
|
|
332
|
+
)
|
|
333
|
+
|
|
334
|
+
if response.success? && response.grounded?
|
|
335
|
+
puts response.text
|
|
336
|
+
puts "\nSources: #{response.grounding_chunks.length} references"
|
|
337
|
+
end
|
|
338
|
+
```
|
|
339
|
+
|
|
340
|
+
#### Demo Application
|
|
341
|
+
|
|
342
|
+
You can find a grounding search demo in:
|
|
343
|
+
|
|
344
|
+
```bash
|
|
345
|
+
ruby demo/grounding_search_demo_ja.rb
|
|
346
|
+
```
|
|
347
|
+
|
|
249
348
|
### Image Generation
|
|
250
349
|
|
|
251
350
|
```ruby
|
|
@@ -253,11 +352,11 @@ require 'gemini'
|
|
|
253
352
|
|
|
254
353
|
client = Gemini::Client.new(ENV['GEMINI_API_KEY'])
|
|
255
354
|
|
|
256
|
-
# Generate an image using Gemini 2.
|
|
355
|
+
# Generate an image using Gemini 2.5
|
|
257
356
|
response = client.images.generate(
|
|
258
357
|
parameters: {
|
|
259
358
|
prompt: "A beautiful sunset over the ocean with sailing boats",
|
|
260
|
-
model: "gemini-2.
|
|
359
|
+
model: "gemini-2.5-flash-image-preview",
|
|
261
360
|
size: "16:9"
|
|
262
361
|
}
|
|
263
362
|
)
|
|
@@ -272,6 +371,72 @@ else
|
|
|
272
371
|
end
|
|
273
372
|
```
|
|
274
373
|
|
|
374
|
+
#### Image Generation with Multiple Input Images
|
|
375
|
+
|
|
376
|
+
You can generate new images by combining or editing multiple input images:
|
|
377
|
+
|
|
378
|
+
```ruby
|
|
379
|
+
require 'gemini'
|
|
380
|
+
|
|
381
|
+
client = Gemini::Client.new(ENV['GEMINI_API_KEY'])
|
|
382
|
+
|
|
383
|
+
# Generate a new image using multiple input images
|
|
384
|
+
response = client.images.generate(
|
|
385
|
+
parameters: {
|
|
386
|
+
prompt: "Combine these two images to create a single artistic composition",
|
|
387
|
+
image_paths: ["path/to/image1.jpg", "path/to/image2.png"],
|
|
388
|
+
model: "gemini-2.5-flash-image-preview",
|
|
389
|
+
temperature: 0.7
|
|
390
|
+
}
|
|
391
|
+
)
|
|
392
|
+
|
|
393
|
+
# Save the generated image
|
|
394
|
+
if response.success? && response.images.any?
|
|
395
|
+
response.save_image("combined_image.png")
|
|
396
|
+
puts "Combined image saved"
|
|
397
|
+
end
|
|
398
|
+
```
|
|
399
|
+
|
|
400
|
+
You can also use file objects:
|
|
401
|
+
|
|
402
|
+
```ruby
|
|
403
|
+
# Using file objects
|
|
404
|
+
File.open("image1.jpg", "rb") do |file1|
|
|
405
|
+
File.open("image2.png", "rb") do |file2|
|
|
406
|
+
response = client.images.generate(
|
|
407
|
+
parameters: {
|
|
408
|
+
prompt: "Combine these images together",
|
|
409
|
+
images: [file1, file2],
|
|
410
|
+
model: "gemini-2.5-flash-image-preview"
|
|
411
|
+
}
|
|
412
|
+
)
|
|
413
|
+
|
|
414
|
+
if response.success? && response.images.any?
|
|
415
|
+
response.save_image("result.png")
|
|
416
|
+
end
|
|
417
|
+
end
|
|
418
|
+
end
|
|
419
|
+
```
|
|
420
|
+
|
|
421
|
+
Base64-encoded image data is also supported:
|
|
422
|
+
|
|
423
|
+
```ruby
|
|
424
|
+
require 'base64'
|
|
425
|
+
|
|
426
|
+
# Base64-encoded image data
|
|
427
|
+
base64_data1 = Base64.strict_encode64(File.binread("image1.jpg"))
|
|
428
|
+
base64_data2 = Base64.strict_encode64(File.binread("image2.png"))
|
|
429
|
+
|
|
430
|
+
response = client.images.generate(
|
|
431
|
+
parameters: {
|
|
432
|
+
prompt: "Merge these images together",
|
|
433
|
+
image_base64s: [base64_data1, base64_data2],
|
|
434
|
+
mime_types: ["image/jpeg", "image/png"],
|
|
435
|
+
model: "gemini-2.5-flash-image-preview"
|
|
436
|
+
}
|
|
437
|
+
)
|
|
438
|
+
```
|
|
439
|
+
|
|
275
440
|
You can also use Imagen 3 model (Note: This feature is not fully tested yet):
|
|
276
441
|
|
|
277
442
|
```ruby
|
|
@@ -293,7 +458,7 @@ if response.success? && !response.images.empty?
|
|
|
293
458
|
end
|
|
294
459
|
```
|
|
295
460
|
|
|
296
|
-
For
|
|
461
|
+
For complete examples, check out the `demo/image_generation_demo.rb` and `demo/multi_image_generation_demo.rb` files included with the gem.
|
|
297
462
|
|
|
298
463
|
### Audio Transcription
|
|
299
464
|
|
data/lib/gemini/images.rb
CHANGED
|
@@ -4,43 +4,279 @@ module Gemini
|
|
|
4
4
|
@client = client
|
|
5
5
|
end
|
|
6
6
|
|
|
7
|
-
#
|
|
7
|
+
# Main method to generate images
|
|
8
8
|
def generate(parameters: {})
|
|
9
9
|
prompt = parameters[:prompt]
|
|
10
10
|
raise ArgumentError, "prompt parameter is required" unless prompt
|
|
11
11
|
|
|
12
|
-
|
|
13
|
-
model = parameters[:model] || "gemini-2.0-flash-exp-image-generation"
|
|
12
|
+
model = parameters[:model] || "gemini-2.5-flash-image-preview"
|
|
14
13
|
|
|
15
|
-
#
|
|
14
|
+
# Image editing mode if input images are provided (supports single/multiple images)
|
|
15
|
+
if has_input_images?(parameters)
|
|
16
|
+
return generate_with_images(prompt, model, parameters)
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
# Image generation process based on model
|
|
16
20
|
if model.start_with?("imagen")
|
|
17
|
-
# Imagen 3
|
|
21
|
+
# Use Imagen 3
|
|
18
22
|
response = imagen_generate(prompt, parameters)
|
|
19
23
|
else
|
|
20
|
-
# Gemini 2.0
|
|
24
|
+
# Use Gemini 2.0
|
|
21
25
|
response = gemini_generate(prompt, parameters)
|
|
22
26
|
end
|
|
23
27
|
|
|
24
|
-
#
|
|
28
|
+
# Wrap and return response
|
|
25
29
|
Gemini::Response.new(response)
|
|
26
30
|
end
|
|
27
31
|
|
|
28
32
|
private
|
|
33
|
+
|
|
34
|
+
# Check if input images exist (supports single/multiple images)
|
|
35
|
+
def has_input_images?(parameters)
|
|
36
|
+
# Single image parameters
|
|
37
|
+
single_image = parameters[:image] || parameters[:image_path] || parameters[:image_base64]
|
|
38
|
+
# Multiple image parameters
|
|
39
|
+
multiple_images = parameters[:images] || parameters[:image_paths] || parameters[:image_base64s]
|
|
40
|
+
|
|
41
|
+
single_image || multiple_images
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
# Image generation with image+text (supports single/multiple images)
|
|
45
|
+
def generate_with_images(prompt, model, parameters)
|
|
46
|
+
# Process image data (supports single/multiple images)
|
|
47
|
+
image_parts = process_input_images(parameters)
|
|
48
|
+
|
|
49
|
+
# Build content parts (place text first, then images)
|
|
50
|
+
parts = [{ "text" => prompt }] + image_parts
|
|
51
|
+
|
|
52
|
+
# Build generation config
|
|
53
|
+
generation_config = {
|
|
54
|
+
"responseModalities" => ["Image"]
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
# Add temperature setting if provided
|
|
58
|
+
if parameters[:temperature]
|
|
59
|
+
generation_config["temperature"] = parameters[:temperature]
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
# Build request parameters
|
|
63
|
+
request_params = {
|
|
64
|
+
"contents" => [{
|
|
65
|
+
"parts" => parts
|
|
66
|
+
}],
|
|
67
|
+
"generationConfig" => generation_config
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
# Merge other parameters (specify keys to exclude)
|
|
71
|
+
excluded_keys = [:prompt, :image, :image_path, :image_base64, :images, :image_paths, :image_base64s, :model, :temperature]
|
|
72
|
+
parameters.each do |key, value|
|
|
73
|
+
next if excluded_keys.include?(key)
|
|
74
|
+
request_params[key.to_s] = value
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
# API call
|
|
78
|
+
response = @client.json_post(
|
|
79
|
+
path: "models/#{model}:generateContent",
|
|
80
|
+
parameters: request_params
|
|
81
|
+
)
|
|
82
|
+
|
|
83
|
+
Gemini::Response.new(response)
|
|
84
|
+
end
|
|
29
85
|
|
|
30
|
-
#
|
|
86
|
+
# Image generation with image+text (kept for backward compatibility)
|
|
87
|
+
def generate_with_image(prompt, model, parameters)
|
|
88
|
+
generate_with_images(prompt, model, parameters)
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
# Process input images (supports single/multiple images)
|
|
92
|
+
def process_input_images(parameters)
|
|
93
|
+
image_parts = []
|
|
94
|
+
|
|
95
|
+
# Process multiple images
|
|
96
|
+
if parameters[:images] || parameters[:image_paths] || parameters[:image_base64s]
|
|
97
|
+
# Multiple file objects
|
|
98
|
+
if parameters[:images]
|
|
99
|
+
parameters[:images].each_with_index do |image, index|
|
|
100
|
+
if image.respond_to?(:read)
|
|
101
|
+
image_data = process_image_io(image)
|
|
102
|
+
image_parts << create_image_part(image_data)
|
|
103
|
+
else
|
|
104
|
+
raise ArgumentError, "Invalid image at index #{index}. Expected file object."
|
|
105
|
+
end
|
|
106
|
+
end
|
|
107
|
+
end
|
|
108
|
+
|
|
109
|
+
# Multiple file paths
|
|
110
|
+
if parameters[:image_paths]
|
|
111
|
+
parameters[:image_paths].each_with_index do |path, index|
|
|
112
|
+
image_data = process_image_file(path)
|
|
113
|
+
image_parts << create_image_part(image_data)
|
|
114
|
+
end
|
|
115
|
+
end
|
|
116
|
+
|
|
117
|
+
# Multiple Base64 data
|
|
118
|
+
if parameters[:image_base64s]
|
|
119
|
+
mime_types = parameters[:mime_types] || Array.new(parameters[:image_base64s].size, "image/jpeg")
|
|
120
|
+
parameters[:image_base64s].each_with_index do |base64_data, index|
|
|
121
|
+
image_data = {
|
|
122
|
+
data: base64_data,
|
|
123
|
+
mime_type: mime_types[index] || "image/jpeg"
|
|
124
|
+
}
|
|
125
|
+
image_parts << create_image_part(image_data)
|
|
126
|
+
end
|
|
127
|
+
end
|
|
128
|
+
else
|
|
129
|
+
# Process single image (for backward compatibility)
|
|
130
|
+
image_data = process_single_input_image(parameters)
|
|
131
|
+
image_parts << create_image_part(image_data)
|
|
132
|
+
end
|
|
133
|
+
|
|
134
|
+
image_parts
|
|
135
|
+
end
|
|
136
|
+
|
|
137
|
+
# Process single input image (for backward compatibility)
|
|
138
|
+
def process_single_input_image(parameters)
|
|
139
|
+
if parameters[:image_base64]
|
|
140
|
+
# When Base64 data is provided directly
|
|
141
|
+
{
|
|
142
|
+
data: parameters[:image_base64],
|
|
143
|
+
mime_type: parameters[:mime_type] || "image/jpeg"
|
|
144
|
+
}
|
|
145
|
+
elsif parameters[:image_path]
|
|
146
|
+
# When file path is provided
|
|
147
|
+
process_image_file(parameters[:image_path])
|
|
148
|
+
elsif parameters[:image]
|
|
149
|
+
# When file object is provided
|
|
150
|
+
if parameters[:image].respond_to?(:read)
|
|
151
|
+
process_image_io(parameters[:image])
|
|
152
|
+
else
|
|
153
|
+
raise ArgumentError, "Invalid image parameter. Expected file path, file object, or base64 data."
|
|
154
|
+
end
|
|
155
|
+
else
|
|
156
|
+
raise ArgumentError, "No image data provided"
|
|
157
|
+
end
|
|
158
|
+
end
|
|
159
|
+
|
|
160
|
+
# Create API part from image data
|
|
161
|
+
def create_image_part(image_data)
|
|
162
|
+
{
|
|
163
|
+
"inline_data" => {
|
|
164
|
+
"mime_type" => image_data[:mime_type],
|
|
165
|
+
"data" => image_data[:data]
|
|
166
|
+
}
|
|
167
|
+
}
|
|
168
|
+
end
|
|
169
|
+
|
|
170
|
+
# Process input image (old method - kept for backward compatibility)
|
|
171
|
+
def process_input_image(parameters)
|
|
172
|
+
process_single_input_image(parameters)
|
|
173
|
+
end
|
|
174
|
+
|
|
175
|
+
# Process image from file path (newly added)
|
|
176
|
+
def process_image_file(file_path)
|
|
177
|
+
raise ArgumentError, "File does not exist: #{file_path}" unless File.exist?(file_path)
|
|
178
|
+
|
|
179
|
+
require 'base64'
|
|
180
|
+
|
|
181
|
+
# Determine MIME type
|
|
182
|
+
mime_type = determine_image_mime_type(file_path)
|
|
183
|
+
|
|
184
|
+
# Read file and encode as Base64
|
|
185
|
+
file_data = File.binread(file_path)
|
|
186
|
+
base64_data = Base64.strict_encode64(file_data)
|
|
187
|
+
|
|
188
|
+
{
|
|
189
|
+
data: base64_data,
|
|
190
|
+
mime_type: mime_type
|
|
191
|
+
}
|
|
192
|
+
end
|
|
193
|
+
|
|
194
|
+
# Process image from IO object (newly added)
|
|
195
|
+
def process_image_io(image_io)
|
|
196
|
+
require 'base64'
|
|
197
|
+
|
|
198
|
+
# Move to beginning of file
|
|
199
|
+
image_io.rewind if image_io.respond_to?(:rewind)
|
|
200
|
+
|
|
201
|
+
# Read data
|
|
202
|
+
file_data = image_io.read
|
|
203
|
+
|
|
204
|
+
# Determine MIME type (use file path if available, otherwise infer from content)
|
|
205
|
+
mime_type = if image_io.respond_to?(:path) && image_io.path
|
|
206
|
+
determine_image_mime_type(image_io.path)
|
|
207
|
+
else
|
|
208
|
+
determine_mime_type_from_content(file_data)
|
|
209
|
+
end
|
|
210
|
+
|
|
211
|
+
# Base64 encode
|
|
212
|
+
base64_data = Base64.strict_encode64(file_data)
|
|
213
|
+
|
|
214
|
+
{
|
|
215
|
+
data: base64_data,
|
|
216
|
+
mime_type: mime_type
|
|
217
|
+
}
|
|
218
|
+
end
|
|
219
|
+
|
|
220
|
+
# Determine image MIME type from file path (newly added)
|
|
221
|
+
def determine_image_mime_type(file_path)
|
|
222
|
+
ext = File.extname(file_path).downcase
|
|
223
|
+
case ext
|
|
224
|
+
when ".jpg", ".jpeg"
|
|
225
|
+
"image/jpeg"
|
|
226
|
+
when ".png"
|
|
227
|
+
"image/png"
|
|
228
|
+
when ".gif"
|
|
229
|
+
"image/gif"
|
|
230
|
+
when ".webp"
|
|
231
|
+
"image/webp"
|
|
232
|
+
when ".bmp"
|
|
233
|
+
"image/bmp"
|
|
234
|
+
when ".tiff", ".tif"
|
|
235
|
+
"image/tiff"
|
|
236
|
+
else
|
|
237
|
+
# Default to JPEG
|
|
238
|
+
"image/jpeg"
|
|
239
|
+
end
|
|
240
|
+
end
|
|
241
|
+
|
|
242
|
+
# Determine MIME type from file content (newly added)
|
|
243
|
+
def determine_mime_type_from_content(data)
|
|
244
|
+
return "image/jpeg" if data.nil? || data.empty?
|
|
245
|
+
|
|
246
|
+
# Check file header
|
|
247
|
+
header = data[0, 8].bytes
|
|
248
|
+
|
|
249
|
+
case
|
|
250
|
+
when header[0..1] == [0xFF, 0xD8]
|
|
251
|
+
"image/jpeg"
|
|
252
|
+
when header[0..7] == [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A]
|
|
253
|
+
"image/png"
|
|
254
|
+
when header[0..2] == [0x47, 0x49, 0x46]
|
|
255
|
+
"image/gif"
|
|
256
|
+
when header[0..3] == [0x52, 0x49, 0x46, 0x46] && data[8..11].bytes == [0x57, 0x45, 0x42, 0x50]
|
|
257
|
+
"image/webp"
|
|
258
|
+
when header[0..1] == [0x42, 0x4D]
|
|
259
|
+
"image/bmp"
|
|
260
|
+
else
|
|
261
|
+
# Default to JPEG
|
|
262
|
+
"image/jpeg"
|
|
263
|
+
end
|
|
264
|
+
end
|
|
265
|
+
|
|
266
|
+
# Image generation using Gemini 2.5 model (original code unchanged)
|
|
31
267
|
def gemini_generate(prompt, parameters)
|
|
32
|
-
#
|
|
33
|
-
model = parameters[:model] || "gemini-2.
|
|
268
|
+
# Prepare parameters
|
|
269
|
+
model = parameters[:model] || "gemini-2.5-flash-image-preview"
|
|
34
270
|
|
|
35
|
-
#
|
|
271
|
+
# Process size parameter (currently not used in Gemini API)
|
|
36
272
|
# aspect_ratio = process_size_parameter(parameters[:size])
|
|
37
273
|
|
|
38
|
-
#
|
|
274
|
+
# Build generation config
|
|
39
275
|
generation_config = {
|
|
40
|
-
"responseModalities" => ["
|
|
276
|
+
"responseModalities" => ["Image"] # Image output only, even for text-only image generation
|
|
41
277
|
}
|
|
42
278
|
|
|
43
|
-
#
|
|
279
|
+
# Build request parameters
|
|
44
280
|
request_params = {
|
|
45
281
|
"contents" => [{
|
|
46
282
|
"parts" => [
|
|
@@ -50,29 +286,29 @@ module Gemini
|
|
|
50
286
|
"generationConfig" => generation_config
|
|
51
287
|
}
|
|
52
288
|
|
|
53
|
-
# API
|
|
289
|
+
# API call
|
|
54
290
|
@client.json_post(
|
|
55
291
|
path: "models/#{model}:generateContent",
|
|
56
292
|
parameters: request_params
|
|
57
293
|
)
|
|
58
294
|
end
|
|
59
295
|
|
|
60
|
-
# Imagen 3
|
|
296
|
+
# Image generation using Imagen 3 model (original code unchanged)
|
|
61
297
|
def imagen_generate(prompt, parameters)
|
|
62
|
-
#
|
|
298
|
+
# Get model name (default is Imagen 3 standard model)
|
|
63
299
|
model = parameters[:model] || "imagen-3.0-generate-002"
|
|
64
300
|
|
|
65
|
-
#
|
|
301
|
+
# Get aspect ratio from size parameter
|
|
66
302
|
aspect_ratio = process_size_parameter(parameters[:size])
|
|
67
303
|
|
|
68
|
-
#
|
|
304
|
+
# Set number of images to generate
|
|
69
305
|
sample_count = parameters[:n] || parameters[:sample_count] || 1
|
|
70
|
-
sample_count = [[sample_count.to_i, 1].max, 4].min # 1
|
|
306
|
+
sample_count = [[sample_count.to_i, 1].max, 4].min # Limit to range 1-4
|
|
71
307
|
|
|
72
|
-
#
|
|
308
|
+
# Set person generation setting
|
|
73
309
|
person_generation = parameters[:person_generation] || "ALLOW_ADULT"
|
|
74
310
|
|
|
75
|
-
#
|
|
311
|
+
# Build request parameters
|
|
76
312
|
request_params = {
|
|
77
313
|
"instances" => [
|
|
78
314
|
{
|
|
@@ -84,20 +320,20 @@ module Gemini
|
|
|
84
320
|
}
|
|
85
321
|
}
|
|
86
322
|
|
|
87
|
-
#
|
|
323
|
+
# Add aspect ratio if specified
|
|
88
324
|
request_params["parameters"]["aspectRatio"] = aspect_ratio if aspect_ratio
|
|
89
325
|
|
|
90
|
-
#
|
|
326
|
+
# Add person generation setting
|
|
91
327
|
request_params["parameters"]["personGeneration"] = person_generation
|
|
92
328
|
|
|
93
|
-
# API
|
|
329
|
+
# API call
|
|
94
330
|
@client.json_post(
|
|
95
331
|
path: "models/#{model}:predict",
|
|
96
332
|
parameters: request_params
|
|
97
333
|
)
|
|
98
334
|
end
|
|
99
335
|
|
|
100
|
-
#
|
|
336
|
+
# Determine aspect ratio from size parameter (original code unchanged)
|
|
101
337
|
def process_size_parameter(size)
|
|
102
338
|
return nil unless size
|
|
103
339
|
|
|
@@ -115,7 +351,7 @@ module Gemini
|
|
|
115
351
|
when "1:1", "3:4", "4:3", "9:16", "16:9"
|
|
116
352
|
size.to_s
|
|
117
353
|
else
|
|
118
|
-
"1:1" #
|
|
354
|
+
"1:1" # Default
|
|
119
355
|
end
|
|
120
356
|
end
|
|
121
357
|
end
|
data/lib/gemini/response.rb
CHANGED
|
@@ -99,6 +99,27 @@ module Gemini
|
|
|
99
99
|
def safety_blocked?
|
|
100
100
|
finish_reason == "SAFETY"
|
|
101
101
|
end
|
|
102
|
+
|
|
103
|
+
# Get grounding metadata (for Google Search grounding)
|
|
104
|
+
def grounding_metadata
|
|
105
|
+
first_candidate&.dig("groundingMetadata")
|
|
106
|
+
end
|
|
107
|
+
|
|
108
|
+
# Check if response has grounding metadata
|
|
109
|
+
def grounded?
|
|
110
|
+
!grounding_metadata.nil? && !grounding_metadata.empty?
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
# Get grounding chunks (source references)
|
|
114
|
+
def grounding_chunks
|
|
115
|
+
grounding_metadata&.dig("groundingChunks") || []
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
# Get search entry point URL (if available)
|
|
119
|
+
def search_entry_point
|
|
120
|
+
grounding_metadata&.dig("searchEntryPoint", "renderedContent")
|
|
121
|
+
end
|
|
122
|
+
|
|
102
123
|
|
|
103
124
|
# Get token usage information
|
|
104
125
|
def usage
|
data/lib/gemini/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: ruby-gemini-api
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.2
|
|
4
|
+
version: 0.1.4
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- rira100000000
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2025-07
|
|
11
|
+
date: 2025-11-07 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: faraday
|