ruby-gemini-api 0.1.4 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -1
- data/README.md +251 -34
- data/lib/gemini/audio.rb +1 -1
- data/lib/gemini/cached_content.rb +1 -1
- data/lib/gemini/client.rb +55 -17
- data/lib/gemini/documents.rb +2 -2
- data/lib/gemini/files.rb +11 -1
- data/lib/gemini/response.rb +51 -0
- data/lib/gemini/threads.rb +1 -1
- data/lib/gemini/version.rb +1 -1
- data/lib/gemini/video.rb +341 -0
- data/lib/gemini.rb +1 -0
- metadata +4 -6
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 80e6cd429265c5341e1efcd3e701f9643cdb4785f18826234ec879178d4a236e
|
|
4
|
+
data.tar.gz: c9a9c5201b616ce2d534393c1d99240563b9d36229dac4f564f71b4ed82ee42b
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: b1ac2fe8cf4dacad20f21a7eab1872fa496a797b76d9af12cb401c5f563aeaed9cd58e4323334fcd6bbf67d74273902b5f9f076a5a44833474b4f3a4e8409148
|
|
7
|
+
data.tar.gz: 17693cd9f1a87ad1fdf21c26153d1ac351ca9137c99e1c97cddd2524333db21190b0bbf5c08b995939c950338f0d5bf4607263a2ae0bc92c0343b28d439c2a6f
|
data/CHANGELOG.md
CHANGED
|
@@ -13,4 +13,15 @@
|
|
|
13
13
|
- Add support for multi-image input
|
|
14
14
|
|
|
15
15
|
## [0.1.4] - 2025-11-08
|
|
16
|
-
- Add support for grounding search
|
|
16
|
+
- Add support for grounding search
|
|
17
|
+
|
|
18
|
+
## [0.1.5] - 2025-11-13
|
|
19
|
+
- Add support for URL Context tool
|
|
20
|
+
- Add simplified method for accessing grounding search sources
|
|
21
|
+
|
|
22
|
+
## [0.1.6] - 2025-12-11
|
|
23
|
+
- Add support for video understanding
|
|
24
|
+
- Analyze local video files (Files API and inline data)
|
|
25
|
+
- Analyze YouTube videos
|
|
26
|
+
- Helper methods: describe, ask, extract_timestamps, analyze_segment
|
|
27
|
+
- Support for MP4, MPEG, MOV, AVI, FLV, WebM, WMV, 3GPP formats
|
data/README.md
CHANGED
|
@@ -11,6 +11,7 @@ This project is inspired by and pays homage to [ruby-openai](https://github.com/
|
|
|
11
11
|
- Chat functionality with conversation history
|
|
12
12
|
- Streaming responses for real-time text generation
|
|
13
13
|
- Audio transcription capabilities
|
|
14
|
+
- Video understanding (including YouTube videos)
|
|
14
15
|
- Thread and message management for chat applications
|
|
15
16
|
- Runs management for executing AI tasks
|
|
16
17
|
- Convenient Response object for easy access to generated content
|
|
@@ -43,7 +44,7 @@ user_prompt = "Tell me the current weather in Tokyo."
|
|
|
43
44
|
# Send request with the defined tools
|
|
44
45
|
response = client.generate_content(
|
|
45
46
|
user_prompt,
|
|
46
|
-
model: "gemini-
|
|
47
|
+
model: "gemini-2.5-flash", # Or any model that supports function calling
|
|
47
48
|
tools: tools
|
|
48
49
|
)
|
|
49
50
|
|
|
@@ -126,7 +127,7 @@ client = Gemini::Client.new(ENV['GEMINI_API_KEY'])
|
|
|
126
127
|
# Generate text
|
|
127
128
|
response = client.generate_content(
|
|
128
129
|
"What are the main features of Ruby programming language?",
|
|
129
|
-
model: "gemini-2.
|
|
130
|
+
model: "gemini-2.5-flash"
|
|
130
131
|
)
|
|
131
132
|
|
|
132
133
|
# Access the generated content using Response object
|
|
@@ -147,7 +148,7 @@ client = Gemini::Client.new(ENV['GEMINI_API_KEY'])
|
|
|
147
148
|
# Stream response in real-time
|
|
148
149
|
client.generate_content_stream(
|
|
149
150
|
"Tell me a story about a programmer who loves Ruby",
|
|
150
|
-
model: "gemini-2.
|
|
151
|
+
model: "gemini-2.5-flash"
|
|
151
152
|
) do |chunk|
|
|
152
153
|
print chunk
|
|
153
154
|
$stdout.flush
|
|
@@ -170,7 +171,7 @@ contents = [
|
|
|
170
171
|
|
|
171
172
|
# Get response with conversation history
|
|
172
173
|
response = client.chat(parameters: {
|
|
173
|
-
model: "gemini-2.
|
|
174
|
+
model: "gemini-2.5-flash",
|
|
174
175
|
contents: contents
|
|
175
176
|
})
|
|
176
177
|
|
|
@@ -198,7 +199,7 @@ system_instruction = "You are a Ruby programming expert who provides concise cod
|
|
|
198
199
|
|
|
199
200
|
# Use system instructions with chat
|
|
200
201
|
response = client.chat(parameters: {
|
|
201
|
-
model: "gemini-2.
|
|
202
|
+
model: "gemini-2.5-flash",
|
|
202
203
|
system_instruction: { parts: [{ text: system_instruction }] },
|
|
203
204
|
contents: [{ role: "user", parts: [{ text: "How do I write a simple web server in Ruby?" }] }]
|
|
204
205
|
})
|
|
@@ -225,7 +226,7 @@ response = client.generate_content(
|
|
|
225
226
|
{ type: "text", text: "Describe what you see in this image" },
|
|
226
227
|
{ type: "image_file", image_file: { file_path: "path/to/image.jpg" } }
|
|
227
228
|
],
|
|
228
|
-
model: "gemini-2.
|
|
229
|
+
model: "gemini-2.5-flash"
|
|
229
230
|
)
|
|
230
231
|
|
|
231
232
|
# Access the description using Response object
|
|
@@ -256,7 +257,7 @@ response = client.generate_content(
|
|
|
256
257
|
{ text: "Describe this image in detail" },
|
|
257
258
|
{ file_data: { mime_type: "image/jpeg", file_uri: file_uri } }
|
|
258
259
|
],
|
|
259
|
-
model: "gemini-2.
|
|
260
|
+
model: "gemini-2.5-flash"
|
|
260
261
|
)
|
|
261
262
|
|
|
262
263
|
# Process the response using Response object
|
|
@@ -286,21 +287,19 @@ client = Gemini::Client.new(ENV['GEMINI_API_KEY'])
|
|
|
286
287
|
# Use Google Search to get real-time information
|
|
287
288
|
response = client.generate_content(
|
|
288
289
|
"Who won the euro 2024?",
|
|
289
|
-
model: "gemini-2.
|
|
290
|
+
model: "gemini-2.5-flash",
|
|
290
291
|
tools: [{ google_search: {} }]
|
|
291
292
|
)
|
|
292
293
|
|
|
293
294
|
if response.success?
|
|
294
295
|
puts response.text
|
|
295
|
-
|
|
296
|
+
|
|
296
297
|
# Check grounding information
|
|
297
298
|
if response.grounded?
|
|
298
299
|
puts "\nSource references:"
|
|
299
|
-
response.
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
puts " #{chunk['web']['uri']}"
|
|
303
|
-
end
|
|
300
|
+
response.grounding_sources.each do |source|
|
|
301
|
+
puts "- #{source[:title]}"
|
|
302
|
+
puts " #{source[:url]}"
|
|
304
303
|
end
|
|
305
304
|
end
|
|
306
305
|
end
|
|
@@ -311,13 +310,15 @@ end
|
|
|
311
310
|
```ruby
|
|
312
311
|
# Check if response is grounded
|
|
313
312
|
if response.grounded?
|
|
314
|
-
# Get
|
|
313
|
+
# Get formatted source information (recommended)
|
|
314
|
+
response.grounding_sources.each do |source|
|
|
315
|
+
puts "Title: #{source[:title]}"
|
|
316
|
+
puts "URL: #{source[:url]}"
|
|
317
|
+
end
|
|
318
|
+
|
|
319
|
+
# You can also access raw metadata
|
|
315
320
|
metadata = response.grounding_metadata
|
|
316
|
-
|
|
317
|
-
# Get source chunks (references)
|
|
318
321
|
chunks = response.grounding_chunks
|
|
319
|
-
|
|
320
|
-
# Get search entry point
|
|
321
322
|
entry_point = response.search_entry_point
|
|
322
323
|
end
|
|
323
324
|
```
|
|
@@ -327,7 +328,7 @@ end
|
|
|
327
328
|
```ruby
|
|
328
329
|
response = client.generate_content(
|
|
329
330
|
"What are the latest AI developments in 2024?",
|
|
330
|
-
model: "gemini-2.
|
|
331
|
+
model: "gemini-2.5-flash",
|
|
331
332
|
tools: [{ google_search: {} }]
|
|
332
333
|
)
|
|
333
334
|
|
|
@@ -345,6 +346,86 @@ You can find a grounding search demo in:
|
|
|
345
346
|
ruby demo/grounding_search_demo_ja.rb
|
|
346
347
|
```
|
|
347
348
|
|
|
349
|
+
### URL Context
|
|
350
|
+
|
|
351
|
+
You can use Gemini API's URL Context feature to retrieve and analyze content from web pages.
|
|
352
|
+
|
|
353
|
+
#### Basic Usage
|
|
354
|
+
|
|
355
|
+
```ruby
|
|
356
|
+
require 'gemini'
|
|
357
|
+
|
|
358
|
+
client = Gemini::Client.new(ENV['GEMINI_API_KEY'])
|
|
359
|
+
|
|
360
|
+
# Use URL Context to analyze web pages (shortcut)
|
|
361
|
+
response = client.generate_content(
|
|
362
|
+
"Summarize the content of this page: https://www.ruby-lang.org",
|
|
363
|
+
model: "gemini-2.5-flash",
|
|
364
|
+
url_context: true
|
|
365
|
+
)
|
|
366
|
+
|
|
367
|
+
if response.success?
|
|
368
|
+
puts response.text
|
|
369
|
+
end
|
|
370
|
+
```
|
|
371
|
+
|
|
372
|
+
#### Using Explicit Tools Parameter
|
|
373
|
+
|
|
374
|
+
```ruby
|
|
375
|
+
# Explicit tools parameter
|
|
376
|
+
response = client.generate_content(
|
|
377
|
+
"Compare these two pages: https://www.ruby-lang.org and https://www.python.org",
|
|
378
|
+
model: "gemini-2.5-flash",
|
|
379
|
+
tools: [{ url_context: {} }]
|
|
380
|
+
)
|
|
381
|
+
```
|
|
382
|
+
|
|
383
|
+
#### Combining URL Context with Google Search
|
|
384
|
+
|
|
385
|
+
```ruby
|
|
386
|
+
# Use both URL Context and Google Search
|
|
387
|
+
response = client.generate_content(
|
|
388
|
+
"What is the latest information about Ruby from https://www.ruby-lang.org and recent news?",
|
|
389
|
+
model: "gemini-2.5-flash",
|
|
390
|
+
url_context: true,
|
|
391
|
+
google_search: true
|
|
392
|
+
)
|
|
393
|
+
```
|
|
394
|
+
|
|
395
|
+
#### Checking URL Context Metadata
|
|
396
|
+
|
|
397
|
+
```ruby
|
|
398
|
+
# Check if URL Context was used
|
|
399
|
+
if response.url_context?
|
|
400
|
+
# Get full metadata
|
|
401
|
+
metadata = response.url_context_metadata
|
|
402
|
+
|
|
403
|
+
# Get retrieved URL information
|
|
404
|
+
urls = response.retrieved_urls
|
|
405
|
+
|
|
406
|
+
# Check retrieval status for each URL
|
|
407
|
+
response.url_retrieval_statuses.each do |url_info|
|
|
408
|
+
puts "URL: #{url_info[:url]}"
|
|
409
|
+
puts "Status: #{url_info[:status]}"
|
|
410
|
+
puts "Title: #{url_info[:title]}" if url_info[:title]
|
|
411
|
+
end
|
|
412
|
+
end
|
|
413
|
+
```
|
|
414
|
+
|
|
415
|
+
#### Limitations
|
|
416
|
+
|
|
417
|
+
- Maximum 20 URLs per request
|
|
418
|
+
- Maximum 34MB content size per URL
|
|
419
|
+
- YouTube videos and paywalled content are not supported
|
|
420
|
+
|
|
421
|
+
#### Demo Application
|
|
422
|
+
|
|
423
|
+
You can find a URL context demo in:
|
|
424
|
+
|
|
425
|
+
```bash
|
|
426
|
+
ruby demo/url_context_demo.rb https://www.ruby-lang.org
|
|
427
|
+
```
|
|
428
|
+
|
|
348
429
|
### Image Generation
|
|
349
430
|
|
|
350
431
|
```ruby
|
|
@@ -470,7 +551,7 @@ client = Gemini::Client.new(ENV['GEMINI_API_KEY'])
|
|
|
470
551
|
# Transcribe audio file (note: file size limit is 20MB for direct upload)
|
|
471
552
|
response = client.audio.transcribe(
|
|
472
553
|
parameters: {
|
|
473
|
-
model: "gemini-
|
|
554
|
+
model: "gemini-2.5-flash",
|
|
474
555
|
file: File.open("audio_file.mp3", "rb"),
|
|
475
556
|
language: "en",
|
|
476
557
|
content_text: "Transcribe this audio clip"
|
|
@@ -502,7 +583,7 @@ file_name = upload_result["file"]["name"]
|
|
|
502
583
|
# Use the file ID for transcription
|
|
503
584
|
response = client.audio.transcribe(
|
|
504
585
|
parameters: {
|
|
505
|
-
model: "gemini-
|
|
586
|
+
model: "gemini-2.5-flash",
|
|
506
587
|
file_uri: file_uri,
|
|
507
588
|
language: "en"
|
|
508
589
|
}
|
|
@@ -521,6 +602,137 @@ client.files.delete(name: file_name)
|
|
|
521
602
|
|
|
522
603
|
For more examples, check out the `demo/file_audio_demo.rb` file included with the gem.
|
|
523
604
|
|
|
605
|
+
### Video Understanding
|
|
606
|
+
|
|
607
|
+
Gemini API can understand video content, enabling description, segmentation, information extraction, and question answering. It can process videos up to 2 hours long.
|
|
608
|
+
|
|
609
|
+
#### Basic Usage (Upload via Files API)
|
|
610
|
+
|
|
611
|
+
For video files larger than 20MB or files you want to reuse multiple times, uploading via Files API is recommended:
|
|
612
|
+
|
|
613
|
+
```ruby
|
|
614
|
+
require 'gemini'
|
|
615
|
+
|
|
616
|
+
client = Gemini::Client.new(ENV['GEMINI_API_KEY'])
|
|
617
|
+
|
|
618
|
+
# Upload and analyze a video file
|
|
619
|
+
result = client.video.analyze(
|
|
620
|
+
file_path: "path/to/video.mp4",
|
|
621
|
+
prompt: "Describe this video in detail"
|
|
622
|
+
)
|
|
623
|
+
|
|
624
|
+
response = result[:response]
|
|
625
|
+
|
|
626
|
+
if response.success?
|
|
627
|
+
puts response.text
|
|
628
|
+
else
|
|
629
|
+
puts "Video analysis failed: #{response.error}"
|
|
630
|
+
end
|
|
631
|
+
|
|
632
|
+
# File information (optional)
|
|
633
|
+
puts "File URI: #{result[:file_uri]}"
|
|
634
|
+
puts "File name: #{result[:file_name]}"
|
|
635
|
+
```
|
|
636
|
+
|
|
637
|
+
#### Analyze as Inline Data (Videos under 20MB)
|
|
638
|
+
|
|
639
|
+
Small video files can be Base64-encoded and sent inline:
|
|
640
|
+
|
|
641
|
+
```ruby
|
|
642
|
+
# Analyze a video under 20MB inline
|
|
643
|
+
response = client.video.analyze_inline(
|
|
644
|
+
file_path: "path/to/small_video.mp4",
|
|
645
|
+
prompt: "What is happening in this video?"
|
|
646
|
+
)
|
|
647
|
+
|
|
648
|
+
if response.success?
|
|
649
|
+
puts response.text
|
|
650
|
+
end
|
|
651
|
+
```
|
|
652
|
+
|
|
653
|
+
#### YouTube Video Analysis
|
|
654
|
+
|
|
655
|
+
You can directly analyze public YouTube videos (private and unlisted videos are not supported):
|
|
656
|
+
|
|
657
|
+
```ruby
|
|
658
|
+
# Analyze a video using YouTube URL
|
|
659
|
+
response = client.video.analyze_youtube(
|
|
660
|
+
url: "https://www.youtube.com/watch?v=XXXXX",
|
|
661
|
+
prompt: "What are the three main points of this video?"
|
|
662
|
+
)
|
|
663
|
+
|
|
664
|
+
if response.success?
|
|
665
|
+
puts response.text
|
|
666
|
+
end
|
|
667
|
+
```
|
|
668
|
+
|
|
669
|
+
#### Helper Methods
|
|
670
|
+
|
|
671
|
+
Helper methods are provided for common operations:
|
|
672
|
+
|
|
673
|
+
```ruby
|
|
674
|
+
# Get video description
|
|
675
|
+
response = client.video.describe(file_path: "video.mp4")
|
|
676
|
+
puts response.text
|
|
677
|
+
|
|
678
|
+
# Get YouTube video description
|
|
679
|
+
response = client.video.describe(youtube_url: "https://youtube.com/...")
|
|
680
|
+
puts response.text
|
|
681
|
+
|
|
682
|
+
# Ask questions about a video
|
|
683
|
+
response = client.video.ask(
|
|
684
|
+
file_uri: result[:file_uri],
|
|
685
|
+
question: "Who appears in this video?"
|
|
686
|
+
)
|
|
687
|
+
puts response.text
|
|
688
|
+
|
|
689
|
+
# Extract timestamps
|
|
690
|
+
response = client.video.extract_timestamps(
|
|
691
|
+
file_uri: result[:file_uri],
|
|
692
|
+
query: "important scenes"
|
|
693
|
+
)
|
|
694
|
+
puts response.text
|
|
695
|
+
```
|
|
696
|
+
|
|
697
|
+
#### Video Segment Analysis
|
|
698
|
+
|
|
699
|
+
You can analyze only a portion of a video:
|
|
700
|
+
|
|
701
|
+
```ruby
|
|
702
|
+
# Analyze a specific segment of the video
|
|
703
|
+
response = client.video.analyze_segment(
|
|
704
|
+
file_uri: result[:file_uri],
|
|
705
|
+
prompt: "What is happening in this scene?",
|
|
706
|
+
start_offset: "30s",
|
|
707
|
+
end_offset: "60s"
|
|
708
|
+
)
|
|
709
|
+
|
|
710
|
+
if response.success?
|
|
711
|
+
puts response.text
|
|
712
|
+
end
|
|
713
|
+
```
|
|
714
|
+
|
|
715
|
+
#### Supported Video Formats
|
|
716
|
+
|
|
717
|
+
- MP4 - video/mp4
|
|
718
|
+
- MPEG - video/mpeg
|
|
719
|
+
- MOV - video/quicktime
|
|
720
|
+
- AVI - video/x-msvideo
|
|
721
|
+
- FLV - video/x-flv
|
|
722
|
+
- MPG - video/mpeg
|
|
723
|
+
- WebM - video/webm
|
|
724
|
+
- WMV - video/x-ms-wmv
|
|
725
|
+
- 3GPP - video/3gpp
|
|
726
|
+
|
|
727
|
+
#### Limitations
|
|
728
|
+
|
|
729
|
+
- Models with a 2-million-token context window: up to approximately 2 hours of video
|
|
730
|
+
- Models with a 1-million-token context window: up to approximately 1 hour of video
|
|
731
|
+
- YouTube free plan: cannot process more than 8 hours of video per day
|
|
732
|
+
- Approximately 300 tokens consumed per second of video (at default resolution)
|
|
733
|
+
|
|
734
|
+
A demo application can be found in `demo/video_demo.rb`.
|
|
735
|
+
|
|
524
736
|
### Document Processing
|
|
525
737
|
|
|
526
738
|
Gemini API can process long documents (up to 3,600 pages), including PDFs. Gemini models understand both text and images within the document, enabling you to analyze, summarize, and extract information.
|
|
@@ -534,7 +746,7 @@ client = Gemini::Client.new(ENV['GEMINI_API_KEY'])
|
|
|
534
746
|
result = client.documents.process(
|
|
535
747
|
file_path: "path/to/document.pdf",
|
|
536
748
|
prompt: "Summarize this document in three key points",
|
|
537
|
-
model: "gemini-
|
|
749
|
+
model: "gemini-2.5-flash"
|
|
538
750
|
)
|
|
539
751
|
|
|
540
752
|
response = result[:response]
|
|
@@ -563,7 +775,7 @@ file_path = "path/to/document.pdf"
|
|
|
563
775
|
thread_result = client.chat_with_file(
|
|
564
776
|
file_path,
|
|
565
777
|
"Please provide an overview of this document",
|
|
566
|
-
model: "gemini-
|
|
778
|
+
model: "gemini-2.5-flash"
|
|
567
779
|
)
|
|
568
780
|
|
|
569
781
|
# Get the thread ID (for continuing the conversation)
|
|
@@ -610,7 +822,7 @@ Demo applications can be found in `demo/document_chat_demo.rb` and `demo/documen
|
|
|
610
822
|
|
|
611
823
|
Context caching allows you to preprocess and store inputs like large documents or images with the Gemini API, then reuse them across multiple requests. This saves processing time and token usage when asking different questions about the same content.
|
|
612
824
|
|
|
613
|
-
**Important**: Context caching requires a minimum input of 32,768 tokens. The maximum token count matches the context window size of the model you are using. Caches automatically expire after 48 hours, but you can set a custom TTL (Time To Live).
|
|
825
|
+
**Important**: Context caching requires a minimum input of 32,768 tokens. The maximum token count matches the context window size of the model you are using. Caches automatically expire after 48 hours, but you can set a custom TTL (Time To Live). Using stable model versions like gemini-2.5-flash is recommended.
|
|
614
826
|
|
|
615
827
|
```ruby
|
|
616
828
|
require 'gemini'
|
|
@@ -622,7 +834,7 @@ cache_result = client.documents.cache(
|
|
|
622
834
|
file_path: "path/to/large_document.pdf",
|
|
623
835
|
system_instruction: "You are a document analysis expert. Please understand the content thoroughly and answer questions accurately.",
|
|
624
836
|
ttl: "86400s", # 24 hours (in seconds)
|
|
625
|
-
model: "gemini-
|
|
837
|
+
model: "gemini-2.5-flash"
|
|
626
838
|
)
|
|
627
839
|
|
|
628
840
|
# Get the cache name
|
|
@@ -633,7 +845,7 @@ puts "Cache name: #{cache_name}"
|
|
|
633
845
|
response = client.generate_content_with_cache(
|
|
634
846
|
"What are the key findings in this document?",
|
|
635
847
|
cached_content: cache_name,
|
|
636
|
-
model: "gemini-
|
|
848
|
+
model: "gemini-2.5-flash"
|
|
637
849
|
)
|
|
638
850
|
|
|
639
851
|
if response.success?
|
|
@@ -785,7 +997,7 @@ require 'gemini'
|
|
|
785
997
|
client = Gemini::Client.new(ENV['GEMINI_API_KEY'])
|
|
786
998
|
|
|
787
999
|
# Create a new thread
|
|
788
|
-
thread = client.threads.create(parameters: { model: "gemini-2.
|
|
1000
|
+
thread = client.threads.create(parameters: { model: "gemini-2.5-flash" })
|
|
789
1001
|
thread_id = thread["id"]
|
|
790
1002
|
|
|
791
1003
|
# Add a message to the thread
|
|
@@ -821,7 +1033,7 @@ client = Gemini::Client.new(ENV['GEMINI_API_KEY'])
|
|
|
821
1033
|
|
|
822
1034
|
response = client.generate_content(
|
|
823
1035
|
"Tell me about the Ruby programming language",
|
|
824
|
-
model: "gemini-2.
|
|
1036
|
+
model: "gemini-2.5-flash"
|
|
825
1037
|
)
|
|
826
1038
|
|
|
827
1039
|
# Basic response information
|
|
@@ -897,8 +1109,9 @@ The gem includes several demo applications that showcase its functionality:
|
|
|
897
1109
|
- `demo/demo.rb` - Basic text generation and chat
|
|
898
1110
|
- `demo/stream_demo.rb` - Streaming text generation
|
|
899
1111
|
- `demo/audio_demo.rb` - Audio transcription
|
|
1112
|
+
- `demo/video_demo.rb` - Video understanding (local files and YouTube)
|
|
900
1113
|
- `demo/vision_demo.rb` - Image recognition
|
|
901
|
-
- `demo/image_generation_demo.rb` - Image generation
|
|
1114
|
+
- `demo/image_generation_demo.rb` - Image generation
|
|
902
1115
|
- `demo/file_vision_demo.rb` - Image recognition with large image files
|
|
903
1116
|
- `demo/file_audio_demo.rb` - Audio transcription with large audio files
|
|
904
1117
|
- `demo/structured_output_demo.rb` - Structured JSON output with schema
|
|
@@ -925,6 +1138,12 @@ ruby demo/audio_demo.rb path/to/audio/file.mp3
|
|
|
925
1138
|
# Audio transcription with over 20MB audio file
|
|
926
1139
|
ruby demo/file_audio_demo.rb path/to/audio/file.mp3
|
|
927
1140
|
|
|
1141
|
+
# Video understanding (local file)
|
|
1142
|
+
ruby demo/video_demo.rb path/to/video/file.mp4
|
|
1143
|
+
|
|
1144
|
+
# Video understanding (YouTube)
|
|
1145
|
+
ruby demo/video_demo.rb --youtube https://www.youtube.com/watch?v=XXXXX
|
|
1146
|
+
|
|
928
1147
|
# Image recognition
|
|
929
1148
|
ruby demo/vision_demo.rb path/to/image/file.jpg
|
|
930
1149
|
|
|
@@ -954,10 +1173,8 @@ ruby demo/document_cache_demo.rb path/to/document.pdf
|
|
|
954
1173
|
|
|
955
1174
|
The library supports various Gemini models:
|
|
956
1175
|
|
|
957
|
-
- `gemini-2.
|
|
958
|
-
- `gemini-2.
|
|
959
|
-
- `gemini-2.0-pro`
|
|
960
|
-
- `gemini-1.5-flash`
|
|
1176
|
+
- `gemini-2.5-flash`
|
|
1177
|
+
- `gemini-2.5-pro`
|
|
961
1178
|
|
|
962
1179
|
## Requirements
|
|
963
1180
|
|
data/lib/gemini/audio.rb
CHANGED
|
@@ -8,7 +8,7 @@ module Gemini
|
|
|
8
8
|
def transcribe(parameters: {})
|
|
9
9
|
file = parameters.delete(:file)
|
|
10
10
|
file_uri = parameters.delete(:file_uri)
|
|
11
|
-
model = parameters.delete(:model) || "gemini-
|
|
11
|
+
model = parameters.delete(:model) || "gemini-2.5-flash"
|
|
12
12
|
language = parameters.delete(:language)
|
|
13
13
|
content_text = parameters.delete(:content_text) || "Transcribe this audio clip"
|
|
14
14
|
|
|
@@ -24,7 +24,7 @@ module Gemini
|
|
|
24
24
|
mime_type ||= file_path ? @client.determine_mime_type(file_path) : "application/octet-stream"
|
|
25
25
|
|
|
26
26
|
# モデルを取得(models/プレフィックスを追加)
|
|
27
|
-
model_name = model || parameters[:model] || "gemini-
|
|
27
|
+
model_name = model || parameters[:model] || "gemini-2.5-flash"
|
|
28
28
|
model_name = "models/#{model_name}" unless model_name.start_with?("models/")
|
|
29
29
|
|
|
30
30
|
# キャッシュリクエストを構築(キャメルケースに注意)
|
data/lib/gemini/client.rb
CHANGED
|
@@ -59,6 +59,11 @@ module Gemini
|
|
|
59
59
|
@documents ||= Gemini::Documents.new(client: self)
|
|
60
60
|
end
|
|
61
61
|
|
|
62
|
+
# Video processing accessor.
# Builds a Gemini::Video bound to this client on first use and returns the
# same instance on later calls.
def video
  return @video if @video

  @video = Gemini::Video.new(client: self)
end
|
|
66
|
+
|
|
62
67
|
# キャッシュ管理アクセサ
|
|
63
68
|
def cached_content
|
|
64
69
|
@cached_content ||= Gemini::CachedContent.new(client: self)
|
|
@@ -77,7 +82,7 @@ module Gemini
|
|
|
77
82
|
# OpenAI chat-like text generation method for Gemini API
|
|
78
83
|
# Extended to support streaming callbacks
|
|
79
84
|
def chat(parameters: {}, &stream_callback)
|
|
80
|
-
model = parameters.delete(:model) || "gemini-2.
|
|
85
|
+
model = parameters.delete(:model) || "gemini-2.5-flash"
|
|
81
86
|
|
|
82
87
|
# If streaming callback is provided
|
|
83
88
|
if block_given?
|
|
@@ -117,8 +122,9 @@ module Gemini
|
|
|
117
122
|
# Helper methods for convenience
|
|
118
123
|
|
|
119
124
|
# Method with usage similar to OpenAI's chat
|
|
120
|
-
def generate_content(prompt, model: "gemini-2.
|
|
121
|
-
response_mime_type: nil, response_schema: nil, temperature: 0.5, tools: nil,
|
|
125
|
+
def generate_content(prompt, model: "gemini-2.5-flash", system_instruction: nil,
|
|
126
|
+
response_mime_type: nil, response_schema: nil, temperature: 0.5, tools: nil,
|
|
127
|
+
url_context: false, google_search: false, **parameters, &stream_callback)
|
|
122
128
|
content = format_content(prompt)
|
|
123
129
|
params = {
|
|
124
130
|
contents: [content],
|
|
@@ -137,7 +143,11 @@ module Gemini
|
|
|
137
143
|
if response_schema
|
|
138
144
|
params[:generation_config]["response_schema"] = response_schema
|
|
139
145
|
end
|
|
140
|
-
|
|
146
|
+
|
|
147
|
+
# Handle tool shortcuts
|
|
148
|
+
tools = build_tools_array(tools, url_context: url_context, google_search: google_search)
|
|
149
|
+
params[:tools] = tools if tools && !tools.empty?
|
|
150
|
+
|
|
141
151
|
params.merge!(parameters)
|
|
142
152
|
|
|
143
153
|
if block_given?
|
|
@@ -148,38 +158,44 @@ module Gemini
|
|
|
148
158
|
end
|
|
149
159
|
|
|
150
160
|
# Streaming text generation.
#
# Builds a request from the prompt and keyword options, then hands it to
# #chat together with the caller's block.
#
# prompt              - content passed through format_content
# model:              - model name (default "gemini-2.5-flash")
# system_instruction: - optional; passed through format_content when given
# response_mime_type: - optional generation_config entry
# response_schema:    - optional generation_config entry
# temperature:        - generation_config temperature (default 0.5)
# tools:              - optional tool hash or array of tool hashes; combined
#                       with the shortcut flags instead of replacing them
# url_context:        - shortcut flag forwarded to build_tools_array
# google_search:      - shortcut flag forwarded to build_tools_array
# **parameters        - merged into the request last, so they override any
#                       key built here
#
# Raises ArgumentError when no block is given.
# Returns whatever #chat returns.
def generate_content_stream(prompt, model: "gemini-2.5-flash", system_instruction: nil,
                            response_mime_type: nil, response_schema: nil, temperature: 0.5,
                            tools: nil, url_context: false, google_search: false,
                            **parameters, &block)
  raise ArgumentError, "Block is required for streaming" unless block_given?

  params = {
    contents: [format_content(prompt)],
    model: model
  }
  params[:system_instruction] = format_content(system_instruction) if system_instruction

  generation_config = {}
  generation_config[:response_mime_type] = response_mime_type if response_mime_type
  generation_config[:response_schema] = response_schema if response_schema
  generation_config["temperature"] = temperature
  params[:generation_config] = generation_config

  # Explicit tools used to arrive through **parameters and overwrite the
  # shortcut tools at merge time; building one combined list keeps both.
  combined_tools = build_tools_array(tools, url_context: url_context, google_search: google_search)
  params[:tools] = combined_tools if combined_tools && !combined_tools.empty?

  # Remaining caller-supplied parameters take precedence
  params.merge!(parameters)

  chat(parameters: params, &block)
end
|
|
180
196
|
|
|
181
197
|
# ファイルを使った会話(複数ファイル対応)
|
|
182
|
-
def chat_with_multimodal(file_paths, prompt, model: "gemini-
|
|
198
|
+
def chat_with_multimodal(file_paths, prompt, model: "gemini-2.5-flash", **parameters)
|
|
183
199
|
# スレッドを作成
|
|
184
200
|
thread = threads.create(parameters: { model: model })
|
|
185
201
|
thread_id = thread["id"]
|
|
@@ -247,7 +263,7 @@ module Gemini
|
|
|
247
263
|
end
|
|
248
264
|
end
|
|
249
265
|
|
|
250
|
-
def generate_content_with_cache(prompt, cached_content:, model: "gemini-
|
|
266
|
+
def generate_content_with_cache(prompt, cached_content:, model: "gemini-2.5-flash", **parameters)
|
|
251
267
|
# モデル名にmodels/プレフィックスを追加
|
|
252
268
|
model_name = model.start_with?("models/") ? model : "models/#{model}"
|
|
253
269
|
|
|
@@ -278,12 +294,12 @@ module Gemini
|
|
|
278
294
|
end
|
|
279
295
|
|
|
280
296
|
# Single-file helper.
# Wraps the one path in an array and delegates to chat_with_multimodal,
# forwarding the prompt, model and any extra parameters unchanged.
def chat_with_file(file_path, prompt, model: "gemini-2.5-flash", **parameters)
  single_file_list = [file_path]
  chat_with_multimodal(single_file_list, prompt, model: model, **parameters)
end
|
|
284
300
|
|
|
285
301
|
# ファイルをアップロードして質問するシンプルなヘルパー
|
|
286
|
-
def upload_and_process_file(file_path, prompt, content_type: nil, model: "gemini-
|
|
302
|
+
def upload_and_process_file(file_path, prompt, content_type: nil, model: "gemini-2.5-flash", **parameters)
|
|
287
303
|
# MIMEタイプを自動判定
|
|
288
304
|
mime_type = content_type || determine_mime_type(file_path)
|
|
289
305
|
|
|
@@ -399,7 +415,29 @@ module Gemini
|
|
|
399
415
|
end
|
|
400
416
|
|
|
401
417
|
private
|
|
402
|
-
|
|
418
|
+
|
|
419
|
+
# Build the tools array from an explicit tools parameter plus shortcut flags.
#
# tools          - nil/false (no explicit tools), a single tool (wrapped in an
#                  array), or an Array of tools (copied, never mutated)
# url_context:   - when truthy, appends { url_context: {} }
# google_search: - when truthy, appends { google_search: {} }
#
# Returns nil when no tool was requested. Otherwise returns an Array that
# keeps only the first tool for each tool name. Names are compared as
# strings, so { "google_search" => {} } and { google_search: {} } count as
# the same tool and the explicitly passed one wins over the shortcut.
def build_tools_array(tools, url_context: false, google_search: false)
  result_tools =
    if tools.is_a?(Array)
      tools.dup
    elsif tools
      [tools]
    else
      []
    end

  result_tools << { url_context: {} } if url_context
  result_tools << { google_search: {} } if google_search

  return nil if result_tools.empty?

  # De-duplicate by tool name; anything that is not hash-like is compared by
  # value instead of raising on a missing #keys.
  result_tools.uniq { |tool| tool.respond_to?(:keys) ? tool.keys.first.to_s : tool }
end
|
|
440
|
+
|
|
403
441
|
# Process stream chunk and pass to callback
|
|
404
442
|
def process_stream_chunk(chunk, &callback)
|
|
405
443
|
if chunk.respond_to?(:dig) && chunk.dig("candidates", 0, "content", "parts", 0, "text")
|
data/lib/gemini/documents.rb
CHANGED
|
@@ -5,7 +5,7 @@ module Gemini
|
|
|
5
5
|
end
|
|
6
6
|
|
|
7
7
|
# ドキュメントをアップロードして質問する基本メソッド
|
|
8
|
-
def process(file: nil, file_path: nil, prompt:, model: "gemini-
|
|
8
|
+
def process(file: nil, file_path: nil, prompt:, model: "gemini-2.5-flash", **parameters)
|
|
9
9
|
# ファイルパスが指定されている場合はファイルを開く
|
|
10
10
|
if file_path && !file
|
|
11
11
|
file = File.open(file_path, "rb")
|
|
@@ -70,7 +70,7 @@ module Gemini
|
|
|
70
70
|
file_name = upload_result["file"]["name"]
|
|
71
71
|
|
|
72
72
|
# モデル名の取得と調整
|
|
73
|
-
model = parameters[:model] || "gemini-
|
|
73
|
+
model = parameters[:model] || "gemini-2.5-flash"
|
|
74
74
|
model = "models/#{model}" unless model.start_with?("models/")
|
|
75
75
|
|
|
76
76
|
# キャッシュに保存(パラメータの名前に注意)
|
data/lib/gemini/files.rb
CHANGED
|
@@ -125,11 +125,21 @@ module Gemini
|
|
|
125
125
|
when ".mp4"
|
|
126
126
|
"video/mp4"
|
|
127
127
|
when ".avi"
|
|
128
|
-
"video/
|
|
128
|
+
"video/x-msvideo"
|
|
129
129
|
when ".mov"
|
|
130
130
|
"video/quicktime"
|
|
131
131
|
when ".mkv"
|
|
132
132
|
"video/x-matroska"
|
|
133
|
+
when ".mpeg", ".mpg"
|
|
134
|
+
"video/mpeg"
|
|
135
|
+
when ".webm"
|
|
136
|
+
"video/webm"
|
|
137
|
+
when ".wmv"
|
|
138
|
+
"video/x-ms-wmv"
|
|
139
|
+
when ".flv"
|
|
140
|
+
"video/x-flv"
|
|
141
|
+
when ".3gp", ".3gpp"
|
|
142
|
+
"video/3gpp"
|
|
133
143
|
when ".pdf"
|
|
134
144
|
"application/pdf"
|
|
135
145
|
when ".txt"
|
data/lib/gemini/response.rb
CHANGED
|
@@ -119,6 +119,57 @@ module Gemini
|
|
|
119
119
|
# Returns the "renderedContent" value nested under "searchEntryPoint" in the
# grounding metadata, or nil when there is no grounding metadata.
def search_entry_point
  metadata = grounding_metadata
  return nil if metadata.nil?

  metadata.dig("searchEntryPoint", "renderedContent")
end
|
|
122
|
+
|
|
123
|
+
# Get formatted grounding sources (simplified access).
#
# Returns [] unless the response is grounded. Otherwise returns one Hash per
# grounding chunk:
#   - chunks with a "web" entry -> { url:, title:, type: "web" }
#   - any other chunk           -> { type: "unknown", data: <the raw chunk> }
def grounding_sources
  return [] unless grounded?

  grounding_chunks.map do |chunk|
    web = chunk["web"]
    web ? { url: web["uri"], title: web["title"], type: "web" } : { type: "unknown", data: chunk }
  end
end
|
|
143
|
+
|
|
144
|
+
# Get URL context metadata (for the URL Context tool).
# Returns the first candidate's "urlContextMetadata" value, or nil when there
# is no first candidate.
def url_context_metadata
  candidate = first_candidate
  candidate.nil? ? nil : candidate.dig("urlContextMetadata")
end
|
|
148
|
+
|
|
149
|
+
# Check whether the response carries URL context metadata.
# True only when the metadata is present and not empty.
def url_context?
  metadata = url_context_metadata
  return false if metadata.nil?

  !metadata.empty?
end
|
|
153
|
+
|
|
154
|
+
# Get retrieved URLs from URL context.
# Returns the "urlMetadata" value of the URL context metadata, or [] when
# URL context was not used or that key is missing.
def retrieved_urls
  return [] unless url_context?

  metadata = url_context_metadata
  entries = metadata.nil? ? nil : metadata.dig("urlMetadata")
  entries || []
end
|
|
160
|
+
|
|
161
|
+
# Get URL retrieval statuses.
# Returns [] when URL context was not used; otherwise one Hash per retrieved
# URL entry with :url ("retrievedUrl"), :status ("urlRetrievalStatus") and
# :title ("title").
def url_retrieval_statuses
  return [] unless url_context?

  retrieved_urls.map do |entry|
    url, status, title = entry.values_at("retrievedUrl", "urlRetrievalStatus", "title")
    { url: url, status: status, title: title }
  end
end
|
|
122
173
|
|
|
123
174
|
|
|
124
175
|
# Get token usage information
|
data/lib/gemini/threads.rb
CHANGED
data/lib/gemini/version.rb
CHANGED
data/lib/gemini/video.rb
ADDED
|
@@ -0,0 +1,341 @@
|
|
|
1
|
+
module Gemini
  # Video understanding helpers built on the `generateContent` endpoint.
  #
  # A video can be supplied as a local file (uploaded through the client's
  # Files API or sent inline as base64), as an already-uploaded file URI, or
  # as a public YouTube URL.
  class Video
    # Supported video file extensions.
    SUPPORTED_FORMATS = %w[.mp4 .mpeg .mov .avi .flv .mpg .webm .wmv .3gp .3gpp].freeze

    # MIME type used whenever the type cannot be derived from a file name.
    DEFAULT_MIME_TYPE = "video/mp4"

    # Lower-cased file extension => MIME type.
    MIME_TYPES = {
      ".mp4" => "video/mp4",
      ".mpeg" => "video/mpeg",
      ".mpg" => "video/mpeg",
      ".mov" => "video/quicktime",
      ".avi" => "video/x-msvideo",
      ".flv" => "video/x-flv",
      ".webm" => "video/webm",
      ".wmv" => "video/x-ms-wmv",
      ".3gp" => "video/3gpp",
      ".3gpp" => "video/3gpp"
    }.freeze

    # Upper bound (20MB) enforced by #analyze_inline.
    MAX_INLINE_BYTES = 20 * 1024 * 1024

    # Accepted YouTube URL shapes. Anchored with \A (start of string) rather
    # than ^ (start of any line) so a multi-line string cannot smuggle a
    # different first line past validation.
    YOUTUBE_URL_PATTERNS = [
      %r{\Ahttps?://(?:www\.)?youtube\.com/watch\?v=[\w-]+},
      %r{\Ahttps?://youtu\.be/[\w-]+},
      %r{\Ahttps?://(?:www\.)?youtube\.com/embed/[\w-]+},
      %r{\Ahttps?://(?:www\.)?youtube\.com/v/[\w-]+},
      %r{\Ahttps?://(?:www\.)?youtube\.com/shorts/[\w-]+}
    ].freeze

    private_constant :DEFAULT_MIME_TYPE, :MIME_TYPES, :MAX_INLINE_BYTES, :YOUTUBE_URL_PATTERNS

    # @param client [Object] API client; must provide `files.upload`,
    #   `files.get` and `json_post`
    def initialize(client:)
      @client = client
    end

    # Uploads a video through the Files API, waits until it is ACTIVE, then
    # analyzes it. Recommended for files of 20MB or more, or files that will
    # be reused.
    #
    # @param file [IO, nil] an already-open file (left open on return)
    # @param file_path [String, nil] path opened (and closed) by this method;
    #   ignored when `file` is given
    # @param prompt [String] instruction sent along with the video
    # @param model [String] model name
    # @param parameters [Hash] extra request parameters; `:mime_type`
    #   overrides the extension-based MIME type detection
    # @return [Hash] `{ response: Gemini::Response, file_uri:, file_name: }`
    # @raise [ArgumentError] when neither `file` nor `file_path` is given
    # @raise [StandardError] when file processing fails or times out
    def analyze(file: nil, file_path: nil, prompt:, model: "gemini-2.5-flash", **parameters)
      with_video_file(file, file_path) do |video|
        mime_type = parameters.delete(:mime_type) || determine_video_mime_type(video)

        upload_result = @client.files.upload(file: video)
        file_uri = upload_result["file"]["uri"]
        file_name = upload_result["file"]["name"]

        # Uploaded videos are not usable until processing has finished.
        wait_for_file_active(file_name)

        raw_response = generate_video_content(
          file_uri: file_uri,
          mime_type: mime_type,
          prompt: prompt,
          model: model,
          **parameters
        )

        {
          response: Gemini::Response.new(raw_response),
          file_uri: file_uri,
          file_name: file_name
        }
      end
    end

    # Analyzes a video that has already been uploaded, addressed by its URI.
    #
    # @return [Gemini::Response]
    def analyze_with_file_uri(file_uri:, prompt:, model: "gemini-2.5-flash", mime_type: "video/mp4", **parameters)
      raw_response = generate_video_content(
        file_uri: file_uri,
        mime_type: mime_type,
        prompt: prompt,
        model: model,
        **parameters
      )

      Gemini::Response.new(raw_response)
    end

    # Analyzes a video referenced by a YouTube URL (public videos only).
    #
    # @return [Gemini::Response]
    # @raise [ArgumentError] when `url` is not a recognised YouTube URL
    def analyze_youtube(url:, prompt:, model: "gemini-2.5-flash", **parameters)
      unless valid_youtube_url?(url)
        raise ArgumentError, "Invalid YouTube URL. Only public YouTube videos are supported."
      end

      parts = [
        { text: prompt },
        { file_data: { file_uri: url } }
      ]

      Gemini::Response.new(post_generate_content(model, parts, parameters))
    end

    # Analyzes a small video by embedding it in the request as base64
    # (intended for files under 20MB).
    #
    # @param file [IO, nil] an already-open file (left open on return)
    # @param file_path [String, nil] path opened (and closed) by this method;
    #   ignored when `file` is given
    # @return [Gemini::Response]
    # @raise [ArgumentError] when no file is given or it is larger than 20MB
    def analyze_inline(file: nil, file_path: nil, prompt:, model: "gemini-2.5-flash", **parameters)
      with_video_file(file, file_path) do |video|
        video.rewind
        if video.size > MAX_INLINE_BYTES
          raise ArgumentError, "File size exceeds 20MB. Use analyze method with Files API instead."
        end

        mime_type = parameters.delete(:mime_type) || determine_video_mime_type(video)

        video.rewind
        # pack("m0") is strict (no line breaks) base64 - the same output as
        # Base64.strict_encode64, without requiring the base64 library,
        # which is no longer a default gem as of Ruby 3.4.
        encoded = [video.read].pack("m0")

        parts = [
          { text: prompt },
          { inline_data: { mime_type: mime_type, data: encoded } }
        ]

        Gemini::Response.new(post_generate_content(model, parts, parameters))
      end
    end

    # Convenience helper: asks the model to describe the video.
    #
    # Source precedence is youtube_url, then file_uri, then file/file_path.
    #
    # @param language [String] "ja" selects the Japanese prompt; any other
    #   value selects the English prompt
    # @return [Gemini::Response]
    # @raise [ArgumentError] when no video source is given
    def describe(file: nil, file_path: nil, file_uri: nil, youtube_url: nil, model: "gemini-2.5-flash", language: "ja", **parameters)
      prompt = language == "ja" ? "この動画の内容を詳しく説明してください。" : "Describe this video in detail."

      analyze_from_source(
        prompt, model,
        { file: file, file_path: file_path, file_uri: file_uri, youtube_url: youtube_url },
        parameters
      )
    end

    # Convenience helper: asks the model for every timestamp (MM:SS) at
    # which `query` appears in the video. The prompt is sent in Japanese.
    #
    # @return [Gemini::Response]
    # @raise [ArgumentError] when no video source is given
    def extract_timestamps(file: nil, file_path: nil, file_uri: nil, youtube_url: nil, query:, model: "gemini-2.5-flash", **parameters)
      prompt = "動画内で「#{query}」が登場するタイムスタンプを全て抽出してください。MM:SS形式で出力してください。"

      analyze_from_source(
        prompt, model,
        { file: file, file_path: file_path, file_uri: file_uri, youtube_url: youtube_url },
        parameters
      )
    end

    # Analyzes a segment of an already-uploaded video.
    #
    # @param start_offset [String, nil] segment start, sent as `startOffset`
    # @param end_offset [String, nil] segment end, sent as `endOffset`
    # @return [Gemini::Response]
    def analyze_segment(file_uri:, prompt:, start_offset: nil, end_offset: nil, model: "gemini-2.5-flash", mime_type: "video/mp4", **parameters)
      video_metadata = {}
      video_metadata[:startOffset] = start_offset if start_offset
      video_metadata[:endOffset] = end_offset if end_offset

      video_part = {
        file_data: {
          mime_type: mime_type,
          file_uri: file_uri
        }
      }
      # The video metadata belongs on the part itself, next to file_data,
      # not nested inside file_data.
      video_part[:video_metadata] = video_metadata unless video_metadata.empty?

      parts = [{ text: prompt }, video_part]

      Gemini::Response.new(post_generate_content(model, parts, parameters))
    end

    # Asks a free-form question about the video.
    #
    # Source precedence is youtube_url, then file_uri, then file/file_path.
    #
    # @return [Gemini::Response]
    # @raise [ArgumentError] when no video source is given
    def ask(file: nil, file_path: nil, file_uri: nil, youtube_url: nil, question:, model: "gemini-2.5-flash", **parameters)
      analyze_from_source(
        question, model,
        { file: file, file_path: file_path, file_uri: file_uri, youtube_url: youtube_url },
        parameters
      )
    end

    private

    # Routes a prompt to the analysis method matching whichever video source
    # was supplied; always returns a Gemini::Response.
    def analyze_from_source(prompt, model, source, parameters)
      if source[:youtube_url]
        analyze_youtube(url: source[:youtube_url], prompt: prompt, model: model, **parameters)
      elsif source[:file_uri]
        analyze_with_file_uri(file_uri: source[:file_uri], prompt: prompt, model: model, **parameters)
      elsif source[:file] || source[:file_path]
        result = analyze(file: source[:file], file_path: source[:file_path], prompt: prompt, model: model, **parameters)
        result[:response]
      else
        raise ArgumentError, "file, file_path, file_uri, or youtube_url is required"
      end
    end

    # Yields an open video file. A file opened here from `file_path` is
    # closed afterwards; a caller-supplied `file` is left open.
    def with_video_file(file, file_path)
      owned = file_path && !file
      file = File.open(file_path, "rb") if owned
      raise ArgumentError, "file or file_path parameter is required" unless file

      yield file
    ensure
      file.close if owned && file
    end

    # Shared request builder for URI-addressed videos; returns the raw
    # (unwrapped) API response.
    def generate_video_content(file_uri:, mime_type:, prompt:, model:, **parameters)
      parts = [
        { text: prompt },
        { file_data: { mime_type: mime_type, file_uri: file_uri } }
      ]

      post_generate_content(model, parts, parameters)
    end

    # Wraps `parts` in a single-content request, merges the extra parameters
    # and posts it to the model's generateContent endpoint. Returns the raw
    # response.
    def post_generate_content(model, parts, parameters)
      request_params = { contents: [{ parts: parts }] }
      merge_additional_params(request_params, parameters)

      @client.json_post(
        path: "models/#{model}:generateContent",
        parameters: request_params
      )
    end

    # Copies extra parameters onto the request, never overwriting :contents.
    def merge_additional_params(request_params, parameters)
      parameters.each do |key, value|
        next if key == :contents

        request_params[key] = value
      end
    end

    # True when `url` is a String starting with one of the accepted YouTube
    # URL shapes. Non-string input (including nil) is simply invalid.
    def valid_youtube_url?(url)
      return false unless url.is_a?(String)

      YOUTUBE_URL_PATTERNS.any? { |pattern| url.match?(pattern) }
    end

    # Polls the file until its state is ACTIVE.
    #
    # @return [true] once the file is ACTIVE
    # @raise [StandardError] when the state is FAILED, or when the file is
    #   still not ACTIVE after `max_attempts` polls
    def wait_for_file_active(file_name, max_attempts: 30, interval: 2)
      attempts = 0
      loop do
        file_info = @client.files.get(name: file_name)
        state = file_info["state"]

        case state
        when "ACTIVE"
          return true
        when "FAILED"
          raise StandardError, "File processing failed: #{file_info['error']&.dig('message') || 'Unknown error'}"
        else
          # Any other state (e.g. PROCESSING): wait and poll again.
          attempts += 1
          if attempts >= max_attempts
            raise StandardError, "File processing timeout. File is still in #{state} state after #{max_attempts * interval} seconds."
          end
          sleep(interval)
        end
      end
    end

    # Derives the MIME type from the file's extension. Objects without a
    # `path` and unknown extensions default to MP4.
    def determine_video_mime_type(file)
      return DEFAULT_MIME_TYPE unless file.respond_to?(:path)

      MIME_TYPES.fetch(File.extname(file.path).downcase, DEFAULT_MIME_TYPE)
    end
  end
end
|
data/lib/gemini.rb
CHANGED
|
@@ -19,6 +19,7 @@ require_relative "gemini/images"
|
|
|
19
19
|
require_relative "gemini/response"
|
|
20
20
|
require_relative "gemini/documents"
|
|
21
21
|
require_relative "gemini/cached_content"
|
|
22
|
+
require_relative "gemini/video"
|
|
22
23
|
module Gemini
|
|
23
24
|
class Error < StandardError; end
|
|
24
25
|
class ConfigurationError < Error; end
|
metadata
CHANGED
|
@@ -1,14 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: ruby-gemini-api
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.
|
|
4
|
+
version: 0.1.6
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- rira100000000
|
|
8
|
-
autorequire:
|
|
9
8
|
bindir: bin
|
|
10
9
|
cert_chain: []
|
|
11
|
-
date:
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
12
11
|
dependencies:
|
|
13
12
|
- !ruby/object:Gem::Dependency
|
|
14
13
|
name: faraday
|
|
@@ -163,6 +162,7 @@ files:
|
|
|
163
162
|
- lib/gemini/threads.rb
|
|
164
163
|
- lib/gemini/tool_definition.rb
|
|
165
164
|
- lib/gemini/version.rb
|
|
165
|
+
- lib/gemini/video.rb
|
|
166
166
|
- lib/ruby/gemini.rb
|
|
167
167
|
homepage: https://github.com/rira100000000/ruby-gemini-api
|
|
168
168
|
licenses:
|
|
@@ -172,7 +172,6 @@ metadata:
|
|
|
172
172
|
source_code_uri: https://github.com/rira100000000/ruby-gemini-api
|
|
173
173
|
changelog_uri: https://github.com/rira100000000/ruby-gemini-api/blob/main/CHANGELOG.md
|
|
174
174
|
rubygems_mfa_required: 'true'
|
|
175
|
-
post_install_message:
|
|
176
175
|
rdoc_options: []
|
|
177
176
|
require_paths:
|
|
178
177
|
- lib
|
|
@@ -187,8 +186,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
187
186
|
- !ruby/object:Gem::Version
|
|
188
187
|
version: '0'
|
|
189
188
|
requirements: []
|
|
190
|
-
rubygems_version: 3.
|
|
191
|
-
signing_key:
|
|
189
|
+
rubygems_version: 3.7.2
|
|
192
190
|
specification_version: 4
|
|
193
191
|
summary: Ruby client for Google's Gemini API
|
|
194
192
|
test_files: []
|