ruby-gemini-api 0.1.5 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -1
- data/README.md +160 -23
- data/lib/gemini/audio.rb +1 -1
- data/lib/gemini/cached_content.rb +1 -1
- data/lib/gemini/client.rb +12 -7
- data/lib/gemini/documents.rb +2 -2
- data/lib/gemini/files.rb +11 -1
- data/lib/gemini/threads.rb +1 -1
- data/lib/gemini/version.rb +1 -1
- data/lib/gemini/video.rb +341 -0
- data/lib/gemini.rb +1 -0
- metadata +2 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 80e6cd429265c5341e1efcd3e701f9643cdb4785f18826234ec879178d4a236e
|
|
4
|
+
data.tar.gz: c9a9c5201b616ce2d534393c1d99240563b9d36229dac4f564f71b4ed82ee42b
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: b1ac2fe8cf4dacad20f21a7eab1872fa496a797b76d9af12cb401c5f563aeaed9cd58e4323334fcd6bbf67d74273902b5f9f076a5a44833474b4f3a4e8409148
|
|
7
|
+
data.tar.gz: 17693cd9f1a87ad1fdf21c26153d1ac351ca9137c99e1c97cddd2524333db21190b0bbf5c08b995939c950338f0d5bf4607263a2ae0bc92c0343b28d439c2a6f
|
data/CHANGELOG.md
CHANGED
|
@@ -17,4 +17,11 @@
|
|
|
17
17
|
|
|
18
18
|
## [0.1.5] - 2025-11-13
|
|
19
19
|
- Add support for URL Context tool
|
|
20
|
-
- Add simplified method for accessing grounding search sources
|
|
20
|
+
- Add simplified method for accessing grounding search sources
|
|
21
|
+
|
|
22
|
+
## [0.1.6] - 2025-12-11
|
|
23
|
+
- Add support for video understanding
|
|
24
|
+
- Analyze local video files (Files API and inline data)
|
|
25
|
+
- Analyze YouTube videos
|
|
26
|
+
- Helper methods: describe, ask, extract_timestamps, analyze_segment
|
|
27
|
+
- Support for MP4, MPEG, MOV, AVI, FLV, WebM, WMV, 3GPP formats
|
data/README.md
CHANGED
|
@@ -11,6 +11,7 @@ This project is inspired by and pays homage to [ruby-openai](https://github.com/
|
|
|
11
11
|
- Chat functionality with conversation history
|
|
12
12
|
- Streaming responses for real-time text generation
|
|
13
13
|
- Audio transcription capabilities
|
|
14
|
+
- Video understanding (including YouTube videos)
|
|
14
15
|
- Thread and message management for chat applications
|
|
15
16
|
- Runs management for executing AI tasks
|
|
16
17
|
- Convenient Response object for easy access to generated content
|
|
@@ -43,7 +44,7 @@ user_prompt = "Tell me the current weather in Tokyo."
|
|
|
43
44
|
# Send request with the defined tools
|
|
44
45
|
response = client.generate_content(
|
|
45
46
|
user_prompt,
|
|
46
|
-
model: "gemini-
|
|
47
|
+
model: "gemini-2.5-flash", # Or any model that supports function calling
|
|
47
48
|
tools: tools
|
|
48
49
|
)
|
|
49
50
|
|
|
@@ -126,7 +127,7 @@ client = Gemini::Client.new(ENV['GEMINI_API_KEY'])
|
|
|
126
127
|
# Generate text
|
|
127
128
|
response = client.generate_content(
|
|
128
129
|
"What are the main features of Ruby programming language?",
|
|
129
|
-
model: "gemini-2.
|
|
130
|
+
model: "gemini-2.5-flash"
|
|
130
131
|
)
|
|
131
132
|
|
|
132
133
|
# Access the generated content using Response object
|
|
@@ -147,7 +148,7 @@ client = Gemini::Client.new(ENV['GEMINI_API_KEY'])
|
|
|
147
148
|
# Stream response in real-time
|
|
148
149
|
client.generate_content_stream(
|
|
149
150
|
"Tell me a story about a programmer who loves Ruby",
|
|
150
|
-
model: "gemini-2.
|
|
151
|
+
model: "gemini-2.5-flash"
|
|
151
152
|
) do |chunk|
|
|
152
153
|
print chunk
|
|
153
154
|
$stdout.flush
|
|
@@ -170,7 +171,7 @@ contents = [
|
|
|
170
171
|
|
|
171
172
|
# Get response with conversation history
|
|
172
173
|
response = client.chat(parameters: {
|
|
173
|
-
model: "gemini-2.
|
|
174
|
+
model: "gemini-2.5-flash",
|
|
174
175
|
contents: contents
|
|
175
176
|
})
|
|
176
177
|
|
|
@@ -198,7 +199,7 @@ system_instruction = "You are a Ruby programming expert who provides concise cod
|
|
|
198
199
|
|
|
199
200
|
# Use system instructions with chat
|
|
200
201
|
response = client.chat(parameters: {
|
|
201
|
-
model: "gemini-2.
|
|
202
|
+
model: "gemini-2.5-flash",
|
|
202
203
|
system_instruction: { parts: [{ text: system_instruction }] },
|
|
203
204
|
contents: [{ role: "user", parts: [{ text: "How do I write a simple web server in Ruby?" }] }]
|
|
204
205
|
})
|
|
@@ -225,7 +226,7 @@ response = client.generate_content(
|
|
|
225
226
|
{ type: "text", text: "Describe what you see in this image" },
|
|
226
227
|
{ type: "image_file", image_file: { file_path: "path/to/image.jpg" } }
|
|
227
228
|
],
|
|
228
|
-
model: "gemini-2.
|
|
229
|
+
model: "gemini-2.5-flash"
|
|
229
230
|
)
|
|
230
231
|
|
|
231
232
|
# Access the description using Response object
|
|
@@ -256,7 +257,7 @@ response = client.generate_content(
|
|
|
256
257
|
{ text: "Describe this image in detail" },
|
|
257
258
|
{ file_data: { mime_type: "image/jpeg", file_uri: file_uri } }
|
|
258
259
|
],
|
|
259
|
-
model: "gemini-2.
|
|
260
|
+
model: "gemini-2.5-flash"
|
|
260
261
|
)
|
|
261
262
|
|
|
262
263
|
# Process the response using Response object
|
|
@@ -286,7 +287,7 @@ client = Gemini::Client.new(ENV['GEMINI_API_KEY'])
|
|
|
286
287
|
# Use Google Search to get real-time information
|
|
287
288
|
response = client.generate_content(
|
|
288
289
|
"Who won the euro 2024?",
|
|
289
|
-
model: "gemini-2.
|
|
290
|
+
model: "gemini-2.5-flash",
|
|
290
291
|
tools: [{ google_search: {} }]
|
|
291
292
|
)
|
|
292
293
|
|
|
@@ -327,7 +328,7 @@ end
|
|
|
327
328
|
```ruby
|
|
328
329
|
response = client.generate_content(
|
|
329
330
|
"What are the latest AI developments in 2024?",
|
|
330
|
-
model: "gemini-2.
|
|
331
|
+
model: "gemini-2.5-flash",
|
|
331
332
|
tools: [{ google_search: {} }]
|
|
332
333
|
)
|
|
333
334
|
|
|
@@ -550,7 +551,7 @@ client = Gemini::Client.new(ENV['GEMINI_API_KEY'])
|
|
|
550
551
|
# Transcribe audio file (note: file size limit is 20MB for direct upload)
|
|
551
552
|
response = client.audio.transcribe(
|
|
552
553
|
parameters: {
|
|
553
|
-
model: "gemini-
|
|
554
|
+
model: "gemini-2.5-flash",
|
|
554
555
|
file: File.open("audio_file.mp3", "rb"),
|
|
555
556
|
language: "en",
|
|
556
557
|
content_text: "Transcribe this audio clip"
|
|
@@ -582,7 +583,7 @@ file_name = upload_result["file"]["name"]
|
|
|
582
583
|
# Use the file ID for transcription
|
|
583
584
|
response = client.audio.transcribe(
|
|
584
585
|
parameters: {
|
|
585
|
-
model: "gemini-
|
|
586
|
+
model: "gemini-2.5-flash",
|
|
586
587
|
file_uri: file_uri,
|
|
587
588
|
language: "en"
|
|
588
589
|
}
|
|
@@ -601,6 +602,137 @@ client.files.delete(name: file_name)
|
|
|
601
602
|
|
|
602
603
|
For more examples, check out the `demo/file_audio_demo.rb` file included with the gem.
|
|
603
604
|
|
|
605
|
+
### Video Understanding
|
|
606
|
+
|
|
607
|
+
Gemini API can understand video content, enabling description, segmentation, information extraction, and question answering. It can process videos up to 2 hours long.
|
|
608
|
+
|
|
609
|
+
#### Basic Usage (Upload via Files API)
|
|
610
|
+
|
|
611
|
+
For video files larger than 20MB or files you want to reuse multiple times, uploading via Files API is recommended:
|
|
612
|
+
|
|
613
|
+
```ruby
|
|
614
|
+
require 'gemini'
|
|
615
|
+
|
|
616
|
+
client = Gemini::Client.new(ENV['GEMINI_API_KEY'])
|
|
617
|
+
|
|
618
|
+
# Upload and analyze a video file
|
|
619
|
+
result = client.video.analyze(
|
|
620
|
+
file_path: "path/to/video.mp4",
|
|
621
|
+
prompt: "Describe this video in detail"
|
|
622
|
+
)
|
|
623
|
+
|
|
624
|
+
response = result[:response]
|
|
625
|
+
|
|
626
|
+
if response.success?
|
|
627
|
+
puts response.text
|
|
628
|
+
else
|
|
629
|
+
puts "Video analysis failed: #{response.error}"
|
|
630
|
+
end
|
|
631
|
+
|
|
632
|
+
# File information (optional)
|
|
633
|
+
puts "File URI: #{result[:file_uri]}"
|
|
634
|
+
puts "File name: #{result[:file_name]}"
|
|
635
|
+
```
|
|
636
|
+
|
|
637
|
+
#### Analyze as Inline Data (Videos under 20MB)
|
|
638
|
+
|
|
639
|
+
Small video files can be Base64-encoded and sent inline:
|
|
640
|
+
|
|
641
|
+
```ruby
|
|
642
|
+
# Analyze a video under 20MB inline
|
|
643
|
+
response = client.video.analyze_inline(
|
|
644
|
+
file_path: "path/to/small_video.mp4",
|
|
645
|
+
prompt: "What is happening in this video?"
|
|
646
|
+
)
|
|
647
|
+
|
|
648
|
+
if response.success?
|
|
649
|
+
puts response.text
|
|
650
|
+
end
|
|
651
|
+
```
|
|
652
|
+
|
|
653
|
+
#### YouTube Video Analysis
|
|
654
|
+
|
|
655
|
+
You can directly analyze public YouTube videos (private and unlisted videos are not supported):
|
|
656
|
+
|
|
657
|
+
```ruby
|
|
658
|
+
# Analyze a video using YouTube URL
|
|
659
|
+
response = client.video.analyze_youtube(
|
|
660
|
+
url: "https://www.youtube.com/watch?v=XXXXX",
|
|
661
|
+
prompt: "What are the three main points of this video?"
|
|
662
|
+
)
|
|
663
|
+
|
|
664
|
+
if response.success?
|
|
665
|
+
puts response.text
|
|
666
|
+
end
|
|
667
|
+
```
|
|
668
|
+
|
|
669
|
+
#### Helper Methods
|
|
670
|
+
|
|
671
|
+
Helper methods are provided for common operations:
|
|
672
|
+
|
|
673
|
+
```ruby
|
|
674
|
+
# Get video description
|
|
675
|
+
response = client.video.describe(file_path: "video.mp4")
|
|
676
|
+
puts response.text
|
|
677
|
+
|
|
678
|
+
# Get YouTube video description
|
|
679
|
+
response = client.video.describe(youtube_url: "https://youtube.com/...")
|
|
680
|
+
puts response.text
|
|
681
|
+
|
|
682
|
+
# Ask questions about a video
|
|
683
|
+
response = client.video.ask(
|
|
684
|
+
file_uri: result[:file_uri],
|
|
685
|
+
question: "Who appears in this video?"
|
|
686
|
+
)
|
|
687
|
+
puts response.text
|
|
688
|
+
|
|
689
|
+
# Extract timestamps
|
|
690
|
+
response = client.video.extract_timestamps(
|
|
691
|
+
file_uri: result[:file_uri],
|
|
692
|
+
query: "important scenes"
|
|
693
|
+
)
|
|
694
|
+
puts response.text
|
|
695
|
+
```
|
|
696
|
+
|
|
697
|
+
#### Video Segment Analysis
|
|
698
|
+
|
|
699
|
+
You can analyze only a portion of a video:
|
|
700
|
+
|
|
701
|
+
```ruby
|
|
702
|
+
# Analyze a specific segment of the video
|
|
703
|
+
response = client.video.analyze_segment(
|
|
704
|
+
file_uri: result[:file_uri],
|
|
705
|
+
prompt: "What is happening in this scene?",
|
|
706
|
+
start_offset: "30s",
|
|
707
|
+
end_offset: "60s"
|
|
708
|
+
)
|
|
709
|
+
|
|
710
|
+
if response.success?
|
|
711
|
+
puts response.text
|
|
712
|
+
end
|
|
713
|
+
```
|
|
714
|
+
|
|
715
|
+
#### Supported Video Formats
|
|
716
|
+
|
|
717
|
+
- MP4 - video/mp4
|
|
718
|
+
- MPEG - video/mpeg
|
|
719
|
+
- MOV - video/quicktime
|
|
720
|
+
- AVI - video/x-msvideo
|
|
721
|
+
- FLV - video/x-flv
|
|
722
|
+
- MPG - video/mpeg
|
|
723
|
+
- WebM - video/webm
|
|
724
|
+
- WMV - video/x-ms-wmv
|
|
725
|
+
- 3GPP - video/3gpp
|
|
726
|
+
|
|
727
|
+
#### Limitations
|
|
728
|
+
|
|
729
|
+
- 2 million context window: up to approximately 2 hours
|
|
730
|
+
- 1 million context window: up to approximately 1 hour
|
|
731
|
+
- YouTube free plan: cannot process more than 8 hours of video per day
|
|
732
|
+
- Approximately 300 tokens consumed per second of video (at default resolution)
|
|
733
|
+
|
|
734
|
+
Demo application can be found in `demo/video_demo.rb`.
|
|
735
|
+
|
|
604
736
|
### Document Processing
|
|
605
737
|
|
|
606
738
|
Gemini API can process long documents (up to 3,600 pages), including PDFs. Gemini models understand both text and images within the document, enabling you to analyze, summarize, and extract information.
|
|
@@ -614,7 +746,7 @@ client = Gemini::Client.new(ENV['GEMINI_API_KEY'])
|
|
|
614
746
|
result = client.documents.process(
|
|
615
747
|
file_path: "path/to/document.pdf",
|
|
616
748
|
prompt: "Summarize this document in three key points",
|
|
617
|
-
model: "gemini-
|
|
749
|
+
model: "gemini-2.5-flash"
|
|
618
750
|
)
|
|
619
751
|
|
|
620
752
|
response = result[:response]
|
|
@@ -643,7 +775,7 @@ file_path = "path/to/document.pdf"
|
|
|
643
775
|
thread_result = client.chat_with_file(
|
|
644
776
|
file_path,
|
|
645
777
|
"Please provide an overview of this document",
|
|
646
|
-
model: "gemini-
|
|
778
|
+
model: "gemini-2.5-flash"
|
|
647
779
|
)
|
|
648
780
|
|
|
649
781
|
# Get the thread ID (for continuing the conversation)
|
|
@@ -690,7 +822,7 @@ Demo applications can be found in `demo/document_chat_demo.rb` and `demo/documen
|
|
|
690
822
|
|
|
691
823
|
Context caching allows you to preprocess and store inputs like large documents or images with the Gemini API, then reuse them across multiple requests. This saves processing time and token usage when asking different questions about the same content.
|
|
692
824
|
|
|
693
|
-
**Important**: Context caching requires a minimum input of 32,768 tokens. The maximum token count matches the context window size of the model you are using. Caches automatically expire after 48 hours, but you can set a custom TTL (Time To Live).
|
|
825
|
+
**Important**: Context caching requires a minimum input of 32,768 tokens. The maximum token count matches the context window size of the model you are using. Caches automatically expire after 48 hours, but you can set a custom TTL (Time To Live). Using stable model versions like gemini-2.5-flash is recommended.
|
|
694
826
|
|
|
695
827
|
```ruby
|
|
696
828
|
require 'gemini'
|
|
@@ -702,7 +834,7 @@ cache_result = client.documents.cache(
|
|
|
702
834
|
file_path: "path/to/large_document.pdf",
|
|
703
835
|
system_instruction: "You are a document analysis expert. Please understand the content thoroughly and answer questions accurately.",
|
|
704
836
|
ttl: "86400s", # 24 hours (in seconds)
|
|
705
|
-
model: "gemini-
|
|
837
|
+
model: "gemini-2.5-flash"
|
|
706
838
|
)
|
|
707
839
|
|
|
708
840
|
# Get the cache name
|
|
@@ -713,7 +845,7 @@ puts "Cache name: #{cache_name}"
|
|
|
713
845
|
response = client.generate_content_with_cache(
|
|
714
846
|
"What are the key findings in this document?",
|
|
715
847
|
cached_content: cache_name,
|
|
716
|
-
model: "gemini-
|
|
848
|
+
model: "gemini-2.5-flash"
|
|
717
849
|
)
|
|
718
850
|
|
|
719
851
|
if response.success?
|
|
@@ -865,7 +997,7 @@ require 'gemini'
|
|
|
865
997
|
client = Gemini::Client.new(ENV['GEMINI_API_KEY'])
|
|
866
998
|
|
|
867
999
|
# Create a new thread
|
|
868
|
-
thread = client.threads.create(parameters: { model: "gemini-2.
|
|
1000
|
+
thread = client.threads.create(parameters: { model: "gemini-2.5-flash" })
|
|
869
1001
|
thread_id = thread["id"]
|
|
870
1002
|
|
|
871
1003
|
# Add a message to the thread
|
|
@@ -901,7 +1033,7 @@ client = Gemini::Client.new(ENV['GEMINI_API_KEY'])
|
|
|
901
1033
|
|
|
902
1034
|
response = client.generate_content(
|
|
903
1035
|
"Tell me about the Ruby programming language",
|
|
904
|
-
model: "gemini-2.
|
|
1036
|
+
model: "gemini-2.5-flash"
|
|
905
1037
|
)
|
|
906
1038
|
|
|
907
1039
|
# Basic response information
|
|
@@ -977,8 +1109,9 @@ The gem includes several demo applications that showcase its functionality:
|
|
|
977
1109
|
- `demo/demo.rb` - Basic text generation and chat
|
|
978
1110
|
- `demo/stream_demo.rb` - Streaming text generation
|
|
979
1111
|
- `demo/audio_demo.rb` - Audio transcription
|
|
1112
|
+
- `demo/video_demo.rb` - Video understanding (local files and YouTube)
|
|
980
1113
|
- `demo/vision_demo.rb` - Image recognition
|
|
981
|
-
- `demo/image_generation_demo.rb` - Image generation
|
|
1114
|
+
- `demo/image_generation_demo.rb` - Image generation
|
|
982
1115
|
- `demo/file_vision_demo.rb` - Image recognition with large image files
|
|
983
1116
|
- `demo/file_audio_demo.rb` - Audio transcription with large audio files
|
|
984
1117
|
- `demo/structured_output_demo.rb` - Structured JSON output with schema
|
|
@@ -1005,6 +1138,12 @@ ruby demo/audio_demo.rb path/to/audio/file.mp3
|
|
|
1005
1138
|
# Audio transcription with over 20MB audio file
|
|
1006
1139
|
ruby demo/file_audio_demo.rb path/to/audio/file.mp3
|
|
1007
1140
|
|
|
1141
|
+
# Video understanding (local file)
|
|
1142
|
+
ruby demo/video_demo.rb path/to/video/file.mp4
|
|
1143
|
+
|
|
1144
|
+
# Video understanding (YouTube)
|
|
1145
|
+
ruby demo/video_demo.rb --youtube https://www.youtube.com/watch?v=XXXXX
|
|
1146
|
+
|
|
1008
1147
|
# Image recognition
|
|
1009
1148
|
ruby demo/vision_demo.rb path/to/image/file.jpg
|
|
1010
1149
|
|
|
@@ -1034,10 +1173,8 @@ ruby demo/document_cache_demo.rb path/to/document.pdf
|
|
|
1034
1173
|
|
|
1035
1174
|
The library supports various Gemini models:
|
|
1036
1175
|
|
|
1037
|
-
- `gemini-2.
|
|
1038
|
-
- `gemini-2.
|
|
1039
|
-
- `gemini-2.0-pro`
|
|
1040
|
-
- `gemini-1.5-flash`
|
|
1176
|
+
- `gemini-2.5-flash`
|
|
1177
|
+
- `gemini-2.5-pro`
|
|
1041
1178
|
|
|
1042
1179
|
## Requirements
|
|
1043
1180
|
|
data/lib/gemini/audio.rb
CHANGED
|
@@ -8,7 +8,7 @@ module Gemini
|
|
|
8
8
|
def transcribe(parameters: {})
|
|
9
9
|
file = parameters.delete(:file)
|
|
10
10
|
file_uri = parameters.delete(:file_uri)
|
|
11
|
-
model = parameters.delete(:model) || "gemini-
|
|
11
|
+
model = parameters.delete(:model) || "gemini-2.5-flash"
|
|
12
12
|
language = parameters.delete(:language)
|
|
13
13
|
content_text = parameters.delete(:content_text) || "Transcribe this audio clip"
|
|
14
14
|
|
|
@@ -24,7 +24,7 @@ module Gemini
|
|
|
24
24
|
mime_type ||= file_path ? @client.determine_mime_type(file_path) : "application/octet-stream"
|
|
25
25
|
|
|
26
26
|
# モデルを取得(models/プレフィックスを追加)
|
|
27
|
-
model_name = model || parameters[:model] || "gemini-
|
|
27
|
+
model_name = model || parameters[:model] || "gemini-2.5-flash"
|
|
28
28
|
model_name = "models/#{model_name}" unless model_name.start_with?("models/")
|
|
29
29
|
|
|
30
30
|
# キャッシュリクエストを構築(キャメルケースに注意)
|
data/lib/gemini/client.rb
CHANGED
|
@@ -59,6 +59,11 @@ module Gemini
|
|
|
59
59
|
@documents ||= Gemini::Documents.new(client: self)
|
|
60
60
|
end
|
|
61
61
|
|
|
62
|
+
# 動画処理アクセサ
|
|
63
|
+
def video
|
|
64
|
+
@video ||= Gemini::Video.new(client: self)
|
|
65
|
+
end
|
|
66
|
+
|
|
62
67
|
# キャッシュ管理アクセサ
|
|
63
68
|
def cached_content
|
|
64
69
|
@cached_content ||= Gemini::CachedContent.new(client: self)
|
|
@@ -77,7 +82,7 @@ module Gemini
|
|
|
77
82
|
# OpenAI chat-like text generation method for Gemini API
|
|
78
83
|
# Extended to support streaming callbacks
|
|
79
84
|
def chat(parameters: {}, &stream_callback)
|
|
80
|
-
model = parameters.delete(:model) || "gemini-2.
|
|
85
|
+
model = parameters.delete(:model) || "gemini-2.5-flash"
|
|
81
86
|
|
|
82
87
|
# If streaming callback is provided
|
|
83
88
|
if block_given?
|
|
@@ -117,7 +122,7 @@ module Gemini
|
|
|
117
122
|
# Helper methods for convenience
|
|
118
123
|
|
|
119
124
|
# Method with usage similar to OpenAI's chat
|
|
120
|
-
def generate_content(prompt, model: "gemini-2.
|
|
125
|
+
def generate_content(prompt, model: "gemini-2.5-flash", system_instruction: nil,
|
|
121
126
|
response_mime_type: nil, response_schema: nil, temperature: 0.5, tools: nil,
|
|
122
127
|
url_context: false, google_search: false, **parameters, &stream_callback)
|
|
123
128
|
content = format_content(prompt)
|
|
@@ -153,7 +158,7 @@ module Gemini
|
|
|
153
158
|
end
|
|
154
159
|
|
|
155
160
|
# Streaming text generation
|
|
156
|
-
def generate_content_stream(prompt, model: "gemini-2.
|
|
161
|
+
def generate_content_stream(prompt, model: "gemini-2.5-flash", system_instruction: nil,
|
|
157
162
|
response_mime_type: nil, response_schema: nil, temperature: 0.5,
|
|
158
163
|
url_context: false, google_search: false, **parameters, &block)
|
|
159
164
|
raise ArgumentError, "Block is required for streaming" unless block_given?
|
|
@@ -190,7 +195,7 @@ module Gemini
|
|
|
190
195
|
end
|
|
191
196
|
|
|
192
197
|
# ファイルを使った会話(複数ファイル対応)
|
|
193
|
-
def chat_with_multimodal(file_paths, prompt, model: "gemini-
|
|
198
|
+
def chat_with_multimodal(file_paths, prompt, model: "gemini-2.5-flash", **parameters)
|
|
194
199
|
# スレッドを作成
|
|
195
200
|
thread = threads.create(parameters: { model: model })
|
|
196
201
|
thread_id = thread["id"]
|
|
@@ -258,7 +263,7 @@ module Gemini
|
|
|
258
263
|
end
|
|
259
264
|
end
|
|
260
265
|
|
|
261
|
-
def generate_content_with_cache(prompt, cached_content:, model: "gemini-
|
|
266
|
+
def generate_content_with_cache(prompt, cached_content:, model: "gemini-2.5-flash", **parameters)
|
|
262
267
|
# モデル名にmodels/プレフィックスを追加
|
|
263
268
|
model_name = model.start_with?("models/") ? model : "models/#{model}"
|
|
264
269
|
|
|
@@ -289,12 +294,12 @@ module Gemini
|
|
|
289
294
|
end
|
|
290
295
|
|
|
291
296
|
# 単一ファイルのヘルパー
|
|
292
|
-
def chat_with_file(file_path, prompt, model: "gemini-
|
|
297
|
+
def chat_with_file(file_path, prompt, model: "gemini-2.5-flash", **parameters)
|
|
293
298
|
chat_with_multimodal([file_path], prompt, model: model, **parameters)
|
|
294
299
|
end
|
|
295
300
|
|
|
296
301
|
# ファイルをアップロードして質問するシンプルなヘルパー
|
|
297
|
-
def upload_and_process_file(file_path, prompt, content_type: nil, model: "gemini-
|
|
302
|
+
def upload_and_process_file(file_path, prompt, content_type: nil, model: "gemini-2.5-flash", **parameters)
|
|
298
303
|
# MIMEタイプを自動判定
|
|
299
304
|
mime_type = content_type || determine_mime_type(file_path)
|
|
300
305
|
|
data/lib/gemini/documents.rb
CHANGED
|
@@ -5,7 +5,7 @@ module Gemini
|
|
|
5
5
|
end
|
|
6
6
|
|
|
7
7
|
# ドキュメントをアップロードして質問する基本メソッド
|
|
8
|
-
def process(file: nil, file_path: nil, prompt:, model: "gemini-
|
|
8
|
+
def process(file: nil, file_path: nil, prompt:, model: "gemini-2.5-flash", **parameters)
|
|
9
9
|
# ファイルパスが指定されている場合はファイルを開く
|
|
10
10
|
if file_path && !file
|
|
11
11
|
file = File.open(file_path, "rb")
|
|
@@ -70,7 +70,7 @@ module Gemini
|
|
|
70
70
|
file_name = upload_result["file"]["name"]
|
|
71
71
|
|
|
72
72
|
# モデル名の取得と調整
|
|
73
|
-
model = parameters[:model] || "gemini-
|
|
73
|
+
model = parameters[:model] || "gemini-2.5-flash"
|
|
74
74
|
model = "models/#{model}" unless model.start_with?("models/")
|
|
75
75
|
|
|
76
76
|
# キャッシュに保存(パラメータの名前に注意)
|
data/lib/gemini/files.rb
CHANGED
|
@@ -125,11 +125,21 @@ module Gemini
|
|
|
125
125
|
when ".mp4"
|
|
126
126
|
"video/mp4"
|
|
127
127
|
when ".avi"
|
|
128
|
-
"video/
|
|
128
|
+
"video/x-msvideo"
|
|
129
129
|
when ".mov"
|
|
130
130
|
"video/quicktime"
|
|
131
131
|
when ".mkv"
|
|
132
132
|
"video/x-matroska"
|
|
133
|
+
when ".mpeg", ".mpg"
|
|
134
|
+
"video/mpeg"
|
|
135
|
+
when ".webm"
|
|
136
|
+
"video/webm"
|
|
137
|
+
when ".wmv"
|
|
138
|
+
"video/x-ms-wmv"
|
|
139
|
+
when ".flv"
|
|
140
|
+
"video/x-flv"
|
|
141
|
+
when ".3gp", ".3gpp"
|
|
142
|
+
"video/3gpp"
|
|
133
143
|
when ".pdf"
|
|
134
144
|
"application/pdf"
|
|
135
145
|
when ".txt"
|
data/lib/gemini/threads.rb
CHANGED
data/lib/gemini/version.rb
CHANGED
data/lib/gemini/video.rb
ADDED
|
@@ -0,0 +1,341 @@
|
|
|
1
|
+
module Gemini
  # Video understanding helpers built on the Gemini `generateContent` endpoint.
  #
  # A video can be supplied in four ways:
  # * a local file uploaded through the Files API (#analyze),
  # * an already uploaded file referenced by URI (#analyze_with_file_uri),
  # * a YouTube URL (#analyze_youtube),
  # * a small local file sent inline as Base64 (#analyze_inline).
  #
  # #describe, #ask and #extract_timestamps are convenience wrappers that pick
  # one of the first three based on which keyword was given.
  class Video
    # File extensions recognised as video.
    SUPPORTED_FORMATS = %w[.mp4 .mpeg .mov .avi .flv .mpg .webm .wmv .3gp .3gpp].freeze

    # Extension => MIME type lookup used by #determine_video_mime_type.
    MIME_TYPES = {
      ".mp4" => "video/mp4",
      ".mpeg" => "video/mpeg",
      ".mpg" => "video/mpeg",
      ".mov" => "video/quicktime",
      ".avi" => "video/x-msvideo",
      ".flv" => "video/x-flv",
      ".webm" => "video/webm",
      ".wmv" => "video/x-ms-wmv",
      ".3gp" => "video/3gpp",
      ".3gpp" => "video/3gpp"
    }.freeze

    # MIME type assumed when the extension is unknown or unavailable.
    DEFAULT_MIME_TYPE = "video/mp4"

    # Largest payload accepted by #analyze_inline (20MB).
    MAX_INLINE_BYTES = 20 * 1024 * 1024

    # Accepted YouTube URL shapes. Anchored with \A (not ^) so that a
    # multi-line string cannot smuggle a valid-looking URL on a later line.
    YOUTUBE_URL_PATTERNS = [
      %r{\Ahttps?://(?:www\.)?youtube\.com/watch\?v=[\w-]+},
      %r{\Ahttps?://youtu\.be/[\w-]+},
      %r{\Ahttps?://(?:www\.)?youtube\.com/embed/[\w-]+},
      %r{\Ahttps?://(?:www\.)?youtube\.com/v/[\w-]+},
      %r{\Ahttps?://(?:www\.)?youtube\.com/shorts/[\w-]+}
    ].freeze

    # @param client [Object] API client; must provide `files` (with `upload`
    #   and `get`) and `json_post(path:, parameters:)`.
    def initialize(client:)
      @client = client
    end

    # Uploads a video through the Files API, waits until it is ACTIVE and then
    # asks the model about it. Recommended for files of 20MB or more, or for
    # files that will be reused.
    #
    # @param file [IO, nil] an already opened file (left open for the caller)
    # @param file_path [String, nil] path opened (and closed) by this method
    #   when `file` is not given
    # @param prompt [String] instruction sent together with the video
    # @param model [String] model name, with or without the "models/" prefix
    # @param parameters [Hash] extra top-level request fields; `:mime_type`
    #   overrides MIME detection, `:contents` is ignored
    # @return [Hash] `{ response: Gemini::Response, file_uri:, file_name: }`
    # @raise [ArgumentError] when neither `file` nor `file_path` is given
    # @raise [StandardError] when file processing fails or times out
    def analyze(file: nil, file_path: nil, prompt:, model: "gemini-2.5-flash", **parameters)
      with_video_file(file, file_path) do |io|
        mime_type = parameters.delete(:mime_type) || determine_video_mime_type(io)

        upload_result = @client.files.upload(file: io)
        file_uri = upload_result["file"]["uri"]
        file_name = upload_result["file"]["name"]

        # The uploaded file cannot be referenced until processing has finished.
        wait_for_file_active(file_name)

        raw_response = generate_video_content(
          file_uri: file_uri,
          mime_type: mime_type,
          prompt: prompt,
          model: model,
          **parameters
        )

        {
          response: Gemini::Response.new(raw_response),
          file_uri: file_uri,
          file_name: file_name
        }
      end
    end

    # Analyzes a video that was already uploaded, referenced by its file URI.
    #
    # @param file_uri [String] URI returned by the Files API
    # @param prompt [String] instruction sent together with the video
    # @param model [String] model name, with or without the "models/" prefix
    # @param mime_type [String] MIME type of the uploaded video
    # @param parameters [Hash] extra top-level request fields
    # @return [Gemini::Response]
    def analyze_with_file_uri(file_uri:, prompt:, model: "gemini-2.5-flash", mime_type: "video/mp4", **parameters)
      raw_response = generate_video_content(
        file_uri: file_uri,
        mime_type: mime_type,
        prompt: prompt,
        model: model,
        **parameters
      )

      Gemini::Response.new(raw_response)
    end

    # Analyzes a YouTube video by URL.
    #
    # @param url [String] YouTube URL (watch, youtu.be, embed, v or shorts form)
    # @param prompt [String] instruction sent together with the video
    # @param model [String] model name, with or without the "models/" prefix
    # @param parameters [Hash] extra top-level request fields
    # @return [Gemini::Response]
    # @raise [ArgumentError] when `url` does not look like a YouTube video URL
    def analyze_youtube(url:, prompt:, model: "gemini-2.5-flash", **parameters)
      unless valid_youtube_url?(url)
        raise ArgumentError, "Invalid YouTube URL. Only public YouTube videos are supported."
      end

      parts = [
        { text: prompt },
        { file_data: { file_uri: url } }
      ]

      Gemini::Response.new(post_generate_content(model: model, parts: parts, parameters: parameters))
    end

    # Analyzes a small video by sending it inline as Base64 (20MB or less).
    #
    # @param file [IO, nil] an already opened file (left open for the caller);
    #   must respond to `rewind`, `size` and `read`
    # @param file_path [String, nil] path opened (and closed) by this method
    #   when `file` is not given
    # @param prompt [String] instruction sent together with the video
    # @param model [String] model name, with or without the "models/" prefix
    # @param parameters [Hash] extra top-level request fields; `:mime_type`
    #   overrides MIME detection
    # @return [Gemini::Response]
    # @raise [ArgumentError] when no file is given or it exceeds 20MB
    def analyze_inline(file: nil, file_path: nil, prompt:, model: "gemini-2.5-flash", **parameters)
      with_video_file(file, file_path) do |io|
        io.rewind
        if io.size > MAX_INLINE_BYTES
          raise ArgumentError, "File size exceeds 20MB. Use analyze method with Files API instead."
        end

        mime_type = parameters.delete(:mime_type) || determine_video_mime_type(io)

        io.rewind
        # pack("m0") is strict (no line feeds) Base64, i.e. what
        # Base64.strict_encode64 produces, without needing the base64 library.
        encoded = [io.read].pack("m0")

        parts = [
          { text: prompt },
          { inline_data: { mime_type: mime_type, data: encoded } }
        ]

        Gemini::Response.new(post_generate_content(model: model, parts: parts, parameters: parameters))
      end
    end

    # Asks the model for a detailed description of the video.
    #
    # @param language [String] "ja" selects the Japanese prompt; anything else
    #   selects the English prompt
    # @return [Gemini::Response]
    # @raise [ArgumentError] when no video source is given
    def describe(file: nil, file_path: nil, file_uri: nil, youtube_url: nil, model: "gemini-2.5-flash", language: "ja", **parameters)
      prompt = language == "ja" ? "この動画の内容を詳しく説明してください。" : "Describe this video in detail."

      analyze_from_source(
        prompt: prompt, model: model, file: file, file_path: file_path,
        file_uri: file_uri, youtube_url: youtube_url, parameters: parameters
      )
    end

    # Asks the model to list the timestamps (MM:SS) at which `query` appears.
    # The prompt is sent in Japanese.
    #
    # @param query [String] what to look for in the video
    # @return [Gemini::Response]
    # @raise [ArgumentError] when no video source is given
    def extract_timestamps(file: nil, file_path: nil, file_uri: nil, youtube_url: nil, query:, model: "gemini-2.5-flash", **parameters)
      prompt = "動画内で「#{query}」が登場するタイムスタンプを全て抽出してください。MM:SS形式で出力してください。"

      analyze_from_source(
        prompt: prompt, model: model, file: file, file_path: file_path,
        file_uri: file_uri, youtube_url: youtube_url, parameters: parameters
      )
    end

    # Analyzes only a portion of an uploaded video.
    #
    # @param file_uri [String] URI of the video
    # @param prompt [String] instruction sent together with the video
    # @param start_offset [String, nil] clip start, e.g. "30s"
    # @param end_offset [String, nil] clip end, e.g. "60s"
    # @param model [String] model name, with or without the "models/" prefix
    # @param mime_type [String] MIME type of the video
    # @param parameters [Hash] extra top-level request fields
    # @return [Gemini::Response]
    def analyze_segment(file_uri:, prompt:, start_offset: nil, end_offset: nil, model: "gemini-2.5-flash", mime_type: "video/mp4", **parameters)
      video_metadata = {}
      video_metadata[:startOffset] = start_offset if start_offset
      video_metadata[:endOffset] = end_offset if end_offset

      video_part = { file_data: { mime_type: mime_type, file_uri: file_uri } }
      # video_metadata belongs on the part itself, next to file_data; the
      # file_data object only carries the MIME type and the URI.
      video_part[:video_metadata] = video_metadata unless video_metadata.empty?

      parts = [{ text: prompt }, video_part]

      Gemini::Response.new(post_generate_content(model: model, parts: parts, parameters: parameters))
    end

    # Asks a free-form question about the video.
    #
    # @param question [String] the question, used verbatim as the prompt
    # @return [Gemini::Response]
    # @raise [ArgumentError] when no video source is given
    def ask(file: nil, file_path: nil, file_uri: nil, youtube_url: nil, question:, model: "gemini-2.5-flash", **parameters)
      analyze_from_source(
        prompt: question, model: model, file: file, file_path: file_path,
        file_uri: file_uri, youtube_url: youtube_url, parameters: parameters
      )
    end

    private

    # Yields the IO to work on, opening `file_path` only when no `file` was
    # given, and closes it afterwards only if this method opened it.
    def with_video_file(file, file_path)
      opened = File.open(file_path, "rb") if file_path && !file
      io = file || opened
      raise ArgumentError, "file or file_path parameter is required" unless io

      yield io
    ensure
      opened&.close
    end

    # Routes a prompt to the right analyzer; precedence is
    # youtube_url, then file_uri, then file / file_path.
    def analyze_from_source(prompt:, model:, file:, file_path:, file_uri:, youtube_url:, parameters:)
      if youtube_url
        analyze_youtube(url: youtube_url, prompt: prompt, model: model, **parameters)
      elsif file_uri
        analyze_with_file_uri(file_uri: file_uri, prompt: prompt, model: model, **parameters)
      elsif file || file_path
        analyze(file: file, file_path: file_path, prompt: prompt, model: model, **parameters)[:response]
      else
        raise ArgumentError, "file, file_path, file_uri, or youtube_url is required"
      end
    end

    # Sends prompt + uploaded-file reference and returns the raw API response.
    def generate_video_content(file_uri:, mime_type:, prompt:, model:, **parameters)
      parts = [
        { text: prompt },
        { file_data: { mime_type: mime_type, file_uri: file_uri } }
      ]

      post_generate_content(model: model, parts: parts, parameters: parameters)
    end

    # Builds the request body around `parts`, merges the caller's extra
    # parameters and posts it; returns the raw API response.
    def post_generate_content(model:, parts:, parameters:)
      request_params = { contents: [{ parts: parts }] }
      merge_additional_params(request_params, parameters)

      @client.json_post(
        path: "#{model_path(model)}:generateContent",
        parameters: request_params
      )
    end

    # Returns the model resource name, adding "models/" only when missing so
    # that "models/x" is not turned into "models/models/x".
    def model_path(model)
      name = model.to_s
      name.start_with?("models/") ? name : "models/#{name}"
    end

    # Copies extra parameters into the request, never overwriting :contents.
    def merge_additional_params(request_params, parameters)
      parameters.each do |key, value|
        next if key == :contents

        request_params[key] = value
      end
    end

    # True when `url` is a String starting with one of the accepted YouTube
    # URL shapes.
    def valid_youtube_url?(url)
      return false unless url.is_a?(String)

      YOUTUBE_URL_PATTERNS.any? { |pattern| url.match?(pattern) }
    end

    # Polls the Files API until the file state is "ACTIVE".
    #
    # @param file_name [String] file resource name returned by the upload
    # @param max_attempts [Integer] number of polls before giving up
    # @param interval [Numeric] seconds slept between polls
    # @return [true]
    # @raise [StandardError] when the state is "FAILED" or polling times out
    def wait_for_file_active(file_name, max_attempts: 30, interval: 2)
      attempts = 0
      loop do
        file_info = @client.files.get(name: file_name)
        state = file_info["state"]

        case state
        when "ACTIVE"
          return true
        when "FAILED"
          raise StandardError, "File processing failed: #{file_info['error']&.dig('message') || 'Unknown error'}"
        else
          # Any other state is treated as "still processing": wait and retry.
          attempts += 1
          if attempts >= max_attempts
            raise StandardError, "File processing timeout. File is still in #{state} state after #{max_attempts * interval} seconds."
          end
          sleep(interval)
        end
      end
    end

    # Derives the video MIME type from the file's extension; falls back to
    # MP4 when the object has no path or the extension is not recognised.
    def determine_video_mime_type(file)
      return DEFAULT_MIME_TYPE unless file.respond_to?(:path)

      MIME_TYPES.fetch(File.extname(file.path.to_s).downcase, DEFAULT_MIME_TYPE)
    end
  end
end
|
data/lib/gemini.rb
CHANGED
|
@@ -19,6 +19,7 @@ require_relative "gemini/images"
|
|
|
19
19
|
require_relative "gemini/response"
|
|
20
20
|
require_relative "gemini/documents"
|
|
21
21
|
require_relative "gemini/cached_content"
|
|
22
|
+
require_relative "gemini/video"
|
|
22
23
|
module Gemini
|
|
23
24
|
class Error < StandardError; end
|
|
24
25
|
class ConfigurationError < Error; end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: ruby-gemini-api
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.
|
|
4
|
+
version: 0.1.6
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- rira100000000
|
|
@@ -162,6 +162,7 @@ files:
|
|
|
162
162
|
- lib/gemini/threads.rb
|
|
163
163
|
- lib/gemini/tool_definition.rb
|
|
164
164
|
- lib/gemini/version.rb
|
|
165
|
+
- lib/gemini/video.rb
|
|
165
166
|
- lib/ruby/gemini.rb
|
|
166
167
|
homepage: https://github.com/rira100000000/ruby-gemini-api
|
|
167
168
|
licenses:
|