ruby-gemini-api 0.1.5 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e140bb695362a7924c1b633fbe0a5ac0a2dda14880a755157592024f3f4e719e
4
- data.tar.gz: 5b6e2ec5d5be300ba558199c8ff2160e598615cbdb4e744209416fa7322bf0f6
3
+ metadata.gz: 80e6cd429265c5341e1efcd3e701f9643cdb4785f18826234ec879178d4a236e
4
+ data.tar.gz: c9a9c5201b616ce2d534393c1d99240563b9d36229dac4f564f71b4ed82ee42b
5
5
  SHA512:
6
- metadata.gz: a2f3b01548fc4da69d1d1e77943444a164dc781940eeb1e7b8ee1d34eeca34db7f46e532f5b504175000145e11e0d343319da59111b87f2f403a99c08e6517e1
7
- data.tar.gz: a84dcc5431abb1a4aa9753cec5a8e36106aa365cacf88b3d29998622382069f01650f11396a6867ce0b940e5faa4788fcb2eb7c744a27203a034f2918e353416
6
+ metadata.gz: b1ac2fe8cf4dacad20f21a7eab1872fa496a797b76d9af12cb401c5f563aeaed9cd58e4323334fcd6bbf67d74273902b5f9f076a5a44833474b4f3a4e8409148
7
+ data.tar.gz: 17693cd9f1a87ad1fdf21c26153d1ac351ca9137c99e1c97cddd2524333db21190b0bbf5c08b995939c950338f0d5bf4607263a2ae0bc92c0343b28d439c2a6f
data/CHANGELOG.md CHANGED
@@ -17,4 +17,11 @@
17
17
 
18
18
  ## [0.1.5] - 2025-11-13
19
19
  - Add support for URL Context tool
20
- - Add simplified method for accessing grounding search sources
20
+ - Add simplified method for accessing grounding search sources
21
+
22
+ ## [0.1.6] - 2025-12-11
23
+ - Add support for video understanding
24
+ - Analyze local video files (Files API and inline data)
25
+ - Analyze YouTube videos
26
+ - Helper methods: describe, ask, extract_timestamps, analyze_segment
27
+ - Support for MP4, MPEG, MOV, AVI, FLV, WebM, WMV, 3GPP formats
data/README.md CHANGED
@@ -11,6 +11,7 @@ This project is inspired by and pays homage to [ruby-openai](https://github.com/
11
11
  - Chat functionality with conversation history
12
12
  - Streaming responses for real-time text generation
13
13
  - Audio transcription capabilities
14
+ - Video understanding (including YouTube videos)
14
15
  - Thread and message management for chat applications
15
16
  - Runs management for executing AI tasks
16
17
  - Convenient Response object for easy access to generated content
@@ -43,7 +44,7 @@ user_prompt = "Tell me the current weather in Tokyo."
43
44
  # Send request with the defined tools
44
45
  response = client.generate_content(
45
46
  user_prompt,
46
- model: "gemini-1.5-flash", # Or any model that supports function calling
47
+ model: "gemini-2.5-flash", # Or any model that supports function calling
47
48
  tools: tools
48
49
  )
49
50
 
@@ -126,7 +127,7 @@ client = Gemini::Client.new(ENV['GEMINI_API_KEY'])
126
127
  # Generate text
127
128
  response = client.generate_content(
128
129
  "What are the main features of Ruby programming language?",
129
- model: "gemini-2.0-flash-lite"
130
+ model: "gemini-2.5-flash"
130
131
  )
131
132
 
132
133
  # Access the generated content using Response object
@@ -147,7 +148,7 @@ client = Gemini::Client.new(ENV['GEMINI_API_KEY'])
147
148
  # Stream response in real-time
148
149
  client.generate_content_stream(
149
150
  "Tell me a story about a programmer who loves Ruby",
150
- model: "gemini-2.0-flash-lite"
151
+ model: "gemini-2.5-flash"
151
152
  ) do |chunk|
152
153
  print chunk
153
154
  $stdout.flush
@@ -170,7 +171,7 @@ contents = [
170
171
 
171
172
  # Get response with conversation history
172
173
  response = client.chat(parameters: {
173
- model: "gemini-2.0-flash-lite",
174
+ model: "gemini-2.5-flash",
174
175
  contents: contents
175
176
  })
176
177
 
@@ -198,7 +199,7 @@ system_instruction = "You are a Ruby programming expert who provides concise cod
198
199
 
199
200
  # Use system instructions with chat
200
201
  response = client.chat(parameters: {
201
- model: "gemini-2.0-flash-lite",
202
+ model: "gemini-2.5-flash",
202
203
  system_instruction: { parts: [{ text: system_instruction }] },
203
204
  contents: [{ role: "user", parts: [{ text: "How do I write a simple web server in Ruby?" }] }]
204
205
  })
@@ -225,7 +226,7 @@ response = client.generate_content(
225
226
  { type: "text", text: "Describe what you see in this image" },
226
227
  { type: "image_file", image_file: { file_path: "path/to/image.jpg" } }
227
228
  ],
228
- model: "gemini-2.0-flash"
229
+ model: "gemini-2.5-flash"
229
230
  )
230
231
 
231
232
  # Access the description using Response object
@@ -256,7 +257,7 @@ response = client.generate_content(
256
257
  { text: "Describe this image in detail" },
257
258
  { file_data: { mime_type: "image/jpeg", file_uri: file_uri } }
258
259
  ],
259
- model: "gemini-2.0-flash"
260
+ model: "gemini-2.5-flash"
260
261
  )
261
262
 
262
263
  # Process the response using Response object
@@ -286,7 +287,7 @@ client = Gemini::Client.new(ENV['GEMINI_API_KEY'])
286
287
  # Use Google Search to get real-time information
287
288
  response = client.generate_content(
288
289
  "Who won the euro 2024?",
289
- model: "gemini-2.0-flash-lite",
290
+ model: "gemini-2.5-flash",
290
291
  tools: [{ google_search: {} }]
291
292
  )
292
293
 
@@ -327,7 +328,7 @@ end
327
328
  ```ruby
328
329
  response = client.generate_content(
329
330
  "What are the latest AI developments in 2024?",
330
- model: "gemini-2.0-flash-lite",
331
+ model: "gemini-2.5-flash",
331
332
  tools: [{ google_search: {} }]
332
333
  )
333
334
 
@@ -550,7 +551,7 @@ client = Gemini::Client.new(ENV['GEMINI_API_KEY'])
550
551
  # Transcribe audio file (note: file size limit is 20MB for direct upload)
551
552
  response = client.audio.transcribe(
552
553
  parameters: {
553
- model: "gemini-1.5-flash",
554
+ model: "gemini-2.5-flash",
554
555
  file: File.open("audio_file.mp3", "rb"),
555
556
  language: "en",
556
557
  content_text: "Transcribe this audio clip"
@@ -582,7 +583,7 @@ file_name = upload_result["file"]["name"]
582
583
  # Use the file ID for transcription
583
584
  response = client.audio.transcribe(
584
585
  parameters: {
585
- model: "gemini-1.5-flash",
586
+ model: "gemini-2.5-flash",
586
587
  file_uri: file_uri,
587
588
  language: "en"
588
589
  }
@@ -601,6 +602,137 @@ client.files.delete(name: file_name)
601
602
 
602
603
  For more examples, check out the `demo/file_audio_demo.rb` file included with the gem.
603
604
 
605
+ ### Video Understanding
606
+
607
+ Gemini API can understand video content, enabling description, segmentation, information extraction, and question answering. It can process videos up to 2 hours long.
608
+
609
+ #### Basic Usage (Upload via Files API)
610
+
611
+ For video files larger than 20MB, or for files you want to reuse across multiple requests, uploading via the Files API is recommended:
612
+
613
+ ```ruby
614
+ require 'gemini'
615
+
616
+ client = Gemini::Client.new(ENV['GEMINI_API_KEY'])
617
+
618
+ # Upload and analyze a video file
619
+ result = client.video.analyze(
620
+ file_path: "path/to/video.mp4",
621
+ prompt: "Describe this video in detail"
622
+ )
623
+
624
+ response = result[:response]
625
+
626
+ if response.success?
627
+ puts response.text
628
+ else
629
+ puts "Video analysis failed: #{response.error}"
630
+ end
631
+
632
+ # File information (optional)
633
+ puts "File URI: #{result[:file_uri]}"
634
+ puts "File name: #{result[:file_name]}"
635
+ ```
636
+
637
+ #### Analyze as Inline Data (Videos under 20MB)
638
+
639
+ Small video files can be Base64-encoded and sent inline:
640
+
641
+ ```ruby
642
+ # Analyze a video under 20MB inline
643
+ response = client.video.analyze_inline(
644
+ file_path: "path/to/small_video.mp4",
645
+ prompt: "What is happening in this video?"
646
+ )
647
+
648
+ if response.success?
649
+ puts response.text
650
+ end
651
+ ```
652
+
653
+ #### YouTube Video Analysis
654
+
655
+ You can directly analyze public YouTube videos (private and unlisted videos are not supported):
656
+
657
+ ```ruby
658
+ # Analyze a video using YouTube URL
659
+ response = client.video.analyze_youtube(
660
+ url: "https://www.youtube.com/watch?v=XXXXX",
661
+ prompt: "What are the three main points of this video?"
662
+ )
663
+
664
+ if response.success?
665
+ puts response.text
666
+ end
667
+ ```
668
+
669
+ #### Helper Methods
670
+
671
+ Helper methods are provided for common operations:
672
+
673
+ ```ruby
674
+ # Get video description
675
+ response = client.video.describe(file_path: "video.mp4")
676
+ puts response.text
677
+
678
+ # Get YouTube video description
679
+ response = client.video.describe(youtube_url: "https://youtube.com/...")
680
+ puts response.text
681
+
682
+ # Ask questions about a video
683
+ response = client.video.ask(
684
+ file_uri: result[:file_uri],
685
+ question: "Who appears in this video?"
686
+ )
687
+ puts response.text
688
+
689
+ # Extract timestamps
690
+ response = client.video.extract_timestamps(
691
+ file_uri: result[:file_uri],
692
+ query: "important scenes"
693
+ )
694
+ puts response.text
695
+ ```
696
+
697
+ #### Video Segment Analysis
698
+
699
+ You can analyze only a portion of a video:
700
+
701
+ ```ruby
702
+ # Analyze a specific segment of the video
703
+ response = client.video.analyze_segment(
704
+ file_uri: result[:file_uri],
705
+ prompt: "What is happening in this scene?",
706
+ start_offset: "30s",
707
+ end_offset: "60s"
708
+ )
709
+
710
+ if response.success?
711
+ puts response.text
712
+ end
713
+ ```
714
+
715
+ #### Supported Video Formats
716
+
717
+ - MP4 - video/mp4
718
+ - MPEG - video/mpeg
719
+ - MOV - video/quicktime
720
+ - AVI - video/x-msvideo
721
+ - FLV - video/x-flv
722
+ - MPG - video/mpeg
723
+ - WebM - video/webm
724
+ - WMV - video/x-ms-wmv
725
+ - 3GPP - video/3gpp
726
+
727
+ #### Limitations
728
+
729
+ - Models with a 2-million-token context window: up to approximately 2 hours of video
730
+ - Models with a 1-million-token context window: up to approximately 1 hour of video
731
+ - Free tier: no more than 8 hours of YouTube video can be processed per day
732
+ - Approximately 300 tokens consumed per second of video (at default resolution)
733
+
734
+ A demo application can be found in `demo/video_demo.rb`.
735
+
604
736
  ### Document Processing
605
737
 
606
738
  Gemini API can process long documents (up to 3,600 pages), including PDFs. Gemini models understand both text and images within the document, enabling you to analyze, summarize, and extract information.
@@ -614,7 +746,7 @@ client = Gemini::Client.new(ENV['GEMINI_API_KEY'])
614
746
  result = client.documents.process(
615
747
  file_path: "path/to/document.pdf",
616
748
  prompt: "Summarize this document in three key points",
617
- model: "gemini-1.5-flash"
749
+ model: "gemini-2.5-flash"
618
750
  )
619
751
 
620
752
  response = result[:response]
@@ -643,7 +775,7 @@ file_path = "path/to/document.pdf"
643
775
  thread_result = client.chat_with_file(
644
776
  file_path,
645
777
  "Please provide an overview of this document",
646
- model: "gemini-1.5-flash"
778
+ model: "gemini-2.5-flash"
647
779
  )
648
780
 
649
781
  # Get the thread ID (for continuing the conversation)
@@ -690,7 +822,7 @@ Demo applications can be found in `demo/document_chat_demo.rb` and `demo/documen
690
822
 
691
823
  Context caching allows you to preprocess and store inputs like large documents or images with the Gemini API, then reuse them across multiple requests. This saves processing time and token usage when asking different questions about the same content.
692
824
 
693
- **Important**: Context caching requires a minimum input of 32,768 tokens. The maximum token count matches the context window size of the model you are using. Caches automatically expire after 48 hours, but you can set a custom TTL (Time To Live).Models are only available in fixed version stable models (e.g. gemini-1.5-pro-001).The version suffix (e.g. -001 for gemini-1.5-pro-001) must be included.
825
+ **Important**: Context caching requires a minimum input of 32,768 tokens. The maximum token count matches the context window size of the model you are using. Caches automatically expire after 48 hours, but you can set a custom TTL (Time To Live). Using stable model versions like gemini-2.5-flash is recommended.
694
826
 
695
827
  ```ruby
696
828
  require 'gemini'
@@ -702,7 +834,7 @@ cache_result = client.documents.cache(
702
834
  file_path: "path/to/large_document.pdf",
703
835
  system_instruction: "You are a document analysis expert. Please understand the content thoroughly and answer questions accurately.",
704
836
  ttl: "86400s", # 24 hours (in seconds)
705
- model: "gemini-1.5-flash-001"
837
+ model: "gemini-2.5-flash"
706
838
  )
707
839
 
708
840
  # Get the cache name
@@ -713,7 +845,7 @@ puts "Cache name: #{cache_name}"
713
845
  response = client.generate_content_with_cache(
714
846
  "What are the key findings in this document?",
715
847
  cached_content: cache_name,
716
- model: "gemini-1.5-flash-001"
848
+ model: "gemini-2.5-flash"
717
849
  )
718
850
 
719
851
  if response.success?
@@ -865,7 +997,7 @@ require 'gemini'
865
997
  client = Gemini::Client.new(ENV['GEMINI_API_KEY'])
866
998
 
867
999
  # Create a new thread
868
- thread = client.threads.create(parameters: { model: "gemini-2.0-flash-lite" })
1000
+ thread = client.threads.create(parameters: { model: "gemini-2.5-flash" })
869
1001
  thread_id = thread["id"]
870
1002
 
871
1003
  # Add a message to the thread
@@ -901,7 +1033,7 @@ client = Gemini::Client.new(ENV['GEMINI_API_KEY'])
901
1033
 
902
1034
  response = client.generate_content(
903
1035
  "Tell me about the Ruby programming language",
904
- model: "gemini-2.0-flash-lite"
1036
+ model: "gemini-2.5-flash"
905
1037
  )
906
1038
 
907
1039
  # Basic response information
@@ -977,8 +1109,9 @@ The gem includes several demo applications that showcase its functionality:
977
1109
  - `demo/demo.rb` - Basic text generation and chat
978
1110
  - `demo/stream_demo.rb` - Streaming text generation
979
1111
  - `demo/audio_demo.rb` - Audio transcription
1112
+ - `demo/video_demo.rb` - Video understanding (local files and YouTube)
980
1113
  - `demo/vision_demo.rb` - Image recognition
981
- - `demo/image_generation_demo.rb` - Image generation
1114
+ - `demo/image_generation_demo.rb` - Image generation
982
1115
  - `demo/file_vision_demo.rb` - Image recognition with large image files
983
1116
  - `demo/file_audio_demo.rb` - Audio transcription with large audio files
984
1117
  - `demo/structured_output_demo.rb` - Structured JSON output with schema
@@ -1005,6 +1138,12 @@ ruby demo/audio_demo.rb path/to/audio/file.mp3
1005
1138
  # Audio transcription with over 20MB audio file
1006
1139
  ruby demo/file_audio_demo.rb path/to/audio/file.mp3
1007
1140
 
1141
+ # Video understanding (local file)
1142
+ ruby demo/video_demo.rb path/to/video/file.mp4
1143
+
1144
+ # Video understanding (YouTube)
1145
+ ruby demo/video_demo.rb --youtube https://www.youtube.com/watch?v=XXXXX
1146
+
1008
1147
  # Image recognition
1009
1148
  ruby demo/vision_demo.rb path/to/image/file.jpg
1010
1149
 
@@ -1034,10 +1173,8 @@ ruby demo/document_cache_demo.rb path/to/document.pdf
1034
1173
 
1035
1174
  The library supports various Gemini models:
1036
1175
 
1037
- - `gemini-2.0-flash-lite`
1038
- - `gemini-2.0-flash`
1039
- - `gemini-2.0-pro`
1040
- - `gemini-1.5-flash`
1176
+ - `gemini-2.5-flash`
1177
+ - `gemini-2.5-pro`
1041
1178
 
1042
1179
  ## Requirements
1043
1180
 
data/lib/gemini/audio.rb CHANGED
@@ -8,7 +8,7 @@ module Gemini
8
8
  def transcribe(parameters: {})
9
9
  file = parameters.delete(:file)
10
10
  file_uri = parameters.delete(:file_uri)
11
- model = parameters.delete(:model) || "gemini-1.5-flash"
11
+ model = parameters.delete(:model) || "gemini-2.5-flash"
12
12
  language = parameters.delete(:language)
13
13
  content_text = parameters.delete(:content_text) || "Transcribe this audio clip"
14
14
 
@@ -24,7 +24,7 @@ module Gemini
24
24
  mime_type ||= file_path ? @client.determine_mime_type(file_path) : "application/octet-stream"
25
25
 
26
26
  # モデルを取得(models/プレフィックスを追加)
27
- model_name = model || parameters[:model] || "gemini-1.5-flash"
27
+ model_name = model || parameters[:model] || "gemini-2.5-flash"
28
28
  model_name = "models/#{model_name}" unless model_name.start_with?("models/")
29
29
 
30
30
  # キャッシュリクエストを構築(キャメルケースに注意)
data/lib/gemini/client.rb CHANGED
@@ -59,6 +59,11 @@ module Gemini
59
59
  @documents ||= Gemini::Documents.new(client: self)
60
60
  end
61
61
 
62
+ # 動画処理アクセサ
63
+ def video
64
+ @video ||= Gemini::Video.new(client: self)
65
+ end
66
+
62
67
  # キャッシュ管理アクセサ
63
68
  def cached_content
64
69
  @cached_content ||= Gemini::CachedContent.new(client: self)
@@ -77,7 +82,7 @@ module Gemini
77
82
  # OpenAI chat-like text generation method for Gemini API
78
83
  # Extended to support streaming callbacks
79
84
  def chat(parameters: {}, &stream_callback)
80
- model = parameters.delete(:model) || "gemini-2.0-flash-lite"
85
+ model = parameters.delete(:model) || "gemini-2.5-flash"
81
86
 
82
87
  # If streaming callback is provided
83
88
  if block_given?
@@ -117,7 +122,7 @@ module Gemini
117
122
  # Helper methods for convenience
118
123
 
119
124
  # Method with usage similar to OpenAI's chat
120
- def generate_content(prompt, model: "gemini-2.0-flash-lite", system_instruction: nil,
125
+ def generate_content(prompt, model: "gemini-2.5-flash", system_instruction: nil,
121
126
  response_mime_type: nil, response_schema: nil, temperature: 0.5, tools: nil,
122
127
  url_context: false, google_search: false, **parameters, &stream_callback)
123
128
  content = format_content(prompt)
@@ -153,7 +158,7 @@ module Gemini
153
158
  end
154
159
 
155
160
  # Streaming text generation
156
- def generate_content_stream(prompt, model: "gemini-2.0-flash-lite", system_instruction: nil,
161
+ def generate_content_stream(prompt, model: "gemini-2.5-flash", system_instruction: nil,
157
162
  response_mime_type: nil, response_schema: nil, temperature: 0.5,
158
163
  url_context: false, google_search: false, **parameters, &block)
159
164
  raise ArgumentError, "Block is required for streaming" unless block_given?
@@ -190,7 +195,7 @@ module Gemini
190
195
  end
191
196
 
192
197
  # ファイルを使った会話(複数ファイル対応)
193
- def chat_with_multimodal(file_paths, prompt, model: "gemini-1.5-flash", **parameters)
198
+ def chat_with_multimodal(file_paths, prompt, model: "gemini-2.5-flash", **parameters)
194
199
  # スレッドを作成
195
200
  thread = threads.create(parameters: { model: model })
196
201
  thread_id = thread["id"]
@@ -258,7 +263,7 @@ module Gemini
258
263
  end
259
264
  end
260
265
 
261
- def generate_content_with_cache(prompt, cached_content:, model: "gemini-1.5-flash", **parameters)
266
+ def generate_content_with_cache(prompt, cached_content:, model: "gemini-2.5-flash", **parameters)
262
267
  # モデル名にmodels/プレフィックスを追加
263
268
  model_name = model.start_with?("models/") ? model : "models/#{model}"
264
269
 
@@ -289,12 +294,12 @@ module Gemini
289
294
  end
290
295
 
291
296
  # 単一ファイルのヘルパー
292
- def chat_with_file(file_path, prompt, model: "gemini-1.5-flash", **parameters)
297
+ def chat_with_file(file_path, prompt, model: "gemini-2.5-flash", **parameters)
293
298
  chat_with_multimodal([file_path], prompt, model: model, **parameters)
294
299
  end
295
300
 
296
301
  # ファイルをアップロードして質問するシンプルなヘルパー
297
- def upload_and_process_file(file_path, prompt, content_type: nil, model: "gemini-1.5-flash", **parameters)
302
+ def upload_and_process_file(file_path, prompt, content_type: nil, model: "gemini-2.5-flash", **parameters)
298
303
  # MIMEタイプを自動判定
299
304
  mime_type = content_type || determine_mime_type(file_path)
300
305
 
@@ -5,7 +5,7 @@ module Gemini
5
5
  end
6
6
 
7
7
  # ドキュメントをアップロードして質問する基本メソッド
8
- def process(file: nil, file_path: nil, prompt:, model: "gemini-1.5-flash", **parameters)
8
+ def process(file: nil, file_path: nil, prompt:, model: "gemini-2.5-flash", **parameters)
9
9
  # ファイルパスが指定されている場合はファイルを開く
10
10
  if file_path && !file
11
11
  file = File.open(file_path, "rb")
@@ -70,7 +70,7 @@ module Gemini
70
70
  file_name = upload_result["file"]["name"]
71
71
 
72
72
  # モデル名の取得と調整
73
- model = parameters[:model] || "gemini-1.5-flash"
73
+ model = parameters[:model] || "gemini-2.5-flash"
74
74
  model = "models/#{model}" unless model.start_with?("models/")
75
75
 
76
76
  # キャッシュに保存(パラメータの名前に注意)
data/lib/gemini/files.rb CHANGED
@@ -125,11 +125,21 @@ module Gemini
125
125
  when ".mp4"
126
126
  "video/mp4"
127
127
  when ".avi"
128
- "video/avi"
128
+ "video/x-msvideo"
129
129
  when ".mov"
130
130
  "video/quicktime"
131
131
  when ".mkv"
132
132
  "video/x-matroska"
133
+ when ".mpeg", ".mpg"
134
+ "video/mpeg"
135
+ when ".webm"
136
+ "video/webm"
137
+ when ".wmv"
138
+ "video/x-ms-wmv"
139
+ when ".flv"
140
+ "video/x-flv"
141
+ when ".3gp", ".3gpp"
142
+ "video/3gpp"
133
143
  when ".pdf"
134
144
  "application/pdf"
135
145
  when ".txt"
@@ -27,7 +27,7 @@ module Gemini
27
27
  id: thread_id,
28
28
  created_at: created_at,
29
29
  metadata: parameters[:metadata] || {},
30
- model: parameters[:model] || "gemini-2.0-flash-lite"
30
+ model: parameters[:model] || "gemini-2.5-flash"
31
31
  }
32
32
 
33
33
  {
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Gemini
4
- VERSION = "0.1.5"
4
+ VERSION = "0.1.6"
5
5
  end
@@ -0,0 +1,341 @@
1
# frozen_string_literal: true

module Gemini
  # Video understanding built on the Gemini `generateContent` endpoint.
  #
  # A video can be supplied in three ways:
  # * uploaded through the Files API ({#analyze}),
  # * sent inline as Base64 data ({#analyze_inline}), or
  # * referenced by a public YouTube URL ({#analyze_youtube}).
  #
  # The client passed to {#initialize} must respond to `json_post(path:, parameters:)`
  # and to `files` (an object offering `upload(file:)` and `get(name:)`).
  class Video
    # File extension => MIME type sent to the API.
    MIME_TYPES = {
      ".mp4" => "video/mp4",
      ".mpeg" => "video/mpeg",
      ".mov" => "video/quicktime",
      ".avi" => "video/x-msvideo",
      ".flv" => "video/x-flv",
      ".mpg" => "video/mpeg",
      ".webm" => "video/webm",
      ".wmv" => "video/x-ms-wmv",
      ".3gp" => "video/3gpp",
      ".3gpp" => "video/3gpp"
    }.freeze

    # Supported video file extensions (derived from MIME_TYPES so the two never drift).
    SUPPORTED_FORMATS = MIME_TYPES.keys.freeze

    # MIME type used when the extension is unknown or the IO has no path.
    DEFAULT_MIME_TYPE = "video/mp4"

    # Largest file accepted by {#analyze_inline} (20MB).
    INLINE_SIZE_LIMIT = 20 * 1024 * 1024

    # Accepted YouTube URL shapes. `\A` anchors at the start of the string;
    # `^` would also match at the start of any later line of a multi-line value.
    YOUTUBE_URL_PATTERNS = [
      %r{\Ahttps?://(?:www\.)?youtube\.com/watch\?v=[\w-]+},
      %r{\Ahttps?://youtu\.be/[\w-]+},
      %r{\Ahttps?://(?:www\.)?youtube\.com/embed/[\w-]+},
      %r{\Ahttps?://(?:www\.)?youtube\.com/v/[\w-]+},
      %r{\Ahttps?://(?:www\.)?youtube\.com/shorts/[\w-]+}
    ].freeze

    private_constant :MIME_TYPES, :DEFAULT_MIME_TYPE, :INLINE_SIZE_LIMIT, :YOUTUBE_URL_PATTERNS

    # @param client [Object] API client used for uploads and generateContent requests
    def initialize(client:)
      @client = client
    end

    # Uploads a video through the Files API, waits until it is ACTIVE, then analyzes it.
    # Recommended for files of 20MB or more, or files that will be reused.
    #
    # @param file [IO, nil] an already opened video; it is NOT closed by this method
    # @param file_path [String, nil] path to open when +file+ is not given (closed afterwards)
    # @param prompt [String] instruction sent along with the video
    # @param model [String] model name without the "models/" prefix
    # @param parameters [Hash] extra top-level request fields; +:mime_type+ overrides detection
    # @return [Hash] +:response+ (Gemini::Response), +:file_uri+ and +:file_name+
    # @raise [ArgumentError] when neither +file+ nor +file_path+ is given
    # @raise [StandardError] when file processing fails or does not finish in time
    def analyze(file: nil, file_path: nil, prompt:, model: "gemini-2.5-flash", **parameters)
      with_video_file(file, file_path) do |video|
        mime_type = parameters.delete(:mime_type) || determine_video_mime_type(video)

        upload_result = @client.files.upload(file: video)
        file_uri = upload_result["file"]["uri"]
        file_name = upload_result["file"]["name"]

        # The uploaded file cannot be referenced until processing has finished.
        wait_for_file_active(file_name)

        raw_response = generate_video_content(
          file_uri: file_uri,
          mime_type: mime_type,
          prompt: prompt,
          model: model,
          **parameters
        )

        {
          response: Gemini::Response.new(raw_response),
          file_uri: file_uri,
          file_name: file_name
        }
      end
    end

    # Analyzes a video that was already uploaded, referenced by its file URI.
    #
    # @param file_uri [String] URI of the uploaded file
    # @param prompt [String] instruction sent along with the video
    # @param model [String] model name without the "models/" prefix
    # @param mime_type [String] MIME type of the uploaded video
    # @param parameters [Hash] extra top-level request fields
    # @return [Gemini::Response]
    def analyze_with_file_uri(file_uri:, prompt:, model: "gemini-2.5-flash", mime_type: "video/mp4", **parameters)
      raw_response = generate_video_content(
        file_uri: file_uri,
        mime_type: mime_type,
        prompt: prompt,
        model: model,
        **parameters
      )

      Gemini::Response.new(raw_response)
    end

    # Analyzes a YouTube video by URL.
    #
    # @param url [String] YouTube watch, youtu.be, embed, /v/ or shorts URL
    # @param prompt [String] instruction sent along with the video
    # @param model [String] model name without the "models/" prefix
    # @param parameters [Hash] extra top-level request fields
    # @return [Gemini::Response]
    # @raise [ArgumentError] when +url+ is not a recognized YouTube URL
    def analyze_youtube(url:, prompt:, model: "gemini-2.5-flash", **parameters)
      unless valid_youtube_url?(url)
        raise ArgumentError, "Invalid YouTube URL. Only public YouTube videos are supported."
      end

      request_params = build_request(prompt, { file_data: { file_uri: url } }, parameters)
      Gemini::Response.new(post_generate_content(model, request_params))
    end

    # Analyzes a small video by embedding it in the request as Base64 (20MB or less).
    #
    # @param file [IO, nil] an already opened video; it is NOT closed by this method
    # @param file_path [String, nil] path to open when +file+ is not given (closed afterwards)
    # @param prompt [String] instruction sent along with the video
    # @param model [String] model name without the "models/" prefix
    # @param parameters [Hash] extra top-level request fields; +:mime_type+ overrides detection
    # @return [Gemini::Response]
    # @raise [ArgumentError] when no file is given or the file exceeds 20MB
    def analyze_inline(file: nil, file_path: nil, prompt:, model: "gemini-2.5-flash", **parameters)
      with_video_file(file, file_path) do |video|
        video.rewind
        if video.size > INLINE_SIZE_LIMIT
          raise ArgumentError, "File size exceeds 20MB. Use analyze method with Files API instead."
        end

        mime_type = parameters.delete(:mime_type) || determine_video_mime_type(video)

        video.rewind
        # pack("m0") is exactly what Base64.strict_encode64 does, without requiring
        # the base64 library (no longer a default gem on recent Ruby versions).
        encoded = [video.read].pack("m0")

        request_params = build_request(
          prompt,
          { inline_data: { mime_type: mime_type, data: encoded } },
          parameters
        )
        Gemini::Response.new(post_generate_content(model, request_params))
      end
    end

    # Asks the model for a detailed description of a video.
    #
    # Exactly one source is used, checked in this order: +youtube_url+, +file_uri+,
    # then +file+/+file_path+ (which uploads through the Files API).
    #
    # @param language [String] "ja" selects the Japanese prompt; anything else the English one
    # @return [Gemini::Response]
    # @raise [ArgumentError] when no source is given
    def describe(file: nil, file_path: nil, file_uri: nil, youtube_url: nil,
                 model: "gemini-2.5-flash", language: "ja", **parameters)
      prompt = language == "ja" ? "この動画の内容を詳しく説明してください。" : "Describe this video in detail."

      analyze_from_source(
        prompt, model,
        { file: file, file_path: file_path, file_uri: file_uri, youtube_url: youtube_url },
        parameters
      )
    end

    # Asks the model to list every timestamp (MM:SS) at which +query+ appears.
    # The prompt sent to the model is written in Japanese.
    #
    # Source selection follows the same order as {#describe}.
    #
    # @param query [String] what to look for in the video
    # @return [Gemini::Response]
    # @raise [ArgumentError] when no source is given
    def extract_timestamps(file: nil, file_path: nil, file_uri: nil, youtube_url: nil, query:,
                           model: "gemini-2.5-flash", **parameters)
      prompt = "動画内で「#{query}」が登場するタイムスタンプを全て抽出してください。MM:SS形式で出力してください。"

      analyze_from_source(
        prompt, model,
        { file: file, file_path: file_path, file_uri: file_uri, youtube_url: youtube_url },
        parameters
      )
    end

    # Analyzes only a segment of an already uploaded video.
    #
    # @param file_uri [String] URI of the uploaded file
    # @param prompt [String] instruction sent along with the video
    # @param start_offset [String, nil] segment start, e.g. "30s"
    # @param end_offset [String, nil] segment end, e.g. "60s"
    # @param model [String] model name without the "models/" prefix
    # @param mime_type [String] MIME type of the uploaded video
    # @param parameters [Hash] extra top-level request fields
    # @return [Gemini::Response]
    def analyze_segment(file_uri:, prompt:, start_offset: nil, end_offset: nil,
                        model: "gemini-2.5-flash", mime_type: "video/mp4", **parameters)
      video_metadata = {}
      video_metadata[:startOffset] = start_offset if start_offset
      video_metadata[:endOffset] = end_offset if end_offset

      media_part = { file_data: { mime_type: mime_type, file_uri: file_uri } }
      # The segment metadata belongs to the part itself, next to file_data --
      # not inside file_data, which only describes the file reference.
      media_part[:video_metadata] = video_metadata unless video_metadata.empty?

      request_params = build_request(prompt, media_part, parameters)
      Gemini::Response.new(post_generate_content(model, request_params))
    end

    # Asks a free-form question about a video.
    #
    # Source selection follows the same order as {#describe}.
    #
    # @param question [String] the question, sent as the prompt
    # @return [Gemini::Response]
    # @raise [ArgumentError] when no source is given
    def ask(file: nil, file_path: nil, file_uri: nil, youtube_url: nil, question:,
            model: "gemini-2.5-flash", **parameters)
      analyze_from_source(
        question, model,
        { file: file, file_path: file_path, file_uri: file_uri, youtube_url: youtube_url },
        parameters
      )
    end

    private

    # Routes a prompt to the analysis method matching whichever source was supplied.
    def analyze_from_source(prompt, model, sources, parameters)
      if sources[:youtube_url]
        analyze_youtube(url: sources[:youtube_url], prompt: prompt, model: model, **parameters)
      elsif sources[:file_uri]
        analyze_with_file_uri(file_uri: sources[:file_uri], prompt: prompt, model: model, **parameters)
      elsif sources[:file] || sources[:file_path]
        result = analyze(file: sources[:file], file_path: sources[:file_path],
                         prompt: prompt, model: model, **parameters)
        result[:response]
      else
        raise ArgumentError, "file, file_path, file_uri, or youtube_url is required"
      end
    end

    # Yields the video IO, opening +file_path+ when no +file+ was given.
    # Only a file opened here is closed afterwards; a caller-supplied IO is left open.
    def with_video_file(file, file_path)
      opened_here = file_path && !file
      video = opened_here ? File.open(file_path, "rb") : file
      raise ArgumentError, "file or file_path parameter is required" unless video

      yield video
    ensure
      video.close if video && opened_here
    end

    # Sends prompt + uploaded-file reference to generateContent and returns the raw response.
    def generate_video_content(file_uri:, mime_type:, prompt:, model:, **parameters)
      request_params = build_request(
        prompt,
        { file_data: { mime_type: mime_type, file_uri: file_uri } },
        parameters
      )
      post_generate_content(model, request_params)
    end

    # Builds the request body: one content entry holding the prompt and the media part,
    # plus any extra caller-supplied fields.
    def build_request(prompt, media_part, parameters)
      request_params = { contents: [{ parts: [{ text: prompt }, media_part] }] }
      merge_additional_params(request_params, parameters)
      request_params
    end

    # Posts the request to the model's generateContent endpoint and returns the raw response.
    def post_generate_content(model, request_params)
      @client.json_post(
        path: "models/#{model}:generateContent",
        parameters: request_params
      )
    end

    # Copies extra fields into the request; +:contents+ is skipped so the
    # prompt/video payload built here cannot be overwritten.
    def merge_additional_params(request_params, parameters)
      parameters.each do |key, value|
        next if key == :contents

        request_params[key] = value
      end
    end

    # True when +url+ is a String starting with one of the accepted YouTube URL shapes.
    def valid_youtube_url?(url)
      return false unless url.is_a?(String)

      YOUTUBE_URL_PATTERNS.any? { |pattern| url.match?(pattern) }
    end

    # Polls the Files API until the file is ACTIVE.
    # Any state other than ACTIVE or FAILED is treated as "still processing".
    #
    # @raise [StandardError] on FAILED, or when still not ACTIVE after +max_attempts+ polls
    def wait_for_file_active(file_name, max_attempts: 30, interval: 2)
      attempts = 0
      loop do
        file_info = @client.files.get(name: file_name)
        state = file_info["state"]

        case state
        when "ACTIVE"
          return true
        when "FAILED"
          raise StandardError, "File processing failed: #{file_info['error']&.dig('message') || 'Unknown error'}"
        else
          attempts += 1
          if attempts >= max_attempts
            raise StandardError, "File processing timeout. File is still in #{state} state after #{max_attempts * interval} seconds."
          end
          sleep(interval)
        end
      end
    end

    # Picks the MIME type from the file's extension; falls back to MP4 when the
    # object has no path or the extension is not a known video format.
    def determine_video_mime_type(file)
      return DEFAULT_MIME_TYPE unless file.respond_to?(:path)

      MIME_TYPES.fetch(File.extname(file.path).downcase, DEFAULT_MIME_TYPE)
    end
  end
end
data/lib/gemini.rb CHANGED
@@ -19,6 +19,7 @@ require_relative "gemini/images"
19
19
  require_relative "gemini/response"
20
20
  require_relative "gemini/documents"
21
21
  require_relative "gemini/cached_content"
22
+ require_relative "gemini/video"
22
23
  module Gemini
23
24
  class Error < StandardError; end
24
25
  class ConfigurationError < Error; end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ruby-gemini-api
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.5
4
+ version: 0.1.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - rira100000000
@@ -162,6 +162,7 @@ files:
162
162
  - lib/gemini/threads.rb
163
163
  - lib/gemini/tool_definition.rb
164
164
  - lib/gemini/version.rb
165
+ - lib/gemini/video.rb
165
166
  - lib/ruby/gemini.rb
166
167
  homepage: https://github.com/rira100000000/ruby-gemini-api
167
168
  licenses: