ruby-gemini-api 0.1.4 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c80fbf2cb7142ab3ff7d6a17d8b5f1e43960ec8ed398ee7f4c35286ed72ce962
4
- data.tar.gz: 261f1a1e04757b93aac9c8a42263e355758bba4ec512f6c9dd408b63146e17fe
3
+ metadata.gz: 80e6cd429265c5341e1efcd3e701f9643cdb4785f18826234ec879178d4a236e
4
+ data.tar.gz: c9a9c5201b616ce2d534393c1d99240563b9d36229dac4f564f71b4ed82ee42b
5
5
  SHA512:
6
- metadata.gz: 780a9684677d9bfdd8945727c9cd00e1e0ecc9e43a63a038da17a18aa8e524e43f601946bd1fbc00c9406e4a45b80d59fedfdfc85e190b973cedd9c2340a5be4
7
- data.tar.gz: 399dff8bc6f6693b6267412b2fee067269125ea9b16ffd105d94b4ac9154ca1ade4246b6d3ca9aa9e7cb689e3f2d4a12ea771d4fc688de24e36688205675ab0f
6
+ metadata.gz: b1ac2fe8cf4dacad20f21a7eab1872fa496a797b76d9af12cb401c5f563aeaed9cd58e4323334fcd6bbf67d74273902b5f9f076a5a44833474b4f3a4e8409148
7
+ data.tar.gz: 17693cd9f1a87ad1fdf21c26153d1ac351ca9137c99e1c97cddd2524333db21190b0bbf5c08b995939c950338f0d5bf4607263a2ae0bc92c0343b28d439c2a6f
data/CHANGELOG.md CHANGED
@@ -13,4 +13,15 @@
13
13
  - Add support for multi-image input
14
14
 
15
15
  ## [0.1.4] - 2025-11-08
16
- - Add support for grounding search
16
+ - Add support for grounding search
17
+
18
+ ## [0.1.5] - 2025-11-13
19
+ - Add support for URL Context tool
20
+ - Add simplified method for accessing grounding search sources
21
+
22
+ ## [0.1.6] - 2025-12-11
23
+ - Add support for video understanding
24
+ - Analyze local video files (Files API and inline data)
25
+ - Analyze YouTube videos
26
+ - Helper methods: describe, ask, extract_timestamps, analyze_segment
27
+ - Support for MP4, MPEG, MOV, AVI, FLV, WebM, WMV, 3GPP formats
data/README.md CHANGED
@@ -11,6 +11,7 @@ This project is inspired by and pays homage to [ruby-openai](https://github.com/
11
11
  - Chat functionality with conversation history
12
12
  - Streaming responses for real-time text generation
13
13
  - Audio transcription capabilities
14
+ - Video understanding (including YouTube videos)
14
15
  - Thread and message management for chat applications
15
16
  - Runs management for executing AI tasks
16
17
  - Convenient Response object for easy access to generated content
@@ -43,7 +44,7 @@ user_prompt = "Tell me the current weather in Tokyo."
43
44
  # Send request with the defined tools
44
45
  response = client.generate_content(
45
46
  user_prompt,
46
- model: "gemini-1.5-flash", # Or any model that supports function calling
47
+ model: "gemini-2.5-flash", # Or any model that supports function calling
47
48
  tools: tools
48
49
  )
49
50
 
@@ -126,7 +127,7 @@ client = Gemini::Client.new(ENV['GEMINI_API_KEY'])
126
127
  # Generate text
127
128
  response = client.generate_content(
128
129
  "What are the main features of Ruby programming language?",
129
- model: "gemini-2.0-flash-lite"
130
+ model: "gemini-2.5-flash"
130
131
  )
131
132
 
132
133
  # Access the generated content using Response object
@@ -147,7 +148,7 @@ client = Gemini::Client.new(ENV['GEMINI_API_KEY'])
147
148
  # Stream response in real-time
148
149
  client.generate_content_stream(
149
150
  "Tell me a story about a programmer who loves Ruby",
150
- model: "gemini-2.0-flash-lite"
151
+ model: "gemini-2.5-flash"
151
152
  ) do |chunk|
152
153
  print chunk
153
154
  $stdout.flush
@@ -170,7 +171,7 @@ contents = [
170
171
 
171
172
  # Get response with conversation history
172
173
  response = client.chat(parameters: {
173
- model: "gemini-2.0-flash-lite",
174
+ model: "gemini-2.5-flash",
174
175
  contents: contents
175
176
  })
176
177
 
@@ -198,7 +199,7 @@ system_instruction = "You are a Ruby programming expert who provides concise cod
198
199
 
199
200
  # Use system instructions with chat
200
201
  response = client.chat(parameters: {
201
- model: "gemini-2.0-flash-lite",
202
+ model: "gemini-2.5-flash",
202
203
  system_instruction: { parts: [{ text: system_instruction }] },
203
204
  contents: [{ role: "user", parts: [{ text: "How do I write a simple web server in Ruby?" }] }]
204
205
  })
@@ -225,7 +226,7 @@ response = client.generate_content(
225
226
  { type: "text", text: "Describe what you see in this image" },
226
227
  { type: "image_file", image_file: { file_path: "path/to/image.jpg" } }
227
228
  ],
228
- model: "gemini-2.0-flash"
229
+ model: "gemini-2.5-flash"
229
230
  )
230
231
 
231
232
  # Access the description using Response object
@@ -256,7 +257,7 @@ response = client.generate_content(
256
257
  { text: "Describe this image in detail" },
257
258
  { file_data: { mime_type: "image/jpeg", file_uri: file_uri } }
258
259
  ],
259
- model: "gemini-2.0-flash"
260
+ model: "gemini-2.5-flash"
260
261
  )
261
262
 
262
263
  # Process the response using Response object
@@ -286,21 +287,19 @@ client = Gemini::Client.new(ENV['GEMINI_API_KEY'])
286
287
  # Use Google Search to get real-time information
287
288
  response = client.generate_content(
288
289
  "Who won the euro 2024?",
289
- model: "gemini-2.0-flash-lite",
290
+ model: "gemini-2.5-flash",
290
291
  tools: [{ google_search: {} }]
291
292
  )
292
293
 
293
294
  if response.success?
294
295
  puts response.text
295
-
296
+
296
297
  # Check grounding information
297
298
  if response.grounded?
298
299
  puts "\nSource references:"
299
- response.grounding_chunks.each do |chunk|
300
- if chunk['web']
301
- puts "- #{chunk['web']['title']}"
302
- puts " #{chunk['web']['uri']}"
303
- end
300
+ response.grounding_sources.each do |source|
301
+ puts "- #{source[:title]}"
302
+ puts " #{source[:url]}"
304
303
  end
305
304
  end
306
305
  end
@@ -311,13 +310,15 @@ end
311
310
  ```ruby
312
311
  # Check if response is grounded
313
312
  if response.grounded?
314
- # Get full grounding metadata
313
+ # Get formatted source information (recommended)
314
+ response.grounding_sources.each do |source|
315
+ puts "Title: #{source[:title]}"
316
+ puts "URL: #{source[:url]}"
317
+ end
318
+
319
+ # You can also access raw metadata
315
320
  metadata = response.grounding_metadata
316
-
317
- # Get source chunks (references)
318
321
  chunks = response.grounding_chunks
319
-
320
- # Get search entry point
321
322
  entry_point = response.search_entry_point
322
323
  end
323
324
  ```
@@ -327,7 +328,7 @@ end
327
328
  ```ruby
328
329
  response = client.generate_content(
329
330
  "What are the latest AI developments in 2024?",
330
- model: "gemini-2.0-flash-lite",
331
+ model: "gemini-2.5-flash",
331
332
  tools: [{ google_search: {} }]
332
333
  )
333
334
 
@@ -345,6 +346,86 @@ You can find a grounding search demo in:
345
346
  ruby demo/grounding_search_demo_ja.rb
346
347
  ```
347
348
 
349
+ ### URL Context
350
+
351
+ You can use the Gemini API's URL Context feature to retrieve and analyze content from web pages.
352
+
353
+ #### Basic Usage
354
+
355
+ ```ruby
356
+ require 'gemini'
357
+
358
+ client = Gemini::Client.new(ENV['GEMINI_API_KEY'])
359
+
360
+ # Use URL Context to analyze web pages (shortcut)
361
+ response = client.generate_content(
362
+ "Summarize the content of this page: https://www.ruby-lang.org",
363
+ model: "gemini-2.5-flash",
364
+ url_context: true
365
+ )
366
+
367
+ if response.success?
368
+ puts response.text
369
+ end
370
+ ```
371
+
372
+ #### Using Explicit Tools Parameter
373
+
374
+ ```ruby
375
+ # Explicit tools parameter
376
+ response = client.generate_content(
377
+ "Compare these two pages: https://www.ruby-lang.org and https://www.python.org",
378
+ model: "gemini-2.5-flash",
379
+ tools: [{ url_context: {} }]
380
+ )
381
+ ```
382
+
383
+ #### Combining URL Context with Google Search
384
+
385
+ ```ruby
386
+ # Use both URL Context and Google Search
387
+ response = client.generate_content(
388
+ "What is the latest information about Ruby from https://www.ruby-lang.org and recent news?",
389
+ model: "gemini-2.5-flash",
390
+ url_context: true,
391
+ google_search: true
392
+ )
393
+ ```
394
+
395
+ #### Checking URL Context Metadata
396
+
397
+ ```ruby
398
+ # Check if URL Context was used
399
+ if response.url_context?
400
+ # Get full metadata
401
+ metadata = response.url_context_metadata
402
+
403
+ # Get retrieved URL information
404
+ urls = response.retrieved_urls
405
+
406
+ # Check retrieval status for each URL
407
+ response.url_retrieval_statuses.each do |url_info|
408
+ puts "URL: #{url_info[:url]}"
409
+ puts "Status: #{url_info[:status]}"
410
+ puts "Title: #{url_info[:title]}" if url_info[:title]
411
+ end
412
+ end
413
+ ```
414
+
415
+ #### Limitations
416
+
417
+ - Maximum 20 URLs per request
418
+ - Maximum 34MB content size per URL
419
+ - YouTube videos and paywalled content are not supported
420
+
421
+ #### Demo Application
422
+
423
+ You can find a URL context demo in:
424
+
425
+ ```bash
426
+ ruby demo/url_context_demo.rb https://www.ruby-lang.org
427
+ ```
428
+
348
429
  ### Image Generation
349
430
 
350
431
  ```ruby
@@ -470,7 +551,7 @@ client = Gemini::Client.new(ENV['GEMINI_API_KEY'])
470
551
  # Transcribe audio file (note: file size limit is 20MB for direct upload)
471
552
  response = client.audio.transcribe(
472
553
  parameters: {
473
- model: "gemini-1.5-flash",
554
+ model: "gemini-2.5-flash",
474
555
  file: File.open("audio_file.mp3", "rb"),
475
556
  language: "en",
476
557
  content_text: "Transcribe this audio clip"
@@ -502,7 +583,7 @@ file_name = upload_result["file"]["name"]
502
583
  # Use the file ID for transcription
503
584
  response = client.audio.transcribe(
504
585
  parameters: {
505
- model: "gemini-1.5-flash",
586
+ model: "gemini-2.5-flash",
506
587
  file_uri: file_uri,
507
588
  language: "en"
508
589
  }
@@ -521,6 +602,137 @@ client.files.delete(name: file_name)
521
602
 
522
603
  For more examples, check out the `demo/file_audio_demo.rb` file included with the gem.
523
604
 
605
+ ### Video Understanding
606
+
607
+ The Gemini API can understand video content, enabling description, segmentation, information extraction, and question answering. It can process videos up to 2 hours long.
608
+
609
+ #### Basic Usage (Upload via Files API)
610
+
611
+ For video files larger than 20MB, or for files you want to reuse multiple times, uploading via the Files API is recommended:
612
+
613
+ ```ruby
614
+ require 'gemini'
615
+
616
+ client = Gemini::Client.new(ENV['GEMINI_API_KEY'])
617
+
618
+ # Upload and analyze a video file
619
+ result = client.video.analyze(
620
+ file_path: "path/to/video.mp4",
621
+ prompt: "Describe this video in detail"
622
+ )
623
+
624
+ response = result[:response]
625
+
626
+ if response.success?
627
+ puts response.text
628
+ else
629
+ puts "Video analysis failed: #{response.error}"
630
+ end
631
+
632
+ # File information (optional)
633
+ puts "File URI: #{result[:file_uri]}"
634
+ puts "File name: #{result[:file_name]}"
635
+ ```
636
+
637
+ #### Analyze as Inline Data (Videos under 20MB)
638
+
639
+ Small video files can be Base64-encoded and sent inline:
640
+
641
+ ```ruby
642
+ # Analyze a video under 20MB inline
643
+ response = client.video.analyze_inline(
644
+ file_path: "path/to/small_video.mp4",
645
+ prompt: "What is happening in this video?"
646
+ )
647
+
648
+ if response.success?
649
+ puts response.text
650
+ end
651
+ ```
652
+
653
+ #### YouTube Video Analysis
654
+
655
+ You can directly analyze public YouTube videos (private and unlisted videos are not supported):
656
+
657
+ ```ruby
658
+ # Analyze a video using YouTube URL
659
+ response = client.video.analyze_youtube(
660
+ url: "https://www.youtube.com/watch?v=XXXXX",
661
+ prompt: "What are the three main points of this video?"
662
+ )
663
+
664
+ if response.success?
665
+ puts response.text
666
+ end
667
+ ```
668
+
669
+ #### Helper Methods
670
+
671
+ Helper methods are provided for common operations:
672
+
673
+ ```ruby
674
+ # Get video description
675
+ response = client.video.describe(file_path: "video.mp4")
676
+ puts response.text
677
+
678
+ # Get YouTube video description
679
+ response = client.video.describe(youtube_url: "https://youtube.com/...")
680
+ puts response.text
681
+
682
+ # Ask questions about a video
683
+ response = client.video.ask(
684
+ file_uri: result[:file_uri],
685
+ question: "Who appears in this video?"
686
+ )
687
+ puts response.text
688
+
689
+ # Extract timestamps
690
+ response = client.video.extract_timestamps(
691
+ file_uri: result[:file_uri],
692
+ query: "important scenes"
693
+ )
694
+ puts response.text
695
+ ```
696
+
697
+ #### Video Segment Analysis
698
+
699
+ You can analyze only a portion of a video:
700
+
701
+ ```ruby
702
+ # Analyze a specific segment of the video
703
+ response = client.video.analyze_segment(
704
+ file_uri: result[:file_uri],
705
+ prompt: "What is happening in this scene?",
706
+ start_offset: "30s",
707
+ end_offset: "60s"
708
+ )
709
+
710
+ if response.success?
711
+ puts response.text
712
+ end
713
+ ```
714
+
715
+ #### Supported Video Formats
716
+
717
+ - MP4 - video/mp4
718
+ - MPEG - video/mpeg
719
+ - MOV - video/quicktime
720
+ - AVI - video/x-msvideo
721
+ - FLV - video/x-flv
722
+ - MPG - video/mpeg
723
+ - WebM - video/webm
724
+ - WMV - video/x-ms-wmv
725
+ - 3GPP - video/3gpp
726
+
727
+ #### Limitations
728
+
729
+ - Models with a 2 million token context window: up to approximately 2 hours of video
730
+ - Models with a 1 million token context window: up to approximately 1 hour of video
731
+ - YouTube free plan: cannot process more than 8 hours of video per day
732
+ - Approximately 300 tokens consumed per second of video (at default resolution)
733
+
734
+ A demo application can be found in `demo/video_demo.rb`.
735
+
524
736
  ### Document Processing
525
737
 
526
738
  Gemini API can process long documents (up to 3,600 pages), including PDFs. Gemini models understand both text and images within the document, enabling you to analyze, summarize, and extract information.
@@ -534,7 +746,7 @@ client = Gemini::Client.new(ENV['GEMINI_API_KEY'])
534
746
  result = client.documents.process(
535
747
  file_path: "path/to/document.pdf",
536
748
  prompt: "Summarize this document in three key points",
537
- model: "gemini-1.5-flash"
749
+ model: "gemini-2.5-flash"
538
750
  )
539
751
 
540
752
  response = result[:response]
@@ -563,7 +775,7 @@ file_path = "path/to/document.pdf"
563
775
  thread_result = client.chat_with_file(
564
776
  file_path,
565
777
  "Please provide an overview of this document",
566
- model: "gemini-1.5-flash"
778
+ model: "gemini-2.5-flash"
567
779
  )
568
780
 
569
781
  # Get the thread ID (for continuing the conversation)
@@ -610,7 +822,7 @@ Demo applications can be found in `demo/document_chat_demo.rb` and `demo/documen
610
822
 
611
823
  Context caching allows you to preprocess and store inputs like large documents or images with the Gemini API, then reuse them across multiple requests. This saves processing time and token usage when asking different questions about the same content.
612
824
 
613
- **Important**: Context caching requires a minimum input of 32,768 tokens. The maximum token count matches the context window size of the model you are using. Caches automatically expire after 48 hours, but you can set a custom TTL (Time To Live).Models are only available in fixed version stable models (e.g. gemini-1.5-pro-001).The version suffix (e.g. -001 for gemini-1.5-pro-001) must be included.
825
+ **Important**: Context caching requires a minimum input of 32,768 tokens. The maximum token count matches the context window size of the model you are using. Caches automatically expire after 48 hours, but you can set a custom TTL (Time To Live). Using stable model versions like gemini-2.5-flash is recommended.
614
826
 
615
827
  ```ruby
616
828
  require 'gemini'
@@ -622,7 +834,7 @@ cache_result = client.documents.cache(
622
834
  file_path: "path/to/large_document.pdf",
623
835
  system_instruction: "You are a document analysis expert. Please understand the content thoroughly and answer questions accurately.",
624
836
  ttl: "86400s", # 24 hours (in seconds)
625
- model: "gemini-1.5-flash-001"
837
+ model: "gemini-2.5-flash"
626
838
  )
627
839
 
628
840
  # Get the cache name
@@ -633,7 +845,7 @@ puts "Cache name: #{cache_name}"
633
845
  response = client.generate_content_with_cache(
634
846
  "What are the key findings in this document?",
635
847
  cached_content: cache_name,
636
- model: "gemini-1.5-flash-001"
848
+ model: "gemini-2.5-flash"
637
849
  )
638
850
 
639
851
  if response.success?
@@ -785,7 +997,7 @@ require 'gemini'
785
997
  client = Gemini::Client.new(ENV['GEMINI_API_KEY'])
786
998
 
787
999
  # Create a new thread
788
- thread = client.threads.create(parameters: { model: "gemini-2.0-flash-lite" })
1000
+ thread = client.threads.create(parameters: { model: "gemini-2.5-flash" })
789
1001
  thread_id = thread["id"]
790
1002
 
791
1003
  # Add a message to the thread
@@ -821,7 +1033,7 @@ client = Gemini::Client.new(ENV['GEMINI_API_KEY'])
821
1033
 
822
1034
  response = client.generate_content(
823
1035
  "Tell me about the Ruby programming language",
824
- model: "gemini-2.0-flash-lite"
1036
+ model: "gemini-2.5-flash"
825
1037
  )
826
1038
 
827
1039
  # Basic response information
@@ -897,8 +1109,9 @@ The gem includes several demo applications that showcase its functionality:
897
1109
  - `demo/demo.rb` - Basic text generation and chat
898
1110
  - `demo/stream_demo.rb` - Streaming text generation
899
1111
  - `demo/audio_demo.rb` - Audio transcription
1112
+ - `demo/video_demo.rb` - Video understanding (local files and YouTube)
900
1113
  - `demo/vision_demo.rb` - Image recognition
901
- - `demo/image_generation_demo.rb` - Image generation
1114
+ - `demo/image_generation_demo.rb` - Image generation
902
1115
  - `demo/file_vision_demo.rb` - Image recognition with large image files
903
1116
  - `demo/file_audio_demo.rb` - Audio transcription with large audio files
904
1117
  - `demo/structured_output_demo.rb` - Structured JSON output with schema
@@ -925,6 +1138,12 @@ ruby demo/audio_demo.rb path/to/audio/file.mp3
925
1138
  # Audio transcription with over 20MB audio file
926
1139
  ruby demo/file_audio_demo.rb path/to/audio/file.mp3
927
1140
 
1141
+ # Video understanding (local file)
1142
+ ruby demo/video_demo.rb path/to/video/file.mp4
1143
+
1144
+ # Video understanding (YouTube)
1145
+ ruby demo/video_demo.rb --youtube https://www.youtube.com/watch?v=XXXXX
1146
+
928
1147
  # Image recognition
929
1148
  ruby demo/vision_demo.rb path/to/image/file.jpg
930
1149
 
@@ -954,10 +1173,8 @@ ruby demo/document_cache_demo.rb path/to/document.pdf
954
1173
 
955
1174
  The library supports various Gemini models:
956
1175
 
957
- - `gemini-2.0-flash-lite`
958
- - `gemini-2.0-flash`
959
- - `gemini-2.0-pro`
960
- - `gemini-1.5-flash`
1176
+ - `gemini-2.5-flash`
1177
+ - `gemini-2.5-pro`
961
1178
 
962
1179
  ## Requirements
963
1180
 
data/lib/gemini/audio.rb CHANGED
@@ -8,7 +8,7 @@ module Gemini
8
8
  def transcribe(parameters: {})
9
9
  file = parameters.delete(:file)
10
10
  file_uri = parameters.delete(:file_uri)
11
- model = parameters.delete(:model) || "gemini-1.5-flash"
11
+ model = parameters.delete(:model) || "gemini-2.5-flash"
12
12
  language = parameters.delete(:language)
13
13
  content_text = parameters.delete(:content_text) || "Transcribe this audio clip"
14
14
 
@@ -24,7 +24,7 @@ module Gemini
24
24
  mime_type ||= file_path ? @client.determine_mime_type(file_path) : "application/octet-stream"
25
25
 
26
26
  # モデルを取得(models/プレフィックスを追加)
27
- model_name = model || parameters[:model] || "gemini-1.5-flash"
27
+ model_name = model || parameters[:model] || "gemini-2.5-flash"
28
28
  model_name = "models/#{model_name}" unless model_name.start_with?("models/")
29
29
 
30
30
  # キャッシュリクエストを構築(キャメルケースに注意)
data/lib/gemini/client.rb CHANGED
@@ -59,6 +59,11 @@ module Gemini
59
59
  @documents ||= Gemini::Documents.new(client: self)
60
60
  end
61
61
 
62
+ # 動画処理アクセサ
63
+ def video
64
+ @video ||= Gemini::Video.new(client: self)
65
+ end
66
+
62
67
  # キャッシュ管理アクセサ
63
68
  def cached_content
64
69
  @cached_content ||= Gemini::CachedContent.new(client: self)
@@ -77,7 +82,7 @@ module Gemini
77
82
  # OpenAI chat-like text generation method for Gemini API
78
83
  # Extended to support streaming callbacks
79
84
  def chat(parameters: {}, &stream_callback)
80
- model = parameters.delete(:model) || "gemini-2.0-flash-lite"
85
+ model = parameters.delete(:model) || "gemini-2.5-flash"
81
86
 
82
87
  # If streaming callback is provided
83
88
  if block_given?
@@ -117,8 +122,9 @@ module Gemini
117
122
  # Helper methods for convenience
118
123
 
119
124
  # Method with usage similar to OpenAI's chat
120
- def generate_content(prompt, model: "gemini-2.0-flash-lite", system_instruction: nil,
121
- response_mime_type: nil, response_schema: nil, temperature: 0.5, tools: nil, **parameters, &stream_callback)
125
+ def generate_content(prompt, model: "gemini-2.5-flash", system_instruction: nil,
126
+ response_mime_type: nil, response_schema: nil, temperature: 0.5, tools: nil,
127
+ url_context: false, google_search: false, **parameters, &stream_callback)
122
128
  content = format_content(prompt)
123
129
  params = {
124
130
  contents: [content],
@@ -137,7 +143,11 @@ module Gemini
137
143
  if response_schema
138
144
  params[:generation_config]["response_schema"] = response_schema
139
145
  end
140
- params[:tools] = tools if tools
146
+
147
+ # Handle tool shortcuts
148
+ tools = build_tools_array(tools, url_context: url_context, google_search: google_search)
149
+ params[:tools] = tools if tools && !tools.empty?
150
+
141
151
  params.merge!(parameters)
142
152
 
143
153
  if block_given?
@@ -148,38 +158,44 @@ module Gemini
148
158
  end
149
159
 
150
160
  # Streaming text generation
151
- def generate_content_stream(prompt, model: "gemini-2.0-flash-lite", system_instruction: nil,
152
- response_mime_type: nil, response_schema: nil, temperature: 0.5, **parameters, &block)
161
+ def generate_content_stream(prompt, model: "gemini-2.5-flash", system_instruction: nil,
162
+ response_mime_type: nil, response_schema: nil, temperature: 0.5,
163
+ url_context: false, google_search: false, **parameters, &block)
153
164
  raise ArgumentError, "Block is required for streaming" unless block_given?
154
-
165
+
155
166
  content = format_content(prompt)
156
167
  params = {
157
168
  contents: [content],
158
169
  model: model
159
170
  }
160
-
171
+
161
172
  if system_instruction
162
173
  params[:system_instruction] = format_content(system_instruction)
163
174
  end
164
-
175
+
165
176
  params[:generation_config] ||= {}
166
-
177
+
167
178
  if response_mime_type
168
179
  params[:generation_config][:response_mime_type] = response_mime_type
169
180
  end
170
-
181
+
171
182
  if response_schema
172
183
  params[:generation_config][:response_schema] = response_schema
173
184
  end
174
185
  params[:generation_config]["temperature"] = temperature
186
+
187
+ # Handle tool shortcuts
188
+ tools = build_tools_array(nil, url_context: url_context, google_search: google_search)
189
+ params[:tools] = tools if tools && !tools.empty?
190
+
175
191
  # Merge other parameters
176
192
  params.merge!(parameters)
177
-
193
+
178
194
  chat(parameters: params, &block)
179
195
  end
180
196
 
181
197
  # ファイルを使った会話(複数ファイル対応)
182
- def chat_with_multimodal(file_paths, prompt, model: "gemini-1.5-flash", **parameters)
198
+ def chat_with_multimodal(file_paths, prompt, model: "gemini-2.5-flash", **parameters)
183
199
  # スレッドを作成
184
200
  thread = threads.create(parameters: { model: model })
185
201
  thread_id = thread["id"]
@@ -247,7 +263,7 @@ module Gemini
247
263
  end
248
264
  end
249
265
 
250
- def generate_content_with_cache(prompt, cached_content:, model: "gemini-1.5-flash", **parameters)
266
+ def generate_content_with_cache(prompt, cached_content:, model: "gemini-2.5-flash", **parameters)
251
267
  # モデル名にmodels/プレフィックスを追加
252
268
  model_name = model.start_with?("models/") ? model : "models/#{model}"
253
269
 
@@ -278,12 +294,12 @@ module Gemini
278
294
  end
279
295
 
280
296
  # 単一ファイルのヘルパー
281
- def chat_with_file(file_path, prompt, model: "gemini-1.5-flash", **parameters)
297
+ def chat_with_file(file_path, prompt, model: "gemini-2.5-flash", **parameters)
282
298
  chat_with_multimodal([file_path], prompt, model: model, **parameters)
283
299
  end
284
300
 
285
301
  # ファイルをアップロードして質問するシンプルなヘルパー
286
- def upload_and_process_file(file_path, prompt, content_type: nil, model: "gemini-1.5-flash", **parameters)
302
+ def upload_and_process_file(file_path, prompt, content_type: nil, model: "gemini-2.5-flash", **parameters)
287
303
  # MIMEタイプを自動判定
288
304
  mime_type = content_type || determine_mime_type(file_path)
289
305
 
@@ -399,7 +415,29 @@ module Gemini
399
415
  end
400
416
 
401
417
  private
402
-
418
+
419
+ # Build tools array from explicit tools parameter and shortcuts
420
+ def build_tools_array(tools, url_context: false, google_search: false)
421
+ result_tools = []
422
+
423
+ # Add existing tools if provided
424
+ if tools.is_a?(Array)
425
+ result_tools.concat(tools)
426
+ elsif tools
427
+ result_tools << tools
428
+ end
429
+
430
+ # Add url_context tool if requested
431
+ result_tools << { url_context: {} } if url_context
432
+
433
+ # Add google_search tool if requested
434
+ result_tools << { google_search: {} } if google_search
435
+
436
+ # Remove duplicates based on tool keys and return
437
+ return nil if result_tools.empty?
438
+ result_tools.uniq { |tool| tool.keys.first }
439
+ end
440
+
403
441
  # Process stream chunk and pass to callback
404
442
  def process_stream_chunk(chunk, &callback)
405
443
  if chunk.respond_to?(:dig) && chunk.dig("candidates", 0, "content", "parts", 0, "text")
@@ -5,7 +5,7 @@ module Gemini
5
5
  end
6
6
 
7
7
  # ドキュメントをアップロードして質問する基本メソッド
8
- def process(file: nil, file_path: nil, prompt:, model: "gemini-1.5-flash", **parameters)
8
+ def process(file: nil, file_path: nil, prompt:, model: "gemini-2.5-flash", **parameters)
9
9
  # ファイルパスが指定されている場合はファイルを開く
10
10
  if file_path && !file
11
11
  file = File.open(file_path, "rb")
@@ -70,7 +70,7 @@ module Gemini
70
70
  file_name = upload_result["file"]["name"]
71
71
 
72
72
  # モデル名の取得と調整
73
- model = parameters[:model] || "gemini-1.5-flash"
73
+ model = parameters[:model] || "gemini-2.5-flash"
74
74
  model = "models/#{model}" unless model.start_with?("models/")
75
75
 
76
76
  # キャッシュに保存(パラメータの名前に注意)
data/lib/gemini/files.rb CHANGED
@@ -125,11 +125,21 @@ module Gemini
125
125
  when ".mp4"
126
126
  "video/mp4"
127
127
  when ".avi"
128
- "video/avi"
128
+ "video/x-msvideo"
129
129
  when ".mov"
130
130
  "video/quicktime"
131
131
  when ".mkv"
132
132
  "video/x-matroska"
133
+ when ".mpeg", ".mpg"
134
+ "video/mpeg"
135
+ when ".webm"
136
+ "video/webm"
137
+ when ".wmv"
138
+ "video/x-ms-wmv"
139
+ when ".flv"
140
+ "video/x-flv"
141
+ when ".3gp", ".3gpp"
142
+ "video/3gpp"
133
143
  when ".pdf"
134
144
  "application/pdf"
135
145
  when ".txt"
@@ -119,6 +119,57 @@ module Gemini
119
119
  def search_entry_point
120
120
  grounding_metadata&.dig("searchEntryPoint", "renderedContent")
121
121
  end
122
+
123
+ # Get formatted grounding sources (simplified access)
124
+ def grounding_sources
125
+ return [] unless grounded?
126
+
127
+ grounding_chunks.map do |chunk|
128
+ if chunk["web"]
129
+ {
130
+ url: chunk["web"]["uri"],
131
+ title: chunk["web"]["title"],
132
+ type: "web"
133
+ }
134
+ else
135
+ # Handle other potential chunk types
136
+ {
137
+ type: "unknown",
138
+ data: chunk
139
+ }
140
+ end
141
+ end
142
+ end
143
+
144
+ # Get URL context metadata (for URL Context tool)
145
+ def url_context_metadata
146
+ first_candidate&.dig("urlContextMetadata")
147
+ end
148
+
149
+ # Check if response has URL context metadata
150
+ def url_context?
151
+ !url_context_metadata.nil? && !url_context_metadata.empty?
152
+ end
153
+
154
+ # Get retrieved URLs from URL context
155
+ def retrieved_urls
156
+ return [] unless url_context?
157
+
158
+ url_context_metadata&.dig("urlMetadata") || []
159
+ end
160
+
161
+ # Get URL retrieval statuses
162
+ def url_retrieval_statuses
163
+ return [] unless url_context?
164
+
165
+ retrieved_urls.map do |url_info|
166
+ {
167
+ url: url_info["retrievedUrl"],
168
+ status: url_info["urlRetrievalStatus"],
169
+ title: url_info["title"]
170
+ }
171
+ end
172
+ end
122
173
 
123
174
 
124
175
  # Get token usage information
@@ -27,7 +27,7 @@ module Gemini
27
27
  id: thread_id,
28
28
  created_at: created_at,
29
29
  metadata: parameters[:metadata] || {},
30
- model: parameters[:model] || "gemini-2.0-flash-lite"
30
+ model: parameters[:model] || "gemini-2.5-flash"
31
31
  }
32
32
 
33
33
  {
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Gemini
4
- VERSION = "0.1.4"
4
+ VERSION = "0.1.6"
5
5
  end
@@ -0,0 +1,341 @@
1
module Gemini
  # Video analysis helpers built on the client's `generateContent` endpoint.
  #
  # A video can be supplied as a local file (uploaded through the client's
  # files API, or sent inline as Base64), as the URI of an already uploaded
  # file, or as a YouTube URL.
  class Video
    # Supported video file extensions.
    SUPPORTED_FORMATS = %w[.mp4 .mpeg .mov .avi .flv .mpg .webm .wmv .3gp .3gpp].freeze

    # Model used when the caller does not pass `model:`.
    DEFAULT_MODEL = "gemini-2.5-flash"

    # MIME type used when the type cannot be derived from a file extension.
    DEFAULT_MIME_TYPE = "video/mp4"

    # Extension -> MIME type lookup used by #determine_video_mime_type.
    MIME_TYPES = {
      ".mp4" => "video/mp4",
      ".mpeg" => "video/mpeg",
      ".mpg" => "video/mpeg",
      ".mov" => "video/quicktime",
      ".avi" => "video/x-msvideo",
      ".flv" => "video/x-flv",
      ".webm" => "video/webm",
      ".wmv" => "video/x-ms-wmv",
      ".3gp" => "video/3gpp",
      ".3gpp" => "video/3gpp"
    }.freeze

    # Upper bound (20 MB) for videos sent inline by #analyze_inline.
    MAX_INLINE_BYTES = 20 * 1024 * 1024

    # Accepted YouTube URL shapes. Anchored with \A (start of string) rather
    # than ^ (start of any line) so that a multi-line string cannot pass
    # validation by carrying a YouTube URL on a later line.
    YOUTUBE_URL_PATTERNS = [
      %r{\Ahttps?://(?:www\.)?youtube\.com/watch\?v=[\w-]+},
      %r{\Ahttps?://youtu\.be/[\w-]+},
      %r{\Ahttps?://(?:www\.)?youtube\.com/embed/[\w-]+},
      %r{\Ahttps?://(?:www\.)?youtube\.com/v/[\w-]+},
      %r{\Ahttps?://(?:www\.)?youtube\.com/shorts/[\w-]+}
    ].freeze

    private_constant :DEFAULT_MODEL, :DEFAULT_MIME_TYPE, :MIME_TYPES, :MAX_INLINE_BYTES, :YOUTUBE_URL_PATTERNS

    # @param client [Object] API client; must provide `files.upload`,
    #   `files.get` and `json_post`
    def initialize(client:)
      @client = client
    end

    # Analyzes a video file by uploading it through the files API first.
    # Recommended for files of 20 MB or more, or when the file is used
    # multiple times.
    #
    # @param file [IO, nil] an already opened file; left open by this method
    # @param file_path [String, nil] path to open (and close) when `file` is not given
    # @param prompt [String] instruction sent along with the video
    # @param model [String] model name used in the request path
    # @param parameters [Hash] extra top-level request parameters; `:mime_type`
    #   overrides the MIME type derived from the file extension
    # @return [Hash] `{ response: Gemini::Response, file_uri:, file_name: }`
    # @raise [ArgumentError] when neither `file` nor `file_path` is given
    # @raise [StandardError] when the uploaded file fails processing or does
    #   not become ACTIVE in time
    def analyze(file: nil, file_path: nil, prompt:, model: DEFAULT_MODEL, **parameters)
      with_video_file(file, file_path) do |io|
        mime_type = parameters.delete(:mime_type) || determine_video_mime_type(io)

        upload_result = @client.files.upload(file: io)
        file_uri = upload_result["file"]["uri"]
        file_name = upload_result["file"]["name"]

        # The file can only be referenced once it has reached the ACTIVE state.
        wait_for_file_active(file_name)

        raw_response = generate_video_content(
          file_uri: file_uri,
          mime_type: mime_type,
          prompt: prompt,
          model: model,
          **parameters
        )

        {
          response: Gemini::Response.new(raw_response),
          file_uri: file_uri,
          file_name: file_name
        }
      end
    end

    # Analyzes a video using the URI of an already uploaded file.
    #
    # @param file_uri [String] URI of the uploaded file
    # @param prompt [String] instruction sent along with the video
    # @param model [String] model name used in the request path
    # @param mime_type [String] MIME type reported for the file
    # @param parameters [Hash] extra top-level request parameters
    # @return [Gemini::Response]
    def analyze_with_file_uri(file_uri:, prompt:, model: DEFAULT_MODEL, mime_type: DEFAULT_MIME_TYPE, **parameters)
      raw_response = generate_video_content(
        file_uri: file_uri,
        mime_type: mime_type,
        prompt: prompt,
        model: model,
        **parameters
      )

      Gemini::Response.new(raw_response)
    end

    # Analyzes a video from a YouTube URL (public videos only).
    #
    # @param url [String] YouTube watch / youtu.be / embed / v / shorts URL
    # @param prompt [String] instruction sent along with the video
    # @param model [String] model name used in the request path
    # @param parameters [Hash] extra top-level request parameters
    # @return [Gemini::Response]
    # @raise [ArgumentError] when `url` is not a recognized YouTube URL
    def analyze_youtube(url:, prompt:, model: DEFAULT_MODEL, **parameters)
      unless valid_youtube_url?(url)
        raise ArgumentError, "Invalid YouTube URL. Only public YouTube videos are supported."
      end

      parts = [{ text: prompt }, { file_data: { file_uri: url } }]
      Gemini::Response.new(post_generate_content(parts: parts, model: model, parameters: parameters))
    end

    # Analyzes a small video file by sending it as inline data (intended for
    # files under 20 MB).
    #
    # @param file [IO, nil] an already opened file; left open by this method
    # @param file_path [String, nil] path to open (and close) when `file` is not given
    # @param prompt [String] instruction sent along with the video
    # @param model [String] model name used in the request path
    # @param parameters [Hash] extra top-level request parameters; `:mime_type`
    #   overrides the MIME type derived from the file extension
    # @return [Gemini::Response]
    # @raise [ArgumentError] when no file is given or the file exceeds 20 MB
    def analyze_inline(file: nil, file_path: nil, prompt:, model: DEFAULT_MODEL, **parameters)
      with_video_file(file, file_path) do |io|
        if io.size > MAX_INLINE_BYTES
          raise ArgumentError, "File size exceeds 20MB. Use analyze method with Files API instead."
        end

        mime_type = parameters.delete(:mime_type) || determine_video_mime_type(io)

        # Always encode from the start, even if the caller already read from the IO.
        io.rewind
        # pack("m0") emits Base64 without line feeds, the same output as
        # Base64.strict_encode64, without depending on the base64 library.
        encoded = [io.read].pack("m0")

        parts = [
          { text: prompt },
          { inline_data: { mime_type: mime_type, data: encoded } }
        ]
        Gemini::Response.new(post_generate_content(parts: parts, model: model, parameters: parameters))
      end
    end

    # Helper that asks the model to describe a video.
    #
    # Exactly one source is used, checked in this order: `youtube_url`,
    # `file_uri`, then `file` / `file_path`.
    #
    # @param language [String] "ja" selects the Japanese prompt; any other
    #   value selects the English prompt
    # @param parameters [Hash] forwarded to the underlying analyze method
    # @return [Gemini::Response]
    # @raise [ArgumentError] when no source is given
    def describe(file: nil, file_path: nil, file_uri: nil, youtube_url: nil, model: DEFAULT_MODEL, language: "ja", **parameters)
      prompt = language == "ja" ? "この動画の内容を詳しく説明してください。" : "Describe this video in detail."

      analyze_source(
        prompt: prompt, model: model, parameters: parameters,
        file: file, file_path: file_path, file_uri: file_uri, youtube_url: youtube_url
      )
    end

    # Helper that asks the model for the timestamps at which `query` appears.
    #
    # Source selection follows the same order as #describe.
    #
    # @param query [String] what to look for in the video
    # @param language [String] "ja" (default) selects the Japanese prompt; any
    #   other value selects the English prompt
    # @param parameters [Hash] forwarded to the underlying analyze method
    # @return [Gemini::Response]
    # @raise [ArgumentError] when no source is given
    def extract_timestamps(file: nil, file_path: nil, file_uri: nil, youtube_url: nil, query:, model: DEFAULT_MODEL, language: "ja", **parameters)
      prompt =
        if language == "ja"
          "動画内で「#{query}」が登場するタイムスタンプを全て抽出してください。MM:SS形式で出力してください。"
        else
          "Extract every timestamp at which \"#{query}\" appears in the video. Output each one in MM:SS format."
        end

      analyze_source(
        prompt: prompt, model: model, parameters: parameters,
        file: file, file_path: file_path, file_uri: file_uri, youtube_url: youtube_url
      )
    end

    # Analyzes a segment (a portion) of an uploaded video.
    #
    # @param file_uri [String] URI of the uploaded file
    # @param prompt [String] instruction sent along with the video
    # @param start_offset [String, Numeric, nil] segment start; a number is
    #   sent as "<n>s", a string is sent unchanged
    # @param end_offset [String, Numeric, nil] segment end, same rules as `start_offset`
    # @param model [String] model name used in the request path
    # @param mime_type [String] MIME type reported for the file
    # @param parameters [Hash] extra top-level request parameters
    # @return [Gemini::Response]
    def analyze_segment(file_uri:, prompt:, start_offset: nil, end_offset: nil, model: DEFAULT_MODEL, mime_type: DEFAULT_MIME_TYPE, **parameters)
      video_metadata = {}
      video_metadata[:startOffset] = format_offset(start_offset) if start_offset
      video_metadata[:endOffset] = format_offset(end_offset) if end_offset

      video_part = { file_data: { mime_type: mime_type, file_uri: file_uri } }
      # The video metadata belongs to the part itself, next to file_data;
      # file_data only describes the file (MIME type and URI).
      video_part[:video_metadata] = video_metadata unless video_metadata.empty?

      parts = [{ text: prompt }, video_part]
      Gemini::Response.new(post_generate_content(parts: parts, model: model, parameters: parameters))
    end

    # Answers a question about a video.
    #
    # Source selection follows the same order as #describe.
    #
    # @param question [String] sent to the model as the prompt
    # @param parameters [Hash] forwarded to the underlying analyze method
    # @return [Gemini::Response]
    # @raise [ArgumentError] when no source is given
    def ask(file: nil, file_path: nil, file_uri: nil, youtube_url: nil, question:, model: DEFAULT_MODEL, **parameters)
      analyze_source(
        prompt: question, model: model, parameters: parameters,
        file: file, file_path: file_path, file_uri: file_uri, youtube_url: youtube_url
      )
    end

    private

    # Routes a prompt to the analyze method matching the supplied source and
    # always returns the Gemini::Response (never the hash #analyze returns).
    def analyze_source(prompt:, model:, parameters:, file: nil, file_path: nil, file_uri: nil, youtube_url: nil)
      if youtube_url
        analyze_youtube(url: youtube_url, prompt: prompt, model: model, **parameters)
      elsif file_uri
        analyze_with_file_uri(file_uri: file_uri, prompt: prompt, model: model, **parameters)
      elsif file || file_path
        analyze(file: file, file_path: file_path, prompt: prompt, model: model, **parameters)[:response]
      else
        raise ArgumentError, "file, file_path, file_uri, or youtube_url is required"
      end
    end

    # Yields the IO to work on: `file` when given, otherwise `file_path`
    # opened in binary mode. Only an IO opened here is closed afterwards.
    def with_video_file(file, file_path)
      opened_here = file_path && !file
      io = opened_here ? File.open(file_path, "rb") : file
      raise ArgumentError, "file or file_path parameter is required" unless io

      yield io
    ensure
      io.close if io && opened_here
    end

    # Shared generation path for file-URI based requests (returns the raw response).
    def generate_video_content(file_uri:, mime_type:, prompt:, model:, **parameters)
      parts = [
        { text: prompt },
        { file_data: { mime_type: mime_type, file_uri: file_uri } }
      ]
      post_generate_content(parts: parts, model: model, parameters: parameters)
    end

    # Wraps `parts` in a single content entry, merges the extra parameters and
    # posts the request to the model's generateContent path.
    def post_generate_content(parts:, model:, parameters:)
      request_params = { contents: [{ parts: parts }] }
      merge_additional_params(request_params, parameters)

      @client.json_post(
        path: "models/#{model}:generateContent",
        parameters: request_params
      )
    end

    # Copies extra parameters onto the request; :contents is never overwritten.
    def merge_additional_params(request_params, parameters)
      parameters.each do |key, value|
        next if key == :contents

        request_params[key] = value
      end
    end

    # Renders a numeric offset as a "<n>s" string; other values pass through unchanged.
    def format_offset(offset)
      offset.is_a?(Numeric) ? "#{offset}s" : offset
    end

    # YouTube URL validation; non-string input is rejected instead of raising.
    def valid_youtube_url?(url)
      return false unless url.is_a?(String)

      YOUTUBE_URL_PATTERNS.any? { |pattern| url.match?(pattern) }
    end

    # Polls the files API until the file reaches the ACTIVE state.
    #
    # @param file_name [String] name of the uploaded file
    # @param max_attempts [Integer] number of polls before giving up
    # @param interval [Numeric] seconds to sleep between polls
    # @return [true] once the file is ACTIVE
    # @raise [StandardError] when the state is FAILED, or the file is still
    #   not ACTIVE after `max_attempts` polls
    def wait_for_file_active(file_name, max_attempts: 30, interval: 2)
      attempts = 0
      loop do
        file_info = @client.files.get(name: file_name)
        state = file_info["state"]

        return true if state == "ACTIVE"

        if state == "FAILED"
          raise StandardError, "File processing failed: #{file_info['error']&.dig('message') || 'Unknown error'}"
        end

        # Any other state (e.g. PROCESSING): wait and poll again.
        attempts += 1
        if attempts >= max_attempts
          raise StandardError, "File processing timeout. File is still in #{state} state after #{max_attempts * interval} seconds."
        end

        sleep(interval)
      end
    end

    # Derives the video MIME type from the file's extension. Falls back to
    # MP4 when the object has no usable path or the extension is unknown.
    def determine_video_mime_type(file)
      path = file.path if file.respond_to?(:path)
      return DEFAULT_MIME_TYPE unless path

      MIME_TYPES.fetch(File.extname(path).downcase, DEFAULT_MIME_TYPE)
    end
  end
end
data/lib/gemini.rb CHANGED
@@ -19,6 +19,7 @@ require_relative "gemini/images"
19
19
  require_relative "gemini/response"
20
20
  require_relative "gemini/documents"
21
21
  require_relative "gemini/cached_content"
22
+ require_relative "gemini/video"
22
23
  module Gemini
23
24
  class Error < StandardError; end
24
25
  class ConfigurationError < Error; end
metadata CHANGED
@@ -1,14 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ruby-gemini-api
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.1.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - rira100000000
8
- autorequire:
9
8
  bindir: bin
10
9
  cert_chain: []
11
- date: 2025-11-07 00:00:00.000000000 Z
10
+ date: 1980-01-02 00:00:00.000000000 Z
12
11
  dependencies:
13
12
  - !ruby/object:Gem::Dependency
14
13
  name: faraday
@@ -163,6 +162,7 @@ files:
163
162
  - lib/gemini/threads.rb
164
163
  - lib/gemini/tool_definition.rb
165
164
  - lib/gemini/version.rb
165
+ - lib/gemini/video.rb
166
166
  - lib/ruby/gemini.rb
167
167
  homepage: https://github.com/rira100000000/ruby-gemini-api
168
168
  licenses:
@@ -172,7 +172,6 @@ metadata:
172
172
  source_code_uri: https://github.com/rira100000000/ruby-gemini-api
173
173
  changelog_uri: https://github.com/rira100000000/ruby-gemini-api/blob/main/CHANGELOG.md
174
174
  rubygems_mfa_required: 'true'
175
- post_install_message:
176
175
  rdoc_options: []
177
176
  require_paths:
178
177
  - lib
@@ -187,8 +186,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
187
186
  - !ruby/object:Gem::Version
188
187
  version: '0'
189
188
  requirements: []
190
- rubygems_version: 3.5.11
191
- signing_key:
189
+ rubygems_version: 3.7.2
192
190
  specification_version: 4
193
191
  summary: Ruby client for Google's Gemini API
194
192
  test_files: []