ruby-gemini-api 0.1.7 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,223 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+ require "base64"
5
+
6
module Gemini
  class Live
    # Live API session manager.
    #
    # A session opens its Connection as soon as it is constructed, sends the
    # setup message when the connection opens, and turns incoming server
    # messages into events delivered to handlers registered with #on.
    class Session
      attr_reader :configuration, :last_resumption_token, :usage_metadata

      # @param api_key [String] API key handed to Connection
      # @param configuration [Object] passed to MessageBuilder.setup once the
      #   connection opens
      def initialize(api_key:, configuration:)
        @api_key = api_key
        @configuration = configuration
        @event_handlers = Hash.new { |h, k| h[k] = [] }
        @connected = false
        @setup_complete = false
        @last_resumption_token = nil
        @usage_metadata = nil
        @connection = nil

        setup_connection
      end

      # Register event handler. Returns self so calls can be chained.
      # Supported events:
      #   :setup_complete       - Session setup completed
      #   :text                 - Text response received (text)
      #   :audio                - Audio data received (base64_data, mime_type)
      #   :data                 - Other inline data received (base64_data, mime_type)
      #   :input_transcription  - Transcription of user audio received (text)
      #   :output_transcription - Transcription of model audio received (text)
      #   :tool_call            - Tool call requested (function_calls)
      #   :interrupted          - User interrupted the model
      #   :turn_complete        - Model turn completed
      #   :generation_complete  - Generation completed
      #   :usage_metadata       - Token usage info received (metadata)
      #   :session_resumption   - Session resumption token updated (update)
      #   :go_away              - Connection will close soon (info)
      #   :error                - Error occurred (error)
      #   :close                - Connection closed (code, reason)
      def on(event, &block)
        @event_handlers[event.to_sym] << block
        self
      end

      # Send text message via clientContent.turns. This is the legacy form
      # used by native-audio Live models. Newer models such as
      # gemini-3.1-flash-live-preview reject this payload — use
      # #send_realtime_text instead, which works on every Live model.
      #
      # @raise [Gemini::Error] if setup has not completed
      def send_text(text, turn_complete: true)
        ensure_setup_complete!
        message = MessageBuilder.client_content(
          text: text,
          turn_complete: turn_complete
        )
        @connection.send(message)
      end

      # Send text input via realtimeInput.text (universal form).
      # Works with every currently-deployed Live model, including
      # gemini-3.1-flash-live-preview and native-audio variants.
      #
      # @raise [Gemini::Error] if setup has not completed
      def send_realtime_text(text)
        ensure_setup_complete!
        @connection.send(MessageBuilder.realtime_text(text))
      end

      # Send audio data. A binary-encoded String is Base64-encoded first;
      # any other value is passed through unchanged.
      #
      # @raise [Gemini::Error] if setup has not completed
      def send_audio(audio_data, mime_type: "audio/pcm;rate=16000")
        ensure_setup_complete!
        message = MessageBuilder.realtime_input(
          audio_data: encode_media(audio_data),
          mime_type: mime_type
        )
        @connection.send(message)
      end

      # Send video/image data. A binary-encoded String is Base64-encoded
      # first; any other value is passed through unchanged.
      #
      # @raise [Gemini::Error] if setup has not completed
      def send_video(image_data, mime_type: "image/jpeg")
        ensure_setup_complete!
        message = MessageBuilder.realtime_input(
          video_data: encode_media(image_data),
          mime_type: mime_type
        )
        @connection.send(message)
      end

      # Send tool response
      #
      # @raise [Gemini::Error] if setup has not completed
      def send_tool_response(function_responses)
        ensure_setup_complete!
        message = MessageBuilder.tool_response(function_responses)
        @connection.send(message)
      end

      # Manual VAD control - signal activity start
      def activity_start
        ensure_setup_complete!
        @connection.send(MessageBuilder.activity_start)
      end

      # Manual VAD control - signal activity end
      def activity_end
        ensure_setup_complete!
        @connection.send(MessageBuilder.activity_end)
      end

      # Close the session and reset the connected / setup flags.
      def close
        @connection&.close
        @connected = false
        @setup_complete = false
      end

      def connected?
        @connected && @connection&.connected?
      end

      def setup_complete?
        @setup_complete
      end

      private

      def setup_connection
        @connection = Connection.new(
          api_key: @api_key,
          on_message: method(:handle_message),
          on_open: method(:handle_open),
          on_error: method(:handle_error),
          on_close: method(:handle_close)
        )
        @connection.connect
        @connected = true
      end

      def handle_open
        # Send setup message immediately after connection opens
        setup_message = MessageBuilder.setup(@configuration)
        @connection.send(setup_message)
      end

      # Parse one server message and dispatch it. Invalid JSON is reported
      # through the :error event instead of being raised.
      def handle_message(data)
        parsed = JSON.parse(data, symbolize_names: true)
        # Valid JSON that is not an object (array, number, ...) carries no
        # message fields; indexing it with a Symbol would raise TypeError.
        return unless parsed.is_a?(Hash)

        if parsed[:setupComplete]
          @setup_complete = true
          emit(:setup_complete)
        elsif parsed[:serverContent]
          handle_server_content(parsed[:serverContent])
        elsif parsed[:toolCall]
          emit(:tool_call, parsed[:toolCall][:functionCalls])
        elsif parsed[:sessionResumptionUpdate]
          handle_session_resumption(parsed[:sessionResumptionUpdate])
        elsif parsed[:goAway]
          emit(:go_away, parsed[:goAway])
        end

        # usageMetadata can accompany any of the message types above, so it is
        # handled outside the chain rather than as one more exclusive branch.
        handle_usage_metadata(parsed[:usageMetadata]) if parsed[:usageMetadata]
      rescue JSON::ParserError => e
        emit(:error, e)
      end

      def handle_server_content(content)
        # An interruption ends processing of this message
        if content[:interrupted]
          emit(:interrupted)
          return
        end

        emit(:generation_complete) if content[:generationComplete]

        content.dig(:modelTurn, :parts)&.each { |part| handle_model_part(part) }

        emit_transcription(:input_transcription, content[:inputTranscription])
        emit_transcription(:output_transcription, content[:outputTranscription])

        emit(:turn_complete) if content[:turnComplete]
      end

      # Emit :text for text parts; :audio or :data for inline data depending
      # on whether the MIME type starts with "audio/".
      def handle_model_part(part)
        if part[:text]
          emit(:text, part[:text])
        elsif part[:inlineData]
          inline = part[:inlineData]
          event = inline[:mimeType]&.start_with?("audio/") ? :audio : :data
          emit(event, inline[:data], inline[:mimeType])
        end
      end

      # Emit a transcription event when the payload is a Hash carrying :text.
      def emit_transcription(event, transcription)
        return unless transcription.is_a?(Hash) && transcription[:text]

        emit(event, transcription[:text])
      end

      def handle_usage_metadata(metadata)
        @usage_metadata = metadata
        emit(:usage_metadata, metadata)
      end

      def handle_session_resumption(update)
        @last_resumption_token = update[:newHandle]
        emit(:session_resumption, update)
      end

      def handle_error(error)
        emit(:error, error)
      end

      def handle_close(code, reason)
        @connected = false
        @setup_complete = false
        emit(:close, code, reason)
      end

      def emit(event, *args)
        # fetch bypasses the Hash default block, so emitting an event nobody
        # listens to does not insert a new key into the handler table.
        @event_handlers.fetch(event, []).each { |handler| handler.call(*args) }
      end

      # Binary-encoded Strings are Base64-encoded; everything else is
      # returned unchanged.
      def encode_media(data)
        return data unless data.is_a?(String) && data.encoding == Encoding::BINARY

        Base64.strict_encode64(data)
      end

      def ensure_setup_complete!
        raise Gemini::Error, "Session setup not complete. Wait for :setup_complete event." unless @setup_complete
      end
    end
  end
end
@@ -0,0 +1,102 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "live/configuration"
4
+ require_relative "live/message_builder"
5
+ require_relative "live/connection"
6
+ require_relative "live/session"
7
+
8
module Gemini
  # Live API client for real-time audio/video/text interactions.
  #
  # Session#send_* methods raise Gemini::Error until the session reports
  # setup_complete?, so the examples wait for setup before sending.
  # NOTE(review): the examples assume the connection delivers server messages
  # on a background thread while the caller sleeps — confirm against Connection.
  #
  # @example Basic text conversation
  #   client = Gemini::Client.new(api_key)
  #   session = client.live.connect(model: "gemini-2.5-flash-live-preview")
  #
  #   session.on(:text) { |text| puts "AI: #{text}" }
  #   session.on(:error) { |e| puts "Error: #{e}" }
  #
  #   sleep 0.1 until session.setup_complete?
  #   session.send_text("Hello!")
  #   sleep 5
  #   session.close
  #
  # @example Audio conversation
  #   session = client.live.connect(
  #     model: "gemini-2.5-flash-live-preview",
  #     response_modality: "AUDIO",
  #     voice_name: "Puck"
  #   )
  #
  #   session.on(:audio) { |data, mime| play_audio(data) }
  #   sleep 0.1 until session.setup_complete?
  #   session.send_audio(pcm_data) # 16-bit PCM, 16kHz, mono
  #
  # @example With block (auto-close)
  #   client.live.connect(model: "gemini-2.5-flash-live-preview") do |session|
  #     session.on(:text) { |text| puts text }
  #     sleep 0.1 until session.setup_complete?
  #     session.send_text("Hello!")
  #     sleep 5
  #   end # session.close called automatically
  #
  class Live
    # @param client [Object] client object; only its #api_key is read here
    def initialize(client:)
      @client = client
    end

    # Build a Configuration from the given options, open a Session with it,
    # and return the session.
    #
    # @param model [String] Model to use (default: Configuration::DEFAULT_MODEL)
    # @param response_modality [String] "TEXT" or "AUDIO" (default: "TEXT")
    # @param voice_name [String] Voice for audio responses (Puck, Charon, Kore, etc.)
    # @param system_instruction [String] System prompt
    # @param tools [Array] Tool definitions for function calling
    # @param context_window_compression [Hash] Compression settings for long sessions
    # @param session_resumption [Hash] Session resumption settings
    # @param automatic_activity_detection [Boolean] Enable/disable automatic VAD (default: true)
    # @param media_resolution [String] Media resolution setting
    # @param output_audio_transcription [Boolean] Enable audio transcription (default: false)
    # @yield [session] If block given, yields the session and closes it when
    #   the block returns or raises
    # @return [Gemini::Live::Session, Object] the live session, or the block's
    #   value when a block is given
    #
    def connect(
      model: Configuration::DEFAULT_MODEL,
      response_modality: "TEXT",
      voice_name: nil,
      system_instruction: nil,
      tools: nil,
      context_window_compression: nil,
      session_resumption: nil,
      automatic_activity_detection: true,
      media_resolution: nil,
      output_audio_transcription: false
    )
      config = Configuration.new(
        model: model,
        response_modality: response_modality,
        voice_name: voice_name,
        system_instruction: system_instruction,
        tools: tools,
        context_window_compression: context_window_compression,
        session_resumption: session_resumption,
        automatic_activity_detection: automatic_activity_detection,
        media_resolution: media_resolution,
        output_audio_transcription: output_audio_transcription
      )

      session = Session.new(
        api_key: @client.api_key,
        configuration: config
      )

      return session unless block_given?

      begin
        yield session
      ensure
        # Runs on normal return and on exceptions raised inside the block
        session.close
      end
    end
  end
end
@@ -70,9 +70,49 @@ module Gemini
70
70
 
71
71
# Check if response is valid.
#
# A response counts as valid when the raw data has a non-empty
# "candidates" or "predictions" collection, or carries embedding data.
# A key that is present but null (or otherwise has no #empty?) is treated
# as absent instead of raising NoMethodError.
#
# @return [Boolean]
def valid?
  return false if @raw_data.nil?

  populated = lambda do |key|
    value = @raw_data[key]
    value.respond_to?(:empty?) && !value.empty?
  end

  populated.call("candidates") || populated.call("predictions") || embedding_response?
end
78
+
79
# Tell whether the raw response carries embedding data: either a non-nil
# "embedding" entry or a non-empty "embeddings" Array.
def embedding_response?
  return false if @raw_data.nil?

  has_single = @raw_data.key?("embedding") && !@raw_data["embedding"].nil?
  return true if has_single

  batch = @raw_data["embeddings"]
  @raw_data.key?("embeddings") && batch.is_a?(Array) && !batch.empty?
end
85
+
86
# Return the "values" of the embedding in the raw response.
# When "embedding" is a Hash its values are returned; otherwise the values
# of the first entry of an "embeddings" Array are returned. Returns nil
# when neither shape is present.
def embedding
  return nil unless @raw_data

  single = @raw_data["embedding"]
  return single["values"] if single.is_a?(Hash)

  batch = @raw_data["embeddings"]
  return unless batch.is_a?(Array)

  head = batch.first
  head["values"] if head.is_a?(Hash)
end
97
+
98
# Get all embedding value arrays.
# For an "embeddings" Array, returns the "values" of every Hash entry that
# has them; entries that are not Hashes are skipped instead of raising.
# For a single "embedding" Hash with values, returns a one-element array.
# Returns [] otherwise.
def embeddings
  return [] unless @raw_data

  batch = @raw_data["embeddings"]
  return batch.grep(Hash).map { |entry| entry["values"] }.compact if batch.is_a?(Array)

  single = @raw_data["embedding"]
  single.is_a?(Hash) && single["values"] ? [single["values"]] : []
end
111
+
112
# Length of the vector returned by #embedding, or 0 when that is not an Array.
def embedding_dimension
  vector = embedding
  return 0 unless vector.is_a?(Array)

  vector.length
end
77
117
 
78
118
  # Get error message if any
@@ -223,7 +263,52 @@ module Gemini
223
263
# "safetyRatings" of the first candidate, or [] when there is no candidate
# or the candidate has no ratings.
def safety_ratings
  ratings = first_candidate&.dig("safetyRatings")
  ratings || []
end
226
-
266
+
267
+ # Thinking-related methods
268
+
269
# Get the thinking token count (usageMetadata.thoughtsTokenCount).
# Returns nil when the response has no raw data or no such field.
def thoughts_token_count
  @raw_data&.dig('usageMetadata', 'thoughtsTokenCount')
end
273
+
274
# Collect the 'thoughtSignature' of every part that has one (as an array).
def thought_signatures
  parts.each_with_object([]) do |part, found|
    signature = part['thoughtSignature']
    found << signature if signature
  end
end
278
+
279
# Get the first thought signature, or nil when there is none.
def first_thought_signature
  signatures = thought_signatures
  signatures.first
end
283
+
284
# Whether at least one thought signature is present.
def has_thought_signature?
  thought_signatures.length.positive?
end
288
+
289
# Get the model version ('modelVersion' of the raw response).
# Returns nil when the response has no raw data or no such field.
def model_version
  @raw_data&.dig('modelVersion')
end
293
+
294
# Whether the model version starts with 'gemini-3'.
# Returns false when no model version is available.
def gemini_3?
  version = model_version
  return false if version.nil?

  version.start_with?('gemini-3')
end
298
+
299
# Build request parts for the response's function calls.
# Each part with a 'functionCall' becomes { functionCall: ... }; the first
# thought signature, when there is one, is attached to the first part only.
def build_function_call_parts_with_signature
  signature = first_thought_signature
  calls = parts.select { |part| part['functionCall'] }

  calls.each_with_index.map do |part, position|
    entry = { functionCall: part['functionCall'] }
    # Only the first part carries the signature
    entry[:thoughtSignature] = signature if position.zero? && signature
    entry
  end
end
311
+
227
312
  # Get the first image from the image generation results (Base64-encoded)
228
313
  def image
229
314
  images.first
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Gemini
4
- VERSION = "0.1.7"
4
+ VERSION = "1.1.0"
5
5
  end
data/lib/gemini.rb CHANGED
@@ -16,9 +16,11 @@ require_relative "gemini/audio"
16
16
  require_relative "gemini/files"
17
17
  require_relative "gemini/images"
18
18
  require_relative "gemini/response"
19
+ require_relative "gemini/function_calling_helper"
19
20
  require_relative "gemini/documents"
20
21
  require_relative "gemini/cached_content"
21
22
  require_relative "gemini/video"
23
+ require_relative "gemini/live"
22
24
 
23
25
  module Gemini
24
26
  class Error < StandardError; end
metadata CHANGED
@@ -1,14 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ruby-gemini-api
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.7
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - rira100000000
8
- autorequire:
9
8
  bindir: bin
10
9
  cert_chain: []
11
- date: 2026-01-13 00:00:00.000000000 Z
10
+ date: 1980-01-02 00:00:00.000000000 Z
12
11
  dependencies:
13
12
  - !ruby/object:Gem::Dependency
14
13
  name: faraday
@@ -52,6 +51,20 @@ dependencies:
52
51
  - - "~>"
53
52
  - !ruby/object:Gem::Version
54
53
  version: '2.0'
54
+ - !ruby/object:Gem::Dependency
55
+ name: websocket-client-simple
56
+ requirement: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - "~>"
59
+ - !ruby/object:Gem::Version
60
+ version: '0.8'
61
+ type: :runtime
62
+ prerelease: false
63
+ version_requirements: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - "~>"
66
+ - !ruby/object:Gem::Version
67
+ version: '0.8'
55
68
  - !ruby/object:Gem::Dependency
56
69
  name: rake
57
70
  requirement: !ruby/object:Gem::Requirement
@@ -153,9 +166,15 @@ files:
153
166
  - lib/gemini/documents.rb
154
167
  - lib/gemini/embeddings.rb
155
168
  - lib/gemini/files.rb
169
+ - lib/gemini/function_calling_helper.rb
156
170
  - lib/gemini/http.rb
157
171
  - lib/gemini/http_headers.rb
158
172
  - lib/gemini/images.rb
173
+ - lib/gemini/live.rb
174
+ - lib/gemini/live/configuration.rb
175
+ - lib/gemini/live/connection.rb
176
+ - lib/gemini/live/message_builder.rb
177
+ - lib/gemini/live/session.rb
159
178
  - lib/gemini/messages.rb
160
179
  - lib/gemini/models.rb
161
180
  - lib/gemini/response.rb
@@ -173,7 +192,6 @@ metadata:
173
192
  source_code_uri: https://github.com/rira100000000/ruby-gemini-api
174
193
  changelog_uri: https://github.com/rira100000000/ruby-gemini-api/blob/main/CHANGELOG.md
175
194
  rubygems_mfa_required: 'true'
176
- post_install_message:
177
195
  rdoc_options: []
178
196
  require_paths:
179
197
  - lib
@@ -188,8 +206,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
188
206
  - !ruby/object:Gem::Version
189
207
  version: '0'
190
208
  requirements: []
191
- rubygems_version: 3.3.26
192
- signing_key:
209
+ rubygems_version: 3.6.9
193
210
  specification_version: 4
194
211
  summary: Ruby client for Google's Gemini API
195
212
  test_files: []