ruby-gemini-api 0.1.7 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/gemini/client.rb CHANGED
@@ -1,9 +1,10 @@
1
1
  module Gemini
2
2
  class Client
3
3
  include Gemini::HTTP
4
-
4
+
5
5
  SENSITIVE_ATTRIBUTES = %i[@api_key @extra_headers].freeze
6
6
  CONFIG_KEYS = %i[api_key uri_base extra_headers log_errors request_timeout].freeze
7
+ VALID_THINKING_LEVELS = %w[minimal low medium high].freeze
7
8
 
8
9
  attr_reader(*CONFIG_KEYS, :faraday_middleware)
9
10
  attr_writer :api_key
@@ -69,6 +70,16 @@ module Gemini
69
70
  @cached_content ||= Gemini::CachedContent.new(client: self)
70
71
  end
71
72
 
73
+ # Live APIアクセサ
74
+ def live
75
+ @live ||= Gemini::Live.new(client: self)
76
+ end
77
+
78
+ # Embeddings APIアクセサ
79
+ def embeddings_api
80
+ @embeddings_api ||= Gemini::Embeddings.new(client: self)
81
+ end
82
+
72
83
  def reset_headers
73
84
  @extra_headers = {}
74
85
  end
@@ -83,7 +94,18 @@ module Gemini
83
94
  # Extended to support streaming callbacks
84
95
  def chat(parameters: {}, &stream_callback)
85
96
  model = parameters.delete(:model) || "gemini-2.5-flash"
86
-
97
+
98
+ # thinking_budget / thinking_level をパラメータから抽出
99
+ thinking_budget = parameters.delete(:thinking_budget)
100
+ thinking_level = parameters.delete(:thinking_level)
101
+
102
+ # Thinking設定
103
+ thinking_config = build_thinking_config(thinking_budget, thinking_level)
104
+ if thinking_config
105
+ parameters[:generationConfig] ||= {}
106
+ parameters[:generationConfig][:thinkingConfig] = thinking_config
107
+ end
108
+
87
109
  # If streaming callback is provided
88
110
  if block_given?
89
111
  path = "models/#{model}:streamGenerateContent"
@@ -100,10 +122,25 @@ module Gemini
100
122
  end
101
123
  end
102
124
 
103
- # Method corresponding to OpenAI's embeddings
125
+ # Generate embeddings for the given input.
126
+ # input can be a String (single embed) or Array of Strings (batch embed).
127
+ # Supports task_type, title (RETRIEVAL_DOCUMENT only), and output_dimensionality.
128
+ def embed_content(input, model: Gemini::Embeddings::DEFAULT_MODEL, task_type: nil,
129
+ title: nil, output_dimensionality: nil, **parameters)
130
+ embeddings_api.create(
131
+ input: input,
132
+ model: model,
133
+ task_type: task_type,
134
+ title: title,
135
+ output_dimensionality: output_dimensionality,
136
+ **parameters
137
+ )
138
+ end
139
+
140
+ # Method corresponding to OpenAI's embeddings (kept for compatibility)
104
141
  def embeddings(parameters: {})
105
- model = parameters.delete(:model) || "text-embedding-model"
106
- path = "models/#{model}:embedContent"
142
+ model = parameters.delete(:model) || Gemini::Embeddings::DEFAULT_MODEL
143
+ path = "models/#{model.to_s.delete_prefix("models/")}:embedContent"
107
144
  response = json_post(path: path, parameters: parameters)
108
145
  Gemini::Response.new(response)
109
146
  end
@@ -121,10 +158,12 @@ module Gemini
121
158
 
122
159
  # Helper methods for convenience
123
160
 
124
- # Method with usage similar to OpenAI's chat
161
+ # Method with usage similar to OpenAI's chat
125
162
  def generate_content(prompt, model: "gemini-2.5-flash", system_instruction: nil,
126
163
  response_mime_type: nil, response_schema: nil, temperature: 0.5, tools: nil,
127
- url_context: false, google_search: false, **parameters, &stream_callback)
164
+ url_context: false, google_search: false,
165
+ thinking_budget: nil, thinking_level: nil,
166
+ **parameters, &stream_callback)
128
167
  content = format_content(prompt)
129
168
  params = {
130
169
  contents: [content],
@@ -144,6 +183,12 @@ module Gemini
144
183
  params[:generation_config]["response_schema"] = response_schema
145
184
  end
146
185
 
186
+ # Thinking設定を追加
187
+ thinking_config = build_thinking_config(thinking_budget, thinking_level)
188
+ if thinking_config
189
+ params[:generation_config][:thinkingConfig] = thinking_config
190
+ end
191
+
147
192
  # Handle tool shortcuts
148
193
  tools = build_tools_array(tools, url_context: url_context, google_search: google_search)
149
194
  params[:tools] = tools if tools && !tools.empty?
@@ -416,6 +461,39 @@ module Gemini
416
461
 
417
462
  private
418
463
 
464
+ # Build thinking config from budget and level options
465
+ def build_thinking_config(budget, level)
466
+ return nil unless budget || level
467
+
468
+ config = {}
469
+
470
+ if budget
471
+ validate_thinking_budget!(budget)
472
+ config[:thinkingBudget] = budget
473
+ end
474
+
475
+ if level
476
+ level_str = level.to_s
477
+ validate_thinking_level!(level_str)
478
+ config[:thinkingLevel] = level_str
479
+ end
480
+
481
+ config
482
+ end
483
+
484
+ def validate_thinking_budget!(budget)
485
+ return if budget == -1 || budget == 0
486
+ unless budget.is_a?(Integer) && budget > 0 && budget <= 32768
487
+ raise ArgumentError, "thinking_budget must be -1, 0, or 1-32768"
488
+ end
489
+ end
490
+
491
+ def validate_thinking_level!(level)
492
+ unless VALID_THINKING_LEVELS.include?(level)
493
+ raise ArgumentError, "thinking_level must be one of: #{VALID_THINKING_LEVELS.join(', ')}"
494
+ end
495
+ end
496
+
419
497
  # Build tools array from explicit tools parameter and shortcuts
420
498
  def build_tools_array(tools, url_context: false, google_search: false)
421
499
  result_tools = []
@@ -1,27 +1,118 @@
1
1
module Gemini
  # Wrapper for the Gemini embeddings endpoints (embedContent and
  # batchEmbedContents). Obtain an instance via Client#embeddings_api.
  class Embeddings
    DEFAULT_MODEL = "gemini-embedding-001".freeze

    # Task types accepted by the API's taskType field.
    VALID_TASK_TYPES = %w[
      RETRIEVAL_QUERY
      RETRIEVAL_DOCUMENT
      SEMANTIC_SIMILARITY
      CLASSIFICATION
      CLUSTERING
      QUESTION_ANSWERING
      FACT_VERIFICATION
      CODE_RETRIEVAL_QUERY
    ].freeze

    def initialize(client:)
      @client = client
    end

    # Embed a single content, or delegate to batch_create when input is an
    # Array. Returns a Gemini::Response wrapping the raw API response.
    #
    # input  - String, Hash (content or single-part form), or Array (batch).
    # model  - model name, with or without the "models/" prefix.
    # task_type, title, output_dimensionality - optional embed options.
    # Extra keyword arguments are merged into the request payload and take
    # precedence over the generated keys.
    def create(input:, model: DEFAULT_MODEL, task_type: nil, title: nil,
               output_dimensionality: nil, **parameters)
      if input.is_a?(Array)
        return batch_create(
          inputs: input,
          model: model,
          task_type: task_type,
          title: title,
          output_dimensionality: output_dimensionality,
          **parameters
        )
      end

      body = build_embed_payload(
        input: input,
        task_type: task_type,
        title: title,
        output_dimensionality: output_dimensionality
      ).merge(parameters)

      raw = @client.json_post(
        path: "models/#{normalize_model(model)}:embedContent",
        parameters: body
      )
      Gemini::Response.new(raw)
    end

    # Embed several inputs with a single batchEmbedContents request.
    # Each per-input request carries the fully qualified model name, as the
    # batch endpoint requires.
    def batch_create(inputs:, model: DEFAULT_MODEL, task_type: nil, title: nil,
                     output_dimensionality: nil, **parameters)
      qualified_model = "models/#{normalize_model(model)}"

      requests = inputs.map do |item|
        request = build_embed_payload(
          input: item,
          task_type: task_type,
          title: title,
          output_dimensionality: output_dimensionality
        )
        request[:model] = qualified_model
        request
      end

      raw = @client.json_post(
        path: "#{qualified_model}:batchEmbedContents",
        parameters: { requests: requests }.merge(parameters)
      )
      Gemini::Response.new(raw)
    end

    private

    # Assemble the per-request payload shared by single and batch embeds.
    def build_embed_payload(input:, task_type:, title:, output_dimensionality:)
      { content: format_content(input) }.tap do |payload|
        if task_type
          validate_task_type!(task_type)
          payload[:taskType] = task_type.to_s.upcase
        end

        payload[:title] = title if title
        payload[:outputDimensionality] = output_dimensionality if output_dimensionality
      end
    end

    # Coerce user input into the API's Content shape ({ parts: [...] }).
    # Strings become a single text part; Hashes are passed through when they
    # already look like a content, or wrapped when they look like one part;
    # anything else is stringified.
    def format_content(input)
      return { parts: [{ text: input }] } if input.is_a?(String)
      return { parts: [{ text: input.to_s }] } unless input.is_a?(Hash)

      return input if hash_key?(input, :parts)

      part_like = %i[text inline_data file_data].any? { |key| hash_key?(input, key) }
      part_like ? { parts: [input] } : input
    end

    # True when the hash contains the key as a Symbol or a String.
    def hash_key?(hash, key)
      hash.key?(key) || hash.key?(key.to_s)
    end

    # Strip an optional "models/" prefix so callers may pass either form.
    def normalize_model(model)
      model.to_s.delete_prefix("models/")
    end

    # Raise ArgumentError unless the (upcased) task type is one the API accepts.
    def validate_task_type!(task_type)
      normalized = task_type.to_s.upcase
      return if VALID_TASK_TYPES.include?(normalized)

      raise ArgumentError, "task_type must be one of: #{VALID_TASK_TYPES.join(', ')}"
    end
  end
end
@@ -0,0 +1,45 @@
1
# frozen_string_literal: true

module Gemini
  # Helpers for continuing a conversation after a function call.
  module FunctionCallingHelper
    # Build the contents array for the follow-up request after the model
    # issued a function call. Gemini 3 requires the model turn to carry its
    # thought signature on continuation, which
    # model_response.build_function_call_parts_with_signature provides.
    #
    # @param original_contents [Array] the conversation history so far
    # @param model_response [Gemini::Response] the model reply containing the
    #   function call
    # @param function_responses [Array<Hash>] function results, each shaped
    #   as { name: "function_name", response: { ... } }
    # @return [Array] a new contents array for the continuation request
    #
    # @example
    #   contents = Gemini::FunctionCallingHelper.build_continuation(
    #     original_contents: [{ role: "user", parts: [{ text: "東京の天気を教えて" }] }],
    #     model_response: response,
    #     function_responses: [
    #       { name: "get_weather", response: { temperature: 20, condition: "晴れ" } }
    #     ]
    #   )
    def self.build_continuation(original_contents:, model_response:, function_responses:)
      # The model's turn, with thought signatures preserved.
      model_turn = {
        role: "model",
        parts: model_response.build_function_call_parts_with_signature
      }

      # The function results, wrapped as functionResponse parts.
      tool_turn = {
        role: "user",
        parts: function_responses.map { |result| { functionResponse: result } }
      }

      # Leave the caller's history untouched; return a fresh array.
      original_contents.dup.push(model_turn, tool_turn)
    end
  end
end
@@ -0,0 +1,65 @@
1
# frozen_string_literal: true

module Gemini
  class Live
    # Per-session configuration for Live API connections.
    # Modality and voice are validated at construction time; the remaining
    # attributes are passed through to the setup message untouched.
    class Configuration
      attr_accessor :model, :response_modality, :voice_name,
                    :system_instruction, :tools,
                    :context_window_compression, :session_resumption,
                    :automatic_activity_detection,
                    :media_resolution, :output_audio_transcription

      VALID_MODALITIES = %w[TEXT AUDIO].freeze
      VALID_VOICES = %w[Puck Charon Kore Fenrir Aoede Leda Orus Zephyr].freeze
      # NOTE: gemini-2.5-flash-live-preview is listed in the public Live API
      # tools documentation as the recommended model, but is not currently
      # deployed (returns "model not found" on bidiGenerateContent). The
      # native-audio preview model is the only Live model on which function
      # calling currently works in practice (with AUDIO modality).
      DEFAULT_MODEL = "gemini-2.5-flash-native-audio-preview-12-2025"

      def initialize(
        model: DEFAULT_MODEL,
        response_modality: "TEXT",
        voice_name: nil,
        system_instruction: nil,
        tools: nil,
        context_window_compression: nil,
        session_resumption: nil,
        automatic_activity_detection: true,
        media_resolution: nil,
        output_audio_transcription: false
      )
        @model = model
        @response_modality = validate_modality(response_modality)
        @voice_name = validate_voice(voice_name)
        @system_instruction = system_instruction
        @tools = tools
        @context_window_compression = context_window_compression
        @session_resumption = session_resumption
        @automatic_activity_detection = automatic_activity_detection
        @media_resolution = media_resolution
        @output_audio_transcription = output_audio_transcription
      end

      private

      # Upcase the modality (Symbols allowed) and ensure it is supported.
      def validate_modality(modality)
        normalized = modality.to_s.upcase
        return normalized if VALID_MODALITIES.include?(normalized)

        raise ArgumentError, "Invalid modality: #{normalized}. Must be one of: #{VALID_MODALITIES.join(', ')}"
      end

      # Validate the voice name; nil means "let the API pick a default".
      def validate_voice(voice)
        return nil if voice.nil?
        return voice if VALID_VOICES.include?(voice)

        raise ArgumentError, "Invalid voice: #{voice}. Must be one of: #{VALID_VOICES.join(', ')}"
      end
    end
  end
end
@@ -0,0 +1,83 @@
1
# frozen_string_literal: true

require "websocket-client-simple"
require "json"

module Gemini
  class Live
    # Manages the WebSocket connection to the Live (BidiGenerateContent) API.
    # All callbacks may be nil; they are invoked only when present.
    # on_message receives raw frame data; on_close receives (code, reason).
    class Connection
      WEBSOCKET_BASE_URL = "wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1beta.GenerativeService.BidiGenerateContent"

      attr_reader :connected

      def initialize(api_key:, on_message:, on_open:, on_error:, on_close:)
        @api_key = api_key
        @on_message = on_message
        @on_open = on_open
        @on_error = on_error
        @on_close = on_close
        @ws = nil
        @connected = false
        @mutex = Mutex.new
      end

      # Open the WebSocket and wire up the event handlers. The `ws.on`
      # handler blocks execute with the socket as `self`, so the callbacks
      # and this Connection are captured in locals, and @connected is
      # flipped via instance_variable_set on the captured Connection.
      def connect
        endpoint = "#{WEBSOCKET_BASE_URL}?key=#{@api_key}"

        message_cb = @on_message
        open_cb = @on_open
        error_cb = @on_error
        close_cb = @on_close
        owner = self

        @ws = WebSocket::Client::Simple.connect(endpoint) do |ws|
          ws.on :open do
            owner.instance_variable_set(:@connected, true)
            open_cb.call if open_cb
          end

          ws.on :message do |frame|
            message_cb.call(frame.data) if message_cb
          end

          ws.on :error do |err|
            error_cb.call(err) if error_cb
          end

          ws.on :close do |event|
            owner.instance_variable_set(:@connected, false)
            if close_cb
              close_cb.call(
                event.respond_to?(:code) ? event.code : nil,
                event.respond_to?(:reason) ? event.reason : nil
              )
            end
          end
        end

        self
      end

      # Serialize data to JSON (unless it is already a String) and send it.
      # Returns true on success, false when not connected or on error.
      # NOTE(review): this shadows Object#send on Connection instances; use
      # __send__ for reflection on this object.
      def send(data)
        return false unless @ws && @connected

        @mutex.synchronize do
          frame = data.is_a?(String) ? data : data.to_json
          @ws.send(frame)
        end
        true
      rescue StandardError => e
        @on_error&.call(e)
        false
      end

      # Close the socket (if any) and mark the connection as down.
      def close
        @ws&.close
        @connected = false
      end

      def connected?
        @connected && @ws && !@ws.closed?
      end
    end
  end
end
@@ -0,0 +1,217 @@
1
# frozen_string_literal: true

module Gemini
  class Live
    # Builds the JSON message hashes exchanged over a Live API session.
    # All methods are stateless class methods returning plain Hashes ready
    # for serialization.
    class MessageBuilder
      # Scheduling values accepted for NON_BLOCKING function responses.
      VALID_SCHEDULING = %w[INTERRUPT WHEN_IDLE SILENT].freeze

      class << self
        # Build the initial setup message from a Live::Configuration.
        # Optional sections (generationConfig, systemInstruction, tools,
        # contextWindowCompression, sessionResumption, realtimeInputConfig)
        # are included only when configured.
        def setup(config)
          message = {
            setup: {
              model: normalize_model_name(config.model)
            }
          }

          generation_config = build_generation_config(config)
          message[:setup][:generationConfig] = generation_config unless generation_config.empty?

          # System instruction
          if config.system_instruction
            message[:setup][:systemInstruction] = {
              parts: [{ text: config.system_instruction }]
            }
          end

          # Tools configuration
          message[:setup][:tools] = config.tools if config.tools

          # Context window compression
          if config.context_window_compression
            message[:setup][:contextWindowCompression] = config.context_window_compression
          end

          # Session resumption
          if config.session_resumption
            message[:setup][:sessionResumption] = config.session_resumption
          end

          # Disable server-side VAD when the caller wants manual
          # activity_start / activity_end control.
          unless config.automatic_activity_detection
            message[:setup][:realtimeInputConfig] = {
              automaticActivityDetection: {
                disabled: true
              }
            }
          end

          message
        end

        # Build client content message (single text part).
        def client_content(text:, turn_complete: true, role: "user")
          {
            clientContent: {
              turns: [
                {
                  role: role,
                  parts: [{ text: text }]
                }
              ],
              turnComplete: turn_complete
            }
          }
        end

        # Build client content with multiple parts.
        def client_content_parts(parts:, turn_complete: true, role: "user")
          {
            clientContent: {
              turns: [
                {
                  role: role,
                  parts: parts
                }
              ],
              turnComplete: turn_complete
            }
          }
        end

        # Build realtime input message (audio/video) using the legacy
        # mediaChunks field. NOTE: mediaChunks is deprecated by the API in
        # favor of dedicated audio/video fields (no realtime_audio /
        # realtime_video builders exist in this class yet). Kept for
        # backward compatibility with older Live models that still accept it.
        #
        # When both audio_data and video_data are given, audio wins.
        # Raises ArgumentError when neither is given (previously this
        # silently built an invalid chunk with data: nil).
        def realtime_input(audio_data: nil, video_data: nil, mime_type:)
          data = audio_data || video_data
          raise ArgumentError, "audio_data or video_data is required" if data.nil?

          {
            realtimeInput: {
              mediaChunks: [
                {
                  mimeType: mime_type,
                  data: data
                }
              ]
            }
          }
        end

        # Build a realtime text input message. This is the universal
        # text-input form for the Live API and is required by newer Live
        # models such as gemini-3.1-flash-live-preview, which reject the
        # turn-based clientContent payload.
        def realtime_text(text)
          { realtimeInput: { text: text.to_s } }
        end

        # Build activity start message (for manual VAD).
        def activity_start
          {
            realtimeInput: {
              activityStart: {}
            }
          }
        end

        # Build activity end message (for manual VAD).
        def activity_end
          {
            realtimeInput: {
              activityEnd: {}
            }
          }
        end

        # Build tool response message.
        #
        # Each function response hash supports:
        #   :id         - The function call id from the server
        #   :name       - The function name
        #   :response   - The function result (Hash or scalar). When using
        #                 NON_BLOCKING (async) function calls, include
        #                 `scheduling: "INTERRUPT" | "WHEN_IDLE" | "SILENT"`
        #                 inside the response hash.
        #   :scheduling - (optional) Top-level shortcut that takes precedence
        #                 over any scheduling embedded in :response. Accepts
        #                 Symbol or String.
        #
        # Raises ArgumentError if scheduling is not one of the valid values.
        def tool_response(function_responses)
          {
            toolResponse: {
              functionResponses: function_responses.map { |resp| build_function_response(resp) }
            }
          }
        end

        private

        # Normalize one function-response entry. Both Symbol- and String-keyed
        # embedded scheduling values are removed before the normalized value
        # is written back, so the serialized JSON can never carry duplicate
        # "scheduling" keys (the previous version could when a top-level
        # scheduling coexisted with a string-keyed embedded one).
        def build_function_response(resp)
          response_payload =
            case resp[:response]
            when Hash then resp[:response].dup # shallow copy; nested values are shared
            when nil then {}
            else { result: resp[:response] }
            end

          symbol_scheduling = response_payload.delete(:scheduling)
          string_scheduling = response_payload.delete("scheduling")
          chosen = resp[:scheduling] || symbol_scheduling || string_scheduling
          response_payload[:scheduling] = normalize_scheduling(chosen) if chosen

          { id: resp[:id], name: resp[:name], response: response_payload }
        end

        # Upcase and validate a scheduling value (Symbol or String).
        def normalize_scheduling(value)
          value_str = value.to_s.upcase
          unless VALID_SCHEDULING.include?(value_str)
            raise ArgumentError,
                  "scheduling must be one of: #{VALID_SCHEDULING.join(', ')} (got #{value.inspect})"
          end
          value_str
        end

        # Ensure the model name carries the "models/" prefix the API expects.
        def normalize_model_name(model)
          model.start_with?("models/") ? model : "models/#{model}"
        end

        # Assemble the generationConfig section from the configuration.
        # Always contains responseModalities; speech, media resolution and
        # transcription settings are added only when configured.
        def build_generation_config(config)
          generation_config = {}

          # Response modality
          generation_config[:responseModalities] = [config.response_modality]

          # Speech/Voice configuration for AUDIO modality
          if config.response_modality == "AUDIO" && config.voice_name
            generation_config[:speechConfig] = {
              voiceConfig: {
                prebuiltVoiceConfig: {
                  voiceName: config.voice_name
                }
              }
            }
          end

          # Media resolution
          if config.media_resolution
            generation_config[:mediaResolution] = config.media_resolution
          end

          # Output audio transcription
          if config.output_audio_transcription
            generation_config[:outputAudioTranscription] = {}
          end

          generation_config
        end
      end
    end
  end
end