ruby-gemini-api 0.1.7 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +59 -21
- data/README.md +397 -0
- data/lib/gemini/client.rb +85 -7
- data/lib/gemini/embeddings.rb +108 -17
- data/lib/gemini/function_calling_helper.rb +45 -0
- data/lib/gemini/live/configuration.rb +65 -0
- data/lib/gemini/live/connection.rb +83 -0
- data/lib/gemini/live/message_builder.rb +217 -0
- data/lib/gemini/live/session.rb +223 -0
- data/lib/gemini/live.rb +102 -0
- data/lib/gemini/response.rb +89 -4
- data/lib/gemini/version.rb +1 -1
- data/lib/gemini.rb +2 -0
- metadata +23 -6
data/lib/gemini/client.rb
CHANGED
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
module Gemini
|
|
2
2
|
class Client
|
|
3
3
|
include Gemini::HTTP
|
|
4
|
-
|
|
4
|
+
|
|
5
5
|
SENSITIVE_ATTRIBUTES = %i[@api_key @extra_headers].freeze
|
|
6
6
|
CONFIG_KEYS = %i[api_key uri_base extra_headers log_errors request_timeout].freeze
|
|
7
|
+
VALID_THINKING_LEVELS = %w[minimal low medium high].freeze
|
|
7
8
|
|
|
8
9
|
attr_reader(*CONFIG_KEYS, :faraday_middleware)
|
|
9
10
|
attr_writer :api_key
|
|
@@ -69,6 +70,16 @@ module Gemini
|
|
|
69
70
|
@cached_content ||= Gemini::CachedContent.new(client: self)
|
|
70
71
|
end
|
|
71
72
|
|
|
73
|
+
# Live API accessor
|
|
74
|
+
# @return [Gemini::Live] accessor built with this client on first use and
#   reused on every later call
def live
  return @live if @live

  @live = Gemini::Live.new(client: self)
end
|
|
77
|
+
|
|
78
|
+
# Embeddings API accessor
|
|
79
|
+
# @return [Gemini::Embeddings] accessor built with this client on first use
#   and reused on every later call
def embeddings_api
  return @embeddings_api if @embeddings_api

  @embeddings_api = Gemini::Embeddings.new(client: self)
end
|
|
82
|
+
|
|
72
83
|
# Replaces the stored extra headers with a new, empty Hash.
#
# @return [Hash] the new empty header hash
def reset_headers
  @extra_headers = {}
end
|
|
@@ -83,7 +94,18 @@ module Gemini
|
|
|
83
94
|
# Extended to support streaming callbacks
|
|
84
95
|
def chat(parameters: {}, &stream_callback)
|
|
85
96
|
model = parameters.delete(:model) || "gemini-2.5-flash"
|
|
86
|
-
|
|
97
|
+
|
|
98
|
+
# Extract thinking_budget / thinking_level from the parameters
|
|
99
|
+
thinking_budget = parameters.delete(:thinking_budget)
|
|
100
|
+
thinking_level = parameters.delete(:thinking_level)
|
|
101
|
+
|
|
102
|
+
# Thinking configuration
|
|
103
|
+
thinking_config = build_thinking_config(thinking_budget, thinking_level)
|
|
104
|
+
if thinking_config
|
|
105
|
+
parameters[:generationConfig] ||= {}
|
|
106
|
+
parameters[:generationConfig][:thinkingConfig] = thinking_config
|
|
107
|
+
end
|
|
108
|
+
|
|
87
109
|
# If streaming callback is provided
|
|
88
110
|
if block_given?
|
|
89
111
|
path = "models/#{model}:streamGenerateContent"
|
|
@@ -100,10 +122,25 @@ module Gemini
|
|
|
100
122
|
end
|
|
101
123
|
end
|
|
102
124
|
|
|
103
|
-
#
|
|
125
|
+
# Generate embeddings for the given input.
|
|
126
|
+
# input can be a String (single embed) or Array of Strings (batch embed).
|
|
127
|
+
# Supports task_type, title (RETRIEVAL_DOCUMENT only), and output_dimensionality.
|
|
128
|
+
# Forwards to `embeddings_api.create`, passing every argument through
# unchanged (including any extra keyword parameters).
#
# @param input [Object] value handed to `create` as `input:`
# @param model [String] embedding model name
# @param task_type [String, Symbol, nil] passed through as `task_type:`
# @param title [String, nil] passed through as `title:`
# @param output_dimensionality [Integer, nil] passed through as `output_dimensionality:`
# @param parameters [Hash] additional keyword arguments forwarded as-is
# @return [Object] whatever `embeddings_api.create` returns
def embed_content(input, model: Gemini::Embeddings::DEFAULT_MODEL, task_type: nil,
                  title: nil, output_dimensionality: nil, **parameters)
  options = parameters.merge(
    input: input,
    model: model,
    task_type: task_type,
    title: title,
    output_dimensionality: output_dimensionality
  )

  embeddings_api.create(**options)
end
|
|
139
|
+
|
|
140
|
+
# Method corresponding to OpenAI's embeddings (kept for compatibility)
|
|
104
141
|
# OpenAI-style embeddings call, kept for compatibility.
#
# Posts `parameters` (minus `:model`) to the model's `embedContent` path.
# The caller's hash is left untouched: `:model` is removed from a copy,
# so the same hash can be reused for later calls.
#
# @param parameters [Hash] request body; the optional `:model` key selects
#   the model (a leading "models/" is stripped) and defaults to
#   Gemini::Embeddings::DEFAULT_MODEL
# @return [Gemini::Response] wrapper around the `json_post` result
def embeddings(parameters: {})
  payload = parameters.dup
  model = payload.delete(:model) || Gemini::Embeddings::DEFAULT_MODEL
  path = "models/#{model.to_s.delete_prefix("models/")}:embedContent"

  Gemini::Response.new(json_post(path: path, parameters: payload))
end
|
|
@@ -121,10 +158,12 @@ module Gemini
|
|
|
121
158
|
|
|
122
159
|
# Helper methods for convenience
|
|
123
160
|
|
|
124
|
-
|
|
161
|
+
# Method with usage similar to OpenAI's chat
|
|
125
162
|
def generate_content(prompt, model: "gemini-2.5-flash", system_instruction: nil,
|
|
126
163
|
response_mime_type: nil, response_schema: nil, temperature: 0.5, tools: nil,
|
|
127
|
-
url_context: false, google_search: false,
|
|
164
|
+
url_context: false, google_search: false,
|
|
165
|
+
thinking_budget: nil, thinking_level: nil,
|
|
166
|
+
**parameters, &stream_callback)
|
|
128
167
|
content = format_content(prompt)
|
|
129
168
|
params = {
|
|
130
169
|
contents: [content],
|
|
@@ -144,6 +183,12 @@ module Gemini
|
|
|
144
183
|
params[:generation_config]["response_schema"] = response_schema
|
|
145
184
|
end
|
|
146
185
|
|
|
186
|
+
# Add the thinking configuration
|
|
187
|
+
thinking_config = build_thinking_config(thinking_budget, thinking_level)
|
|
188
|
+
if thinking_config
|
|
189
|
+
params[:generation_config][:thinkingConfig] = thinking_config
|
|
190
|
+
end
|
|
191
|
+
|
|
147
192
|
# Handle tool shortcuts
|
|
148
193
|
tools = build_tools_array(tools, url_context: url_context, google_search: google_search)
|
|
149
194
|
params[:tools] = tools if tools && !tools.empty?
|
|
@@ -416,6 +461,39 @@ module Gemini
|
|
|
416
461
|
|
|
417
462
|
private
|
|
418
463
|
|
|
464
|
+
# Build thinking config from budget and level options
|
|
465
|
+
# Assembles the thinking configuration hash from the two optional settings.
#
# @param budget [Integer, nil] stored under :thinkingBudget after validation
# @param level [String, Symbol, nil] stringified, validated and stored
#   under :thinkingLevel
# @return [Hash, nil] nil when neither option is given
# @raise [ArgumentError] propagated from the validators
def build_thinking_config(budget, level)
  return nil if !budget && !level

  thinking = {}

  if budget
    validate_thinking_budget!(budget)
    thinking[:thinkingBudget] = budget
  end

  return thinking unless level

  level_name = level.to_s
  validate_thinking_level!(level_name)
  thinking[:thinkingLevel] = level_name
  thinking
end
|
|
483
|
+
|
|
484
|
+
# Ensures the thinking budget is an Integer equal to -1, 0, or 1..32768.
#
# The type check runs first so that Float look-alikes such as 0.0 or -1.0
# (which compare equal to 0 / -1) are rejected like any other non-Integer.
#
# @param budget [Object] candidate budget
# @return [nil] when the budget is valid
# @raise [ArgumentError] for non-Integers and out-of-range values
def validate_thinking_budget!(budget)
  return if budget.is_a?(Integer) && budget.between?(-1, 32_768)

  raise ArgumentError, "thinking_budget must be -1, 0, or 1-32768"
end
|
|
490
|
+
|
|
491
|
+
# Ensures the level is listed in VALID_THINKING_LEVELS.
#
# @param level [String] level name, compared as given
# @return [nil] when the level is valid
# @raise [ArgumentError] when the level is not in the list
def validate_thinking_level!(level)
  return if VALID_THINKING_LEVELS.include?(level)

  raise ArgumentError, "thinking_level must be one of: #{VALID_THINKING_LEVELS.join(', ')}"
end
|
|
496
|
+
|
|
419
497
|
# Build tools array from explicit tools parameter and shortcuts
|
|
420
498
|
def build_tools_array(tools, url_context: false, google_search: false)
|
|
421
499
|
result_tools = []
|
data/lib/gemini/embeddings.rb
CHANGED
|
@@ -1,27 +1,118 @@
|
|
|
1
1
|
module Gemini
  # Client-side wrapper for the embedContent / batchEmbedContents endpoints.
  class Embeddings
    DEFAULT_MODEL = "gemini-embedding-001".freeze

    VALID_TASK_TYPES = %w[
      RETRIEVAL_QUERY
      RETRIEVAL_DOCUMENT
      SEMANTIC_SIMILARITY
      CLASSIFICATION
      CLUSTERING
      QUESTION_ANSWERING
      FACT_VERIFICATION
      CODE_RETRIEVAL_QUERY
    ].freeze

    # @param client [#json_post] object used to send the requests
    def initialize(client:)
      @client = client
    end

    # Generates an embedding for a single input, or delegates to
    # {#batch_create} when +input+ is an Array.
    #
    # @param input [String, Hash, Array, Object] content to embed
    # @param model [String, Symbol] model name, with or without "models/"
    # @param task_type [String, Symbol, nil] one of VALID_TASK_TYPES (any case)
    # @param title [String, nil] sent as +title+ when present
    # @param output_dimensionality [Integer, nil] sent as +outputDimensionality+
    # @param parameters [Hash] extra top-level request fields
    # @return [Gemini::Response]
    # @raise [ArgumentError] when +task_type+ is not recognised
    def create(input:, model: DEFAULT_MODEL, task_type: nil, title: nil,
               output_dimensionality: nil, **parameters)
      if input.is_a?(Array)
        return batch_create(
          inputs: input,
          model: model,
          task_type: task_type,
          title: title,
          output_dimensionality: output_dimensionality,
          **parameters
        )
      end

      payload = build_embed_payload(
        input: input,
        task_type: task_type,
        title: title,
        output_dimensionality: output_dimensionality
      ).merge(parameters)

      response = @client.json_post(
        path: "models/#{normalize_model(model)}:embedContent",
        parameters: payload
      )
      Gemini::Response.new(response)
    end

    # Generates embeddings for several inputs in one batch request.
    #
    # A single non-Array input is treated as a one-element batch. Without
    # this, a String raised NoMethodError and a Hash was iterated as
    # key/value pairs, producing meaningless requests.
    #
    # @param inputs [Array, String, Hash, Object] contents to embed
    # @param model [String, Symbol] model name, with or without "models/"
    # @param task_type [String, Symbol, nil] applied to every request
    # @param title [String, nil] applied to every request
    # @param output_dimensionality [Integer, nil] applied to every request
    # @param parameters [Hash] extra top-level request fields
    # @return [Gemini::Response]
    # @raise [ArgumentError] when +task_type+ is not recognised
    def batch_create(inputs:, model: DEFAULT_MODEL, task_type: nil, title: nil,
                     output_dimensionality: nil, **parameters)
      model_id = normalize_model(model)
      entries = inputs.is_a?(Array) ? inputs : [inputs]

      requests = entries.map do |input|
        build_embed_payload(
          input: input,
          task_type: task_type,
          title: title,
          output_dimensionality: output_dimensionality
        ).merge(model: "models/#{model_id}")
      end

      response = @client.json_post(
        path: "models/#{model_id}:batchEmbedContents",
        parameters: { requests: requests }.merge(parameters)
      )
      Gemini::Response.new(response)
    end

    private

    # Builds the body of one embed request; optional fields are only added
    # when a value was supplied.
    def build_embed_payload(input:, task_type:, title:, output_dimensionality:)
      payload = { content: format_content(input) }
      payload[:taskType] = validate_task_type!(task_type) if task_type
      payload[:title] = title if title
      payload[:outputDimensionality] = output_dimensionality if output_dimensionality
      payload
    end

    # Wraps the input in a `{ parts: [...] }` content hash.
    # Hashes that already contain :parts, or that match none of the known
    # part keys, are passed through untouched.
    def format_content(input)
      case input
      when String
        { parts: [{ text: input }] }
      when Hash
        if input.key?(:parts) || input.key?("parts")
          input
        elsif input.key?(:text) || input.key?("text") ||
              input.key?(:inline_data) || input.key?("inline_data") ||
              input.key?(:file_data) || input.key?("file_data")
          { parts: [input] }
        else
          input
        end
      else
        { parts: [{ text: input.to_s }] }
      end
    end

    # Returns the model name without a leading "models/" prefix.
    def normalize_model(model)
      model.to_s.delete_prefix("models/")
    end

    # Validates the task type case-insensitively.
    #
    # @return [String] the upper-cased task type
    # @raise [ArgumentError] when it is not in VALID_TASK_TYPES
    def validate_task_type!(task_type)
      normalized = task_type.to_s.upcase
      return normalized if VALID_TASK_TYPES.include?(normalized)

      raise ArgumentError, "task_type must be one of: #{VALID_TASK_TYPES.join(', ')}"
    end
  end
end
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Gemini
  module FunctionCallingHelper
    # Builds the contents array used to continue a conversation after a
    # function-call response.
    # In Gemini 3, the Thought Signature is required when continuing a
    # function call.
    #
    # @param original_contents [Array] the original conversation history
    # @param model_response [Gemini::Response] the model's response (including the function call)
    # @param function_responses [Array<Hash>] the function results;
    #   each element has the form { name: "function_name", response: { ... } }
    # @return [Array] the contents array for the continuation request
    #
    # @example
    #   contents = Gemini::FunctionCallingHelper.build_continuation(
    #     original_contents: [{ role: "user", parts: [{ text: "What is the weather in Tokyo?" }] }],
    #     model_response: response,
    #     function_responses: [
    #       { name: "get_weather", response: { temperature: 20, condition: "sunny" } }
    #     ]
    #   )
    def self.build_continuation(original_contents:, model_response:, function_responses:)
      # Model turn: the function-call parts, signature included
      model_turn = {
        role: "model",
        parts: model_response.build_function_call_parts_with_signature
      }

      # User turn: one functionResponse part per function result
      result_turn = {
        role: "user",
        parts: function_responses.map { |result| { functionResponse: result } }
      }

      # Append to a copy so the caller's history array is not modified
      original_contents.dup.push(model_turn, result_turn)
    end
  end
end
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Gemini
  class Live
    # Configuration class for Live API sessions.
    #
    # +response_modality+ and +voice_name+ are validated both in the
    # constructor and in their writers, so an instance never holds a value
    # outside VALID_MODALITIES / VALID_VOICES.
    class Configuration
      VALID_MODALITIES = %w[TEXT AUDIO].freeze
      VALID_VOICES = %w[Puck Charon Kore Fenrir Aoede Leda Orus Zephyr].freeze
      # NOTE: gemini-2.5-flash-live-preview is listed in the public Live API
      # tools documentation as the recommended model, but is not currently
      # deployed (returns "model not found" on bidiGenerateContent). The
      # native-audio preview model is the only Live model on which function
      # calling currently works in practice (with AUDIO modality).
      DEFAULT_MODEL = "gemini-2.5-flash-native-audio-preview-12-2025"

      attr_accessor :model, :system_instruction, :tools,
                    :context_window_compression, :session_resumption,
                    :automatic_activity_detection,
                    :media_resolution, :output_audio_transcription

      attr_reader :response_modality, :voice_name

      # @param model [String] Live model name
      # @param response_modality [String, Symbol] "TEXT" or "AUDIO" (any case)
      # @param voice_name [String, Symbol, nil] one of VALID_VOICES, or nil
      # @raise [ArgumentError] for an unknown modality or voice
      def initialize(
        model: DEFAULT_MODEL,
        response_modality: "TEXT",
        voice_name: nil,
        system_instruction: nil,
        tools: nil,
        context_window_compression: nil,
        session_resumption: nil,
        automatic_activity_detection: true,
        media_resolution: nil,
        output_audio_transcription: false
      )
        @model = model
        self.response_modality = response_modality
        self.voice_name = voice_name
        @system_instruction = system_instruction
        @tools = tools
        @context_window_compression = context_window_compression
        @session_resumption = session_resumption
        @automatic_activity_detection = automatic_activity_detection
        @media_resolution = media_resolution
        @output_audio_transcription = output_audio_transcription
      end

      # Stores the modality upper-cased.
      #
      # @raise [ArgumentError] when the modality is not in VALID_MODALITIES
      def response_modality=(modality)
        @response_modality = validate_modality(modality)
      end

      # Stores the voice name as a String (nil clears it).
      #
      # @raise [ArgumentError] when the voice is not in VALID_VOICES
      def voice_name=(voice)
        @voice_name = validate_voice(voice)
      end

      private

      # Returns the upper-cased modality or raises ArgumentError.
      def validate_modality(modality)
        modality = modality.to_s.upcase
        unless VALID_MODALITIES.include?(modality)
          raise ArgumentError, "Invalid modality: #{modality}. Must be one of: #{VALID_MODALITIES.join(', ')}"
        end

        modality
      end

      # Returns the voice as a String (nil passes through) or raises
      # ArgumentError. Symbols are accepted, matching validate_modality.
      def validate_voice(voice)
        return nil if voice.nil?

        voice = voice.to_s
        unless VALID_VOICES.include?(voice)
          raise ArgumentError, "Invalid voice: #{voice}. Must be one of: #{VALID_VOICES.join(', ')}"
        end

        voice
      end
    end
  end
end
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "websocket-client-simple"
|
|
4
|
+
require "json"
|
|
5
|
+
|
|
6
|
+
module Gemini
  class Live
    # WebSocket connection manager for the Live API.
    class Connection
      WEBSOCKET_BASE_URL = "wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1beta.GenerativeService.BidiGenerateContent"

      attr_reader :connected

      # @param api_key [String] appended to the WebSocket URL as `key`
      # @param on_message [#call, nil] called with each message's data
      # @param on_open [#call, nil] called without arguments when the socket opens
      # @param on_error [#call, nil] called with the error object
      # @param on_close [#call, nil] called with (code, reason); either may be nil
      def initialize(api_key:, on_message:, on_open:, on_error:, on_close:)
        @api_key = api_key
        @on_message = on_message
        @on_open = on_open
        @on_error = on_error
        @on_close = on_close
        @ws = nil
        @connected = false
        @mutex = Mutex.new
      end

      # Opens the WebSocket and registers the event handlers.
      #
      # @return [Connection] self
      def connect
        url = "#{WEBSOCKET_BASE_URL}?key=#{@api_key}"

        # Handlers capture locals instead of instance variables.
        # NOTE(review): presumably the library evaluates handler blocks with
        # the socket as self - confirm against websocket-client-simple.
        on_message_callback = @on_message
        on_open_callback = @on_open
        on_error_callback = @on_error
        on_close_callback = @on_close
        connection = self

        @ws = WebSocket::Client::Simple.connect(url) do |ws|
          ws.on :open do
            connection.instance_variable_set(:@connected, true)
            on_open_callback.call if on_open_callback
          end

          ws.on :message do |msg|
            on_message_callback.call(msg.data) if on_message_callback
          end

          ws.on :error do |e|
            on_error_callback.call(e) if on_error_callback
          end

          ws.on :close do |e|
            connection.instance_variable_set(:@connected, false)
            code = e.respond_to?(:code) ? e.code : nil
            reason = e.respond_to?(:reason) ? e.reason : nil
            on_close_callback.call(code, reason) if on_close_callback
          end
        end

        self
      end

      # Sends a message over the socket. Strings are sent as-is; anything
      # else is serialized with `to_json`. Writes are serialized by a mutex.
      #
      # @param data [String, #to_json]
      # @return [Boolean] false when not connected or when sending raised
      #   (the error is passed to the on_error callback), true otherwise
      def send(data)
        return false unless @ws && @connected

        @mutex.synchronize do
          json_data = data.is_a?(String) ? data : data.to_json
          @ws.send(json_data)
        end
        true
      rescue StandardError => e
        @on_error&.call(e)
        false
      end

      # Closes the socket, if any. The connected flag is cleared even when
      # the socket's own `close` raises, so the object never keeps
      # reporting a connection it has already tried to tear down.
      #
      # @return [false]
      def close
        @ws&.close
        false
      ensure
        @connected = false
      end

      # @return [Boolean, nil] truthy only while the open event has fired
      #   and the socket does not report itself closed
      def connected?
        @connected && @ws && !@ws.closed?
      end
    end
  end
end
|
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Gemini
  class Live
    # Helper class to build Live API messages as plain Hashes.
    class MessageBuilder
      VALID_SCHEDULING = %w[INTERRUPT WHEN_IDLE SILENT].freeze

      class << self
        # Build the setup message from a configuration object.
        #
        # @param config [#model, #response_modality, #voice_name, ...] session settings
        # @return [Hash] `{ setup: { ... } }`
        def setup(config)
          setup_body = { model: normalize_model_name(config.model) }

          generation_config = build_generation_config(config)
          setup_body[:generationConfig] = generation_config unless generation_config.empty?

          # System instruction
          if config.system_instruction
            setup_body[:systemInstruction] = { parts: [{ text: config.system_instruction }] }
          end

          # Tools configuration
          setup_body[:tools] = config.tools if config.tools

          # Context window compression
          if config.context_window_compression
            setup_body[:contextWindowCompression] = config.context_window_compression
          end

          # Session resumption
          setup_body[:sessionResumption] = config.session_resumption if config.session_resumption

          # VAD (Voice Activity Detection) is only mentioned when disabled
          unless config.automatic_activity_detection
            setup_body[:realtimeInputConfig] = { automaticActivityDetection: { disabled: true } }
          end

          # Output audio transcription is a field of the setup message
          # itself, not of generationConfig.
          setup_body[:outputAudioTranscription] = {} if config.output_audio_transcription

          { setup: setup_body }
        end

        # Build a client content message carrying a single text part.
        #
        # @param text [String]
        # @param turn_complete [Boolean]
        # @param role [String]
        # @return [Hash]
        def client_content(text:, turn_complete: true, role: "user")
          client_content_parts(parts: [{ text: text }], turn_complete: turn_complete, role: role)
        end

        # Build a client content message with caller-supplied parts.
        #
        # @param parts [Array<Hash>]
        # @param turn_complete [Boolean]
        # @param role [String]
        # @return [Hash]
        def client_content_parts(parts:, turn_complete: true, role: "user")
          {
            clientContent: {
              turns: [{ role: role, parts: parts }],
              turnComplete: turn_complete
            }
          }
        end

        # Build a realtime input message (audio/video) using the legacy
        # mediaChunks field. NOTE: mediaChunks is deprecated by the API in
        # favor of dedicated audio/video fields; no builders for those
        # fields are defined in this class. Kept for backward compatibility
        # with older Live models that still accept it.
        #
        # @param audio_data [String, nil] used when present
        # @param video_data [String, nil] used when audio_data is nil
        # @param mime_type [String]
        # @return [Hash]
        def realtime_input(audio_data: nil, video_data: nil, mime_type:)
          {
            realtimeInput: {
              mediaChunks: [{ mimeType: mime_type, data: audio_data || video_data }]
            }
          }
        end

        # Build a realtime text input message. This is the universal
        # text-input form for the Live API and is required by newer Live
        # models such as gemini-3.1-flash-live-preview, which reject the
        # turn-based clientContent payload.
        def realtime_text(text)
          { realtimeInput: { text: text.to_s } }
        end

        # Build activity start message (for manual VAD)
        def activity_start
          { realtimeInput: { activityStart: {} } }
        end

        # Build activity end message (for manual VAD)
        def activity_end
          { realtimeInput: { activityEnd: {} } }
        end

        # Build tool response message.
        #
        # Each function response hash supports:
        #   :id         - The function call id from the server
        #   :name       - The function name
        #   :response   - The function result (Hash or scalar). When using
        #                 NON_BLOCKING (async) function calls, include
        #                 `scheduling: "INTERRUPT" | "WHEN_IDLE" | "SILENT"`
        #                 inside the response hash.
        #   :scheduling - (optional) Top-level shortcut. When provided,
        #                 it is merged into the response hash as
        #                 `response[:scheduling]`. Accepts Symbol or String.
        #
        # Raises ArgumentError if scheduling is not one of the valid values.
        def tool_response(function_responses)
          {
            toolResponse: {
              functionResponses: function_responses.map { |resp| build_function_response(resp) }
            }
          }
        end

        private

        # Normalizes one function response. Works on a copy of a Hash
        # response so the caller's hash is not modified.
        def build_function_response(resp)
          response_payload =
            case resp[:response]
            when Hash then resp[:response].dup
            when nil then {}
            else { result: resp[:response] }
            end

          # The top-level shortcut wins over a value inside the response.
          scheduling = resp[:scheduling] || response_payload[:scheduling] || response_payload["scheduling"]
          if scheduling
            # Always drop the string-keyed variant; otherwise the serialized
            # JSON would contain the "scheduling" key twice.
            response_payload.delete("scheduling")
            response_payload[:scheduling] = normalize_scheduling(scheduling)
          end

          { id: resp[:id], name: resp[:name], response: response_payload }
        end

        # Returns the upper-cased scheduling value or raises ArgumentError.
        def normalize_scheduling(value)
          value_str = value.to_s.upcase
          unless VALID_SCHEDULING.include?(value_str)
            raise ArgumentError,
                  "scheduling must be one of: #{VALID_SCHEDULING.join(', ')} (got #{value.inspect})"
          end

          value_str
        end

        # Ensures the model name carries the "models/" prefix.
        # Accepts Strings and Symbols.
        def normalize_model_name(model)
          name = model.to_s
          name.start_with?("models/") ? name : "models/#{name}"
        end

        # Builds the generationConfig section of the setup message.
        def build_generation_config(config)
          generation_config = { responseModalities: [config.response_modality] }

          # Speech/Voice configuration for AUDIO modality
          if config.response_modality == "AUDIO" && config.voice_name
            generation_config[:speechConfig] = {
              voiceConfig: { prebuiltVoiceConfig: { voiceName: config.voice_name } }
            }
          end

          # Media resolution
          generation_config[:mediaResolution] = config.media_resolution if config.media_resolution

          generation_config
        end
      end
    end
  end
end
|