ruby-gemini-api 1.0.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,27 +1,118 @@
1
1
  module Gemini
2
2
  class Embeddings
3
+ DEFAULT_MODEL = "gemini-embedding-001".freeze
4
+
5
+ VALID_TASK_TYPES = %w[
6
+ RETRIEVAL_QUERY
7
+ RETRIEVAL_DOCUMENT
8
+ SEMANTIC_SIMILARITY
9
+ CLASSIFICATION
10
+ CLUSTERING
11
+ QUESTION_ANSWERING
12
+ FACT_VERIFICATION
13
+ CODE_RETRIEVAL_QUERY
14
+ ].freeze
15
+
3
16
  def initialize(client:)
4
17
  @client = client
5
18
  end
6
19
 
7
- def create(input:, model: "text-embedding-model", **parameters)
8
- content = case input
9
- when String
10
- { parts: [{ text: input }] }
11
- when Array
12
- { parts: input.map { |text| { text: text.to_s } } }
13
- else
14
- { parts: [{ text: input.to_s }] }
15
- end
16
-
17
- payload = {
18
- content: content
19
- }.merge(parameters)
20
-
21
- @client.json_post(
22
- path: "models/#{model}:embedContent",
20
+ # Generate an embedding for a single content, or batch when input is an Array
21
+ def create(input:, model: DEFAULT_MODEL, task_type: nil, title: nil,
22
+ output_dimensionality: nil, **parameters)
23
+ if input.is_a?(Array)
24
+ return batch_create(
25
+ inputs: input,
26
+ model: model,
27
+ task_type: task_type,
28
+ title: title,
29
+ output_dimensionality: output_dimensionality,
30
+ **parameters
31
+ )
32
+ end
33
+
34
+ payload = build_embed_payload(
35
+ input: input,
36
+ task_type: task_type,
37
+ title: title,
38
+ output_dimensionality: output_dimensionality
39
+ ).merge(parameters)
40
+
41
+ response = @client.json_post(
42
+ path: "models/#{normalize_model(model)}:embedContent",
23
43
  parameters: payload
24
44
  )
45
+ Gemini::Response.new(response)
46
+ end
47
+
48
+ # Generate embeddings for multiple inputs in a single batch request
49
+ def batch_create(inputs:, model: DEFAULT_MODEL, task_type: nil, title: nil,
50
+ output_dimensionality: nil, **parameters)
51
+ requests = inputs.map do |input|
52
+ req = build_embed_payload(
53
+ input: input,
54
+ task_type: task_type,
55
+ title: title,
56
+ output_dimensionality: output_dimensionality
57
+ )
58
+ req[:model] = "models/#{normalize_model(model)}"
59
+ req
60
+ end
61
+
62
+ payload = { requests: requests }.merge(parameters)
63
+
64
+ response = @client.json_post(
65
+ path: "models/#{normalize_model(model)}:batchEmbedContents",
66
+ parameters: payload
67
+ )
68
+ Gemini::Response.new(response)
69
+ end
70
+
71
+ private
72
+
73
+ def build_embed_payload(input:, task_type:, title:, output_dimensionality:)
74
+ payload = { content: format_content(input) }
75
+
76
+ if task_type
77
+ validate_task_type!(task_type)
78
+ payload[:taskType] = task_type.to_s.upcase
79
+ end
80
+
81
+ payload[:title] = title if title
82
+ payload[:outputDimensionality] = output_dimensionality if output_dimensionality
83
+
84
+ payload
85
+ end
86
+
87
+ def format_content(input)
88
+ case input
89
+ when String
90
+ { parts: [{ text: input }] }
91
+ when Hash
92
+ if input.key?(:parts) || input.key?("parts")
93
+ input
94
+ elsif input.key?(:text) || input.key?("text") ||
95
+ input.key?(:inline_data) || input.key?("inline_data") ||
96
+ input.key?(:file_data) || input.key?("file_data")
97
+ { parts: [input] }
98
+ else
99
+ input
100
+ end
101
+ else
102
+ { parts: [{ text: input.to_s }] }
103
+ end
104
+ end
105
+
106
+ def normalize_model(model)
107
+ model_str = model.to_s
108
+ model_str.start_with?("models/") ? model_str.delete_prefix("models/") : model_str
109
+ end
110
+
111
+ def validate_task_type!(task_type)
112
+ task_type_str = task_type.to_s.upcase
113
+ unless VALID_TASK_TYPES.include?(task_type_str)
114
+ raise ArgumentError, "task_type must be one of: #{VALID_TASK_TYPES.join(', ')}"
115
+ end
25
116
  end
26
117
  end
27
- end
118
+ end
@@ -0,0 +1,65 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Gemini
4
+ class Live
5
# Configuration class for Live API sessions.
#
# Holds the settings that are later turned into the Live API setup message.
# +response_modality+ and +voice_name+ are validated both at construction
# time and whenever they are reassigned, so the stored modality is always
# one of the upper-case VALID_MODALITIES strings.
class Configuration
  VALID_MODALITIES = %w[TEXT AUDIO].freeze
  VALID_VOICES = %w[Puck Charon Kore Fenrir Aoede Leda Orus Zephyr].freeze
  # NOTE: gemini-2.5-flash-live-preview is listed in the public Live API
  # tools documentation as the recommended model, but is not currently
  # deployed (returns "model not found" on bidiGenerateContent). The
  # native-audio preview model is the only Live model on which function
  # calling currently works in practice (with AUDIO modality).
  DEFAULT_MODEL = "gemini-2.5-flash-native-audio-preview-12-2025"

  attr_accessor :model,
                :system_instruction, :tools,
                :context_window_compression, :session_resumption,
                :automatic_activity_detection,
                :media_resolution, :output_audio_transcription

  # These two have validating writers below instead of raw accessors.
  attr_reader :response_modality, :voice_name

  # @param model [String] Live model name
  # @param response_modality [String, Symbol] "TEXT" or "AUDIO" (case-insensitive)
  # @param voice_name [String, Symbol, nil] one of VALID_VOICES, or nil for none
  # @param system_instruction [String, nil]
  # @param tools [Object, nil] stored as given
  # @param context_window_compression [Object, nil] stored as given
  # @param session_resumption [Object, nil] stored as given
  # @param automatic_activity_detection [Boolean]
  # @param media_resolution [Object, nil] stored as given
  # @param output_audio_transcription [Boolean]
  # @raise [ArgumentError] if the modality or voice is not recognised
  def initialize(
    model: DEFAULT_MODEL,
    response_modality: "TEXT",
    voice_name: nil,
    system_instruction: nil,
    tools: nil,
    context_window_compression: nil,
    session_resumption: nil,
    automatic_activity_detection: true,
    media_resolution: nil,
    output_audio_transcription: false
  )
    @model = model
    self.response_modality = response_modality
    self.voice_name = voice_name
    @system_instruction = system_instruction
    @tools = tools
    @context_window_compression = context_window_compression
    @session_resumption = session_resumption
    @automatic_activity_detection = automatic_activity_detection
    @media_resolution = media_resolution
    @output_audio_transcription = output_audio_transcription
  end

  # Assign the response modality, normalising it to an upper-case String.
  #
  # @raise [ArgumentError] if the value is not in VALID_MODALITIES
  def response_modality=(modality)
    @response_modality = validate_modality(modality)
  end

  # Assign the prebuilt voice name (nil clears it).
  #
  # @raise [ArgumentError] if the value is not in VALID_VOICES
  def voice_name=(voice)
    @voice_name = validate_voice(voice)
  end

  private

  # Upper-case the modality and make sure it is supported.
  def validate_modality(modality)
    normalized = modality.to_s.upcase
    return normalized if VALID_MODALITIES.include?(normalized)

    raise ArgumentError, "Invalid modality: #{normalized}. Must be one of: #{VALID_MODALITIES.join(', ')}"
  end

  # Accept nil (no voice) or a known voice name given as String or Symbol.
  # Voice names are matched case-sensitively.
  def validate_voice(voice)
    return nil if voice.nil?

    name = voice.to_s
    return name if VALID_VOICES.include?(name)

    raise ArgumentError, "Invalid voice: #{name}. Must be one of: #{VALID_VOICES.join(', ')}"
  end
end
64
+ end
65
+ end
@@ -0,0 +1,83 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "websocket-client-simple"
4
+ require "json"
5
+
6
+ module Gemini
7
+ class Live
8
# WebSocket connection manager for Live API.
#
# Wraps a WebSocket::Client::Simple socket, tracks whether it is open, and
# forwards socket events to the callbacks supplied at construction time.
class Connection
  WEBSOCKET_BASE_URL = "wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1beta.GenerativeService.BidiGenerateContent"

  attr_reader :connected

  # @param api_key [String] appended to the URL as the +key+ query parameter
  # @param on_message [#call, nil] called with the message data
  # @param on_open [#call, nil] called with no arguments
  # @param on_error [#call, nil] called with the error
  # @param on_close [#call, nil] called with (code, reason); either may be nil
  def initialize(api_key:, on_message:, on_open:, on_error:, on_close:)
    @api_key = api_key
    @on_message = on_message
    @on_open = on_open
    @on_error = on_error
    @on_close = on_close
    @ws = nil
    @connected = false
    @mutex = Mutex.new
  end

  # Open the socket and register the event handlers.
  #
  # @return [Connection] self
  def connect
    url = "#{WEBSOCKET_BASE_URL}?key=#{@api_key}"

    # Capture everything the handlers need in locals so the handler blocks
    # do not depend on `self` being this Connection when they run.
    on_message_callback = @on_message
    on_open_callback = @on_open
    on_error_callback = @on_error
    on_close_callback = @on_close
    # A lambda keeps its defining `self`, so this always updates our flag.
    mark_connected = ->(state) { @connected = state }

    @ws = WebSocket::Client::Simple.connect(url) do |ws|
      ws.on(:open) do
        mark_connected.call(true)
        on_open_callback.call if on_open_callback
      end

      ws.on(:message) do |msg|
        on_message_callback.call(msg.data) if on_message_callback
      end

      ws.on(:error) do |e|
        on_error_callback.call(e) if on_error_callback
      end

      ws.on(:close) do |e|
        mark_connected.call(false)
        # The close event may not expose a code/reason; pass nil then.
        code = e.respond_to?(:code) ? e.code : nil
        reason = e.respond_to?(:reason) ? e.reason : nil
        on_close_callback.call(code, reason) if on_close_callback
      end
    end

    self
  end

  # Send a message over the socket. Strings are sent as-is; anything else
  # is serialised with +to_json+. Writes are serialised through a mutex.
  #
  # NOTE: this intentionally keeps the name +send+, which shadows
  # Object#send on instances of this class; use +public_send+/+__send__+
  # for dynamic dispatch on a Connection.
  #
  # @return [Boolean] true if handed to the socket; false when not
  #   connected or when sending raised (the error goes to on_error)
  def send(data)
    return false unless @ws && @connected

    payload = data.is_a?(String) ? data : data.to_json
    @mutex.synchronize { @ws.send(payload) }
    true
  rescue StandardError => e
    @on_error&.call(e)
    false
  end

  # Close the socket. The connected flag is cleared even if the underlying
  # close raises, so a failed close cannot leave the object looking open.
  #
  # @return [false] the connected flag after closing
  def close
    begin
      @ws&.close
    ensure
      @connected = false
    end
    @connected
  end

  # @return [Boolean] true only when an open event was seen, a socket
  #   exists and the socket does not report itself closed
  def connected?
    return false unless @connected && @ws

    !@ws.closed?
  end
end
82
+ end
83
+ end
@@ -0,0 +1,217 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Gemini
4
+ class Live
5
# Helper class to build Live API messages.
#
# Every builder is a class method returning a plain Hash with Symbol keys
# in the camelCase spelling the Live API expects; nothing here performs I/O.
class MessageBuilder
  VALID_SCHEDULING = %w[INTERRUPT WHEN_IDLE SILENT].freeze

  class << self
    # Build setup message from configuration.
    #
    # @param config [#model, #response_modality, #voice_name,
    #   #system_instruction, #tools, #context_window_compression,
    #   #session_resumption, #automatic_activity_detection,
    #   #media_resolution, #output_audio_transcription]
    # @return [Hash] +{ setup: { ... } }+
    def setup(config)
      message = {
        setup: {
          model: normalize_model_name(config.model)
        }
      }

      generation_config = build_generation_config(config)
      message[:setup][:generationConfig] = generation_config unless generation_config.empty?

      # System instruction
      if config.system_instruction
        message[:setup][:systemInstruction] = {
          parts: [{ text: config.system_instruction }]
        }
      end

      # Tools configuration
      message[:setup][:tools] = config.tools if config.tools

      # Context window compression
      if config.context_window_compression
        message[:setup][:contextWindowCompression] = config.context_window_compression
      end

      # Session resumption
      if config.session_resumption
        message[:setup][:sessionResumption] = config.session_resumption
      end

      # VAD (Voice Activity Detection) settings: only emitted when the
      # caller turned automatic detection off.
      unless config.automatic_activity_detection
        message[:setup][:realtimeInputConfig] = {
          automaticActivityDetection: {
            disabled: true
          }
        }
      end

      message
    end

    # Build client content message (text).
    #
    # @param text [String] text of the single part
    # @param turn_complete [Boolean] value of +turnComplete+
    # @param role [String] role of the turn
    # @return [Hash]
    def client_content(text:, turn_complete: true, role: "user")
      {
        clientContent: {
          turns: [
            {
              role: role,
              parts: [{ text: text }]
            }
          ],
          turnComplete: turn_complete
        }
      }
    end

    # Build client content with multiple parts.
    #
    # @param parts [Array<Hash>] parts placed in the turn unchanged
    # @param turn_complete [Boolean] value of +turnComplete+
    # @param role [String] role of the turn
    # @return [Hash]
    def client_content_parts(parts:, turn_complete: true, role: "user")
      {
        clientContent: {
          turns: [
            {
              role: role,
              parts: parts
            }
          ],
          turnComplete: turn_complete
        }
      }
    end

    # Build realtime input message (audio/video) using the legacy
    # mediaChunks field. NOTE: mediaChunks is deprecated by the API in
    # favor of the dedicated audio/video fields built by realtime_audio
    # and realtime_video. Kept for backward compatibility with older
    # Live models that still accept it.
    #
    # When both audio_data and video_data are given, audio_data is used.
    def realtime_input(audio_data: nil, video_data: nil, mime_type:)
      data = audio_data || video_data
      {
        realtimeInput: {
          mediaChunks: [
            {
              mimeType: mime_type,
              data: data
            }
          ]
        }
      }
    end

    # Build a realtime text input message. This is the universal
    # text-input form for the Live API and is required by newer Live
    # models such as gemini-3.1-flash-live-preview, which reject the
    # turn-based clientContent payload.
    def realtime_text(text)
      { realtimeInput: { text: text.to_s } }
    end

    # Build activity start message (for manual VAD)
    def activity_start
      {
        realtimeInput: {
          activityStart: {}
        }
      }
    end

    # Build activity end message (for manual VAD)
    def activity_end
      {
        realtimeInput: {
          activityEnd: {}
        }
      }
    end

    # Build tool response message.
    #
    # Each function response hash supports (Symbol or String keys):
    #   :id         - The function call id from the server
    #   :name       - The function name
    #   :response   - The function result (Hash or scalar). A scalar is
    #                 wrapped as `{ result: value }`; nil becomes `{}`.
    #                 When using NON_BLOCKING (async) function calls,
    #                 include `scheduling: "INTERRUPT" | "WHEN_IDLE" |
    #                 "SILENT"` inside the response hash.
    #   :scheduling - (optional) Top-level shortcut. When provided,
    #                 it is merged into the response hash as
    #                 `response[:scheduling]` and overrides any value
    #                 already there. Accepts Symbol or String.
    #
    # The caller's hashes are not mutated.
    #
    # Raises ArgumentError if scheduling is not one of the valid values.
    def tool_response(function_responses)
      {
        toolResponse: {
          functionResponses: function_responses.map { |resp| build_function_response(resp) }
        }
      }
    end

    private

    # Normalise one function response into `{ id:, name:, response: }`.
    def build_function_response(resp)
      raw_response = value_for(resp, :response)
      response_payload =
        case raw_response
        when Hash then raw_response.dup
        when nil then {}
        else { result: raw_response }
        end

      # Always drop the String-keyed spelling so the serialised JSON can
      # never contain two "scheduling" members. Precedence: top-level
      # shortcut, then response[:scheduling], then response["scheduling"].
      string_keyed = response_payload.delete("scheduling")
      scheduling = value_for(resp, :scheduling) || response_payload[:scheduling] || string_keyed
      response_payload[:scheduling] = normalize_scheduling(scheduling) if scheduling

      { id: value_for(resp, :id), name: value_for(resp, :name), response: response_payload }
    end

    # Read +key+ from +hash+, falling back to the String spelling when the
    # Symbol key is absent (or nil). A stored +false+ is preserved.
    def value_for(hash, key)
      value = hash[key]
      value.nil? ? hash[key.to_s] : value
    end

    # Upper-case a scheduling value and check it against VALID_SCHEDULING.
    def normalize_scheduling(value)
      value_str = value.to_s.upcase
      unless VALID_SCHEDULING.include?(value_str)
        raise ArgumentError,
              "scheduling must be one of: #{VALID_SCHEDULING.join(', ')} (got #{value.inspect})"
      end
      value_str
    end

    # Ensure the model name carries the "models/" prefix. Accepts a String
    # or Symbol.
    def normalize_model_name(model)
      name = model.to_s
      name.start_with?("models/") ? name : "models/#{name}"
    end

    # Build the generationConfig section of the setup message.
    def build_generation_config(config)
      generation_config = {}

      # Response modality
      generation_config[:responseModalities] = [config.response_modality]

      # Speech/Voice configuration for AUDIO modality
      if config.response_modality == "AUDIO" && config.voice_name
        generation_config[:speechConfig] = {
          voiceConfig: {
            prebuiltVoiceConfig: {
              voiceName: config.voice_name
            }
          }
        }
      end

      # Media resolution
      if config.media_resolution
        generation_config[:mediaResolution] = config.media_resolution
      end

      # Output audio transcription
      # NOTE(review): the Live API reference appears to list
      # outputAudioTranscription as a field of the setup message itself
      # rather than of generationConfig - confirm the server accepts it
      # at this level before relying on it.
      if config.output_audio_transcription
        generation_config[:outputAudioTranscription] = {}
      end

      generation_config
    end
  end
end
216
+ end
217
+ end