prompt_builder 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +24 -0
- data/MIT-LICENSE +20 -0
- data/README.md +763 -0
- data/VERSION +1 -0
- data/lib/prompt_builder/content/base.rb +44 -0
- data/lib/prompt_builder/content/input_file.rb +63 -0
- data/lib/prompt_builder/content/input_image.rb +64 -0
- data/lib/prompt_builder/content/input_text.rb +42 -0
- data/lib/prompt_builder/content/input_video.rb +43 -0
- data/lib/prompt_builder/content/output_text.rb +59 -0
- data/lib/prompt_builder/content/reasoning_text.rb +42 -0
- data/lib/prompt_builder/content/refusal_content.rb +42 -0
- data/lib/prompt_builder/content/summary_text.rb +42 -0
- data/lib/prompt_builder/content/text.rb +42 -0
- data/lib/prompt_builder/content.rb +28 -0
- data/lib/prompt_builder/errors.rb +18 -0
- data/lib/prompt_builder/items/base.rb +41 -0
- data/lib/prompt_builder/items/compaction.rb +60 -0
- data/lib/prompt_builder/items/function_call.rb +97 -0
- data/lib/prompt_builder/items/function_call_output.rb +110 -0
- data/lib/prompt_builder/items/item_reference.rb +42 -0
- data/lib/prompt_builder/items/message.rb +113 -0
- data/lib/prompt_builder/items/reasoning.rb +75 -0
- data/lib/prompt_builder/items.rb +13 -0
- data/lib/prompt_builder/response.rb +257 -0
- data/lib/prompt_builder/serializers/base.rb +37 -0
- data/lib/prompt_builder/serializers/chat_completion/request.rb +389 -0
- data/lib/prompt_builder/serializers/chat_completion/response.rb +139 -0
- data/lib/prompt_builder/serializers/chat_completion.rb +30 -0
- data/lib/prompt_builder/serializers/converse/request.rb +623 -0
- data/lib/prompt_builder/serializers/converse/response.rb +140 -0
- data/lib/prompt_builder/serializers/converse.rb +30 -0
- data/lib/prompt_builder/serializers/gemini/request.rb +562 -0
- data/lib/prompt_builder/serializers/gemini/response.rb +233 -0
- data/lib/prompt_builder/serializers/gemini.rb +30 -0
- data/lib/prompt_builder/serializers/messages/request.rb +634 -0
- data/lib/prompt_builder/serializers/messages/response.rb +157 -0
- data/lib/prompt_builder/serializers/messages.rb +30 -0
- data/lib/prompt_builder/serializers/open_responses/request.rb +229 -0
- data/lib/prompt_builder/serializers/open_responses/response.rb +18 -0
- data/lib/prompt_builder/serializers/open_responses.rb +30 -0
- data/lib/prompt_builder/serializers.rb +35 -0
- data/lib/prompt_builder/session.rb +383 -0
- data/lib/prompt_builder/tool_registry.rb +75 -0
- data/lib/prompt_builder/tools/definition.rb +66 -0
- data/lib/prompt_builder/tools.rb +7 -0
- data/lib/prompt_builder/usage.rb +100 -0
- data/lib/prompt_builder.rb +86 -0
- data/prompt_builder.gemspec +41 -0
- metadata +107 -0
|
@@ -0,0 +1,562 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
|
|
5
|
+
module PromptBuilder
|
|
6
|
+
module Serializers
|
|
7
|
+
class Gemini < Base
|
|
8
|
+
# Request serializer for the Google Gemini API format.
|
|
9
|
+
#
|
|
10
|
+
# === Unsupported Open Responses features
|
|
11
|
+
#
|
|
12
|
+
# These session fields are not supported and are silently omitted from the
|
|
13
|
+
# serialized output:
|
|
14
|
+
# - +background+ — Gemini has no background/async mode on the generate endpoint
|
|
15
|
+
# - +include+ — response-field inclusion is an Open Responses-only concept
|
|
16
|
+
# - +max_tool_calls+ — per-request tool-call caps are not supported
|
|
17
|
+
# - +metadata+ — arbitrary metadata is not supported
|
|
18
|
+
# - +parallel_tool_calls+ — parallel tool call control is not supported
|
|
19
|
+
# - +prompt_cache_key+ / +prompt_cache_retention+ — explicit prompt cache keys are not supported
|
|
20
|
+
# - +safety_identifier+ — no equivalent user-safety field on the generate endpoint
|
|
21
|
+
# - +stream_options+ — stream event options are not supported
|
|
22
|
+
# - +truncation+ — server-side context truncation is not supported
|
|
23
|
+
#
|
|
24
|
+
# Input content restrictions:
|
|
25
|
+
# - +InputImage+ content is only supported in user messages (assistant images are omitted)
|
|
26
|
+
# - +InputImage+ with +image_url+ requires either base64 +data+ or a URL;
|
|
27
|
+
# content without +image_url+ or +file_id+ raises
|
|
28
|
+
# - +InputFile+ content is only supported in user messages (assistant files are omitted)
|
|
29
|
+
# - +InputFile+ requires +media_type+ when +file_data+ is provided, or a
|
|
30
|
+
# recognized extension on +filename+ / +file_url+
|
|
31
|
+
# - +InputVideo+ requires +video_url+
|
|
32
|
+
# - +RefusalContent+ is dropped silently (a parsed Chat Completions
|
|
33
|
+
# refusal can stay in session history without breaking subsequent
|
|
34
|
+
# request_payload calls)
|
|
35
|
+
# - +redacted_thinking+ and unknown reasoning blocks are silently skipped
|
|
36
|
+
# - +Reasoning+ items with +summary+ blocks have the summary skipped
|
|
37
|
+
# - +FunctionCallOutput+ array contents must be text-only; other content is omitted
|
|
38
|
+
# - +Compaction+ and +ItemReference+ items are silently skipped
|
|
39
|
+
#
|
|
40
|
+
# === Features in Gemini not available through Open Responses
|
|
41
|
+
#
|
|
42
|
+
# The following Gemini parameters cannot be set through the Open Responses
|
|
43
|
+
# canonical format:
|
|
44
|
+
# - +thinkingConfig.thinkingBudget+ — use +reasoning.budget_tokens+ instead
|
|
45
|
+
# - +thinkingConfig.thinkingLevel+ — use +reasoning.effort+ instead
|
|
46
|
+
# - +thinkingConfig.includeThoughts+ — use +reasoning.summary = "auto"+ instead
|
|
47
|
+
# - +topK+ — top-K sampling parameter (only reachable via +session.extra["top_k"]+)
|
|
48
|
+
# - +seed+ — for reproducible outputs (model-dependent; only reachable via +session.extra["seed"]+)
|
|
49
|
+
# - +stopSequences+ — custom stop sequences (only reachable via +session.extra["stop_sequences"]+)
|
|
50
|
+
# - +candidateCount+ — requesting multiple response candidates (only reachable via +session.extra["candidate_count"]+)
|
|
51
|
+
# - +responseModalities+ — selecting TEXT/IMAGE/AUDIO output channels (only reachable via +session.extra["response_modalities"]+)
|
|
52
|
+
# - +responseJsonSchema+ — newer JSON Schema variant of +responseSchema+
|
|
53
|
+
# - +safetySettings+ — configurable harm-category safety thresholds (only reachable via +session.extra["safety_settings"]+)
|
|
54
|
+
# - +mediaResolution+ — controls token cost of image/video inputs (only reachable via +session.extra["media_resolution"]+)
|
|
55
|
+
# - +audioTimestamp+, +speechConfig+ — audio output configuration
|
|
56
|
+
# - +enableEnhancedCivicAnswers+
|
|
57
|
+
# - +routingConfig+, +modelSelectionConfig+
|
|
58
|
+
# - Video metadata controls (+videoMetadata+ offset, FPS) on +Part+s
|
|
59
|
+
# - Audio input model capability controls (audio can be sent as
|
|
60
|
+
# +InputFile+ with an audio MIME type)
|
|
61
|
+
# - Built-in Gemini tools: +googleSearch+ / +googleSearchRetrieval+,
|
|
62
|
+
# +codeExecution+, +urlContext+, +computerUse+, +fileSearch+,
|
|
63
|
+
# +googleMaps+, +mcpServers+
|
|
64
|
+
# - +functionCallingConfig.mode = VALIDATED+
|
|
65
|
+
# - +toolConfig.retrievalConfig+, +includeServerSideToolInvocations+
|
|
66
|
+
# - +cachedContent+ — referencing a CachedContent resource by name (only reachable via +session.extra["cached_content"]+)
|
|
67
|
+
# - Top-level +labels+ (Vertex flavor only)
|
|
68
|
+
class Request < Base
|
|
69
|
+
# +reasoning.effort+ values that are forwarded (upper-cased) as
# +thinkingConfig.thinkingLevel+.
SUPPORTED_REASONING_EFFORTS = %w[minimal low medium high].freeze

# +reasoning.summary+ values that switch on +thinkingConfig.includeThoughts+.
SUPPORTED_REASONING_SUMMARIES = %w[auto].freeze

# +service_tier+ values that are passed through as +serviceTier+.
SUPPORTED_SERVICE_TIERS = %w[unspecified standard flex priority].freeze

# Lower-case file extension (without the leading dot) => MIME type, used to
# infer a MIME type from a filename or URL when none is given explicitly.
FILE_EXTENSION_MIME_TYPES = {
  # Documents and text
  "pdf" => "application/pdf",
  "txt" => "text/plain",
  "md" => "text/markdown",
  "markdown" => "text/markdown",
  "html" => "text/html",
  "htm" => "text/html",
  "csv" => "text/csv",
  "json" => "application/json",
  "xml" => "application/xml",
  "rtf" => "application/rtf",
  # Images
  "png" => "image/png",
  "jpg" => "image/jpeg",
  "jpeg" => "image/jpeg",
  "webp" => "image/webp",
  "heic" => "image/heic",
  "heif" => "image/heif",
  # Audio
  "mp3" => "audio/mpeg",
  "wav" => "audio/wav",
  "aiff" => "audio/aiff",
  "aac" => "audio/aac",
  "ogg" => "audio/ogg",
  "flac" => "audio/flac",
  # Video
  "mp4" => "video/mp4",
  "mov" => "video/quicktime",
  "webm" => "video/webm",
  "mpeg" => "video/mpeg",
  "mpg" => "video/mpeg"
}.freeze
|
|
102
|
+
private_constant :FILE_EXTENSION_MIME_TYPES
|
|
103
|
+
|
|
104
|
+
class << self
|
|
105
|
+
private
|
|
106
|
+
|
|
107
|
+
# Assembles the Gemini request payload for +session+.
#
# Keys are only added when there is something to send: +store+,
# +serviceTier+, +systemInstruction+, +generationConfig+, +tools+ and
# +toolConfig+ are omitted when absent/empty; +contents+ is always set.
# Recognized keys from +session.extra+ are applied last.
#
# Raises UnsupportedFormatError when +session.model+ is not set.
def serialize_request(session)
  unless session.model
    raise UnsupportedFormatError, "Gemini format requires session.model"
  end

  payload = {}
  payload["store"] = session.store unless session.store.nil?

  if session.service_tier
    tier = serialize_service_tier(session.service_tier)
    payload["serviceTier"] = tier if tier
  end

  instruction = build_system_instruction(session)
  payload["systemInstruction"] = instruction if instruction

  payload["contents"] = build_contents(session)

  generation = build_generation_config(session)
  payload["generationConfig"] = generation unless generation.empty?

  declared_tools = build_tools(session)
  payload["tools"] = declared_tools unless declared_tools.empty?

  choice_config = build_tool_config(session.tool_choice, tools: declared_tools)
  payload["toolConfig"] = choice_config if choice_config

  # Session extra: recognized keys for Gemini API
  apply_session_extra!(payload, session.extra) if session.extra

  # Gemini selects streaming via endpoint (:streamGenerateContent)
  # rather than a request body field, so session.stream is a no-op
  # at the payload level.
  payload
end
|
|
138
|
+
|
|
139
|
+
# Copies the recognized Gemini-specific keys from +extra+ into the payload
# +h+, translating snake_case keys to Gemini's camelCase names.
#
# +safety_settings+ and +cached_content+ land at the top level; the
# remaining keys are written into +h["generationConfig"]+, which is created
# on demand and removed again if it ends up empty. A key is copied whenever
# it is present in +extra+, even when its value is nil.
def apply_session_extra!(h, extra)
  top_level_keys = {
    "safety_settings" => "safetySettings",
    "cached_content" => "cachedContent"
  }
  top_level_keys.each do |source_key, target_key|
    h[target_key] = extra[source_key] if extra.key?(source_key)
  end

  # Generation config extras
  generation_keys = {
    "stop_sequences" => "stopSequences",
    "top_k" => "topK",
    "seed" => "seed",
    "candidate_count" => "candidateCount",
    "response_modalities" => "responseModalities",
    "media_resolution" => "mediaResolution"
  }
  generation = (h["generationConfig"] ||= {})
  generation_keys.each do |source_key, target_key|
    generation[target_key] = extra[source_key] if extra.key?(source_key)
  end

  h.delete("generationConfig") if generation.empty?
end
|
|
152
|
+
|
|
153
|
+
# Builds the +systemInstruction+ value for +session+.
#
# Collects +session.instructions+ (when set) followed by the text of every
# +Content::InputText+ found in +system+ / +developer+ messages, in item
# order. Returns +{"parts" => [...]}+, or nil when nothing was collected.
def build_system_instruction(session)
  texts = []
  texts << session.instructions if session.instructions

  session.items.each do |item|
    next unless item.is_a?(Items::Message) && %w[system developer].include?(item.role)

    item.content.each do |content|
      texts << content.text if content.is_a?(Content::InputText)
    end
  end

  return nil if texts.empty?

  {"parts" => texts.map { |text| {"text" => text} }}
end
|
|
173
|
+
|
|
174
|
+
# Converts +session.items+ into Gemini +contents+ entries.
#
# * Messages: +system+/+developer+ roles are skipped here; +assistant+
#   becomes role "model", everything else "user". Refusal content is
#   dropped, and a message with no serializable parts is omitted.
# * FunctionCall: emitted as a "model" +functionCall+ part (with +id+ and
#   +thoughtSignature+ when available).
# * FunctionCallOutput: emitted as a "user" +functionResponse+ part; the
#   function name is looked up from the FunctionCall with the same call id.
# * Reasoning: only items with an empty summary are replayed.
# * Compaction / ItemReference: skipped.
#
# Consecutive entries with the same role are merged before returning.
#
# Raises UnsupportedFormatError when a FunctionCallOutput has no matching
# FunctionCall in the session.
def build_contents(session)
  # Index call_id -> function name up front so each FunctionCallOutput can
  # resolve its name (Gemini's functionResponse requires the function name,
  # not the call id) without rescanning the items.
  names_by_call_id = session.items.each_with_object({}) do |item, index|
    index[item.call_id] = item.name if item.is_a?(Items::FunctionCall)
  end

  turns = []

  session.items.each do |item|
    case item
    when Items::Message
      next if %w[system developer].include?(item.role)

      role = item.role == "assistant" ? "model" : "user"
      # RefusalContent is dropped silently; it can appear in history
      # via a parsed Chat Completions response but cannot be sent.
      parts = item.content.filter_map do |content|
        next if content.is_a?(Content::RefusalContent)

        serialize_content(content, role: role)
      end
      turns << {"role" => role, "parts" => parts} unless parts.empty?
    when Items::FunctionCall
      call = {"name" => item.name, "args" => parse_function_call_args(item)}
      call["id"] = item.call_id if item.call_id

      call_part = {"functionCall" => call}
      signature = item.extra && item.extra["thought_signature"]
      call_part["thoughtSignature"] = signature if signature

      turns << {"role" => "model", "parts" => [call_part]}
    when Items::FunctionCallOutput
      function_name = names_by_call_id[item.call_id]
      unless function_name
        raise UnsupportedFormatError,
          "Gemini format requires a matching FunctionCall for FunctionCallOutput #{item.call_id.inspect}; " \
          "Gemini's functionResponse.name must reference a previously-declared tool name"
      end

      response = {"name" => function_name, "response" => serialize_function_output(item.output)}
      response["id"] = item.call_id if item.call_id

      turns << {"role" => "user", "parts" => [{"functionResponse" => response}]}
    when Items::Reasoning
      # Reasoning items with summary blocks come from the Responses API
      # and cannot be replayed in Gemini's thinking format; skip them.
      if item.summary.empty?
        thoughts = item.content.filter_map { |block| serialize_thinking_block(block) }
        turns << {"role" => "model", "parts" => thoughts} unless thoughts.empty?
      end
    when Items::Compaction, Items::ItemReference
      # Compaction and ItemReference items are not supported; skip them.
      next
    end
  end

  merge_consecutive_contents(turns)
end
|
|
239
|
+
|
|
240
|
+
# Converts one reasoning content block (a Hash) into a Gemini thought part.
#
# Only blocks whose +"type"+ is +"thinking"+ are replayed; the part carries
# the block's +"thinking"+ text (empty string when the key is missing) and,
# when present, its +"signature"+ as +thoughtSignature+.
#
# Every other block type returns nil so the caller can drop it:
# +redacted_thinking+ blocks come from Claude/Converse responses and cannot
# be replayed in Gemini format, and unknown types cannot be meaningfully
# replayed either.
def serialize_thinking_block(block)
  return nil unless block["type"] == "thinking"

  thought = {"thought" => true, "text" => block.fetch("thinking", "")}
  signature = block["signature"]
  thought["thoughtSignature"] = signature if signature
  thought
end
|
|
256
|
+
|
|
257
|
+
# Builds the object sent as Gemini's +functionResponse.response+, which
# must be a structured object rather than a bare string.
#
# * Array output: the text of every +Content::InputText+ /
#   +Content::OutputText+ element is joined with newlines and wrapped as
#   +{"result" => text}+; other content types are silently omitted.
# * String output that parses as a JSON object: the parsed Hash is returned
#   directly so structured tool output round-trips without stringification.
# * Anything else (plain strings, non-object JSON, nil): wrapped as
#   +{"result" => value}+, with nil becoming an empty string.
def serialize_function_output(output)
  if output.is_a?(Array)
    text_items = output.select do |content|
      content.is_a?(Content::InputText) || content.is_a?(Content::OutputText)
    end
    return {"result" => text_items.filter_map(&:text).join("\n")}
  end

  raw = output || ""
  wrapped = {"result" => raw}
  return wrapped unless raw.is_a?(String) && !raw.empty?

  decoded =
    begin
      JSON.parse(raw)
    rescue JSON::ParserError
      # Not JSON at all: fall back to the wrapped string.
      nil
    end

  decoded.is_a?(Hash) ? decoded : wrapped
end
|
|
286
|
+
|
|
287
|
+
# Returns the parsed arguments of the FunctionCall +item+ for use as
# Gemini's +functionCall.args+.
#
# Raises UnsupportedFormatError when the parsed arguments are not a Hash,
# or when +item.parsed_arguments+ raises PromptBuilder::InvalidItemError
# (the original message is included).
def parse_function_call_args(item)
  arguments = item.parsed_arguments
  return arguments if arguments.is_a?(Hash)

  raise UnsupportedFormatError,
    "Gemini format requires FunctionCall arguments to be a JSON object"
rescue PromptBuilder::InvalidItemError => error
  raise UnsupportedFormatError,
    "Gemini format could not parse FunctionCall arguments: #{error.message}"
end
|
|
298
|
+
|
|
299
|
+
# Converts a single message content object into a Gemini part for a message
# with the given Gemini +role+ ("user" or "model").
#
# Text content becomes +{"text" => ...}+, carrying +thoughtSignature+ when
# the content's extra holds a +thought_signature+. Image, file and video
# content is only serialized for non-model roles; assistant media is not
# supported and yields nil. Refusal content (already filtered out by the
# caller) and any unrecognized content type also yield nil, which the
# caller drops.
#
# Raises UnsupportedFormatError for an InputVideo without a url.
def serialize_content(content, role:)
  from_model = role == "model"

  case content
  when Content::InputText, Content::OutputText
    text_part = {"text" => content.text}
    signature = content.extra && content.extra["thought_signature"]
    text_part["thoughtSignature"] = signature if signature
    text_part
  when Content::InputImage
    from_model ? nil : serialize_image(content)
  when Content::InputFile
    from_model ? nil : serialize_file(content)
  when Content::InputVideo
    if from_model
      nil
    else
      raise UnsupportedFormatError, "Gemini format requires InputVideo.url" unless content.url

      {"fileData" => {"mimeType" => video_mime_type(content.url), "fileUri" => content.url}}
    end
  end
end
|
|
336
|
+
|
|
337
|
+
# Converts an InputImage into a Gemini part.
#
# Resolution order:
# 1. +extra["file_id"]+ -> +fileData+ pointing at the file id.
# 2. +content.url+ that +PromptBuilder.parse_data_url+ recognizes ->
#    +inlineData+ using the MIME type and data it returns.
# 3. Any other +content.url+ -> +fileData+ pointing at the URL.
#
# For +fileData+ the MIME type is +extra["media_type"]+, defaulting to
# "image/jpeg".
#
# Raises UnsupportedFormatError when neither a file id nor a url is given.
def serialize_image(content)
  extra = content.extra
  file_id = extra && extra["file_id"]
  reference_mime = (extra && extra["media_type"]) || "image/jpeg"

  return {"fileData" => {"mimeType" => reference_mime, "fileUri" => file_id}} if file_id

  unless content.url
    raise UnsupportedFormatError,
      "Gemini format requires InputImage.url or a file_id in extra"
  end

  data_url = PromptBuilder.parse_data_url(content.url)
  if data_url
    {"inlineData" => {"mimeType" => data_url[0], "data" => data_url[1]}}
  else
    {"fileData" => {"mimeType" => reference_mime, "fileUri" => content.url}}
  end
end
|
|
357
|
+
|
|
358
|
+
# Converts an InputFile into a Gemini part.
#
# Resolution order:
# 1. +extra["file_id"]+ -> +fileData+; +extra["media_type"]+ is mandatory.
# 2. +content.url+ that +PromptBuilder.parse_data_url+ recognizes ->
#    +inlineData+. The MIME type is +extra["media_type"]+, else one inferred
#    by +file_mime_type+, else the data URL's own type (unless that type is
#    the generic "application/octet-stream", which is rejected).
# 3. Any other +content.url+ -> +fileData+, requiring +extra["media_type"]+
#    or a type inferred by +file_mime_type+.
#
# Raises UnsupportedFormatError when no usable MIME type can be determined
# or when neither a file id nor a url is given.
def serialize_file(content)
  extra = content.extra
  file_id = extra && extra["file_id"]
  declared_mime = extra && extra["media_type"]

  if file_id
    unless declared_mime
      raise UnsupportedFormatError,
        "Gemini format requires media_type in extra when using file_id in extra"
    end

    return {"fileData" => {"mimeType" => declared_mime, "fileUri" => file_id}}
  end

  unless content.url
    raise UnsupportedFormatError,
      "Gemini format requires InputFile.url or file_id in extra"
  end

  data_url = PromptBuilder.parse_data_url(content.url)
  mime = declared_mime || file_mime_type(content)

  if data_url
    # A generic octet-stream data URL tells Gemini nothing about the
    # payload, so it is only accepted alongside a better MIME source.
    if mime.nil? && data_url[0] == "application/octet-stream"
      raise UnsupportedFormatError,
        "Gemini format requires media_type in extra or a recognized filename extension for inline data"
    end

    {"inlineData" => {"mimeType" => mime || data_url[0], "data" => data_url[1]}}
  else
    unless mime
      raise UnsupportedFormatError,
        "Gemini format requires media_type in extra or a recognized filename extension for file URLs"
    end

    {"fileData" => {"mimeType" => mime, "fileUri" => content.url}}
  end
end
|
|
396
|
+
|
|
397
|
+
# Infers a MIME type for an InputFile from its extension.
#
# +content.filename+ is checked first, then +content.url+; the first one
# whose (case-insensitive) extension appears in FILE_EXTENSION_MIME_TYPES
# wins. Returns nil when neither yields a recognized extension.
#
# The URL's query string and fragment are removed before the extension is
# read, so that "https://host/report.pdf?sig=abc" is recognized as a PDF
# instead of being treated as having the extension "pdf?sig=abc".
def file_mime_type(content)
  url = content.url
  url = url.sub(/[?#].*\z/m, "") if url.is_a?(String)

  [content.filename, url].each do |path|
    next unless path

    ext = File.extname(path).delete_prefix(".").downcase
    mime = FILE_EXTENSION_MIME_TYPES[ext]
    return mime if mime
  end

  nil
end
|
|
407
|
+
|
|
408
|
+
# Guesses the MIME type of a video from the extension of +video_url+.
#
# The query string and fragment are removed before the extension is read,
# so a signed URL such as "https://host/clip.mov?token=abc" is reported as
# "video/quicktime" instead of silently falling through to the default.
# Unrecognized or missing extensions default to "video/mp4".
def video_mime_type(video_url)
  path = video_url.to_s.sub(/[?#].*\z/m, "")

  case File.extname(path).delete_prefix(".").downcase
  when "mp4" then "video/mp4"
  when "mov" then "video/quicktime"
  when "webm" then "video/webm"
  when "mkv" then "video/x-matroska"
  when "mpeg", "mpg" then "video/mpeg"
  when "flv" then "video/x-flv"
  when "wmv" then "video/x-ms-wmv"
  when "3gp" then "video/3gpp"
  else "video/mp4"
  end
end
|
|
422
|
+
|
|
423
|
+
# Collapses runs of adjacent entries that share the same +"role"+ into a
# single entry whose +"parts"+ is the concatenation of the run's parts, in
# order. Entries with different roles keep their relative order.
#
# The input is left untouched: merged entries are new hashes with new
# +"parts"+ arrays, so callers that keep a reference to +contents+ (or to
# any entry in it) never see their parts grow as a side effect.
def merge_consecutive_contents(contents)
  contents
    .chunk_while { |left, right| left["role"] == right["role"] }
    .map { |run| run.first.merge("parts" => run.flat_map { |entry| entry["parts"] }) }
end
|
|
438
|
+
|
|
439
|
+
# Builds Gemini's +generationConfig+ hash from +session+; returns an empty
# hash when nothing applies.
#
# * Sampling fields (+temperature+, +top_p+, +max_output_tokens+,
#   +presence_penalty+, +frequency_penalty+) are copied when set.
# * +top_logprobs+ enables +responseLogprobs+ and sets +logprobs+.
# * +text["format"]+ selects +responseMimeType+ ("text", "json_object",
#   "json_schema"); for "json_schema" the schema is read from
#   +format["json_schema"]["schema"]+ or +format["schema"]+. Other +text+
#   keys and other format types are silently omitted.
# * +reasoning+ maps +budget_tokens+ -> +thinkingBudget+, a supported
#   +effort+ -> upper-cased +thinkingLevel+, and a supported +summary+ ->
#   +includeThoughts+. Unsupported values are silently omitted.
def build_generation_config(session)
  config = {}

  sampling = {
    "temperature" => session.temperature,
    "topP" => session.top_p,
    "maxOutputTokens" => session.max_output_tokens,
    "presencePenalty" => session.presence_penalty,
    "frequencyPenalty" => session.frequency_penalty
  }
  sampling.each { |key, value| config[key] = value if value }

  logprobs = session.top_logprobs
  if logprobs
    config["responseLogprobs"] = true
    config["logprobs"] = logprobs
  end

  # Only text.format is mapped; other text.* keys are silently omitted.
  text_format = session.text && session.text["format"]
  if text_format.is_a?(Hash)
    case text_format["type"]
    when "text"
      config["responseMimeType"] = "text/plain"
    when "json_object"
      config["responseMimeType"] = "application/json"
    when "json_schema"
      config["responseMimeType"] = "application/json"
      schema = text_format.dig("json_schema", "schema") || text_format["schema"]
      config["responseSchema"] = schema if schema
    end
  end

  reasoning = session.reasoning
  if reasoning
    thinking = {}

    budget = reasoning["budget_tokens"]
    thinking["thinkingBudget"] = budget if budget

    effort = reasoning["effort"]
    thinking["thinkingLevel"] = effort.upcase if effort && SUPPORTED_REASONING_EFFORTS.include?(effort)

    summary = reasoning["summary"]
    thinking["includeThoughts"] = true if summary && SUPPORTED_REASONING_SUMMARIES.include?(summary)

    config["thinkingConfig"] = thinking unless thinking.empty?
  end

  config
end
|
|
496
|
+
|
|
497
|
+
# Returns +service_tier+ unchanged when it is one of
# SUPPORTED_SERVICE_TIERS, and nil otherwise so that unsupported values are
# silently omitted from the payload.
def serialize_service_tier(service_tier)
  return nil unless SUPPORTED_SERVICE_TIERS.include?(service_tier)

  service_tier
end
|
|
501
|
+
|
|
502
|
+
# Builds the Gemini +tools+ array from +session.tool_definitions+.
#
# Returns an empty array when the session declares no tools; otherwise a
# single-element array holding all +functionDeclarations+. Each declaration
# carries the tool name, its description when present, and its parameters
# (an empty object schema when the definition has none).
#
# Strict tool definitions are not supported; the strict flag is silently
# omitted (Gemini tools have no strict field).
def build_tools(session)
  definitions = session.tool_definitions
  return [] if definitions.empty?

  declarations = definitions.map do |definition|
    declaration = {"name" => definition.name}
    description = definition.description
    declaration["description"] = description if description
    declaration["parameters"] = definition.parameters || {"type" => "object", "properties" => {}}
    declaration
  end

  [{"functionDeclarations" => declarations}]
end
|
|
518
|
+
|
|
519
|
+
# Translates +tool_choice+ into Gemini's +toolConfig+.
#
# * nil -> nil
# * "auto" / "none" / "required" -> mode AUTO / NONE / ANY
# * +{"type" => "function", ...}+ -> mode ANY restricted to the named
#   function (name read from +"name"+ or +"function" => {"name" => ...}+)
# * any other value -> nil (silently omitted)
#
# When +tools+ is empty only "none" is honoured; every other choice cannot
# be expressed without tools and yields nil.
#
# Raises UnsupportedFormatError for a function choice without a name.
def build_tool_config(tool_choice, tools:)
  return nil if tool_choice.nil?
  return nil if tools.empty? && tool_choice != "none"

  calling_config =
    case tool_choice
    when "auto" then {"mode" => "AUTO"}
    when "none" then {"mode" => "NONE"}
    when "required" then {"mode" => "ANY"}
    when Hash
      if tool_choice["type"] == "function"
        name = tool_choice["name"] || tool_choice.dig("function", "name")
        unless name
          raise UnsupportedFormatError,
            "Gemini format requires tool_choice.name for function tool choices"
        end

        {"mode" => "ANY", "allowedFunctionNames" => [name]}
      end
    end

  # Unsupported tool_choice values are silently omitted.
  return nil unless calling_config

  {"functionCallingConfig" => calling_config}
end
|
|
558
|
+
end
|
|
559
|
+
end
|
|
560
|
+
end
|
|
561
|
+
end
|
|
562
|
+
end
|