@spinabot/brigade 1.9.0 → 1.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +12 -10
- package/dist/agents/agent-loop.d.ts +55 -0
- package/dist/agents/agent-loop.d.ts.map +1 -1
- package/dist/agents/agent-loop.js +90 -1
- package/dist/agents/agent-loop.js.map +1 -1
- package/dist/agents/channels/inbound-pipeline.d.ts +22 -0
- package/dist/agents/channels/inbound-pipeline.d.ts.map +1 -1
- package/dist/agents/channels/inbound-pipeline.js +31 -1
- package/dist/agents/channels/inbound-pipeline.js.map +1 -1
- package/dist/agents/channels/media-capture.d.ts +69 -6
- package/dist/agents/channels/media-capture.d.ts.map +1 -1
- package/dist/agents/channels/media-capture.js +125 -8
- package/dist/agents/channels/media-capture.js.map +1 -1
- package/dist/agents/channels/telegram/media.d.ts.map +1 -1
- package/dist/agents/channels/telegram/media.js +16 -4
- package/dist/agents/channels/telegram/media.js.map +1 -1
- package/dist/agents/channels/whatsapp/media.d.ts +19 -0
- package/dist/agents/channels/whatsapp/media.d.ts.map +1 -1
- package/dist/agents/channels/whatsapp/media.js +37 -2
- package/dist/agents/channels/whatsapp/media.js.map +1 -1
- package/dist/agents/media-understanding/anthropic-adapter.d.ts +49 -0
- package/dist/agents/media-understanding/anthropic-adapter.d.ts.map +1 -0
- package/dist/agents/media-understanding/anthropic-adapter.js +162 -0
- package/dist/agents/media-understanding/anthropic-adapter.js.map +1 -0
- package/dist/agents/media-understanding/config.d.ts +57 -0
- package/dist/agents/media-understanding/config.d.ts.map +1 -0
- package/dist/agents/media-understanding/config.js +289 -0
- package/dist/agents/media-understanding/config.js.map +1 -0
- package/dist/agents/media-understanding/gemini-adapter.d.ts +57 -0
- package/dist/agents/media-understanding/gemini-adapter.d.ts.map +1 -0
- package/dist/agents/media-understanding/gemini-adapter.js +343 -0
- package/dist/agents/media-understanding/gemini-adapter.js.map +1 -0
- package/dist/agents/media-understanding/index.d.ts +58 -0
- package/dist/agents/media-understanding/index.d.ts.map +1 -0
- package/dist/agents/media-understanding/index.js +275 -0
- package/dist/agents/media-understanding/index.js.map +1 -0
- package/dist/agents/media-understanding/pi-adapter.d.ts +72 -0
- package/dist/agents/media-understanding/pi-adapter.d.ts.map +1 -0
- package/dist/agents/media-understanding/pi-adapter.js +160 -0
- package/dist/agents/media-understanding/pi-adapter.js.map +1 -0
- package/dist/agents/media-understanding/types.d.ts +189 -0
- package/dist/agents/media-understanding/types.d.ts.map +1 -0
- package/dist/agents/media-understanding/types.js +51 -0
- package/dist/agents/media-understanding/types.js.map +1 -0
- package/dist/agents/session-wiring.d.ts +11 -0
- package/dist/agents/session-wiring.d.ts.map +1 -1
- package/dist/agents/session-wiring.js +1 -0
- package/dist/agents/session-wiring.js.map +1 -1
- package/dist/agents/tools/analyze-media-tool.d.ts +263 -0
- package/dist/agents/tools/analyze-media-tool.d.ts.map +1 -0
- package/dist/agents/tools/analyze-media-tool.js +2321 -0
- package/dist/agents/tools/analyze-media-tool.js.map +1 -0
- package/dist/agents/tools/doc-shared.d.ts +187 -0
- package/dist/agents/tools/doc-shared.d.ts.map +1 -0
- package/dist/agents/tools/doc-shared.js +484 -0
- package/dist/agents/tools/doc-shared.js.map +1 -0
- package/dist/agents/tools/edit-document-tool.d.ts +133 -0
- package/dist/agents/tools/edit-document-tool.d.ts.map +1 -0
- package/dist/agents/tools/edit-document-tool.js +815 -0
- package/dist/agents/tools/edit-document-tool.js.map +1 -0
- package/dist/agents/tools/image-downscale.d.ts +93 -0
- package/dist/agents/tools/image-downscale.d.ts.map +1 -0
- package/dist/agents/tools/image-downscale.js +257 -0
- package/dist/agents/tools/image-downscale.js.map +1 -0
- package/dist/agents/tools/make-document-tool.d.ts +114 -0
- package/dist/agents/tools/make-document-tool.d.ts.map +1 -0
- package/dist/agents/tools/make-document-tool.js +542 -0
- package/dist/agents/tools/make-document-tool.js.map +1 -0
- package/dist/agents/tools/media-cache.d.ts +56 -0
- package/dist/agents/tools/media-cache.d.ts.map +1 -0
- package/dist/agents/tools/media-cache.js +133 -0
- package/dist/agents/tools/media-cache.js.map +1 -0
- package/dist/agents/tools/ooxml-images.d.ts +107 -0
- package/dist/agents/tools/ooxml-images.d.ts.map +1 -0
- package/dist/agents/tools/ooxml-images.js +308 -0
- package/dist/agents/tools/ooxml-images.js.map +1 -0
- package/dist/agents/tools/registry.d.ts +12 -0
- package/dist/agents/tools/registry.d.ts.map +1 -1
- package/dist/agents/tools/registry.js +47 -0
- package/dist/agents/tools/registry.js.map +1 -1
- package/dist/buildstamp.json +1 -1
- package/dist/cli/commands/doctor.d.ts.map +1 -1
- package/dist/cli/commands/doctor.js +41 -0
- package/dist/cli/commands/doctor.js.map +1 -1
- package/dist/cli/commands/update.d.ts +48 -11
- package/dist/cli/commands/update.d.ts.map +1 -1
- package/dist/cli/commands/update.js +133 -46
- package/dist/cli/commands/update.js.map +1 -1
- package/dist/cli/program/build-program.d.ts.map +1 -1
- package/dist/cli/program/build-program.js +11 -5
- package/dist/cli/program/build-program.js.map +1 -1
- package/dist/core/console-stream.d.ts.map +1 -1
- package/dist/core/console-stream.js +7 -5
- package/dist/core/console-stream.js.map +1 -1
- package/dist/core/server.js +6 -1
- package/dist/core/server.js.map +1 -1
- package/dist/system-prompt/assembler.d.ts.map +1 -1
- package/dist/system-prompt/assembler.js +25 -1
- package/dist/system-prompt/assembler.js.map +1 -1
- package/dist/system-prompt/guidance.d.ts +30 -0
- package/dist/system-prompt/guidance.d.ts.map +1 -1
- package/dist/system-prompt/guidance.js +50 -0
- package/dist/system-prompt/guidance.js.map +1 -1
- package/package.json +9 -1
|
@@ -0,0 +1,343 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Gemini (Google) media-understanding adapter — talks to the Generative
|
|
3
|
+
* Language REST API at `generativelanguage.googleapis.com/v1beta` directly.
|
|
4
|
+
*
|
|
5
|
+
* Two paths:
|
|
6
|
+
* • VIDEO → the Files API. Inline base64 caps out well below real video
|
|
7
|
+
* sizes, so video is UPLOADED first (resumable upload), POLLED until the
|
|
8
|
+
* file state flips to ACTIVE (Gemini transcodes/indexes asynchronously),
|
|
9
|
+
* then referenced from `generateContent` via a `file_data` part holding
|
|
10
|
+
* the returned `fileUri`. This is the only way large video works.
|
|
11
|
+
* • image / pdf / audio → an inline `inline_data` part (base64) + the prompt, sent
|
|
12
|
+
* straight to `generateContent`. These are small enough to inline.
|
|
13
|
+
*
|
|
14
|
+
* The Google key rides in the query string (`?key=…`), matching Brigade's
|
|
15
|
+
* existing validator (`providers/validate-key.ts`) and the Gemini convention
|
|
16
|
+
* — there is no auth header. Every call takes an injectable `fetchFn` so the
|
|
17
|
+
* adapter is exercised with zero real network.
|
|
18
|
+
*/
|
|
19
|
+
import { MediaUnderstandingProviderError, } from "./types.js";
|
|
20
|
+
/** Canonical Gemini API base — keep on the trusted Google host. */
|
|
21
|
+
export const DEFAULT_GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta";
|
|
22
|
+
/**
|
|
23
|
+
* Default models per kind. Current Gemini multimodal models that read video,
|
|
24
|
+
* images and PDFs. `gemini-2.5-flash` is the cheap/fast default (video defaults to `gemini-2.5-pro`); callers can
|
|
25
|
+
* override per call or via config.
|
|
26
|
+
*/
|
|
27
|
+
export const DEFAULT_GEMINI_MODELS = {
|
|
28
|
+
video: "gemini-2.5-pro",
|
|
29
|
+
image: "gemini-2.5-flash",
|
|
30
|
+
pdf: "gemini-2.5-flash",
|
|
31
|
+
audio: "gemini-2.5-flash",
|
|
32
|
+
};
|
|
33
|
+
const DEFAULT_PROMPTS = {
|
|
34
|
+
video: "Describe this video in detail: the scenes, actions, on-screen text, and any spoken words.",
|
|
35
|
+
image: "Describe this image in detail.",
|
|
36
|
+
pdf: "Read this document and summarize its contents, preserving key facts, figures, and structure.",
|
|
37
|
+
audio: "Transcribe this audio.",
|
|
38
|
+
};
|
|
39
|
+
/** Per-request timeouts. Video upload + generation can be slow; bound each. */
|
|
40
|
+
const UPLOAD_TIMEOUT_MS = 120_000;
|
|
41
|
+
const GENERATE_TIMEOUT_MS = 180_000;
|
|
42
|
+
const POLL_TIMEOUT_MS = 15_000;
|
|
43
|
+
/** Total wall-clock budget waiting for an uploaded file to become ACTIVE. */
|
|
44
|
+
const ACTIVE_POLL_BUDGET_MS = 180_000;
|
|
45
|
+
/** Delay between file-state polls. */
|
|
46
|
+
const POLL_INTERVAL_MS = 2_000;
|
|
47
|
+
/** Hard ceiling on `maxTokens`. */
|
|
48
|
+
const MAX_OUTPUT_TOKENS_CEILING = 32_000;
|
|
49
|
+
/**
|
|
50
|
+
* Build the optional `generationConfig` for a Gemini request. When no
|
|
51
|
+
* `maxTokens` is given we omit it entirely (let the model use its default);
|
|
52
|
+
* otherwise clamp it to a sane window.
|
|
53
|
+
*/
|
|
54
|
+
function generationConfig(maxTokens) {
|
|
55
|
+
if (typeof maxTokens !== "number" || !Number.isFinite(maxTokens))
|
|
56
|
+
return undefined;
|
|
57
|
+
const v = Math.max(256, Math.min(MAX_OUTPUT_TOKENS_CEILING, Math.floor(maxTokens)));
|
|
58
|
+
return { maxOutputTokens: v };
|
|
59
|
+
}
|
|
60
|
+
/**
 * Reduce a caller-supplied model id to its bare form so callers can pass
 * either `gemini-2.5-pro` or `models/gemini-2.5-pro`; request builders always
 * re-add the `models/<id>:method` path themselves.
 *
 * @param model Model id, optionally prefixed with `models/` and optionally
 *   padded with whitespace.
 * @returns The id without surrounding whitespace and without a leading
 *   `models/` prefix.
 */
function normalizeModelId(model) {
    // Trim BEFORE stripping: the `^models/` anchor cannot match behind leading
    // whitespace, which would leave the prefix in place and later produce a
    // `models/models/<id>` path. The trailing trim keeps `"models/ id"` tidy.
    return model.trim().replace(/^models\//, "").trim();
}
|
|
67
|
+
/**
 * Choose the model id for a request.
 *
 * @param kind Media kind used to look up the default model.
 * @param model Optional caller override; blank / whitespace-only counts as absent.
 * @returns The normalized override when one is given, otherwise the default
 *   for `kind`, otherwise the image default.
 */
function resolveModel(kind, model) {
    const requested = model?.trim();
    return requested
        ? normalizeModelId(requested)
        : (DEFAULT_GEMINI_MODELS[kind] ?? DEFAULT_GEMINI_MODELS.image);
}
|
|
73
|
+
/**
 * Choose the prompt text for a request.
 *
 * @param kind Media kind used to look up the default prompt.
 * @param prompt Optional caller prompt; blank / whitespace-only counts as absent.
 * @returns The trimmed caller prompt when one is given, otherwise the default
 *   prompt for `kind`, otherwise the image default.
 */
function resolvePrompt(kind, prompt) {
    const trimmed = prompt?.trim();
    if (trimmed)
        return trimmed;
    // Mirror resolveModel's fallback: without it an unrecognized `kind` yields
    // `undefined`, which JSON.stringify drops, sending an empty text part.
    return DEFAULT_PROMPTS[kind] ?? DEFAULT_PROMPTS.image;
}
|
|
77
|
+
/**
 * Derive the abort signal used for a single request.
 *
 * @param signal Optional caller-supplied cancellation signal.
 * @param ms Per-request timeout in milliseconds.
 * @returns The timeout signal alone when no caller signal is given, otherwise
 *   a combined signal built from the caller signal and the timeout signal.
 */
function withTimeout(signal, ms) {
    const deadline = AbortSignal.timeout(ms);
    return signal ? AbortSignal.any([signal, deadline]) : deadline;
}
|
|
84
|
+
/**
 * Default delay used between file-state polls: a promise that resolves after
 * `ms` milliseconds. The timer handle is unref'd when the runtime exposes
 * `unref` (hence the optional call).
 */
const defaultSleep = (ms) => new Promise((resolve) => {
    const timer = setTimeout(resolve, ms);
    // NOTE(review): an unref'd timer presumably will not keep the process
    // alive on its own — confirm that is intended for one-shot CLI callers.
    timer.unref?.();
});
|
|
85
|
+
/**
 * Extract the concatenated text from a generateContent response payload.
 *
 * Only the first candidate is read. Each part's string `text` is trimmed,
 * empty and non-text parts are dropped, and the rest are joined with newlines.
 *
 * @param payload Parsed response body; any shape is tolerated.
 * @returns The joined text, or `""` when the payload carries no usable text.
 */
function extractGeneratedText(payload) {
    const parts = payload?.candidates?.[0]?.content?.parts;
    // A malformed payload can carry a non-array `parts` (string, object, …).
    // Treat it as "no text" so callers raise their own provider error instead
    // of a raw TypeError escaping from `.map`.
    if (!Array.isArray(parts))
        return "";
    return parts
        .map((part) => (typeof part?.text === "string" ? part.text.trim() : ""))
        .filter(Boolean)
        .join("\n")
        .trim();
}
|
|
96
|
+
async function readErrorMessage(res) {
|
|
97
|
+
try {
|
|
98
|
+
const body = (await res.json());
|
|
99
|
+
if (body?.error?.message)
|
|
100
|
+
return body.error.message;
|
|
101
|
+
}
|
|
102
|
+
catch {
|
|
103
|
+
/* fall through */
|
|
104
|
+
}
|
|
105
|
+
return `HTTP ${res.status}`;
|
|
106
|
+
}
|
|
107
|
+
/* ─────────────────────────── inline path (image / pdf / audio) ─────────────────────────── */
|
|
108
|
+
/**
 * Inline path: send the prompt plus the media as a base64 `inline_data` part
 * in a single `generateContent` call.
 *
 * @param params Request parameters. Reads `apiKey`, `kind`, `mimeType`,
 *   `bytes`, and the optional `model`, `prompt`, `maxTokens`, `baseUrl`,
 *   `fetchFn` and `signal`.
 * @returns `{ text, provider: "google", model }` where `model` is the resolved id.
 * @throws MediaUnderstandingProviderError when the request cannot be sent,
 *   the response is not ok (the HTTP status is attached), or no text comes back.
 */
async function generateInline(params) {
    const fetchFn = params.fetchFn ?? fetch;
    const baseUrl = (params.baseUrl ?? DEFAULT_GEMINI_BASE_URL).replace(/\/+$/, "");
    const model = resolveModel(params.kind, params.model);
    const prompt = resolvePrompt(params.kind, params.prompt);
    // Percent-encode the model id like the key: it can be a caller override,
    // and a stray `/`, `?` or `#` would otherwise rewrite the request URL.
    const url = `${baseUrl}/models/${encodeURIComponent(model)}:generateContent?key=${encodeURIComponent(params.apiKey)}`;
    const genCfg = generationConfig(params.maxTokens);
    const body = {
        contents: [
            {
                role: "user",
                parts: [
                    { text: prompt },
                    {
                        inline_data: {
                            mime_type: params.mimeType,
                            // assumes `bytes` is a Node Buffer (encoding-aware toString) — TODO confirm
                            data: params.bytes.toString("base64"),
                        },
                    },
                ],
            },
        ],
        // Only send generationConfig when the caller asked for a token budget.
        ...(genCfg ? { generationConfig: genCfg } : {}),
    };
    let res;
    try {
        res = await fetchFn(url, {
            method: "POST",
            headers: { "content-type": "application/json" },
            body: JSON.stringify(body),
            signal: withTimeout(params.signal, GENERATE_TIMEOUT_MS),
        });
    }
    catch (err) {
        // Transport-level failure (network, abort, timeout): no HTTP status to attach.
        throw new MediaUnderstandingProviderError("google", `Gemini request failed: ${err instanceof Error ? err.message : String(err)}`);
    }
    if (!res.ok) {
        throw new MediaUnderstandingProviderError("google", `Gemini error: ${await readErrorMessage(res)}`, res.status);
    }
    // An unparseable success body is treated like an empty one.
    const payload = await res.json().catch(() => ({}));
    const text = extractGeneratedText(payload);
    if (!text) {
        throw new MediaUnderstandingProviderError("google", "Gemini returned no text for the media.");
    }
    return { text, provider: "google", model };
}
|
|
154
|
+
/**
 * Upload bytes via the Gemini Files API resumable protocol:
 *   1. POST to the upload endpoint with `x-goog-upload-protocol: resumable`
 *      + `x-goog-upload-command: start` and the file metadata → the response
 *      carries an `x-goog-upload-url` header naming the upload session URL.
 *   2. POST the bytes to that session URL with
 *      `x-goog-upload-command: upload, finalize` → the response body is the
 *      `{ file: {...} }` resource.
 *
 * @param params Request parameters. Reads `apiKey`, `bytes`, `mimeType` and
 *   the optional `baseUrl`, `fetchFn` and `signal`.
 * @returns `{ uri, name, state, mimeType }`; `state` falls back to
 *   `"PROCESSING"` and `mimeType` to `params.mimeType` when the response
 *   omits them.
 * @throws MediaUnderstandingProviderError when either request cannot be sent
 *   or is not ok, when no session URL is returned, or when the finalize
 *   response lacks a file `uri` / `name`.
 */
async function uploadFile(params) {
    const fetchFn = params.fetchFn ?? fetch;
    const baseUrl = (params.baseUrl ?? DEFAULT_GEMINI_BASE_URL).replace(/\/+$/, "");
    // The upload endpoint shares the host but lives under `/upload/v1beta/files`.
    // Derive it from the base (which ends in `/v1beta`) so a region/test override
    // of the base carries through.
    const uploadUrl = `${baseUrl.replace(/\/v1beta$/, "")}/upload/v1beta/files?key=${encodeURIComponent(params.apiKey)}`;
    const numBytes = params.bytes.length;
    // Step 1 — start a resumable upload session.
    let startRes;
    try {
        startRes = await fetchFn(uploadUrl, {
            method: "POST",
            headers: {
                "x-goog-upload-protocol": "resumable",
                "x-goog-upload-command": "start",
                "x-goog-upload-header-content-length": String(numBytes),
                "x-goog-upload-header-content-type": params.mimeType,
                "content-type": "application/json",
            },
            body: JSON.stringify({ file: { display_name: "brigade-media" } }),
            signal: withTimeout(params.signal, UPLOAD_TIMEOUT_MS),
        });
    }
    catch (err) {
        throw new MediaUnderstandingProviderError("google", `Gemini file upload (start) failed: ${err instanceof Error ? err.message : String(err)}`);
    }
    if (!startRes.ok) {
        throw new MediaUnderstandingProviderError("google", `Gemini file upload (start) error: ${await readErrorMessage(startRes)}`, startRes.status);
    }
    // Second lookup covers injected `fetchFn` doubles whose headers object is
    // case-sensitive.
    const sessionUrl = startRes.headers.get("x-goog-upload-url") ?? startRes.headers.get("X-Goog-Upload-URL");
    if (!sessionUrl) {
        throw new MediaUnderstandingProviderError("google", "Gemini file upload did not return an upload session URL.");
    }
    // Step 2 — upload + finalize the bytes in a single request.
    let finalizeRes;
    try {
        finalizeRes = await fetchFn(sessionUrl, {
            method: "POST",
            headers: {
                "content-length": String(numBytes),
                "x-goog-upload-offset": "0",
                "x-goog-upload-command": "upload, finalize",
            },
            // NOTE(review): if `bytes` is a Buffer this constructor copies it, so a
            // large video is briefly held in memory twice — confirm that is acceptable.
            body: new Uint8Array(params.bytes),
            signal: withTimeout(params.signal, UPLOAD_TIMEOUT_MS),
        });
    }
    catch (err) {
        throw new MediaUnderstandingProviderError("google", `Gemini file upload (finalize) failed: ${err instanceof Error ? err.message : String(err)}`);
    }
    if (!finalizeRes.ok) {
        throw new MediaUnderstandingProviderError("google", `Gemini file upload (finalize) error: ${await readErrorMessage(finalizeRes)}`, finalizeRes.status);
    }
    const uploaded = await finalizeRes.json().catch(() => ({}));
    // `?.` guards a JSON `null` body, which would otherwise raise a raw
    // TypeError instead of the provider error below.
    const file = uploaded?.file;
    if (!file?.uri || !file?.name) {
        throw new MediaUnderstandingProviderError("google", "Gemini file upload returned no file URI.");
    }
    return {
        uri: file.uri,
        name: file.name,
        state: file.state ?? "PROCESSING",
        mimeType: file.mimeType ?? params.mimeType,
    };
}
|
|
228
|
+
/**
 * GET the current state of an uploaded file (`files/<id>`).
 *
 * @param fileName File resource name, with or without the `files/` prefix.
 * @param params Request parameters. Reads `apiKey` and the optional
 *   `baseUrl`, `fetchFn` and `signal`.
 * @returns `{ state, uri?, mimeType? }`; `state` falls back to `"PROCESSING"`
 *   when the response omits it, and `uri` / `mimeType` are present only when
 *   the response carries them.
 * @throws MediaUnderstandingProviderError when the request cannot be sent or
 *   the response is not ok (the HTTP status is attached).
 */
async function getFileState(fileName, params) {
    const fetchFn = params.fetchFn ?? fetch;
    const baseUrl = (params.baseUrl ?? DEFAULT_GEMINI_BASE_URL).replace(/\/+$/, "");
    // `fileName` is the resource name like `files/abc123`. Build `<base>/files/abc123`.
    const rel = fileName.startsWith("files/") ? fileName : `files/${fileName}`;
    const url = `${baseUrl}/${rel}?key=${encodeURIComponent(params.apiKey)}`;
    let res;
    try {
        res = await fetchFn(url, {
            method: "GET",
            signal: withTimeout(params.signal, POLL_TIMEOUT_MS),
        });
    }
    catch (err) {
        throw new MediaUnderstandingProviderError("google", `Gemini file status poll failed: ${err instanceof Error ? err.message : String(err)}`);
    }
    if (!res.ok) {
        throw new MediaUnderstandingProviderError("google", `Gemini file status error: ${await readErrorMessage(res)}`, res.status);
    }
    const body = await res.json().catch(() => ({}));
    // `?.` guards a JSON `null` body: report it as still processing rather
    // than letting a raw TypeError escape the poll.
    return {
        state: body?.state ?? "PROCESSING",
        ...(body?.uri ? { uri: body.uri } : {}),
        ...(body?.mimeType ? { mimeType: body.mimeType } : {}),
    };
}
|
|
255
|
+
/**
 * Wait for a freshly-uploaded file to reach the ACTIVE state.
 *
 * @param file Upload result carrying `name`, `uri`, `mimeType` and `state`.
 * @param params Request parameters; the optional `sleepFn` replaces the
 *   default delay, and the whole object is passed on to each status poll.
 * @returns The file record once its state is `"ACTIVE"`, with `uri` and
 *   `mimeType` refreshed from the latest poll when the poll reported them.
 * @throws MediaUnderstandingProviderError when the state becomes `"FAILED"`
 *   or the polling budget is used up; errors from a status poll propagate.
 */
async function waitForActive(file, params) {
    const pause = params.sleepFn ?? defaultSleep;
    if (file.state === "ACTIVE") {
        return file;
    }
    const giveUpAt = Date.now() + ACTIVE_POLL_BUDGET_MS;
    let latest = file;
    while (latest.state !== "ACTIVE") {
        if (latest.state === "FAILED") {
            throw new MediaUnderstandingProviderError("google", "Gemini failed to process the uploaded video (file state FAILED).");
        }
        // The budget is checked before each wait, so the last poll may finish
        // slightly after the deadline.
        if (Date.now() >= giveUpAt) {
            throw new MediaUnderstandingProviderError("google", "Gemini did not finish processing the video within the time budget. Try a shorter clip.");
        }
        await pause(POLL_INTERVAL_MS);
        const polled = await getFileState(latest.name, params);
        // Keep the previously known uri / mimeType when the poll omits them.
        latest = {
            name: latest.name,
            uri: polled.uri ?? latest.uri,
            mimeType: polled.mimeType ?? latest.mimeType,
            state: polled.state,
        };
    }
    return latest;
}
|
|
281
|
+
/**
 * Reference an uploaded (ACTIVE) file from `generateContent` via a
 * `file_data` part.
 *
 * @param file Uploaded file record; reads `uri` and `mimeType`.
 * @param params Request parameters. Reads `apiKey`, `kind`, and the optional
 *   `model`, `prompt`, `maxTokens`, `baseUrl`, `fetchFn` and `signal`.
 * @returns `{ text, provider: "google", model }` where `model` is the resolved id.
 * @throws MediaUnderstandingProviderError when the request cannot be sent,
 *   the response is not ok (the HTTP status is attached), or no text comes back.
 */
async function generateFromFile(file, params) {
    const fetchFn = params.fetchFn ?? fetch;
    const baseUrl = (params.baseUrl ?? DEFAULT_GEMINI_BASE_URL).replace(/\/+$/, "");
    const model = resolveModel(params.kind, params.model);
    const prompt = resolvePrompt(params.kind, params.prompt);
    // Percent-encode the model id like the key: it can be a caller override,
    // and a stray `/`, `?` or `#` would otherwise rewrite the request URL.
    const url = `${baseUrl}/models/${encodeURIComponent(model)}:generateContent?key=${encodeURIComponent(params.apiKey)}`;
    const genCfg = generationConfig(params.maxTokens);
    const body = {
        contents: [
            {
                role: "user",
                parts: [
                    { text: prompt },
                    { file_data: { file_uri: file.uri, mime_type: file.mimeType } },
                ],
            },
        ],
        // Only send generationConfig when the caller asked for a token budget.
        ...(genCfg ? { generationConfig: genCfg } : {}),
    };
    let res;
    try {
        res = await fetchFn(url, {
            method: "POST",
            headers: { "content-type": "application/json" },
            body: JSON.stringify(body),
            signal: withTimeout(params.signal, GENERATE_TIMEOUT_MS),
        });
    }
    catch (err) {
        // Transport-level failure (network, abort, timeout): no HTTP status to attach.
        throw new MediaUnderstandingProviderError("google", `Gemini generate (from file) failed: ${err instanceof Error ? err.message : String(err)}`);
    }
    if (!res.ok) {
        throw new MediaUnderstandingProviderError("google", `Gemini generate error: ${await readErrorMessage(res)}`, res.status);
    }
    // An unparseable success body is treated like an empty one.
    const payload = await res.json().catch(() => ({}));
    const text = extractGeneratedText(payload);
    if (!text) {
        throw new MediaUnderstandingProviderError("google", "Gemini returned no text for the video.");
    }
    return { text, provider: "google", model };
}
|
|
323
|
+
/* ─────────────────────────── entry point ─────────────────────────── */
|
|
324
|
+
/**
 * Entry point for a Gemini media-understanding request.
 *
 * Video goes through the Files API (upload → poll until ACTIVE → generate
 * from the file reference). Every other kind (image / pdf / audio) is sent
 * inline in a single request.
 *
 * @param params Request parameters; `apiKey` is required and `kind` selects
 *   the path. The same object is handed to each step.
 * @returns The result produced by the selected path (the model's text answer).
 * @throws MediaUnderstandingProviderError when no API key is configured;
 *   errors raised by the individual steps propagate unchanged.
 */
export async function runGemini(params) {
    if (!params.apiKey) {
        throw new MediaUnderstandingProviderError("google", "No Google/Gemini API key configured.");
    }
    if (params.kind !== "video") {
        // image / pdf / audio → inline data.
        return generateInline(params);
    }
    const uploaded = await uploadFile(params);
    const ready = await waitForActive(uploaded, params);
    return generateFromFile(ready, params);
}
|
|
341
|
+
// Exported for tests.
|
|
342
|
+
export { extractGeneratedText, normalizeModelId };
|
|
343
|
+
//# sourceMappingURL=gemini-adapter.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"gemini-adapter.js","sourceRoot":"","sources":["../../../src/agents/media-understanding/gemini-adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AAEH,OAAO,EACN,+BAA+B,GAG/B,MAAM,YAAY,CAAC;AAEpB,mEAAmE;AACnE,MAAM,CAAC,MAAM,uBAAuB,GAAG,kDAAkD,CAAC;AAE1F;;;;GAIG;AACH,MAAM,CAAC,MAAM,qBAAqB,GAAuE;IACxG,KAAK,EAAE,gBAAgB;IACvB,KAAK,EAAE,kBAAkB;IACzB,GAAG,EAAE,kBAAkB;IACvB,KAAK,EAAE,kBAAkB;CACzB,CAAC;AAEF,MAAM,eAAe,GAA2C;IAC/D,KAAK,EAAE,2FAA2F;IAClG,KAAK,EAAE,gCAAgC;IACvC,GAAG,EAAE,8FAA8F;IACnG,KAAK,EAAE,wBAAwB;CAC/B,CAAC;AAEF,+EAA+E;AAC/E,MAAM,iBAAiB,GAAG,OAAO,CAAC;AAClC,MAAM,mBAAmB,GAAG,OAAO,CAAC;AACpC,MAAM,eAAe,GAAG,MAAM,CAAC;AAC/B,6EAA6E;AAC7E,MAAM,qBAAqB,GAAG,OAAO,CAAC;AACtC,sCAAsC;AACtC,MAAM,gBAAgB,GAAG,KAAK,CAAC;AAkB/B,mCAAmC;AACnC,MAAM,yBAAyB,GAAG,MAAM,CAAC;AAEzC;;;;GAIG;AACH,SAAS,gBAAgB,CAAC,SAAkB;IAC3C,IAAI,OAAO,SAAS,KAAK,QAAQ,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,SAAS,CAAC;QAAE,OAAO,SAAS,CAAC;IACnF,MAAM,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,IAAI,CAAC,GAAG,CAAC,yBAAyB,EAAE,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC;IACpF,OAAO,EAAE,eAAe,EAAE,CAAC,EAAE,CAAC;AAC/B,CAAC;AAED;;;GAGG;AACH,SAAS,gBAAgB,CAAC,KAAa;IACtC,OAAO,KAAK,CAAC,OAAO,CAAC,WAAW,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;AAC9C,CAAC;AAED,SAAS,YAAY,CAAC,IAA4B,EAAE,KAAc;IACjE,MAAM,OAAO,GAAG,KAAK,EAAE,IAAI,EAAE,CAAC;IAC9B,IAAI,OAAO;QAAE,OAAO,gBAAgB,CAAC,OAAO,CAAC,CAAC;IAC9C,OAAO,qBAAqB,CAAC,IAAI,CAAC,IAAI,qBAAqB,CAAC,KAAK,CAAC;AACnE,CAAC;AAED,SAAS,aAAa,CAAC,IAA4B,EAAE,MAAe;IACnE,MAAM,OAAO,GAAG,MAAM,EAAE,IAAI,EAAE,CAAC;IAC/B,OAAO,OAAO,IAAI,eAAe,CAAC,IAAI,CAAC,CAAC;AACzC,CAAC;AAED,4DAA4D;AAC5D,SAAS,WAAW,CAAC,MAA+B,EAAE,EAAU;IAC/D,MAAM,aAAa,GAAG,WAAW,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;IAC9C,IAAI,CAAC,MAAM;QAAE,OAAO,aAAa,CAAC;IAClC,OAAO,WAAW,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,aAAa,CAAC,CAAC,CAAC;AACjD,CAAC;AAED,MAAM,YAAY,GAAG,CAAC,EAAU,EAAiB,EAAE,CAClD,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;AAE7D,6EAA6E;AAC7E,SAAS,oBAAoB,CAAC,OAAgB;IAC7C,MA
AM,UAAU,GAAI,OAAsF;QACzG,EAAE,UAAU,CAAC;IACd,MAAM,KAAK,GAAG,UAAU,EAAE,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,KAAK,IAAI,EAAE,CAAC;IACpD,OAAO,KAAK;SACV,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,EAAE,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;SAC9D,MAAM,CAAC,OAAO,CAAC;SACf,IAAI,CAAC,IAAI,CAAC;SACV,IAAI,EAAE,CAAC;AACV,CAAC;AAED,KAAK,UAAU,gBAAgB,CAAC,GAAa;IAC5C,IAAI,CAAC;QACJ,MAAM,IAAI,GAAG,CAAC,MAAM,GAAG,CAAC,IAAI,EAAE,CAAqC,CAAC;QACpE,IAAI,IAAI,EAAE,KAAK,EAAE,OAAO;YAAE,OAAO,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC;IACrD,CAAC;IAAC,MAAM,CAAC;QACR,kBAAkB;IACnB,CAAC;IACD,OAAO,QAAQ,GAAG,CAAC,MAAM,EAAE,CAAC;AAC7B,CAAC;AAED,uFAAuF;AAEvF,KAAK,UAAU,cAAc,CAAC,MAA2B;IACxD,MAAM,OAAO,GAAG,MAAM,CAAC,OAAO,IAAI,KAAK,CAAC;IACxC,MAAM,OAAO,GAAG,CAAC,MAAM,CAAC,OAAO,IAAI,uBAAuB,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;IAChF,MAAM,KAAK,GAAG,YAAY,CAAC,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC;IACtD,MAAM,MAAM,GAAG,aAAa,CAAC,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC;IACzD,MAAM,GAAG,GAAG,GAAG,OAAO,WAAW,KAAK,wBAAwB,kBAAkB,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE,CAAC;IAClG,MAAM,MAAM,GAAG,gBAAgB,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;IAClD,MAAM,IAAI,GAAG;QACZ,QAAQ,EAAE;YACT;gBACC,IAAI,EAAE,MAAM;gBACZ,KAAK,EAAE;oBACN,EAAE,IAAI,EAAE,MAAM,EAAE;oBAChB;wBACC,WAAW,EAAE;4BACZ,SAAS,EAAE,MAAM,CAAC,QAAQ;4BAC1B,IAAI,EAAE,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,QAAQ,CAAC;yBACrC;qBACD;iBACD;aACD;SACD;QACD,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,gBAAgB,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;KAC/C,CAAC;IACF,IAAI,GAAa,CAAC;IAClB,IAAI,CAAC;QACJ,GAAG,GAAG,MAAM,OAAO,CAAC,GAAG,EAAE;YACxB,MAAM,EAAE,MAAM;YACd,OAAO,EAAE,EAAE,cAAc,EAAE,kBAAkB,EAAE;YAC/C,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;YAC1B,MAAM,EAAE,WAAW,CAAC,MAAM,CAAC,MAAM,EAAE,mBAAmB,CAAC;SACvD,CAAC,CAAC;IACJ,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACd,MAAM,IAAI,+BAA+B,CACxC,QAAQ,EACR,0BAA0B,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CAC5E,CAAC;IACH,CAAC;IACD,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC;QACb,MAAM,IAAI,+BAA+B,C
AAC,QAAQ,EAAE,iBAAiB,MAAM,gBAAgB,CAAC,GAAG,CAAC,EAAE,EAAE,GAAG,CAAC,MAAM,CAAC,CAAC;IACjH,CAAC;IACD,MAAM,OAAO,GAAG,MAAM,GAAG,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;IACnD,MAAM,IAAI,GAAG,oBAAoB,CAAC,OAAO,CAAC,CAAC;IAC3C,IAAI,CAAC,IAAI,EAAE,CAAC;QACX,MAAM,IAAI,+BAA+B,CAAC,QAAQ,EAAE,wCAAwC,CAAC,CAAC;IAC/F,CAAC;IACD,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC;AAC5C,CAAC;AAWD;;;;;;;GAOG;AACH,KAAK,UAAU,UAAU,CAAC,MAA2B;IACpD,MAAM,OAAO,GAAG,MAAM,CAAC,OAAO,IAAI,KAAK,CAAC;IACxC,MAAM,OAAO,GAAG,CAAC,MAAM,CAAC,OAAO,IAAI,uBAAuB,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;IAChF,8EAA8E;IAC9E,8EAA8E;IAC9E,+BAA+B;IAC/B,MAAM,SAAS,GAAG,GAAG,OAAO,CAAC,OAAO,CAAC,WAAW,EAAE,EAAE,CAAC,4BAA4B,kBAAkB,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE,CAAC;IACrH,MAAM,QAAQ,GAAG,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC;IAErC,6CAA6C;IAC7C,IAAI,QAAkB,CAAC;IACvB,IAAI,CAAC;QACJ,QAAQ,GAAG,MAAM,OAAO,CAAC,SAAS,EAAE;YACnC,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACR,wBAAwB,EAAE,WAAW;gBACrC,uBAAuB,EAAE,OAAO;gBAChC,qCAAqC,EAAE,MAAM,CAAC,QAAQ,CAAC;gBACvD,mCAAmC,EAAE,MAAM,CAAC,QAAQ;gBACpD,cAAc,EAAE,kBAAkB;aAClC;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,EAAE,IAAI,EAAE,EAAE,YAAY,EAAE,eAAe,EAAE,EAAE,CAAC;YACjE,MAAM,EAAE,WAAW,CAAC,MAAM,CAAC,MAAM,EAAE,iBAAiB,CAAC;SACrD,CAAC,CAAC;IACJ,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACd,MAAM,IAAI,+BAA+B,CACxC,QAAQ,EACR,sCAAsC,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CACxF,CAAC;IACH,CAAC;IACD,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;QAClB,MAAM,IAAI,+BAA+B,CACxC,QAAQ,EACR,qCAAqC,MAAM,gBAAgB,CAAC,QAAQ,CAAC,EAAE,EACvE,QAAQ,CAAC,MAAM,CACf,CAAC;IACH,CAAC;IACD,MAAM,UAAU,GACf,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,IAAI,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,CAAC;IACxF,IAAI,CAAC,UAAU,EAAE,CAAC;QACjB,MAAM,IAAI,+BAA+B,CACxC,QAAQ,EACR,0DAA0D,CAC1D,CAAC;IACH,CAAC;IAED,wCAAwC;IACxC,IAAI,MAAgB,CAAC;IACrB,IAAI,CAAC;QACJ,MAAM,GAAG,MAAM,OAAO,CAAC,UAAU,EAAE;YAClC,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACR,gBAAgB,EAAE,MAAM,CAAC,QAAQ,CAAC;gBAClC,sBAAsB,EAAE,GAAG;gBAC3B,uB
AAuB,EAAE,kBAAkB;aAC3C;YACD,IAAI,EAAE,IAAI,UAAU,CAAC,MAAM,CAAC,KAAK,CAAC;YAClC,MAAM,EAAE,WAAW,CAAC,MAAM,CAAC,MAAM,EAAE,iBAAiB,CAAC;SACrD,CAAC,CAAC;IACJ,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACd,MAAM,IAAI,+BAA+B,CACxC,QAAQ,EACR,yCAAyC,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CAC3F,CAAC;IACH,CAAC;IACD,IAAI,CAAC,MAAM,CAAC,EAAE,EAAE,CAAC;QAChB,MAAM,IAAI,+BAA+B,CACxC,QAAQ,EACR,wCAAwC,MAAM,gBAAgB,CAAC,MAAM,CAAC,EAAE,EACxE,MAAM,CAAC,MAAM,CACb,CAAC;IACH,CAAC;IACD,MAAM,QAAQ,GAAG,CAAC,MAAM,MAAM,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,CAEtD,CAAC;IACF,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC;IAC3B,IAAI,CAAC,IAAI,EAAE,GAAG,IAAI,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC;QAC/B,MAAM,IAAI,+BAA+B,CACxC,QAAQ,EACR,0CAA0C,CAC1C,CAAC;IACH,CAAC;IACD,OAAO;QACN,GAAG,EAAE,IAAI,CAAC,GAAG;QACb,IAAI,EAAE,IAAI,CAAC,IAAI;QACf,KAAK,EAAE,IAAI,CAAC,KAAK,IAAI,YAAY;QACjC,QAAQ,EAAE,IAAI,CAAC,QAAQ,IAAI,MAAM,CAAC,QAAQ;KAC1C,CAAC;AACH,CAAC;AAED,gEAAgE;AAChE,KAAK,UAAU,YAAY,CAC1B,QAAgB,EAChB,MAA2B;IAE3B,MAAM,OAAO,GAAG,MAAM,CAAC,OAAO,IAAI,KAAK,CAAC;IACxC,MAAM,OAAO,GAAG,CAAC,MAAM,CAAC,OAAO,IAAI,uBAAuB,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;IAChF,oFAAoF;IACpF,MAAM,GAAG,GAAG,QAAQ,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,SAAS,QAAQ,EAAE,CAAC;IAC3E,MAAM,GAAG,GAAG,GAAG,OAAO,IAAI,GAAG,QAAQ,kBAAkB,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE,CAAC;IACzE,IAAI,GAAa,CAAC;IAClB,IAAI,CAAC;QACJ,GAAG,GAAG,MAAM,OAAO,CAAC,GAAG,EAAE;YACxB,MAAM,EAAE,KAAK;YACb,MAAM,EAAE,WAAW,CAAC,MAAM,CAAC,MAAM,EAAE,eAAe,CAAC;SACnD,CAAC,CAAC;IACJ,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACd,MAAM,IAAI,+BAA+B,CACxC,QAAQ,EACR,mCAAmC,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CACrF,CAAC;IACH,CAAC;IACD,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC;QACb,MAAM,IAAI,+BAA+B,CACxC,QAAQ,EACR,6BAA6B,MAAM,gBAAgB,CAAC,GAAG,CAAC,EAAE,EAC1D,GAAG,CAAC,MAAM,CACV,CAAC;IACH,CAAC;IACD,MAAM,IAAI,GAAG,CAAC,MAAM,GAAG,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,CAI/C,CAAC;IACF,
OAAO;QACN,KAAK,EAAE,IAAI,CAAC,KAAK,IAAI,YAAY;QACjC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,GAAG,EAAE,IAAI,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QACtC,GAAG,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,QAAQ,EAAE,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;KACrD,CAAC;AACH,CAAC;AAED,mFAAmF;AACnF,KAAK,UAAU,aAAa,CAC3B,IAAkB,EAClB,MAA2B;IAE3B,MAAM,KAAK,GAAG,MAAM,CAAC,OAAO,IAAI,YAAY,CAAC;IAC7C,IAAI,IAAI,CAAC,KAAK,KAAK,QAAQ;QAAE,OAAO,IAAI,CAAC;IACzC,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,qBAAqB,CAAC;IACpD,IAAI,OAAO,GAAG,IAAI,CAAC;IACnB,SAAS,CAAC;QACT,IAAI,OAAO,CAAC,KAAK,KAAK,QAAQ;YAAE,OAAO,OAAO,CAAC;QAC/C,IAAI,OAAO,CAAC,KAAK,KAAK,QAAQ,EAAE,CAAC;YAChC,MAAM,IAAI,+BAA+B,CACxC,QAAQ,EACR,kEAAkE,CAClE,CAAC;QACH,CAAC;QACD,IAAI,IAAI,CAAC,GAAG,EAAE,IAAI,QAAQ,EAAE,CAAC;YAC5B,MAAM,IAAI,+BAA+B,CACxC,QAAQ,EACR,wFAAwF,CACxF,CAAC;QACH,CAAC;QACD,MAAM,KAAK,CAAC,gBAAgB,CAAC,CAAC;QAC9B,MAAM,IAAI,GAAG,MAAM,YAAY,CAAC,OAAO,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;QACtD,OAAO,GAAG;YACT,IAAI,EAAE,OAAO,CAAC,IAAI;YAClB,GAAG,EAAE,IAAI,CAAC,GAAG,IAAI,OAAO,CAAC,GAAG;YAC5B,QAAQ,EAAE,IAAI,CAAC,QAAQ,IAAI,OAAO,CAAC,QAAQ;YAC3C,KAAK,EAAE,IAAI,CAAC,KAAK;SACjB,CAAC;IACH,CAAC;AACF,CAAC;AAED,qFAAqF;AACrF,KAAK,UAAU,gBAAgB,CAC9B,IAAkB,EAClB,MAA2B;IAE3B,MAAM,OAAO,GAAG,MAAM,CAAC,OAAO,IAAI,KAAK,CAAC;IACxC,MAAM,OAAO,GAAG,CAAC,MAAM,CAAC,OAAO,IAAI,uBAAuB,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;IAChF,MAAM,KAAK,GAAG,YAAY,CAAC,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC;IACtD,MAAM,MAAM,GAAG,aAAa,CAAC,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC;IACzD,MAAM,GAAG,GAAG,GAAG,OAAO,WAAW,KAAK,wBAAwB,kBAAkB,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE,CAAC;IAClG,MAAM,MAAM,GAAG,gBAAgB,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;IAClD,MAAM,IAAI,GAAG;QACZ,QAAQ,EAAE;YACT;gBACC,IAAI,EAAE,MAAM;gBACZ,KAAK,EAAE;oBACN,EAAE,IAAI,EAAE,MAAM,EAAE;oBAChB,EAAE,SAAS,EAAE,EAAE,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE,SAAS,EAAE,IAAI,CAAC,QAAQ,EAAE,EAAE;iBAC/D;aACD;SACD;QACD,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,gBAAgB,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;KAC/C,CAAC;IACF,IAAI,GAAa,CAAC;IAClB,
IAAI,CAAC;QACJ,GAAG,GAAG,MAAM,OAAO,CAAC,GAAG,EAAE;YACxB,MAAM,EAAE,MAAM;YACd,OAAO,EAAE,EAAE,cAAc,EAAE,kBAAkB,EAAE;YAC/C,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;YAC1B,MAAM,EAAE,WAAW,CAAC,MAAM,CAAC,MAAM,EAAE,mBAAmB,CAAC;SACvD,CAAC,CAAC;IACJ,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACd,MAAM,IAAI,+BAA+B,CACxC,QAAQ,EACR,uCAAuC,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CACzF,CAAC;IACH,CAAC;IACD,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC;QACb,MAAM,IAAI,+BAA+B,CACxC,QAAQ,EACR,0BAA0B,MAAM,gBAAgB,CAAC,GAAG,CAAC,EAAE,EACvD,GAAG,CAAC,MAAM,CACV,CAAC;IACH,CAAC;IACD,MAAM,OAAO,GAAG,MAAM,GAAG,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;IACnD,MAAM,IAAI,GAAG,oBAAoB,CAAC,OAAO,CAAC,CAAC;IAC3C,IAAI,CAAC,IAAI,EAAE,CAAC;QACX,MAAM,IAAI,+BAA+B,CAAC,QAAQ,EAAE,wCAAwC,CAAC,CAAC;IAC/F,CAAC;IACD,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC;AAC5C,CAAC;AAED,yEAAyE;AAEzE;;;;GAIG;AACH,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,MAA2B;IAC1D,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC;QACpB,MAAM,IAAI,+BAA+B,CAAC,QAAQ,EAAE,sCAAsC,CAAC,CAAC;IAC7F,CAAC;IACD,IAAI,MAAM,CAAC,IAAI,KAAK,OAAO,EAAE,CAAC;QAC7B,MAAM,QAAQ,GAAG,MAAM,UAAU,CAAC,MAAM,CAAC,CAAC;QAC1C,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;QACrD,OAAO,gBAAgB,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACzC,CAAC;IACD,qCAAqC;IACrC,OAAO,cAAc,CAAC,MAAM,CAAC,CAAC;AAC/B,CAAC;AAED,sBAAsB;AACtB,OAAO,EAAE,oBAAoB,EAAE,gBAAgB,EAAE,CAAC"}
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Media-understanding subsystem — single entry point + provider selection.
|
|
3
|
+
*
|
|
4
|
+
* `runMediaUnderstanding({ kind, bytes, mimeType, prompt, provider?, model?,
|
|
5
|
+
* cfg, fetchImpl? })` resolves a capable provider that has a configured key,
|
|
6
|
+
* calls its REST API directly with the media + prompt, and returns the model's
|
|
7
|
+
* TEXT answer. This is what lets `analyze_media` understand VIDEO (Gemini Files
|
|
8
|
+
* API) and native/scanned PDFs (Anthropic document blocks / Gemini) even though
|
|
9
|
+
* Pi's tool-result channel carries only text + image.
|
|
10
|
+
*
|
|
11
|
+
* Selection (`resolveMediaUnderstandingProvider`):
|
|
12
|
+
* • video → Gemini (the only adapter with video; via the Files API).
|
|
13
|
+
* • pdf → prefer Anthropic (native + OCR for scanned), else Gemini.
|
|
14
|
+
* • image → Anthropic, then Gemini (bespoke REST), then the Pi path (`pi`) —
|
|
15
|
+
* a one-shot `completeSimple` against ANY keyed provider with an
|
|
16
|
+
* image-capable model (OpenAI / Groq / Mistral / OpenRouter / xAI / Ollama
|
|
17
|
+
* / …), so image understanding is no longer limited to google + anthropic.
|
|
18
|
+
* • audio → Gemini ONLY. Pi's content model is text + image (`Model.input` is
|
|
19
|
+
* `("text"|"image")[]`) — NO provider Pi can drive accepts an audio block,
|
|
20
|
+
* so the Pi path is deliberately NOT in the audio chain: routing a voice
|
|
21
|
+
* note through it would stuff audio bytes into an IMAGE block and the
|
|
22
|
+
* provider would reject it (HTTP 400). Audio understanding is Gemini's real
|
|
23
|
+
* capability (inline audio); with no Google key the caller gets a clean
|
|
24
|
+
* "needs a Google/Gemini key" message instead of a 400.
|
|
25
|
+
* A `cfg.preferredProvider[kind]` override wins when that provider can handle the kind and has a key.
|
|
26
|
+
* When NO provider with a key can handle the kind, a clear
|
|
27
|
+
* `MediaUnderstandingUnavailableError` is thrown.
|
|
28
|
+
*/
|
|
29
|
+
import { type MediaUnderstandingConfig, type MediaUnderstandingKind, type MediaUnderstandingProviderId, type RunMediaUnderstandingRequest, type RunMediaUnderstandingResult } from "./types.js";
|
|
30
|
+
export { MediaUnderstandingProviderError, MediaUnderstandingUnavailableError, type MediaUnderstandingConfig, type MediaUnderstandingKind, type MediaUnderstandingModel, type MediaUnderstandingProviderId, type PiCompleteFn, type PiCompleteRequest, type RunMediaUnderstandingRequest, type RunMediaUnderstandingResult, } from "./types.js";
|
|
31
|
+
export { DEFAULT_GEMINI_MODELS, DEFAULT_GEMINI_BASE_URL } from "./gemini-adapter.js";
|
|
32
|
+
export { DEFAULT_ANTHROPIC_MODEL, DEFAULT_ANTHROPIC_BASE_URL } from "./anthropic-adapter.js";
|
|
33
|
+
export { resolvePiModel, runPi, modelAcceptsImage, defaultPiComplete } from "./pi-adapter.js";
|
|
34
|
+
/**
|
|
35
|
+
* Pick a provider that (a) can handle `kind` and (b) has a resolved key.
|
|
36
|
+
* Honors `cfg.preferredProvider[kind]` first when that provider is both
|
|
37
|
+
* capable and keyed. For IMAGE only (audio is Gemini-only), when neither google nor anthropic is
|
|
38
|
+
* keyed but the Pi path can resolve a capable model for SOME keyed provider,
|
|
39
|
+
* returns the virtual `"pi"` provider so every configured provider works.
|
|
40
|
+
* Returns `undefined` when nothing qualifies.
|
|
41
|
+
*/
|
|
42
|
+
export declare function resolveMediaUnderstandingProvider(kind: MediaUnderstandingKind, cfg: MediaUnderstandingConfig): MediaUnderstandingProviderId | undefined;
|
|
43
|
+
/**
|
|
44
|
+
* Run a media-understanding request against a capable, keyed provider and
|
|
45
|
+
* return the textual answer. Throws `MediaUnderstandingUnavailableError` when
|
|
46
|
+
* no provider can serve the kind, or `MediaUnderstandingProviderError` when the
|
|
47
|
+
* chosen provider's API call fails.
|
|
48
|
+
*/
|
|
49
|
+
export declare function runMediaUnderstanding(req: RunMediaUnderstandingRequest): Promise<RunMediaUnderstandingResult>;
|
|
50
|
+
/**
|
|
51
|
+
* A provider error is RETRYABLE (worth a retry / a fallover) when it is a rate
|
|
52
|
+
* limit (429) or a server/transport error (5xx, or a transport throw with no
|
|
53
|
+
* status). A 4xx other than 429 (bad request, auth) is NOT retryable — retrying
|
|
54
|
+
* or falling over wouldn't help and would waste calls. `Unavailable` errors are
|
|
55
|
+
* retryable in the fallover sense (the NEXT provider might serve the kind).
|
|
56
|
+
*/
|
|
57
|
+
export declare function isRetryableError(err: unknown): boolean;
|
|
58
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/agents/media-understanding/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AAKH,OAAO,EAGN,KAAK,wBAAwB,EAC7B,KAAK,sBAAsB,EAC3B,KAAK,4BAA4B,EACjC,KAAK,4BAA4B,EACjC,KAAK,2BAA2B,EAChC,MAAM,YAAY,CAAC;AAEpB,OAAO,EACN,+BAA+B,EAC/B,kCAAkC,EAClC,KAAK,wBAAwB,EAC7B,KAAK,sBAAsB,EAC3B,KAAK,uBAAuB,EAC5B,KAAK,4BAA4B,EACjC,KAAK,YAAY,EACjB,KAAK,iBAAiB,EACtB,KAAK,4BAA4B,EACjC,KAAK,2BAA2B,GAChC,MAAM,YAAY,CAAC;AACpB,OAAO,EAAE,qBAAqB,EAAE,uBAAuB,EAAE,MAAM,qBAAqB,CAAC;AACrF,OAAO,EAAE,uBAAuB,EAAE,0BAA0B,EAAE,MAAM,wBAAwB,CAAC;AAC7F,OAAO,EAAE,cAAc,EAAE,KAAK,EAAE,iBAAiB,EAAE,iBAAiB,EAAE,MAAM,iBAAiB,CAAC;AA2E9F;;;;;;;GAOG;AACH,wBAAgB,iCAAiC,CAChD,IAAI,EAAE,sBAAsB,EAC5B,GAAG,EAAE,wBAAwB,GAC3B,4BAA4B,GAAG,SAAS,CAS1C;AAED;;;;;GAKG;AACH,wBAAsB,qBAAqB,CAC1C,GAAG,EAAE,4BAA4B,GAC/B,OAAO,CAAC,2BAA2B,CAAC,CAuDtC;AAgGD;;;;;;GAMG;AACH,wBAAgB,gBAAgB,CAAC,GAAG,EAAE,OAAO,GAAG,OAAO,CAStD"}
|