@spinabot/brigade 1.9.0 → 1.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +12 -10
- package/dist/agents/agent-loop.d.ts +55 -0
- package/dist/agents/agent-loop.d.ts.map +1 -1
- package/dist/agents/agent-loop.js +90 -1
- package/dist/agents/agent-loop.js.map +1 -1
- package/dist/agents/channels/inbound-pipeline.d.ts +22 -0
- package/dist/agents/channels/inbound-pipeline.d.ts.map +1 -1
- package/dist/agents/channels/inbound-pipeline.js +31 -1
- package/dist/agents/channels/inbound-pipeline.js.map +1 -1
- package/dist/agents/channels/media-capture.d.ts +69 -6
- package/dist/agents/channels/media-capture.d.ts.map +1 -1
- package/dist/agents/channels/media-capture.js +125 -8
- package/dist/agents/channels/media-capture.js.map +1 -1
- package/dist/agents/channels/telegram/media.d.ts.map +1 -1
- package/dist/agents/channels/telegram/media.js +16 -4
- package/dist/agents/channels/telegram/media.js.map +1 -1
- package/dist/agents/channels/whatsapp/media.d.ts +19 -0
- package/dist/agents/channels/whatsapp/media.d.ts.map +1 -1
- package/dist/agents/channels/whatsapp/media.js +37 -2
- package/dist/agents/channels/whatsapp/media.js.map +1 -1
- package/dist/agents/media-understanding/anthropic-adapter.d.ts +49 -0
- package/dist/agents/media-understanding/anthropic-adapter.d.ts.map +1 -0
- package/dist/agents/media-understanding/anthropic-adapter.js +162 -0
- package/dist/agents/media-understanding/anthropic-adapter.js.map +1 -0
- package/dist/agents/media-understanding/config.d.ts +57 -0
- package/dist/agents/media-understanding/config.d.ts.map +1 -0
- package/dist/agents/media-understanding/config.js +289 -0
- package/dist/agents/media-understanding/config.js.map +1 -0
- package/dist/agents/media-understanding/gemini-adapter.d.ts +57 -0
- package/dist/agents/media-understanding/gemini-adapter.d.ts.map +1 -0
- package/dist/agents/media-understanding/gemini-adapter.js +343 -0
- package/dist/agents/media-understanding/gemini-adapter.js.map +1 -0
- package/dist/agents/media-understanding/index.d.ts +58 -0
- package/dist/agents/media-understanding/index.d.ts.map +1 -0
- package/dist/agents/media-understanding/index.js +275 -0
- package/dist/agents/media-understanding/index.js.map +1 -0
- package/dist/agents/media-understanding/pi-adapter.d.ts +72 -0
- package/dist/agents/media-understanding/pi-adapter.d.ts.map +1 -0
- package/dist/agents/media-understanding/pi-adapter.js +160 -0
- package/dist/agents/media-understanding/pi-adapter.js.map +1 -0
- package/dist/agents/media-understanding/types.d.ts +189 -0
- package/dist/agents/media-understanding/types.d.ts.map +1 -0
- package/dist/agents/media-understanding/types.js +51 -0
- package/dist/agents/media-understanding/types.js.map +1 -0
- package/dist/agents/session-wiring.d.ts +11 -0
- package/dist/agents/session-wiring.d.ts.map +1 -1
- package/dist/agents/session-wiring.js +1 -0
- package/dist/agents/session-wiring.js.map +1 -1
- package/dist/agents/tools/analyze-media-tool.d.ts +263 -0
- package/dist/agents/tools/analyze-media-tool.d.ts.map +1 -0
- package/dist/agents/tools/analyze-media-tool.js +2321 -0
- package/dist/agents/tools/analyze-media-tool.js.map +1 -0
- package/dist/agents/tools/doc-shared.d.ts +187 -0
- package/dist/agents/tools/doc-shared.d.ts.map +1 -0
- package/dist/agents/tools/doc-shared.js +484 -0
- package/dist/agents/tools/doc-shared.js.map +1 -0
- package/dist/agents/tools/edit-document-tool.d.ts +133 -0
- package/dist/agents/tools/edit-document-tool.d.ts.map +1 -0
- package/dist/agents/tools/edit-document-tool.js +815 -0
- package/dist/agents/tools/edit-document-tool.js.map +1 -0
- package/dist/agents/tools/image-downscale.d.ts +93 -0
- package/dist/agents/tools/image-downscale.d.ts.map +1 -0
- package/dist/agents/tools/image-downscale.js +257 -0
- package/dist/agents/tools/image-downscale.js.map +1 -0
- package/dist/agents/tools/make-document-tool.d.ts +114 -0
- package/dist/agents/tools/make-document-tool.d.ts.map +1 -0
- package/dist/agents/tools/make-document-tool.js +542 -0
- package/dist/agents/tools/make-document-tool.js.map +1 -0
- package/dist/agents/tools/media-cache.d.ts +56 -0
- package/dist/agents/tools/media-cache.d.ts.map +1 -0
- package/dist/agents/tools/media-cache.js +133 -0
- package/dist/agents/tools/media-cache.js.map +1 -0
- package/dist/agents/tools/ooxml-images.d.ts +107 -0
- package/dist/agents/tools/ooxml-images.d.ts.map +1 -0
- package/dist/agents/tools/ooxml-images.js +308 -0
- package/dist/agents/tools/ooxml-images.js.map +1 -0
- package/dist/agents/tools/registry.d.ts +12 -0
- package/dist/agents/tools/registry.d.ts.map +1 -1
- package/dist/agents/tools/registry.js +47 -0
- package/dist/agents/tools/registry.js.map +1 -1
- package/dist/buildstamp.json +1 -1
- package/dist/cli/commands/doctor.d.ts.map +1 -1
- package/dist/cli/commands/doctor.js +41 -0
- package/dist/cli/commands/doctor.js.map +1 -1
- package/dist/core/console-stream.d.ts.map +1 -1
- package/dist/core/console-stream.js +7 -5
- package/dist/core/console-stream.js.map +1 -1
- package/dist/core/server.js +6 -1
- package/dist/core/server.js.map +1 -1
- package/dist/system-prompt/assembler.d.ts.map +1 -1
- package/dist/system-prompt/assembler.js +25 -1
- package/dist/system-prompt/assembler.js.map +1 -1
- package/dist/system-prompt/guidance.d.ts +30 -0
- package/dist/system-prompt/guidance.d.ts.map +1 -1
- package/dist/system-prompt/guidance.js +50 -0
- package/dist/system-prompt/guidance.js.map +1 -1
- package/package.json +9 -1
|
@@ -0,0 +1,275 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Media-understanding subsystem — single entry point + provider selection.
|
|
3
|
+
*
|
|
4
|
+
* `runMediaUnderstanding({ kind, bytes, mimeType, prompt, provider?, model?,
|
|
5
|
+
* cfg, fetchImpl? })` resolves a capable provider that has a configured key,
|
|
6
|
+
* calls its REST API directly with the media + prompt, and returns the model's
|
|
7
|
+
* TEXT answer. This is what lets `analyze_media` understand VIDEO (Gemini Files
|
|
8
|
+
* API) and native/scanned PDFs (Anthropic document blocks / Gemini) even though
|
|
9
|
+
* Pi's tool-result channel carries only text + image.
|
|
10
|
+
*
|
|
11
|
+
* Selection (`resolveMediaUnderstandingProvider`):
|
|
12
|
+
* • video → Gemini (the only adapter with video; via the Files API).
|
|
13
|
+
* • pdf → prefer Anthropic (native + OCR for scanned), else Gemini.
|
|
14
|
+
* • image → Anthropic, then Gemini (bespoke REST), then the Pi path (`pi`) —
|
|
15
|
+
* a one-shot `completeSimple` against ANY keyed provider with an
|
|
16
|
+
* image-capable model (OpenAI / Groq / Mistral / OpenRouter / xAI / Ollama
|
|
17
|
+
* / …), so image understanding is no longer limited to google + anthropic.
|
|
18
|
+
* • audio → Gemini ONLY. Pi's content model is text + image (`Model.input` is
|
|
19
|
+
* `("text"|"image")[]`) — NO provider Pi can drive accepts an audio block,
|
|
20
|
+
* so the Pi path is deliberately NOT in the audio chain: routing a voice
|
|
21
|
+
* note through it would stuff audio bytes into an IMAGE block and the
|
|
22
|
+
* provider would reject it (HTTP 400). Audio understanding is Gemini's real
|
|
23
|
+
* capability (inline audio); with no Google key the caller gets a clean
|
|
24
|
+
* "needs a Google/Gemini key" message instead of a 400.
|
|
25
|
+
* A `cfg.preferredProvider[kind]` override wins when that provider has a key.
|
|
26
|
+
* When NO provider with a key can handle the kind, a clear
|
|
27
|
+
* `MediaUnderstandingUnavailableError` is thrown.
|
|
28
|
+
*/
|
|
29
|
+
import { runAnthropic } from "./anthropic-adapter.js";
|
|
30
|
+
import { runGemini } from "./gemini-adapter.js";
|
|
31
|
+
import { resolvePiModel, runPi } from "./pi-adapter.js";
|
|
32
|
+
import { MediaUnderstandingProviderError, MediaUnderstandingUnavailableError, } from "./types.js";
|
|
33
|
+
export { MediaUnderstandingProviderError, MediaUnderstandingUnavailableError, } from "./types.js";
|
|
34
|
+
export { DEFAULT_GEMINI_MODELS, DEFAULT_GEMINI_BASE_URL } from "./gemini-adapter.js";
|
|
35
|
+
export { DEFAULT_ANTHROPIC_MODEL, DEFAULT_ANTHROPIC_BASE_URL } from "./anthropic-adapter.js";
|
|
36
|
+
export { resolvePiModel, runPi, modelAcceptsImage, defaultPiComplete } from "./pi-adapter.js";
|
|
37
|
+
/**
|
|
38
|
+
* Built-in provider preference per kind. The FIRST provider in the list that
|
|
39
|
+
* has a resolved key wins. Order encodes "best tool for the job":
|
|
40
|
+
* • video — only Gemini can.
|
|
41
|
+
* • pdf — Anthropic first (native ingestion + OCR for scanned), Gemini next.
|
|
42
|
+
* • image — Anthropic first, Gemini next (both capable; arbitrary tie-break),
|
|
43
|
+
* then the Pi catch-all.
|
|
44
|
+
* • audio — only Gemini. The Pi path is NOT here: Pi carries text + image
|
|
45
|
+
* only, so no Pi-drivable provider can ingest audio (it would 400).
|
|
46
|
+
*/
|
|
47
|
+
const PREFERENCE = {
|
|
48
|
+
video: ["google"],
|
|
49
|
+
pdf: ["anthropic", "google"],
|
|
50
|
+
// `pi` is listed LAST for image: the bespoke google/anthropic REST adapters
|
|
51
|
+
// stay the default when keyed (they're proven + cheap), and the Pi path is
|
|
52
|
+
// the catch-all that makes every OTHER provider work. `pi` is keyed (hasKey)
|
|
53
|
+
// only when `resolvePiModel` can resolve a capable model.
|
|
54
|
+
image: ["anthropic", "google", "pi"],
|
|
55
|
+
// Audio is Gemini-only: Pi's content model (text + image) has no audio block,
|
|
56
|
+
// so the Pi path is intentionally excluded — see the file header.
|
|
57
|
+
audio: ["google"],
|
|
58
|
+
};
|
|
59
|
+
/**
|
|
60
|
+
* True when the provider is usable for selection. For the bespoke REST
|
|
61
|
+
* providers this means a NON-EMPTY resolved key. The virtual `pi` provider is
|
|
62
|
+
* "available" iff the Pi path can resolve an image-capable model for some keyed
|
|
63
|
+
* provider — it has no key of its own (the per-model provider key is resolved
|
|
64
|
+
* inside the Pi adapter). `kind` is forwarded to the Pi probe (`resolvePiModel`).
|
|
65
|
+
*/
|
|
66
|
+
function hasKey(cfg, provider, kind) {
|
|
67
|
+
if (provider === "pi") {
|
|
68
|
+
return Boolean(resolvePiModel(kind, cfg));
|
|
69
|
+
}
|
|
70
|
+
try {
|
|
71
|
+
return Boolean(cfg.resolveKey(provider));
|
|
72
|
+
}
|
|
73
|
+
catch {
|
|
74
|
+
return false;
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
/** Human-friendly "configure a key" hint per kind, naming the capable providers. */
|
|
78
|
+
function unavailableMessage(kind) {
|
|
79
|
+
switch (kind) {
|
|
80
|
+
case "video":
|
|
81
|
+
return ("Video understanding needs a Google/Gemini API key. " +
|
|
82
|
+
"Add one with `brigade onboard` (or set GEMINI_API_KEY) and try again.");
|
|
83
|
+
case "pdf":
|
|
84
|
+
return ("Native/scanned-PDF understanding needs an Anthropic or Google/Gemini API key. " +
|
|
85
|
+
"Add one with `brigade onboard` (the text-extraction fallback is used otherwise).");
|
|
86
|
+
case "audio":
|
|
87
|
+
return ("Audio understanding needs a Google/Gemini API key (the only provider here that ingests audio). " +
|
|
88
|
+
"Add one with `brigade onboard` (or set GEMINI_API_KEY) and try again.");
|
|
89
|
+
case "image":
|
|
90
|
+
default:
|
|
91
|
+
return ("Image understanding via a provider needs an Anthropic or Google/Gemini key, or any provider " +
|
|
92
|
+
"(OpenAI / OpenRouter / Groq / xAI / Mistral / Ollama / …) whose model accepts image input. " +
|
|
93
|
+
"Add one with `brigade onboard`.");
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
/**
|
|
97
|
+
* Pick a provider that (a) can handle `kind` and (b) has a resolved key.
|
|
98
|
+
* Honors `cfg.preferredProvider[kind]` first when that provider is both
|
|
99
|
+
* capable and keyed. For IMAGE only, when neither google nor anthropic is
|
|
100
|
+
* keyed but the Pi path can resolve a capable model for SOME keyed provider,
|
|
101
|
+
* returns the virtual `"pi"` provider so every configured provider works.
|
|
102
|
+
* Returns `undefined` when nothing qualifies.
|
|
103
|
+
*/
|
|
104
|
+
export function resolveMediaUnderstandingProvider(kind, cfg) {
|
|
105
|
+
const capable = PREFERENCE[kind] ?? [];
|
|
106
|
+
// Config-pinned preference wins when it is capable for this kind AND keyed.
|
|
107
|
+
const pinned = cfg.preferredProvider?.[kind];
|
|
108
|
+
if (pinned && capable.includes(pinned) && hasKey(cfg, pinned, kind))
|
|
109
|
+
return pinned;
|
|
110
|
+
for (const provider of capable) {
|
|
111
|
+
if (hasKey(cfg, provider, kind))
|
|
112
|
+
return provider;
|
|
113
|
+
}
|
|
114
|
+
return undefined;
|
|
115
|
+
}
|
|
116
|
+
/**
|
|
117
|
+
* Run a media-understanding request against a capable, keyed provider and
|
|
118
|
+
* return the textual answer. Throws `MediaUnderstandingUnavailableError` when
|
|
119
|
+
* no provider can serve the kind, or `MediaUnderstandingProviderError` when the
|
|
120
|
+
* chosen provider's API call fails.
|
|
121
|
+
*/
|
|
122
|
+
export async function runMediaUnderstanding(req) {
|
|
123
|
+
const { kind, cfg } = req;
|
|
124
|
+
// An explicit provider override must still be capable for the kind AND keyed.
|
|
125
|
+
let provider;
|
|
126
|
+
if (req.provider) {
|
|
127
|
+
const capable = PREFERENCE[kind] ?? [];
|
|
128
|
+
if (!capable.includes(req.provider)) {
|
|
129
|
+
throw new MediaUnderstandingUnavailableError(kind, `Provider "${req.provider}" cannot handle ${kind}. Capable: ${capable.join(", ") || "none"}.`);
|
|
130
|
+
}
|
|
131
|
+
if (!hasKey(cfg, req.provider, kind)) {
|
|
132
|
+
throw new MediaUnderstandingUnavailableError(kind, `Provider "${req.provider}" has no configured API key. ${unavailableMessage(kind)}`);
|
|
133
|
+
}
|
|
134
|
+
provider = req.provider;
|
|
135
|
+
}
|
|
136
|
+
else {
|
|
137
|
+
provider = resolveMediaUnderstandingProvider(kind, cfg);
|
|
138
|
+
}
|
|
139
|
+
if (!provider) {
|
|
140
|
+
throw new MediaUnderstandingUnavailableError(kind, unavailableMessage(kind));
|
|
141
|
+
}
|
|
142
|
+
// Build the ordered provider CHAIN to try. An explicit override pins exactly
|
|
143
|
+
// one provider (honour the operator's pick — no cross-provider fallback). Auto
|
|
144
|
+
// selection walks every capable+keyed provider in preference order, so a 429 /
|
|
145
|
+
// 5xx on the first (e.g. Anthropic) fails over to the next (e.g. Gemini) before
|
|
146
|
+
// giving up. Each provider still gets a bounded retry (below).
|
|
147
|
+
const chain = req.provider
|
|
148
|
+
? [provider]
|
|
149
|
+
: orderedKeyedProviders(kind, cfg, provider);
|
|
150
|
+
let lastError;
|
|
151
|
+
for (let i = 0; i < chain.length; i++) {
|
|
152
|
+
const candidate = chain[i];
|
|
153
|
+
try {
|
|
154
|
+
return await runOneProviderWithRetry(candidate, req);
|
|
155
|
+
}
|
|
156
|
+
catch (err) {
|
|
157
|
+
lastError = err;
|
|
158
|
+
// Fall over to the NEXT provider only on a transient/availability error.
|
|
159
|
+
// Any error from the LAST provider propagates, and a non-retryable error
|
|
160
|
+
// (e.g. a 400 bad request) propagates immediately from any provider. A
|
|
161
|
+
// retryable error on a non-last provider falls through to the next candidate.
|
|
162
|
+
const isLast = i === chain.length - 1;
|
|
163
|
+
if (isLast)
|
|
164
|
+
throw err;
|
|
165
|
+
if (!isRetryableError(err))
|
|
166
|
+
throw err;
|
|
167
|
+
// else: try the next provider in the chain.
|
|
168
|
+
}
|
|
169
|
+
}
|
|
170
|
+
// Chain was non-empty (provider resolved), so we either returned or threw
|
|
171
|
+
// above; this is unreachable, but satisfy the type checker.
|
|
172
|
+
throw lastError ?? new MediaUnderstandingUnavailableError(kind, unavailableMessage(kind));
|
|
173
|
+
}
|
|
174
|
+
/**
|
|
175
|
+
* The capable+keyed providers for `kind`, in preference order, starting from the
|
|
176
|
+
* resolved `first`. De-duplicates and keeps only providers that currently have a
|
|
177
|
+
* key (or, for `pi`, a resolvable model).
|
|
178
|
+
*/
|
|
179
|
+
function orderedKeyedProviders(kind, cfg, first) {
|
|
180
|
+
const ordered = [];
|
|
181
|
+
const push = (p) => {
|
|
182
|
+
if (!ordered.includes(p) && hasKey(cfg, p, kind))
|
|
183
|
+
ordered.push(p);
|
|
184
|
+
};
|
|
185
|
+
push(first);
|
|
186
|
+
for (const p of PREFERENCE[kind] ?? [])
|
|
187
|
+
push(p);
|
|
188
|
+
return ordered;
|
|
189
|
+
}
|
|
190
|
+
/**
 * Run ONE provider with a bounded retry on transient (429/5xx/transport) errors.
 *
 * Reads three optional fields from `req`:
 *   - `maxRetries` — number of EXTRA attempts after the first (default 1,
 *     clamped to >= 0). A value that coerces to NaN is treated as "not set".
 *   - `sleepFn`    — awaited between attempts with the backoff delay in ms
 *     (defaults to `defaultSleep`).
 *   - `signal`     — once aborted, no further retry is scheduled.
 *
 * @param provider Provider id, passed through unchanged to `runOneProvider`.
 * @param req The media-understanding request, passed through unchanged.
 * @returns The value `runOneProvider` resolves to.
 * @throws The error from the most recent attempt when the retry budget is
 *   spent, the request was aborted, or `isRetryableError` rejects the error.
 */
async function runOneProviderWithRetry(provider, req) {
    // `Math.max(0, NaN)` is NaN and `attempt >= NaN` is always false, which
    // used to turn a NaN `maxRetries` into an unbounded retry loop. Fall back
    // to the default single retry instead; every other value keeps the same
    // numeric coercion `Math.max` applied before.
    const requested = Number(req.maxRetries ?? 1);
    const maxRetries = Number.isNaN(requested) ? 1 : Math.max(0, requested);
    const sleep = req.sleepFn ?? defaultSleep;
    let attempt = 0;
    for (;;) {
        try {
            return await runOneProvider(provider, req);
        }
        catch (err) {
            // A cancelled request must not be re-issued: surface the error now
            // rather than sleeping and calling the provider again.
            if (attempt >= maxRetries || req.signal?.aborted || !isRetryableError(err))
                throw err;
            attempt += 1;
            // Exponential backoff: 250ms, 500ms, … (bounded by attempt count).
            await sleep(250 * 2 ** (attempt - 1));
        }
    }
}
|
|
208
|
+
/** Dispatch a single attempt to the chosen provider's adapter. */
|
|
209
|
+
function runOneProvider(provider, req) {
|
|
210
|
+
const { kind, cfg } = req;
|
|
211
|
+
// Pi path (the general image route — any provider with an image-capable
|
|
212
|
+
// model). Resolves its own per-model provider key inside the adapter.
|
|
213
|
+
if (provider === "pi") {
|
|
214
|
+
return runPi({
|
|
215
|
+
kind,
|
|
216
|
+
bytes: req.bytes,
|
|
217
|
+
mimeType: req.mimeType,
|
|
218
|
+
cfg,
|
|
219
|
+
...(req.prompt !== undefined ? { prompt: req.prompt } : {}),
|
|
220
|
+
...(req.signal !== undefined ? { signal: req.signal } : {}),
|
|
221
|
+
});
|
|
222
|
+
}
|
|
223
|
+
const model = req.model ?? cfg.defaultModels?.[kind];
|
|
224
|
+
const apiKey = cfg.resolveKey(provider);
|
|
225
|
+
if (provider === "google") {
|
|
226
|
+
return runGemini({
|
|
227
|
+
kind,
|
|
228
|
+
bytes: req.bytes,
|
|
229
|
+
mimeType: req.mimeType,
|
|
230
|
+
apiKey,
|
|
231
|
+
...(req.prompt !== undefined ? { prompt: req.prompt } : {}),
|
|
232
|
+
...(model !== undefined ? { model } : {}),
|
|
233
|
+
...(req.maxTokens !== undefined ? { maxTokens: req.maxTokens } : {}),
|
|
234
|
+
...(cfg.geminiBaseUrl !== undefined ? { baseUrl: cfg.geminiBaseUrl } : {}),
|
|
235
|
+
...(req.fetchImpl !== undefined ? { fetchFn: req.fetchImpl } : {}),
|
|
236
|
+
...(req.signal !== undefined ? { signal: req.signal } : {}),
|
|
237
|
+
});
|
|
238
|
+
}
|
|
239
|
+
// anthropic
|
|
240
|
+
return runAnthropic({
|
|
241
|
+
kind,
|
|
242
|
+
bytes: req.bytes,
|
|
243
|
+
mimeType: req.mimeType,
|
|
244
|
+
apiKey,
|
|
245
|
+
...(req.prompt !== undefined ? { prompt: req.prompt } : {}),
|
|
246
|
+
...(model !== undefined ? { model } : {}),
|
|
247
|
+
...(req.maxTokens !== undefined ? { maxTokens: req.maxTokens } : {}),
|
|
248
|
+
...(cfg.anthropicBaseUrl !== undefined ? { baseUrl: cfg.anthropicBaseUrl } : {}),
|
|
249
|
+
...(req.fetchImpl !== undefined ? { fetchFn: req.fetchImpl } : {}),
|
|
250
|
+
...(req.signal !== undefined ? { signal: req.signal } : {}),
|
|
251
|
+
});
|
|
252
|
+
}
|
|
253
|
+
/** Default inter-retry sleep (unref'd so it never holds the event loop open). */
|
|
254
|
+
const defaultSleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms).unref?.());
|
|
255
|
+
/**
|
|
256
|
+
* A provider error is RETRYABLE (worth a retry / a fallover) when it is a rate
|
|
257
|
+
* limit (429) or a server/transport error (5xx, or a transport throw with no
|
|
258
|
+
* status). A 4xx other than 429 (bad request, auth) is NOT retryable — retrying
|
|
259
|
+
* or falling over wouldn't help and would waste calls. `Unavailable` errors are
|
|
260
|
+
* retryable in the fallover sense (the NEXT provider might serve the kind).
|
|
261
|
+
*/
|
|
262
|
+
export function isRetryableError(err) {
|
|
263
|
+
if (err instanceof MediaUnderstandingUnavailableError)
|
|
264
|
+
return true;
|
|
265
|
+
if (err instanceof MediaUnderstandingProviderError) {
|
|
266
|
+
const status = err.status;
|
|
267
|
+
if (status === undefined)
|
|
268
|
+
return true; // transport throw — no HTTP status
|
|
269
|
+
if (status === 429)
|
|
270
|
+
return true;
|
|
271
|
+
return status >= 500 && status <= 599;
|
|
272
|
+
}
|
|
273
|
+
return false;
|
|
274
|
+
}
|
|
275
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/agents/media-understanding/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,wBAAwB,CAAC;AACtD,OAAO,EAAE,SAAS,EAAE,MAAM,qBAAqB,CAAC;AAChD,OAAO,EAAE,cAAc,EAAE,KAAK,EAAE,MAAM,iBAAiB,CAAC;AACxD,OAAO,EACN,+BAA+B,EAC/B,kCAAkC,GAMlC,MAAM,YAAY,CAAC;AAEpB,OAAO,EACN,+BAA+B,EAC/B,kCAAkC,GASlC,MAAM,YAAY,CAAC;AACpB,OAAO,EAAE,qBAAqB,EAAE,uBAAuB,EAAE,MAAM,qBAAqB,CAAC;AACrF,OAAO,EAAE,uBAAuB,EAAE,0BAA0B,EAAE,MAAM,wBAAwB,CAAC;AAC7F,OAAO,EAAE,cAAc,EAAE,KAAK,EAAE,iBAAiB,EAAE,iBAAiB,EAAE,MAAM,iBAAiB,CAAC;AAE9F;;;;;;;;;GASG;AACH,MAAM,UAAU,GAAmE;IAClF,KAAK,EAAE,CAAC,QAAQ,CAAC;IACjB,GAAG,EAAE,CAAC,WAAW,EAAE,QAAQ,CAAC;IAC5B,4EAA4E;IAC5E,2EAA2E;IAC3E,6EAA6E;IAC7E,0DAA0D;IAC1D,KAAK,EAAE,CAAC,WAAW,EAAE,QAAQ,EAAE,IAAI,CAAC;IACpC,8EAA8E;IAC9E,kEAAkE;IAClE,KAAK,EAAE,CAAC,QAAQ,CAAC;CACjB,CAAC;AAEF;;;;;;GAMG;AACH,SAAS,MAAM,CACd,GAA6B,EAC7B,QAAsC,EACtC,IAA4B;IAE5B,IAAI,QAAQ,KAAK,IAAI,EAAE,CAAC;QACvB,OAAO,OAAO,CAAC,cAAc,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC,CAAC;IAC3C,CAAC;IACD,IAAI,CAAC;QACJ,OAAO,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC,CAAC;IAC1C,CAAC;IAAC,MAAM,CAAC;QACR,OAAO,KAAK,CAAC;IACd,CAAC;AACF,CAAC;AAED,oFAAoF;AACpF,SAAS,kBAAkB,CAAC,IAA4B;IACvD,QAAQ,IAAI,EAAE,CAAC;QACd,KAAK,OAAO;YACX,OAAO,CACN,qDAAqD;gBACrD,uEAAuE,CACvE,CAAC;QACH,KAAK,KAAK;YACT,OAAO,CACN,gFAAgF;gBAChF,kFAAkF,CAClF,CAAC;QACH,KAAK,OAAO;YACX,OAAO,CACN,iGAAiG;gBACjG,uEAAuE,CACvE,CAAC;QACH,KAAK,OAAO,CAAC;QACb;YACC,OAAO,CACN,8FAA8F;gBAC9F,6FAA6F;gBAC7F,iCAAiC,CACjC,CAAC;IACJ,CAAC;AACF,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,iCAAiC,CAChD,IAA4B,EAC5B,GAA6B;IAE7B,MAAM,OAAO,GAAG,UAAU,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;IACvC,4EAA4E;IAC5E,MAAM,MAAM,GAAG,GAAG,CAAC,iBAAiB,EAAE,CAAC,IAAI,CAAC,CAAC;IAC7C,IAAI,MAAM,IAAI,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,MAAM,CAAC,GAAG,EAAE,MAAM,EAAE,IAAI,CAAC;QAAE,OAAO,MAAM,CAAC;IACnF,KAAK,MAAM,QAAQ,IAAI,OAAO,EAAE,CAAC;QAChC,IAAI,MAAM,CAAC,GAAG,EAAE,QAAQ,EAAE,IAAI,CAAC;YAAE,OAAO,QAAQ,CAAC;IAClD,CAA
C;IACD,OAAO,SAAS,CAAC;AAClB,CAAC;AAED;;;;;GAKG;AACH,MAAM,CAAC,KAAK,UAAU,qBAAqB,CAC1C,GAAiC;IAEjC,MAAM,EAAE,IAAI,EAAE,GAAG,EAAE,GAAG,GAAG,CAAC;IAC1B,8EAA8E;IAC9E,IAAI,QAAkD,CAAC;IACvD,IAAI,GAAG,CAAC,QAAQ,EAAE,CAAC;QAClB,MAAM,OAAO,GAAG,UAAU,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;QACvC,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC;YACrC,MAAM,IAAI,kCAAkC,CAC3C,IAAI,EACJ,aAAa,GAAG,CAAC,QAAQ,mBAAmB,IAAI,cAAc,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,MAAM,GAAG,CAC7F,CAAC;QACH,CAAC;QACD,IAAI,CAAC,MAAM,CAAC,GAAG,EAAE,GAAG,CAAC,QAAQ,EAAE,IAAI,CAAC,EAAE,CAAC;YACtC,MAAM,IAAI,kCAAkC,CAC3C,IAAI,EACJ,aAAa,GAAG,CAAC,QAAQ,gCAAgC,kBAAkB,CAAC,IAAI,CAAC,EAAE,CACnF,CAAC;QACH,CAAC;QACD,QAAQ,GAAG,GAAG,CAAC,QAAQ,CAAC;IACzB,CAAC;SAAM,CAAC;QACP,QAAQ,GAAG,iCAAiC,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;IACzD,CAAC;IACD,IAAI,CAAC,QAAQ,EAAE,CAAC;QACf,MAAM,IAAI,kCAAkC,CAAC,IAAI,EAAE,kBAAkB,CAAC,IAAI,CAAC,CAAC,CAAC;IAC9E,CAAC;IAED,6EAA6E;IAC7E,+EAA+E;IAC/E,+EAA+E;IAC/E,gFAAgF;IAChF,+DAA+D;IAC/D,MAAM,KAAK,GAAmC,GAAG,CAAC,QAAQ;QACzD,CAAC,CAAC,CAAC,QAAQ,CAAC;QACZ,CAAC,CAAC,qBAAqB,CAAC,IAAI,EAAE,GAAG,EAAE,QAAQ,CAAC,CAAC;IAE9C,IAAI,SAAkB,CAAC;IACvB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACvC,MAAM,SAAS,GAAG,KAAK,CAAC,CAAC,CAAiC,CAAC;QAC3D,IAAI,CAAC;YACJ,OAAO,MAAM,uBAAuB,CAAC,SAAS,EAAE,GAAG,CAAC,CAAC;QACtD,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACd,SAAS,GAAG,GAAG,CAAC;YAChB,yEAAyE;YACzE,sEAAsE;YACtE,sEAAsE;YACtE,iCAAiC;YACjC,MAAM,MAAM,GAAG,CAAC,KAAK,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC;YACtC,IAAI,MAAM;gBAAE,MAAM,GAAG,CAAC;YACtB,IAAI,CAAC,gBAAgB,CAAC,GAAG,CAAC;gBAAE,MAAM,GAAG,CAAC;YACtC,4CAA4C;QAC7C,CAAC;IACF,CAAC;IACD,0EAA0E;IAC1E,4DAA4D;IAC5D,MAAM,SAAS,IAAI,IAAI,kCAAkC,CAAC,IAAI,EAAE,kBAAkB,CAAC,IAAI,CAAC,CAAC,CAAC;AAC3F,CAAC;AAED;;;;GAIG;AACH,SAAS,qBAAqB,CAC7B,IAA4B,EAC5B,GAA6B,EAC7B,KAAmC;IAEnC,MAAM,OAAO,GAAmC,EAAE,CAAC;IACnD,MAAM,IAAI,GAAG,CAAC,CAA+B,EAAE,EAAE;QAChD,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,MAAM,CAAC,GAAG,EAAE,CAAC,EAAE,IAAI,CAAC;YAAE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CA
AC;IACnE,CAAC,CAAC;IACF,IAAI,CAAC,KAAK,CAAC,CAAC;IACZ,KAAK,MAAM,CAAC,IAAI,UAAU,CAAC,IAAI,CAAC,IAAI,EAAE;QAAE,IAAI,CAAC,CAAC,CAAC,CAAC;IAChD,OAAO,OAAO,CAAC;AAChB,CAAC;AAED,qFAAqF;AACrF,KAAK,UAAU,uBAAuB,CACrC,QAAsC,EACtC,GAAiC;IAEjC,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,GAAG,CAAC,UAAU,IAAI,CAAC,CAAC,CAAC;IACpD,MAAM,KAAK,GAAG,GAAG,CAAC,OAAO,IAAI,YAAY,CAAC;IAC1C,IAAI,OAAO,GAAG,CAAC,CAAC;IAChB,SAAS,CAAC;QACT,IAAI,CAAC;YACJ,OAAO,MAAM,cAAc,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;QAC5C,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACd,IAAI,OAAO,IAAI,UAAU,IAAI,CAAC,gBAAgB,CAAC,GAAG,CAAC;gBAAE,MAAM,GAAG,CAAC;YAC/D,OAAO,IAAI,CAAC,CAAC;YACb,mEAAmE;YACnE,MAAM,KAAK,CAAC,GAAG,GAAG,CAAC,IAAI,CAAC,OAAO,GAAG,CAAC,CAAC,CAAC,CAAC;QACvC,CAAC;IACF,CAAC;AACF,CAAC;AAED,kEAAkE;AAClE,SAAS,cAAc,CACtB,QAAsC,EACtC,GAAiC;IAEjC,MAAM,EAAE,IAAI,EAAE,GAAG,EAAE,GAAG,GAAG,CAAC;IAC1B,8EAA8E;IAC9E,sEAAsE;IACtE,IAAI,QAAQ,KAAK,IAAI,EAAE,CAAC;QACvB,OAAO,KAAK,CAAC;YACZ,IAAI;YACJ,KAAK,EAAE,GAAG,CAAC,KAAK;YAChB,QAAQ,EAAE,GAAG,CAAC,QAAQ;YACtB,GAAG;YACH,GAAG,CAAC,GAAG,CAAC,MAAM,KAAK,SAAS,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,GAAG,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YAC3D,GAAG,CAAC,GAAG,CAAC,MAAM,KAAK,SAAS,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,GAAG,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;SAC3D,CAAC,CAAC;IACJ,CAAC;IAED,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,IAAI,GAAG,CAAC,aAAa,EAAE,CAAC,IAAI,CAAC,CAAC;IACrD,MAAM,MAAM,GAAG,GAAG,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC;IAExC,IAAI,QAAQ,KAAK,QAAQ,EAAE,CAAC;QAC3B,OAAO,SAAS,CAAC;YAChB,IAAI;YACJ,KAAK,EAAE,GAAG,CAAC,KAAK;YAChB,QAAQ,EAAE,GAAG,CAAC,QAAQ;YACtB,MAAM;YACN,GAAG,CAAC,GAAG,CAAC,MAAM,KAAK,SAAS,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,GAAG,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YAC3D,GAAG,CAAC,KAAK,KAAK,SAAS,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YACzC,GAAG,CAAC,GAAG,CAAC,SAAS,KAAK,SAAS,CAAC,CAAC,CAAC,EAAE,SAAS,EAAE,GAAG,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YACpE,GAAG,CAAC,GAAG,CAAC,aAAa,KAAK,SAAS,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,GAAG,CAAC,aAAa,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YAC1E,GAAG,CAAC,GAAG,CAAC,SAAS,KA
AK,SAAS,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,GAAG,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YAClE,GAAG,CAAC,GAAG,CAAC,MAAM,KAAK,SAAS,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,GAAG,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;SAC3D,CAAC,CAAC;IACJ,CAAC;IACD,YAAY;IACZ,OAAO,YAAY,CAAC;QACnB,IAAI;QACJ,KAAK,EAAE,GAAG,CAAC,KAAK;QAChB,QAAQ,EAAE,GAAG,CAAC,QAAQ;QACtB,MAAM;QACN,GAAG,CAAC,GAAG,CAAC,MAAM,KAAK,SAAS,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,GAAG,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QAC3D,GAAG,CAAC,KAAK,KAAK,SAAS,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QACzC,GAAG,CAAC,GAAG,CAAC,SAAS,KAAK,SAAS,CAAC,CAAC,CAAC,EAAE,SAAS,EAAE,GAAG,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QACpE,GAAG,CAAC,GAAG,CAAC,gBAAgB,KAAK,SAAS,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,GAAG,CAAC,gBAAgB,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QAChF,GAAG,CAAC,GAAG,CAAC,SAAS,KAAK,SAAS,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,GAAG,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QAClE,GAAG,CAAC,GAAG,CAAC,MAAM,KAAK,SAAS,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,GAAG,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;KAC3D,CAAC,CAAC;AACJ,CAAC;AAED,iFAAiF;AACjF,MAAM,YAAY,GAAG,CAAC,EAAU,EAAiB,EAAE,CAClD,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;AAE7D;;;;;;GAMG;AACH,MAAM,UAAU,gBAAgB,CAAC,GAAY;IAC5C,IAAI,GAAG,YAAY,kCAAkC;QAAE,OAAO,IAAI,CAAC;IACnE,IAAI,GAAG,YAAY,+BAA+B,EAAE,CAAC;QACpD,MAAM,MAAM,GAAG,GAAG,CAAC,MAAM,CAAC;QAC1B,IAAI,MAAM,KAAK,SAAS;YAAE,OAAO,IAAI,CAAC,CAAC,mCAAmC;QAC1E,IAAI,MAAM,KAAK,GAAG;YAAE,OAAO,IAAI,CAAC;QAChC,OAAO,MAAM,IAAI,GAAG,IAAI,MAAM,IAAI,GAAG,CAAC;IACvC,CAAC;IACD,OAAO,KAAK,CAAC;AACd,CAAC"}
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pi-SDK media-understanding adapter — the GENERAL image path.
|
|
3
|
+
*
|
|
4
|
+
* The bespoke google/anthropic REST adapters only cover two providers, so an
|
|
5
|
+
* operator whose only key is OpenAI / Groq / Mistral / OpenRouter / xAI /
|
|
6
|
+
* Ollama would get NO image understanding. This adapter closes that gap by
|
|
7
|
+
* running a ONE-SHOT completion through the Pi SDK (`completeSimple` from
|
|
8
|
+
* `@earendil-works/pi-ai`): it resolves an image-capable `Model` for the
|
|
9
|
+
* provider, ships the media as an `ImageContent` block + the prompt, and
|
|
10
|
+
* returns the assistant's TEXT — exactly the same shape the REST adapters
|
|
11
|
+
* return. Pi's content model is text + image, so this covers IMAGE for every
|
|
12
|
+
* provider that declares image input.
|
|
13
|
+
*
|
|
14
|
+
* AUDIO is NOT served here. Pi's `Model.input` is `("text"|"image")[]` — there
|
|
15
|
+
* is no audio modality anywhere in the SDK content model — so a voice note
|
|
16
|
+
* routed through this path would be packed into an IMAGE block and rejected by
|
|
17
|
+
* the provider (HTTP 400). Audio understanding is Gemini-only (its real inline-
|
|
18
|
+
* audio capability); the audio chain in `index.ts` excludes `pi` accordingly.
|
|
19
|
+
*
|
|
20
|
+
* VIDEO (Gemini Files API) and native PDF (Anthropic `document` block) are NOT
|
|
21
|
+
* served here either — Pi has no video or document content block — so those
|
|
22
|
+
* stay on the bespoke adapters.
|
|
23
|
+
*
|
|
24
|
+
* The actual model call is injected as `cfg.piComplete` (defaulting to the
|
|
25
|
+
* `completeSimple` wrapper below) so the whole subsystem stays testable with
|
|
26
|
+
* zero real model traffic.
|
|
27
|
+
*/
|
|
28
|
+
import { type MediaUnderstandingConfig, type MediaUnderstandingKind, type MediaUnderstandingModel, type PiCompleteRequest, type RunMediaUnderstandingResult } from "./types.js";
|
|
29
|
+
/** True when a resolved model declares image input. */
|
|
30
|
+
export declare function modelAcceptsImage(model: MediaUnderstandingModel | undefined): boolean;
|
|
31
|
+
/**
|
|
32
|
+
* Pick the provider + image-capable model the Pi path should use.
|
|
33
|
+
* • An explicit `provider` override is tried first (must resolve a model).
|
|
34
|
+
* • Otherwise walk `cfg.listKeyedProviders()` (most-preferred first) and pick
|
|
35
|
+
* the first provider whose `cfg.resolveModel` yields an image-capable model.
|
|
36
|
+
* Returns `undefined` when the Pi path is unwired (no `resolveModel`) or no
|
|
37
|
+
* keyed provider has an image-capable model.
|
|
38
|
+
*
|
|
39
|
+
* The Pi path is image-only (Pi carries no audio block), so a resolved model
|
|
40
|
+
* MUST declare image input regardless of `kind` — there is no audio escape
|
|
41
|
+
* hatch. Audio never reaches here (the audio chain in `index.ts` excludes
|
|
42
|
+
* `pi`); the image gate below is the defensive backstop.
|
|
43
|
+
*/
|
|
44
|
+
export declare function resolvePiModel(kind: MediaUnderstandingKind, cfg: MediaUnderstandingConfig, providerOverride?: string): MediaUnderstandingModel | undefined;
|
|
45
|
+
export interface RunPiParams {
|
|
46
|
+
kind: MediaUnderstandingKind;
|
|
47
|
+
bytes: Buffer;
|
|
48
|
+
mimeType: string;
|
|
49
|
+
cfg: MediaUnderstandingConfig;
|
|
50
|
+
prompt?: string;
|
|
51
|
+
/** Explicit provider override (else the first keyed provider with a model). */
|
|
52
|
+
provider?: string;
|
|
53
|
+
/** Pre-resolved model (selection already done by the caller). */
|
|
54
|
+
model?: MediaUnderstandingModel;
|
|
55
|
+
signal?: AbortSignal;
|
|
56
|
+
}
|
|
57
|
+
/**
|
|
58
|
+
* Run a one-shot image understanding via the Pi SDK. Resolves a model
|
|
59
|
+
* (unless one was passed), resolves its provider key, and calls `cfg.piComplete`
|
|
60
|
+
* (the `completeSimple` wrapper by default). Throws
|
|
61
|
+
* `MediaUnderstandingUnavailableError` when no capable model resolves, or
|
|
62
|
+
* `MediaUnderstandingProviderError` when the model call fails / returns empty.
|
|
63
|
+
*/
|
|
64
|
+
export declare function runPi(params: RunPiParams): Promise<RunMediaUnderstandingResult>;
|
|
65
|
+
/**
|
|
66
|
+
* Default `PiCompleteFn` — lazily imports `completeSimple` from
|
|
67
|
+
* `@earendil-works/pi-ai` and runs a single user turn carrying the media as an
|
|
68
|
+
* image content block + the prompt. Lazy import keeps the SDK off the
|
|
69
|
+
* subsystem's cold-start path and lets tests inject a stub without loading it.
|
|
70
|
+
*/
|
|
71
|
+
export declare function defaultPiComplete(req: PiCompleteRequest): Promise<string>;
|
|
72
|
+
//# sourceMappingURL=pi-adapter.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pi-adapter.d.ts","sourceRoot":"","sources":["../../../src/agents/media-understanding/pi-adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AAEH,OAAO,EAGN,KAAK,wBAAwB,EAC7B,KAAK,sBAAsB,EAC3B,KAAK,uBAAuB,EAC5B,KAAK,iBAAiB,EACtB,KAAK,2BAA2B,EAChC,MAAM,YAAY,CAAC;AAOpB,uDAAuD;AACvD,wBAAgB,iBAAiB,CAAC,KAAK,EAAE,uBAAuB,GAAG,SAAS,GAAG,OAAO,CAErF;AAED;;;;;;;;;;;;GAYG;AACH,wBAAgB,cAAc,CAC7B,IAAI,EAAE,sBAAsB,EAC5B,GAAG,EAAE,wBAAwB,EAC7B,gBAAgB,CAAC,EAAE,MAAM,GACvB,uBAAuB,GAAG,SAAS,CAwBrC;AAED,MAAM,WAAW,WAAW;IAC3B,IAAI,EAAE,sBAAsB,CAAC;IAC7B,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,MAAM,CAAC;IACjB,GAAG,EAAE,wBAAwB,CAAC;IAC9B,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,+EAA+E;IAC/E,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,iEAAiE;IACjE,KAAK,CAAC,EAAE,uBAAuB,CAAC;IAChC,MAAM,CAAC,EAAE,WAAW,CAAC;CACrB;AAED;;;;;;GAMG;AACH,wBAAsB,KAAK,CAAC,MAAM,EAAE,WAAW,GAAG,OAAO,CAAC,2BAA2B,CAAC,CAiDrF;AAED;;;;;GAKG;AACH,wBAAsB,iBAAiB,CAAC,GAAG,EAAE,iBAAiB,GAAG,OAAO,CAAC,MAAM,CAAC,CA2B/E"}
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pi-SDK media-understanding adapter — the GENERAL image path.
|
|
3
|
+
*
|
|
4
|
+
* The bespoke google/anthropic REST adapters only cover two providers, so an
|
|
5
|
+
* operator whose only key is OpenAI / Groq / Mistral / OpenRouter / xAI /
|
|
6
|
+
* Ollama would get NO image understanding. This adapter closes that gap by
|
|
7
|
+
* running a ONE-SHOT completion through the Pi SDK (`completeSimple` from
|
|
8
|
+
* `@earendil-works/pi-ai`): it resolves an image-capable `Model` for the
|
|
9
|
+
* provider, ships the media as an `ImageContent` block + the prompt, and
|
|
10
|
+
* returns the assistant's TEXT — exactly the same shape the REST adapters
|
|
11
|
+
* return. Pi's content model is text + image, so this covers IMAGE for every
|
|
12
|
+
* provider that declares image input.
|
|
13
|
+
*
|
|
14
|
+
* AUDIO is NOT served here. Pi's `Model.input` is `("text"|"image")[]` — there
|
|
15
|
+
* is no audio modality anywhere in the SDK content model — so a voice note
|
|
16
|
+
* routed through this path would be packed into an IMAGE block and rejected by
|
|
17
|
+
* the provider (HTTP 400). Audio understanding is Gemini-only (its real inline-
|
|
18
|
+
* audio capability); the audio chain in `index.ts` excludes `pi` accordingly.
|
|
19
|
+
*
|
|
20
|
+
* VIDEO (Gemini Files API) and native PDF (Anthropic `document` block) are NOT
|
|
21
|
+
* served here either — Pi has no video or document content block — so those
|
|
22
|
+
* stay on the bespoke adapters.
|
|
23
|
+
*
|
|
24
|
+
* The actual model call is injected as `cfg.piComplete` (defaulting to the
|
|
25
|
+
* `completeSimple` wrapper below) so the whole subsystem stays testable with
|
|
26
|
+
* zero real model traffic.
|
|
27
|
+
*/
|
|
28
|
+
import { MediaUnderstandingProviderError, MediaUnderstandingUnavailableError, } from "./types.js";
|
|
29
|
+
/**
 * Per-kind default instruction used when the caller supplies no prompt.
 * Only `image` has an entry; `runPi` falls back to a generic instruction for
 * any kind that is missing from this map.
 */
|
|
30
|
+
const DEFAULT_PROMPTS = {
|
|
31
|
+
image: "Describe this image in detail.",
|
|
32
|
+
};
|
|
33
|
+
/**
 * Report whether a resolved model advertises image input.
 *
 * @param model - Resolved model descriptor, or a nullish value.
 * @returns `true` only when `model.input` is an array that contains `"image"`.
 */
export function modelAcceptsImage(model) {
    const modalities = model?.input;
    if (!Array.isArray(modalities)) {
        return false;
    }
    return modalities.includes("image");
}
|
|
37
|
+
/**
 * Choose the image-capable model the Pi path should use.
 *
 *  - With a `providerOverride`, only that provider is consulted; the result is
 *    its model when image-capable, otherwise `undefined` (no fallback).
 *  - Without an override, the providers returned by `cfg.listKeyedProviders()`
 *    are tried in order and the first image-capable model wins.
 *
 * A `cfg.resolveModel` call that throws is treated the same as "no model".
 * Image input is required whatever `kind` is, because the Pi path can only
 * carry an image block.
 *
 * @param kind - Media kind, forwarded to `cfg.resolveModel`.
 * @param cfg - Config exposing `resolveModel` and, optionally, `listKeyedProviders`.
 * @param providerOverride - Optional explicit provider name.
 * @returns The resolved model, or `undefined` when `cfg.resolveModel` is not a
 *   function or nothing suitable resolves.
 */
export function resolvePiModel(kind, cfg, providerOverride) {
    if (typeof cfg.resolveModel !== "function") {
        return undefined;
    }
    // Resolve one provider's model and keep it only if it declares image input.
    function imageModelFor(provider) {
        let candidate;
        try {
            candidate = cfg.resolveModel(provider, kind);
        }
        catch {
            return undefined;
        }
        return candidate && modelAcceptsImage(candidate) ? candidate : undefined;
    }
    if (providerOverride) {
        return imageModelFor(providerOverride);
    }
    if (typeof cfg.listKeyedProviders !== "function") {
        return undefined;
    }
    for (const provider of cfg.listKeyedProviders()) {
        const match = imageModelFor(provider);
        if (match) {
            return match;
        }
    }
    return undefined;
}
|
|
77
|
+
/**
 * Run a one-shot image understanding through the Pi SDK.
 *
 * Uses `params.model` when supplied, otherwise resolves one with
 * `resolvePiModel`. The provider key is looked up via `cfg.resolveKey` (a
 * throwing or empty lookup is treated as "no key"), and the request is handed
 * to `cfg.piComplete`, falling back to `defaultPiComplete`.
 *
 * @param params - Request parameters (`kind`, `bytes`, `mimeType`, `cfg`, and
 *   optional `prompt`, `provider`, `model`, `signal`).
 * @returns `{ text, provider: "pi", model: "<provider>/<id>" }` with trimmed text.
 * @throws MediaUnderstandingUnavailableError when no model resolves.
 * @throws MediaUnderstandingProviderError when the model call fails or yields
 *   no usable text. For failures wrapped here, the original error is attached
 *   as `cause`.
 */
export async function runPi(params) {
    const model = params.model ?? resolvePiModel(params.kind, params.cfg, params.provider);
    if (!model) {
        throw new MediaUnderstandingUnavailableError(params.kind, `No image-capable model is configured for the Pi path (${params.kind}).`);
    }
    const complete = params.cfg.piComplete ?? defaultPiComplete;
    // Resolve the provider key (keyless local providers like Ollama legitimately
    // return ""); pass it through so the completion call can authenticate.
    let apiKey = "";
    try {
        apiKey = params.cfg.resolveKey(model.provider) || "";
    }
    catch {
        // Best effort: a failing key lookup is treated as "no key".
        apiKey = "";
    }
    const prompt = params.prompt?.trim() || DEFAULT_PROMPTS[params.kind] || "Describe the attached media.";
    const req = {
        model,
        bytes: params.bytes,
        mimeType: params.mimeType,
        prompt,
        apiKey,
        ...(params.signal ? { signal: params.signal } : {}),
    };
    let text;
    try {
        text = await complete(req);
    }
    catch (err) {
        // Errors already in the subsystem's vocabulary pass through untouched.
        if (err instanceof MediaUnderstandingProviderError ||
            err instanceof MediaUnderstandingUnavailableError) {
            throw err;
        }
        const wrapped = new MediaUnderstandingProviderError("pi", `Pi model call failed (${model.provider}/${model.id}): ${err instanceof Error ? err.message : String(err)}`);
        // Keep the underlying failure reachable for logging and diagnostics.
        if (wrapped.cause === undefined) {
            wrapped.cause = err;
        }
        throw wrapped;
    }
    // An injected `piComplete` may return something other than a string; treat
    // that as "no text" rather than letting `.trim()` raise a raw TypeError.
    const trimmed = typeof text === "string" ? text.trim() : "";
    if (!trimmed) {
        throw new MediaUnderstandingProviderError("pi", `Model ${model.provider}/${model.id} returned no text for the ${params.kind}.`);
    }
    return { text: trimmed, provider: "pi", model: `${model.provider}/${model.id}` };
}
|
|
124
|
+
/**
 * Default `PiCompleteFn`: lazily imports `completeSimple` from
 * `@earendil-works/pi-ai` and sends a single user turn made of the media (as
 * an image content block) followed by the prompt. The import is deferred so
 * the SDK is only loaded when this default is actually used.
 *
 * @param req - Completion request (`model`, `bytes`, `mimeType`, `prompt`,
 *   and optional `apiKey` / `signal`).
 * @returns The non-empty text blocks of the reply, each trimmed and joined
 *   with newlines; an empty string when the reply carries no text.
 */
export async function defaultPiComplete(req) {
    const { completeSimple } = await import("@earendil-works/pi-ai");
    // Image block: raw base64 payload (no `data:` prefix) plus its MIME type.
    const imageBlock = { type: "image", data: req.bytes.toString("base64"), mimeType: req.mimeType };
    const promptBlock = { type: "text", text: req.prompt };
    const context = {
        messages: [
            { role: "user", content: [imageBlock, promptBlock], timestamp: Date.now() },
        ],
    };
    // Only forward the key / signal when they are actually set.
    const options = {
        ...(req.apiKey ? { apiKey: req.apiKey } : {}),
        ...(req.signal ? { signal: req.signal } : {}),
    };
    const result = await completeSimple(req.model, context, options);
    const blocks = result?.content ?? [];
    // Collect the trimmed, non-empty text of every text block in order.
    const pieces = blocks.reduce((acc, block) => {
        if (block?.type === "text" && typeof block.text === "string") {
            const piece = block.text.trim();
            if (piece) {
                acc.push(piece);
            }
        }
        return acc;
    }, []);
    return pieces.join("\n").trim();
}
|
|
160
|
+
//# sourceMappingURL=pi-adapter.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pi-adapter.js","sourceRoot":"","sources":["../../../src/agents/media-understanding/pi-adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AAEH,OAAO,EACN,+BAA+B,EAC/B,kCAAkC,GAMlC,MAAM,YAAY,CAAC;AAEpB,uEAAuE;AACvE,MAAM,eAAe,GAAoD;IACxE,KAAK,EAAE,gCAAgC;CACvC,CAAC;AAEF,uDAAuD;AACvD,MAAM,UAAU,iBAAiB,CAAC,KAA0C;IAC3E,OAAO,KAAK,CAAC,OAAO,CAAC,KAAK,EAAE,KAAK,CAAC,IAAI,KAAM,CAAC,KAAM,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;AACvE,CAAC;AAED;;;;;;;;;;;;GAYG;AACH,MAAM,UAAU,cAAc,CAC7B,IAA4B,EAC5B,GAA6B,EAC7B,gBAAyB;IAEzB,IAAI,OAAO,GAAG,CAAC,YAAY,KAAK,UAAU;QAAE,OAAO,SAAS,CAAC;IAC7D,MAAM,WAAW,GAAG,CAAC,QAA4B,EAAuC,EAAE;QACzF,IAAI,KAA0C,CAAC;QAC/C,IAAI,CAAC;YACJ,KAAK,GAAG,GAAG,CAAC,YAAa,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC;QAC3C,CAAC;QAAC,MAAM,CAAC;YACR,OAAO,SAAS,CAAC;QAClB,CAAC;QACD,wEAAwE;QACxE,yEAAyE;QACzE,IAAI,CAAC,KAAK;YAAE,OAAO,SAAS,CAAC;QAC7B,OAAO,iBAAiB,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS,CAAC;IACrD,CAAC,CAAC;IAEF,IAAI,gBAAgB;QAAE,OAAO,WAAW,CAAC,gBAAgB,CAAC,CAAC;IAE3D,MAAM,UAAU,GACf,OAAO,GAAG,CAAC,kBAAkB,KAAK,UAAU,CAAC,CAAC,CAAC,GAAG,CAAC,kBAAkB,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;IAC9E,KAAK,MAAM,QAAQ,IAAI,UAAU,EAAE,CAAC;QACnC,MAAM,KAAK,GAAG,WAAW,CAAC,QAAQ,CAAC,CAAC;QACpC,IAAI,KAAK;YAAE,OAAO,KAAK,CAAC;IACzB,CAAC;IACD,OAAO,SAAS,CAAC;AAClB,CAAC;AAeD;;;;;;GAMG;AACH,MAAM,CAAC,KAAK,UAAU,KAAK,CAAC,MAAmB;IAC9C,MAAM,KAAK,GACV,MAAM,CAAC,KAAK,IAAI,cAAc,CAAC,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC,GAAG,EAAE,MAAM,CAAC,QAAQ,CAAC,CAAC;IAC1E,IAAI,CAAC,KAAK,EAAE,CAAC;QACZ,MAAM,IAAI,kCAAkC,CAC3C,MAAM,CAAC,IAAI,EACX,yDAAyD,MAAM,CAAC,IAAI,IAAI,CACxE,CAAC;IACH,CAAC;IACD,MAAM,QAAQ,GAAG,MAAM,CAAC,GAAG,CAAC,UAAU,IAAI,iBAAiB,CAAC;IAC5D,6EAA6E;IAC7E,iEAAiE;IACjE,IAAI,MAAM,GAAG,EAAE,CAAC;IAChB,IAAI,CAAC;QACJ,MAAM,GAAG,MAAM,CAAC,GAAG,CAAC,UAAU,CAAC,KAAK,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;IACtD,CAAC;IAAC,MAAM,CAAC;QACR,MAAM,GAAG,EAAE,CAAC;IACb,CAAC;IACD,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,EAAE,IAAI,EAAE,IAAI,eAAe,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,8BAA8B,CAAC;IACvG,MAAM,GAAG,GAAsB;QAC9B,KA
AK;QACL,KAAK,EAAE,MAAM,CAAC,KAAK;QACnB,QAAQ,EAAE,MAAM,CAAC,QAAQ;QACzB,MAAM;QACN,MAAM;QACN,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;KACnD,CAAC;IACF,IAAI,IAAY,CAAC;IACjB,IAAI,CAAC;QACJ,IAAI,GAAG,MAAM,QAAQ,CAAC,GAAG,CAAC,CAAC;IAC5B,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACd,IACC,GAAG,YAAY,+BAA+B;YAC9C,GAAG,YAAY,kCAAkC,EAChD,CAAC;YACF,MAAM,GAAG,CAAC;QACX,CAAC;QACD,MAAM,IAAI,+BAA+B,CACxC,IAAI,EACJ,yBAAyB,KAAK,CAAC,QAAQ,IAAI,KAAK,CAAC,EAAE,MAAM,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CAC3G,CAAC;IACH,CAAC;IACD,IAAI,CAAC,IAAI,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,EAAE,CAAC;QAC3B,MAAM,IAAI,+BAA+B,CACxC,IAAI,EACJ,SAAS,KAAK,CAAC,QAAQ,IAAI,KAAK,CAAC,EAAE,6BAA6B,MAAM,CAAC,IAAI,GAAG,CAC9E,CAAC;IACH,CAAC;IACD,OAAO,EAAE,IAAI,EAAE,IAAI,CAAC,IAAI,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE,KAAK,EAAE,GAAG,KAAK,CAAC,QAAQ,IAAI,KAAK,CAAC,EAAE,EAAE,EAAE,CAAC;AACtF,CAAC;AAED;;;;;GAKG;AACH,MAAM,CAAC,KAAK,UAAU,iBAAiB,CAAC,GAAsB;IAC7D,MAAM,EAAE,cAAc,EAAE,GAAG,MAAM,MAAM,CAAC,uBAAuB,CAAC,CAAC;IACjE,MAAM,OAAO,GAAG;QACf,QAAQ,EAAE;YACT;gBACC,IAAI,EAAE,MAAe;gBACrB,OAAO,EAAE;oBACR,oEAAoE;oBACpE,mEAAmE;oBACnE,EAAE,IAAI,EAAE,OAAgB,EAAE,IAAI,EAAE,GAAG,CAAC,KAAK,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,QAAQ,EAAE,GAAG,CAAC,QAAQ,EAAE;oBACtF,EAAE,IAAI,EAAE,MAAe,EAAE,IAAI,EAAE,GAAG,CAAC,MAAM,EAAE;iBAC3C;gBACD,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE;aACrB;SACD;KACD,CAAC;IACF,MAAM,OAAO,GAA4B,EAAE,CAAC;IAC5C,IAAI,GAAG,CAAC,MAAM;QAAE,OAAO,CAAC,MAAM,GAAG,GAAG,CAAC,MAAM,CAAC;IAC5C,IAAI,GAAG,CAAC,MAAM;QAAE,OAAO,CAAC,MAAM,GAAG,GAAG,CAAC,MAAM,CAAC;IAC5C,MAAM,MAAM,GAAG,MAAM,cAAc,CAAC,GAAG,CAAC,KAAc,EAAE,OAAgB,EAAE,OAAgB,CAAC,CAAC;IAC5F,MAAM,OAAO,GAAI,MAAgE,EAAE,OAAO,IAAI,EAAE,CAAC;IACjG,OAAO,OAAO;SACZ,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,KAAK,MAAM,IAAI,OAAO,CAAC,CAAC,IAAI,KAAK,QAAQ,CAAC;SAC/D,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;SACjC,MAAM,CAAC,OAAO,CAAC;SACf,IAAI,CAAC,IAAI,CAAC;SACV,IAAI,EAAE,CAAC;A
ACV,CAAC"}
|