vidspotai-shared 1.0.83 → 1.0.85

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1 +1 @@
1
- {"version":3,"file":"google.service.d.ts","sourceRoot":"","sources":["../../../../../src/services/aiGen/providers/google/google.service.ts"],"names":[],"mappings":"AAiBA,OAAO,EAAE,wBAAwB,EAAE,MAAM,8BAA8B,CAAC;AACxE,OAAO,EACL,iBAAiB,EACjB,qBAAqB,EACrB,qBAAqB,EACrB,qBAAqB,EACrB,qBAAqB,EACrB,oBAAoB,EACpB,oBAAoB,EACpB,qBAAqB,EACrB,qBAAqB,EACrB,iBAAiB,EACjB,iBAAiB,EAClB,MAAM,UAAU,CAAC;AAelB,qBAAa,aAAc,SAAQ,wBAAwB;IAKzD,OAAO,CAAC,EAAE,CAAc;IACxB,OAAO,CAAC,OAAO,CAAgB;IAC/B,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,kBAAkB,CAAK;;IAQ/C;;;;;;;;;OASG;IACG,YAAY,CAChB,MAAM,EAAE,oBAAoB,GAC3B,OAAO,CAAC,oBAAoB,CAAC;IAoChC;;;;;OAKG;IACH,OAAO,CAAC,eAAe;IAoCvB;;;;OAIG;YACW,kBAAkB;IA+B1B,aAAa,CACjB,MAAM,EAAE,qBAAqB,GAC5B,OAAO,CAAC,qBAAqB,CAAC;IAiJ3B,gBAAgB,CAAC,EACrB,IAAI,EACJ,cAAc,EACd,cAAyB,GAC1B,EAAE,iBAAiB,GAAG,OAAO,CAAC,iBAAiB,CAAC;IAyG3C,aAAa,CACjB,MAAM,EAAE,qBAAqB,GAC5B,OAAO,CAAC,qBAAqB,CAAC;YAiBnB,cAAc;IAwG5B;;;;;;OAMG;IACH;;;;OAIG;IACG,aAAa,CACjB,MAAM,EAAE,qBAAqB,GAC5B,OAAO,CAAC,qBAAqB,CAAC;IAIjC,aAAa,CAAC,EAAE,QAAQ,EAAE,QAAY,EAAE,UAAmB,EAAE,SAAiB,EAAE,SAAa,EAAE,SAAS,EAAE,EAAE,iBAAiB,GAAG,MAAM;CA8BvI"}
1
+ {"version":3,"file":"google.service.d.ts","sourceRoot":"","sources":["../../../../../src/services/aiGen/providers/google/google.service.ts"],"names":[],"mappings":"AAiBA,OAAO,EAAE,wBAAwB,EAAE,MAAM,8BAA8B,CAAC;AACxE,OAAO,EACL,iBAAiB,EACjB,qBAAqB,EACrB,qBAAqB,EACrB,qBAAqB,EACrB,qBAAqB,EACrB,oBAAoB,EACpB,oBAAoB,EACpB,qBAAqB,EACrB,qBAAqB,EACrB,iBAAiB,EACjB,iBAAiB,EAClB,MAAM,UAAU,CAAC;AAelB,qBAAa,aAAc,SAAQ,wBAAwB;IAKzD,OAAO,CAAC,EAAE,CAAc;IACxB,OAAO,CAAC,OAAO,CAAgB;IAC/B,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,kBAAkB,CAAK;;IAQ/C;;;;;;;;;OASG;IACG,YAAY,CAChB,MAAM,EAAE,oBAAoB,GAC3B,OAAO,CAAC,oBAAoB,CAAC;IAoChC;;;;;OAKG;IACH,OAAO,CAAC,eAAe;IAoCvB;;;;OAIG;YACW,kBAAkB;IA+B1B,aAAa,CACjB,MAAM,EAAE,qBAAqB,GAC5B,OAAO,CAAC,qBAAqB,CAAC;IAiJ3B,gBAAgB,CAAC,EACrB,IAAI,EACJ,cAAc,EACd,cAAyB,GAC1B,EAAE,iBAAiB,GAAG,OAAO,CAAC,iBAAiB,CAAC;IAiH3C,aAAa,CACjB,MAAM,EAAE,qBAAqB,GAC5B,OAAO,CAAC,qBAAqB,CAAC;YAiBnB,cAAc;IAwG5B;;;;;;OAMG;IACH;;;;OAIG;IACG,aAAa,CACjB,MAAM,EAAE,qBAAqB,GAC5B,OAAO,CAAC,qBAAqB,CAAC;IAIjC,aAAa,CAAC,EAAE,QAAQ,EAAE,QAAY,EAAE,UAAmB,EAAE,SAAiB,EAAE,SAAa,EAAE,SAAS,EAAE,EAAE,iBAAiB,GAAG,MAAM;CA8BvI"}
@@ -303,7 +303,15 @@ class GoogleService extends baseAiGenProvider_service_1.BaseAiGenProviderService
303
303
  const localPath = `${outputFilename}.mp4`; // use /tmp for cloud functions
304
304
  const filePath = `${outputFilePath}/${outputFilename}.mp4`;
305
305
  const file = (0, firebase_1.getBucket)().file(filePath);
306
- await this.withTransientRetry("files.download", () => this.ai.files.download({
306
+ // Download with the SAME key that submitted the task. The Veo output is
307
+ // stored via the Files API in the issuing key's GCP project, so it is
308
+ // project-scoped exactly like the operation — the default `this.ai`
309
+ // (legacy GOOGLE_API_KEY) client can only read files from its own
310
+ // project. With the multi-key pool, the drain-first key (k0) lives in a
311
+ // separate project, so downloading its videos via `this.ai` would 403/404.
312
+ // Use the re-selected `client` (falls back to `this.ai` for un-tagged
313
+ // legacy tasks, which were submitted on GOOGLE_API_KEY anyway).
314
+ await this.withTransientRetry("files.download", () => client.files.download({
307
315
  file: video,
308
316
  downloadPath: localPath,
309
317
  }));
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "vidspotai-shared",
3
- "version": "1.0.83",
3
+ "version": "1.0.85",
4
4
  "main": "lib/index.js",
5
5
  "types": "lib/index.d.ts",
6
6
  "exports": {
@@ -1,141 +0,0 @@
1
- import { PlannedScene, VideoPlan } from "../../schemas/videoPlan.schema";
2
- import { ToolContext, ToolOutcome } from "./toolRegistry";
3
- import { ProviderTaskCache, ProviderTaskCacheEntry } from "./providerTaskCache";
4
- export type RunToolFn = <O = unknown>(name: string, input: unknown, ctx: ToolContext) => Promise<ToolOutcome<O>>;
5
- /**
6
- * Executor — turns an approved VideoPlan into per-scene asset results.
7
- *
8
- * Plan-then-execute split (not free-form ReAct): the executor does NOT pick
9
- * tools. The plan dictates which tool runs per scene; the executor only does
10
- * the dispatch + per-scene parallelism + idempotency-key generation.
11
- *
12
- * Returns a SceneOutcome per scene, leaving composition (compose_scene),
13
- * critique (Critic.review), and render to the orchestrator above.
14
- */
15
- export interface SceneAssetResult {
16
- /** Concrete asset URL for the visual track (image or video). */
17
- visualUrl?: string;
18
- /** Voiceover audio (base64 or url depending on host policy). */
19
- voiceoverAudioBase64?: string;
20
- /**
21
- * Resolved voiceover URL once the host has uploaded the base64 buffer to
22
- * its asset store (GCS / Firebase Storage). The agent.controller flow
23
- * sets this in `onSceneComplete` so `planToProject` can render a real VO
24
- * clip on `track-vo`. When unset, planToProject skips the VO clip.
25
- */
26
- voiceoverUrl?: string;
27
- /** Provider task id for async generations the host still needs to poll. */
28
- pendingTaskId?: string;
29
- /** Model key the pending task was triggered on — the poller needs this to pick the provider. */
30
- pendingModelKey?: string;
31
- /** AG-22: provider chain the tool walked (in order, primary first). For trace + post-mortem. */
32
- attemptedProviders?: string[];
33
- /** Tool name used. */
34
- tool: string;
35
- /** Idempotency key the executor minted; useful for retries. */
36
- idempotencyKey: string;
37
- /**
38
- * AG-28: estimated VO duration (ms) from the wpm heuristic. Set when
39
- * voiceoverAudioBase64 is set. Used pre-upload for budget math; the
40
- * editor-side display window prefers voiceoverActualMs when available.
41
- */
42
- voiceoverEstimateMs?: number;
43
- /**
44
- * Probed actual mp3 duration (ms). Stamped by `resolveVoiceoverUrl` after
45
- * generation, before the buffer is discarded. Reflects real playback
46
- * length — required to size the editor VO clip's display window so
47
- * Remotion's `endAt` doesn't truncate emotive/punctuated deliveries that
48
- * the wpm estimate undershoots. Falls back to `voiceoverEstimateMs` when
49
- * ffprobe is unavailable.
50
- */
51
- voiceoverActualMs?: number;
52
- /**
53
- * AG-28: TTS over-budget signal. True when the line + style produces audio
54
- * longer than scene.durationMs * 1.05 even after the executor's atempo
55
- * adjustment. Stitcher should freeze-frame-pad rather than clip.
56
- */
57
- voiceoverOverBudget?: boolean;
58
- }
59
- export interface SceneOutcome {
60
- scene: PlannedScene;
61
- ok: boolean;
62
- result?: SceneAssetResult;
63
- error?: {
64
- code: string;
65
- message: string;
66
- /**
67
- * Set when the failure came from a provider-fallback-wrapped tool whose
68
- * entire chain exhausted (or terminated on auth/safety/input). Used by
69
- * the orchestrator to decide between agent re-plan / scene degrade /
70
- * hard fail.
71
- */
72
- classification?: "transient" | "rate_limit" | "capability" | "safety" | "auth" | "quota" | "input" | "unknown";
73
- needsReplan?: boolean;
74
- attemptedProviders?: string[];
75
- };
76
- durationMs: number;
77
- }
78
- /**
79
- * Inspect a failed scene to decide the next orchestration step.
80
- *
81
- * - "replan" : agent should re-plan the scene (safety / bad input)
82
- * - "degrade" : try a cheaper strategy (e.g. T2/T3 chain exhausted on quota)
83
- * - "fail" : terminal — surface to user (auth / chain-exhausted-unknown)
84
- * - "retry" : transient — caller may re-run the scene as-is later
85
- */
86
- export declare function classifySceneFailure(outcome: SceneOutcome): "replan" | "degrade" | "fail" | "retry" | null;
87
- export interface ExecutorOptions {
88
- /** Max scenes to run in parallel. Generation providers rate-limit themselves; this guards on top. */
89
- concurrency?: number;
90
- /** Inject a recording wrapper (e.g. runToolRecorded) for eval / replay. */
91
- runTool?: RunToolFn;
92
- /**
93
- * Stage 3 hook: invoked after each scene finishes (success OR failure).
94
- * Lets the orchestrator stream incremental Project updates into Firestore
95
- * so the editor + agent drawer surfaces light up as scenes complete,
96
- * rather than waiting for the whole executor to finish. Errors thrown
97
- * from the callback are swallowed (logged) so a Firestore blip can't
98
- * crash the executor.
99
- */
100
- onSceneComplete?: (sceneIndex: number, outcome: SceneOutcome) => Promise<void>;
101
- /**
102
- * Stage 7 slice 1: provider task-id cache (read side). When the upstream
103
- * inputs for animate_image / generate_video / generate_avatar_video hash
104
- * to the same value as a cached entry (and entry is within TTL), the
105
- * executor returns the cached taskId + modelKey and skips the tool call.
106
- * /execute passes the project's cache here so re-runs after downstream
107
- * (poll/stitch/critic) bugs reuse the prior $$ gens. /regenerate-scene
108
- * deliberately omits this (user wants a fresh gen) but still mints.
109
- */
110
- taskCache?: ProviderTaskCache;
111
- /**
112
- * Stage 7 slice 1: cache write side. Invoked after a successful tool call
113
- * that produced a pendingTaskId. Orchestrator persists the entry on the
114
- * AgentProject doc so the NEXT execute can replay. Errors swallowed —
115
- * cache is best-effort optimisation, not correctness.
116
- */
117
- onTaskMint?: (clipId: string, entry: ProviderTaskCacheEntry) => Promise<void>;
118
- }
119
- export declare class Executor {
120
- private readonly concurrency;
121
- private readonly runTool;
122
- private readonly onSceneComplete?;
123
- private readonly taskCache?;
124
- private readonly onTaskMint?;
125
- constructor(opts?: ExecutorOptions);
126
- /**
127
- * Stage 7 slice 1 helper. Returns cached pending-task output if the
128
- * upstream input hash matches and entry is within TTL; null otherwise.
129
- * Logs the hit so trace shows where $$ was saved.
130
- */
131
- private checkTaskCache;
132
- /**
133
- * Best-effort mint. Swallows callback errors — a Firestore blip must not
134
- * fail the scene (the tool already succeeded and produced a real taskId).
135
- */
136
- private mintTaskCache;
137
- run(plan: VideoPlan, ctx: ToolContext): Promise<SceneOutcome[]>;
138
- private runScene;
139
- private runVisual;
140
- }
141
- //# sourceMappingURL=executor.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"executor.d.ts","sourceRoot":"","sources":["../../../src/services/agent/executor.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,YAAY,EAAE,SAAS,EAAE,MAAM,gCAAgC,CAAC;AAIzE,OAAO,EAA6B,WAAW,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAC;AAGrF,OAAO,EAGL,iBAAiB,EACjB,sBAAsB,EACvB,MAAM,qBAAqB,CAAC;AA2D7B,MAAM,MAAM,SAAS,GAAG,CAAC,CAAC,GAAG,OAAO,EAClC,IAAI,EAAE,MAAM,EACZ,KAAK,EAAE,OAAO,EACd,GAAG,EAAE,WAAW,KACb,OAAO,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC;AAE7B;;;;;;;;;GASG;AAEH,MAAM,WAAW,gBAAgB;IAC/B,gEAAgE;IAChE,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,gEAAgE;IAChE,oBAAoB,CAAC,EAAE,MAAM,CAAC;IAC9B;;;;;OAKG;IACH,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,2EAA2E;IAC3E,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,gGAAgG;IAChG,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,gGAAgG;IAChG,kBAAkB,CAAC,EAAE,MAAM,EAAE,CAAC;IAC9B,sBAAsB;IACtB,IAAI,EAAE,MAAM,CAAC;IACb,+DAA+D;IAC/D,cAAc,EAAE,MAAM,CAAC;IACvB;;;;OAIG;IACH,mBAAmB,CAAC,EAAE,MAAM,CAAC;IAC7B;;;;;;;OAOG;IACH,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B;;;;OAIG;IACH,mBAAmB,CAAC,EAAE,OAAO,CAAC;CAC/B;AAED,MAAM,WAAW,YAAY;IAC3B,KAAK,EAAE,YAAY,CAAC;IACpB,EAAE,EAAE,OAAO,CAAC;IACZ,MAAM,CAAC,EAAE,gBAAgB,CAAC;IAC1B,KAAK,CAAC,EAAE;QACN,IAAI,EAAE,MAAM,CAAC;QACb,OAAO,EAAE,MAAM,CAAC;QAChB;;;;;WAKG;QACH,cAAc,CAAC,EACX,WAAW,GACX,YAAY,GACZ,YAAY,GACZ,QAAQ,GACR,MAAM,GACN,OAAO,GACP,OAAO,GACP,SAAS,CAAC;QACd,WAAW,CAAC,EAAE,OAAO,CAAC;QACtB,kBAAkB,CAAC,EAAE,MAAM,EAAE,CAAC;KAC/B,CAAC;IACF,UAAU,EAAE,MAAM,CAAC;CACpB;AAED;;;;;;;GAOG;AACH,wBAAgB,oBAAoB,CAClC,OAAO,EAAE,YAAY,GACpB,QAAQ,GAAG,SAAS,GAAG,MAAM,GAAG,OAAO,GAAG,IAAI,CAUhD;AAED,MAAM,WAAW,eAAe;IAC9B,qGAAqG;IACrG,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,2EAA2E;IAC3E,OAAO,CAAC,EAAE,SAAS,CAAC;IACpB;;;;;;;OAOG;IACH,eAAe,CAAC,EAAE,CAAC,UAAU,EAAE,MAAM,EAAE,OAAO,EAAE,YAAY,KAAK,OAAO,CAAC,IAAI,CAAC,CAAC;IAC/E;;;;;;;;OAQG;IACH,SAAS,CAAC,EAAE,iBAAiB,CAAC;IAC9B;;;;;OAKG;IACH,UAAU,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,sBAAsB,KAAK,OAAO,CAAC,IAAI,CAAC,CAAC;CAC/E;AAiDD,qBAAa,QAAQ;IACnB,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAS;IACrC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAY;IACpC,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAC,CAAqC;IACtE,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAoB;IAC/C,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAgC;gBAEhD,IAAI,GAAE,eAAoB;IAQtC;;;;OAIG;IACH,OAAO,CAAC,cAAc;IAmBtB;;;OAGG;YACW,aAAa;IAerB,GAAG,CAAC,IAAI,EAAE,SAAS,EAAE,GAAG,EAAE,WAAW,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC;YAiCvD,QAAQ;YA4FR,SAAS;CAsVxB"}
@@ -1,561 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.Executor = void 0;
4
- exports.classifySceneFailure = classifySceneFailure;
5
- const voices_1 = require("../../globals/ttsModels/voices");
6
- const logger_1 = require("../../utils/logger");
7
- const referenceImageRenderer_1 = require("./referenceImageRenderer");
8
- const toolRegistry_1 = require("./toolRegistry");
9
- const ttsDuration_1 = require("./ttsDuration");
10
- const chains_1 = require("./providerFallback/chains");
11
- const providerTaskCache_1 = require("./providerTaskCache");
12
- /**
13
- * AG-37: planners (LLMs) keep inventing voice IDs like "female-young-adult-en"
14
- * or "narrator-1" that look reasonable but don't exist in any catalog. When
15
- * those flow into ElevenLabs the API returns a 400 and the silent-fail VO
16
- * outcome path drops the VO entirely — final video ships muted. Strip
17
- * unknown IDs here so the provider falls back to its default voice instead.
18
- */
19
- const ELEVENLABS_VOICE_IDS = new Set(voices_1.ELEVENLABS_VOICES.map((v) => v.id));
20
- function sanitizeVoiceId(voiceId) {
21
- if (!voiceId)
22
- return undefined;
23
- if (ELEVENLABS_VOICE_IDS.has(voiceId))
24
- return voiceId;
25
- // Heuristic for non-ElevenLabs providers (openai = "alloy"/"echo"/..., minimax = "male-qn-qingse").
26
- // Only strip strings that look like LLM-invented descriptive labels (contain a hyphen + recognisable word).
27
- if (/^(female|male|narrator|presenter|voice)[-_]/i.test(voiceId)) {
28
- logger_1.logger.warn("executor: stripping invalid voiceId (LLM hallucination)", { voiceId });
29
- return undefined;
30
- }
31
- return voiceId;
32
- }
33
- /**
34
- * AG-31: map a brief-level voice style adjective to ElevenLabs voice_settings.
35
- * `style` is the exaggeration param (use sparingly above 0.5 — artifacts);
36
- * `stability` is the consistency knob (low = more emotive, high = monotone);
37
- * `speed` is rate (0.7-1.2). Defaults match ElevenLabs' own (style=0,
38
- * stability=0.5, speed=1.0). Tuning per persona is conservative — we'd rather
39
- * be slightly flat than artifact-laden.
40
- */
41
- function resolveVoiceSettings(style) {
42
- switch (style) {
43
- case "calm":
44
- return { style: 0.15, stability: 0.7, speed: 0.95 };
45
- case "warm":
46
- return { style: 0.25, stability: 0.6, speed: 1.0 };
47
- case "casual":
48
- return { style: 0.3, stability: 0.5, speed: 1.0 };
49
- case "excited":
50
- return { style: 0.55, stability: 0.35, speed: 1.05 };
51
- case "energetic":
52
- return { style: 0.6, stability: 0.35, speed: 1.08 };
53
- case "happy":
54
- return { style: 0.5, stability: 0.4, speed: 1.05 };
55
- case "serious":
56
- return { style: 0.2, stability: 0.75, speed: 0.95 };
57
- case "dramatic":
58
- return { style: 0.65, stability: 0.4, speed: 0.95 };
59
- case "whisper":
60
- return { style: 0.2, stability: 0.85, speed: 0.9 };
61
- case "neutral":
62
- case undefined:
63
- default:
64
- return {};
65
- }
66
- }
67
- /**
68
- * Inspect a failed scene to decide the next orchestration step.
69
- *
70
- * - "replan" : agent should re-plan the scene (safety / bad input)
71
- * - "degrade" : try a cheaper strategy (e.g. T2/T3 chain exhausted on quota)
72
- * - "fail" : terminal — surface to user (auth / chain-exhausted-unknown)
73
- * - "retry" : transient — caller may re-run the scene as-is later
74
- */
75
- function classifySceneFailure(outcome) {
76
- if (outcome.ok)
77
- return null;
78
- const c = outcome.error?.classification;
79
- if (!c)
80
- return "fail";
81
- if (outcome.error?.needsReplan)
82
- return "replan"; // safety / input
83
- if (c === "auth")
84
- return "fail";
85
- if (c === "quota")
86
- return "degrade";
87
- if (c === "rate_limit" || c === "transient")
88
- return "retry";
89
- if (c === "capability")
90
- return "degrade";
91
- return "fail";
92
- }
93
- /**
94
- * OD-T7 / AG-42 — give the planner's `preferredModel` a chance to actually run
95
- * when the scene duration is just slightly off from what the model accepts.
96
- *
97
- * Decision tree:
98
- * 1. Model has no duration constraint → use planned duration
99
- * 2. Planned duration already legal → use planned duration
100
- * 3. Nearest legal is within snap tolerance → snap, log info
101
- * 4. Outside tolerance + explicit preferredModel → log warn ("dropping
102
- * planner pick — too far from any allowed value"); use planned duration
103
- * anyway so the capability filter drops it and the chain fallback runs
104
- * (the withFallback warn AG-42 also logs there now)
105
- *
106
- * We intentionally do NOT throw — throwing would crash a scene mid-run and
107
- * lose all the other planner output. The graceful fallback chain already
108
- * exists; this helper just keeps the planner's pick alive when the drift is
109
- * small, and makes the drop visible when it isn't.
110
- */
111
- function resolveDurationForPreferredModel(modelKey, plannedDurationSec, explicit, sceneIndex) {
112
- const snap = (0, chains_1.snapDurationForModel)(modelKey, plannedDurationSec);
113
- if (!snap) {
114
- if (explicit) {
115
- logger_1.logger.warn("executor: preferredModel duration outside snap tolerance — chain fallback will pick", {
116
- sceneIndex,
117
- modelKey,
118
- plannedDurationSec,
119
- });
120
- }
121
- return plannedDurationSec;
122
- }
123
- if (snap.driftSec > 0.01) {
124
- logger_1.logger.info("executor: snapped duration to fit preferredModel", {
125
- sceneIndex,
126
- modelKey,
127
- from: snap.requestedSec,
128
- to: snap.snappedSec,
129
- driftSec: Number(snap.driftSec.toFixed(2)),
130
- });
131
- }
132
- return snap.snappedSec;
133
- }
134
- class Executor {
135
- constructor(opts = {}) {
136
- this.concurrency = opts.concurrency ?? 4;
137
- this.runTool = opts.runTool ?? toolRegistry_1.runTool;
138
- this.onSceneComplete = opts.onSceneComplete;
139
- this.taskCache = opts.taskCache;
140
- this.onTaskMint = opts.onTaskMint;
141
- }
142
- /**
143
- * Stage 7 slice 1 helper. Returns cached pending-task output if the
144
- * upstream input hash matches and entry is within TTL; null otherwise.
145
- * Logs the hit so trace shows where $$ was saved.
146
- */
147
- checkTaskCache(clipId, tool, input) {
148
- const inputsHash = (0, providerTaskCache_1.hashTaskInputs)(input);
149
- const hit = (0, providerTaskCache_1.getCachedTask)(this.taskCache, clipId, inputsHash);
150
- if (hit) {
151
- logger_1.logger.info("executor: task-cache hit", {
152
- clipId,
153
- tool,
154
- modelKey: hit.modelKey,
155
- taskId: hit.taskId,
156
- ageMs: Date.now() - hit.createdAt,
157
- });
158
- }
159
- return { hit, inputsHash };
160
- }
161
- /**
162
- * Best-effort mint. Swallows callback errors — a Firestore blip must not
163
- * fail the scene (the tool already succeeded and produced a real taskId).
164
- */
165
- async mintTaskCache(clipId, entry) {
166
- if (!this.onTaskMint)
167
- return;
168
- try {
169
- await this.onTaskMint(clipId, entry);
170
- }
171
- catch (err) {
172
- logger_1.logger.warn("executor: onTaskMint callback threw — swallowed", {
173
- clipId,
174
- err: err.message,
175
- });
176
- }
177
- }
178
- async run(plan, ctx) {
179
- const queue = [...plan.scenes];
180
- const results = new Array(plan.scenes.length);
181
- const workers = [];
182
- let cursor = 0;
183
- const worker = async () => {
184
- while (true) {
185
- const i = cursor++;
186
- if (i >= queue.length)
187
- return;
188
- const scene = queue[i];
189
- const outcome = await this.runScene(scene, ctx, plan.aspect, plan);
190
- results[i] = outcome;
191
- if (this.onSceneComplete) {
192
- try {
193
- await this.onSceneComplete(scene.sceneIndex, outcome);
194
- }
195
- catch (err) {
196
- logger_1.logger.warn("executor: onSceneComplete callback threw — swallowed", {
197
- sceneIndex: scene.sceneIndex,
198
- err: err.message,
199
- });
200
- }
201
- }
202
- }
203
- };
204
- for (let i = 0; i < Math.min(this.concurrency, queue.length); i++) {
205
- workers.push(worker());
206
- }
207
- await Promise.all(workers);
208
- return results;
209
- }
210
- async runScene(scene, ctx, aspect, plan) {
211
- const start = Date.now();
212
- const idempotencyKey = `${ctx.agentRunId}:scene-${scene.sceneIndex}`;
213
- const sceneCtx = { ...ctx, idempotencyKey };
214
- const visualOutcome = await this.runVisual(scene, sceneCtx, aspect, plan);
215
- if (!visualOutcome.ok) {
216
- return {
217
- scene,
218
- ok: false,
219
- error: visualOutcome.error,
220
- durationMs: Date.now() - start,
221
- };
222
- }
223
- let voiceoverAudioBase64;
224
- let voiceoverEstimateMs;
225
- let voiceoverOverBudget = false;
226
- // talking-head-avatar bakes the VO into the rendered MP4; running TTS again
227
- // here would just waste credits and create a duplicate audio track.
228
- if (scene.voiceoverLine && scene.strategy !== "talking-head-avatar") {
229
- // AG-31: brief.voice.style → ElevenLabs voice_settings.
230
- // AG-28: if estimated VO > scene.durationMs, speed up TTS within natural
231
- // limits (≤1.15× extra over style.speed) before falling back to a
232
- // freeze-frame-pad signal for the stitcher.
233
- // AG-44: per-scene voiceStyle wins over plan-level when present, so the
234
- // emotional arc can shift across the timeline (frustrated → curious → excited).
235
- const effectiveVoiceStyle = (scene.voiceStyle ?? plan.voiceStyle);
236
- const baseSettings = resolveVoiceSettings(effectiveVoiceStyle);
237
- const baseSpeed = baseSettings.speed ?? 1.0;
238
- const rawEstMs = (0, ttsDuration_1.estimateTtsMs)(scene.voiceoverLine);
239
- const budgetMs = scene.durationMs * 1.05;
240
- // Adjusted speed: only kick in if natural speech would overrun. Cap the
241
- // speedup multiplier at 1.15× style-base; faster than that sounds rushed.
242
- let speed = baseSpeed;
243
- if (rawEstMs > budgetMs) {
244
- const needed = rawEstMs / budgetMs;
245
- const cappedMultiplier = Math.min(1.15, needed);
246
- speed = +(baseSpeed * cappedMultiplier).toFixed(2);
247
- }
248
- const adjustedEstMs = rawEstMs * (baseSpeed / speed);
249
- voiceoverEstimateMs = Math.round(adjustedEstMs);
250
- voiceoverOverBudget = adjustedEstMs > budgetMs;
251
- const voInput = {
252
- text: scene.voiceoverLine,
253
- ...baseSettings,
254
- // Override speed when AG-28 budget forced an adjustment.
255
- speed: +Math.max(0.7, Math.min(1.2, speed)).toFixed(2),
256
- };
257
- const sanitized = sanitizeVoiceId(plan.voiceId);
258
- if (sanitized)
259
- voInput.voiceId = sanitized;
260
- const voOutcome = await this.runTool("generate_voiceover", voInput, { ...sceneCtx, idempotencyKey: `${idempotencyKey}:vo` });
261
- if (voOutcome.ok) {
262
- voiceoverAudioBase64 = voOutcome.output.audioBase64;
263
- }
264
- else {
265
- // AG-37: don't swallow silently — final video shipping muted with no
266
- // log was the original bug. Surface the error in the result so the
267
- // trace shows WHY VO is missing.
268
- logger_1.logger.warn("executor: voiceover tool failed", {
269
- sceneIndex: scene.sceneIndex,
270
- error: voOutcome.error,
271
- voiceIdRequested: plan.voiceId,
272
- voiceIdSent: sanitized,
273
- });
274
- }
275
- // Don't fail the whole scene on VO miss — host can re-run just the VO.
276
- }
277
- return {
278
- scene,
279
- ok: true,
280
- result: {
281
- ...visualOutcome.output,
282
- voiceoverAudioBase64,
283
- voiceoverEstimateMs,
284
- voiceoverOverBudget,
285
- idempotencyKey,
286
- },
287
- durationMs: Date.now() - start,
288
- };
289
- }
290
- async runVisual(scene, ctx, aspect, plan) {
291
- const durationSec = Math.max(2, Math.round(scene.durationMs / 1000));
292
- // AG-13: collect reference images for this scene's bible entities. Empty
293
- // array when bible is C0/C1 or no entities are tagged — image-gen falls
294
- // back to pure text prompt.
295
- const referenceImageUrls = (0, referenceImageRenderer_1.collectReferenceImageUrls)(plan.bible, scene.bibleEntityIds ?? []);
296
- const wrap = (tool, outcome) => outcome.ok
297
- ? { ok: true, output: { tool, ...outcome.output } }
298
- : { ok: false, error: outcome.error };
299
- switch (scene.strategy) {
300
- case "stock-video":
301
- case "stock-image-ken-burns": {
302
- const out = await this.runTool("search_stock", {
303
- query: scene.prompt,
304
- kind: scene.strategy === "stock-video" ? "video" : "image",
305
- aspectRatio: aspect,
306
- minDurationSec: scene.strategy === "stock-video" ? durationSec : undefined,
307
- }, ctx);
308
- if (!out.ok)
309
- return wrap("search_stock", out);
310
- const url = out.output.assets[0]?.url;
311
- if (!url)
312
- return {
313
- ok: false,
314
- error: {
315
- code: "NO_STOCK_RESULTS",
316
- message: `No stock results for: ${scene.prompt}`,
317
- attemptedProviders: out.output.attemptedProviders,
318
- },
319
- };
320
- return {
321
- ok: true,
322
- output: {
323
- tool: "search_stock",
324
- visualUrl: url,
325
- attemptedProviders: out.output.attemptedProviders,
326
- },
327
- };
328
- }
329
- case "ai-image-static":
330
- case "ai-image-motion": {
331
- const out = await this.runTool("generate_image", {
332
- prompt: scene.prompt,
333
- modelKey: scene.preferredModel ?? "google-nano-banana",
334
- aspectRatio: aspect,
335
- ...(referenceImageUrls.length
336
- ? { inputImageUrls: referenceImageUrls }
337
- : {}),
338
- }, ctx);
339
- if (!out.ok)
340
- return wrap("generate_image", out);
341
- return { ok: true, output: { tool: "generate_image", visualUrl: out.output.imageUrl } };
342
- }
343
- case "ai-image-to-video": {
344
- const animateModelKey = scene.preferredModel ?? "kling-v2.6";
345
- const animateDurationSec = resolveDurationForPreferredModel(animateModelKey, durationSec, !!scene.preferredModel, scene.sceneIndex);
346
- // Stage 7 slice 1: cache key spans the WHOLE strategy (both image-gen
347
- // + animate). Hashing only upstream-stable inputs — prompt, model,
348
- // duration, aspect, reference images — lets a re-execute skip BOTH
349
- // tool calls, replaying the cached animate taskId through the poller.
350
- const clipId = `scene-${scene.sceneIndex}-visual`;
351
- const cacheInput = {
352
- strategy: "ai-image-to-video",
353
- prompt: scene.prompt,
354
- modelKey: animateModelKey,
355
- durationSec: animateDurationSec,
356
- aspect,
357
- referenceImageUrls,
358
- };
359
- const { hit, inputsHash } = this.checkTaskCache(clipId, "animate_image", cacheInput);
360
- if (hit) {
361
- return {
362
- ok: true,
363
- output: {
364
- tool: "animate_image",
365
- pendingTaskId: hit.taskId,
366
- pendingModelKey: hit.modelKey,
367
- },
368
- };
369
- }
370
- const img = await this.runTool("generate_image", {
371
- prompt: scene.prompt,
372
- modelKey: "google-nano-banana",
373
- aspectRatio: aspect,
374
- ...(referenceImageUrls.length
375
- ? { inputImageUrls: referenceImageUrls }
376
- : {}),
377
- }, { ...ctx, idempotencyKey: `${ctx.idempotencyKey}:img` });
378
- if (!img.ok)
379
- return wrap("generate_image", img);
380
- const animate = await this.runTool("animate_image", {
381
- imageUrl: img.output.imageUrl,
382
- motionHint: scene.prompt,
383
- durationSec: animateDurationSec,
384
- modelKey: animateModelKey,
385
- }, ctx);
386
- if (!animate.ok)
387
- return wrap("animate_image", animate);
388
- const mintedModelKey = animate.output.modelKeyUsed ?? animateModelKey;
389
- await this.mintTaskCache(clipId, {
390
- tool: "animate_image",
391
- taskId: animate.output.taskId,
392
- modelKey: mintedModelKey,
393
- inputsHash,
394
- createdAt: Date.now(),
395
- });
396
- return {
397
- ok: true,
398
- output: {
399
- tool: "animate_image",
400
- pendingTaskId: animate.output.taskId,
401
- pendingModelKey: mintedModelKey,
402
- attemptedProviders: animate.output.attemptedProviders,
403
- },
404
- };
405
- }
406
- case "ai-text-to-video": {
407
- const t2vModelKey = scene.preferredModel ?? "kling-v2.6";
408
- const t2vDurationSec = resolveDurationForPreferredModel(t2vModelKey, durationSec, !!scene.preferredModel, scene.sceneIndex);
409
- const clipId = `scene-${scene.sceneIndex}-visual`;
410
- const cacheInput = {
411
- strategy: "ai-text-to-video",
412
- prompt: scene.prompt,
413
- modelKey: t2vModelKey,
414
- durationSec: t2vDurationSec,
415
- };
416
- const { hit, inputsHash } = this.checkTaskCache(clipId, "generate_video", cacheInput);
417
- if (hit) {
418
- return {
419
- ok: true,
420
- output: {
421
- tool: "generate_video",
422
- pendingTaskId: hit.taskId,
423
- pendingModelKey: hit.modelKey,
424
- },
425
- };
426
- }
427
- const out = await this.runTool("generate_video", {
428
- prompt: scene.prompt,
429
- modelKey: t2vModelKey,
430
- durationSec: t2vDurationSec,
431
- }, ctx);
432
- if (!out.ok)
433
- return wrap("generate_video", out);
434
- const mintedModelKey = out.output.modelKeyUsed ?? t2vModelKey;
435
- await this.mintTaskCache(clipId, {
436
- tool: "generate_video",
437
- taskId: out.output.taskId,
438
- modelKey: mintedModelKey,
439
- inputsHash,
440
- createdAt: Date.now(),
441
- });
442
- return {
443
- ok: true,
444
- output: {
445
- tool: "generate_video",
446
- pendingTaskId: out.output.taskId,
447
- pendingModelKey: mintedModelKey,
448
- attemptedProviders: out.output.attemptedProviders,
449
- },
450
- };
451
- }
452
- case "user-asset": {
453
- // Per-user library search. The host injects the searcher at boot
454
- // (see userLibrarySearcher bootstrap); if no match, fall back to
455
- // stock so the scene still renders rather than failing hard.
456
- const out = await this.runTool("search_user_library", { query: scene.prompt, limit: 5 }, ctx);
457
- if (out.ok) {
458
- const first = out.output.assets[0];
459
- if (first) {
460
- return { ok: true, output: { tool: "search_user_library", visualUrl: first.url } };
461
- }
462
- }
463
- // No hits or searcher missing — fall back to stock-video as the
464
- // closest cheap substitute so the executor still produces an asset.
465
- const stock = await this.runTool("search_stock", {
466
- query: scene.prompt,
467
- kind: "video",
468
- aspectRatio: aspect,
469
- minDurationSec: durationSec,
470
- }, ctx);
471
- if (!stock.ok)
472
- return wrap("search_stock", stock);
473
- const fallbackUrl = stock.output.assets[0]?.url;
474
- if (!fallbackUrl) {
475
- return {
476
- ok: false,
477
- error: {
478
- code: "USER_ASSET_NO_MATCH",
479
- message: `No user-library match for "${scene.prompt}" and no stock fallback.`,
480
- attemptedProviders: stock.output.attemptedProviders,
481
- },
482
- };
483
- }
484
- return {
485
- ok: true,
486
- output: {
487
- tool: "search_user_library",
488
- visualUrl: fallbackUrl,
489
- attemptedProviders: stock.output.attemptedProviders,
490
- },
491
- };
492
- }
493
- case "talking-head-avatar": {
494
- if (!scene.avatarFaceUrl) {
495
- return {
496
- ok: false,
497
- error: {
498
- code: "AVATAR_FACE_REQUIRED",
499
- message: "talking-head-avatar requires scene.avatarFaceUrl (set on the user's brand kit).",
500
- },
501
- };
502
- }
503
- if (!scene.voiceoverLine) {
504
- return {
505
- ok: false,
506
- error: {
507
- code: "AVATAR_VO_REQUIRED",
508
- message: "talking-head-avatar requires scene.voiceoverLine for the lipsync audio.",
509
- },
510
- };
511
- }
512
- const avatarMode = scene.tier === "T3" ? "pro" : "std";
513
- const clipId = `scene-${scene.sceneIndex}-visual`;
514
- const cacheInput = {
515
- strategy: "talking-head-avatar",
516
- inputImageUrl: scene.avatarFaceUrl,
517
- ttsText: scene.voiceoverLine,
518
- mode: avatarMode,
519
- };
520
- const { hit, inputsHash } = this.checkTaskCache(clipId, "generate_avatar_video", cacheInput);
521
- if (hit) {
522
- return {
523
- ok: true,
524
- output: {
525
- tool: "generate_avatar_video",
526
- pendingTaskId: hit.taskId,
527
- pendingModelKey: hit.modelKey,
528
- },
529
- };
530
- }
531
- const out = await this.runTool("generate_avatar_video", {
532
- inputImageUrl: scene.avatarFaceUrl,
533
- ttsText: scene.voiceoverLine,
534
- mode: avatarMode,
535
- }, ctx);
536
- if (!out.ok)
537
- return wrap("generate_avatar_video", out);
538
- await this.mintTaskCache(clipId, {
539
- tool: "generate_avatar_video",
540
- taskId: out.output.taskId,
541
- // Avatar tool hardcodes Kling Avatar internally; poller needs the
542
- // actual model key to route via getAiGenProviderService (no special
543
- // resolver exists). AG-20: was "generate_avatar_video" which would
544
- // throw at factory lookup the moment the avatar path was polled.
545
- modelKey: "kling-avatar",
546
- inputsHash,
547
- createdAt: Date.now(),
548
- });
549
- return {
550
- ok: true,
551
- output: {
552
- tool: "generate_avatar_video",
553
- pendingTaskId: out.output.taskId,
554
- pendingModelKey: "kling-avatar",
555
- },
556
- };
557
- }
558
- }
559
- }
560
- }
561
- exports.Executor = Executor;
@@ -1,61 +0,0 @@
1
- import { z } from "zod";
2
- import { ChatRequest, ChatResponse, LlmCaller, StructuredRequest, StructuredResponse } from "./llmCaller";
3
- /**
4
- * Vercel AI Gateway LlmCaller — OpenAI-compatible /v1/chat/completions surface.
5
- * Single endpoint, multi-provider routing via "{provider}/{modelId}" model id.
6
- *
7
- * No SDK dependency: pure fetch keeps the shared package light. Structured
8
- * output uses JSON-schema response_format (supported by gateway across
9
- * Anthropic / OpenAI / Google providers — gateway normalises it).
10
- *
11
- * Reliability features (added Phase 1 of AGENT_TEST_PLAN):
12
- * - F1 transport-level retry: each HTTP call retries 3× with exp backoff
13
- * (250 / 1000 / 4000ms) on transient errors (network drops, 408/429/5xx).
14
- * - F2 model fallback: when a request supplies `fallbackModel` and the
15
- * primary exhausts retries on a transient error, we attempt the fallback
16
- * once (also with its own retry budget). Schema-validation failures and
17
- * other deterministic errors do NOT trigger fallback.
18
- */
19
- export interface GatewayConfig {
20
- /** Full base url, e.g. "https://ai-gateway.vercel.sh/v1". */
21
- baseUrl: string;
22
- /** Bearer token. */
23
- apiKey: string;
24
- /** Default request timeout in ms. */
25
- timeoutMs?: number;
26
- /** Optional fetch impl override (tests). */
27
- fetchImpl?: typeof fetch;
28
- /** Max retry attempts per model on transient errors. Default 3. */
29
- maxRetries?: number;
30
- /** Base backoff in ms (exponential: base, base*4, base*16). Default 250. */
31
- retryBaseMs?: number;
32
- }
33
- export declare class GatewayLlmCaller implements LlmCaller {
34
- private readonly cfg;
35
- private readonly fetchImpl;
36
- private readonly timeoutMs;
37
- private readonly maxRetries;
38
- private readonly retryBaseMs;
39
- constructor(cfg: GatewayConfig);
40
- chat(req: ChatRequest): Promise<ChatResponse>;
41
- structured<T extends z.ZodTypeAny>(req: StructuredRequest<T>): Promise<StructuredResponse<T>>;
42
- structuredStream<T extends z.ZodTypeAny>(req: StructuredRequest<T>): {
43
- tokens: AsyncIterable<string>;
44
- result: Promise<StructuredResponse<T>>;
45
- };
46
- private createStreamingIterator;
47
- private modelId;
48
- private encodeMessage;
49
- private toDataUri;
50
- private usage;
51
- /**
52
- * POST /chat/completions with transient-error retry (F1).
53
- * Throws TransientLlmError after exhausting retries on transient failures —
54
- * caller catches it to trigger model fallback (F2). Non-transient errors
55
- * (4xx other than 408/425/429, malformed responses) propagate as plain Error.
56
- */
57
- private postWithRetry;
58
- private withRetry;
59
- private postOnce;
60
- }
61
- //# sourceMappingURL=llmCallerGateway.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"llmCallerGateway.d.ts","sourceRoot":"","sources":["../../../src/services/agent/llmCallerGateway.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EACL,WAAW,EACX,YAAY,EACZ,SAAS,EACT,iBAAiB,EACjB,kBAAkB,EACnB,MAAM,aAAa,CAAC;AAGrB;;;;;;;;;;;;;;;GAeG;AAEH,MAAM,WAAW,aAAa;IAC5B,6DAA6D;IAC7D,OAAO,EAAE,MAAM,CAAC;IAChB,oBAAoB;IACpB,MAAM,EAAE,MAAM,CAAC;IACf,qCAAqC;IACrC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,4CAA4C;IAC5C,SAAS,CAAC,EAAE,OAAO,KAAK,CAAC;IACzB,mEAAmE;IACnE,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,4EAA4E;IAC5E,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AA2CD,qBAAa,gBAAiB,YAAW,SAAS;IAMpC,OAAO,CAAC,QAAQ,CAAC,GAAG;IALhC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAe;IACzC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAS;IACpC,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAS;gBAER,GAAG,EAAE,aAAa;IAOzC,IAAI,CAAC,GAAG,EAAE,WAAW,GAAG,OAAO,CAAC,YAAY,CAAC;IAkC7C,UAAU,CAAC,CAAC,SAAS,CAAC,CAAC,UAAU,EACrC,GAAG,EAAE,iBAAiB,CAAC,CAAC,CAAC,GACxB,OAAO,CAAC,kBAAkB,CAAC,CAAC,CAAC,CAAC;IAgEjC,gBAAgB,CAAC,CAAC,SAAS,CAAC,CAAC,UAAU,EACrC,GAAG,EAAE,iBAAiB,CAAC,CAAC,CAAC,GACxB;QACD,MAAM,EAAE,aAAa,CAAC,MAAM,CAAC,CAAC;QAC9B,MAAM,EAAE,OAAO,CAAC,kBAAkB,CAAC,CAAC,CAAC,CAAC,CAAC;KACxC;YAqBc,uBAAuB;IA4ItC,OAAO,CAAC,OAAO;YAID,aAAa;YAqBb,SAAS;IAcvB,OAAO,CAAC,KAAK;IAQb;;;;;OAKG;YACW,aAAa;YAIb,SAAS;YA+BT,QAAQ;CA6BvB"}
@@ -1,368 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.GatewayLlmCaller = void 0;
4
- const zod_1 = require("zod");
5
- /**
6
- * Marks a transient error that should trigger HTTP retry (and, when exhausted,
7
- * model fallback). Non-transient errors (4xx other than 408/429, schema
8
- * validation, malformed JSON) are NOT wrapped and propagate immediately.
9
- */
10
- class TransientLlmError extends Error {
11
- constructor(message, cause) {
12
- super(message);
13
- this.cause = cause;
14
- this.name = "TransientLlmError";
15
- }
16
- }
17
- const TRANSIENT_HTTP_STATUSES = new Set([408, 425, 429, 500, 502, 503, 504]);
18
- function isTransientNetworkError(err) {
19
- if (!err || typeof err !== "object")
20
- return false;
21
- const e = err;
22
- const code = e.code ?? e.cause?.code;
23
- if (code && /^(ECONNRESET|ECONNREFUSED|ETIMEDOUT|EAI_AGAIN|EPIPE|UND_ERR_.*)$/.test(code)) {
24
- return true;
25
- }
26
- // Our own per-request timeout (ctrl.abort() in postOnce / structuredStream)
27
- // surfaces as AbortError "This operation was aborted". Treat as transient so
28
- // the retry loop (and then the fallback model) gets a chance — otherwise a
29
- // single slow Gemini Flash vision call sinks the entire critic stage with
30
- // no recovery.
31
- if (e.name === "AbortError")
32
- return true;
33
- const msg = String(e.message ?? "");
34
- return /fetch failed|socket hang up|other side closed|network|timeout|operation was aborted/i.test(msg);
35
- }
36
- class GatewayLlmCaller {
37
- constructor(cfg) {
38
- this.cfg = cfg;
39
- this.fetchImpl = cfg.fetchImpl ?? fetch;
40
- this.timeoutMs = cfg.timeoutMs ?? 60000;
41
- this.maxRetries = cfg.maxRetries ?? 3;
42
- this.retryBaseMs = cfg.retryBaseMs ?? 250;
43
- }
44
- async chat(req) {
45
- const attempted = [];
46
- const run = async (model) => {
47
- attempted.push(model);
48
- const messages = await Promise.all(req.messages.map((m) => this.encodeMessage(m)));
49
- const raw = await this.postWithRetry({
50
- model: this.modelId(model),
51
- messages,
52
- temperature: req.temperature,
53
- max_tokens: req.maxTokens,
54
- stop: req.stop,
55
- });
56
- const text = raw.choices?.[0]?.message?.content ?? "";
57
- return {
58
- text: typeof text === "string" ? text : JSON.stringify(text),
59
- usage: this.usage(raw),
60
- requestId: raw.id,
61
- };
62
- };
63
- try {
64
- const out = await run(req.model);
65
- return { ...out, attemptedModels: attempted, usedFallback: false };
66
- }
67
- catch (err) {
68
- if (req.fallbackModel && err instanceof TransientLlmError) {
69
- const out = await run(req.fallbackModel);
70
- return { ...out, attemptedModels: attempted, usedFallback: true };
71
- }
72
- throw err;
73
- }
74
- }
75
- async structured(req) {
76
- const attempted = [];
77
- const jsonSchema = zod_1.z.toJSONSchema(req.schema, { target: "draft-7" });
78
- const run = async (model) => {
79
- attempted.push(model);
80
- const messages = await Promise.all(req.messages.map((m) => this.encodeMessage(m)));
81
- const raw = await this.postWithRetry({
82
- model: this.modelId(model),
83
- messages,
84
- temperature: req.temperature,
85
- max_tokens: req.maxTokens,
86
- response_format: {
87
- type: "json_schema",
88
- json_schema: {
89
- name: req.schemaName,
90
- schema: jsonSchema,
91
- strict: true,
92
- },
93
- },
94
- });
95
- const txt = raw.choices?.[0]?.message?.content ?? "";
96
- if (!txt || typeof txt !== "string") {
97
- // Empty body from the model is not retryable across providers —
98
- // surface immediately; planner-level retry can re-prompt.
99
- throw new Error(`GatewayLlmCaller.structured: empty response for schema ${req.schemaName}`);
100
- }
101
- let parsed;
102
- try {
103
- parsed = JSON.parse(txt);
104
- }
105
- catch {
106
- throw new Error(`GatewayLlmCaller.structured: model returned non-JSON for schema ${req.schemaName}: ${txt.slice(0, 200)}`);
107
- }
108
- const result = req.schema.safeParse(parsed);
109
- if (!result.success) {
110
- throw new Error(`GatewayLlmCaller.structured: schema validation failed for ${req.schemaName}: ${result.error.message}`);
111
- }
112
- return {
113
- data: result.data,
114
- usage: this.usage(raw),
115
- requestId: raw.id,
116
- };
117
- };
118
- try {
119
- const out = await run(req.model);
120
- return { ...out, attemptedModels: attempted, usedFallback: false };
121
- }
122
- catch (err) {
123
- if (req.fallbackModel && err instanceof TransientLlmError) {
124
- const out = await run(req.fallbackModel);
125
- return { ...out, attemptedModels: attempted, usedFallback: true };
126
- }
127
- throw err;
128
- }
129
- }
130
- structuredStream(req) {
131
- // Streaming: fall back is NOT applied mid-stream (would confuse UI
132
- // chunks). Transport retry IS applied on the initial connect; once
133
- // bytes start flowing, a mid-stream drop propagates as an error and
134
- // the caller (planner) handles it via its own retry loop.
135
- let resolveResult;
136
- let rejectResult;
137
- const result = new Promise((resolve, reject) => {
138
- resolveResult = resolve;
139
- rejectResult = reject;
140
- });
141
- const self = this;
142
- const tokens = {
143
- [Symbol.asyncIterator]() {
144
- return self.createStreamingIterator(req, resolveResult, rejectResult);
145
- },
146
- };
147
- return { tokens, result };
148
- }
149
- async *createStreamingIterator(req, resolveResult, rejectResult) {
150
- const jsonSchema = zod_1.z.toJSONSchema(req.schema, { target: "draft-7" });
151
- const messages = await Promise.all(req.messages.map((m) => this.encodeMessage(m)));
152
- const ctrl = new AbortController();
153
- const t = setTimeout(() => ctrl.abort(), this.timeoutMs);
154
- let accumulated = "";
155
- let requestId;
156
- let usage;
157
- try {
158
- // Connect attempt with retry on transient connect failures (pre-stream).
159
- const res = await this.withRetry(() => this.fetchImpl(`${this.cfg.baseUrl.replace(/\/$/, "")}/chat/completions`, {
160
- method: "POST",
161
- headers: {
162
- "content-type": "application/json",
163
- authorization: `Bearer ${this.cfg.apiKey}`,
164
- },
165
- body: JSON.stringify({
166
- model: this.modelId(req.model),
167
- messages,
168
- temperature: req.temperature,
169
- max_tokens: req.maxTokens,
170
- stream: true,
171
- response_format: {
172
- type: "json_schema",
173
- json_schema: {
174
- name: req.schemaName,
175
- schema: jsonSchema,
176
- strict: true,
177
- },
178
- },
179
- }),
180
- signal: ctrl.signal,
181
- }).then((r) => {
182
- if (!r.ok && TRANSIENT_HTTP_STATUSES.has(r.status)) {
183
- throw new TransientLlmError(`GatewayLlmCaller.structuredStream: ${r.status} ${r.statusText}`);
184
- }
185
- return r;
186
- }));
187
- if (!res.ok || !res.body) {
188
- const text = await res.text().catch(() => "");
189
- const err = new Error(`GatewayLlmCaller.structuredStream: ${res.status} ${res.statusText} — ${text.slice(0, 500)}`);
190
- rejectResult(err);
191
- throw err;
192
- }
193
- const reader = res.body.getReader();
194
- const decoder = new TextDecoder();
195
- let buf = "";
196
- while (true) {
197
- const { value, done } = await reader.read();
198
- if (done)
199
- break;
200
- buf += decoder.decode(value, { stream: true });
201
- let nl;
202
- while ((nl = buf.indexOf("\n")) !== -1) {
203
- const line = buf.slice(0, nl).trim();
204
- buf = buf.slice(nl + 1);
205
- if (!line || !line.startsWith("data:"))
206
- continue;
207
- const payload = line.slice(5).trim();
208
- if (payload === "[DONE]")
209
- break;
210
- try {
211
- const obj = JSON.parse(payload);
212
- if (obj.id)
213
- requestId = obj.id;
214
- if (obj.usage) {
215
- usage = {
216
- promptTokens: obj.usage.prompt_tokens,
217
- completionTokens: obj.usage.completion_tokens,
218
- };
219
- }
220
- const delta = obj.choices?.[0]?.delta?.content ?? "";
221
- if (delta) {
222
- accumulated += delta;
223
- yield delta;
224
- }
225
- }
226
- catch {
227
- // skip malformed line
228
- }
229
- }
230
- }
231
- // Post-stream failures (bad JSON / schema mismatch) reject the RESULT
232
- // promise but must NOT throw out of this generator. The tokens already
233
- // streamed fine; throwing here would propagate into the caller's
234
- // `for await (...stream.tokens)` loop and abort it before the caller can
235
- // reach `await stream.result` — which is exactly where the planner's
236
- // non-streaming retry fallback lives (Planner.planStream). Returning
237
- // cleanly lets the token loop complete, then the rejected result drives
238
- // the retry. See plan.controller.ts:planProjectStream.
239
- let parsed;
240
- try {
241
- parsed = JSON.parse(accumulated);
242
- }
243
- catch {
244
- const err = new Error(`GatewayLlmCaller.structuredStream: model returned non-JSON for ${req.schemaName}: ${accumulated.slice(0, 200)}`);
245
- rejectResult(err);
246
- return;
247
- }
248
- const validated = req.schema.safeParse(parsed);
249
- if (!validated.success) {
250
- const err = new Error(`GatewayLlmCaller.structuredStream: schema validation failed for ${req.schemaName}: ${validated.error.message}`);
251
- rejectResult(err);
252
- return;
253
- }
254
- resolveResult({
255
- data: validated.data,
256
- usage,
257
- requestId,
258
- attemptedModels: [req.model],
259
- usedFallback: false,
260
- });
261
- }
262
- catch (e) {
263
- rejectResult(e);
264
- throw e;
265
- }
266
- finally {
267
- clearTimeout(t);
268
- }
269
- }
270
- modelId(m) {
271
- return `${m.provider}/${m.modelId}`;
272
- }
273
- async encodeMessage(m) {
274
- if (!m.imageUrls?.length) {
275
- return { role: m.role, content: m.content };
276
- }
277
- // Fetch + base64-encode each URL into a data URI. The gateway's Anthropic
278
- // translator rejects raw https URLs ("URL sources are not supported"); data
279
- // URIs round-trip through every provider's OpenAI-compat shim correctly.
280
- const dataUris = await Promise.all(m.imageUrls.map((u) => this.toDataUri(u)));
281
- return {
282
- role: m.role,
283
- content: [
284
- { type: "text", text: m.content },
285
- ...dataUris.map((url) => ({ type: "image_url", image_url: { url } })),
286
- ],
287
- };
288
- }
289
- async toDataUri(url) {
290
- if (url.startsWith("data:"))
291
- return url;
292
- const res = await this.fetchImpl(url);
293
- if (!res.ok) {
294
- throw new Error(`GatewayLlmCaller.toDataUri: ${res.status} ${res.statusText} fetching ${url}`);
295
- }
296
- const contentType = res.headers.get("content-type")?.split(";")[0]?.trim() || "image/jpeg";
297
- const buf = Buffer.from(await res.arrayBuffer());
298
- return `data:${contentType};base64,${buf.toString("base64")}`;
299
- }
300
- usage(raw) {
301
- if (!raw.usage)
302
- return undefined;
303
- return {
304
- promptTokens: raw.usage.prompt_tokens,
305
- completionTokens: raw.usage.completion_tokens,
306
- };
307
- }
308
- /**
309
- * POST /chat/completions with transient-error retry (F1).
310
- * Throws TransientLlmError after exhausting retries on transient failures —
311
- * caller catches it to trigger model fallback (F2). Non-transient errors
312
- * (4xx other than 408/425/429, malformed responses) propagate as plain Error.
313
- */
314
- async postWithRetry(body) {
315
- return this.withRetry(() => this.postOnce(body));
316
- }
317
- async withRetry(fn) {
318
- let lastErr;
319
- for (let attempt = 0; attempt < this.maxRetries; attempt++) {
320
- try {
321
- return await fn();
322
- }
323
- catch (err) {
324
- lastErr = err;
325
- const transient = err instanceof TransientLlmError || isTransientNetworkError(err);
326
- if (!transient || attempt === this.maxRetries - 1) {
327
- if (transient) {
328
- // Wrap so the fallback-model path can catch it explicitly.
329
- throw err instanceof TransientLlmError
330
- ? err
331
- : new TransientLlmError(`transient LLM error after ${attempt + 1} attempts: ${String(err?.message ?? err)}`, err);
332
- }
333
- throw err;
334
- }
335
- await new Promise((r) => setTimeout(r, this.retryBaseMs * Math.pow(4, attempt)));
336
- }
337
- }
338
- throw lastErr;
339
- }
340
- async postOnce(body) {
341
- const ctrl = new AbortController();
342
- const t = setTimeout(() => ctrl.abort(), this.timeoutMs);
343
- try {
344
- const res = await this.fetchImpl(`${this.cfg.baseUrl.replace(/\/$/, "")}/chat/completions`, {
345
- method: "POST",
346
- headers: {
347
- "content-type": "application/json",
348
- authorization: `Bearer ${this.cfg.apiKey}`,
349
- },
350
- body: JSON.stringify(body),
351
- signal: ctrl.signal,
352
- });
353
- if (!res.ok) {
354
- const text = await res.text().catch(() => "");
355
- const msg = `GatewayLlmCaller: ${res.status} ${res.statusText} — ${text.slice(0, 500)}`;
356
- if (TRANSIENT_HTTP_STATUSES.has(res.status)) {
357
- throw new TransientLlmError(msg);
358
- }
359
- throw new Error(msg);
360
- }
361
- return (await res.json());
362
- }
363
- finally {
364
- clearTimeout(t);
365
- }
366
- }
367
- }
368
- exports.GatewayLlmCaller = GatewayLlmCaller;