vidspotai-shared 1.0.84 → 1.0.85
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/lib/services/agent/executor.d.ts +0 -141
- package/lib/services/agent/executor.d.ts.map +0 -1
- package/lib/services/agent/executor.js +0 -561
- package/lib/services/agent/llmCallerGateway.d.ts +0 -61
- package/lib/services/agent/llmCallerGateway.d.ts.map +0 -1
- package/lib/services/agent/llmCallerGateway.js +0 -368
package/package.json
CHANGED
|
@@ -1,141 +0,0 @@
|
|
|
1
|
-
import { PlannedScene, VideoPlan } from "../../schemas/videoPlan.schema";
|
|
2
|
-
import { ToolContext, ToolOutcome } from "./toolRegistry";
|
|
3
|
-
import { ProviderTaskCache, ProviderTaskCacheEntry } from "./providerTaskCache";
|
|
4
|
-
/**
 * Signature of the tool dispatcher the executor invokes for every tool run.
 * Callers may inject their own implementation through `ExecutorOptions.runTool`.
 *
 * @typeParam O - Shape of the tool's output payload; `unknown` unless specified.
 * @param name - Name of the tool to dispatch.
 * @param input - Tool input; left untyped at this boundary.
 * @param ctx - Context handed through to the tool.
 * @returns A promise resolving to the tool's outcome.
 */
export type RunToolFn = <O = unknown>(
    name: string,
    input: unknown,
    ctx: ToolContext,
) => Promise<ToolOutcome<O>>;
|
|
5
|
-
/**
|
|
6
|
-
* Executor — turns an approved VideoPlan into per-scene asset results.
|
|
7
|
-
*
|
|
8
|
-
* Plan-then-execute split (not free-form ReAct): the executor does NOT pick
|
|
9
|
-
* tools. The plan dictates which tool runs per scene; the executor only does
|
|
10
|
-
* the dispatch + per-scene parallelism + idempotency-key generation.
|
|
11
|
-
*
|
|
12
|
-
* Returns a SceneOutcome per scene, leaving composition (compose_scene),
|
|
13
|
-
* critique (Critic.review), and render to the orchestrator above.
|
|
14
|
-
*/
|
|
15
|
-
/**
 * Assets and bookkeeping the executor produces for one successfully run scene.
 * Only `tool` and `idempotencyKey` are always present; every other field
 * depends on the scene's strategy and on whether a voiceover was generated.
 */
export interface SceneAssetResult {
    /** URL of the concrete visual asset (an image or a video). */
    visualUrl?: string;

    /** Voiceover audio payload (base64 or url depending on host policy). */
    voiceoverAudioBase64?: string;

    /**
     * Voiceover URL, filled in after the host has uploaded the base64 buffer
     * to its asset store (GCS / Firebase Storage). The agent.controller flow
     * assigns it inside `onSceneComplete` so that `planToProject` can place a
     * real VO clip on `track-vo`; while it is unset, planToProject omits the
     * VO clip.
     */
    voiceoverUrl?: string;

    /** Provider task id of an async generation the host still has to poll. */
    pendingTaskId?: string;

    /** Model key the pending task was started on; the poller uses it to choose the provider. */
    pendingModelKey?: string;

    /** AG-22: providers the tool walked, in order with the primary first. Kept for trace + post-mortem. */
    attemptedProviders?: Array<string>;

    /** Name of the tool that produced this result. */
    tool: string;

    /** Idempotency key minted by the executor; handy when retrying. */
    idempotencyKey: string;

    /**
     * AG-28: voiceover duration estimate (ms) derived from the wpm heuristic.
     * Documented as set when `voiceoverAudioBase64` is set. Used for budget
     * math before upload; the editor-side display window prefers
     * `voiceoverActualMs` whenever that is available.
     */
    voiceoverEstimateMs?: number;

    /**
     * Actual mp3 duration (ms) as probed. `resolveVoiceoverUrl` stamps it
     * after generation and before the buffer is discarded. It reflects real
     * playback length, which is needed to size the editor VO clip's display
     * window so that Remotion's `endAt` does not truncate emotive/punctuated
     * deliveries the wpm estimate undershoots. Falls back to
     * `voiceoverEstimateMs` when ffprobe is unavailable.
     */
    voiceoverActualMs?: number;

    /**
     * AG-28: TTS over-budget signal. True when the line + style yields audio
     * longer than scene.durationMs * 1.05 even after the executor's atempo
     * adjustment. The stitcher should freeze-frame-pad instead of clipping.
     */
    voiceoverOverBudget?: boolean;
}
|
|
59
|
-
/**
 * Result of running a single planned scene: the scene itself, a success flag,
 * and either the produced assets (`result`) or a description of the failure
 * (`error`), together with how long the run took.
 */
export interface SceneOutcome {
    /** The planned scene this outcome belongs to. */
    scene: PlannedScene;

    /** Whether the scene ran successfully. */
    ok: boolean;

    /** Assets produced for the scene. */
    result?: SceneAssetResult;

    /** Failure details. */
    error?: {
        /** Error code. */
        code: string;
        /** Error description. */
        message: string;
        /**
         * Present when the failure originated in a provider-fallback-wrapped
         * tool whose whole chain was exhausted (or stopped on
         * auth/safety/input). The orchestrator uses it to choose between agent
         * re-plan, scene degrade and hard fail.
         */
        classification?:
            | "transient"
            | "rate_limit"
            | "capability"
            | "safety"
            | "auth"
            | "quota"
            | "input"
            | "unknown";
        /** When truthy, `classifySceneFailure` reports "replan" for a classified failure. */
        needsReplan?: boolean;
        /** Providers that were attempted before the failure. */
        attemptedProviders?: Array<string>;
    };

    /** Elapsed time of the scene run, in milliseconds. */
    durationMs: number;
}
|
|
78
|
-
/**
 * Looks at a scene outcome and tells the orchestrator what to do next.
 *
 * Possible answers:
 * - `"replan"`  — the agent should re-plan the scene (safety / bad input)
 * - `"degrade"` — switch to a cheaper strategy (e.g. T2/T3 chain exhausted on quota)
 * - `"fail"`    — terminal; surface to the user (auth / chain-exhausted-unknown)
 * - `"retry"`   — transient; the caller may re-run the scene unchanged later
 * - `null`      — the outcome is not a failure (`outcome.ok` is truthy)
 *
 * @param outcome - The scene outcome to inspect.
 * @returns The next orchestration step, or `null` for a successful outcome.
 */
export declare function classifySceneFailure(
    outcome: SceneOutcome,
): "replan" | "degrade" | "fail" | "retry" | null;
|
|
87
|
-
/**
 * Optional configuration accepted by the `Executor` constructor. Every field
 * may be omitted.
 */
export interface ExecutorOptions {
    /**
     * Upper bound on scenes running in parallel. Generation providers
     * rate-limit themselves; this is an extra guard on top.
     */
    concurrency?: number;

    /** Replacement tool dispatcher, e.g. a recording wrapper (runToolRecorded) for eval / replay. */
    runTool?: RunToolFn;

    /**
     * Stage 3 hook, called once each scene finishes, whether it succeeded or
     * failed. It lets the orchestrator stream incremental Project updates
     * into Firestore so the editor + agent drawer surfaces light up scene by
     * scene instead of waiting for the whole executor run. Errors thrown by
     * the callback are logged and swallowed, so a Firestore blip cannot crash
     * the executor.
     */
    onSceneComplete?: (sceneIndex: number, outcome: SceneOutcome) => Promise<void>;

    /**
     * Stage 7 slice 1: read side of the provider task-id cache. If the
     * upstream inputs for animate_image / generate_video /
     * generate_avatar_video hash to the same value as a cached entry that is
     * still within TTL, the executor returns the cached taskId + modelKey
     * and skips the tool call. /execute supplies the project's cache so that
     * re-runs after downstream (poll/stitch/critic) bugs reuse the earlier
     * paid generations. /regenerate-scene deliberately leaves this out (the
     * user wants a fresh gen) but still mints.
     */
    taskCache?: ProviderTaskCache;

    /**
     * Stage 7 slice 1: write side of the cache. Called after a successful
     * tool call that produced a pendingTaskId. The orchestrator persists the
     * entry on the AgentProject doc so the NEXT execute can replay it.
     * Errors are swallowed: the cache is a best-effort optimisation, not a
     * correctness requirement.
     */
    onTaskMint?: (clipId: string, entry: ProviderTaskCacheEntry) => Promise<void>;
}
|
|
119
|
-
/**
 * Runs the scenes of a `VideoPlan` and reports one `SceneOutcome` per scene.
 * The tool used for a scene is dictated by the scene's strategy; the executor
 * handles dispatch, bounded parallelism and idempotency-key generation.
 */
export declare class Executor {
    /** Maximum number of scenes processed in parallel. */
    private readonly concurrency;
    /** Tool dispatcher used for every tool call. */
    private readonly runTool;
    /** Optional per-scene completion hook. */
    private readonly onSceneComplete?;
    /** Optional provider task cache (read side). */
    private readonly taskCache?;
    /** Optional cache-write hook. */
    private readonly onTaskMint?;

    /**
     * @param opts - Optional settings; see `ExecutorOptions`.
     */
    constructor(opts?: ExecutorOptions);

    /**
     * Stage 7 slice 1 helper. Hashes the upstream inputs and looks the clip up
     * in the task cache, yielding both the lookup result and the computed
     * hash. A hit is logged so the trace shows where a paid generation was
     * avoided.
     */
    private checkTaskCache;

    /**
     * Best-effort cache mint. Errors from the callback are swallowed because
     * a Firestore blip must not fail a scene whose tool call already
     * succeeded and produced a real taskId.
     */
    private mintTaskCache;

    /**
     * Executes every scene of the plan.
     *
     * @param plan - The plan whose scenes are run.
     * @param ctx - Tool context shared by all scenes of this run.
     * @returns One outcome per scene, in the order of `plan.scenes`.
     */
    run(plan: VideoPlan, ctx: ToolContext): Promise<Array<SceneOutcome>>;

    /** Runs one scene: its visual, then (where applicable) its voiceover. */
    private runScene;

    /** Produces the visual asset for a scene according to its strategy. */
    private runVisual;
}
|
|
141
|
-
//# sourceMappingURL=executor.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"executor.d.ts","sourceRoot":"","sources":["../../../src/services/agent/executor.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,YAAY,EAAE,SAAS,EAAE,MAAM,gCAAgC,CAAC;AAIzE,OAAO,EAA6B,WAAW,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAC;AAGrF,OAAO,EAGL,iBAAiB,EACjB,sBAAsB,EACvB,MAAM,qBAAqB,CAAC;AA2D7B,MAAM,MAAM,SAAS,GAAG,CAAC,CAAC,GAAG,OAAO,EAClC,IAAI,EAAE,MAAM,EACZ,KAAK,EAAE,OAAO,EACd,GAAG,EAAE,WAAW,KACb,OAAO,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC;AAE7B;;;;;;;;;GASG;AAEH,MAAM,WAAW,gBAAgB;IAC/B,gEAAgE;IAChE,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,gEAAgE;IAChE,oBAAoB,CAAC,EAAE,MAAM,CAAC;IAC9B;;;;;OAKG;IACH,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,2EAA2E;IAC3E,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,gGAAgG;IAChG,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,gGAAgG;IAChG,kBAAkB,CAAC,EAAE,MAAM,EAAE,CAAC;IAC9B,sBAAsB;IACtB,IAAI,EAAE,MAAM,CAAC;IACb,+DAA+D;IAC/D,cAAc,EAAE,MAAM,CAAC;IACvB;;;;OAIG;IACH,mBAAmB,CAAC,EAAE,MAAM,CAAC;IAC7B;;;;;;;OAOG;IACH,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B;;;;OAIG;IACH,mBAAmB,CAAC,EAAE,OAAO,CAAC;CAC/B;AAED,MAAM,WAAW,YAAY;IAC3B,KAAK,EAAE,YAAY,CAAC;IACpB,EAAE,EAAE,OAAO,CAAC;IACZ,MAAM,CAAC,EAAE,gBAAgB,CAAC;IAC1B,KAAK,CAAC,EAAE;QACN,IAAI,EAAE,MAAM,CAAC;QACb,OAAO,EAAE,MAAM,CAAC;QAChB;;;;;WAKG;QACH,cAAc,CAAC,EACX,WAAW,GACX,YAAY,GACZ,YAAY,GACZ,QAAQ,GACR,MAAM,GACN,OAAO,GACP,OAAO,GACP,SAAS,CAAC;QACd,WAAW,CAAC,EAAE,OAAO,CAAC;QACtB,kBAAkB,CAAC,EAAE,MAAM,EAAE,CAAC;KAC/B,CAAC;IACF,UAAU,EAAE,MAAM,CAAC;CACpB;AAED;;;;;;;GAOG;AACH,wBAAgB,oBAAoB,CAClC,OAAO,EAAE,YAAY,GACpB,QAAQ,GAAG,SAAS,GAAG,MAAM,GAAG,OAAO,GAAG,IAAI,CAUhD;AAED,MAAM,WAAW,eAAe;IAC9B,qGAAqG;IACrG,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,2EAA2E;IAC3E,OAAO,CAAC,EAAE,SAAS,CAAC;IACpB;;;;;;;OAOG;IACH,eAAe,CAAC,EAAE,CAAC,UAAU,EAAE,MAAM,EAAE,OAAO,EAAE,YAAY,KAAK,OAAO,CAAC,IAAI,CAAC,CAAC;IAC/E;;;;;;;;OAQG;IACH,SAAS,CAAC,EAAE,iBAAiB,CAAC;IAC9B;;;;;OAKG;IACH,UAAU,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,sBAAsB,KAAK,OAAO,CAAC,IAAI,CAAC,CAAC;CAC/E;AAiDD,qBAAa,QAAQ;IACnB,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAS;IACrC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAY;IACpC,OAAO,CAAC,QAAQ,CAAC,eAAe,CAA
C,CAAqC;IACtE,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAoB;IAC/C,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAgC;gBAEhD,IAAI,GAAE,eAAoB;IAQtC;;;;OAIG;IACH,OAAO,CAAC,cAAc;IAmBtB;;;OAGG;YACW,aAAa;IAerB,GAAG,CAAC,IAAI,EAAE,SAAS,EAAE,GAAG,EAAE,WAAW,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC;YAiCvD,QAAQ;YA4FR,SAAS;CAsVxB"}
|
|
@@ -1,561 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.Executor = void 0;
|
|
4
|
-
exports.classifySceneFailure = classifySceneFailure;
|
|
5
|
-
const voices_1 = require("../../globals/ttsModels/voices");
|
|
6
|
-
const logger_1 = require("../../utils/logger");
|
|
7
|
-
const referenceImageRenderer_1 = require("./referenceImageRenderer");
|
|
8
|
-
const toolRegistry_1 = require("./toolRegistry");
|
|
9
|
-
const ttsDuration_1 = require("./ttsDuration");
|
|
10
|
-
const chains_1 = require("./providerFallback/chains");
|
|
11
|
-
const providerTaskCache_1 = require("./providerTaskCache");
|
|
12
|
-
/**
 * AG-37: planners (LLMs) keep inventing voice IDs such as
 * "female-young-adult-en" or "narrator-1" that look plausible but exist in no
 * catalog. When one of those reaches ElevenLabs the API answers 400 and the
 * voiceover is dropped, so the final video ships muted. Unknown descriptive
 * IDs are therefore stripped here so the provider falls back to its default
 * voice.
 */
const ELEVENLABS_VOICE_IDS = new Set(voices_1.ELEVENLABS_VOICES.map((v) => v.id));
/**
 * MiniMax system voice IDs that start with the same words the hallucination
 * heuristic below looks for ("male-", "female-", "presenter_"). They must be
 * let through explicitly, otherwise a valid ID like "male-qn-qingse" would be
 * stripped as if the planner had invented it.
 * NOTE(review): the list mirrors MiniMax's published system voices, including
 * the optional "-jingpin" variants — confirm against the project's own
 * MiniMax catalog if one exists.
 */
const MINIMAX_SYSTEM_VOICE_ID = /^(?:male-qn-(?:qingse|jingying|badao|daxuesheng)|female-(?:shaonv|yujie|chengshu|tianmei)|presenter_(?:male|female))(?:-jingpin)?$/;
/**
 * Validates a planner-supplied voice ID before it is sent to a TTS provider.
 *
 * @param voiceId - Voice ID taken from the plan; may be empty or undefined.
 * @returns The ID unchanged when it is a known ElevenLabs ID, a known MiniMax
 *   system voice, or does not look like an invented descriptive label;
 *   `undefined` when it is empty or looks like an LLM hallucination.
 */
function sanitizeVoiceId(voiceId) {
    if (!voiceId) {
        return undefined;
    }
    if (ELEVENLABS_VOICE_IDS.has(voiceId)) {
        return voiceId;
    }
    // Real MiniMax IDs collide with the prefix heuristic, so accept them first.
    if (MINIMAX_SYSTEM_VOICE_ID.test(voiceId)) {
        return voiceId;
    }
    // Heuristic for the remaining non-ElevenLabs providers (openai = "alloy"/"echo"/...).
    // Only strings that start with a descriptive word followed by "-" or "_"
    // are treated as LLM-invented labels and removed.
    if (/^(female|male|narrator|presenter|voice)[-_]/i.test(voiceId)) {
        logger_1.logger.warn("executor: stripping invalid voiceId (LLM hallucination)", { voiceId });
        return undefined;
    }
    return voiceId;
}
|
|
33
|
-
/**
 * AG-31: voice_settings presets for ElevenLabs, keyed by the brief-level voice
 * style adjective.
 *
 * - `style` is the exaggeration param (use sparingly above 0.5 — artifacts)
 * - `stability` is the consistency knob (low = more emotive, high = monotone)
 * - `speed` is the speaking rate (0.7-1.2)
 *
 * Styles without an entry ("neutral", undefined, anything unrecognised) get no
 * overrides, leaving ElevenLabs' own defaults (style=0, stability=0.5,
 * speed=1.0) in effect. The per-persona tuning is deliberately conservative:
 * slightly flat is preferred over artifact-laden.
 */
const VOICE_STYLE_PRESETS = new Map([
    ["calm", { style: 0.15, stability: 0.7, speed: 0.95 }],
    ["warm", { style: 0.25, stability: 0.6, speed: 1.0 }],
    ["casual", { style: 0.3, stability: 0.5, speed: 1.0 }],
    ["excited", { style: 0.55, stability: 0.35, speed: 1.05 }],
    ["energetic", { style: 0.6, stability: 0.35, speed: 1.08 }],
    ["happy", { style: 0.5, stability: 0.4, speed: 1.05 }],
    ["serious", { style: 0.2, stability: 0.75, speed: 0.95 }],
    ["dramatic", { style: 0.65, stability: 0.4, speed: 0.95 }],
    ["whisper", { style: 0.2, stability: 0.85, speed: 0.9 }],
]);
/**
 * Maps a voice style adjective to ElevenLabs voice_settings overrides.
 *
 * @param style - Voice style adjective; may be undefined.
 * @returns A fresh object with `style`, `stability` and `speed` for a known
 *   style, or an empty object when the style has no preset.
 */
function resolveVoiceSettings(style) {
    const preset = VOICE_STYLE_PRESETS.get(style);
    // Hand out a copy so callers never share (or mutate) the preset table.
    return preset === undefined ? {} : { ...preset };
}
|
|
67
|
-
/**
|
|
68
|
-
* Inspect a failed scene to decide the next orchestration step.
|
|
69
|
-
*
|
|
70
|
-
* - "replan" : agent should re-plan the scene (safety / bad input)
|
|
71
|
-
* - "degrade" : try a cheaper strategy (e.g. T2/T3 chain exhausted on quota)
|
|
72
|
-
* - "fail" : terminal — surface to user (auth / chain-exhausted-unknown)
|
|
73
|
-
* - "retry" : transient — caller may re-run the scene as-is later
|
|
74
|
-
*/
|
|
75
|
-
function classifySceneFailure(outcome) {
|
|
76
|
-
if (outcome.ok)
|
|
77
|
-
return null;
|
|
78
|
-
const c = outcome.error?.classification;
|
|
79
|
-
if (!c)
|
|
80
|
-
return "fail";
|
|
81
|
-
if (outcome.error?.needsReplan)
|
|
82
|
-
return "replan"; // safety / input
|
|
83
|
-
if (c === "auth")
|
|
84
|
-
return "fail";
|
|
85
|
-
if (c === "quota")
|
|
86
|
-
return "degrade";
|
|
87
|
-
if (c === "rate_limit" || c === "transient")
|
|
88
|
-
return "retry";
|
|
89
|
-
if (c === "capability")
|
|
90
|
-
return "degrade";
|
|
91
|
-
return "fail";
|
|
92
|
-
}
|
|
93
|
-
/**
 * OD-T7 / AG-42 — keeps the planner's `preferredModel` usable when the scene
 * duration is only slightly off from what the model accepts.
 *
 * What happens, based on the result of `snapDurationForModel`:
 *   - A snap result is returned → its `snappedSec` is used. When the drift
 *     exceeds 0.01 s the adjustment is logged at info level.
 *   - No snap result → the planned duration is used unchanged. If the model
 *     was an explicit planner pick, a warning is logged so the drop is
 *     visible; the intent is that the capability filter then rejects the
 *     model and the chain fallback picks another one.
 *
 * This helper never throws on its own: throwing would crash a scene mid-run
 * and lose the rest of the planner output.
 *
 * @param modelKey - Model the duration must fit.
 * @param plannedDurationSec - Duration the plan asked for, in seconds.
 * @param explicit - True when `modelKey` came from the planner's preferredModel.
 * @param sceneIndex - Scene index, used only for logging.
 * @returns The duration in seconds to request from the model.
 */
function resolveDurationForPreferredModel(modelKey, plannedDurationSec, explicit, sceneIndex) {
    const snapResult = (0, chains_1.snapDurationForModel)(modelKey, plannedDurationSec);
    if (snapResult) {
        const driftIsNoticeable = snapResult.driftSec > 0.01;
        if (driftIsNoticeable) {
            const logFields = {
                sceneIndex,
                modelKey,
                from: snapResult.requestedSec,
                to: snapResult.snappedSec,
                driftSec: Number(snapResult.driftSec.toFixed(2)),
            };
            logger_1.logger.info("executor: snapped duration to fit preferredModel", logFields);
        }
        return snapResult.snappedSec;
    }
    // No usable snap: keep the planned value and let the fallback chain decide.
    if (explicit) {
        const logFields = { sceneIndex, modelKey, plannedDurationSec };
        logger_1.logger.warn("executor: preferredModel duration outside snap tolerance — chain fallback will pick", logFields);
    }
    return plannedDurationSec;
}
|
|
134
|
-
class Executor {
|
|
135
|
-
constructor(opts = {}) {
|
|
136
|
-
this.concurrency = opts.concurrency ?? 4;
|
|
137
|
-
this.runTool = opts.runTool ?? toolRegistry_1.runTool;
|
|
138
|
-
this.onSceneComplete = opts.onSceneComplete;
|
|
139
|
-
this.taskCache = opts.taskCache;
|
|
140
|
-
this.onTaskMint = opts.onTaskMint;
|
|
141
|
-
}
|
|
142
|
-
/**
|
|
143
|
-
* Stage 7 slice 1 helper. Returns cached pending-task output if the
|
|
144
|
-
* upstream input hash matches and entry is within TTL; null otherwise.
|
|
145
|
-
* Logs the hit so trace shows where $$ was saved.
|
|
146
|
-
*/
|
|
147
|
-
checkTaskCache(clipId, tool, input) {
|
|
148
|
-
const inputsHash = (0, providerTaskCache_1.hashTaskInputs)(input);
|
|
149
|
-
const hit = (0, providerTaskCache_1.getCachedTask)(this.taskCache, clipId, inputsHash);
|
|
150
|
-
if (hit) {
|
|
151
|
-
logger_1.logger.info("executor: task-cache hit", {
|
|
152
|
-
clipId,
|
|
153
|
-
tool,
|
|
154
|
-
modelKey: hit.modelKey,
|
|
155
|
-
taskId: hit.taskId,
|
|
156
|
-
ageMs: Date.now() - hit.createdAt,
|
|
157
|
-
});
|
|
158
|
-
}
|
|
159
|
-
return { hit, inputsHash };
|
|
160
|
-
}
|
|
161
|
-
/**
|
|
162
|
-
* Best-effort mint. Swallows callback errors — a Firestore blip must not
|
|
163
|
-
* fail the scene (the tool already succeeded and produced a real taskId).
|
|
164
|
-
*/
|
|
165
|
-
async mintTaskCache(clipId, entry) {
|
|
166
|
-
if (!this.onTaskMint)
|
|
167
|
-
return;
|
|
168
|
-
try {
|
|
169
|
-
await this.onTaskMint(clipId, entry);
|
|
170
|
-
}
|
|
171
|
-
catch (err) {
|
|
172
|
-
logger_1.logger.warn("executor: onTaskMint callback threw — swallowed", {
|
|
173
|
-
clipId,
|
|
174
|
-
err: err.message,
|
|
175
|
-
});
|
|
176
|
-
}
|
|
177
|
-
}
|
|
178
|
-
async run(plan, ctx) {
|
|
179
|
-
const queue = [...plan.scenes];
|
|
180
|
-
const results = new Array(plan.scenes.length);
|
|
181
|
-
const workers = [];
|
|
182
|
-
let cursor = 0;
|
|
183
|
-
const worker = async () => {
|
|
184
|
-
while (true) {
|
|
185
|
-
const i = cursor++;
|
|
186
|
-
if (i >= queue.length)
|
|
187
|
-
return;
|
|
188
|
-
const scene = queue[i];
|
|
189
|
-
const outcome = await this.runScene(scene, ctx, plan.aspect, plan);
|
|
190
|
-
results[i] = outcome;
|
|
191
|
-
if (this.onSceneComplete) {
|
|
192
|
-
try {
|
|
193
|
-
await this.onSceneComplete(scene.sceneIndex, outcome);
|
|
194
|
-
}
|
|
195
|
-
catch (err) {
|
|
196
|
-
logger_1.logger.warn("executor: onSceneComplete callback threw — swallowed", {
|
|
197
|
-
sceneIndex: scene.sceneIndex,
|
|
198
|
-
err: err.message,
|
|
199
|
-
});
|
|
200
|
-
}
|
|
201
|
-
}
|
|
202
|
-
}
|
|
203
|
-
};
|
|
204
|
-
for (let i = 0; i < Math.min(this.concurrency, queue.length); i++) {
|
|
205
|
-
workers.push(worker());
|
|
206
|
-
}
|
|
207
|
-
await Promise.all(workers);
|
|
208
|
-
return results;
|
|
209
|
-
}
|
|
210
|
-
async runScene(scene, ctx, aspect, plan) {
|
|
211
|
-
const start = Date.now();
|
|
212
|
-
const idempotencyKey = `${ctx.agentRunId}:scene-${scene.sceneIndex}`;
|
|
213
|
-
const sceneCtx = { ...ctx, idempotencyKey };
|
|
214
|
-
const visualOutcome = await this.runVisual(scene, sceneCtx, aspect, plan);
|
|
215
|
-
if (!visualOutcome.ok) {
|
|
216
|
-
return {
|
|
217
|
-
scene,
|
|
218
|
-
ok: false,
|
|
219
|
-
error: visualOutcome.error,
|
|
220
|
-
durationMs: Date.now() - start,
|
|
221
|
-
};
|
|
222
|
-
}
|
|
223
|
-
let voiceoverAudioBase64;
|
|
224
|
-
let voiceoverEstimateMs;
|
|
225
|
-
let voiceoverOverBudget = false;
|
|
226
|
-
// talking-head-avatar bakes the VO into the rendered MP4; running TTS again
|
|
227
|
-
// here would just waste credits and create a duplicate audio track.
|
|
228
|
-
if (scene.voiceoverLine && scene.strategy !== "talking-head-avatar") {
|
|
229
|
-
// AG-31: brief.voice.style → ElevenLabs voice_settings.
|
|
230
|
-
// AG-28: if estimated VO > scene.durationMs, speed up TTS within natural
|
|
231
|
-
// limits (≤1.15× extra over style.speed) before falling back to a
|
|
232
|
-
// freeze-frame-pad signal for the stitcher.
|
|
233
|
-
// AG-44: per-scene voiceStyle wins over plan-level when present, so the
|
|
234
|
-
// emotional arc can shift across the timeline (frustrated → curious → excited).
|
|
235
|
-
const effectiveVoiceStyle = (scene.voiceStyle ?? plan.voiceStyle);
|
|
236
|
-
const baseSettings = resolveVoiceSettings(effectiveVoiceStyle);
|
|
237
|
-
const baseSpeed = baseSettings.speed ?? 1.0;
|
|
238
|
-
const rawEstMs = (0, ttsDuration_1.estimateTtsMs)(scene.voiceoverLine);
|
|
239
|
-
const budgetMs = scene.durationMs * 1.05;
|
|
240
|
-
// Adjusted speed: only kick in if natural speech would overrun. Cap the
|
|
241
|
-
// speedup multiplier at 1.15× style-base; faster than that sounds rushed.
|
|
242
|
-
let speed = baseSpeed;
|
|
243
|
-
if (rawEstMs > budgetMs) {
|
|
244
|
-
const needed = rawEstMs / budgetMs;
|
|
245
|
-
const cappedMultiplier = Math.min(1.15, needed);
|
|
246
|
-
speed = +(baseSpeed * cappedMultiplier).toFixed(2);
|
|
247
|
-
}
|
|
248
|
-
const adjustedEstMs = rawEstMs * (baseSpeed / speed);
|
|
249
|
-
voiceoverEstimateMs = Math.round(adjustedEstMs);
|
|
250
|
-
voiceoverOverBudget = adjustedEstMs > budgetMs;
|
|
251
|
-
const voInput = {
|
|
252
|
-
text: scene.voiceoverLine,
|
|
253
|
-
...baseSettings,
|
|
254
|
-
// Override speed when AG-28 budget forced an adjustment.
|
|
255
|
-
speed: +Math.max(0.7, Math.min(1.2, speed)).toFixed(2),
|
|
256
|
-
};
|
|
257
|
-
const sanitized = sanitizeVoiceId(plan.voiceId);
|
|
258
|
-
if (sanitized)
|
|
259
|
-
voInput.voiceId = sanitized;
|
|
260
|
-
const voOutcome = await this.runTool("generate_voiceover", voInput, { ...sceneCtx, idempotencyKey: `${idempotencyKey}:vo` });
|
|
261
|
-
if (voOutcome.ok) {
|
|
262
|
-
voiceoverAudioBase64 = voOutcome.output.audioBase64;
|
|
263
|
-
}
|
|
264
|
-
else {
|
|
265
|
-
// AG-37: don't swallow silently — final video shipping muted with no
|
|
266
|
-
// log was the original bug. Surface the error in the result so the
|
|
267
|
-
// trace shows WHY VO is missing.
|
|
268
|
-
logger_1.logger.warn("executor: voiceover tool failed", {
|
|
269
|
-
sceneIndex: scene.sceneIndex,
|
|
270
|
-
error: voOutcome.error,
|
|
271
|
-
voiceIdRequested: plan.voiceId,
|
|
272
|
-
voiceIdSent: sanitized,
|
|
273
|
-
});
|
|
274
|
-
}
|
|
275
|
-
// Don't fail the whole scene on VO miss — host can re-run just the VO.
|
|
276
|
-
}
|
|
277
|
-
return {
|
|
278
|
-
scene,
|
|
279
|
-
ok: true,
|
|
280
|
-
result: {
|
|
281
|
-
...visualOutcome.output,
|
|
282
|
-
voiceoverAudioBase64,
|
|
283
|
-
voiceoverEstimateMs,
|
|
284
|
-
voiceoverOverBudget,
|
|
285
|
-
idempotencyKey,
|
|
286
|
-
},
|
|
287
|
-
durationMs: Date.now() - start,
|
|
288
|
-
};
|
|
289
|
-
}
|
|
290
|
-
async runVisual(scene, ctx, aspect, plan) {
|
|
291
|
-
const durationSec = Math.max(2, Math.round(scene.durationMs / 1000));
|
|
292
|
-
// AG-13: collect reference images for this scene's bible entities. Empty
|
|
293
|
-
// array when bible is C0/C1 or no entities are tagged — image-gen falls
|
|
294
|
-
// back to pure text prompt.
|
|
295
|
-
const referenceImageUrls = (0, referenceImageRenderer_1.collectReferenceImageUrls)(plan.bible, scene.bibleEntityIds ?? []);
|
|
296
|
-
const wrap = (tool, outcome) => outcome.ok
|
|
297
|
-
? { ok: true, output: { tool, ...outcome.output } }
|
|
298
|
-
: { ok: false, error: outcome.error };
|
|
299
|
-
switch (scene.strategy) {
|
|
300
|
-
case "stock-video":
|
|
301
|
-
case "stock-image-ken-burns": {
|
|
302
|
-
const out = await this.runTool("search_stock", {
|
|
303
|
-
query: scene.prompt,
|
|
304
|
-
kind: scene.strategy === "stock-video" ? "video" : "image",
|
|
305
|
-
aspectRatio: aspect,
|
|
306
|
-
minDurationSec: scene.strategy === "stock-video" ? durationSec : undefined,
|
|
307
|
-
}, ctx);
|
|
308
|
-
if (!out.ok)
|
|
309
|
-
return wrap("search_stock", out);
|
|
310
|
-
const url = out.output.assets[0]?.url;
|
|
311
|
-
if (!url)
|
|
312
|
-
return {
|
|
313
|
-
ok: false,
|
|
314
|
-
error: {
|
|
315
|
-
code: "NO_STOCK_RESULTS",
|
|
316
|
-
message: `No stock results for: ${scene.prompt}`,
|
|
317
|
-
attemptedProviders: out.output.attemptedProviders,
|
|
318
|
-
},
|
|
319
|
-
};
|
|
320
|
-
return {
|
|
321
|
-
ok: true,
|
|
322
|
-
output: {
|
|
323
|
-
tool: "search_stock",
|
|
324
|
-
visualUrl: url,
|
|
325
|
-
attemptedProviders: out.output.attemptedProviders,
|
|
326
|
-
},
|
|
327
|
-
};
|
|
328
|
-
}
|
|
329
|
-
case "ai-image-static":
|
|
330
|
-
case "ai-image-motion": {
|
|
331
|
-
const out = await this.runTool("generate_image", {
|
|
332
|
-
prompt: scene.prompt,
|
|
333
|
-
modelKey: scene.preferredModel ?? "google-nano-banana",
|
|
334
|
-
aspectRatio: aspect,
|
|
335
|
-
...(referenceImageUrls.length
|
|
336
|
-
? { inputImageUrls: referenceImageUrls }
|
|
337
|
-
: {}),
|
|
338
|
-
}, ctx);
|
|
339
|
-
if (!out.ok)
|
|
340
|
-
return wrap("generate_image", out);
|
|
341
|
-
return { ok: true, output: { tool: "generate_image", visualUrl: out.output.imageUrl } };
|
|
342
|
-
}
|
|
343
|
-
case "ai-image-to-video": {
|
|
344
|
-
const animateModelKey = scene.preferredModel ?? "kling-v2.6";
|
|
345
|
-
const animateDurationSec = resolveDurationForPreferredModel(animateModelKey, durationSec, !!scene.preferredModel, scene.sceneIndex);
|
|
346
|
-
// Stage 7 slice 1: cache key spans the WHOLE strategy (both image-gen
|
|
347
|
-
// + animate). Hashing only upstream-stable inputs — prompt, model,
|
|
348
|
-
// duration, aspect, reference images — lets a re-execute skip BOTH
|
|
349
|
-
// tool calls, replaying the cached animate taskId through the poller.
|
|
350
|
-
const clipId = `scene-${scene.sceneIndex}-visual`;
|
|
351
|
-
const cacheInput = {
|
|
352
|
-
strategy: "ai-image-to-video",
|
|
353
|
-
prompt: scene.prompt,
|
|
354
|
-
modelKey: animateModelKey,
|
|
355
|
-
durationSec: animateDurationSec,
|
|
356
|
-
aspect,
|
|
357
|
-
referenceImageUrls,
|
|
358
|
-
};
|
|
359
|
-
const { hit, inputsHash } = this.checkTaskCache(clipId, "animate_image", cacheInput);
|
|
360
|
-
if (hit) {
|
|
361
|
-
return {
|
|
362
|
-
ok: true,
|
|
363
|
-
output: {
|
|
364
|
-
tool: "animate_image",
|
|
365
|
-
pendingTaskId: hit.taskId,
|
|
366
|
-
pendingModelKey: hit.modelKey,
|
|
367
|
-
},
|
|
368
|
-
};
|
|
369
|
-
}
|
|
370
|
-
const img = await this.runTool("generate_image", {
|
|
371
|
-
prompt: scene.prompt,
|
|
372
|
-
modelKey: "google-nano-banana",
|
|
373
|
-
aspectRatio: aspect,
|
|
374
|
-
...(referenceImageUrls.length
|
|
375
|
-
? { inputImageUrls: referenceImageUrls }
|
|
376
|
-
: {}),
|
|
377
|
-
}, { ...ctx, idempotencyKey: `${ctx.idempotencyKey}:img` });
|
|
378
|
-
if (!img.ok)
|
|
379
|
-
return wrap("generate_image", img);
|
|
380
|
-
const animate = await this.runTool("animate_image", {
|
|
381
|
-
imageUrl: img.output.imageUrl,
|
|
382
|
-
motionHint: scene.prompt,
|
|
383
|
-
durationSec: animateDurationSec,
|
|
384
|
-
modelKey: animateModelKey,
|
|
385
|
-
}, ctx);
|
|
386
|
-
if (!animate.ok)
|
|
387
|
-
return wrap("animate_image", animate);
|
|
388
|
-
const mintedModelKey = animate.output.modelKeyUsed ?? animateModelKey;
|
|
389
|
-
await this.mintTaskCache(clipId, {
|
|
390
|
-
tool: "animate_image",
|
|
391
|
-
taskId: animate.output.taskId,
|
|
392
|
-
modelKey: mintedModelKey,
|
|
393
|
-
inputsHash,
|
|
394
|
-
createdAt: Date.now(),
|
|
395
|
-
});
|
|
396
|
-
return {
|
|
397
|
-
ok: true,
|
|
398
|
-
output: {
|
|
399
|
-
tool: "animate_image",
|
|
400
|
-
pendingTaskId: animate.output.taskId,
|
|
401
|
-
pendingModelKey: mintedModelKey,
|
|
402
|
-
attemptedProviders: animate.output.attemptedProviders,
|
|
403
|
-
},
|
|
404
|
-
};
|
|
405
|
-
}
|
|
406
|
-
case "ai-text-to-video": {
|
|
407
|
-
const t2vModelKey = scene.preferredModel ?? "kling-v2.6";
|
|
408
|
-
const t2vDurationSec = resolveDurationForPreferredModel(t2vModelKey, durationSec, !!scene.preferredModel, scene.sceneIndex);
|
|
409
|
-
const clipId = `scene-${scene.sceneIndex}-visual`;
|
|
410
|
-
const cacheInput = {
|
|
411
|
-
strategy: "ai-text-to-video",
|
|
412
|
-
prompt: scene.prompt,
|
|
413
|
-
modelKey: t2vModelKey,
|
|
414
|
-
durationSec: t2vDurationSec,
|
|
415
|
-
};
|
|
416
|
-
const { hit, inputsHash } = this.checkTaskCache(clipId, "generate_video", cacheInput);
|
|
417
|
-
if (hit) {
|
|
418
|
-
return {
|
|
419
|
-
ok: true,
|
|
420
|
-
output: {
|
|
421
|
-
tool: "generate_video",
|
|
422
|
-
pendingTaskId: hit.taskId,
|
|
423
|
-
pendingModelKey: hit.modelKey,
|
|
424
|
-
},
|
|
425
|
-
};
|
|
426
|
-
}
|
|
427
|
-
const out = await this.runTool("generate_video", {
|
|
428
|
-
prompt: scene.prompt,
|
|
429
|
-
modelKey: t2vModelKey,
|
|
430
|
-
durationSec: t2vDurationSec,
|
|
431
|
-
}, ctx);
|
|
432
|
-
if (!out.ok)
|
|
433
|
-
return wrap("generate_video", out);
|
|
434
|
-
const mintedModelKey = out.output.modelKeyUsed ?? t2vModelKey;
|
|
435
|
-
await this.mintTaskCache(clipId, {
|
|
436
|
-
tool: "generate_video",
|
|
437
|
-
taskId: out.output.taskId,
|
|
438
|
-
modelKey: mintedModelKey,
|
|
439
|
-
inputsHash,
|
|
440
|
-
createdAt: Date.now(),
|
|
441
|
-
});
|
|
442
|
-
return {
|
|
443
|
-
ok: true,
|
|
444
|
-
output: {
|
|
445
|
-
tool: "generate_video",
|
|
446
|
-
pendingTaskId: out.output.taskId,
|
|
447
|
-
pendingModelKey: mintedModelKey,
|
|
448
|
-
attemptedProviders: out.output.attemptedProviders,
|
|
449
|
-
},
|
|
450
|
-
};
|
|
451
|
-
}
|
|
452
|
-
case "user-asset": {
|
|
453
|
-
// Per-user library search. The host injects the searcher at boot
|
|
454
|
-
// (see userLibrarySearcher bootstrap); if no match, fall back to
|
|
455
|
-
// stock so the scene still renders rather than failing hard.
|
|
456
|
-
const out = await this.runTool("search_user_library", { query: scene.prompt, limit: 5 }, ctx);
|
|
457
|
-
if (out.ok) {
|
|
458
|
-
const first = out.output.assets[0];
|
|
459
|
-
if (first) {
|
|
460
|
-
return { ok: true, output: { tool: "search_user_library", visualUrl: first.url } };
|
|
461
|
-
}
|
|
462
|
-
}
|
|
463
|
-
// No hits or searcher missing — fall back to stock-video as the
|
|
464
|
-
// closest cheap substitute so the executor still produces an asset.
|
|
465
|
-
const stock = await this.runTool("search_stock", {
|
|
466
|
-
query: scene.prompt,
|
|
467
|
-
kind: "video",
|
|
468
|
-
aspectRatio: aspect,
|
|
469
|
-
minDurationSec: durationSec,
|
|
470
|
-
}, ctx);
|
|
471
|
-
if (!stock.ok)
|
|
472
|
-
return wrap("search_stock", stock);
|
|
473
|
-
const fallbackUrl = stock.output.assets[0]?.url;
|
|
474
|
-
if (!fallbackUrl) {
|
|
475
|
-
return {
|
|
476
|
-
ok: false,
|
|
477
|
-
error: {
|
|
478
|
-
code: "USER_ASSET_NO_MATCH",
|
|
479
|
-
message: `No user-library match for "${scene.prompt}" and no stock fallback.`,
|
|
480
|
-
attemptedProviders: stock.output.attemptedProviders,
|
|
481
|
-
},
|
|
482
|
-
};
|
|
483
|
-
}
|
|
484
|
-
return {
|
|
485
|
-
ok: true,
|
|
486
|
-
output: {
|
|
487
|
-
tool: "search_user_library",
|
|
488
|
-
visualUrl: fallbackUrl,
|
|
489
|
-
attemptedProviders: stock.output.attemptedProviders,
|
|
490
|
-
},
|
|
491
|
-
};
|
|
492
|
-
}
|
|
493
|
-
case "talking-head-avatar": {
|
|
494
|
-
if (!scene.avatarFaceUrl) {
|
|
495
|
-
return {
|
|
496
|
-
ok: false,
|
|
497
|
-
error: {
|
|
498
|
-
code: "AVATAR_FACE_REQUIRED",
|
|
499
|
-
message: "talking-head-avatar requires scene.avatarFaceUrl (set on the user's brand kit).",
|
|
500
|
-
},
|
|
501
|
-
};
|
|
502
|
-
}
|
|
503
|
-
if (!scene.voiceoverLine) {
|
|
504
|
-
return {
|
|
505
|
-
ok: false,
|
|
506
|
-
error: {
|
|
507
|
-
code: "AVATAR_VO_REQUIRED",
|
|
508
|
-
message: "talking-head-avatar requires scene.voiceoverLine for the lipsync audio.",
|
|
509
|
-
},
|
|
510
|
-
};
|
|
511
|
-
}
|
|
512
|
-
const avatarMode = scene.tier === "T3" ? "pro" : "std";
|
|
513
|
-
const clipId = `scene-${scene.sceneIndex}-visual`;
|
|
514
|
-
const cacheInput = {
|
|
515
|
-
strategy: "talking-head-avatar",
|
|
516
|
-
inputImageUrl: scene.avatarFaceUrl,
|
|
517
|
-
ttsText: scene.voiceoverLine,
|
|
518
|
-
mode: avatarMode,
|
|
519
|
-
};
|
|
520
|
-
const { hit, inputsHash } = this.checkTaskCache(clipId, "generate_avatar_video", cacheInput);
|
|
521
|
-
if (hit) {
|
|
522
|
-
return {
|
|
523
|
-
ok: true,
|
|
524
|
-
output: {
|
|
525
|
-
tool: "generate_avatar_video",
|
|
526
|
-
pendingTaskId: hit.taskId,
|
|
527
|
-
pendingModelKey: hit.modelKey,
|
|
528
|
-
},
|
|
529
|
-
};
|
|
530
|
-
}
|
|
531
|
-
const out = await this.runTool("generate_avatar_video", {
|
|
532
|
-
inputImageUrl: scene.avatarFaceUrl,
|
|
533
|
-
ttsText: scene.voiceoverLine,
|
|
534
|
-
mode: avatarMode,
|
|
535
|
-
}, ctx);
|
|
536
|
-
if (!out.ok)
|
|
537
|
-
return wrap("generate_avatar_video", out);
|
|
538
|
-
await this.mintTaskCache(clipId, {
|
|
539
|
-
tool: "generate_avatar_video",
|
|
540
|
-
taskId: out.output.taskId,
|
|
541
|
-
// Avatar tool hardcodes Kling Avatar internally; poller needs the
|
|
542
|
-
// actual model key to route via getAiGenProviderService (no special
|
|
543
|
-
// resolver exists). AG-20: was "generate_avatar_video" which would
|
|
544
|
-
// throw at factory lookup the moment the avatar path was polled.
|
|
545
|
-
modelKey: "kling-avatar",
|
|
546
|
-
inputsHash,
|
|
547
|
-
createdAt: Date.now(),
|
|
548
|
-
});
|
|
549
|
-
return {
|
|
550
|
-
ok: true,
|
|
551
|
-
output: {
|
|
552
|
-
tool: "generate_avatar_video",
|
|
553
|
-
pendingTaskId: out.output.taskId,
|
|
554
|
-
pendingModelKey: "kling-avatar",
|
|
555
|
-
},
|
|
556
|
-
};
|
|
557
|
-
}
|
|
558
|
-
}
|
|
559
|
-
}
|
|
560
|
-
}
|
|
561
|
-
exports.Executor = Executor;
|
|
@@ -1,61 +0,0 @@
|
|
|
1
|
-
import { z } from "zod";
|
|
2
|
-
import { ChatRequest, ChatResponse, LlmCaller, StructuredRequest, StructuredResponse } from "./llmCaller";
|
|
3
|
-
/**
|
|
4
|
-
* Vercel AI Gateway LlmCaller — OpenAI-compatible /v1/chat/completions surface.
|
|
5
|
-
* Single endpoint, multi-provider routing via "{provider}/{modelId}" model id.
|
|
6
|
-
*
|
|
7
|
-
* No SDK dependency: pure fetch keeps the shared package light. Structured
|
|
8
|
-
* output uses JSON-schema response_format (supported by gateway across
|
|
9
|
-
* Anthropic / OpenAI / Google providers — gateway normalises it).
|
|
10
|
-
*
|
|
11
|
-
* Reliability features (added Phase 1 of AGENT_TEST_PLAN):
|
|
12
|
-
* - F1 transport-level retry: each HTTP call retries 3× with exp backoff
|
|
13
|
-
 *     (250ms, then 1000ms, between the default 3 attempts) on transient errors (network drops, 408/425/429/5xx).
|
|
14
|
-
* - F2 model fallback: when a request supplies `fallbackModel` and the
|
|
15
|
-
* primary exhausts retries on a transient error, we attempt the fallback
|
|
16
|
-
* once (also with its own retry budget). Schema-validation failures and
|
|
17
|
-
* other deterministic errors do NOT trigger fallback.
|
|
18
|
-
*/
|
|
19
|
-
/**
 * Connection and reliability settings accepted by the GatewayLlmCaller
 * constructor. Only `baseUrl` and `apiKey` are required.
 */
export interface GatewayConfig {
    /**
     * Full base url, e.g. "https://ai-gateway.vercel.sh/v1". A single trailing
     * slash is stripped before "/chat/completions" is appended.
     */
    baseUrl: string;
    /** Bearer token sent in the `authorization` header. */
    apiKey: string;
    /** Per-request timeout in ms. Default 60000. */
    timeoutMs?: number;
    /** Optional fetch impl override (tests). Defaults to the global fetch. */
    fetchImpl?: typeof fetch;
    /**
     * Maximum number of attempts per model on transient errors, counting the
     * first try (so 3 means one initial call plus up to two retries). Default 3.
     */
    maxRetries?: number;
    /**
     * Base backoff in ms. The wait after the k-th failed attempt (0-based) is
     * base * 4^k, i.e. base, base*4, base*16, ... Default 250.
     */
    retryBaseMs?: number;
}
|
|
33
|
-
/**
 * Type surface of the gateway-backed LlmCaller. All requests go to the
 * gateway's "/chat/completions" endpoint.
 */
export declare class GatewayLlmCaller implements LlmCaller {
    private readonly cfg;
    private readonly fetchImpl;
    private readonly timeoutMs;
    private readonly maxRetries;
    private readonly retryBaseMs;
    constructor(cfg: GatewayConfig);
    /** Plain chat completion; resolves with the response text. */
    chat(req: ChatRequest): Promise<ChatResponse>;
    /** Completion constrained to a JSON schema and validated against `req.schema`. */
    structured<T extends z.ZodTypeAny>(req: StructuredRequest<T>): Promise<StructuredResponse<T>>;
    /**
     * Streaming variant of `structured`: `tokens` yields content deltas and
     * `result` settles with the validated object once the stream has ended.
     */
    structuredStream<T extends z.ZodTypeAny>(req: StructuredRequest<T>): {
        tokens: AsyncIterable<string>;
        result: Promise<StructuredResponse<T>>;
    };
    private createStreamingIterator;
    private modelId;
    private encodeMessage;
    private toDataUri;
    private usage;
    /**
     * POST /chat/completions with transient-error retry (F1).
     * Throws TransientLlmError once retries are exhausted on transient
     * failures; the caller catches it to trigger model fallback (F2).
     * Non-transient errors (4xx other than 408/425/429, malformed responses)
     * propagate as plain Error.
     */
    private postWithRetry;
    private withRetry;
    private postOnce;
}
|
|
61
|
-
//# sourceMappingURL=llmCallerGateway.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"llmCallerGateway.d.ts","sourceRoot":"","sources":["../../../src/services/agent/llmCallerGateway.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EACL,WAAW,EACX,YAAY,EACZ,SAAS,EACT,iBAAiB,EACjB,kBAAkB,EACnB,MAAM,aAAa,CAAC;AAGrB;;;;;;;;;;;;;;;GAeG;AAEH,MAAM,WAAW,aAAa;IAC5B,6DAA6D;IAC7D,OAAO,EAAE,MAAM,CAAC;IAChB,oBAAoB;IACpB,MAAM,EAAE,MAAM,CAAC;IACf,qCAAqC;IACrC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,4CAA4C;IAC5C,SAAS,CAAC,EAAE,OAAO,KAAK,CAAC;IACzB,mEAAmE;IACnE,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,4EAA4E;IAC5E,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AA2CD,qBAAa,gBAAiB,YAAW,SAAS;IAMpC,OAAO,CAAC,QAAQ,CAAC,GAAG;IALhC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAe;IACzC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAS;IACpC,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAS;gBAER,GAAG,EAAE,aAAa;IAOzC,IAAI,CAAC,GAAG,EAAE,WAAW,GAAG,OAAO,CAAC,YAAY,CAAC;IAkC7C,UAAU,CAAC,CAAC,SAAS,CAAC,CAAC,UAAU,EACrC,GAAG,EAAE,iBAAiB,CAAC,CAAC,CAAC,GACxB,OAAO,CAAC,kBAAkB,CAAC,CAAC,CAAC,CAAC;IAgEjC,gBAAgB,CAAC,CAAC,SAAS,CAAC,CAAC,UAAU,EACrC,GAAG,EAAE,iBAAiB,CAAC,CAAC,CAAC,GACxB;QACD,MAAM,EAAE,aAAa,CAAC,MAAM,CAAC,CAAC;QAC9B,MAAM,EAAE,OAAO,CAAC,kBAAkB,CAAC,CAAC,CAAC,CAAC,CAAC;KACxC;YAqBc,uBAAuB;IA4ItC,OAAO,CAAC,OAAO;YAID,aAAa;YAqBb,SAAS;IAcvB,OAAO,CAAC,KAAK;IAQb;;;;;OAKG;YACW,aAAa;YAIb,SAAS;YA+BT,QAAQ;CA6BvB"}
|
|
@@ -1,368 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.GatewayLlmCaller = void 0;
|
|
4
|
-
const zod_1 = require("zod");
|
|
5
|
-
/**
|
|
6
|
-
* Marks a transient error that should trigger HTTP retry (and, when exhausted,
|
|
7
|
-
 * model fallback). Non-transient errors (4xx other than 408/425/429, schema
|
|
8
|
-
* validation, malformed JSON) are NOT wrapped and propagate immediately.
|
|
9
|
-
*/
|
|
10
|
-
class TransientLlmError extends Error {
|
|
11
|
-
constructor(message, cause) {
|
|
12
|
-
super(message);
|
|
13
|
-
this.cause = cause;
|
|
14
|
-
this.name = "TransientLlmError";
|
|
15
|
-
}
|
|
16
|
-
}
|
|
17
|
-
// HTTP response statuses that the gateway caller reports as TransientLlmError
// (and therefore retries): 408, 425, 429 and the 500/502/503/504 server errors.
const TRANSIENT_HTTP_STATUSES = new Set([408, 425, 429, 500, 502, 503, 504]);
|
|
18
|
-
function isTransientNetworkError(err) {
|
|
19
|
-
if (!err || typeof err !== "object")
|
|
20
|
-
return false;
|
|
21
|
-
const e = err;
|
|
22
|
-
const code = e.code ?? e.cause?.code;
|
|
23
|
-
if (code && /^(ECONNRESET|ECONNREFUSED|ETIMEDOUT|EAI_AGAIN|EPIPE|UND_ERR_.*)$/.test(code)) {
|
|
24
|
-
return true;
|
|
25
|
-
}
|
|
26
|
-
// Our own per-request timeout (ctrl.abort() in postOnce / structuredStream)
|
|
27
|
-
// surfaces as AbortError "This operation was aborted". Treat as transient so
|
|
28
|
-
// the retry loop (and then the fallback model) gets a chance — otherwise a
|
|
29
|
-
// single slow Gemini Flash vision call sinks the entire critic stage with
|
|
30
|
-
// no recovery.
|
|
31
|
-
if (e.name === "AbortError")
|
|
32
|
-
return true;
|
|
33
|
-
const msg = String(e.message ?? "");
|
|
34
|
-
return /fetch failed|socket hang up|other side closed|network|timeout|operation was aborted/i.test(msg);
|
|
35
|
-
}
|
|
36
|
-
class GatewayLlmCaller {
|
|
37
|
-
constructor(cfg) {
|
|
38
|
-
this.cfg = cfg;
|
|
39
|
-
this.fetchImpl = cfg.fetchImpl ?? fetch;
|
|
40
|
-
this.timeoutMs = cfg.timeoutMs ?? 60000;
|
|
41
|
-
this.maxRetries = cfg.maxRetries ?? 3;
|
|
42
|
-
this.retryBaseMs = cfg.retryBaseMs ?? 250;
|
|
43
|
-
}
|
|
44
|
-
async chat(req) {
|
|
45
|
-
const attempted = [];
|
|
46
|
-
const run = async (model) => {
|
|
47
|
-
attempted.push(model);
|
|
48
|
-
const messages = await Promise.all(req.messages.map((m) => this.encodeMessage(m)));
|
|
49
|
-
const raw = await this.postWithRetry({
|
|
50
|
-
model: this.modelId(model),
|
|
51
|
-
messages,
|
|
52
|
-
temperature: req.temperature,
|
|
53
|
-
max_tokens: req.maxTokens,
|
|
54
|
-
stop: req.stop,
|
|
55
|
-
});
|
|
56
|
-
const text = raw.choices?.[0]?.message?.content ?? "";
|
|
57
|
-
return {
|
|
58
|
-
text: typeof text === "string" ? text : JSON.stringify(text),
|
|
59
|
-
usage: this.usage(raw),
|
|
60
|
-
requestId: raw.id,
|
|
61
|
-
};
|
|
62
|
-
};
|
|
63
|
-
try {
|
|
64
|
-
const out = await run(req.model);
|
|
65
|
-
return { ...out, attemptedModels: attempted, usedFallback: false };
|
|
66
|
-
}
|
|
67
|
-
catch (err) {
|
|
68
|
-
if (req.fallbackModel && err instanceof TransientLlmError) {
|
|
69
|
-
const out = await run(req.fallbackModel);
|
|
70
|
-
return { ...out, attemptedModels: attempted, usedFallback: true };
|
|
71
|
-
}
|
|
72
|
-
throw err;
|
|
73
|
-
}
|
|
74
|
-
}
|
|
75
|
-
async structured(req) {
|
|
76
|
-
const attempted = [];
|
|
77
|
-
const jsonSchema = zod_1.z.toJSONSchema(req.schema, { target: "draft-7" });
|
|
78
|
-
const run = async (model) => {
|
|
79
|
-
attempted.push(model);
|
|
80
|
-
const messages = await Promise.all(req.messages.map((m) => this.encodeMessage(m)));
|
|
81
|
-
const raw = await this.postWithRetry({
|
|
82
|
-
model: this.modelId(model),
|
|
83
|
-
messages,
|
|
84
|
-
temperature: req.temperature,
|
|
85
|
-
max_tokens: req.maxTokens,
|
|
86
|
-
response_format: {
|
|
87
|
-
type: "json_schema",
|
|
88
|
-
json_schema: {
|
|
89
|
-
name: req.schemaName,
|
|
90
|
-
schema: jsonSchema,
|
|
91
|
-
strict: true,
|
|
92
|
-
},
|
|
93
|
-
},
|
|
94
|
-
});
|
|
95
|
-
const txt = raw.choices?.[0]?.message?.content ?? "";
|
|
96
|
-
if (!txt || typeof txt !== "string") {
|
|
97
|
-
// Empty body from the model is not retryable across providers —
|
|
98
|
-
// surface immediately; planner-level retry can re-prompt.
|
|
99
|
-
throw new Error(`GatewayLlmCaller.structured: empty response for schema ${req.schemaName}`);
|
|
100
|
-
}
|
|
101
|
-
let parsed;
|
|
102
|
-
try {
|
|
103
|
-
parsed = JSON.parse(txt);
|
|
104
|
-
}
|
|
105
|
-
catch {
|
|
106
|
-
throw new Error(`GatewayLlmCaller.structured: model returned non-JSON for schema ${req.schemaName}: ${txt.slice(0, 200)}`);
|
|
107
|
-
}
|
|
108
|
-
const result = req.schema.safeParse(parsed);
|
|
109
|
-
if (!result.success) {
|
|
110
|
-
throw new Error(`GatewayLlmCaller.structured: schema validation failed for ${req.schemaName}: ${result.error.message}`);
|
|
111
|
-
}
|
|
112
|
-
return {
|
|
113
|
-
data: result.data,
|
|
114
|
-
usage: this.usage(raw),
|
|
115
|
-
requestId: raw.id,
|
|
116
|
-
};
|
|
117
|
-
};
|
|
118
|
-
try {
|
|
119
|
-
const out = await run(req.model);
|
|
120
|
-
return { ...out, attemptedModels: attempted, usedFallback: false };
|
|
121
|
-
}
|
|
122
|
-
catch (err) {
|
|
123
|
-
if (req.fallbackModel && err instanceof TransientLlmError) {
|
|
124
|
-
const out = await run(req.fallbackModel);
|
|
125
|
-
return { ...out, attemptedModels: attempted, usedFallback: true };
|
|
126
|
-
}
|
|
127
|
-
throw err;
|
|
128
|
-
}
|
|
129
|
-
}
|
|
130
|
-
structuredStream(req) {
|
|
131
|
-
// Streaming: fall back is NOT applied mid-stream (would confuse UI
|
|
132
|
-
// chunks). Transport retry IS applied on the initial connect; once
|
|
133
|
-
// bytes start flowing, a mid-stream drop propagates as an error and
|
|
134
|
-
// the caller (planner) handles it via its own retry loop.
|
|
135
|
-
let resolveResult;
|
|
136
|
-
let rejectResult;
|
|
137
|
-
const result = new Promise((resolve, reject) => {
|
|
138
|
-
resolveResult = resolve;
|
|
139
|
-
rejectResult = reject;
|
|
140
|
-
});
|
|
141
|
-
const self = this;
|
|
142
|
-
const tokens = {
|
|
143
|
-
[Symbol.asyncIterator]() {
|
|
144
|
-
return self.createStreamingIterator(req, resolveResult, rejectResult);
|
|
145
|
-
},
|
|
146
|
-
};
|
|
147
|
-
return { tokens, result };
|
|
148
|
-
}
|
|
149
|
-
async *createStreamingIterator(req, resolveResult, rejectResult) {
|
|
150
|
-
const jsonSchema = zod_1.z.toJSONSchema(req.schema, { target: "draft-7" });
|
|
151
|
-
const messages = await Promise.all(req.messages.map((m) => this.encodeMessage(m)));
|
|
152
|
-
const ctrl = new AbortController();
|
|
153
|
-
const t = setTimeout(() => ctrl.abort(), this.timeoutMs);
|
|
154
|
-
let accumulated = "";
|
|
155
|
-
let requestId;
|
|
156
|
-
let usage;
|
|
157
|
-
try {
|
|
158
|
-
// Connect attempt with retry on transient connect failures (pre-stream).
|
|
159
|
-
const res = await this.withRetry(() => this.fetchImpl(`${this.cfg.baseUrl.replace(/\/$/, "")}/chat/completions`, {
|
|
160
|
-
method: "POST",
|
|
161
|
-
headers: {
|
|
162
|
-
"content-type": "application/json",
|
|
163
|
-
authorization: `Bearer ${this.cfg.apiKey}`,
|
|
164
|
-
},
|
|
165
|
-
body: JSON.stringify({
|
|
166
|
-
model: this.modelId(req.model),
|
|
167
|
-
messages,
|
|
168
|
-
temperature: req.temperature,
|
|
169
|
-
max_tokens: req.maxTokens,
|
|
170
|
-
stream: true,
|
|
171
|
-
response_format: {
|
|
172
|
-
type: "json_schema",
|
|
173
|
-
json_schema: {
|
|
174
|
-
name: req.schemaName,
|
|
175
|
-
schema: jsonSchema,
|
|
176
|
-
strict: true,
|
|
177
|
-
},
|
|
178
|
-
},
|
|
179
|
-
}),
|
|
180
|
-
signal: ctrl.signal,
|
|
181
|
-
}).then((r) => {
|
|
182
|
-
if (!r.ok && TRANSIENT_HTTP_STATUSES.has(r.status)) {
|
|
183
|
-
throw new TransientLlmError(`GatewayLlmCaller.structuredStream: ${r.status} ${r.statusText}`);
|
|
184
|
-
}
|
|
185
|
-
return r;
|
|
186
|
-
}));
|
|
187
|
-
if (!res.ok || !res.body) {
|
|
188
|
-
const text = await res.text().catch(() => "");
|
|
189
|
-
const err = new Error(`GatewayLlmCaller.structuredStream: ${res.status} ${res.statusText} — ${text.slice(0, 500)}`);
|
|
190
|
-
rejectResult(err);
|
|
191
|
-
throw err;
|
|
192
|
-
}
|
|
193
|
-
const reader = res.body.getReader();
|
|
194
|
-
const decoder = new TextDecoder();
|
|
195
|
-
let buf = "";
|
|
196
|
-
while (true) {
|
|
197
|
-
const { value, done } = await reader.read();
|
|
198
|
-
if (done)
|
|
199
|
-
break;
|
|
200
|
-
buf += decoder.decode(value, { stream: true });
|
|
201
|
-
let nl;
|
|
202
|
-
while ((nl = buf.indexOf("\n")) !== -1) {
|
|
203
|
-
const line = buf.slice(0, nl).trim();
|
|
204
|
-
buf = buf.slice(nl + 1);
|
|
205
|
-
if (!line || !line.startsWith("data:"))
|
|
206
|
-
continue;
|
|
207
|
-
const payload = line.slice(5).trim();
|
|
208
|
-
if (payload === "[DONE]")
|
|
209
|
-
break;
|
|
210
|
-
try {
|
|
211
|
-
const obj = JSON.parse(payload);
|
|
212
|
-
if (obj.id)
|
|
213
|
-
requestId = obj.id;
|
|
214
|
-
if (obj.usage) {
|
|
215
|
-
usage = {
|
|
216
|
-
promptTokens: obj.usage.prompt_tokens,
|
|
217
|
-
completionTokens: obj.usage.completion_tokens,
|
|
218
|
-
};
|
|
219
|
-
}
|
|
220
|
-
const delta = obj.choices?.[0]?.delta?.content ?? "";
|
|
221
|
-
if (delta) {
|
|
222
|
-
accumulated += delta;
|
|
223
|
-
yield delta;
|
|
224
|
-
}
|
|
225
|
-
}
|
|
226
|
-
catch {
|
|
227
|
-
// skip malformed line
|
|
228
|
-
}
|
|
229
|
-
}
|
|
230
|
-
}
|
|
231
|
-
// Post-stream failures (bad JSON / schema mismatch) reject the RESULT
|
|
232
|
-
// promise but must NOT throw out of this generator. The tokens already
|
|
233
|
-
// streamed fine; throwing here would propagate into the caller's
|
|
234
|
-
// `for await (...stream.tokens)` loop and abort it before the caller can
|
|
235
|
-
// reach `await stream.result` — which is exactly where the planner's
|
|
236
|
-
// non-streaming retry fallback lives (Planner.planStream). Returning
|
|
237
|
-
// cleanly lets the token loop complete, then the rejected result drives
|
|
238
|
-
// the retry. See plan.controller.ts:planProjectStream.
|
|
239
|
-
let parsed;
|
|
240
|
-
try {
|
|
241
|
-
parsed = JSON.parse(accumulated);
|
|
242
|
-
}
|
|
243
|
-
catch {
|
|
244
|
-
const err = new Error(`GatewayLlmCaller.structuredStream: model returned non-JSON for ${req.schemaName}: ${accumulated.slice(0, 200)}`);
|
|
245
|
-
rejectResult(err);
|
|
246
|
-
return;
|
|
247
|
-
}
|
|
248
|
-
const validated = req.schema.safeParse(parsed);
|
|
249
|
-
if (!validated.success) {
|
|
250
|
-
const err = new Error(`GatewayLlmCaller.structuredStream: schema validation failed for ${req.schemaName}: ${validated.error.message}`);
|
|
251
|
-
rejectResult(err);
|
|
252
|
-
return;
|
|
253
|
-
}
|
|
254
|
-
resolveResult({
|
|
255
|
-
data: validated.data,
|
|
256
|
-
usage,
|
|
257
|
-
requestId,
|
|
258
|
-
attemptedModels: [req.model],
|
|
259
|
-
usedFallback: false,
|
|
260
|
-
});
|
|
261
|
-
}
|
|
262
|
-
catch (e) {
|
|
263
|
-
rejectResult(e);
|
|
264
|
-
throw e;
|
|
265
|
-
}
|
|
266
|
-
finally {
|
|
267
|
-
clearTimeout(t);
|
|
268
|
-
}
|
|
269
|
-
}
|
|
270
|
-
modelId(m) {
|
|
271
|
-
return `${m.provider}/${m.modelId}`;
|
|
272
|
-
}
|
|
273
|
-
async encodeMessage(m) {
|
|
274
|
-
if (!m.imageUrls?.length) {
|
|
275
|
-
return { role: m.role, content: m.content };
|
|
276
|
-
}
|
|
277
|
-
// Fetch + base64-encode each URL into a data URI. The gateway's Anthropic
|
|
278
|
-
// translator rejects raw https URLs ("URL sources are not supported"); data
|
|
279
|
-
// URIs round-trip through every provider's OpenAI-compat shim correctly.
|
|
280
|
-
const dataUris = await Promise.all(m.imageUrls.map((u) => this.toDataUri(u)));
|
|
281
|
-
return {
|
|
282
|
-
role: m.role,
|
|
283
|
-
content: [
|
|
284
|
-
{ type: "text", text: m.content },
|
|
285
|
-
...dataUris.map((url) => ({ type: "image_url", image_url: { url } })),
|
|
286
|
-
],
|
|
287
|
-
};
|
|
288
|
-
}
|
|
289
|
-
async toDataUri(url) {
|
|
290
|
-
if (url.startsWith("data:"))
|
|
291
|
-
return url;
|
|
292
|
-
const res = await this.fetchImpl(url);
|
|
293
|
-
if (!res.ok) {
|
|
294
|
-
throw new Error(`GatewayLlmCaller.toDataUri: ${res.status} ${res.statusText} fetching ${url}`);
|
|
295
|
-
}
|
|
296
|
-
const contentType = res.headers.get("content-type")?.split(";")[0]?.trim() || "image/jpeg";
|
|
297
|
-
const buf = Buffer.from(await res.arrayBuffer());
|
|
298
|
-
return `data:${contentType};base64,${buf.toString("base64")}`;
|
|
299
|
-
}
|
|
300
|
-
usage(raw) {
|
|
301
|
-
if (!raw.usage)
|
|
302
|
-
return undefined;
|
|
303
|
-
return {
|
|
304
|
-
promptTokens: raw.usage.prompt_tokens,
|
|
305
|
-
completionTokens: raw.usage.completion_tokens,
|
|
306
|
-
};
|
|
307
|
-
}
|
|
308
|
-
/**
|
|
309
|
-
* POST /chat/completions with transient-error retry (F1).
|
|
310
|
-
* Throws TransientLlmError after exhausting retries on transient failures —
|
|
311
|
-
* caller catches it to trigger model fallback (F2). Non-transient errors
|
|
312
|
-
* (4xx other than 408/425/429, malformed responses) propagate as plain Error.
|
|
313
|
-
*/
|
|
314
|
-
async postWithRetry(body) {
|
|
315
|
-
return this.withRetry(() => this.postOnce(body));
|
|
316
|
-
}
|
|
317
|
-
async withRetry(fn) {
|
|
318
|
-
let lastErr;
|
|
319
|
-
for (let attempt = 0; attempt < this.maxRetries; attempt++) {
|
|
320
|
-
try {
|
|
321
|
-
return await fn();
|
|
322
|
-
}
|
|
323
|
-
catch (err) {
|
|
324
|
-
lastErr = err;
|
|
325
|
-
const transient = err instanceof TransientLlmError || isTransientNetworkError(err);
|
|
326
|
-
if (!transient || attempt === this.maxRetries - 1) {
|
|
327
|
-
if (transient) {
|
|
328
|
-
// Wrap so the fallback-model path can catch it explicitly.
|
|
329
|
-
throw err instanceof TransientLlmError
|
|
330
|
-
? err
|
|
331
|
-
: new TransientLlmError(`transient LLM error after ${attempt + 1} attempts: ${String(err?.message ?? err)}`, err);
|
|
332
|
-
}
|
|
333
|
-
throw err;
|
|
334
|
-
}
|
|
335
|
-
await new Promise((r) => setTimeout(r, this.retryBaseMs * Math.pow(4, attempt)));
|
|
336
|
-
}
|
|
337
|
-
}
|
|
338
|
-
throw lastErr;
|
|
339
|
-
}
|
|
340
|
-
async postOnce(body) {
|
|
341
|
-
const ctrl = new AbortController();
|
|
342
|
-
const t = setTimeout(() => ctrl.abort(), this.timeoutMs);
|
|
343
|
-
try {
|
|
344
|
-
const res = await this.fetchImpl(`${this.cfg.baseUrl.replace(/\/$/, "")}/chat/completions`, {
|
|
345
|
-
method: "POST",
|
|
346
|
-
headers: {
|
|
347
|
-
"content-type": "application/json",
|
|
348
|
-
authorization: `Bearer ${this.cfg.apiKey}`,
|
|
349
|
-
},
|
|
350
|
-
body: JSON.stringify(body),
|
|
351
|
-
signal: ctrl.signal,
|
|
352
|
-
});
|
|
353
|
-
if (!res.ok) {
|
|
354
|
-
const text = await res.text().catch(() => "");
|
|
355
|
-
const msg = `GatewayLlmCaller: ${res.status} ${res.statusText} — ${text.slice(0, 500)}`;
|
|
356
|
-
if (TRANSIENT_HTTP_STATUSES.has(res.status)) {
|
|
357
|
-
throw new TransientLlmError(msg);
|
|
358
|
-
}
|
|
359
|
-
throw new Error(msg);
|
|
360
|
-
}
|
|
361
|
-
return (await res.json());
|
|
362
|
-
}
|
|
363
|
-
finally {
|
|
364
|
-
clearTimeout(t);
|
|
365
|
-
}
|
|
366
|
-
}
|
|
367
|
-
}
|
|
368
|
-
exports.GatewayLlmCaller = GatewayLlmCaller;
|