@lingjingai/scriptctl 0.7.4 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,8 @@
1
- import Anthropic from "@anthropic-ai/sdk";
2
- import { ASSET_CURATION_SCHEMA, CliError, DEFAULT_BATCH_MAX_TOKENS, DEFAULT_MAX_TOKENS, DEFAULT_THINKING_BUDGET_TOKENS, EPISODE_TITLE_EXTRACTION_SCHEMA, EXIT_RUNTIME, EXIT_USAGE, MARKDOWN_BATCH_PROMPT_SPEC, METADATA_EXTRACTION_SCHEMA, NONSTREAMING_MAX_TOKENS, WORLDVIEW_VALUES, } from "../common.js";
1
+ import { createGoogleGenerativeAI } from "@ai-sdk/google";
2
+ import { createOpenAICompatible } from "@ai-sdk/openai-compatible";
3
+ import { APICallError, NoObjectGeneratedError, RetryError, generateObject, generateText, } from "ai";
4
+ import { z } from "zod";
5
+ import { CliError, DEFAULT_BATCH_MAX_TOKENS, DEFAULT_MAX_TOKENS, EXIT_RUNTIME, EXIT_USAGE, MARKDOWN_BATCH_PROMPT_SPEC, ROLE_TYPE_VALUES, WORLDVIEW_VALUES, } from "../common.js";
3
6
  import { _md_push_asset, _normalize_speaker_list, buildAssetCurationContext, buildEpisodeTitleContext, buildMetadataContext, deterministicEpisodeShortTitle, deterministicExtractEpisode, deterministicExtractMetadata, episodesNeedingGeneratedTitles, formatBatchSource, parseMarkdownBatch, } from "../domain/direct-core.js";
4
7
  function strOf(v) {
5
8
  if (v === null || v === undefined)
@@ -12,50 +15,43 @@ function isList(v) {
12
15
  function asList(v) {
13
16
  return Array.isArray(v) ? v : [];
14
17
  }
15
- // aihubmix / litellm gateways frequently drop long-running SSE connections with
16
- // "Premature close" or fetch failures. The Anthropic SDK's automatic retry does
17
- // not cover mid-stream failures, so we wrap each provider call in our own
18
- // bounded retry loop on transient transport errors only.
19
- function isTransientStreamError(err) {
20
- if (!err || typeof err !== "object")
21
- return false;
22
- const e = err;
23
- const name = strOf(e.name);
24
- const msg = strOf(e.message);
25
- const code = strOf(e.code);
26
- if (name === "APIConnectionError" || name === "APIConnectionTimeoutError")
27
- return true;
28
- if (/Premature close/i.test(msg))
29
- return true;
30
- if (/fetch failed/i.test(msg))
31
- return true;
32
- if (/socket hang up/i.test(msg))
33
- return true;
34
- if (/ECONNRESET|ETIMEDOUT|EPIPE|UND_ERR_SOCKET|UND_ERR_CONNECT_TIMEOUT/i.test(`${msg} ${code}`))
35
- return true;
36
- if (e.cause && isTransientStreamError(e.cause))
37
- return true;
38
- return false;
39
- }
40
- const STREAM_RETRY_MAX_ATTEMPTS = 3;
41
- async function runWithStreamRetry(fn) {
42
- let attempt = 0;
43
- let lastErr;
44
- while (attempt < STREAM_RETRY_MAX_ATTEMPTS) {
45
- try {
46
- return await fn();
47
- }
48
- catch (err) {
49
- lastErr = err;
50
- attempt += 1;
51
- if (attempt >= STREAM_RETRY_MAX_ATTEMPTS || !isTransientStreamError(err))
52
- throw err;
53
- const delayMs = 1000 * Math.pow(2, attempt - 1); // 1s, 2s
54
- await new Promise((resolve) => setTimeout(resolve, delayMs));
55
- }
56
- }
57
- throw lastErr;
58
- }
18
+ // ---------------------------------------------------------------------------
19
+ // Zod schemas for structured extraction (LLM JSON-mode output)
20
+ // ---------------------------------------------------------------------------
21
+ const ROLE_TYPE_TUPLE = ROLE_TYPE_VALUES;
22
+ const WORLDVIEW_TUPLE = WORLDVIEW_VALUES;
23
+ const METADATA_SCHEMA = z.object({
24
+ confidence: z.enum(["high", "medium", "low"]),
25
+ worldview: z.enum(WORLDVIEW_TUPLE),
26
+ worldview_raw: z.string(),
27
+ actors: z.array(z.object({
28
+ actor_id: z.string(),
29
+ role_type: z.enum(ROLE_TYPE_TUPLE),
30
+ description: z.string(),
31
+ })),
32
+ locations: z.array(z.object({
33
+ location_id: z.string(),
34
+ description: z.string(),
35
+ })),
36
+ props: z.array(z.object({
37
+ prop_id: z.string(),
38
+ description: z.string(),
39
+ })),
40
+ });
41
+ const ASSET_CURATION_SCHEMA = z.object({
42
+ locations: z.array(z.object({
43
+ location_id: z.string(),
44
+ decision: z.enum(["keep", "merge"]),
45
+ target_location_id: z.string().nullable(),
46
+ reason: z.string(),
47
+ })),
48
+ });
49
+ const EPISODE_TITLE_SCHEMA = z.object({
50
+ episode_titles: z.array(z.object({
51
+ episode: z.number().int(),
52
+ title: z.string(),
53
+ })),
54
+ });
59
55
  // ---------------------------------------------------------------------------
60
56
  // Mock provider
61
57
  // ---------------------------------------------------------------------------
@@ -142,13 +138,23 @@ export class MockProvider {
142
138
  }
143
139
  }
144
140
  // ---------------------------------------------------------------------------
145
- // Anthropic provider
141
+ // LiteLLM provider — talks to PROJECT_LITELLM_GATEWAY via OpenAI-compatible
142
+ // /v1/chat/completions. Backs every Claude / DeepSeek / etc. model fronted by
143
+ // the gateway. The protocol switch from /v1/messages (Anthropic) to
144
+ // /v1/chat/completions is intentional: it lets gateway-routed models that lack
145
+ // Anthropic tool_use support (e.g. deepseek-v4-pro-packy) reuse the same
146
+ // structured-output path via JSON-mode schemas.
147
+ //
148
+ // `name = "anthropic"` is preserved (not renamed to "litellm") so previously
149
+ // written checkpoint metadata (`{"provider": "anthropic"}`) keeps validating
150
+ // after upgrade — no forced re-extraction.
146
151
  // ---------------------------------------------------------------------------
147
- export class AnthropicProvider {
152
+ export class LiteLLMProvider {
148
153
  name = "anthropic";
149
- client;
150
154
  model;
151
- constructor(model) {
155
+ modelId;
156
+ configuredMaxTokens;
157
+ constructor(modelId) {
152
158
  const apiKey = (process.env.PROJECT_LITELLM_GATEWAY_API_KEY ?? "").trim();
153
159
  if (!apiKey) {
154
160
  throw new CliError("INIT FAILED: provider not configured", "Provider credentials are not configured for this environment.", {
@@ -159,87 +165,60 @@ export class AnthropicProvider {
159
165
  errorCode: "PROVIDER_AUTH_MISSING",
160
166
  });
161
167
  }
162
- const baseUrl = (process.env.PROJECT_LITELLM_GATEWAY_BASE_URL ?? "").trim();
163
- const opts = { apiKey };
164
- if (baseUrl)
165
- opts.baseURL = baseUrl;
166
- this.client = new Anthropic(opts);
167
- this.model = model;
168
- }
169
- messageRequest(prompt, maxTokens) {
170
- const rawMaxTokens = (process.env.SCRIPTCTL_MAX_TOKENS ?? "").trim();
171
- let configuredMaxTokens = DEFAULT_MAX_TOKENS;
172
- if (rawMaxTokens) {
173
- const parsed = parseInt(rawMaxTokens, 10);
174
- if (!Number.isNaN(parsed))
175
- configuredMaxTokens = parsed;
168
+ const baseURL = (process.env.PROJECT_LITELLM_GATEWAY_BASE_URL ?? "").trim();
169
+ if (!baseURL) {
170
+ throw new CliError("INIT FAILED: provider not configured", "Provider gateway URL is not configured for this environment.", {
171
+ exitCode: EXIT_RUNTIME,
172
+ required: ["PROJECT_LITELLM_GATEWAY_BASE_URL"],
173
+ received: ["no gateway base URL environment variable"],
174
+ nextSteps: ["Run `scriptctl doctor` to identify missing configuration."],
175
+ errorCode: "PROVIDER_AUTH_MISSING",
176
+ });
176
177
  }
177
- let tokenBudget = maxTokens !== undefined ? maxTokens : configuredMaxTokens;
178
- tokenBudget = Math.max(1024, Math.min(tokenBudget, configuredMaxTokens, DEFAULT_MAX_TOKENS));
179
- const request = {
180
- model: this.model,
181
- max_tokens: tokenBudget,
182
- messages: [{ role: "user", content: prompt }],
183
- };
184
- if (this.model.endsWith("-think")) {
185
- const rawBudget = (process.env.SCRIPTCTL_THINKING_BUDGET_TOKENS ?? "").trim();
186
- let budget = DEFAULT_THINKING_BUDGET_TOKENS;
187
- if (rawBudget) {
188
- const parsed = parseInt(rawBudget, 10);
189
- if (!Number.isNaN(parsed))
190
- budget = parsed;
191
- }
192
- request.thinking = { type: "enabled", budget_tokens: Math.max(1024, Math.min(budget, tokenBudget - 1000)) };
178
+ const provider = createOpenAICompatible({
179
+ name: "litellm",
180
+ apiKey,
181
+ baseURL,
182
+ });
183
+ this.modelId = modelId;
184
+ this.model = provider(modelId);
185
+ const raw = (process.env.SCRIPTCTL_MAX_TOKENS ?? "").trim();
186
+ let mt = DEFAULT_MAX_TOKENS;
187
+ if (raw) {
188
+ const parsed = parseInt(raw, 10);
189
+ if (!Number.isNaN(parsed))
190
+ mt = parsed;
193
191
  }
194
- return request;
192
+ this.configuredMaxTokens = Math.max(1024, Math.min(mt, DEFAULT_MAX_TOKENS));
195
193
  }
196
- async collectResponseText(request) {
197
- return runWithStreamRetry(async () => {
198
- if (request.max_tokens > NONSTREAMING_MAX_TOKENS) {
199
- const stream = await this.client.messages.stream(request);
200
- const text = await stream.finalText();
201
- const message = await stream.finalMessage();
202
- return [text.trim(), strOf(message.stop_reason)];
203
- }
204
- const response = await this.client.messages.create(request);
205
- return [textFromResponse(response), strOf(response.stop_reason)];
206
- });
194
+ capTokens(maxTokens, fallback) {
195
+ const requested = maxTokens !== undefined ? maxTokens : fallback;
196
+ return Math.max(1024, Math.min(requested, this.configuredMaxTokens, DEFAULT_MAX_TOKENS));
207
197
  }
208
- /**
209
- * Run a request that forces a specific tool call via `tool_choice`, then return that
210
- * tool's `input` dict (already parsed by the SDK — no JSON.parse needed). Returns
211
- * `null` when the provider didn't emit the expected tool_use block (e.g. refusal,
212
- * truncated before tool call, gateway dropped tools field), letting the caller
213
- * raise a method-specific CliError.
214
- */
215
- async collectToolUseInput(request, toolName) {
216
- const maxTokens = Number(request["max_tokens"] ?? 0);
217
- return runWithStreamRetry(async () => {
218
- if (maxTokens > NONSTREAMING_MAX_TOKENS) {
219
- const stream = await this.client.messages.stream(request);
220
- const message = (await stream.finalMessage());
221
- return [findToolUseInput(message.content ?? [], toolName), strOf(message.stop_reason)];
222
- }
223
- const response = (await this.client.messages.create(request));
224
- return [findToolUseInput(response.content ?? [], toolName), strOf(response.stop_reason)];
225
- });
226
- }
227
- /**
228
- * Generic completion entry point. Used by episode subcommand's gemini-writer to draft
229
- * one episode's spec markdown from the assembled prompt. Throws CliError when the
230
- * provider truncates output (so the caller can surface a deterministic error rather
231
- * than committing a half-baked episode).
232
- */
233
198
  async complete(prompt, maxTokens) {
234
- const request = this.messageRequest(prompt, maxTokens ?? DEFAULT_BATCH_MAX_TOKENS);
235
- const [raw, stopReason] = await this.collectResponseText(request);
236
- if (stopReason === "max_tokens") {
199
+ const max = this.capTokens(maxTokens, DEFAULT_BATCH_MAX_TOKENS);
200
+ let raw;
201
+ let finishReason;
202
+ try {
203
+ const result = await generateText({
204
+ model: this.model,
205
+ prompt,
206
+ maxOutputTokens: max,
207
+ maxRetries: 0,
208
+ });
209
+ raw = result.text.trim();
210
+ finishReason = strOf(result.finishReason);
211
+ }
212
+ catch (exc) {
213
+ throw translateLiteLLMError(exc, "DRAFT FAILED");
214
+ }
215
+ if (finishReason === "length") {
237
216
  throw new CliError("DRAFT FAILED: Provider output truncated", "Provider output truncated.", {
238
217
  exitCode: EXIT_RUNTIME,
239
218
  required: ["complete markdown within provider max_tokens"],
240
219
  received: [
241
- `stop_reason: ${stopReason}`,
242
- `max_tokens: ${request.max_tokens}`,
220
+ `finishReason: ${finishReason}`,
221
+ `max_tokens: ${max}`,
243
222
  `raw chars: ${raw.length}`,
244
223
  `tail: ${raw.slice(-160) || "<empty response>"}`,
245
224
  ],
@@ -255,7 +234,7 @@ export class AnthropicProvider {
255
234
  return this.extractBatch(sourceText, episodePlan);
256
235
  }
257
236
  async extractBatch(sourceText, batchPlan) {
258
- const context = isList(batchPlan["context"]) || typeof batchPlan["context"] === "object" && batchPlan["context"] !== null
237
+ const context = isList(batchPlan["context"]) || (typeof batchPlan["context"] === "object" && batchPlan["context"] !== null)
259
238
  ? batchPlan["context"]
260
239
  : {};
261
240
  const numberedSource = formatBatchSource(sourceText, batchPlan);
@@ -271,15 +250,29 @@ export class AnthropicProvider {
271
250
  "\n" +
272
251
  "Batch Source:\n" +
273
252
  `${numberedSource}\n`;
274
- const request = this.messageRequest(prompt, DEFAULT_BATCH_MAX_TOKENS);
275
- const [raw, stopReason] = await this.collectResponseText(request);
276
- if (stopReason === "max_tokens") {
253
+ const max = this.capTokens(DEFAULT_BATCH_MAX_TOKENS, DEFAULT_BATCH_MAX_TOKENS);
254
+ let raw;
255
+ let finishReason;
256
+ try {
257
+ const result = await generateText({
258
+ model: this.model,
259
+ prompt,
260
+ maxOutputTokens: max,
261
+ maxRetries: 0,
262
+ });
263
+ raw = result.text.trim();
264
+ finishReason = strOf(result.finishReason);
265
+ }
266
+ catch (exc) {
267
+ throw translateLiteLLMError(exc, "INIT FAILED");
268
+ }
269
+ if (finishReason === "length") {
277
270
  throw new CliError("INIT FAILED: Provider output truncated", "Provider output truncated.", {
278
271
  exitCode: EXIT_RUNTIME,
279
272
  required: ["complete markdown within provider max_tokens"],
280
273
  received: [
281
- `stop_reason: ${stopReason}`,
282
- `max_tokens: ${request.max_tokens}`,
274
+ `finishReason: ${finishReason}`,
275
+ `max_tokens: ${max}`,
283
276
  `raw chars: ${raw.length}`,
284
277
  `tail: ${raw.slice(-160) || "<empty response>"}`,
285
278
  ],
@@ -307,32 +300,19 @@ export class AnthropicProvider {
307
300
  "\n" +
308
301
  "Episodes needing titles:\n" +
309
302
  `${JSON.stringify(context)}\n`;
310
- const toolName = "submit_episode_titles";
311
- const request = { ...this.messageRequest(prompt, 4096) };
312
- request["tools"] = [{
313
- name: toolName,
314
- description: "Return generated short Chinese titles for every input episode.",
315
- input_schema: EPISODE_TITLE_EXTRACTION_SCHEMA,
316
- }];
317
- request["tool_choice"] = { type: "tool", name: toolName };
318
- const [input, stopReason] = await this.collectToolUseInput(request, toolName);
319
- if (stopReason === "max_tokens") {
320
- throw new CliError("INIT FAILED: Episode title output truncated", "Episode title output truncated.", {
321
- exitCode: EXIT_RUNTIME,
322
- required: ["complete episode title JSON within provider max_tokens"],
323
- received: [`stop_reason: ${stopReason}`, `max_tokens: ${request["max_tokens"]}`],
324
- nextSteps: ["Rerun init; if this repeats, split the source or provide explicit episode titles."],
325
- });
326
- }
327
- if (!input) {
328
- throw new CliError("INIT FAILED: Provider returned invalid episode title JSON", "Provider returned invalid episode title JSON.", {
329
- exitCode: EXIT_RUNTIME,
330
- required: [`tool_use block with name "${toolName}"`],
331
- received: [`stop_reason: ${stopReason || "unknown"}`, `missing tool_use block: ${toolName}`],
332
- nextSteps: ["Rerun init; title generation will retry before batch extraction."],
333
- });
334
- }
335
- return input;
303
+ return this.extractStructured({
304
+ prompt,
305
+ maxTokens: 4096,
306
+ schema: EPISODE_TITLE_SCHEMA,
307
+ title: "INIT FAILED: Episode title output truncated",
308
+ truncationMessage: "Episode title output truncated.",
309
+ required: ["complete episode title JSON within provider max_tokens"],
310
+ invalidTitle: "INIT FAILED: Provider returned invalid episode title JSON",
311
+ invalidMessage: "Provider returned invalid episode title JSON.",
312
+ invalidRequired: ["valid JSON matching the configured schema"],
313
+ truncationNext: ["Rerun init; if this repeats, split the source or provide explicit episode titles."],
314
+ invalidNext: ["Rerun init; title generation will retry before batch extraction."],
315
+ });
336
316
  }
337
317
  async extractAssetCuration(_sourceText, script) {
338
318
  const context = buildAssetCurationContext(script);
@@ -350,32 +330,19 @@ export class AnthropicProvider {
350
330
  "\n" +
351
331
  "Script asset curation context:\n" +
352
332
  `${JSON.stringify(context)}\n`;
353
- const toolName = "submit_asset_curation";
354
- const request = { ...this.messageRequest(prompt, DEFAULT_MAX_TOKENS) };
355
- request["tools"] = [{
356
- name: toolName,
357
- description: "Return location merge decisions for the provided script.",
358
- input_schema: ASSET_CURATION_SCHEMA,
359
- }];
360
- request["tool_choice"] = { type: "tool", name: toolName };
361
- const [input, stopReason] = await this.collectToolUseInput(request, toolName);
362
- if (stopReason === "max_tokens") {
363
- throw new CliError("INIT FAILED: Asset curation output truncated", "Asset curation output truncated.", {
364
- exitCode: EXIT_RUNTIME,
365
- required: ["complete asset curation JSON within provider max_tokens"],
366
- received: [`stop_reason: ${stopReason}`, `max_tokens: ${request["max_tokens"]}`],
367
- nextSteps: ["Rerun init; extraction checkpoints will be reused and asset curation will retry."],
368
- });
369
- }
370
- if (!input) {
371
- throw new CliError("INIT FAILED: Provider returned invalid asset curation JSON", "Provider returned invalid asset curation JSON.", {
372
- exitCode: EXIT_RUNTIME,
373
- required: [`tool_use block with name "${toolName}"`],
374
- received: [`stop_reason: ${stopReason || "unknown"}`, `missing tool_use block: ${toolName}`],
375
- nextSteps: ["Rerun init; extraction checkpoints will be reused and asset curation will retry."],
376
- });
377
- }
378
- return input;
333
+ return this.extractStructured({
334
+ prompt,
335
+ maxTokens: DEFAULT_MAX_TOKENS,
336
+ schema: ASSET_CURATION_SCHEMA,
337
+ title: "INIT FAILED: Asset curation output truncated",
338
+ truncationMessage: "Asset curation output truncated.",
339
+ required: ["complete asset curation JSON within provider max_tokens"],
340
+ invalidTitle: "INIT FAILED: Provider returned invalid asset curation JSON",
341
+ invalidMessage: "Provider returned invalid asset curation JSON.",
342
+ invalidRequired: ["valid JSON matching the configured schema"],
343
+ truncationNext: ["Rerun init; extraction checkpoints will be reused and asset curation will retry."],
344
+ invalidNext: ["Rerun init; extraction checkpoints will be reused and asset curation will retry."],
345
+ });
379
346
  }
380
347
  async extractMetadata(_sourceText, script) {
381
348
  const context = buildMetadataContext(script);
@@ -393,56 +360,68 @@ export class AnthropicProvider {
393
360
  "\n" +
394
361
  "Script metadata context:\n" +
395
362
  `${JSON.stringify(context)}\n`;
396
- const toolName = "submit_script_metadata";
397
- const request = { ...this.messageRequest(prompt, DEFAULT_MAX_TOKENS) };
398
- request["tools"] = [{
399
- name: toolName,
400
- description: "Return global script metadata (worldview, actors, locations, props).",
401
- input_schema: METADATA_EXTRACTION_SCHEMA,
402
- }];
403
- request["tool_choice"] = { type: "tool", name: toolName };
404
- const [input, stopReason] = await this.collectToolUseInput(request, toolName);
405
- if (stopReason === "max_tokens") {
406
- throw new CliError("INIT FAILED: Metadata output truncated", "Metadata output truncated.", {
407
- exitCode: EXIT_RUNTIME,
408
- required: ["complete metadata JSON within provider max_tokens"],
409
- received: [`stop_reason: ${stopReason}`, `max_tokens: ${request["max_tokens"]}`],
410
- nextSteps: ["Rerun init; extraction checkpoints will be reused and metadata will retry."],
363
+ return this.extractStructured({
364
+ prompt,
365
+ maxTokens: DEFAULT_MAX_TOKENS,
366
+ schema: METADATA_SCHEMA,
367
+ title: "INIT FAILED: Metadata output truncated",
368
+ truncationMessage: "Metadata output truncated.",
369
+ required: ["complete metadata JSON within provider max_tokens"],
370
+ invalidTitle: "INIT FAILED: Provider returned invalid metadata JSON",
371
+ invalidMessage: "Provider returned invalid metadata JSON.",
372
+ invalidRequired: ["valid JSON matching the configured schema"],
373
+ truncationNext: ["Rerun init; extraction checkpoints will be reused and metadata will retry."],
374
+ invalidNext: ["Rerun init; extraction checkpoints will be reused and metadata will retry."],
375
+ });
376
+ }
377
+ async extractStructured(args) {
378
+ const max = this.capTokens(args.maxTokens, args.maxTokens);
379
+ try {
380
+ const result = await generateObject({
381
+ model: this.model,
382
+ prompt: args.prompt,
383
+ schema: args.schema,
384
+ maxOutputTokens: max,
385
+ maxRetries: 0,
411
386
  });
387
+ const finishReason = strOf(result.finishReason);
388
+ if (finishReason === "length") {
389
+ throw new CliError(args.title, args.truncationMessage, {
390
+ exitCode: EXIT_RUNTIME,
391
+ required: args.required,
392
+ received: [`finishReason: ${finishReason}`, `max_tokens: ${max}`],
393
+ nextSteps: args.truncationNext,
394
+ });
395
+ }
396
+ return result.object;
412
397
  }
413
- if (!input) {
414
- throw new CliError("INIT FAILED: Provider returned invalid metadata JSON", "Provider returned invalid metadata JSON.", {
415
- exitCode: EXIT_RUNTIME,
416
- required: [`tool_use block with name "${toolName}"`],
417
- received: [`stop_reason: ${stopReason || "unknown"}`, `missing tool_use block: ${toolName}`],
418
- nextSteps: ["Rerun init; extraction checkpoints will be reused and metadata will retry."],
419
- });
398
+ catch (exc) {
399
+ if (exc instanceof CliError)
400
+ throw exc;
401
+ if (NoObjectGeneratedError.isInstance(exc)) {
402
+ const cause = exc.cause;
403
+ throw new CliError(args.invalidTitle, args.invalidMessage, {
404
+ exitCode: EXIT_RUNTIME,
405
+ required: args.invalidRequired,
406
+ received: [
407
+ `error: ${exc.message?.slice(0, 160) || "no object generated"}`,
408
+ ...(cause?.message ? [`cause: ${cause.message.slice(0, 160)}`] : []),
409
+ ],
410
+ nextSteps: args.invalidNext,
411
+ });
412
+ }
413
+ throw translateLiteLLMError(exc, "INIT FAILED");
420
414
  }
421
- return input;
422
415
  }
423
416
  }
424
417
  // ---------------------------------------------------------------------------
425
- // Gemini provider (used by `scriptctl episode draft` by default)
418
+ // Gemini provider — uses @ai-sdk/google to talk directly to Google AI Studio.
419
+ // Used by `scriptctl episode draft` when --provider gemini.
426
420
  // ---------------------------------------------------------------------------
427
- /**
428
- * Google Gemini provider via REST API. No SDK dependency (uses fetch).
429
- *
430
- * Used as the default writer for `scriptctl episode draft`. The Anthropic provider
431
- * stays available for other internal extraction tasks (metadata / title generation /
432
- * direct-init batch extraction) where Claude tends to outperform Gemini on JSON
433
- * schema adherence.
434
- *
435
- * Env:
436
- * - `SCRIPTCTL_GEMINI_API_KEY` (or fallback `GEMINI_API_KEY`) required
437
- * - `SCRIPTCTL_GEMINI_BASE_URL` optional override
438
- * - `SCRIPTCTL_GEMINI_MODEL` optional default-model override
439
- */
440
421
  export class GeminiProvider {
441
422
  name = "gemini";
442
- apiKey;
443
- baseUrl;
444
423
  model;
445
- constructor(model) {
424
+ constructor(modelId) {
446
425
  const apiKey = (process.env.SCRIPTCTL_GEMINI_API_KEY ?? process.env.GEMINI_API_KEY ?? "").trim();
447
426
  if (!apiKey) {
448
427
  throw new CliError("DRAFT FAILED: provider not configured", "Provider credentials are not configured for this environment.", {
@@ -453,138 +432,169 @@ export class GeminiProvider {
453
432
  errorCode: "PROVIDER_AUTH_MISSING",
454
433
  });
455
434
  }
456
- this.apiKey = apiKey;
457
- const baseUrl = (process.env.SCRIPTCTL_GEMINI_BASE_URL ?? "https://generativelanguage.googleapis.com").trim();
458
- this.baseUrl = baseUrl.replace(/\/+$/, "");
459
- this.model = (model || process.env.SCRIPTCTL_GEMINI_MODEL || "gemini-2.5-pro").trim();
435
+ const baseURL = (process.env.SCRIPTCTL_GEMINI_BASE_URL ?? "").trim();
436
+ const opts = { apiKey };
437
+ if (baseURL)
438
+ opts.baseURL = baseURL.replace(/\/+$/, "");
439
+ const provider = createGoogleGenerativeAI(opts);
440
+ const id = (modelId || process.env.SCRIPTCTL_GEMINI_MODEL || "gemini-2.5-pro").trim();
441
+ this.model = provider(id);
460
442
  }
461
443
  /**
462
444
  * Plain-text completion. Used by `episode draft` to write a spec-md episode body.
463
445
  */
464
446
  async complete(prompt, maxTokens) {
465
447
  const tokens = Math.max(1024, Math.min(maxTokens ?? DEFAULT_BATCH_MAX_TOKENS, DEFAULT_MAX_TOKENS));
466
- const url = `${this.baseUrl}/v1beta/models/${encodeURIComponent(this.model)}:generateContent?key=${encodeURIComponent(this.apiKey)}`;
467
- const body = {
468
- contents: [{ role: "user", parts: [{ text: prompt }] }],
469
- generationConfig: {
448
+ let raw;
449
+ let finishReason;
450
+ try {
451
+ const result = await generateText({
452
+ model: this.model,
453
+ prompt,
470
454
  maxOutputTokens: tokens,
471
455
  temperature: 0,
472
- },
473
- };
474
- let response;
475
- try {
476
- response = await fetch(url, {
477
- method: "POST",
478
- headers: { "Content-Type": "application/json" },
479
- body: JSON.stringify(body),
480
- });
481
- }
482
- catch (exc) {
483
- throw new CliError("DRAFT FAILED: provider unreachable", "Provider endpoint is unreachable.", {
484
- exitCode: EXIT_RUNTIME,
485
- required: ["reachable Gemini endpoint"],
486
- received: [String(exc.message ?? exc)],
487
- nextSteps: ["Check network connectivity and retry. Run `scriptctl doctor` if the issue persists."],
488
- errorCode: "PROVIDER_NETWORK",
489
- });
490
- }
491
- const text = await response.text();
492
- if (!response.ok) {
493
- const isAuth = response.status === 401 || response.status === 403;
494
- const isRateLimit = response.status === 429;
495
- const publicMessage = isAuth
496
- ? "Provider rejected the request: authentication failed."
497
- : isRateLimit
498
- ? "Provider is rate-limited."
499
- : "Provider returned an HTTP error.";
500
- throw new CliError("DRAFT FAILED: provider returned error", publicMessage, {
501
- exitCode: EXIT_RUNTIME,
502
- required: ["HTTP 2xx from Gemini"],
503
- received: [`status: ${response.status}`, `body: ${text.slice(0, 320) || "<empty>"}`],
504
- nextSteps: isRateLimit
505
- ? ["Back off and retry after a short wait."]
506
- : isAuth
507
- ? ["Run `scriptctl doctor` to verify provider configuration."]
508
- : ["Retry once; if the issue persists, run `scriptctl doctor`."],
509
- errorCode: isAuth
510
- ? "PROVIDER_AUTH_REJECTED"
511
- : isRateLimit
512
- ? "PROVIDER_RATE_LIMITED"
513
- : "PROVIDER_HTTP_ERROR",
456
+ maxRetries: 0,
514
457
  });
515
- }
516
- let payload;
517
- try {
518
- payload = JSON.parse(text);
458
+ raw = result.text.trim();
459
+ finishReason = strOf(result.finishReason);
519
460
  }
520
461
  catch (exc) {
521
- throw new CliError("DRAFT FAILED: provider returned invalid response", "Provider returned a response that could not be parsed.", {
522
- exitCode: EXIT_RUNTIME,
523
- required: ["valid JSON response"],
524
- received: [`body head: ${text.slice(0, 160)}`, `parse error: ${exc.message}`],
525
- nextSteps: ["Retry once; if the issue persists, run `scriptctl doctor`."],
526
- errorCode: "PROVIDER_INVALID_RESPONSE",
527
- });
462
+ throw translateGeminiError(exc, tokens);
528
463
  }
529
- const candidate = payload.candidates?.[0];
530
- const finishReason = strOf(candidate?.finishReason ?? "");
531
- if (finishReason === "MAX_TOKENS") {
464
+ if (finishReason === "length") {
532
465
  throw new CliError("DRAFT FAILED: Provider output truncated", "Provider output truncated.", {
533
466
  exitCode: EXIT_RUNTIME,
534
467
  required: ["complete markdown within provider max_tokens"],
535
- received: [`finishReason: MAX_TOKENS`, `maxOutputTokens: ${tokens}`],
468
+ received: [`finishReason: ${finishReason}`, `maxOutputTokens: ${tokens}`],
536
469
  nextSteps: [
537
470
  "Re-run with --regen, or split the episode outline into smaller scopes.",
538
471
  ],
539
472
  errorCode: "PROVIDER_OUTPUT_TRUNCATED",
540
473
  });
541
474
  }
542
- if (finishReason && finishReason !== "STOP" && finishReason !== "MAX_TOKENS") {
543
- // SAFETY / RECITATION / OTHER — surface to agent rather than silently treat as success.
475
+ if (finishReason && finishReason !== "stop" && finishReason !== "length") {
476
+ // content-filter / error / other — surface to the agent rather than silently treating it as success.
544
477
  throw new CliError("DRAFT FAILED: provider stopped abnormally", "Provider stopped before completing the response.", {
545
478
  exitCode: EXIT_RUNTIME,
546
- required: ["finishReason: STOP"],
479
+ required: ["finishReason: stop"],
547
480
  received: [`finishReason: ${finishReason}`],
548
481
  nextSteps: ["Inspect prompt / outline for triggering content; retry with --regen if it looks transient."],
549
482
  errorCode: "PROVIDER_ABNORMAL_STOP",
550
483
  });
551
484
  }
552
- const parts = candidate?.content?.parts ?? [];
553
- const out = parts.map((p) => (typeof p.text === "string" ? p.text : "")).join("").trim();
554
- if (!out) {
485
+ if (!raw) {
555
486
  throw new CliError("DRAFT FAILED: provider returned empty content", "Provider returned an empty response.", {
556
487
  exitCode: EXIT_RUNTIME,
557
- required: ["non-empty candidate content"],
558
- received: [`finishReason: ${finishReason || "<unset>"}`, `parts: ${parts.length}`],
488
+ required: ["non-empty model output"],
489
+ received: [`finishReason: ${finishReason || "<unset>"}`, "text: <empty>"],
559
490
  nextSteps: ["Retry once; if it persists, run `scriptctl doctor`."],
560
491
  errorCode: "PROVIDER_EMPTY_RESPONSE",
561
492
  });
562
493
  }
563
- return out;
494
+ return raw;
564
495
  }
565
496
  }
566
- function textFromResponse(response) {
567
- const parts = [];
568
- for (const block of response.content ?? []) {
569
- if (block.text)
570
- parts.push(block.text);
497
+ // ---------------------------------------------------------------------------
498
+ // Error translation
499
+ // ---------------------------------------------------------------------------
500
+ function translateLiteLLMError(exc, titlePrefix) {
501
+ // ai-sdk wraps transient errors in RetryError once its retries are exhausted.
502
+ // Unwrap so 401/403/429 still resolve to their precise CliError variant
503
+ // instead of falling through to generic PROVIDER_NETWORK.
504
+ if (RetryError.isInstance(exc) && APICallError.isInstance(exc.lastError)) {
505
+ exc = exc.lastError;
571
506
  }
572
- return parts.join("\n").trim();
507
+ if (APICallError.isInstance(exc)) {
508
+ const status = exc.statusCode ?? 0;
509
+ const isAuth = status === 401 || status === 403;
510
+ const isRateLimit = status === 429;
511
+ const publicMessage = isAuth
512
+ ? "Provider rejected the request: authentication failed."
513
+ : isRateLimit
514
+ ? "Provider is rate-limited."
515
+ : "Provider returned an HTTP error.";
516
+ const body = strOf(exc.responseBody).slice(0, 320) || "<empty>";
517
+ return new CliError(`${titlePrefix}: provider returned error`, publicMessage, {
518
+ exitCode: EXIT_RUNTIME,
519
+ required: ["HTTP 2xx from provider"],
520
+ received: [`status: ${status || "unknown"}`, `body: ${body}`],
521
+ nextSteps: isRateLimit
522
+ ? ["Back off and retry after a short wait."]
523
+ : isAuth
524
+ ? ["Run `scriptctl doctor` to verify provider configuration."]
525
+ : ["Retry once; if the issue persists, run `scriptctl doctor`."],
526
+ errorCode: isAuth
527
+ ? "PROVIDER_AUTH_REJECTED"
528
+ : isRateLimit
529
+ ? "PROVIDER_RATE_LIMITED"
530
+ : "PROVIDER_HTTP_ERROR",
531
+ });
532
+ }
533
+ if (RetryError.isInstance(exc)) {
534
+ return new CliError(`${titlePrefix}: provider unreachable`, "Provider endpoint is unreachable.", {
535
+ exitCode: EXIT_RUNTIME,
536
+ required: ["reachable provider endpoint"],
537
+ received: [strOf(exc.message).slice(0, 320) || "retries exhausted"],
538
+ nextSteps: ["Check network connectivity and retry. Run `scriptctl doctor` if the issue persists."],
539
+ errorCode: "PROVIDER_NETWORK",
540
+ });
541
+ }
542
+ const msg = strOf(exc?.message).slice(0, 320);
543
+ return new CliError(`${titlePrefix}: provider returned error`, "Provider returned an unexpected error.", {
544
+ exitCode: EXIT_RUNTIME,
545
+ required: ["successful provider response"],
546
+ received: [msg || String(exc)],
547
+ nextSteps: ["Retry once; if the issue persists, run `scriptctl doctor`."],
548
+ errorCode: "PROVIDER_HTTP_ERROR",
549
+ });
573
550
  }
574
- /**
575
- * Find a forced tool_use block by name in a Message's content array.
576
- * Returns its `input` (already parsed by the SDK) or `null` when missing/non-object.
577
- */
578
- function findToolUseInput(content, toolName) {
579
- for (const block of content) {
580
- if (block?.type === "tool_use" && block.name === toolName) {
581
- const inp = block.input;
582
- if (inp && typeof inp === "object" && !Array.isArray(inp))
583
- return inp;
584
- return null;
585
- }
551
+ function translateGeminiError(exc, tokens) {
552
+ if (RetryError.isInstance(exc) && APICallError.isInstance(exc.lastError)) {
553
+ exc = exc.lastError;
586
554
  }
587
- return null;
555
+ if (APICallError.isInstance(exc)) {
556
+ const status = exc.statusCode ?? 0;
557
+ const isAuth = status === 401 || status === 403;
558
+ const isRateLimit = status === 429;
559
+ const publicMessage = isAuth
560
+ ? "Provider rejected the request: authentication failed."
561
+ : isRateLimit
562
+ ? "Provider is rate-limited."
563
+ : "Provider returned an HTTP error.";
564
+ const body = strOf(exc.responseBody).slice(0, 320) || "<empty>";
565
+ return new CliError("DRAFT FAILED: provider returned error", publicMessage, {
566
+ exitCode: EXIT_RUNTIME,
567
+ required: ["HTTP 2xx from Gemini"],
568
+ received: [`status: ${status || "unknown"}`, `body: ${body}`],
569
+ nextSteps: isRateLimit
570
+ ? ["Back off and retry after a short wait."]
571
+ : isAuth
572
+ ? ["Run `scriptctl doctor` to verify provider configuration."]
573
+ : ["Retry once; if the issue persists, run `scriptctl doctor`."],
574
+ errorCode: isAuth
575
+ ? "PROVIDER_AUTH_REJECTED"
576
+ : isRateLimit
577
+ ? "PROVIDER_RATE_LIMITED"
578
+ : "PROVIDER_HTTP_ERROR",
579
+ });
580
+ }
581
+ if (RetryError.isInstance(exc)) {
582
+ return new CliError("DRAFT FAILED: provider unreachable", "Provider endpoint is unreachable.", {
583
+ exitCode: EXIT_RUNTIME,
584
+ required: ["reachable Gemini endpoint"],
585
+ received: [strOf(exc.message).slice(0, 320) || "retries exhausted"],
586
+ nextSteps: ["Check network connectivity and retry. Run `scriptctl doctor` if the issue persists."],
587
+ errorCode: "PROVIDER_NETWORK",
588
+ });
589
+ }
590
+ const msg = strOf(exc?.message).slice(0, 320);
591
+ return new CliError("DRAFT FAILED: provider returned invalid response", "Provider returned a response that could not be parsed.", {
592
+ exitCode: EXIT_RUNTIME,
593
+ required: ["valid response from Gemini"],
594
+ received: [msg || String(exc), `maxOutputTokens: ${tokens}`],
595
+ nextSteps: ["Retry once; if the issue persists, run `scriptctl doctor`."],
596
+ errorCode: "PROVIDER_INVALID_RESPONSE",
597
+ });
588
598
  }
589
599
  // ---------------------------------------------------------------------------
590
600
  // Factory
@@ -593,7 +603,7 @@ export function makeProvider(name, model) {
593
603
  if (name === "mock")
594
604
  return new MockProvider();
595
605
  if (name === "anthropic")
596
- return new AnthropicProvider(model);
606
+ return new LiteLLMProvider(model);
597
607
  if (name === "gemini")
598
608
  return new GeminiProvider(model);
599
609
  throw new CliError("INIT FAILED: Unsupported provider", "Unsupported provider.", {