libretto 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,13 +8,6 @@ import { extname, isAbsolute, join, resolve } from "node:path";
8
8
  import { spawn } from "node:child_process";
9
9
  import { tmpdir } from "node:os";
10
10
  import { z } from "zod";
11
- import {
12
- formatCommandPrefix,
13
- readAiConfig
14
- } from "./ai-config.js";
15
- import {
16
- getLLMClientFactory
17
- } from "./context.js";
18
11
  const InterpretResultSchema = z.object({
19
12
  answer: z.string(),
20
13
  selectors: z.array(
@@ -23,8 +16,8 @@ const InterpretResultSchema = z.object({
23
16
  selector: z.string(),
24
17
  rationale: z.string()
25
18
  })
26
- ).default([]),
27
- notes: z.string().optional().default("")
19
+ ),
20
+ notes: z.string()
28
21
  });
29
22
  class UserCodingAgent {
30
23
  constructor(config) {
@@ -41,7 +34,7 @@ class UserCodingAgent {
41
34
  }
42
35
  }
43
36
  static readConfiguredConfig() {
44
- return readAiConfig();
37
+ return null;
45
38
  }
46
39
  static getConfigured() {
47
40
  const config = this.readConfiguredConfig();
@@ -70,7 +63,7 @@ Use the screenshot alongside the HTML snapshot context above.`;
70
63
  const result = await runExternalCommand(this.command, args, logger, stdinText);
71
64
  if (result.exitCode !== 0) {
72
65
  throw new Error(
73
- `Analyzer command failed (${formatCommandPrefix([this.command, ...args])}).
66
+ `Analyzer command failed (${[this.command, ...args].join(" ")}).
74
67
  ${stripAnsi(result.stderr).trim() || stripAnsi(result.stdout).trim() || "No error output."}`
75
68
  );
76
69
  }
@@ -427,37 +420,50 @@ function collectSelectorHints(html, limit = 120) {
427
420
  }
428
421
  return candidates;
429
422
  }
430
- async function runInterpret(args, logger) {
431
- logger.info("interpret-start", {
432
- objective: args.objective,
433
- pngPath: args.pngPath,
434
- htmlPath: args.htmlPath
435
- });
436
- process.env.NODE_ENV = "development";
437
- const pngPath = resolvePath(args.pngPath);
438
- const htmlPath = resolvePath(args.htmlPath);
439
- if (!existsSync(pngPath)) {
440
- throw new Error(`PNG file not found: ${pngPath}`);
423
+ function estimateTokensFromChars(chars) {
424
+ return Math.ceil(chars / 4);
425
+ }
426
+ function inferContextWindowTokens(model) {
427
+ const normalized = model.trim().toLowerCase();
428
+ if (normalized.includes("claude")) {
429
+ return { contextWindowTokens: 2e5, source: "model:claude" };
441
430
  }
442
- if (!existsSync(htmlPath)) {
443
- throw new Error(`HTML file not found: ${htmlPath}`);
431
+ if (normalized.includes("gpt-5") || normalized.includes("o3") || normalized.includes("o4")) {
432
+ return { contextWindowTokens: 2e5, source: "model:openai" };
433
+ }
434
+ if (normalized.includes("gemini")) {
435
+ return { contextWindowTokens: 1e6, source: "model:gemini" };
444
436
  }
445
- const htmlContent = readFileSync(htmlPath, "utf-8");
446
- const htmlCharLimit = 5e5;
447
- const { text: trimmedHtml, truncated } = truncateText(
448
- htmlContent,
449
- htmlCharLimit
437
+ if (normalized.startsWith("openai/") || normalized.startsWith("codex/")) {
438
+ return { contextWindowTokens: 2e5, source: "provider:openai" };
439
+ }
440
+ if (normalized.startsWith("anthropic/")) {
441
+ return { contextWindowTokens: 2e5, source: "provider:anthropic" };
442
+ }
443
+ if (normalized.startsWith("google/") || normalized.startsWith("vertex/")) {
444
+ return { contextWindowTokens: 1e6, source: "provider:google" };
445
+ }
446
+ return { contextWindowTokens: 128e3, source: "default" };
447
+ }
448
+ function buildSnapshotBudget(model) {
449
+ const { contextWindowTokens, source } = inferContextWindowTokens(model);
450
+ const outputReserveTokens = Math.min(
451
+ 32e3,
452
+ Math.max(8e3, Math.floor(contextWindowTokens * 0.1))
450
453
  );
451
- const selectorHints = collectSelectorHints(htmlContent, 120);
452
- let prompt = `# Objective
453
- ${args.objective}
454
-
455
- `;
456
- prompt += `# Context
457
- ${args.context}
458
-
459
- `;
460
- prompt += `# Instructions
454
+ const promptBudgetTokens = Math.max(
455
+ 8e3,
456
+ contextWindowTokens - outputReserveTokens - 2e3
457
+ );
458
+ return {
459
+ contextWindowTokens,
460
+ outputReserveTokens,
461
+ promptBudgetTokens,
462
+ source
463
+ };
464
+ }
465
+ function buildInterpretInstructions() {
466
+ let prompt = `# Instructions
461
467
  `;
462
468
  prompt += `You are analyzing a screenshot and HTML snapshot of the same web page on behalf of an automation agent.
463
469
  `;
@@ -483,66 +489,135 @@ ${args.context}
483
489
  prompt += `Selectors should prefer robust attributes: data-testid, data-test, aria-label, name, id, role. Avoid fragile class-based or positional selectors.
484
490
  `;
485
491
  prompt += `Only include selectors that exist in the HTML snapshot.
492
+ `;
493
+ return prompt;
494
+ }
495
+ function buildInlineHtmlPrompt(args, options) {
496
+ const selectorHints = collectSelectorHints(options.htmlContent, 120);
497
+ let prompt = `# Objective
498
+ ${args.objective}
499
+
500
+ `;
501
+ prompt += `# Context
502
+ ${args.context}
486
503
 
487
504
  `;
505
+ prompt += `# Snapshot Selection
506
+ `;
507
+ prompt += `- Selected HTML snapshot: ${options.domLabel}
508
+ `;
509
+ prompt += `- Selection reason: ${options.selectionReason}
510
+
511
+ `;
512
+ prompt += buildInterpretInstructions();
488
513
  if (selectorHints.length > 0) {
489
- prompt += `Selector hints from HTML attributes (use if relevant):
514
+ prompt += `
515
+ Selector hints from HTML attributes (use if relevant):
490
516
  `;
491
517
  prompt += selectorHints.map((hint) => `- ${hint}`).join("\n");
492
- prompt += "\n\n";
518
+ prompt += "\n";
493
519
  }
494
- if (truncated) {
495
- prompt += `HTML content is truncated to fit token limits.
496
-
520
+ if (options.truncated) {
521
+ prompt += `
522
+ HTML content is truncated to fit token limits.
497
523
  `;
498
524
  }
499
- prompt += `HTML snapshot:
525
+ prompt += `
526
+ HTML snapshot (${options.domLabel}):
500
527
 
501
- ${trimmedHtml}`;
528
+ ${options.htmlContent}`;
502
529
  prompt += "\n\nReturn only a JSON object. Do not include markdown code fences or extra commentary.";
503
- let parsed;
504
- const configuredAgent = UserCodingAgent.getConfigured();
505
- if (configuredAgent) {
506
- const configuredAnalyzer = configuredAgent.snapshotAnalyzerConfig;
507
- logger.info("interpret-analyzer-config", {
508
- preset: configuredAnalyzer.preset,
509
- commandPrefix: configuredAnalyzer.commandPrefix
530
+ return prompt;
531
+ }
532
+ function buildInlinePromptSelection(args, fullHtmlContent, condensedHtmlContent, model) {
533
+ const budget = buildSnapshotBudget(model);
534
+ const stats = {
535
+ fullDomChars: fullHtmlContent.length,
536
+ fullDomEstimatedTokens: estimateTokensFromChars(fullHtmlContent.length),
537
+ condensedDomChars: condensedHtmlContent.length,
538
+ condensedDomEstimatedTokens: estimateTokensFromChars(condensedHtmlContent.length),
539
+ configuredModel: model
540
+ };
541
+ const buildCandidate = (domSource, htmlContent, selectionReason, truncated) => {
542
+ const domLabel = domSource === "full" ? "full DOM" : "condensed DOM";
543
+ const prompt = buildInlineHtmlPrompt(args, {
544
+ htmlContent,
545
+ domLabel,
546
+ truncated,
547
+ selectionReason,
548
+ budget,
549
+ stats
510
550
  });
511
- parsed = await configuredAgent.analyzeSnapshot(prompt, pngPath, logger);
512
- } else {
513
- const llmClientFactory = getLLMClientFactory();
514
- if (!llmClientFactory) {
515
- throw new Error(
516
- "No AI config set. Run 'libretto-cli ai configure codex' (or claude/gemini). Library integrations can still set a factory via setLLMClientFactory()."
517
- );
518
- }
519
- logger.info("interpret-analyzer-factory-fallback", {});
520
- const imageBase64 = readFileAsBase64(pngPath);
521
- const client = await llmClientFactory(logger, "google/gemini-3-flash-preview");
522
- const result = await client.generateObjectFromMessages({
523
- schema: InterpretResultSchema,
524
- messages: [
525
- {
526
- role: "user",
527
- content: [
528
- { type: "text", text: prompt },
529
- {
530
- type: "image",
531
- image: `data:${getMimeType(pngPath)};base64,${imageBase64}`
532
- }
533
- ]
534
- }
535
- ],
536
- temperature: 0.1
551
+ return {
552
+ prompt,
553
+ domSource,
554
+ domLabel,
555
+ htmlChars: htmlContent.length,
556
+ htmlEstimatedTokens: estimateTokensFromChars(htmlContent.length),
557
+ promptEstimatedTokens: estimateTokensFromChars(prompt.length),
558
+ truncated,
559
+ selectionReason,
560
+ budget,
561
+ stats
562
+ };
563
+ };
564
+ const fullCandidate = buildCandidate(
565
+ "full",
566
+ fullHtmlContent,
567
+ "placeholder",
568
+ false
569
+ );
570
+ if (fullCandidate.promptEstimatedTokens <= budget.promptBudgetTokens) {
571
+ const selectionReason = `Full DOM fits within the estimated prompt budget (~${fullCandidate.promptEstimatedTokens.toLocaleString()} <= ${budget.promptBudgetTokens.toLocaleString()} tokens), so the analyzer receives the uncondensed page HTML.`;
572
+ const prompt = buildInlineHtmlPrompt(args, {
573
+ htmlContent: fullHtmlContent,
574
+ domLabel: "full DOM",
575
+ truncated: false,
576
+ selectionReason,
577
+ budget,
578
+ stats
537
579
  });
538
- parsed = InterpretResultSchema.parse(result);
580
+ return {
581
+ ...fullCandidate,
582
+ selectionReason,
583
+ prompt,
584
+ promptEstimatedTokens: estimateTokensFromChars(prompt.length)
585
+ };
586
+ }
587
+ const condensedReason = `Full DOM would exceed the estimated prompt budget (~${fullCandidate.promptEstimatedTokens.toLocaleString()} > ${budget.promptBudgetTokens.toLocaleString()} tokens), so the analyzer receives the condensed DOM instead.`;
588
+ const condensedCandidate = buildCandidate(
589
+ "condensed",
590
+ condensedHtmlContent,
591
+ condensedReason,
592
+ false
593
+ );
594
+ if (condensedCandidate.promptEstimatedTokens <= budget.promptBudgetTokens) {
595
+ return condensedCandidate;
539
596
  }
540
- logger.info("interpret-success", {
541
- selectorCount: parsed.selectors.length,
542
- answer: parsed.answer.slice(0, 200)
597
+ const truncateReason = `Both full and condensed DOM snapshots exceed the estimated prompt budget (full ~${fullCandidate.promptEstimatedTokens.toLocaleString()}, condensed ~${condensedCandidate.promptEstimatedTokens.toLocaleString()}, budget ${budget.promptBudgetTokens.toLocaleString()} tokens), so the condensed DOM is truncated to fit.`;
598
+ const basePrompt = buildInlineHtmlPrompt(args, {
599
+ htmlContent: "",
600
+ domLabel: "condensed DOM",
601
+ truncated: true,
602
+ selectionReason: truncateReason,
603
+ budget,
604
+ stats
543
605
  });
606
+ const availableHtmlTokens = Math.max(
607
+ 2e3,
608
+ budget.promptBudgetTokens - estimateTokensFromChars(basePrompt.length)
609
+ );
610
+ const truncatedHtml = truncateText(condensedHtmlContent, availableHtmlTokens * 4);
611
+ return buildCandidate(
612
+ "condensed",
613
+ truncatedHtml.text,
614
+ truncateReason,
615
+ truncatedHtml.truncated
616
+ );
617
+ }
618
+ function formatInterpretationOutput(parsed, header = "Interpretation:") {
544
619
  const outputLines = [];
545
- outputLines.push("Interpretation:");
620
+ outputLines.push(header);
546
621
  outputLines.push(`Answer: ${parsed.answer}`);
547
622
  outputLines.push("");
548
623
  if (parsed.selectors.length === 0) {
@@ -555,16 +630,54 @@ ${trimmedHtml}`;
555
630
  outputLines.push(` rationale: ${selector.rationale}`);
556
631
  });
557
632
  }
558
- if (parsed.notes.trim()) {
633
+ if (parsed.notes && parsed.notes.trim()) {
559
634
  outputLines.push("");
560
635
  outputLines.push(`Notes: ${parsed.notes.trim()}`);
561
636
  }
562
- console.log(outputLines.join("\n"));
637
+ return outputLines.join("\n");
638
+ }
639
+ async function runInterpret(args, logger) {
640
+ logger.info("interpret-start", {
641
+ objective: args.objective,
642
+ pngPath: args.pngPath,
643
+ htmlPath: args.htmlPath,
644
+ condensedHtmlPath: args.condensedHtmlPath
645
+ });
646
+ process.env.NODE_ENV = "development";
647
+ const pngPath = resolvePath(args.pngPath);
648
+ const htmlPath = resolvePath(args.htmlPath);
649
+ const condensedHtmlPath = resolvePath(args.condensedHtmlPath);
650
+ if (!existsSync(pngPath)) {
651
+ throw new Error(`PNG file not found: ${pngPath}`);
652
+ }
653
+ if (!existsSync(htmlPath)) {
654
+ throw new Error(`HTML file not found: ${htmlPath}`);
655
+ }
656
+ if (!existsSync(condensedHtmlPath)) {
657
+ throw new Error(`Condensed HTML file not found: ${condensedHtmlPath}`);
658
+ }
659
+ const fullHtmlContent = readFileSync(htmlPath, "utf-8");
660
+ const condensedHtmlContent = readFileSync(condensedHtmlPath, "utf-8");
661
+ const configuredAgent = UserCodingAgent.getConfigured();
662
+ if (!configuredAgent) {
663
+ throw new Error(
664
+ "No AI config set. Run 'npx libretto ai configure codex' (or claude/gemini), or set API credentials in your .env file for direct API analysis."
665
+ );
666
+ }
667
+ const configuredAnalyzer = configuredAgent.snapshotAnalyzerConfig;
668
+ throw new Error(
669
+ "The CLI-agent snapshot analysis path is not active. Update your config to the current format with `npx libretto ai configure <provider>`, or set API credentials in .env for direct API analysis."
670
+ );
563
671
  }
564
672
  function canAnalyzeSnapshots() {
565
- return UserCodingAgent.getConfigured() !== null || getLLMClientFactory() !== null;
673
+ return UserCodingAgent.getConfigured() !== null;
566
674
  }
567
675
  export {
676
+ InterpretResultSchema,
677
+ buildInlinePromptSelection,
568
678
  canAnalyzeSnapshots,
679
+ formatInterpretationOutput,
680
+ getMimeType,
681
+ readFileAsBase64,
569
682
  runInterpret
570
683
  };
@@ -0,0 +1,137 @@
1
+ import { existsSync, readFileSync } from "node:fs";
2
+ import { dirname, join, resolve } from "node:path";
3
+ import {
4
+ readAiConfig
5
+ } from "./ai-config.js";
6
+ import { REPO_ROOT } from "./context.js";
7
+ import {
8
+ hasProviderCredentials,
9
+ missingProviderCredentialsMessage,
10
+ parseModel
11
+ } from "../../shared/llm/client.js";
12
+ const DEFAULT_SNAPSHOT_MODELS = {
13
+ openai: "openai/gpt-5.4",
14
+ anthropic: "anthropic/claude-sonnet-4-6",
15
+ google: "google/gemini-2.5-flash",
16
+ vertex: "vertex/gemini-2.5-pro"
17
+ };
18
+ class SnapshotApiUnavailableError extends Error {
19
+ constructor(message) {
20
+ super(message);
21
+ this.name = "SnapshotApiUnavailableError";
22
+ }
23
+ }
24
+ function readWorktreeEnvPath() {
25
+ const gitPath = join(REPO_ROOT, ".git");
26
+ if (!existsSync(gitPath)) return null;
27
+ try {
28
+ const gitPointer = readFileSync(gitPath, "utf-8").trim();
29
+ const match = gitPointer.match(/^gitdir:\s*(.+)$/i);
30
+ if (!match?.[1]) return null;
31
+ const worktreeGitDir = resolve(REPO_ROOT, match[1].trim());
32
+ const commonGitDir = resolve(worktreeGitDir, "..", "..");
33
+ return join(dirname(commonGitDir), ".env");
34
+ } catch {
35
+ return null;
36
+ }
37
+ }
38
+ function loadSnapshotEnv() {
39
+ if (process.env.LIBRETTO_DISABLE_DOTENV?.trim() === "1") return;
40
+ const envPathCandidates = [
41
+ join(REPO_ROOT, ".env"),
42
+ readWorktreeEnvPath()
43
+ ].filter((value) => Boolean(value));
44
+ const envPath = envPathCandidates.find((candidate) => existsSync(candidate));
45
+ if (!envPath) return;
46
+ for (const line of readFileSync(envPath, "utf-8").split("\n")) {
47
+ const parsed = parseDotEnvAssignment(line);
48
+ if (!parsed) continue;
49
+ if (!(parsed.key in process.env)) {
50
+ process.env[parsed.key] = parsed.value;
51
+ }
52
+ }
53
+ }
54
+ function parseDotEnvAssignment(line) {
55
+ const trimmed = line.trim();
56
+ if (!trimmed || trimmed.startsWith("#")) return null;
57
+ const withoutExport = trimmed.startsWith("export ") ? trimmed.slice("export ".length).trimStart() : trimmed;
58
+ const eqIdx = withoutExport.indexOf("=");
59
+ if (eqIdx < 1) return null;
60
+ const key = withoutExport.slice(0, eqIdx).trim();
61
+ if (!key) return null;
62
+ const rawValue = withoutExport.slice(eqIdx + 1).trimStart();
63
+ if (!rawValue) {
64
+ return { key, value: "" };
65
+ }
66
+ if (rawValue.startsWith('"')) {
67
+ const closeIdx = rawValue.indexOf('"', 1);
68
+ if (closeIdx > 0) {
69
+ return { key, value: rawValue.slice(1, closeIdx) };
70
+ }
71
+ return { key, value: rawValue.slice(1) };
72
+ }
73
+ if (rawValue.startsWith("'")) {
74
+ const closeIdx = rawValue.indexOf("'", 1);
75
+ if (closeIdx > 0) {
76
+ return { key, value: rawValue.slice(1, closeIdx) };
77
+ }
78
+ return { key, value: rawValue.slice(1) };
79
+ }
80
+ const inlineCommentIndex = rawValue.search(/\s#/);
81
+ const value = inlineCommentIndex >= 0 ? rawValue.slice(0, inlineCommentIndex).trimEnd() : rawValue.trim();
82
+ return { key, value };
83
+ }
84
+ function inferAutoSnapshotModel() {
85
+ const providersInPriorityOrder = [
86
+ "openai",
87
+ "anthropic",
88
+ "google",
89
+ "vertex"
90
+ ];
91
+ for (const provider of providersInPriorityOrder) {
92
+ if (!hasProviderCredentials(provider)) continue;
93
+ return {
94
+ model: DEFAULT_SNAPSHOT_MODELS[provider],
95
+ provider,
96
+ source: `env:auto-${provider}`
97
+ };
98
+ }
99
+ return null;
100
+ }
101
+ function resolveSnapshotApiModel(config = readAiConfig()) {
102
+ loadSnapshotEnv();
103
+ if (config?.model) {
104
+ const { provider } = parseModel(config.model);
105
+ return {
106
+ model: config.model,
107
+ provider,
108
+ source: "config"
109
+ };
110
+ }
111
+ return inferAutoSnapshotModel();
112
+ }
113
+ function resolveSnapshotApiModelOrThrow(config = readAiConfig()) {
114
+ const selection = resolveSnapshotApiModel(config);
115
+ if (!selection) {
116
+ throw new SnapshotApiUnavailableError(
117
+ "No API snapshot analyzer is available. Set OPENAI_API_KEY, ANTHROPIC_API_KEY, GEMINI_API_KEY/GOOGLE_GENERATIVE_AI_API_KEY, or GOOGLE_CLOUD_PROJECT, or run `npx libretto ai configure <provider>` to set a default model."
118
+ );
119
+ }
120
+ if (!hasProviderCredentials(selection.provider)) {
121
+ throw new SnapshotApiUnavailableError(
122
+ missingProviderCredentialsMessage(selection.provider)
123
+ );
124
+ }
125
+ return selection;
126
+ }
127
+ function isSnapshotApiUnavailableError(error) {
128
+ return error instanceof SnapshotApiUnavailableError;
129
+ }
130
+ export {
131
+ SnapshotApiUnavailableError,
132
+ isSnapshotApiUnavailableError,
133
+ loadSnapshotEnv,
134
+ parseDotEnvAssignment,
135
+ resolveSnapshotApiModel,
136
+ resolveSnapshotApiModelOrThrow
137
+ };