libretto 0.2.0 → 0.2.2

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (69)
  1. package/README.md +128 -126
  2. package/dist/cli/cli.js +2 -0
  3. package/dist/cli/commands/browser.js +4 -1
  4. package/dist/cli/commands/execution.js +21 -6
  5. package/dist/cli/commands/logs.js +36 -8
  6. package/dist/cli/commands/snapshot.js +14 -7
  7. package/dist/cli/core/browser.js +89 -253
  8. package/dist/cli/core/session-telemetry.js +491 -0
  9. package/dist/cli/core/telemetry.js +18 -6
  10. package/dist/cli/workers/run-integration-runtime.js +19 -1
  11. package/dist/index.cjs +2 -6
  12. package/dist/index.d.cts +2 -5
  13. package/dist/index.d.ts +2 -5
  14. package/dist/index.js +2 -5
  15. package/dist/runtime/download/download.d.cts +2 -2
  16. package/dist/runtime/download/download.d.ts +2 -2
  17. package/dist/runtime/extract/extract.cjs +2 -1
  18. package/dist/runtime/extract/extract.d.cts +5 -5
  19. package/dist/runtime/extract/extract.d.ts +5 -5
  20. package/dist/runtime/extract/extract.js +2 -1
  21. package/dist/runtime/network/network.d.cts +6 -6
  22. package/dist/runtime/network/network.d.ts +6 -6
  23. package/dist/runtime/recovery/agent.cjs +12 -7
  24. package/dist/runtime/recovery/agent.d.cts +2 -2
  25. package/dist/runtime/recovery/agent.d.ts +2 -2
  26. package/dist/runtime/recovery/agent.js +12 -7
  27. package/dist/runtime/recovery/errors.cjs +8 -6
  28. package/dist/runtime/recovery/errors.d.cts +2 -2
  29. package/dist/runtime/recovery/errors.d.ts +2 -2
  30. package/dist/runtime/recovery/errors.js +8 -6
  31. package/dist/runtime/recovery/recovery.cjs +5 -3
  32. package/dist/runtime/recovery/recovery.d.cts +2 -2
  33. package/dist/runtime/recovery/recovery.d.ts +2 -2
  34. package/dist/runtime/recovery/recovery.js +5 -3
  35. package/dist/shared/instrumentation/instrument.d.cts +2 -2
  36. package/dist/shared/instrumentation/instrument.d.ts +2 -2
  37. package/dist/shared/llm/types.d.cts +5 -5
  38. package/dist/shared/llm/types.d.ts +5 -5
  39. package/dist/shared/logger/index.cjs +2 -0
  40. package/dist/shared/logger/index.d.cts +1 -1
  41. package/dist/shared/logger/index.d.ts +1 -1
  42. package/dist/shared/logger/index.js +2 -1
  43. package/dist/shared/logger/logger.cjs +15 -2
  44. package/dist/shared/logger/logger.d.cts +13 -1
  45. package/dist/shared/logger/logger.d.ts +13 -1
  46. package/dist/shared/logger/logger.js +13 -1
  47. package/dist/shared/state/session-state.d.cts +2 -2
  48. package/dist/shared/state/session-state.d.ts +2 -2
  49. package/package.json +15 -11
  50. package/scripts/postinstall.mjs +48 -0
  51. package/skill/SKILL.md +438 -0
  52. package/skill/code-generation-rules.md +190 -0
  53. package/skill/integration-approach-selection.md +174 -0
  54. package/dist/runtime/step/index.cjs +0 -31
  55. package/dist/runtime/step/index.d.cts +0 -7
  56. package/dist/runtime/step/index.d.ts +0 -7
  57. package/dist/runtime/step/index.js +0 -6
  58. package/dist/runtime/step/runner.cjs +0 -208
  59. package/dist/runtime/step/runner.d.cts +0 -16
  60. package/dist/runtime/step/runner.d.ts +0 -16
  61. package/dist/runtime/step/runner.js +0 -187
  62. package/dist/runtime/step/step.cjs +0 -67
  63. package/dist/runtime/step/step.d.cts +0 -23
  64. package/dist/runtime/step/step.d.ts +0 -23
  65. package/dist/runtime/step/step.js +0 -43
  66. package/dist/runtime/step/types.cjs +0 -16
  67. package/dist/runtime/step/types.d.cts +0 -72
  68. package/dist/runtime/step/types.d.ts +0 -72
  69. package/dist/runtime/step/types.js +0 -0
package/README.md CHANGED
@@ -4,13 +4,14 @@ A TypeScript library for browser automation with AI-powered recovery and data ex
4
4
 
5
5
  ## Features
6
6
 
7
- - **Step-based workflows** — Define automation as named steps with built-in error handling and recovery
8
7
  - **AI-powered recovery** — Vision-based agent that automatically detects and dismisses popups or obstacles using an LLM
9
8
  - **Structured data extraction** — Extract typed data from web pages using AI vision + Zod schemas
10
9
  - **Error detection** — Classify form/submission errors against known patterns
11
- - **Debug bundles** — On failure, captures screenshots, DOM, logs, and step history for investigation
12
- - **Dry-run mode** — Run workflows in simulation without side effects
10
+ - **In-browser network requests** — Execute authenticated fetch calls inside the page context with optional Zod validation
11
+ - **File downloads** — Trigger and intercept file downloads via click, with optional save-to-disk
12
+ - **Dry-run mode** — Skip mutations in development without side effects
13
13
  - **Pluggable LLM** — Bring your own LLM provider (Claude, GPT, etc.) via a simple interface
14
+ - **Pluggable logging** — All runtime functions accept an optional logger; defaults to console output
14
15
 
15
16
  ## Installation
16
17
 
@@ -24,112 +25,139 @@ pnpm add libretto playwright zod
24
25
 
25
26
  ```typescript
26
27
  import { chromium } from "playwright";
27
- import { step, createRunner } from "libretto";
28
+ import { extractFromPage, attemptWithRecovery } from "libretto";
28
29
 
29
- const runner = createRunner({
30
- llmClient: myLLMClient, // optional — enables AI recovery & extraction
31
- });
30
+ const browser = await chromium.launch();
31
+ const page = await browser.newPage();
32
32
 
33
- const steps = [
34
- step("navigate", async ({ page, logger }) => {
35
- await page.goto("https://example.com/login");
36
- logger.info("navigated to login page");
37
- }),
33
+ await page.goto("https://example.com/login");
34
+ await page.fill("#email", "user@example.com");
35
+ await page.fill("#password", "secret");
38
36
 
39
- step("login", async ({ page }) => {
40
- await page.fill("#email", "user@example.com");
41
- await page.fill("#password", "secret");
42
- await page.click('button[type="submit"]');
43
- await page.waitForURL("**/dashboard");
44
- }),
37
+ // Automatically retry with AI popup recovery on failure
38
+ await attemptWithRecovery(page, () => page.click('button[type="submit"]'));
45
39
 
46
- step("scrape-data", async ({ page, logger }) => {
47
- const title = await page.textContent("h1");
48
- logger.info("page title", { title });
49
- }),
50
- ];
51
-
52
- const browser = await chromium.launch();
53
- const page = await browser.newPage();
54
- await runner.run(page, steps);
55
40
  await browser.close();
56
41
  ```
57
42
 
58
- ## Core Concepts
43
+ ## Runtime Functions
59
44
 
60
- ### Steps
45
+ ### Recovery
61
46
 
62
- A step is a named unit of work. Create one with the `step()` factory:
47
+ #### `attemptWithRecovery(page, fn, logger?, llmClient?)`
48
+
49
+ Executes a function and, if it fails, uses AI vision to detect and dismiss popups before retrying once.
50
+
51
+ ```typescript
52
+ import { attemptWithRecovery } from "libretto";
53
+
54
+ await attemptWithRecovery(page, async () => {
55
+ await page.click('button[type="submit"]');
56
+ }, undefined, llmClient);
57
+ ```
58
+
59
+ #### `executeRecoveryAgent(page, instruction, logger?, llmClient?)`
60
+
61
+ Runs a multi-step vision-based recovery agent that takes screenshots and executes browser actions (click, type, scroll, etc.) to resolve obstacles.
63
62
 
64
63
  ```typescript
65
- step("step-name", async ({ page, logger, config }) => {
66
- // page: Playwright Page instance
67
- // logger: scoped logger for this step
68
- // config: { dryRun, debug, logDir }
64
+ import { executeRecoveryAgent } from "libretto";
65
+
66
+ await executeRecoveryAgent(
67
+ page,
68
+ "Close the cookie consent banner",
69
+ undefined,
70
+ llmClient,
71
+ );
72
+ ```
73
+
74
+ #### `detectSubmissionError(page, error, logContext, llmClient, knownErrors?, logger?)`
75
+
76
+ Uses a screenshot + LLM vision to detect if an error occurred during a form submission. Matches against provided known error patterns.
77
+
78
+ ```typescript
79
+ import { detectSubmissionError } from "libretto";
80
+
81
+ try {
82
+ await page.click("#submit");
83
+ } catch (error) {
84
+ const result = await detectSubmissionError(page, error, "checkout", llmClient, [
85
+ { id: "duplicate", errorPatterns: ["already exists"], userMessage: "Duplicate entry" },
86
+ ]);
87
+ console.log(result.errorId, result.message);
88
+ }
89
+ ```
90
+
91
+ ### Data Extraction
92
+
93
+ #### `extractFromPage(options)`
94
+
95
+ Extract structured data from a page using AI vision + a Zod schema.
96
+
97
+ ```typescript
98
+ import { extractFromPage } from "libretto";
99
+ import { z } from "zod";
100
+
101
+ const result = await extractFromPage({
102
+ page,
103
+ llmClient,
104
+ instruction: "Extract the product name and price",
105
+ schema: z.object({
106
+ name: z.string(),
107
+ price: z.number(),
108
+ }),
109
+ selector: ".product-card", // optional — scopes to a specific element
69
110
  });
111
+ // result is typed as { name: string; price: number }
70
112
  ```
71
113
 
72
- ### Step Options
114
+ ### Network
115
+
116
+ #### `pageRequest(page, config, options?)`
117
+
118
+ Executes a fetch call inside the browser context via `page.evaluate()`, inheriting the page's cookies and auth state. Supports optional Zod validation.
73
119
 
74
120
  ```typescript
75
- step("submit-form", handler, {
76
- dryRun: "skip", // "skip" (default) | "execute" | "simulate"
77
- simulate: async ({ logger }) => {
78
- logger.info("simulated form submission");
121
+ import { pageRequest } from "libretto";
122
+ import { z } from "zod";
123
+
124
+ const data = await pageRequest(
125
+ page,
126
+ {
127
+ url: "https://example.com/api/profile",
128
+ method: "GET",
129
+ responseType: "json",
79
130
  },
80
- recovery: {
81
- "session-expired": async ({ page, logger }) => {
82
- await page.click("#re-login");
83
- },
131
+ {
132
+ schema: z.object({ name: z.string(), email: z.string() }),
84
133
  },
85
- });
134
+ );
86
135
  ```
87
136
 
88
- - **`dryRun`** — Controls behavior when the runner is in dry-run mode:
89
- - `"skip"` — Skip the step entirely
90
- - `"execute"` — Run normally even in dry-run mode
91
- - `"simulate"` — Call the `simulate` function instead
92
- - **`recovery`** — Named recovery handlers tried after AI recovery fails
137
+ ### Downloads
93
138
 
94
- ### Extending Steps
139
+ #### `downloadViaClick(page, selector, options?)`
95
140
 
96
- Use `step.extend()` to create a step factory with shared recovery handlers:
141
+ Triggers a file download by clicking a DOM element and intercepts the result.
97
142
 
98
143
  ```typescript
99
- const myStep = step.extend({
100
- recovery: {
101
- "cookie-banner": async ({ page }) => {
102
- await page.click("#accept-cookies");
103
- },
104
- },
105
- });
144
+ import { downloadViaClick } from "libretto";
106
145
 
107
- // Every step created with myStep inherits the cookie-banner recovery
108
- myStep("checkout", async ({ page }) => { /* ... */ });
146
+ const { buffer, filename } = await downloadViaClick(page, "#download-btn");
109
147
  ```
110
148
 
111
- ### Runner
149
+ #### `downloadAndSave(page, selector, options?)`
150
+
151
+ Same as `downloadViaClick` but also writes the file to disk.
112
152
 
113
153
  ```typescript
114
- import { createRunner } from "libretto";
154
+ import { downloadAndSave } from "libretto";
115
155
 
116
- const runner = createRunner({
117
- llmClient, // optional — enables AI recovery & extraction
118
- dryRun: false, // run in dry-run mode
119
- debug: false, // enable debug mode
120
- logDir: "./logs", // defaults to .libretto/sessions/<sessionName>/logs
156
+ const { savedTo } = await downloadAndSave(page, "#export-csv", {
157
+ savePath: "./exports/report.csv",
121
158
  });
122
-
123
- await runner.run(page, steps);
124
159
  ```
125
160
 
126
- The runner executes steps sequentially. For each step it:
127
- 1. Captures a start screenshot
128
- 2. Runs the handler with automatic popup recovery (if `llmClient` provided)
129
- 3. Falls back to custom recovery handlers on failure
130
- 4. Generates a debug bundle if all recovery fails
131
- 5. Captures an end screenshot
132
-
133
161
  ## LLM Client Interface
134
162
 
135
163
  Provide your own implementation backed by any LLM provider:
@@ -147,45 +175,14 @@ const myLLMClient: LLMClient = {
147
175
  };
148
176
  ```
149
177
 
150
- ## Data Extraction
151
-
152
- Extract structured data from a page using AI vision:
153
-
154
- ```typescript
155
- import { extractFromPage } from "libretto/extract";
156
- import { z } from "zod";
157
-
158
- const result = await extractFromPage(page, llmClient, {
159
- prompt: "Extract the product name and price from this page",
160
- schema: z.object({
161
- name: z.string(),
162
- price: z.number(),
163
- }),
164
- });
165
- // result is typed as { name: string; price: number }
166
- ```
167
-
168
- ## Error Detection
169
-
170
- Detect and classify form submission errors:
171
-
172
- ```typescript
173
- import { detectSubmissionError } from "libretto/recovery";
174
-
175
- const error = await detectSubmissionError(page, llmClient, [
176
- { name: "duplicate-entry", description: "Record already exists" },
177
- { name: "invalid-field", description: "A form field has a validation error" },
178
- ]);
178
+ ## Logging
179
179
 
180
- if (error) {
181
- console.log(error.name, error.details);
182
- }
183
- ```
180
+ All runtime functions accept an optional `logger` parameter. When omitted, output goes to `console.log` with `[INFO]`, `[WARN]`, `[ERROR]` prefixes.
184
181
 
185
- ## Logging
182
+ For structured logging, use the built-in `Logger` class:
186
183
 
187
184
  ```typescript
188
- import { Logger, createFileLogSink, prettyConsoleSink } from "libretto/logger";
185
+ import { Logger, createFileLogSink, prettyConsoleSink } from "libretto";
189
186
 
190
187
  const logger = new Logger()
191
188
  .withSink(createFileLogSink({ filePath: "./app.log" }))
@@ -200,25 +197,30 @@ scoped.error("login failed", { reason: "bad password" });
200
197
 
201
198
  Libretto provides granular imports:
202
199
 
203
- | Import | Contents |
204
- | ------------------------ | --------------------------------------------- |
205
- | `libretto` | Everything |
206
- | `libretto/step` | `step`, `createRunner` |
207
- | `libretto/logger` | `Logger`, sinks |
208
- | `libretto/recovery` | `attemptWithRecovery`, `detectSubmissionError` |
209
- | `libretto/extract` | `extractFromPage` |
210
- | `libretto/network` | `pageRequest` |
211
- | `libretto/debug` | `debugPause` |
212
- | `libretto/config` | `isDryRun`, `isDebugMode`, etc. |
200
+ | Import | Contents |
201
+ | ------------------------ | --------------------------------------------------------- |
202
+ | `libretto` | Everything |
203
+ | `libretto/logger` | `Logger`, `defaultLogger`, sinks |
204
+ | `libretto/recovery` | `attemptWithRecovery`, `executeRecoveryAgent`, `detectSubmissionError` |
205
+ | `libretto/extract` | `extractFromPage` |
206
+ | `libretto/network` | `pageRequest` |
207
+ | `libretto/download` | `downloadViaClick`, `downloadAndSave` |
208
+ | `libretto/debug` | `debugPause` |
209
+ | `libretto/config` | `isDryRun`, `isDebugMode`, `shouldPauseBeforeMutation` |
210
+ | `libretto/instrumentation` | `instrumentPage`, `installInstrumentation` |
211
+ | `libretto/visualization` | Ghost cursor and highlight helpers |
212
+ | `libretto/run` | `launchBrowser` |
213
+ | `libretto/state` | Session state serialization and parsing |
214
+ | `libretto/llm` | `LLMClient` type |
213
215
 
214
216
  ## Configuration
215
217
 
216
- Runtime flags can be set via runner config or environment variables:
218
+ Runtime flags via environment variables:
217
219
 
218
- | Env Variable | Effect |
219
- | --------------------- | ------------------------ |
220
- | `LIBRETTO_DEBUG` | Enable debug mode |
221
- | `LIBRETTO_DRY_RUN` | Enable dry-run mode |
220
+ | Env Variable | Effect |
221
+ | --------------------- | --------------------------------------------------- |
222
+ | `LIBRETTO_DEBUG` | Enable debug mode |
223
+ | `LIBRETTO_DRY_RUN` | Enable dry-run mode (defaults to `true` in development) |
222
224
 
223
225
  ## Development
224
226
 
package/dist/cli/cli.js CHANGED
@@ -23,6 +23,7 @@ const CLI_COMMANDS = /* @__PURE__ */ new Set([
23
23
  "snapshot",
24
24
  "network",
25
25
  "actions",
26
+ "pages",
26
27
  "resume",
27
28
  "close",
28
29
  "--help",
@@ -42,6 +43,7 @@ Commands:
42
43
  snapshot [--objective <text> --context <text>] Capture PNG + HTML; analyze when objective is provided (context optional)
43
44
  network [--last N] [--filter regex] [--method M] [--clear] View captured network requests
44
45
  actions [--last N] [--filter regex] [--action TYPE] [--source SOURCE] [--clear] View captured actions
46
+ pages List open pages in the active session
45
47
  resume Resume a paused workflow in the active session
46
48
  close Close the browser
47
49
 
@@ -1,6 +1,7 @@
1
1
  import {
2
2
  runClose as runCloseWithLogger,
3
3
  runOpen,
4
+ runPages,
4
5
  runSave
5
6
  } from "../core/browser.js";
6
7
  import { withSessionLogger } from "../core/context.js";
@@ -41,7 +42,9 @@ function registerBrowserCommands(yargs, logger) {
41
42
  }
42
43
  await runSave(urlOrDomain, String(argv.session), logger);
43
44
  }
44
- ).command("close", "Close the browser", (cmd) => cmd, async (argv) => {
45
+ ).command("pages", "List open pages in the session", (cmd) => cmd, async (argv) => {
46
+ await runPages(String(argv.session), logger);
47
+ }).command("close", "Close the browser", (cmd) => cmd, async (argv) => {
45
48
  await runCloseWithLogger(String(argv.session), logger);
46
49
  });
47
50
  }
@@ -58,15 +58,24 @@ function compileExecFunction(code, helperNames) {
58
58
  }).constructor;
59
59
  return new AsyncFunction(...helperNames, code);
60
60
  }
61
- async function runExec(code, session, logger, visualize = false) {
61
+ async function runExec(code, session, logger, visualize = false, pageId) {
62
62
  readSessionStateOrThrow(session);
63
63
  logger.info("exec-start", {
64
64
  session,
65
65
  codeLength: code.length,
66
66
  codePreview: code.slice(0, 200),
67
- visualize
67
+ visualize,
68
+ pageId
68
69
  });
69
- const { browser, context, page } = await connect(session, logger);
70
+ const { browser, context, page, pageId: resolvedPageId } = await connect(
71
+ session,
72
+ logger,
73
+ 1e4,
74
+ {
75
+ pageId,
76
+ requireSinglePage: true
77
+ }
78
+ );
70
79
  const STALL_THRESHOLD_MS = 6e4;
71
80
  let lastActivityTs = Date.now();
72
81
  const onActivity = () => {
@@ -94,7 +103,7 @@ async function runExec(code, session, logger, visualize = false) {
94
103
  });
95
104
  };
96
105
  process.on("SIGINT", sigintHandler);
97
- wrapPageForActionLogging(page, session, onActivity);
106
+ wrapPageForActionLogging(page, session, resolvedPageId, onActivity);
98
107
  if (visualize) {
99
108
  await installInstrumentation(page, { visualize: true, logger });
100
109
  }
@@ -331,7 +340,7 @@ function registerExecutionCommands(yargs, logger) {
331
340
  return yargs.command(
332
341
  "exec [code..]",
333
342
  "Execute Playwright TypeScript code",
334
- (cmd) => cmd.option("visualize", { type: "boolean", default: false }),
343
+ (cmd) => cmd.option("visualize", { type: "boolean", default: false }).option("page", { type: "string" }),
335
344
  async (argv) => {
336
345
  const codeParts = Array.isArray(argv.code) ? argv.code : argv.code ? [String(argv.code)] : [];
337
346
  const code = codeParts.join(" ");
@@ -340,7 +349,13 @@ function registerExecutionCommands(yargs, logger) {
340
349
  "Usage: libretto-cli exec <code> [--session <name>] [--visualize]"
341
350
  );
342
351
  }
343
- await runExec(code, String(argv.session), logger, Boolean(argv.visualize));
352
+ await runExec(
353
+ code,
354
+ String(argv.session),
355
+ logger,
356
+ Boolean(argv.visualize),
357
+ argv.page ? String(argv.page) : void 0
358
+ );
344
359
  }
345
360
  ).command(
346
361
  "run [integrationFile] [integrationExport]",
@@ -1,3 +1,5 @@
1
+ import { listOpenPages } from "../core/browser.js";
2
+ import { withSessionLogger } from "../core/context.js";
1
3
  import {
2
4
  clearActionLog,
3
5
  clearNetworkLog,
@@ -6,21 +8,41 @@ import {
6
8
  readActionLog,
7
9
  readNetworkLog
8
10
  } from "../core/telemetry.js";
11
+ async function resolvePageId(session, pageId) {
12
+ if (!pageId) return void 0;
13
+ const pages = await withSessionLogger(
14
+ session,
15
+ async (logger) => listOpenPages(session, logger)
16
+ );
17
+ const foundPage = pages.find((page) => page.id === pageId);
18
+ if (!foundPage) {
19
+ throw new Error(
20
+ `Page "${pageId}" was not found in session "${session}". Run "libretto-cli pages --session ${session}" to list ids.`
21
+ );
22
+ }
23
+ return pageId;
24
+ }
9
25
  function registerLogCommands(yargs) {
10
26
  return yargs.command(
11
27
  "network",
12
28
  "View captured network requests",
13
- (cmd) => cmd.option("last", { type: "number" }).option("filter", { type: "string" }).option("method", { type: "string" }).option("clear", { type: "boolean", default: false }),
29
+ (cmd) => cmd.option("last", { type: "number" }).option("filter", { type: "string" }).option("method", { type: "string" }).option("page", { type: "string" }).option("clear", { type: "boolean", default: false }),
14
30
  async (argv) => {
31
+ const session = String(argv.session);
15
32
  if (argv.clear) {
16
- clearNetworkLog(String(argv.session));
33
+ clearNetworkLog(session);
17
34
  console.log("Network log cleared.");
18
35
  return;
19
36
  }
20
- const entries = readNetworkLog(String(argv.session), {
37
+ const pageId = await resolvePageId(
38
+ session,
39
+ argv.page ? String(argv.page) : void 0
40
+ );
41
+ const entries = readNetworkLog(session, {
21
42
  last: typeof argv.last === "number" ? argv.last : void 0,
22
43
  filter: argv.filter,
23
- method: argv.method
44
+ method: argv.method,
45
+ pageId
24
46
  });
25
47
  if (entries.length === 0) {
26
48
  console.log("No network requests captured.");
@@ -35,18 +57,24 @@ ${entries.length} request(s) shown.`);
35
57
  ).command(
36
58
  "actions",
37
59
  "View captured actions",
38
- (cmd) => cmd.option("last", { type: "number" }).option("filter", { type: "string" }).option("action", { type: "string" }).option("source", { type: "string" }).option("clear", { type: "boolean", default: false }),
60
+ (cmd) => cmd.option("last", { type: "number" }).option("filter", { type: "string" }).option("action", { type: "string" }).option("source", { type: "string" }).option("page", { type: "string" }).option("clear", { type: "boolean", default: false }),
39
61
  async (argv) => {
62
+ const session = String(argv.session);
40
63
  if (argv.clear) {
41
- clearActionLog(String(argv.session));
64
+ clearActionLog(session);
42
65
  console.log("Action log cleared.");
43
66
  return;
44
67
  }
45
- const entries = readActionLog(String(argv.session), {
68
+ const pageId = await resolvePageId(
69
+ session,
70
+ argv.page ? String(argv.page) : void 0
71
+ );
72
+ const entries = readActionLog(session, {
46
73
  last: typeof argv.last === "number" ? argv.last : void 0,
47
74
  filter: argv.filter,
48
75
  action: argv.action,
49
- source: argv.source
76
+ source: argv.source,
77
+ pageId
50
78
  });
51
79
  if (entries.length === 0) {
52
80
  console.log("No actions captured.");
@@ -6,12 +6,18 @@ import {
6
6
  runInterpret
7
7
  } from "../core/snapshot-analyzer.js";
8
8
  const DEFAULT_SNAPSHOT_CONTEXT = "No additional user context provided.";
9
- async function captureScreenshot(session, logger) {
10
- logger.info("screenshot-start", { session });
11
- const snapshotRunId = `snapshot-${Date.now()}`;
9
+ function generateSnapshotRunId() {
10
+ return `snapshot-${Date.now()}`;
11
+ }
12
+ async function captureScreenshot(session, logger, pageId) {
13
+ logger.info("screenshot-start", { session, pageId });
14
+ const snapshotRunId = generateSnapshotRunId();
12
15
  const snapshotRunDir = getSessionSnapshotRunDir(session, snapshotRunId);
13
16
  mkdirSync(snapshotRunDir, { recursive: true });
14
- const { browser, page } = await connect(session, logger);
17
+ const { browser, page } = await connect(session, logger, 1e4, {
18
+ pageId,
19
+ requireSinglePage: true
20
+ });
15
21
  try {
16
22
  const title = await page.title();
17
23
  const pageUrl = page.url();
@@ -50,8 +56,8 @@ async function captureScreenshot(session, logger) {
50
56
  disconnectBrowser(browser, logger, session);
51
57
  }
52
58
  }
53
- async function runSnapshot(session, logger, objective, context) {
54
- const { pngPath, htmlPath } = await captureScreenshot(session, logger);
59
+ async function runSnapshot(session, logger, pageId, objective, context) {
60
+ const { pngPath, htmlPath } = await captureScreenshot(session, logger, pageId);
55
61
  console.log("Screenshot saved:");
56
62
  console.log(` PNG: ${pngPath}`);
57
63
  console.log(` HTML: ${htmlPath}`);
@@ -83,11 +89,12 @@ function registerSnapshotCommands(yargs, logger) {
83
89
  return yargs.command(
84
90
  "snapshot",
85
91
  "Capture PNG + HTML; analyze when --objective is provided (--context optional)",
86
- (cmd) => cmd.option("objective", { type: "string" }).option("context", { type: "string" }),
92
+ (cmd) => cmd.option("page", { type: "string" }).option("objective", { type: "string" }).option("context", { type: "string" }),
87
93
  async (argv) => {
88
94
  await runSnapshot(
89
95
  String(argv.session),
90
96
  logger,
97
+ argv.page ? String(argv.page) : void 0,
91
98
  argv.objective,
92
99
  argv.context
93
100
  );