npm - libretto - Versions diffs - 0.2.0 → 0.2.2 - Mend

libretto 0.2.0 → 0.2.2

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (69) hide show

package/README.md +128 -126
package/dist/cli/cli.js +2 -0
package/dist/cli/commands/browser.js +4 -1
package/dist/cli/commands/execution.js +21 -6
package/dist/cli/commands/logs.js +36 -8
package/dist/cli/commands/snapshot.js +14 -7
package/dist/cli/core/browser.js +89 -253
package/dist/cli/core/session-telemetry.js +491 -0
package/dist/cli/core/telemetry.js +18 -6
package/dist/cli/workers/run-integration-runtime.js +19 -1
package/dist/index.cjs +2 -6
package/dist/index.d.cts +2 -5
package/dist/index.d.ts +2 -5
package/dist/index.js +2 -5
package/dist/runtime/download/download.d.cts +2 -2
package/dist/runtime/download/download.d.ts +2 -2
package/dist/runtime/extract/extract.cjs +2 -1
package/dist/runtime/extract/extract.d.cts +5 -5
package/dist/runtime/extract/extract.d.ts +5 -5
package/dist/runtime/extract/extract.js +2 -1
package/dist/runtime/network/network.d.cts +6 -6
package/dist/runtime/network/network.d.ts +6 -6
package/dist/runtime/recovery/agent.cjs +12 -7
package/dist/runtime/recovery/agent.d.cts +2 -2
package/dist/runtime/recovery/agent.d.ts +2 -2
package/dist/runtime/recovery/agent.js +12 -7
package/dist/runtime/recovery/errors.cjs +8 -6
package/dist/runtime/recovery/errors.d.cts +2 -2
package/dist/runtime/recovery/errors.d.ts +2 -2
package/dist/runtime/recovery/errors.js +8 -6
package/dist/runtime/recovery/recovery.cjs +5 -3
package/dist/runtime/recovery/recovery.d.cts +2 -2
package/dist/runtime/recovery/recovery.d.ts +2 -2
package/dist/runtime/recovery/recovery.js +5 -3
package/dist/shared/instrumentation/instrument.d.cts +2 -2
package/dist/shared/instrumentation/instrument.d.ts +2 -2
package/dist/shared/llm/types.d.cts +5 -5
package/dist/shared/llm/types.d.ts +5 -5
package/dist/shared/logger/index.cjs +2 -0
package/dist/shared/logger/index.d.cts +1 -1
package/dist/shared/logger/index.d.ts +1 -1
package/dist/shared/logger/index.js +2 -1
package/dist/shared/logger/logger.cjs +15 -2
package/dist/shared/logger/logger.d.cts +13 -1
package/dist/shared/logger/logger.d.ts +13 -1
package/dist/shared/logger/logger.js +13 -1
package/dist/shared/state/session-state.d.cts +2 -2
package/dist/shared/state/session-state.d.ts +2 -2
package/package.json +15 -11
package/scripts/postinstall.mjs +48 -0
package/skill/SKILL.md +438 -0
package/skill/code-generation-rules.md +190 -0
package/skill/integration-approach-selection.md +174 -0
package/dist/runtime/step/index.cjs +0 -31
package/dist/runtime/step/index.d.cts +0 -7
package/dist/runtime/step/index.d.ts +0 -7
package/dist/runtime/step/index.js +0 -6
package/dist/runtime/step/runner.cjs +0 -208
package/dist/runtime/step/runner.d.cts +0 -16
package/dist/runtime/step/runner.d.ts +0 -16
package/dist/runtime/step/runner.js +0 -187
package/dist/runtime/step/step.cjs +0 -67
package/dist/runtime/step/step.d.cts +0 -23
package/dist/runtime/step/step.d.ts +0 -23
package/dist/runtime/step/step.js +0 -43
package/dist/runtime/step/types.cjs +0 -16
package/dist/runtime/step/types.d.cts +0 -72
package/dist/runtime/step/types.d.ts +0 -72
package/dist/runtime/step/types.js +0 -0

package/README.md CHANGED Viewed

@@ -4,13 +4,14 @@ A TypeScript library for browser automation with AI-powered recovery and data ex
 ## Features
-- **Step-based workflows** — Define automation as named steps with built-in error handling and recovery
 - **AI-powered recovery** — Vision-based agent that automatically detects and dismisses popups or obstacles using an LLM
 - **Structured data extraction** — Extract typed data from web pages using AI vision + Zod schemas
 - **Error detection** — Classify form/submission errors against known patterns
-- **Debug bundles** — On failure, captures screenshots, DOM, logs, and step history for investigation
-- **Dry-run mode** — Run workflows in simulation without side effects
+- **In-browser network requests** — Execute authenticated fetch calls inside the page context with optional Zod validation
+- **File downloads** — Trigger and intercept file downloads via click, with optional save-to-disk
+- **Dry-run mode** — Skip mutations in development without side effects
 - **Pluggable LLM** — Bring your own LLM provider (Claude, GPT, etc.) via a simple interface
+- **Pluggable logging** — All runtime functions accept an optional logger; defaults to console output
 ## Installation
@@ -24,112 +25,139 @@ pnpm add libretto playwright zod
 ```typescript
 import { chromium } from "playwright";
-import { step, createRunner } from "libretto";
+import { extractFromPage, attemptWithRecovery } from "libretto";
-const runner = createRunner({
-  llmClient: myLLMClient, // optional — enables AI recovery & extraction
-});
+const browser = await chromium.launch();
+const page = await browser.newPage();
-const steps = [
-  step("navigate", async ({ page, logger }) => {
-    await page.goto("https://example.com/login");
-    logger.info("navigated to login page");
-  }),
+await page.goto("https://example.com/login");
+await page.fill("#email", "user@example.com");
+await page.fill("#password", "secret");
-  step("login", async ({ page }) => {
-    await page.fill("#email", "user@example.com");
-    await page.fill("#password", "secret");
-    await page.click('button[type="submit"]');
-    await page.waitForURL("**/dashboard");
-  }),
+// Automatically retry with AI popup recovery on failure
+await attemptWithRecovery(page, () => page.click('button[type="submit"]'));
-  step("scrape-data", async ({ page, logger }) => {
-    const title = await page.textContent("h1");
-    logger.info("page title", { title });
-  }),
-];
-const browser = await chromium.launch();
-const page = await browser.newPage();
-await runner.run(page, steps);
 await browser.close();
 ```
-## Core Concepts
+## Runtime Functions
-### Steps
+### Recovery
-A step is a named unit of work. Create one with the `step()` factory:
+#### `attemptWithRecovery(page, fn, logger?, llmClient?)`
+Executes a function and, if it fails, uses AI vision to detect and dismiss popups before retrying once.
+```typescript
+import { attemptWithRecovery } from "libretto";
+await attemptWithRecovery(page, async () => {
+  await page.click('button[type="submit"]');
+}, undefined, llmClient);
+```
+#### `executeRecoveryAgent(page, instruction, logger?, llmClient?)`
+Runs a multi-step vision-based recovery agent that takes screenshots and executes browser actions (click, type, scroll, etc.) to resolve obstacles.
 ```typescript
-step("step-name", async ({ page, logger, config }) => {
-  // page:   Playwright Page instance
-  // logger: scoped logger for this step
-  // config: { dryRun, debug, logDir }
+import { executeRecoveryAgent } from "libretto";
+await executeRecoveryAgent(
+  page,
+  "Close the cookie consent banner",
+  undefined,
+  llmClient,
+);
+```
+#### `detectSubmissionError(page, error, logContext, llmClient, knownErrors?, logger?)`
+Uses a screenshot + LLM vision to detect if an error occurred during a form submission. Matches against provided known error patterns.
+```typescript
+import { detectSubmissionError } from "libretto";
+try {
+  await page.click("#submit");
+} catch (error) {
+  const result = await detectSubmissionError(page, error, "checkout", llmClient, [
+    { id: "duplicate", errorPatterns: ["already exists"], userMessage: "Duplicate entry" },
+  ]);
+  console.log(result.errorId, result.message);
+}
+```
+### Data Extraction
+#### `extractFromPage(options)`
+Extract structured data from a page using AI vision + a Zod schema.
+```typescript
+import { extractFromPage } from "libretto";
+import { z } from "zod";
+const result = await extractFromPage({
+  page,
+  llmClient,
+  instruction: "Extract the product name and price",
+  schema: z.object({
+    name: z.string(),
+    price: z.number(),
+  }),
+  selector: ".product-card", // optional — scopes to a specific element
 });
+// result is typed as { name: string; price: number }
 ```
-### Step Options
+### Network
+#### `pageRequest(page, config, options?)`
+Executes a fetch call inside the browser context via `page.evaluate()`, inheriting the page's cookies and auth state. Supports optional Zod validation.
 ```typescript
-step("submit-form", handler, {
-  dryRun: "skip",      // "skip" (default) | "execute" | "simulate"
-  simulate: async ({ logger }) => {
-    logger.info("simulated form submission");
+import { pageRequest } from "libretto";
+import { z } from "zod";
+const data = await pageRequest(
+  page,
+  {
+    url: "https://example.com/api/profile",
+    method: "GET",
+    responseType: "json",
   },
-  recovery: {
-    "session-expired": async ({ page, logger }) => {
-      await page.click("#re-login");
-    },
+  {
+    schema: z.object({ name: z.string(), email: z.string() }),
   },
-});
+);
 ```
-- **`dryRun`** — Controls behavior when the runner is in dry-run mode:
-  - `"skip"` — Skip the step entirely
-  - `"execute"` — Run normally even in dry-run mode
-  - `"simulate"` — Call the `simulate` function instead
-- **`recovery`** — Named recovery handlers tried after AI recovery fails
+### Downloads
-### Extending Steps
+#### `downloadViaClick(page, selector, options?)`
-Use `step.extend()` to create a step factory with shared recovery handlers:
+Triggers a file download by clicking a DOM element and intercepts the result.
 ```typescript
-const myStep = step.extend({
-  recovery: {
-    "cookie-banner": async ({ page }) => {
-      await page.click("#accept-cookies");
-    },
-  },
-});
+import { downloadViaClick } from "libretto";
-// Every step created with myStep inherits the cookie-banner recovery
-myStep("checkout", async ({ page }) => { /* ... */ });
+const { buffer, filename } = await downloadViaClick(page, "#download-btn");
 ```
-### Runner
+#### `downloadAndSave(page, selector, options?)`
+Same as `downloadViaClick` but also writes the file to disk.
 ```typescript
-import { createRunner } from "libretto";
+import { downloadAndSave } from "libretto";
-const runner = createRunner({
-  llmClient,            // optional — enables AI recovery & extraction
-  dryRun: false,        // run in dry-run mode
-  debug: false,         // enable debug mode
-  logDir: "./logs",     // defaults to .libretto/sessions/<sessionName>/logs
+const { savedTo } = await downloadAndSave(page, "#export-csv", {
+  savePath: "./exports/report.csv",
 });
-await runner.run(page, steps);
 ```
-The runner executes steps sequentially. For each step it:
-1. Captures a start screenshot
-2. Runs the handler with automatic popup recovery (if `llmClient` provided)
-3. Falls back to custom recovery handlers on failure
-4. Generates a debug bundle if all recovery fails
-5. Captures an end screenshot
 ## LLM Client Interface
 Provide your own implementation backed by any LLM provider:
@@ -147,45 +175,14 @@ const myLLMClient: LLMClient = {
 };
 ```
-## Data Extraction
-Extract structured data from a page using AI vision:
-```typescript
-import { extractFromPage } from "libretto/extract";
-import { z } from "zod";
-const result = await extractFromPage(page, llmClient, {
-  prompt: "Extract the product name and price from this page",
-  schema: z.object({
-    name: z.string(),
-    price: z.number(),
-  }),
-});
-// result is typed as { name: string; price: number }
-```
-## Error Detection
-Detect and classify form submission errors:
-```typescript
-import { detectSubmissionError } from "libretto/recovery";
-const error = await detectSubmissionError(page, llmClient, [
-  { name: "duplicate-entry", description: "Record already exists" },
-  { name: "invalid-field", description: "A form field has a validation error" },
-]);
+## Logging
-if (error) {
-  console.log(error.name, error.details);
-}
-```
+All runtime functions accept an optional `logger` parameter. When omitted, output goes to `console.log` with `[INFO]`, `[WARN]`, `[ERROR]` prefixes.
-## Logging
+For structured logging, use the built-in `Logger` class:
 ```typescript
-import { Logger, createFileLogSink, prettyConsoleSink } from "libretto/logger";
+import { Logger, createFileLogSink, prettyConsoleSink } from "libretto";
 const logger = new Logger()
   .withSink(createFileLogSink({ filePath: "./app.log" }))
@@ -200,25 +197,30 @@ scoped.error("login failed", { reason: "bad password" });
 Libretto provides granular imports:
-| Import                   | Contents                                      |
-| ------------------------ | --------------------------------------------- |
-| `libretto`               | Everything                                    |
-| `libretto/step`          | `step`, `createRunner`                        |
-| `libretto/logger`        | `Logger`, sinks                               |
-| `libretto/recovery`      | `attemptWithRecovery`, `detectSubmissionError` |
-| `libretto/extract`       | `extractFromPage`                             |
-| `libretto/network`       | `pageRequest`                                 |
-| `libretto/debug`         | `debugPause`                                  |
-| `libretto/config`        | `isDryRun`, `isDebugMode`, etc.               |
+| Import                   | Contents                                                  |
+| ------------------------ | --------------------------------------------------------- |
+| `libretto`               | Everything                                                |
+| `libretto/logger`        | `Logger`, `defaultLogger`, sinks                          |
+| `libretto/recovery`      | `attemptWithRecovery`, `executeRecoveryAgent`, `detectSubmissionError` |
+| `libretto/extract`       | `extractFromPage`                                         |
+| `libretto/network`       | `pageRequest`                                             |
+| `libretto/download`      | `downloadViaClick`, `downloadAndSave`                     |
+| `libretto/debug`         | `debugPause`                                              |
+| `libretto/config`        | `isDryRun`, `isDebugMode`, `shouldPauseBeforeMutation`    |
+| `libretto/instrumentation` | `instrumentPage`, `installInstrumentation`              |
+| `libretto/visualization` | Ghost cursor and highlight helpers                        |
+| `libretto/run`           | `launchBrowser`                                           |
+| `libretto/state`         | Session state serialization and parsing                   |
+| `libretto/llm`           | `LLMClient` type                                          |
 ## Configuration
-Runtime flags can be set via runner config or environment variables:
+Runtime flags via environment variables:
-| Env Variable          | Effect                   |
-| --------------------- | ------------------------ |
-| `LIBRETTO_DEBUG`      | Enable debug mode        |
-| `LIBRETTO_DRY_RUN`   | Enable dry-run mode      |
+| Env Variable          | Effect                                              |
+| --------------------- | --------------------------------------------------- |
+| `LIBRETTO_DEBUG`      | Enable debug mode                                   |
+| `LIBRETTO_DRY_RUN`   | Enable dry-run mode (defaults to `true` in development) |
 ## Development

package/dist/cli/cli.js CHANGED Viewed

@@ -23,6 +23,7 @@ const CLI_COMMANDS = /* @__PURE__ */ new Set([
   "snapshot",
   "network",
   "actions",
+  "pages",
   "resume",
   "close",
   "--help",
@@ -42,6 +43,7 @@ Commands:
   snapshot [--objective <text> --context <text>]  Capture PNG + HTML; analyze when objective is provided (context optional)
   network [--last N] [--filter regex] [--method M] [--clear]  View captured network requests
   actions [--last N] [--filter regex] [--action TYPE] [--source SOURCE] [--clear]  View captured actions
+  pages                   List open pages in the active session
   resume                  Resume a paused workflow in the active session
   close                   Close the browser

package/dist/cli/commands/browser.js CHANGED Viewed

@@ -1,6 +1,7 @@
 import {
   runClose as runCloseWithLogger,
   runOpen,
+  runPages,
   runSave
 } from "../core/browser.js";
 import { withSessionLogger } from "../core/context.js";
@@ -41,7 +42,9 @@ function registerBrowserCommands(yargs, logger) {
       }
       await runSave(urlOrDomain, String(argv.session), logger);
     }
-  ).command("close", "Close the browser", (cmd) => cmd, async (argv) => {
+  ).command("pages", "List open pages in the session", (cmd) => cmd, async (argv) => {
+    await runPages(String(argv.session), logger);
+  }).command("close", "Close the browser", (cmd) => cmd, async (argv) => {
     await runCloseWithLogger(String(argv.session), logger);
   });
 }

package/dist/cli/commands/execution.js CHANGED Viewed

@@ -58,15 +58,24 @@ function compileExecFunction(code, helperNames) {
   }).constructor;
   return new AsyncFunction(...helperNames, code);
 }
-async function runExec(code, session, logger, visualize = false) {
+async function runExec(code, session, logger, visualize = false, pageId) {
   readSessionStateOrThrow(session);
   logger.info("exec-start", {
     session,
     codeLength: code.length,
     codePreview: code.slice(0, 200),
-    visualize
+    visualize,
+    pageId
   });
-  const { browser, context, page } = await connect(session, logger);
+  const { browser, context, page, pageId: resolvedPageId } = await connect(
+    session,
+    logger,
+    1e4,
+    {
+      pageId,
+      requireSinglePage: true
+    }
+  );
   const STALL_THRESHOLD_MS = 6e4;
   let lastActivityTs = Date.now();
   const onActivity = () => {
@@ -94,7 +103,7 @@ async function runExec(code, session, logger, visualize = false) {
     });
   };
   process.on("SIGINT", sigintHandler);
-  wrapPageForActionLogging(page, session, onActivity);
+  wrapPageForActionLogging(page, session, resolvedPageId, onActivity);
   if (visualize) {
     await installInstrumentation(page, { visualize: true, logger });
   }
@@ -331,7 +340,7 @@ function registerExecutionCommands(yargs, logger) {
   return yargs.command(
     "exec [code..]",
     "Execute Playwright TypeScript code",
-    (cmd) => cmd.option("visualize", { type: "boolean", default: false }),
+    (cmd) => cmd.option("visualize", { type: "boolean", default: false }).option("page", { type: "string" }),
     async (argv) => {
       const codeParts = Array.isArray(argv.code) ? argv.code : argv.code ? [String(argv.code)] : [];
       const code = codeParts.join(" ");
@@ -340,7 +349,13 @@ function registerExecutionCommands(yargs, logger) {
           "Usage: libretto-cli exec <code> [--session <name>] [--visualize]"
         );
       }
-      await runExec(code, String(argv.session), logger, Boolean(argv.visualize));
+      await runExec(
+        code,
+        String(argv.session),
+        logger,
+        Boolean(argv.visualize),
+        argv.page ? String(argv.page) : void 0
+      );
     }
   ).command(
     "run [integrationFile] [integrationExport]",

package/dist/cli/commands/logs.js CHANGED Viewed

@@ -1,3 +1,5 @@
+import { listOpenPages } from "../core/browser.js";
+import { withSessionLogger } from "../core/context.js";
 import {
   clearActionLog,
   clearNetworkLog,
@@ -6,21 +8,41 @@ import {
   readActionLog,
   readNetworkLog
 } from "../core/telemetry.js";
+async function resolvePageId(session, pageId) {
+  if (!pageId) return void 0;
+  const pages = await withSessionLogger(
+    session,
+    async (logger) => listOpenPages(session, logger)
+  );
+  const foundPage = pages.find((page) => page.id === pageId);
+  if (!foundPage) {
+    throw new Error(
+      `Page "${pageId}" was not found in session "${session}". Run "libretto-cli pages --session ${session}" to list ids.`
+    );
+  }
+  return pageId;
+}
 function registerLogCommands(yargs) {
   return yargs.command(
     "network",
     "View captured network requests",
-    (cmd) => cmd.option("last", { type: "number" }).option("filter", { type: "string" }).option("method", { type: "string" }).option("clear", { type: "boolean", default: false }),
+    (cmd) => cmd.option("last", { type: "number" }).option("filter", { type: "string" }).option("method", { type: "string" }).option("page", { type: "string" }).option("clear", { type: "boolean", default: false }),
     async (argv) => {
+      const session = String(argv.session);
       if (argv.clear) {
-        clearNetworkLog(String(argv.session));
+        clearNetworkLog(session);
         console.log("Network log cleared.");
         return;
       }
-      const entries = readNetworkLog(String(argv.session), {
+      const pageId = await resolvePageId(
+        session,
+        argv.page ? String(argv.page) : void 0
+      );
+      const entries = readNetworkLog(session, {
         last: typeof argv.last === "number" ? argv.last : void 0,
         filter: argv.filter,
-        method: argv.method
+        method: argv.method,
+        pageId
       });
       if (entries.length === 0) {
         console.log("No network requests captured.");
@@ -35,18 +57,24 @@ ${entries.length} request(s) shown.`);
   ).command(
     "actions",
     "View captured actions",
-    (cmd) => cmd.option("last", { type: "number" }).option("filter", { type: "string" }).option("action", { type: "string" }).option("source", { type: "string" }).option("clear", { type: "boolean", default: false }),
+    (cmd) => cmd.option("last", { type: "number" }).option("filter", { type: "string" }).option("action", { type: "string" }).option("source", { type: "string" }).option("page", { type: "string" }).option("clear", { type: "boolean", default: false }),
     async (argv) => {
+      const session = String(argv.session);
       if (argv.clear) {
-        clearActionLog(String(argv.session));
+        clearActionLog(session);
         console.log("Action log cleared.");
         return;
       }
-      const entries = readActionLog(String(argv.session), {
+      const pageId = await resolvePageId(
+        session,
+        argv.page ? String(argv.page) : void 0
+      );
+      const entries = readActionLog(session, {
         last: typeof argv.last === "number" ? argv.last : void 0,
         filter: argv.filter,
         action: argv.action,
-        source: argv.source
+        source: argv.source,
+        pageId
       });
       if (entries.length === 0) {
         console.log("No actions captured.");

package/dist/cli/commands/snapshot.js CHANGED Viewed

@@ -6,12 +6,18 @@ import {
   runInterpret
 } from "../core/snapshot-analyzer.js";
 const DEFAULT_SNAPSHOT_CONTEXT = "No additional user context provided.";
-async function captureScreenshot(session, logger) {
-  logger.info("screenshot-start", { session });
-  const snapshotRunId = `snapshot-${Date.now()}`;
+function generateSnapshotRunId() {
+  return `snapshot-${Date.now()}`;
+}
+async function captureScreenshot(session, logger, pageId) {
+  logger.info("screenshot-start", { session, pageId });
+  const snapshotRunId = generateSnapshotRunId();
   const snapshotRunDir = getSessionSnapshotRunDir(session, snapshotRunId);
   mkdirSync(snapshotRunDir, { recursive: true });
-  const { browser, page } = await connect(session, logger);
+  const { browser, page } = await connect(session, logger, 1e4, {
+    pageId,
+    requireSinglePage: true
+  });
   try {
     const title = await page.title();
     const pageUrl = page.url();
@@ -50,8 +56,8 @@ async function captureScreenshot(session, logger) {
     disconnectBrowser(browser, logger, session);
   }
 }
-async function runSnapshot(session, logger, objective, context) {
-  const { pngPath, htmlPath } = await captureScreenshot(session, logger);
+async function runSnapshot(session, logger, pageId, objective, context) {
+  const { pngPath, htmlPath } = await captureScreenshot(session, logger, pageId);
   console.log("Screenshot saved:");
   console.log(`  PNG:  ${pngPath}`);
   console.log(`  HTML: ${htmlPath}`);
@@ -83,11 +89,12 @@ function registerSnapshotCommands(yargs, logger) {
   return yargs.command(
     "snapshot",
     "Capture PNG + HTML; analyze when --objective is provided (--context optional)",
-    (cmd) => cmd.option("objective", { type: "string" }).option("context", { type: "string" }),
+    (cmd) => cmd.option("page", { type: "string" }).option("objective", { type: "string" }).option("context", { type: "string" }),
     async (argv) => {
       await runSnapshot(
         String(argv.session),
         logger,
+        argv.page ? String(argv.page) : void 0,
         argv.objective,
         argv.context
       );