libretto 0.5.1 → 0.5.3-experimental.0

This diff compares publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (45)
  1. package/README.md +10 -5
  2. package/dist/cli/commands/execution.js +38 -12
  3. package/dist/cli/commands/init.js +4 -21
  4. package/dist/cli/core/ai-config.js +12 -2
  5. package/dist/cli/core/browser.js +75 -8
  6. package/dist/cli/core/session-telemetry.js +429 -172
  7. package/dist/cli/core/telemetry.js +10 -2
  8. package/dist/cli/framework/simple-cli.js +4 -0
  9. package/dist/cli/workers/run-integration-runtime.js +18 -41
  10. package/dist/cli/workers/run-integration-worker-protocol.js +2 -1
  11. package/dist/index.d.ts +1 -1
  12. package/dist/index.js +6 -0
  13. package/dist/shared/condense-dom/condense-dom.js +11 -56
  14. package/dist/shared/dom-semantics.d.ts +8 -0
  15. package/dist/shared/dom-semantics.js +69 -0
  16. package/dist/shared/run/browser.js +40 -1
  17. package/dist/shared/visualization/ghost-cursor.js +17 -4
  18. package/dist/shared/workflow/workflow.d.ts +14 -3
  19. package/dist/shared/workflow/workflow.js +50 -3
  20. package/package.json +7 -4
  21. package/scripts/check-skills-sync.mjs +1 -1
  22. package/scripts/generate-changelog.ts +132 -0
  23. package/scripts/skills-libretto.mjs +1 -1
  24. package/scripts/sync-skills.mjs +1 -1
  25. package/skills/libretto/SKILL.md +54 -38
  26. package/skills/libretto/references/action-logs.md +101 -0
  27. package/skills/libretto/references/auth-profiles.md +1 -2
  28. package/skills/libretto/references/code-generation-rules.md +10 -6
  29. package/skills/libretto/references/pages-and-page-targeting.md +1 -1
  30. package/src/cli/commands/execution.ts +39 -11
  31. package/src/cli/commands/init.ts +5 -24
  32. package/src/cli/core/ai-config.ts +12 -1
  33. package/src/cli/core/browser.ts +82 -8
  34. package/src/cli/core/session-telemetry.ts +431 -190
  35. package/src/cli/core/telemetry.ts +23 -1
  36. package/src/cli/framework/simple-cli.ts +5 -0
  37. package/src/cli/workers/run-integration-runtime.ts +24 -52
  38. package/src/cli/workers/run-integration-worker-protocol.ts +2 -1
  39. package/src/index.ts +4 -0
  40. package/src/shared/condense-dom/condense-dom.ts +12 -64
  41. package/src/shared/dom-semantics.ts +68 -0
  42. package/src/shared/run/browser.ts +53 -0
  43. package/src/shared/visualization/ghost-cursor.ts +22 -4
  44. package/src/shared/workflow/workflow.ts +88 -2
  45. package/scripts/prepare-release.sh +0 -97
package/README.md CHANGED
@@ -13,15 +13,17 @@ Libretto is a toolkit for building robust web integrations. It gives your coding
13
13
 
14
14
  We at [Saffron Health](https://saffron.health) built Libretto to help us maintain our browser integrations to common healthcare software. We're open-sourcing it so other teams have an easier time doing the same thing.
15
15
 
16
+ https://github.com/user-attachments/assets/9b9a0ab3-5133-4b20-b3be-459943349d18
17
+
16
18
  ## Installation
17
19
 
18
20
  ```bash
19
- npm install --save-dev libretto
21
+ npm install libretto
20
22
 
21
23
  # Install skill, download Chromium if not already installed, configure snapshot analysis
22
24
  npx libretto init
23
25
 
24
- # Configure snapshot analysis model (see Configuration section below)
26
+ # Configure or change the snapshot analysis model (see Configuration section below). `npx libretto init` sets this up the first time.
25
27
  npx libretto ai configure <openai | anthropic | gemini | vertex>
26
28
  ```
27
29
 
@@ -58,11 +60,12 @@ Agents can use Libretto to reproduce the failure, pause the workflow at any poin
58
60
  You can also use Libretto directly from the command line. All commands accept `--session <name>` to target a specific session.
59
61
 
60
62
  ```bash
61
- npx libretto init # initialize libretto in the current project
63
+ npx libretto init # interactive; run yourself, not through an agent
62
64
  npx libretto open <url> # launch browser and open a URL (headed by default)
63
65
  npx libretto snapshot --objective "..." --context "..." # capture PNG + HTML and analyze with an LLM
64
- npx libretto exec "<code>" # execute Playwright TypeScript against the open page
65
- npx libretto run <file> <export> # run an exported workflow from a file
66
+ npx libretto exec "<code>" # execute Playwright TypeScript against the open page (single quoted argument)
67
+ echo "<code>" | npx libretto exec - # intentionally read Playwright TypeScript from stdin
68
+ npx libretto run <file> <workflowName> # run an exported workflow from a file
66
69
  npx libretto resume # resume a paused workflow
67
70
  npx libretto network # view captured network requests
68
71
  npx libretto actions # view captured user/agent actions
@@ -134,6 +137,8 @@ Maintained by the team at [Saffron Health](https://saffron.health).
134
137
 
135
138
  ## Development
136
139
 
140
+ For local development in this repository:
141
+
137
142
  ```bash
138
143
  pnpm i
139
144
  pnpm build
@@ -445,9 +445,10 @@ async function runIntegrationFromFile(args, logger) {
445
445
  );
446
446
  const payload = JSON.stringify({
447
447
  integrationPath: args.integrationPath,
448
- exportName: args.exportName,
448
+ workflowName: args.workflowName,
449
449
  session: args.session,
450
450
  params: args.params,
451
+ credentials: args.credentials,
451
452
  headless: args.headless,
452
453
  visualize: args.visualize,
453
454
  authProfileDomain: args.authProfileDomain,
@@ -496,11 +497,19 @@ Browser is still open. You can use \`exec\` to inspect it. Call \`run\` to re-ru
496
497
  setSessionStatus(args.session, "completed", logger);
497
498
  console.log("Integration completed.");
498
499
  }
500
+ function readStdinSync() {
501
+ if (process.stdin.isTTY === true) return null;
502
+ try {
503
+ const content = readFileSync(0, "utf8");
504
+ return content.trim().length > 0 ? content : null;
505
+ } catch {
506
+ return null;
507
+ }
508
+ }
499
509
  const execInput = SimpleCLI.input({
500
510
  positionals: [
501
- SimpleCLI.positional("codeParts", z.array(z.string()).default([]), {
502
- help: "Playwright TypeScript code to execute",
503
- variadic: true
511
+ SimpleCLI.positional("code", z.string().optional(), {
512
+ help: "Playwright TypeScript code to execute"
504
513
  })
505
514
  ],
506
515
  named: {
@@ -511,28 +520,36 @@ const execInput = SimpleCLI.input({
511
520
  page: pageOption()
512
521
  }
513
522
  }).refine(
514
- (input) => input.codeParts.length > 0,
515
- `Usage: libretto exec <code> [--session <name>] [--visualize]`
523
+ (input) => input.code !== void 0,
524
+ `Usage: libretto exec <code|-> [--session <name>] [--visualize]
525
+ echo '<code>' | libretto exec - [--session <name>] [--visualize]`
516
526
  );
517
527
  const execCommand = SimpleCLI.command({
518
528
  description: "Execute Playwright TypeScript code"
519
529
  }).input(execInput).use(withRequiredSession()).handle(async ({ input, ctx }) => {
530
+ const code = input.code;
531
+ const codeFromArgsOrStdin = code === "-" ? readStdinSync() : code;
532
+ if (codeFromArgsOrStdin === null) {
533
+ throw new Error(
534
+ "Missing stdin input for `exec -`. Pipe Playwright code into stdin."
535
+ );
536
+ }
520
537
  await runExec(
521
- input.codeParts.join(" "),
538
+ codeFromArgsOrStdin,
522
539
  ctx.session,
523
540
  ctx.logger,
524
541
  input.visualize,
525
542
  input.page
526
543
  );
527
544
  });
528
- const runUsage = `Usage: libretto run <integrationFile> <integrationExport> [--params <json> | --params-file <path>] [--tsconfig <path>] [--headed|--headless] [--no-visualize] [--viewport WxH]`;
545
+ const runUsage = `Usage: libretto run <integrationFile> <workflowName> [--params <json> | --params-file <path>] [--credentials <json>] [--tsconfig <path>] [--headed|--headless] [--no-visualize] [--viewport WxH]`;
529
546
  const runInput = SimpleCLI.input({
530
547
  positionals: [
531
548
  SimpleCLI.positional("integrationFile", z.string().optional(), {
532
549
  help: "Path to the integration file"
533
550
  }),
534
- SimpleCLI.positional("integrationExport", z.string().optional(), {
535
- help: "Named workflow export to run"
551
+ SimpleCLI.positional("workflowName", z.string().optional(), {
552
+ help: "Workflow name to run (from workflow(name, handler))"
536
553
  })
537
554
  ],
538
555
  named: {
@@ -544,6 +561,9 @@ const runInput = SimpleCLI.input({
544
561
  name: "params-file",
545
562
  help: "Path to a JSON params file"
546
563
  }),
564
+ credentials: SimpleCLI.option(z.string().optional(), {
565
+ help: "Inline JSON credentials passed to ctx.credentials"
566
+ }),
547
567
  tsconfig: SimpleCLI.option(z.string().optional(), {
548
568
  help: "Path to a tsconfig used for workflow module resolution"
549
569
  }),
@@ -562,7 +582,7 @@ const runInput = SimpleCLI.input({
562
582
  })
563
583
  }
564
584
  }).refine(
565
- (input) => Boolean(input.integrationFile && input.integrationExport),
585
+ (input) => Boolean(input.integrationFile && input.workflowName),
566
586
  runUsage
567
587
  ).refine(
568
588
  (input) => !(input.params && input.paramsFile),
@@ -594,6 +614,11 @@ const runCommand = SimpleCLI.command({
594
614
  await stopExistingFailedRunSession(ctx.session, ctx.logger);
595
615
  assertSessionAvailableForStart(ctx.session, ctx.logger);
596
616
  const params = resolveRunParams(input.params, input.paramsFile);
617
+ const rawCredentials = input.credentials ? parseJsonArg("--credentials", input.credentials) : void 0;
618
+ if (rawCredentials !== void 0 && (typeof rawCredentials !== "object" || rawCredentials === null || Array.isArray(rawCredentials))) {
619
+ throw new Error(`--credentials must be a JSON object (e.g., '{"key": "value"}').`);
620
+ }
621
+ const credentials = rawCredentials;
597
622
  const headlessMode = input.headed ? false : input.headless ? true : void 0;
598
623
  const visualize = !input.noVisualize;
599
624
  const viewport = resolveViewport(
@@ -603,9 +628,10 @@ const runCommand = SimpleCLI.command({
603
628
  await runIntegrationFromFile(
604
629
  {
605
630
  integrationPath: input.integrationFile,
606
- exportName: input.integrationExport,
631
+ workflowName: input.workflowName,
607
632
  session: ctx.session,
608
633
  params,
634
+ credentials,
609
635
  tsconfigPath: input.tsconfig,
610
636
  headless: headlessMode ?? false,
611
637
  visualize,
@@ -17,8 +17,8 @@ import {
17
17
  loadSnapshotEnv,
18
18
  resolveSnapshotApiModel
19
19
  } from "../core/snapshot-api-config.js";
20
- import { SimpleCLI } from "../framework/simple-cli.js";
21
20
  import { hasProviderCredentials } from "../../shared/llm/client.js";
21
+ import { SimpleCLI } from "../framework/simple-cli.js";
22
22
  const PROVIDER_CHOICES = [
23
23
  {
24
24
  key: "1",
@@ -52,15 +52,6 @@ function promptUser(rl, question) {
52
52
  });
53
53
  });
54
54
  }
55
- function askYesNo(question) {
56
- const rl = createInterface({ input: process.stdin, output: process.stdout });
57
- return new Promise((resolve) => {
58
- rl.question(`${question} (y/N) `, (answer) => {
59
- rl.close();
60
- resolve(answer.trim().toLowerCase() === "y");
61
- });
62
- });
63
- }
64
55
  function safeReadAiConfig() {
65
56
  try {
66
57
  return readAiConfig();
@@ -233,7 +224,7 @@ function detectAgentDirs(root) {
233
224
  if (existsSync(join(root, ".claude"))) dirs.push(join(root, ".claude"));
234
225
  return dirs;
235
226
  }
236
- async function copySkills() {
227
+ function copySkills() {
237
228
  const agentDirs = detectAgentDirs(REPO_ROOT);
238
229
  if (agentDirs.length === 0) {
239
230
  console.log(
@@ -242,15 +233,6 @@ async function copySkills() {
242
233
  return;
243
234
  }
244
235
  const destinations = agentDirs.map((d) => join(d, "skills", "libretto"));
245
- const dirNames = agentDirs.map((d) => basename(d)).join(" and ");
246
- const existing = destinations.filter((d) => existsSync(d));
247
- const verb = existing.length > 0 ? "Overwrite" : "Install";
248
- const proceed = await askYesNo(`
249
- ${verb} libretto skills in ${dirNames}?`);
250
- if (!proceed) {
251
- console.log(" Skipping skill copy.");
252
- return;
253
- }
254
236
  let sourceDir;
255
237
  try {
256
238
  sourceDir = getPackageSkillsDir();
@@ -289,10 +271,11 @@ const initCommand = SimpleCLI.command({
289
271
  } else {
290
272
  console.log("\nSkipping browser installation (--skip-browsers)");
291
273
  }
274
+ copySkills();
292
275
  if (process.stdin.isTTY) {
293
- await copySkills();
294
276
  await runInteractiveApiSetup();
295
277
  } else {
278
+ loadSnapshotEnv();
296
279
  printSnapshotApiStatus();
297
280
  }
298
281
  console.log("\n\u2713 libretto init complete");
@@ -11,10 +11,15 @@ const ViewportConfigSchema = z.object({
11
11
  width: z.number().int().min(1),
12
12
  height: z.number().int().min(1)
13
13
  });
14
+ const WindowPositionConfigSchema = z.object({
15
+ x: z.number().int(),
16
+ y: z.number().int()
17
+ });
14
18
  const LibrettoConfigSchema = z.object({
15
19
  version: z.literal(CURRENT_CONFIG_VERSION),
16
20
  ai: AiConfigSchema.optional(),
17
- viewport: ViewportConfigSchema.optional()
21
+ viewport: ViewportConfigSchema.optional(),
22
+ windowPosition: WindowPositionConfigSchema.optional()
18
23
  }).passthrough();
19
24
  const DEFAULT_MODELS = {
20
25
  openai: "openai/gpt-5.4",
@@ -49,6 +54,10 @@ function formatExpectedConfigExample() {
49
54
  viewport: {
50
55
  width: 1280,
51
56
  height: 800
57
+ },
58
+ windowPosition: {
59
+ x: 1600,
60
+ y: 120
52
61
  }
53
62
  },
54
63
  null,
@@ -64,7 +73,7 @@ ${detail}` : null,
64
73
  "Expected config example:",
65
74
  formatExpectedConfigExample(),
66
75
  "Notes:",
67
- ' - "ai" and "viewport" are optional.',
76
+ ' - "ai", "viewport", and "windowPosition" are optional.',
68
77
  ' - "ai.model" must be a provider/model string like "openai/gpt-5.4" or "anthropic/claude-sonnet-4-6".',
69
78
  "Fix the file to match this shape, or delete it and rerun:",
70
79
  ` npx libretto ai configure ${formatConfigureProviders()}`
@@ -189,6 +198,7 @@ export {
189
198
  CURRENT_CONFIG_VERSION,
190
199
  LibrettoConfigSchema,
191
200
  ViewportConfigSchema,
201
+ WindowPositionConfigSchema,
192
202
  clearAiConfig,
193
203
  readAiConfig,
194
204
  readLibrettoConfig,
@@ -6,6 +6,14 @@ import { dirname, join, resolve } from "node:path";
6
6
  import { fileURLToPath } from "node:url";
7
7
  import { createServer } from "node:net";
8
8
  import { spawn } from "node:child_process";
9
+ import {
10
+ filterSemanticClasses,
11
+ INTERACTIVE_ROLE_NAMES,
12
+ INTERACTIVE_TAG_NAMES,
13
+ isObfuscatedClass,
14
+ TEST_ATTRIBUTE_NAMES,
15
+ TRUSTED_ATTRIBUTE_NAMES
16
+ } from "../../shared/dom-semantics.js";
9
17
  import {
10
18
  getSessionActionsLogPath,
11
19
  getSessionNetworkLogPath,
@@ -245,10 +253,22 @@ function resolveViewport(cliViewport, logger) {
245
253
  });
246
254
  return DEFAULT_VIEWPORT;
247
255
  }
256
+ function resolveWindowPosition(logger) {
257
+ const config = readLibrettoConfig();
258
+ if (config.windowPosition) {
259
+ logger.info("window-position-source", {
260
+ source: "config",
261
+ windowPosition: config.windowPosition
262
+ });
263
+ return config.windowPosition;
264
+ }
265
+ return void 0;
266
+ }
248
267
  async function runOpen(rawUrl, headed, session, logger, options) {
249
268
  const url = normalizeUrl(rawUrl);
250
269
  const viewport = resolveViewport(options?.viewport, logger);
251
- logger.info("open-start", { url, headed, session, viewport });
270
+ const windowPosition = headed ? resolveWindowPosition(logger) : void 0;
271
+ logger.info("open-start", { url, headed, session, viewport, windowPosition });
252
272
  assertSessionAvailableForStart(session, logger);
253
273
  const port = await pickFreePort();
254
274
  const runLogPath = logFileForSession(session);
@@ -277,6 +297,45 @@ async function runOpen(rawUrl, headed, session, logger, options) {
277
297
  const escapedLogPath = runLogPath.replace(/\\/g, "\\\\").replace(/'/g, "\\'");
278
298
  const escapedNetworkLogPath = networkLogPath.replace(/\\/g, "\\\\").replace(/'/g, "\\'");
279
299
  const escapedActionsLogPath = actionsLogPath.replace(/\\/g, "\\\\").replace(/'/g, "\\'");
300
+ const windowPositionArg = windowPosition ? `, '--window-position=${windowPosition.x},${windowPosition.y}'` : "";
301
+ const windowBoundsSetupCode = windowPosition ? `
302
+ const requestedWindowBounds = { left: ${windowPosition.x}, top: ${windowPosition.y}, windowState: 'normal' };
303
+ const pageCdp = await context.newCDPSession(page);
304
+ let browserCdp;
305
+ try {
306
+ const targetInfo = await pageCdp.send('Target.getTargetInfo');
307
+ const targetId = targetInfo?.targetInfo?.targetId;
308
+ browserCdp = await browser.newBrowserCDPSession();
309
+ const windowResult = await browserCdp.send(
310
+ 'Browser.getWindowForTarget',
311
+ targetId ? { targetId } : {},
312
+ );
313
+ await browserCdp.send('Browser.setWindowBounds', {
314
+ windowId: windowResult.windowId,
315
+ bounds: requestedWindowBounds,
316
+ });
317
+ await new Promise((resolve) => setTimeout(resolve, 250));
318
+ const actualWindow = await browserCdp.send('Browser.getWindowBounds', {
319
+ windowId: windowResult.windowId,
320
+ });
321
+ childLog('info', 'window-bounds-set', {
322
+ windowId: windowResult.windowId,
323
+ requestedBounds: requestedWindowBounds,
324
+ actualBounds: actualWindow.bounds,
325
+ });
326
+ } catch (error) {
327
+ childLog('warn', 'window-bounds-set-failed', {
328
+ requestedBounds: requestedWindowBounds,
329
+ message: error instanceof Error ? error.message : String(error),
330
+ stack: error instanceof Error ? error.stack : undefined,
331
+ });
332
+ } finally {
333
+ await pageCdp.detach().catch(() => {});
334
+ if (browserCdp) {
335
+ await browserCdp.detach().catch(() => {});
336
+ }
337
+ }
338
+ ` : "";
280
339
  const launcherCode = `
281
340
  import { chromium } from 'playwright';
282
341
  import { appendFileSync, mkdirSync } from 'node:fs';
@@ -288,14 +347,21 @@ const ACTIONS_LOG = '${escapedActionsLogPath}';
288
347
  mkdirSync(dirname(NETWORK_LOG), { recursive: true });
289
348
 
290
349
  // tsx/esbuild may emit __name() wrappers in Function#toString output.
291
- const __name = (target, value) =>
292
- Object.defineProperty(target, 'name', { value, configurable: true });
350
+ const __name = (target, value) =>
351
+ Object.defineProperty(target, 'name', { value, configurable: true });
293
352
 
294
- ${installSessionTelemetry.toString()}
353
+ const TEST_ATTRIBUTE_NAMES = ${JSON.stringify([...TEST_ATTRIBUTE_NAMES])};
354
+ const TRUSTED_ATTRIBUTE_NAMES = ${JSON.stringify([...TRUSTED_ATTRIBUTE_NAMES])};
355
+ const INTERACTIVE_TAG_NAMES = ${JSON.stringify([...INTERACTIVE_TAG_NAMES])};
356
+ const INTERACTIVE_ROLE_NAMES = ${JSON.stringify([...INTERACTIVE_ROLE_NAMES])};
357
+ const filterSemanticClasses = ${filterSemanticClasses.toString()};
358
+ const isObfuscatedClass = ${isObfuscatedClass.toString()};
295
359
 
296
- function logAction(entry) {
297
- appendFileSync(ACTIONS_LOG, JSON.stringify(entry) + '\\n');
298
- }
360
+ ${installSessionTelemetry.toString()}
361
+
362
+ function logAction(entry) {
363
+ appendFileSync(ACTIONS_LOG, JSON.stringify(entry) + '\\n');
364
+ }
299
365
 
300
366
  function logNetwork(entry) {
301
367
  appendFileSync(NETWORK_LOG, JSON.stringify(entry) + '\\n');
@@ -317,7 +383,7 @@ function childLog(level, event, data = {}) {
317
383
 
318
384
  const browser = await chromium.launch({
319
385
  headless: ${!headed},
320
- args: ['--disable-blink-features=AutomationControlled', '--remote-debugging-port=${port}', '--remote-debugging-address=127.0.0.1', '--no-focus-on-check'],
386
+ args: ['--disable-blink-features=AutomationControlled', '--remote-debugging-port=${port}', '--remote-debugging-address=127.0.0.1', '--no-focus-on-check'${windowPositionArg}],
321
387
  });
322
388
 
323
389
  browser.on('disconnected', () => {
@@ -331,6 +397,7 @@ const context = await browser.newContext({
331
397
  });
332
398
 
333
399
  const page = await context.newPage();
400
+ ${windowBoundsSetupCode}
334
401
  page.setDefaultTimeout(30000);
335
402
  page.setDefaultNavigationTimeout(45000);
336
403