libretto 0.6.21 → 0.6.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. package/README.md +5 -1
  2. package/README.template.md +5 -1
  3. package/dist/cli/commands/execution.js +8 -1
  4. package/dist/cli/core/browser.js +8 -3
  5. package/dist/cli/core/daemon/daemon.js +8 -6
  6. package/dist/cli/core/providers/kernel.js +107 -29
  7. package/dist/cli/core/providers/steel.js +10 -1
  8. package/dist/index.d.ts +3 -2
  9. package/dist/index.js +15 -1
  10. package/dist/runtime/recovery/agent.d.ts +50 -2
  11. package/dist/runtime/recovery/agent.js +159 -45
  12. package/dist/runtime/recovery/index.d.ts +2 -1
  13. package/dist/runtime/recovery/index.js +16 -2
  14. package/dist/runtime/recovery/page-fallbacks.d.ts +45 -0
  15. package/dist/runtime/recovery/page-fallbacks.js +342 -0
  16. package/dist/shared/state/index.d.ts +1 -1
  17. package/dist/shared/state/session-state.d.ts +4 -1
  18. package/dist/shared/state/session-state.js +2 -1
  19. package/dist/shared/workflow/workflow.d.ts +19 -6
  20. package/dist/shared/workflow/workflow.js +38 -9
  21. package/docs/reference/runtime/page-fallbacks.mdx +85 -0
  22. package/docs/understand-libretto/error-handling-and-recovery.mdx +45 -0
  23. package/package.json +1 -1
  24. package/skills/libretto/SKILL.md +8 -2
  25. package/skills/libretto/references/code-generation-rules.md +23 -6
  26. package/skills/libretto-readonly/SKILL.md +1 -1
  27. package/src/cli/commands/execution.ts +8 -1
  28. package/src/cli/core/browser.ts +7 -2
  29. package/src/cli/core/daemon/daemon.ts +9 -4
  30. package/src/cli/core/daemon/ipc.ts +1 -0
  31. package/src/cli/core/providers/kernel.ts +153 -29
  32. package/src/cli/core/providers/steel.ts +11 -1
  33. package/src/cli/core/providers/types.ts +3 -0
  34. package/src/index.ts +22 -2
  35. package/src/runtime/recovery/agent.ts +227 -50
  36. package/src/runtime/recovery/index.ts +21 -1
  37. package/src/runtime/recovery/page-fallbacks.ts +476 -0
  38. package/src/shared/state/index.ts +1 -0
  39. package/src/shared/state/session-state.ts +2 -0
  40. package/src/shared/workflow/workflow.ts +90 -20
package/README.md CHANGED
@@ -1,5 +1,9 @@
1
1
  <!-- Generated from packages/libretto/README.template.md by `pnpm sync:mirrors`. Do not edit directly. -->
2
2
 
3
+ <p align="center">
4
+ <img src="assets/readme-banner.png" alt="Libretto" />
5
+ </p>
6
+
3
7
  # Libretto
4
8
 
5
9
  [![npm version](https://img.shields.io/npm/v/libretto)](https://www.npmjs.com/package/libretto)
@@ -87,7 +91,7 @@ Run `npx libretto help` for the full list of commands.
87
91
 
88
92
  ## Configuration
89
93
 
90
- All Libretto state lives in a `.libretto/` directory at your project root. See the [configuration docs](https://libretto.sh/docs/configuration) for details on config files, sessions, and profiles.
94
+ All Libretto state lives in a `.libretto/` directory at your project root. See the [configuration docs](https://libretto.sh/docs/understand-libretto/configuration) for details on config files, sessions, and profiles.
91
95
 
92
96
  ## Join the Community
93
97
 
package/README.template.md CHANGED
@@ -1,3 +1,7 @@
1
+ <p align="center">
2
+ <img src="{{LIBRETTO_PATH_PREFIX}}assets/readme-banner.png" alt="Libretto" />
3
+ </p>
4
+
1
5
  # Libretto
2
6
 
3
7
  [![npm version](https://img.shields.io/npm/v/libretto)](https://www.npmjs.com/package/libretto)
@@ -85,7 +89,7 @@ Run `npx libretto help` for the full list of commands.
85
89
 
86
90
  ## Configuration
87
91
 
88
- All Libretto state lives in a `.libretto/` directory at your project root. See the [configuration docs](https://libretto.sh/docs/configuration) for details on config files, sessions, and profiles.
92
+ All Libretto state lives in a `.libretto/` directory at your project root. See the [configuration docs](https://libretto.sh/docs/understand-libretto/configuration) for details on config files, sessions, and profiles.
89
93
 
90
94
  ## Join the Community
91
95
 
package/dist/cli/commands/execution.js CHANGED
@@ -476,13 +476,20 @@ async function runIntegrationFromFile(args, logger) {
476
476
  viewport: args.viewport,
477
477
  stayOpenOnSuccess: args.stayOpenOnSuccess,
478
478
  daemonSocketPath,
479
- provider: provider ? { name: provider.name, sessionId: provider.sessionId } : void 0
479
+ provider: provider ? {
480
+ name: provider.name,
481
+ sessionId: provider.sessionId,
482
+ recordingUrl: provider.recordingUrl
483
+ } : void 0
480
484
  },
481
485
  logger
482
486
  );
483
487
  if (provider?.liveViewUrl) {
484
488
  console.log(`View live session: ${provider.liveViewUrl}`);
485
489
  }
490
+ if (provider?.recordingUrl) {
491
+ console.log(`View recording: ${provider.recordingUrl}`);
492
+ }
486
493
  let outcome;
487
494
  try {
488
495
  outcome = await waitForWorkflowOutcome(pid, workflowOutcome.promise);
package/dist/cli/core/browser.js CHANGED
@@ -414,11 +414,15 @@ async function runOpenWithProvider(rawUrl, providerName, session, logger, access
414
414
  provider: providerName,
415
415
  sessionId: providerSession.sessionId,
416
416
  cdpEndpoint: providerSession.cdpEndpoint,
417
- liveViewUrl: providerSession.liveViewUrl
417
+ liveViewUrl: providerSession.liveViewUrl,
418
+ recordingUrl: providerSession.recordingUrl
418
419
  });
419
420
  if (providerSession.liveViewUrl) {
420
421
  console.log(`View live session: ${providerSession.liveViewUrl}`);
421
422
  }
423
+ if (providerSession.recordingUrl) {
424
+ console.log(`View recording: ${providerSession.recordingUrl}`);
425
+ }
422
426
  writeSessionState(
423
427
  {
424
428
  port: 0,
@@ -431,7 +435,8 @@ async function runOpenWithProvider(rawUrl, providerName, session, logger, access
431
435
  daemonSocketPath,
432
436
  provider: {
433
437
  name: providerName,
434
- sessionId: providerSession.sessionId
438
+ sessionId: providerSession.sessionId,
439
+ recordingUrl: providerSession.recordingUrl
435
440
  }
436
441
  },
437
442
  logger
@@ -657,7 +662,7 @@ async function closeProviderSessionDirectly(session, providerState, logger) {
657
662
  sessionId: providerState.sessionId,
658
663
  replayUrl: result.replayUrl
659
664
  });
660
- return result.replayUrl;
665
+ return result.replayUrl ?? providerState.recordingUrl;
661
666
  } catch (error) {
662
667
  logger.warn("close-provider-direct-fallback-failed", {
663
668
  session,
package/dist/cli/core/daemon/daemon.js CHANGED
@@ -352,12 +352,14 @@ class BrowserDaemon {
352
352
  name: config.providerName,
353
353
  sessionId: providerSession.sessionId,
354
354
  cdpEndpoint: providerSession.cdpEndpoint,
355
- liveViewUrl: providerSession.liveViewUrl
355
+ liveViewUrl: providerSession.liveViewUrl,
356
+ recordingUrl: providerSession.recordingUrl
356
357
  },
357
358
  providerSession: {
358
359
  provider,
359
360
  name: config.providerName,
360
- sessionId: providerSession.sessionId
361
+ sessionId: providerSession.sessionId,
362
+ recordingUrl: providerSession.recordingUrl
361
363
  },
362
364
  beforeReady: startupCleanup.dispose
363
365
  });
@@ -407,13 +409,13 @@ class BrowserDaemon {
407
409
  const result = await this.providerSession.provider.closeSession(
408
410
  this.providerSession.sessionId
409
411
  );
410
- replayUrl = result.replayUrl;
411
- if (result.replayUrl) {
412
+ replayUrl = result.replayUrl ?? this.providerSession.recordingUrl;
413
+ if (replayUrl) {
412
414
  this.logger.info("provider-recording", {
413
415
  session: this.session,
414
416
  provider: this.providerSession.name,
415
417
  sessionId: this.providerSession.sessionId,
416
- replayUrl: result.replayUrl
418
+ replayUrl
417
419
  });
418
420
  }
419
421
  writeFileSync(
@@ -422,7 +424,7 @@ class BrowserDaemon {
422
424
  {
423
425
  provider: this.providerSession.name,
424
426
  sessionId: this.providerSession.sessionId,
425
- replayUrl: result.replayUrl
427
+ replayUrl
426
428
  },
427
429
  null,
428
430
  2
package/dist/cli/core/providers/kernel.js CHANGED
@@ -1,44 +1,122 @@
1
- const KERNEL_API_ENDPOINT = "https://api.onkernel.com";
2
- function createKernelProvider() {
3
- const apiKey = process.env.KERNEL_API_KEY;
1
+ function readBooleanEnv(name, defaultValue) {
2
+ const value = process.env[name]?.trim().toLowerCase();
3
+ if (!value) return defaultValue;
4
+ return value === "1" || value === "true" || value === "yes";
5
+ }
6
+ function readTimeoutSeconds(options) {
7
+ if (options.timeoutSeconds !== void 0) return options.timeoutSeconds;
8
+ return Number(process.env.KERNEL_TIMEOUT_SECONDS ?? 300);
9
+ }
10
+ async function kernelFetchJson(endpoint, apiKey, path, init) {
11
+ const resp = await fetch(`${endpoint}${path}`, {
12
+ ...init,
13
+ headers: {
14
+ Authorization: `Bearer ${apiKey}`,
15
+ "Content-Type": "application/json",
16
+ ...init.headers
17
+ }
18
+ });
19
+ if (!resp.ok) {
20
+ const body = await resp.text();
21
+ throw new Error(`Kernel API error (${resp.status}): ${body}`);
22
+ }
23
+ return await resp.json();
24
+ }
25
+ async function kernelFetchNoBody(endpoint, apiKey, path, init) {
26
+ const resp = await fetch(`${endpoint}${path}`, {
27
+ ...init,
28
+ headers: {
29
+ Authorization: `Bearer ${apiKey}`,
30
+ ...init.headers
31
+ }
32
+ });
33
+ if (!resp.ok) {
34
+ const body = await resp.text();
35
+ throw new Error(`Kernel API error (${resp.status}): ${body}`);
36
+ }
37
+ }
38
+ function readEndpoint() {
39
+ return process.env.KERNEL_API_ENDPOINT?.trim() || process.env.KERNEL_ENDPOINT?.trim() || "https://api.onkernel.com";
40
+ }
41
+ function createKernelProvider(options = {}) {
42
+ const apiKey = options.apiKey ?? process.env.KERNEL_API_KEY;
4
43
  if (!apiKey)
5
44
  throw new Error("KERNEL_API_KEY is required for Kernel provider.");
45
+ const endpoint = readEndpoint();
46
+ const headless = options.headless ?? process.env.KERNEL_HEADLESS !== "false";
47
+ const stealth = options.stealth ?? readBooleanEnv("KERNEL_STEALTH", false);
48
+ const timeoutSeconds = readTimeoutSeconds(options);
49
+ const enableRecording = options.enableRecording ?? readBooleanEnv("KERNEL_ENABLE_RECORDING", false);
50
+ const replays = /* @__PURE__ */ new Map();
6
51
  return {
7
52
  async createSession() {
8
- const resp = await fetch(`${KERNEL_API_ENDPOINT}/browsers`, {
9
- method: "POST",
10
- headers: {
11
- Authorization: `Bearer ${apiKey}`,
12
- "Content-Type": "application/json"
13
- },
14
- body: JSON.stringify({
15
- headless: process.env.KERNEL_HEADLESS !== "false",
16
- stealth: process.env.KERNEL_STEALTH === "true",
17
- timeout_seconds: Number(process.env.KERNEL_TIMEOUT_SECONDS ?? 300)
18
- })
19
- });
20
- if (!resp.ok) {
21
- const body = await resp.text();
22
- throw new Error(`Kernel API error (${resp.status}): ${body}`);
53
+ const json = await kernelFetchJson(
54
+ endpoint,
55
+ apiKey,
56
+ "/browsers",
57
+ {
58
+ method: "POST",
59
+ body: JSON.stringify({
60
+ headless,
61
+ stealth,
62
+ timeout_seconds: timeoutSeconds
63
+ })
64
+ }
65
+ );
66
+ let replay;
67
+ if (enableRecording) {
68
+ try {
69
+ replay = await kernelFetchJson(
70
+ endpoint,
71
+ apiKey,
72
+ `/browsers/${json.session_id}/replays`,
73
+ { method: "POST", body: JSON.stringify({}) }
74
+ );
75
+ replays.set(json.session_id, {
76
+ replayId: replay.replay_id,
77
+ replayViewUrl: replay.replay_view_url ?? void 0
78
+ });
79
+ } catch (error) {
80
+ await kernelFetchNoBody(
81
+ endpoint,
82
+ apiKey,
83
+ `/browsers/${json.session_id}`,
84
+ { method: "DELETE" }
85
+ ).catch(() => {
86
+ });
87
+ throw error;
88
+ }
23
89
  }
24
- const json = await resp.json();
25
90
  return {
26
91
  sessionId: json.session_id,
27
- cdpEndpoint: json.cdp_ws_url
92
+ cdpEndpoint: json.cdp_ws_url,
93
+ liveViewUrl: json.browser_live_view_url ?? void 0,
94
+ recordingUrl: replay?.replay_view_url ?? void 0
28
95
  };
29
96
  },
30
97
  async closeSession(sessionId) {
31
- const resp = await fetch(`${KERNEL_API_ENDPOINT}/browsers/${sessionId}`, {
32
- method: "DELETE",
33
- headers: { Authorization: `Bearer ${apiKey}` }
98
+ const replay = replays.get(sessionId);
99
+ let replayStopError;
100
+ if (replay) {
101
+ try {
102
+ await kernelFetchNoBody(
103
+ endpoint,
104
+ apiKey,
105
+ `/browsers/${sessionId}/replays/${replay.replayId}/stop`,
106
+ { method: "POST" }
107
+ );
108
+ } catch (error) {
109
+ replayStopError = error;
110
+ }
111
+ }
112
+ await kernelFetchNoBody(endpoint, apiKey, `/browsers/${sessionId}`, {
113
+ method: "DELETE"
34
114
  });
35
- if (!resp.ok) {
36
- const body = await resp.text();
37
- throw new Error(
38
- `Kernel API error closing session ${sessionId} (${resp.status}): ${body}`
39
- );
115
+ replays.delete(sessionId);
116
+ if (replayStopError) {
117
+ throw replayStopError;
40
118
  }
41
- return {};
119
+ return { replayUrl: replay?.replayViewUrl };
42
120
  }
43
121
  };
44
122
  }
package/dist/cli/core/providers/steel.js CHANGED
@@ -1,5 +1,14 @@
1
1
  const DEFAULT_STEEL_API_ENDPOINT = "https://api.steel.dev";
2
2
  const DEFAULT_STEEL_CONNECT_ENDPOINT = "wss://connect.steel.dev";
3
+ const STEEL_STEALTH_SESSION_OPTIONS = {
4
+ solveCaptcha: true,
5
+ useProxy: true,
6
+ stealthConfig: {
7
+ humanizeInteractions: true,
8
+ autoCaptchaSolving: true,
9
+ skipFingerprintInjection: false
10
+ }
11
+ };
3
12
  function createSteelProvider(options = {}) {
4
13
  const apiKey = options.apiKey ?? process.env.STEEL_API_KEY;
5
14
  if (!apiKey) throw new Error("STEEL_API_KEY is required for Steel provider.");
@@ -13,7 +22,7 @@ function createSteelProvider(options = {}) {
13
22
  "steel-api-key": apiKey,
14
23
  "Content-Type": "application/json"
15
24
  },
16
- body: JSON.stringify({})
25
+ body: JSON.stringify(STEEL_STEALTH_SESSION_OPTIONS)
17
26
  });
18
27
  if (!resp.ok) {
19
28
  const body = await resp.text();
package/dist/index.d.ts CHANGED
@@ -1,9 +1,10 @@
1
1
  export { LogOptions, Logger, LoggerApi, LoggerSink, MinimalLogger, defaultLogger } from './shared/logger/logger.js';
2
2
  export { createFileLogSink, jsonlConsoleSink, prettyConsoleSink } from './shared/logger/sinks.js';
3
3
  export { SESSION_STATE_VERSION, SessionState, SessionStateFile, SessionStateFileSchema, SessionStatus, SessionStatusSchema, parseSessionStateContent, parseSessionStateData, serializeSessionState } from './shared/state/session-state.js';
4
- export { executeRecoveryAgent } from './runtime/recovery/agent.js';
4
+ export { BrowserAction, RecoveryAgentResult, RecoveryAgentStep, executeRecoveryAgent } from './runtime/recovery/agent.js';
5
5
  export { attemptWithRecovery } from './runtime/recovery/recovery.js';
6
6
  export { DetectedSubmissionError, KnownSubmissionError, detectSubmissionError } from './runtime/recovery/errors.js';
7
+ export { COMPUTER_USE_RECOVERY_MODELS, ComputerUseRecoveryActionOptions, POPUP_RECOVERY_INSTRUCTION, PopupRecoveryActionOptions, RecoveryAction, RecoveryActionContext, RecoveryActionHandler, RecoveryActionOptions, RecoveryActionResult, RecoveryActionTargetType, computerUseRecoveryAction, createRecoveryPage, popupRecoveryAction } from './runtime/recovery/page-fallbacks.js';
7
8
  export { ExtractOptions, extractFromPage } from './runtime/extract/extract.js';
8
9
  export { PageRequestOptions, RequestConfig, pageRequest } from './runtime/network/network.js';
9
10
  export { DownloadResult, DownloadViaClickOptions, downloadViaClick } from './runtime/download/download.js';
@@ -12,7 +13,7 @@ export { InstrumentationOptions, InstrumentedPage, installInstrumentation, instr
12
13
  export { GhostCursorOptions, ensureGhostCursor, ghostClick, hideGhostCursor, moveGhostCursor } from './shared/visualization/ghost-cursor.js';
13
14
  export { HighlightOptions, clearHighlights, ensureHighlightLayer, showHighlight } from './shared/visualization/highlight.js';
14
15
  export { BrowserSession, LaunchBrowserArgs, launchBrowser } from './shared/run/browser.js';
15
- export { ExportedLibrettoWorkflow, LIBRETTO_WORKFLOW_BRAND, LibrettoWorkflow, LibrettoWorkflowContext, LibrettoWorkflowHandler, LibrettoWorkflowInputError, LibrettoWorkflowSchemas, WorkflowInputValidator, getDefaultWorkflowFromModuleExports, getWorkflowFromModuleExports, getWorkflowsFromModuleExports, isLibrettoWorkflow, validateWorkflowInput, workflow } from './shared/workflow/workflow.js';
16
+ export { ExportedLibrettoWorkflow, LIBRETTO_WORKFLOW_BRAND, LibrettoWorkflow, LibrettoWorkflowContext, LibrettoWorkflowHandler, LibrettoWorkflowInputError, LibrettoWorkflowOptions, WorkflowInputValidator, getDefaultWorkflowFromModuleExports, getWorkflowFromModuleExports, getWorkflowsFromModuleExports, isLibrettoWorkflow, validateWorkflowInput, workflow } from './shared/workflow/workflow.js';
16
17
  import 'zod';
17
18
  import 'playwright';
18
19
  import 'ai';
package/dist/index.js CHANGED
@@ -17,11 +17,20 @@ import {
17
17
  parseSessionStateContent,
18
18
  serializeSessionState
19
19
  } from "./shared/state/index.js";
20
- import { executeRecoveryAgent } from "./runtime/recovery/agent.js";
20
+ import {
21
+ executeRecoveryAgent
22
+ } from "./runtime/recovery/agent.js";
21
23
  import { attemptWithRecovery } from "./runtime/recovery/recovery.js";
22
24
  import {
23
25
  detectSubmissionError
24
26
  } from "./runtime/recovery/errors.js";
27
+ import {
28
+ COMPUTER_USE_RECOVERY_MODELS,
29
+ POPUP_RECOVERY_INSTRUCTION,
30
+ computerUseRecoveryAction,
31
+ createRecoveryPage,
32
+ popupRecoveryAction
33
+ } from "./runtime/recovery/page-fallbacks.js";
25
34
  import {
26
35
  extractFromPage
27
36
  } from "./runtime/extract/extract.js";
@@ -78,16 +87,20 @@ if (isDirectExecution()) {
78
87
  });
79
88
  }
80
89
  export {
90
+ COMPUTER_USE_RECOVERY_MODELS,
81
91
  LIBRETTO_WORKFLOW_BRAND,
82
92
  LibrettoWorkflow,
83
93
  LibrettoWorkflowInputError,
84
94
  Logger,
95
+ POPUP_RECOVERY_INSTRUCTION,
85
96
  SESSION_STATE_VERSION,
86
97
  SessionStateFileSchema,
87
98
  SessionStatusSchema,
88
99
  attemptWithRecovery,
89
100
  clearHighlights,
101
+ computerUseRecoveryAction,
90
102
  createFileLogSink,
103
+ createRecoveryPage,
91
104
  defaultLogger,
92
105
  detectSubmissionError,
93
106
  downloadViaClick,
@@ -111,6 +124,7 @@ export {
111
124
  parseSessionStateContent,
112
125
  parseSessionStateData,
113
126
  pause,
127
+ popupRecoveryAction,
114
128
  prettyConsoleSink,
115
129
  serializeSessionState,
116
130
  showHighlight,
package/dist/runtime/recovery/agent.d.ts CHANGED
@@ -2,11 +2,59 @@ import { Page } from 'playwright';
2
2
  import { MinimalLogger } from '../../shared/logger/logger.js';
3
3
  import { LanguageModel } from 'ai';
4
4
 
5
+ type BrowserAction = {
6
+ type: "click";
7
+ x: number;
8
+ y: number;
9
+ button?: string;
10
+ } | {
11
+ type: "double_click";
12
+ x: number;
13
+ y: number;
14
+ } | {
15
+ type: "scroll";
16
+ x: number;
17
+ y: number;
18
+ scroll_x: number;
19
+ scroll_y: number;
20
+ } | {
21
+ type: "keypress";
22
+ keys: string[];
23
+ } | {
24
+ type: "type";
25
+ text: string;
26
+ } | {
27
+ type: "wait";
28
+ } | {
29
+ type: "screenshot";
30
+ } | {
31
+ type: "drag";
32
+ path: {
33
+ x: number;
34
+ y: number;
35
+ }[];
36
+ } | {
37
+ type: "move";
38
+ x: number;
39
+ y: number;
40
+ } | {
41
+ type: "done";
42
+ };
43
+ type RecoveryAgentStep = {
44
+ step: number;
45
+ reasoning: string;
46
+ action: BrowserAction;
47
+ };
48
+ type RecoveryAgentStatus = "skipped" | "no-action-needed" | "action-taken" | "incomplete";
49
+ type RecoveryAgentResult = {
50
+ status: RecoveryAgentStatus;
51
+ steps: RecoveryAgentStep[];
52
+ };
5
53
  /**
6
54
  * Executes a vision-based recovery agent to recover from browser automation failures.
7
55
  * Takes a screenshot, sends it to the LLM with the instruction, and executes
8
56
  * the LLM's suggested browser actions.
9
57
  */
10
- declare function executeRecoveryAgent(page: Page, instruction: string, logger?: MinimalLogger, model?: LanguageModel): Promise<void>;
58
+ declare function executeRecoveryAgent(page: Page, instruction: string, logger?: MinimalLogger, model?: LanguageModel, maxSteps?: number): Promise<RecoveryAgentResult>;
11
59
 
12
- export { executeRecoveryAgent };
60
+ export { type BrowserAction, type RecoveryAgentResult, type RecoveryAgentStatus, type RecoveryAgentStep, executeRecoveryAgent };