libretto 0.5.0 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +106 -36
- package/dist/cli/cli.js +22 -97
- package/dist/cli/commands/browser.js +86 -59
- package/dist/cli/commands/execution.js +199 -86
- package/dist/cli/commands/init.js +30 -8
- package/dist/cli/commands/logs.js +4 -5
- package/dist/cli/commands/shared.js +30 -29
- package/dist/cli/commands/snapshot.js +26 -39
- package/dist/cli/core/ai-config.js +9 -2
- package/dist/cli/core/api-snapshot-analyzer.js +15 -5
- package/dist/cli/core/browser.js +132 -29
- package/dist/cli/core/context.js +4 -1
- package/dist/cli/core/session-telemetry.js +5 -2
- package/dist/cli/core/session.js +21 -8
- package/dist/cli/core/snapshot-analyzer.js +14 -31
- package/dist/cli/core/snapshot-api-config.js +2 -6
- package/dist/cli/core/telemetry.js +10 -2
- package/dist/cli/framework/simple-cli.js +45 -25
- package/dist/cli/router.js +14 -21
- package/dist/cli/workers/run-integration-runtime.js +24 -5
- package/dist/cli/workers/run-integration-worker-protocol.js +3 -1
- package/dist/cli/workers/run-integration-worker.js +1 -4
- package/dist/index.d.ts +1 -2
- package/dist/index.js +7 -10
- package/dist/runtime/download/download.js +5 -1
- package/dist/runtime/extract/extract.js +11 -2
- package/dist/runtime/network/network.js +8 -1
- package/dist/runtime/recovery/agent.js +6 -2
- package/dist/runtime/recovery/errors.js +3 -1
- package/dist/runtime/recovery/recovery.js +3 -1
- package/dist/shared/condense-dom/condense-dom.js +6 -13
- package/dist/shared/config/config.d.ts +1 -9
- package/dist/shared/config/config.js +0 -18
- package/dist/shared/config/index.d.ts +2 -1
- package/dist/shared/config/index.js +0 -10
- package/dist/shared/debug/pause.js +9 -3
- package/dist/shared/instrumentation/instrument.js +101 -5
- package/dist/shared/llm/ai-sdk-adapter.js +3 -1
- package/dist/shared/llm/client.js +3 -1
- package/dist/shared/logger/index.js +4 -1
- package/dist/shared/run/api.js +3 -1
- package/dist/shared/run/browser.js +7 -2
- package/dist/shared/state/session-state.d.ts +2 -1
- package/dist/shared/state/session-state.js +5 -2
- package/dist/shared/visualization/ghost-cursor.js +19 -10
- package/dist/shared/visualization/highlight.js +9 -6
- package/dist/shared/workflow/workflow.d.ts +4 -5
- package/dist/shared/workflow/workflow.js +3 -5
- package/package.json +6 -2
- package/scripts/check-skills-sync.mjs +25 -0
- package/scripts/compare-eval-summary.mjs +47 -0
- package/scripts/postinstall.mjs +15 -15
- package/scripts/prepare-release.sh +97 -0
- package/scripts/skills-libretto.mjs +103 -0
- package/scripts/summarize-evals.mjs +135 -0
- package/scripts/sync-skills.mjs +12 -0
- package/skills/libretto/SKILL.md +113 -49
- package/skills/libretto/references/code-generation-rules.md +208 -0
- package/skills/libretto/references/configuration-file-reference.md +53 -0
- package/skills/libretto/references/site-security-review.md +143 -0
- package/src/cli/cli.ts +23 -110
- package/src/cli/commands/browser.ts +94 -70
- package/src/cli/commands/execution.ts +233 -102
- package/src/cli/commands/init.ts +32 -9
- package/src/cli/commands/logs.ts +7 -7
- package/src/cli/commands/shared.ts +36 -37
- package/src/cli/commands/snapshot.ts +44 -59
- package/src/cli/core/ai-config.ts +12 -3
- package/src/cli/core/api-snapshot-analyzer.ts +17 -6
- package/src/cli/core/browser.ts +178 -41
- package/src/cli/core/context.ts +7 -2
- package/src/cli/core/session-telemetry.ts +19 -8
- package/src/cli/core/session.ts +21 -7
- package/src/cli/core/snapshot-analyzer.ts +26 -46
- package/src/cli/core/snapshot-api-config.ts +170 -175
- package/src/cli/core/telemetry.ts +16 -3
- package/src/cli/framework/simple-cli.ts +144 -77
- package/src/cli/router.ts +13 -21
- package/src/cli/workers/run-integration-runtime.ts +36 -9
- package/src/cli/workers/run-integration-worker-protocol.ts +2 -0
- package/src/cli/workers/run-integration-worker.ts +1 -4
- package/src/index.ts +73 -66
- package/src/runtime/download/download.ts +62 -58
- package/src/runtime/download/index.ts +5 -5
- package/src/runtime/extract/extract.ts +71 -61
- package/src/runtime/network/index.ts +3 -3
- package/src/runtime/network/network.ts +99 -93
- package/src/runtime/recovery/agent.ts +217 -212
- package/src/runtime/recovery/errors.ts +107 -104
- package/src/runtime/recovery/index.ts +3 -3
- package/src/runtime/recovery/recovery.ts +38 -35
- package/src/shared/condense-dom/condense-dom.ts +15 -18
- package/src/shared/config/config.ts +0 -19
- package/src/shared/config/index.ts +0 -5
- package/src/shared/debug/pause.ts +57 -51
- package/src/shared/instrumentation/errors.ts +64 -62
- package/src/shared/instrumentation/index.ts +5 -5
- package/src/shared/instrumentation/instrument.ts +339 -209
- package/src/shared/llm/ai-sdk-adapter.ts +58 -55
- package/src/shared/llm/client.ts +181 -174
- package/src/shared/llm/types.ts +39 -39
- package/src/shared/logger/index.ts +11 -4
- package/src/shared/logger/logger.ts +312 -306
- package/src/shared/logger/sinks.ts +118 -114
- package/src/shared/paths/paths.ts +50 -49
- package/src/shared/paths/repo-root.ts +17 -17
- package/src/shared/run/api.ts +5 -1
- package/src/shared/run/browser.ts +12 -3
- package/src/shared/state/index.ts +9 -9
- package/src/shared/state/session-state.ts +46 -43
- package/src/shared/visualization/ghost-cursor.ts +161 -148
- package/src/shared/visualization/highlight.ts +89 -86
- package/src/shared/visualization/index.ts +13 -13
- package/src/shared/workflow/workflow.ts +19 -25
- package/skills/libretto/references/reverse-engineering-network-requests.md +0 -39
- package/skills/libretto/references/user-action-log.md +0 -31
package/src/index.ts
CHANGED
|
@@ -2,119 +2,126 @@ import { resolve } from "node:path";
|
|
|
2
2
|
import { pathToFileURL } from "node:url";
|
|
3
3
|
|
|
4
4
|
// Logger
|
|
5
|
-
export { Logger, defaultLogger, type LoggerApi, type MinimalLogger, type LoggerSink, type LogOptions } from "./shared/logger/logger.js";
|
|
6
5
|
export {
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
6
|
+
Logger,
|
|
7
|
+
defaultLogger,
|
|
8
|
+
type LoggerApi,
|
|
9
|
+
type MinimalLogger,
|
|
10
|
+
type LoggerSink,
|
|
11
|
+
type LogOptions,
|
|
12
|
+
} from "./shared/logger/logger.js";
|
|
13
|
+
export {
|
|
14
|
+
createFileLogSink,
|
|
15
|
+
prettyConsoleSink,
|
|
16
|
+
jsonlConsoleSink,
|
|
10
17
|
} from "./shared/logger/sinks.js";
|
|
11
18
|
|
|
12
19
|
// LLM client interface
|
|
13
|
-
export type {
|
|
20
|
+
export type {
|
|
21
|
+
LLMClient,
|
|
22
|
+
Message,
|
|
23
|
+
MessageContentPart,
|
|
24
|
+
} from "./shared/llm/types.js";
|
|
14
25
|
export { createLLMClientFromModel } from "./shared/llm/ai-sdk-adapter.js";
|
|
15
26
|
export {
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
27
|
+
SESSION_STATE_VERSION,
|
|
28
|
+
SessionStatusSchema,
|
|
29
|
+
SessionStateFileSchema,
|
|
30
|
+
parseSessionStateData,
|
|
31
|
+
parseSessionStateContent,
|
|
32
|
+
serializeSessionState,
|
|
33
|
+
type SessionStatus,
|
|
34
|
+
type SessionState,
|
|
35
|
+
type SessionStateFile,
|
|
25
36
|
} from "./shared/state/index.js";
|
|
26
37
|
|
|
27
38
|
// Recovery
|
|
28
39
|
export { executeRecoveryAgent } from "./runtime/recovery/agent.js";
|
|
29
40
|
export { attemptWithRecovery } from "./runtime/recovery/recovery.js";
|
|
30
41
|
export {
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
42
|
+
detectSubmissionError,
|
|
43
|
+
type KnownSubmissionError,
|
|
44
|
+
type DetectedSubmissionError,
|
|
34
45
|
} from "./runtime/recovery/errors.js";
|
|
35
46
|
|
|
36
47
|
// AI extraction
|
|
37
|
-
export {
|
|
48
|
+
export {
|
|
49
|
+
extractFromPage,
|
|
50
|
+
type ExtractOptions,
|
|
51
|
+
} from "./runtime/extract/extract.js";
|
|
38
52
|
|
|
39
53
|
// Network helpers
|
|
40
54
|
export {
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
55
|
+
pageRequest,
|
|
56
|
+
type RequestConfig,
|
|
57
|
+
type PageRequestOptions,
|
|
44
58
|
} from "./runtime/network/network.js";
|
|
45
59
|
|
|
46
60
|
// Download helpers
|
|
47
61
|
export {
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
62
|
+
downloadViaClick,
|
|
63
|
+
downloadAndSave,
|
|
64
|
+
type DownloadResult,
|
|
65
|
+
type DownloadViaClickOptions,
|
|
66
|
+
type SaveDownloadOptions,
|
|
53
67
|
} from "./runtime/download/download.js";
|
|
54
68
|
|
|
55
69
|
// Debug / Pause
|
|
56
70
|
export { pause } from "./shared/debug/pause.js";
|
|
57
71
|
|
|
58
|
-
// Config
|
|
59
|
-
export {
|
|
60
|
-
isDebugMode,
|
|
61
|
-
isDryRun,
|
|
62
|
-
shouldPauseBeforeMutation,
|
|
63
|
-
} from "./shared/config/config.js";
|
|
64
|
-
|
|
65
72
|
// Instrumentation
|
|
66
73
|
export {
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
74
|
+
instrumentPage,
|
|
75
|
+
installInstrumentation,
|
|
76
|
+
instrumentContext,
|
|
77
|
+
type InstrumentationOptions,
|
|
78
|
+
type InstrumentedPage,
|
|
72
79
|
} from "./shared/instrumentation/instrument.js";
|
|
73
80
|
|
|
74
81
|
// Visualization
|
|
75
82
|
export {
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
83
|
+
ensureGhostCursor,
|
|
84
|
+
moveGhostCursor,
|
|
85
|
+
ghostClick,
|
|
86
|
+
hideGhostCursor,
|
|
87
|
+
type GhostCursorOptions,
|
|
81
88
|
} from "./shared/visualization/ghost-cursor.js";
|
|
82
89
|
export {
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
90
|
+
ensureHighlightLayer,
|
|
91
|
+
showHighlight,
|
|
92
|
+
clearHighlights,
|
|
93
|
+
type HighlightOptions,
|
|
87
94
|
} from "./shared/visualization/highlight.js";
|
|
88
95
|
|
|
89
96
|
// Run helpers
|
|
90
97
|
export {
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
98
|
+
launchBrowser,
|
|
99
|
+
type LaunchBrowserArgs,
|
|
100
|
+
type BrowserSession,
|
|
94
101
|
} from "./shared/run/api.js";
|
|
95
102
|
|
|
96
103
|
// Workflow helpers
|
|
97
104
|
export {
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
type LibrettoWorkflowHandler,
|
|
105
|
+
LibrettoWorkflow,
|
|
106
|
+
LIBRETTO_WORKFLOW_BRAND,
|
|
107
|
+
workflow,
|
|
108
|
+
type LibrettoWorkflowContext,
|
|
109
|
+
type LibrettoWorkflowHandler,
|
|
104
110
|
} from "./shared/workflow/workflow.js";
|
|
105
111
|
|
|
106
112
|
const isDirectExecution = (): boolean => {
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
113
|
+
const entryArg = process.argv[1];
|
|
114
|
+
if (!entryArg) {
|
|
115
|
+
return false;
|
|
116
|
+
}
|
|
117
|
+
return pathToFileURL(resolve(entryArg)).href === import.meta.url;
|
|
112
118
|
};
|
|
113
119
|
|
|
114
120
|
if (isDirectExecution()) {
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
121
|
+
void import("./cli/index.js").catch((error: unknown) => {
|
|
122
|
+
const message =
|
|
123
|
+
error instanceof Error ? (error.stack ?? error.message) : String(error);
|
|
124
|
+
process.stderr.write(`${message}\n`);
|
|
125
|
+
process.exitCode = 1;
|
|
126
|
+
});
|
|
120
127
|
}
|
|
package/src/runtime/download/download.ts
CHANGED
|
@@ -4,16 +4,16 @@ import type { Page, Download } from "playwright";
|
|
|
4
4
|
import type { MinimalLogger } from "../../shared/logger/logger.js";
|
|
5
5
|
|
|
6
6
|
export type DownloadResult = {
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
7
|
+
/** The raw file contents. */
|
|
8
|
+
buffer: Buffer;
|
|
9
|
+
/** The filename suggested by the server (Content-Disposition header or URL). */
|
|
10
|
+
filename: string;
|
|
11
11
|
};
|
|
12
12
|
|
|
13
13
|
export type DownloadViaClickOptions = {
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
14
|
+
logger?: MinimalLogger;
|
|
15
|
+
/** Timeout in milliseconds for waiting on the download event. Defaults to 30 000. */
|
|
16
|
+
timeout?: number;
|
|
17
17
|
};
|
|
18
18
|
|
|
19
19
|
/**
|
|
@@ -24,55 +24,55 @@ export type DownloadViaClickOptions = {
|
|
|
24
24
|
* never missed.
|
|
25
25
|
*/
|
|
26
26
|
export async function downloadViaClick(
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
27
|
+
page: Page,
|
|
28
|
+
selector: string,
|
|
29
|
+
options?: DownloadViaClickOptions,
|
|
30
30
|
): Promise<DownloadResult> {
|
|
31
|
-
|
|
31
|
+
const { logger, timeout = 30_000 } = options ?? {};
|
|
32
32
|
|
|
33
|
-
|
|
33
|
+
const startTime = Date.now();
|
|
34
34
|
|
|
35
|
-
|
|
36
|
-
|
|
35
|
+
// 1. Register the download listener BEFORE clicking
|
|
36
|
+
const downloadPromise = page.waitForEvent("download", { timeout });
|
|
37
37
|
|
|
38
|
-
|
|
39
|
-
|
|
38
|
+
// 2. Click the element that triggers the download
|
|
39
|
+
await page.locator(selector).click();
|
|
40
40
|
|
|
41
|
-
|
|
42
|
-
|
|
41
|
+
// 3. Await the download event
|
|
42
|
+
const download: Download = await downloadPromise;
|
|
43
43
|
|
|
44
|
-
|
|
45
|
-
|
|
44
|
+
// 4. Get the suggested filename
|
|
45
|
+
const filename = download.suggestedFilename();
|
|
46
46
|
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
47
|
+
// 5. Read the downloaded file into a buffer
|
|
48
|
+
const readStream = await download.createReadStream();
|
|
49
|
+
if (!readStream) {
|
|
50
|
+
throw new Error(
|
|
51
|
+
`Download stream unavailable for "${filename}". The browser may have been closed before the download completed.`,
|
|
52
|
+
);
|
|
53
|
+
}
|
|
54
54
|
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
55
|
+
const chunks: Buffer[] = [];
|
|
56
|
+
for await (const chunk of readStream) {
|
|
57
|
+
chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
|
|
58
|
+
}
|
|
59
|
+
const buffer = Buffer.concat(chunks);
|
|
60
60
|
|
|
61
|
-
|
|
61
|
+
const duration = Date.now() - startTime;
|
|
62
62
|
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
63
|
+
logger?.info("download:click", {
|
|
64
|
+
selector,
|
|
65
|
+
filename,
|
|
66
|
+
size: buffer.length,
|
|
67
|
+
duration,
|
|
68
|
+
});
|
|
69
69
|
|
|
70
|
-
|
|
70
|
+
return { buffer, filename };
|
|
71
71
|
}
|
|
72
72
|
|
|
73
73
|
export type SaveDownloadOptions = DownloadViaClickOptions & {
|
|
74
|
-
|
|
75
|
-
|
|
74
|
+
/** Absolute or relative path to save the file to. When omitted the suggested filename is used in the current working directory. */
|
|
75
|
+
savePath?: string;
|
|
76
76
|
};
|
|
77
77
|
|
|
78
78
|
/**
|
|
@@ -80,21 +80,25 @@ export type SaveDownloadOptions = DownloadViaClickOptions & {
|
|
|
80
80
|
* downloaded file to disk.
|
|
81
81
|
*/
|
|
82
82
|
export async function downloadAndSave(
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
83
|
+
page: Page,
|
|
84
|
+
selector: string,
|
|
85
|
+
options?: SaveDownloadOptions,
|
|
86
86
|
): Promise<DownloadResult & { savedTo: string }> {
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
87
|
+
const { savePath, ...downloadOpts } = options ?? {};
|
|
88
|
+
const { buffer, filename } = await downloadViaClick(
|
|
89
|
+
page,
|
|
90
|
+
selector,
|
|
91
|
+
downloadOpts,
|
|
92
|
+
);
|
|
93
|
+
|
|
94
|
+
const dest = resolve(savePath ?? filename);
|
|
95
|
+
await writeFile(dest, buffer);
|
|
96
|
+
|
|
97
|
+
options?.logger?.info("download:saved", {
|
|
98
|
+
filename,
|
|
99
|
+
savedTo: dest,
|
|
100
|
+
size: buffer.length,
|
|
101
|
+
});
|
|
102
|
+
|
|
103
|
+
return { buffer, filename, savedTo: dest };
|
|
100
104
|
}
|
|
package/src/runtime/download/index.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
export {
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
2
|
+
downloadViaClick,
|
|
3
|
+
downloadAndSave,
|
|
4
|
+
type DownloadResult,
|
|
5
|
+
type DownloadViaClickOptions,
|
|
6
|
+
type SaveDownloadOptions,
|
|
7
7
|
} from "./download.js";
|
|
package/src/runtime/extract/extract.ts
CHANGED
|
@@ -1,16 +1,19 @@
|
|
|
1
1
|
import type { Page } from "playwright";
|
|
2
2
|
import type z from "zod";
|
|
3
|
-
import {
|
|
3
|
+
import {
|
|
4
|
+
type MinimalLogger,
|
|
5
|
+
defaultLogger,
|
|
6
|
+
} from "../../shared/logger/logger.js";
|
|
4
7
|
import type { LLMClient } from "../../shared/llm/types.js";
|
|
5
8
|
|
|
6
9
|
export type ExtractOptions<T extends z.ZodType> = {
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
10
|
+
page: Page;
|
|
11
|
+
instruction: string;
|
|
12
|
+
schema: T;
|
|
13
|
+
llmClient: LLMClient;
|
|
14
|
+
logger?: MinimalLogger;
|
|
15
|
+
/** Optional CSS selector to scope extraction to a specific element. */
|
|
16
|
+
selector?: string;
|
|
14
17
|
};
|
|
15
18
|
|
|
16
19
|
/**
|
|
@@ -20,48 +23,55 @@ export type ExtractOptions<T extends z.ZodType> = {
|
|
|
20
23
|
* matching the provided Zod schema.
|
|
21
24
|
*/
|
|
22
25
|
export async function extractFromPage<T extends z.ZodType>(
|
|
23
|
-
|
|
26
|
+
options: ExtractOptions<T>,
|
|
24
27
|
): Promise<z.infer<T>> {
|
|
25
|
-
|
|
28
|
+
const {
|
|
29
|
+
page,
|
|
30
|
+
instruction,
|
|
31
|
+
schema,
|
|
32
|
+
selector,
|
|
33
|
+
logger = defaultLogger,
|
|
34
|
+
llmClient,
|
|
35
|
+
} = options;
|
|
26
36
|
|
|
27
|
-
|
|
28
|
-
|
|
37
|
+
let screenshot: string;
|
|
38
|
+
let domContent: string | undefined;
|
|
29
39
|
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
40
|
+
if (selector) {
|
|
41
|
+
const element = page.locator(selector);
|
|
42
|
+
await element.waitFor({ state: "visible", timeout: 10_000 });
|
|
33
43
|
|
|
34
|
-
|
|
35
|
-
|
|
44
|
+
const screenshotBuffer = await element.screenshot();
|
|
45
|
+
screenshot = screenshotBuffer.toString("base64");
|
|
36
46
|
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
47
|
+
try {
|
|
48
|
+
domContent = await element.innerHTML();
|
|
49
|
+
if (domContent.length > 30000) {
|
|
50
|
+
domContent = domContent.slice(0, 30000) + "\n... [truncated]";
|
|
51
|
+
}
|
|
52
|
+
} catch {
|
|
53
|
+
domContent = undefined;
|
|
54
|
+
}
|
|
55
|
+
} else {
|
|
56
|
+
const cdpClient = await page.context().newCDPSession(page);
|
|
57
|
+
await cdpClient.send("Page.enable");
|
|
58
|
+
const { data } = await cdpClient.send("Page.captureScreenshot", {
|
|
59
|
+
format: "png",
|
|
60
|
+
});
|
|
61
|
+
screenshot = data;
|
|
52
62
|
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
+
try {
|
|
64
|
+
const htmlContent = await page.content();
|
|
65
|
+
domContent =
|
|
66
|
+
htmlContent.length > 50000
|
|
67
|
+
? htmlContent.slice(0, 50000) + "\n... [truncated]"
|
|
68
|
+
: htmlContent;
|
|
69
|
+
} catch {
|
|
70
|
+
domContent = undefined;
|
|
71
|
+
}
|
|
72
|
+
}
|
|
63
73
|
|
|
64
|
-
|
|
74
|
+
const prompt = `You are analyzing a screenshot${selector ? " of a specific element" : ""} from a web page to extract structured data.
|
|
65
75
|
|
|
66
76
|
Instruction: ${instruction}
|
|
67
77
|
|
|
@@ -69,24 +79,24 @@ ${domContent ? `Here is the HTML content for additional context:\n<html>\n${domC
|
|
|
69
79
|
|
|
70
80
|
Extract the requested information from the screenshot and return it in the specified format. Be precise and only extract what is visible.`;
|
|
71
81
|
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
82
|
+
const result = await llmClient.generateObjectFromMessages({
|
|
83
|
+
schema,
|
|
84
|
+
messages: [
|
|
85
|
+
{
|
|
86
|
+
role: "user",
|
|
87
|
+
content: [
|
|
88
|
+
{ type: "text", text: prompt },
|
|
89
|
+
{ type: "image", image: `data:image/png;base64,${screenshot}` },
|
|
90
|
+
],
|
|
91
|
+
},
|
|
92
|
+
],
|
|
93
|
+
temperature: 0,
|
|
94
|
+
});
|
|
85
95
|
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
96
|
+
logger.info("extractFromPage completed", {
|
|
97
|
+
selector,
|
|
98
|
+
instruction: instruction.slice(0, 100),
|
|
99
|
+
});
|
|
90
100
|
|
|
91
|
-
|
|
101
|
+
return result;
|
|
92
102
|
}
|