imprint-mcp 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +168 -0
- package/LICENSE +21 -0
- package/README.md +322 -0
- package/examples/discoverandgo/README.md +57 -0
- package/examples/discoverandgo/book_discoverandgo_museum_pass/cron.json +8 -0
- package/examples/discoverandgo/book_discoverandgo_museum_pass/index.ts +89 -0
- package/examples/discoverandgo/book_discoverandgo_museum_pass/workflow.json +39 -0
- package/examples/echo/README.md +37 -0
- package/examples/echo/echo_test/index.ts +31 -0
- package/examples/google-flights/search_google_flights/index.ts +101 -0
- package/examples/google-flights/search_google_flights/parser.test.ts +140 -0
- package/examples/google-flights/search_google_flights/parser.ts +189 -0
- package/examples/google-flights/search_google_flights/playbook.yaml +130 -0
- package/examples/google-flights/search_google_flights/workflow.json +48 -0
- package/examples/google-hotels/search_google_hotels/index.ts +194 -0
- package/examples/google-hotels/search_google_hotels/parser.test.ts +168 -0
- package/examples/google-hotels/search_google_hotels/parser.ts +330 -0
- package/examples/google-hotels/search_google_hotels/playbook.yaml +125 -0
- package/examples/google-hotels/search_google_hotels/workflow.json +111 -0
- package/examples/namecheap-domains/search_namecheap_domains/index.ts +144 -0
- package/examples/namecheap-domains/search_namecheap_domains/parser.ts +380 -0
- package/examples/namecheap-domains/search_namecheap_domains/playbook.yaml +50 -0
- package/examples/namecheap-domains/search_namecheap_domains/request-transform.ts +136 -0
- package/examples/namecheap-domains/search_namecheap_domains/workflow.json +97 -0
- package/examples/southwest/README.md +81 -0
- package/examples/southwest/search_southwest_flights/backends.json +23 -0
- package/examples/southwest/search_southwest_flights/cron.json +19 -0
- package/examples/southwest/search_southwest_flights/index.ts +110 -0
- package/examples/southwest/search_southwest_flights/playbook.yaml +46 -0
- package/examples/southwest/search_southwest_flights/workflow.json +54 -0
- package/package.json +78 -0
- package/prompts/compile-agent.md +580 -0
- package/prompts/intent-detection.md +198 -0
- package/prompts/playbook-compilation.md +279 -0
- package/prompts/request-triage.md +74 -0
- package/prompts/tool-candidate-detection.md +104 -0
- package/src/cli.ts +1287 -0
- package/src/imprint/agent.ts +468 -0
- package/src/imprint/app-api-hosts.ts +53 -0
- package/src/imprint/backend-ladder.ts +568 -0
- package/src/imprint/check.ts +136 -0
- package/src/imprint/chromium.ts +211 -0
- package/src/imprint/claude-cli-compile.ts +640 -0
- package/src/imprint/cli-credential.ts +394 -0
- package/src/imprint/codex-cli-compile.ts +712 -0
- package/src/imprint/compile-agent-types.ts +40 -0
- package/src/imprint/compile-agent.ts +404 -0
- package/src/imprint/compile-tools.ts +1389 -0
- package/src/imprint/compile.ts +720 -0
- package/src/imprint/cookie-jar.ts +246 -0
- package/src/imprint/credential-bundle.ts +195 -0
- package/src/imprint/credential-extract.ts +290 -0
- package/src/imprint/credential-store.ts +707 -0
- package/src/imprint/cron.ts +312 -0
- package/src/imprint/doctor.ts +223 -0
- package/src/imprint/emit.ts +154 -0
- package/src/imprint/etld.ts +134 -0
- package/src/imprint/freeform-redact.ts +216 -0
- package/src/imprint/inject-listener.ts +137 -0
- package/src/imprint/install.ts +795 -0
- package/src/imprint/integrations.ts +385 -0
- package/src/imprint/is-compiled.ts +2 -0
- package/src/imprint/json-path.ts +100 -0
- package/src/imprint/llm.ts +998 -0
- package/src/imprint/load-json.ts +54 -0
- package/src/imprint/log.ts +33 -0
- package/src/imprint/login.ts +166 -0
- package/src/imprint/mcp-compile-server.ts +282 -0
- package/src/imprint/mcp-maintenance.ts +1790 -0
- package/src/imprint/mcp-server.ts +350 -0
- package/src/imprint/multi-progress.ts +69 -0
- package/src/imprint/notify.ts +155 -0
- package/src/imprint/paths.ts +64 -0
- package/src/imprint/playbook-parser.ts +21 -0
- package/src/imprint/playbook-runner.ts +465 -0
- package/src/imprint/probe-backends.ts +251 -0
- package/src/imprint/progress.ts +28 -0
- package/src/imprint/record.ts +470 -0
- package/src/imprint/redact.ts +550 -0
- package/src/imprint/replay-capture.ts +387 -0
- package/src/imprint/request-context.ts +66 -0
- package/src/imprint/runtime-link.ts +73 -0
- package/src/imprint/runtime.ts +942 -0
- package/src/imprint/sensitive-keys.ts +156 -0
- package/src/imprint/session-diff.ts +409 -0
- package/src/imprint/session-merge.ts +198 -0
- package/src/imprint/session-writer.ts +149 -0
- package/src/imprint/sites.ts +27 -0
- package/src/imprint/stealth-fetch.ts +434 -0
- package/src/imprint/teach-state.ts +235 -0
- package/src/imprint/teach.ts +2120 -0
- package/src/imprint/tool-candidates.ts +423 -0
- package/src/imprint/tool-loader.ts +186 -0
- package/src/imprint/tool-selection.ts +70 -0
- package/src/imprint/tracing.ts +508 -0
- package/src/imprint/types.ts +472 -0
- package/src/imprint/version.ts +21 -0
|
@@ -0,0 +1,465 @@
|
|
|
1
|
+
/** Execute a parsed Playbook against a real Chromium via Playwright. */
|
|
2
|
+
|
|
3
|
+
import { existsSync, readFileSync } from 'node:fs';
|
|
4
|
+
import {
|
|
5
|
+
isAbsolute as pathIsAbsolute,
|
|
6
|
+
relative as pathRelative,
|
|
7
|
+
resolve as pathResolve,
|
|
8
|
+
} from 'node:path';
|
|
9
|
+
import type { Browser, BrowserContext, Locator as PWLocator, Page } from 'playwright';
|
|
10
|
+
import { extractAt } from './json-path.ts';
|
|
11
|
+
import { createLog } from './log.ts';
|
|
12
|
+
import { imprintHomeDir } from './paths.ts';
|
|
13
|
+
import { parsePlaybook } from './playbook-parser.ts';
|
|
14
|
+
import { substituteString } from './runtime.ts';
|
|
15
|
+
import type {
|
|
16
|
+
Locator,
|
|
17
|
+
Playbook,
|
|
18
|
+
PlaybookResult,
|
|
19
|
+
PlaybookStep,
|
|
20
|
+
ToolResult,
|
|
21
|
+
WaitFor,
|
|
22
|
+
} from './types.ts';
|
|
23
|
+
|
|
24
|
+
/** Options accepted by `runPlaybook`. */
interface RunPlaybookOptions {
  /** Path to playbook.yaml OR an already-parsed Playbook. */
  playbook: string | Playbook;
  /** Caller-supplied parameter values keyed by parameter name. A parameter
   *  the playbook declares but the caller omits falls back to the playbook's
   *  declared default; with no default the run fails before any browser work. */
  params: Record<string, string | number | boolean>;
  /** Run with a visible browser window. Default false (headless). */
  headed?: boolean;
  /** Per-step timeout in ms. Default 30000. */
  stepTimeoutMs?: number;
  /** Screenshot after every step (not just on failure). */
  trace?: boolean;
  /** Inject a Playwright Page for tests. When set, no browser is launched,
   *  no cookies are injected, and the page is left open after the run. */
  pageOverride?: Page;
  /** Site key — used to look up persisted cookies in the credential store
   *  and inject them into the browser context before navigation. Required
   *  for authenticated playbooks. Callers (backend-ladder, the `playbook`
   *  CLI verb) should pass it explicitly so this works regardless of
   *  whether the skill lives under `~/.imprint/`, `~/.hermes/skills/`,
   *  `~/.openclaw/skills/`, or anywhere else. */
  site?: string;
}
|
|
44
|
+
|
|
45
|
+
const log = createLog('playbook');
|
|
46
|
+
|
|
47
|
+
/**
 * Run a playbook end-to-end against Chromium and return its extracted result.
 *
 * Flow: load + validate the playbook and parameters, obtain a page (either
 * `opts.pageOverride` or a freshly launched browser with stored cookies
 * injected), execute every step in order while recording all network
 * responses, then extract the result described by `playbook.result`.
 *
 * Never rejects for expected failures: load/launch problems come back as
 * `{ ok: false, error: 'UNKNOWN' }`, step/extraction problems as
 * `{ ok: false, error: 'BAD_RESPONSE' }` (with a screenshot path appended to
 * the message when one could be taken).
 *
 * @param opts See `RunPlaybookOptions`.
 * @returns `{ ok: true, data }` on success, otherwise a failure `ToolResult`.
 */
export async function runPlaybook(opts: RunPlaybookOptions): Promise<ToolResult> {
  let playbook: Playbook;
  let params: Record<string, string | number | boolean>;
  try {
    playbook = await loadPlaybook(opts.playbook);
    params = coerceParams(opts.params, playbook);
  } catch (err) {
    return { ok: false, error: 'UNKNOWN', message: errMsg(err) };
  }
  // Generous default — Akamai sensor JS, A/B loaders, lazy bundles all
  // need real time to settle. Tight timeouts make broken sites look
  // worse than they are.
  const stepTimeoutMs = opts.stepTimeoutMs ?? 30000;

  let browser: Browser | undefined;
  let context: BrowserContext | undefined;
  let page: Page;
  if (opts.pageOverride) {
    page = opts.pageOverride;
  } else {
    // playwright-extra + stealth plugin patches navigator.webdriver,
    // plugin enumeration, WebGL vendor strings, etc. Vanilla headless
    // Playwright eats a 403 from any decent enterprise site (verified:
    // Southwest 403 → 200 with stealth).
    let chromium: typeof import('playwright').chromium;
    try {
      const pwExtra = await import('playwright-extra');
      const stealthMod = await import('puppeteer-extra-plugin-stealth');
      const stealthFactory =
        (stealthMod as { default?: () => unknown }).default ??
        (stealthMod as unknown as () => unknown);
      pwExtra.chromium.use(stealthFactory() as never);
      chromium = pwExtra.chromium as unknown as typeof import('playwright').chromium;
    } catch (stealthErr) {
      // Make the downgrade visible: without stealth, bot-protected sites
      // tend to fail later with an unexplained 403.
      log(`stealth plugin unavailable (${errMsg(stealthErr)}); falling back to plain Playwright`);
      try {
        const pw = await import('playwright');
        chromium = pw.chromium;
      } catch (innerErr) {
        return {
          ok: false,
          error: 'UNKNOWN',
          message: `Playwright not available: ${errMsg(innerErr)}. Run: bunx playwright install chromium`,
        };
      }
    }
    try {
      browser = await chromium.launch({ headless: !opts.headed });
    } catch (err) {
      return {
        ok: false,
        error: 'UNKNOWN',
        message: `Could not launch Chromium: ${errMsg(err)}. Run: bunx playwright install chromium`,
      };
    }

    // Context/page creation can fail too (browser crashed right after
    // launch, resource limits). Close the browser we just started instead
    // of leaking the process, and report it like every other setup failure.
    let ownedContext: BrowserContext;
    try {
      ownedContext = await browser.newContext();
      context = ownedContext;
      page = await ownedContext.newPage();
    } catch (err) {
      await browser.close().catch(() => {});
      return {
        ok: false,
        error: 'UNKNOWN',
        message: `Could not open a browser page: ${errMsg(err)}`,
      };
    }

    // Inject credentials.cookies into the browser so the playbook can navigate
    // an authenticated flow (e.g., my-trips → reservation → seat map). Prefer
    // the explicit opts.site. Fall back to path inference only when the caller
    // hasn't supplied one and the playbook lives under IMPRINT_HOME.
    const site = opts.site ?? inferSiteFromPath(opts.playbook);
    if (site) {
      try {
        const { loadSiteCredentials } = await import('./credential-store.ts');
        const view = await loadSiteCredentials(site);
        const playwrightCookies = view.cookies
          .map((c) => ({ name: c.name, value: c.value, domain: c.domain, path: c.path }))
          .filter((c) => c.name && c.value);
        if (playwrightCookies.length > 0) {
          await ownedContext.addCookies(playwrightCookies);
          log(`injected ${playwrightCookies.length} cookies for site ${site}`);
        }
      } catch (err) {
        // Best-effort: an unauthenticated run is still useful for public pages.
        log(`failed to inject cookies: ${errMsg(err)} (proceeding without)`);
      }
    }
  }

  // Read body text inside the response handler — Playwright/CDP GCs
  // response bodies aggressively, so a lazy text() at extraction time
  // often fails with "no resource with given identifier found." Track
  // pending reads so extraction waits for them all.
  const captured: Array<{ url: string; method: string; status: number; body: string | null }> = [];
  const pendingBodyReads: Array<Promise<unknown>> = [];
  let lastStep = 0;

  // Named so it can be detached again in `finally`.
  const onResponse = (resp: import('playwright').Response): void => {
    const url = resp.url();
    const method = resp.request().method();
    const status = resp.status();
    const p = resp
      .text()
      .then((body) => captured.push({ url, method, status, body }))
      .catch(() => captured.push({ url, method, status, body: null }));
    pendingBodyReads.push(p);
  };

  try {
    page.on('response', onResponse);

    for (const [i, step] of playbook.steps.entries()) {
      lastStep = i + 1;
      log(`step ${i + 1}/${playbook.steps.length}: ${step.action}`);
      await executeStep(page, step, params, stepTimeoutMs);
      if (opts.trace) {
        const traceShot = await screenshot(page, `${playbook.toolName}-trace`, lastStep);
        log(`  url=${page.url()}`);
        if (traceShot) log(`  trace screenshot: ${traceShot}`);
      }
    }
    // Bounded: a streaming / long-poll response never finishes its body, and
    // an unbounded wait here would hang the whole run.
    await settleBodyReads(pendingBodyReads, stepTimeoutMs);
    const data = await extractResult(page, playbook.result, captured);
    return { ok: true, data };
  } catch (err) {
    const screenshotPath = await screenshot(page, playbook.toolName, lastStep);
    const suffix = screenshotPath ? `\nscreenshot: ${screenshotPath}` : '';
    return {
      ok: false,
      error: 'BAD_RESPONSE',
      message: `Playbook failed at step ${lastStep}: ${errMsg(err)}${suffix}`,
    };
  } finally {
    // Detach so a caller-owned page (pageOverride) does not keep feeding a
    // dead capture buffer. Guarded because test doubles may not implement off().
    try {
      page.off('response', onResponse);
    } catch {
      // best-effort cleanup only
    }
    if (!opts.pageOverride) {
      await context?.close().catch(() => {});
      await browser?.close().catch(() => {});
    }
  }
}

/**
 * Wait for the given body reads to settle, but no longer than `limitMs`.
 * A non-positive or non-finite limit means "no limit", mirroring Playwright's
 * `timeout: 0` convention. Never rejects.
 */
async function settleBodyReads(
  reads: ReadonlyArray<Promise<unknown>>,
  limitMs: number,
): Promise<void> {
  const settled = Promise.allSettled(reads);
  if (!Number.isFinite(limitMs) || limitMs <= 0) {
    await settled;
    return;
  }
  let timer: ReturnType<typeof setTimeout> | undefined;
  const deadline = new Promise<void>((resolve) => {
    timer = setTimeout(resolve, limitMs);
  });
  try {
    await Promise.race([settled, deadline]);
  } finally {
    // Clear so a fast finish does not keep the event loop alive.
    if (timer !== undefined) clearTimeout(timer);
  }
}
|
|
174
|
+
|
|
175
|
+
/**
 * Best-effort full-page screenshot into the OS temp directory.
 *
 * @param page     Page to capture.
 * @param toolName Used in the file name; any character outside
 *                 `[A-Za-z0-9._-]` is replaced with `_` so a tool name
 *                 containing path separators cannot redirect the output.
 * @param stepNum  1-based step number, embedded in the file name.
 * @returns Absolute path of the PNG, or `null` if the capture failed for any
 *          reason (closed page, unwritable temp dir, …). Never throws.
 */
async function screenshot(page: Page, toolName: string, stepNum: number): Promise<string | null> {
  try {
    const { tmpdir } = await import('node:os');
    const { join } = await import('node:path');
    // ':' and '.' are awkward in file names on some platforms.
    const ts = new Date().toISOString().replace(/[:.]/g, '-');
    const safeName = String(toolName).replace(/[^A-Za-z0-9._-]/g, '_');
    const path = join(tmpdir(), `imprint-playbook-${safeName}-step${stepNum}-${ts}.png`);
    await page.screenshot({ path, fullPage: true });
    return path;
  } catch {
    return null;
  }
}
|
|
187
|
+
|
|
188
|
+
/**
 * Resolve the `playbook` option to a parsed Playbook.
 *
 * An already-parsed Playbook is returned untouched; a string is treated as a
 * file path, read as UTF-8 and handed to `parsePlaybook`.
 *
 * @throws Error when the path does not exist (the message tells the user how
 *         to create a playbook), or whatever `parsePlaybook` throws.
 */
async function loadPlaybook(input: string | Playbook): Promise<Playbook> {
  if (typeof input === 'string') {
    if (existsSync(input)) {
      const yamlText = readFileSync(input, 'utf8');
      return parsePlaybook(yamlText);
    }
    throw new Error(
      `Playbook not found: ${input}\n→ run \`imprint compile-playbook <session.json>\` to create one.`,
    );
  }
  return input;
}
|
|
197
|
+
|
|
198
|
+
/**
 * Build the effective parameter map for a run.
 *
 * Only parameters the playbook declares are kept. For each one the
 * caller-supplied value wins, then the declared default.
 *
 * @throws Error naming the first declared parameter that has neither a
 *         supplied value nor a default.
 */
function coerceParams(
  params: Record<string, string | number | boolean>,
  playbook: Playbook,
): Record<string, string | number | boolean> {
  const resolved: Record<string, string | number | boolean> = {};
  for (const decl of playbook.parameters) {
    const key = decl.name;
    const supplied = params[key];
    if (supplied !== undefined) {
      resolved[key] = supplied;
      continue;
    }
    if (decl.default === undefined) {
      throw new Error(
        `Missing required parameter: ${key}\n→ pass --param ${key}=<value> on the CLI, or set it in cron.json.`,
      );
    }
    resolved[key] = decl.default;
  }
  return resolved;
}
|
|
216
|
+
|
|
217
|
+
/**
 * Execute one playbook step against the page, then honour its `wait_for`.
 *
 * @param page      Page to drive.
 * @param step      The step to run; dispatched on `step.action`.
 * @param params    Resolved parameters, substituted into URLs, typed values
 *                  and locator patterns.
 * @param timeoutMs Timeout applied to each Playwright call made for the step.
 * @throws Error when no locator matches, a Playwright call times out, or the
 *         step's action is not one this runner implements.
 */
async function executeStep(
  page: Page,
  step: PlaybookStep,
  params: Record<string, string | number | boolean>,
  timeoutMs: number,
): Promise<void> {
  switch (step.action) {
    case 'navigate': {
      // 'domcontentloaded' instead of 'load' — SPAs behind enterprise
      // WAFs keep persistent connections alive so 'load' hangs forever.
      // Explicit wait_for handles "page is ready" semantics.
      await page.goto(subst(step.url, params), {
        timeout: timeoutMs,
        waitUntil: 'domcontentloaded',
      });
      await applyWait(page, step.wait_for, undefined, timeoutMs);
      return;
    }
    case 'click': {
      const locator = await firstMatching(page, step.locators, params, timeoutMs);
      try {
        await locator.click({ timeout: timeoutMs });
      } catch (err) {
        // Styled wrappers (role=checkbox/option, positioned overlays)
        // often intercept pointer events. force:true bubbles the event
        // through to the wrapper's handler.
        if (errMsg(err).includes('intercepts pointer events')) {
          await locator.click({ timeout: timeoutMs, force: true });
        } else {
          throw err;
        }
      }
      await applyWait(page, step.wait_for, locator, timeoutMs);
      return;
    }
    case 'type': {
      const locator = await firstMatching(page, step.locators, params, timeoutMs);
      const value = subst(step.value, params);
      if (step.clear === false) {
        // Append keystroke by keystroke without clearing existing content.
        await locator.pressSequentially(value, { timeout: timeoutMs });
      } else {
        // Default: replace whatever the field currently holds.
        await locator.fill(value, { timeout: timeoutMs });
      }
      await applyWait(page, step.wait_for, locator, timeoutMs);
      return;
    }
    case 'submit': {
      // Press Enter on the focused form — more reliable cross-site than
      // clicking a submit-typed descendant.
      const locator = await firstMatching(page, step.locators, params, timeoutMs);
      await locator.press('Enter', { timeout: timeoutMs });
      await applyWait(page, step.wait_for, locator, timeoutMs);
      return;
    }
    case 'press': {
      // With locators the key goes to that element; without, to whatever
      // currently has focus on the page.
      let focusedLocator: PWLocator | undefined;
      if (step.locators && step.locators.length > 0) {
        focusedLocator = await firstMatching(page, step.locators, params, timeoutMs);
        await focusedLocator.press(step.key, { timeout: timeoutMs });
      } else {
        await page.keyboard.press(step.key);
      }
      await applyWait(page, step.wait_for, focusedLocator, timeoutMs);
      return;
    }
    case 'wait':
      await applyWait(page, step.wait_for, undefined, timeoutMs);
      return;
    default:
      // A playbook parsed from YAML can carry an action this runner does not
      // implement. Fail the step loudly rather than report it as done.
      throw new Error(
        `Unsupported playbook action: ${String((step as { action?: unknown }).action)}`,
      );
  }
}
|
|
287
|
+
|
|
288
|
+
/**
 * Try each locator in priority order with a tight per-locator timeout and
 * return the first one that resolves to a visible element.
 *
 * Filter to visible elements before .first() — many sites have hidden
 * mirrors (e.g. a hidden native <select> alongside a custom dropdown).
 *
 * The per-locator probe is `timeoutMs / locators.length`, floored at 1000 ms,
 * so with many locators the total wait can exceed `timeoutMs`.
 *
 * A locator that cannot even be built (missing value, failed parameter
 * substitution) is recorded as a failed attempt and the next one is tried,
 * the same as a locator that matched nothing.
 *
 * @throws Error listing every attempted locator and why it failed, or a
 *         dedicated message when `locators` is empty.
 */
async function firstMatching(
  page: Page,
  locators: Locator[],
  params: Record<string, string | number | boolean>,
  timeoutMs: number,
): Promise<PWLocator> {
  if (locators.length === 0) {
    throw new Error('No locator matched: the step declares no locators.');
  }
  const probeMs = Math.max(1000, Math.floor(timeoutMs / locators.length));
  const errors: string[] = [];
  for (const loc of locators) {
    try {
      // Built inside the try so one malformed locator does not abort the
      // remaining fallbacks.
      const candidate = buildLocator(page, loc, params).locator('visible=true').first();
      await candidate.waitFor({ state: 'visible', timeout: probeMs });
      return candidate;
    } catch (err) {
      errors.push(`${describeLocator(loc)}: ${errMsg(err)}`);
    }
  }
  throw new Error(`No locator matched. Tried:\n  - ${errors.join('\n  - ')}`);
}
|
|
312
|
+
|
|
313
|
+
/**
 * Translate a playbook locator into a Playwright locator on `page`.
 *
 * - `role`: `getByRole`, optionally constrained by accessible name.
 * - `aria_label` / `text`: an exact match on `value`, otherwise a
 *   case-insensitive substring match on `value_pattern` after parameter
 *   substitution.
 * - `id` / `css`: a CSS selector (the id is escaped first).
 *
 * @throws Error when an `aria_label` or `text` locator has neither `value`
 *         nor `value_pattern`.
 */
function buildLocator(
  page: Page,
  loc: Locator,
  params: Record<string, string | number | boolean>,
): PWLocator {
  switch (loc.by) {
    case 'role': {
      const roleOptions = loc.name ? { name: loc.name } : undefined;
      // biome-ignore lint/suspicious/noExplicitAny: Playwright's role enum is opaque
      return page.getByRole(loc.value as any, roleOptions);
    }
    case 'aria_label': {
      if (loc.value !== undefined) {
        return page.getByLabel(loc.value, { exact: true });
      }
      if (loc.value_pattern === undefined) {
        throw new Error('aria_label locator requires value or value_pattern');
      }
      const needle = escapeAttr(subst(loc.value_pattern, params));
      return page.locator(`[aria-label*="${needle}" i]`);
    }
    case 'text': {
      if (loc.value !== undefined) {
        return page.getByText(loc.value, { exact: true });
      }
      if (loc.value_pattern === undefined) {
        throw new Error('text locator requires value or value_pattern');
      }
      const literal = escapeRegex(subst(loc.value_pattern, params));
      return page.getByText(new RegExp(literal, 'i'));
    }
    case 'id': {
      const selector = `#${cssEscape(loc.value)}`;
      return page.locator(selector);
    }
    case 'css':
      return page.locator(loc.value);
  }
}
|
|
346
|
+
|
|
347
|
+
/** Render a locator as a short `kind=value` string for error messages. */
function describeLocator(loc: Locator): string {
  switch (loc.by) {
    case 'role': {
      const nameSuffix = loc.name ? ` name="${loc.name}"` : '';
      return `role=${loc.value}${nameSuffix}`;
    }
    case 'aria_label': {
      const shown = loc.value ?? loc.value_pattern;
      return `aria_label=${shown}`;
    }
    case 'text': {
      const shown = loc.value ?? loc.value_pattern;
      return `text=${shown}`;
    }
    case 'id': {
      return `id=${loc.value}`;
    }
    case 'css': {
      return `css=${loc.value}`;
    }
  }
}
|
|
361
|
+
|
|
362
|
+
/**
 * Block until the step's `wait_for` condition holds.
 *
 * - no condition: returns immediately;
 * - `'networkidle'` / `'load'`: waits for that page load state;
 * - `'visible'` / `'hidden'`: waits on `ctxLocator`, and is a no-op when the
 *   step had no locator;
 * - `{ xhr, method?, timeout_ms? }`: waits for a response whose URL matches
 *   the regex (and method, when given);
 * - `{ sleep_ms }`: fixed delay.
 *
 * Any other string value is ignored.
 */
async function applyWait(
  page: Page,
  wait: WaitFor | undefined,
  ctxLocator: PWLocator | undefined,
  timeoutMs: number,
): Promise<void> {
  if (!wait) return;

  if (typeof wait === 'string') {
    switch (wait) {
      case 'networkidle':
      case 'load':
        await page.waitForLoadState(wait, { timeout: timeoutMs });
        break;
      case 'visible':
      case 'hidden':
        if (ctxLocator) {
          await ctxLocator.waitFor({ state: wait, timeout: timeoutMs });
        }
        break;
    }
    return;
  }

  if ('xhr' in wait) {
    const urlPattern = new RegExp(wait.xhr);
    const expectedMethod = wait.method;
    // A per-wait timeout overrides the step timeout.
    const limit = wait.timeout_ms ?? timeoutMs;
    await page.waitForResponse(
      (resp) => {
        if (!urlPattern.test(resp.url())) return false;
        return !expectedMethod || resp.request().method() === expectedMethod;
      },
      { timeout: limit },
    );
    return;
  }

  if ('sleep_ms' in wait) {
    await page.waitForTimeout(wait.sleep_ms);
  }
}
|
|
389
|
+
|
|
390
|
+
/**
 * Produce the playbook's result object after all steps have run.
 *
 * For an `xhr` result: take the LAST captured response with a readable body
 * whose URL (and method, when specified) matches, require a status below
 * 400, parse it as JSON, and return either the whole document (`extract` of
 * `*`) or the value at the `extract` path, together with `source_url`.
 *
 * For a DOM result: locate the element (5 s budget, no parameter
 * substitution) and return its text content or the named attribute.
 *
 * @throws Error when no response matched, the response was an HTTP error,
 *         the body was not JSON, or no DOM locator matched.
 */
async function extractResult(
  page: Page,
  result: PlaybookResult,
  captured: Array<{ url: string; method: string; status: number; body: string | null }>,
): Promise<Record<string, unknown>> {
  if (result.source === 'xhr') {
    const urlRe = new RegExp(result.url_pattern);
    let hit: { url: string; method: string; status: number; body: string | null } | undefined;
    for (const entry of captured) {
      if (entry.body === null) continue;
      if (result.method && entry.method !== result.method) continue;
      if (!urlRe.test(entry.url)) continue;
      // Keep overwriting: the most recent match wins.
      hit = entry;
    }
    if (hit === undefined || hit.body === null) {
      throw new Error(`No captured XHR matched ${result.url_pattern} (with a readable body)`);
    }
    const body = hit.body;
    const sourceUrl = hit.url;
    const status = hit.status;

    if (status >= 400) {
      let hint = '';
      if (status === 403) {
        hint = ' Likely bot detection — try --headed, or capture a fresh recording.';
      }
      throw new Error(`Result XHR returned ${status} (${sourceUrl}): ${body.slice(0, 300)}.${hint}`);
    }

    let parsed: unknown;
    try {
      parsed = JSON.parse(body);
    } catch {
      throw new Error(`Result XHR body was not JSON (${sourceUrl}): ${body.slice(0, 200)}`);
    }
    // `*` returns the full parsed JSON unchanged — useful when the consumer
    // (parser.ts, MCP caller) wants the rich object graph rather than just
    // numeric leaves.
    const payload = result.extract === '*' ? parsed : extractAt(parsed, result.extract);
    return { [result.return_as]: payload, source_url: sourceUrl };
  }

  // dom source
  const element = await firstMatching(page, result.locators, {}, 5000);
  let value: string | null;
  if (result.extract === 'text') {
    value = await element.textContent();
  } else {
    value = await element.getAttribute(result.extract);
  }
  return { [result.return_as]: value };
}
|
|
435
|
+
|
|
436
|
+
/**
 * Expand parameter placeholders in a template.
 *
 * Both `${X}` and `${param.X}` are accepted: bare `${X}` placeholders are
 * first rewritten to `${param.X}`, then the shared `substituteString` does
 * the expansion with an empty credential view.
 */
function subst(template: string, params: Record<string, string | number | boolean>): string {
  const bareName = /\$\{([a-zA-Z_][a-zA-Z0-9_]*)\}/g;
  const qualified = template.replace(bareName, (_whole, name: string) => `\${param.${name}}`);
  const noCredentials = { site: '', cookies: [], values: {} };
  return substituteString(qualified, params, noCredentials, []);
}
|
|
441
|
+
|
|
442
|
+
/**
 * Escape text for use inside a double-quoted CSS string, e.g. the value of
 * an attribute selector `[aria-label*="…"]`.
 *
 * Escapes the quote and the backslash (an unescaped backslash would start a
 * CSS escape sequence and corrupt the value), and encodes line breaks as hex
 * escapes because a raw newline terminates a CSS string.
 */
function escapeAttr(s: string): string {
  return s.replace(/["\\\n\r\f]/g, (ch) =>
    ch === '"' || ch === '\\' ? `\\${ch}` : `\\${ch.charCodeAt(0).toString(16)} `,
  );
}
|
|
445
|
+
/** Backslash-escape every regex metacharacter so `s` matches literally. */
function escapeRegex(s: string): string {
  const metaChars = /[.*+?^${}()|[\]\\]/g;
  return s.replace(metaChars, (ch) => `\\${ch}`);
}
|
|
448
|
+
/**
 * Escape a string for use as a CSS identifier (e.g. after `#` in an id
 * selector), following the CSSOM "serialize an identifier" rules:
 *
 * - NUL becomes U+FFFD;
 * - control characters, a leading digit, and a digit right after a leading
 *   `-` become hex escapes (`\31 ` for `1`), since a raw digit there is not a
 *   valid identifier start;
 * - a lone `-` is backslash-escaped;
 * - letters, digits, `-`, `_` and non-ASCII characters pass through;
 * - everything else (punctuation, spaces) is backslash-escaped.
 */
function cssEscape(s: string): string {
  let out = '';
  for (let i = 0; i < s.length; i++) {
    const ch = s.charAt(i);
    const code = s.charCodeAt(i);
    if (code === 0) {
      out += '\uFFFD';
      continue;
    }
    const isDigit = code >= 0x30 && code <= 0x39;
    const isControl = (code >= 0x01 && code <= 0x1f) || code === 0x7f;
    const digitAtStart = isDigit && (i === 0 || (i === 1 && s.charCodeAt(0) === 0x2d));
    if (isControl || digitAtStart) {
      // Hex escape; the trailing space terminates the escape sequence.
      out += `\\${code.toString(16)} `;
      continue;
    }
    if (i === 0 && s.length === 1 && code === 0x2d) {
      out += `\\${ch}`;
      continue;
    }
    const isNameChar =
      code >= 0x80 ||
      code === 0x2d ||
      code === 0x5f ||
      isDigit ||
      (code >= 0x41 && code <= 0x5a) ||
      (code >= 0x61 && code <= 0x7a);
    out += isNameChar ? ch : `\\${ch}`;
  }
  return out;
}
|
|
451
|
+
/** Human-readable text for any thrown value: `message` for Errors, `String(e)` otherwise. */
function errMsg(e: unknown): string {
  if (e instanceof Error) {
    return e.message;
  }
  return String(e);
}
|
|
454
|
+
|
|
455
|
+
/**
 * Fallback for callers that don't pass opts.site explicitly.
 * Intended for the `<IMPRINT_HOME>/<site>/<tool>/playbook.yaml` layout: the
 * site is the first path segment of the playbook path relative to the
 * imprint home directory.
 *
 * @returns The site key, or `null` when the input is an already-parsed
 *          Playbook, the path is outside the imprint home directory, or the
 *          path is the home directory itself.
 */
function inferSiteFromPath(playbookInput: string | Playbook): string | null {
  if (typeof playbookInput !== 'string') return null;
  const root = imprintHomeDir();
  const target = pathResolve(playbookInput);
  const relative = pathRelative(root, target);
  // A leading '..' means the target is above the root; an absolute result
  // means it is on a different drive/root altogether.
  if (relative.startsWith('..') || pathIsAbsolute(relative)) return null;
  // path.relative() uses the platform separator, which is '\' on Windows;
  // splitting on '/' alone would return the whole relative path as the site.
  const [site] = relative.split(/[\\/]/);
  return site || null;
}
|