autokap 1.8.9 → 1.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/analytics-blocklist.d.ts +85 -0
- package/dist/analytics-blocklist.js +201 -0
- package/dist/browser-pool.d.ts +1 -0
- package/dist/browser-pool.js +10 -1
- package/dist/browser.js +25 -7
- package/dist/cli-contract.d.ts +2 -0
- package/dist/cli-runner.d.ts +7 -0
- package/dist/cli-runner.js +21 -11
- package/dist/cli.js +13 -0
- package/dist/execution-schema.d.ts +2 -0
- package/dist/execution-schema.js +6 -0
- package/dist/execution-types.d.ts +19 -0
- package/dist/log-collector.d.ts +5 -1
- package/dist/log-collector.js +2 -1
- package/dist/login-detection.d.ts +52 -0
- package/dist/login-detection.js +126 -0
- package/dist/opcode-actions.js +10 -9
- package/dist/opcode-runner.js +8 -0
- package/dist/program-signing.d.ts +1 -0
- package/dist/types.d.ts +7 -0
- package/dist/video-narration-schema.d.ts +1 -0
- package/package.json +1 -1
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
import type { BrowserContext } from 'playwright';
|
|
2
|
+
/**
|
|
3
|
+
* Privacy-signal request headers attached to every capture context.
|
|
4
|
+
*
|
|
5
|
+
* These are standard, widely-sent browser headers (Firefox sends `DNT`, Brave
|
|
6
|
+
* sends `Sec-GPC`), so they're invisible to origin anti-bot defenses — they
|
|
7
|
+
* only ever observe a normal first-party page load. Privacy-first analytics
|
|
8
|
+
* (Plausible, Fathom, …) honor them and suppress the pageview, which
|
|
9
|
+
* complements the network-level blocking below for the providers that respect
|
|
10
|
+
* the signal. Free, zero-risk belt-and-suspenders.
|
|
11
|
+
*/
|
|
12
|
+
export declare const PRIVACY_HEADERS: Record<string, string>;
|
|
13
|
+
/**
|
|
14
|
+
* Hostnames of DEDICATED web-analytics / product-telemetry / session-replay
|
|
15
|
+
* endpoints. These domains serve ONLY analytics, so aborting requests to them
|
|
16
|
+
* during a capture has zero functional impact on the page being screenshotted —
|
|
17
|
+
* it only prevents AutoKap's automated visit from registering as a phantom
|
|
18
|
+
* "visitor" in the site owner's analytics (AUT-234).
|
|
19
|
+
*
|
|
20
|
+
* Matched by exact host OR sub-domain suffix
|
|
21
|
+
* (`host === h || host.endsWith('.' + h)`), so regional shards
|
|
22
|
+
* (`eu.i.posthog.com`, `region1.google-analytics.com`, `*.matomo.cloud`, …) are
|
|
23
|
+
* covered without enumerating each one.
|
|
24
|
+
*
|
|
25
|
+
* Deliberately ABSENT: `googletagmanager.com`. GTM can inject functional tags,
|
|
26
|
+
* and loading `gtm.js` does NOT itself record a visit — the GA *beacon* to
|
|
27
|
+
* `google-analytics.com` does, and that host IS blocked. So we neutralize the
|
|
28
|
+
* GA visit without risking a broken page.
|
|
29
|
+
*
|
|
30
|
+
* Self-hosted / first-party-proxied analytics (e.g. Plausible reverse-proxied
|
|
31
|
+
* on the site's own domain) is intentionally NOT covered: it reads as
|
|
32
|
+
* first-party (see the `isFirstPartyUrl` guard in {@link installAnalyticsBlock})
|
|
33
|
+
* and is impossible to detect universally. That edge case is out of scope by
|
|
34
|
+
* design — catching it would depend on per-site configuration.
|
|
35
|
+
*/
|
|
36
|
+
export declare const ANALYTICS_HOSTS: readonly string[];
|
|
37
|
+
/**
|
|
38
|
+
* True when `url` targets a dedicated web-analytics *ingestion* endpoint (see
|
|
39
|
+
* {@link ANALYTICS_HOSTS}). Host-suffix aware so regional/sub-domain shards
|
|
40
|
+
* match their parent. Fails OPEN on unparseable or non-http(s) URLs (returns
|
|
41
|
+
* `false`): we never abort a request we can't confidently classify.
|
|
42
|
+
*
|
|
43
|
+
* For hosts that co-serve functional config on the same domain as their
|
|
44
|
+
* analytics (PostHog), only the event-capture paths count — feature-flag /
|
|
45
|
+
* config / library paths are preserved so the captured UI never changes
|
|
46
|
+
* (see {@link POSTHOG_FUNCTIONAL_PATH_RE}).
|
|
47
|
+
*/
|
|
48
|
+
export declare function isAnalyticsRequest(url: string): boolean;
|
|
49
|
+
/**
|
|
50
|
+
* The per-request block decision, factored out of {@link installAnalyticsBlock}
|
|
51
|
+
* so the guard composition is unit-testable without a real browser context.
|
|
52
|
+
*
|
|
53
|
+
* Blocks ONLY a third-party analytics request. A first-party one — analytics
|
|
54
|
+
* self-hosted or reverse-proxied on the captured site's OWN domain — is
|
|
55
|
+
* preserved so we can never break the site's own functionality.
|
|
56
|
+
*
|
|
57
|
+
* `pageUrl` is the request's frame URL. When it's empty/unknown (a request in
|
|
58
|
+
* flight before the first navigation commits, a detached/teardown frame, some
|
|
59
|
+
* worker-originated requests) `isFirstPartyUrl` fail-OPENS to first-party, so
|
|
60
|
+
* the beacon is NOT aborted. That's the deliberate safe direction — never break
|
|
61
|
+
* a page — at the cost of a rare phantom-visit leak in that narrow window; real
|
|
62
|
+
* analytics beacons fire after navigation commits, so the frame URL is present.
|
|
63
|
+
*/
|
|
64
|
+
export declare function shouldBlockAnalyticsRequest(pageUrl: string, requestUrl: string): boolean;
|
|
65
|
+
/**
|
|
66
|
+
* Install a context-level route that aborts outgoing requests to dedicated
|
|
67
|
+
* third-party analytics endpoints, so capturing a site never registers a
|
|
68
|
+
* phantom "visit" in its analytics (AUT-234).
|
|
69
|
+
*
|
|
70
|
+
* Only THIRD-party analytics is blocked (the `!isFirstPartyUrl` guard): a
|
|
71
|
+
* first-party request is never aborted, so we can never break the captured
|
|
72
|
+
* site's own functionality. Aborting a third-party beacon is invisible to the
|
|
73
|
+
* origin's anti-bot (it only ever sees a normal first-party page load) — exactly
|
|
74
|
+
* what an ad-blocker does — so this carries no risk of tripping bot defenses.
|
|
75
|
+
*
|
|
76
|
+
* Registered at the CONTEXT level so it (a) covers every page/frame in the
|
|
77
|
+
* context, (b) survives `page.unrouteAll()` from `clearRouteInterception()`
|
|
78
|
+
* (which only clears page-level routes), and (c) composes with the page-level
|
|
79
|
+
* mock routes from `setupRouteInterception()` — page routes run first; this
|
|
80
|
+
* catch-all `fallback()`s every non-analytics request back to the network (or
|
|
81
|
+
* the next handler). Aborting also lowers in-flight count, which only helps
|
|
82
|
+
* `networkidle` settle. The adaptive-wait progress signal already ignores
|
|
83
|
+
* third-party traffic (AUT-240), so there's no interaction there.
|
|
84
|
+
*/
|
|
85
|
+
export declare function installAnalyticsBlock(context: BrowserContext): Promise<void>;
|
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
import { isFirstPartyUrl } from './security.js';
|
|
2
|
+
/**
 * Privacy-preference request headers merged into every capture context.
 *
 * Both are ordinary headers that mainstream browsers already send (Firefox
 * sends `DNT`, Brave sends `Sec-GPC`), so adding them does not make a capture
 * stand out to an origin's anti-bot defenses. Privacy-first analytics
 * providers (Plausible, Fathom, …) honor the signal and suppress the pageview,
 * which complements the network-level blocking done by
 * {@link installAnalyticsBlock} for the providers that respect it.
 */
export const PRIVACY_HEADERS = { DNT: '1', 'Sec-GPC': '1' };
|
|
16
|
+
/**
 * Hostnames of dedicated web-analytics / product-telemetry / session-replay
 * endpoints. Aborting requests to them during a capture is meant to have no
 * functional impact on the page being screenshotted; it only stops AutoKap's
 * automated visit from showing up as a phantom "visitor" in the site owner's
 * analytics (AUT-234).
 *
 * Matching is by exact host OR sub-domain suffix
 * (`host === h || host.endsWith('.' + h)`), so regional shards such as
 * `eu.i.posthog.com`, `region1.google-analytics.com` or `*.matomo.cloud` are
 * covered without listing each one.
 *
 * Deliberately NOT listed: `googletagmanager.com`. GTM can inject functional
 * tags, and loading `gtm.js` does not itself record a visit — the GA beacon to
 * `google-analytics.com` does, and that host is listed.
 *
 * Analytics that is self-hosted or reverse-proxied on the captured site's own
 * domain is intentionally out of scope: it reads as first-party (see the
 * `isFirstPartyUrl` guard used by {@link installAnalyticsBlock}) and cannot be
 * detected without per-site configuration.
 */
export const ANALYTICS_HOSTS = [
    // --- Google Analytics / GA4 / Universal Analytics collection endpoints ---
    'google-analytics.com',
    'analytics.google.com',
    'ssl.google-analytics.com',
    'region1.google-analytics.com',
    'stats.g.doubleclick.net',

    // --- Plausible ---
    'plausible.io',

    // --- PostHog ---
    // Only the event-ingestion paths end up blocked; feature-flag / config /
    // library paths (/decide, /flags, /static, /array) are let through by
    // isAnalyticsRequest so a flag-gated app never renders with default flags.
    'posthog.com',
    'i.posthog.com',

    // --- Matomo / Piwik (cloud) ---
    'matomo.cloud',
    'matomo.org',

    // --- Segment ---
    'segment.io',
    'segment.com',

    // --- Mixpanel ---
    'mixpanel.com',
    'mxpnl.com',

    // --- Amplitude ---
    'amplitude.com',

    // --- Heap ---
    'heapanalytics.com',
    'heap.io',

    // --- Hotjar (heatmaps / session replay) ---
    'hotjar.com',
    'hotjar.io',

    // --- Microsoft Clarity (session replay) ---
    'clarity.ms',

    // --- Cloudflare Web Analytics ---
    'cloudflareinsights.com',

    // --- Vercel Analytics / Speed Insights ---
    'vercel-insights.com',
    'va.vercel-scripts.com',

    // --- Fathom ---
    'usefathom.com',

    // --- Session replay / heatmaps ---
    'fullstory.com',
    'mouseflow.com',
    'crazyegg.com',

    // --- Yandex Metrica ---
    'mc.yandex.ru',

    // --- Quantcast ---
    'quantserve.com',
    'quantcount.com',

    // --- Adobe Analytics (Omniture) ---
    'omtrdc.net',
    '2o7.net',

    // --- Misc product analytics ---
    'pendo.io',
    'woopra.com',
    'kissmetrics.io',
];
// Lower-cased lookup set derived once from the list above; host matching
// consults this set rather than the exported array.
const ANALYTICS_HOST_SET = new Set();
for (const listedHost of ANALYTICS_HOSTS) {
    ANALYTICS_HOST_SET.add(listedHost.toLowerCase());
}
|
|
97
|
+
/**
 * Matches path prefixes on PostHog hosts that are functional rather than
 * analytics ingestion: feature flags (`/decide`, `/flags`) and the JS library
 * plus its remote config (`/static`, `/array`).
 *
 * PostHog serves these from the same host as event capture. Blocking them
 * would make a flag-gated app render with default flags during capture, which
 * visibly changes the captured UI. Only paths this pattern does NOT match
 * (the ingestion paths such as `/e/`, `/i/v0/e/`, `/batch/`, `/capture/`,
 * `/s/`) are treated as analytics, since those record the phantom visit.
 *
 * The match is case-insensitive and anchored at the start of the pathname.
 */
const POSTHOG_FUNCTIONAL_PATH_RE = /^\/(decide|flags|static|array)\b/i;
|
|
107
|
+
/**
 * Reports whether `host` is a listed analytics host or a sub-domain of one.
 *
 * Equivalent to testing `host === h || host.endsWith('.' + h)` for every
 * listed `h`: the host itself is looked up first, then each parent domain
 * obtained by dropping the leading label (`a.b.c` -> `b.c` -> `c`).
 *
 * @param host Hostname to classify; compared as-is against the lower-cased set.
 * @returns `true` when the host or one of its parent domains is listed.
 */
function matchesAnalyticsHost(host) {
    let candidate = host;
    for (;;) {
        if (ANALYTICS_HOST_SET.has(candidate)) {
            return true;
        }
        const firstDot = candidate.indexOf('.');
        if (firstDot === -1) {
            // No parent domain left to try.
            return false;
        }
        candidate = candidate.slice(firstDot + 1);
    }
}
|
|
116
|
+
/**
 * True for the PostHog apex domain (`posthog.com`) and any sub-domain of it.
 *
 * @param host Hostname to test; compared case-sensitively as given.
 */
function isPosthogHost(host) {
    const apex = 'posthog.com';
    if (host === apex) {
        return true;
    }
    return host.endsWith(`.${apex}`);
}
|
|
119
|
+
/**
 * Decides whether `url` targets a dedicated web-analytics *ingestion* endpoint,
 * i.e. a host in {@link ANALYTICS_HOSTS} or any sub-domain of one.
 *
 * Returns `false` ("do not abort") whenever the URL cannot be classified with
 * confidence: it fails to parse, or its scheme is not http(s).
 *
 * A single trailing dot on the hostname (the fully-qualified form, e.g.
 * `google-analytics.com.`) is stripped before matching. `URL.hostname` keeps
 * that dot, so without this step such a request would bypass both the exact
 * and the sub-domain comparison.
 *
 * PostHog co-serves feature flags, config and its JS library from the same
 * host as event capture, so on PostHog hosts any path matched by
 * {@link POSTHOG_FUNCTIONAL_PATH_RE} is NOT counted as analytics; this keeps
 * the captured UI unchanged.
 *
 * @param url Absolute request URL.
 * @returns `true` only for an http(s) request to a listed analytics host that
 *   is not a preserved PostHog functional path.
 */
export function isAnalyticsRequest(url) {
    let parsed;
    try {
        parsed = new URL(url);
    }
    catch {
        // Unparseable input: never abort what we cannot classify.
        return false;
    }
    if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:')
        return false;
    // Normalise case and drop the optional root dot of a fully-qualified name.
    const host = parsed.hostname.toLowerCase().replace(/\.$/, '');
    if (!matchesAnalyticsHost(host))
        return false;
    if (isPosthogHost(host) && POSTHOG_FUNCTIONAL_PATH_RE.test(parsed.pathname)) {
        return false;
    }
    return true;
}
|
|
148
|
+
/**
 * Per-request block decision, kept separate from {@link installAnalyticsBlock}
 * so the combination of guards can be unit-tested without a browser context.
 *
 * A request is blocked only when it is BOTH an analytics request and
 * third-party relative to the page. Analytics self-hosted or reverse-proxied
 * on the captured site's own domain is first-party and is left alone, so the
 * site's own functionality is never broken.
 *
 * `pageUrl` is the URL of the frame that issued the request. When it is empty
 * or unknown (a request in flight before the first navigation commits, a
 * detached/teardown frame, some worker-originated requests) `isFirstPartyUrl`
 * fails open to first-party, so the beacon is NOT aborted. That is the
 * deliberate safe direction, at the cost of a rare phantom-visit leak in that
 * narrow window.
 *
 * @param pageUrl URL of the requesting frame, or `''` when unknown.
 * @param requestUrl URL of the outgoing request.
 * @returns `true` when the request should be aborted.
 */
export function shouldBlockAnalyticsRequest(pageUrl, requestUrl) {
    // Non-analytics traffic is never blocked; the first-party check is only
    // consulted for requests already classified as analytics.
    if (!isAnalyticsRequest(requestUrl)) {
        return false;
    }
    const firstParty = isFirstPartyUrl(pageUrl, requestUrl);
    return !firstParty;
}
|
|
166
|
+
/**
 * Best-effort lookup of the URL of the frame that issued `request`.
 *
 * Playwright's `request.frame()` throws (rather than returning `null`) when no
 * frame is available, e.g. for service-worker requests or navigation requests
 * issued before their frame is created. In that case, and when the frame URL
 * is empty, this returns `''` so the caller treats the page as unknown.
 */
function requestFrameUrl(request) {
    try {
        return request.frame()?.url() || '';
    }
    catch {
        return '';
    }
}
/**
 * Installs a context-level route that aborts outgoing requests to dedicated
 * third-party analytics endpoints, so capturing a site never registers a
 * phantom "visit" in its analytics (AUT-234).
 *
 * Only THIRD-party analytics is blocked (see
 * {@link shouldBlockAnalyticsRequest}): a first-party request is never
 * aborted, so the captured site's own functionality cannot be broken. The
 * abort uses the `'blockedbyclient'` error code, mimicking an ad-blocker: the
 * page just sees a failed beacon.
 *
 * Registered at the CONTEXT level so it (a) covers every page/frame in the
 * context, (b) survives `page.unrouteAll()` from `clearRouteInterception()`
 * (which only clears page-level routes), and (c) composes with the page-level
 * mock routes from `setupRouteInterception()`: page routes run first, and this
 * catch-all hands every non-blocked request on via `route.fallback()`.
 *
 * A request whose frame cannot be resolved is evaluated with an empty page
 * URL, which takes the documented "unknown frame, do not abort" path instead
 * of throwing inside the route handler.
 *
 * @param context Playwright browser context to install the route on.
 * @returns Resolves once the route has been registered.
 */
export async function installAnalyticsBlock(context) {
    await context.route('**/*', (route) => {
        const request = route.request();
        // Derive the page origin from the request's own frame so analytics
        // self-hosted on the captured site's domain reads as first-party.
        const pageUrl = requestFrameUrl(request);
        if (shouldBlockAnalyticsRequest(pageUrl, request.url())) {
            // Abort failures (e.g. the request was already handled or the
            // page is closing) are deliberately ignored: best-effort block.
            return route.abort('blockedbyclient').catch(() => undefined);
        }
        return route.fallback();
    });
}
|
|
201
|
+
//# sourceMappingURL=analytics-blocklist.js.map
|
package/dist/browser-pool.d.ts
CHANGED
|
@@ -20,6 +20,7 @@ declare class BrowserPool {
|
|
|
20
20
|
colorScheme?: 'light' | 'dark';
|
|
21
21
|
storageState?: BrowserStorageState;
|
|
22
22
|
extraHttpHeaders?: Record<string, string>;
|
|
23
|
+
blockAnalytics?: boolean;
|
|
23
24
|
}): Promise<BrowserContext>;
|
|
24
25
|
/**
|
|
25
26
|
* Release a context back to the pool. Closes the context and unblocks
|
package/dist/browser-pool.js
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { chromium } from 'playwright';
|
|
2
|
+
import { installAnalyticsBlock, PRIVACY_HEADERS } from './analytics-blocklist.js';
|
|
2
3
|
/** Chromium flags for server-side headless operation (used by pool and standalone launches). */
|
|
3
4
|
export const CHROMIUM_ARGS = [
|
|
4
5
|
// Linux/Docker-only: required when running Chromium as root or with limited /dev/shm
|
|
@@ -74,8 +75,16 @@ class BrowserPool {
|
|
|
74
75
|
locale: options?.lang ? options.lang : 'en-US',
|
|
75
76
|
colorScheme: options?.colorScheme ?? 'light',
|
|
76
77
|
storageState: options?.storageState,
|
|
77
|
-
|
|
78
|
+
// Privacy signals first, then merge user/env auth headers (which win on
|
|
79
|
+
// any conflict). See PRIVACY_HEADERS / installAnalyticsBlock (AUT-234).
|
|
80
|
+
extraHTTPHeaders: { ...PRIVACY_HEADERS, ...(extra ?? {}) },
|
|
78
81
|
});
|
|
82
|
+
// Block third-party analytics so pooled (server-side) captures don't
|
|
83
|
+
// register a phantom visit in the captured site's analytics (AUT-234).
|
|
84
|
+
// Skippable per-project via blockAnalytics === false.
|
|
85
|
+
if (options?.blockAnalytics !== false) {
|
|
86
|
+
await installAnalyticsBlock(context);
|
|
87
|
+
}
|
|
79
88
|
this.activeContexts++;
|
|
80
89
|
this.captureCount++;
|
|
81
90
|
return context;
|
package/dist/browser.js
CHANGED
|
@@ -6,6 +6,7 @@ import { join } from 'path';
|
|
|
6
6
|
import { DOM_QUIET_WINDOW_MS, GLOBAL_WAIT_CAP_MS, PIXEL_FALLBACK_DIFF_THRESHOLD, PIXEL_FALLBACK_MAX_PASSES, } from './wait-contract.js';
|
|
7
7
|
import { buildAKNodeRuntimeIndex, deriveInteractiveElementsFromAKTree, disambiguateFingerprint, focusAKTree, fingerprintAKNode, serializeAKTree, } from './ak-tree.js';
|
|
8
8
|
import { isFirstPartyUrl } from './security.js';
|
|
9
|
+
import { installAnalyticsBlock, PRIVACY_HEADERS } from './analytics-blocklist.js';
|
|
9
10
|
/**
|
|
10
11
|
* Set-of-Marks (SoM) annotation: overlays colored [N] badges on each visible
|
|
11
12
|
* interactive element so the vision model can reference elements by their badge index.
|
|
@@ -949,6 +950,7 @@ export class Browser {
|
|
|
949
950
|
colorScheme: options.colorScheme ?? 'light',
|
|
950
951
|
storageState: options.storageState,
|
|
951
952
|
extraHttpHeaders: options.extraHttpHeaders,
|
|
953
|
+
blockAnalytics: options.blockAnalytics,
|
|
952
954
|
});
|
|
953
955
|
instance.page = await instance.context.newPage();
|
|
954
956
|
instance.poolContext = true;
|
|
@@ -1068,9 +1070,9 @@ export class Browser {
|
|
|
1068
1070
|
locale: langToLocale(options.lang ?? 'en'),
|
|
1069
1071
|
colorScheme: options.colorScheme ?? 'light',
|
|
1070
1072
|
storageState: options.storageState,
|
|
1071
|
-
|
|
1072
|
-
|
|
1073
|
-
|
|
1073
|
+
// Privacy signals first, then merge user/env auth headers (which win on
|
|
1074
|
+
// any conflict). See PRIVACY_HEADERS / installAnalyticsBlock (AUT-234).
|
|
1075
|
+
extraHTTPHeaders: { ...PRIVACY_HEADERS, ...(options.extraHttpHeaders ?? {}) },
|
|
1074
1076
|
};
|
|
1075
1077
|
// Dedicated browser process for clip capture. Not pooled because clip
|
|
1076
1078
|
// capture installs context-level init scripts (cursor overlay). Cloud Run
|
|
@@ -1109,6 +1111,13 @@ export class Browser {
|
|
|
1109
1111
|
});
|
|
1110
1112
|
instance.context = await instance.browser.newContext(contextOptions);
|
|
1111
1113
|
}
|
|
1114
|
+
// Block third-party analytics beacons so clip/video capture doesn't
|
|
1115
|
+
// register a phantom visit either (AUT-234). Context-level so it covers
|
|
1116
|
+
// every page in both the persistent (cloud) and incognito (local) paths.
|
|
1117
|
+
// Skippable per-project via blockAnalytics === false.
|
|
1118
|
+
if (options.blockAnalytics !== false) {
|
|
1119
|
+
await installAnalyticsBlock(instance.context);
|
|
1120
|
+
}
|
|
1112
1121
|
// Cloud Run only: inject the notranslate meta on every navigation so
|
|
1113
1122
|
// Chromium's translate UI never prompts. The --disable-features=Translate*
|
|
1114
1123
|
// launch flags are unreliable across Chromium versions (some translate
|
|
@@ -1244,6 +1253,9 @@ export class Browser {
|
|
|
1244
1253
|
args: CHROMIUM_ARGS,
|
|
1245
1254
|
});
|
|
1246
1255
|
this.context = await this.browser.newContext(this.buildContextOptions());
|
|
1256
|
+
if (this.options.blockAnalytics !== false) {
|
|
1257
|
+
await installAnalyticsBlock(this.context);
|
|
1258
|
+
}
|
|
1247
1259
|
this.page = await this.context.newPage();
|
|
1248
1260
|
this.attachDebugLifecycleListeners();
|
|
1249
1261
|
}
|
|
@@ -1333,6 +1345,9 @@ export class Browser {
|
|
|
1333
1345
|
this.context = null;
|
|
1334
1346
|
}
|
|
1335
1347
|
this.context = await this.browser.newContext(this.buildContextOptions());
|
|
1348
|
+
if (this.options.blockAnalytics !== false) {
|
|
1349
|
+
await installAnalyticsBlock(this.context);
|
|
1350
|
+
}
|
|
1336
1351
|
this.page = await this.context.newPage();
|
|
1337
1352
|
this.elementMap.clear();
|
|
1338
1353
|
this.attachDebugLifecycleListeners();
|
|
@@ -5631,10 +5646,11 @@ export class Browser {
|
|
|
5631
5646
|
async setLanguage(lang) {
|
|
5632
5647
|
const context = this.ensureContext();
|
|
5633
5648
|
const page = this.ensurePage();
|
|
5634
|
-
// `setExtraHTTPHeaders` REPLACES the header map — merge with the
|
|
5635
|
-
// environment-level auth headers so a SET_LOCALE opcode
|
|
5636
|
-
// them mid-run.
|
|
5649
|
+
// `setExtraHTTPHeaders` REPLACES the header map — merge with the privacy
|
|
5650
|
+
// signals and the environment-level auth headers so a SET_LOCALE opcode
|
|
5651
|
+
// doesn't strip them mid-run.
|
|
5637
5652
|
await context.setExtraHTTPHeaders({
|
|
5653
|
+
...PRIVACY_HEADERS,
|
|
5638
5654
|
...(this.options.extraHttpHeaders ?? {}),
|
|
5639
5655
|
'Accept-Language': lang,
|
|
5640
5656
|
});
|
|
@@ -5761,7 +5777,9 @@ export class Browser {
|
|
|
5761
5777
|
locale: langToLocale(this.options.lang ?? 'en'),
|
|
5762
5778
|
colorScheme: this.options.colorScheme ?? 'light',
|
|
5763
5779
|
storageState: this.options.storageState,
|
|
5764
|
-
|
|
5780
|
+
// Privacy signals first, then merge user/env auth headers (which win on
|
|
5781
|
+
// any conflict). See PRIVACY_HEADERS / installAnalyticsBlock (AUT-234).
|
|
5782
|
+
extraHTTPHeaders: { ...PRIVACY_HEADERS, ...(extra ?? {}) },
|
|
5765
5783
|
};
|
|
5766
5784
|
}
|
|
5767
5785
|
}
|
package/dist/cli-contract.d.ts
CHANGED
|
@@ -117,6 +117,8 @@ export type ArtifactUploadObjectId = "screenshotRaw" | "screenshot" | "clipGif"
|
|
|
117
117
|
export interface ArtifactUploadMetadata {
|
|
118
118
|
presetId: string;
|
|
119
119
|
runId: string;
|
|
120
|
+
/** Per-invocation session id grouping every charge of one CLI run (migration 267). */
|
|
121
|
+
sessionId?: string | null;
|
|
120
122
|
variantId: string;
|
|
121
123
|
targetId?: string | null;
|
|
122
124
|
targetLabel?: string | null;
|
package/dist/cli-runner.d.ts
CHANGED
|
@@ -49,6 +49,13 @@ export interface CLIRunnerOptions {
|
|
|
49
49
|
* When `false`, skips the failed-run debug logs export (AUT-149).
|
|
50
50
|
*/
|
|
51
51
|
exportDebugLogs?: boolean;
|
|
52
|
+
/**
|
|
53
|
+
* Shared invocation id for billing. A multi-preset invocation (`run --outdated`,
|
|
54
|
+
* `auto-recapture`) passes one sessionId for every preset so all their charges
|
|
55
|
+
* group into a single "CLI capture" entry (migration 267). Defaults to the
|
|
56
|
+
* run's own id for a single-preset invocation.
|
|
57
|
+
*/
|
|
58
|
+
sessionId?: string;
|
|
52
59
|
}
|
|
53
60
|
export interface CLIRunResult {
|
|
54
61
|
success: boolean;
|
package/dist/cli-runner.js
CHANGED
|
@@ -156,6 +156,10 @@ export async function runCapture(options) {
|
|
|
156
156
|
// here; we seed mockup.ts so the export pipeline can apply them locally.
|
|
157
157
|
seedDeviceConfigs(program.deviceConfigs ?? null);
|
|
158
158
|
const runId = randomUUID();
|
|
159
|
+
// A multi-preset invocation shares one sessionId so every preset's charges
|
|
160
|
+
// group into one "CLI capture" billing entry; a lone run groups under its own
|
|
161
|
+
// runId (migration 267).
|
|
162
|
+
const sessionId = options.sessionId ?? runId;
|
|
159
163
|
let videoAudioAssets;
|
|
160
164
|
let videoAudioAssetsByLocale;
|
|
161
165
|
try {
|
|
@@ -170,7 +174,7 @@ export async function runCapture(options) {
|
|
|
170
174
|
// durations + audio assets are only consumed on the upload path (signalVideoComplete), which a dry
|
|
171
175
|
// run never reaches, so skipping prep here is side-effect-free for dry.
|
|
172
176
|
if (!options.dryRun && !options.program && program.mediaMode === 'video') {
|
|
173
|
-
const prepareResult = await prepareVideoSpeechForRun(config, options.presetId, runId, options.regenerateTts ?? false);
|
|
177
|
+
const prepareResult = await prepareVideoSpeechForRun(config, options.presetId, runId, options.regenerateTts ?? false, sessionId);
|
|
174
178
|
if (!prepareResult.success) {
|
|
175
179
|
return { success: false, runId, error: prepareResult.error };
|
|
176
180
|
}
|
|
@@ -254,6 +258,7 @@ export async function runCapture(options) {
|
|
|
254
258
|
colorScheme: variant.theme,
|
|
255
259
|
storageState: program.preconditions.storageState,
|
|
256
260
|
extraHttpHeaders: program.environmentHttpHeaders,
|
|
261
|
+
blockAnalytics: program.blockAnalytics,
|
|
257
262
|
};
|
|
258
263
|
let recordingDir;
|
|
259
264
|
let browser;
|
|
@@ -308,7 +313,7 @@ export async function runCapture(options) {
|
|
|
308
313
|
message: 'saving captures',
|
|
309
314
|
});
|
|
310
315
|
const provenance = buildRunProvenance(program, schemaVersionOrigin);
|
|
311
|
-
const uploadOutcome = await uploadResults(config, program, runResult, runId, provenance);
|
|
316
|
+
const uploadOutcome = await uploadResults(config, program, runResult, runId, sessionId, provenance);
|
|
312
317
|
if (program.mediaMode === 'video' && runResult.success) {
|
|
313
318
|
await signalVideoComplete(config, program, runResult, uploadOutcome.runId, videoAudioAssets, videoAudioAssetsByLocale);
|
|
314
319
|
}
|
|
@@ -356,7 +361,7 @@ export async function runCapture(options) {
|
|
|
356
361
|
&& (runResult ? !runResult.success : true);
|
|
357
362
|
if (shouldExport) {
|
|
358
363
|
logger.info('[debug-logs] Exporting debug logs to AutoKap…');
|
|
359
|
-
await logCollector.flushTo(runId, program.presetId, config.apiBaseUrl, config.apiKey, options.env);
|
|
364
|
+
await logCollector.flushTo(runId, program.presetId, config.apiBaseUrl, config.apiKey, options.env, runResult?.failureKind);
|
|
360
365
|
}
|
|
361
366
|
logCollector.stop();
|
|
362
367
|
}
|
|
@@ -430,7 +435,7 @@ async function fetchProgram(config, presetId, environmentName) {
|
|
|
430
435
|
}
|
|
431
436
|
return { success: false, error: 'failed to fetch program: retry attempts exhausted' };
|
|
432
437
|
}
|
|
433
|
-
async function prepareVideoSpeechForRun(config, videoId, runId, regenerateTts) {
|
|
438
|
+
async function prepareVideoSpeechForRun(config, videoId, runId, regenerateTts, sessionId) {
|
|
434
439
|
if (regenerateTts) {
|
|
435
440
|
logger.info('[capture] Forcing TTS regeneration — all cached segments will be re-synthesized and billed.');
|
|
436
441
|
}
|
|
@@ -445,7 +450,9 @@ async function prepareVideoSpeechForRun(config, videoId, runId, regenerateTts) {
|
|
|
445
450
|
'Content-Type': 'application/json',
|
|
446
451
|
[CLI_VERSION_HEADER]: APP_VERSION,
|
|
447
452
|
},
|
|
448
|
-
body: JSON.stringify(regenerateTts
|
|
453
|
+
body: JSON.stringify(regenerateTts
|
|
454
|
+
? { videoId, runId, sessionId, regenerateTts: true }
|
|
455
|
+
: { videoId, runId, sessionId }),
|
|
449
456
|
});
|
|
450
457
|
}
|
|
451
458
|
catch (err) {
|
|
@@ -686,7 +693,7 @@ async function postRunStart(config, runId, presetId, variantCount, env) {
|
|
|
686
693
|
logger.warn(`[capture] Run registration error: ${message}`);
|
|
687
694
|
}
|
|
688
695
|
}
|
|
689
|
-
async function uploadResults(config, program, result, runId, provenance) {
|
|
696
|
+
async function uploadResults(config, program, result, runId, sessionId, provenance) {
|
|
690
697
|
const artifactJobs = result.variantResults.flatMap((variant) => {
|
|
691
698
|
const variantSpec = program.variants.find((entry) => entry.id === variant.variantId);
|
|
692
699
|
return variant.artifacts.map((artifact) => ({
|
|
@@ -701,7 +708,7 @@ async function uploadResults(config, program, result, runId, provenance) {
|
|
|
701
708
|
logger.info(`[capture] Uploading ${totalArtifacts} capture artifacts with concurrency ${artifactUploadConcurrency}`);
|
|
702
709
|
}
|
|
703
710
|
await runWithConcurrency(artifactJobs, artifactUploadConcurrency, async (job, index) => {
|
|
704
|
-
await uploadArtifact(config, program, runId, totalArtifacts, index + 1, job, provenance);
|
|
711
|
+
await uploadArtifact(config, program, runId, sessionId, totalArtifacts, index + 1, job, provenance);
|
|
705
712
|
});
|
|
706
713
|
// Strip binary buffers from artifacts before sending. The raw PNG/video
|
|
707
714
|
// buffers were already uploaded via /api/cli/artifacts above, and the
|
|
@@ -862,18 +869,19 @@ function inferVariantLocale(variantId) {
|
|
|
862
869
|
function inferVariantTheme(variantId) {
|
|
863
870
|
return variantId.endsWith('-dark') ? 'dark' : 'light';
|
|
864
871
|
}
|
|
865
|
-
async function uploadArtifact(config, program, runId, totalArtifacts, uploadNumber, job, provenance) {
|
|
872
|
+
async function uploadArtifact(config, program, runId, sessionId, totalArtifacts, uploadNumber, job, provenance) {
|
|
866
873
|
const { artifact, variant, variantSpec } = job;
|
|
867
874
|
const filename = buildArtifactFilename(program.presetId, variant.variantId, artifact);
|
|
868
875
|
const label = artifact.captureName ?? artifact.clipName ?? filename;
|
|
869
876
|
logger.info(`[capture] Exporting capture ${uploadNumber}/${totalArtifacts}: ${label}`);
|
|
870
877
|
if (process.env.AUTOKAP_USE_LEGACY_MULTIPART_UPLOADS === '1') {
|
|
871
|
-
await uploadArtifactMultipart(config, program, runId, job, filename, provenance);
|
|
878
|
+
await uploadArtifactMultipart(config, program, runId, sessionId, job, filename, provenance);
|
|
872
879
|
return;
|
|
873
880
|
}
|
|
874
881
|
const prepared = await prepareDirectArtifactUpload({
|
|
875
882
|
program,
|
|
876
883
|
runId,
|
|
884
|
+
sessionId,
|
|
877
885
|
artifact,
|
|
878
886
|
variant,
|
|
879
887
|
variantSpec,
|
|
@@ -938,7 +946,7 @@ async function uploadArtifact(config, program, runId, totalArtifacts, uploadNumb
|
|
|
938
946
|
throw new Error(`artifact completion failed for ${variant.variantId}: ${await formatServerError(completeResponse, completeUrl)}`);
|
|
939
947
|
}
|
|
940
948
|
}
|
|
941
|
-
async function uploadArtifactMultipart(config, program, runId, job, filename, provenance) {
|
|
949
|
+
async function uploadArtifactMultipart(config, program, runId, sessionId, job, filename, provenance) {
|
|
942
950
|
const { artifact, variant, variantSpec } = job;
|
|
943
951
|
const formData = new FormData();
|
|
944
952
|
formData.append('file', new Blob([new Uint8Array(artifact.buffer)], { type: artifact.mimeType }), filename);
|
|
@@ -952,6 +960,7 @@ async function uploadArtifactMultipart(config, program, runId, job, filename, pr
|
|
|
952
960
|
formData.append('cliVersion', provenance.cliVersion);
|
|
953
961
|
formData.append('programHash', provenance.programHash);
|
|
954
962
|
formData.append('runId', runId);
|
|
963
|
+
formData.append('sessionId', sessionId);
|
|
955
964
|
formData.append('variantId', variant.variantId);
|
|
956
965
|
formData.append('targetId', variantSpec?.targetId ?? variant.variantId);
|
|
957
966
|
formData.append('targetLabel', variantSpec?.targetLabel ?? variantSpec?.deviceFrame ?? variant.variantId);
|
|
@@ -1023,7 +1032,7 @@ async function uploadArtifactMultipart(config, program, runId, job, filename, pr
|
|
|
1023
1032
|
}
|
|
1024
1033
|
}
|
|
1025
1034
|
async function prepareDirectArtifactUpload(params) {
|
|
1026
|
-
const { program, runId, artifact, variant, variantSpec, provenance } = params;
|
|
1035
|
+
const { program, runId, sessionId, artifact, variant, variantSpec, provenance } = params;
|
|
1027
1036
|
const requestedDeviceScaleFactor = variantSpec?.deviceScaleFactor ?? program.outputScale ?? 2;
|
|
1028
1037
|
const isFrameCapture = artifact.mediaMode === 'clip' || artifact.mediaMode === 'video';
|
|
1029
1038
|
const deviceScaleFactor = isFrameCapture && Number.isFinite(requestedDeviceScaleFactor)
|
|
@@ -1040,6 +1049,7 @@ async function prepareDirectArtifactUpload(params) {
|
|
|
1040
1049
|
cliVersion: provenance.cliVersion,
|
|
1041
1050
|
programHash: provenance.programHash,
|
|
1042
1051
|
runId,
|
|
1052
|
+
sessionId,
|
|
1043
1053
|
variantId: variant.variantId,
|
|
1044
1054
|
targetId: variantSpec?.targetId ?? variant.variantId,
|
|
1045
1055
|
targetLabel: variantSpec?.targetLabel ?? variantSpec?.deviceFrame ?? variant.variantId,
|
package/dist/cli.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import { Command } from 'commander';
|
|
3
3
|
import { createRequire } from 'node:module';
|
|
4
|
+
import { randomUUID } from 'node:crypto';
|
|
4
5
|
import path from 'node:path';
|
|
5
6
|
import fs from 'node:fs/promises';
|
|
6
7
|
const require = createRequire(import.meta.url);
|
|
@@ -137,6 +138,9 @@ async function runOutdatedPresetsLocally(opts) {
|
|
|
137
138
|
const { runCapture } = await import('./cli-runner.js');
|
|
138
139
|
const failures = [];
|
|
139
140
|
logger.info(`[capture] Running ${data.presets.length} outdated preset(s)`);
|
|
141
|
+
// One session id for the whole invocation so every preset's screenshot/clip/
|
|
142
|
+
// video charges group into a single "CLI capture" billing entry (migration 267).
|
|
143
|
+
const sessionId = randomUUID();
|
|
140
144
|
for (const preset of data.presets) {
|
|
141
145
|
const label = preset.name ? `${preset.name} (${preset.id})` : preset.id;
|
|
142
146
|
logger.info(`[capture] Running outdated preset ${label}`);
|
|
@@ -147,6 +151,7 @@ async function runOutdatedPresetsLocally(opts) {
|
|
|
147
151
|
allowUploadFailure: opts.allowUploadFailure,
|
|
148
152
|
dryRun: opts.dry,
|
|
149
153
|
regenerateTts: opts.regenerateTts,
|
|
154
|
+
sessionId,
|
|
150
155
|
});
|
|
151
156
|
if (!result.success) {
|
|
152
157
|
failures.push({
|
|
@@ -397,6 +402,10 @@ program
|
|
|
397
402
|
failedPresets: 0,
|
|
398
403
|
message: `Runner started: ${data.presets.length} preset(s) to capture`,
|
|
399
404
|
});
|
|
405
|
+
// One session id for the whole invocation so every preset's charges group
|
|
406
|
+
// into a single "CLI capture" billing entry (migration 267). For cloud runs
|
|
407
|
+
// child artifact billing is suppressed server-side, so this is a no-op there.
|
|
408
|
+
const sessionId = randomUUID();
|
|
400
409
|
for (const [index, preset] of data.presets.entries()) {
|
|
401
410
|
const presetDisplayName = displayPresetName(preset);
|
|
402
411
|
const label = preset.name ? `${preset.name} (${preset.id})` : preset.id;
|
|
@@ -417,6 +426,7 @@ program
|
|
|
417
426
|
headed: opts.headed,
|
|
418
427
|
allowUploadFailure: opts.allowUploadFailure,
|
|
419
428
|
regenerateTts: opts.regenerateTts,
|
|
429
|
+
sessionId,
|
|
420
430
|
// Each preset runs under its own ephemeral runId, which is NOT a
|
|
421
431
|
// capture_runs row in a cloud batch (the parent cloud run owns the row),
|
|
422
432
|
// so the per-preset error-log export would 404. Failure telemetry for
|
|
@@ -457,6 +467,9 @@ program
|
|
|
457
467
|
childRunId,
|
|
458
468
|
status: 'failed',
|
|
459
469
|
errorMessage: error,
|
|
470
|
+
...(result.runResult?.failureKind
|
|
471
|
+
? { failureKind: result.runResult.failureKind }
|
|
472
|
+
: {}),
|
|
460
473
|
message: `Preset failed: ${presetDisplayName}`,
|
|
461
474
|
});
|
|
462
475
|
}
|
|
@@ -2233,6 +2233,7 @@ export declare const ExecutionProgramSchema: z.ZodObject<{
|
|
|
2233
2233
|
deviceConfigs: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodRecord<z.ZodString, z.ZodUnknown>>>;
|
|
2234
2234
|
publicUrl: z.ZodOptional<z.ZodString>;
|
|
2235
2235
|
environmentHttpHeaders: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
|
|
2236
|
+
blockAnalytics: z.ZodOptional<z.ZodBoolean>;
|
|
2236
2237
|
}, z.core.$strict>;
|
|
2237
2238
|
export declare const HealerPatchSchema: z.ZodObject<{
|
|
2238
2239
|
opcodeIndex: z.ZodNumber;
|
|
@@ -4936,6 +4937,7 @@ export declare function safeParseProgramResult(data: unknown): z.ZodSafeParseRes
|
|
|
4936
4937
|
deviceConfigs?: Record<string, Record<string, unknown>> | undefined;
|
|
4937
4938
|
publicUrl?: string | undefined;
|
|
4938
4939
|
environmentHttpHeaders?: Record<string, string> | undefined;
|
|
4940
|
+
blockAnalytics?: boolean | undefined;
|
|
4939
4941
|
}>;
|
|
4940
4942
|
export interface ClipNavigationViolation {
|
|
4941
4943
|
/** Index of the offending NAVIGATE opcode in `program.steps`. */
|
package/dist/execution-schema.js
CHANGED
|
@@ -682,6 +682,12 @@ export const ExecutionProgramSchema = z.object({
|
|
|
682
682
|
// pairs that Playwright will inject as `extraHTTPHeaders` on the
|
|
683
683
|
// BrowserContext so protected staging/preview URLs load successfully.
|
|
684
684
|
environmentHttpHeaders: z.record(z.string().min(1), z.string().min(1)).optional(),
|
|
685
|
+
// Per-project opt-out for analytics blocking (AUT-234). Optional and WITHOUT a
|
|
686
|
+
// Zod default for the same signing reason as `programSchemaVersion` above:
|
|
687
|
+
// this schema is reused in signature verification, so a default would mutate
|
|
688
|
+
// the signed payload and break symmetry for programs signed without the field.
|
|
689
|
+
// Absent / true ⇒ block (engine default); only an explicit false disables.
|
|
690
|
+
blockAnalytics: z.boolean().optional(),
|
|
685
691
|
}).strict().superRefine((value, ctx) => {
|
|
686
692
|
if (value.mediaMode !== value.artifactPlan.mediaMode) {
|
|
687
693
|
ctx.addIssue({
|
|
@@ -614,6 +614,14 @@ export interface ExecutionProgram {
|
|
|
614
614
|
* and embedded in the signed program envelope.
|
|
615
615
|
*/
|
|
616
616
|
environmentHttpHeaders?: Record<string, string>;
|
|
617
|
+
/**
|
|
618
|
+
* Per-project opt-out for third-party analytics blocking (AUT-234). Default
|
|
619
|
+
* behavior (field absent / `true`) blocks analytics beacons during capture so
|
|
620
|
+
* a run never registers a phantom "visit". Set to `false` only when the
|
|
621
|
+
* project disabled it (`projects.block_analytics_enabled = false`). Server-set
|
|
622
|
+
* BEFORE signing, so it lives inside the signed envelope.
|
|
623
|
+
*/
|
|
624
|
+
blockAnalytics?: boolean;
|
|
617
625
|
}
|
|
618
626
|
export interface CircuitBreakerConfig {
|
|
619
627
|
/** Max recovery attempts per opcode. Default: 3 */
|
|
@@ -654,6 +662,13 @@ export interface OpcodeResult {
|
|
|
654
662
|
/** Error message if failed */
|
|
655
663
|
error?: string;
|
|
656
664
|
}
|
|
665
|
+
/**
|
|
666
|
+
* Structured failure category, set on top of the free-text `error`. Lets the
|
|
667
|
+
* server surface a specific preset state instead of a generic "failed":
|
|
668
|
+
* `login_failed` = an opcode inside the login window (credential typing → first
|
|
669
|
+
* post-login assertion) failed, so the credentials are likely wrong.
|
|
670
|
+
*/
|
|
671
|
+
export type RunFailureKind = 'login_failed';
|
|
657
672
|
export interface VariantResult {
|
|
658
673
|
variantId: string;
|
|
659
674
|
success: boolean;
|
|
@@ -670,6 +685,8 @@ export interface VariantResult {
|
|
|
670
685
|
*/
|
|
671
686
|
detectedAppVersion?: string | null;
|
|
672
687
|
error?: string;
|
|
688
|
+
/** Set when the failure falls inside the login window — see RunFailureKind. */
|
|
689
|
+
failureKind?: RunFailureKind;
|
|
673
690
|
}
|
|
674
691
|
export interface ArtifactResult {
|
|
675
692
|
mediaMode: MediaMode;
|
|
@@ -821,6 +838,8 @@ export interface RunResult {
|
|
|
821
838
|
*/
|
|
822
839
|
warnings?: string[];
|
|
823
840
|
error?: string;
|
|
841
|
+
/** First non-null variant `failureKind` — see RunFailureKind. */
|
|
842
|
+
failureKind?: RunFailureKind;
|
|
824
843
|
}
|
|
825
844
|
export interface WaitCondition {
|
|
826
845
|
selector: string;
|
package/dist/log-collector.d.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import type { LogEntry } from './logger.js';
|
|
2
2
|
import type { ProgressEvent } from './opcode-runner.js';
|
|
3
|
+
import type { RunFailureKind } from './execution-types.js';
|
|
3
4
|
export interface OpcodeContext {
|
|
4
5
|
index: number;
|
|
5
6
|
kind: string;
|
|
@@ -24,6 +25,9 @@ export interface ErrorLogsPayload {
|
|
|
24
25
|
entries: ExportedLogEntry[];
|
|
25
26
|
envName?: string;
|
|
26
27
|
endedAt: string;
|
|
28
|
+
/** Structured failure category (e.g. 'login_failed'). Lets the server tag the
|
|
29
|
+
* preset's capture_failed event with a specific kind. */
|
|
30
|
+
failureKind?: RunFailureKind;
|
|
27
31
|
}
|
|
28
32
|
export declare class LogCollector {
|
|
29
33
|
private entries;
|
|
@@ -35,7 +39,7 @@ export declare class LogCollector {
|
|
|
35
39
|
onProgress(event: ProgressEvent): void;
|
|
36
40
|
snapshot(): ExportedLogEntry[];
|
|
37
41
|
size(): number;
|
|
38
|
-
flushTo(runId: string, presetId: string, apiBaseUrl: string, apiKey: string, envName?: string): Promise<{
|
|
42
|
+
flushTo(runId: string, presetId: string, apiBaseUrl: string, apiKey: string, envName?: string, failureKind?: RunFailureKind): Promise<{
|
|
39
43
|
ok: boolean;
|
|
40
44
|
status?: number;
|
|
41
45
|
error?: string;
|
package/dist/log-collector.js
CHANGED
|
@@ -61,7 +61,7 @@ export class LogCollector {
|
|
|
61
61
|
size() {
|
|
62
62
|
return this.entries.length;
|
|
63
63
|
}
|
|
64
|
-
async flushTo(runId, presetId, apiBaseUrl, apiKey, envName) {
|
|
64
|
+
async flushTo(runId, presetId, apiBaseUrl, apiKey, envName, failureKind) {
|
|
65
65
|
if (this.entries.length === 0) {
|
|
66
66
|
return { ok: true, status: 204 };
|
|
67
67
|
}
|
|
@@ -71,6 +71,7 @@ export class LogCollector {
|
|
|
71
71
|
entries: this.entries,
|
|
72
72
|
envName,
|
|
73
73
|
endedAt: new Date().toISOString(),
|
|
74
|
+
...(failureKind ? { failureKind } : {}),
|
|
74
75
|
};
|
|
75
76
|
const controller = new AbortController();
|
|
76
77
|
const timeout = setTimeout(() => controller.abort(), FLUSH_TIMEOUT_MS);
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Login detection — pure, dependency-light helpers shared by the opcode runner
|
|
3
|
+
* (to classify a failure as login-related) and credential substitution
|
|
4
|
+
* (opcode-actions.ts owns the actual `{{token}}` replacement).
|
|
5
|
+
*
|
|
6
|
+
* A program "logs in" when it types credentials: a TYPE opcode whose text (or a
|
|
7
|
+
* locale override), or a NAVIGATE opcode whose url, contains the `{{email}}` / `{{password}}` placeholder. The
|
|
8
|
+
* server resolves these placeholders at capture time from the preset's linked
|
|
9
|
+
* credentials account; the stored program only ever holds the placeholders.
|
|
10
|
+
*
|
|
11
|
+
* This module imports ONLY types so it stays safe to reference from any layer.
|
|
12
|
+
*/
|
|
13
|
+
import type { ExecutionOpcode } from './execution-types.js';
|
|
14
|
+
/** The credential placeholder tokens — the contract between the authored
|
|
15
|
+
* program and the server-side substitution. Single source of truth. */
|
|
16
|
+
export declare const CREDENTIAL_TOKEN_EMAIL = "{{email}}";
|
|
17
|
+
export declare const CREDENTIAL_TOKEN_PASSWORD = "{{password}}";
|
|
18
|
+
export declare const CREDENTIAL_TOKEN_LOGIN_URL = "{{loginUrl}}";
|
|
19
|
+
/**
|
|
20
|
+
* Which credential fields the program actually requires. Lets a caller compare
|
|
21
|
+
* against the linked account's available fields (has_email / has_password)
|
|
22
|
+
* without decrypting anything.
|
|
23
|
+
*/
|
|
24
|
+
export declare function programRequiredCredentialFields(steps: ExecutionOpcode[]): {
|
|
25
|
+
email: boolean;
|
|
26
|
+
password: boolean;
|
|
27
|
+
};
|
|
28
|
+
/** True when the program logs in (types an email or password). */
|
|
29
|
+
export declare function programRequiresLogin(steps: ExecutionOpcode[]): boolean;
|
|
30
|
+
/**
|
|
31
|
+
* The contiguous index range that constitutes the login flow:
|
|
32
|
+
* - `start` = first credential-typing opcode,
|
|
33
|
+
* - `end` = first post-login assertion after the last credential opcode —
|
|
34
|
+
* EITHER a standalone assertion opcode (ASSERT_ROUTE /
|
|
35
|
+
* ASSERT_SURFACE / WAIT_FOR) OR, as the generator actually emits,
|
|
36
|
+
* the submit CLICK whose own postcondition asserts the post-login
|
|
37
|
+
* route/element (route_matches / element_visible) — else the last
|
|
38
|
+
* credential opcode itself.
|
|
39
|
+
*
|
|
40
|
+
* Scanning stops at that FIRST asserting opcode, so the window covers the
|
|
41
|
+
* credential typing and the submit (which proves login) but does NOT extend
|
|
42
|
+
* across the post-login navigation. A failure anywhere in `[start, end]` means
|
|
43
|
+
* the login did not go through: a TYPE failing (form gone/changed), the submit
|
|
44
|
+
* failing, or its post-login assertion failing (still on /login). Returns null
|
|
45
|
+
* when the program does not log in.
|
|
46
|
+
*/
|
|
47
|
+
export declare function getLoginWindow(steps: ExecutionOpcode[]): {
|
|
48
|
+
start: number;
|
|
49
|
+
end: number;
|
|
50
|
+
} | null;
|
|
51
|
+
/** True when a failure at `failedIndex` falls inside the login window. */
|
|
52
|
+
export declare function isLoginFailureIndex(steps: ExecutionOpcode[], failedIndex: number): boolean;
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Login detection — pure, dependency-light helpers shared by the opcode runner
|
|
3
|
+
* (to classify a failure as login-related) and credential substitution
|
|
4
|
+
* (opcode-actions.ts owns the actual `{{token}}` replacement).
|
|
5
|
+
*
|
|
6
|
+
* A program "logs in" when it types credentials: a TYPE opcode whose text (or a
|
|
7
|
+
* locale override), or a NAVIGATE opcode whose url, contains the `{{email}}` / `{{password}}` placeholder. The
|
|
8
|
+
* server resolves these placeholders at capture time from the preset's linked
|
|
9
|
+
* credentials account; the stored program only ever holds the placeholders.
|
|
10
|
+
*
|
|
11
|
+
* This module imports ONLY types so it stays safe to reference from any layer.
|
|
12
|
+
*/
|
|
13
|
+
/** The credential placeholder tokens — the contract between the authored
|
|
14
|
+
* program and the server-side substitution. Single source of truth. */
|
|
15
|
+
export const CREDENTIAL_TOKEN_EMAIL = '{{email}}';
|
|
16
|
+
export const CREDENTIAL_TOKEN_PASSWORD = '{{password}}';
|
|
17
|
+
export const CREDENTIAL_TOKEN_LOGIN_URL = '{{loginUrl}}';
|
|
18
|
+
/** Opcode kinds that prove a login advanced past the form: an explicit route /
|
|
19
|
+
* surface assertion, or a wait for a post-login element. The first such opcode
|
|
20
|
+
* after the credential typing closes the "login window". */
|
|
21
|
+
const POSTLOGIN_ASSERTION_KINDS = new Set([
|
|
22
|
+
'ASSERT_ROUTE',
|
|
23
|
+
'ASSERT_SURFACE',
|
|
24
|
+
'WAIT_FOR',
|
|
25
|
+
]);
|
|
26
|
+
/** Postcondition types that assert the login advanced past the form. The
|
|
27
|
+
* canonical generated login flow does NOT emit a standalone assertion opcode —
|
|
28
|
+
* it encodes the post-login check on the submit CLICK's own postcondition
|
|
29
|
+
* (route_matches the post-login route, or element_visible a post-login
|
|
30
|
+
* element). The first opcode after the credentials whose postcondition is one
|
|
31
|
+
* of these closes the window too, so the submit click is inside it. */
|
|
32
|
+
const POSTLOGIN_POSTCONDITION_TYPES = new Set([
|
|
33
|
+
'route_matches',
|
|
34
|
+
'element_visible',
|
|
35
|
+
]);
|
|
36
|
+
/** Every text field of an opcode that may carry a credential placeholder. */
|
|
37
|
+
function credentialTexts(opcode) {
|
|
38
|
+
if (opcode.kind === 'TYPE') {
|
|
39
|
+
const texts = [opcode.text];
|
|
40
|
+
if (opcode.textByLocale)
|
|
41
|
+
texts.push(...Object.values(opcode.textByLocale));
|
|
42
|
+
return texts;
|
|
43
|
+
}
|
|
44
|
+
if (opcode.kind === 'NAVIGATE')
|
|
45
|
+
return [opcode.url];
|
|
46
|
+
return [];
|
|
47
|
+
}
|
|
48
|
+
/** True when the opcode types/uses email or password credentials. */
|
|
49
|
+
function isCredentialOpcode(opcode) {
|
|
50
|
+
return credentialTexts(opcode).some((text) => typeof text === 'string' &&
|
|
51
|
+
(text.includes(CREDENTIAL_TOKEN_EMAIL) || text.includes(CREDENTIAL_TOKEN_PASSWORD)));
|
|
52
|
+
}
|
|
53
|
+
/**
|
|
54
|
+
* Which credential fields the program actually requires. Lets a caller compare
|
|
55
|
+
* against the linked account's available fields (has_email / has_password)
|
|
56
|
+
* without decrypting anything.
|
|
57
|
+
*/
|
|
58
|
+
export function programRequiredCredentialFields(steps) {
|
|
59
|
+
let email = false;
|
|
60
|
+
let password = false;
|
|
61
|
+
for (const opcode of steps) {
|
|
62
|
+
for (const text of credentialTexts(opcode)) {
|
|
63
|
+
if (typeof text !== 'string')
|
|
64
|
+
continue;
|
|
65
|
+
if (text.includes(CREDENTIAL_TOKEN_EMAIL))
|
|
66
|
+
email = true;
|
|
67
|
+
if (text.includes(CREDENTIAL_TOKEN_PASSWORD))
|
|
68
|
+
password = true;
|
|
69
|
+
}
|
|
70
|
+
if (email && password)
|
|
71
|
+
break;
|
|
72
|
+
}
|
|
73
|
+
return { email, password };
|
|
74
|
+
}
|
|
75
|
+
/** True when the program logs in (types an email or password). */
|
|
76
|
+
export function programRequiresLogin(steps) {
|
|
77
|
+
const { email, password } = programRequiredCredentialFields(steps);
|
|
78
|
+
return email || password;
|
|
79
|
+
}
|
|
80
|
+
/**
|
|
81
|
+
* The contiguous index range that constitutes the login flow:
|
|
82
|
+
* - `start` = first credential-typing opcode,
|
|
83
|
+
* - `end` = first post-login assertion after the last credential opcode —
|
|
84
|
+
* EITHER a standalone assertion opcode (ASSERT_ROUTE /
|
|
85
|
+
* ASSERT_SURFACE / WAIT_FOR) OR, as the generator actually emits,
|
|
86
|
+
* the submit CLICK whose own postcondition asserts the post-login
|
|
87
|
+
* route/element (route_matches / element_visible) — else the last
|
|
88
|
+
* credential opcode itself.
|
|
89
|
+
*
|
|
90
|
+
* Scanning stops at that FIRST asserting opcode, so the window covers the
|
|
91
|
+
* credential typing and the submit (which proves login) but does NOT extend
|
|
92
|
+
* across the post-login navigation. A failure anywhere in `[start, end]` means
|
|
93
|
+
* the login did not go through: a TYPE failing (form gone/changed), the submit
|
|
94
|
+
* failing, or its post-login assertion failing (still on /login). Returns null
|
|
95
|
+
* when the program does not log in.
|
|
96
|
+
*/
|
|
97
|
+
export function getLoginWindow(steps) {
|
|
98
|
+
let start = -1;
|
|
99
|
+
let lastCred = -1;
|
|
100
|
+
for (let i = 0; i < steps.length; i++) {
|
|
101
|
+
if (isCredentialOpcode(steps[i])) {
|
|
102
|
+
if (start < 0)
|
|
103
|
+
start = i;
|
|
104
|
+
lastCred = i;
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
if (start < 0)
|
|
108
|
+
return null;
|
|
109
|
+
let end = lastCred;
|
|
110
|
+
for (let i = lastCred + 1; i < steps.length; i++) {
|
|
111
|
+
const step = steps[i];
|
|
112
|
+
if (POSTLOGIN_ASSERTION_KINDS.has(step.kind) ||
|
|
113
|
+
(step.postcondition != null &&
|
|
114
|
+
POSTLOGIN_POSTCONDITION_TYPES.has(step.postcondition.type))) {
|
|
115
|
+
end = i;
|
|
116
|
+
break;
|
|
117
|
+
}
|
|
118
|
+
}
|
|
119
|
+
return { start, end };
|
|
120
|
+
}
|
|
121
|
+
/** True when a failure at `failedIndex` falls inside the login window. */
|
|
122
|
+
export function isLoginFailureIndex(steps, failedIndex) {
|
|
123
|
+
const window = getLoginWindow(steps);
|
|
124
|
+
return window !== null && failedIndex >= window.start && failedIndex <= window.end;
|
|
125
|
+
}
|
|
126
|
+
//# sourceMappingURL=login-detection.js.map
|
package/dist/opcode-actions.js
CHANGED
|
@@ -6,6 +6,7 @@
|
|
|
6
6
|
*/
|
|
7
7
|
import { VARIANT_PLACEHOLDER } from './execution-types.js';
|
|
8
8
|
import { dismissAllOverlays } from './overlay-engine.js';
|
|
9
|
+
import { CREDENTIAL_TOKEN_EMAIL, CREDENTIAL_TOKEN_PASSWORD, CREDENTIAL_TOKEN_LOGIN_URL, } from './login-detection.js';
|
|
9
10
|
/**
|
|
10
11
|
* Substitute credential placeholders inside opcode text fields.
|
|
11
12
|
* Only the {{email}}, {{password}} and {{loginUrl}} tokens are replaced.
|
|
@@ -16,9 +17,9 @@ export function substituteCredentialPlaceholders(text, credentials) {
|
|
|
16
17
|
return text;
|
|
17
18
|
}
|
|
18
19
|
return text
|
|
19
|
-
.replaceAll('{{email}}', credentials?.email ?? '')
|
|
20
|
-
.replaceAll('{{password}}', credentials?.password ?? '')
|
|
21
|
-
.replaceAll('{{loginUrl}}', credentials?.loginUrl ?? '');
|
|
20
|
+
.replaceAll(CREDENTIAL_TOKEN_EMAIL, credentials?.email ?? '')
|
|
21
|
+
.replaceAll(CREDENTIAL_TOKEN_PASSWORD, credentials?.password ?? '')
|
|
22
|
+
.replaceAll(CREDENTIAL_TOKEN_LOGIN_URL, credentials?.loginUrl ?? '');
|
|
22
23
|
}
|
|
23
24
|
/**
|
|
24
25
|
* Returns the list of credential placeholders (`{{email}}`, `{{password}}`,
|
|
@@ -32,14 +33,14 @@ export function findUnresolvedCredentialPlaceholders(text, credentials) {
|
|
|
32
33
|
if (typeof text !== 'string' || !text.includes('{{'))
|
|
33
34
|
return [];
|
|
34
35
|
const missing = [];
|
|
35
|
-
if (text.includes('{{email}}') && !credentials?.email?.trim()) {
|
|
36
|
-
missing.push('{{email}}');
|
|
36
|
+
if (text.includes(CREDENTIAL_TOKEN_EMAIL) && !credentials?.email?.trim()) {
|
|
37
|
+
missing.push(CREDENTIAL_TOKEN_EMAIL);
|
|
37
38
|
}
|
|
38
|
-
if (text.includes('{{password}}') && !credentials?.password) {
|
|
39
|
-
missing.push('{{password}}');
|
|
39
|
+
if (text.includes(CREDENTIAL_TOKEN_PASSWORD) && !credentials?.password) {
|
|
40
|
+
missing.push(CREDENTIAL_TOKEN_PASSWORD);
|
|
40
41
|
}
|
|
41
|
-
if (text.includes('{{loginUrl}}') && !credentials?.loginUrl?.trim()) {
|
|
42
|
-
missing.push('{{loginUrl}}');
|
|
42
|
+
if (text.includes(CREDENTIAL_TOKEN_LOGIN_URL) && !credentials?.loginUrl?.trim()) {
|
|
43
|
+
missing.push(CREDENTIAL_TOKEN_LOGIN_URL);
|
|
43
44
|
}
|
|
44
45
|
return missing;
|
|
45
46
|
}
|
package/dist/opcode-runner.js
CHANGED
|
@@ -14,6 +14,7 @@ import { smartWaitForStability } from './smart-wait.js';
|
|
|
14
14
|
import { verifyCaptureQuality } from './capture-verification.js';
|
|
15
15
|
import { generateAltText } from './alt-text.js';
|
|
16
16
|
import { executeOpcodeCoreAction } from './opcode-actions.js';
|
|
17
|
+
import { isLoginFailureIndex } from './login-detection.js';
|
|
17
18
|
import { logger } from './logger.js';
|
|
18
19
|
function formatOpcodeDebug(opcode) {
|
|
19
20
|
const fields = [];
|
|
@@ -172,6 +173,7 @@ export async function executeProgram(program, createAdapter, options = {}) {
|
|
|
172
173
|
detectedAppVersion,
|
|
173
174
|
warnings: aggregatedWarnings.length ? aggregatedWarnings : undefined,
|
|
174
175
|
error: aborted ? 'aborted' : (success ? undefined : completedVariantResults.find(v => !v.success)?.error),
|
|
176
|
+
failureKind: success ? undefined : completedVariantResults.find(v => v.failureKind)?.failureKind,
|
|
175
177
|
};
|
|
176
178
|
}
|
|
177
179
|
// ── Variant execution ───────────────────────────────────────────────
|
|
@@ -247,6 +249,12 @@ async function executeVariant(program, variant, createAdapter, recoveryChain, te
|
|
|
247
249
|
durationMs: Date.now() - startTime,
|
|
248
250
|
artifacts,
|
|
249
251
|
error: `opcode ${i} (${opcode.kind}) failed: ${result.error}`,
|
|
252
|
+
// Tag failures inside the login window (credential typing → first
|
|
253
|
+
// post-login assertion) so the server can surface "login failed"
|
|
254
|
+
// instead of a generic capture failure.
|
|
255
|
+
...(isLoginFailureIndex(program.steps, i)
|
|
256
|
+
? { failureKind: 'login_failed' }
|
|
257
|
+
: {}),
|
|
250
258
|
};
|
|
251
259
|
}
|
|
252
260
|
}
|
|
@@ -1139,6 +1139,7 @@ export declare const SignedExecutionProgramEnvelopeSchema: z.ZodObject<{
|
|
|
1139
1139
|
deviceConfigs: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodRecord<z.ZodString, z.ZodUnknown>>>;
|
|
1140
1140
|
publicUrl: z.ZodOptional<z.ZodString>;
|
|
1141
1141
|
environmentHttpHeaders: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
|
|
1142
|
+
blockAnalytics: z.ZodOptional<z.ZodBoolean>;
|
|
1142
1143
|
}, z.core.$strict>;
|
|
1143
1144
|
signature: z.ZodString;
|
|
1144
1145
|
meta: z.ZodOptional<z.ZodObject<{
|
package/dist/types.d.ts
CHANGED
|
@@ -257,6 +257,13 @@ export interface BrowserOptions {
|
|
|
257
257
|
* secrets here.
|
|
258
258
|
*/
|
|
259
259
|
extraHttpHeaders?: Record<string, string>;
|
|
260
|
+
/**
|
|
261
|
+
* When `false`, the engine does NOT block third-party web-analytics beacons
|
|
262
|
+
* during capture (AUT-234). Default behavior (`undefined` / `true`) blocks
|
|
263
|
+
* them so a capture never registers a phantom "visit" in the site's
|
|
264
|
+
* analytics. Opt-out is a per-project setting (`projects.block_analytics_enabled`).
|
|
265
|
+
*/
|
|
266
|
+
blockAnalytics?: boolean;
|
|
260
267
|
}
|
|
261
268
|
export interface OutscaleConfig {
|
|
262
269
|
/** Uniform padding on all 4 sides (pixels). */
|
|
@@ -1158,6 +1158,7 @@ export declare const VideoIngestPayloadSchema: z.ZodObject<{
|
|
|
1158
1158
|
deviceConfigs: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodRecord<z.ZodString, z.ZodUnknown>>>;
|
|
1159
1159
|
publicUrl: z.ZodOptional<z.ZodString>;
|
|
1160
1160
|
environmentHttpHeaders: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
|
|
1161
|
+
blockAnalytics: z.ZodOptional<z.ZodBoolean>;
|
|
1161
1162
|
}, z.core.$strict>;
|
|
1162
1163
|
narration: z.ZodOptional<z.ZodObject<{
|
|
1163
1164
|
voice: z.ZodString;
|