autokap 1.9.0 → 1.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/analytics-blocklist.d.ts +85 -0
- package/dist/analytics-blocklist.js +201 -0
- package/dist/browser-pool.d.ts +1 -0
- package/dist/browser-pool.js +10 -1
- package/dist/browser.js +25 -7
- package/dist/cli-runner.js +1 -0
- package/dist/execution-schema.d.ts +2 -0
- package/dist/execution-schema.js +6 -0
- package/dist/execution-types.d.ts +8 -0
- package/dist/program-signing.d.ts +1 -0
- package/dist/types.d.ts +7 -0
- package/dist/video-narration-schema.d.ts +1 -0
- package/package.json +1 -1
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
import type { BrowserContext } from 'playwright';
|
|
2
|
+
/**
|
|
3
|
+
* Privacy-signal request headers attached to every capture context.
|
|
4
|
+
*
|
|
5
|
+
* These are standard, widely-sent browser headers (Firefox sends `DNT`, Brave
|
|
6
|
+
* sends `Sec-GPC`), so they're invisible to origin anti-bot defenses — they
|
|
7
|
+
* only ever observe a normal first-party page load. Privacy-first analytics
|
|
8
|
+
* (Plausible, Fathom, …) honor them and suppress the pageview, which
|
|
9
|
+
* complements the network-level blocking below for the providers that respect
|
|
10
|
+
* the signal. Free, zero-risk belt-and-suspenders.
|
|
11
|
+
*/
|
|
12
|
+
export declare const PRIVACY_HEADERS: Record<string, string>;
|
|
13
|
+
/**
|
|
14
|
+
* Hostnames of DEDICATED web-analytics / product-telemetry / session-replay
|
|
15
|
+
* endpoints. These domains serve ONLY analytics, so aborting requests to them
|
|
16
|
+
* during a capture has zero functional impact on the page being screenshotted —
|
|
17
|
+
* it only prevents AutoKap's automated visit from registering as a phantom
|
|
18
|
+
* "visitor" in the site owner's analytics (AUT-234).
|
|
19
|
+
*
|
|
20
|
+
* Matched by exact host OR sub-domain suffix
|
|
21
|
+
* (`host === h || host.endsWith('.' + h)`), so regional shards
|
|
22
|
+
* (`eu.i.posthog.com`, `region1.google-analytics.com`, `*.matomo.cloud`, …) are
|
|
23
|
+
* covered without enumerating each one.
|
|
24
|
+
*
|
|
25
|
+
* Deliberately ABSENT: `googletagmanager.com`. GTM can inject functional tags,
|
|
26
|
+
* and loading `gtm.js` does NOT itself record a visit — the GA *beacon* to
|
|
27
|
+
* `google-analytics.com` does, and that host IS blocked. So we neutralize the
|
|
28
|
+
* GA visit without risking a broken page.
|
|
29
|
+
*
|
|
30
|
+
* Self-hosted / first-party-proxied analytics (e.g. Plausible reverse-proxied
|
|
31
|
+
* on the site's own domain) is intentionally NOT covered: it reads as
|
|
32
|
+
* first-party (see the `isFirstPartyUrl` guard in {@link installAnalyticsBlock})
|
|
33
|
+
* and is impossible to detect universally. That edge case is out of scope by
|
|
34
|
+
* design — catching it would depend on per-site configuration.
|
|
35
|
+
*/
|
|
36
|
+
export declare const ANALYTICS_HOSTS: readonly string[];
|
|
37
|
+
/**
|
|
38
|
+
* True when `url` targets a dedicated web-analytics *ingestion* endpoint (see
|
|
39
|
+
* {@link ANALYTICS_HOSTS}). Host-suffix aware so regional/sub-domain shards
|
|
40
|
+
* match their parent. Fail-OPEN on unparseable or non-http(s) URLs (returns
|
|
41
|
+
* `false`): we never abort a request we can't confidently classify.
|
|
42
|
+
*
|
|
43
|
+
* For hosts that co-serve functional config on the same domain as their
|
|
44
|
+
* analytics (PostHog), only the event-capture paths count — feature-flag /
|
|
45
|
+
* config / library paths are preserved so the captured UI never changes
|
|
46
|
+
* (see {@link POSTHOG_FUNCTIONAL_PATH_RE}).
|
|
47
|
+
*/
|
|
48
|
+
export declare function isAnalyticsRequest(url: string): boolean;
|
|
49
|
+
/**
|
|
50
|
+
* The per-request block decision, factored out of {@link installAnalyticsBlock}
|
|
51
|
+
* so the guard composition is unit-testable without a real browser context.
|
|
52
|
+
*
|
|
53
|
+
* Blocks ONLY a third-party analytics request. A first-party one — analytics
|
|
54
|
+
* self-hosted or reverse-proxied on the captured site's OWN domain — is
|
|
55
|
+
* preserved so we can never break the site's own functionality.
|
|
56
|
+
*
|
|
57
|
+
* `pageUrl` is the request's frame URL. When it's empty/unknown (a request in
|
|
58
|
+
* flight before the first navigation commits, a detached/teardown frame, some
|
|
59
|
+
* worker-originated requests) `isFirstPartyUrl` fail-OPENS to first-party, so
|
|
60
|
+
* the beacon is NOT aborted. That's the deliberate safe direction — never break
|
|
61
|
+
* a page — at the cost of a rare phantom-visit leak in that narrow window; real
|
|
62
|
+
* analytics beacons fire after navigation commits, so the frame URL is present.
|
|
63
|
+
*/
|
|
64
|
+
export declare function shouldBlockAnalyticsRequest(pageUrl: string, requestUrl: string): boolean;
|
|
65
|
+
/**
|
|
66
|
+
* Install a context-level route that aborts outgoing requests to dedicated
|
|
67
|
+
* third-party analytics endpoints, so capturing a site never registers a
|
|
68
|
+
* phantom "visit" in its analytics (AUT-234).
|
|
69
|
+
*
|
|
70
|
+
* Only THIRD-party analytics is blocked (the `!isFirstPartyUrl` guard): a
|
|
71
|
+
* first-party request is never aborted, so we can never break the captured
|
|
72
|
+
* site's own functionality. Aborting a third-party beacon is invisible to the
|
|
73
|
+
* origin's anti-bot (it only ever sees a normal first-party page load) — exactly
|
|
74
|
+
* what an ad-blocker does — so this carries no risk of tripping bot defenses.
|
|
75
|
+
*
|
|
76
|
+
* Registered at the CONTEXT level so it (a) covers every page/frame in the
|
|
77
|
+
* context, (b) survives `page.unrouteAll()` from `clearRouteInterception()`
|
|
78
|
+
* (which only clears page-level routes), and (c) composes with the page-level
|
|
79
|
+
* mock routes from `setupRouteInterception()` — page routes run first; this
|
|
80
|
+
* catch-all `fallback()`s every non-analytics request back to the network (or
|
|
81
|
+
* the next handler). Aborting also lowers in-flight count, which only helps
|
|
82
|
+
* `networkidle` settle. The adaptive-wait progress signal already ignores
|
|
83
|
+
* third-party traffic (AUT-240), so there's no interaction there.
|
|
84
|
+
*/
|
|
85
|
+
export declare function installAnalyticsBlock(context: BrowserContext): Promise<void>;
|
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
import { isFirstPartyUrl } from './security.js';
|
|
2
|
+
/**
|
|
3
|
+
* Privacy-signal request headers attached to every capture context.
|
|
4
|
+
*
|
|
5
|
+
* These are standard, widely-sent browser headers (Firefox sends `DNT`, Brave
|
|
6
|
+
* sends `Sec-GPC`), so they're invisible to origin anti-bot defenses — they
|
|
7
|
+
* only ever observe a normal first-party page load. Privacy-first analytics
|
|
8
|
+
* (Plausible, Fathom, …) honor them and suppress the pageview, which
|
|
9
|
+
* complements the network-level blocking below for the providers that respect
|
|
10
|
+
* the signal. Free, zero-risk belt-and-suspenders.
|
|
11
|
+
*/
|
|
12
|
+
export const PRIVACY_HEADERS = {
|
|
13
|
+
DNT: '1',
|
|
14
|
+
'Sec-GPC': '1',
|
|
15
|
+
};
|
|
16
|
+
/**
 * Blocklist of hostnames belonging to dedicated web-analytics,
 * product-telemetry and session-replay services (AUT-234). Requests to these
 * hosts are aborted during a capture so that AutoKap's automated visit does
 * not show up as a phantom "visitor" in the site owner's analytics.
 *
 * Matching rule (see `matchesAnalyticsHost`): a request host matches when it
 * equals an entry or ends with `'.' + entry`. Regional shards such as
 * `eu.i.posthog.com` or `*.matomo.cloud` are therefore covered by their parent
 * entry; the explicit sub-domain entries below (`ssl.` / `region1.`
 * google-analytics.com, `i.posthog.com`) are redundant with their parents and
 * kept only for readability.
 *
 * The listed domains are intended to serve analytics only. PostHog is the
 * exception: it co-serves feature flags, remote config and its JS library from
 * the same hosts, so `isAnalyticsRequest` exempts those paths
 * (`/decide`, `/flags`, `/static`, `/array`) and blocks only event ingestion.
 *
 * Deliberately NOT listed:
 *  - `googletagmanager.com` — GTM can inject functional tags, and loading
 *    `gtm.js` does not itself record a visit; the GA beacon to
 *    `google-analytics.com` does, and that host is listed.
 *  - Self-hosted / first-party-proxied analytics (e.g. Plausible reverse-proxied
 *    on the site's own domain) — it reads as first-party (see the
 *    `isFirstPartyUrl` guard used by `shouldBlockAnalyticsRequest`) and cannot
 *    be detected without per-site configuration. Out of scope by design.
 */
export const ANALYTICS_HOSTS = [
    // Google Analytics / GA4 / Universal Analytics collection endpoints
    'google-analytics.com', 'analytics.google.com', 'ssl.google-analytics.com',
    'region1.google-analytics.com', 'stats.g.doubleclick.net',
    // Plausible
    'plausible.io',
    // PostHog — ingestion paths only; functional paths are exempted in
    // isAnalyticsRequest so a flag-gated app never renders with default flags.
    'posthog.com', 'i.posthog.com',
    // Matomo / Piwik (cloud)
    'matomo.cloud', 'matomo.org',
    // Segment
    'segment.io', 'segment.com',
    // Mixpanel
    'mixpanel.com', 'mxpnl.com',
    // Amplitude
    'amplitude.com',
    // Heap
    'heapanalytics.com', 'heap.io',
    // Hotjar (heatmaps / session replay)
    'hotjar.com', 'hotjar.io',
    // Microsoft Clarity (session replay)
    'clarity.ms',
    // Cloudflare Web Analytics
    'cloudflareinsights.com',
    // Vercel Analytics / Speed Insights
    'vercel-insights.com', 'va.vercel-scripts.com',
    // Fathom
    'usefathom.com',
    // Session replay / heatmaps
    'fullstory.com', 'mouseflow.com', 'crazyegg.com',
    // Yandex Metrica
    'mc.yandex.ru',
    // Quantcast
    'quantserve.com', 'quantcount.com',
    // Adobe Analytics (Omniture)
    'omtrdc.net', '2o7.net',
    // Misc product analytics
    'pendo.io', 'woopra.com', 'kissmetrics.io',
];
// Lower-cased lookup set built once at module load; request hosts are
// lower-cased by the caller before being compared against it.
const ANALYTICS_HOST_SET = new Set(Array.from(ANALYTICS_HOSTS, (name) => name.toLowerCase()));
|
|
97
|
+
/**
|
|
98
|
+
* Path prefixes on PostHog hosts that are NOT analytics ingestion: feature
|
|
99
|
+
* flags (`/decide`, `/flags`), the JS library and its remote config
|
|
100
|
+
* (`/static`, `/array`). PostHog co-serves these from the SAME host as its
|
|
101
|
+
* event capture, so blocking them would make a flag-gated app render with
|
|
102
|
+
* default flags during capture — a visible change to the captured UI. We only
|
|
103
|
+
* block the ingestion paths (`/e/`, `/i/v0/e/`, `/batch/`, `/capture/`, `/s/`),
|
|
104
|
+
* which is what records the phantom visit.
|
|
105
|
+
*/
|
|
106
|
+
// Matches a pathname whose first segment STARTS with one of the functional
// prefixes. `\b` is a word boundary, not a path-segment boundary: `/decide`,
// `/decide/…`, and also `/decide-x` or `/static.js` match (and are therefore
// never blocked by isAnalyticsRequest), while `/decided` does not. The `i`
// flag makes the prefix comparison case-insensitive.
const POSTHOG_FUNCTIONAL_PATH_RE = /^\/(decide|flags|static|array)\b/i;
|
|
107
|
+
/**
 * True when `host` is a blocklisted analytics host or a sub-domain of one.
 *
 * Equivalent to `host === h || host.endsWith('.' + h)` for some entry `h` of
 * ANALYTICS_HOST_SET, but implemented by walking up the label chain
 * (`a.b.example.com` → `b.example.com` → `example.com` → `com`) and doing one
 * set lookup per step instead of scanning every blocklist entry.
 *
 * @param host Lower-cased hostname (the caller is responsible for lower-casing).
 * @returns Whether the host equals, or is a dot-separated sub-domain of, an entry.
 */
function matchesAnalyticsHost(host) {
    let candidate = host;
    for (;;) {
        if (ANALYTICS_HOST_SET.has(candidate)) {
            return true;
        }
        const dotAt = candidate.indexOf('.');
        if (dotAt === -1) {
            // No parent domain left to try.
            return false;
        }
        candidate = candidate.slice(dotAt + 1);
    }
}
|
|
116
|
+
/**
 * True when `host` is `posthog.com` itself or any sub-domain of it
 * (e.g. `eu.i.posthog.com`). Look-alikes such as `notposthog.com` or
 * `posthog.com.example.org` do not match.
 *
 * @param host Lower-cased hostname.
 */
function isPosthogHost(host) {
    const root = 'posthog.com';
    if (host === root) {
        return true;
    }
    return host.endsWith(`.${root}`);
}
|
|
119
|
+
/**
 * True when `url` targets a dedicated web-analytics *ingestion* endpoint (see
 * {@link ANALYTICS_HOSTS}). Matching is host-suffix aware, so regional or
 * sub-domain shards match their parent entry.
 *
 * Fail-OPEN: an unparseable URL or a non-http(s) scheme returns `false`, i.e.
 * the request is let through. A request that cannot be confidently classified
 * is never aborted.
 *
 * The hostname is normalized before matching: lower-cased, and a single
 * trailing root dot is removed. The URL parser keeps that dot
 * (`https://google-analytics.com./…` has hostname `google-analytics.com.`),
 * which would otherwise evade both the blocklist and the PostHog exemption.
 *
 * For hosts that co-serve functional config on the same domain as their
 * analytics (PostHog), only the event-capture paths count: paths matching
 * {@link POSTHOG_FUNCTIONAL_PATH_RE} (feature flags, remote config, JS library)
 * return `false` so the captured UI does not change.
 *
 * @param url Absolute request URL.
 * @returns `true` only for an http(s) request to a blocklisted analytics host
 *          that is not an exempted PostHog functional path.
 */
export function isAnalyticsRequest(url) {
    let parsed;
    try {
        parsed = new URL(url);
    }
    catch {
        return false;
    }
    if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') {
        return false;
    }
    // Normalize once so BOTH helpers below see the same host; normalizing in
    // only one of them could block a PostHog functional path on an FQDN host.
    const host = parsed.hostname.toLowerCase().replace(/\.$/, '');
    if (!matchesAnalyticsHost(host)) {
        return false;
    }
    const isPosthogFunctionalPath = isPosthogHost(host) && POSTHOG_FUNCTIONAL_PATH_RE.test(parsed.pathname);
    return !isPosthogFunctionalPath;
}
|
|
148
|
+
/**
 * Per-request block decision, kept separate from {@link installAnalyticsBlock}
 * so it can be unit-tested without a real browser context.
 *
 * A request is blocked only when BOTH hold:
 *  1. `requestUrl` is an analytics ingestion request ({@link isAnalyticsRequest}), and
 *  2. it is NOT first-party relative to `pageUrl` (`isFirstPartyUrl` from
 *     `./security.js`).
 *
 * Analytics that is self-hosted or reverse-proxied on the captured site's own
 * domain is therefore left alone, so the site's own functionality is not broken.
 *
 * `pageUrl` is the URL of the frame that issued the request and may be empty
 * when unknown (for example before the first navigation commits, for a
 * detached frame, or for some worker-originated requests). Per the original
 * design note, `isFirstPartyUrl` treats an empty page URL as first-party, so
 * such a request is NOT aborted — the deliberate "never break a page"
 * direction, at the cost of a rare phantom-visit leak in that window.
 * NOTE(review): that behavior lives in `./security.js` and is not visible
 * here — confirm against its implementation.
 *
 * @param pageUrl    URL of the requesting frame, or `''` when unknown.
 * @param requestUrl Absolute URL of the outgoing request.
 * @returns `true` when the request should be aborted.
 */
export function shouldBlockAnalyticsRequest(pageUrl, requestUrl) {
    if (!isAnalyticsRequest(requestUrl)) {
        return false;
    }
    return !isFirstPartyUrl(pageUrl, requestUrl);
}
|
|
166
|
+
/**
 * Install a context-level route that aborts outgoing requests to dedicated
 * third-party analytics endpoints, so capturing a site does not register a
 * phantom "visit" in its analytics (AUT-234).
 *
 * Only THIRD-party analytics is aborted (see
 * {@link shouldBlockAnalyticsRequest}); every other request is passed on with
 * `route.fallback()`. Blocked requests are aborted with `'blockedbyclient'`,
 * the same failure a page sees behind an ad-blocker.
 *
 * The page URL is taken from the request's own frame. Playwright's
 * `request.frame()` THROWS (it does not return null) for service-worker
 * requests and for navigation requests issued before their frame exists, so
 * the lookup is guarded: on failure the page URL is treated as unknown (`''`)
 * and the decision falls through to the "unknown page" path of
 * `shouldBlockAnalyticsRequest` instead of raising inside the route handler.
 *
 * Design notes carried over from the original comment (they concern code
 * outside this module — NOTE(review): confirm there): the route is registered
 * on the CONTEXT so that it (a) covers every page/frame in the context,
 * (b) survives `page.unrouteAll()` from `clearRouteInterception()`, which
 * only clears page-level routes, and (c) composes with the page-level mock
 * routes from `setupRouteInterception()` — page routes run first and this
 * catch-all hands non-analytics requests to the next handler or the network.
 * Aborting also lowers the in-flight request count, which can only help
 * `networkidle` settle; the adaptive-wait progress signal is said to ignore
 * third-party traffic already (AUT-240).
 *
 * @param context Playwright browser context to install the route on.
 * @returns Resolves once the route has been registered.
 */
export async function installAnalyticsBlock(context) {
    await context.route('**/*', (route) => {
        const request = route.request();
        const url = request.url();
        let pageUrl = '';
        try {
            pageUrl = request.frame()?.url() || '';
        }
        catch {
            // No frame available (service worker / pre-frame navigation):
            // leave pageUrl empty so the request is classified as "unknown page".
            pageUrl = '';
        }
        if (shouldBlockAnalyticsRequest(pageUrl, url)) {
            // Best-effort abort: a rejection here (e.g. the page closed
            // mid-flight) is intentionally ignored.
            return route.abort('blockedbyclient').catch(() => undefined);
        }
        return route.fallback();
    });
}
|
|
201
|
+
//# sourceMappingURL=analytics-blocklist.js.map
|
package/dist/browser-pool.d.ts
CHANGED
|
@@ -20,6 +20,7 @@ declare class BrowserPool {
|
|
|
20
20
|
colorScheme?: 'light' | 'dark';
|
|
21
21
|
storageState?: BrowserStorageState;
|
|
22
22
|
extraHttpHeaders?: Record<string, string>;
|
|
23
|
+
blockAnalytics?: boolean;
|
|
23
24
|
}): Promise<BrowserContext>;
|
|
24
25
|
/**
|
|
25
26
|
* Release a context back to the pool. Closes the context and unblocks
|
package/dist/browser-pool.js
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { chromium } from 'playwright';
|
|
2
|
+
import { installAnalyticsBlock, PRIVACY_HEADERS } from './analytics-blocklist.js';
|
|
2
3
|
/** Chromium flags for server-side headless operation (used by pool and standalone launches). */
|
|
3
4
|
export const CHROMIUM_ARGS = [
|
|
4
5
|
// Linux/Docker-only: required when running Chromium as root or with limited /dev/shm
|
|
@@ -74,8 +75,16 @@ class BrowserPool {
|
|
|
74
75
|
locale: options?.lang ? options.lang : 'en-US',
|
|
75
76
|
colorScheme: options?.colorScheme ?? 'light',
|
|
76
77
|
storageState: options?.storageState,
|
|
77
|
-
|
|
78
|
+
// Privacy signals first, then merge user/env auth headers (which win on
|
|
79
|
+
// any conflict). See PRIVACY_HEADERS / installAnalyticsBlock (AUT-234).
|
|
80
|
+
extraHTTPHeaders: { ...PRIVACY_HEADERS, ...(extra ?? {}) },
|
|
78
81
|
});
|
|
82
|
+
// Block third-party analytics so pooled (server-side) captures don't
|
|
83
|
+
// register a phantom visit in the captured site's analytics (AUT-234).
|
|
84
|
+
// Skippable per-project via blockAnalytics === false.
|
|
85
|
+
if (options?.blockAnalytics !== false) {
|
|
86
|
+
await installAnalyticsBlock(context);
|
|
87
|
+
}
|
|
79
88
|
this.activeContexts++;
|
|
80
89
|
this.captureCount++;
|
|
81
90
|
return context;
|
package/dist/browser.js
CHANGED
|
@@ -6,6 +6,7 @@ import { join } from 'path';
|
|
|
6
6
|
import { DOM_QUIET_WINDOW_MS, GLOBAL_WAIT_CAP_MS, PIXEL_FALLBACK_DIFF_THRESHOLD, PIXEL_FALLBACK_MAX_PASSES, } from './wait-contract.js';
|
|
7
7
|
import { buildAKNodeRuntimeIndex, deriveInteractiveElementsFromAKTree, disambiguateFingerprint, focusAKTree, fingerprintAKNode, serializeAKTree, } from './ak-tree.js';
|
|
8
8
|
import { isFirstPartyUrl } from './security.js';
|
|
9
|
+
import { installAnalyticsBlock, PRIVACY_HEADERS } from './analytics-blocklist.js';
|
|
9
10
|
/**
|
|
10
11
|
* Set-of-Marks (SoM) annotation: overlays colored [N] badges on each visible
|
|
11
12
|
* interactive element so the vision model can reference elements by their badge index.
|
|
@@ -949,6 +950,7 @@ export class Browser {
|
|
|
949
950
|
colorScheme: options.colorScheme ?? 'light',
|
|
950
951
|
storageState: options.storageState,
|
|
951
952
|
extraHttpHeaders: options.extraHttpHeaders,
|
|
953
|
+
blockAnalytics: options.blockAnalytics,
|
|
952
954
|
});
|
|
953
955
|
instance.page = await instance.context.newPage();
|
|
954
956
|
instance.poolContext = true;
|
|
@@ -1068,9 +1070,9 @@ export class Browser {
|
|
|
1068
1070
|
locale: langToLocale(options.lang ?? 'en'),
|
|
1069
1071
|
colorScheme: options.colorScheme ?? 'light',
|
|
1070
1072
|
storageState: options.storageState,
|
|
1071
|
-
|
|
1072
|
-
|
|
1073
|
-
|
|
1073
|
+
// Privacy signals first, then merge user/env auth headers (which win on
|
|
1074
|
+
// any conflict). See PRIVACY_HEADERS / installAnalyticsBlock (AUT-234).
|
|
1075
|
+
extraHTTPHeaders: { ...PRIVACY_HEADERS, ...(options.extraHttpHeaders ?? {}) },
|
|
1074
1076
|
};
|
|
1075
1077
|
// Dedicated browser process for clip capture. Not pooled because clip
|
|
1076
1078
|
// capture installs context-level init scripts (cursor overlay). Cloud Run
|
|
@@ -1109,6 +1111,13 @@ export class Browser {
|
|
|
1109
1111
|
});
|
|
1110
1112
|
instance.context = await instance.browser.newContext(contextOptions);
|
|
1111
1113
|
}
|
|
1114
|
+
// Block third-party analytics beacons so clip/video capture doesn't
|
|
1115
|
+
// register a phantom visit either (AUT-234). Context-level so it covers
|
|
1116
|
+
// every page in both the persistent (cloud) and incognito (local) paths.
|
|
1117
|
+
// Skippable per-project via blockAnalytics === false.
|
|
1118
|
+
if (options.blockAnalytics !== false) {
|
|
1119
|
+
await installAnalyticsBlock(instance.context);
|
|
1120
|
+
}
|
|
1112
1121
|
// Cloud Run only: inject the notranslate meta on every navigation so
|
|
1113
1122
|
// Chromium's translate UI never prompts. The --disable-features=Translate*
|
|
1114
1123
|
// launch flags are unreliable across Chromium versions (some translate
|
|
@@ -1244,6 +1253,9 @@ export class Browser {
|
|
|
1244
1253
|
args: CHROMIUM_ARGS,
|
|
1245
1254
|
});
|
|
1246
1255
|
this.context = await this.browser.newContext(this.buildContextOptions());
|
|
1256
|
+
if (this.options.blockAnalytics !== false) {
|
|
1257
|
+
await installAnalyticsBlock(this.context);
|
|
1258
|
+
}
|
|
1247
1259
|
this.page = await this.context.newPage();
|
|
1248
1260
|
this.attachDebugLifecycleListeners();
|
|
1249
1261
|
}
|
|
@@ -1333,6 +1345,9 @@ export class Browser {
|
|
|
1333
1345
|
this.context = null;
|
|
1334
1346
|
}
|
|
1335
1347
|
this.context = await this.browser.newContext(this.buildContextOptions());
|
|
1348
|
+
if (this.options.blockAnalytics !== false) {
|
|
1349
|
+
await installAnalyticsBlock(this.context);
|
|
1350
|
+
}
|
|
1336
1351
|
this.page = await this.context.newPage();
|
|
1337
1352
|
this.elementMap.clear();
|
|
1338
1353
|
this.attachDebugLifecycleListeners();
|
|
@@ -5631,10 +5646,11 @@ export class Browser {
|
|
|
5631
5646
|
async setLanguage(lang) {
|
|
5632
5647
|
const context = this.ensureContext();
|
|
5633
5648
|
const page = this.ensurePage();
|
|
5634
|
-
// `setExtraHTTPHeaders` REPLACES the header map — merge with the
|
|
5635
|
-
// environment-level auth headers so a SET_LOCALE opcode
|
|
5636
|
-
// them mid-run.
|
|
5649
|
+
// `setExtraHTTPHeaders` REPLACES the header map — merge with the privacy
|
|
5650
|
+
// signals and the environment-level auth headers so a SET_LOCALE opcode
|
|
5651
|
+
// doesn't strip them mid-run.
|
|
5637
5652
|
await context.setExtraHTTPHeaders({
|
|
5653
|
+
...PRIVACY_HEADERS,
|
|
5638
5654
|
...(this.options.extraHttpHeaders ?? {}),
|
|
5639
5655
|
'Accept-Language': lang,
|
|
5640
5656
|
});
|
|
@@ -5761,7 +5777,9 @@ export class Browser {
|
|
|
5761
5777
|
locale: langToLocale(this.options.lang ?? 'en'),
|
|
5762
5778
|
colorScheme: this.options.colorScheme ?? 'light',
|
|
5763
5779
|
storageState: this.options.storageState,
|
|
5764
|
-
|
|
5780
|
+
// Privacy signals first, then merge user/env auth headers (which win on
|
|
5781
|
+
// any conflict). See PRIVACY_HEADERS / installAnalyticsBlock (AUT-234).
|
|
5782
|
+
extraHTTPHeaders: { ...PRIVACY_HEADERS, ...(extra ?? {}) },
|
|
5765
5783
|
};
|
|
5766
5784
|
}
|
|
5767
5785
|
}
|
package/dist/cli-runner.js
CHANGED
|
@@ -258,6 +258,7 @@ export async function runCapture(options) {
|
|
|
258
258
|
colorScheme: variant.theme,
|
|
259
259
|
storageState: program.preconditions.storageState,
|
|
260
260
|
extraHttpHeaders: program.environmentHttpHeaders,
|
|
261
|
+
blockAnalytics: program.blockAnalytics,
|
|
261
262
|
};
|
|
262
263
|
let recordingDir;
|
|
263
264
|
let browser;
|
|
@@ -2233,6 +2233,7 @@ export declare const ExecutionProgramSchema: z.ZodObject<{
|
|
|
2233
2233
|
deviceConfigs: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodRecord<z.ZodString, z.ZodUnknown>>>;
|
|
2234
2234
|
publicUrl: z.ZodOptional<z.ZodString>;
|
|
2235
2235
|
environmentHttpHeaders: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
|
|
2236
|
+
blockAnalytics: z.ZodOptional<z.ZodBoolean>;
|
|
2236
2237
|
}, z.core.$strict>;
|
|
2237
2238
|
export declare const HealerPatchSchema: z.ZodObject<{
|
|
2238
2239
|
opcodeIndex: z.ZodNumber;
|
|
@@ -4936,6 +4937,7 @@ export declare function safeParseProgramResult(data: unknown): z.ZodSafeParseRes
|
|
|
4936
4937
|
deviceConfigs?: Record<string, Record<string, unknown>> | undefined;
|
|
4937
4938
|
publicUrl?: string | undefined;
|
|
4938
4939
|
environmentHttpHeaders?: Record<string, string> | undefined;
|
|
4940
|
+
blockAnalytics?: boolean | undefined;
|
|
4939
4941
|
}>;
|
|
4940
4942
|
export interface ClipNavigationViolation {
|
|
4941
4943
|
/** Index of the offending NAVIGATE opcode in `program.steps`. */
|
package/dist/execution-schema.js
CHANGED
|
@@ -682,6 +682,12 @@ export const ExecutionProgramSchema = z.object({
|
|
|
682
682
|
// pairs that Playwright will inject as `extraHTTPHeaders` on the
|
|
683
683
|
// BrowserContext so protected staging/preview URLs load successfully.
|
|
684
684
|
environmentHttpHeaders: z.record(z.string().min(1), z.string().min(1)).optional(),
|
|
685
|
+
// Per-project opt-out for analytics blocking (AUT-234). Optional and WITHOUT a
|
|
686
|
+
// Zod default for the same signing reason as `programSchemaVersion` above:
|
|
687
|
+
// this schema is reused in signature verification, so a default would mutate
|
|
688
|
+
// the signed payload and break symmetry for programs signed without the field.
|
|
689
|
+
// Absent / true ⇒ block (engine default); only an explicit false disables.
|
|
690
|
+
blockAnalytics: z.boolean().optional(),
|
|
685
691
|
}).strict().superRefine((value, ctx) => {
|
|
686
692
|
if (value.mediaMode !== value.artifactPlan.mediaMode) {
|
|
687
693
|
ctx.addIssue({
|
|
@@ -614,6 +614,14 @@ export interface ExecutionProgram {
|
|
|
614
614
|
* and embedded in the signed program envelope.
|
|
615
615
|
*/
|
|
616
616
|
environmentHttpHeaders?: Record<string, string>;
|
|
617
|
+
/**
|
|
618
|
+
* Per-project opt-out for third-party analytics blocking (AUT-234). Default
|
|
619
|
+
* behavior (field absent / `true`) blocks analytics beacons during capture so
|
|
620
|
+
* a run never registers a phantom "visit". Set to `false` only when the
|
|
621
|
+
* project disabled it (`projects.block_analytics_enabled = false`). Server-set
|
|
622
|
+
* BEFORE signing, so it lives inside the signed envelope.
|
|
623
|
+
*/
|
|
624
|
+
blockAnalytics?: boolean;
|
|
617
625
|
}
|
|
618
626
|
export interface CircuitBreakerConfig {
|
|
619
627
|
/** Max recovery attempts per opcode. Default: 3 */
|
|
@@ -1139,6 +1139,7 @@ export declare const SignedExecutionProgramEnvelopeSchema: z.ZodObject<{
|
|
|
1139
1139
|
deviceConfigs: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodRecord<z.ZodString, z.ZodUnknown>>>;
|
|
1140
1140
|
publicUrl: z.ZodOptional<z.ZodString>;
|
|
1141
1141
|
environmentHttpHeaders: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
|
|
1142
|
+
blockAnalytics: z.ZodOptional<z.ZodBoolean>;
|
|
1142
1143
|
}, z.core.$strict>;
|
|
1143
1144
|
signature: z.ZodString;
|
|
1144
1145
|
meta: z.ZodOptional<z.ZodObject<{
|
package/dist/types.d.ts
CHANGED
|
@@ -257,6 +257,13 @@ export interface BrowserOptions {
|
|
|
257
257
|
* secrets here.
|
|
258
258
|
*/
|
|
259
259
|
extraHttpHeaders?: Record<string, string>;
|
|
260
|
+
/**
|
|
261
|
+
* When `false`, the engine does NOT block third-party web-analytics beacons
|
|
262
|
+
* during capture (AUT-234). Default behavior (`undefined` / `true`) blocks
|
|
263
|
+
* them so a capture never registers a phantom "visit" in the site's
|
|
264
|
+
* analytics. Opt-out is a per-project setting (`projects.block_analytics_enabled`).
|
|
265
|
+
*/
|
|
266
|
+
blockAnalytics?: boolean;
|
|
260
267
|
}
|
|
261
268
|
export interface OutscaleConfig {
|
|
262
269
|
/** Uniform padding on all 4 sides (pixels). */
|
|
@@ -1158,6 +1158,7 @@ export declare const VideoIngestPayloadSchema: z.ZodObject<{
|
|
|
1158
1158
|
deviceConfigs: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodRecord<z.ZodString, z.ZodUnknown>>>;
|
|
1159
1159
|
publicUrl: z.ZodOptional<z.ZodString>;
|
|
1160
1160
|
environmentHttpHeaders: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
|
|
1161
|
+
blockAnalytics: z.ZodOptional<z.ZodBoolean>;
|
|
1161
1162
|
}, z.core.$strict>;
|
|
1162
1163
|
narration: z.ZodOptional<z.ZodObject<{
|
|
1163
1164
|
voice: z.ZodString;
|