autokap 1.9.0 → 1.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,85 @@
1
+ import type { BrowserContext } from 'playwright';
2
+ /**
3
+ * Privacy-signal request headers attached to every capture context.
4
+ *
5
+ * These are standard, widely-sent browser headers (Firefox sends `DNT`, Brave
6
+ * sends `Sec-GPC`), so they're invisible to origin anti-bot defenses — they
7
+ * only ever observe a normal first-party page load. Privacy-first analytics
8
+ * (Plausible, Fathom, …) honor them and suppress the pageview, which
9
+ * complements the network-level blocking below for the providers that respect
10
+ * the signal. Free, zero-risk belt-and-suspenders.
11
+ */
12
+ export declare const PRIVACY_HEADERS: Record<string, string>;
13
+ /**
14
+ * Hostnames of DEDICATED web-analytics / product-telemetry / session-replay
15
+ * endpoints. These domains serve ONLY analytics, so aborting requests to them
16
+ * during a capture has zero functional impact on the page being screenshotted —
17
+ * it only prevents AutoKap's automated visit from registering as a phantom
18
+ * "visitor" in the site owner's analytics (AUT-234).
19
+ *
20
+ * Matched by exact host OR sub-domain suffix
21
+ * (`host === h || host.endsWith('.' + h)`), so regional shards
22
+ * (`eu.i.posthog.com`, `region1.google-analytics.com`, `*.matomo.cloud`, …) are
23
+ * covered without enumerating each one.
24
+ *
25
+ * Deliberately ABSENT: `googletagmanager.com`. GTM can inject functional tags,
26
+ * and loading `gtm.js` does NOT itself record a visit — the GA *beacon* to
27
+ * `google-analytics.com` does, and that host IS blocked. So we neutralize the
28
+ * GA visit without risking a broken page.
29
+ *
30
+ * Self-hosted / first-party-proxied analytics (e.g. Plausible reverse-proxied
31
+ * on the site's own domain) is intentionally NOT covered: it reads as
32
+ * first-party (see the `isFirstPartyUrl` guard in {@link installAnalyticsBlock})
33
+ * and is impossible to detect universally. That edge case is out of scope by
34
+ * design — catching it would depend on per-site configuration.
35
+ */
36
+ export declare const ANALYTICS_HOSTS: readonly string[];
37
+ /**
38
+ * True when `url` targets a dedicated web-analytics *ingestion* endpoint (see
39
+ * {@link ANALYTICS_HOSTS}). Host-suffix aware so regional/sub-domain shards
40
+ * match their parent. Fail-OPEN on unparseable or non-http(s) URLs (returns
41
+ * `false`): we never abort a request we can't confidently classify.
42
+ *
43
+ * For hosts that co-serve functional config on the same domain as their
44
+ * analytics (PostHog), only the event-capture paths count — feature-flag /
45
+ * config / library paths are preserved so the captured UI never changes
46
+ * (see {@link POSTHOG_FUNCTIONAL_PATH_RE}).
47
+ */
48
+ export declare function isAnalyticsRequest(url: string): boolean;
49
+ /**
50
+ * The per-request block decision, factored out of {@link installAnalyticsBlock}
51
+ * so the guard composition is unit-testable without a real browser context.
52
+ *
53
+ * Blocks ONLY a third-party analytics request. A first-party one — analytics
54
+ * self-hosted or reverse-proxied on the captured site's OWN domain — is
55
+ * preserved so we can never break the site's own functionality.
56
+ *
57
+ * `pageUrl` is the request's frame URL. When it's empty/unknown (a request in
58
+ * flight before the first navigation commits, a detached/teardown frame, some
59
+ * worker-originated requests) `isFirstPartyUrl` fail-OPENS to first-party, so
60
+ * the beacon is NOT aborted. That's the deliberate safe direction — never break
61
+ * a page — at the cost of a rare phantom-visit leak in that narrow window; real
62
+ * analytics beacons fire after navigation commits, so the frame URL is present.
63
+ */
64
+ export declare function shouldBlockAnalyticsRequest(pageUrl: string, requestUrl: string): boolean;
65
+ /**
66
+ * Install a context-level route that aborts outgoing requests to dedicated
67
+ * third-party analytics endpoints, so capturing a site never registers a
68
+ * phantom "visit" in its analytics (AUT-234).
69
+ *
70
+ * Only THIRD-party analytics is blocked (the `!isFirstPartyUrl` guard): a
71
+ * first-party request is never aborted, so we can never break the captured
72
+ * site's own functionality. Aborting a third-party beacon is invisible to the
73
+ * origin's anti-bot (it only ever sees a normal first-party page load) — exactly
74
+ * what an ad-blocker does — so this carries no risk of tripping bot defenses.
75
+ *
76
+ * Registered at the CONTEXT level so it (a) covers every page/frame in the
77
+ * context, (b) survives `page.unrouteAll()` from `clearRouteInterception()`
78
+ * (which only clears page-level routes), and (c) composes with the page-level
79
+ * mock routes from `setupRouteInterception()` — page routes run first; this
80
+ * catch-all `fallback()`s every non-analytics request back to the network (or
81
+ * the next handler). Aborting also lowers in-flight count, which only helps
82
+ * `networkidle` settle. The adaptive-wait progress signal already ignores
83
+ * third-party traffic (AUT-240), so there's no interaction there.
84
+ */
85
+ export declare function installAnalyticsBlock(context: BrowserContext): Promise<void>;
@@ -0,0 +1,201 @@
1
+ import { isFirstPartyUrl } from './security.js';
2
/**
 * Default privacy-signal request headers for capture contexts.
 *
 * Carries `DNT: 1` (Do Not Track) and `Sec-GPC: 1` (Global Privacy Control).
 * Callers spread this object FIRST when building `extraHTTPHeaders`, then
 * spread user/environment headers on top, so a caller-supplied header with
 * the same name overrides the value here.
 *
 * The object is shared by every consumer: copy it (spread) rather than
 * mutating it in place.
 */
export const PRIVACY_HEADERS = {
    'DNT': '1',
    'Sec-GPC': '1',
};
16
+ /**
17
+ * Hostnames of DEDICATED web-analytics / product-telemetry / session-replay
18
+ * endpoints. These domains serve ONLY analytics, so aborting requests to them
19
+ * during a capture has zero functional impact on the page being screenshotted —
20
+ * it only prevents AutoKap's automated visit from registering as a phantom
21
+ * "visitor" in the site owner's analytics (AUT-234).
22
+ *
23
+ * Matched by exact host OR sub-domain suffix
24
+ * (`host === h || host.endsWith('.' + h)`), so regional shards
25
+ * (`eu.i.posthog.com`, `region1.google-analytics.com`, `*.matomo.cloud`, …) are
26
+ * covered without enumerating each one.
27
+ *
28
+ * Deliberately ABSENT: `googletagmanager.com`. GTM can inject functional tags,
29
+ * and loading `gtm.js` does NOT itself record a visit — the GA *beacon* to
30
+ * `google-analytics.com` does, and that host IS blocked. So we neutralize the
31
+ * GA visit without risking a broken page.
32
+ *
33
+ * Self-hosted / first-party-proxied analytics (e.g. Plausible reverse-proxied
34
+ * on the site's own domain) is intentionally NOT covered: it reads as
35
+ * first-party (see the `isFirstPartyUrl` guard in {@link installAnalyticsBlock})
36
+ * and is impossible to detect universally. That edge case is out of scope by
37
+ * design — catching it would depend on per-site configuration.
38
+ */
39
+ export const ANALYTICS_HOSTS = [
40
+ // Google Analytics / GA4 / Universal Analytics collection endpoints
41
+ 'google-analytics.com',
42
+ 'analytics.google.com',
43
+ 'ssl.google-analytics.com',
44
+ 'region1.google-analytics.com',
45
+ 'stats.g.doubleclick.net',
46
+ // Plausible
47
+ 'plausible.io',
48
+ // PostHog — only its event-ingestion paths are blocked; feature-flag / config
49
+ // / library paths (/decide, /flags, /static, /array) are preserved so a
50
+ // flag-gated app never renders with default flags (see isAnalyticsRequest).
51
+ 'posthog.com',
52
+ 'i.posthog.com',
53
+ // Matomo / Piwik (cloud)
54
+ 'matomo.cloud',
55
+ 'matomo.org',
56
+ // Segment
57
+ 'segment.io',
58
+ 'segment.com',
59
+ // Mixpanel
60
+ 'mixpanel.com',
61
+ 'mxpnl.com',
62
+ // Amplitude
63
+ 'amplitude.com',
64
+ // Heap
65
+ 'heapanalytics.com',
66
+ 'heap.io',
67
+ // Hotjar (heatmaps / session replay)
68
+ 'hotjar.com',
69
+ 'hotjar.io',
70
+ // Microsoft Clarity (session replay)
71
+ 'clarity.ms',
72
+ // Cloudflare Web Analytics
73
+ 'cloudflareinsights.com',
74
+ // Vercel Analytics / Speed Insights
75
+ 'vercel-insights.com',
76
+ 'va.vercel-scripts.com',
77
+ // Fathom
78
+ 'usefathom.com',
79
+ // Session replay / heatmaps
80
+ 'fullstory.com',
81
+ 'mouseflow.com',
82
+ 'crazyegg.com',
83
+ // Yandex Metrica
84
+ 'mc.yandex.ru',
85
+ // Quantcast
86
+ 'quantserve.com',
87
+ 'quantcount.com',
88
+ // Adobe Analytics (Omniture)
89
+ 'omtrdc.net',
90
+ '2o7.net',
91
+ // Misc product analytics
92
+ 'pendo.io',
93
+ 'woopra.com',
94
+ 'kissmetrics.io',
95
+ ];
96
+ const ANALYTICS_HOST_SET = new Set(ANALYTICS_HOSTS.map((h) => h.toLowerCase()));
97
+ /**
98
+ * Path prefixes on PostHog hosts that are NOT analytics ingestion: feature
99
+ * flags (`/decide`, `/flags`), the JS library and its remote config
100
+ * (`/static`, `/array`). PostHog co-serves these from the SAME host as its
101
+ * event capture, so blocking them would make a flag-gated app render with
102
+ * default flags during capture — a visible change to the captured UI. We only
103
+ * block the ingestion paths (`/e/`, `/i/v0/e/`, `/batch/`, `/capture/`, `/s/`),
104
+ * which is what records the phantom visit.
105
+ */
106
+ const POSTHOG_FUNCTIONAL_PATH_RE = /^\/(decide|flags|static|array)\b/i;
107
+ function matchesAnalyticsHost(host) {
108
+ if (ANALYTICS_HOST_SET.has(host))
109
+ return true;
110
+ for (const h of ANALYTICS_HOST_SET) {
111
+ if (host.endsWith(`.${h}`))
112
+ return true;
113
+ }
114
+ return false;
115
+ }
116
+ function isPosthogHost(host) {
117
+ return host === 'posthog.com' || host.endsWith('.posthog.com');
118
+ }
119
+ /**
120
+ * True when `url` targets a dedicated web-analytics *ingestion* endpoint (see
121
+ * {@link ANALYTICS_HOSTS}). Host-suffix aware so regional/sub-domain shards
122
+ * match their parent. Fail-CLOSED on unparseable or non-http(s) URLs (returns
123
+ * `false`): we never abort a request we can't confidently classify.
124
+ *
125
+ * For hosts that co-serve functional config on the same domain as their
126
+ * analytics (PostHog), only the event-capture paths count — feature-flag /
127
+ * config / library paths are preserved so the captured UI never changes
128
+ * (see {@link POSTHOG_FUNCTIONAL_PATH_RE}).
129
+ */
130
+ export function isAnalyticsRequest(url) {
131
+ let parsed;
132
+ try {
133
+ parsed = new URL(url);
134
+ }
135
+ catch {
136
+ return false;
137
+ }
138
+ if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:')
139
+ return false;
140
+ const host = parsed.hostname.toLowerCase();
141
+ if (!matchesAnalyticsHost(host))
142
+ return false;
143
+ if (isPosthogHost(host) && POSTHOG_FUNCTIONAL_PATH_RE.test(parsed.pathname)) {
144
+ return false;
145
+ }
146
+ return true;
147
+ }
148
/**
 * Per-request block decision, kept separate from the route handler so the
 * combination of the two guards can be exercised without a browser context.
 *
 * A request is blocked only when BOTH hold:
 *   1. `requestUrl` is an analytics endpoint (`isAnalyticsRequest`), and
 *   2. it is NOT first-party relative to `pageUrl` (`isFirstPartyUrl`, from
 *      `./security.js`).
 *
 * Analytics served from the captured site's own domain is therefore left
 * alone. `isFirstPartyUrl` is not consulted at all for non-analytics URLs.
 *
 * NOTE(review): the behavior for an empty/unknown `pageUrl` is decided
 * entirely by `isFirstPartyUrl`, which is defined outside this file; the
 * original author's note says it treats that case as first-party (request not
 * aborted) — confirm against `security.js`.
 *
 * @param pageUrl URL of the frame that issued the request ('' when unknown).
 * @param requestUrl Absolute URL of the outgoing request.
 * @returns `true` when the request should be aborted.
 */
export function shouldBlockAnalyticsRequest(pageUrl, requestUrl) {
    if (!isAnalyticsRequest(requestUrl)) {
        return false;
    }
    return !isFirstPartyUrl(pageUrl, requestUrl);
}
166
/**
 * Install a context-level catch-all route (`**\/*`) that aborts requests
 * judged by `shouldBlockAnalyticsRequest` to be third-party analytics, and
 * hands every other request on via `route.fallback()` (AUT-234).
 *
 * Blocked requests are aborted with the `'blockedbyclient'` error code; a
 * rejection from `abort()` itself is deliberately swallowed.
 *
 * The route is registered on the BrowserContext rather than on a page, so it
 * applies to every page created in that context.
 *
 * @param context Playwright BrowserContext to install the route on.
 * @returns Resolves once the route is registered.
 */
export async function installAnalyticsBlock(context) {
    await context.route('**/*', (route) => {
        const request = route.request();
        // The first-party check is made against the URL of the frame that
        // issued the request. Playwright's `request.frame()` THROWS (it does not
        // return null) for service-worker requests and for navigation requests
        // issued before their frame exists, so the lookup is guarded: an
        // unavailable frame is treated like an unknown page URL instead of
        // letting the handler throw and leave the route unhandled.
        let pageUrl = '';
        try {
            pageUrl = request.frame()?.url() || '';
        }
        catch {
            pageUrl = '';
        }
        if (shouldBlockAnalyticsRequest(pageUrl, request.url())) {
            return route.abort('blockedbyclient').catch(() => undefined);
        }
        return route.fallback();
    });
}
201
+ //# sourceMappingURL=analytics-blocklist.js.map
@@ -20,6 +20,7 @@ declare class BrowserPool {
20
20
  colorScheme?: 'light' | 'dark';
21
21
  storageState?: BrowserStorageState;
22
22
  extraHttpHeaders?: Record<string, string>;
23
+ blockAnalytics?: boolean;
23
24
  }): Promise<BrowserContext>;
24
25
  /**
25
26
  * Release a context back to the pool. Closes the context and unblocks
@@ -1,4 +1,5 @@
1
1
  import { chromium } from 'playwright';
2
+ import { installAnalyticsBlock, PRIVACY_HEADERS } from './analytics-blocklist.js';
2
3
  /** Chromium flags for server-side headless operation (used by pool and standalone launches). */
3
4
  export const CHROMIUM_ARGS = [
4
5
  // Linux/Docker-only: required when running Chromium as root or with limited /dev/shm
@@ -74,8 +75,16 @@ class BrowserPool {
74
75
  locale: options?.lang ? options.lang : 'en-US',
75
76
  colorScheme: options?.colorScheme ?? 'light',
76
77
  storageState: options?.storageState,
77
- ...(extra && Object.keys(extra).length > 0 ? { extraHTTPHeaders: extra } : {}),
78
+ // Privacy signals first, then merge user/env auth headers (which win on
79
+ // any conflict). See PRIVACY_HEADERS / installAnalyticsBlock (AUT-234).
80
+ extraHTTPHeaders: { ...PRIVACY_HEADERS, ...(extra ?? {}) },
78
81
  });
82
+ // Block third-party analytics so pooled (server-side) captures don't
83
+ // register a phantom visit in the captured site's analytics (AUT-234).
84
+ // Skippable per-project via blockAnalytics === false.
85
+ if (options?.blockAnalytics !== false) {
86
+ await installAnalyticsBlock(context);
87
+ }
79
88
  this.activeContexts++;
80
89
  this.captureCount++;
81
90
  return context;
package/dist/browser.js CHANGED
@@ -6,6 +6,7 @@ import { join } from 'path';
6
6
  import { DOM_QUIET_WINDOW_MS, GLOBAL_WAIT_CAP_MS, PIXEL_FALLBACK_DIFF_THRESHOLD, PIXEL_FALLBACK_MAX_PASSES, } from './wait-contract.js';
7
7
  import { buildAKNodeRuntimeIndex, deriveInteractiveElementsFromAKTree, disambiguateFingerprint, focusAKTree, fingerprintAKNode, serializeAKTree, } from './ak-tree.js';
8
8
  import { isFirstPartyUrl } from './security.js';
9
+ import { installAnalyticsBlock, PRIVACY_HEADERS } from './analytics-blocklist.js';
9
10
  /**
10
11
  * Set-of-Marks (SoM) annotation: overlays colored [N] badges on each visible
11
12
  * interactive element so the vision model can reference elements by their badge index.
@@ -949,6 +950,7 @@ export class Browser {
949
950
  colorScheme: options.colorScheme ?? 'light',
950
951
  storageState: options.storageState,
951
952
  extraHttpHeaders: options.extraHttpHeaders,
953
+ blockAnalytics: options.blockAnalytics,
952
954
  });
953
955
  instance.page = await instance.context.newPage();
954
956
  instance.poolContext = true;
@@ -1068,9 +1070,9 @@ export class Browser {
1068
1070
  locale: langToLocale(options.lang ?? 'en'),
1069
1071
  colorScheme: options.colorScheme ?? 'light',
1070
1072
  storageState: options.storageState,
1071
- ...(options.extraHttpHeaders && Object.keys(options.extraHttpHeaders).length > 0
1072
- ? { extraHTTPHeaders: options.extraHttpHeaders }
1073
- : {}),
1073
+ // Privacy signals first, then merge user/env auth headers (which win on
1074
+ // any conflict). See PRIVACY_HEADERS / installAnalyticsBlock (AUT-234).
1075
+ extraHTTPHeaders: { ...PRIVACY_HEADERS, ...(options.extraHttpHeaders ?? {}) },
1074
1076
  };
1075
1077
  // Dedicated browser process for clip capture. Not pooled because clip
1076
1078
  // capture installs context-level init scripts (cursor overlay). Cloud Run
@@ -1109,6 +1111,13 @@ export class Browser {
1109
1111
  });
1110
1112
  instance.context = await instance.browser.newContext(contextOptions);
1111
1113
  }
1114
+ // Block third-party analytics beacons so clip/video capture doesn't
1115
+ // register a phantom visit either (AUT-234). Context-level so it covers
1116
+ // every page in both the persistent (cloud) and incognito (local) paths.
1117
+ // Skippable per-project via blockAnalytics === false.
1118
+ if (options.blockAnalytics !== false) {
1119
+ await installAnalyticsBlock(instance.context);
1120
+ }
1112
1121
  // Cloud Run only: inject the notranslate meta on every navigation so
1113
1122
  // Chromium's translate UI never prompts. The --disable-features=Translate*
1114
1123
  // launch flags are unreliable across Chromium versions (some translate
@@ -1244,6 +1253,9 @@ export class Browser {
1244
1253
  args: CHROMIUM_ARGS,
1245
1254
  });
1246
1255
  this.context = await this.browser.newContext(this.buildContextOptions());
1256
+ if (this.options.blockAnalytics !== false) {
1257
+ await installAnalyticsBlock(this.context);
1258
+ }
1247
1259
  this.page = await this.context.newPage();
1248
1260
  this.attachDebugLifecycleListeners();
1249
1261
  }
@@ -1333,6 +1345,9 @@ export class Browser {
1333
1345
  this.context = null;
1334
1346
  }
1335
1347
  this.context = await this.browser.newContext(this.buildContextOptions());
1348
+ if (this.options.blockAnalytics !== false) {
1349
+ await installAnalyticsBlock(this.context);
1350
+ }
1336
1351
  this.page = await this.context.newPage();
1337
1352
  this.elementMap.clear();
1338
1353
  this.attachDebugLifecycleListeners();
@@ -5631,10 +5646,11 @@ export class Browser {
5631
5646
  async setLanguage(lang) {
5632
5647
  const context = this.ensureContext();
5633
5648
  const page = this.ensurePage();
5634
- // `setExtraHTTPHeaders` REPLACES the header map — merge with the
5635
- // environment-level auth headers so a SET_LOCALE opcode doesn't strip
5636
- // them mid-run.
5649
+ // `setExtraHTTPHeaders` REPLACES the header map — merge with the privacy
5650
+ // signals and the environment-level auth headers so a SET_LOCALE opcode
5651
+ // doesn't strip them mid-run.
5637
5652
  await context.setExtraHTTPHeaders({
5653
+ ...PRIVACY_HEADERS,
5638
5654
  ...(this.options.extraHttpHeaders ?? {}),
5639
5655
  'Accept-Language': lang,
5640
5656
  });
@@ -5761,7 +5777,9 @@ export class Browser {
5761
5777
  locale: langToLocale(this.options.lang ?? 'en'),
5762
5778
  colorScheme: this.options.colorScheme ?? 'light',
5763
5779
  storageState: this.options.storageState,
5764
- ...(extra && Object.keys(extra).length > 0 ? { extraHTTPHeaders: extra } : {}),
5780
+ // Privacy signals first, then merge user/env auth headers (which win on
5781
+ // any conflict). See PRIVACY_HEADERS / installAnalyticsBlock (AUT-234).
5782
+ extraHTTPHeaders: { ...PRIVACY_HEADERS, ...(extra ?? {}) },
5765
5783
  };
5766
5784
  }
5767
5785
  }
@@ -258,6 +258,7 @@ export async function runCapture(options) {
258
258
  colorScheme: variant.theme,
259
259
  storageState: program.preconditions.storageState,
260
260
  extraHttpHeaders: program.environmentHttpHeaders,
261
+ blockAnalytics: program.blockAnalytics,
261
262
  };
262
263
  let recordingDir;
263
264
  let browser;
@@ -2233,6 +2233,7 @@ export declare const ExecutionProgramSchema: z.ZodObject<{
2233
2233
  deviceConfigs: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodRecord<z.ZodString, z.ZodUnknown>>>;
2234
2234
  publicUrl: z.ZodOptional<z.ZodString>;
2235
2235
  environmentHttpHeaders: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
2236
+ blockAnalytics: z.ZodOptional<z.ZodBoolean>;
2236
2237
  }, z.core.$strict>;
2237
2238
  export declare const HealerPatchSchema: z.ZodObject<{
2238
2239
  opcodeIndex: z.ZodNumber;
@@ -4936,6 +4937,7 @@ export declare function safeParseProgramResult(data: unknown): z.ZodSafeParseRes
4936
4937
  deviceConfigs?: Record<string, Record<string, unknown>> | undefined;
4937
4938
  publicUrl?: string | undefined;
4938
4939
  environmentHttpHeaders?: Record<string, string> | undefined;
4940
+ blockAnalytics?: boolean | undefined;
4939
4941
  }>;
4940
4942
  export interface ClipNavigationViolation {
4941
4943
  /** Index of the offending NAVIGATE opcode in `program.steps`. */
@@ -682,6 +682,12 @@ export const ExecutionProgramSchema = z.object({
682
682
  // pairs that Playwright will inject as `extraHTTPHeaders` on the
683
683
  // BrowserContext so protected staging/preview URLs load successfully.
684
684
  environmentHttpHeaders: z.record(z.string().min(1), z.string().min(1)).optional(),
685
+ // Per-project opt-out for analytics blocking (AUT-234). Optional and WITHOUT a
686
+ // Zod default for the same signing reason as `programSchemaVersion` above:
687
+ // this schema is reused in signature verification, so a default would mutate
688
+ // the signed payload and break symmetry for programs signed without the field.
689
+ // Absent / true ⇒ block (engine default); only an explicit false disables.
690
+ blockAnalytics: z.boolean().optional(),
685
691
  }).strict().superRefine((value, ctx) => {
686
692
  if (value.mediaMode !== value.artifactPlan.mediaMode) {
687
693
  ctx.addIssue({
@@ -614,6 +614,14 @@ export interface ExecutionProgram {
614
614
  * and embedded in the signed program envelope.
615
615
  */
616
616
  environmentHttpHeaders?: Record<string, string>;
617
+ /**
618
+ * Per-project opt-out for third-party analytics blocking (AUT-234). Default
619
+ * behavior (field absent / `true`) blocks analytics beacons during capture so
620
+ * a run never registers a phantom "visit". Set to `false` only when the
621
+ * project disabled it (`projects.block_analytics_enabled = false`). Server-set
622
+ * BEFORE signing, so it lives inside the signed envelope.
623
+ */
624
+ blockAnalytics?: boolean;
617
625
  }
618
626
  export interface CircuitBreakerConfig {
619
627
  /** Max recovery attempts per opcode. Default: 3 */
@@ -1139,6 +1139,7 @@ export declare const SignedExecutionProgramEnvelopeSchema: z.ZodObject<{
1139
1139
  deviceConfigs: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodRecord<z.ZodString, z.ZodUnknown>>>;
1140
1140
  publicUrl: z.ZodOptional<z.ZodString>;
1141
1141
  environmentHttpHeaders: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
1142
+ blockAnalytics: z.ZodOptional<z.ZodBoolean>;
1142
1143
  }, z.core.$strict>;
1143
1144
  signature: z.ZodString;
1144
1145
  meta: z.ZodOptional<z.ZodObject<{
package/dist/types.d.ts CHANGED
@@ -257,6 +257,13 @@ export interface BrowserOptions {
257
257
  * secrets here.
258
258
  */
259
259
  extraHttpHeaders?: Record<string, string>;
260
+ /**
261
+ * When `false`, the engine does NOT block third-party web-analytics beacons
262
+ * during capture (AUT-234). Default behavior (`undefined` / `true`) blocks
263
+ * them so a capture never registers a phantom "visit" in the site's
264
+ * analytics. Opt-out is a per-project setting (`projects.block_analytics_enabled`).
265
+ */
266
+ blockAnalytics?: boolean;
260
267
  }
261
268
  export interface OutscaleConfig {
262
269
  /** Uniform padding on all 4 sides (pixels). */
@@ -1158,6 +1158,7 @@ export declare const VideoIngestPayloadSchema: z.ZodObject<{
1158
1158
  deviceConfigs: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodRecord<z.ZodString, z.ZodUnknown>>>;
1159
1159
  publicUrl: z.ZodOptional<z.ZodString>;
1160
1160
  environmentHttpHeaders: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
1161
+ blockAnalytics: z.ZodOptional<z.ZodBoolean>;
1161
1162
  }, z.core.$strict>;
1162
1163
  narration: z.ZodOptional<z.ZodObject<{
1163
1164
  voice: z.ZodString;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "autokap",
3
- "version": "1.9.0",
3
+ "version": "1.9.1",
4
4
  "description": "AI-powered CLI tool for capturing clean screenshots of websites",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",