arn-browser 0.1.34 → 0.1.35

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "arn-browser",
3
- "version": "0.1.34",
3
+ "version": "0.1.35",
4
4
  "description": "A lightweight, browser autmation helper.",
5
5
  "main": "src/index.js",
6
6
  "types": "src/index.d.ts",
@@ -4,6 +4,38 @@ import { Browser, BrowserContext, Page } from "playwright-core";
4
4
  // ROUTING & CACHE TYPES
5
5
  // ============================================================================
6
6
 
7
+ /**
8
+ * Supported browser resource types for interception.
9
+ *
10
+ * - `"document"` — HTML pages
11
+ * - `"stylesheet"` — CSS files
12
+ * - `"image"` — Images (png, jpg, svg, etc.)
13
+ * - `"media"` — Audio/Video
14
+ * - `"font"` — Web fonts (woff, woff2, ttf, etc.)
15
+ * - `"script"` — JavaScript files
16
+ * - `"xhr"` — XMLHttpRequest calls
17
+ * - `"fetch"` — Fetch API calls
18
+ */
19
+ export type ResourceType = "document" | "stylesheet" | "image" | "media" | "font" | "script" | "xhr" | "fetch";
20
+
21
+ /** Control object returned by pwRoute for runtime resource management */
22
+ export interface RouteControl {
23
+ /**
24
+ * Add a resource type to intercept via Superagent at runtime.
25
+ *
26
+ * Supported: `"document"` | `"stylesheet"` | `"image"` | `"media"` | `"font"` | `"script"` | `"xhr"` | `"fetch"`
27
+ */
28
+ addResource(type: ResourceType): void;
29
+ /**
30
+ * Remove a resource type from Superagent interception at runtime.
31
+ *
32
+ * Supported: `"document"` | `"stylesheet"` | `"image"` | `"media"` | `"font"` | `"script"` | `"xhr"` | `"fetch"`
33
+ */
34
+ removeResource(type: ResourceType): void;
35
+ /** Get currently intercepted resource types */
36
+ getResources(): ResourceType[];
37
+ }
38
+
7
39
  /**
8
40
  * Options for the pwRoute function.
9
41
  */
@@ -62,12 +94,47 @@ export interface PwRouteOptions {
62
94
  * Playwright network stack will be used instead.
63
95
  */
64
96
  skipGotPatterns?: string[];
97
+
98
+ /**
99
+ * Additional resource types to intercept via Superagent.
100
+ * These are added on top of the defaults: `["stylesheet", "script", "font"]` (plus `"image"` whenever `blockImage` is not enabled).
101
+ *
102
+ * - `"document"` — HTML pages
103
+ * - `"stylesheet"` — CSS files
104
+ * - `"image"` — Images (png, jpg, svg, etc.)
105
+ * - `"media"` — Audio/Video
106
+ * - `"font"` — Web fonts (woff, woff2, ttf, etc.)
107
+ * - `"script"` — JavaScript files
108
+ * - `"xhr"` — XMLHttpRequest calls
109
+ * - `"fetch"` — Fetch API calls
110
+ *
111
+ * @example `gotAddResources: ["document", "xhr"]`
112
+ */
113
+ gotAddResources?: ResourceType[];
114
+
115
+ /**
116
+ * Resource types to exclude from Superagent interception.
117
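+ * These are removed after the defaults `["stylesheet", "script", "font"]` and any `gotAddResources` have been merged, so a type listed here is not intercepted even if it was also added; listing `"image"` also turns off the automatic image interception.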
118
+ *
119
+ * - `"document"` — HTML pages
120
+ * - `"stylesheet"` — CSS files
121
+ * - `"image"` — Images (png, jpg, svg, etc.)
122
+ * - `"media"` — Audio/Video
123
+ * - `"font"` — Web fonts (woff, woff2, ttf, etc.)
124
+ * - `"script"` — JavaScript files
125
+ * - `"xhr"` — XMLHttpRequest calls
126
+ * - `"fetch"` — Fetch API calls
127
+ *
128
+ * @example `gotRemoveResources: ["font"]`
129
+ */
130
+ gotRemoveResources?: ResourceType[];
65
131
  }
66
132
 
67
133
  /**
68
134
  * Sets up request interception, caching, and ad-blocking on a Playwright page or context.
135
+ * Returns a control object for runtime resource management.
69
136
  */
70
- export function pwRoute(options: PwRouteOptions): Promise<void>;
137
+ export function pwRoute(options: PwRouteOptions): Promise<RouteControl>;
71
138
 
72
139
  /**
73
140
  * Starts logging cache statistics to the console at a set interval.
@@ -76,7 +76,7 @@ function createProxyAgent(proxyUrl) {
76
76
  * Only keeps essential headers needed for the server to respond correctly.
77
77
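  * Everything else (cookies, auth, fingerprint hints, HTTP/2 pseudo-headers, etc.) is stripped, except that document/xhr/fetch requests also keep cookie, authorization, origin, content-type and x-requested-with.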
78
78
  */
79
- function sanitizeRequestHeaders(headers, logger, url) {
79
+ function sanitizeRequestHeaders(headers, logger, url, resourceType) {
80
80
  // Allowlist of essential request headers to keep
81
81
  const allowedHeaders = new Set([
82
82
  "accept",
@@ -93,6 +93,16 @@ function sanitizeRequestHeaders(headers, logger, url) {
93
93
  "sec-fetch-site",
94
94
  ]);
95
95
 
96
+ // For document/xhr/fetch types, preserve session-critical headers
97
+ const sessionTypes = new Set(["document", "xhr", "fetch"]);
98
+ if (sessionTypes.has(resourceType)) {
99
+ allowedHeaders.add("cookie");
100
+ allowedHeaders.add("authorization");
101
+ allowedHeaders.add("origin");
102
+ allowedHeaders.add("content-type");
103
+ allowedHeaders.add("x-requested-with");
104
+ }
105
+
96
106
  const cleaned = {};
97
107
  const stripped = [];
98
108
 
@@ -160,7 +170,7 @@ function sanitizeResponseHeaders(headers, logger, url) {
160
170
  * @param {boolean} stripLogger - Whether to log stripped headers
161
171
  * @returns {Promise<Object>} - The response object containing status, headers, and body
162
172
  */
163
- async function fetchWithClient(useCache, url, requestHeaders, method, useFullUrl, logger, proxyAgent, stripHeaders, stripLogger) {
173
+ async function fetchWithClient(useCache, url, requestHeaders, method, useFullUrl, logger, proxyAgent, stripHeaders, stripLogger, resourceType) {
164
174
  // Determine the cache key based on configuration
165
175
  let mainUrl = new URL(url).origin + new URL(url).pathname;
166
176
  if (useFullUrl) {
@@ -180,7 +190,7 @@ async function fetchWithClient(useCache, url, requestHeaders, method, useFullUrl
180
190
 
181
191
  try {
182
192
  // Sanitize outgoing request headers if stripHeaders is enabled
183
- const finalHeaders = stripHeaders ? sanitizeRequestHeaders(requestHeaders, stripLogger, url) : requestHeaders;
193
+ const finalHeaders = stripHeaders ? sanitizeRequestHeaders(requestHeaders, stripLogger, url, resourceType) : requestHeaders;
184
194
 
185
195
  // Fetch the resource using superagent
186
196
  // buffer(true) ensures we get the raw binary data (essential for images/fonts)
@@ -238,6 +248,8 @@ async function fetchWithClient(useCache, url, requestHeaders, method, useFullUrl
238
248
  * @param {Object} options.m4w_send_on_message - Custom handler data for Doublelist messages
239
249
  * @param {Array<string>} options.allowImagePatterns - Array of strings/patterns. If a URL contains any of these, it will NOT be blocked even if blockImage is true.
240
250
  * @param {Array<string>} options.skipGotPatterns - Array of strings/patterns. If a URL contains any of these, it will skip the custom Superagent fetch.
251
+ * @param {Array<string>} options.gotAddResources - Additional resource types to intercept via Superagent, e.g. ["document", "xhr", "fetch"]
252
+ * @param {Array<string>} options.gotRemoveResources - Resource types to exclude from Superagent interception, e.g. ["font"]
241
253
  */
242
254
  export async function pwRoute({
243
255
  context = null,
@@ -255,6 +267,8 @@ export async function pwRoute({
255
267
  m4w_send_on_message = null,
256
268
  allowImagePatterns = [], // Default empty, merged inside
257
269
  skipGotPatterns = [], // Default empty, merged inside
270
+ gotAddResources = [], // Default empty, add to intercepted types
271
+ gotRemoveResources = [], // Default empty, remove from intercepted types
258
272
  }) {
259
273
  // Validation: Ensure we have a target to attach the route to
260
274
  if (!context && !page) {
@@ -289,16 +303,25 @@ export async function pwRoute({
289
303
  }
290
304
  }
291
305
 
306
+ // Supported resource types reference (for documentation)
307
+ // "document", "stylesheet", "image", "media", "font", "script",
308
+ // "texttrack", "xhr", "fetch", "eventsource", "websocket", "manifest", "other"
309
+
292
310
  // Define resource types to intercept for custom fetching (useGot)
293
- const interceptedResourceTypes = ["stylesheet", "script", "font"];
311
+ // Start with defaults, add user extras, then remove user exclusions
312
+ const defaultResourceTypes = ["stylesheet", "script", "font"];
313
+ const interceptedResourceTypes = new Set([...defaultResourceTypes, ...gotAddResources]);
314
+ for (const type of gotRemoveResources) {
315
+ interceptedResourceTypes.delete(type);
316
+ }
294
317
 
295
318
  // Create proxy agent once (reused for all requests in this route)
296
319
  const proxyUrl = formatProxyUrl(proxy);
297
320
  const proxyAgent = createProxyAgent(proxyUrl);
298
321
 
299
322
  // If images are NOT blocked, we generally want to intercept/cache them too.
300
- if (!blockImage) {
301
- interceptedResourceTypes.push("image");
323
+ if (!blockImage && !gotRemoveResources.includes("image")) {
324
+ interceptedResourceTypes.add("image");
302
325
  }
303
326
 
304
327
  // Set up the global route interception
@@ -424,7 +447,7 @@ export async function pwRoute({
424
447
  // ============================================================
425
448
  // Group 6: Resource Interception (Custom Fetch/Cache)
426
449
  // ============================================================
427
- if (useGot && interceptedResourceTypes.includes(resourceType) && !url.startsWith("data:")) {
450
+ if (useGot && interceptedResourceTypes.has(resourceType) && !url.startsWith("data:")) {
428
451
  // Check against the normalized host list (defaults + user input)
429
452
  let shouldSkipGot = false;
430
453
  try {
@@ -444,7 +467,8 @@ export async function pwRoute({
444
467
  logger,
445
468
  proxyAgent,
446
469
  stripGotHeaders,
447
- stripGotLogger
470
+ stripGotLogger,
471
+ resourceType
448
472
  );
449
473
 
450
474
  if (response) {
@@ -468,4 +492,20 @@ export async function pwRoute({
468
492
  await route.continue();
469
493
  return;
470
494
  });
495
+
496
+ // Return control object for runtime resource management
497
+ return {
498
+ /** Add a resource type to intercept via Superagent at runtime */
499
+ addResource(type) {
500
+ interceptedResourceTypes.add(type);
501
+ },
502
+ /** Remove a resource type from Superagent interception at runtime */
503
+ removeResource(type) {
504
+ interceptedResourceTypes.delete(type);
505
+ },
506
+ /** Get currently intercepted resource types */
507
+ getResources() {
508
+ return [...interceptedResourceTypes];
509
+ },
510
+ };
471
511
  }
@@ -4,6 +4,38 @@ import { Browser, Page } from "puppeteer-core";
4
4
  // ROUTING & CACHE TYPES
5
5
  // ============================================================================
6
6
 
7
+ /**
8
+ * Supported browser resource types for interception.
9
+ *
10
+ * - `"document"` — HTML pages
11
+ * - `"stylesheet"` — CSS files
12
+ * - `"image"` — Images (png, jpg, svg, etc.)
13
+ * - `"media"` — Audio/Video
14
+ * - `"font"` — Web fonts (woff, woff2, ttf, etc.)
15
+ * - `"script"` — JavaScript files
16
+ * - `"xhr"` — XMLHttpRequest calls
17
+ * - `"fetch"` — Fetch API calls
18
+ */
19
+ export type ResourceType = "document" | "stylesheet" | "image" | "media" | "font" | "script" | "xhr" | "fetch";
20
+
21
+ /** Control object returned by ppRoute for runtime resource management */
22
+ export interface RouteControl {
23
+ /**
24
+ * Add a resource type to intercept via Superagent at runtime.
25
+ *
26
+ * Supported: `"document"` | `"stylesheet"` | `"image"` | `"media"` | `"font"` | `"script"` | `"xhr"` | `"fetch"`
27
+ */
28
+ addResource(type: ResourceType): void;
29
+ /**
30
+ * Remove a resource type from Superagent interception at runtime.
31
+ *
32
+ * Supported: `"document"` | `"stylesheet"` | `"image"` | `"media"` | `"font"` | `"script"` | `"xhr"` | `"fetch"`
33
+ */
34
+ removeResource(type: ResourceType): void;
35
+ /** Get currently intercepted resource types */
36
+ getResources(): ResourceType[];
37
+ }
38
+
7
39
  /**
8
40
  * Options for the ppRoute function.
9
41
  */
@@ -59,12 +91,47 @@ export interface PpRouteOptions {
59
91
  * Puppeteer network stack will be used instead.
60
92
  */
61
93
  skipGotPatterns?: string[];
94
+
95
+ /**
96
+ * Additional resource types to intercept via Superagent.
97
+ * These are added on top of the defaults: `["stylesheet", "script", "font"]` (plus `"image"` whenever `blockImage` is not enabled).
98
+ *
99
+ * - `"document"` — HTML pages
100
+ * - `"stylesheet"` — CSS files
101
+ * - `"image"` — Images (png, jpg, svg, etc.)
102
+ * - `"media"` — Audio/Video
103
+ * - `"font"` — Web fonts (woff, woff2, ttf, etc.)
104
+ * - `"script"` — JavaScript files
105
+ * - `"xhr"` — XMLHttpRequest calls
106
+ * - `"fetch"` — Fetch API calls
107
+ *
108
+ * @example `gotAddResources: ["document", "xhr"]`
109
+ */
110
+ gotAddResources?: ResourceType[];
111
+
112
+ /**
113
+ * Resource types to exclude from Superagent interception.
114
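+ * These are removed after the defaults `["stylesheet", "script", "font"]` and any `gotAddResources` have been merged, so a type listed here is not intercepted even if it was also added; listing `"image"` also turns off the automatic image interception.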
115
+ *
116
+ * - `"document"` — HTML pages
117
+ * - `"stylesheet"` — CSS files
118
+ * - `"image"` — Images (png, jpg, svg, etc.)
119
+ * - `"media"` — Audio/Video
120
+ * - `"font"` — Web fonts (woff, woff2, ttf, etc.)
121
+ * - `"script"` — JavaScript files
122
+ * - `"xhr"` — XMLHttpRequest calls
123
+ * - `"fetch"` — Fetch API calls
124
+ *
125
+ * @example `gotRemoveResources: ["font"]`
126
+ */
127
+ gotRemoveResources?: ResourceType[];
62
128
  }
63
129
 
64
130
  /**
65
131
  * Sets up request interception, caching, and ad-blocking on a Puppeteer page.
132
+ * Returns a control object for runtime resource management.
66
133
  */
67
- export function ppRoute(options: PpRouteOptions): Promise<void>;
134
+ export function ppRoute(options: PpRouteOptions): Promise<RouteControl>;
68
135
 
69
136
  /**
70
137
  * Starts logging cache statistics to the console at a set interval.
@@ -76,7 +76,7 @@ function createProxyAgent(proxyUrl) {
76
76
  * Only keeps essential headers needed for the server to respond correctly.
77
77
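  * Everything else (cookies, auth, fingerprint hints, HTTP/2 pseudo-headers, etc.) is stripped, except that document/xhr/fetch requests also keep cookie, authorization, origin, content-type and x-requested-with.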
78
78
  */
79
- function sanitizeRequestHeaders(headers, logger, url) {
79
+ function sanitizeRequestHeaders(headers, logger, url, resourceType) {
80
80
  // Allowlist of essential request headers to keep
81
81
  const allowedHeaders = new Set([
82
82
  "accept",
@@ -93,6 +93,16 @@ function sanitizeRequestHeaders(headers, logger, url) {
93
93
  "sec-fetch-site",
94
94
  ]);
95
95
 
96
+ // For document/xhr/fetch types, preserve session-critical headers
97
+ const sessionTypes = new Set(["document", "xhr", "fetch"]);
98
+ if (sessionTypes.has(resourceType)) {
99
+ allowedHeaders.add("cookie");
100
+ allowedHeaders.add("authorization");
101
+ allowedHeaders.add("origin");
102
+ allowedHeaders.add("content-type");
103
+ allowedHeaders.add("x-requested-with");
104
+ }
105
+
96
106
  const cleaned = {};
97
107
  const stripped = [];
98
108
 
@@ -163,7 +173,7 @@ function sanitizeResponseHeaders(headers, logger, url) {
163
173
  * @param {boolean} stripLogger - Whether to log stripped headers
164
174
  * @returns {Promise<Object>} - The response object containing status, headers, and body
165
175
  */
166
- async function fetchWithClient(useCache, url, requestHeaders, method, useFullUrl, logger, proxyAgent, stripHeaders, stripLogger) {
176
+ async function fetchWithClient(useCache, url, requestHeaders, method, useFullUrl, logger, proxyAgent, stripHeaders, stripLogger, resourceType) {
167
177
  // Determine the cache key based on configuration
168
178
  let mainUrl = new URL(url).origin + new URL(url).pathname;
169
179
  if (useFullUrl) {
@@ -183,7 +193,7 @@ async function fetchWithClient(useCache, url, requestHeaders, method, useFullUrl
183
193
 
184
194
  try {
185
195
  // Sanitize outgoing request headers if stripHeaders is enabled
186
- const finalHeaders = stripHeaders ? sanitizeRequestHeaders(requestHeaders, stripLogger, url) : requestHeaders;
196
+ const finalHeaders = stripHeaders ? sanitizeRequestHeaders(requestHeaders, stripLogger, url, resourceType) : requestHeaders;
187
197
 
188
198
  // Fetch the resource using superagent
189
199
  // buffer(true) ensures we get the raw binary data (essential for images/fonts)
@@ -240,6 +250,8 @@ async function fetchWithClient(useCache, url, requestHeaders, method, useFullUrl
240
250
  * @param {Object} options.m4w_send_on_message - Custom handler data for Doublelist messages
241
251
  * @param {Array<string>} options.allowImagePatterns - Array of strings/patterns. If a URL contains any of these, it will NOT be blocked even if blockImage is true.
242
252
  * @param {Array<string>} options.skipGotPatterns - Array of strings/patterns. If a URL contains any of these, it will skip the custom Superagent fetch.
253
+ * @param {Array<string>} options.gotAddResources - Additional resource types to intercept via Superagent, e.g. ["document", "xhr", "fetch"]
254
+ * @param {Array<string>} options.gotRemoveResources - Resource types to exclude from Superagent interception, e.g. ["font"]
243
255
  */
244
256
  export async function ppRoute({
245
257
  page = null,
@@ -256,6 +268,8 @@ export async function ppRoute({
256
268
  m4w_send_on_message = null,
257
269
  allowImagePatterns = [], // Default empty, merged inside
258
270
  skipGotPatterns = [], // Default empty, merged inside
271
+ gotAddResources = [], // Default empty, add to intercepted types
272
+ gotRemoveResources = [], // Default empty, remove from intercepted types
259
273
  }) {
260
274
  // Validation: Ensure we have a page
261
275
  if (!page) {
@@ -289,16 +303,25 @@ export async function ppRoute({
289
303
  }
290
304
  }
291
305
 
306
+ // Supported resource types reference (for documentation)
307
+ // "document", "stylesheet", "image", "media", "font", "script",
308
+ // "texttrack", "xhr", "fetch", "eventsource", "websocket", "manifest", "other"
309
+
292
310
  // Define resource types to intercept for custom fetching (useGot)
293
- const interceptedResourceTypes = ["stylesheet", "script", "font"];
311
+ // Start with defaults, add user extras, then remove user exclusions
312
+ const defaultResourceTypes = ["stylesheet", "script", "font"];
313
+ const interceptedResourceTypes = new Set([...defaultResourceTypes, ...gotAddResources]);
314
+ for (const type of gotRemoveResources) {
315
+ interceptedResourceTypes.delete(type);
316
+ }
294
317
 
295
318
  // Create proxy agent once (reused for all requests in this route)
296
319
  const proxyUrl = formatProxyUrl(proxy);
297
320
  const proxyAgent = createProxyAgent(proxyUrl);
298
321
 
299
322
  // If images are NOT blocked, we generally want to intercept/cache them too.
300
- if (!blockImage) {
301
- interceptedResourceTypes.push("image");
323
+ if (!blockImage && !gotRemoveResources.includes("image")) {
324
+ interceptedResourceTypes.add("image");
302
325
  }
303
326
 
304
327
  // Enable request interception in Puppeteer
@@ -430,7 +453,7 @@ export async function ppRoute({
430
453
  // ============================================================
431
454
  // Group 6: Resource Interception (Custom Fetch/Cache)
432
455
  // ============================================================
433
- if (useGot && interceptedResourceTypes.includes(resourceType) && !url.startsWith("data:")) {
456
+ if (useGot && interceptedResourceTypes.has(resourceType) && !url.startsWith("data:")) {
434
457
  // Check against the normalized host list (defaults + user input)
435
458
  let shouldSkipGot = false;
436
459
  try {
@@ -450,7 +473,8 @@ export async function ppRoute({
450
473
  logger,
451
474
  proxyAgent,
452
475
  stripGotHeaders,
453
- stripGotLogger
476
+ stripGotLogger,
477
+ resourceType
454
478
  );
455
479
 
456
480
  if (response) {
@@ -474,4 +498,20 @@ export async function ppRoute({
474
498
  await request.continue();
475
499
  return;
476
500
  });
501
+
502
+ // Return control object for runtime resource management
503
+ return {
504
+ /** Add a resource type to intercept via Superagent at runtime */
505
+ addResource(type) {
506
+ interceptedResourceTypes.add(type);
507
+ },
508
+ /** Remove a resource type from Superagent interception at runtime */
509
+ removeResource(type) {
510
+ interceptedResourceTypes.delete(type);
511
+ },
512
+ /** Get currently intercepted resource types */
513
+ getResources() {
514
+ return [...interceptedResourceTypes];
515
+ },
516
+ };
477
517
  }