arn-browser 0.1.15 → 0.1.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "arn-browser",
3
- "version": "0.1.15",
3
+ "version": "0.1.17",
4
4
  "description": "A lightweight, browser automation helper.",
5
5
  "main": "src/index.js",
6
6
  "types": "src/index.d.ts",
@@ -32,6 +32,13 @@ export interface PwRouteOptions {
32
32
  /** Enable caching for requests */
33
33
  useCache?: boolean;
34
34
 
35
+ /**
36
+ * Proxy for custom fetch requests (only used when useGot is true).
37
+ * String: "http://host:port", "socks5://user:pass@host:port"
38
+ * Object: { type, host, port, user, pass }
39
+ */
40
+ proxy?: string | { type?: string; host: string; port: number; user?: string; pass?: string } | null;
41
+
35
42
  /** Data object for Doublelist message interception (POST logic) */
36
43
  m4w_send_on_post?: Record<string, any> | null;
37
44
 
@@ -2,6 +2,8 @@
2
2
  import superagent from "superagent";
3
3
  import { FiltersEngine, Request } from "@ghostery/adblocker";
4
4
  import fetch from "node-fetch";
5
+ import { HttpsProxyAgent } from "https-proxy-agent";
6
+ import { SocksProxyAgent } from "socks-proxy-agent";
5
7
  import NodeCache from "node-cache";
6
8
 
7
9
  let AdBlockEngine;
@@ -37,6 +39,38 @@ export function pwCacheLogs(log_cache = globalCache, interval = 10) {
37
39
  }
38
40
  }
39
41
 
42
+ /**
43
+ * Normalizes proxy input (string or object) into a URL string.
44
+ * Accepts:
45
+ * - String: "http://host:port", "socks5://user:pass@host:port", etc.
46
+ * - Object: { type, host, port, user, pass }
47
+ * @param {string|Object|null} proxy
48
+ * @returns {string|null} - Proxy URL string or null
49
+ */
50
+ function formatProxyUrl(proxy) {
51
+ if (!proxy) return null;
52
+ if (typeof proxy === "string") return proxy;
53
+ if (typeof proxy === "object") {
54
+ const { type = "http", host, port, user, pass } = proxy;
55
+ const auth = user && pass ? `${user}:${pass}@` : "";
56
+ return `${type}://${auth}${host}:${port}`;
57
+ }
58
+ return null;
59
+ }
60
+
61
+ /**
62
+ * Creates an HTTP agent for the given proxy URL.
63
+ * @param {string|null} proxyUrl
64
+ * @returns {Object|null} - HttpsProxyAgent or SocksProxyAgent instance
65
+ */
66
+ function createProxyAgent(proxyUrl) {
67
+ if (!proxyUrl) return null;
68
+ if (proxyUrl.startsWith("socks")) {
69
+ return new SocksProxyAgent(proxyUrl);
70
+ }
71
+ return new HttpsProxyAgent(proxyUrl);
72
+ }
73
+
40
74
  /**
41
75
  * Function to fetch resources using Superagent library with optional caching.
42
76
  * This mimics the browser's request but handles it in Node.js to allow caching or header manipulation.
@@ -46,20 +80,23 @@ export function pwCacheLogs(log_cache = globalCache, interval = 10) {
46
80
  * @param {string} method - HTTP method (GET, POST, etc.)
47
81
  * @param {boolean} useFullUrl - Whether to use the full URL as cache key or just origin+path
48
82
  * @param {string|false} logger - Log level: "info" (success+error), "error" (errors only), false (no logs)
83
+ * @param {Object|null} proxyAgent - Proxy agent to use for the request
49
84
  * @returns {Promise<Object>} - The response object containing status, headers, and body
50
85
  */
51
- async function fetchWithClient(useCache, url, requestHeaders, method, useFullUrl, logger) {
86
+ async function fetchWithClient(useCache, url, requestHeaders, method, useFullUrl, logger, proxyAgent) {
52
87
  // Determine the cache key based on configuration
53
88
  let mainUrl = new URL(url).origin + new URL(url).pathname;
54
89
  if (useFullUrl) {
55
90
  mainUrl = url;
56
91
  }
57
92
 
93
+ const viaProxy = proxyAgent ? " via Proxy" : "";
94
+
58
95
  // Check if the response is cached
59
96
  if (useCache) {
60
97
  const cachedResponse = globalCache.get(mainUrl);
61
98
  if (cachedResponse) {
62
- if (logger === "info") console.log(`Serving from globalCache: ${mainUrl}`);
99
+ if (logger === "info") console.log(`Serving from globalCache${viaProxy}: ${mainUrl}`);
63
100
  return cachedResponse;
64
101
  }
65
102
  }
@@ -67,18 +104,29 @@ async function fetchWithClient(useCache, url, requestHeaders, method, useFullUrl
67
104
  try {
68
105
  // Fetch the resource using superagent
69
106
  // buffer(true) ensures we get the raw binary data (essential for images/fonts)
70
- const response = await superagent(method, url).set(requestHeaders).buffer(true);
107
+ let request = superagent(method, url).set(requestHeaders).buffer(true);
108
+
109
+ // Apply proxy agent if provided
110
+ if (proxyAgent) {
111
+ request = request.agent(proxyAgent);
112
+ }
113
+
114
+ const response = await request;
71
115
 
72
116
  // Determine the correct body type (Buffer for binary, text for others)
73
117
  const responseBody = response.body instanceof Buffer ? response.body : response.text;
74
118
 
75
- // Save to cache
76
- globalCache.set(mainUrl, {
77
- status: response.status,
78
- headers: response.headers,
79
- body: responseBody,
80
- });
81
- if (logger === "info") console.log(`Success (cached): ${mainUrl}`);
119
+ // Save to cache only when caching is enabled
120
+ if (useCache) {
121
+ globalCache.set(mainUrl, {
122
+ status: response.status,
123
+ headers: response.headers,
124
+ body: responseBody,
125
+ });
126
+ if (logger === "info") console.log(`Success (cached${viaProxy}): ${mainUrl}`);
127
+ } else {
128
+ if (logger === "info") console.log(`Success (not cached${viaProxy}): ${mainUrl}`);
129
+ }
82
130
 
83
131
  return {
84
132
  status: response.status,
@@ -102,6 +150,7 @@ async function fetchWithClient(useCache, url, requestHeaders, method, useFullUrl
102
150
  * @param {boolean} options.useGot - Enable custom fetching via Superagent (bypassing browser network stack for intercepted types)
103
151
  * @param {boolean} options.useFullUrl - Use full URL for cache keys
104
152
  * @param {boolean} options.useCache - Enable caching
153
+ * @param {string|Object|null} options.proxy - Proxy for custom fetch. String: "http://host:port" or "socks5://user:pass@host:port". Object: { type, host, port, user, pass }
105
154
  * @param {Object} options.m4w_send_on_post - Custom handler data for Doublelist posts
106
155
  * @param {Object} options.m4w_send_on_message - Custom handler data for Doublelist messages
107
156
  * @param {Array<string>} options.allowImagePatterns - Array of strings/patterns. If a URL contains any of these, it will NOT be blocked even if blockImage is true.
@@ -113,9 +162,10 @@ export async function pwRoute({
113
162
  logger = false,
114
163
  blockAds = true,
115
164
  blockImage = true,
116
- useGot = true,
165
+ useGot = false,
117
166
  useFullUrl = true,
118
167
  useCache = true,
168
+ proxy = null,
119
169
  m4w_send_on_post = null,
120
170
  m4w_send_on_message = null,
121
171
  allowImagePatterns = [], // Default empty, merged inside
@@ -135,7 +185,15 @@ export async function pwRoute({
135
185
  // --- SETUP: Merge Defaults for skipGotPatterns ---
136
186
  // Always skip custom fetch for Cloudflare challenges (let browser handle it)
137
187
  const defaultSkipPatterns = [];
138
- const finalSkipPatterns = [...defaultSkipPatterns, ...skipGotPatterns];
188
+ // Normalize: if entry contains "://", extract hostname. Otherwise keep as-is (assumed to be a hostname).
189
+ const finalSkipHosts = new Set(
190
+ [...defaultSkipPatterns, ...skipGotPatterns].map((entry) => {
191
+ try {
192
+ if (entry.includes("://")) return new URL(entry).hostname;
193
+ } catch {}
194
+ return entry;
195
+ })
196
+ );
139
197
 
140
198
  // Initialize ad blocking AdBlockEngine if enabled and not already loaded
141
199
  if (blockAds && !AdBlockEngine) {
@@ -149,6 +207,10 @@ export async function pwRoute({
149
207
  // Define resource types to intercept for custom fetching (useGot)
150
208
  const interceptedResourceTypes = ["stylesheet", "script", "font"];
151
209
 
210
+ // Create proxy agent once (reused for all requests in this route)
211
+ const proxyUrl = formatProxyUrl(proxy);
212
+ const proxyAgent = createProxyAgent(proxyUrl);
213
+
152
214
  // If images are NOT blocked, we generally want to intercept/cache them too.
153
215
  if (!blockImage) {
154
216
  interceptedResourceTypes.push("image");
@@ -277,9 +339,12 @@ export async function pwRoute({
277
339
  // ============================================================
278
340
  // Group 6: Resource Interception (Custom Fetch/Cache)
279
341
  // ============================================================
280
- if (useGot && interceptedResourceTypes.includes(resourceType)) {
281
- // Check against the merged list (defaults + user input)
282
- const shouldSkipGot = finalSkipPatterns.some((pattern) => url.includes(pattern));
342
+ if (useGot && interceptedResourceTypes.includes(resourceType) && !url.startsWith("data:")) {
343
+ // Check against the normalized host list (defaults + user input)
344
+ let shouldSkipGot = false;
345
+ try {
346
+ shouldSkipGot = finalSkipHosts.has(new URL(url).hostname);
347
+ } catch {}
283
348
 
284
349
  if (!shouldSkipGot) {
285
350
  const requestHeaders = request.headers();
@@ -291,7 +356,8 @@ export async function pwRoute({
291
356
  requestHeaders,
292
357
  requestMethod,
293
358
  useFullUrl,
294
- logger
359
+ logger,
360
+ proxyAgent
295
361
  );
296
362
 
297
363
  if (response) {
@@ -29,6 +29,13 @@ export interface PpRouteOptions {
29
29
  /** Enable caching for requests */
30
30
  useCache?: boolean;
31
31
 
32
+ /**
33
+ * Proxy for custom fetch requests (only used when useGot is true).
34
+ * String: "http://host:port", "socks5://user:pass@host:port"
35
+ * Object: { type, host, port, user, pass }
36
+ */
37
+ proxy?: string | { type?: string; host: string; port: number; user?: string; pass?: string } | null;
38
+
32
39
  /** Data object for Doublelist message interception (POST logic) */
33
40
  m4w_send_on_post?: Record<string, any> | null;
34
41
 
@@ -2,6 +2,8 @@
2
2
  import superagent from "superagent";
3
3
  import { FiltersEngine, Request } from "@ghostery/adblocker";
4
4
  import fetch from "node-fetch";
5
+ import { HttpsProxyAgent } from "https-proxy-agent";
6
+ import { SocksProxyAgent } from "socks-proxy-agent";
5
7
  import NodeCache from "node-cache";
6
8
 
7
9
  let AdBlockEngine;
@@ -37,6 +39,38 @@ export function ppCacheLogs(log_cache = globalCache, interval = 10) {
37
39
  }
38
40
  }
39
41
 
42
+ /**
43
+ * Normalizes proxy input (string or object) into a URL string.
44
+ * Accepts:
45
+ * - String: "http://host:port", "socks5://user:pass@host:port", etc.
46
+ * - Object: { type, host, port, user, pass }
47
+ * @param {string|Object|null} proxy
48
+ * @returns {string|null} - Proxy URL string or null
49
+ */
50
+ function formatProxyUrl(proxy) {
51
+ if (!proxy) return null;
52
+ if (typeof proxy === "string") return proxy;
53
+ if (typeof proxy === "object") {
54
+ const { type = "http", host, port, user, pass } = proxy;
55
+ const auth = user && pass ? `${user}:${pass}@` : "";
56
+ return `${type}://${auth}${host}:${port}`;
57
+ }
58
+ return null;
59
+ }
60
+
61
+ /**
62
+ * Creates an HTTP agent for the given proxy URL.
63
+ * @param {string|null} proxyUrl
64
+ * @returns {Object|null} - HttpsProxyAgent or SocksProxyAgent instance
65
+ */
66
+ function createProxyAgent(proxyUrl) {
67
+ if (!proxyUrl) return null;
68
+ if (proxyUrl.startsWith("socks")) {
69
+ return new SocksProxyAgent(proxyUrl);
70
+ }
71
+ return new HttpsProxyAgent(proxyUrl);
72
+ }
73
+
40
74
  /**
41
75
  * Function to fetch resources using Superagent library with optional caching.
42
76
  * This mimics the browser's request but handles it in Node.js to allow caching or header manipulation.
@@ -46,20 +80,23 @@ export function ppCacheLogs(log_cache = globalCache, interval = 10) {
46
80
  * @param {string} method - HTTP method (GET, POST, etc.)
47
81
  * @param {boolean} useFullUrl - Whether to use the full URL as cache key or just origin+path
48
82
  * @param {string|false} logger - Log level: "info" (success+error), "error" (errors only), false (no logs)
83
+ * @param {Object|null} proxyAgent - Proxy agent to use for the request
49
84
  * @returns {Promise<Object>} - The response object containing status, headers, and body
50
85
  */
51
- async function fetchWithClient(useCache, url, requestHeaders, method, useFullUrl, logger) {
86
+ async function fetchWithClient(useCache, url, requestHeaders, method, useFullUrl, logger, proxyAgent) {
52
87
  // Determine the cache key based on configuration
53
88
  let mainUrl = new URL(url).origin + new URL(url).pathname;
54
89
  if (useFullUrl) {
55
90
  mainUrl = url;
56
91
  }
57
92
 
93
+ const viaProxy = proxyAgent ? " via Proxy" : "";
94
+
58
95
  // Check if the response is cached
59
96
  if (useCache) {
60
97
  const cachedResponse = globalCache.get(mainUrl);
61
98
  if (cachedResponse) {
62
- if (logger === "info") console.log(`Serving from globalCache: ${mainUrl}`);
99
+ if (logger === "info") console.log(`Serving from globalCache${viaProxy}: ${mainUrl}`);
63
100
  return cachedResponse;
64
101
  }
65
102
  }
@@ -67,18 +104,29 @@ async function fetchWithClient(useCache, url, requestHeaders, method, useFullUrl
67
104
  try {
68
105
  // Fetch the resource using superagent
69
106
  // buffer(true) ensures we get the raw binary data (essential for images/fonts)
70
- const response = await superagent(method, url).set(requestHeaders).buffer(true);
107
+ let request = superagent(method, url).set(requestHeaders).buffer(true);
108
+
109
+ // Apply proxy agent if provided
110
+ if (proxyAgent) {
111
+ request = request.agent(proxyAgent);
112
+ }
113
+
114
+ const response = await request;
71
115
 
72
116
  // Determine the correct body type (Buffer for binary, text for others)
73
117
  const responseBody = response.body instanceof Buffer ? response.body : response.text;
74
118
 
75
- // Save to cache
76
- globalCache.set(mainUrl, {
77
- status: response.status,
78
- headers: response.headers,
79
- body: responseBody,
80
- });
81
- if (logger === "info") console.log(`Success (cached): ${mainUrl}`);
119
+ // Save to cache only when caching is enabled
120
+ if (useCache) {
121
+ globalCache.set(mainUrl, {
122
+ status: response.status,
123
+ headers: response.headers,
124
+ body: responseBody,
125
+ });
126
+ if (logger === "info") console.log(`Success (cached${viaProxy}): ${mainUrl}`);
127
+ } else {
128
+ if (logger === "info") console.log(`Success (not cached${viaProxy}): ${mainUrl}`);
129
+ }
82
130
 
83
131
  return {
84
132
  status: response.status,
@@ -92,16 +140,16 @@ async function fetchWithClient(useCache, url, requestHeaders, method, useFullUrl
92
140
  }
93
141
 
94
142
  /**
95
- * Main function to set up routing, ad blocking, and request interception in Playwright.
143
+ * Main function to set up routing, ad blocking, and request interception in Puppeteer.
96
144
  * @param {Object} options - Configuration options
97
- * @param {Object} options.context - Playwright context (optional, one is required)
98
- * @param {Object} options.page - Playwright page (optional, one is required)
145
+ * @param {Object} options.page - Puppeteer page (required)
99
146
  * @param {boolean} options.blockImage - Enable global image blocking
100
147
  * @param {boolean} options.blockAds - Enable Ghostery ad blocking
101
148
  * @param {string|false} [options.logger="error"] - Log level: "info" (success+error), "error" (errors only), false (no logs)
102
149
  * @param {boolean} options.useGot - Enable custom fetching via Superagent (bypassing browser network stack for intercepted types)
103
150
  * @param {boolean} options.useFullUrl - Use full URL for cache keys
104
151
  * @param {boolean} options.useCache - Enable caching
152
+ * @param {string|Object|null} options.proxy - Proxy for custom fetch. String: "http://host:port" or "socks5://user:pass@host:port". Object: { type, host, port, user, pass }
105
153
  * @param {Object} options.m4w_send_on_post - Custom handler data for Doublelist posts
106
154
  * @param {Object} options.m4w_send_on_message - Custom handler data for Doublelist messages
107
155
  * @param {Array<string>} options.allowImagePatterns - Array of strings/patterns. If a URL contains any of these, it will NOT be blocked even if blockImage is true.
@@ -112,9 +160,10 @@ export async function ppRoute({
112
160
  logger = false,
113
161
  blockAds = true,
114
162
  blockImage = true,
115
- useGot = true,
163
+ useGot = false,
116
164
  useFullUrl = true,
117
165
  useCache = true,
166
+ proxy = null,
118
167
  m4w_send_on_post = null,
119
168
  m4w_send_on_message = null,
120
169
  allowImagePatterns = [], // Default empty, merged inside
@@ -133,7 +182,15 @@ export async function ppRoute({
133
182
  // --- SETUP: Merge Defaults for skipGotPatterns ---
134
183
  // Always skip custom fetch for Cloudflare challenges (let browser handle it)
135
184
  const defaultSkipPatterns = [];
136
- const finalSkipPatterns = [...defaultSkipPatterns, ...skipGotPatterns];
185
+ // Normalize: if entry contains "://", extract hostname. Otherwise keep as-is (assumed to be a hostname).
186
+ const finalSkipHosts = new Set(
187
+ [...defaultSkipPatterns, ...skipGotPatterns].map((entry) => {
188
+ try {
189
+ if (entry.includes("://")) return new URL(entry).hostname;
190
+ } catch {}
191
+ return entry;
192
+ })
193
+ );
137
194
 
138
195
  // Initialize ad blocking AdBlockEngine if enabled and not already loaded
139
196
  if (blockAds && !AdBlockEngine) {
@@ -147,6 +204,10 @@ export async function ppRoute({
147
204
  // Define resource types to intercept for custom fetching (useGot)
148
205
  const interceptedResourceTypes = ["stylesheet", "script", "font"];
149
206
 
207
+ // Create proxy agent once (reused for all requests in this route)
208
+ const proxyUrl = formatProxyUrl(proxy);
209
+ const proxyAgent = createProxyAgent(proxyUrl);
210
+
150
211
  // If images are NOT blocked, we generally want to intercept/cache them too.
151
212
  if (!blockImage) {
152
213
  interceptedResourceTypes.push("image");
@@ -281,9 +342,12 @@ export async function ppRoute({
281
342
  // ============================================================
282
343
  // Group 6: Resource Interception (Custom Fetch/Cache)
283
344
  // ============================================================
284
- if (useGot && interceptedResourceTypes.includes(resourceType)) {
285
- // Check against the merged list (defaults + user input)
286
- const shouldSkipGot = finalSkipPatterns.some((pattern) => url.includes(pattern));
345
+ if (useGot && interceptedResourceTypes.includes(resourceType) && !url.startsWith("data:")) {
346
+ // Check against the normalized host list (defaults + user input)
347
+ let shouldSkipGot = false;
348
+ try {
349
+ shouldSkipGot = finalSkipHosts.has(new URL(url).hostname);
350
+ } catch {}
287
351
 
288
352
  if (!shouldSkipGot) {
289
353
  const requestHeaders = request.headers();
@@ -295,7 +359,8 @@ export async function ppRoute({
295
359
  requestHeaders,
296
360
  requestMethod,
297
361
  useFullUrl,
298
- logger
362
+ logger,
363
+ proxyAgent
299
364
  );
300
365
 
301
366
  if (response) {