rezo 1.0.42 → 1.0.44
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adapters/curl.cjs +131 -29
- package/dist/adapters/curl.js +131 -29
- package/dist/adapters/entries/curl.d.ts +65 -0
- package/dist/adapters/entries/fetch.d.ts +65 -0
- package/dist/adapters/entries/http.d.ts +65 -0
- package/dist/adapters/entries/http2.d.ts +65 -0
- package/dist/adapters/entries/react-native.d.ts +65 -0
- package/dist/adapters/entries/xhr.d.ts +65 -0
- package/dist/adapters/http2.cjs +209 -22
- package/dist/adapters/http2.js +209 -22
- package/dist/adapters/index.cjs +6 -6
- package/dist/cache/index.cjs +9 -13
- package/dist/cache/index.js +0 -2
- package/dist/core/rezo.cjs +7 -0
- package/dist/core/rezo.js +7 -0
- package/dist/crawler/addon/decodo/index.cjs +1 -0
- package/dist/crawler/addon/decodo/index.js +1 -0
- package/dist/crawler/crawler-options.cjs +1 -0
- package/dist/crawler/crawler-options.js +1 -0
- package/dist/crawler/crawler.cjs +1070 -0
- package/dist/crawler/crawler.js +1068 -0
- package/dist/crawler/index.cjs +40 -0
- package/dist/{plugin → crawler}/index.js +4 -2
- package/dist/crawler/plugin/file-cacher.cjs +19 -0
- package/dist/crawler/plugin/file-cacher.js +19 -0
- package/dist/crawler/plugin/index.cjs +1 -0
- package/dist/crawler/plugin/index.js +1 -0
- package/dist/crawler/plugin/navigation-history.cjs +43 -0
- package/dist/crawler/plugin/navigation-history.js +43 -0
- package/dist/crawler/plugin/robots-txt.cjs +2 -0
- package/dist/crawler/plugin/robots-txt.js +2 -0
- package/dist/crawler/plugin/url-store.cjs +18 -0
- package/dist/crawler/plugin/url-store.js +18 -0
- package/dist/crawler.d.ts +511 -183
- package/dist/entries/crawler.cjs +5 -5
- package/dist/entries/crawler.js +2 -2
- package/dist/index.cjs +27 -24
- package/dist/index.d.ts +73 -0
- package/dist/index.js +1 -0
- package/dist/internal/agents/base.cjs +113 -0
- package/dist/internal/agents/base.js +110 -0
- package/dist/internal/agents/http-proxy.cjs +89 -0
- package/dist/internal/agents/http-proxy.js +86 -0
- package/dist/internal/agents/https-proxy.cjs +176 -0
- package/dist/internal/agents/https-proxy.js +173 -0
- package/dist/internal/agents/index.cjs +10 -0
- package/dist/internal/agents/index.js +5 -0
- package/dist/internal/agents/socks-client.cjs +571 -0
- package/dist/internal/agents/socks-client.js +567 -0
- package/dist/internal/agents/socks-proxy.cjs +75 -0
- package/dist/internal/agents/socks-proxy.js +72 -0
- package/dist/platform/browser.d.ts +65 -0
- package/dist/platform/bun.d.ts +65 -0
- package/dist/platform/deno.d.ts +65 -0
- package/dist/platform/node.d.ts +65 -0
- package/dist/platform/react-native.d.ts +65 -0
- package/dist/platform/worker.d.ts +65 -0
- package/dist/proxy/index.cjs +18 -16
- package/dist/proxy/index.js +17 -12
- package/dist/queue/index.cjs +8 -8
- package/dist/responses/buildError.cjs +11 -2
- package/dist/responses/buildError.js +11 -2
- package/dist/responses/universal/index.cjs +11 -11
- package/dist/utils/curl.cjs +317 -0
- package/dist/utils/curl.js +314 -0
- package/package.json +2 -6
- package/dist/cache/file-cacher.cjs +0 -264
- package/dist/cache/file-cacher.js +0 -261
- package/dist/cache/url-store.cjs +0 -288
- package/dist/cache/url-store.js +0 -285
- package/dist/plugin/addon/decodo/index.cjs +0 -1
- package/dist/plugin/addon/decodo/index.js +0 -1
- package/dist/plugin/crawler-options.cjs +0 -1
- package/dist/plugin/crawler-options.js +0 -1
- package/dist/plugin/crawler.cjs +0 -519
- package/dist/plugin/crawler.js +0 -517
- package/dist/plugin/index.cjs +0 -36
- /package/dist/{plugin → crawler}/addon/decodo/options.cjs +0 -0
- /package/dist/{plugin → crawler}/addon/decodo/options.js +0 -0
- /package/dist/{plugin → crawler}/addon/decodo/types.cjs +0 -0
- /package/dist/{plugin → crawler}/addon/decodo/types.js +0 -0
- /package/dist/{plugin → crawler}/addon/oxylabs/index.cjs +0 -0
- /package/dist/{plugin → crawler}/addon/oxylabs/index.js +0 -0
- /package/dist/{plugin → crawler}/addon/oxylabs/options.cjs +0 -0
- /package/dist/{plugin → crawler}/addon/oxylabs/options.js +0 -0
- /package/dist/{plugin → crawler}/addon/oxylabs/types.cjs +0 -0
- /package/dist/{plugin → crawler}/addon/oxylabs/types.js +0 -0
- /package/dist/{plugin → crawler}/scraper.cjs +0 -0
- /package/dist/{plugin → crawler}/scraper.js +0 -0
package/dist/adapters/http2.js
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import * as http2 from "node:http2";
|
|
2
|
+
import * as tls from "node:tls";
|
|
2
3
|
import * as zlib from "node:zlib";
|
|
3
4
|
import { URL } from "node:url";
|
|
4
5
|
import { Readable } from "node:stream";
|
|
@@ -14,6 +15,8 @@ import { DownloadResponse } from '../responses/download.js';
|
|
|
14
15
|
import { UploadResponse } from '../responses/upload.js';
|
|
15
16
|
import { CompressionUtil } from '../utils/compression.js';
|
|
16
17
|
import { isSameDomain, RezoPerformance } from '../utils/tools.js';
|
|
18
|
+
import { SocksClient } from '../internal/agents/socks-client.js';
|
|
19
|
+
import * as net from "node:net";
|
|
17
20
|
import { ResponseCache } from '../cache/response-cache.js';
|
|
18
21
|
let zstdDecompressSync = null;
|
|
19
22
|
let zstdChecked = false;
|
|
@@ -194,8 +197,9 @@ class Http2SessionPool {
|
|
|
194
197
|
this.cleanupInterval.unref();
|
|
195
198
|
}
|
|
196
199
|
}
|
|
197
|
-
getSessionKey(url, options) {
|
|
198
|
-
|
|
200
|
+
getSessionKey(url, options, proxy) {
|
|
201
|
+
const proxyKey = proxy ? typeof proxy === "string" ? proxy : `${proxy.protocol}://${proxy.host}:${proxy.port}` : "";
|
|
202
|
+
return `${url.protocol}//${url.host}${proxyKey ? `@${proxyKey}` : ""}`;
|
|
199
203
|
}
|
|
200
204
|
isSessionHealthy(session, entry) {
|
|
201
205
|
if (session.closed || session.destroyed)
|
|
@@ -207,8 +211,8 @@ class Http2SessionPool {
|
|
|
207
211
|
return false;
|
|
208
212
|
return true;
|
|
209
213
|
}
|
|
210
|
-
async getSession(url, options, timeout, forceNew = false) {
|
|
211
|
-
const key = this.getSessionKey(url, options);
|
|
214
|
+
async getSession(url, options, timeout, forceNew = false, proxy) {
|
|
215
|
+
const key = this.getSessionKey(url, options, proxy);
|
|
212
216
|
const existing = this.sessions.get(key);
|
|
213
217
|
if (!forceNew && existing && this.isSessionHealthy(existing.session, existing)) {
|
|
214
218
|
existing.lastUsed = Date.now();
|
|
@@ -221,12 +225,13 @@ class Http2SessionPool {
|
|
|
221
225
|
} catch {}
|
|
222
226
|
this.sessions.delete(key);
|
|
223
227
|
}
|
|
224
|
-
const session = await this.createSession(url, options, timeout);
|
|
228
|
+
const session = await this.createSession(url, options, timeout, proxy);
|
|
225
229
|
const entry = {
|
|
226
230
|
session,
|
|
227
231
|
lastUsed: Date.now(),
|
|
228
232
|
refCount: 1,
|
|
229
|
-
goawayReceived: false
|
|
233
|
+
goawayReceived: false,
|
|
234
|
+
proxy
|
|
230
235
|
};
|
|
231
236
|
this.sessions.set(key, entry);
|
|
232
237
|
session.on("close", () => {
|
|
@@ -240,15 +245,19 @@ class Http2SessionPool {
|
|
|
240
245
|
});
|
|
241
246
|
return session;
|
|
242
247
|
}
|
|
243
|
-
createSession(url, options, timeout) {
|
|
248
|
+
async createSession(url, options, timeout, proxy) {
|
|
249
|
+
const authority = `${url.protocol}//${url.host}`;
|
|
250
|
+
const sessionOptions = {
|
|
251
|
+
...options,
|
|
252
|
+
rejectUnauthorized: options?.rejectUnauthorized !== false,
|
|
253
|
+
ALPNProtocols: ["h2", "http/1.1"],
|
|
254
|
+
timeout
|
|
255
|
+
};
|
|
256
|
+
if (proxy) {
|
|
257
|
+
const tunnelSocket = await this.createProxyTunnel(url, proxy, timeout, options?.rejectUnauthorized);
|
|
258
|
+
sessionOptions.createConnection = () => tunnelSocket;
|
|
259
|
+
}
|
|
244
260
|
return new Promise((resolve, reject) => {
|
|
245
|
-
const authority = `${url.protocol}//${url.host}`;
|
|
246
|
-
const sessionOptions = {
|
|
247
|
-
...options,
|
|
248
|
-
rejectUnauthorized: options?.rejectUnauthorized !== false,
|
|
249
|
-
ALPNProtocols: ["h2", "http/1.1"],
|
|
250
|
-
timeout
|
|
251
|
-
};
|
|
252
261
|
const session = http2.connect(authority, sessionOptions);
|
|
253
262
|
let settled = false;
|
|
254
263
|
const timeoutId = timeout ? setTimeout(() => {
|
|
@@ -279,8 +288,186 @@ class Http2SessionPool {
|
|
|
279
288
|
});
|
|
280
289
|
});
|
|
281
290
|
}
|
|
282
|
-
|
|
283
|
-
|
|
291
|
+
async createProxyTunnel(url, proxy, timeout, rejectUnauthorized) {
|
|
292
|
+
return new Promise((resolve, reject) => {
|
|
293
|
+
let proxyUrl;
|
|
294
|
+
let proxyAuth;
|
|
295
|
+
if (typeof proxy === "string") {
|
|
296
|
+
proxyUrl = new URL(proxy);
|
|
297
|
+
if (proxyUrl.username || proxyUrl.password) {
|
|
298
|
+
proxyAuth = Buffer.from(`${decodeURIComponent(proxyUrl.username)}:${decodeURIComponent(proxyUrl.password)}`).toString("base64");
|
|
299
|
+
}
|
|
300
|
+
} else {
|
|
301
|
+
const protocol = proxy.protocol || "http";
|
|
302
|
+
let proxyUrlStr = `${protocol}://${proxy.host}:${proxy.port}`;
|
|
303
|
+
if (proxy.auth) {
|
|
304
|
+
const encodedUser = encodeURIComponent(proxy.auth.username);
|
|
305
|
+
const encodedPass = encodeURIComponent(proxy.auth.password);
|
|
306
|
+
proxyUrlStr = `${protocol}://${encodedUser}:${encodedPass}@${proxy.host}:${proxy.port}`;
|
|
307
|
+
proxyAuth = Buffer.from(`${proxy.auth.username}:${proxy.auth.password}`).toString("base64");
|
|
308
|
+
}
|
|
309
|
+
proxyUrl = new URL(proxyUrlStr);
|
|
310
|
+
}
|
|
311
|
+
const targetHost = url.hostname;
|
|
312
|
+
const targetPort = url.port || (url.protocol === "https:" ? "443" : "80");
|
|
313
|
+
if (proxyUrl.protocol.startsWith("socks")) {
|
|
314
|
+
const socksType = proxyUrl.protocol === "socks5:" || proxyUrl.protocol === "socks5h:" ? 5 : 4;
|
|
315
|
+
const socksOpts = {
|
|
316
|
+
proxy: {
|
|
317
|
+
host: proxyUrl.hostname,
|
|
318
|
+
port: parseInt(proxyUrl.port || "1080", 10),
|
|
319
|
+
type: socksType,
|
|
320
|
+
userId: proxyUrl.username ? decodeURIComponent(proxyUrl.username) : undefined,
|
|
321
|
+
password: proxyUrl.password ? decodeURIComponent(proxyUrl.password) : undefined
|
|
322
|
+
},
|
|
323
|
+
destination: {
|
|
324
|
+
host: targetHost,
|
|
325
|
+
port: parseInt(targetPort, 10)
|
|
326
|
+
},
|
|
327
|
+
command: "connect",
|
|
328
|
+
timeout
|
|
329
|
+
};
|
|
330
|
+
SocksClient.createConnection(socksOpts).then(({ socket }) => {
|
|
331
|
+
if (url.protocol === "https:") {
|
|
332
|
+
const tlsSocket = tls.connect({
|
|
333
|
+
socket,
|
|
334
|
+
host: targetHost,
|
|
335
|
+
servername: targetHost,
|
|
336
|
+
rejectUnauthorized: rejectUnauthorized !== false,
|
|
337
|
+
ALPNProtocols: ["h2", "http/1.1"]
|
|
338
|
+
});
|
|
339
|
+
const tlsTimeoutId = timeout ? setTimeout(() => {
|
|
340
|
+
tlsSocket.destroy();
|
|
341
|
+
reject(new Error(`TLS handshake timeout after ${timeout}ms`));
|
|
342
|
+
}, timeout) : null;
|
|
343
|
+
tlsSocket.on("secureConnect", () => {
|
|
344
|
+
if (tlsTimeoutId)
|
|
345
|
+
clearTimeout(tlsTimeoutId);
|
|
346
|
+
const alpn = tlsSocket.alpnProtocol;
|
|
347
|
+
if (alpn && alpn !== "h2") {
|
|
348
|
+
tlsSocket.destroy();
|
|
349
|
+
reject(new Error(`Server does not support HTTP/2 (negotiated: ${alpn})`));
|
|
350
|
+
return;
|
|
351
|
+
}
|
|
352
|
+
resolve(tlsSocket);
|
|
353
|
+
});
|
|
354
|
+
tlsSocket.on("error", (err) => {
|
|
355
|
+
if (tlsTimeoutId)
|
|
356
|
+
clearTimeout(tlsTimeoutId);
|
|
357
|
+
reject(new Error(`TLS handshake failed: ${err.message}`));
|
|
358
|
+
});
|
|
359
|
+
} else {
|
|
360
|
+
resolve(socket);
|
|
361
|
+
}
|
|
362
|
+
}).catch((err) => {
|
|
363
|
+
reject(new Error(`SOCKS proxy connection failed: ${err.message}`));
|
|
364
|
+
});
|
|
365
|
+
return;
|
|
366
|
+
}
|
|
367
|
+
const proxyHost = proxyUrl.hostname;
|
|
368
|
+
const proxyPort = parseInt(proxyUrl.port || (proxyUrl.protocol === "https:" ? "443" : "80"), 10);
|
|
369
|
+
let proxySocket;
|
|
370
|
+
const connectToProxy = () => {
|
|
371
|
+
if (proxyUrl.protocol === "https:") {
|
|
372
|
+
proxySocket = tls.connect({
|
|
373
|
+
host: proxyHost,
|
|
374
|
+
port: proxyPort,
|
|
375
|
+
rejectUnauthorized: rejectUnauthorized !== false
|
|
376
|
+
});
|
|
377
|
+
} else {
|
|
378
|
+
proxySocket = net.connect({
|
|
379
|
+
host: proxyHost,
|
|
380
|
+
port: proxyPort
|
|
381
|
+
});
|
|
382
|
+
}
|
|
383
|
+
let settled = false;
|
|
384
|
+
const timeoutId = timeout ? setTimeout(() => {
|
|
385
|
+
if (!settled) {
|
|
386
|
+
settled = true;
|
|
387
|
+
proxySocket.destroy();
|
|
388
|
+
reject(new Error(`Proxy connection timeout after ${timeout}ms`));
|
|
389
|
+
}
|
|
390
|
+
}, timeout) : null;
|
|
391
|
+
proxySocket.on("error", (err) => {
|
|
392
|
+
if (!settled) {
|
|
393
|
+
settled = true;
|
|
394
|
+
if (timeoutId)
|
|
395
|
+
clearTimeout(timeoutId);
|
|
396
|
+
reject(new Error(`Proxy connection error: ${err.message}`));
|
|
397
|
+
}
|
|
398
|
+
});
|
|
399
|
+
proxySocket.on("connect", () => {
|
|
400
|
+
const connectRequest = [
|
|
401
|
+
`CONNECT ${targetHost}:${targetPort} HTTP/1.1`,
|
|
402
|
+
`Host: ${targetHost}:${targetPort}`,
|
|
403
|
+
proxyAuth ? `Proxy-Authorization: Basic ${proxyAuth}` : "",
|
|
404
|
+
"",
|
|
405
|
+
""
|
|
406
|
+
].filter(Boolean).join(`\r
|
|
407
|
+
`);
|
|
408
|
+
proxySocket.write(connectRequest);
|
|
409
|
+
});
|
|
410
|
+
let responseBuffer = "";
|
|
411
|
+
proxySocket.on("data", function onData(data) {
|
|
412
|
+
if (settled)
|
|
413
|
+
return;
|
|
414
|
+
responseBuffer += data.toString();
|
|
415
|
+
const headerEnd = responseBuffer.indexOf(`\r
|
|
416
|
+
\r
|
|
417
|
+
`);
|
|
418
|
+
if (headerEnd !== -1) {
|
|
419
|
+
settled = true;
|
|
420
|
+
if (timeoutId)
|
|
421
|
+
clearTimeout(timeoutId);
|
|
422
|
+
proxySocket.removeListener("data", onData);
|
|
423
|
+
const statusLine = responseBuffer.split(`\r
|
|
424
|
+
`)[0];
|
|
425
|
+
const statusMatch = statusLine.match(/HTTP\/\d\.\d (\d{3})/);
|
|
426
|
+
const statusCode = statusMatch ? parseInt(statusMatch[1], 10) : 0;
|
|
427
|
+
if (statusCode === 200) {
|
|
428
|
+
if (url.protocol === "https:") {
|
|
429
|
+
const tlsSocket = tls.connect({
|
|
430
|
+
socket: proxySocket,
|
|
431
|
+
host: targetHost,
|
|
432
|
+
servername: targetHost,
|
|
433
|
+
rejectUnauthorized: rejectUnauthorized !== false,
|
|
434
|
+
ALPNProtocols: ["h2", "http/1.1"]
|
|
435
|
+
});
|
|
436
|
+
const tlsTimeoutId = timeout ? setTimeout(() => {
|
|
437
|
+
tlsSocket.destroy();
|
|
438
|
+
reject(new Error(`TLS handshake timeout after ${timeout}ms`));
|
|
439
|
+
}, timeout) : null;
|
|
440
|
+
tlsSocket.on("secureConnect", () => {
|
|
441
|
+
if (tlsTimeoutId)
|
|
442
|
+
clearTimeout(tlsTimeoutId);
|
|
443
|
+
const alpn = tlsSocket.alpnProtocol;
|
|
444
|
+
if (alpn && alpn !== "h2") {
|
|
445
|
+
tlsSocket.destroy();
|
|
446
|
+
reject(new Error(`Server does not support HTTP/2 (negotiated: ${alpn})`));
|
|
447
|
+
return;
|
|
448
|
+
}
|
|
449
|
+
resolve(tlsSocket);
|
|
450
|
+
});
|
|
451
|
+
tlsSocket.on("error", (err) => {
|
|
452
|
+
if (tlsTimeoutId)
|
|
453
|
+
clearTimeout(tlsTimeoutId);
|
|
454
|
+
reject(new Error(`TLS handshake failed: ${err.message}`));
|
|
455
|
+
});
|
|
456
|
+
} else {
|
|
457
|
+
resolve(proxySocket);
|
|
458
|
+
}
|
|
459
|
+
} else {
|
|
460
|
+
proxySocket.destroy();
|
|
461
|
+
reject(new Error(`Proxy CONNECT failed with status ${statusCode}: ${statusLine}`));
|
|
462
|
+
}
|
|
463
|
+
}
|
|
464
|
+
});
|
|
465
|
+
};
|
|
466
|
+
connectToProxy();
|
|
467
|
+
});
|
|
468
|
+
}
|
|
469
|
+
releaseSession(url, proxy) {
|
|
470
|
+
const key = this.getSessionKey(url, undefined, proxy);
|
|
284
471
|
const entry = this.sessions.get(key);
|
|
285
472
|
if (entry) {
|
|
286
473
|
entry.refCount = Math.max(0, entry.refCount - 1);
|
|
@@ -293,8 +480,8 @@ class Http2SessionPool {
|
|
|
293
480
|
}
|
|
294
481
|
}
|
|
295
482
|
}
|
|
296
|
-
closeSession(url) {
|
|
297
|
-
const key = this.getSessionKey(url);
|
|
483
|
+
closeSession(url, proxy) {
|
|
484
|
+
const key = this.getSessionKey(url, undefined, proxy);
|
|
298
485
|
const entry = this.sessions.get(key);
|
|
299
486
|
if (entry) {
|
|
300
487
|
entry.session.close();
|
|
@@ -1008,10 +1195,10 @@ async function executeHttp2Stream(config, fetchOptions, requestCount, timing, _s
|
|
|
1008
1195
|
const forceNewSession = requestCount > 0;
|
|
1009
1196
|
let session;
|
|
1010
1197
|
if (config.debug) {
|
|
1011
|
-
console.log(`[Rezo Debug] HTTP/2: Acquiring session for ${url.host}${forceNewSession ? " (forcing new for redirect)" : ""}...`);
|
|
1198
|
+
console.log(`[Rezo Debug] HTTP/2: Acquiring session for ${url.host}${forceNewSession ? " (forcing new for redirect)" : ""}${fetchOptions.proxy ? " (via proxy)" : ""}...`);
|
|
1012
1199
|
}
|
|
1013
1200
|
try {
|
|
1014
|
-
session = await (sessionPool || Http2SessionPool.getInstance()).getSession(url, sessionOptions, config.timeout !== null ? config.timeout : undefined, forceNewSession);
|
|
1201
|
+
session = await (sessionPool || Http2SessionPool.getInstance()).getSession(url, sessionOptions, config.timeout !== null ? config.timeout : undefined, forceNewSession, fetchOptions.proxy);
|
|
1015
1202
|
if (config.debug) {
|
|
1016
1203
|
console.log(`[Rezo Debug] HTTP/2: Session acquired successfully`);
|
|
1017
1204
|
}
|
|
@@ -1240,7 +1427,7 @@ async function executeHttp2Stream(config, fetchOptions, requestCount, timing, _s
|
|
|
1240
1427
|
config.transfer.requestSize = Buffer.byteLength(JSON.stringify(body), "utf8");
|
|
1241
1428
|
}
|
|
1242
1429
|
}
|
|
1243
|
-
(sessionPool || Http2SessionPool.getInstance()).releaseSession(url);
|
|
1430
|
+
(sessionPool || Http2SessionPool.getInstance()).releaseSession(url, fetchOptions.proxy);
|
|
1244
1431
|
if (isRedirect) {
|
|
1245
1432
|
_stats.statusOnNext = "redirect";
|
|
1246
1433
|
const partialResponse = {
|
|
@@ -1417,7 +1604,7 @@ async function executeHttp2Stream(config, fetchOptions, requestCount, timing, _s
|
|
|
1417
1604
|
if (config.debug) {
|
|
1418
1605
|
console.log(`[Rezo Debug] HTTP/2: Error in 'end' handler:`, endError.message);
|
|
1419
1606
|
}
|
|
1420
|
-
(sessionPool || Http2SessionPool.getInstance()).releaseSession(url);
|
|
1607
|
+
(sessionPool || Http2SessionPool.getInstance()).releaseSession(url, fetchOptions.proxy);
|
|
1421
1608
|
const error = buildSmartError(config, fetchOptions, endError);
|
|
1422
1609
|
_stats.statusOnNext = "error";
|
|
1423
1610
|
resolve(error);
|
package/dist/adapters/index.cjs
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
const
|
|
2
|
-
exports.detectRuntime =
|
|
3
|
-
exports.getAdapterCapabilities =
|
|
4
|
-
exports.buildAdapterContext =
|
|
5
|
-
exports.getAvailableAdapters =
|
|
6
|
-
exports.selectAdapter =
|
|
1
|
+
const _mod_ggr948 = require('./picker.cjs');
|
|
2
|
+
exports.detectRuntime = _mod_ggr948.detectRuntime;
|
|
3
|
+
exports.getAdapterCapabilities = _mod_ggr948.getAdapterCapabilities;
|
|
4
|
+
exports.buildAdapterContext = _mod_ggr948.buildAdapterContext;
|
|
5
|
+
exports.getAvailableAdapters = _mod_ggr948.getAvailableAdapters;
|
|
6
|
+
exports.selectAdapter = _mod_ggr948.selectAdapter;;
|
package/dist/cache/index.cjs
CHANGED
|
@@ -1,13 +1,9 @@
|
|
|
1
|
-
const
|
|
2
|
-
exports.LRUCache =
|
|
3
|
-
const
|
|
4
|
-
exports.DNSCache =
|
|
5
|
-
exports.getGlobalDNSCache =
|
|
6
|
-
exports.resetGlobalDNSCache =
|
|
7
|
-
const
|
|
8
|
-
exports.ResponseCache =
|
|
9
|
-
exports.normalizeResponseCacheConfig =
|
|
10
|
-
const _mod_pmo9e4 = require('./file-cacher.cjs');
|
|
11
|
-
exports.FileCacher = _mod_pmo9e4.FileCacher;;
|
|
12
|
-
const _mod_h6jprd = require('./url-store.cjs');
|
|
13
|
-
exports.UrlStore = _mod_h6jprd.UrlStore;;
|
|
1
|
+
const _mod_dh1qvy = require('./lru-cache.cjs');
|
|
2
|
+
exports.LRUCache = _mod_dh1qvy.LRUCache;;
|
|
3
|
+
const _mod_2s112k = require('./dns-cache.cjs');
|
|
4
|
+
exports.DNSCache = _mod_2s112k.DNSCache;
|
|
5
|
+
exports.getGlobalDNSCache = _mod_2s112k.getGlobalDNSCache;
|
|
6
|
+
exports.resetGlobalDNSCache = _mod_2s112k.resetGlobalDNSCache;;
|
|
7
|
+
const _mod_xow5u3 = require('./response-cache.cjs');
|
|
8
|
+
exports.ResponseCache = _mod_xow5u3.ResponseCache;
|
|
9
|
+
exports.normalizeResponseCacheConfig = _mod_xow5u3.normalizeResponseCacheConfig;;
|
package/dist/cache/index.js
CHANGED
|
@@ -1,5 +1,3 @@
|
|
|
1
1
|
export { LRUCache } from './lru-cache.js';
|
|
2
2
|
export { DNSCache, getGlobalDNSCache, resetGlobalDNSCache } from './dns-cache.js';
|
|
3
3
|
export { ResponseCache, normalizeResponseCacheConfig } from './response-cache.js';
|
|
4
|
-
export { FileCacher } from './file-cacher.js';
|
|
5
|
-
export { UrlStore } from './url-store.js';
|
package/dist/core/rezo.cjs
CHANGED
|
@@ -7,6 +7,7 @@ const packageJson = require("../../package.json");
|
|
|
7
7
|
const { createDefaultHooks, mergeHooks, runVoidHooksSync, runTransformHooks } = require('./hooks.cjs');
|
|
8
8
|
const { ResponseCache, DNSCache } = require('../cache/index.cjs');
|
|
9
9
|
const { ProxyManager } = require('../proxy/manager.cjs');
|
|
10
|
+
const { toCurl: toCurlUtil, fromCurl: fromCurlUtil } = require('../utils/curl.cjs');
|
|
10
11
|
let globalAdapter = null;
|
|
11
12
|
function setGlobalAdapter(adapter) {
|
|
12
13
|
globalAdapter = adapter;
|
|
@@ -479,6 +480,12 @@ class Rezo {
|
|
|
479
480
|
clearCookies() {
|
|
480
481
|
this.jar?.removeAllCookiesSync();
|
|
481
482
|
}
|
|
483
|
+
static toCurl(config) {
|
|
484
|
+
return toCurlUtil(config);
|
|
485
|
+
}
|
|
486
|
+
static fromCurl(curlCommand) {
|
|
487
|
+
return fromCurlUtil(curlCommand);
|
|
488
|
+
}
|
|
482
489
|
}
|
|
483
490
|
const defaultTransforms = exports.defaultTransforms = {
|
|
484
491
|
request: [
|
package/dist/core/rezo.js
CHANGED
|
@@ -7,6 +7,7 @@ import packageJson from "../../package.json" with { type: 'json' };
|
|
|
7
7
|
import { createDefaultHooks, mergeHooks, runVoidHooksSync, runTransformHooks } from './hooks.js';
|
|
8
8
|
import { ResponseCache, DNSCache } from '../cache/index.js';
|
|
9
9
|
import { ProxyManager } from '../proxy/manager.js';
|
|
10
|
+
import { toCurl as toCurlUtil, fromCurl as fromCurlUtil } from '../utils/curl.js';
|
|
10
11
|
let globalAdapter = null;
|
|
11
12
|
export function setGlobalAdapter(adapter) {
|
|
12
13
|
globalAdapter = adapter;
|
|
@@ -479,6 +480,12 @@ export class Rezo {
|
|
|
479
480
|
clearCookies() {
|
|
480
481
|
this.jar?.removeAllCookiesSync();
|
|
481
482
|
}
|
|
483
|
+
static toCurl(config) {
|
|
484
|
+
return toCurlUtil(config);
|
|
485
|
+
}
|
|
486
|
+
static fromCurl(curlCommand) {
|
|
487
|
+
return fromCurlUtil(curlCommand);
|
|
488
|
+
}
|
|
482
489
|
}
|
|
483
490
|
export const defaultTransforms = {
|
|
484
491
|
request: [
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
const { Rezo } = require("../../../core/rezo.cjs");
const decodoOptions = require("./options.cjs");

// Re-export the option tables and helper functions from ./options.cjs.
exports.DECODO_DEVICE_TYPES = decodoOptions.DECODO_DEVICE_TYPES;
exports.DECODO_HEADLESS_MODES = decodoOptions.DECODO_HEADLESS_MODES;
exports.DECODO_COMMON_LOCALES = decodoOptions.DECODO_COMMON_LOCALES;
exports.DECODO_COMMON_COUNTRIES = decodoOptions.DECODO_COMMON_COUNTRIES;
exports.DECODO_EUROPEAN_COUNTRIES = decodoOptions.DECODO_EUROPEAN_COUNTRIES;
exports.DECODO_ASIAN_COUNTRIES = decodoOptions.DECODO_ASIAN_COUNTRIES;
exports.DECODO_US_STATES = decodoOptions.DECODO_US_STATES;
exports.DECODO_COMMON_CITIES = decodoOptions.DECODO_COMMON_CITIES;
exports.getRandomDeviceType = decodoOptions.getRandomDeviceType;
exports.getRandomLocale = decodoOptions.getRandomLocale;
exports.getRandomCountry = decodoOptions.getRandomCountry;
exports.getRandomCity = decodoOptions.getRandomCity;
exports.generateSessionId = decodoOptions.generateSessionId;

const DECODO_API_URL = "https://scraper-api.smartproxy.com/v2/scrape";
const BASIC_PREFIX = "Basic ";

/**
 * Client for the Decodo scraping endpoint.
 *
 * Authenticates with HTTP Basic, using either a pre-encoded `token` or a
 * `username`/`password` pair. When both are supplied the token wins.
 */
class Decodo {
  config;
  http;
  authHeader;

  /**
   * @param {object} options - Credentials plus default scrape settings.
   * @throws {Error} When neither username/password nor token is provided.
   */
  constructor(options) {
    const hasCredentials =
      "username" in options && "password" in options && options.username && options.password;
    const hasToken = "token" in options && options.token;
    if (!hasCredentials && !hasToken) {
      throw new Error("Decodo requires either username/password or token for authentication");
    }
    this.config = {
      username: options.username ?? "",
      password: options.password ?? "",
      deviceType: options.deviceType ?? "desktop",
      locale: options.locale ?? "en-US",
      country: options.country ?? "",
      state: options.state ?? "",
      city: options.city ?? "",
      headless: options.headless ?? void 0,
      headers: options.headers ?? {},
      sessionId: options.sessionId ?? "",
      sessionDuration: options.sessionDuration ?? 0,
      javascript: options.javascript ?? "",
      javascriptWait: options.javascriptWait ?? 0,
      waitForCss: options.waitForCss ?? "",
      timeout: options.timeout ?? 120000
    };
    this.http = new Rezo({ baseURL: DECODO_API_URL, timeout: this.config.timeout });
    if (hasToken) {
      this.authHeader = `${BASIC_PREFIX}${options.token}`;
    } else {
      const encoded = Buffer.from(`${options.username}:${options.password}`).toString("base64");
      this.authHeader = `${BASIC_PREFIX}${encoded}`;
    }
  }

  /**
   * Scrapes one URL.
   *
   * @param {string} url - Page to scrape.
   * @param {object} [overrides] - Per-request settings merged over the
   *   instance config (headers are merged key by key).
   * @returns {Promise<object>} Normalised view of the first API result; the
   *   untouched API payload is available as `raw`.
   * @throws {Error} When the API reports an error or returns no results.
   */
  async scrape(url, overrides) {
    const settings = {
      ...this.config,
      ...overrides,
      headers: { ...this.config.headers, ...(overrides?.headers || {}) }
    };
    const body = this.buildRequestBody(url, settings);
    const response = await this.http.postJson(DECODO_API_URL, body, {
      headers: { Authorization: this.authHeader, "Content-Type": "application/json" }
    });
    const payload = response.data;
    if (payload.error) {
      throw new Error(`Decodo API error: ${payload.error} (${payload.error_code || "unknown"})`);
    }
    if (!payload.results || payload.results.length === 0) {
      throw new Error("Decodo API returned no results");
    }
    const first = payload.results[0];
    return {
      statusCode: first.status_code,
      url: first.url,
      content: first.body,
      cookies: first.cookies || [],
      headers: first.headers || {},
      taskId: payload.id,
      rendered: !!settings.headless,
      country: settings.country || void 0,
      city: settings.city || void 0,
      state: settings.state || void 0,
      deviceType: settings.deviceType,
      contentType: first.content_type,
      contentLength: first.content_length,
      raw: payload
    };
  }

  /**
   * Scrapes URLs one after another, waiting `delay` ms between requests
   * (no wait after the last one).
   *
   * @param {string[]} urls
   * @param {object} [overrides] - Passed to every `scrape` call.
   * @param {number} [delay=1000] - Pause between requests in milliseconds.
   * @returns {Promise<object[]>} Results in input order.
   */
  async scrapeMany(urls, overrides, delay = 1000) {
    const results = [];
    for (let index = 0; index < urls.length; index++) {
      results.push(await this.scrape(urls[index], overrides));
      if (index < urls.length - 1 && delay > 0) {
        await new Promise((resume) => setTimeout(resume, delay));
      }
    }
    return results;
  }

  /**
   * Scrapes URLs sequentially under one freshly generated session id, with a
   * 500 ms pause between requests.
   *
   * @param {string[]} urls
   * @param {object} [overrides]
   * @param {number} [sessionDuration=10] - Forwarded as `sessionDuration`
   *   (unit is defined by the API; not visible here).
   * @returns {Promise<object[]>}
   */
  async scrapeWithSession(urls, overrides, sessionDuration = 10) {
    const sessionId = `session_${Date.now()}_${Math.random().toString(36).substring(2, 11)}`;
    return this.scrapeMany(urls, { ...overrides, sessionId, sessionDuration }, 500);
  }

  /**
   * Translates camelCase settings into the API's snake_case request body,
   * leaving out every falsy/empty setting.
   *
   * @param {string} url
   * @param {object} settings
   * @returns {object} JSON-serialisable request body.
   */
  buildRequestBody(url, settings) {
    const body = { url, return_cookies: true, return_headers: true };
    if (settings.deviceType) body.device_type = settings.deviceType;
    if (settings.headless) body.headless = settings.headless;
    if (settings.locale) body.locale = settings.locale;
    if (settings.country) body.country = settings.country;
    if (settings.state) body.state = settings.state;
    if (settings.city) body.city = settings.city;
    if (settings.sessionId) {
      body.session = settings.sessionId;
      if (settings.sessionDuration) body.session_duration = settings.sessionDuration;
    }
    if (settings.headers && Object.keys(settings.headers).length > 0) {
      body.headers = settings.headers;
    }
    if (settings.javascript) {
      body.javascript = settings.javascript;
      if (settings.javascriptWait) body.javascript_wait = settings.javascriptWait;
    }
    if (settings.waitForCss) body.wait_for_css = settings.waitForCss;
    return body;
  }

  /** @returns {object} Copy of the config with the password masked. */
  getConfig() {
    return { ...this.config, password: "***" };
  }

  /**
   * Creates a new client from this one's config plus `overrides`.
   *
   * The token is not stored in `config`, so when the merged settings carry no
   * usable credentials the current Basic token is carried over; otherwise a
   * token-authenticated client could never be cloned.
   *
   * @param {object} overrides
   * @returns {Decodo}
   */
  withConfig(overrides) {
    const merged = { ...this.config, ...overrides };
    const hasCredentials = Boolean(merged.username && merged.password);
    if (!hasCredentials && !merged.token) {
      merged.token = this.authHeader.slice(BASIC_PREFIX.length);
    }
    return new Decodo(merged);
  }

  /**
   * Performs a test scrape of https://httpbin.org/ip.
   *
   * @returns {Promise<boolean>} `true` on success.
   * @throws {Error} Wrapping the underlying failure (kept as `cause`).
   */
  async testConnection() {
    try {
      await this.scrape("https://httpbin.org/ip");
      return true;
    } catch (err) {
      throw new Error(`Decodo connection test failed: ${err.message}`, { cause: err });
    }
  }
}

exports.Decodo = Decodo;
exports.default = Decodo;
module.exports = Object.assign(Decodo, exports);
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
import { Rezo } from "../../../core/rezo.js";

// Re-export the option tables and helper functions from ./options.js.
export {
  DECODO_DEVICE_TYPES,
  DECODO_HEADLESS_MODES,
  DECODO_COMMON_LOCALES,
  DECODO_COMMON_COUNTRIES,
  DECODO_EUROPEAN_COUNTRIES,
  DECODO_ASIAN_COUNTRIES,
  DECODO_US_STATES,
  DECODO_COMMON_CITIES,
  getRandomDeviceType,
  getRandomLocale,
  getRandomCountry,
  getRandomCity,
  generateSessionId
} from "./options.js";

const DECODO_API_URL = "https://scraper-api.smartproxy.com/v2/scrape";
const BASIC_PREFIX = "Basic ";

/**
 * Client for the Decodo scraping endpoint.
 *
 * Authenticates with HTTP Basic, using either a pre-encoded `token` or a
 * `username`/`password` pair. When both are supplied the token wins.
 */
class Decodo {
  config;
  http;
  authHeader;

  /**
   * @param {object} options - Credentials plus default scrape settings.
   * @throws {Error} When neither username/password nor token is provided.
   */
  constructor(options) {
    const hasCredentials =
      "username" in options && "password" in options && options.username && options.password;
    const hasToken = "token" in options && options.token;
    if (!hasCredentials && !hasToken) {
      throw new Error("Decodo requires either username/password or token for authentication");
    }
    this.config = {
      username: options.username ?? "",
      password: options.password ?? "",
      deviceType: options.deviceType ?? "desktop",
      locale: options.locale ?? "en-US",
      country: options.country ?? "",
      state: options.state ?? "",
      city: options.city ?? "",
      headless: options.headless ?? void 0,
      headers: options.headers ?? {},
      sessionId: options.sessionId ?? "",
      sessionDuration: options.sessionDuration ?? 0,
      javascript: options.javascript ?? "",
      javascriptWait: options.javascriptWait ?? 0,
      waitForCss: options.waitForCss ?? "",
      timeout: options.timeout ?? 120000
    };
    this.http = new Rezo({ baseURL: DECODO_API_URL, timeout: this.config.timeout });
    if (hasToken) {
      this.authHeader = `${BASIC_PREFIX}${options.token}`;
    } else {
      const encoded = Buffer.from(`${options.username}:${options.password}`).toString("base64");
      this.authHeader = `${BASIC_PREFIX}${encoded}`;
    }
  }

  /**
   * Scrapes one URL.
   *
   * @param {string} url - Page to scrape.
   * @param {object} [overrides] - Per-request settings merged over the
   *   instance config (headers are merged key by key).
   * @returns {Promise<object>} Normalised view of the first API result; the
   *   untouched API payload is available as `raw`.
   * @throws {Error} When the API reports an error or returns no results.
   */
  async scrape(url, overrides) {
    const settings = {
      ...this.config,
      ...overrides,
      headers: { ...this.config.headers, ...(overrides?.headers || {}) }
    };
    const body = this.buildRequestBody(url, settings);
    const response = await this.http.postJson(DECODO_API_URL, body, {
      headers: { Authorization: this.authHeader, "Content-Type": "application/json" }
    });
    const payload = response.data;
    if (payload.error) {
      throw new Error(`Decodo API error: ${payload.error} (${payload.error_code || "unknown"})`);
    }
    if (!payload.results || payload.results.length === 0) {
      throw new Error("Decodo API returned no results");
    }
    const first = payload.results[0];
    return {
      statusCode: first.status_code,
      url: first.url,
      content: first.body,
      cookies: first.cookies || [],
      headers: first.headers || {},
      taskId: payload.id,
      rendered: !!settings.headless,
      country: settings.country || void 0,
      city: settings.city || void 0,
      state: settings.state || void 0,
      deviceType: settings.deviceType,
      contentType: first.content_type,
      contentLength: first.content_length,
      raw: payload
    };
  }

  /**
   * Scrapes URLs one after another, waiting `delay` ms between requests
   * (no wait after the last one).
   *
   * @param {string[]} urls
   * @param {object} [overrides] - Passed to every `scrape` call.
   * @param {number} [delay=1000] - Pause between requests in milliseconds.
   * @returns {Promise<object[]>} Results in input order.
   */
  async scrapeMany(urls, overrides, delay = 1000) {
    const results = [];
    for (let index = 0; index < urls.length; index++) {
      results.push(await this.scrape(urls[index], overrides));
      if (index < urls.length - 1 && delay > 0) {
        await new Promise((resume) => setTimeout(resume, delay));
      }
    }
    return results;
  }

  /**
   * Scrapes URLs sequentially under one freshly generated session id, with a
   * 500 ms pause between requests.
   *
   * @param {string[]} urls
   * @param {object} [overrides]
   * @param {number} [sessionDuration=10] - Forwarded as `sessionDuration`
   *   (unit is defined by the API; not visible here).
   * @returns {Promise<object[]>}
   */
  async scrapeWithSession(urls, overrides, sessionDuration = 10) {
    const sessionId = `session_${Date.now()}_${Math.random().toString(36).substring(2, 11)}`;
    return this.scrapeMany(urls, { ...overrides, sessionId, sessionDuration }, 500);
  }

  /**
   * Translates camelCase settings into the API's snake_case request body,
   * leaving out every falsy/empty setting.
   *
   * @param {string} url
   * @param {object} settings
   * @returns {object} JSON-serialisable request body.
   */
  buildRequestBody(url, settings) {
    const body = { url, return_cookies: true, return_headers: true };
    if (settings.deviceType) body.device_type = settings.deviceType;
    if (settings.headless) body.headless = settings.headless;
    if (settings.locale) body.locale = settings.locale;
    if (settings.country) body.country = settings.country;
    if (settings.state) body.state = settings.state;
    if (settings.city) body.city = settings.city;
    if (settings.sessionId) {
      body.session = settings.sessionId;
      if (settings.sessionDuration) body.session_duration = settings.sessionDuration;
    }
    if (settings.headers && Object.keys(settings.headers).length > 0) {
      body.headers = settings.headers;
    }
    if (settings.javascript) {
      body.javascript = settings.javascript;
      if (settings.javascriptWait) body.javascript_wait = settings.javascriptWait;
    }
    if (settings.waitForCss) body.wait_for_css = settings.waitForCss;
    return body;
  }

  /** @returns {object} Copy of the config with the password masked. */
  getConfig() {
    return { ...this.config, password: "***" };
  }

  /**
   * Creates a new client from this one's config plus `overrides`.
   *
   * The token is not stored in `config`, so when the merged settings carry no
   * usable credentials the current Basic token is carried over; otherwise a
   * token-authenticated client could never be cloned.
   *
   * @param {object} overrides
   * @returns {Decodo}
   */
  withConfig(overrides) {
    const merged = { ...this.config, ...overrides };
    const hasCredentials = Boolean(merged.username && merged.password);
    if (!hasCredentials && !merged.token) {
      merged.token = this.authHeader.slice(BASIC_PREFIX.length);
    }
    return new Decodo(merged);
  }

  /**
   * Performs a test scrape of https://httpbin.org/ip.
   *
   * @returns {Promise<boolean>} `true` on success.
   * @throws {Error} Wrapping the underlying failure (kept as `cause`).
   */
  async testConnection() {
    try {
      await this.scrape("https://httpbin.org/ip");
      return true;
    } catch (err) {
      throw new Error(`Decodo connection test failed: ${err.message}`, { cause: err });
    }
  }
}

export { Decodo };
export default Decodo;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
const { defineProperty, getOwnPropertyNames, getOwnPropertyDescriptor } = Object;
const hasOwn = Object.prototype.hasOwnProperty;

// Cache of namespace object -> CommonJS exports wrapper, so repeated calls return the same wrapper.
const interopCache = new WeakMap();

/**
 * Wraps an ES-module style namespace object in a CommonJS exports object.
 * The wrapper is flagged with `__esModule` and exposes one getter per own
 * property that exists on `source` AT THE TIME OF THE CALL.
 *
 * @param {object|Function} source - Namespace object holding the exports.
 * @returns {object} The (cached) exports wrapper.
 */
const toCommonJS = (source) => {
  const cached = interopCache.get(source);
  if (cached) return cached;

  const target = defineProperty({}, "__esModule", { value: true });
  if ((source && typeof source === "object") || typeof source === "function") {
    for (const key of getOwnPropertyNames(source)) {
      if (hasOwn.call(target, key)) continue;
      const descriptor = getOwnPropertyDescriptor(source, key);
      defineProperty(target, key, {
        get: () => source[key],
        enumerable: !descriptor || descriptor.enumerable,
      });
    }
  }
  interopCache.set(source, target);
  return target;
};

const moduleNamespace = {};
// The export has to be registered BEFORE toCommonJS() runs, because the wrapper
// only mirrors properties that already exist on the namespace object. Assigning
// the class afterwards (as was done previously) left `module.exports` without a
// `CrawlerOptions` property. The getter is lazy, so referencing the class here,
// ahead of its declaration, is safe as long as it is read after module evaluation.
defineProperty(moduleNamespace, "CrawlerOptions", {
  get: () => CrawlerOptions,
  enumerable: true,
  configurable: true,
});
module.exports = toCommonJS(moduleNamespace);

const { RezoQueue } = require("../queue/queue.cjs");
const { Oxylabs } = require("./addon/oxylabs/index.cjs");
const path = require("node:path");
const os = require("node:os");
const { Decodo } = require("./addon/decodo/index.cjs");

/**
 * Hostname shape accepted by isHostName() and isValidUrl(): one or more
 * dot-terminated labels followed by an alphabetic suffix of two or more letters.
 * No `g`/`y` flag, so sharing the literal is stateless.
 */
const HOSTNAME_PATTERN = /^(?:(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,})$/;

/** Browser descriptors used to synthesise user-agent strings. */
const UA_BROWSERS = [
  { name: "Chrome", version: "91.0.4472.124", engine: "AppleWebKit/537.36" },
  { name: "Firefox", version: "89.0", engine: "Gecko/20100101" },
  { name: "Safari", version: "14.1.1", engine: "AppleWebKit/605.1.15" },
  { name: "Edge", version: "91.0.864.59", engine: "AppleWebKit/537.36" },
  { name: "Opera", version: "77.0.4054.277", engine: "AppleWebKit/537.36" },
  { name: "Vivaldi", version: "3.8.2259.42", engine: "AppleWebKit/537.36" },
  { name: "Brave", version: "1.26.74", engine: "AppleWebKit/537.36" },
  { name: "Chromium", version: "91.0.4472.101", engine: "AppleWebKit/537.36" },
  { name: "Yandex", version: "21.5.3.742", engine: "AppleWebKit/537.36" },
  { name: "Maxthon", version: "5.3.8.2000", engine: "AppleWebKit/537.36" },
];

/** Platform tokens placed inside the parenthesised part of a user-agent string. */
const UA_PLATFORMS = [
  "Windows NT 10.0",
  "Windows NT 6.1",
  "Macintosh; Intel Mac OS X 10_15_7",
  "Macintosh; Intel Mac OS X 11_4_0",
  "X11; Linux x86_64",
  "X11; Ubuntu; Linux x86_64",
];

/** Per-browser user-agent templates, keyed by the `name` field of UA_BROWSERS. */
const UA_TEMPLATES = {
  Chrome: (platform, b) => `Mozilla/5.0 (${platform}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36`,
  Firefox: (platform, b) => `Mozilla/5.0 (${platform}; rv:${b.version}) ${b.engine} Firefox/${b.version}`,
  Safari: (platform, b) => `Mozilla/5.0 (${platform}) ${b.engine} (KHTML, like Gecko) Version/${b.version} Safari/605.1.15`,
  Edge: (platform, b) => `Mozilla/5.0 (${platform}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 Edg/${b.version}`,
  Opera: (platform, b) => `Mozilla/5.0 (${platform}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 OPR/${b.version}`,
  Vivaldi: (platform, b) => `Mozilla/5.0 (${platform}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 Vivaldi/${b.version}`,
  Brave: (platform, b) => `Mozilla/5.0 (${platform}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 Brave/${b.version}`,
  Chromium: (platform, b) => `Mozilla/5.0 (${platform}) ${b.engine} (KHTML, like Gecko) Chromium/${b.version} Chrome/${b.version} Safari/537.36`,
  Yandex: (platform, b) => `Mozilla/5.0 (${platform}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} YaBrowser/${b.version} Safari/537.36`,
  Maxthon: (platform, b) => `Mozilla/5.0 (${platform}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 Maxthon/${b.version}`,
};

/**
 * Runs `regex.test(text)` from the start of the string.
 * Regexes carrying the `g` or `y` flag keep state in `lastIndex`; without the
 * reset, back-to-back tests against different strings can give wrong answers.
 *
 * @param {RegExp} regex
 * @param {string} text
 * @returns {boolean}
 */
function testFromStart(regex, text) {
  regex.lastIndex = 0;
  return regex.test(text);
}

/**
 * Configuration container for the crawler: scalar settings plus per-domain and
 * global lists of request headers, proxies, rate limiters and the Oxylabs /
 * Decodo adapters.
 */
class CrawlerOptions {
  baseUrl;
  adapter;
  enableNavigationHistory;
  sessionId;
  rejectUnauthorized;
  userAgent;
  useRndUserAgent;
  timeout;
  maxRedirects;
  maxRetryAttempts;
  retryDelay;
  retryOnStatusCode;
  forceRevisit;
  retryWithoutProxyOnStatusCode;
  retryOnProxyError;
  maxRetryOnProxyError;
  allowRevisiting;
  enableCache;
  cacheTTL;
  cacheDir;
  throwFatalError;
  debug;
  maxDepth;
  maxUrls;
  maxResponseSize;
  respectRobotsTxt;
  followNofollow;
  autoThrottle;
  autoThrottleTargetDelay;
  autoThrottleMinDelay;
  autoThrottleMaxDelay;
  maxWaitOn429;
  alwaysWaitOn429;
  oxylabs = [];
  decodo = [];
  proxies = [];
  limiters = [];
  requestHeaders = [];
  userAgents = generateUserAgents();

  /**
   * Copies every recognised option onto the instance, substituting the default
   * shown below when an option is `null`/`undefined`, then registers the
   * headers, Oxylabs, Decodo, proxy and limiter configurations (in that order).
   *
   * @param {object} [options] - Crawler settings; see the assignments for defaults.
   */
  constructor(options = {}) {
    this.baseUrl = options.baseUrl || "";
    this.adapter = options.adapter ?? "http";
    this.enableNavigationHistory = options.enableNavigationHistory ?? false;
    this.sessionId = options.sessionId ?? `session_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
    this.rejectUnauthorized = options.rejectUnauthorized ?? true;
    this.userAgent = options.userAgent;
    this.useRndUserAgent = options.useRndUserAgent ?? false;
    this.timeout = options.timeout ?? 30000;
    this.maxRedirects = options.maxRedirects ?? 10;
    this.maxRetryAttempts = options.maxRetryAttempts ?? 3;
    this.retryDelay = options.retryDelay ?? 0;
    this.retryOnStatusCode = options.retryOnStatusCode ?? [408, 429, 500, 502, 503, 504];
    this.forceRevisit = options.forceRevisit ?? false;
    this.retryWithoutProxyOnStatusCode = options.retryWithoutProxyOnStatusCode ?? [407, 403];
    this.retryOnProxyError = options.retryOnProxyError ?? true;
    this.maxRetryOnProxyError = options.maxRetryOnProxyError ?? 3;
    this.allowRevisiting = options.allowRevisiting ?? false;
    this.enableCache = options.enableCache ?? true;
    this.cacheTTL = options.cacheTTL ?? 604800000;
    this.cacheDir = options.cacheDir ?? path.join(os.tmpdir(), "uiniqhtt_cache");
    this.throwFatalError = options.throwFatalError ?? false;
    this.debug = options.debug ?? false;
    this.maxDepth = options.maxDepth ?? 0;
    this.maxUrls = options.maxUrls ?? 0;
    this.maxResponseSize = options.maxResponseSize ?? 0;
    this.respectRobotsTxt = options.respectRobotsTxt ?? false;
    this.followNofollow = options.followNofollow ?? false;
    this.autoThrottle = options.autoThrottle ?? true;
    this.autoThrottleTargetDelay = options.autoThrottleTargetDelay ?? 1000;
    this.autoThrottleMinDelay = options.autoThrottleMinDelay ?? 100;
    this.autoThrottleMaxDelay = options.autoThrottleMaxDelay ?? 60000;
    this.maxWaitOn429 = options.maxWaitOn429 ?? 1800000;
    this.alwaysWaitOn429 = options.alwaysWaitOn429 ?? false;

    this._addHeaders(options.headers);
    this._addOxylabs(options.oxylabs);
    this._addDecodo(options.decodo);
    this._addProxies(options.proxy);
    this._addLimiters(options.limiter);
  }

  /**
   * Maps a configuration type to the list that stores it.
   * Any unrecognised type falls back to the proxy list.
   *
   * @param {string} type - "headers" | "limiters" | "oxylabs" | "decodo" | anything else (proxies).
   * @returns {Array<object>}
   */
  _entriesFor(type) {
    switch (type) {
      case "headers":
        return this.requestHeaders;
      case "limiters":
        return this.limiters;
      case "oxylabs":
        return this.oxylabs;
      case "decodo":
        return this.decodo;
      default:
        return this.proxies;
    }
  }

  /**
   * Extracts the payload of a stored entry for the given configuration type.
   *
   * @param {string} type - Same values as accepted by `_entriesFor`.
   * @param {object} entry - An element of the list returned by `_entriesFor(type)`.
   * @returns {*} Headers object, queue, adapter instance or proxy definition.
   */
  _payloadOf(type, entry) {
    switch (type) {
      case "headers":
        return entry.headers;
      case "limiters":
        return entry.pqueue;
      case "oxylabs":
      case "decodo":
        // "adaptar" (sic) is the property name the _add* methods store under.
        return entry.adaptar;
      default:
        return entry.proxy;
    }
  }

  /**
   * Lists the distinct `domain` values configured for one configuration type.
   *
   * @param {string} type - "headers" | "limiters" | "oxylabs" | "decodo" | anything else (proxies).
   * @returns {Array<*>} Unique domain values in first-seen order.
   */
  getConfiguredDomains(type) {
    const domains = this._entriesFor(type)
      .filter((entry) => entry.domain)
      .map((entry) => entry.domain);
    return [...new Set(domains)];
  }

  /**
   * Removes every entry whose `domain` equals `domain` from all configuration
   * lists. Entries without a domain (global-only entries) are kept.
   *
   * @param {*} domain - Domain value to remove; arrays are compared element-wise.
   * @returns {this}
   */
  removeDomain(domain) {
    const keep = (entry) => !entry.domain || !this._domainsEqual(entry.domain, domain);
    this.requestHeaders = this.requestHeaders.filter(keep);
    this.proxies = this.proxies.filter(keep);
    this.limiters = this.limiters.filter(keep);
    this.oxylabs = this.oxylabs.filter(keep);
    this.decodo = this.decodo.filter(keep);
    return this;
  }

  /**
   * Compares two domain values: arrays element-by-element with `===`,
   * everything else with `===`.
   *
   * @param {*} a
   * @param {*} b
   * @returns {boolean}
   */
  _domainsEqual(a, b) {
    if (Array.isArray(a) && Array.isArray(b)) {
      return a.length === b.length && a.every((item, index) => item === b[index]);
    }
    return a === b;
  }

  /**
   * Counts total, global and domain-specific entries for each configuration list.
   *
   * @returns {{headers: object, proxies: object, limiters: object, oxylabs: object, decodo: object}}
   *   Each value has the shape `{ total, global, domainSpecific }`.
   */
  getConfigurationSummary() {
    const summarize = (entries) => ({
      total: entries.length,
      global: entries.filter((entry) => entry.isGlobal).length,
      domainSpecific: entries.filter((entry) => !entry.isGlobal && entry.domain).length,
    });
    return {
      headers: summarize(this.requestHeaders),
      proxies: summarize(this.proxies),
      limiters: summarize(this.limiters),
      oxylabs: summarize(this.oxylabs),
      decodo: summarize(this.decodo),
    };
  }

  /**
   * Registers header sets. Entries that are neither domain-bound nor global,
   * or whose header set is empty, are skipped. `Headers` instances are
   * converted to plain objects before being stored.
   *
   * @param {{enable?: boolean, httpHeaders?: Array<object>}} [config]
   */
  _addHeaders(config) {
    if (!config || !config.enable) return;
    for (const item of config.httpHeaders ?? []) {
      const { domain, isGlobal } = item;
      let { headers } = item;
      if (!domain && !isGlobal) continue;
      if (headers instanceof Headers) {
        headers = Object.fromEntries(headers.entries());
      }
      if (!headers || Object.keys(headers).length < 1) continue;
      this.requestHeaders.push({ domain, isGlobal, headers });
    }
  }

  /**
   * Registers proxies. Entries that are neither domain-bound nor global, or
   * whose proxy definition is empty, are skipped.
   *
   * @param {{enable?: boolean, proxies?: Array<object>}} [config]
   */
  _addProxies(config) {
    if (!config || !config.enable) return;
    for (const { domain, isGlobal, proxy } of config.proxies ?? []) {
      if (!domain && !isGlobal) continue;
      if (!proxy || Object.keys(proxy).length < 1) continue;
      this.proxies.push({ domain, isGlobal, proxy });
    }
  }

  /**
   * Registers rate limiters; each accepted entry gets its own `RezoQueue`
   * built from the entry's `options`.
   *
   * @param {{enable?: boolean, limiters?: Array<object>}} [config]
   */
  _addLimiters(config) {
    if (!config || !config.enable) return;
    for (const { domain, isGlobal, options } of config.limiters ?? []) {
      if (!domain && !isGlobal) continue;
      if (!options || Object.keys(options).length < 1) continue;
      this.limiters.push({ domain, isGlobal, pqueue: new RezoQueue(options) });
    }
  }

  /**
   * Registers Oxylabs adapters; each accepted entry gets its own `Oxylabs`
   * instance built from the entry's `options`.
   *
   * @param {{enable?: boolean, labs?: Array<object>}} [config]
   */
  _addOxylabs(config) {
    if (!config || !config.enable) return;
    for (const { domain, isGlobal, options } of config.labs ?? []) {
      if (!domain && !isGlobal) continue;
      if (!options || Object.keys(options).length < 1) continue;
      this.oxylabs.push({ domain, isGlobal, adaptar: new Oxylabs(options) });
    }
  }

  /**
   * Registers Decodo adapters; each accepted entry gets its own `Decodo`
   * instance built from the entry's `options`.
   *
   * @param {{enable?: boolean, labs?: Array<object>}} [config]
   */
  _addDecodo(config) {
    if (!config || !config.enable) return;
    for (const { domain, isGlobal, options } of config.labs ?? []) {
      if (!domain && !isGlobal) continue;
      if (!options || Object.keys(options).length < 1) continue;
      this.decodo.push({ domain, isGlobal, adaptar: new Decodo(options) });
    }
  }

  /** Adds one header configuration. @param {object} entry @returns {this} */
  addHeaders(entry) {
    this._addHeaders({ enable: true, httpHeaders: [entry] });
    return this;
  }

  /** Adds one proxy configuration. @param {object} entry @returns {this} */
  addProxy(entry) {
    this._addProxies({ enable: true, proxies: [entry] });
    return this;
  }

  /** Adds one limiter configuration. @param {object} entry @returns {this} */
  addLimiter(entry) {
    this._addLimiters({ enable: true, limiters: [entry] });
    return this;
  }

  /** Adds one Oxylabs configuration. @param {object} entry @returns {this} */
  addOxylabs(entry) {
    this._addOxylabs({ enable: true, labs: [entry] });
    return this;
  }

  /** Adds one Decodo configuration. @param {object} entry @returns {this} */
  addDecodo(entry) {
    this._addDecodo({ enable: true, labs: [entry] });
    return this;
  }

  /**
   * Drops every entry flagged `isGlobal` from all configuration lists.
   * Lists that are not arrays are left alone.
   *
   * @returns {this}
   */
  clearGlobalConfigs() {
    const notGlobal = (entry) => !entry.isGlobal;
    if (Array.isArray(this.requestHeaders)) this.requestHeaders = this.requestHeaders.filter(notGlobal);
    if (Array.isArray(this.oxylabs)) this.oxylabs = this.oxylabs.filter(notGlobal);
    if (Array.isArray(this.decodo)) this.decodo = this.decodo.filter(notGlobal);
    if (Array.isArray(this.limiters)) this.limiters = this.limiters.filter(notGlobal);
    if (Array.isArray(this.proxies)) this.proxies = this.proxies.filter(notGlobal);
    return this;
  }

  /**
   * Resolves the configuration payload that applies to `url`.
   * Domain-specific entries win; when none match and `useGlobal` is truthy,
   * an entry flagged `isGlobal` is used instead.
   *
   * @param {string} url - URL or bare hostname.
   * @param {string} type - "headers" | "limiters" | "oxylabs" | "decodo" | anything else (proxies).
   * @param {boolean} [useGlobal] - Allow falling back to global entries.
   * @param {boolean} [random] - Pick a random candidate instead of the first one.
   * @returns {*} The matching payload, or `null` when nothing applies.
   */
  getAdapter(url, type, useGlobal, random) {
    if (!this.getDomainName(url)) return null;

    const entries = this._entriesFor(type);
    const choose = (candidates) => (random ? candidates[this.rnd(0, candidates.length - 1)] : candidates[0]);

    const domainMatches = entries.filter((entry) => this._hasDomain(url, entry.domain));
    if (domainMatches.length) return this._payloadOf(type, choose(domainMatches));

    if (useGlobal) {
      // Only global entries are candidates here. Choosing among ALL entries and
      // then testing `isGlobal` would return null whenever the chosen entry was
      // domain-specific, even though hasDomain() reports a global match.
      const globals = entries.filter((entry) => entry.isGlobal);
      if (globals.length) return this._payloadOf(type, choose(globals));
    }
    return null;
  }

  /**
   * Returns a random integer in the inclusive range [min, max].
   *
   * @param {number} [min=0]
   * @param {number} [max=Number.MAX_VALUE]
   * @returns {number}
   */
  rnd(min = 0, max = Number.MAX_VALUE) {
    return Math.floor(Math.random() * (max - min + 1)) + min;
  }

  /**
   * Tells whether any entry of the given type applies to `url`.
   *
   * @param {string} url - URL or bare hostname.
   * @param {string} type - Same values as `getAdapter`.
   * @param {boolean} [useGlobal] - Also count entries flagged `isGlobal`.
   * @returns {boolean}
   */
  hasDomain(url, type, useGlobal) {
    if (!this.getDomainName(url)) return false;
    const entries = this._entriesFor(type);
    if (entries.some((entry) => this._hasDomain(url, entry.domain))) return true;
    return Boolean(useGlobal) && entries.some((entry) => entry.isGlobal);
  }

  /**
   * Builds the header map for a request: configured headers for `url` first,
   * then `overrides` on top, then (optionally) a random user agent.
   *
   * @param {string} url - URL or bare hostname.
   * @param {boolean} [useGlobal] - Allow global header sets as the base.
   * @param {Headers|object} [overrides] - Extra headers; for plain objects only string values are applied.
   * @param {boolean} [useRandomUserAgent] - Replace "user-agent" with a generated one.
   * @returns {Object<string, string>}
   */
  pickHeaders(url, useGlobal, overrides, useRandomUserAgent) {
    const configured = this.getAdapter(url, "headers", useGlobal);
    const merged = new Headers(configured ?? {});

    if (overrides instanceof Headers) {
      // Iterate the Headers iterator directly; Object.entries() of an iterator
      // is always empty, which used to drop every override silently.
      for (const [name, value] of overrides.entries()) merged.set(name, value);
    } else if (overrides && typeof overrides === "object") {
      for (const [name, value] of Object.entries(overrides)) {
        if (typeof value === "string") merged.set(name, value);
      }
    }

    if (useRandomUserAgent) merged.set("user-agent", this.getRandomUserAgent());
    return Object.fromEntries(merged.entries());
  }

  /**
   * Tests whether `url` is covered by a configured domain value.
   * A value may be a RegExp, a string (exact host, `*` wildcard, regex source
   * or `/source/flags` literal, or a parent/child domain) or an array of those.
   *
   * @param {string} url - URL or bare hostname.
   * @param {string|RegExp|Array<string|RegExp>} domain
   * @returns {boolean}
   */
  _hasDomain(url, domain) {
    if (!domain) return false;
    const host = this.getDomainName(url);
    if (!host) return false;
    const hostLower = host.toLowerCase();

    const looksLikeRegex = (text) =>
      /[\^\$\*\+\?\{\}\[\]\(\)\|\\]/.test(text) || text.startsWith("/") || text.includes(".*") || text.includes(".+");

    const matches = (candidate) => {
      if (candidate instanceof RegExp) {
        return testFromStart(candidate, host) || testFromStart(candidate, url);
      }

      const pattern = candidate.toString().trim();
      const patternLower = pattern.toLowerCase();
      if (hostLower === patternLower) return true;

      if (pattern.includes("*")) {
        // Escape everything, then turn the escaped "*" back into ".*".
        const escaped = pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&").replace(/\\\*/g, ".*");
        const wildcard = new RegExp(`^${escaped}$`, "i");
        return wildcard.test(host) || wildcard.test(url);
      }

      if (looksLikeRegex(pattern)) {
        try {
          let source = pattern;
          let flags = "i";
          const literal = pattern.match(/^\/(.*)\/(\w*)$/);
          if (literal) {
            source = literal[1];
            flags = literal[2] || "i";
          }
          const regex = new RegExp(source, flags);
          return testFromStart(regex, host) || testFromStart(regex, url);
        } catch {
          // Not a valid regular expression: fall back to a substring match.
          return hostLower.includes(patternLower);
        }
      }

      return (
        hostLower === patternLower ||
        hostLower.endsWith(`.${patternLower}`) ||
        patternLower.endsWith(`.${hostLower}`)
      );
    };

    return Array.isArray(domain) ? domain.some(matches) : matches(domain);
  }

  /**
   * Extracts the hostname from an http(s) URL, or returns the input unchanged
   * when it already is a bare hostname.
   *
   * @param {string} value
   * @returns {string|null} `null` when `value` is neither.
   */
  getDomainName(value) {
    if (this.isValidUrl(value)) return new URL(value).hostname;
    if (this.isHostName(value)) return value;
    return null;
  }

  /**
   * Checks whether `value` (trimmed, lower-cased) is shaped like a hostname.
   *
   * @param {string} value
   * @returns {boolean}
   */
  isHostName(value) {
    if (!value) return false;
    if (value.length > 255) return false;
    const host = value.trim().toLowerCase();
    // The pattern previously contained a literal space before the final label,
    // which no trimmed hostname can contain, so nothing ever matched.
    return HOSTNAME_PATTERN.test(host) && !host.startsWith("-") && !host.endsWith("-");
  }

  /**
   * Checks whether `value` parses as an http/https URL whose hostname matches
   * HOSTNAME_PATTERN.
   *
   * @param {string} value
   * @returns {boolean}
   */
  isValidUrl(value) {
    if (!value) return false;
    const text = value.trim();
    try {
      const parsed = new URL(text);
      if (!parsed.protocol || !["http:", "https:"].includes(parsed.protocol.toLowerCase())) return false;
      if (!parsed.hostname) return false;
      return HOSTNAME_PATTERN.test(parsed.hostname);
    } catch {
      // new URL() throws on unparsable input; that simply means "not a URL".
      return false;
    }
  }

  /**
   * Returns one of the pre-generated user-agent strings at random.
   *
   * @returns {string}
   */
  getRandomUserAgent() {
    return this.userAgents[Math.floor(Math.random() * this.userAgents.length)];
  }
}

/**
 * Generates 200 user-agent strings, each combining a random browser from
 * UA_BROWSERS with a random platform from UA_PLATFORMS.
 *
 * @returns {string[]}
 */
function generateUserAgents() {
  const agents = [];
  for (let index = 0; index < 200; index++) {
    const browser = UA_BROWSERS[Math.floor(Math.random() * UA_BROWSERS.length)];
    const platform = UA_PLATFORMS[Math.floor(Math.random() * UA_PLATFORMS.length)];
    const template = UA_TEMPLATES[browser.name];
    agents.push(template ? template(platform, browser) : "");
  }
  return agents;
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
import { RezoQueue } from "../queue/queue.js";
import { Oxylabs } from "./addon/oxylabs/index.js";
import path from "node:path";
import os from "node:os";
import { Decodo } from "./addon/decodo/index.js";

/**
 * Hostname shape accepted by isHostName() and isValidUrl(): one or more
 * dot-terminated labels followed by an alphabetic suffix of two or more letters.
 * No `g`/`y` flag, so sharing the literal is stateless.
 */
const HOSTNAME_PATTERN = /^(?:(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,})$/;

/** Browser descriptors used to synthesise user-agent strings. */
const UA_BROWSERS = [
  { name: "Chrome", version: "91.0.4472.124", engine: "AppleWebKit/537.36" },
  { name: "Firefox", version: "89.0", engine: "Gecko/20100101" },
  { name: "Safari", version: "14.1.1", engine: "AppleWebKit/605.1.15" },
  { name: "Edge", version: "91.0.864.59", engine: "AppleWebKit/537.36" },
  { name: "Opera", version: "77.0.4054.277", engine: "AppleWebKit/537.36" },
  { name: "Vivaldi", version: "3.8.2259.42", engine: "AppleWebKit/537.36" },
  { name: "Brave", version: "1.26.74", engine: "AppleWebKit/537.36" },
  { name: "Chromium", version: "91.0.4472.101", engine: "AppleWebKit/537.36" },
  { name: "Yandex", version: "21.5.3.742", engine: "AppleWebKit/537.36" },
  { name: "Maxthon", version: "5.3.8.2000", engine: "AppleWebKit/537.36" },
];

/** Platform tokens placed inside the parenthesised part of a user-agent string. */
const UA_PLATFORMS = [
  "Windows NT 10.0",
  "Windows NT 6.1",
  "Macintosh; Intel Mac OS X 10_15_7",
  "Macintosh; Intel Mac OS X 11_4_0",
  "X11; Linux x86_64",
  "X11; Ubuntu; Linux x86_64",
];

/** Per-browser user-agent templates, keyed by the `name` field of UA_BROWSERS. */
const UA_TEMPLATES = {
  Chrome: (platform, b) => `Mozilla/5.0 (${platform}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36`,
  Firefox: (platform, b) => `Mozilla/5.0 (${platform}; rv:${b.version}) ${b.engine} Firefox/${b.version}`,
  Safari: (platform, b) => `Mozilla/5.0 (${platform}) ${b.engine} (KHTML, like Gecko) Version/${b.version} Safari/605.1.15`,
  Edge: (platform, b) => `Mozilla/5.0 (${platform}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 Edg/${b.version}`,
  Opera: (platform, b) => `Mozilla/5.0 (${platform}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 OPR/${b.version}`,
  Vivaldi: (platform, b) => `Mozilla/5.0 (${platform}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 Vivaldi/${b.version}`,
  Brave: (platform, b) => `Mozilla/5.0 (${platform}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 Brave/${b.version}`,
  Chromium: (platform, b) => `Mozilla/5.0 (${platform}) ${b.engine} (KHTML, like Gecko) Chromium/${b.version} Chrome/${b.version} Safari/537.36`,
  Yandex: (platform, b) => `Mozilla/5.0 (${platform}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} YaBrowser/${b.version} Safari/537.36`,
  Maxthon: (platform, b) => `Mozilla/5.0 (${platform}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 Maxthon/${b.version}`,
};

/**
 * Runs `regex.test(text)` from the start of the string.
 * Regexes carrying the `g` or `y` flag keep state in `lastIndex`; without the
 * reset, back-to-back tests against different strings can give wrong answers.
 *
 * @param {RegExp} regex
 * @param {string} text
 * @returns {boolean}
 */
function testFromStart(regex, text) {
  regex.lastIndex = 0;
  return regex.test(text);
}

/**
 * Configuration container for the crawler: scalar settings plus per-domain and
 * global lists of request headers, proxies, rate limiters and the Oxylabs /
 * Decodo adapters.
 */
class CrawlerOptions {
  baseUrl;
  adapter;
  enableNavigationHistory;
  sessionId;
  rejectUnauthorized;
  userAgent;
  useRndUserAgent;
  timeout;
  maxRedirects;
  maxRetryAttempts;
  retryDelay;
  retryOnStatusCode;
  forceRevisit;
  retryWithoutProxyOnStatusCode;
  retryOnProxyError;
  maxRetryOnProxyError;
  allowRevisiting;
  enableCache;
  cacheTTL;
  cacheDir;
  throwFatalError;
  debug;
  maxDepth;
  maxUrls;
  maxResponseSize;
  respectRobotsTxt;
  followNofollow;
  autoThrottle;
  autoThrottleTargetDelay;
  autoThrottleMinDelay;
  autoThrottleMaxDelay;
  maxWaitOn429;
  alwaysWaitOn429;
  oxylabs = [];
  decodo = [];
  proxies = [];
  limiters = [];
  requestHeaders = [];
  userAgents = generateUserAgents();

  /**
   * Copies every recognised option onto the instance, substituting the default
   * shown below when an option is `null`/`undefined`, then registers the
   * headers, Oxylabs, Decodo, proxy and limiter configurations (in that order).
   *
   * @param {object} [options] - Crawler settings; see the assignments for defaults.
   */
  constructor(options = {}) {
    this.baseUrl = options.baseUrl || "";
    this.adapter = options.adapter ?? "http";
    this.enableNavigationHistory = options.enableNavigationHistory ?? false;
    this.sessionId = options.sessionId ?? `session_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
    this.rejectUnauthorized = options.rejectUnauthorized ?? true;
    this.userAgent = options.userAgent;
    this.useRndUserAgent = options.useRndUserAgent ?? false;
    this.timeout = options.timeout ?? 30000;
    this.maxRedirects = options.maxRedirects ?? 10;
    this.maxRetryAttempts = options.maxRetryAttempts ?? 3;
    this.retryDelay = options.retryDelay ?? 0;
    this.retryOnStatusCode = options.retryOnStatusCode ?? [408, 429, 500, 502, 503, 504];
    this.forceRevisit = options.forceRevisit ?? false;
    this.retryWithoutProxyOnStatusCode = options.retryWithoutProxyOnStatusCode ?? [407, 403];
    this.retryOnProxyError = options.retryOnProxyError ?? true;
    this.maxRetryOnProxyError = options.maxRetryOnProxyError ?? 3;
    this.allowRevisiting = options.allowRevisiting ?? false;
    this.enableCache = options.enableCache ?? true;
    this.cacheTTL = options.cacheTTL ?? 604800000;
    this.cacheDir = options.cacheDir ?? path.join(os.tmpdir(), "uiniqhtt_cache");
    this.throwFatalError = options.throwFatalError ?? false;
    this.debug = options.debug ?? false;
    this.maxDepth = options.maxDepth ?? 0;
    this.maxUrls = options.maxUrls ?? 0;
    this.maxResponseSize = options.maxResponseSize ?? 0;
    this.respectRobotsTxt = options.respectRobotsTxt ?? false;
    this.followNofollow = options.followNofollow ?? false;
    this.autoThrottle = options.autoThrottle ?? true;
    this.autoThrottleTargetDelay = options.autoThrottleTargetDelay ?? 1000;
    this.autoThrottleMinDelay = options.autoThrottleMinDelay ?? 100;
    this.autoThrottleMaxDelay = options.autoThrottleMaxDelay ?? 60000;
    this.maxWaitOn429 = options.maxWaitOn429 ?? 1800000;
    this.alwaysWaitOn429 = options.alwaysWaitOn429 ?? false;

    this._addHeaders(options.headers);
    this._addOxylabs(options.oxylabs);
    this._addDecodo(options.decodo);
    this._addProxies(options.proxy);
    this._addLimiters(options.limiter);
  }

  /**
   * Maps a configuration type to the list that stores it.
   * Any unrecognised type falls back to the proxy list.
   *
   * @param {string} type - "headers" | "limiters" | "oxylabs" | "decodo" | anything else (proxies).
   * @returns {Array<object>}
   */
  _entriesFor(type) {
    switch (type) {
      case "headers":
        return this.requestHeaders;
      case "limiters":
        return this.limiters;
      case "oxylabs":
        return this.oxylabs;
      case "decodo":
        return this.decodo;
      default:
        return this.proxies;
    }
  }

  /**
   * Extracts the payload of a stored entry for the given configuration type.
   *
   * @param {string} type - Same values as accepted by `_entriesFor`.
   * @param {object} entry - An element of the list returned by `_entriesFor(type)`.
   * @returns {*} Headers object, queue, adapter instance or proxy definition.
   */
  _payloadOf(type, entry) {
    switch (type) {
      case "headers":
        return entry.headers;
      case "limiters":
        return entry.pqueue;
      case "oxylabs":
      case "decodo":
        // "adaptar" (sic) is the property name the _add* methods store under.
        return entry.adaptar;
      default:
        return entry.proxy;
    }
  }

  /**
   * Lists the distinct `domain` values configured for one configuration type.
   *
   * @param {string} type - "headers" | "limiters" | "oxylabs" | "decodo" | anything else (proxies).
   * @returns {Array<*>} Unique domain values in first-seen order.
   */
  getConfiguredDomains(type) {
    const domains = this._entriesFor(type)
      .filter((entry) => entry.domain)
      .map((entry) => entry.domain);
    return [...new Set(domains)];
  }

  /**
   * Removes every entry whose `domain` equals `domain` from all configuration
   * lists. Entries without a domain (global-only entries) are kept.
   *
   * @param {*} domain - Domain value to remove; arrays are compared element-wise.
   * @returns {this}
   */
  removeDomain(domain) {
    const keep = (entry) => !entry.domain || !this._domainsEqual(entry.domain, domain);
    this.requestHeaders = this.requestHeaders.filter(keep);
    this.proxies = this.proxies.filter(keep);
    this.limiters = this.limiters.filter(keep);
    this.oxylabs = this.oxylabs.filter(keep);
    this.decodo = this.decodo.filter(keep);
    return this;
  }

  /**
   * Compares two domain values: arrays element-by-element with `===`,
   * everything else with `===`.
   *
   * @param {*} a
   * @param {*} b
   * @returns {boolean}
   */
  _domainsEqual(a, b) {
    if (Array.isArray(a) && Array.isArray(b)) {
      return a.length === b.length && a.every((item, index) => item === b[index]);
    }
    return a === b;
  }

  /**
   * Counts total, global and domain-specific entries for each configuration list.
   *
   * @returns {{headers: object, proxies: object, limiters: object, oxylabs: object, decodo: object}}
   *   Each value has the shape `{ total, global, domainSpecific }`.
   */
  getConfigurationSummary() {
    const summarize = (entries) => ({
      total: entries.length,
      global: entries.filter((entry) => entry.isGlobal).length,
      domainSpecific: entries.filter((entry) => !entry.isGlobal && entry.domain).length,
    });
    return {
      headers: summarize(this.requestHeaders),
      proxies: summarize(this.proxies),
      limiters: summarize(this.limiters),
      oxylabs: summarize(this.oxylabs),
      decodo: summarize(this.decodo),
    };
  }

  /**
   * Registers header sets. Entries that are neither domain-bound nor global,
   * or whose header set is empty, are skipped. `Headers` instances are
   * converted to plain objects before being stored.
   *
   * @param {{enable?: boolean, httpHeaders?: Array<object>}} [config]
   */
  _addHeaders(config) {
    if (!config || !config.enable) return;
    for (const item of config.httpHeaders ?? []) {
      const { domain, isGlobal } = item;
      let { headers } = item;
      if (!domain && !isGlobal) continue;
      if (headers instanceof Headers) {
        headers = Object.fromEntries(headers.entries());
      }
      if (!headers || Object.keys(headers).length < 1) continue;
      this.requestHeaders.push({ domain, isGlobal, headers });
    }
  }

  /**
   * Registers proxies. Entries that are neither domain-bound nor global, or
   * whose proxy definition is empty, are skipped.
   *
   * @param {{enable?: boolean, proxies?: Array<object>}} [config]
   */
  _addProxies(config) {
    if (!config || !config.enable) return;
    for (const { domain, isGlobal, proxy } of config.proxies ?? []) {
      if (!domain && !isGlobal) continue;
      if (!proxy || Object.keys(proxy).length < 1) continue;
      this.proxies.push({ domain, isGlobal, proxy });
    }
  }

  /**
   * Registers rate limiters; each accepted entry gets its own `RezoQueue`
   * built from the entry's `options`.
   *
   * @param {{enable?: boolean, limiters?: Array<object>}} [config]
   */
  _addLimiters(config) {
    if (!config || !config.enable) return;
    for (const { domain, isGlobal, options } of config.limiters ?? []) {
      if (!domain && !isGlobal) continue;
      if (!options || Object.keys(options).length < 1) continue;
      this.limiters.push({ domain, isGlobal, pqueue: new RezoQueue(options) });
    }
  }

  /**
   * Registers Oxylabs adapters; each accepted entry gets its own `Oxylabs`
   * instance built from the entry's `options`.
   *
   * @param {{enable?: boolean, labs?: Array<object>}} [config]
   */
  _addOxylabs(config) {
    if (!config || !config.enable) return;
    for (const { domain, isGlobal, options } of config.labs ?? []) {
      if (!domain && !isGlobal) continue;
      if (!options || Object.keys(options).length < 1) continue;
      this.oxylabs.push({ domain, isGlobal, adaptar: new Oxylabs(options) });
    }
  }

  /**
   * Registers Decodo adapters; each accepted entry gets its own `Decodo`
   * instance built from the entry's `options`.
   *
   * @param {{enable?: boolean, labs?: Array<object>}} [config]
   */
  _addDecodo(config) {
    if (!config || !config.enable) return;
    for (const { domain, isGlobal, options } of config.labs ?? []) {
      if (!domain && !isGlobal) continue;
      if (!options || Object.keys(options).length < 1) continue;
      this.decodo.push({ domain, isGlobal, adaptar: new Decodo(options) });
    }
  }

  /** Adds one header configuration. @param {object} entry @returns {this} */
  addHeaders(entry) {
    this._addHeaders({ enable: true, httpHeaders: [entry] });
    return this;
  }

  /** Adds one proxy configuration. @param {object} entry @returns {this} */
  addProxy(entry) {
    this._addProxies({ enable: true, proxies: [entry] });
    return this;
  }

  /** Adds one limiter configuration. @param {object} entry @returns {this} */
  addLimiter(entry) {
    this._addLimiters({ enable: true, limiters: [entry] });
    return this;
  }

  /** Adds one Oxylabs configuration. @param {object} entry @returns {this} */
  addOxylabs(entry) {
    this._addOxylabs({ enable: true, labs: [entry] });
    return this;
  }

  /** Adds one Decodo configuration. @param {object} entry @returns {this} */
  addDecodo(entry) {
    this._addDecodo({ enable: true, labs: [entry] });
    return this;
  }

  /**
   * Drops every entry flagged `isGlobal` from all configuration lists.
   * Lists that are not arrays are left alone.
   *
   * @returns {this}
   */
  clearGlobalConfigs() {
    const notGlobal = (entry) => !entry.isGlobal;
    if (Array.isArray(this.requestHeaders)) this.requestHeaders = this.requestHeaders.filter(notGlobal);
    if (Array.isArray(this.oxylabs)) this.oxylabs = this.oxylabs.filter(notGlobal);
    if (Array.isArray(this.decodo)) this.decodo = this.decodo.filter(notGlobal);
    if (Array.isArray(this.limiters)) this.limiters = this.limiters.filter(notGlobal);
    if (Array.isArray(this.proxies)) this.proxies = this.proxies.filter(notGlobal);
    return this;
  }

  /**
   * Resolves the configuration payload that applies to `url`.
   * Domain-specific entries win; when none match and `useGlobal` is truthy,
   * an entry flagged `isGlobal` is used instead.
   *
   * @param {string} url - URL or bare hostname.
   * @param {string} type - "headers" | "limiters" | "oxylabs" | "decodo" | anything else (proxies).
   * @param {boolean} [useGlobal] - Allow falling back to global entries.
   * @param {boolean} [random] - Pick a random candidate instead of the first one.
   * @returns {*} The matching payload, or `null` when nothing applies.
   */
  getAdapter(url, type, useGlobal, random) {
    if (!this.getDomainName(url)) return null;

    const entries = this._entriesFor(type);
    const choose = (candidates) => (random ? candidates[this.rnd(0, candidates.length - 1)] : candidates[0]);

    const domainMatches = entries.filter((entry) => this._hasDomain(url, entry.domain));
    if (domainMatches.length) return this._payloadOf(type, choose(domainMatches));

    if (useGlobal) {
      // Only global entries are candidates here. Choosing among ALL entries and
      // then testing `isGlobal` would return null whenever the chosen entry was
      // domain-specific, even though hasDomain() reports a global match.
      const globals = entries.filter((entry) => entry.isGlobal);
      if (globals.length) return this._payloadOf(type, choose(globals));
    }
    return null;
  }

  /**
   * Returns a random integer in the inclusive range [min, max].
   *
   * @param {number} [min=0]
   * @param {number} [max=Number.MAX_VALUE]
   * @returns {number}
   */
  rnd(min = 0, max = Number.MAX_VALUE) {
    return Math.floor(Math.random() * (max - min + 1)) + min;
  }

  /**
   * Tells whether any entry of the given type applies to `url`.
   *
   * @param {string} url - URL or bare hostname.
   * @param {string} type - Same values as `getAdapter`.
   * @param {boolean} [useGlobal] - Also count entries flagged `isGlobal`.
   * @returns {boolean}
   */
  hasDomain(url, type, useGlobal) {
    if (!this.getDomainName(url)) return false;
    const entries = this._entriesFor(type);
    if (entries.some((entry) => this._hasDomain(url, entry.domain))) return true;
    return Boolean(useGlobal) && entries.some((entry) => entry.isGlobal);
  }

  /**
   * Builds the header map for a request: configured headers for `url` first,
   * then `overrides` on top, then (optionally) a random user agent.
   *
   * @param {string} url - URL or bare hostname.
   * @param {boolean} [useGlobal] - Allow global header sets as the base.
   * @param {Headers|object} [overrides] - Extra headers; for plain objects only string values are applied.
   * @param {boolean} [useRandomUserAgent] - Replace "user-agent" with a generated one.
   * @returns {Object<string, string>}
   */
  pickHeaders(url, useGlobal, overrides, useRandomUserAgent) {
    const configured = this.getAdapter(url, "headers", useGlobal);
    const merged = new Headers(configured ?? {});

    if (overrides instanceof Headers) {
      // Iterate the Headers iterator directly; Object.entries() of an iterator
      // is always empty, which used to drop every override silently.
      for (const [name, value] of overrides.entries()) merged.set(name, value);
    } else if (overrides && typeof overrides === "object") {
      for (const [name, value] of Object.entries(overrides)) {
        if (typeof value === "string") merged.set(name, value);
      }
    }

    if (useRandomUserAgent) merged.set("user-agent", this.getRandomUserAgent());
    return Object.fromEntries(merged.entries());
  }

  /**
   * Tests whether `url` is covered by a configured domain value.
   * A value may be a RegExp, a string (exact host, `*` wildcard, regex source
   * or `/source/flags` literal, or a parent/child domain) or an array of those.
   *
   * @param {string} url - URL or bare hostname.
   * @param {string|RegExp|Array<string|RegExp>} domain
   * @returns {boolean}
   */
  _hasDomain(url, domain) {
    if (!domain) return false;
    const host = this.getDomainName(url);
    if (!host) return false;
    const hostLower = host.toLowerCase();

    const looksLikeRegex = (text) =>
      /[\^\$\*\+\?\{\}\[\]\(\)\|\\]/.test(text) || text.startsWith("/") || text.includes(".*") || text.includes(".+");

    const matches = (candidate) => {
      if (candidate instanceof RegExp) {
        return testFromStart(candidate, host) || testFromStart(candidate, url);
      }

      const pattern = candidate.toString().trim();
      const patternLower = pattern.toLowerCase();
      if (hostLower === patternLower) return true;

      if (pattern.includes("*")) {
        // Escape everything, then turn the escaped "*" back into ".*".
        const escaped = pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&").replace(/\\\*/g, ".*");
        const wildcard = new RegExp(`^${escaped}$`, "i");
        return wildcard.test(host) || wildcard.test(url);
      }

      if (looksLikeRegex(pattern)) {
        try {
          let source = pattern;
          let flags = "i";
          const literal = pattern.match(/^\/(.*)\/(\w*)$/);
          if (literal) {
            source = literal[1];
            flags = literal[2] || "i";
          }
          const regex = new RegExp(source, flags);
          return testFromStart(regex, host) || testFromStart(regex, url);
        } catch {
          // Not a valid regular expression: fall back to a substring match.
          return hostLower.includes(patternLower);
        }
      }

      return (
        hostLower === patternLower ||
        hostLower.endsWith(`.${patternLower}`) ||
        patternLower.endsWith(`.${hostLower}`)
      );
    };

    return Array.isArray(domain) ? domain.some(matches) : matches(domain);
  }

  /**
   * Extracts the hostname from an http(s) URL, or returns the input unchanged
   * when it already is a bare hostname.
   *
   * @param {string} value
   * @returns {string|null} `null` when `value` is neither.
   */
  getDomainName(value) {
    if (this.isValidUrl(value)) return new URL(value).hostname;
    if (this.isHostName(value)) return value;
    return null;
  }

  /**
   * Checks whether `value` (trimmed, lower-cased) is shaped like a hostname.
   *
   * @param {string} value
   * @returns {boolean}
   */
  isHostName(value) {
    if (!value) return false;
    if (value.length > 255) return false;
    const host = value.trim().toLowerCase();
    // The pattern previously contained a literal space before the final label,
    // which no trimmed hostname can contain, so nothing ever matched.
    return HOSTNAME_PATTERN.test(host) && !host.startsWith("-") && !host.endsWith("-");
  }

  /**
   * Checks whether `value` parses as an http/https URL whose hostname matches
   * HOSTNAME_PATTERN.
   *
   * @param {string} value
   * @returns {boolean}
   */
  isValidUrl(value) {
    if (!value) return false;
    const text = value.trim();
    try {
      const parsed = new URL(text);
      if (!parsed.protocol || !["http:", "https:"].includes(parsed.protocol.toLowerCase())) return false;
      if (!parsed.hostname) return false;
      return HOSTNAME_PATTERN.test(parsed.hostname);
    } catch {
      // new URL() throws on unparsable input; that simply means "not a URL".
      return false;
    }
  }

  /**
   * Returns one of the pre-generated user-agent strings at random.
   *
   * @returns {string}
   */
  getRandomUserAgent() {
    return this.userAgents[Math.floor(Math.random() * this.userAgents.length)];
  }
}

/**
 * Generates 200 user-agent strings, each combining a random browser from
 * UA_BROWSERS with a random platform from UA_PLATFORMS.
 *
 * @returns {string[]}
 */
function generateUserAgents() {
  const agents = [];
  for (let index = 0; index < 200; index++) {
    const browser = UA_BROWSERS[Math.floor(Math.random() * UA_BROWSERS.length)];
    const platform = UA_PLATFORMS[Math.floor(Math.random() * UA_PLATFORMS.length)];
    const template = UA_TEMPLATES[browser.name];
    agents.push(template ? template(platform, browser) : "");
  }
  return agents;
}

export { CrawlerOptions };
|