recker 1.0.85 → 1.0.86-next.a24fa13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. package/dist/browser/browser/index.d.ts +2 -0
  2. package/dist/browser/browser/index.js +1 -0
  3. package/dist/browser/browser/recker.d.ts +2 -0
  4. package/dist/browser/browser/recker.js +2 -0
  5. package/dist/browser/core/client.d.ts +2 -0
  6. package/dist/browser/core/client.js +8 -0
  7. package/dist/browser/core/request.d.ts +3 -0
  8. package/dist/browser/core/request.js +6 -2
  9. package/dist/browser/index.d.ts +2 -0
  10. package/dist/browser/index.iife.min.js +79 -79
  11. package/dist/browser/index.js +1 -0
  12. package/dist/browser/index.min.js +79 -79
  13. package/dist/browser/index.mini.iife.js +312 -15
  14. package/dist/browser/index.mini.iife.min.js +38 -38
  15. package/dist/browser/index.mini.min.js +42 -42
  16. package/dist/browser/index.mini.umd.js +312 -15
  17. package/dist/browser/index.mini.umd.min.js +38 -38
  18. package/dist/browser/index.umd.min.js +79 -79
  19. package/dist/browser/plugins/queue.d.ts +41 -0
  20. package/dist/browser/plugins/queue.js +184 -0
  21. package/dist/browser/recker.d.ts +2 -0
  22. package/dist/browser/recker.js +2 -0
  23. package/dist/browser/scrape/crawl-queue.d.ts +31 -0
  24. package/dist/browser/scrape/crawl-queue.js +40 -0
  25. package/dist/browser/scrape/crawl-storage.d.ts +33 -0
  26. package/dist/browser/scrape/crawl-storage.js +26 -0
  27. package/dist/browser/scrape/index.d.ts +6 -0
  28. package/dist/browser/scrape/index.js +3 -0
  29. package/dist/browser/scrape/proxy-adapter.d.ts +12 -0
  30. package/dist/browser/scrape/proxy-adapter.js +17 -0
  31. package/dist/browser/scrape/spider.d.ts +14 -4
  32. package/dist/browser/scrape/spider.js +119 -45
  33. package/dist/browser/transport/curl.js +53 -9
  34. package/dist/browser/transport/undici.js +4 -0
  35. package/dist/browser/types/index.d.ts +53 -2
  36. package/dist/core/client.d.ts +2 -0
  37. package/dist/core/client.js +8 -0
  38. package/dist/core/request.d.ts +3 -0
  39. package/dist/core/request.js +6 -2
  40. package/dist/index.d.ts +2 -0
  41. package/dist/index.js +2 -0
  42. package/dist/plugins/queue.d.ts +41 -0
  43. package/dist/plugins/queue.js +184 -0
  44. package/dist/queue/consumer.d.ts +17 -0
  45. package/dist/queue/consumer.js +48 -0
  46. package/dist/scrape/crawl-queue.d.ts +31 -0
  47. package/dist/scrape/crawl-queue.js +40 -0
  48. package/dist/scrape/crawl-storage.d.ts +33 -0
  49. package/dist/scrape/crawl-storage.js +26 -0
  50. package/dist/scrape/index.d.ts +6 -0
  51. package/dist/scrape/index.js +3 -0
  52. package/dist/scrape/proxy-adapter.d.ts +12 -0
  53. package/dist/scrape/proxy-adapter.js +17 -0
  54. package/dist/scrape/spider.d.ts +14 -4
  55. package/dist/scrape/spider.js +119 -45
  56. package/dist/transport/curl.js +53 -9
  57. package/dist/transport/undici.js +4 -0
  58. package/dist/types/index.d.ts +53 -2
  59. package/dist/version.js +1 -1
  60. package/package.json +2 -2
@@ -10027,6 +10027,9 @@ var recker = (() => {
10027
10027
  case "total":
10028
10028
  values.total = parseCurlTimingValue(value);
10029
10029
  break;
10030
+ case "pretransfer":
10031
+ values.pretransfer = parseCurlTimingValue(value);
10032
+ break;
10030
10033
  default:
10031
10034
  break;
10032
10035
  }
@@ -10138,7 +10141,7 @@ var recker = (() => {
10138
10141
  "--compressed",
10139
10142
  "--no-keepalive",
10140
10143
  "--write-out",
10141
- `\\n${TIMING_MARKER} dns=%{time_namelookup} tcp=%{time_connect} tls=%{time_appconnect} ttfb=%{time_starttransfer} total=%{time_total}`
10144
+ `\\n${TIMING_MARKER} dns=%{time_namelookup} tcp=%{time_connect} tls=%{time_appconnect} pretransfer=%{time_pretransfer} ttfb=%{time_starttransfer} total=%{time_total}`
10142
10145
  ];
10143
10146
  const proxy = this.getNextProxy();
10144
10147
  if (proxy) {
@@ -10228,14 +10231,49 @@ var recker = (() => {
10228
10231
  statusText,
10229
10232
  headers
10230
10233
  });
10231
- const responseTimings = timings && Object.keys(timings).length > 0 ? {
10232
- dns: timings.dns,
10233
- tcp: timings.tcp,
10234
- tls: timings.tls ? timings.ttfb !== void 0 ? timings.tls : timings.tls : void 0,
10235
- firstByte: timings.ttfb,
10236
- content: timings.download,
10237
- total: timings.total
10238
- } : {};
10234
+ const usedProxy = this.proxyList.length > 0;
10235
+ let responseTimings = {};
10236
+ if (timings && Object.keys(timings).length > 0) {
10237
+ const dns = timings.dns;
10238
+ const tcp = timings.tcp !== void 0 && timings.dns !== void 0 ? timings.tcp - timings.dns : void 0;
10239
+ const hasTls = timings.tls !== void 0 && timings.tls > 0;
10240
+ const tls = hasTls && timings.tcp !== void 0 ? timings.tls - timings.tcp : void 0;
10241
+ const content = timings.download;
10242
+ if (usedProxy && timings.pretransfer !== void 0) {
10243
+ const afterProxy = hasTls ? timings.tls : timings.tcp;
10244
+ const targetTls = afterProxy !== void 0 ? timings.pretransfer - afterProxy : void 0;
10245
+ const proxyTotal = (dns ?? 0) + (tcp ?? 0) + (tls ?? 0);
10246
+ const connectionTime = proxyTotal + (targetTls ?? 0);
10247
+ const serverTime = timings.ttfb !== void 0 ? timings.ttfb - connectionTime : void 0;
10248
+ responseTimings = {
10249
+ tls: targetTls,
10250
+ content,
10251
+ transferTime: content,
10252
+ proxyDns: dns,
10253
+ proxyTcp: tcp,
10254
+ proxyTls: tls,
10255
+ proxyTotal,
10256
+ firstByte: timings.ttfb,
10257
+ total: timings.total,
10258
+ connectionTime,
10259
+ serverTime
10260
+ };
10261
+ } else {
10262
+ const connectionTime = (dns ?? 0) + (tcp ?? 0) + (tls ?? 0);
10263
+ const serverTime = timings.ttfb !== void 0 ? timings.ttfb - connectionTime : void 0;
10264
+ responseTimings = {
10265
+ dns,
10266
+ tcp,
10267
+ tls,
10268
+ content,
10269
+ transferTime: content,
10270
+ firstByte: timings.ttfb,
10271
+ total: timings.total,
10272
+ connectionTime,
10273
+ serverTime
10274
+ };
10275
+ }
10276
+ }
10239
10277
  resolve12(new HttpResponse(nativeResponse, {
10240
10278
  timings: responseTimings,
10241
10279
  connection: { protocol: "curl" }
@@ -11068,6 +11106,90 @@ var recker = (() => {
11068
11106
  }
11069
11107
  });
11070
11108
 
11109
+ // dist/browser/utils/env-proxy.js
11110
+ function getProxyForUrl(url, options2 = {}) {
11111
+ let parsedUrl;
11112
+ try {
11113
+ parsedUrl = new URL(url);
11114
+ } catch {
11115
+ return void 0;
11116
+ }
11117
+ const protocol = parsedUrl.protocol.replace(":", "").toLowerCase();
11118
+ const noProxy = options2.noProxy ?? process.env.NO_PROXY ?? process.env.no_proxy ?? "";
11119
+ if (shouldBypassProxy(parsedUrl.hostname, parsedUrl.port, noProxy)) {
11120
+ return void 0;
11121
+ }
11122
+ let proxy;
11123
+ if (protocol === "https") {
11124
+ proxy = options2.httpsProxy ?? process.env.HTTPS_PROXY ?? process.env.https_proxy ?? process.env.ALL_PROXY ?? process.env.all_proxy;
11125
+ } else {
11126
+ proxy = options2.httpProxy ?? process.env.HTTP_PROXY ?? process.env.http_proxy ?? process.env.ALL_PROXY ?? process.env.all_proxy;
11127
+ }
11128
+ return proxy || void 0;
11129
+ }
11130
+ function shouldBypassProxy(hostname, port, noProxy) {
11131
+ if (!noProxy || noProxy.trim() === "") {
11132
+ return false;
11133
+ }
11134
+ hostname = hostname.toLowerCase();
11135
+ const rules = noProxy.split(/[\s,]+/).filter(Boolean);
11136
+ for (const rule of rules) {
11137
+ const normalizedRule = rule.toLowerCase().trim();
11138
+ if (normalizedRule === "*") {
11139
+ return true;
11140
+ }
11141
+ if (normalizedRule.includes("/") && !normalizedRule.includes(":")) {
11142
+ if (matchesCIDR(hostname, normalizedRule)) {
11143
+ return true;
11144
+ }
11145
+ continue;
11146
+ }
11147
+ if (normalizedRule.includes(":") && !normalizedRule.startsWith("[")) {
11148
+ const [ruleHost, rulePort] = normalizedRule.split(":");
11149
+ if (hostname === ruleHost && (!rulePort || port === rulePort)) {
11150
+ return true;
11151
+ }
11152
+ continue;
11153
+ }
11154
+ if (normalizedRule.startsWith(".")) {
11155
+ if (hostname.endsWith(normalizedRule) || hostname === normalizedRule.slice(1)) {
11156
+ return true;
11157
+ }
11158
+ continue;
11159
+ }
11160
+ if (hostname === normalizedRule) {
11161
+ return true;
11162
+ }
11163
+ if (hostname.endsWith("." + normalizedRule)) {
11164
+ return true;
11165
+ }
11166
+ }
11167
+ return false;
11168
+ }
11169
+ function matchesCIDR(ip, cidr) {
11170
+ const [range, bits] = cidr.split("/");
11171
+ if (!bits)
11172
+ return ip === range;
11173
+ const mask = parseInt(bits, 10);
11174
+ if (isNaN(mask))
11175
+ return false;
11176
+ const ipParts = ip.split(".").map(Number);
11177
+ const rangeParts = range.split(".").map(Number);
11178
+ if (ipParts.length !== 4 || rangeParts.length !== 4)
11179
+ return false;
11180
+ if (ipParts.some(isNaN) || rangeParts.some(isNaN))
11181
+ return false;
11182
+ const ipNum = ipParts[0] << 24 | ipParts[1] << 16 | ipParts[2] << 8 | ipParts[3];
11183
+ const rangeNum = rangeParts[0] << 24 | rangeParts[1] << 16 | rangeParts[2] << 8 | rangeParts[3];
11184
+ const maskNum = ~((1 << 32 - mask) - 1);
11185
+ return (ipNum & maskNum) === (rangeNum & maskNum);
11186
+ }
11187
+ var init_env_proxy = __esm({
11188
+ "dist/browser/utils/env-proxy.js"() {
11189
+ "use strict";
11190
+ }
11191
+ });
11192
+
11071
11193
  // node-stub:events
11072
11194
  var events_exports = {};
11073
11195
  __export(events_exports, {
@@ -16269,7 +16391,7 @@ var recker = (() => {
16269
16391
  return void 0;
16270
16392
  }
16271
16393
  }
16272
- function matchesCIDR(ip, cidr) {
16394
+ function matchesCIDR2(ip, cidr) {
16273
16395
  const [range, bits] = cidr.split("/");
16274
16396
  if (!bits)
16275
16397
  return ip === range;
@@ -16287,7 +16409,7 @@ var recker = (() => {
16287
16409
  const maskNum = ~((1 << 32 - mask) - 1);
16288
16410
  return (ipNum & maskNum) === (rangeNum & maskNum);
16289
16411
  }
16290
- function shouldBypassProxy(url, bypass) {
16412
+ function shouldBypassProxy2(url, bypass) {
16291
16413
  if (!bypass || bypass.length === 0)
16292
16414
  return false;
16293
16415
  let hostname = "";
@@ -16303,7 +16425,7 @@ var recker = (() => {
16303
16425
  if (rule === "*")
16304
16426
  return true;
16305
16427
  if (rule.includes("/")) {
16306
- if (matchesCIDR(hostname, rule))
16428
+ if (matchesCIDR2(hostname, rule))
16307
16429
  return true;
16308
16430
  continue;
16309
16431
  }
@@ -16431,6 +16553,7 @@ var recker = (() => {
16431
16553
  init_node_diagnostics_channel();
16432
16554
  init_dns();
16433
16555
  init_agent_manager();
16556
+ init_env_proxy();
16434
16557
  init_progress();
16435
16558
  init_streaming();
16436
16559
  init_protocol_cache();
@@ -16683,7 +16806,7 @@ var recker = (() => {
16683
16806
  if (this.proxySlots.length > 0) {
16684
16807
  const slot = this.proxySlots[this.proxyIndex % this.proxySlots.length];
16685
16808
  this.proxyIndex++;
16686
- if (!shouldBypassProxy(url, slot.bypass)) {
16809
+ if (!shouldBypassProxy2(url, slot.bypass)) {
16687
16810
  if (slot.agent)
16688
16811
  return slot.agent;
16689
16812
  if (!slot.socksAgentPromise) {
@@ -16693,6 +16816,9 @@ var recker = (() => {
16693
16816
  return slot.agent;
16694
16817
  }
16695
16818
  }
16819
+ const envProxy = getProxyForUrl(url);
16820
+ if (envProxy)
16821
+ return new ProxyAgent5(envProxy);
16696
16822
  if (this.agentManager)
16697
16823
  return this.agentManager.getAgentForUrl(url);
16698
16824
  if (this.dnsAgent)
@@ -18994,6 +19120,7 @@ var recker = (() => {
18994
19120
  policyTags;
18995
19121
  policySource;
18996
19122
  traceId;
19123
+ queue;
18997
19124
  constructor(url, options2 = {}) {
18998
19125
  this.url = url;
18999
19126
  this.method = options2.method || "GET";
@@ -19015,6 +19142,7 @@ var recker = (() => {
19015
19142
  this.policyTags = options2.policyTags ?? [];
19016
19143
  this.policySource = options2.policySource;
19017
19144
  this.traceId = options2.traceId;
19145
+ this.queue = options2.queue;
19018
19146
  }
19019
19147
  withHeader(name, value) {
19020
19148
  const context = getRequestContext(this);
@@ -19039,7 +19167,8 @@ var recker = (() => {
19039
19167
  tenant: this.tenant,
19040
19168
  policyTags: this.policyTags,
19041
19169
  policySource: this.policySource,
19042
- traceId: this.traceId
19170
+ traceId: this.traceId,
19171
+ queue: this.queue
19043
19172
  });
19044
19173
  if (context) {
19045
19174
  return attachRequestContext(request11, context);
@@ -19067,7 +19196,8 @@ var recker = (() => {
19067
19196
  tenant: this.tenant,
19068
19197
  policyTags: this.policyTags,
19069
19198
  policySource: this.policySource,
19070
- traceId: this.traceId
19199
+ traceId: this.traceId,
19200
+ queue: this.queue
19071
19201
  });
19072
19202
  if (context) {
19073
19203
  return attachRequestContext(request11, context);
@@ -21073,6 +21203,166 @@ var recker = (() => {
21073
21203
  };
21074
21204
  }
21075
21205
 
21206
+ // dist/browser/plugins/queue.js
21207
+ var jobSeq = 0;
21208
+ function generateJobId() {
21209
+ jobSeq += 1;
21210
+ return `qj-${Date.now().toString(36)}-${jobSeq.toString(36)}-${Math.random().toString(16).slice(2, 8)}`;
21211
+ }
21212
+ function serializeRequest(req, jobId, metadata) {
21213
+ var _a2;
21214
+ const headers = {};
21215
+ req.headers.forEach((value, key) => {
21216
+ headers[key] = value;
21217
+ });
21218
+ let body = null;
21219
+ const bodyContentType = headers["content-type"];
21220
+ if (req.body !== null && req.body !== void 0) {
21221
+ if (typeof req.body === "string") {
21222
+ body = req.body;
21223
+ } else if (typeof req.body === "object") {
21224
+ try {
21225
+ body = JSON.stringify(req.body);
21226
+ } catch {
21227
+ body = String(req.body);
21228
+ }
21229
+ } else {
21230
+ body = String(req.body);
21231
+ }
21232
+ }
21233
+ return {
21234
+ jobId,
21235
+ url: req.url,
21236
+ method: req.method,
21237
+ headers,
21238
+ body,
21239
+ bodyContentType: bodyContentType || void 0,
21240
+ correlationId: req.correlationId,
21241
+ traceId: req.traceId,
21242
+ tenant: req.tenant,
21243
+ policyTags: ((_a2 = req.policyTags) == null ? void 0 : _a2.length) ? req.policyTags : void 0,
21244
+ createdAt: Date.now(),
21245
+ metadata: metadata && Object.keys(metadata).length > 0 ? metadata : void 0
21246
+ };
21247
+ }
21248
+ function compileFilter(config) {
21249
+ return (req) => {
21250
+ if (config.methods && !config.methods.includes(req.method)) {
21251
+ return false;
21252
+ }
21253
+ if (config.urlPatterns) {
21254
+ const matched = config.urlPatterns.some((pattern) => {
21255
+ if (typeof pattern === "string")
21256
+ return req.url.includes(pattern);
21257
+ return pattern.test(req.url);
21258
+ });
21259
+ if (!matched)
21260
+ return false;
21261
+ }
21262
+ if (config.headerPresent && !req.headers.has(config.headerPresent)) {
21263
+ return false;
21264
+ }
21265
+ return true;
21266
+ };
21267
+ }
21268
+ var QueuedResponse = class _QueuedResponse {
21269
+ jobId;
21270
+ raw;
21271
+ timings = void 0;
21272
+ connection = void 0;
21273
+ constructor(jobId) {
21274
+ this.jobId = jobId;
21275
+ const responseBody = JSON.stringify({
21276
+ queued: true,
21277
+ jobId,
21278
+ status: "queued"
21279
+ });
21280
+ this.raw = new Response(responseBody, {
21281
+ status: 202,
21282
+ statusText: "Accepted",
21283
+ headers: {
21284
+ "Content-Type": "application/json",
21285
+ "X-Queue-Job-Id": jobId,
21286
+ "X-Queue-Status": "queued"
21287
+ }
21288
+ });
21289
+ }
21290
+ get status() {
21291
+ return 202;
21292
+ }
21293
+ get statusText() {
21294
+ return "Accepted";
21295
+ }
21296
+ get headers() {
21297
+ return this.raw.headers;
21298
+ }
21299
+ get ok() {
21300
+ return true;
21301
+ }
21302
+ get url() {
21303
+ return "";
21304
+ }
21305
+ json() {
21306
+ return this.raw.json();
21307
+ }
21308
+ text() {
21309
+ return this.raw.text();
21310
+ }
21311
+ async cleanText() {
21312
+ return this.raw.text();
21313
+ }
21314
+ blob() {
21315
+ return this.raw.blob();
21316
+ }
21317
+ read() {
21318
+ return this.raw.body;
21319
+ }
21320
+ clone() {
21321
+ return new _QueuedResponse(this.jobId);
21322
+ }
21323
+ async *sse() {
21324
+ }
21325
+ async *download() {
21326
+ }
21327
+ async *[Symbol.asyncIterator]() {
21328
+ if (!this.raw.body)
21329
+ return;
21330
+ const reader = this.raw.body.getReader();
21331
+ while (true) {
21332
+ const { done, value } = await reader.read();
21333
+ if (done)
21334
+ break;
21335
+ yield value;
21336
+ }
21337
+ }
21338
+ };
21339
+ function queuePlugin(options2) {
21340
+ const adapter = options2.adapter;
21341
+ const filter2 = typeof options2.filter === "function" ? options2.filter : options2.filter ? compileFilter(options2.filter) : () => true;
21342
+ const generateId = options2.jobIdGenerator || generateJobId;
21343
+ const queueMiddleware = async (req, next) => {
21344
+ const perReq = req.queue;
21345
+ if (perReq === false) {
21346
+ return next(req);
21347
+ }
21348
+ const shouldQueue = perReq === true || typeof perReq === "object" || filter2(req);
21349
+ if (!shouldQueue) {
21350
+ return next(req);
21351
+ }
21352
+ const metadata = {
21353
+ ...options2.defaultMetadata,
21354
+ ...typeof perReq === "object" && perReq !== null && "metadata" in perReq ? perReq.metadata : void 0
21355
+ };
21356
+ const jobId = generateId(req);
21357
+ const job = serializeRequest(req, jobId, metadata);
21358
+ const assignedId = await adapter.enqueue(job);
21359
+ return new QueuedResponse(assignedId || jobId);
21360
+ };
21361
+ return (client) => {
21362
+ client.use(queueMiddleware);
21363
+ };
21364
+ }
21365
+
21076
21366
  // dist/browser/plugins/xsrf.js
21077
21367
  function parseCookies(cookieString) {
21078
21368
  const cookies = {};
@@ -23005,6 +23295,13 @@ var recker = (() => {
23005
23295
  scope: "request"
23006
23296
  });
23007
23297
  }
23298
+ if (options2.queue) {
23299
+ registerPlugin(queuePlugin(options2.queue), {
23300
+ name: "recker:queue",
23301
+ priority: 135,
23302
+ scope: "request"
23303
+ });
23304
+ }
23008
23305
  if (options2.plugins) {
23009
23306
  options2.plugins.forEach((plugin, index) => {
23010
23307
  const existingManifest = getPluginManifest(plugin);