recker 1.0.85 → 1.0.86-next.a24fa13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/browser/browser/index.d.ts +2 -0
- package/dist/browser/browser/index.js +1 -0
- package/dist/browser/browser/recker.d.ts +2 -0
- package/dist/browser/browser/recker.js +2 -0
- package/dist/browser/core/client.d.ts +2 -0
- package/dist/browser/core/client.js +8 -0
- package/dist/browser/core/request.d.ts +3 -0
- package/dist/browser/core/request.js +6 -2
- package/dist/browser/index.d.ts +2 -0
- package/dist/browser/index.iife.min.js +79 -79
- package/dist/browser/index.js +1 -0
- package/dist/browser/index.min.js +79 -79
- package/dist/browser/index.mini.iife.js +312 -15
- package/dist/browser/index.mini.iife.min.js +38 -38
- package/dist/browser/index.mini.min.js +42 -42
- package/dist/browser/index.mini.umd.js +312 -15
- package/dist/browser/index.mini.umd.min.js +38 -38
- package/dist/browser/index.umd.min.js +79 -79
- package/dist/browser/plugins/queue.d.ts +41 -0
- package/dist/browser/plugins/queue.js +184 -0
- package/dist/browser/recker.d.ts +2 -0
- package/dist/browser/recker.js +2 -0
- package/dist/browser/scrape/crawl-queue.d.ts +31 -0
- package/dist/browser/scrape/crawl-queue.js +40 -0
- package/dist/browser/scrape/crawl-storage.d.ts +33 -0
- package/dist/browser/scrape/crawl-storage.js +26 -0
- package/dist/browser/scrape/index.d.ts +6 -0
- package/dist/browser/scrape/index.js +3 -0
- package/dist/browser/scrape/proxy-adapter.d.ts +12 -0
- package/dist/browser/scrape/proxy-adapter.js +17 -0
- package/dist/browser/scrape/spider.d.ts +14 -4
- package/dist/browser/scrape/spider.js +119 -45
- package/dist/browser/transport/curl.js +53 -9
- package/dist/browser/transport/undici.js +4 -0
- package/dist/browser/types/index.d.ts +53 -2
- package/dist/core/client.d.ts +2 -0
- package/dist/core/client.js +8 -0
- package/dist/core/request.d.ts +3 -0
- package/dist/core/request.js +6 -2
- package/dist/index.d.ts +2 -0
- package/dist/index.js +2 -0
- package/dist/plugins/queue.d.ts +41 -0
- package/dist/plugins/queue.js +184 -0
- package/dist/queue/consumer.d.ts +17 -0
- package/dist/queue/consumer.js +48 -0
- package/dist/scrape/crawl-queue.d.ts +31 -0
- package/dist/scrape/crawl-queue.js +40 -0
- package/dist/scrape/crawl-storage.d.ts +33 -0
- package/dist/scrape/crawl-storage.js +26 -0
- package/dist/scrape/index.d.ts +6 -0
- package/dist/scrape/index.js +3 -0
- package/dist/scrape/proxy-adapter.d.ts +12 -0
- package/dist/scrape/proxy-adapter.js +17 -0
- package/dist/scrape/spider.d.ts +14 -4
- package/dist/scrape/spider.js +119 -45
- package/dist/transport/curl.js +53 -9
- package/dist/transport/undici.js +4 -0
- package/dist/types/index.d.ts +53 -2
- package/dist/version.js +1 -1
- package/package.json +2 -2
|
@@ -9,6 +9,9 @@ import { hasImpersonate } from '../utils/binary-manager.js';
|
|
|
9
9
|
import { CurlTransport } from '../transport/curl.js';
|
|
10
10
|
import { HttpRequest } from '../core/request.js';
|
|
11
11
|
import { getRandomUserAgent } from '../utils/user-agent.js';
|
|
12
|
+
import { InMemoryCrawlQueue } from './crawl-queue.js';
|
|
13
|
+
import { InMemoryCrawlStorage } from './crawl-storage.js';
|
|
14
|
+
import { ListProxyAdapter } from './proxy-adapter.js';
|
|
12
15
|
const FALLBACK_ACCEPT_LANGUAGES = [
|
|
13
16
|
'en-US,en;q=0.9',
|
|
14
17
|
'en-GB,en;q=0.9',
|
|
@@ -79,6 +82,9 @@ function normalizeUrl(urlStr) {
|
|
|
79
82
|
try {
|
|
80
83
|
const url = new URL(urlStr);
|
|
81
84
|
url.hash = '';
|
|
85
|
+
if (url.hostname.startsWith('www.')) {
|
|
86
|
+
url.hostname = url.hostname.slice(4);
|
|
87
|
+
}
|
|
82
88
|
const paramsToDelete = [];
|
|
83
89
|
url.searchParams.forEach((_, key) => {
|
|
84
90
|
if (TRACKING_PARAMS.has(key.toLowerCase())) {
|
|
@@ -172,10 +178,13 @@ export class Spider {
|
|
|
172
178
|
options;
|
|
173
179
|
client;
|
|
174
180
|
pool;
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
181
|
+
crawlQueue;
|
|
182
|
+
crawlStorage;
|
|
183
|
+
proxyAdapter = null;
|
|
184
|
+
proxyClients = new Map();
|
|
185
|
+
_visitedCount = 0;
|
|
186
|
+
_queueSize = 0;
|
|
187
|
+
_resultCount = 0;
|
|
179
188
|
baseHost = '';
|
|
180
189
|
running = false;
|
|
181
190
|
aborted = false;
|
|
@@ -236,6 +245,17 @@ export class Spider {
|
|
|
236
245
|
extract: extractSchema,
|
|
237
246
|
parserOptions: options.parserOptions,
|
|
238
247
|
};
|
|
248
|
+
if (options.proxy) {
|
|
249
|
+
if (typeof options.proxy === 'string') {
|
|
250
|
+
this.proxyAdapter = new ListProxyAdapter([options.proxy]);
|
|
251
|
+
}
|
|
252
|
+
else if (Array.isArray(options.proxy)) {
|
|
253
|
+
this.proxyAdapter = new ListProxyAdapter(options.proxy);
|
|
254
|
+
}
|
|
255
|
+
else {
|
|
256
|
+
this.proxyAdapter = options.proxy;
|
|
257
|
+
}
|
|
258
|
+
}
|
|
239
259
|
this.client = createClient({
|
|
240
260
|
baseUrl: 'http://localhost',
|
|
241
261
|
timeout: this.options.timeout,
|
|
@@ -253,6 +273,8 @@ export class Spider {
|
|
|
253
273
|
interval: this.options.delay,
|
|
254
274
|
} : {}),
|
|
255
275
|
});
|
|
276
|
+
this.crawlQueue = options.crawlQueue ?? new InMemoryCrawlQueue();
|
|
277
|
+
this.crawlStorage = options.crawlStorage ?? new InMemoryCrawlStorage();
|
|
256
278
|
}
|
|
257
279
|
async crawl(startUrl) {
|
|
258
280
|
const perfStart = performance.now();
|
|
@@ -260,10 +282,11 @@ export class Spider {
|
|
|
260
282
|
const normalizedStart = normalizeUrl(startUrl);
|
|
261
283
|
const baseUrl = new URL(normalizedStart).origin;
|
|
262
284
|
this.baseHost = new URL(normalizedStart).hostname;
|
|
263
|
-
this.
|
|
264
|
-
this.
|
|
265
|
-
this.
|
|
266
|
-
this.
|
|
285
|
+
await this.crawlQueue.clear();
|
|
286
|
+
await this.crawlStorage.clear();
|
|
287
|
+
this._visitedCount = 0;
|
|
288
|
+
this._queueSize = 0;
|
|
289
|
+
this._resultCount = 0;
|
|
267
290
|
this.running = true;
|
|
268
291
|
this.aborted = false;
|
|
269
292
|
this.pendingCount = 0;
|
|
@@ -287,15 +310,15 @@ export class Spider {
|
|
|
287
310
|
await this.fetchSitemaps(baseUrl);
|
|
288
311
|
}
|
|
289
312
|
const pending = new Map();
|
|
290
|
-
const scheduleUrl = (item) => {
|
|
313
|
+
const scheduleUrl = async (item) => {
|
|
291
314
|
const normalized = normalizeUrl(item.url);
|
|
292
|
-
if (this.
|
|
315
|
+
if (await this.crawlQueue.hasVisited(normalized))
|
|
293
316
|
return;
|
|
294
317
|
if (pending.has(normalized))
|
|
295
318
|
return;
|
|
296
319
|
if (item.depth > this.options.maxDepth)
|
|
297
320
|
return;
|
|
298
|
-
if (this.
|
|
321
|
+
if (this._resultCount + pending.size >= this.options.maxPages)
|
|
299
322
|
return;
|
|
300
323
|
if (this.options.respectRobotsTxt && this.robotsData) {
|
|
301
324
|
try {
|
|
@@ -308,7 +331,8 @@ export class Spider {
|
|
|
308
331
|
return;
|
|
309
332
|
}
|
|
310
333
|
}
|
|
311
|
-
this.
|
|
334
|
+
await this.crawlQueue.markVisited(normalized);
|
|
335
|
+
this._visitedCount++;
|
|
312
336
|
this.pendingCount++;
|
|
313
337
|
const promise = this.pool.run(() => this.crawlPage({ ...item, url: normalized }))
|
|
314
338
|
.finally(() => {
|
|
@@ -317,27 +341,29 @@ export class Spider {
|
|
|
317
341
|
});
|
|
318
342
|
pending.set(normalized, promise);
|
|
319
343
|
};
|
|
320
|
-
scheduleUrl({ url: normalizedStart, depth: 0 });
|
|
344
|
+
await scheduleUrl({ url: normalizedStart, depth: 0 });
|
|
321
345
|
if (this.options.useSitemap && this.sitemapUrls.length > 0) {
|
|
322
346
|
for (const sitemapUrl of this.sitemapUrls) {
|
|
323
347
|
try {
|
|
324
348
|
const urlHost = new URL(sitemapUrl.loc).hostname;
|
|
325
349
|
if (urlHost === this.baseHost) {
|
|
326
|
-
scheduleUrl({ url: sitemapUrl.loc, depth: 1 });
|
|
350
|
+
await scheduleUrl({ url: sitemapUrl.loc, depth: 1 });
|
|
327
351
|
}
|
|
328
352
|
}
|
|
329
353
|
catch {
|
|
330
354
|
}
|
|
331
355
|
}
|
|
332
356
|
}
|
|
333
|
-
while ((pending.size > 0 || this.
|
|
357
|
+
while ((pending.size > 0 || this._queueSize > 0)
|
|
334
358
|
&& !this.aborted
|
|
335
|
-
&& this.
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
359
|
+
&& this._resultCount < this.options.maxPages) {
|
|
360
|
+
let nextItem = await this.crawlQueue.pop();
|
|
361
|
+
while (nextItem && !this.aborted) {
|
|
362
|
+
this._queueSize = Math.max(0, this._queueSize - 1);
|
|
363
|
+
if (this._resultCount + pending.size >= this.options.maxPages)
|
|
339
364
|
break;
|
|
340
|
-
scheduleUrl(
|
|
365
|
+
await scheduleUrl(nextItem);
|
|
366
|
+
nextItem = await this.crawlQueue.pop();
|
|
341
367
|
}
|
|
342
368
|
if (pending.size > 0) {
|
|
343
369
|
await Promise.race(pending.values());
|
|
@@ -347,16 +373,21 @@ export class Spider {
|
|
|
347
373
|
await Promise.all(pending.values());
|
|
348
374
|
}
|
|
349
375
|
this.running = false;
|
|
350
|
-
const
|
|
376
|
+
const pages = await this.crawlStorage.getResults();
|
|
377
|
+
const errors = await this.crawlStorage.getErrors();
|
|
378
|
+
const sitemapAnalysis = this.buildSitemapAnalysis(pages);
|
|
351
379
|
const robotsAnalysis = this.buildRobotsAnalysis();
|
|
380
|
+
const visited = this.crawlQueue instanceof InMemoryCrawlQueue
|
|
381
|
+
? this.crawlQueue.getVisited()
|
|
382
|
+
: new Set(pages.map(r => r.url));
|
|
352
383
|
return {
|
|
353
384
|
startUrl: normalizedStart,
|
|
354
|
-
pages
|
|
355
|
-
visited
|
|
385
|
+
pages,
|
|
386
|
+
visited,
|
|
356
387
|
duration: Math.round(performance.now() - perfStart),
|
|
357
388
|
startTime: startTimestamp,
|
|
358
389
|
endTime: Date.now(),
|
|
359
|
-
errors
|
|
390
|
+
errors,
|
|
360
391
|
sitemap: this.options.useSitemap ? sitemapAnalysis : undefined,
|
|
361
392
|
robots: robotsAnalysis,
|
|
362
393
|
};
|
|
@@ -444,8 +475,8 @@ export class Spider {
|
|
|
444
475
|
catch (error) {
|
|
445
476
|
}
|
|
446
477
|
}
|
|
447
|
-
buildSitemapAnalysis() {
|
|
448
|
-
const crawledUrls = new Set(
|
|
478
|
+
buildSitemapAnalysis(results) {
|
|
479
|
+
const crawledUrls = new Set(results.map(r => normalizeUrl(r.url)));
|
|
449
480
|
const sitemapUrlSet = this.sitemapUrlSet.size > 0
|
|
450
481
|
? this.sitemapUrlSet
|
|
451
482
|
: new Set(this.sitemapUrls.map((u) => normalizeUrl(u.loc)));
|
|
@@ -467,7 +498,7 @@ export class Spider {
|
|
|
467
498
|
}
|
|
468
499
|
}
|
|
469
500
|
}
|
|
470
|
-
for (const page of
|
|
501
|
+
for (const page of results) {
|
|
471
502
|
for (const link of page.links) {
|
|
472
503
|
if (link.href) {
|
|
473
504
|
linkedUrls.add(normalizeUrl(link.href));
|
|
@@ -508,6 +539,21 @@ export class Spider {
|
|
|
508
539
|
issues: this.robotsValidation?.issues ?? [],
|
|
509
540
|
};
|
|
510
541
|
}
|
|
542
|
+
getClientForProxy(proxyUrl) {
|
|
543
|
+
if (!proxyUrl)
|
|
544
|
+
return this.client;
|
|
545
|
+
let proxied = this.proxyClients.get(proxyUrl);
|
|
546
|
+
if (!proxied) {
|
|
547
|
+
proxied = createClient({
|
|
548
|
+
baseUrl: 'http://localhost',
|
|
549
|
+
timeout: this.options.timeout,
|
|
550
|
+
headers: { 'User-Agent': this.options.userAgent },
|
|
551
|
+
proxy: proxyUrl,
|
|
552
|
+
});
|
|
553
|
+
this.proxyClients.set(proxyUrl, proxied);
|
|
554
|
+
}
|
|
555
|
+
return proxied;
|
|
556
|
+
}
|
|
511
557
|
async fetchPage(url) {
|
|
512
558
|
const hostname = getHostname(url);
|
|
513
559
|
const maxAttempts = Math.max(1, this.options.maxRetryAttempts);
|
|
@@ -523,14 +569,18 @@ export class Spider {
|
|
|
523
569
|
let forcedTransport = null;
|
|
524
570
|
let attemptLog = [];
|
|
525
571
|
let lastRetryAfterMs = 0;
|
|
572
|
+
const proxyUrl = this.proxyAdapter ? await this.proxyAdapter.getProxy() : null;
|
|
526
573
|
const executeRequest = async (useCurl) => {
|
|
527
574
|
if (useCurl && this.curlTransport) {
|
|
575
|
+
const curlForRequest = proxyUrl
|
|
576
|
+
? new CurlTransport(proxyUrl)
|
|
577
|
+
: this.curlTransport;
|
|
528
578
|
const req = new HttpRequest(url, {
|
|
529
579
|
method: 'GET',
|
|
530
580
|
headers: this.buildRequestHeaders(url, true),
|
|
531
581
|
});
|
|
532
582
|
const requestStart = performance.now();
|
|
533
|
-
const response = await
|
|
583
|
+
const response = await curlForRequest.dispatch(req);
|
|
534
584
|
const contentType = response.headers.get('content-type') || '';
|
|
535
585
|
const shouldReadCurlResponseBody = shouldReadResponseBody(response.status, contentType);
|
|
536
586
|
let body = '';
|
|
@@ -558,7 +608,8 @@ export class Spider {
|
|
|
558
608
|
timings,
|
|
559
609
|
};
|
|
560
610
|
}
|
|
561
|
-
const
|
|
611
|
+
const clientForRequest = this.getClientForProxy(proxyUrl);
|
|
612
|
+
const response = await clientForRequest.get(url, {
|
|
562
613
|
headers: this.buildRequestHeaders(url, false),
|
|
563
614
|
});
|
|
564
615
|
const contentType = response.headers.get('content-type') || '';
|
|
@@ -649,6 +700,9 @@ export class Spider {
|
|
|
649
700
|
this.registerDomainSuccess(hostname);
|
|
650
701
|
}
|
|
651
702
|
if (!shouldRetry || attempt === maxAttempts - 1) {
|
|
703
|
+
if (proxyUrl && this.proxyAdapter?.reportResult) {
|
|
704
|
+
await this.proxyAdapter.reportResult(proxyUrl, isHighQualitySuccess);
|
|
705
|
+
}
|
|
652
706
|
return {
|
|
653
707
|
response,
|
|
654
708
|
body,
|
|
@@ -752,6 +806,9 @@ export class Spider {
|
|
|
752
806
|
lastStatus: lastResponse.status,
|
|
753
807
|
};
|
|
754
808
|
}
|
|
809
|
+
if (proxyUrl && this.proxyAdapter?.reportResult) {
|
|
810
|
+
await this.proxyAdapter.reportResult(proxyUrl, false);
|
|
811
|
+
}
|
|
755
812
|
throw wrapped;
|
|
756
813
|
}
|
|
757
814
|
throw new Error(`Failed to fetch ${url}`);
|
|
@@ -760,10 +817,10 @@ export class Spider {
|
|
|
760
817
|
const startTime = performance.now();
|
|
761
818
|
const fetchedAt = Date.now();
|
|
762
819
|
this.options.onProgress?.({
|
|
763
|
-
crawled: this.
|
|
764
|
-
queued: this.
|
|
820
|
+
crawled: this._resultCount,
|
|
821
|
+
queued: this._queueSize,
|
|
765
822
|
pending: this.pendingCount,
|
|
766
|
-
total: this.
|
|
823
|
+
total: this._visitedCount,
|
|
767
824
|
currentUrl: item.url,
|
|
768
825
|
depth: item.depth,
|
|
769
826
|
});
|
|
@@ -808,7 +865,8 @@ export class Spider {
|
|
|
808
865
|
security,
|
|
809
866
|
fetchedAt,
|
|
810
867
|
};
|
|
811
|
-
this.
|
|
868
|
+
await this.crawlStorage.saveResult(nonHtmlResult);
|
|
869
|
+
this._resultCount++;
|
|
812
870
|
this.options.onPage?.(nonHtmlResult);
|
|
813
871
|
return;
|
|
814
872
|
}
|
|
@@ -869,23 +927,38 @@ export class Spider {
|
|
|
869
927
|
fetchedAt,
|
|
870
928
|
extracted,
|
|
871
929
|
};
|
|
872
|
-
this.
|
|
930
|
+
await this.crawlStorage.saveResult(result);
|
|
931
|
+
this._resultCount++;
|
|
873
932
|
this.options.onPage?.(result);
|
|
874
933
|
if (this.options.onPageWithHtml) {
|
|
875
934
|
await this.options.onPageWithHtml(result, html);
|
|
876
935
|
}
|
|
936
|
+
const candidates = [];
|
|
937
|
+
const candidateUrls = [];
|
|
877
938
|
for (const link of links) {
|
|
878
939
|
if (!link.href)
|
|
879
940
|
continue;
|
|
880
941
|
const normalized = normalizeUrl(link.href);
|
|
881
|
-
if (this.visited.has(normalized))
|
|
882
|
-
continue;
|
|
883
942
|
if (!shouldCrawl(normalized, this.baseHost, this.options))
|
|
884
943
|
continue;
|
|
885
|
-
|
|
886
|
-
|
|
887
|
-
|
|
888
|
-
|
|
944
|
+
candidateUrls.push(normalized);
|
|
945
|
+
candidates.push({ url: normalized, depth: item.depth + 1 });
|
|
946
|
+
}
|
|
947
|
+
if (candidates.length > 0) {
|
|
948
|
+
const visitedSet = this.crawlQueue.hasVisitedBatch
|
|
949
|
+
? await this.crawlQueue.hasVisitedBatch(candidateUrls)
|
|
950
|
+
: new Set(await Promise.all(candidateUrls.map(async (u) => (await this.crawlQueue.hasVisited(u)) ? u : null)).then(r => r.filter(Boolean)));
|
|
951
|
+
const newItems = candidates.filter((_, i) => !visitedSet.has(candidateUrls[i]));
|
|
952
|
+
if (newItems.length > 0) {
|
|
953
|
+
if (this.crawlQueue.pushBatch) {
|
|
954
|
+
await this.crawlQueue.pushBatch(newItems);
|
|
955
|
+
}
|
|
956
|
+
else {
|
|
957
|
+
for (const newItem of newItems)
|
|
958
|
+
await this.crawlQueue.push(newItem);
|
|
959
|
+
}
|
|
960
|
+
this._queueSize += newItems.length;
|
|
961
|
+
}
|
|
889
962
|
}
|
|
890
963
|
}
|
|
891
964
|
catch (error) {
|
|
@@ -950,8 +1023,9 @@ export class Spider {
|
|
|
950
1023
|
security,
|
|
951
1024
|
fetchedAt,
|
|
952
1025
|
};
|
|
953
|
-
this.
|
|
954
|
-
this.
|
|
1026
|
+
await this.crawlStorage.saveResult(errorResult);
|
|
1027
|
+
this._resultCount++;
|
|
1028
|
+
await this.crawlStorage.saveError({ url: item.url, error: message });
|
|
955
1029
|
this.options.onPage?.(errorResult);
|
|
956
1030
|
}
|
|
957
1031
|
}
|
|
@@ -1149,10 +1223,10 @@ export class Spider {
|
|
|
1149
1223
|
}
|
|
1150
1224
|
getProgress() {
|
|
1151
1225
|
return {
|
|
1152
|
-
crawled: this.
|
|
1153
|
-
queued: this.
|
|
1226
|
+
crawled: this._resultCount,
|
|
1227
|
+
queued: this._queueSize,
|
|
1154
1228
|
pending: this.pendingCount,
|
|
1155
|
-
total: this.
|
|
1229
|
+
total: this._visitedCount,
|
|
1156
1230
|
currentUrl: '',
|
|
1157
1231
|
depth: 0,
|
|
1158
1232
|
};
|
|
@@ -41,6 +41,9 @@ function parseCurlTimings(payload) {
|
|
|
41
41
|
case 'total':
|
|
42
42
|
values.total = parseCurlTimingValue(value);
|
|
43
43
|
break;
|
|
44
|
+
case 'pretransfer':
|
|
45
|
+
values.pretransfer = parseCurlTimingValue(value);
|
|
46
|
+
break;
|
|
44
47
|
default:
|
|
45
48
|
break;
|
|
46
49
|
}
|
|
@@ -143,7 +146,7 @@ export class CurlTransport {
|
|
|
143
146
|
'--compressed',
|
|
144
147
|
'--no-keepalive',
|
|
145
148
|
'--write-out',
|
|
146
|
-
`\\n${TIMING_MARKER} dns=%{time_namelookup} tcp=%{time_connect} tls=%{time_appconnect} ttfb=%{time_starttransfer} total=%{time_total}`,
|
|
149
|
+
`\\n${TIMING_MARKER} dns=%{time_namelookup} tcp=%{time_connect} tls=%{time_appconnect} pretransfer=%{time_pretransfer} ttfb=%{time_starttransfer} total=%{time_total}`,
|
|
147
150
|
];
|
|
148
151
|
const proxy = this.getNextProxy();
|
|
149
152
|
if (proxy) {
|
|
@@ -240,14 +243,55 @@ export class CurlTransport {
|
|
|
240
243
|
statusText,
|
|
241
244
|
headers,
|
|
242
245
|
});
|
|
243
|
-
const
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
246
|
+
const usedProxy = this.proxyList.length > 0;
|
|
247
|
+
let responseTimings = {};
|
|
248
|
+
if (timings && Object.keys(timings).length > 0) {
|
|
249
|
+
const dns = timings.dns;
|
|
250
|
+
const tcp = timings.tcp !== undefined && timings.dns !== undefined
|
|
251
|
+
? timings.tcp - timings.dns : undefined;
|
|
252
|
+
const hasTls = timings.tls !== undefined && timings.tls > 0;
|
|
253
|
+
const tls = hasTls && timings.tcp !== undefined
|
|
254
|
+
? timings.tls - timings.tcp : undefined;
|
|
255
|
+
const content = timings.download;
|
|
256
|
+
if (usedProxy && timings.pretransfer !== undefined) {
|
|
257
|
+
const afterProxy = hasTls ? timings.tls : timings.tcp;
|
|
258
|
+
const targetTls = afterProxy !== undefined
|
|
259
|
+
? timings.pretransfer - afterProxy : undefined;
|
|
260
|
+
const proxyTotal = (dns ?? 0) + (tcp ?? 0) + (tls ?? 0);
|
|
261
|
+
const connectionTime = proxyTotal + (targetTls ?? 0);
|
|
262
|
+
const serverTime = timings.ttfb !== undefined
|
|
263
|
+
? timings.ttfb - connectionTime : undefined;
|
|
264
|
+
responseTimings = {
|
|
265
|
+
tls: targetTls,
|
|
266
|
+
content,
|
|
267
|
+
transferTime: content,
|
|
268
|
+
proxyDns: dns,
|
|
269
|
+
proxyTcp: tcp,
|
|
270
|
+
proxyTls: tls,
|
|
271
|
+
proxyTotal,
|
|
272
|
+
firstByte: timings.ttfb,
|
|
273
|
+
total: timings.total,
|
|
274
|
+
connectionTime,
|
|
275
|
+
serverTime,
|
|
276
|
+
};
|
|
277
|
+
}
|
|
278
|
+
else {
|
|
279
|
+
const connectionTime = (dns ?? 0) + (tcp ?? 0) + (tls ?? 0);
|
|
280
|
+
const serverTime = timings.ttfb !== undefined
|
|
281
|
+
? timings.ttfb - connectionTime : undefined;
|
|
282
|
+
responseTimings = {
|
|
283
|
+
dns,
|
|
284
|
+
tcp,
|
|
285
|
+
tls,
|
|
286
|
+
content,
|
|
287
|
+
transferTime: content,
|
|
288
|
+
firstByte: timings.ttfb,
|
|
289
|
+
total: timings.total,
|
|
290
|
+
connectionTime,
|
|
291
|
+
serverTime,
|
|
292
|
+
};
|
|
293
|
+
}
|
|
294
|
+
}
|
|
251
295
|
resolve(new HttpResponse(nativeResponse, {
|
|
252
296
|
timings: responseTimings,
|
|
253
297
|
connection: { protocol: 'curl' },
|
|
@@ -6,6 +6,7 @@ import { AsyncLocalStorage } from 'async_hooks';
|
|
|
6
6
|
import { channel } from 'node:diagnostics_channel';
|
|
7
7
|
import { createLookupFunction } from '../utils/dns.js';
|
|
8
8
|
import { AgentManager } from '../utils/agent-manager.js';
|
|
9
|
+
import { getProxyForUrl } from '../utils/env-proxy.js';
|
|
9
10
|
import { createProgressStream } from '../utils/progress.js';
|
|
10
11
|
import { nodeToWebStream } from '../utils/streaming.js';
|
|
11
12
|
import { ProtocolCache, getGlobalProtocolCache, normalizeProtocol } from '../utils/protocol-cache.js';
|
|
@@ -318,6 +319,9 @@ export class UndiciTransport {
|
|
|
318
319
|
return slot.agent;
|
|
319
320
|
}
|
|
320
321
|
}
|
|
322
|
+
const envProxy = getProxyForUrl(url);
|
|
323
|
+
if (envProxy)
|
|
324
|
+
return new ProxyAgent(envProxy);
|
|
321
325
|
if (this.agentManager)
|
|
322
326
|
return this.agentManager.getAgentForUrl(url);
|
|
323
327
|
if (this.dnsAgent)
|
|
@@ -54,6 +54,9 @@ export interface RequestOptions {
|
|
|
54
54
|
followRedirects?: boolean;
|
|
55
55
|
http2?: boolean;
|
|
56
56
|
useCurl?: boolean;
|
|
57
|
+
queue?: boolean | {
|
|
58
|
+
metadata?: Record<string, unknown>;
|
|
59
|
+
};
|
|
57
60
|
}
|
|
58
61
|
export interface ReckerRequest {
|
|
59
62
|
url: string;
|
|
@@ -77,6 +80,9 @@ export interface ReckerRequest {
|
|
|
77
80
|
http2?: boolean;
|
|
78
81
|
useCurl?: boolean;
|
|
79
82
|
userAgent?: string;
|
|
83
|
+
queue?: boolean | {
|
|
84
|
+
metadata?: Record<string, unknown>;
|
|
85
|
+
};
|
|
80
86
|
withHeader(name: string, value: string): ReckerRequest;
|
|
81
87
|
_hooks?: {
|
|
82
88
|
onDnsLookup?: (info: any) => void;
|
|
@@ -101,11 +107,18 @@ export interface ReckerRequest {
|
|
|
101
107
|
export interface Timings {
|
|
102
108
|
queuing?: number;
|
|
103
109
|
dns?: number;
|
|
104
|
-
tls?: number;
|
|
105
110
|
tcp?: number;
|
|
106
|
-
|
|
111
|
+
tls?: number;
|
|
107
112
|
content?: number;
|
|
113
|
+
firstByte?: number;
|
|
108
114
|
total?: number;
|
|
115
|
+
connectionTime?: number;
|
|
116
|
+
serverTime?: number;
|
|
117
|
+
transferTime?: number;
|
|
118
|
+
proxyDns?: number;
|
|
119
|
+
proxyTcp?: number;
|
|
120
|
+
proxyTls?: number;
|
|
121
|
+
proxyTotal?: number;
|
|
109
122
|
}
|
|
110
123
|
export interface ConcurrencyConfig {
|
|
111
124
|
max?: number;
|
|
@@ -213,6 +226,44 @@ export interface CacheStorage {
|
|
|
213
226
|
set(key: string, value: CacheEntry, ttl: number): Promise<void>;
|
|
214
227
|
delete(key: string): Promise<void>;
|
|
215
228
|
}
|
|
229
|
+
export interface QueueJob {
|
|
230
|
+
jobId: string;
|
|
231
|
+
url: string;
|
|
232
|
+
method: Method;
|
|
233
|
+
headers: Record<string, string>;
|
|
234
|
+
body: string | null;
|
|
235
|
+
bodyContentType?: string;
|
|
236
|
+
correlationId?: string;
|
|
237
|
+
traceId?: string;
|
|
238
|
+
tenant?: string;
|
|
239
|
+
policyTags?: string[];
|
|
240
|
+
createdAt: number;
|
|
241
|
+
metadata?: Record<string, unknown>;
|
|
242
|
+
}
|
|
243
|
+
export type QueueJobStatus = 'pending' | 'processing' | 'completed' | 'failed' | 'unknown';
|
|
244
|
+
export interface QueueJobStatusResult {
|
|
245
|
+
jobId: string;
|
|
246
|
+
status: QueueJobStatus;
|
|
247
|
+
result?: unknown;
|
|
248
|
+
error?: string;
|
|
249
|
+
}
|
|
250
|
+
export interface QueueAdapter {
|
|
251
|
+
enqueue(job: QueueJob): Promise<string>;
|
|
252
|
+
getStatus?(jobId: string): Promise<QueueJobStatusResult>;
|
|
253
|
+
close?(): Promise<void>;
|
|
254
|
+
}
|
|
255
|
+
export type QueueFilter = (req: ReckerRequest) => boolean;
|
|
256
|
+
export interface QueueFilterConfig {
|
|
257
|
+
methods?: Method[];
|
|
258
|
+
urlPatterns?: (string | RegExp)[];
|
|
259
|
+
headerPresent?: string;
|
|
260
|
+
}
|
|
261
|
+
export interface QueueOptions {
|
|
262
|
+
adapter: QueueAdapter;
|
|
263
|
+
filter?: QueueFilter | QueueFilterConfig;
|
|
264
|
+
jobIdGenerator?: (req: ReckerRequest) => string;
|
|
265
|
+
defaultMetadata?: Record<string, unknown>;
|
|
266
|
+
}
|
|
216
267
|
export type CacheStrategy = 'cache-first' | 'network-only' | 'network-first' | 'stale-while-revalidate' | 'revalidate' | 'rfc-compliant';
|
|
217
268
|
export interface PluginClient {
|
|
218
269
|
beforeRequest(fn: (req: ReckerRequest) => ReckerRequest | void | Promise<ReckerRequest | void>): void;
|
package/dist/core/client.d.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { ClientOptions, Middleware, ReckerRequest, ReckerResponse, RequestOptions, CacheStorage, PageResult } from '../types/index.js';
|
|
2
|
+
import type { QueueOptions } from '../types/index.js';
|
|
2
3
|
import type { ClientAI, ClientOptionsWithAI } from '../types/ai-client.js';
|
|
3
4
|
import { RequestPromise } from './request-promise.js';
|
|
4
5
|
import { type RuntimeEventName, type RuntimeEventPayloads, type TypedEventBus } from '../core-runtime/typed-events.js';
|
|
@@ -27,6 +28,7 @@ export interface ExtendedClientOptions extends ClientOptions {
|
|
|
27
28
|
retry?: RetryOptions;
|
|
28
29
|
cache?: ClientCacheConfig;
|
|
29
30
|
dedup?: DedupOptions;
|
|
31
|
+
queue?: QueueOptions;
|
|
30
32
|
}
|
|
31
33
|
export declare class Client {
|
|
32
34
|
static get version(): string;
|
package/dist/core/client.js
CHANGED
|
@@ -17,6 +17,7 @@ import { paginate, streamPages } from '../plugins/pagination.js';
|
|
|
17
17
|
import { retryPlugin } from '../plugins/retry.js';
|
|
18
18
|
import { cachePlugin } from '../plugins/cache.js';
|
|
19
19
|
import { dedupPlugin } from '../plugins/dedup.js';
|
|
20
|
+
import { queuePlugin } from '../plugins/queue.js';
|
|
20
21
|
import { createXSRFMiddleware } from '../plugins/xsrf.js';
|
|
21
22
|
import { createCompressionMiddleware } from '../plugins/compression.js';
|
|
22
23
|
import { serializeXML } from '../plugins/xml.js';
|
|
@@ -389,6 +390,13 @@ export class Client {
|
|
|
389
390
|
scope: 'request'
|
|
390
391
|
});
|
|
391
392
|
}
|
|
393
|
+
if (options.queue) {
|
|
394
|
+
registerPlugin(queuePlugin(options.queue), {
|
|
395
|
+
name: 'recker:queue',
|
|
396
|
+
priority: 135,
|
|
397
|
+
scope: 'request'
|
|
398
|
+
});
|
|
399
|
+
}
|
|
392
400
|
if (options.plugins) {
|
|
393
401
|
options.plugins.forEach((plugin, index) => {
|
|
394
402
|
const existingManifest = getPluginManifest(plugin);
|
package/dist/core/request.d.ts
CHANGED
|
@@ -20,6 +20,9 @@ export declare class HttpRequest implements ReckerRequest {
|
|
|
20
20
|
readonly policyTags: string[];
|
|
21
21
|
readonly policySource?: string;
|
|
22
22
|
readonly traceId?: string;
|
|
23
|
+
readonly queue?: boolean | {
|
|
24
|
+
metadata?: Record<string, unknown>;
|
|
25
|
+
};
|
|
23
26
|
constructor(url: string, options?: RequestOptions);
|
|
24
27
|
withHeader(name: string, value: string): ReckerRequest;
|
|
25
28
|
withBody(body: BodyInit): ReckerRequest;
|
package/dist/core/request.js
CHANGED
|
@@ -28,6 +28,7 @@ export class HttpRequest {
|
|
|
28
28
|
policyTags;
|
|
29
29
|
policySource;
|
|
30
30
|
traceId;
|
|
31
|
+
queue;
|
|
31
32
|
constructor(url, options = {}) {
|
|
32
33
|
this.url = url;
|
|
33
34
|
this.method = options.method || 'GET';
|
|
@@ -51,6 +52,7 @@ export class HttpRequest {
|
|
|
51
52
|
this.policyTags = options.policyTags ?? [];
|
|
52
53
|
this.policySource = options.policySource;
|
|
53
54
|
this.traceId = options.traceId;
|
|
55
|
+
this.queue = options.queue;
|
|
54
56
|
}
|
|
55
57
|
withHeader(name, value) {
|
|
56
58
|
const context = getRequestContext(this);
|
|
@@ -75,7 +77,8 @@ export class HttpRequest {
|
|
|
75
77
|
tenant: this.tenant,
|
|
76
78
|
policyTags: this.policyTags,
|
|
77
79
|
policySource: this.policySource,
|
|
78
|
-
traceId: this.traceId
|
|
80
|
+
traceId: this.traceId,
|
|
81
|
+
queue: this.queue,
|
|
79
82
|
});
|
|
80
83
|
if (context) {
|
|
81
84
|
return attachRequestContext(request, context);
|
|
@@ -103,7 +106,8 @@ export class HttpRequest {
|
|
|
103
106
|
tenant: this.tenant,
|
|
104
107
|
policyTags: this.policyTags,
|
|
105
108
|
policySource: this.policySource,
|
|
106
|
-
traceId: this.traceId
|
|
109
|
+
traceId: this.traceId,
|
|
110
|
+
queue: this.queue,
|
|
107
111
|
});
|
|
108
112
|
if (context) {
|
|
109
113
|
return attachRequestContext(request, context);
|
package/dist/index.d.ts
CHANGED
|
@@ -60,6 +60,8 @@ export * from './plugins/odata.js';
|
|
|
60
60
|
export * from './plugins/http2-push.js';
|
|
61
61
|
export * from './plugins/http3.js';
|
|
62
62
|
export * from './cache/redis-storage.js';
|
|
63
|
+
export * from './plugins/queue.js';
|
|
64
|
+
export * from './queue/consumer.js';
|
|
63
65
|
export * from './extractors/index.js';
|
|
64
66
|
export * from './video/index.js';
|
|
65
67
|
export * from './events/request-events.js';
|
package/dist/index.js
CHANGED
|
@@ -60,6 +60,8 @@ export * from './plugins/odata.js';
|
|
|
60
60
|
export * from './plugins/http2-push.js';
|
|
61
61
|
export * from './plugins/http3.js';
|
|
62
62
|
export * from './cache/redis-storage.js';
|
|
63
|
+
export * from './plugins/queue.js';
|
|
64
|
+
export * from './queue/consumer.js';
|
|
63
65
|
export * from './extractors/index.js';
|
|
64
66
|
export * from './video/index.js';
|
|
65
67
|
export * from './events/request-events.js';
|