recker 1.0.102 → 1.0.103-next.8501b56

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. package/dist/browser/core/errors.d.ts +2 -2
  2. package/dist/browser/scrape/crawl-queue.d.ts +2 -0
  3. package/dist/browser/scrape/domain-stats.d.ts +25 -0
  4. package/dist/browser/scrape/domain-stats.js +47 -0
  5. package/dist/browser/scrape/errors.d.ts +82 -0
  6. package/dist/browser/scrape/errors.js +138 -0
  7. package/dist/browser/scrape/index.d.ts +5 -0
  8. package/dist/browser/scrape/index.js +3 -0
  9. package/dist/browser/scrape/rewrite-url.d.ts +6 -0
  10. package/dist/browser/scrape/rewrite-url.js +77 -0
  11. package/dist/browser/scrape/spider.d.ts +13 -0
  12. package/dist/browser/scrape/spider.js +260 -40
  13. package/dist/browser/scrape/sqlite-crawl-storage.d.ts +1 -1
  14. package/dist/clients/index.d.ts +1 -1
  15. package/dist/clients/index.js +1 -1
  16. package/dist/clients/reddb-proto.d.ts +1 -0
  17. package/dist/clients/reddb-proto.js +195 -0
  18. package/dist/clients/reddb.d.ts +306 -71
  19. package/dist/clients/reddb.js +1410 -324
  20. package/dist/core/errors.d.ts +2 -2
  21. package/dist/protocols/index.d.ts +1 -1
  22. package/dist/protocols/index.js +1 -1
  23. package/dist/scrape/crawl-queue.d.ts +2 -0
  24. package/dist/scrape/domain-stats.d.ts +25 -0
  25. package/dist/scrape/domain-stats.js +47 -0
  26. package/dist/scrape/errors.d.ts +82 -0
  27. package/dist/scrape/errors.js +138 -0
  28. package/dist/scrape/index.d.ts +5 -0
  29. package/dist/scrape/index.js +3 -0
  30. package/dist/scrape/rewrite-url.d.ts +6 -0
  31. package/dist/scrape/rewrite-url.js +77 -0
  32. package/dist/scrape/spider.d.ts +13 -0
  33. package/dist/scrape/spider.js +260 -40
  34. package/dist/scrape/sqlite-crawl-storage.d.ts +1 -1
  35. package/dist/version.js +1 -1
  36. package/package.json +4 -1
@@ -1,6 +1,6 @@
  import { ReckerRequest, ReckerResponse } from '../types/index.js';
- export type CanonicalErrorCategory = 'http' | 'network' | 'timeout' | 'protocol' | 'validation' | 'state' | 'filesystem' | 'resource' | 'policy' | 'queue' | 'unknown';
- export type ErrorSource = 'client' | 'transport' | 'server' | 'upstream';
+ export type CanonicalErrorCategory = 'http' | 'network' | 'timeout' | 'protocol' | 'validation' | 'state' | 'filesystem' | 'resource' | 'policy' | 'queue' | 'scrape' | 'unknown';
+ export type ErrorSource = 'client' | 'transport' | 'server' | 'upstream' | 'spider';
  export type CanonicalErrorSeverity = 'low' | 'medium' | 'high';
  export interface CanonicalErrorMetadata {
  category: CanonicalErrorCategory;
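The 'scrape' category and 'spider' source extend the canonical error taxonomy for the new spider errors introduced later in this diff. A minimal sketch of the widened unions, assuming the types are importable from the package root (the import specifier is illustrative):

    import type { CanonicalErrorCategory, ErrorSource } from 'recker';

    // Both assignments only type-check against 1.0.103-next and later.
    const category: CanonicalErrorCategory = 'scrape';
    const source: ErrorSource = 'spider';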
@@ -3,6 +3,8 @@ export interface CrawlQueueItem {
  depth: number;
  priority?: number;
  discoveredFrom?: string;
+ sourceUrl?: string;
+ rewriteReason?: string;
  }
  export interface CrawlQueueAdapter {
  push(item: CrawlQueueItem): Promise<void>;
@@ -0,0 +1,25 @@
+ export interface DomainTransportStats {
+ hostname: string;
+ curlSuccess: number;
+ curlFail: number;
+ undiciSuccess: number;
+ undiciFail: number;
+ lastSuccessTransport?: 'curl' | 'undici';
+ preferredTransport?: 'curl' | 'undici';
+ lastSeenTs: number;
+ }
+ export interface DomainStatsAdapter {
+ load(hostname: string): Promise<DomainTransportStats | null>;
+ loadAll(): Promise<Map<string, DomainTransportStats>>;
+ record(hostname: string, transport: 'curl' | 'undici', success: boolean): Promise<void>;
+ clear?(): Promise<void>;
+ close?(): Promise<void>;
+ }
+ export declare class InMemoryDomainStats implements DomainStatsAdapter {
+ private map;
+ load(hostname: string): Promise<DomainTransportStats | null>;
+ loadAll(): Promise<Map<string, DomainTransportStats>>;
+ record(hostname: string, transport: 'curl' | 'undici', success: boolean): Promise<void>;
+ clear(): Promise<void>;
+ close(): Promise<void>;
+ }
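DomainTransportStats accumulates per-host success/failure counts for the curl and undici transports so a crawl can learn which transport a host tolerates. A usage sketch against the in-memory adapter, assuming it is imported via the scrape entry point (the specifier is illustrative; the re-export is shown in the index changes below):

    import { InMemoryDomainStats } from 'recker/scrape';

    const stats = new InMemoryDomainStats();
    await stats.record('example.com', 'curl', true);
    await stats.record('example.com', 'undici', false);

    const s = await stats.load('example.com');
    // => { hostname: 'example.com', curlSuccess: 1, curlFail: 0,
    //      undiciSuccess: 0, undiciFail: 1,
    //      lastSuccessTransport: 'curl', lastSeenTs: <timestamp> }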
@@ -0,0 +1,47 @@
+ export class InMemoryDomainStats {
+ map = new Map();
+ async load(hostname) {
+ const s = this.map.get(hostname);
+ return s ? { ...s } : null;
+ }
+ async loadAll() {
+ const copy = new Map();
+ for (const [k, v] of this.map) {
+ copy.set(k, { ...v });
+ }
+ return copy;
+ }
+ async record(hostname, transport, success) {
+ let stats = this.map.get(hostname);
+ if (!stats) {
+ stats = {
+ hostname,
+ curlSuccess: 0,
+ curlFail: 0,
+ undiciSuccess: 0,
+ undiciFail: 0,
+ lastSeenTs: 0,
+ };
+ this.map.set(hostname, stats);
+ }
+ if (success) {
+ if (transport === 'curl')
+ stats.curlSuccess += 1;
+ else
+ stats.undiciSuccess += 1;
+ stats.lastSuccessTransport = transport;
+ }
+ else {
+ if (transport === 'curl')
+ stats.curlFail += 1;
+ else
+ stats.undiciFail += 1;
+ }
+ stats.lastSeenTs = Date.now();
+ }
+ async clear() {
+ this.map.clear();
+ }
+ async close() {
+ }
+ }
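Because DomainStatsAdapter is a plain interface, the same contract can back a persistent store. A hypothetical JSON-file-backed adapter, sketched only to illustrate the contract (nothing about this file handling is part of the package, and loading previously saved stats at startup is omitted for brevity):

    import { promises as fs } from 'node:fs';
    import type { DomainStatsAdapter, DomainTransportStats } from 'recker/scrape';

    class JsonFileDomainStats implements DomainStatsAdapter {
      private map = new Map<string, DomainTransportStats>();
      constructor(private path: string) {}

      async load(hostname: string): Promise<DomainTransportStats | null> {
        return this.map.get(hostname) ?? null;
      }

      async loadAll(): Promise<Map<string, DomainTransportStats>> {
        return new Map(this.map);
      }

      async record(hostname: string, transport: 'curl' | 'undici', success: boolean): Promise<void> {
        const s = this.map.get(hostname) ?? {
          hostname, curlSuccess: 0, curlFail: 0, undiciSuccess: 0, undiciFail: 0, lastSeenTs: 0,
        };
        // Mirror the bookkeeping of InMemoryDomainStats above.
        if (success) {
          if (transport === 'curl') s.curlSuccess += 1; else s.undiciSuccess += 1;
          s.lastSuccessTransport = transport;
        } else {
          if (transport === 'curl') s.curlFail += 1; else s.undiciFail += 1;
        }
        s.lastSeenTs = Date.now();
        this.map.set(hostname, s);
        // Naive persistence: rewrite the whole file on every record.
        await fs.writeFile(this.path, JSON.stringify([...this.map.values()]));
      }
    }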
@@ -0,0 +1,82 @@
+ import { ReckerError } from '../core/errors.js';
+ import type { BlockDetectionResult } from '../utils/block-detector.js';
+ import type { CaptchaDetectionResult } from '../utils/block-detector.js';
+ export type SpiderTransportLabel = 'auto' | 'undici' | 'curl';
+ export type CaptchaProviderLabel = CaptchaDetectionResult['provider'];
+ export declare class SpiderBlockError extends ReckerError {
+ url: string;
+ domain: string;
+ reason: BlockDetectionResult['reason'];
+ confidence: number;
+ transport: SpiderTransportLabel;
+ preferredTransport?: SpiderTransportLabel;
+ constructor(opts: {
+ url: string;
+ domain: string;
+ reason: BlockDetectionResult['reason'];
+ confidence: number;
+ transport: SpiderTransportLabel;
+ preferredTransport?: SpiderTransportLabel;
+ statusCode?: number;
+ message?: string;
+ });
+ }
+ export declare class SpiderChallengeError extends ReckerError {
+ url: string;
+ domain: string;
+ provider?: CaptchaProviderLabel;
+ confidence: number;
+ cooldownMs: number;
+ transport: SpiderTransportLabel;
+ constructor(opts: {
+ url: string;
+ domain: string;
+ provider?: CaptchaProviderLabel;
+ confidence: number;
+ cooldownMs: number;
+ transport: SpiderTransportLabel;
+ statusCode?: number;
+ });
+ }
+ export declare class SpiderRobotsDisallowedError extends ReckerError {
+ url: string;
+ path: string;
+ userAgent: string;
+ robotsUrl: string;
+ constructor(opts: {
+ url: string;
+ path: string;
+ userAgent: string;
+ robotsUrl: string;
+ });
+ }
+ export declare class SpiderDepthLimitError extends ReckerError {
+ url: string;
+ depth: number;
+ maxDepth: number;
+ constructor(opts: {
+ url: string;
+ depth: number;
+ maxDepth: number;
+ });
+ }
+ export declare class SpiderDomainOutOfScopeError extends ReckerError {
+ url: string;
+ allowedDomains?: string[];
+ constructor(opts: {
+ url: string;
+ allowedDomains?: string[];
+ });
+ }
+ export declare class SpiderUnsupportedContentError extends ReckerError {
+ url: string;
+ contentType: string;
+ reason: 'binary' | 'pdf' | 'doc' | 'media' | 'archive';
+ fallbackSuggestion?: string;
+ constructor(opts: {
+ url: string;
+ contentType: string;
+ reason: 'binary' | 'pdf' | 'doc' | 'media' | 'archive';
+ fallbackSuggestion?: string;
+ });
+ }
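Each class carries enough context to drive a retry policy. A hedged sketch of discriminating them with instanceof; how the errors reach caller code (thrown, or attached to a page result's typedError field as shown further down) is defined by the spider, and the 30-second block backoff here is an arbitrary choice:

    import {
      SpiderBlockError,
      SpiderChallengeError,
      SpiderRobotsDisallowedError,
    } from 'recker/scrape';

    function retryDelayMs(err: unknown): number | null {
      if (err instanceof SpiderChallengeError) return err.cooldownMs; // honor the CAPTCHA cooldown
      if (err instanceof SpiderBlockError) return 30_000;             // arbitrary backoff for blocks
      if (err instanceof SpiderRobotsDisallowedError) return null;    // never retried (canRetry: false)
      return null;
    }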
@@ -0,0 +1,138 @@
+ import { ReckerError } from '../core/errors.js';
+ export class SpiderBlockError extends ReckerError {
+ url;
+ domain;
+ reason;
+ confidence;
+ transport;
+ preferredTransport;
+ constructor(opts) {
+ super(opts.message ?? `Request to ${opts.url} was blocked (${opts.reason ?? 'unknown'})`, undefined, undefined, [
+ opts.preferredTransport && opts.preferredTransport !== opts.transport
+ ? `Retry with the ${opts.preferredTransport} transport.`
+ : 'Rotate user-agent, proxy, or use curl-impersonate to bypass TLS fingerprinting.',
+ 'Increase the retry delay and honor any Retry-After header.',
+ ], true, {
+ category: 'scrape',
+ source: 'spider',
+ severity: 'medium',
+ canRetry: true,
+ reason: `Blocked: ${opts.reason ?? 'unknown'}`,
+ statusCode: opts.statusCode,
+ });
+ this.name = 'SpiderBlockError';
+ this.url = opts.url;
+ this.domain = opts.domain;
+ this.reason = opts.reason;
+ this.confidence = opts.confidence;
+ this.transport = opts.transport;
+ this.preferredTransport = opts.preferredTransport;
+ }
+ }
+ export class SpiderChallengeError extends ReckerError {
+ url;
+ domain;
+ provider;
+ confidence;
+ cooldownMs;
+ transport;
+ constructor(opts) {
+ super(`CAPTCHA challenge detected on ${opts.url}${opts.provider ? ` (${opts.provider})` : ''}`, undefined, undefined, [
+ `Cool down the host for at least ${opts.cooldownMs}ms before retrying.`,
+ 'Consider solving the challenge via an external CAPTCHA-solving service.',
+ 'Rotate proxy/IP before retrying.',
+ ], true, {
+ category: 'scrape',
+ source: 'spider',
+ severity: 'high',
+ canRetry: true,
+ reason: opts.provider ? `Challenge: ${opts.provider}` : 'CAPTCHA challenge',
+ statusCode: opts.statusCode,
+ retryAfterMs: opts.cooldownMs,
+ });
+ this.name = 'SpiderChallengeError';
+ this.url = opts.url;
+ this.domain = opts.domain;
+ this.provider = opts.provider;
+ this.confidence = opts.confidence;
+ this.cooldownMs = opts.cooldownMs;
+ this.transport = opts.transport;
+ }
+ }
+ export class SpiderRobotsDisallowedError extends ReckerError {
+ url;
+ path;
+ userAgent;
+ robotsUrl;
+ constructor(opts) {
+ super(`URL ${opts.url} is disallowed by robots.txt for ${opts.userAgent}`, undefined, undefined, [
+ 'Do not crawl this URL — the site operator has explicitly excluded it.',
+ `Check ${opts.robotsUrl} for the full rules.`,
+ ], false, {
+ category: 'scrape',
+ source: 'spider',
+ severity: 'low',
+ canRetry: false,
+ reason: 'Disallowed by robots.txt',
+ });
+ this.name = 'SpiderRobotsDisallowedError';
+ this.url = opts.url;
+ this.path = opts.path;
+ this.userAgent = opts.userAgent;
+ this.robotsUrl = opts.robotsUrl;
+ }
+ }
+ export class SpiderDepthLimitError extends ReckerError {
+ url;
+ depth;
+ maxDepth;
+ constructor(opts) {
+ super(`URL ${opts.url} exceeds max crawl depth (${opts.depth} > ${opts.maxDepth})`, undefined, undefined, ['Increase maxDepth if deeper crawls are expected.'], false, {
+ category: 'scrape',
+ source: 'spider',
+ severity: 'low',
+ canRetry: false,
+ reason: 'Depth limit exceeded',
+ });
+ this.name = 'SpiderDepthLimitError';
+ this.url = opts.url;
+ this.depth = opts.depth;
+ this.maxDepth = opts.maxDepth;
+ }
+ }
+ export class SpiderDomainOutOfScopeError extends ReckerError {
+ url;
+ allowedDomains;
+ constructor(opts) {
+ super(`URL ${opts.url} is outside the crawl scope`, undefined, undefined, ['Add the host to allowedDomains or disable sameDomain filtering.'], false, {
+ category: 'scrape',
+ source: 'spider',
+ severity: 'low',
+ canRetry: false,
+ reason: 'Domain out of scope',
+ });
+ this.name = 'SpiderDomainOutOfScopeError';
+ this.url = opts.url;
+ this.allowedDomains = opts.allowedDomains;
+ }
+ }
+ export class SpiderUnsupportedContentError extends ReckerError {
+ url;
+ contentType;
+ reason;
+ fallbackSuggestion;
+ constructor(opts) {
+ super(`Unsupported content type on ${opts.url}: ${opts.contentType}`, undefined, undefined, opts.fallbackSuggestion ? [opts.fallbackSuggestion] : ['Handle this URL with a specialty fetcher or skip.'], false, {
+ category: 'scrape',
+ source: 'spider',
+ severity: 'low',
+ canRetry: false,
+ reason: `Unsupported content: ${opts.contentType}`,
+ });
+ this.name = 'SpiderUnsupportedContentError';
+ this.url = opts.url;
+ this.contentType = opts.contentType;
+ this.reason = opts.reason;
+ this.fallbackSuggestion = opts.fallbackSuggestion;
+ }
+ }
@@ -4,12 +4,17 @@ export { ScrapeDocument } from './document.js';
  export { ScrapeElement } from './element.js';
  export { Spider, spider } from './spider.js';
  export type { SpiderOptions, SpiderPageResult, SpiderPageEvent, SpiderProgress, SpiderResult, } from './spider.js';
+ export { SpiderBlockError, SpiderChallengeError, SpiderRobotsDisallowedError, SpiderDepthLimitError, SpiderDomainOutOfScopeError, SpiderUnsupportedContentError, } from './errors.js';
+ export { rewriteUrl } from './rewrite-url.js';
+ export type { UrlRewriteResult } from './rewrite-url.js';
  export { InMemoryCrawlQueue } from './crawl-queue.js';
  export { SqliteCrawlQueue } from './sqlite-crawl-queue.js';
  export type { CrawlQueueAdapter, CrawlQueueItem } from './crawl-queue.js';
  export { InMemoryCrawlStorage } from './crawl-storage.js';
  export { SqliteCrawlStorage } from './sqlite-crawl-storage.js';
  export type { CrawlStorageAdapter } from './crawl-storage.js';
+ export { InMemoryDomainStats } from './domain-stats.js';
+ export type { DomainStatsAdapter, DomainTransportStats } from './domain-stats.js';
  export { ListProxyAdapter } from './proxy-adapter.js';
  export type { ProxyAdapter } from './proxy-adapter.js';
  export { extractLinks, extractImages, extractMeta, extractOpenGraph, extractTwitterCard, extractJsonLd, extractForms, extractTables, extractScripts, extractStyles, } from './extractors.js';
@@ -2,9 +2,12 @@ export { parse as parseHtmlSync, HTMLElement, TextNode, CommentNode, Node, NodeT
  export { ScrapeDocument } from './document.js';
  export { ScrapeElement } from './element.js';
  export { Spider, spider } from './spider.js';
+ export { SpiderBlockError, SpiderChallengeError, SpiderRobotsDisallowedError, SpiderDepthLimitError, SpiderDomainOutOfScopeError, SpiderUnsupportedContentError, } from './errors.js';
+ export { rewriteUrl } from './rewrite-url.js';
  export { InMemoryCrawlQueue } from './crawl-queue.js';
  export { SqliteCrawlQueue } from './sqlite-crawl-queue.js';
  export { InMemoryCrawlStorage } from './crawl-storage.js';
  export { SqliteCrawlStorage } from './sqlite-crawl-storage.js';
+ export { InMemoryDomainStats } from './domain-stats.js';
  export { ListProxyAdapter } from './proxy-adapter.js';
  export { extractLinks, extractImages, extractMeta, extractOpenGraph, extractTwitterCard, extractJsonLd, extractForms, extractTables, extractScripts, extractStyles, } from './extractors.js';
@@ -0,0 +1,6 @@
+ export interface UrlRewriteResult {
+ url: string;
+ rewritten: boolean;
+ reason?: string;
+ }
+ export declare function rewriteUrl(input: string): UrlRewriteResult;
@@ -0,0 +1,77 @@
+ const GOOGLE_DOC_PREFIXES = [
+ 'https://docs.google.com/document/d/',
+ 'http://docs.google.com/document/d/',
+ ];
+ const GOOGLE_PRESENTATION_PREFIXES = [
+ 'https://docs.google.com/presentation/d/',
+ 'http://docs.google.com/presentation/d/',
+ ];
+ const GOOGLE_SPREADSHEET_PREFIXES = [
+ 'https://docs.google.com/spreadsheets/d/',
+ 'http://docs.google.com/spreadsheets/d/',
+ ];
+ const GOOGLE_DRIVE_FILE_PREFIXES = [
+ 'https://drive.google.com/file/d/',
+ 'http://drive.google.com/file/d/',
+ ];
+ function startsWithAny(url, prefixes) {
+ for (const p of prefixes) {
+ if (url.startsWith(p))
+ return true;
+ }
+ return false;
+ }
+ export function rewriteUrl(input) {
+ if (startsWithAny(input, GOOGLE_DOC_PREFIXES)) {
+ if (input.includes('/document/d/e/')) {
+ return { url: input, rewritten: false };
+ }
+ const id = input.match(/\/document\/d\/([-\w]+)/)?.[1];
+ if (id) {
+ return {
+ url: `https://docs.google.com/document/d/${id}/export?format=html`,
+ rewritten: true,
+ reason: 'google-docs-export',
+ };
+ }
+ }
+ if (startsWithAny(input, GOOGLE_PRESENTATION_PREFIXES)) {
+ if (input.includes('/presentation/d/e/')) {
+ return { url: input, rewritten: false };
+ }
+ const id = input.match(/\/presentation\/d\/([-\w]+)/)?.[1];
+ if (id) {
+ return {
+ url: `https://docs.google.com/presentation/d/${id}/export?format=html`,
+ rewritten: true,
+ reason: 'google-slides-export',
+ };
+ }
+ }
+ if (startsWithAny(input, GOOGLE_SPREADSHEET_PREFIXES)) {
+ if (input.includes('/spreadsheets/d/e/')) {
+ return { url: input, rewritten: false };
+ }
+ const id = input.match(/\/spreadsheets\/d\/([-\w]+)/)?.[1];
+ if (id) {
+ const gidMatch = input.match(/[?&#]gid=(\d+)/);
+ const gidParam = gidMatch ? `&gid=${gidMatch[1]}` : '';
+ return {
+ url: `https://docs.google.com/spreadsheets/d/${id}/gviz/tq?tqx=out:html${gidParam}`,
+ rewritten: true,
+ reason: 'google-sheets-export',
+ };
+ }
+ }
+ if (startsWithAny(input, GOOGLE_DRIVE_FILE_PREFIXES)) {
+ const id = input.match(/\/file\/d\/([-\w]+)/)?.[1];
+ if (id) {
+ return {
+ url: `https://drive.google.com/uc?export=download&id=${id}`,
+ rewritten: true,
+ reason: 'google-drive-download',
+ };
+ }
+ }
+ return { url: input, rewritten: false };
+ }
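Tracing the implementation: published (/d/e/) Google URLs pass through untouched, regular document links are rewritten to their export endpoints, and everything else is returned unchanged. For example (the document ids are made up, and the import specifier is illustrative):

    import { rewriteUrl } from 'recker/scrape';

    rewriteUrl('https://docs.google.com/document/d/abc-123/edit');
    // => { url: 'https://docs.google.com/document/d/abc-123/export?format=html',
    //      rewritten: true, reason: 'google-docs-export' }

    rewriteUrl('https://docs.google.com/spreadsheets/d/xyz/edit#gid=42');
    // => { url: 'https://docs.google.com/spreadsheets/d/xyz/gviz/tq?tqx=out:html&gid=42',
    //      rewritten: true, reason: 'google-sheets-export' }

    rewriteUrl('https://example.com/page');
    // => { url: 'https://example.com/page', rewritten: false }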
@@ -5,7 +5,9 @@ import { type SitemapUrl } from '../seo/validators/sitemap.js';
  import { type CaptchaDetectionResult, type BlockDetectionResult } from '../utils/block-detector.js';
  import { type CrawlQueueAdapter } from './crawl-queue.js';
  import { type CrawlStorageAdapter } from './crawl-storage.js';
+ import type { DomainStatsAdapter } from './domain-stats.js';
  import { type ProxyAdapter } from './proxy-adapter.js';
+ import type { ReckerError } from '../core/errors.js';
  export type SpiderTransport = 'auto' | 'undici' | 'curl';
  type CaptchaProvider = CaptchaDetectionResult['provider'];
  export interface SpiderOptions {
@@ -28,11 +30,15 @@ export interface SpiderOptions {
  retryJitterMs?: number;
  maxDomainBlockStrikes?: number;
  respectRobotsTxt?: boolean;
+ respectRobotsCrawlDelay?: boolean;
+ maxRobotsCrawlDelayMs?: number;
  useSitemap?: boolean;
  sitemapUrl?: string;
+ rewriteUrls?: boolean;
  proxy?: string | string[] | ProxyAdapter;
  transport?: SpiderTransport;
  preferCurlFirst?: boolean;
+ domainStats?: DomainStatsAdapter;
  onPage?: (event: SpiderPageEvent) => void | Promise<void>;
  onCaptchaDetected?: (result: {
  url: string;
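Taken together, the new options let a crawl honor robots.txt crawl-delay with an upper bound, rewrite queued URLs, and plug in a domain-stats store. A configuration sketch using only option names from this hunk (values and the import specifier are illustrative):

    import { Spider, InMemoryDomainStats } from 'recker/scrape';

    const crawler = new Spider({
      respectRobotsTxt: true,
      respectRobotsCrawlDelay: true,          // new: honor Crawl-delay from robots.txt
      maxRobotsCrawlDelayMs: 10_000,          // new: cap how long a Crawl-delay can stall the crawl
      rewriteUrls: true,                      // new: apply the URL rewriting shown above
      transport: 'auto',
      domainStats: new InMemoryDomainStats(), // new: pluggable per-host transport stats
    });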
@@ -134,6 +140,9 @@ export interface SpiderPageResult {
  contentHash?: string;
  isDuplicate?: boolean;
  duplicateOf?: string;
+ sourceUrl?: string;
+ rewriteReason?: string;
+ typedError?: ReckerError;
  }
  export interface SpiderPageEvent {
  result: SpiderPageResult;
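The three new result fields record what happened to each page: sourceUrl and rewriteReason capture a URL rewrite, and typedError carries a structured spider error instead of a bare message. A sketch of consuming them in an onPage handler (result.url is assumed from the existing SpiderPageResult shape, which this hunk truncates):

    import { Spider, SpiderUnsupportedContentError } from 'recker/scrape';

    const crawler = new Spider({
      onPage: ({ result }) => {
        if (result.rewriteReason) {
          console.log(`${result.sourceUrl} -> ${result.url} (${result.rewriteReason})`);
        }
        if (result.typedError instanceof SpiderUnsupportedContentError) {
          console.warn(result.typedError.fallbackSuggestion ?? 'unsupported content, skipping');
        }
      },
    });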
@@ -191,6 +200,7 @@ export declare class Spider {
  private pool;
  private crawlQueue;
  private crawlStorage;
+ private domainStats;
  private proxyAdapter;
  private proxyClients;
  private _visitedCount;
@@ -209,11 +219,13 @@ export declare class Spider {
  private curlTransport;
  private curlAvailable;
  private domainStates;
+ private domainStatsWrites;
  private sitemapUrls;
  private sitemapUrlSet;
  private robotsData;
  private sitemapValidation;
  private robotsValidation;
+ private baseRobotsCrawlDelayMs;
  private waitForDomainRateLimit;
  private toHeaderRecord;
  constructor(options?: SpiderOptions);
@@ -227,6 +239,7 @@ export declare class Spider {
  private crawlPage;
  private getOrCreateDomainState;
  private recordTransportResult;
+ private persistTransportStat;
  private isRetryableStatus;
  private buildRequestHeaders;
  private shouldUseCurlForHost;