@crawlee/basic 4.0.0-beta.10 → 4.0.0-beta.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.d.ts CHANGED
@@ -1,4 +1,3 @@
1
1
  export * from '@crawlee/core';
2
2
  export * from './internals/basic-crawler.js';
3
- export * from './internals/constants.js';
4
3
  //# sourceMappingURL=index.d.ts.map
package/index.d.ts.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,eAAe,CAAC;AAC9B,cAAc,8BAA8B,CAAC;AAC7C,cAAc,0BAA0B,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,eAAe,CAAC;AAC9B,cAAc,8BAA8B,CAAC"}
package/index.js CHANGED
@@ -1,4 +1,3 @@
1
1
  export * from '@crawlee/core';
2
2
  export * from './internals/basic-crawler.js';
3
- export * from './internals/constants.js';
4
3
  //# sourceMappingURL=index.js.map
package/index.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,eAAe,CAAC;AAC9B,cAAc,8BAA8B,CAAC;AAC7C,cAAc,0BAA0B,CAAC"}
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,eAAe,CAAC;AAC9B,cAAc,8BAA8B,CAAC"}
@@ -1,38 +1,14 @@
1
- import type { AddRequestsBatchedOptions, AddRequestsBatchedResult, AutoscaledPoolOptions, BaseHttpClient, CrawlingContext, DatasetExportOptions, EnqueueLinksOptions, EventManager, FinalStatistics, GetUserDataFromRequest, IRequestList, LoadedContext, ProxyInfo, Request, RequestOptions, RestrictedCrawlingContext, RouterHandler, RouterRoutes, Session, SessionPoolOptions, SkippedRequestCallback, Source, StatisticsOptions, StatisticState } from '@crawlee/core';
2
- import { AutoscaledPool, Configuration, Dataset, RequestProvider, SessionPool, Statistics } from '@crawlee/core';
3
- import type { Awaitable, BatchAddRequestsResult, Dictionary, SetStatusMessageOptions } from '@crawlee/types';
1
+ import type { AddRequestsBatchedOptions, AddRequestsBatchedResult, AutoscaledPoolOptions, BaseHttpClient, CrawlingContext, DatasetExportOptions, EventManager, FinalStatistics, GetUserDataFromRequest, IRequestList, ProxyConfiguration, ProxyInfo, Request, RequestOptions, RouterHandler, RouterRoutes, Session, SessionPoolOptions, SkippedRequestCallback, Source, StatisticsOptions, StatisticState } from '@crawlee/core';
2
+ import { AutoscaledPool, Configuration, ContextPipeline, Dataset, RequestProvider, SessionPool, Statistics } from '@crawlee/core';
3
+ import type { Awaitable, Dictionary, SetStatusMessageOptions } from '@crawlee/types';
4
4
  import { RobotsTxtFile } from '@crawlee/utils';
5
- import type { SetRequired } from 'type-fest';
5
+ import type { ReadonlyDeep } from 'type-fest';
6
6
  import type { Log } from '@apify/log';
7
7
  import { TimeoutError } from '@apify/timeout';
8
- export interface BasicCrawlingContext<UserData extends Dictionary = Dictionary> extends CrawlingContext<BasicCrawler, UserData> {
9
- /**
10
- * This function automatically finds and enqueues links from the current page, adding them to the {@link RequestQueue}
11
- * currently used by the crawler.
12
- *
13
- * Optionally, the function allows you to filter the target links' URLs using an array of globs or regular expressions
14
- * and override settings of the enqueued {@link Request} objects.
15
- *
16
- * Check out the [Crawl a website with relative links](https://crawlee.dev/js/docs/examples/crawl-relative-links) example
17
- * for more details regarding its usage.
18
- *
19
- * **Example usage**
20
- *
21
- * ```ts
22
- * async requestHandler({ enqueueLinks }) {
23
- * await enqueueLinks({
24
- * urls: [...],
25
- * });
26
- * },
27
- * ```
28
- *
29
- * @param [options] All `enqueueLinks()` parameters are passed via an options object.
30
- * @returns Promise that resolves to {@link BatchAddRequestsResult} object.
31
- */
32
- enqueueLinks(options?: SetRequired<EnqueueLinksOptions, 'urls'>): Promise<BatchAddRequestsResult>;
8
+ export interface BasicCrawlingContext<UserData extends Dictionary = Dictionary> extends CrawlingContext<UserData> {
33
9
  }
34
- export type RequestHandler<Context extends CrawlingContext = LoadedContext<BasicCrawlingContext & RestrictedCrawlingContext>> = (inputs: LoadedContext<Context>) => Awaitable<void>;
35
- export type ErrorHandler<Context extends CrawlingContext = LoadedContext<BasicCrawlingContext & RestrictedCrawlingContext>> = (inputs: LoadedContext<Context>, error: Error) => Awaitable<void>;
10
+ export type RequestHandler<Context extends CrawlingContext = CrawlingContext> = (inputs: Context) => Awaitable<void>;
11
+ export type ErrorHandler<Context extends CrawlingContext = CrawlingContext, ExtendedContext extends Context = Context> = (inputs: Context & Partial<ExtendedContext>, error: Error) => Awaitable<void>;
36
12
  export interface StatusMessageCallbackParams<Context extends CrawlingContext = BasicCrawlingContext, Crawler extends BasicCrawler<any> = BasicCrawler<Context>> {
37
13
  state: StatisticState;
38
14
  crawler: Crawler;
@@ -40,7 +16,10 @@ export interface StatusMessageCallbackParams<Context extends CrawlingContext = B
40
16
  message: string;
41
17
  }
42
18
  export type StatusMessageCallback<Context extends CrawlingContext = BasicCrawlingContext, Crawler extends BasicCrawler<any> = BasicCrawler<Context>> = (params: StatusMessageCallbackParams<Context, Crawler>) => Awaitable<void>;
43
- export interface BasicCrawlerOptions<Context extends CrawlingContext = BasicCrawlingContext> {
19
+ export type RequireContextPipeline<DefaultContextType extends CrawlingContext, FinalContextType extends DefaultContextType> = DefaultContextType extends FinalContextType ? {} : {
20
+ contextPipelineBuilder: () => ContextPipeline<CrawlingContext, FinalContextType>;
21
+ };
22
+ export interface BasicCrawlerOptions<Context extends CrawlingContext = CrawlingContext, ContextExtension = {}, ExtendedContext extends Context = Context & ContextExtension> {
44
23
  /**
45
24
  * User-provided function that performs the logic of the crawler. It is called for each URL to crawl.
46
25
  *
@@ -58,7 +37,35 @@ export interface BasicCrawlerOptions<Context extends CrawlingContext = BasicCraw
58
37
  * The exceptions are logged to the request using the
59
38
  * {@link Request.pushErrorMessage|`Request.pushErrorMessage()`} function.
60
39
  */
61
- requestHandler?: RequestHandler<Context>;
40
+ requestHandler?: RequestHandler<ExtendedContext>;
41
+ /**
42
+ * Allows the user to extend the crawling context passed to the request handler with custom functionality.
43
+ *
44
+ * **Example usage:**
45
+ *
46
+ * ```javascript
47
+ * import { BasicCrawler } from 'crawlee';
48
+ *
49
+ * // Create a crawler instance
50
+ * const crawler = new BasicCrawler({
51
+ * extendContext: (context) => ({
52
+ * async customHelper() {
53
+ * await context.pushData({ url: context.request.url })
54
+ * }
55
+ * }),
56
+ * async requestHandler(context) {
57
+ * await context.customHelper();
58
+ * },
59
+ * });
60
+ * ```
61
+ */
62
+ extendContext?: (context: Context) => Awaitable<ContextExtension>;
63
+ /**
64
+ * *Intended for BasicCrawler subclasses*. Prepares a context pipeline that transforms the initial crawling context into the shape given by the `Context` type parameter.
65
+ *
66
+ * The option is not required if your crawler subclass does not extend the crawling context with custom information or helpers.
67
+ */
68
+ contextPipelineBuilder?: () => ContextPipeline<CrawlingContext, Context>;
62
69
  /**
63
70
  * Static list of URLs to be processed.
64
71
  * If not provided, the crawler will open the default request queue when the {@link BasicCrawler.addRequests|`crawler.addRequests()`} function is called.
@@ -87,7 +94,7 @@ export interface BasicCrawlerOptions<Context extends CrawlingContext = BasicCraw
87
94
  * Second argument is the `Error` instance that
88
95
  * represents the last error thrown during processing of the request.
89
96
  */
90
- errorHandler?: ErrorHandler<Context>;
97
+ errorHandler?: ErrorHandler<CrawlingContext, ExtendedContext>;
91
98
  /**
92
99
  * A function to handle requests that failed more than {@link BasicCrawlerOptions.maxRequestRetries|`maxRequestRetries`} times.
93
100
  *
@@ -96,7 +103,7 @@ export interface BasicCrawlerOptions<Context extends CrawlingContext = BasicCraw
96
103
  * Second argument is the `Error` instance that
97
104
  * represents the last error thrown during processing of the request.
98
105
  */
99
- failedRequestHandler?: ErrorHandler<Context>;
106
+ failedRequestHandler?: ErrorHandler<CrawlingContext, ExtendedContext>;
100
107
  /**
101
108
  * Specifies the maximum number of retries allowed for a request if its processing fails.
102
109
  * This includes retries due to navigation errors or errors thrown from user-supplied functions
@@ -223,6 +230,11 @@ export interface BasicCrawlerOptions<Context extends CrawlingContext = BasicCraw
223
230
  * Defaults to a new instance of {@link GotScrapingHttpClient}
224
231
  */
225
232
  httpClient?: BaseHttpClient;
233
+ /**
234
+ * If set, the crawler will route all of its connections through the provided
235
+ * proxy URLs, rotating them according to the configuration.
236
+ */
237
+ proxyConfiguration?: ProxyConfiguration;
226
238
  }
227
239
  /**
228
240
  * A set of options that you can toggle to enable experimental features in Crawlee.
@@ -303,7 +315,7 @@ export interface CrawlerExperiments {
303
315
  * ```
304
316
  * @category Crawlers
305
317
  */
306
- export declare class BasicCrawler<Context extends CrawlingContext = BasicCrawlingContext> {
318
+ export declare class BasicCrawler<Context extends CrawlingContext = CrawlingContext, ContextExtension = {}, ExtendedContext extends Context = Context & ContextExtension> {
307
319
  readonly config: Configuration;
308
320
  protected static readonly CRAWLEE_STATE_KEY = "CRAWLEE_STATE";
309
321
  /**
@@ -334,17 +346,25 @@ export declare class BasicCrawler<Context extends CrawlingContext = BasicCrawlin
334
346
  * or to abort it by calling {@link AutoscaledPool.abort|`autoscaledPool.abort()`}.
335
347
  */
336
348
  autoscaledPool?: AutoscaledPool;
349
+ /**
350
+ * A reference to the underlying {@link ProxyConfiguration} class that manages the crawler's proxies.
351
+ * Only available if used by the crawler.
352
+ */
353
+ proxyConfiguration?: ProxyConfiguration;
337
354
  /**
338
355
  * Default {@link Router} instance that will be used if we don't specify any {@link BasicCrawlerOptions.requestHandler|`requestHandler`}.
339
356
  * See {@link Router.addHandler|`router.addHandler()`} and {@link Router.addDefaultHandler|`router.addDefaultHandler()`}.
340
357
  */
341
- readonly router: RouterHandler<LoadedContext<Context>>;
358
+ readonly router: RouterHandler<Context>;
359
+ private contextPipelineBuilder;
360
+ private _contextPipeline?;
361
+ get contextPipeline(): ContextPipeline<CrawlingContext, ExtendedContext>;
342
362
  running: boolean;
343
363
  hasFinishedBefore: boolean;
344
364
  readonly log: Log;
345
- protected requestHandler: RequestHandler<Context>;
346
- protected errorHandler?: ErrorHandler<Context>;
347
- protected failedRequestHandler?: ErrorHandler<Context>;
365
+ protected requestHandler: RequestHandler<ExtendedContext>;
366
+ protected errorHandler?: ErrorHandler<CrawlingContext, ExtendedContext>;
367
+ protected failedRequestHandler?: ErrorHandler<CrawlingContext, ExtendedContext>;
348
368
  protected requestHandlerTimeoutMillis: number;
349
369
  protected internalTimeoutMillis: number;
350
370
  protected maxRequestRetries: number;
@@ -356,7 +376,6 @@ export declare class BasicCrawler<Context extends CrawlingContext = BasicCrawlin
356
376
  protected statusMessageCallback?: StatusMessageCallback;
357
377
  protected sessionPoolOptions: SessionPoolOptions;
358
378
  protected useSessionPool: boolean;
359
- protected crawlingContexts: Map<string, Context>;
360
379
  protected autoscaledPoolOptions: AutoscaledPoolOptions;
361
380
  protected events: EventManager;
362
381
  protected httpClient: BaseHttpClient;
@@ -368,6 +387,10 @@ export declare class BasicCrawler<Context extends CrawlingContext = BasicCrawlin
368
387
  private readonly robotsTxtFileCache;
369
388
  private _experimentWarnings;
370
389
  protected static optionsShape: {
390
+ // @ts-ignore optional peer dependency or compatibility with es2022
391
+ contextPipelineBuilder: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
392
+ // @ts-ignore optional peer dependency or compatibility with es2022
393
+ extendContext: import("ow").Predicate<Function> & import("ow").BasePredicate<Function | undefined>;
371
394
  // @ts-ignore optional peer dependency or compatibility with es2022
372
395
  requestList: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
373
396
  // @ts-ignore optional peer dependency or compatibility with es2022
@@ -394,6 +417,8 @@ export declare class BasicCrawler<Context extends CrawlingContext = BasicCrawlin
394
417
  sessionPoolOptions: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
395
418
  // @ts-ignore optional peer dependency or compatibility with es2022
396
419
  useSessionPool: import("ow").BooleanPredicate & import("ow").BasePredicate<boolean | undefined>;
420
+ // @ts-ignore optional peer dependency or compatibility with es2022
421
+ proxyConfiguration: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
397
422
  // @ts-ignore optional peer dependency or compatibility with es2022
398
423
  statusMessageLoggingInterval: import("ow").NumberPredicate & import("ow").BasePredicate<number | undefined>;
399
424
  // @ts-ignore optional peer dependency or compatibility with es2022
@@ -424,7 +449,8 @@ export declare class BasicCrawler<Context extends CrawlingContext = BasicCrawlin
424
449
  /**
425
450
  * All `BasicCrawler` parameters are passed via an options object.
426
451
  */
427
- constructor(options?: BasicCrawlerOptions<Context>, config?: Configuration);
452
+ constructor(options?: BasicCrawlerOptions<Context, ContextExtension, ExtendedContext> & RequireContextPipeline<CrawlingContext, Context>, // cast because the constructor logic handles missing `contextPipelineBuilder` - the type is just for DX
453
+ config?: Configuration);
428
454
  /**
429
455
  * Checks if the given error is a proxy error by comparing its message to a list of known proxy error messages.
430
456
  * Used for retrying requests that failed due to proxy errors.
@@ -432,12 +458,6 @@ export declare class BasicCrawler<Context extends CrawlingContext = BasicCrawlin
432
458
  * @param error The error to check.
433
459
  */
434
460
  protected isProxyError(error: Error): boolean;
435
- /**
436
- * Checks whether the given crawling context is getting blocked by anti-bot protection using several heuristics.
437
- * Returns `false` if the request is not blocked, otherwise returns a string with a description of the block reason.
438
- * @param _crawlingContext The crawling context to check.
439
- */
440
- protected isRequestBlocked(_crawlingContext: Context): Promise<string | false>;
441
461
  /**
442
462
  * This method is periodically called by the crawler, every `statusMessageLoggingInterval` seconds.
443
463
  */
@@ -473,7 +493,7 @@ export declare class BasicCrawler<Context extends CrawlingContext = BasicCrawlin
473
493
  * @param requests The requests to add
474
494
  * @param options Options for the request queue
475
495
  */
476
- addRequests(requests: (string | Source)[], options?: CrawlerAddRequestsOptions): Promise<CrawlerAddRequestsResult>;
496
+ addRequests(requests: ReadonlyDeep<(string | Source)[]>, options?: CrawlerAddRequestsOptions): Promise<CrawlerAddRequestsResult>;
477
497
  /**
478
498
  * Pushes data to the specified {@link Dataset}, or the default crawler {@link Dataset} by calling {@link Dataset.pushData}.
479
499
  */
@@ -492,7 +512,7 @@ export declare class BasicCrawler<Context extends CrawlingContext = BasicCrawlin
492
512
  */
493
513
  exportData<Data>(path: string, format?: 'json' | 'csv', options?: DatasetExportOptions): Promise<Data[]>;
494
514
  protected _init(): Promise<void>;
495
- protected _runRequestHandler(crawlingContext: Context): Promise<void>;
515
+ protected runRequestHandler(crawlingContext: CrawlingContext): Promise<void>;
496
516
  /**
497
517
  * Handles blocked request
498
518
  */
@@ -505,11 +525,6 @@ export declare class BasicCrawler<Context extends CrawlingContext = BasicCrawlin
505
525
  * and RequestQueue is present then enqueues it to the queue first.
506
526
  */
507
527
  protected _fetchNextRequest(): Promise<Request<Dictionary> | null | undefined>;
508
- /**
509
- * Executed when `errorHandler` finishes or the request is successful.
510
- * Can be used to clean up orphaned browser pages.
511
- */
512
- protected _cleanupContext(_crawlingContext: Context): Promise<void>;
513
528
  /**
514
529
  * Delays processing of the request based on the `sameDomainDelaySecs` option,
515
530
  * adding it back to the queue after the timeout passes. Returns `true` if the request
@@ -535,12 +550,17 @@ export declare class BasicCrawler<Context extends CrawlingContext = BasicCrawlin
535
550
  */
536
551
  protected _defaultIsFinishedFunction(): Promise<boolean>;
537
552
  private _rotateSession;
553
+ /**
554
+ * Unwraps errors thrown by the context pipeline to get the actual user error.
555
+ * RequestHandlerError and ContextPipelineInitializationError wrap the actual error.
556
+ */
557
+ private unwrapError;
538
558
  /**
539
559
  * Handles errors thrown by user provided requestHandler()
540
560
  */
541
- protected _requestFunctionErrorHandler(error: Error, crawlingContext: Context, source: IRequestList | RequestProvider): Promise<void>;
561
+ protected _requestFunctionErrorHandler(error: Error, crawlingContext: CrawlingContext, source: IRequestList | RequestProvider): Promise<void>;
542
562
  protected _tagUserHandlerError<T>(cb: () => unknown): Promise<T>;
543
- protected _handleFailedRequestHandler(crawlingContext: Context, error: Error): Promise<void>;
563
+ protected _handleFailedRequestHandler(crawlingContext: CrawlingContext, error: Error): Promise<void>;
544
564
  /**
545
565
  * Resolves the most verbose error message from a thrown error
546
566
  * @param error The error received
@@ -1 +1 @@
1
- {"version":3,"file":"basic-crawler.d.ts","sourceRoot":"","sources":["../../src/internals/basic-crawler.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EACR,yBAAyB,EACzB,wBAAwB,EACxB,qBAAqB,EACrB,cAAc,EACd,eAAe,EACf,oBAAoB,EACpB,mBAAmB,EACnB,YAAY,EACZ,eAAe,EACf,sBAAsB,EACtB,YAAY,EACZ,aAAa,EACb,SAAS,EACT,OAAO,EACP,cAAc,EACd,yBAAyB,EACzB,aAAa,EACb,YAAY,EACZ,OAAO,EACP,kBAAkB,EAClB,sBAAsB,EACtB,MAAM,EACN,iBAAiB,EACjB,cAAc,EACjB,MAAM,eAAe,CAAC;AACvB,OAAO,EACH,cAAc,EACd,aAAa,EAEb,OAAO,EASP,eAAe,EAOf,WAAW,EACX,UAAU,EAEb,MAAM,eAAe,CAAC;AACvB,OAAO,KAAK,EAAE,SAAS,EAAE,sBAAsB,EAAE,UAAU,EAAE,uBAAuB,EAAE,MAAM,gBAAgB,CAAC;AAC7G,OAAO,EAAE,aAAa,EAAuB,MAAM,gBAAgB,CAAC;AAKpE,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,WAAW,CAAC;AAG7C,OAAO,KAAK,EAAE,GAAG,EAAE,MAAM,YAAY,CAAC;AAEtC,OAAO,EAAuB,YAAY,EAAa,MAAM,gBAAgB,CAAC;AAK9E,MAAM,WAAW,oBAAoB,CAAC,QAAQ,SAAS,UAAU,GAAG,UAAU,CAC1E,SAAQ,eAAe,CAAC,YAAY,EAAE,QAAQ,CAAC;IAC/C;;;;;;;;;;;;;;;;;;;;;;OAsBG;IACH,YAAY,CAAC,OAAO,CAAC,EAAE,WAAW,CAAC,mBAAmB,EAAE,MAAM,CAAC,GAAG,OAAO,CAAC,sBAAsB,CAAC,CAAC;CACrG;AAaD,MAAM,MAAM,cAAc,CACtB,OAAO,SAAS,eAAe,GAAG,aAAa,CAAC,oBAAoB,GAAG,yBAAyB,CAAC,IACjG,CAAC,MAAM,EAAE,aAAa,CAAC,OAAO,CAAC,KAAK,SAAS,CAAC,IAAI,CAAC,CAAC;AAExD,MAAM,MAAM,YAAY,CACpB,OAAO,SAAS,eAAe,GAAG,aAAa,CAAC,oBAAoB,GAAG,yBAAyB,CAAC,IACjG,CAAC,MAAM,EAAE,aAAa,CAAC,OAAO,CAAC,EAAE,KAAK,EAAE,KAAK,KAAK,SAAS,CAAC,IAAI,CAAC,CAAC;AAEtE,MAAM,WAAW,2BAA2B,CACxC,OAAO,SAAS,eAAe,GAAG,oBAAoB,EACtD,OAAO,SAAS,YAAY,CAAC,GAAG,CAAC,GAAG,YAAY,CAAC,OAAO,CAAC;IAEzD,KAAK,EAAE,cAAc,CAAC;IACtB,OAAO,EAAE,OAAO,CAAC;IACjB,aAAa,EAAE,cAAc,CAAC;IAC9B,OAAO,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,MAAM,qBAAqB,CAC7B,OAAO,SAAS,eAAe,GAAG,oBAAoB,EACtD,OAAO,SAAS,YAAY,CAAC,GAAG,CAAC,GAAG,YAAY,CAAC,OAAO,CAAC,IACzD,CAAC,MAAM,EAAE,2BAA2B,CAAC,OAAO,EAAE,OAAO,CAAC,KAAK,SAAS,CAAC,IAAI,CAAC,CAAC;AAE/E,MAAM,WAAW,mBAAmB,CAAC,OAAO,SAAS,eAAe,GAAG,oBAAoB;IACvF;;;;;;;;;;;;;;;;OAgBG;IACH,cAAc,CAAC,EAAE,cAAc,CAAC,OAAO,CAAC,CAAC;IAEzC;;;;;OAKG;IACH,WAAW,CAAC,EAAE,YAAY,CAAC;IAE3B;;;;;OAKG;IACH,YAAY,CAAC,EAAE,eAAe,CAAC;IAE/B;;;OAGG;IA
CH,yBAAyB,CAAC,EAAE,MAAM,CAAC;IAEnC;;;;;;;;OAQG;IACH,YAAY,CAAC,EAAE,YAAY,CAAC,OAAO,CAAC,CAAC;IAErC;;;;;;;OAOG;IACH,oBAAoB,CAAC,EAAE,YAAY,CAAC,OAAO,CAAC,CAAC;IAE7C;;;;;;;;OAQG;IACH,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAE3B;;;OAGG;IACH,mBAAmB,CAAC,EAAE,MAAM,CAAC;IAE7B;;;;;;OAMG;IACH,mBAAmB,CAAC,EAAE,MAAM,CAAC;IAE7B;;;;OAIG;IACH,mBAAmB,CAAC,EAAE,MAAM,CAAC;IAE7B;;;;;;OAMG;IACH,qBAAqB,CAAC,EAAE,qBAAqB,CAAC;IAE9C;;;;;OAKG;IACH,cAAc,CAAC,EAAE,MAAM,CAAC;IAExB;;;OAGG;IACH,cAAc,CAAC,EAAE,MAAM,CAAC;IAExB;;;;OAIG;IACH,oBAAoB,CAAC,EAAE,MAAM,CAAC;IAE9B;;;;OAIG;IACH,SAAS,CAAC,EAAE,OAAO,CAAC;IAEpB;;;OAGG;IACH,cAAc,CAAC,EAAE,OAAO,CAAC;IAEzB;;OAEG;IACH,kBAAkB,CAAC,EAAE,kBAAkB,CAAC;IAExC;;OAEG;IACH,4BAA4B,CAAC,EAAE,MAAM,CAAC;IAEtC;;;;;;;;;;;;;;;OAeG;IACH,qBAAqB,CAAC,EAAE,qBAAqB,CAAC;IAE9C;;;;;;OAMG;IACH,cAAc,CAAC,EAAE,OAAO,CAAC;IAEzB;;;OAGG;IACH,oBAAoB,CAAC,EAAE,OAAO,CAAC;IAE/B;;;OAGG;IACH,gBAAgB,CAAC,EAAE,sBAAsB,CAAC;IAE1C,gBAAgB;IAChB,GAAG,CAAC,EAAE,GAAG,CAAC;IAEV;;;OAGG;IACH,WAAW,CAAC,EAAE,kBAAkB,CAAC;IAEjC;;;OAGG;IACH,iBAAiB,CAAC,EAAE,iBAAiB,CAAC;IAEtC;;;OAGG;IACH,UAAU,CAAC,EAAE,cAAc,CAAC;CAC/B;AAED;;;;;GAKG;AACH,MAAM,WAAW,kBAAkB;IAC/B;;;;;OAKG;IACH,cAAc,CAAC,EAAE,OAAO,CAAC;CAC5B;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+DG;AACH,qBAAa,YAAY,CAAC,OAAO,SAAS,eAAe,GAAG,oBAAoB;IAqHxE,QAAQ,CAAC,MAAM;IApHnB,SAAS,CAAC,MAAM,CAAC,QAAQ,CAAC,iBAAiB,mBAAmB;IAE9D;;OAEG;IACH,QAAQ,CAAC,KAAK,EAAE,UAAU,CAAC;IAE3B;;;OAGG;IACH,WAAW,CAAC,EAAE,YAAY,CAAC;IAE3B;;;;OAIG;IACH,YAAY,CAAC,EAAE,eAAe,CAAC;IAE/B;;;OAGG;IACH,WAAW,CAAC,EAAE,WAAW,CAAC;IAE1B;;;;;;OAMG;IACH,cAAc,CAAC,EAAE,cAAc,CAAC;IAEhC;;;OAGG;IACH,QAAQ,CAAC,MAAM,EAAE,aAAa,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC,CAA2C;IAEjG,OAAO,UAAS;IAChB,iBAAiB,UAAS;IAE1B,QAAQ,CAAC,GAAG,EAAE,GAAG,CAAC;IAClB,SAAS,CAAC,cAAc,EAAG,cAAc,CAAC,OAAO,CAAC,CAAC;IACnD,SAAS,CAAC,YAAY,CAAC,EAAE,YAAY,CAAC,OAAO,CAAC,CAAC;IAC/C,SAAS,CAAC,oBAAoB,CAAC,EAAE,YAAY,CAAC,OAAO,CAAC,CAAC;IACvD,SAAS,CAAC,2BAA2B,EAAG,MAAM,CAAC;IAC/C,SAAS,CAAC,qBAAqB,EAAE,MAAM,CAAC;IACxC,SAAS,CAAC,iB
AAiB,EAAE,MAAM,CAAC;IACpC,SAAS,CAAC,qBAAqB,EAAE,MAAM,CAAC;IACxC,SAAS,CAAC,kBAAkB,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAClD,SAAS,CAAC,mBAAmB,EAAE,MAAM,CAAC;IACtC,SAAS,CAAC,oBAAoB,EAAE,MAAM,CAAC;IACvC,SAAS,CAAC,4BAA4B,EAAE,MAAM,CAAC;IAC/C,SAAS,CAAC,qBAAqB,CAAC,EAAE,qBAAqB,CAAC;IACxD,SAAS,CAAC,kBAAkB,EAAE,kBAAkB,CAAC;IACjD,SAAS,CAAC,cAAc,EAAE,OAAO,CAAC;IAClC,SAAS,CAAC,gBAAgB,uBAA8B;IACxD,SAAS,CAAC,qBAAqB,EAAE,qBAAqB,CAAC;IACvD,SAAS,CAAC,MAAM,EAAE,YAAY,CAAC;IAC/B,SAAS,CAAC,UAAU,EAAE,cAAc,CAAC;IACrC,SAAS,CAAC,cAAc,EAAE,OAAO,CAAC;IAClC,SAAS,CAAC,oBAAoB,EAAE,OAAO,CAAC;IACxC,SAAS,CAAC,gBAAgB,CAAC,EAAE,sBAAsB,CAAC;IACpD,OAAO,CAAC,YAAY,CAAC,CAAU;IAE/B,OAAO,CAAC,WAAW,CAAqB;IACxC,OAAO,CAAC,QAAQ,CAAC,kBAAkB,CAA0B;IAC7D,OAAO,CAAC,mBAAmB,CAA0D;IAErF,SAAS,CAAC,MAAM,CAAC,YAAY;;;;;;;;;;;;;;;;;;;;;;;;;;;MAqC3B;IAEF;;OAEG;gBAEC,OAAO,GAAE,mBAAmB,CAAC,OAAO,CAAM,EACjC,MAAM,gBAAkC;IA4KrD;;;;;OAKG;IACH,SAAS,CAAC,YAAY,CAAC,KAAK,EAAE,KAAK,GAAG,OAAO;IAI7C;;;;OAIG;cACa,gBAAgB,CAAC,gBAAgB,EAAE,OAAO,GAAG,OAAO,CAAC,MAAM,GAAG,KAAK,CAAC;IAIpF;;OAEG;IACG,gBAAgB,CAAC,OAAO,EAAE,MAAM,EAAE,OAAO,GAAE,uBAA4B;IAmB7E,OAAO,CAAC,iBAAiB;IAgDzB;;;;;;;;;OASG;IACG,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC,MAAM,GAAG,OAAO,GAAG,cAAc,CAAC,EAAE,EAAE,OAAO,CAAC,EAAE,iBAAiB,GAAG,OAAO,CAAC,eAAe,CAAC;IA8GlH;;;;OAIG;IACH,IAAI,CAAC,OAAO,SAA6C,GAAG,IAAI;IAY1D,eAAe;IAYf,QAAQ,CAAC,KAAK,SAAS,UAAU,GAAG,UAAU,EAAE,YAAY,GAAS,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC;IAKjG;;;;;;;;;;OAUG;IACG,WAAW,CACb,QAAQ,EAAE,CAAC,MAAM,GAAG,MAAM,CAAC,EAAE,EAC7B,OAAO,GAAE,yBAA8B,GACxC,OAAO,CAAC,wBAAwB,CAAC;IAsCpC;;OAEG;IACG,QAAQ,CAAC,IAAI,EAAE,UAAU,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,eAAe,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAKjG;;OAEG;IACG,UAAU,CAAC,QAAQ,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC;IAIrD;;OAEG;IACG,OAAO,CAAC,GAAG,IAAI,EAAE,UAAU,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,GAAG,UAAU,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;IAKtF;;;OAGG;IACG,UAAU,CAAC,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,MAAM,GAAG,KAAK,EAAE,OAAO,CAAC,EAAE,oBAAoB,GAAG,OAAO,CAAC,IAAI,EAAE,CAAC;
cAoC9F,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;cAoBtB,kBAAkB,CAAC,eAAe,EAAE,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC;IAI3E;;OAEG;IACH,SAAS,CAAC,sBAAsB,CAAC,OAAO,EAAE,OAAO,EAAE,UAAU,EAAE,MAAM;YAQvD,6BAA6B;cAS3B,sBAAsB,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,aAAa,GAAG,SAAS,CAAC;cAuBvE,iBAAiB;IAuCjC;;;OAGG;cACa,iBAAiB;IAyBjC;;;OAGG;cACa,eAAe,CAAC,gBAAgB,EAAE,OAAO;IAEzD;;;;OAIG;IACH,SAAS,CAAC,YAAY,CAAC,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,YAAY,GAAG,eAAe;IAsC/E;;;OAGG;cACa,gBAAgB;IA0JhC;;;OAGG;cACa,gBAAgB,CAC5B,OAAO,EAAE,MAAM,OAAO,CAAC,OAAO,CAAC,EAC/B,OAAO,EAAE,MAAM,EACf,KAAK,EAAE,KAAK,GAAG,MAAM,EACrB,UAAU,SAAI,EACd,OAAO,SAAI,GACZ,OAAO,CAAC,IAAI,CAAC;IAehB;;OAEG;cACa,oBAAoB;IASpC;;OAEG;cACa,0BAA0B;YAS5B,cAAc;IAQ5B;;OAEG;cACa,4BAA4B,CACxC,KAAK,EAAE,KAAK,EACZ,eAAe,EAAE,OAAO,EACxB,MAAM,EAAE,YAAY,GAAG,eAAe,GACvC,OAAO,CAAC,IAAI,CAAC;cAyDA,oBAAoB,CAAC,CAAC,EAAE,EAAE,EAAE,MAAM,OAAO,GAAG,OAAO,CAAC,CAAC,CAAC;cAStD,2BAA2B,CAAC,eAAe,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,GAAG,OAAO,CAAC,IAAI,CAAC;IAYlG;;;;OAIG;IACH,SAAS,CAAC,oBAAoB,CAAC,KAAK,EAAE,KAAK,EAAE,UAAU,UAAQ;IAmB/D,SAAS,CAAC,oBAAoB,CAAC,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK;IAoB7D;;;;;;;OAOG;cACa,wBAAwB,IAAI,OAAO,CAAC,IAAI,CAAC;cAQzC,aAAa,CAAC,QAAQ,SAAS,CAAC,GAAG,IAAI,EAAE,GAAG,EAAE,KAAK,SAAS,CAAC,IAAI,CAAC,EAC9E,KAAK,EAAE,QAAQ,EAAE,EACjB,GAAG,IAAI,EAAE,UAAU,CAAC,QAAQ,CAAC;IASjC;;;OAGG;IACG,QAAQ,IAAI,OAAO,CAAC,IAAI,CAAC;IAc/B,SAAS,CAAC,2BAA2B,CAAC,OAAO,EAAE,OAAO;YAWxC,gBAAgB;IAc9B,SAAS,CAAC,6BAA6B,CAAC,OAAO,EAAE,OAAO;CA6C3D;AAED,MAAM,WAAW,oBAAoB;IACjC,OAAO,EAAE,OAAO,CAAC;IACjB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,SAAS,CAAC,EAAE,SAAS,CAAC;CACzB;AAED,MAAM,WAAW,yBAA0B,SAAQ,yBAAyB;CAAG;AAE/E,MAAM,WAAW,wBAAyB,SAAQ,wBAAwB;CAAG;AAE7E,MAAM,WAAW,iBAAkB,SAAQ,yBAAyB;IAChE;;;;OAIG;IACH,iBAAiB,CAAC,EAAE,OAAO,CAAC;CAC/B;AAED;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,wBAAgB,iBAAiB,CAC7B,OAAO,SAAS,oBAAoB,GAAG,oBAAoB,EAC3D,QAAQ,SAAS,UAAU,GAAG,sBAAsB,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,EAC1E,MAAM,CAAC,EAAE,YAAY,CAAC,OAAO,EAAE,QAAQ,CAAC,0BAEzC"}
1
+ {"version":3,"file":"basic-crawler.d.ts","sourceRoot":"","sources":["../../src/internals/basic-crawler.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EACR,yBAAyB,EACzB,wBAAwB,EACxB,qBAAqB,EACrB,cAAc,EACd,eAAe,EACf,oBAAoB,EACpB,YAAY,EACZ,eAAe,EACf,sBAAsB,EACtB,YAAY,EACZ,kBAAkB,EAClB,SAAS,EACT,OAAO,EACP,cAAc,EACd,aAAa,EACb,YAAY,EACZ,OAAO,EACP,kBAAkB,EAClB,sBAAsB,EACtB,MAAM,EACN,iBAAiB,EACjB,cAAc,EACjB,MAAM,eAAe,CAAC;AACvB,OAAO,EACH,cAAc,EACd,aAAa,EACb,eAAe,EAKf,OAAO,EAUP,eAAe,EAOf,WAAW,EACX,UAAU,EAEb,MAAM,eAAe,CAAC;AACvB,OAAO,KAAK,EAAE,SAAS,EAAE,UAAU,EAAE,uBAAuB,EAAE,MAAM,gBAAgB,CAAC;AACrF,OAAO,EAAE,aAAa,EAAuB,MAAM,gBAAgB,CAAC;AAKpE,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,WAAW,CAAC;AAG9C,OAAO,KAAK,EAAE,GAAG,EAAE,MAAM,YAAY,CAAC;AAEtC,OAAO,EAAuB,YAAY,EAAa,MAAM,gBAAgB,CAAC;AAK9E,MAAM,WAAW,oBAAoB,CAAC,QAAQ,SAAS,UAAU,GAAG,UAAU,CAAE,SAAQ,eAAe,CAAC,QAAQ,CAAC;CAAG;AAapH,MAAM,MAAM,cAAc,CAAC,OAAO,SAAS,eAAe,GAAG,eAAe,IAAI,CAAC,MAAM,EAAE,OAAO,KAAK,SAAS,CAAC,IAAI,CAAC,CAAC;AAErH,MAAM,MAAM,YAAY,CACpB,OAAO,SAAS,eAAe,GAAG,eAAe,EACjD,eAAe,SAAS,OAAO,GAAG,OAAO,IACzC,CAAC,MAAM,EAAE,OAAO,GAAG,OAAO,CAAC,eAAe,CAAC,EAAE,KAAK,EAAE,KAAK,KAAK,SAAS,CAAC,IAAI,CAAC,CAAC;AAElF,MAAM,WAAW,2BAA2B,CACxC,OAAO,SAAS,eAAe,GAAG,oBAAoB,EACtD,OAAO,SAAS,YAAY,CAAC,GAAG,CAAC,GAAG,YAAY,CAAC,OAAO,CAAC;IAEzD,KAAK,EAAE,cAAc,CAAC;IACtB,OAAO,EAAE,OAAO,CAAC;IACjB,aAAa,EAAE,cAAc,CAAC;IAC9B,OAAO,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,MAAM,qBAAqB,CAC7B,OAAO,SAAS,eAAe,GAAG,oBAAoB,EACtD,OAAO,SAAS,YAAY,CAAC,GAAG,CAAC,GAAG,YAAY,CAAC,OAAO,CAAC,IACzD,CAAC,MAAM,EAAE,2BAA2B,CAAC,OAAO,EAAE,OAAO,CAAC,KAAK,SAAS,CAAC,IAAI,CAAC,CAAC;AAE/E,MAAM,MAAM,sBAAsB,CAC9B,kBAAkB,SAAS,eAAe,EAC1C,gBAAgB,SAAS,kBAAkB,IAC3C,kBAAkB,SAAS,gBAAgB,GACzC,EAAE,GACF;IAAE,sBAAsB,EAAE,MAAM,eAAe,CAAC,eAAe,EAAE,gBAAgB,CAAC,CAAA;CAAE,CAAC;AAE3F,MAAM,WAAW,mBAAmB,CAChC,OAAO,SAAS,eAAe,GAAG,eAAe,EACjD,gBAAgB,GAAG,EAAE,EACrB,eAAe,SAAS,OAAO,GAAG,OAAO,GAAG,gBAAgB;IAE5D;;;;;;;;;;;;;;;;OAgBG;IACH,cAAc,CAAC,EAAE,cAAc,CAAC,eAAe,CAAC,CAAC;IAEjD;;;;;;;;;;;;;;;;;;;;OAoBG;IACH,aAAa,CAAC,EAAE,CAAC,
OAAO,EAAE,OAAO,KAAK,SAAS,CAAC,gBAAgB,CAAC,CAAC;IAElE;;;;OAIG;IACH,sBAAsB,CAAC,EAAE,MAAM,eAAe,CAAC,eAAe,EAAE,OAAO,CAAC,CAAC;IAEzE;;;;;OAKG;IACH,WAAW,CAAC,EAAE,YAAY,CAAC;IAE3B;;;;;OAKG;IACH,YAAY,CAAC,EAAE,eAAe,CAAC;IAE/B;;;OAGG;IACH,yBAAyB,CAAC,EAAE,MAAM,CAAC;IAEnC;;;;;;;;OAQG;IACH,YAAY,CAAC,EAAE,YAAY,CAAC,eAAe,EAAE,eAAe,CAAC,CAAC;IAE9D;;;;;;;OAOG;IACH,oBAAoB,CAAC,EAAE,YAAY,CAAC,eAAe,EAAE,eAAe,CAAC,CAAC;IAEtE;;;;;;;;OAQG;IACH,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAE3B;;;OAGG;IACH,mBAAmB,CAAC,EAAE,MAAM,CAAC;IAE7B;;;;;;OAMG;IACH,mBAAmB,CAAC,EAAE,MAAM,CAAC;IAE7B;;;;OAIG;IACH,mBAAmB,CAAC,EAAE,MAAM,CAAC;IAE7B;;;;;;OAMG;IACH,qBAAqB,CAAC,EAAE,qBAAqB,CAAC;IAE9C;;;;;OAKG;IACH,cAAc,CAAC,EAAE,MAAM,CAAC;IAExB;;;OAGG;IACH,cAAc,CAAC,EAAE,MAAM,CAAC;IAExB;;;;OAIG;IACH,oBAAoB,CAAC,EAAE,MAAM,CAAC;IAE9B;;;;OAIG;IACH,SAAS,CAAC,EAAE,OAAO,CAAC;IAEpB;;;OAGG;IACH,cAAc,CAAC,EAAE,OAAO,CAAC;IAEzB;;OAEG;IACH,kBAAkB,CAAC,EAAE,kBAAkB,CAAC;IAExC;;OAEG;IACH,4BAA4B,CAAC,EAAE,MAAM,CAAC;IAEtC;;;;;;;;;;;;;;;OAeG;IACH,qBAAqB,CAAC,EAAE,qBAAqB,CAAC;IAE9C;;;;;;OAMG;IACH,cAAc,CAAC,EAAE,OAAO,CAAC;IAEzB;;;OAGG;IACH,oBAAoB,CAAC,EAAE,OAAO,CAAC;IAE/B;;;OAGG;IACH,gBAAgB,CAAC,EAAE,sBAAsB,CAAC;IAE1C,gBAAgB;IAChB,GAAG,CAAC,EAAE,GAAG,CAAC;IAEV;;;OAGG;IACH,WAAW,CAAC,EAAE,kBAAkB,CAAC;IAEjC;;;OAGG;IACH,iBAAiB,CAAC,EAAE,iBAAiB,CAAC;IAEtC;;;OAGG;IACH,UAAU,CAAC,EAAE,cAAc,CAAC;IAE5B;;;OAGG;IACH,kBAAkB,CAAC,EAAE,kBAAkB,CAAC;CAC3C;AAED;;;;;GAKG;AACH,MAAM,WAAW,kBAAkB;IAC/B;;;;;OAKG;IACH,cAAc,CAAC,EAAE,OAAO,CAAC;CAC5B;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+DG;AACH,qBAAa,YAAY,CACrB,OAAO,SAAS,eAAe,GAAG,eAAe,EACjD,gBAAgB,GAAG,EAAE,EACrB,eAAe,SAAS,OAAO,GAAG,OAAO,GAAG,gBAAgB;IA2IxD,QAAQ,CAAC,MAAM;IAzInB,SAAS,CAAC,MAAM,CAAC,QAAQ,CAAC,iBAAiB,mBAAmB;IAE9D;;OAEG;IACH,QAAQ,CAAC,KAAK,EAAE,UAAU,CAAC;IAE3B;;;OAGG;IACH,WAAW,CAAC,EAAE,YAAY,CAAC;IAE3B;;;;OAIG;IACH,YAAY,CAAC,EAAE,eAAe,CAAC;IAE/B;;;OAGG;IACH,WAAW,CAAC,EAAE,WAAW,CAAC;IAE1B;;;;;;OAMG;IACH,cAAc,CAAC,EAAE,cAAc,CAAC;IAEhC;;;OAGG;IACH,kBAAkB,CAAC,EAAE,kBAAkB,CAAC;I
AExC;;;OAGG;IACH,QAAQ,CAAC,MAAM,EAAE,aAAa,CAAC,OAAO,CAAC,CAA4B;IAEnE,OAAO,CAAC,sBAAsB,CAA0D;IACxF,OAAO,CAAC,gBAAgB,CAAC,CAAoD;IAE7E,IAAI,eAAe,IAAI,eAAe,CAAC,eAAe,EAAE,eAAe,CAAC,CAMvE;IAED,OAAO,UAAS;IAChB,iBAAiB,UAAS;IAE1B,QAAQ,CAAC,GAAG,EAAE,GAAG,CAAC;IAClB,SAAS,CAAC,cAAc,EAAG,cAAc,CAAC,eAAe,CAAC,CAAC;IAC3D,SAAS,CAAC,YAAY,CAAC,EAAE,YAAY,CAAC,eAAe,EAAE,eAAe,CAAC,CAAC;IACxE,SAAS,CAAC,oBAAoB,CAAC,EAAE,YAAY,CAAC,eAAe,EAAE,eAAe,CAAC,CAAC;IAChF,SAAS,CAAC,2BAA2B,EAAG,MAAM,CAAC;IAC/C,SAAS,CAAC,qBAAqB,EAAE,MAAM,CAAC;IACxC,SAAS,CAAC,iBAAiB,EAAE,MAAM,CAAC;IACpC,SAAS,CAAC,qBAAqB,EAAE,MAAM,CAAC;IACxC,SAAS,CAAC,kBAAkB,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAClD,SAAS,CAAC,mBAAmB,EAAE,MAAM,CAAC;IACtC,SAAS,CAAC,oBAAoB,EAAE,MAAM,CAAC;IACvC,SAAS,CAAC,4BAA4B,EAAE,MAAM,CAAC;IAC/C,SAAS,CAAC,qBAAqB,CAAC,EAAE,qBAAqB,CAAC;IACxD,SAAS,CAAC,kBAAkB,EAAE,kBAAkB,CAAC;IACjD,SAAS,CAAC,cAAc,EAAE,OAAO,CAAC;IAClC,SAAS,CAAC,qBAAqB,EAAE,qBAAqB,CAAC;IACvD,SAAS,CAAC,MAAM,EAAE,YAAY,CAAC;IAC/B,SAAS,CAAC,UAAU,EAAE,cAAc,CAAC;IACrC,SAAS,CAAC,cAAc,EAAE,OAAO,CAAC;IAClC,SAAS,CAAC,oBAAoB,EAAE,OAAO,CAAC;IACxC,SAAS,CAAC,gBAAgB,CAAC,EAAE,sBAAsB,CAAC;IACpD,OAAO,CAAC,YAAY,CAAC,CAAU;IAE/B,OAAO,CAAC,WAAW,CAAqB;IACxC,OAAO,CAAC,QAAQ,CAAC,kBAAkB,CAA0B;IAC7D,OAAO,CAAC,mBAAmB,CAA0D;IAErF,SAAS,CAAC,MAAM,CAAC,YAAY;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;MAyC3B;IAEF;;OAEG;gBAEC,OAAO,GAAE,mBAAmB,CAAC,OAAO,EAAE,gBAAgB,EAAE,eAAe,CAAC,GACpE,sBAAsB,CAAC,eAAe,EAAE,OAAO,CAAa,EAAE,wGAAwG;IACjK,MAAM,gBAAkC;IA6MrD;;;;;OAKG;IACH,SAAS,CAAC,YAAY,CAAC,KAAK,EAAE,KAAK,GAAG,OAAO;IAI7C;;OAEG;IACG,gBAAgB,CAAC,OAAO,EAAE,MAAM,EAAE,OAAO,GAAE,uBAA4B;IAmB7E,OAAO,CAAC,iBAAiB;IAgDzB;;;;;;;;;OASG;IACG,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC,MAAM,GAAG,OAAO,GAAG,cAAc,CAAC,EAAE,EAAE,OAAO,CAAC,EAAE,iBAAiB,GAAG,OAAO,CAAC,eAAe,CAAC;IA8GlH;;;;OAIG;IACH,IAAI,CAAC,OAAO,SAA6C,GAAG,IAAI;IAY1D,eAAe;IAYf,QAAQ,CAAC,KAAK,SAAS,UAAU,GAAG,UAAU,EAAE,YAAY,GAAS,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC;IAKjG;;;;;;;;;;OAUG;IACG,WAAW,CACb,QAAQ,EAAE,YAAY,CAAC,CAAC,MAAM,GAAG,MAAM,CAAC,EAAE,CAAC,EAC3C,OAAO,GAAE
,yBAA8B,GACxC,OAAO,CAAC,wBAAwB,CAAC;IAsCpC;;OAEG;IACG,QAAQ,CAAC,IAAI,EAAE,UAAU,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,eAAe,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAKjG;;OAEG;IACG,UAAU,CAAC,QAAQ,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC;IAIrD;;OAEG;IACG,OAAO,CAAC,GAAG,IAAI,EAAE,UAAU,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,GAAG,UAAU,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;IAKtF;;;OAGG;IACG,UAAU,CAAC,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,MAAM,GAAG,KAAK,EAAE,OAAO,CAAC,EAAE,oBAAoB,GAAG,OAAO,CAAC,IAAI,EAAE,CAAC;cAoC9F,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;cAoBtB,iBAAiB,CAAC,eAAe,EAAE,eAAe,GAAG,OAAO,CAAC,IAAI,CAAC;IAUlF;;OAEG;IACH,SAAS,CAAC,sBAAsB,CAAC,OAAO,EAAE,OAAO,EAAE,UAAU,EAAE,MAAM;YAQvD,6BAA6B;cAS3B,sBAAsB,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,aAAa,GAAG,SAAS,CAAC;cAuBvE,iBAAiB;IAuCjC;;;OAGG;cACa,iBAAiB;IAyBjC;;;;OAIG;IACH,SAAS,CAAC,YAAY,CAAC,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,YAAY,GAAG,eAAe;IAsC/E;;;OAGG;cACa,gBAAgB;IA+JhC;;;OAGG;cACa,gBAAgB,CAC5B,OAAO,EAAE,MAAM,OAAO,CAAC,OAAO,CAAC,EAC/B,OAAO,EAAE,MAAM,EACf,KAAK,EAAE,KAAK,GAAG,MAAM,EACrB,UAAU,SAAI,EACd,OAAO,SAAI,GACZ,OAAO,CAAC,IAAI,CAAC;IAehB;;OAEG;cACa,oBAAoB;IASpC;;OAEG;cACa,0BAA0B;YAS5B,cAAc;IAQ5B;;;OAGG;IACH,OAAO,CAAC,WAAW;IAWnB;;OAEG;cACa,4BAA4B,CACxC,KAAK,EAAE,KAAK,EACZ,eAAe,EAAE,eAAe,EAChC,MAAM,EAAE,YAAY,GAAG,eAAe,GACvC,OAAO,CAAC,IAAI,CAAC;cA4DA,oBAAoB,CAAC,CAAC,EAAE,EAAE,EAAE,MAAM,OAAO,GAAG,OAAO,CAAC,CAAC,CAAC;cAStD,2BAA2B,CAAC,eAAe,EAAE,eAAe,EAAE,KAAK,EAAE,KAAK,GAAG,OAAO,CAAC,IAAI,CAAC;IAe1G;;;;OAIG;IACH,SAAS,CAAC,oBAAoB,CAAC,KAAK,EAAE,KAAK,EAAE,UAAU,UAAQ;IAmB/D,SAAS,CAAC,oBAAoB,CAAC,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK;IAoB7D;;;;;;;OAOG;cACa,wBAAwB,IAAI,OAAO,CAAC,IAAI,CAAC;cAQzC,aAAa,CAAC,QAAQ,SAAS,CAAC,GAAG,IAAI,EAAE,GAAG,EAAE,KAAK,SAAS,CAAC,IAAI,CAAC,EAC9E,KAAK,EAAE,QAAQ,EAAE,EACjB,GAAG,IAAI,EAAE,UAAU,CAAC,QAAQ,CAAC;IASjC;;;OAGG;IACG,QAAQ,IAAI,OAAO,CAAC,IAAI,CAAC;IAY/B,SAAS,CAAC,2BAA2B,CAAC,OAAO,EAAE,OAAO;YAWxC,gBAAgB;IAc9B,SAAS,CAAC,6BAA6B,CAAC,OAAO,EAAE,OAAO;CA6C3D;AAED,MAAM,WAAW,oBAAoB;IACjC,OAAO,EAAE,OAAO,CAAC;IACjB,OAAO,
CAAC,EAAE,OAAO,CAAC;IAClB,SAAS,CAAC,EAAE,SAAS,CAAC;CACzB;AAED,MAAM,WAAW,yBAA0B,SAAQ,yBAAyB;CAAG;AAE/E,MAAM,WAAW,wBAAyB,SAAQ,wBAAwB;CAAG;AAE7E,MAAM,WAAW,iBAAkB,SAAQ,yBAAyB;IAChE;;;;OAIG;IACH,iBAAiB,CAAC,EAAE,OAAO,CAAC;CAC/B;AAED;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,wBAAgB,iBAAiB,CAC7B,OAAO,SAAS,oBAAoB,GAAG,oBAAoB,EAC3D,QAAQ,SAAS,UAAU,GAAG,sBAAsB,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,EAC1E,MAAM,CAAC,EAAE,YAAY,CAAC,OAAO,EAAE,QAAQ,CAAC,0BAEzC"}
@@ -1,6 +1,6 @@
1
1
  import { writeFile } from 'node:fs/promises';
2
2
  import { dirname } from 'node:path';
3
- import { AutoscaledPool, Configuration, CriticalError, Dataset, enqueueLinks, EnqueueStrategy, GotScrapingHttpClient, KeyValueStore, mergeCookies, NonRetryableError, purgeDefaultStorages, RequestProvider, RequestQueue, RequestQueueV1, RequestState, RetryRequestError, Router, SessionError, SessionPool, Statistics, validators, } from '@crawlee/core';
3
+ import { AutoscaledPool, Configuration, ContextPipeline, ContextPipelineCleanupError, ContextPipelineInitializationError, ContextPipelineInterruptedError, CriticalError, Dataset, enqueueLinks, EnqueueStrategy, GotScrapingHttpClient, KeyValueStore, mergeCookies, NonRetryableError, purgeDefaultStorages, RequestHandlerError, RequestProvider, RequestQueue, RequestQueueV1, RequestState, RetryRequestError, Router, SessionError, SessionPool, Statistics, validators, } from '@crawlee/core';
4
4
  import { RobotsTxtFile, ROTATE_PROXY_ERRORS } from '@crawlee/utils';
5
5
  import { stringify } from 'csv-stringify/sync';
6
6
  import { ensureDir, writeJSON } from 'fs-extra/esm';
@@ -116,11 +116,24 @@ export class BasicCrawler {
116
116
  * or to abort it by calling {@link AutoscaledPool.abort|`autoscaledPool.abort()`}.
117
117
  */
118
118
  autoscaledPool;
119
+ /**
120
+ * A reference to the underlying {@link ProxyConfiguration} class that manages the crawler's proxies.
121
+ * Only available if used by the crawler.
122
+ */
123
+ proxyConfiguration;
119
124
  /**
120
125
  * Default {@link Router} instance that will be used if we don't specify any {@link BasicCrawlerOptions.requestHandler|`requestHandler`}.
121
126
  * See {@link Router.addHandler|`router.addHandler()`} and {@link Router.addDefaultHandler|`router.addDefaultHandler()`}.
122
127
  */
123
128
  router = Router.create();
129
+ contextPipelineBuilder;
130
+ _contextPipeline;
131
+ get contextPipeline() {
132
+ if (this._contextPipeline === undefined) {
133
+ this._contextPipeline = this.contextPipelineBuilder();
134
+ }
135
+ return this._contextPipeline;
136
+ }
124
137
  running = false;
125
138
  hasFinishedBefore = false;
126
139
  log;
@@ -138,7 +151,6 @@ export class BasicCrawler {
138
151
  statusMessageCallback;
139
152
  sessionPoolOptions;
140
153
  useSessionPool;
141
- crawlingContexts = new Map();
142
154
  autoscaledPoolOptions;
143
155
  events;
144
156
  httpClient;
@@ -150,6 +162,8 @@ export class BasicCrawler {
150
162
  robotsTxtFileCache;
151
163
  _experimentWarnings = {};
152
164
  static optionsShape = {
165
+ contextPipelineBuilder: ow.optional.object,
166
+ extendContext: ow.optional.function,
153
167
  requestList: ow.optional.object.validate(validators.requestList),
154
168
  requestQueue: ow.optional.object.validate(validators.requestQueue),
155
169
  // Subclasses override this function instead of passing it
@@ -166,6 +180,7 @@ export class BasicCrawler {
166
180
  autoscaledPoolOptions: ow.optional.object,
167
181
  sessionPoolOptions: ow.optional.object,
168
182
  useSessionPool: ow.optional.boolean,
183
+ proxyConfiguration: ow.optional.object.validate(validators.proxyConfiguration),
169
184
  statusMessageLoggingInterval: ow.optional.number,
170
185
  statusMessageCallback: ow.optional.function,
171
186
  retryOnBlocked: ow.optional.boolean,
@@ -185,17 +200,45 @@ export class BasicCrawler {
185
200
  /**
186
201
  * All `BasicCrawler` parameters are passed via an options object.
187
202
  */
188
- constructor(options = {}, config = Configuration.getGlobalConfig()) {
203
+ constructor(options = {}, // cast because the constructor logic handles missing `contextPipelineBuilder` - the type is just for DX
204
+ config = Configuration.getGlobalConfig()) {
189
205
  this.config = config;
190
206
  ow(options, 'BasicCrawlerOptions', ow.object.exactShape(BasicCrawler.optionsShape));
191
- const { requestList, requestQueue, maxRequestRetries = 3, sameDomainDelaySecs = 0, maxSessionRotations = 10, maxRequestsPerCrawl, autoscaledPoolOptions = {}, keepAlive, sessionPoolOptions = {}, useSessionPool = true,
207
+ const { requestList, requestQueue, maxRequestRetries = 3, sameDomainDelaySecs = 0, maxSessionRotations = 10, maxRequestsPerCrawl, autoscaledPoolOptions = {}, keepAlive, sessionPoolOptions = {}, useSessionPool = true, proxyConfiguration,
192
208
  // AutoscaledPool shorthands
193
209
  minConcurrency, maxConcurrency, maxRequestsPerMinute, retryOnBlocked = false, respectRobotsTxtFile = false, onSkippedRequest, requestHandler, requestHandlerTimeoutSecs, errorHandler, failedRequestHandler, statusMessageLoggingInterval = 10, statusMessageCallback, statisticsOptions, httpClient,
194
210
  // internal
195
211
  log = defaultLog.child({ prefix: this.constructor.name }), experiments = {}, } = options;
212
+ // Store the builder so that it can be run when the contextPipeline is needed.
213
+ // Invoking it immediately would cause problems with parent constructor call order.
214
+ this.contextPipelineBuilder = () => {
215
+ let contextPipeline = (options.contextPipelineBuilder?.() ??
216
+ ContextPipeline.create()); // Thanks to the RequireContextPipeline, contextPipeline will only be undefined if InitialContextType is CrawlingContext
217
+ if (options.extendContext !== undefined) {
218
+ contextPipeline = contextPipeline.compose({
219
+ action: async (context) => await options.extendContext(context),
220
+ });
221
+ }
222
+ contextPipeline = contextPipeline.compose({
223
+ action: async (context) => {
224
+ const { request } = context;
225
+ if (!this.requestMatchesEnqueueStrategy(request)) {
226
+ // eslint-disable-next-line dot-notation
227
+ const message = `Skipping request ${request.id} (starting url: ${request.url} -> loaded url: ${request.loadedUrl}) because it does not match the enqueue strategy (${request['enqueueStrategy']}).`;
228
+ this.log.debug(message);
229
+ request.noRetry = true;
230
+ request.state = RequestState.SKIPPED;
231
+ throw new ContextPipelineInterruptedError(message);
232
+ }
233
+ return context;
234
+ },
235
+ });
236
+ return contextPipeline;
237
+ };
196
238
  this.requestList = requestList;
197
239
  this.requestQueue = requestQueue;
198
240
  this.httpClient = httpClient ?? new GotScrapingHttpClient();
241
+ this.proxyConfiguration = proxyConfiguration;
199
242
  this.log = log;
200
243
  this.statusMessageLoggingInterval = statusMessageLoggingInterval;
201
244
  this.statusMessageCallback = statusMessageCallback;
@@ -247,7 +290,6 @@ export class BasicCrawler {
247
290
  }
248
291
  }
249
292
  this.useSessionPool = useSessionPool;
250
- this.crawlingContexts = new Map();
251
293
  const maxSignedInteger = 2 ** 31 - 1;
252
294
  if (this.requestHandlerTimeoutMillis > maxSignedInteger) {
253
295
  log.warning(`requestHandlerTimeoutMillis ${this.requestHandlerTimeoutMillis}` +
@@ -310,14 +352,6 @@ export class BasicCrawler {
310
352
  isProxyError(error) {
311
353
  return ROTATE_PROXY_ERRORS.some((x) => this._getMessageFromError(error)?.includes(x));
312
354
  }
313
- /**
314
- * Checks whether the given crawling context is getting blocked by anti-bot protection using several heuristics.
315
- * Returns `false` if the request is not blocked, otherwise returns a string with a description of the block reason.
316
- * @param _crawlingContext The crawling context to check.
317
- */
318
- async isRequestBlocked(_crawlingContext) {
319
- throw new Error('the "isRequestBlocked" method is not implemented in this crawler.');
320
- }
321
355
  /**
322
356
  * This method is periodically called by the crawler, every `statusMessageLoggingInterval` seconds.
323
357
  */
@@ -590,8 +624,10 @@ export class BasicCrawler {
590
624
  }
591
625
  await this._loadHandledRequestCount();
592
626
  }
593
- async _runRequestHandler(crawlingContext) {
594
- await this.requestHandler(crawlingContext);
627
+ async runRequestHandler(crawlingContext) {
628
+ await this.contextPipeline.call(crawlingContext, async (finalContext) => {
629
+ await addTimeoutToPromise(async () => this.requestHandler(finalContext), this.requestHandlerTimeoutMillis, `requestHandler timed out after ${this.requestHandlerTimeoutMillis / 1000} seconds (${finalContext.request.id}).`);
630
+ });
595
631
  }
596
632
  /**
597
633
  * Handles blocked request
@@ -686,11 +722,6 @@ export class BasicCrawler {
686
722
  await this.requestList.markRequestHandled(request);
687
723
  return this.requestQueue.fetchNextRequest();
688
724
  }
689
- /**
690
- * Executed when `errorHandler` finishes or the request is successful.
691
- * Can be used to clean up orphaned browser pages.
692
- */
693
- async _cleanupContext(_crawlingContext) { }
694
725
  /**
695
726
  * Delays processing of the request based on the `sameDomainDelaySecs` option,
696
727
  * adding it back to the queue after the timeout passes. Returns `true` if the request
@@ -737,7 +768,12 @@ export class BasicCrawler {
737
768
  tryCancel();
738
769
  if (this.useSessionPool) {
739
770
  await this._timeoutAndRetry(async () => {
740
- session = await this.sessionPool.getSession();
771
+ session = await this.sessionPool.newSession({
772
+ proxyInfo: await this.proxyConfiguration?.newProxyInfo({
773
+ request: request ?? undefined,
774
+ }),
775
+ maxUsageCount: 1,
776
+ });
741
777
  }, this.internalTimeoutMillis, `Fetching session timed out after ${this.internalTimeoutMillis / 1e3} seconds.`);
742
778
  }
743
779
  tryCancel();
@@ -759,18 +795,15 @@ export class BasicCrawler {
759
795
  request.loadedUrl = undefined;
760
796
  const statisticsId = request.id || request.uniqueKey;
761
797
  this.stats.startJob(statisticsId);
762
- // Shared crawling context
763
- // @ts-expect-error
764
- // All missing properties (that extend CrawlingContext) are set dynamically,
765
- // but TS does not know that, so otherwise it would throw when compiling.
798
+ const deferredCleanup = [];
766
799
  const crawlingContext = {
767
800
  id: cryptoRandomObjectId(10),
768
- crawler: this,
769
801
  log: this.log,
770
802
  request,
771
803
  session,
804
+ proxyInfo: session?.proxyInfo,
772
805
  enqueueLinks: async (options) => {
773
- return enqueueLinks({
806
+ return await enqueueLinks({
774
807
  // specify the RQ first to allow overriding it
775
808
  requestQueue: await this.getRequestQueue(),
776
809
  robotsTxtFile: await this.getRobotsTxtFileForUrl(request.url),
@@ -778,17 +811,21 @@ export class BasicCrawler {
778
811
  ...options,
779
812
  });
780
813
  },
781
- addRequests: this.addRequests.bind(this),
814
+ addRequests: async (requests, options) => {
815
+ await this.addRequests(requests, options);
816
+ },
782
817
  pushData: this.pushData.bind(this),
783
818
  useState: this.useState.bind(this),
784
- sendRequest: createSendRequest(this.httpClient, request, session, () => crawlingContext.proxyInfo?.url),
819
+ sendRequest: createSendRequest(this.httpClient, request, session),
785
820
  getKeyValueStore: async (idOrName) => KeyValueStore.open(idOrName, { config: this.config }),
821
+ registerDeferredCleanup: (cleanup) => {
822
+ deferredCleanup.push(cleanup);
823
+ },
786
824
  };
787
- this.crawlingContexts.set(crawlingContext.id, crawlingContext);
788
825
  let isRequestLocked = true;
789
826
  try {
790
827
  request.state = RequestState.REQUEST_HANDLER;
791
- await addTimeoutToPromise(async () => this._runRequestHandler(crawlingContext), this.requestHandlerTimeoutMillis, `requestHandler timed out after ${this.requestHandlerTimeoutMillis / 1000} seconds (${request.id}).`);
828
+ await this.runRequestHandler(crawlingContext);
792
829
  await this._timeoutAndRetry(async () => source.markRequestHandled(request), this.internalTimeoutMillis, `Marking request ${request.url} (${request.id}) as handled timed out after ${this.internalTimeoutMillis / 1e3} seconds.`);
793
830
  isRequestLocked = false; // markRequestHandled succeeded and unlocked the request
794
831
  this.stats.finishJob(statisticsId, request.retryCount);
@@ -797,7 +834,8 @@ export class BasicCrawler {
797
834
  request.state = RequestState.DONE;
798
835
  crawlingContext.session?.markGood();
799
836
  }
800
- catch (err) {
837
+ catch (rawError) {
838
+ const err = this.unwrapError(rawError);
801
839
  try {
802
840
  request.state = RequestState.ERROR_HANDLER;
803
841
  await addTimeoutToPromise(async () => this._requestFunctionErrorHandler(err, crawlingContext, source), this.internalTimeoutMillis, `Handling request failure of ${request.url} (${request.id}) timed out after ${this.internalTimeoutMillis / 1e3} seconds.`);
@@ -807,24 +845,24 @@ export class BasicCrawler {
807
845
  request.state = RequestState.DONE;
808
846
  }
809
847
  catch (secondaryError) {
810
- if (!secondaryError.triggeredFromUserHandler &&
848
+ const unwrappedSecondaryError = this.unwrapError(secondaryError);
849
+ if (!unwrappedSecondaryError.triggeredFromUserHandler &&
811
850
  // avoid reprinting the same critical error multiple times, as it will be printed by Nodejs at the end anyway
812
- !(secondaryError instanceof CriticalError)) {
851
+ !(unwrappedSecondaryError instanceof CriticalError)) {
813
852
  const apifySpecific = process.env.APIFY_IS_AT_HOME
814
853
  ? `This may have happened due to an internal error of Apify's API or due to a misconfigured crawler.`
815
854
  : '';
816
- this.log.exception(secondaryError, 'An exception occurred during handling of failed request. ' +
855
+ this.log.exception(unwrappedSecondaryError, 'An exception occurred during handling of failed request. ' +
817
856
  `This places the crawler and its underlying storages into an unknown state and crawling will be terminated. ${apifySpecific}`);
818
857
  }
819
858
  request.state = RequestState.ERROR;
820
- throw secondaryError;
859
+ throw unwrappedSecondaryError;
821
860
  }
822
861
  // decrease the session score if the request fails (but the error handler did not throw)
823
862
  crawlingContext.session?.markBad();
824
863
  }
825
864
  finally {
826
- await this._cleanupContext(crawlingContext);
827
- this.crawlingContexts.delete(crawlingContext.id);
865
+ await Promise.all(deferredCleanup.map((cleanup) => cleanup()));
828
866
  // Safety net - release the lock if nobody managed to do it before
829
867
  if (isRequestLocked && source instanceof RequestProvider) {
830
868
  try {
@@ -883,6 +921,18 @@ export class BasicCrawler {
883
921
  request.sessionRotationCount++;
884
922
  crawlingContext.session?.retire();
885
923
  }
924
+ /**
925
+ * Unwraps errors thrown by the context pipeline to get the actual user error.
926
+ * RequestHandlerError and ContextPipelineInitializationError wrap the actual error.
927
+ */
928
+ unwrapError(error) {
929
+ if (error instanceof RequestHandlerError ||
930
+ error instanceof ContextPipelineInitializationError ||
931
+ error instanceof ContextPipelineCleanupError) {
932
+ return this.unwrapError(error.cause);
933
+ }
934
+ return error;
935
+ }
886
936
  /**
887
937
  * Handles errors thrown by user provided requestHandler()
888
938
  */
@@ -895,7 +945,8 @@ export class BasicCrawler {
895
945
  const shouldRetryRequest = this._canRequestBeRetried(request, error);
896
946
  if (shouldRetryRequest) {
897
947
  await this.stats.errorTrackerRetry.addAsync(error, crawlingContext);
898
- await this.errorHandler?.(crawlingContext, error);
948
+ await this.errorHandler?.(crawlingContext, // valid cast - ExtendedContext transitively extends CrawlingContext
949
+ error);
899
950
  if (error instanceof SessionError) {
900
951
  await this._rotateSession(crawlingContext);
901
952
  }
@@ -947,7 +998,8 @@ export class BasicCrawler {
947
998
  const message = this._getMessageFromError(error, true);
948
999
  this.log.error(`Request failed and reached maximum retries. ${message}`, { id, url, method, uniqueKey });
949
1000
  if (this.failedRequestHandler) {
950
- await this.failedRequestHandler?.(crawlingContext, error);
1001
+ await this.failedRequestHandler?.(crawlingContext, // valid cast - ExtendedContext transitively extends CrawlingContext
1002
+ error);
951
1003
  }
952
1004
  }
953
1005
  /**
@@ -1013,9 +1065,7 @@ export class BasicCrawler {
1013
1065
  */
1014
1066
  async teardown() {
1015
1067
  this.events.emit("persistState" /* EventType.PERSIST_STATE */, { isMigrating: false });
1016
- if (this.useSessionPool) {
1017
- await this.sessionPool.teardown();
1018
- }
1068
+ await this.sessionPool?.teardown();
1019
1069
  if (this._closeEvents) {
1020
1070
  await this.events.close();
1021
1071
  }