@crawlee/basic 4.0.0-beta.10 → 4.0.0-beta.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.d.ts CHANGED
@@ -1,4 +1,3 @@
1
1
  export * from '@crawlee/core';
2
2
  export * from './internals/basic-crawler.js';
3
- export * from './internals/constants.js';
4
3
  //# sourceMappingURL=index.d.ts.map
package/index.d.ts.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,eAAe,CAAC;AAC9B,cAAc,8BAA8B,CAAC;AAC7C,cAAc,0BAA0B,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,eAAe,CAAC;AAC9B,cAAc,8BAA8B,CAAC"}
package/index.js CHANGED
@@ -1,4 +1,3 @@
1
1
  export * from '@crawlee/core';
2
2
  export * from './internals/basic-crawler.js';
3
- export * from './internals/constants.js';
4
3
  //# sourceMappingURL=index.js.map
package/index.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,eAAe,CAAC;AAC9B,cAAc,8BAA8B,CAAC;AAC7C,cAAc,0BAA0B,CAAC"}
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,eAAe,CAAC;AAC9B,cAAc,8BAA8B,CAAC"}
@@ -1,38 +1,14 @@
1
- import type { AddRequestsBatchedOptions, AddRequestsBatchedResult, AutoscaledPoolOptions, BaseHttpClient, CrawlingContext, DatasetExportOptions, EnqueueLinksOptions, EventManager, FinalStatistics, GetUserDataFromRequest, IRequestList, LoadedContext, ProxyInfo, Request, RequestOptions, RestrictedCrawlingContext, RouterHandler, RouterRoutes, Session, SessionPoolOptions, SkippedRequestCallback, Source, StatisticsOptions, StatisticState } from '@crawlee/core';
2
- import { AutoscaledPool, Configuration, Dataset, RequestProvider, SessionPool, Statistics } from '@crawlee/core';
3
- import type { Awaitable, BatchAddRequestsResult, Dictionary, SetStatusMessageOptions } from '@crawlee/types';
1
+ import type { AddRequestsBatchedOptions, AddRequestsBatchedResult, AutoscaledPoolOptions, BaseHttpClient, CrawlingContext, DatasetExportOptions, EventManager, FinalStatistics, GetUserDataFromRequest, IRequestList, ProxyInfo, Request, RequestOptions, RouterHandler, RouterRoutes, Session, SessionPoolOptions, SkippedRequestCallback, Source, StatisticsOptions, StatisticState } from '@crawlee/core';
2
+ import { AutoscaledPool, Configuration, ContextPipeline, Dataset, RequestProvider, SessionPool, Statistics } from '@crawlee/core';
3
+ import type { Awaitable, Dictionary, SetStatusMessageOptions } from '@crawlee/types';
4
4
  import { RobotsTxtFile } from '@crawlee/utils';
5
- import type { SetRequired } from 'type-fest';
5
+ import type { ReadonlyDeep } from 'type-fest';
6
6
  import type { Log } from '@apify/log';
7
7
  import { TimeoutError } from '@apify/timeout';
8
- export interface BasicCrawlingContext<UserData extends Dictionary = Dictionary> extends CrawlingContext<BasicCrawler, UserData> {
9
- /**
10
- * This function automatically finds and enqueues links from the current page, adding them to the {@link RequestQueue}
11
- * currently used by the crawler.
12
- *
13
- * Optionally, the function allows you to filter the target links' URLs using an array of globs or regular expressions
14
- * and override settings of the enqueued {@link Request} objects.
15
- *
16
- * Check out the [Crawl a website with relative links](https://crawlee.dev/js/docs/examples/crawl-relative-links) example
17
- * for more details regarding its usage.
18
- *
19
- * **Example usage**
20
- *
21
- * ```ts
22
- * async requestHandler({ enqueueLinks }) {
23
- * await enqueueLinks({
24
- * urls: [...],
25
- * });
26
- * },
27
- * ```
28
- *
29
- * @param [options] All `enqueueLinks()` parameters are passed via an options object.
30
- * @returns Promise that resolves to {@link BatchAddRequestsResult} object.
31
- */
32
- enqueueLinks(options?: SetRequired<EnqueueLinksOptions, 'urls'>): Promise<BatchAddRequestsResult>;
8
+ export interface BasicCrawlingContext<UserData extends Dictionary = Dictionary> extends CrawlingContext<UserData> {
33
9
  }
34
- export type RequestHandler<Context extends CrawlingContext = LoadedContext<BasicCrawlingContext & RestrictedCrawlingContext>> = (inputs: LoadedContext<Context>) => Awaitable<void>;
35
- export type ErrorHandler<Context extends CrawlingContext = LoadedContext<BasicCrawlingContext & RestrictedCrawlingContext>> = (inputs: LoadedContext<Context>, error: Error) => Awaitable<void>;
10
+ export type RequestHandler<Context extends CrawlingContext = CrawlingContext> = (inputs: Context) => Awaitable<void>;
11
+ export type ErrorHandler<Context extends CrawlingContext = CrawlingContext, ExtendedContext extends Context = Context> = (inputs: Context & Partial<ExtendedContext>, error: Error) => Awaitable<void>;
36
12
  export interface StatusMessageCallbackParams<Context extends CrawlingContext = BasicCrawlingContext, Crawler extends BasicCrawler<any> = BasicCrawler<Context>> {
37
13
  state: StatisticState;
38
14
  crawler: Crawler;
@@ -40,7 +16,10 @@ export interface StatusMessageCallbackParams<Context extends CrawlingContext = B
40
16
  message: string;
41
17
  }
42
18
  export type StatusMessageCallback<Context extends CrawlingContext = BasicCrawlingContext, Crawler extends BasicCrawler<any> = BasicCrawler<Context>> = (params: StatusMessageCallbackParams<Context, Crawler>) => Awaitable<void>;
43
- export interface BasicCrawlerOptions<Context extends CrawlingContext = BasicCrawlingContext> {
19
+ export type RequireContextPipeline<DefaultContextType extends CrawlingContext, FinalContextType extends DefaultContextType> = DefaultContextType extends FinalContextType ? {} : {
20
+ contextPipelineBuilder: () => ContextPipeline<CrawlingContext, FinalContextType>;
21
+ };
22
+ export interface BasicCrawlerOptions<Context extends CrawlingContext = CrawlingContext, ContextExtension = {}, ExtendedContext extends Context = Context & ContextExtension> {
44
23
  /**
45
24
  * User-provided function that performs the logic of the crawler. It is called for each URL to crawl.
46
25
  *
@@ -58,7 +37,35 @@ export interface BasicCrawlerOptions<Context extends CrawlingContext = BasicCraw
58
37
  * The exceptions are logged to the request using the
59
38
  * {@link Request.pushErrorMessage|`Request.pushErrorMessage()`} function.
60
39
  */
61
- requestHandler?: RequestHandler<Context>;
40
+ requestHandler?: RequestHandler<ExtendedContext>;
41
+ /**
42
+ * Allows the user to extend the crawling context passed to the request handler with custom functionality.
43
+ *
44
+ * **Example usage:**
45
+ *
46
+ * ```javascript
47
+ * import { BasicCrawler } from 'crawlee';
48
+ *
49
+ * // Create a crawler instance
50
+ * const crawler = new BasicCrawler({
51
+ * extendContext: (context) => ({
52
+ * async customHelper() {
53
+ * await context.pushData({ url: context.request.url })
54
+ * }
55
+ * }),
56
+ * async requestHandler(context) {
57
+ * await context.customHelper();
58
+ * },
59
+ * });
60
+ * ```
61
+ */
62
+ extendContext?: (context: Context) => Awaitable<ContextExtension>;
63
+ /**
64
+ * *Intended for BasicCrawler subclasses*. Prepares a context pipeline that transforms the initial crawling context into the shape given by the `Context` type parameter.
65
+ *
66
+ * The option is not required if your crawler subclass does not extend the crawling context with custom information or helpers.
67
+ */
68
+ contextPipelineBuilder?: () => ContextPipeline<CrawlingContext, Context>;
62
69
  /**
63
70
  * Static list of URLs to be processed.
64
71
  * If not provided, the crawler will open the default request queue when the {@link BasicCrawler.addRequests|`crawler.addRequests()`} function is called.
@@ -87,7 +94,7 @@ export interface BasicCrawlerOptions<Context extends CrawlingContext = BasicCraw
87
94
  * Second argument is the `Error` instance that
88
95
  * represents the last error thrown during processing of the request.
89
96
  */
90
- errorHandler?: ErrorHandler<Context>;
97
+ errorHandler?: ErrorHandler<CrawlingContext, ExtendedContext>;
91
98
  /**
92
99
  * A function to handle requests that failed more than {@link BasicCrawlerOptions.maxRequestRetries|`maxRequestRetries`} times.
93
100
  *
@@ -96,7 +103,7 @@ export interface BasicCrawlerOptions<Context extends CrawlingContext = BasicCraw
96
103
  * Second argument is the `Error` instance that
97
104
  * represents the last error thrown during processing of the request.
98
105
  */
99
- failedRequestHandler?: ErrorHandler<Context>;
106
+ failedRequestHandler?: ErrorHandler<CrawlingContext, ExtendedContext>;
100
107
  /**
101
108
  * Specifies the maximum number of retries allowed for a request if its processing fails.
102
109
  * This includes retries due to navigation errors or errors thrown from user-supplied functions
@@ -303,7 +310,7 @@ export interface CrawlerExperiments {
303
310
  * ```
304
311
  * @category Crawlers
305
312
  */
306
- export declare class BasicCrawler<Context extends CrawlingContext = BasicCrawlingContext> {
313
+ export declare class BasicCrawler<Context extends CrawlingContext = CrawlingContext, ContextExtension = {}, ExtendedContext extends Context = Context & ContextExtension> {
307
314
  readonly config: Configuration;
308
315
  protected static readonly CRAWLEE_STATE_KEY = "CRAWLEE_STATE";
309
316
  /**
@@ -338,13 +345,16 @@ export declare class BasicCrawler<Context extends CrawlingContext = BasicCrawlin
338
345
  * Default {@link Router} instance that will be used if we don't specify any {@link BasicCrawlerOptions.requestHandler|`requestHandler`}.
339
346
  * See {@link Router.addHandler|`router.addHandler()`} and {@link Router.addDefaultHandler|`router.addDefaultHandler()`}.
340
347
  */
341
- readonly router: RouterHandler<LoadedContext<Context>>;
348
+ readonly router: RouterHandler<Context>;
349
+ private contextPipelineBuilder;
350
+ private _contextPipeline?;
351
+ get contextPipeline(): ContextPipeline<CrawlingContext, ExtendedContext>;
342
352
  running: boolean;
343
353
  hasFinishedBefore: boolean;
344
354
  readonly log: Log;
345
- protected requestHandler: RequestHandler<Context>;
346
- protected errorHandler?: ErrorHandler<Context>;
347
- protected failedRequestHandler?: ErrorHandler<Context>;
355
+ protected requestHandler: RequestHandler<ExtendedContext>;
356
+ protected errorHandler?: ErrorHandler<CrawlingContext, ExtendedContext>;
357
+ protected failedRequestHandler?: ErrorHandler<CrawlingContext, ExtendedContext>;
348
358
  protected requestHandlerTimeoutMillis: number;
349
359
  protected internalTimeoutMillis: number;
350
360
  protected maxRequestRetries: number;
@@ -356,7 +366,6 @@ export declare class BasicCrawler<Context extends CrawlingContext = BasicCrawlin
356
366
  protected statusMessageCallback?: StatusMessageCallback;
357
367
  protected sessionPoolOptions: SessionPoolOptions;
358
368
  protected useSessionPool: boolean;
359
- protected crawlingContexts: Map<string, Context>;
360
369
  protected autoscaledPoolOptions: AutoscaledPoolOptions;
361
370
  protected events: EventManager;
362
371
  protected httpClient: BaseHttpClient;
@@ -368,6 +377,10 @@ export declare class BasicCrawler<Context extends CrawlingContext = BasicCrawlin
368
377
  private readonly robotsTxtFileCache;
369
378
  private _experimentWarnings;
370
379
  protected static optionsShape: {
380
+ // @ts-ignore optional peer dependency or compatibility with es2022
381
+ contextPipelineBuilder: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
382
+ // @ts-ignore optional peer dependency or compatibility with es2022
383
+ extendContext: import("ow").Predicate<Function> & import("ow").BasePredicate<Function | undefined>;
371
384
  // @ts-ignore optional peer dependency or compatibility with es2022
372
385
  requestList: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
373
386
  // @ts-ignore optional peer dependency or compatibility with es2022
@@ -424,7 +437,8 @@ export declare class BasicCrawler<Context extends CrawlingContext = BasicCrawlin
424
437
  /**
425
438
  * All `BasicCrawler` parameters are passed via an options object.
426
439
  */
427
- constructor(options?: BasicCrawlerOptions<Context>, config?: Configuration);
440
+ constructor(options?: BasicCrawlerOptions<Context, ContextExtension, ExtendedContext> & RequireContextPipeline<CrawlingContext, Context>, // cast because the constructor logic handles missing `contextPipelineBuilder` - the type is just for DX
441
+ config?: Configuration);
428
442
  /**
429
443
  * Checks if the given error is a proxy error by comparing its message to a list of known proxy error messages.
430
444
  * Used for retrying requests that failed due to proxy errors.
@@ -432,12 +446,6 @@ export declare class BasicCrawler<Context extends CrawlingContext = BasicCrawlin
432
446
  * @param error The error to check.
433
447
  */
434
448
  protected isProxyError(error: Error): boolean;
435
- /**
436
- * Checks whether the given crawling context is getting blocked by anti-bot protection using several heuristics.
437
- * Returns `false` if the request is not blocked, otherwise returns a string with a description of the block reason.
438
- * @param _crawlingContext The crawling context to check.
439
- */
440
- protected isRequestBlocked(_crawlingContext: Context): Promise<string | false>;
441
449
  /**
442
450
  * This method is periodically called by the crawler, every `statusMessageLoggingInterval` seconds.
443
451
  */
@@ -473,7 +481,7 @@ export declare class BasicCrawler<Context extends CrawlingContext = BasicCrawlin
473
481
  * @param requests The requests to add
474
482
  * @param options Options for the request queue
475
483
  */
476
- addRequests(requests: (string | Source)[], options?: CrawlerAddRequestsOptions): Promise<CrawlerAddRequestsResult>;
484
+ addRequests(requests: ReadonlyDeep<(string | Source)[]>, options?: CrawlerAddRequestsOptions): Promise<CrawlerAddRequestsResult>;
477
485
  /**
478
486
  * Pushes data to the specified {@link Dataset}, or the default crawler {@link Dataset} by calling {@link Dataset.pushData}.
479
487
  */
@@ -492,7 +500,7 @@ export declare class BasicCrawler<Context extends CrawlingContext = BasicCrawlin
492
500
  */
493
501
  exportData<Data>(path: string, format?: 'json' | 'csv', options?: DatasetExportOptions): Promise<Data[]>;
494
502
  protected _init(): Promise<void>;
495
- protected _runRequestHandler(crawlingContext: Context): Promise<void>;
503
+ protected runRequestHandler(crawlingContext: CrawlingContext): Promise<void>;
496
504
  /**
497
505
  * Handles blocked request
498
506
  */
@@ -505,11 +513,6 @@ export declare class BasicCrawler<Context extends CrawlingContext = BasicCrawlin
505
513
  * and RequestQueue is present then enqueues it to the queue first.
506
514
  */
507
515
  protected _fetchNextRequest(): Promise<Request<Dictionary> | null | undefined>;
508
- /**
509
- * Executed when `errorHandler` finishes or the request is successful.
510
- * Can be used to clean up orphaned browser pages.
511
- */
512
- protected _cleanupContext(_crawlingContext: Context): Promise<void>;
513
516
  /**
514
517
  * Delays processing of the request based on the `sameDomainDelaySecs` option,
515
518
  * adding it back to the queue after the timeout passes. Returns `true` if the request
@@ -535,12 +538,17 @@ export declare class BasicCrawler<Context extends CrawlingContext = BasicCrawlin
535
538
  */
536
539
  protected _defaultIsFinishedFunction(): Promise<boolean>;
537
540
  private _rotateSession;
541
+ /**
542
+ * Unwraps errors thrown by the context pipeline to get the actual user error.
543
+ * RequestHandlerError and ContextPipelineInitializationError wrap the actual error.
544
+ */
545
+ private unwrapError;
538
546
  /**
539
547
  * Handles errors thrown by user provided requestHandler()
540
548
  */
541
- protected _requestFunctionErrorHandler(error: Error, crawlingContext: Context, source: IRequestList | RequestProvider): Promise<void>;
549
+ protected _requestFunctionErrorHandler(error: Error, crawlingContext: CrawlingContext, source: IRequestList | RequestProvider): Promise<void>;
542
550
  protected _tagUserHandlerError<T>(cb: () => unknown): Promise<T>;
543
- protected _handleFailedRequestHandler(crawlingContext: Context, error: Error): Promise<void>;
551
+ protected _handleFailedRequestHandler(crawlingContext: CrawlingContext, error: Error): Promise<void>;
544
552
  /**
545
553
  * Resolves the most verbose error message from a thrown error
546
554
  * @param error The error received
@@ -1 +1 @@
1
- {"version":3,"file":"basic-crawler.d.ts","sourceRoot":"","sources":["../../src/internals/basic-crawler.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EACR,yBAAyB,EACzB,wBAAwB,EACxB,qBAAqB,EACrB,cAAc,EACd,eAAe,EACf,oBAAoB,EACpB,mBAAmB,EACnB,YAAY,EACZ,eAAe,EACf,sBAAsB,EACtB,YAAY,EACZ,aAAa,EACb,SAAS,EACT,OAAO,EACP,cAAc,EACd,yBAAyB,EACzB,aAAa,EACb,YAAY,EACZ,OAAO,EACP,kBAAkB,EAClB,sBAAsB,EACtB,MAAM,EACN,iBAAiB,EACjB,cAAc,EACjB,MAAM,eAAe,CAAC;AACvB,OAAO,EACH,cAAc,EACd,aAAa,EAEb,OAAO,EASP,eAAe,EAOf,WAAW,EACX,UAAU,EAEb,MAAM,eAAe,CAAC;AACvB,OAAO,KAAK,EAAE,SAAS,EAAE,sBAAsB,EAAE,UAAU,EAAE,uBAAuB,EAAE,MAAM,gBAAgB,CAAC;AAC7G,OAAO,EAAE,aAAa,EAAuB,MAAM,gBAAgB,CAAC;AAKpE,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,WAAW,CAAC;AAG7C,OAAO,KAAK,EAAE,GAAG,EAAE,MAAM,YAAY,CAAC;AAEtC,OAAO,EAAuB,YAAY,EAAa,MAAM,gBAAgB,CAAC;AAK9E,MAAM,WAAW,oBAAoB,CAAC,QAAQ,SAAS,UAAU,GAAG,UAAU,CAC1E,SAAQ,eAAe,CAAC,YAAY,EAAE,QAAQ,CAAC;IAC/C;;;;;;;;;;;;;;;;;;;;;;OAsBG;IACH,YAAY,CAAC,OAAO,CAAC,EAAE,WAAW,CAAC,mBAAmB,EAAE,MAAM,CAAC,GAAG,OAAO,CAAC,sBAAsB,CAAC,CAAC;CACrG;AAaD,MAAM,MAAM,cAAc,CACtB,OAAO,SAAS,eAAe,GAAG,aAAa,CAAC,oBAAoB,GAAG,yBAAyB,CAAC,IACjG,CAAC,MAAM,EAAE,aAAa,CAAC,OAAO,CAAC,KAAK,SAAS,CAAC,IAAI,CAAC,CAAC;AAExD,MAAM,MAAM,YAAY,CACpB,OAAO,SAAS,eAAe,GAAG,aAAa,CAAC,oBAAoB,GAAG,yBAAyB,CAAC,IACjG,CAAC,MAAM,EAAE,aAAa,CAAC,OAAO,CAAC,EAAE,KAAK,EAAE,KAAK,KAAK,SAAS,CAAC,IAAI,CAAC,CAAC;AAEtE,MAAM,WAAW,2BAA2B,CACxC,OAAO,SAAS,eAAe,GAAG,oBAAoB,EACtD,OAAO,SAAS,YAAY,CAAC,GAAG,CAAC,GAAG,YAAY,CAAC,OAAO,CAAC;IAEzD,KAAK,EAAE,cAAc,CAAC;IACtB,OAAO,EAAE,OAAO,CAAC;IACjB,aAAa,EAAE,cAAc,CAAC;IAC9B,OAAO,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,MAAM,qBAAqB,CAC7B,OAAO,SAAS,eAAe,GAAG,oBAAoB,EACtD,OAAO,SAAS,YAAY,CAAC,GAAG,CAAC,GAAG,YAAY,CAAC,OAAO,CAAC,IACzD,CAAC,MAAM,EAAE,2BAA2B,CAAC,OAAO,EAAE,OAAO,CAAC,KAAK,SAAS,CAAC,IAAI,CAAC,CAAC;AAE/E,MAAM,WAAW,mBAAmB,CAAC,OAAO,SAAS,eAAe,GAAG,oBAAoB;IACvF;;;;;;;;;;;;;;;;OAgBG;IACH,cAAc,CAAC,EAAE,cAAc,CAAC,OAAO,CAAC,CAAC;IAEzC;;;;;OAKG;IACH,WAAW,CAAC,EAAE,YAAY,CAAC;IAE3B;;;;;OAKG;IACH,YAAY,CAAC,EAAE,eAAe,CAAC;IAE/B;;;OAGG;IA
CH,yBAAyB,CAAC,EAAE,MAAM,CAAC;IAEnC;;;;;;;;OAQG;IACH,YAAY,CAAC,EAAE,YAAY,CAAC,OAAO,CAAC,CAAC;IAErC;;;;;;;OAOG;IACH,oBAAoB,CAAC,EAAE,YAAY,CAAC,OAAO,CAAC,CAAC;IAE7C;;;;;;;;OAQG;IACH,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAE3B;;;OAGG;IACH,mBAAmB,CAAC,EAAE,MAAM,CAAC;IAE7B;;;;;;OAMG;IACH,mBAAmB,CAAC,EAAE,MAAM,CAAC;IAE7B;;;;OAIG;IACH,mBAAmB,CAAC,EAAE,MAAM,CAAC;IAE7B;;;;;;OAMG;IACH,qBAAqB,CAAC,EAAE,qBAAqB,CAAC;IAE9C;;;;;OAKG;IACH,cAAc,CAAC,EAAE,MAAM,CAAC;IAExB;;;OAGG;IACH,cAAc,CAAC,EAAE,MAAM,CAAC;IAExB;;;;OAIG;IACH,oBAAoB,CAAC,EAAE,MAAM,CAAC;IAE9B;;;;OAIG;IACH,SAAS,CAAC,EAAE,OAAO,CAAC;IAEpB;;;OAGG;IACH,cAAc,CAAC,EAAE,OAAO,CAAC;IAEzB;;OAEG;IACH,kBAAkB,CAAC,EAAE,kBAAkB,CAAC;IAExC;;OAEG;IACH,4BAA4B,CAAC,EAAE,MAAM,CAAC;IAEtC;;;;;;;;;;;;;;;OAeG;IACH,qBAAqB,CAAC,EAAE,qBAAqB,CAAC;IAE9C;;;;;;OAMG;IACH,cAAc,CAAC,EAAE,OAAO,CAAC;IAEzB;;;OAGG;IACH,oBAAoB,CAAC,EAAE,OAAO,CAAC;IAE/B;;;OAGG;IACH,gBAAgB,CAAC,EAAE,sBAAsB,CAAC;IAE1C,gBAAgB;IAChB,GAAG,CAAC,EAAE,GAAG,CAAC;IAEV;;;OAGG;IACH,WAAW,CAAC,EAAE,kBAAkB,CAAC;IAEjC;;;OAGG;IACH,iBAAiB,CAAC,EAAE,iBAAiB,CAAC;IAEtC;;;OAGG;IACH,UAAU,CAAC,EAAE,cAAc,CAAC;CAC/B;AAED;;;;;GAKG;AACH,MAAM,WAAW,kBAAkB;IAC/B;;;;;OAKG;IACH,cAAc,CAAC,EAAE,OAAO,CAAC;CAC5B;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+DG;AACH,qBAAa,YAAY,CAAC,OAAO,SAAS,eAAe,GAAG,oBAAoB;IAqHxE,QAAQ,CAAC,MAAM;IApHnB,SAAS,CAAC,MAAM,CAAC,QAAQ,CAAC,iBAAiB,mBAAmB;IAE9D;;OAEG;IACH,QAAQ,CAAC,KAAK,EAAE,UAAU,CAAC;IAE3B;;;OAGG;IACH,WAAW,CAAC,EAAE,YAAY,CAAC;IAE3B;;;;OAIG;IACH,YAAY,CAAC,EAAE,eAAe,CAAC;IAE/B;;;OAGG;IACH,WAAW,CAAC,EAAE,WAAW,CAAC;IAE1B;;;;;;OAMG;IACH,cAAc,CAAC,EAAE,cAAc,CAAC;IAEhC;;;OAGG;IACH,QAAQ,CAAC,MAAM,EAAE,aAAa,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC,CAA2C;IAEjG,OAAO,UAAS;IAChB,iBAAiB,UAAS;IAE1B,QAAQ,CAAC,GAAG,EAAE,GAAG,CAAC;IAClB,SAAS,CAAC,cAAc,EAAG,cAAc,CAAC,OAAO,CAAC,CAAC;IACnD,SAAS,CAAC,YAAY,CAAC,EAAE,YAAY,CAAC,OAAO,CAAC,CAAC;IAC/C,SAAS,CAAC,oBAAoB,CAAC,EAAE,YAAY,CAAC,OAAO,CAAC,CAAC;IACvD,SAAS,CAAC,2BAA2B,EAAG,MAAM,CAAC;IAC/C,SAAS,CAAC,qBAAqB,EAAE,MAAM,CAAC;IACxC,SAAS,CAAC,iB
AAiB,EAAE,MAAM,CAAC;IACpC,SAAS,CAAC,qBAAqB,EAAE,MAAM,CAAC;IACxC,SAAS,CAAC,kBAAkB,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAClD,SAAS,CAAC,mBAAmB,EAAE,MAAM,CAAC;IACtC,SAAS,CAAC,oBAAoB,EAAE,MAAM,CAAC;IACvC,SAAS,CAAC,4BAA4B,EAAE,MAAM,CAAC;IAC/C,SAAS,CAAC,qBAAqB,CAAC,EAAE,qBAAqB,CAAC;IACxD,SAAS,CAAC,kBAAkB,EAAE,kBAAkB,CAAC;IACjD,SAAS,CAAC,cAAc,EAAE,OAAO,CAAC;IAClC,SAAS,CAAC,gBAAgB,uBAA8B;IACxD,SAAS,CAAC,qBAAqB,EAAE,qBAAqB,CAAC;IACvD,SAAS,CAAC,MAAM,EAAE,YAAY,CAAC;IAC/B,SAAS,CAAC,UAAU,EAAE,cAAc,CAAC;IACrC,SAAS,CAAC,cAAc,EAAE,OAAO,CAAC;IAClC,SAAS,CAAC,oBAAoB,EAAE,OAAO,CAAC;IACxC,SAAS,CAAC,gBAAgB,CAAC,EAAE,sBAAsB,CAAC;IACpD,OAAO,CAAC,YAAY,CAAC,CAAU;IAE/B,OAAO,CAAC,WAAW,CAAqB;IACxC,OAAO,CAAC,QAAQ,CAAC,kBAAkB,CAA0B;IAC7D,OAAO,CAAC,mBAAmB,CAA0D;IAErF,SAAS,CAAC,MAAM,CAAC,YAAY;;;;;;;;;;;;;;;;;;;;;;;;;;;MAqC3B;IAEF;;OAEG;gBAEC,OAAO,GAAE,mBAAmB,CAAC,OAAO,CAAM,EACjC,MAAM,gBAAkC;IA4KrD;;;;;OAKG;IACH,SAAS,CAAC,YAAY,CAAC,KAAK,EAAE,KAAK,GAAG,OAAO;IAI7C;;;;OAIG;cACa,gBAAgB,CAAC,gBAAgB,EAAE,OAAO,GAAG,OAAO,CAAC,MAAM,GAAG,KAAK,CAAC;IAIpF;;OAEG;IACG,gBAAgB,CAAC,OAAO,EAAE,MAAM,EAAE,OAAO,GAAE,uBAA4B;IAmB7E,OAAO,CAAC,iBAAiB;IAgDzB;;;;;;;;;OASG;IACG,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC,MAAM,GAAG,OAAO,GAAG,cAAc,CAAC,EAAE,EAAE,OAAO,CAAC,EAAE,iBAAiB,GAAG,OAAO,CAAC,eAAe,CAAC;IA8GlH;;;;OAIG;IACH,IAAI,CAAC,OAAO,SAA6C,GAAG,IAAI;IAY1D,eAAe;IAYf,QAAQ,CAAC,KAAK,SAAS,UAAU,GAAG,UAAU,EAAE,YAAY,GAAS,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC;IAKjG;;;;;;;;;;OAUG;IACG,WAAW,CACb,QAAQ,EAAE,CAAC,MAAM,GAAG,MAAM,CAAC,EAAE,EAC7B,OAAO,GAAE,yBAA8B,GACxC,OAAO,CAAC,wBAAwB,CAAC;IAsCpC;;OAEG;IACG,QAAQ,CAAC,IAAI,EAAE,UAAU,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,eAAe,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAKjG;;OAEG;IACG,UAAU,CAAC,QAAQ,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC;IAIrD;;OAEG;IACG,OAAO,CAAC,GAAG,IAAI,EAAE,UAAU,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,GAAG,UAAU,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;IAKtF;;;OAGG;IACG,UAAU,CAAC,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,MAAM,GAAG,KAAK,EAAE,OAAO,CAAC,EAAE,oBAAoB,GAAG,OAAO,CAAC,IAAI,EAAE,CAAC;
cAoC9F,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;cAoBtB,kBAAkB,CAAC,eAAe,EAAE,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC;IAI3E;;OAEG;IACH,SAAS,CAAC,sBAAsB,CAAC,OAAO,EAAE,OAAO,EAAE,UAAU,EAAE,MAAM;YAQvD,6BAA6B;cAS3B,sBAAsB,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,aAAa,GAAG,SAAS,CAAC;cAuBvE,iBAAiB;IAuCjC;;;OAGG;cACa,iBAAiB;IAyBjC;;;OAGG;cACa,eAAe,CAAC,gBAAgB,EAAE,OAAO;IAEzD;;;;OAIG;IACH,SAAS,CAAC,YAAY,CAAC,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,YAAY,GAAG,eAAe;IAsC/E;;;OAGG;cACa,gBAAgB;IA0JhC;;;OAGG;cACa,gBAAgB,CAC5B,OAAO,EAAE,MAAM,OAAO,CAAC,OAAO,CAAC,EAC/B,OAAO,EAAE,MAAM,EACf,KAAK,EAAE,KAAK,GAAG,MAAM,EACrB,UAAU,SAAI,EACd,OAAO,SAAI,GACZ,OAAO,CAAC,IAAI,CAAC;IAehB;;OAEG;cACa,oBAAoB;IASpC;;OAEG;cACa,0BAA0B;YAS5B,cAAc;IAQ5B;;OAEG;cACa,4BAA4B,CACxC,KAAK,EAAE,KAAK,EACZ,eAAe,EAAE,OAAO,EACxB,MAAM,EAAE,YAAY,GAAG,eAAe,GACvC,OAAO,CAAC,IAAI,CAAC;cAyDA,oBAAoB,CAAC,CAAC,EAAE,EAAE,EAAE,MAAM,OAAO,GAAG,OAAO,CAAC,CAAC,CAAC;cAStD,2BAA2B,CAAC,eAAe,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,GAAG,OAAO,CAAC,IAAI,CAAC;IAYlG;;;;OAIG;IACH,SAAS,CAAC,oBAAoB,CAAC,KAAK,EAAE,KAAK,EAAE,UAAU,UAAQ;IAmB/D,SAAS,CAAC,oBAAoB,CAAC,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK;IAoB7D;;;;;;;OAOG;cACa,wBAAwB,IAAI,OAAO,CAAC,IAAI,CAAC;cAQzC,aAAa,CAAC,QAAQ,SAAS,CAAC,GAAG,IAAI,EAAE,GAAG,EAAE,KAAK,SAAS,CAAC,IAAI,CAAC,EAC9E,KAAK,EAAE,QAAQ,EAAE,EACjB,GAAG,IAAI,EAAE,UAAU,CAAC,QAAQ,CAAC;IASjC;;;OAGG;IACG,QAAQ,IAAI,OAAO,CAAC,IAAI,CAAC;IAc/B,SAAS,CAAC,2BAA2B,CAAC,OAAO,EAAE,OAAO;YAWxC,gBAAgB;IAc9B,SAAS,CAAC,6BAA6B,CAAC,OAAO,EAAE,OAAO;CA6C3D;AAED,MAAM,WAAW,oBAAoB;IACjC,OAAO,EAAE,OAAO,CAAC;IACjB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,SAAS,CAAC,EAAE,SAAS,CAAC;CACzB;AAED,MAAM,WAAW,yBAA0B,SAAQ,yBAAyB;CAAG;AAE/E,MAAM,WAAW,wBAAyB,SAAQ,wBAAwB;CAAG;AAE7E,MAAM,WAAW,iBAAkB,SAAQ,yBAAyB;IAChE;;;;OAIG;IACH,iBAAiB,CAAC,EAAE,OAAO,CAAC;CAC/B;AAED;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,wBAAgB,iBAAiB,CAC7B,OAAO,SAAS,oBAAoB,GAAG,oBAAoB,EAC3D,QAAQ,SAAS,UAAU,GAAG,sBAAsB,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,EAC1E,MAAM,CAAC,EAAE,YAAY,CAAC,OAAO,EAAE,QAAQ,CAAC,0BAEzC"}
1
+ {"version":3,"file":"basic-crawler.d.ts","sourceRoot":"","sources":["../../src/internals/basic-crawler.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EACR,yBAAyB,EACzB,wBAAwB,EACxB,qBAAqB,EACrB,cAAc,EACd,eAAe,EACf,oBAAoB,EACpB,YAAY,EACZ,eAAe,EACf,sBAAsB,EACtB,YAAY,EACZ,SAAS,EACT,OAAO,EACP,cAAc,EACd,aAAa,EACb,YAAY,EACZ,OAAO,EACP,kBAAkB,EAClB,sBAAsB,EACtB,MAAM,EACN,iBAAiB,EACjB,cAAc,EACjB,MAAM,eAAe,CAAC;AACvB,OAAO,EACH,cAAc,EACd,aAAa,EACb,eAAe,EAKf,OAAO,EAUP,eAAe,EAOf,WAAW,EACX,UAAU,EAEb,MAAM,eAAe,CAAC;AACvB,OAAO,KAAK,EAAE,SAAS,EAAE,UAAU,EAAE,uBAAuB,EAAE,MAAM,gBAAgB,CAAC;AACrF,OAAO,EAAE,aAAa,EAAuB,MAAM,gBAAgB,CAAC;AAKpE,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,WAAW,CAAC;AAG9C,OAAO,KAAK,EAAE,GAAG,EAAE,MAAM,YAAY,CAAC;AAEtC,OAAO,EAAuB,YAAY,EAAa,MAAM,gBAAgB,CAAC;AAK9E,MAAM,WAAW,oBAAoB,CAAC,QAAQ,SAAS,UAAU,GAAG,UAAU,CAAE,SAAQ,eAAe,CAAC,QAAQ,CAAC;CAAG;AAapH,MAAM,MAAM,cAAc,CAAC,OAAO,SAAS,eAAe,GAAG,eAAe,IAAI,CAAC,MAAM,EAAE,OAAO,KAAK,SAAS,CAAC,IAAI,CAAC,CAAC;AAErH,MAAM,MAAM,YAAY,CACpB,OAAO,SAAS,eAAe,GAAG,eAAe,EACjD,eAAe,SAAS,OAAO,GAAG,OAAO,IACzC,CAAC,MAAM,EAAE,OAAO,GAAG,OAAO,CAAC,eAAe,CAAC,EAAE,KAAK,EAAE,KAAK,KAAK,SAAS,CAAC,IAAI,CAAC,CAAC;AAElF,MAAM,WAAW,2BAA2B,CACxC,OAAO,SAAS,eAAe,GAAG,oBAAoB,EACtD,OAAO,SAAS,YAAY,CAAC,GAAG,CAAC,GAAG,YAAY,CAAC,OAAO,CAAC;IAEzD,KAAK,EAAE,cAAc,CAAC;IACtB,OAAO,EAAE,OAAO,CAAC;IACjB,aAAa,EAAE,cAAc,CAAC;IAC9B,OAAO,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,MAAM,qBAAqB,CAC7B,OAAO,SAAS,eAAe,GAAG,oBAAoB,EACtD,OAAO,SAAS,YAAY,CAAC,GAAG,CAAC,GAAG,YAAY,CAAC,OAAO,CAAC,IACzD,CAAC,MAAM,EAAE,2BAA2B,CAAC,OAAO,EAAE,OAAO,CAAC,KAAK,SAAS,CAAC,IAAI,CAAC,CAAC;AAE/E,MAAM,MAAM,sBAAsB,CAC9B,kBAAkB,SAAS,eAAe,EAC1C,gBAAgB,SAAS,kBAAkB,IAC3C,kBAAkB,SAAS,gBAAgB,GACzC,EAAE,GACF;IAAE,sBAAsB,EAAE,MAAM,eAAe,CAAC,eAAe,EAAE,gBAAgB,CAAC,CAAA;CAAE,CAAC;AAE3F,MAAM,WAAW,mBAAmB,CAChC,OAAO,SAAS,eAAe,GAAG,eAAe,EACjD,gBAAgB,GAAG,EAAE,EACrB,eAAe,SAAS,OAAO,GAAG,OAAO,GAAG,gBAAgB;IAE5D;;;;;;;;;;;;;;;;OAgBG;IACH,cAAc,CAAC,EAAE,cAAc,CAAC,eAAe,CAAC,CAAC;IAEjD;;;;;;;;;;;;;;;;;;;;OAoBG;IACH,aAAa,CAAC,EAAE,CAAC,OAAO,EAAE,OAA
O,KAAK,SAAS,CAAC,gBAAgB,CAAC,CAAC;IAElE;;;;OAIG;IACH,sBAAsB,CAAC,EAAE,MAAM,eAAe,CAAC,eAAe,EAAE,OAAO,CAAC,CAAC;IAEzE;;;;;OAKG;IACH,WAAW,CAAC,EAAE,YAAY,CAAC;IAE3B;;;;;OAKG;IACH,YAAY,CAAC,EAAE,eAAe,CAAC;IAE/B;;;OAGG;IACH,yBAAyB,CAAC,EAAE,MAAM,CAAC;IAEnC;;;;;;;;OAQG;IACH,YAAY,CAAC,EAAE,YAAY,CAAC,eAAe,EAAE,eAAe,CAAC,CAAC;IAE9D;;;;;;;OAOG;IACH,oBAAoB,CAAC,EAAE,YAAY,CAAC,eAAe,EAAE,eAAe,CAAC,CAAC;IAEtE;;;;;;;;OAQG;IACH,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAE3B;;;OAGG;IACH,mBAAmB,CAAC,EAAE,MAAM,CAAC;IAE7B;;;;;;OAMG;IACH,mBAAmB,CAAC,EAAE,MAAM,CAAC;IAE7B;;;;OAIG;IACH,mBAAmB,CAAC,EAAE,MAAM,CAAC;IAE7B;;;;;;OAMG;IACH,qBAAqB,CAAC,EAAE,qBAAqB,CAAC;IAE9C;;;;;OAKG;IACH,cAAc,CAAC,EAAE,MAAM,CAAC;IAExB;;;OAGG;IACH,cAAc,CAAC,EAAE,MAAM,CAAC;IAExB;;;;OAIG;IACH,oBAAoB,CAAC,EAAE,MAAM,CAAC;IAE9B;;;;OAIG;IACH,SAAS,CAAC,EAAE,OAAO,CAAC;IAEpB;;;OAGG;IACH,cAAc,CAAC,EAAE,OAAO,CAAC;IAEzB;;OAEG;IACH,kBAAkB,CAAC,EAAE,kBAAkB,CAAC;IAExC;;OAEG;IACH,4BAA4B,CAAC,EAAE,MAAM,CAAC;IAEtC;;;;;;;;;;;;;;;OAeG;IACH,qBAAqB,CAAC,EAAE,qBAAqB,CAAC;IAE9C;;;;;;OAMG;IACH,cAAc,CAAC,EAAE,OAAO,CAAC;IAEzB;;;OAGG;IACH,oBAAoB,CAAC,EAAE,OAAO,CAAC;IAE/B;;;OAGG;IACH,gBAAgB,CAAC,EAAE,sBAAsB,CAAC;IAE1C,gBAAgB;IAChB,GAAG,CAAC,EAAE,GAAG,CAAC;IAEV;;;OAGG;IACH,WAAW,CAAC,EAAE,kBAAkB,CAAC;IAEjC;;;OAGG;IACH,iBAAiB,CAAC,EAAE,iBAAiB,CAAC;IAEtC;;;OAGG;IACH,UAAU,CAAC,EAAE,cAAc,CAAC;CAC/B;AAED;;;;;GAKG;AACH,MAAM,WAAW,kBAAkB;IAC/B;;;;;OAKG;IACH,cAAc,CAAC,EAAE,OAAO,CAAC;CAC5B;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+DG;AACH,qBAAa,YAAY,CACrB,OAAO,SAAS,eAAe,GAAG,eAAe,EACjD,gBAAgB,GAAG,EAAE,EACrB,eAAe,SAAS,OAAO,GAAG,OAAO,GAAG,gBAAgB;IAoIxD,QAAQ,CAAC,MAAM;IAlInB,SAAS,CAAC,MAAM,CAAC,QAAQ,CAAC,iBAAiB,mBAAmB;IAE9D;;OAEG;IACH,QAAQ,CAAC,KAAK,EAAE,UAAU,CAAC;IAE3B;;;OAGG;IACH,WAAW,CAAC,EAAE,YAAY,CAAC;IAE3B;;;;OAIG;IACH,YAAY,CAAC,EAAE,eAAe,CAAC;IAE/B;;;OAGG;IACH,WAAW,CAAC,EAAE,WAAW,CAAC;IAE1B;;;;;;OAMG;IACH,cAAc,CAAC,EAAE,cAAc,CAAC;IAEhC;;;OAGG;IACH,QAAQ,CAAC,MAAM,EAAE,aAAa,CAAC,OAAO,CAAC,CAA4B;IAEnE,OAAO,CAAC,sBAAsB,CAA0D;IACxF,OAAO,CAAC
,gBAAgB,CAAC,CAAoD;IAE7E,IAAI,eAAe,IAAI,eAAe,CAAC,eAAe,EAAE,eAAe,CAAC,CAMvE;IAED,OAAO,UAAS;IAChB,iBAAiB,UAAS;IAE1B,QAAQ,CAAC,GAAG,EAAE,GAAG,CAAC;IAClB,SAAS,CAAC,cAAc,EAAG,cAAc,CAAC,eAAe,CAAC,CAAC;IAC3D,SAAS,CAAC,YAAY,CAAC,EAAE,YAAY,CAAC,eAAe,EAAE,eAAe,CAAC,CAAC;IACxE,SAAS,CAAC,oBAAoB,CAAC,EAAE,YAAY,CAAC,eAAe,EAAE,eAAe,CAAC,CAAC;IAChF,SAAS,CAAC,2BAA2B,EAAG,MAAM,CAAC;IAC/C,SAAS,CAAC,qBAAqB,EAAE,MAAM,CAAC;IACxC,SAAS,CAAC,iBAAiB,EAAE,MAAM,CAAC;IACpC,SAAS,CAAC,qBAAqB,EAAE,MAAM,CAAC;IACxC,SAAS,CAAC,kBAAkB,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAClD,SAAS,CAAC,mBAAmB,EAAE,MAAM,CAAC;IACtC,SAAS,CAAC,oBAAoB,EAAE,MAAM,CAAC;IACvC,SAAS,CAAC,4BAA4B,EAAE,MAAM,CAAC;IAC/C,SAAS,CAAC,qBAAqB,CAAC,EAAE,qBAAqB,CAAC;IACxD,SAAS,CAAC,kBAAkB,EAAE,kBAAkB,CAAC;IACjD,SAAS,CAAC,cAAc,EAAE,OAAO,CAAC;IAClC,SAAS,CAAC,qBAAqB,EAAE,qBAAqB,CAAC;IACvD,SAAS,CAAC,MAAM,EAAE,YAAY,CAAC;IAC/B,SAAS,CAAC,UAAU,EAAE,cAAc,CAAC;IACrC,SAAS,CAAC,cAAc,EAAE,OAAO,CAAC;IAClC,SAAS,CAAC,oBAAoB,EAAE,OAAO,CAAC;IACxC,SAAS,CAAC,gBAAgB,CAAC,EAAE,sBAAsB,CAAC;IACpD,OAAO,CAAC,YAAY,CAAC,CAAU;IAE/B,OAAO,CAAC,WAAW,CAAqB;IACxC,OAAO,CAAC,QAAQ,CAAC,kBAAkB,CAA0B;IAC7D,OAAO,CAAC,mBAAmB,CAA0D;IAErF,SAAS,CAAC,MAAM,CAAC,YAAY;;;;;;;;;;;;;;;;;;;;;;;;;;;;;MAwC3B;IAEF;;OAEG;gBAEC,OAAO,GAAE,mBAAmB,CAAC,OAAO,EAAE,gBAAgB,EAAE,eAAe,CAAC,GACpE,sBAAsB,CAAC,eAAe,EAAE,OAAO,CAAa,EAAE,wGAAwG;IACjK,MAAM,gBAAkC;IA2MrD;;;;;OAKG;IACH,SAAS,CAAC,YAAY,CAAC,KAAK,EAAE,KAAK,GAAG,OAAO;IAI7C;;OAEG;IACG,gBAAgB,CAAC,OAAO,EAAE,MAAM,EAAE,OAAO,GAAE,uBAA4B;IAmB7E,OAAO,CAAC,iBAAiB;IAgDzB;;;;;;;;;OASG;IACG,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC,MAAM,GAAG,OAAO,GAAG,cAAc,CAAC,EAAE,EAAE,OAAO,CAAC,EAAE,iBAAiB,GAAG,OAAO,CAAC,eAAe,CAAC;IA8GlH;;;;OAIG;IACH,IAAI,CAAC,OAAO,SAA6C,GAAG,IAAI;IAY1D,eAAe;IAYf,QAAQ,CAAC,KAAK,SAAS,UAAU,GAAG,UAAU,EAAE,YAAY,GAAS,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC;IAKjG;;;;;;;;;;OAUG;IACG,WAAW,CACb,QAAQ,EAAE,YAAY,CAAC,CAAC,MAAM,GAAG,MAAM,CAAC,EAAE,CAAC,EAC3C,OAAO,GAAE,yBAA8B,GACxC,OAAO,CAAC,wBAAwB,CAAC;IAsCpC;;OAEG;IACG,QAAQ,CAAC,IAAI,EAAE,UAAU,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC
,CAAC,CAAC,CAAC,EAAE,eAAe,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAKjG;;OAEG;IACG,UAAU,CAAC,QAAQ,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC;IAIrD;;OAEG;IACG,OAAO,CAAC,GAAG,IAAI,EAAE,UAAU,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,GAAG,UAAU,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;IAKtF;;;OAGG;IACG,UAAU,CAAC,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,MAAM,GAAG,KAAK,EAAE,OAAO,CAAC,EAAE,oBAAoB,GAAG,OAAO,CAAC,IAAI,EAAE,CAAC;cAoC9F,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;cAoBtB,iBAAiB,CAAC,eAAe,EAAE,eAAe,GAAG,OAAO,CAAC,IAAI,CAAC;IAUlF;;OAEG;IACH,SAAS,CAAC,sBAAsB,CAAC,OAAO,EAAE,OAAO,EAAE,UAAU,EAAE,MAAM;YAQvD,6BAA6B;cAS3B,sBAAsB,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,aAAa,GAAG,SAAS,CAAC;cAuBvE,iBAAiB;IAuCjC;;;OAGG;cACa,iBAAiB;IAyBjC;;;;OAIG;IACH,SAAS,CAAC,YAAY,CAAC,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,YAAY,GAAG,eAAe;IAsC/E;;;OAGG;cACa,gBAAgB;IAyJhC;;;OAGG;cACa,gBAAgB,CAC5B,OAAO,EAAE,MAAM,OAAO,CAAC,OAAO,CAAC,EAC/B,OAAO,EAAE,MAAM,EACf,KAAK,EAAE,KAAK,GAAG,MAAM,EACrB,UAAU,SAAI,EACd,OAAO,SAAI,GACZ,OAAO,CAAC,IAAI,CAAC;IAehB;;OAEG;cACa,oBAAoB;IASpC;;OAEG;cACa,0BAA0B;YAS5B,cAAc;IAQ5B;;;OAGG;IACH,OAAO,CAAC,WAAW;IAWnB;;OAEG;cACa,4BAA4B,CACxC,KAAK,EAAE,KAAK,EACZ,eAAe,EAAE,eAAe,EAChC,MAAM,EAAE,YAAY,GAAG,eAAe,GACvC,OAAO,CAAC,IAAI,CAAC;cA4DA,oBAAoB,CAAC,CAAC,EAAE,EAAE,EAAE,MAAM,OAAO,GAAG,OAAO,CAAC,CAAC,CAAC;cAStD,2BAA2B,CAAC,eAAe,EAAE,eAAe,EAAE,KAAK,EAAE,KAAK,GAAG,OAAO,CAAC,IAAI,CAAC;IAe1G;;;;OAIG;IACH,SAAS,CAAC,oBAAoB,CAAC,KAAK,EAAE,KAAK,EAAE,UAAU,UAAQ;IAmB/D,SAAS,CAAC,oBAAoB,CAAC,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK;IAoB7D;;;;;;;OAOG;cACa,wBAAwB,IAAI,OAAO,CAAC,IAAI,CAAC;cAQzC,aAAa,CAAC,QAAQ,SAAS,CAAC,GAAG,IAAI,EAAE,GAAG,EAAE,KAAK,SAAS,CAAC,IAAI,CAAC,EAC9E,KAAK,EAAE,QAAQ,EAAE,EACjB,GAAG,IAAI,EAAE,UAAU,CAAC,QAAQ,CAAC;IASjC;;;OAGG;IACG,QAAQ,IAAI,OAAO,CAAC,IAAI,CAAC;IAY/B,SAAS,CAAC,2BAA2B,CAAC,OAAO,EAAE,OAAO;YAWxC,gBAAgB;IAc9B,SAAS,CAAC,6BAA6B,CAAC,OAAO,EAAE,OAAO;CA6C3D;AAED,MAAM,WAAW,oBAAoB;IACjC,OAAO,EAAE,OAAO,CAAC;IACjB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,SAAS,CAAC,EAAE,SAAS,CAAC;CACzB;AAED,MAAM,WAAW,yBAA0B,SAAQ,yBAAyB;CAAG;AAE/E,MAAM,W
AAW,wBAAyB,SAAQ,wBAAwB;CAAG;AAE7E,MAAM,WAAW,iBAAkB,SAAQ,yBAAyB;IAChE;;;;OAIG;IACH,iBAAiB,CAAC,EAAE,OAAO,CAAC;CAC/B;AAED;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,wBAAgB,iBAAiB,CAC7B,OAAO,SAAS,oBAAoB,GAAG,oBAAoB,EAC3D,QAAQ,SAAS,UAAU,GAAG,sBAAsB,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,EAC1E,MAAM,CAAC,EAAE,YAAY,CAAC,OAAO,EAAE,QAAQ,CAAC,0BAEzC"}
@@ -1,6 +1,6 @@
1
1
  import { writeFile } from 'node:fs/promises';
2
2
  import { dirname } from 'node:path';
3
- import { AutoscaledPool, Configuration, CriticalError, Dataset, enqueueLinks, EnqueueStrategy, GotScrapingHttpClient, KeyValueStore, mergeCookies, NonRetryableError, purgeDefaultStorages, RequestProvider, RequestQueue, RequestQueueV1, RequestState, RetryRequestError, Router, SessionError, SessionPool, Statistics, validators, } from '@crawlee/core';
3
+ import { AutoscaledPool, Configuration, ContextPipeline, ContextPipelineCleanupError, ContextPipelineInitializationError, ContextPipelineInterruptedError, CriticalError, Dataset, enqueueLinks, EnqueueStrategy, GotScrapingHttpClient, KeyValueStore, mergeCookies, NonRetryableError, purgeDefaultStorages, RequestHandlerError, RequestProvider, RequestQueue, RequestQueueV1, RequestState, RetryRequestError, Router, SessionError, SessionPool, Statistics, validators, } from '@crawlee/core';
4
4
  import { RobotsTxtFile, ROTATE_PROXY_ERRORS } from '@crawlee/utils';
5
5
  import { stringify } from 'csv-stringify/sync';
6
6
  import { ensureDir, writeJSON } from 'fs-extra/esm';
@@ -121,6 +121,14 @@ export class BasicCrawler {
121
121
  * See {@link Router.addHandler|`router.addHandler()`} and {@link Router.addDefaultHandler|`router.addDefaultHandler()`}.
122
122
  */
123
123
  router = Router.create();
124
+ contextPipelineBuilder;
125
+ _contextPipeline;
126
+ get contextPipeline() {
127
+ if (this._contextPipeline === undefined) {
128
+ this._contextPipeline = this.contextPipelineBuilder();
129
+ }
130
+ return this._contextPipeline;
131
+ }
124
132
  running = false;
125
133
  hasFinishedBefore = false;
126
134
  log;
@@ -138,7 +146,6 @@ export class BasicCrawler {
138
146
  statusMessageCallback;
139
147
  sessionPoolOptions;
140
148
  useSessionPool;
141
- crawlingContexts = new Map();
142
149
  autoscaledPoolOptions;
143
150
  events;
144
151
  httpClient;
@@ -150,6 +157,8 @@ export class BasicCrawler {
150
157
  robotsTxtFileCache;
151
158
  _experimentWarnings = {};
152
159
  static optionsShape = {
160
+ contextPipelineBuilder: ow.optional.object,
161
+ extendContext: ow.optional.function,
153
162
  requestList: ow.optional.object.validate(validators.requestList),
154
163
  requestQueue: ow.optional.object.validate(validators.requestQueue),
155
164
  // Subclasses override this function instead of passing it
@@ -185,7 +194,8 @@ export class BasicCrawler {
185
194
  /**
186
195
  * All `BasicCrawler` parameters are passed via an options object.
187
196
  */
188
- constructor(options = {}, config = Configuration.getGlobalConfig()) {
197
+ constructor(options = {}, // cast because the constructor logic handles missing `contextPipelineBuilder` - the type is just for DX
198
+ config = Configuration.getGlobalConfig()) {
189
199
  this.config = config;
190
200
  ow(options, 'BasicCrawlerOptions', ow.object.exactShape(BasicCrawler.optionsShape));
191
201
  const { requestList, requestQueue, maxRequestRetries = 3, sameDomainDelaySecs = 0, maxSessionRotations = 10, maxRequestsPerCrawl, autoscaledPoolOptions = {}, keepAlive, sessionPoolOptions = {}, useSessionPool = true,
@@ -193,6 +203,32 @@ export class BasicCrawler {
193
203
  minConcurrency, maxConcurrency, maxRequestsPerMinute, retryOnBlocked = false, respectRobotsTxtFile = false, onSkippedRequest, requestHandler, requestHandlerTimeoutSecs, errorHandler, failedRequestHandler, statusMessageLoggingInterval = 10, statusMessageCallback, statisticsOptions, httpClient,
194
204
  // internal
195
205
  log = defaultLog.child({ prefix: this.constructor.name }), experiments = {}, } = options;
206
+ // Store the builder so that it can be run when the contextPipeline is needed.
207
+ // Invoking it immediately would cause problems with parent constructor call order.
208
+ this.contextPipelineBuilder = () => {
209
+ let contextPipeline = (options.contextPipelineBuilder?.() ??
210
+ ContextPipeline.create()); // Thanks to the RequireContextPipeline, contextPipeline will only be undefined if InitialContextType is CrawlingContext
211
+ if (options.extendContext !== undefined) {
212
+ contextPipeline = contextPipeline.compose({
213
+ action: async (context) => await options.extendContext(context),
214
+ });
215
+ }
216
+ contextPipeline = contextPipeline.compose({
217
+ action: async (context) => {
218
+ const { request } = context;
219
+ if (!this.requestMatchesEnqueueStrategy(request)) {
220
+ // eslint-disable-next-line dot-notation
221
+ const message = `Skipping request ${request.id} (starting url: ${request.url} -> loaded url: ${request.loadedUrl}) because it does not match the enqueue strategy (${request['enqueueStrategy']}).`;
222
+ this.log.debug(message);
223
+ request.noRetry = true;
224
+ request.state = RequestState.SKIPPED;
225
+ throw new ContextPipelineInterruptedError(message);
226
+ }
227
+ return context;
228
+ },
229
+ });
230
+ return contextPipeline;
231
+ };
196
232
  this.requestList = requestList;
197
233
  this.requestQueue = requestQueue;
198
234
  this.httpClient = httpClient ?? new GotScrapingHttpClient();
@@ -247,7 +283,6 @@ export class BasicCrawler {
247
283
  }
248
284
  }
249
285
  this.useSessionPool = useSessionPool;
250
- this.crawlingContexts = new Map();
251
286
  const maxSignedInteger = 2 ** 31 - 1;
252
287
  if (this.requestHandlerTimeoutMillis > maxSignedInteger) {
253
288
  log.warning(`requestHandlerTimeoutMillis ${this.requestHandlerTimeoutMillis}` +
@@ -310,14 +345,6 @@ export class BasicCrawler {
310
345
  isProxyError(error) {
311
346
  return ROTATE_PROXY_ERRORS.some((x) => this._getMessageFromError(error)?.includes(x));
312
347
  }
313
- /**
314
- * Checks whether the given crawling context is getting blocked by anti-bot protection using several heuristics.
315
- * Returns `false` if the request is not blocked, otherwise returns a string with a description of the block reason.
316
- * @param _crawlingContext The crawling context to check.
317
- */
318
- async isRequestBlocked(_crawlingContext) {
319
- throw new Error('the "isRequestBlocked" method is not implemented in this crawler.');
320
- }
321
348
  /**
322
349
  * This method is periodically called by the crawler, every `statusMessageLoggingInterval` seconds.
323
350
  */
@@ -590,8 +617,10 @@ export class BasicCrawler {
590
617
  }
591
618
  await this._loadHandledRequestCount();
592
619
  }
593
- async _runRequestHandler(crawlingContext) {
594
- await this.requestHandler(crawlingContext);
620
+ async runRequestHandler(crawlingContext) {
621
+ await this.contextPipeline.call(crawlingContext, async (finalContext) => {
622
+ await addTimeoutToPromise(async () => this.requestHandler(finalContext), this.requestHandlerTimeoutMillis, `requestHandler timed out after ${this.requestHandlerTimeoutMillis / 1000} seconds (${finalContext.request.id}).`);
623
+ });
595
624
  }
596
625
  /**
597
626
  * Handles blocked request
@@ -686,11 +715,6 @@ export class BasicCrawler {
686
715
  await this.requestList.markRequestHandled(request);
687
716
  return this.requestQueue.fetchNextRequest();
688
717
  }
689
- /**
690
- * Executed when `errorHandler` finishes or the request is successful.
691
- * Can be used to clean up orphaned browser pages.
692
- */
693
- async _cleanupContext(_crawlingContext) { }
694
718
  /**
695
719
  * Delays processing of the request based on the `sameDomainDelaySecs` option,
696
720
  * adding it back to the queue after the timeout passes. Returns `true` if the request
@@ -759,18 +783,14 @@ export class BasicCrawler {
759
783
  request.loadedUrl = undefined;
760
784
  const statisticsId = request.id || request.uniqueKey;
761
785
  this.stats.startJob(statisticsId);
762
- // Shared crawling context
763
- // @ts-expect-error
764
- // All missing properties (that extend CrawlingContext) are set dynamically,
765
- // but TS does not know that, so otherwise it would throw when compiling.
786
+ const deferredCleanup = [];
766
787
  const crawlingContext = {
767
788
  id: cryptoRandomObjectId(10),
768
- crawler: this,
769
789
  log: this.log,
770
790
  request,
771
791
  session,
772
792
  enqueueLinks: async (options) => {
773
- return enqueueLinks({
793
+ return await enqueueLinks({
774
794
  // specify the RQ first to allow overriding it
775
795
  requestQueue: await this.getRequestQueue(),
776
796
  robotsTxtFile: await this.getRobotsTxtFileForUrl(request.url),
@@ -778,17 +798,21 @@ export class BasicCrawler {
778
798
  ...options,
779
799
  });
780
800
  },
781
- addRequests: this.addRequests.bind(this),
801
+ addRequests: async (requests, options) => {
802
+ await this.addRequests(requests, options);
803
+ },
782
804
  pushData: this.pushData.bind(this),
783
805
  useState: this.useState.bind(this),
784
806
  sendRequest: createSendRequest(this.httpClient, request, session, () => crawlingContext.proxyInfo?.url),
785
807
  getKeyValueStore: async (idOrName) => KeyValueStore.open(idOrName, { config: this.config }),
808
+ registerDeferredCleanup: (cleanup) => {
809
+ deferredCleanup.push(cleanup);
810
+ },
786
811
  };
787
- this.crawlingContexts.set(crawlingContext.id, crawlingContext);
788
812
  let isRequestLocked = true;
789
813
  try {
790
814
  request.state = RequestState.REQUEST_HANDLER;
791
- await addTimeoutToPromise(async () => this._runRequestHandler(crawlingContext), this.requestHandlerTimeoutMillis, `requestHandler timed out after ${this.requestHandlerTimeoutMillis / 1000} seconds (${request.id}).`);
815
+ await this.runRequestHandler(crawlingContext);
792
816
  await this._timeoutAndRetry(async () => source.markRequestHandled(request), this.internalTimeoutMillis, `Marking request ${request.url} (${request.id}) as handled timed out after ${this.internalTimeoutMillis / 1e3} seconds.`);
793
817
  isRequestLocked = false; // markRequestHandled succeeded and unlocked the request
794
818
  this.stats.finishJob(statisticsId, request.retryCount);
@@ -797,7 +821,8 @@ export class BasicCrawler {
797
821
  request.state = RequestState.DONE;
798
822
  crawlingContext.session?.markGood();
799
823
  }
800
- catch (err) {
824
+ catch (rawError) {
825
+ const err = this.unwrapError(rawError);
801
826
  try {
802
827
  request.state = RequestState.ERROR_HANDLER;
803
828
  await addTimeoutToPromise(async () => this._requestFunctionErrorHandler(err, crawlingContext, source), this.internalTimeoutMillis, `Handling request failure of ${request.url} (${request.id}) timed out after ${this.internalTimeoutMillis / 1e3} seconds.`);
@@ -807,24 +832,24 @@ export class BasicCrawler {
807
832
  request.state = RequestState.DONE;
808
833
  }
809
834
  catch (secondaryError) {
810
- if (!secondaryError.triggeredFromUserHandler &&
835
+ const unwrappedSecondaryError = this.unwrapError(secondaryError);
836
+ if (!unwrappedSecondaryError.triggeredFromUserHandler &&
811
837
  // avoid reprinting the same critical error multiple times, as it will be printed by Nodejs at the end anyway
812
- !(secondaryError instanceof CriticalError)) {
838
+ !(unwrappedSecondaryError instanceof CriticalError)) {
813
839
  const apifySpecific = process.env.APIFY_IS_AT_HOME
814
840
  ? `This may have happened due to an internal error of Apify's API or due to a misconfigured crawler.`
815
841
  : '';
816
- this.log.exception(secondaryError, 'An exception occurred during handling of failed request. ' +
842
+ this.log.exception(unwrappedSecondaryError, 'An exception occurred during handling of failed request. ' +
817
843
  `This places the crawler and its underlying storages into an unknown state and crawling will be terminated. ${apifySpecific}`);
818
844
  }
819
845
  request.state = RequestState.ERROR;
820
- throw secondaryError;
846
+ throw unwrappedSecondaryError;
821
847
  }
822
848
  // decrease the session score if the request fails (but the error handler did not throw)
823
849
  crawlingContext.session?.markBad();
824
850
  }
825
851
  finally {
826
- await this._cleanupContext(crawlingContext);
827
- this.crawlingContexts.delete(crawlingContext.id);
852
+ await Promise.all(deferredCleanup.map((cleanup) => cleanup()));
828
853
  // Safety net - release the lock if nobody managed to do it before
829
854
  if (isRequestLocked && source instanceof RequestProvider) {
830
855
  try {
@@ -883,6 +908,18 @@ export class BasicCrawler {
883
908
  request.sessionRotationCount++;
884
909
  crawlingContext.session?.retire();
885
910
  }
911
+ /**
912
+ * Unwraps errors thrown by the context pipeline to get the actual user error.
913
+ * RequestHandlerError and ContextPipelineInitializationError wrap the actual error.
914
+ */
915
+ unwrapError(error) {
916
+ if (error instanceof RequestHandlerError ||
917
+ error instanceof ContextPipelineInitializationError ||
918
+ error instanceof ContextPipelineCleanupError) {
919
+ return this.unwrapError(error.cause);
920
+ }
921
+ return error;
922
+ }
886
923
  /**
887
924
  * Handles errors thrown by user provided requestHandler()
888
925
  */
@@ -895,7 +932,8 @@ export class BasicCrawler {
895
932
  const shouldRetryRequest = this._canRequestBeRetried(request, error);
896
933
  if (shouldRetryRequest) {
897
934
  await this.stats.errorTrackerRetry.addAsync(error, crawlingContext);
898
- await this.errorHandler?.(crawlingContext, error);
935
+ await this.errorHandler?.(crawlingContext, // valid cast - ExtendedContext transitively extends CrawlingContext
936
+ error);
899
937
  if (error instanceof SessionError) {
900
938
  await this._rotateSession(crawlingContext);
901
939
  }
@@ -947,7 +985,8 @@ export class BasicCrawler {
947
985
  const message = this._getMessageFromError(error, true);
948
986
  this.log.error(`Request failed and reached maximum retries. ${message}`, { id, url, method, uniqueKey });
949
987
  if (this.failedRequestHandler) {
950
- await this.failedRequestHandler?.(crawlingContext, error);
988
+ await this.failedRequestHandler?.(crawlingContext, // valid cast - ExtendedContext transitively extends CrawlingContext
989
+ error);
951
990
  }
952
991
  }
953
992
  /**
@@ -1013,9 +1052,7 @@ export class BasicCrawler {
1013
1052
  */
1014
1053
  async teardown() {
1015
1054
  this.events.emit("persistState" /* EventType.PERSIST_STATE */, { isMigrating: false });
1016
- if (this.useSessionPool) {
1017
- await this.sessionPool.teardown();
1018
- }
1055
+ await this.sessionPool?.teardown();
1019
1056
  if (this._closeEvents) {
1020
1057
  await this.events.close();
1021
1058
  }