@crawlee/http 4.0.0-beta.1 → 4.0.0-beta.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,29 +1,22 @@
1
+ import type { Readable } from 'node:stream';
2
+ import type { BasicCrawlerOptions } from '@crawlee/basic';
3
+ import { BasicCrawler } from '@crawlee/basic';
4
+ import type { CrawlingContext, HttpResponse, LoadedRequest, Request, StreamingHttpResponse } from '@crawlee/core';
1
5
  import type { Dictionary } from '@crawlee/types';
2
- // @ts-ignore optional peer dependency or compatibility with es2022
3
- import type { Request } from 'got-scraping';
4
- import type { ErrorHandler, GetUserDataFromRequest, HttpCrawlerOptions, InternalHttpCrawlingContext, InternalHttpHook, RequestHandler, RouterRoutes } from '../index.js';
5
- import { HttpCrawler } from '../index.js';
6
- export type FileDownloadErrorHandler<UserData extends Dictionary = any, // with default to Dictionary we cant use a typed router in untyped crawler
7
- JSONData extends Dictionary = any> = ErrorHandler<FileDownloadCrawlingContext<UserData, JSONData>>;
8
- export type StreamHandlerContext = Omit<FileDownloadCrawlingContext, 'body' | 'parseWithCheerio' | 'json' | 'addRequests' | 'contentType'> & {
9
- stream: Request;
10
- };
11
- type StreamHandler = (context: StreamHandlerContext) => void | Promise<void>;
12
- export type FileDownloadOptions<UserData extends Dictionary = any, // with default to Dictionary we cant use a typed router in untyped crawler
13
- JSONData extends Dictionary = any> = (Omit<HttpCrawlerOptions<FileDownloadCrawlingContext<UserData, JSONData>>, 'requestHandler'> & {
14
- requestHandler?: never;
15
- streamHandler?: StreamHandler;
16
- }) | (Omit<HttpCrawlerOptions<FileDownloadCrawlingContext<UserData, JSONData>>, 'requestHandler'> & {
17
- requestHandler: FileDownloadRequestHandler;
18
- streamHandler?: never;
19
- });
20
- export type FileDownloadHook<UserData extends Dictionary = any, // with default to Dictionary we cant use a typed router in untyped crawler
21
- JSONData extends Dictionary = any> = InternalHttpHook<FileDownloadCrawlingContext<UserData, JSONData>>;
22
- export interface FileDownloadCrawlingContext<UserData extends Dictionary = any, // with default to Dictionary we cant use a typed router in untyped crawler
23
- JSONData extends Dictionary = any> extends InternalHttpCrawlingContext<UserData, JSONData, FileDownload> {
6
+ import type { ErrorHandler, GetUserDataFromRequest, InternalHttpHook, RequestHandler, RouterRoutes } from '../index.js';
7
+ export type FileDownloadErrorHandler<UserData extends Dictionary = any> = ErrorHandler<FileDownloadCrawlingContext<UserData>>;
8
+ export type FileDownloadHook<UserData extends Dictionary = any> = InternalHttpHook<FileDownloadCrawlingContext<UserData>>;
9
+ export interface FileDownloadCrawlingContext<UserData extends Dictionary = any> extends CrawlingContext<UserData> {
10
+ request: LoadedRequest<Request<UserData>>;
11
+ response: HttpResponse<'buffer'> | StreamingHttpResponse;
12
+ body: Promise<Buffer>;
13
+ stream: Readable;
14
+ contentType: {
15
+ type: string;
16
+ encoding: BufferEncoding;
17
+ };
24
18
  }
25
- export type FileDownloadRequestHandler<UserData extends Dictionary = any, // with default to Dictionary we cant use a typed router in untyped crawler
26
- JSONData extends Dictionary = any> = RequestHandler<FileDownloadCrawlingContext<UserData, JSONData>>;
19
+ export type FileDownloadRequestHandler<UserData extends Dictionary = any> = RequestHandler<FileDownloadCrawlingContext<UserData>>;
27
20
  /**
28
21
  * Provides a framework for downloading files in parallel using plain HTTP requests. The URLs to download are fed either from a static list of URLs or they can be added on the fly from another crawler.
29
22
  *
@@ -67,11 +60,11 @@ JSONData extends Dictionary = any> = RequestHandler<FileDownloadCrawlingContext<
67
60
  * ]);
68
61
  * ```
69
62
  */
70
- export declare class FileDownload extends HttpCrawler<FileDownloadCrawlingContext> {
71
- private streamHandler?;
72
- constructor(options?: FileDownloadOptions);
73
- protected _runRequestHandler(context: FileDownloadCrawlingContext): Promise<void>;
74
- private streamRequestHandler;
63
+ export declare class FileDownload extends BasicCrawler<FileDownloadCrawlingContext> {
64
+ #private;
65
+ constructor(options?: BasicCrawlerOptions<FileDownloadCrawlingContext>);
66
+ private initiateDownload;
67
+ private cleanupDownload;
75
68
  }
76
69
  /**
77
70
  * Creates new {@link Router} instance that works based on request labels.
@@ -98,6 +91,5 @@ export declare class FileDownload extends HttpCrawler<FileDownloadCrawlingContex
98
91
  * ```
99
92
  */
100
93
  // @ts-ignore optional peer dependency or compatibility with es2022
101
- export declare function createFileRouter<Context extends FileDownloadCrawlingContext = FileDownloadCrawlingContext, UserData extends Dictionary = GetUserDataFromRequest<Context['request']>>(routes?: RouterRoutes<Context, UserData>): import("packages/core/dist/router.js").RouterHandler<Context>;
102
- export {};
94
+ export declare function createFileRouter<Context extends FileDownloadCrawlingContext = FileDownloadCrawlingContext, UserData extends Dictionary = GetUserDataFromRequest<Context['request']>>(routes?: RouterRoutes<Context, UserData>): import("@crawlee/basic").RouterHandler<Context>;
103
95
  //# sourceMappingURL=file-download.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"file-download.d.ts","sourceRoot":"","sources":["../../src/internals/file-download.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AACjD,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AAE5C,OAAO,KAAK,EACR,YAAY,EACZ,sBAAsB,EACtB,kBAAkB,EAClB,2BAA2B,EAC3B,gBAAgB,EAChB,cAAc,EACd,YAAY,EACf,MAAM,aAAa,CAAC;AACrB,OAAO,EAAE,WAAW,EAAU,MAAM,aAAa,CAAC;AAElD,MAAM,MAAM,wBAAwB,CAChC,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,IACjC,YAAY,CAAC,2BAA2B,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC;AAElE,MAAM,MAAM,oBAAoB,GAAG,IAAI,CACnC,2BAA2B,EAC3B,MAAM,GAAG,kBAAkB,GAAG,MAAM,GAAG,aAAa,GAAG,aAAa,CACvE,GAAG;IACA,MAAM,EAAE,OAAO,CAAC;CACnB,CAAC;AAEF,KAAK,aAAa,GAAG,CAAC,OAAO,EAAE,oBAAoB,KAAK,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;AAE7E,MAAM,MAAM,mBAAmB,CAC3B,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,IAE/B,CAAC,IAAI,CAAC,kBAAkB,CAAC,2BAA2B,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,EAAE,gBAAgB,CAAC,GAAG;IAC3F,cAAc,CAAC,EAAE,KAAK,CAAC;IACvB,aAAa,CAAC,EAAE,aAAa,CAAC;CACjC,CAAC,GACF,CAAC,IAAI,CAAC,kBAAkB,CAAC,2BAA2B,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,EAAE,gBAAgB,CAAC,GAAG;IAC3F,cAAc,EAAE,0BAA0B,CAAC;IAC3C,aAAa,CAAC,EAAE,KAAK,CAAC;CACzB,CAAC,CAAC;AAET,MAAM,MAAM,gBAAgB,CACxB,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,IACjC,gBAAgB,CAAC,2BAA2B,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC;AAEtE,MAAM,WAAW,2BAA2B,CACxC,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,CACnC,SAAQ,2BAA2B,CAAC,QAAQ,EAAE,QAAQ,EAAE,YAAY,CAAC;CAAG;AAE1E,MAAM,MAAM,0BAA0B,CAClC,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,IACjC,cAAc,CAAC,2BAA2B,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC;AAEpE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA0CG;AACH,qBAAa,YAAa,SAAQ,WAAW,CAAC,2BAA2B,CAAC;IACtE,OAAO,CAAC,aAAa,CAAC,CAAgB;gBAE1B,OAAO,GAAE,mBAAwB;cAqBpB,kBAAkB,CAAC,OAAO,EAAE,2BAA2B;YAQlE,oBAAoB;CA8DrC;AAED;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,wBAAgB,gBAAgB,CAC5B,OAAO,SAAS,2BAA2B,GAAG,2BAA2B,EACzE,QAAQ,SAAS,UAAU,GAAG,sBAAsB,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,EAC1E,MAAM,CAAC,EAAE,YAAY,CAAC,OAAO,EAAE,QAAQ,CAAC,iEAEzC"}
1
+ {"version":3,"file":"file-download.d.ts","sourceRoot":"","sources":["../../src/internals/file-download.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAI5C,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,gBAAgB,CAAC;AAC1D,OAAO,EAAE,YAAY,EAAmB,MAAM,gBAAgB,CAAC;AAC/D,OAAO,KAAK,EAAE,eAAe,EAAE,YAAY,EAAE,aAAa,EAAE,OAAO,EAAE,qBAAqB,EAAE,MAAM,eAAe,CAAC;AAClH,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAEjD,OAAO,KAAK,EAAE,YAAY,EAAE,sBAAsB,EAAE,gBAAgB,EAAE,cAAc,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAIxH,MAAM,MAAM,wBAAwB,CAChC,QAAQ,SAAS,UAAU,GAAG,GAAG,IACjC,YAAY,CAAC,2BAA2B,CAAC,QAAQ,CAAC,CAAC,CAAC;AAExD,MAAM,MAAM,gBAAgB,CACxB,QAAQ,SAAS,UAAU,GAAG,GAAG,IACjC,gBAAgB,CAAC,2BAA2B,CAAC,QAAQ,CAAC,CAAC,CAAC;AAE5D,MAAM,WAAW,2BAA2B,CACxC,QAAQ,SAAS,UAAU,GAAG,GAAG,CACnC,SAAQ,eAAe,CAAC,QAAQ,CAAC;IAC/B,OAAO,EAAE,aAAa,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC;IAC1C,QAAQ,EAAE,YAAY,CAAC,QAAQ,CAAC,GAAG,qBAAqB,CAAC;IACzD,IAAI,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC;IACtB,MAAM,EAAE,QAAQ,CAAC;IACjB,WAAW,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,cAAc,CAAA;KAAE,CAAC;CAC3D;AAED,MAAM,MAAM,0BAA0B,CAClC,QAAQ,SAAS,UAAU,GAAG,GAAG,IACjC,cAAc,CAAC,2BAA2B,CAAC,QAAQ,CAAC,CAAC,CAAC;AAM1D;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA0CG;AACH,qBAAa,YAAa,SAAQ,YAAY,CAAC,2BAA2B,CAAC;;gBAI3D,OAAO,GAAE,mBAAmB,CAAC,2BAA2B,CAAM;YAW5D,gBAAgB;YAmChB,eAAe;CAmBhC;AAED;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,wBAAgB,gBAAgB,CAC5B,OAAO,SAAS,2BAA2B,GAAG,2BAA2B,EACzE,QAAQ,SAAS,UAAU,GAAG,sBAAsB,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,EAC1E,MAAM,CAAC,EAAE,YAAY,CAAC,OAAO,EAAE,QAAQ,CAAC,mDAEzC"}
@@ -1,6 +1,8 @@
1
+ import { buffer } from 'node:stream/consumers';
1
2
  import { finished } from 'node:stream/promises';
2
- import { isPromise } from 'node:util/types';
3
- import { HttpCrawler, Router } from '../index.js';
3
+ import { BasicCrawler, ContextPipeline } from '@crawlee/basic';
4
+ import { Router } from '../index.js';
5
+ import { parseContentTypeFromResponse } from './utils.js';
4
6
  /**
5
7
  * Provides a framework for downloading files in parallel using plain HTTP requests. The URLs to download are fed either from a static list of URLs or they can be added on the fly from another crawler.
6
8
  *
@@ -44,79 +46,60 @@ import { HttpCrawler, Router } from '../index.js';
44
46
  * ]);
45
47
  * ```
46
48
  */
47
- export class FileDownload extends HttpCrawler {
48
- streamHandler;
49
+ export class FileDownload extends BasicCrawler {
50
+ #contextInternals = Symbol('contextInternals');
51
+ // TODO hooks
49
52
  constructor(options = {}) {
50
- const { streamHandler } = options;
51
- delete options.streamHandler;
52
- if (streamHandler) {
53
- // For streams, the navigation is done in the request handler.
54
- options.requestHandlerTimeoutSecs = options.navigationTimeoutSecs ?? 120;
55
- }
56
- super(options);
57
- this.streamHandler = streamHandler;
58
- if (this.streamHandler) {
59
- this.requestHandler = this.streamRequestHandler;
60
- }
61
- // The base HttpCrawler class only supports a handful of text based mime types.
62
- // With the FileDownload crawler, we want to download any file type.
63
- this.supportedMimeTypes = new Set(['*/*']);
64
- }
65
- async _runRequestHandler(context) {
66
- if (this.streamHandler) {
67
- context.request.skipNavigation = true;
68
- }
69
- await super._runRequestHandler(context);
53
+ super({
54
+ ...options,
55
+ contextPipelineBuilder: () => ContextPipeline.create().compose({
56
+ action: this.initiateDownload.bind(this),
57
+ cleanup: this.cleanupDownload.bind(this),
58
+ }),
59
+ });
70
60
  }
71
- async streamRequestHandler(context) {
72
- const { log, request: { url }, } = context;
61
+ async initiateDownload(context) {
73
62
  const response = await this.httpClient.stream({
74
- url,
63
+ url: context.request.url,
75
64
  timeout: { request: undefined },
76
65
  proxyUrl: context.proxyInfo?.url,
77
66
  });
78
- let pollingInterval;
79
- const cleanUp = () => {
80
- clearInterval(pollingInterval);
81
- response.stream.destroy();
67
+ const { type, charset: encoding } = parseContentTypeFromResponse(response);
68
+ context.request.url = response.url;
69
+ const pollingInterval = setInterval(() => {
70
+ const { total, transferred } = response.downloadProgress;
71
+ if (transferred > 0) {
72
+ context.log.debug(`Downloaded ${transferred} bytes of ${total ?? 0} bytes from ${context.request.url}.`);
73
+ }
74
+ }, 5000);
75
+ const contextExtension = {
76
+ [this.#contextInternals]: { pollingInterval },
77
+ request: context.request,
78
+ response,
79
+ contentType: { type, encoding },
80
+ stream: response.stream,
81
+ get body() {
82
+ return buffer(response.stream);
83
+ },
82
84
  };
83
- const downloadPromise = new Promise((resolve, reject) => {
84
- pollingInterval = setInterval(() => {
85
- const { total, transferred } = response.downloadProgress;
86
- if (transferred > 0) {
87
- log.debug(`Downloaded ${transferred} bytes of ${total ?? 0} bytes from ${url}.`);
85
+ return contextExtension;
86
+ }
87
+ async cleanupDownload(context, error) {
88
+ clearInterval(context[this.#contextInternals].pollingInterval);
89
+ // If there was no error and the stream is still readable, wait for it to be consumed before proceeding
90
+ if (error === undefined) {
91
+ if (!context.stream.destroyed && context.stream.readable) {
92
+ try {
93
+ await finished(context.stream);
94
+ }
95
+ catch {
96
+ // Stream might have encountered an error or been closed, which is fine
88
97
  }
89
- }, 5000);
90
- response.stream.on('error', async (error) => {
91
- cleanUp();
92
- reject(error);
93
- });
94
- let streamHandlerResult;
95
- try {
96
- context.stream = response.stream;
97
- context.response = response;
98
- streamHandlerResult = this.streamHandler(context);
99
- }
100
- catch (e) {
101
- cleanUp();
102
- reject(e);
103
- }
104
- if (isPromise(streamHandlerResult)) {
105
- streamHandlerResult
106
- .then(() => {
107
- resolve();
108
- })
109
- .catch((e) => {
110
- cleanUp();
111
- reject(e);
112
- });
113
- }
114
- else {
115
- resolve();
116
98
  }
117
- });
118
- await Promise.all([downloadPromise, finished(response.stream)]);
119
- cleanUp();
99
+ }
100
+ else {
101
+ context.stream.destroy();
102
+ }
120
103
  }
121
104
  }
122
105
  /**
@@ -1 +1 @@
1
- {"version":3,"file":"file-download.js","sourceRoot":"","sources":["../../src/internals/file-download.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,sBAAsB,CAAC;AAChD,OAAO,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAC;AAc5C,OAAO,EAAE,WAAW,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AA4ClD;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA0CG;AACH,MAAM,OAAO,YAAa,SAAQ,WAAwC;IAC9D,aAAa,CAAiB;IAEtC,YAAY,UAA+B,EAAE;QACzC,MAAM,EAAE,aAAa,EAAE,GAAG,OAAO,CAAC;QAClC,OAAO,OAAO,CAAC,aAAa,CAAC;QAE7B,IAAI,aAAa,EAAE,CAAC;YAChB,8DAA8D;YAC7D,OAAe,CAAC,yBAAyB,GAAG,OAAO,CAAC,qBAAqB,IAAI,GAAG,CAAC;QACtF,CAAC;QAED,KAAK,CAAC,OAAO,CAAC,CAAC;QAEf,IAAI,CAAC,aAAa,GAAG,aAAa,CAAC;QACnC,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;YACrB,IAAI,CAAC,cAAc,GAAG,IAAI,CAAC,oBAA2B,CAAC;QAC3D,CAAC;QAED,+EAA+E;QAC/E,oEAAoE;QACnE,IAAY,CAAC,kBAAkB,GAAG,IAAI,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC;IACxD,CAAC;IAEkB,KAAK,CAAC,kBAAkB,CAAC,OAAoC;QAC5E,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;YACrB,OAAO,CAAC,OAAO,CAAC,cAAc,GAAG,IAAI,CAAC;QAC1C,CAAC;QAED,MAAM,KAAK,CAAC,kBAAkB,CAAC,OAAO,CAAC,CAAC;IAC5C,CAAC;IAEO,KAAK,CAAC,oBAAoB,CAAC,OAAoC;QACnE,MAAM,EACF,GAAG,EACH,OAAO,EAAE,EAAE,GAAG,EAAE,GACnB,GAAG,OAAO,CAAC;QAEZ,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC;YAC1C,GAAG;YACH,OAAO,EAAE,EAAE,OAAO,EAAE,SAAS,EAAE;YAC/B,QAAQ,EAAE,OAAO,CAAC,SAAS,EAAE,GAAG;SACnC,CAAC,CAAC;QAEH,IAAI,eAA2C,CAAC;QAEhD,MAAM,OAAO,GAAG,GAAG,EAAE;YACjB,aAAa,CAAC,eAAgB,CAAC,CAAC;YAChC,QAAQ,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;QAC9B,CAAC,CAAC;QAEF,MAAM,eAAe,GAAG,IAAI,OAAO,CAAO,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;YAC1D,eAAe,GAAG,WAAW,CAAC,GAAG,EAAE;gBAC/B,MAAM,EAAE,KAAK,EAAE,WAAW,EAAE,GAAG,QAAQ,CAAC,gBAAgB,CAAC;gBAEzD,IAAI,WAAW,GAAG,CAAC,EAAE,CAAC;oBAClB,GAAG,CAAC,KAAK,CAAC,cAAc,WAAW,aAAa,KAAK,IAAI,CAAC,eAAe,GAAG,GAAG,CAAC,CAAC;gBACrF,CAAC;YACL,CAAC,EAAE,IAAI,CAAC,CAAC;YAET,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC,OAAO,EAAE,KAAK,EAAE,KAAY,EAAE,EAAE;gBAC/C,OAAO,EAAE,CAAC;gBACV,MAAM,CAAC,KAAK,CAAC,CAAC;YAClB,CAAC,CAAC,CAAC;YAEH,IAAI,mBAAmB,CAAC;YAExB,IAAI,CAAC;gBACD,OAAO,CAAC,MAAM,GAAG,QAAQ,CAAC,MAAM,CAAC;gBACjC,OAAO,CAAC,QAAQ,GAAG,QAAe,CAAC;gBACnC,mBAAmB,GAAG,IAAI,CAAC,aAAc,CAAC,OAAc,CAAC,CAAC;YAC9D,CAAC;YAAC,OAAO,CAAC,EAAE,CAAC;gBACT,OAAO,EAAE,CAAC;gBACV,MAAM,CAAC,CAAC,CAAC,CAAC;YACd,CAAC;YAED,IAAI,SAAS,CAAC,mBAAmB,CAAC,EAAE,CAAC;gBACjC,mBAAmB;qBACd,IAAI,CAAC,GAAG,EAAE;oBACP,OAAO,EAAE,CAAC;gBACd,CAAC,CAAC;qBACD,KAAK,CAAC,CAAC,CAAQ,EAAE,EAAE;oBAChB,OAAO,EAAE,CAAC;oBACV,MAAM,CAAC,CAAC,CAAC,CAAC;gBACd,CAAC,CAAC,CAAC;YACX,CAAC;iBAAM,CAAC;gBACJ,OAAO,EAAE,CAAC;YACd,CAAC;QACL,CAAC,CAAC,CAAC;QAEH,MAAM,OAAO,CAAC,GAAG,CAAC,CAAC,eAAe,EAAE,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;QAEhE,OAAO,EAAE,CAAC;IACd,CAAC;CACJ;AAED;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,MAAM,UAAU,gBAAgB,CAG9B,MAAwC;IACtC,OAAO,MAAM,CAAC,MAAM,CAAU,MAAM,CAAC,CAAC;AAC1C,CAAC"}
1
+ {"version":3,"file":"file-download.js","sourceRoot":"","sources":["../../src/internals/file-download.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,MAAM,EAAE,MAAM,uBAAuB,CAAC;AAC/C,OAAO,EAAE,QAAQ,EAAE,MAAM,sBAAsB,CAAC;AAGhD,OAAO,EAAE,YAAY,EAAE,eAAe,EAAE,MAAM,gBAAgB,CAAC;AAK/D,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AACrC,OAAO,EAAE,4BAA4B,EAAE,MAAM,YAAY,CAAC;AA4B1D;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA0CG;AACH,MAAM,OAAO,YAAa,SAAQ,YAAyC;IACvE,iBAAiB,GAAG,MAAM,CAAC,kBAAkB,CAAC,CAAC;IAE/C,aAAa;IACb,YAAY,UAA4D,EAAE;QACtE,KAAK,CAAC;YACF,GAAG,OAAO;YACV,sBAAsB,EAAE,GAAG,EAAE,CACzB,eAAe,CAAC,MAAM,EAAmB,CAAC,OAAO,CAAC;gBAC9C,MAAM,EAAE,IAAI,CAAC,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC;gBACxC,OAAO,EAAE,IAAI,CAAC,eAAe,CAAC,IAAI,CAAC,IAAI,CAAC;aAC3C,CAAC;SACT,CAAC,CAAC;IACP,CAAC;IAEO,KAAK,CAAC,gBAAgB,CAAC,OAAwB;QACnD,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC;YAC1C,GAAG,EAAE,OAAO,CAAC,OAAO,CAAC,GAAG;YACxB,OAAO,EAAE,EAAE,OAAO,EAAE,SAAS,EAAE;YAC/B,QAAQ,EAAE,OAAO,CAAC,SAAS,EAAE,GAAG;SACnC,CAAC,CAAC;QAEH,MAAM,EAAE,IAAI,EAAE,OAAO,EAAE,QAAQ,EAAE,GAAG,4BAA4B,CAAC,QAAQ,CAAC,CAAC;QAE3E,OAAO,CAAC,OAAO,CAAC,GAAG,GAAG,QAAQ,CAAC,GAAG,CAAC;QAEnC,MAAM,eAAe,GAAG,WAAW,CAAC,GAAG,EAAE;YACrC,MAAM,EAAE,KAAK,EAAE,WAAW,EAAE,GAAG,QAAQ,CAAC,gBAAgB,CAAC;YAEzD,IAAI,WAAW,GAAG,CAAC,EAAE,CAAC;gBAClB,OAAO,CAAC,GAAG,CAAC,KAAK,CACb,cAAc,WAAW,aAAa,KAAK,IAAI,CAAC,eAAe,OAAO,CAAC,OAAO,CAAC,GAAG,GAAG,CACxF,CAAC;YACN,CAAC;QACL,CAAC,EAAE,IAAI,CAAC,CAAC;QAET,MAAM,gBAAgB,GAAG;YACrB,CAAC,IAAI,CAAC,iBAAiB,CAAC,EAAE,EAAE,eAAe,EAAsB;YACjE,OAAO,EAAE,OAAO,CAAC,OAAiC;YAClD,QAAQ;YACR,WAAW,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE;YAC/B,MAAM,EAAE,QAAQ,CAAC,MAAM;YACvB,IAAI,IAAI;gBACJ,OAAO,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;YACnC,CAAC;SACJ,CAAC;QAEF,OAAO,gBAAgB,CAAC;IAC5B,CAAC;IAEO,KAAK,CAAC,eAAe,CACzB,OAAwE,EACxE,KAAe;QAEf,aAAa,CAAC,OAAO,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAC,eAAe,CAAC,CAAC;QAE/D,uGAAuG;QACvG,IAAI,KAAK,KAAK,SAAS,EAAE,CAAC;YACtB,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,SAAS,IAAI,OAAO,CAAC,MAAM,CAAC,QAAQ,EAAE,CAAC;gBACvD,IAAI,CAAC;oBACD,MAAM,QAAQ,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;gBACnC,CAAC;gBAAC,MAAM,CAAC;oBACL,uEAAuE;gBAC3E,CAAC;YACL,CAAC;QACL,CAAC;aAAM,CAAC;YACJ,OAAO,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;QAC7B,CAAC;IACL,CAAC;CACJ;AAED;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,MAAM,UAAU,gBAAgB,CAG9B,MAAwC;IACtC,OAAO,MAAM,CAAC,MAAM,CAAU,MAAM,CAAC,CAAC;AAC1C,CAAC"}
@@ -1,14 +1,13 @@
1
1
  import type { IncomingMessage } from 'node:http';
2
2
  import type { Readable } from 'node:stream';
3
- import type { BasicCrawlerOptions, CrawlingContext, ErrorHandler, GetUserDataFromRequest, ProxyConfiguration, Request, RequestHandler, RouterRoutes, Session } from '@crawlee/basic';
4
- import { BasicCrawler, Configuration, CrawlerExtension } from '@crawlee/basic';
5
- import type { HttpResponse } from '@crawlee/core';
3
+ import type { BasicCrawlerOptions, CrawlingContext, ErrorHandler, GetUserDataFromRequest, ProxyConfiguration, Request, RequestHandler, RequireContextPipeline, RouterRoutes, Session } from '@crawlee/basic';
4
+ import { BasicCrawler, Configuration, ContextPipeline } from '@crawlee/basic';
5
+ import type { HttpResponse, LoadedRequest } from '@crawlee/core';
6
6
  import type { Awaitable, Dictionary } from '@crawlee/types';
7
7
  import { type CheerioRoot } from '@crawlee/utils';
8
8
  import type { RequestLike, ResponseLike } from 'content-type';
9
9
  // @ts-ignore optional peer dependency or compatibility with es2022
10
10
  import type { Method, OptionsInit } from 'got-scraping';
11
- import { ObjectPredicate } from 'ow';
12
11
  import type { JsonValue } from 'type-fest';
13
12
  /**
14
13
  * TODO exists for BC within HttpCrawler - replace completely with StreamingHttpResponse in 4.0
@@ -19,7 +18,7 @@ export type PlainResponse = Omit<HttpResponse, 'body'> & IncomingMessage & {
19
18
  };
20
19
  export type HttpErrorHandler<UserData extends Dictionary = any, // with default to Dictionary we cant use a typed router in untyped crawler
21
20
  JSONData extends JsonValue = any> = ErrorHandler<HttpCrawlingContext<UserData, JSONData>>;
22
- export interface HttpCrawlerOptions<Context extends InternalHttpCrawlingContext = InternalHttpCrawlingContext> extends BasicCrawlerOptions<Context> {
21
+ export interface HttpCrawlerOptions<Context extends InternalHttpCrawlingContext = InternalHttpCrawlingContext, ExtendedContext extends Context = Context> extends BasicCrawlerOptions<Context, ExtendedContext> {
23
22
  /**
24
23
  * Timeout in which the HTTP request to the resource needs to finish, given in seconds.
25
24
  */
@@ -50,7 +49,7 @@ export interface HttpCrawlerOptions<Context extends InternalHttpCrawlingContext
50
49
  * Modyfing `pageOptions` is supported only in Playwright incognito.
51
50
  * See {@link PrePageCreateHook}
52
51
  */
53
- preNavigationHooks?: InternalHttpHook<Context>[];
52
+ preNavigationHooks?: InternalHttpHook<CrawlingContext>[];
54
53
  /**
55
54
  * Async functions that are sequentially evaluated after the navigation. Good for checking if the navigation was successful.
56
55
  * The function accepts `crawlingContext` as the only parameter.
@@ -63,7 +62,7 @@ export interface HttpCrawlerOptions<Context extends InternalHttpCrawlingContext
63
62
  * ]
64
63
  * ```
65
64
  */
66
- postNavigationHooks?: InternalHttpHook<Context>[];
65
+ postNavigationHooks?: ((crawlingContext: CrawlingContextWithReponse) => Awaitable<void>)[];
67
66
  /**
68
67
  * An array of [MIME types](https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types/Complete_list_of_MIME_types)
69
68
  * you want the crawler to load and process. By default, only `text/html` and `application/xhtml+xml` MIME types are supported.
@@ -115,12 +114,21 @@ export interface HttpCrawlerOptions<Context extends InternalHttpCrawlingContext
115
114
  export type InternalHttpHook<Context> = (crawlingContext: Context, gotOptions: OptionsInit) => Awaitable<void>;
116
115
  export type HttpHook<UserData extends Dictionary = any, // with default to Dictionary we cant use a typed router in untyped crawler
117
116
  JSONData extends JsonValue = any> = InternalHttpHook<HttpCrawlingContext<UserData, JSONData>>;
117
+ interface CrawlingContextWithReponse<UserData extends Dictionary = any> extends CrawlingContext<UserData> {
118
+ /**
119
+ * The request object that was successfully loaded and navigated to, including the {@link Request.loadedUrl|`loadedUrl`} property.
120
+ */
121
+ request: LoadedRequest<Request<UserData>>;
122
+ /**
123
+ * The HTTP response object containing status code, headers, and other response metadata.
124
+ */
125
+ response: PlainResponse;
126
+ }
118
127
  /**
119
128
  * @internal
120
129
  */
121
130
  export interface InternalHttpCrawlingContext<UserData extends Dictionary = any, // with default to Dictionary we cant use a typed router in untyped crawler
122
- JSONData extends JsonValue = any, // with default to Dictionary we cant use a typed router in untyped crawler
123
- Crawler = HttpCrawler<any>> extends CrawlingContext<Crawler, UserData> {
131
+ JSONData extends JsonValue = any> extends CrawlingContextWithReponse<UserData> {
124
132
  /**
125
133
  * The request body of the web page.
126
134
  * The type depends on the `Content-Type` header of the web page:
@@ -139,7 +147,6 @@ Crawler = HttpCrawler<any>> extends CrawlingContext<Crawler, UserData> {
139
147
  type: string;
140
148
  encoding: BufferEncoding;
141
149
  };
142
- response: PlainResponse;
143
150
  /**
144
151
  * Wait for an element matching the selector to appear. Timeout is ignored.
145
152
  *
@@ -167,7 +174,7 @@ Crawler = HttpCrawler<any>> extends CrawlingContext<Crawler, UserData> {
167
174
  */
168
175
  parseWithCheerio(selector?: string, timeoutMs?: number): Promise<CheerioRoot>;
169
176
  }
170
- export interface HttpCrawlingContext<UserData extends Dictionary = any, JSONData extends JsonValue = any> extends InternalHttpCrawlingContext<UserData, JSONData, HttpCrawler<HttpCrawlingContext<UserData, JSONData>>> {
177
+ export interface HttpCrawlingContext<UserData extends Dictionary = any, JSONData extends JsonValue = any> extends InternalHttpCrawlingContext<UserData, JSONData> {
171
178
  }
172
179
  export type HttpRequestHandler<UserData extends Dictionary = any, // with default to Dictionary we cant use a typed router in untyped crawler
173
180
  JSONData extends JsonValue = any> = RequestHandler<HttpCrawlingContext<UserData, JSONData>>;
@@ -238,16 +245,15 @@ JSONData extends JsonValue = any> = RequestHandler<HttpCrawlingContext<UserData,
238
245
  * ```
239
246
  * @category Crawlers
240
247
  */
241
- export declare class HttpCrawler<Context extends InternalHttpCrawlingContext<any, any, HttpCrawler<Context>>> extends BasicCrawler<Context> {
248
+ export declare class HttpCrawler<Context extends InternalHttpCrawlingContext<any, any> = InternalHttpCrawlingContext, ContextExtension = {}, ExtendedContext extends Context = Context & ContextExtension> extends BasicCrawler<Context, ExtendedContext> {
242
249
  readonly config: Configuration;
243
250
  /**
244
251
  * A reference to the underlying {@link ProxyConfiguration} class that manages the crawler's proxies.
245
252
  * Only available if used by the crawler.
246
253
  */
247
254
  proxyConfiguration?: ProxyConfiguration;
248
- protected userRequestHandlerTimeoutMillis: number;
249
- protected preNavigationHooks: InternalHttpHook<Context>[];
250
- protected postNavigationHooks: InternalHttpHook<Context>[];
255
+ protected preNavigationHooks: InternalHttpHook<CrawlingContext>[];
256
+ protected postNavigationHooks: ((crawlingContext: CrawlingContextWithReponse) => Awaitable<void>)[];
251
257
  protected persistCookiesPerSession: boolean;
252
258
  protected navigationTimeoutMillis: number;
253
259
  protected ignoreSslErrors: boolean;
@@ -268,7 +274,7 @@ export declare class HttpCrawler<Context extends InternalHttpCrawlingContext<any
268
274
  // @ts-ignore optional peer dependency or compatibility with es2022
269
275
  forceResponseEncoding: import("ow").StringPredicate & import("ow").BasePredicate<string | undefined>;
270
276
  // @ts-ignore optional peer dependency or compatibility with es2022
271
- proxyConfiguration: ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
277
+ proxyConfiguration: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
272
278
  // @ts-ignore optional peer dependency or compatibility with es2022
273
279
  persistCookiesPerSession: import("ow").BooleanPredicate & import("ow").BasePredicate<boolean | undefined>;
274
280
  // @ts-ignore optional peer dependency or compatibility with es2022
@@ -279,6 +285,10 @@ export declare class HttpCrawler<Context extends InternalHttpCrawlingContext<any
279
285
  preNavigationHooks: import("ow").ArrayPredicate<unknown> & import("ow").BasePredicate<unknown[] | undefined>;
280
286
  // @ts-ignore optional peer dependency or compatibility with es2022
281
287
  postNavigationHooks: import("ow").ArrayPredicate<unknown> & import("ow").BasePredicate<unknown[] | undefined>;
288
+ // @ts-ignore optional peer dependency or compatibility with es2022
289
+ contextPipelineBuilder: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
290
+ // @ts-ignore optional peer dependency or compatibility with es2022
291
+ extendContext: import("ow").Predicate<Function> & import("ow").BasePredicate<Function | undefined>;
282
292
  // @ts-ignore optional peer dependency or compatibility with es2022
283
293
  requestList: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
284
294
  // @ts-ignore optional peer dependency or compatibility with es2022
@@ -335,19 +345,13 @@ export declare class HttpCrawler<Context extends InternalHttpCrawlingContext<any
335
345
  /**
336
346
  * All `HttpCrawlerOptions` parameters are passed via an options object.
337
347
  */
338
- constructor(options?: HttpCrawlerOptions<Context>, config?: Configuration);
339
- /**
340
- * **EXPERIMENTAL**
341
- * Function for attaching CrawlerExtensions such as the Unblockers.
342
- * @param extension Crawler extension that overrides the crawler configuration.
343
- */
344
- use(extension: CrawlerExtension): void;
345
- /**
346
- * Wrapper around requestHandler that opens and closes pages etc.
347
- */
348
- protected _runRequestHandler(crawlingContext: Context): Promise<void>;
349
- protected isRequestBlocked(crawlingContext: Context): Promise<string | false>;
350
- protected _handleNavigation(crawlingContext: Context): Promise<void>;
348
+ constructor(options?: HttpCrawlerOptions<Context, ExtendedContext> & RequireContextPipeline<InternalHttpCrawlingContext, Context>, config?: Configuration);
349
+ protected buildContextPipeline(): ContextPipeline<CrawlingContext, InternalHttpCrawlingContext>;
350
+ private prepareProxyInfo;
351
+ private makeHttpRequest;
352
+ private processHttpResponse;
353
+ private handleBlockedRequestByContent;
354
+ protected isRequestBlocked(crawlingContext: InternalHttpCrawlingContext): Promise<string | false>;
351
355
  /**
352
356
  * Sets the cookie header to `gotOptions` based on the provided request and session headers, as well as any changes that occurred due to hooks.
353
357
  */
@@ -361,38 +365,19 @@ export declare class HttpCrawler<Context extends InternalHttpCrawlingContext<any
361
365
  /**
362
366
  * Encodes and parses response according to the provided content type
363
367
  */
364
- protected _parseResponse(request: Request, responseStream: IncomingMessage, crawlingContext: Context): Promise<(Partial<Context> & {
365
- isXml: boolean;
366
- response: IncomingMessage;
367
- contentType: {
368
- type: string;
369
- encoding: BufferEncoding;
370
- };
371
- }) | {
372
- body: Buffer<ArrayBufferLike>;
373
- response: IncomingMessage;
374
- contentType: {
375
- type: string;
376
- encoding: BufferEncoding;
377
- };
378
- enqueueLinks: () => Promise<{
379
- processedRequests: never[];
380
- unprocessedRequests: never[];
381
- }>;
382
- }>;
383
- protected _parseHTML(response: IncomingMessage, _isXml: boolean, _crawlingContext: Context): Promise<Partial<Context>>;
368
+ private _parseResponse;
384
369
  /**
385
370
  * Combines the provided `requestOptions` with mandatory (non-overridable) values.
386
371
  */
387
372
  protected _getRequestOptions(request: Request, session?: Session, proxyUrl?: string, gotOptions?: OptionsInit): {
373
+ url?: string | URL | undefined;
374
+ // @ts-ignore optional peer dependency or compatibility with es2022
375
+ headers?: import("got-scraping").Headers | undefined;
388
376
  // @ts-ignore optional peer dependency or compatibility with es2022
389
377
  body?: string | Buffer | Readable | Generator | AsyncGenerator | import("form-data-encoder").FormDataLike | undefined;
390
378
  json?: unknown;
391
379
  // @ts-ignore optional peer dependency or compatibility with es2022
392
380
  request?: import("got-scraping").RequestFunction | undefined;
393
- url?: string | URL | undefined;
394
- // @ts-ignore optional peer dependency or compatibility with es2022
395
- headers?: import("got-scraping").Headers | undefined;
396
381
  // @ts-ignore optional peer dependency or compatibility with es2022
397
382
  agent?: import("got-scraping").Agents | undefined;
398
383
  // @ts-ignore optional peer dependency or compatibility with es2022
@@ -1 +1 @@
1
- {"version":3,"file":"http-crawler.d.ts","sourceRoot":"","sources":["../../src/internals/http-crawler.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAuB,eAAe,EAAE,MAAM,WAAW,CAAC;AAEtE,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAG5C,OAAO,KAAK,EAER,mBAAmB,EACnB,eAAe,EACf,YAAY,EACZ,sBAAsB,EAEtB,kBAAkB,EAClB,OAAO,EACP,cAAc,EACd,YAAY,EACZ,OAAO,EACV,MAAM,gBAAgB,CAAC;AACxB,OAAO,EAEH,YAAY,EACZ,aAAa,EACb,gBAAgB,EAOnB,MAAM,gBAAgB,CAAC;AACxB,OAAO,KAAK,EAAE,YAAY,EAAyB,MAAM,eAAe,CAAC;AACzE,OAAO,KAAK,EAAE,SAAS,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAC5D,OAAO,EAAE,KAAK,WAAW,EAAuB,MAAM,gBAAgB,CAAC;AAEvE,OAAO,KAAK,EAAE,WAAW,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAE9D,OAAO,KAAK,EAAE,MAAM,EAAE,WAAW,EAAqC,MAAM,cAAc,CAAC;AAG3F,OAAW,EAAE,eAAe,EAAE,MAAM,IAAI,CAAC;AACzC,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AAO3C;;;GAGG;AACH,MAAM,MAAM,aAAa,GAAG,IAAI,CAAC,YAAY,EAAE,MAAM,CAAC,GAClD,eAAe,GAAG;IACd,IAAI,CAAC,EAAE,OAAO,CAAC;CAClB,CAAC;AAkBN,MAAM,MAAM,gBAAgB,CACxB,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,SAAS,GAAG,GAAG,IAChC,YAAY,CAAC,mBAAmB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC;AAE1D,MAAM,WAAW,kBAAkB,CAAC,OAAO,SAAS,2BAA2B,GAAG,2BAA2B,CACzG,SAAQ,mBAAmB,CAAC,OAAO,CAAC;IACpC;;OAEG;IACH,qBAAqB,CAAC,EAAE,MAAM,CAAC;IAE/B;;OAEG;IACH,eAAe,CAAC,EAAE,OAAO,CAAC;IAE1B;;;;OAIG;IACH,kBAAkB,CAAC,EAAE,kBAAkB,CAAC;IAExC;;;;;;;;;;;;;;;OAeG;IACH,kBAAkB,CAAC,EAAE,gBAAgB,CAAC,OAAO,CAAC,EAAE,CAAC;IAEjD;;;;;;;;;;;OAWG;IACH,mBAAmB,CAAC,EAAE,gBAAgB,CAAC,OAAO,CAAC,EAAE,CAAC;IAElD;;;OAGG;IACH,mBAAmB,CAAC,EAAE,MAAM,EAAE,CAAC;IAE/B;;;;;;;;;;OAUG;IACH,uBAAuB,CAAC,EAAE,MAAM,CAAC;IAEjC;;;;;;;;OAQG;IACH,qBAAqB,CAAC,EAAE,MAAM,CAAC;IAE/B;;;;;OAKG;IACH,wBAAwB,CAAC,EAAE,OAAO,CAAC;IAEnC;;;OAGG;IACH,0BAA0B,CAAC,EAAE,MAAM,EAAE,CAAC;IAEtC;;;OAGG;IACH,8BAA8B,CAAC,EAAE,MAAM,EAAE,CAAC;CAC7C;AAED;;GAEG;AACH,MAAM,MAAM,gBAAgB,CAAC,OAAO,IAAI,CAAC,eAAe,EAAE,OAAO,EAAE,UAAU,EAAE,WAAW,KAAK,SAAS,CAAC,IAAI,CAAC,CAAC;AAE/G,MAAM,MAAM,QAAQ,CAChB,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,SAAS,GAAG,GAAG,IAChC,gBAAgB,CAAC,mBAAmB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC;AAE9D;;GAEG;AACH,MAAM,WAAW,2BAA2B,CACxC,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,SAAS,GAAG,GAAG,EAAE,2EAA2E;AAC7G,OAAO,GAAG,WAAW,CAAC,GAAG,CAAC,CAC5B,SAAQ,eAAe,CAAC,OAAO,EAAE,QAAQ,CAAC;IACxC;;;;;OAKG;IACH,IAAI,EAAE,MAAM,GAAG,MAAM,CAAC;IAEtB;;OAEG;IACH,IAAI,EAAE,QAAQ,CAAC;IAEf;;OAEG;IACH,WAAW,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,cAAc,CAAA;KAAE,CAAC;IACxD,QAAQ,EAAE,aAAa,CAAC;IAExB;;;;;;;;;;;OAWG;IACH,eAAe,CAAC,QAAQ,EAAE,MAAM,EAAE,SAAS,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAErE;;;;;;;;;;;OAWG;IACH,gBAAgB,CAAC,QAAQ,CAAC,EAAE,MAAM,EAAE,SAAS,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,CAAC,CAAC;CACjF;AAED,MAAM,WAAW,mBAAmB,CAAC,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,QAAQ,SAAS,SAAS,GAAG,GAAG,CACpG,SAAQ,2BAA2B,CAAC,QAAQ,EAAE,QAAQ,EAAE,WAAW,CAAC,mBAAmB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC;CAAG;AAEpH,MAAM,MAAM,kBAAkB,CAC1B,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,SAAS,GAAG,GAAG,IAChC,cAAc,CAAC,mBAAmB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC;AAE5D;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAkEG;AACH,qBAAa,WAAW,CACpB,OAAO,SAAS,2BAA2B,CAAC,GAAG,EAAE,GAAG,EAAE,WAAW,CAAC,OAAO,CAAC,CAAC,CAC7E,SAAQ,YAAY,CAAC,OAAO,CAAC;aA0CL,MAAM;IAzC5B;;;OAGG;IACH,kBAAkB,CAAC,EAAE,kBAAkB,CAAC;IAExC,SAAS,CAAC,+BAA+B,EAAE,MAAM,CAAC;IAClD,SAAS,CAAC,kBAAkB,EAAE,gBAAgB,CAAC,OAAO,CAAC,EAAE,CAAC;IAC1D,SAAS,CAAC,mBAAmB,EAAE,gBAAgB,CAAC,OAAO,CAAC,EAAE,CAAC;IAC3D,SAAS,CAAC,wBAAwB,EAAE,OAAO,CAAC;IAC5C,SAAS,CAAC,uBAAuB,EAAE,MAAM,CAAC;IAC1C,SAAS,CAAC,eAAe,EAAE,OAAO,CAAC;IACnC,SAAS,CAAC,uBAAuB,CAAC,EAAE,MAAM,CAAC;IAC3C,SAAS,CAAC,qBAAqB,CAAC,EAAE,MAAM,CAAC;IACzC,SAAS,CAAC,8BAA8B,EAAE,GAAG,CAAC,MAAM,CAAC,CAAC;IACtD,SAAS,CAAC,0BAA0B,EAAE,GAAG,CAAC,MAAM,CAAC,CAAC;IAClD,SAAS,CAAC,QAAQ,CAAC,kBAAkB,EAAE,GAAG,CAAC,MAAM,CAAC,CAAC;IAEnD,iBAA0B,YAAY;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;MAgBpC;IAEF;;OAEG;gBAEC,OAAO,GAAE,kBAAkB,CAAC,OAAO,CAAM,EACvB,MAAM,gBAAkC;IA2E9D;;;;OAIG;IACH,GAAG,CAAC,SAAS,EAAE,gBAAgB;IAkC/B;;OAEG;cACsB,kBAAkB,CAAC,eAAe,EAAE,OAAO;cAuF3C,gBAAgB,CAAC,eAAe,EAAE,OAAO,GAAG,OAAO,CAAC,MAAM,GAAG,KAAK,CAAC;cAa5E,iBAAiB,CAAC,eAAe,EAAE,OAAO;IA6B1D;;OAEG;IACH,SAAS,CAAC,aAAa,CACnB,EAAE,OAAO,EAAE,OAAO,EAAE,EAAE,eAAe,EACrC,UAAU,EAAE,WAAW,EACvB,cAAc,EAAE,MAAM,EACtB,eAAe,EAAE,MAAM;IAkD3B;;;;OAIG;cACa,gBAAgB,CAAC,EAC7B,OAAO,EACP,OAAO,EACP,QAAQ,EACR,UAAU,GACb,EAAE,sBAAsB,GAAG,OAAO,CAAC,aAAa,CAAC;IAwBlD;;OAEG;cACa,cAAc,CAAC,OAAO,EAAE,OAAO,EAAE,cAAc,EAAE,eAAe,EAAE,eAAe,EAAE,OAAO;;;;;;;;;;;;;;;;;;;cA8C1F,UAAU,CACtB,QAAQ,EAAE,eAAe,EACzB,MAAM,EAAE,OAAO,EACf,gBAAgB,EAAE,OAAO,GAC1B,OAAO,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;IAM5B;;OAEG;IACH,SAAS,CAAC,kBAAkB,CAAC,OAAO,EAAE,OAAO,EAAE,OAAO,CAAC,EAAE,OAAO,EAAE,QAAQ,CAAC,EAAE,MAAM,EAAE,UAAU,CAAC,EAAE,WAAW;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;kBACpB,IAAI;;IAkC7F,SAAS,CAAC,eAAe,CACrB,OAAO,EAAE,OAAO,EAChB,QAAQ,EAAE,eAAe,EACzB,QAAQ,EAAE,cAAc,GACzB;QACC,QAAQ,EAAE,cAAc,CAAC;QACzB,QAAQ,EAAE,eAAe,CAAC;KAC7B;IAqCD;;OAEG;IACH,SAAS,CAAC,yBAAyB,CAAC,mBAAmB,EAAE,CAAC,MAAM,GAAG,WAAW,GAAG,YAAY,CAAC,EAAE;IAgBhG;;OAEG;IACH,SAAS,CAAC,qBAAqB,CAAC,OAAO,CAAC,EAAE,OAAO;IAKjD,OAAO,CAAC,oBAAoB;IAkB5B;;OAEG;IACH,OAAO,CAAC,iBAAiB,CAuBvB;CACL;AAED,UAAU,sBAAsB;IAC5B,OAAO,EAAE,OAAO,CAAC;IACjB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,WAAW,CAAC;CAC3B;AAkFD;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,wBAAgB,gBAAgB,CAC5B,OAAO,SAAS,mBAAmB,GAAG,mBAAmB,EACzD,QAAQ,SAAS,UAAU,GAAG,sBAAsB,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,EAC1E,MAAM,CAAC,EAAE,YAAY,CAAC,OAAO,EAAE,QAAQ,CAAC,mDAEzC"}
1
+ {"version":3,"file":"http-crawler.d.ts","sourceRoot":"","sources":["../../src/internals/http-crawler.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAuB,eAAe,EAAE,MAAM,WAAW,CAAC;AACtE,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAG5C,OAAO,KAAK,EAER,mBAAmB,EACnB,eAAe,EACf,YAAY,EACZ,sBAAsB,EACtB,kBAAkB,EAClB,OAAO,EACP,cAAc,EACd,sBAAsB,EACtB,YAAY,EACZ,OAAO,EACV,MAAM,gBAAgB,CAAC;AACxB,OAAO,EACH,YAAY,EACZ,aAAa,EACb,eAAe,EAOlB,MAAM,gBAAgB,CAAC;AACxB,OAAO,KAAK,EAAE,YAAY,EAAE,aAAa,EAAoC,MAAM,eAAe,CAAC;AACnG,OAAO,KAAK,EAAE,SAAS,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAC5D,OAAO,EAAE,KAAK,WAAW,EAAuB,MAAM,gBAAgB,CAAC;AAEvE,OAAO,KAAK,EAAE,WAAW,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAE9D,OAAO,KAAK,EAAE,MAAM,EAAE,WAAW,EAAqC,MAAM,cAAc,CAAC;AAG3F,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AAS3C;;;GAGG;AACH,MAAM,MAAM,aAAa,GAAG,IAAI,CAAC,YAAY,EAAE,MAAM,CAAC,GAClD,eAAe,GAAG;IACd,IAAI,CAAC,EAAE,OAAO,CAAC;CAClB,CAAC;AAkBN,MAAM,MAAM,gBAAgB,CACxB,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,SAAS,GAAG,GAAG,IAChC,YAAY,CAAC,mBAAmB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC;AAE1D,MAAM,WAAW,kBAAkB,CAC/B,OAAO,SAAS,2BAA2B,GAAG,2BAA2B,EACzE,eAAe,SAAS,OAAO,GAAG,OAAO,CAC3C,SAAQ,mBAAmB,CAAC,OAAO,EAAE,eAAe,CAAC;IACnD;;OAEG;IACH,qBAAqB,CAAC,EAAE,MAAM,CAAC;IAE/B;;OAEG;IACH,eAAe,CAAC,EAAE,OAAO,CAAC;IAE1B;;;;OAIG;IACH,kBAAkB,CAAC,EAAE,kBAAkB,CAAC;IAExC;;;;;;;;;;;;;;;OAeG;IACH,kBAAkB,CAAC,EAAE,gBAAgB,CAAC,eAAe,CAAC,EAAE,CAAC;IAEzD;;;;;;;;;;;OAWG;IACH,mBAAmB,CAAC,EAAE,CAAC,CAAC,eAAe,EAAE,0BAA0B,KAAK,SAAS,CAAC,IAAI,CAAC,CAAC,EAAE,CAAC;IAE3F;;;OAGG;IACH,mBAAmB,CAAC,EAAE,MAAM,EAAE,CAAC;IAE/B;;;;;;;;;;OAUG;IACH,uBAAuB,CAAC,EAAE,MAAM,CAAC;IAEjC;;;;;;;;OAQG;IACH,qBAAqB,CAAC,EAAE,MAAM,CAAC;IAE/B;;;;;OAKG;IACH,wBAAwB,CAAC,EAAE,OAAO,CAAC;IAEnC;;;OAGG;IACH,0BAA0B,CAAC,EAAE,MAAM,EAAE,CAAC;IAEtC;;;OAGG;IACH,8BAA8B,CAAC,EAAE,MAAM,EAAE,CAAC;CAC7C;AAED;;GAEG;AACH,MAAM,MAAM,gBAAgB,CAAC,OAAO,IAAI,CAAC,eAAe,EAAE,OAAO,EAAE,UAAU,EAAE,WAAW,KAAK,SAAS,CAAC,IAAI,CAAC,CAAC;AAE/G,MAAM,MAAM,QAAQ,CAChB,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,SAAS,GAAG,GAAG,IAChC,gBAAgB,CAAC,mBAAmB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC;AAE9D,UAAU,0BAA0B,CAChC,QAAQ,SAAS,UAAU,GAAG,GAAG,CACnC,SAAQ,eAAe,CAAC,QAAQ,CAAC;IAC/B;;OAEG;IACH,OAAO,EAAE,aAAa,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC;IAE1C;;OAEG;IACH,QAAQ,EAAE,aAAa,CAAC;CAC3B;AAED;;GAEG;AACH,MAAM,WAAW,2BAA2B,CACxC,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,SAAS,GAAG,GAAG,CAClC,SAAQ,0BAA0B,CAAC,QAAQ,CAAC;IAC1C;;;;;OAKG;IACH,IAAI,EAAE,MAAM,GAAG,MAAM,CAAC;IAEtB;;OAEG;IACH,IAAI,EAAE,QAAQ,CAAC;IAEf;;OAEG;IACH,WAAW,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,cAAc,CAAA;KAAE,CAAC;IAExD;;;;;;;;;;;OAWG;IACH,eAAe,CAAC,QAAQ,EAAE,MAAM,EAAE,SAAS,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAErE;;;;;;;;;;;OAWG;IACH,gBAAgB,CAAC,QAAQ,CAAC,EAAE,MAAM,EAAE,SAAS,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,CAAC,CAAC;CACjF;AAED,MAAM,WAAW,mBAAmB,CAAC,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,QAAQ,SAAS,SAAS,GAAG,GAAG,CACpG,SAAQ,2BAA2B,CAAC,QAAQ,EAAE,QAAQ,CAAC;CAAG;AAE9D,MAAM,MAAM,kBAAkB,CAC1B,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,SAAS,GAAG,GAAG,IAChC,cAAc,CAAC,mBAAmB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC;AAE5D;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAkEG;AACH,qBAAa,WAAW,CACpB,OAAO,SAAS,2BAA2B,CAAC,GAAG,EAAE,GAAG,CAAC,GAAG,2BAA2B,EACnF,gBAAgB,GAAG,EAAE,EACrB,eAAe,SAAS,OAAO,GAAG,OAAO,GAAG,gBAAgB,CAC9D,SAAQ,YAAY,CAAC,OAAO,EAAE,eAAe,CAAC;aA0CtB,MAAM;IAzC5B;;;OAGG;IACH,kBAAkB,CAAC,EAAE,kBAAkB,CAAC;IAExC,SAAS,CAAC,kBAAkB,EAAE,gBAAgB,CAAC,eAAe,CAAC,EAAE,CAAC;IAClE,SAAS,CAAC,mBAAmB,EAAE,CAAC,CAAC,eAAe,EAAE,0BAA0B,KAAK,SAAS,CAAC,IAAI,CAAC,CAAC,EAAE,CAAC;IACpG,SAAS,CAAC,wBAAwB,EAAE,OAAO,CAAC;IAC5C,SAAS,CAAC,uBAAuB,EAAE,MAAM,CAAC;IAC1C,SAAS,CAAC,eAAe,EAAE,OAAO,CAAC;IACnC,SAAS,CAAC,uBAAuB,CAAC,EAAE,MAAM,CAAC;IAC3C,SAAS,CAAC,qBAAqB,CAAC,EAAE,MAAM,CAAC;IACzC,SAAS,CAAC,8BAA8B,EAAE,GAAG,CAAC,MAAM,CAAC,CAAC;IACtD,SAAS,CAAC,0BAA0B,EAAE,GAAG,CAAC,MAAM,CAAC,CAAC;IAClD,SAAS,CAAC,QAAQ,CAAC,kBAAkB,EAAE,GAAG,CAAC,MAAM,CAAC,CAAC;IAEnD,iBAA0B,YAAY;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;MAgBpC;IAEF;;OAEG;gBAEC,OAAO,GAAE,kBAAkB,CAAC,OAAO,EAAE,eAAe,CAAC,GACjD,sBAAsB,CAAC,2BAA2B,EAAE,OAAO,CAAa,EAC1D,MAAM,gBAAkC;IAoE9D,SAAS,CAAC,oBAAoB,IAAI,eAAe,CAAC,eAAe,EAAE,2BAA2B,CAAC;YAUjF,gBAAgB;YAYhB,eAAe;YAmDf,mBAAmB;YAuEnB,6BAA6B;cAQ3B,gBAAgB,CAAC,eAAe,EAAE,2BAA2B,GAAG,OAAO,CAAC,MAAM,GAAG,KAAK,CAAC;IAavG;;OAEG;IACH,SAAS,CAAC,aAAa,CACnB,EAAE,OAAO,EAAE,OAAO,EAAE,EAAE,eAAe,EACrC,UAAU,EAAE,WAAW,EACvB,cAAc,EAAE,MAAM,EACtB,eAAe,EAAE,MAAM;IAkD3B;;;;OAIG;cACa,gBAAgB,CAAC,EAC7B,OAAO,EACP,OAAO,EACP,QAAQ,EACR,UAAU,GACb,EAAE,sBAAsB,GAAG,OAAO,CAAC,aAAa,CAAC;IAwBlD;;OAEG;YACW,cAAc;IA2C5B;;OAEG;IACH,SAAS,CAAC,kBAAkB,CAAC,OAAO,EAAE,OAAO,EAAE,OAAO,CAAC,EAAE,OAAO,EAAE,QAAQ,CAAC,EAAE,MAAM,EAAE,UAAU,CAAC,EAAE,WAAW;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;kBACpB,IAAI;;IAkC7F,SAAS,CAAC,eAAe,CACrB,OAAO,EAAE,OAAO,EAChB,QAAQ,EAAE,eAAe,EACzB,QAAQ,EAAE,cAAc,GACzB;QACC,QAAQ,EAAE,cAAc,CAAC;QACzB,QAAQ,EAAE,eAAe,CAAC;KAC7B;IAqCD;;OAEG;IACH,SAAS,CAAC,yBAAyB,CAAC,mBAAmB,EAAE,CAAC,MAAM,GAAG,WAAW,GAAG,YAAY,CAAC,EAAE;IAgBhG;;OAEG;IACH,SAAS,CAAC,qBAAqB,CAAC,OAAO,CAAC,EAAE,OAAO;IAKjD,OAAO,CAAC,oBAAoB;IAkB5B;;OAEG;IACH,OAAO,CAAC,iBAAiB,CAuBvB;CACL;AAED,UAAU,sBAAsB;IAC5B,OAAO,EAAE,OAAO,CAAC;IACjB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,WAAW,CAAC;CAC3B;AA4CD;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,wBAAgB,gBAAgB,CAC5B,OAAO,SAAS,mBAAmB,GAAG,mBAAmB,EACzD,QAAQ,SAAS,UAAU,GAAG,sBAAsB,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,EAC1E,MAAM,CAAC,EAAE,YAAY,CAAC,OAAO,EAAE,QAAQ,CAAC,mDAEzC"}