@crawlee/http 4.0.0-beta.10 → 4.0.0-beta.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,29 +1,22 @@
1
+ import type { Readable } from 'node:stream';
2
+ import type { BasicCrawlerOptions } from '@crawlee/basic';
3
+ import { BasicCrawler } from '@crawlee/basic';
4
+ import type { CrawlingContext, HttpResponse, LoadedRequest, Request, StreamingHttpResponse } from '@crawlee/core';
1
5
  import type { Dictionary } from '@crawlee/types';
2
- // @ts-ignore optional peer dependency or compatibility with es2022
3
- import type { Request } from 'got-scraping';
4
- import type { ErrorHandler, GetUserDataFromRequest, HttpCrawlerOptions, InternalHttpCrawlingContext, InternalHttpHook, RequestHandler, RouterRoutes } from '../index.js';
5
- import { HttpCrawler } from '../index.js';
6
- export type FileDownloadErrorHandler<UserData extends Dictionary = any, // with default to Dictionary we cant use a typed router in untyped crawler
7
- JSONData extends Dictionary = any> = ErrorHandler<FileDownloadCrawlingContext<UserData, JSONData>>;
8
- export type StreamHandlerContext = Omit<FileDownloadCrawlingContext, 'body' | 'parseWithCheerio' | 'json' | 'addRequests' | 'contentType'> & {
9
- stream: Request;
10
- };
11
- type StreamHandler = (context: StreamHandlerContext) => void | Promise<void>;
12
- export type FileDownloadOptions<UserData extends Dictionary = any, // with default to Dictionary we cant use a typed router in untyped crawler
13
- JSONData extends Dictionary = any> = (Omit<HttpCrawlerOptions<FileDownloadCrawlingContext<UserData, JSONData>>, 'requestHandler'> & {
14
- requestHandler?: never;
15
- streamHandler?: StreamHandler;
16
- }) | (Omit<HttpCrawlerOptions<FileDownloadCrawlingContext<UserData, JSONData>>, 'requestHandler'> & {
17
- requestHandler: FileDownloadRequestHandler;
18
- streamHandler?: never;
19
- });
20
- export type FileDownloadHook<UserData extends Dictionary = any, // with default to Dictionary we cant use a typed router in untyped crawler
21
- JSONData extends Dictionary = any> = InternalHttpHook<FileDownloadCrawlingContext<UserData, JSONData>>;
22
- export interface FileDownloadCrawlingContext<UserData extends Dictionary = any, // with default to Dictionary we cant use a typed router in untyped crawler
23
- JSONData extends Dictionary = any> extends InternalHttpCrawlingContext<UserData, JSONData, FileDownload> {
6
+ import type { ErrorHandler, GetUserDataFromRequest, InternalHttpHook, RequestHandler, RouterRoutes } from '../index.js';
7
+ export type FileDownloadErrorHandler<UserData extends Dictionary = any> = ErrorHandler<FileDownloadCrawlingContext<UserData>>;
8
+ export type FileDownloadHook<UserData extends Dictionary = any> = InternalHttpHook<FileDownloadCrawlingContext<UserData>>;
9
+ export interface FileDownloadCrawlingContext<UserData extends Dictionary = any> extends CrawlingContext<UserData> {
10
+ request: LoadedRequest<Request<UserData>>;
11
+ response: HttpResponse<'buffer'> | StreamingHttpResponse;
12
+ body: Promise<Buffer>;
13
+ stream: Readable;
14
+ contentType: {
15
+ type: string;
16
+ encoding: BufferEncoding;
17
+ };
24
18
  }
25
- export type FileDownloadRequestHandler<UserData extends Dictionary = any, // with default to Dictionary we cant use a typed router in untyped crawler
26
- JSONData extends Dictionary = any> = RequestHandler<FileDownloadCrawlingContext<UserData, JSONData>>;
19
+ export type FileDownloadRequestHandler<UserData extends Dictionary = any> = RequestHandler<FileDownloadCrawlingContext<UserData>>;
27
20
  /**
28
21
  * Provides a framework for downloading files in parallel using plain HTTP requests. The URLs to download are fed either from a static list of URLs or they can be added on the fly from another crawler.
29
22
  *
@@ -67,11 +60,11 @@ JSONData extends Dictionary = any> = RequestHandler<FileDownloadCrawlingContext<
67
60
  * ]);
68
61
  * ```
69
62
  */
70
- export declare class FileDownload extends HttpCrawler<FileDownloadCrawlingContext> {
71
- private streamHandler?;
72
- constructor(options?: FileDownloadOptions);
73
- protected _runRequestHandler(context: FileDownloadCrawlingContext): Promise<void>;
74
- private streamRequestHandler;
63
+ export declare class FileDownload extends BasicCrawler<FileDownloadCrawlingContext> {
64
+ #private;
65
+ constructor(options?: BasicCrawlerOptions<FileDownloadCrawlingContext>);
66
+ private initiateDownload;
67
+ private cleanupDownload;
75
68
  }
76
69
  /**
77
70
  * Creates new {@link Router} instance that works based on request labels.
@@ -98,6 +91,5 @@ export declare class FileDownload extends HttpCrawler<FileDownloadCrawlingContex
98
91
  * ```
99
92
  */
100
93
  // @ts-ignore optional peer dependency or compatibility with es2022
101
- export declare function createFileRouter<Context extends FileDownloadCrawlingContext = FileDownloadCrawlingContext, UserData extends Dictionary = GetUserDataFromRequest<Context['request']>>(routes?: RouterRoutes<Context, UserData>): import("packages/core/dist/router.js").RouterHandler<Context>;
102
- export {};
94
+ export declare function createFileRouter<Context extends FileDownloadCrawlingContext = FileDownloadCrawlingContext, UserData extends Dictionary = GetUserDataFromRequest<Context['request']>>(routes?: RouterRoutes<Context, UserData>): import("@crawlee/basic").RouterHandler<Context>;
103
95
  //# sourceMappingURL=file-download.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"file-download.d.ts","sourceRoot":"","sources":["../../src/internals/file-download.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AACjD,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AAE5C,OAAO,KAAK,EACR,YAAY,EACZ,sBAAsB,EACtB,kBAAkB,EAClB,2BAA2B,EAC3B,gBAAgB,EAChB,cAAc,EACd,YAAY,EACf,MAAM,aAAa,CAAC;AACrB,OAAO,EAAE,WAAW,EAAU,MAAM,aAAa,CAAC;AAElD,MAAM,MAAM,wBAAwB,CAChC,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,IACjC,YAAY,CAAC,2BAA2B,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC;AAElE,MAAM,MAAM,oBAAoB,GAAG,IAAI,CACnC,2BAA2B,EAC3B,MAAM,GAAG,kBAAkB,GAAG,MAAM,GAAG,aAAa,GAAG,aAAa,CACvE,GAAG;IACA,MAAM,EAAE,OAAO,CAAC;CACnB,CAAC;AAEF,KAAK,aAAa,GAAG,CAAC,OAAO,EAAE,oBAAoB,KAAK,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;AAE7E,MAAM,MAAM,mBAAmB,CAC3B,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,IAE/B,CAAC,IAAI,CAAC,kBAAkB,CAAC,2BAA2B,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,EAAE,gBAAgB,CAAC,GAAG;IAC3F,cAAc,CAAC,EAAE,KAAK,CAAC;IACvB,aAAa,CAAC,EAAE,aAAa,CAAC;CACjC,CAAC,GACF,CAAC,IAAI,CAAC,kBAAkB,CAAC,2BAA2B,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,EAAE,gBAAgB,CAAC,GAAG;IAC3F,cAAc,EAAE,0BAA0B,CAAC;IAC3C,aAAa,CAAC,EAAE,KAAK,CAAC;CACzB,CAAC,CAAC;AAET,MAAM,MAAM,gBAAgB,CACxB,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,IACjC,gBAAgB,CAAC,2BAA2B,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC;AAEtE,MAAM,WAAW,2BAA2B,CACxC,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,CACnC,SAAQ,2BAA2B,CAAC,QAAQ,EAAE,QAAQ,EAAE,YAAY,CAAC;CAAG;AAE1E,MAAM,MAAM,0BAA0B,CAClC,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,GAAG,IACjC,cAAc,CAAC,2BAA2B,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC;AAEpE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA0CG;AACH,qBAAa,YAAa,SAAQ,WAAW,CAAC,2BAA2B,CAAC;IACtE,OAAO,CAAC,aAAa,CAAC,CAAgB;gBAE1B,OAAO,GAAE,mBAAwB;cAqBpB,kBAAkB,CAAC,OAAO,EAAE,2BAA2B;YAQlE,oBAAoB;CA8DrC;AAED;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,wBAAgB,gBAAgB,CAC5B,OAAO,SAAS,2BAA2B,GAAG,2BAA2B,EACzE,QAAQ,SAAS,UAAU,GAAG,sBAAsB,CAAC,OAAO,
CAAC,SAAS,CAAC,CAAC,EAC1E,MAAM,CAAC,EAAE,YAAY,CAAC,OAAO,EAAE,QAAQ,CAAC,iEAEzC"}
1
+ {"version":3,"file":"file-download.d.ts","sourceRoot":"","sources":["../../src/internals/file-download.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAI5C,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,gBAAgB,CAAC;AAC1D,OAAO,EAAE,YAAY,EAAmB,MAAM,gBAAgB,CAAC;AAC/D,OAAO,KAAK,EAAE,eAAe,EAAE,YAAY,EAAE,aAAa,EAAE,OAAO,EAAE,qBAAqB,EAAE,MAAM,eAAe,CAAC;AAClH,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAEjD,OAAO,KAAK,EAAE,YAAY,EAAE,sBAAsB,EAAE,gBAAgB,EAAE,cAAc,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAIxH,MAAM,MAAM,wBAAwB,CAChC,QAAQ,SAAS,UAAU,GAAG,GAAG,IACjC,YAAY,CAAC,2BAA2B,CAAC,QAAQ,CAAC,CAAC,CAAC;AAExD,MAAM,MAAM,gBAAgB,CACxB,QAAQ,SAAS,UAAU,GAAG,GAAG,IACjC,gBAAgB,CAAC,2BAA2B,CAAC,QAAQ,CAAC,CAAC,CAAC;AAE5D,MAAM,WAAW,2BAA2B,CACxC,QAAQ,SAAS,UAAU,GAAG,GAAG,CACnC,SAAQ,eAAe,CAAC,QAAQ,CAAC;IAC/B,OAAO,EAAE,aAAa,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC;IAC1C,QAAQ,EAAE,YAAY,CAAC,QAAQ,CAAC,GAAG,qBAAqB,CAAC;IACzD,IAAI,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC;IACtB,MAAM,EAAE,QAAQ,CAAC;IACjB,WAAW,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,cAAc,CAAA;KAAE,CAAC;CAC3D;AAED,MAAM,MAAM,0BAA0B,CAClC,QAAQ,SAAS,UAAU,GAAG,GAAG,IACjC,cAAc,CAAC,2BAA2B,CAAC,QAAQ,CAAC,CAAC,CAAC;AAM1D;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA0CG;AACH,qBAAa,YAAa,SAAQ,YAAY,CAAC,2BAA2B,CAAC;;gBAI3D,OAAO,GAAE,mBAAmB,CAAC,2BAA2B,CAAM;YAW5D,gBAAgB;YAmChB,eAAe;CAmBhC;AAED;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,wBAAgB,gBAAgB,CAC5B,OAAO,SAAS,2BAA2B,GAAG,2BAA2B,EACzE,QAAQ,SAAS,UAAU,GAAG,sBAAsB,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,EAC1E,MAAM,CAAC,EAAE,YAAY,CAAC,OAAO,EAAE,QAAQ,CAAC,mDAEzC"}
@@ -1,6 +1,8 @@
1
+ import { buffer } from 'node:stream/consumers';
1
2
  import { finished } from 'node:stream/promises';
2
- import { isPromise } from 'node:util/types';
3
- import { HttpCrawler, Router } from '../index.js';
3
+ import { BasicCrawler, ContextPipeline } from '@crawlee/basic';
4
+ import { Router } from '../index.js';
5
+ import { parseContentTypeFromResponse } from './utils.js';
4
6
  /**
5
7
  * Provides a framework for downloading files in parallel using plain HTTP requests. The URLs to download are fed either from a static list of URLs or they can be added on the fly from another crawler.
6
8
  *
@@ -44,79 +46,60 @@ import { HttpCrawler, Router } from '../index.js';
44
46
  * ]);
45
47
  * ```
46
48
  */
47
- export class FileDownload extends HttpCrawler {
48
- streamHandler;
49
+ export class FileDownload extends BasicCrawler {
50
+ #contextInternals = Symbol('contextInternals');
51
+ // TODO hooks
49
52
  constructor(options = {}) {
50
- const { streamHandler } = options;
51
- delete options.streamHandler;
52
- if (streamHandler) {
53
- // For streams, the navigation is done in the request handler.
54
- options.requestHandlerTimeoutSecs = options.navigationTimeoutSecs ?? 120;
55
- }
56
- super(options);
57
- this.streamHandler = streamHandler;
58
- if (this.streamHandler) {
59
- this.requestHandler = this.streamRequestHandler;
60
- }
61
- // The base HttpCrawler class only supports a handful of text based mime types.
62
- // With the FileDownload crawler, we want to download any file type.
63
- this.supportedMimeTypes = new Set(['*/*']);
64
- }
65
- async _runRequestHandler(context) {
66
- if (this.streamHandler) {
67
- context.request.skipNavigation = true;
68
- }
69
- await super._runRequestHandler(context);
53
+ super({
54
+ ...options,
55
+ contextPipelineBuilder: () => ContextPipeline.create().compose({
56
+ action: this.initiateDownload.bind(this),
57
+ cleanup: this.cleanupDownload.bind(this),
58
+ }),
59
+ });
70
60
  }
71
- async streamRequestHandler(context) {
72
- const { log, request: { url }, } = context;
61
+ async initiateDownload(context) {
73
62
  const response = await this.httpClient.stream({
74
- url,
63
+ url: context.request.url,
75
64
  timeout: { request: undefined },
76
65
  proxyUrl: context.proxyInfo?.url,
77
66
  });
78
- let pollingInterval;
79
- const cleanUp = () => {
80
- clearInterval(pollingInterval);
81
- response.stream.destroy();
67
+ const { type, charset: encoding } = parseContentTypeFromResponse(response);
68
+ context.request.url = response.url;
69
+ const pollingInterval = setInterval(() => {
70
+ const { total, transferred } = response.downloadProgress;
71
+ if (transferred > 0) {
72
+ context.log.debug(`Downloaded ${transferred} bytes of ${total ?? 0} bytes from ${context.request.url}.`);
73
+ }
74
+ }, 5000);
75
+ const contextExtension = {
76
+ [this.#contextInternals]: { pollingInterval },
77
+ request: context.request,
78
+ response,
79
+ contentType: { type, encoding },
80
+ stream: response.stream,
81
+ get body() {
82
+ return buffer(response.stream);
83
+ },
82
84
  };
83
- const downloadPromise = new Promise((resolve, reject) => {
84
- pollingInterval = setInterval(() => {
85
- const { total, transferred } = response.downloadProgress;
86
- if (transferred > 0) {
87
- log.debug(`Downloaded ${transferred} bytes of ${total ?? 0} bytes from ${url}.`);
85
+ return contextExtension;
86
+ }
87
+ async cleanupDownload(context, error) {
88
+ clearInterval(context[this.#contextInternals].pollingInterval);
89
+ // If there was no error and the stream is still readable, wait for it to be consumed before proceeding
90
+ if (error === undefined) {
91
+ if (!context.stream.destroyed && context.stream.readable) {
92
+ try {
93
+ await finished(context.stream);
94
+ }
95
+ catch {
96
+ // Stream might have encountered an error or been closed, which is fine
88
97
  }
89
- }, 5000);
90
- response.stream.on('error', async (error) => {
91
- cleanUp();
92
- reject(error);
93
- });
94
- let streamHandlerResult;
95
- try {
96
- context.stream = response.stream;
97
- context.response = response;
98
- streamHandlerResult = this.streamHandler(context);
99
- }
100
- catch (e) {
101
- cleanUp();
102
- reject(e);
103
- }
104
- if (isPromise(streamHandlerResult)) {
105
- streamHandlerResult
106
- .then(() => {
107
- resolve();
108
- })
109
- .catch((e) => {
110
- cleanUp();
111
- reject(e);
112
- });
113
- }
114
- else {
115
- resolve();
116
98
  }
117
- });
118
- await Promise.all([downloadPromise, finished(response.stream)]);
119
- cleanUp();
99
+ }
100
+ else {
101
+ context.stream.destroy();
102
+ }
120
103
  }
121
104
  }
122
105
  /**
@@ -1 +1 @@
1
- {"version":3,"file":"file-download.js","sourceRoot":"","sources":["../../src/internals/file-download.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,sBAAsB,CAAC;AAChD,OAAO,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAC;AAc5C,OAAO,EAAE,WAAW,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AA4ClD;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA0CG;AACH,MAAM,OAAO,YAAa,SAAQ,WAAwC;IAC9D,aAAa,CAAiB;IAEtC,YAAY,UAA+B,EAAE;QACzC,MAAM,EAAE,aAAa,EAAE,GAAG,OAAO,CAAC;QAClC,OAAO,OAAO,CAAC,aAAa,CAAC;QAE7B,IAAI,aAAa,EAAE,CAAC;YAChB,8DAA8D;YAC7D,OAAe,CAAC,yBAAyB,GAAG,OAAO,CAAC,qBAAqB,IAAI,GAAG,CAAC;QACtF,CAAC;QAED,KAAK,CAAC,OAAO,CAAC,CAAC;QAEf,IAAI,CAAC,aAAa,GAAG,aAAa,CAAC;QACnC,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;YACrB,IAAI,CAAC,cAAc,GAAG,IAAI,CAAC,oBAA2B,CAAC;QAC3D,CAAC;QAED,+EAA+E;QAC/E,oEAAoE;QACnE,IAAY,CAAC,kBAAkB,GAAG,IAAI,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC;IACxD,CAAC;IAEkB,KAAK,CAAC,kBAAkB,CAAC,OAAoC;QAC5E,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;YACrB,OAAO,CAAC,OAAO,CAAC,cAAc,GAAG,IAAI,CAAC;QAC1C,CAAC;QAED,MAAM,KAAK,CAAC,kBAAkB,CAAC,OAAO,CAAC,CAAC;IAC5C,CAAC;IAEO,KAAK,CAAC,oBAAoB,CAAC,OAAoC;QACnE,MAAM,EACF,GAAG,EACH,OAAO,EAAE,EAAE,GAAG,EAAE,GACnB,GAAG,OAAO,CAAC;QAEZ,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC;YAC1C,GAAG;YACH,OAAO,EAAE,EAAE,OAAO,EAAE,SAAS,EAAE;YAC/B,QAAQ,EAAE,OAAO,CAAC,SAAS,EAAE,GAAG;SACnC,CAAC,CAAC;QAEH,IAAI,eAA2C,CAAC;QAEhD,MAAM,OAAO,GAAG,GAAG,EAAE;YACjB,aAAa,CAAC,eAAgB,CAAC,CAAC;YAChC,QAAQ,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;QAC9B,CAAC,CAAC;QAEF,MAAM,eAAe,GAAG,IAAI,OAAO,CAAO,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;YAC1D,eAAe,GAAG,WAAW,CAAC,GAAG,EAAE;gBAC/B,MAAM,EAAE,KAAK,EAAE,WAAW,EAAE,GAAG,QAAQ,CAAC,gBAAgB,CAAC;gBAEzD,IAAI,WAAW,GAAG,CAAC,EAAE,CAAC;oBAClB,GAAG,CAAC,KAAK,CAAC,cAAc,WAAW,aAAa,KAAK,IAAI,CAAC,eAAe,GAAG,GAAG,CAAC,CAAC;gBACrF,CAAC;YACL,CAAC,EAAE,IAAI,CAAC,CAAC;YAET,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC,OAAO,EAAE,KAAK,EAAE,KAAY,EAAE,EAAE;gBAC/C,OAAO,EAAE,CAAC;gBACV,MAAM,CAAC,KAAK,CAAC,CAAC;YAClB,CAAC,CAAC,CAAC;YAEH,IAAI,mBAAmB,CAAC;YAExB,IAAI,CAAC;gBACD,OAAO,CAAC,MAAM,GAAG,QAAQ,CAAC,MAAM,CAAC;gBACjC,OAAO,CAA
C,QAAQ,GAAG,QAAe,CAAC;gBACnC,mBAAmB,GAAG,IAAI,CAAC,aAAc,CAAC,OAAc,CAAC,CAAC;YAC9D,CAAC;YAAC,OAAO,CAAC,EAAE,CAAC;gBACT,OAAO,EAAE,CAAC;gBACV,MAAM,CAAC,CAAC,CAAC,CAAC;YACd,CAAC;YAED,IAAI,SAAS,CAAC,mBAAmB,CAAC,EAAE,CAAC;gBACjC,mBAAmB;qBACd,IAAI,CAAC,GAAG,EAAE;oBACP,OAAO,EAAE,CAAC;gBACd,CAAC,CAAC;qBACD,KAAK,CAAC,CAAC,CAAQ,EAAE,EAAE;oBAChB,OAAO,EAAE,CAAC;oBACV,MAAM,CAAC,CAAC,CAAC,CAAC;gBACd,CAAC,CAAC,CAAC;YACX,CAAC;iBAAM,CAAC;gBACJ,OAAO,EAAE,CAAC;YACd,CAAC;QACL,CAAC,CAAC,CAAC;QAEH,MAAM,OAAO,CAAC,GAAG,CAAC,CAAC,eAAe,EAAE,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;QAEhE,OAAO,EAAE,CAAC;IACd,CAAC;CACJ;AAED;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,MAAM,UAAU,gBAAgB,CAG9B,MAAwC;IACtC,OAAO,MAAM,CAAC,MAAM,CAAU,MAAM,CAAC,CAAC;AAC1C,CAAC"}
1
+ {"version":3,"file":"file-download.js","sourceRoot":"","sources":["../../src/internals/file-download.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,MAAM,EAAE,MAAM,uBAAuB,CAAC;AAC/C,OAAO,EAAE,QAAQ,EAAE,MAAM,sBAAsB,CAAC;AAGhD,OAAO,EAAE,YAAY,EAAE,eAAe,EAAE,MAAM,gBAAgB,CAAC;AAK/D,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AACrC,OAAO,EAAE,4BAA4B,EAAE,MAAM,YAAY,CAAC;AA4B1D;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA0CG;AACH,MAAM,OAAO,YAAa,SAAQ,YAAyC;IACvE,iBAAiB,GAAG,MAAM,CAAC,kBAAkB,CAAC,CAAC;IAE/C,aAAa;IACb,YAAY,UAA4D,EAAE;QACtE,KAAK,CAAC;YACF,GAAG,OAAO;YACV,sBAAsB,EAAE,GAAG,EAAE,CACzB,eAAe,CAAC,MAAM,EAAmB,CAAC,OAAO,CAAC;gBAC9C,MAAM,EAAE,IAAI,CAAC,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC;gBACxC,OAAO,EAAE,IAAI,CAAC,eAAe,CAAC,IAAI,CAAC,IAAI,CAAC;aAC3C,CAAC;SACT,CAAC,CAAC;IACP,CAAC;IAEO,KAAK,CAAC,gBAAgB,CAAC,OAAwB;QACnD,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC;YAC1C,GAAG,EAAE,OAAO,CAAC,OAAO,CAAC,GAAG;YACxB,OAAO,EAAE,EAAE,OAAO,EAAE,SAAS,EAAE;YAC/B,QAAQ,EAAE,OAAO,CAAC,SAAS,EAAE,GAAG;SACnC,CAAC,CAAC;QAEH,MAAM,EAAE,IAAI,EAAE,OAAO,EAAE,QAAQ,EAAE,GAAG,4BAA4B,CAAC,QAAQ,CAAC,CAAC;QAE3E,OAAO,CAAC,OAAO,CAAC,GAAG,GAAG,QAAQ,CAAC,GAAG,CAAC;QAEnC,MAAM,eAAe,GAAG,WAAW,CAAC,GAAG,EAAE;YACrC,MAAM,EAAE,KAAK,EAAE,WAAW,EAAE,GAAG,QAAQ,CAAC,gBAAgB,CAAC;YAEzD,IAAI,WAAW,GAAG,CAAC,EAAE,CAAC;gBAClB,OAAO,CAAC,GAAG,CAAC,KAAK,CACb,cAAc,WAAW,aAAa,KAAK,IAAI,CAAC,eAAe,OAAO,CAAC,OAAO,CAAC,GAAG,GAAG,CACxF,CAAC;YACN,CAAC;QACL,CAAC,EAAE,IAAI,CAAC,CAAC;QAET,MAAM,gBAAgB,GAAG;YACrB,CAAC,IAAI,CAAC,iBAAiB,CAAC,EAAE,EAAE,eAAe,EAAsB;YACjE,OAAO,EAAE,OAAO,CAAC,OAAiC;YAClD,QAAQ;YACR,WAAW,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE;YAC/B,MAAM,EAAE,QAAQ,CAAC,MAAM;YACvB,IAAI,IAAI;gBACJ,OAAO,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;YACnC,CAAC;SACJ,CAAC;QAEF,OAAO,gBAAgB,CAAC;IAC5B,CAAC;IAEO,KAAK,CAAC,eAAe,CACzB,OAAwE,EACxE,KAAe;QAEf,aAAa,CAAC,OAAO,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAC,eAAe,CAAC,CAAC;QAE/D,uGAAuG;QACvG,IAAI,KAAK,KAAK,SAAS,EAAE,CAAC;YACtB,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,SAAS,IAAI,OAAO,CAAC,MAAM,CAAC,QAAQ,EAAE,CAAC;gBACvD,IAAI,CAAC;oBACD,MAAM,QAAQ,
CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;gBACnC,CAAC;gBAAC,MAAM,CAAC;oBACL,uEAAuE;gBAC3E,CAAC;YACL,CAAC;QACL,CAAC;aAAM,CAAC;YACJ,OAAO,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;QAC7B,CAAC;IACL,CAAC;CACJ;AAED;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,MAAM,UAAU,gBAAgB,CAG9B,MAAwC;IACtC,OAAO,MAAM,CAAC,MAAM,CAAU,MAAM,CAAC,CAAC;AAC1C,CAAC"}
@@ -1,14 +1,13 @@
1
1
  import type { IncomingMessage } from 'node:http';
2
2
  import type { Readable } from 'node:stream';
3
- import type { BasicCrawlerOptions, CrawlingContext, ErrorHandler, GetUserDataFromRequest, ProxyConfiguration, Request, RequestHandler, RouterRoutes, Session } from '@crawlee/basic';
4
- import { BasicCrawler, Configuration, CrawlerExtension } from '@crawlee/basic';
5
- import type { HttpResponse } from '@crawlee/core';
3
+ import type { BasicCrawlerOptions, CrawlingContext, ErrorHandler, GetUserDataFromRequest, Request, RequestHandler, RequireContextPipeline, RouterRoutes, Session } from '@crawlee/basic';
4
+ import { BasicCrawler, Configuration, ContextPipeline } from '@crawlee/basic';
5
+ import type { HttpResponse, LoadedRequest } from '@crawlee/core';
6
6
  import type { Awaitable, Dictionary } from '@crawlee/types';
7
7
  import { type CheerioRoot } from '@crawlee/utils';
8
8
  import type { RequestLike, ResponseLike } from 'content-type';
9
9
  // @ts-ignore optional peer dependency or compatibility with es2022
10
10
  import type { Method, OptionsInit } from 'got-scraping';
11
- import { ObjectPredicate } from 'ow';
12
11
  import type { JsonValue } from 'type-fest';
13
12
  /**
14
13
  * TODO exists for BC within HttpCrawler - replace completely with StreamingHttpResponse in 4.0
@@ -19,7 +18,7 @@ export type PlainResponse = Omit<HttpResponse, 'body'> & IncomingMessage & {
19
18
  };
20
19
  export type HttpErrorHandler<UserData extends Dictionary = any, // with default to Dictionary we cant use a typed router in untyped crawler
21
20
  JSONData extends JsonValue = any> = ErrorHandler<HttpCrawlingContext<UserData, JSONData>>;
22
- export interface HttpCrawlerOptions<Context extends InternalHttpCrawlingContext = InternalHttpCrawlingContext> extends BasicCrawlerOptions<Context> {
21
+ export interface HttpCrawlerOptions<Context extends InternalHttpCrawlingContext = InternalHttpCrawlingContext, ExtendedContext extends Context = Context> extends BasicCrawlerOptions<Context, ExtendedContext> {
23
22
  /**
24
23
  * Timeout in which the HTTP request to the resource needs to finish, given in seconds.
25
24
  */
@@ -28,12 +27,6 @@ export interface HttpCrawlerOptions<Context extends InternalHttpCrawlingContext
28
27
  * If set to true, SSL certificate errors will be ignored.
29
28
  */
30
29
  ignoreSslErrors?: boolean;
31
- /**
32
- * If set, this crawler will be configured for all connections to use
33
- * [Apify Proxy](https://console.apify.com/proxy) or your own Proxy URLs provided and rotated according to the configuration.
34
- * For more information, see the [documentation](https://docs.apify.com/proxy).
35
- */
36
- proxyConfiguration?: ProxyConfiguration;
37
30
  /**
38
31
  * Async functions that are sequentially evaluated before the navigation. Good for setting additional cookies
39
32
  * or browser properties before navigation. The function accepts two parameters, `crawlingContext` and `gotOptions`,
@@ -50,7 +43,7 @@ export interface HttpCrawlerOptions<Context extends InternalHttpCrawlingContext
50
43
  * Modifying `pageOptions` is supported only in Playwright incognito.
51
44
  * See {@link PrePageCreateHook}
52
45
  */
53
- preNavigationHooks?: InternalHttpHook<Context>[];
46
+ preNavigationHooks?: InternalHttpHook<CrawlingContext>[];
54
47
  /**
55
48
  * Async functions that are sequentially evaluated after the navigation. Good for checking if the navigation was successful.
56
49
  * The function accepts `crawlingContext` as the only parameter.
@@ -63,7 +56,7 @@ export interface HttpCrawlerOptions<Context extends InternalHttpCrawlingContext
63
56
  * ]
64
57
  * ```
65
58
  */
66
- postNavigationHooks?: InternalHttpHook<Context>[];
59
+ postNavigationHooks?: ((crawlingContext: CrawlingContextWithReponse) => Awaitable<void>)[];
67
60
  /**
68
61
  * An array of [MIME types](https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types/Complete_list_of_MIME_types)
69
62
  * you want the crawler to load and process. By default, only `text/html` and `application/xhtml+xml` MIME types are supported.
@@ -115,12 +108,21 @@ export interface HttpCrawlerOptions<Context extends InternalHttpCrawlingContext
115
108
  export type InternalHttpHook<Context> = (crawlingContext: Context, gotOptions: OptionsInit) => Awaitable<void>;
116
109
  export type HttpHook<UserData extends Dictionary = any, // with default to Dictionary we cant use a typed router in untyped crawler
117
110
  JSONData extends JsonValue = any> = InternalHttpHook<HttpCrawlingContext<UserData, JSONData>>;
111
+ interface CrawlingContextWithReponse<UserData extends Dictionary = any> extends CrawlingContext<UserData> {
112
+ /**
113
+ * The request object that was successfully loaded and navigated to, including the {@link Request.loadedUrl|`loadedUrl`} property.
114
+ */
115
+ request: LoadedRequest<Request<UserData>>;
116
+ /**
117
+ * The HTTP response object containing status code, headers, and other response metadata.
118
+ */
119
+ response: PlainResponse;
120
+ }
118
121
  /**
119
122
  * @internal
120
123
  */
121
124
  export interface InternalHttpCrawlingContext<UserData extends Dictionary = any, // with default to Dictionary we cant use a typed router in untyped crawler
122
- JSONData extends JsonValue = any, // with default to Dictionary we cant use a typed router in untyped crawler
123
- Crawler = HttpCrawler<any>> extends CrawlingContext<Crawler, UserData> {
125
+ JSONData extends JsonValue = any> extends CrawlingContextWithReponse<UserData> {
124
126
  /**
125
127
  * The request body of the web page.
126
128
  * The type depends on the `Content-Type` header of the web page:
@@ -139,7 +141,6 @@ Crawler = HttpCrawler<any>> extends CrawlingContext<Crawler, UserData> {
139
141
  type: string;
140
142
  encoding: BufferEncoding;
141
143
  };
142
- response: PlainResponse;
143
144
  /**
144
145
  * Wait for an element matching the selector to appear. Timeout is ignored.
145
146
  *
@@ -167,7 +168,7 @@ Crawler = HttpCrawler<any>> extends CrawlingContext<Crawler, UserData> {
167
168
  */
168
169
  parseWithCheerio(selector?: string, timeoutMs?: number): Promise<CheerioRoot>;
169
170
  }
170
- export interface HttpCrawlingContext<UserData extends Dictionary = any, JSONData extends JsonValue = any> extends InternalHttpCrawlingContext<UserData, JSONData, HttpCrawler<HttpCrawlingContext<UserData, JSONData>>> {
171
+ export interface HttpCrawlingContext<UserData extends Dictionary = any, JSONData extends JsonValue = any> extends InternalHttpCrawlingContext<UserData, JSONData> {
171
172
  }
172
173
  export type HttpRequestHandler<UserData extends Dictionary = any, // with default to Dictionary we cant use a typed router in untyped crawler
173
174
  JSONData extends JsonValue = any> = RequestHandler<HttpCrawlingContext<UserData, JSONData>>;
@@ -238,16 +239,10 @@ JSONData extends JsonValue = any> = RequestHandler<HttpCrawlingContext<UserData,
238
239
  * ```
239
240
  * @category Crawlers
240
241
  */
241
- export declare class HttpCrawler<Context extends InternalHttpCrawlingContext<any, any, HttpCrawler<Context>>> extends BasicCrawler<Context> {
242
+ export declare class HttpCrawler<Context extends InternalHttpCrawlingContext<any, any> = InternalHttpCrawlingContext, ContextExtension = {}, ExtendedContext extends Context = Context & ContextExtension> extends BasicCrawler<Context, ExtendedContext> {
242
243
  readonly config: Configuration;
243
- /**
244
- * A reference to the underlying {@link ProxyConfiguration} class that manages the crawler's proxies.
245
- * Only available if used by the crawler.
246
- */
247
- proxyConfiguration?: ProxyConfiguration;
248
- protected userRequestHandlerTimeoutMillis: number;
249
- protected preNavigationHooks: InternalHttpHook<Context>[];
250
- protected postNavigationHooks: InternalHttpHook<Context>[];
244
+ protected preNavigationHooks: InternalHttpHook<CrawlingContext>[];
245
+ protected postNavigationHooks: ((crawlingContext: CrawlingContextWithReponse) => Awaitable<void>)[];
251
246
  protected persistCookiesPerSession: boolean;
252
247
  protected navigationTimeoutMillis: number;
253
248
  protected ignoreSslErrors: boolean;
@@ -267,8 +262,6 @@ export declare class HttpCrawler<Context extends InternalHttpCrawlingContext<any
267
262
  suggestResponseEncoding: import("ow").StringPredicate & import("ow").BasePredicate<string | undefined>;
268
263
  // @ts-ignore optional peer dependency or compatibility with es2022
269
264
  forceResponseEncoding: import("ow").StringPredicate & import("ow").BasePredicate<string | undefined>;
270
- // @ts-ignore optional peer dependency or compatibility with es2022
271
- proxyConfiguration: ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
272
265
  // @ts-ignore optional peer dependency or compatibility with es2022
273
266
  persistCookiesPerSession: import("ow").BooleanPredicate & import("ow").BasePredicate<boolean | undefined>;
274
267
  // @ts-ignore optional peer dependency or compatibility with es2022
@@ -279,6 +272,10 @@ export declare class HttpCrawler<Context extends InternalHttpCrawlingContext<any
279
272
  preNavigationHooks: import("ow").ArrayPredicate<unknown> & import("ow").BasePredicate<unknown[] | undefined>;
280
273
  // @ts-ignore optional peer dependency or compatibility with es2022
281
274
  postNavigationHooks: import("ow").ArrayPredicate<unknown> & import("ow").BasePredicate<unknown[] | undefined>;
275
+ // @ts-ignore optional peer dependency or compatibility with es2022
276
+ contextPipelineBuilder: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
277
+ // @ts-ignore optional peer dependency or compatibility with es2022
278
+ extendContext: import("ow").Predicate<Function> & import("ow").BasePredicate<Function | undefined>;
282
279
  // @ts-ignore optional peer dependency or compatibility with es2022
283
280
  requestList: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
284
281
  // @ts-ignore optional peer dependency or compatibility with es2022
@@ -305,6 +302,8 @@ export declare class HttpCrawler<Context extends InternalHttpCrawlingContext<any
305
302
  sessionPoolOptions: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
306
303
  // @ts-ignore optional peer dependency or compatibility with es2022
307
304
  useSessionPool: import("ow").BooleanPredicate & import("ow").BasePredicate<boolean | undefined>;
305
+ // @ts-ignore optional peer dependency or compatibility with es2022
306
+ proxyConfiguration: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
308
307
  // @ts-ignore optional peer dependency or compatibility with es2022
309
308
  statusMessageLoggingInterval: import("ow").NumberPredicate & import("ow").BasePredicate<number | undefined>;
310
309
  // @ts-ignore optional peer dependency or compatibility with es2022
@@ -335,19 +334,12 @@ export declare class HttpCrawler<Context extends InternalHttpCrawlingContext<any
335
334
  /**
336
335
  * All `HttpCrawlerOptions` parameters are passed via an options object.
337
336
  */
338
- constructor(options?: HttpCrawlerOptions<Context>, config?: Configuration);
339
- /**
340
- * **EXPERIMENTAL**
341
- * Function for attaching CrawlerExtensions such as the Unblockers.
342
- * @param extension Crawler extension that overrides the crawler configuration.
343
- */
344
- use(extension: CrawlerExtension): void;
345
- /**
346
- * Wrapper around requestHandler that opens and closes pages etc.
347
- */
348
- protected _runRequestHandler(crawlingContext: Context): Promise<void>;
349
- protected isRequestBlocked(crawlingContext: Context): Promise<string | false>;
350
- protected _handleNavigation(crawlingContext: Context): Promise<void>;
337
+ constructor(options?: HttpCrawlerOptions<Context, ExtendedContext> & RequireContextPipeline<InternalHttpCrawlingContext, Context>, config?: Configuration);
338
+ protected buildContextPipeline(): ContextPipeline<CrawlingContext, InternalHttpCrawlingContext>;
339
+ private makeHttpRequest;
340
+ private processHttpResponse;
341
+ private handleBlockedRequestByContent;
342
+ protected isRequestBlocked(crawlingContext: InternalHttpCrawlingContext): Promise<string | false>;
351
343
  /**
352
344
  * Sets the cookie header to `gotOptions` based on the provided request and session headers, as well as any changes that occurred due to hooks.
353
345
  */
@@ -361,38 +353,19 @@ export declare class HttpCrawler<Context extends InternalHttpCrawlingContext<any
361
353
  /**
362
354
  * Encodes and parses response according to the provided content type
363
355
  */
364
- protected _parseResponse(request: Request, responseStream: IncomingMessage, crawlingContext: Context): Promise<(Partial<Context> & {
365
- isXml: boolean;
366
- response: IncomingMessage;
367
- contentType: {
368
- type: string;
369
- encoding: BufferEncoding;
370
- };
371
- }) | {
372
- body: Buffer<ArrayBufferLike>;
373
- response: IncomingMessage;
374
- contentType: {
375
- type: string;
376
- encoding: BufferEncoding;
377
- };
378
- enqueueLinks: () => Promise<{
379
- processedRequests: never[];
380
- unprocessedRequests: never[];
381
- }>;
382
- }>;
383
- protected _parseHTML(response: IncomingMessage, _isXml: boolean, _crawlingContext: Context): Promise<Partial<Context>>;
356
+ private _parseResponse;
384
357
  /**
385
358
  * Combines the provided `requestOptions` with mandatory (non-overridable) values.
386
359
  */
387
360
  protected _getRequestOptions(request: Request, session?: Session, proxyUrl?: string, gotOptions?: OptionsInit): {
361
+ url?: string | URL | undefined;
362
+ // @ts-ignore optional peer dependency or compatibility with es2022
363
+ headers?: import("got-scraping").Headers | undefined;
388
364
  // @ts-ignore optional peer dependency or compatibility with es2022
389
365
  body?: string | Buffer | Readable | Generator | AsyncGenerator | import("form-data-encoder").FormDataLike | undefined;
390
366
  json?: unknown;
391
367
  // @ts-ignore optional peer dependency or compatibility with es2022
392
368
  request?: import("got-scraping").RequestFunction | undefined;
393
- url?: string | URL | undefined;
394
- // @ts-ignore optional peer dependency or compatibility with es2022
395
- headers?: import("got-scraping").Headers | undefined;
396
369
  // @ts-ignore optional peer dependency or compatibility with es2022
397
370
  agent?: import("got-scraping").Agents | undefined;
398
371
  // @ts-ignore optional peer dependency or compatibility with es2022
@@ -1 +1 @@
1
- {"version":3,"file":"http-crawler.d.ts","sourceRoot":"","sources":["../../src/internals/http-crawler.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAuB,eAAe,EAAE,MAAM,WAAW,CAAC;AAEtE,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAG5C,OAAO,KAAK,EAER,mBAAmB,EACnB,eAAe,EACf,YAAY,EACZ,sBAAsB,EAEtB,kBAAkB,EAClB,OAAO,EACP,cAAc,EACd,YAAY,EACZ,OAAO,EACV,MAAM,gBAAgB,CAAC;AACxB,OAAO,EAEH,YAAY,EACZ,aAAa,EACb,gBAAgB,EAOnB,MAAM,gBAAgB,CAAC;AACxB,OAAO,KAAK,EAAE,YAAY,EAAyB,MAAM,eAAe,CAAC;AACzE,OAAO,KAAK,EAAE,SAAS,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAC5D,OAAO,EAAE,KAAK,WAAW,EAAuB,MAAM,gBAAgB,CAAC;AAEvE,OAAO,KAAK,EAAE,WAAW,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAE9D,OAAO,KAAK,EAAE,MAAM,EAAE,WAAW,EAAqC,MAAM,cAAc,CAAC;AAG3F,OAAW,EAAE,eAAe,EAAE,MAAM,IAAI,CAAC;AACzC,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AAO3C;;;GAGG;AACH,MAAM,MAAM,aAAa,GAAG,IAAI,CAAC,YAAY,EAAE,MAAM,CAAC,GAClD,eAAe,GAAG;IACd,IAAI,CAAC,EAAE,OAAO,CAAC;CAClB,CAAC;AAkBN,MAAM,MAAM,gBAAgB,CACxB,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,SAAS,GAAG,GAAG,IAChC,YAAY,CAAC,mBAAmB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC;AAE1D,MAAM,WAAW,kBAAkB,CAAC,OAAO,SAAS,2BAA2B,GAAG,2BAA2B,CACzG,SAAQ,mBAAmB,CAAC,OAAO,CAAC;IACpC;;OAEG;IACH,qBAAqB,CAAC,EAAE,MAAM,CAAC;IAE/B;;OAEG;IACH,eAAe,CAAC,EAAE,OAAO,CAAC;IAE1B;;;;OAIG;IACH,kBAAkB,CAAC,EAAE,kBAAkB,CAAC;IAExC;;;;;;;;;;;;;;;OAeG;IACH,kBAAkB,CAAC,EAAE,gBAAgB,CAAC,OAAO,CAAC,EAAE,CAAC;IAEjD;;;;;;;;;;;OAWG;IACH,mBAAmB,CAAC,EAAE,gBAAgB,CAAC,OAAO,CAAC,EAAE,CAAC;IAElD;;;OAGG;IACH,mBAAmB,CAAC,EAAE,MAAM,EAAE,CAAC;IAE/B;;;;;;;;;;OAUG;IACH,uBAAuB,CAAC,EAAE,MAAM,CAAC;IAEjC;;;;;;;;OAQG;IACH,qBAAqB,CAAC,EAAE,MAAM,CAAC;IAE/B;;;;;OAKG;IACH,wBAAwB,CAAC,EAAE,OAAO,CAAC;IAEnC;;;OAGG;IACH,0BAA0B,CAAC,EAAE,MAAM,EAAE,CAAC;IAEtC;;;OAGG;IACH,8BAA8B,CAAC,EAAE,MAAM,EAAE,CAAC;CAC7C;AAED;;GAEG;AACH,MAAM,MAAM,gBAAgB,CAAC,OAAO,IAAI,CAAC,eAAe,EAAE,OAAO,EAAE,UAAU,EAAE,WAAW,KAAK,SAAS,CAAC,IAAI,CAAC,CAAC;AAE/G,MAAM,MAAM,QAAQ,CAChB,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,SAAS,GAAG,GAAG,IAChC,gBAAgB,CAAC,mBAAmB,CAAC,QAAQ,EAAE,QAAQ
,CAAC,CAAC,CAAC;AAE9D;;GAEG;AACH,MAAM,WAAW,2BAA2B,CACxC,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,SAAS,GAAG,GAAG,EAAE,2EAA2E;AAC7G,OAAO,GAAG,WAAW,CAAC,GAAG,CAAC,CAC5B,SAAQ,eAAe,CAAC,OAAO,EAAE,QAAQ,CAAC;IACxC;;;;;OAKG;IACH,IAAI,EAAE,MAAM,GAAG,MAAM,CAAC;IAEtB;;OAEG;IACH,IAAI,EAAE,QAAQ,CAAC;IAEf;;OAEG;IACH,WAAW,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,cAAc,CAAA;KAAE,CAAC;IACxD,QAAQ,EAAE,aAAa,CAAC;IAExB;;;;;;;;;;;OAWG;IACH,eAAe,CAAC,QAAQ,EAAE,MAAM,EAAE,SAAS,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAErE;;;;;;;;;;;OAWG;IACH,gBAAgB,CAAC,QAAQ,CAAC,EAAE,MAAM,EAAE,SAAS,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,CAAC,CAAC;CACjF;AAED,MAAM,WAAW,mBAAmB,CAAC,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,QAAQ,SAAS,SAAS,GAAG,GAAG,CACpG,SAAQ,2BAA2B,CAAC,QAAQ,EAAE,QAAQ,EAAE,WAAW,CAAC,mBAAmB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC;CAAG;AAEpH,MAAM,MAAM,kBAAkB,CAC1B,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,SAAS,GAAG,GAAG,IAChC,cAAc,CAAC,mBAAmB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC;AAE5D;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAkEG;AACH,qBAAa,WAAW,CACpB,OAAO,SAAS,2BAA2B,CAAC,GAAG,EAAE,GAAG,EAAE,WAAW,CAAC,OAAO,CAAC,CAAC,CAC7E,SAAQ,YAAY,CAAC,OAAO,CAAC;aA0CL,MAAM;IAzC5B;;;OAGG;IACH,kBAAkB,CAAC,EAAE,kBAAkB,CAAC;IAExC,SAAS,CAAC,+BAA+B,EAAE,MAAM,CAAC;IAClD,SAAS,CAAC,kBAAkB,EAAE,gBAAgB,CAAC,OAAO,CAAC,EAAE,CAAC;IAC1D,SAAS,CAAC,mBAAmB,EAAE,gBAAgB,CAAC,OAAO,CAAC,EAAE,CAAC;IAC3D,SAAS,CAAC,wBAAwB,EAAE,OAAO,CAAC;IAC5C,SAAS,CAAC,uBAAuB,EAAE,MAAM,CAAC;IAC1C,SAAS,CAAC,eAAe,EAAE,OAAO,CAAC;IACnC,SAAS,CAAC,uBAAuB,CAAC,EAAE,MAAM,CAAC;IAC3C,SAAS,CAAC,qBAAqB,CAAC,EAAE,MAAM,CAAC;IACzC,SAAS,CAAC,8BAA8B,EAAE,GAAG,CAAC,MAAM,CAAC,CAAC;IACtD,SAAS,CAAC,0BAA0B,EAAE,GAAG,CAAC,MAAM,CAAC,CAAC;IAClD,SAAS,CAAC,QAAQ,CAAC,kBAAkB,EAAE,GAAG,CAAC,MAAM,CAAC,CAAC;IAEnD,iBAA0B,YAAY;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;MAgBpC;IAEF;;OAEG;gBAEC,OAAO,GAAE,kBAAkB,CAAC,OAAO,CAAM,EACvB,MAAM,gBAAkC;IA0E9D;;;;OAIG;IACH,GAAG,CAAC,SAAS,EAAE,gBAAgB;IAkC/B;;OAEG;cACsB,kBAAkB,CAAC,eAAe,EAAE,OAAO;cAuF3C,gBAAgB,CAAC,eAAe
,EAAE,OAAO,GAAG,OAAO,CAAC,MAAM,GAAG,KAAK,CAAC;cAa5E,iBAAiB,CAAC,eAAe,EAAE,OAAO;IA6B1D;;OAEG;IACH,SAAS,CAAC,aAAa,CACnB,EAAE,OAAO,EAAE,OAAO,EAAE,EAAE,eAAe,EACrC,UAAU,EAAE,WAAW,EACvB,cAAc,EAAE,MAAM,EACtB,eAAe,EAAE,MAAM;IAkD3B;;;;OAIG;cACa,gBAAgB,CAAC,EAC7B,OAAO,EACP,OAAO,EACP,QAAQ,EACR,UAAU,GACb,EAAE,sBAAsB,GAAG,OAAO,CAAC,aAAa,CAAC;IAwBlD;;OAEG;cACa,cAAc,CAAC,OAAO,EAAE,OAAO,EAAE,cAAc,EAAE,eAAe,EAAE,eAAe,EAAE,OAAO;;;;;;;;;;;;;;;;;;;cA8C1F,UAAU,CACtB,QAAQ,EAAE,eAAe,EACzB,MAAM,EAAE,OAAO,EACf,gBAAgB,EAAE,OAAO,GAC1B,OAAO,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;IAM5B;;OAEG;IACH,SAAS,CAAC,kBAAkB,CAAC,OAAO,EAAE,OAAO,EAAE,OAAO,CAAC,EAAE,OAAO,EAAE,QAAQ,CAAC,EAAE,MAAM,EAAE,UAAU,CAAC,EAAE,WAAW;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;kBACpB,IAAI;;IAkC7F,SAAS,CAAC,eAAe,CACrB,OAAO,EAAE,OAAO,EAChB,QAAQ,EAAE,eAAe,EACzB,QAAQ,EAAE,cAAc,GACzB;QACC,QAAQ,EAAE,cAAc,CAAC;QACzB,QAAQ,EAAE,eAAe,CAAC;KAC7B;IAqCD;;OAEG;IACH,SAAS,CAAC,yBAAyB,CAAC,mBAAmB,EAAE,CAAC,MAAM,GAAG,WAAW,GAAG,YAAY,CAAC,EAAE;IAgBhG;;OAEG;IACH,SAAS,CAAC,qBAAqB,CAAC,OAAO,CAAC,EAAE,OAAO;IAKjD,OAAO,CAAC,oBAAoB;IAkB5B;;OAEG;IACH,OAAO,CAAC,iBAAiB,CAuBvB;CACL;AAED,UAAU,sBAAsB;IAC5B,OAAO,EAAE,OAAO,CAAC;IACjB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,WAAW,CAAC;CAC3B;AAkFD;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,wBAAgB,gBAAgB,CAC5B,OAAO,SAAS,mBAAmB,GAAG,mBAAmB,EACzD,QAAQ,SAAS,UAAU,GAAG,sBAAsB,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,EAC1E,MAAM,CAAC,EAAE,YAAY,CAAC,OAAO,EAAE,QAAQ,CAAC,mDAEzC"}
1
+ {"version":3,"file":"http-crawler.d.ts","sourceRoot":"","sources":["../../src/internals/http-crawler.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAuB,eAAe,EAAE,MAAM,WAAW,CAAC;AACtE,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAG5C,OAAO,KAAK,EAER,mBAAmB,EACnB,eAAe,EACf,YAAY,EACZ,sBAAsB,EACtB,OAAO,EACP,cAAc,EACd,sBAAsB,EACtB,YAAY,EACZ,OAAO,EACV,MAAM,gBAAgB,CAAC;AACxB,OAAO,EACH,YAAY,EACZ,aAAa,EACb,eAAe,EAMlB,MAAM,gBAAgB,CAAC;AACxB,OAAO,KAAK,EAAE,YAAY,EAAE,aAAa,EAAyB,MAAM,eAAe,CAAC;AACxF,OAAO,KAAK,EAAE,SAAS,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAC5D,OAAO,EAAE,KAAK,WAAW,EAAuB,MAAM,gBAAgB,CAAC;AAEvE,OAAO,KAAK,EAAE,WAAW,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAE9D,OAAO,KAAK,EAAE,MAAM,EAAE,WAAW,EAAqC,MAAM,cAAc,CAAC;AAG3F,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AAS3C;;;GAGG;AACH,MAAM,MAAM,aAAa,GAAG,IAAI,CAAC,YAAY,EAAE,MAAM,CAAC,GAClD,eAAe,GAAG;IACd,IAAI,CAAC,EAAE,OAAO,CAAC;CAClB,CAAC;AAkBN,MAAM,MAAM,gBAAgB,CACxB,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,SAAS,GAAG,GAAG,IAChC,YAAY,CAAC,mBAAmB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC;AAE1D,MAAM,WAAW,kBAAkB,CAC/B,OAAO,SAAS,2BAA2B,GAAG,2BAA2B,EACzE,eAAe,SAAS,OAAO,GAAG,OAAO,CAC3C,SAAQ,mBAAmB,CAAC,OAAO,EAAE,eAAe,CAAC;IACnD;;OAEG;IACH,qBAAqB,CAAC,EAAE,MAAM,CAAC;IAE/B;;OAEG;IACH,eAAe,CAAC,EAAE,OAAO,CAAC;IAE1B;;;;;;;;;;;;;;;OAeG;IACH,kBAAkB,CAAC,EAAE,gBAAgB,CAAC,eAAe,CAAC,EAAE,CAAC;IAEzD;;;;;;;;;;;OAWG;IACH,mBAAmB,CAAC,EAAE,CAAC,CAAC,eAAe,EAAE,0BAA0B,KAAK,SAAS,CAAC,IAAI,CAAC,CAAC,EAAE,CAAC;IAE3F;;;OAGG;IACH,mBAAmB,CAAC,EAAE,MAAM,EAAE,CAAC;IAE/B;;;;;;;;;;OAUG;IACH,uBAAuB,CAAC,EAAE,MAAM,CAAC;IAEjC;;;;;;;;OAQG;IACH,qBAAqB,CAAC,EAAE,MAAM,CAAC;IAE/B;;;;;OAKG;IACH,wBAAwB,CAAC,EAAE,OAAO,CAAC;IAEnC;;;OAGG;IACH,0BAA0B,CAAC,EAAE,MAAM,EAAE,CAAC;IAEtC;;;OAGG;IACH,8BAA8B,CAAC,EAAE,MAAM,EAAE,CAAC;CAC7C;AAED;;GAEG;AACH,MAAM,MAAM,gBAAgB,CAAC,OAAO,IAAI,CAAC,eAAe,EAAE,OAAO,EAAE,UAAU,EAAE,WAAW,KAAK,SAAS,CAAC,IAAI,CAAC,CAAC;AAE/G,MAAM,MAAM,QAAQ,CAChB,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,SAAS,GAAG,GAAG,IAChC,gBAAgB,CAAC,mBAAmB,CAAC,QAAQ,EAAE,QAAQ,CAA
C,CAAC,CAAC;AAE9D,UAAU,0BAA0B,CAChC,QAAQ,SAAS,UAAU,GAAG,GAAG,CACnC,SAAQ,eAAe,CAAC,QAAQ,CAAC;IAC/B;;OAEG;IACH,OAAO,EAAE,aAAa,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC;IAE1C;;OAEG;IACH,QAAQ,EAAE,aAAa,CAAC;CAC3B;AAED;;GAEG;AACH,MAAM,WAAW,2BAA2B,CACxC,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,SAAS,GAAG,GAAG,CAClC,SAAQ,0BAA0B,CAAC,QAAQ,CAAC;IAC1C;;;;;OAKG;IACH,IAAI,EAAE,MAAM,GAAG,MAAM,CAAC;IAEtB;;OAEG;IACH,IAAI,EAAE,QAAQ,CAAC;IAEf;;OAEG;IACH,WAAW,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,cAAc,CAAA;KAAE,CAAC;IAExD;;;;;;;;;;;OAWG;IACH,eAAe,CAAC,QAAQ,EAAE,MAAM,EAAE,SAAS,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAErE;;;;;;;;;;;OAWG;IACH,gBAAgB,CAAC,QAAQ,CAAC,EAAE,MAAM,EAAE,SAAS,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,CAAC,CAAC;CACjF;AAED,MAAM,WAAW,mBAAmB,CAAC,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,QAAQ,SAAS,SAAS,GAAG,GAAG,CACpG,SAAQ,2BAA2B,CAAC,QAAQ,EAAE,QAAQ,CAAC;CAAG;AAE9D,MAAM,MAAM,kBAAkB,CAC1B,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,SAAS,GAAG,GAAG,IAChC,cAAc,CAAC,mBAAmB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC;AAE5D;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAkEG;AACH,qBAAa,WAAW,CACpB,OAAO,SAAS,2BAA2B,CAAC,GAAG,EAAE,GAAG,CAAC,GAAG,2BAA2B,EACnF,gBAAgB,GAAG,EAAE,EACrB,eAAe,SAAS,OAAO,GAAG,OAAO,GAAG,gBAAgB,CAC9D,SAAQ,YAAY,CAAC,OAAO,EAAE,eAAe,CAAC;aAmCtB,MAAM;IAlC5B,SAAS,CAAC,kBAAkB,EAAE,gBAAgB,CAAC,eAAe,CAAC,EAAE,CAAC;IAClE,SAAS,CAAC,mBAAmB,EAAE,CAAC,CAAC,eAAe,EAAE,0BAA0B,KAAK,SAAS,CAAC,IAAI,CAAC,CAAC,EAAE,CAAC;IACpG,SAAS,CAAC,wBAAwB,EAAE,OAAO,CAAC;IAC5C,SAAS,CAAC,uBAAuB,EAAE,MAAM,CAAC;IAC1C,SAAS,CAAC,eAAe,EAAE,OAAO,CAAC;IACnC,SAAS,CAAC,uBAAuB,CAAC,EAAE,MAAM,CAAC;IAC3C,SAAS,CAAC,qBAAqB,CAAC,EAAE,MAAM,CAAC;IACzC,SAAS,CAAC,8BAA8B,EAAE,GAAG,CAAC,MAAM,CAAC,CAAC;IACtD,SAAS,CAAC,0BAA0B,EAAE,GAAG,CAAC,MAAM,CAAC,CAAC;IAClD,SAAS,CAAC,QAAQ,CAAC,kBAAkB,EAAE,GAAG,CAAC,MAAM,CAAC,CAAC;IAEnD,iBAA0B,YAAY;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;MAepC;IAEF;;OAEG;gBAEC,OAAO,GAAE,kBAAkB,CAAC,OAAO,EAAE,eAAe,CAAC,GACjD,sBAAsB,CAAC,2BAA2B,EAAE,OAAO,CAAa,EAC1D,MAAM,gBAAkC;
IAkE9D,SAAS,CAAC,oBAAoB,IAAI,eAAe,CAAC,eAAe,EAAE,2BAA2B,CAAC;YASjF,eAAe;YAmDf,mBAAmB;YAuEnB,6BAA6B;cAQ3B,gBAAgB,CAAC,eAAe,EAAE,2BAA2B,GAAG,OAAO,CAAC,MAAM,GAAG,KAAK,CAAC;IAavG;;OAEG;IACH,SAAS,CAAC,aAAa,CACnB,EAAE,OAAO,EAAE,OAAO,EAAE,EAAE,eAAe,EACrC,UAAU,EAAE,WAAW,EACvB,cAAc,EAAE,MAAM,EACtB,eAAe,EAAE,MAAM;IAkD3B;;;;OAIG;cACa,gBAAgB,CAAC,EAC7B,OAAO,EACP,OAAO,EACP,QAAQ,EACR,UAAU,GACb,EAAE,sBAAsB,GAAG,OAAO,CAAC,aAAa,CAAC;IAwBlD;;OAEG;YACW,cAAc;IA2C5B;;OAEG;IACH,SAAS,CAAC,kBAAkB,CAAC,OAAO,EAAE,OAAO,EAAE,OAAO,CAAC,EAAE,OAAO,EAAE,QAAQ,CAAC,EAAE,MAAM,EAAE,UAAU,CAAC,EAAE,WAAW;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;kBACpB,IAAI;;IA+B7F,SAAS,CAAC,eAAe,CACrB,OAAO,EAAE,OAAO,EAChB,QAAQ,EAAE,eAAe,EACzB,QAAQ,EAAE,cAAc,GACzB;QACC,QAAQ,EAAE,cAAc,CAAC;QACzB,QAAQ,EAAE,eAAe,CAAC;KAC7B;IAqCD;;OAEG;IACH,SAAS,CAAC,yBAAyB,CAAC,mBAAmB,EAAE,CAAC,MAAM,GAAG,WAAW,GAAG,YAAY,CAAC,EAAE;IAgBhG;;OAEG;IACH,SAAS,CAAC,qBAAqB,CAAC,OAAO,CAAC,EAAE,OAAO;IAKjD,OAAO,CAAC,oBAAoB;IAkB5B;;OAEG;IACH,OAAO,CAAC,iBAAiB,CAuBvB;CACL;AAED,UAAU,sBAAsB;IAC5B,OAAO,EAAE,OAAO,CAAC;IACjB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,WAAW,CAAC;CAC3B;AA4CD;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,wBAAgB,gBAAgB,CAC5B,OAAO,SAAS,mBAAmB,GAAG,mBAAmB,EACzD,QAAQ,SAAS,UAAU,GAAG,sBAAsB,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,EAC1E,MAAM,CAAC,EAAE,YAAY,CAAC,OAAO,EAAE,QAAQ,CAAC,mDAEzC"}