@crawlee/http 3.0.3-beta.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE.md +201 -0
- package/README.md +158 -0
- package/index.d.ts +3 -0
- package/index.d.ts.map +1 -0
- package/index.js +6 -0
- package/index.js.map +1 -0
- package/index.mjs +73 -0
- package/internals/http-crawler.d.ts +344 -0
- package/internals/http-crawler.d.ts.map +1 -0
- package/internals/http-crawler.js +629 -0
- package/internals/http-crawler.js.map +1 -0
- package/package.json +67 -0
- package/tsconfig.build.tsbuildinfo +1 -0
|
@@ -0,0 +1,344 @@
|
|
|
1
|
+
/// <reference types="node" />
|
|
2
|
+
/// <reference types="node" />
|
|
3
|
+
import type { BasicCrawlerOptions, ErrorHandler, RequestHandler, CrawlingContext, ProxyConfiguration, Request, Session } from '@crawlee/basic';
|
|
4
|
+
import { BasicCrawler, CrawlerExtension, Configuration } from '@crawlee/basic';
|
|
5
|
+
import type { Awaitable, Dictionary } from '@crawlee/types';
|
|
6
|
+
import type { RequestLike, ResponseLike } from 'content-type';
|
|
7
|
+
import type { OptionsInit, Response as GotResponse, GotOptionsInit } from 'got-scraping';
|
|
8
|
+
import type { IncomingMessage } from 'node:http';
|
|
9
|
+
export declare type HttpErrorHandler<UserData extends Dictionary = any, // defaults to `any`: with a `Dictionary` default we can't use a typed router in an untyped crawler
|
|
10
|
+
JSONData extends Dictionary = Dictionary> = ErrorHandler<HttpCrawlingContext<UserData, JSONData>>; // `ErrorHandler` from @crawlee/basic, specialized to `HttpCrawlingContext`
|
|
11
|
+
export interface HttpCrawlerOptions<Context extends InternalHttpCrawlingContext = InternalHttpCrawlingContext> extends BasicCrawlerOptions<Context> {
|
|
12
|
+
/**
|
|
13
|
+
* An alias for {@apilink HttpCrawlerOptions.requestHandler}
|
|
14
|
+
* Soon to be removed, use `requestHandler` instead.
|
|
15
|
+
* @deprecated
|
|
16
|
+
*/
|
|
17
|
+
handlePageFunction?: HttpCrawlerOptions<Context>['requestHandler'];
|
|
18
|
+
/**
|
|
19
|
+
* Timeout in which the HTTP request to the resource needs to finish, given in seconds.
|
|
20
|
+
*/
|
|
21
|
+
navigationTimeoutSecs?: number;
|
|
22
|
+
/**
|
|
23
|
+
* If set to true, SSL certificate errors will be ignored.
|
|
24
|
+
*/
|
|
25
|
+
ignoreSslErrors?: boolean;
|
|
26
|
+
/**
|
|
27
|
+
* If set, this crawler will be configured for all connections to use
|
|
28
|
+
* [Apify Proxy](https://console.apify.com/proxy) or your own Proxy URLs provided and rotated according to the configuration.
|
|
29
|
+
* For more information, see the [documentation](https://docs.apify.com/proxy).
|
|
30
|
+
*/
|
|
31
|
+
proxyConfiguration?: ProxyConfiguration;
|
|
32
|
+
/**
|
|
33
|
+
* Async functions that are sequentially evaluated before the navigation. Good for setting additional cookies
|
|
34
|
+
* or browser properties before navigation. The function accepts two parameters, `crawlingContext` and `gotOptions`,
|
|
35
|
+
* which are passed to the `requestAsBrowser()` function the crawler calls to navigate.
|
|
36
|
+
* Example:
|
|
37
|
+
* ```
|
|
38
|
+
* preNavigationHooks: [
|
|
39
|
+
* async (crawlingContext, gotOptions) => {
|
|
40
|
+
* // ...
|
|
41
|
+
* },
|
|
42
|
+
* ]
|
|
43
|
+
* ```
|
|
44
|
+
*/
|
|
45
|
+
preNavigationHooks?: InternalHttpHook<Context>[];
|
|
46
|
+
/**
|
|
47
|
+
* Async functions that are sequentially evaluated after the navigation. Good for checking if the navigation was successful.
|
|
48
|
+
* The function accepts `crawlingContext` as its first parameter; the declared hook type also includes a second `gotOptions` parameter.
|
|
49
|
+
* Example:
|
|
50
|
+
* ```
|
|
51
|
+
* postNavigationHooks: [
|
|
52
|
+
* async (crawlingContext) => {
|
|
53
|
+
* // ...
|
|
54
|
+
* },
|
|
55
|
+
* ]
|
|
56
|
+
* ```
|
|
57
|
+
*/
|
|
58
|
+
postNavigationHooks?: InternalHttpHook<Context>[];
|
|
59
|
+
/**
|
|
60
|
+
* An array of [MIME types](https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types/Complete_list_of_MIME_types)
|
|
61
|
+
* you want the crawler to load and process. By default, only `text/html` and `application/xhtml+xml` MIME types are supported.
|
|
62
|
+
*/
|
|
63
|
+
additionalMimeTypes?: string[];
|
|
64
|
+
/**
|
|
65
|
+
* By default this crawler will extract correct encoding from the HTTP response headers.
|
|
66
|
+
* Sadly, some websites send invalid headers. Responses from those websites are treated as UTF-8 encoded.
|
|
67
|
+
* If those sites actually use a different encoding, the response will be corrupted. You can use
|
|
68
|
+
* `suggestResponseEncoding` to fall back to a certain encoding, if you know that your target website uses it.
|
|
69
|
+
* To force a certain encoding, disregarding the response headers, use {@apilink HttpCrawlerOptions.forceResponseEncoding}
|
|
70
|
+
* ```
|
|
71
|
+
* // Will fall back to windows-1250 encoding if none found
|
|
72
|
+
* suggestResponseEncoding: 'windows-1250'
|
|
73
|
+
* ```
|
|
74
|
+
*/
|
|
75
|
+
suggestResponseEncoding?: string;
|
|
76
|
+
/**
|
|
77
|
+
* By default this crawler will extract correct encoding from the HTTP response headers. Use `forceResponseEncoding`
|
|
78
|
+
* to force a certain encoding, disregarding the response headers.
|
|
79
|
+
* To only provide a default for missing encodings, use {@apilink HttpCrawlerOptions.suggestResponseEncoding}
|
|
80
|
+
* ```
|
|
81
|
+
* // Will force windows-1250 encoding even if headers say otherwise
|
|
82
|
+
* forceResponseEncoding: 'windows-1250'
|
|
83
|
+
* ```
|
|
84
|
+
*/
|
|
85
|
+
forceResponseEncoding?: string;
|
|
86
|
+
/**
|
|
87
|
+
* Automatically saves cookies to Session. Works only if Session Pool is used.
|
|
88
|
+
*
|
|
89
|
+
* It parses cookies from the response "set-cookie" header and saves or updates them in the session.
|
|
90
|
+
* When the session is used for the next request, the session cookies are passed in the request's "Cookie" header.
|
|
91
|
+
*/
|
|
92
|
+
persistCookiesPerSession?: boolean;
|
|
93
|
+
}
|
|
94
|
+
/**
|
|
95
|
+
* @internal
|
|
96
|
+
*/
|
|
97
|
+
// Hook signature shared by the `preNavigationHooks` and `postNavigationHooks` options:
// it receives the crawling context plus the got-scraping `OptionsInit` and returns `Awaitable<void>`.
export declare type InternalHttpHook<Context> = (
    crawlingContext: Context,
    gotOptions: OptionsInit
) => Awaitable<void>;
|
|
98
|
+
export declare type HttpHook<UserData extends Dictionary = any, // defaults to `any`: with a `Dictionary` default we can't use a typed router in an untyped crawler
|
|
99
|
+
JSONData extends Dictionary = Dictionary> = InternalHttpHook<HttpCrawlingContext<UserData, JSONData>>; // `InternalHttpHook` specialized to `HttpCrawlingContext`
|
|
100
|
+
/**
|
|
101
|
+
* @internal
|
|
102
|
+
*/
|
|
103
|
+
export interface InternalHttpCrawlingContext<UserData extends Dictionary = any, // defaults to `any`: with a `Dictionary` default we can't use a typed router in an untyped crawler
|
|
104
|
+
JSONData extends Dictionary = Dictionary, Crawler = HttpCrawler<any>> extends CrawlingContext<UserData> { // base context shape that `HttpCrawler` and `HttpCrawlerOptions` constrain their `Context` parameter to
|
|
105
|
+
/**
|
|
106
|
+
* The response body of the web page.
|
|
107
|
+
* The type depends on the `Content-Type` header of the web page:
|
|
108
|
+
* - String for `text/html`, `application/xhtml+xml`, `application/xml` MIME content types
|
|
109
|
+
* - Buffer for other MIME content types
|
|
110
|
+
*/
|
|
111
|
+
body: (string | Buffer);
|
|
112
|
+
/**
|
|
113
|
+
* The parsed object from JSON string if the response contains the content type application/json.
|
|
114
|
+
*/
|
|
115
|
+
json: JSONData;
|
|
116
|
+
/**
|
|
117
|
+
* Parsed `Content-Type` header: `{ type, encoding }`.
|
|
118
|
+
*/
|
|
119
|
+
contentType: {
|
|
120
|
+
type: string;
|
|
121
|
+
encoding: BufferEncoding;
|
|
122
|
+
};
|
|
123
|
+
crawler: Crawler; // typed by the `Crawler` type parameter, which defaults to `HttpCrawler<any>`
|
|
124
|
+
response: IncomingMessage; // `IncomingMessage` from `node:http`
|
|
125
|
+
sendRequest: (overrideOptions?: Partial<GotOptionsInit>) => Promise<GotResponse<string>>; // takes optional partial got options and resolves to a got-scraping `Response<string>`
|
|
126
|
+
}
|
|
127
|
+
export interface HttpCrawlingContext<UserData = any, JSONData = any> extends InternalHttpCrawlingContext<UserData, JSONData, HttpCrawler<HttpCrawlingContext<UserData, JSONData>>> { // adds no members; pins the `Crawler` parameter to an `HttpCrawler` over this same context type
|
|
128
|
+
}
|
|
129
|
+
export declare type HttpRequestHandler<UserData extends Dictionary = any, // defaults to `any`: with a `Dictionary` default we can't use a typed router in an untyped crawler
|
|
130
|
+
JSONData extends Dictionary = Dictionary> = RequestHandler<HttpCrawlingContext<UserData, JSONData>>; // `RequestHandler` from @crawlee/basic, specialized to `HttpCrawlingContext`
|
|
131
|
+
/**
|
|
132
|
+
* Provides a framework for the parallel crawling of web pages using plain HTTP requests.
|
|
133
|
+
* The URLs to crawl are fed either from a static list of URLs
|
|
134
|
+
* or from a dynamic queue of URLs enabling recursive crawling of websites.
|
|
135
|
+
*
|
|
136
|
+
* It is very fast and efficient on data bandwidth. However, if the target website requires JavaScript
|
|
137
|
+
* to display the content, you might need to use {@apilink PuppeteerCrawler} or {@apilink PlaywrightCrawler} instead,
|
|
138
|
+
* because they load the pages using a full-featured headless Chrome browser.
|
|
139
|
+
*
|
|
140
|
+
* This crawler downloads each URL using a plain HTTP request and doesn't do any HTML parsing.
|
|
141
|
+
*
|
|
142
|
+
* The source URLs are represented using {@apilink Request} objects that are fed from
|
|
143
|
+
* {@apilink RequestList} or {@apilink RequestQueue} instances provided by the {@apilink HttpCrawlerOptions.requestList}
|
|
144
|
+
* or {@apilink HttpCrawlerOptions.requestQueue} constructor options, respectively.
|
|
145
|
+
*
|
|
146
|
+
* If both {@apilink HttpCrawlerOptions.requestList} and {@apilink HttpCrawlerOptions.requestQueue} are used,
|
|
147
|
+
* the instance first processes URLs from the {@apilink RequestList} and automatically enqueues all of them
|
|
148
|
+
* to {@apilink RequestQueue} before it starts their processing. This ensures that a single URL is not crawled multiple times.
|
|
149
|
+
*
|
|
150
|
+
* The crawler finishes when there are no more {@apilink Request} objects to crawl.
|
|
151
|
+
*
|
|
152
|
+
* We can use the `preNavigationHooks` to adjust `gotOptions`:
|
|
153
|
+
*
|
|
154
|
+
* ```javascript
|
|
155
|
+
* preNavigationHooks: [
|
|
156
|
+
* (crawlingContext, gotOptions) => {
|
|
157
|
+
* // ...
|
|
158
|
+
* },
|
|
159
|
+
* ]
|
|
160
|
+
* ```
|
|
161
|
+
*
|
|
162
|
+
* By default, this crawler only processes web pages with the `text/html`
|
|
163
|
+
* and `application/xhtml+xml` MIME content types (as reported by the `Content-Type` HTTP header),
|
|
164
|
+
* and skips pages with other content types. If you want the crawler to process other content types,
|
|
165
|
+
* use the {@apilink HttpCrawlerOptions.additionalMimeTypes} constructor option.
|
|
166
|
+
* Beware that the parsing behavior differs for HTML, XML, JSON and other types of content.
|
|
167
|
+
* For details, see {@apilink HttpCrawlerOptions.requestHandler}.
|
|
168
|
+
*
|
|
169
|
+
* New requests are only dispatched when there is enough free CPU and memory available,
|
|
170
|
+
* using the functionality provided by the {@apilink AutoscaledPool} class.
|
|
171
|
+
* All {@apilink AutoscaledPool} configuration options can be passed to the `autoscaledPoolOptions`
|
|
172
|
+
* parameter of the constructor. For user convenience, the `minConcurrency` and `maxConcurrency`
|
|
173
|
+
* {@apilink AutoscaledPool} options are available directly in the constructor.
|
|
174
|
+
*
|
|
175
|
+
* **Example usage:**
|
|
176
|
+
*
|
|
177
|
+
* ```javascript
|
|
178
|
+
* import { HttpCrawler, Dataset } from '@crawlee/http';
|
|
179
|
+
*
|
|
180
|
+
* const crawler = new HttpCrawler({
|
|
181
|
+
* requestList,
|
|
182
|
+
* async requestHandler({ request, response, body, contentType }) {
|
|
183
|
+
* // Save the data to dataset.
|
|
184
|
+
* await Dataset.pushData({
|
|
185
|
+
* url: request.url,
|
|
186
|
+
* html: body,
|
|
187
|
+
* });
|
|
188
|
+
* },
|
|
189
|
+
* });
|
|
190
|
+
*
|
|
191
|
+
* await crawler.run([
|
|
192
|
+
* 'http://www.example.com/page-1',
|
|
193
|
+
* 'http://www.example.com/page-2',
|
|
194
|
+
* ]);
|
|
195
|
+
* ```
|
|
196
|
+
* @category Crawlers
|
|
197
|
+
*/
|
|
198
|
+
export declare class HttpCrawler<Context extends InternalHttpCrawlingContext<any, any, HttpCrawler<Context>>> extends BasicCrawler<Context> {
|
|
199
|
+
readonly config: Configuration;
|
|
200
|
+
/**
|
|
201
|
+
* A reference to the underlying {@apilink ProxyConfiguration} class that manages the crawler's proxies.
|
|
202
|
+
* Only available if used by the crawler.
|
|
203
|
+
*/
|
|
204
|
+
proxyConfiguration?: ProxyConfiguration;
|
|
205
|
+
protected userRequestHandlerTimeoutMillis: number;
|
|
206
|
+
protected preNavigationHooks: InternalHttpHook<Context>[];
|
|
207
|
+
protected postNavigationHooks: InternalHttpHook<Context>[];
|
|
208
|
+
protected persistCookiesPerSession: boolean;
|
|
209
|
+
protected navigationTimeoutMillis: number;
|
|
210
|
+
protected ignoreSslErrors: boolean;
|
|
211
|
+
protected suggestResponseEncoding?: string;
|
|
212
|
+
protected forceResponseEncoding?: string;
|
|
213
|
+
protected readonly supportedMimeTypes: Set<string>;
|
|
214
|
+
protected static optionsShape: {
|
|
215
|
+
handlePageFunction: import("ow").Predicate<Function> & import("ow").BasePredicate<Function | undefined>;
|
|
216
|
+
navigationTimeoutSecs: import("ow").NumberPredicate & import("ow").BasePredicate<number | undefined>;
|
|
217
|
+
ignoreSslErrors: import("ow").BooleanPredicate & import("ow").BasePredicate<boolean | undefined>;
|
|
218
|
+
additionalMimeTypes: import("ow").ArrayPredicate<string>;
|
|
219
|
+
suggestResponseEncoding: import("ow").StringPredicate & import("ow").BasePredicate<string | undefined>;
|
|
220
|
+
forceResponseEncoding: import("ow").StringPredicate & import("ow").BasePredicate<string | undefined>;
|
|
221
|
+
proxyConfiguration: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
|
|
222
|
+
persistCookiesPerSession: import("ow").BooleanPredicate & import("ow").BasePredicate<boolean | undefined>;
|
|
223
|
+
preNavigationHooks: import("ow").ArrayPredicate<unknown> & import("ow").BasePredicate<unknown[] | undefined>;
|
|
224
|
+
postNavigationHooks: import("ow").ArrayPredicate<unknown> & import("ow").BasePredicate<unknown[] | undefined>;
|
|
225
|
+
requestList: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
|
|
226
|
+
requestQueue: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
|
|
227
|
+
requestHandler: import("ow").Predicate<Function> & import("ow").BasePredicate<Function | undefined>;
|
|
228
|
+
handleRequestFunction: import("ow").Predicate<Function> & import("ow").BasePredicate<Function | undefined>;
|
|
229
|
+
requestHandlerTimeoutSecs: import("ow").NumberPredicate & import("ow").BasePredicate<number | undefined>;
|
|
230
|
+
handleRequestTimeoutSecs: import("ow").NumberPredicate & import("ow").BasePredicate<number | undefined>;
|
|
231
|
+
errorHandler: import("ow").Predicate<Function> & import("ow").BasePredicate<Function | undefined>;
|
|
232
|
+
failedRequestHandler: import("ow").Predicate<Function> & import("ow").BasePredicate<Function | undefined>;
|
|
233
|
+
handleFailedRequestFunction: import("ow").Predicate<Function> & import("ow").BasePredicate<Function | undefined>;
|
|
234
|
+
maxRequestRetries: import("ow").NumberPredicate & import("ow").BasePredicate<number | undefined>;
|
|
235
|
+
maxRequestsPerCrawl: import("ow").NumberPredicate & import("ow").BasePredicate<number | undefined>;
|
|
236
|
+
autoscaledPoolOptions: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
|
|
237
|
+
sessionPoolOptions: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
|
|
238
|
+
useSessionPool: import("ow").BooleanPredicate & import("ow").BasePredicate<boolean | undefined>;
|
|
239
|
+
minConcurrency: import("ow").NumberPredicate & import("ow").BasePredicate<number | undefined>;
|
|
240
|
+
maxConcurrency: import("ow").NumberPredicate & import("ow").BasePredicate<number | undefined>;
|
|
241
|
+
maxRequestsPerMinute: import("ow").NumberPredicate & import("ow").BasePredicate<number | undefined>;
|
|
242
|
+
log: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
|
|
243
|
+
};
|
|
244
|
+
/**
|
|
245
|
+
* All `HttpCrawlerOptions` parameters are passed via an options object.
|
|
246
|
+
*/
|
|
247
|
+
constructor(options?: HttpCrawlerOptions<Context>, config?: Configuration);
|
|
248
|
+
/**
|
|
249
|
+
* **EXPERIMENTAL**
|
|
250
|
+
* Function for attaching CrawlerExtensions such as the Unblockers.
|
|
251
|
+
* @param extension Crawler extension that overrides the crawler configuration.
|
|
252
|
+
*/
|
|
253
|
+
use(extension: CrawlerExtension): void;
|
|
254
|
+
/**
|
|
255
|
+
* Wrapper around requestHandler that opens and closes pages etc.
|
|
256
|
+
*/
|
|
257
|
+
protected _runRequestHandler(crawlingContext: Context): Promise<void>;
|
|
258
|
+
protected _handleNavigation(crawlingContext: Context): Promise<void>;
|
|
259
|
+
/**
|
|
260
|
+
* Sets the cookie header to `gotOptions` based on the provided request and session headers, as well as any changes that occurred due to hooks.
|
|
261
|
+
*/
|
|
262
|
+
private _applyCookies;
|
|
263
|
+
/**
|
|
264
|
+
* Function to make the HTTP request. It performs optimizations
|
|
265
|
+
* on the request such as only downloading the response body if the
|
|
266
|
+
* received content type matches text/html, application/xml, application/xhtml+xml.
|
|
267
|
+
*/
|
|
268
|
+
protected _requestFunction({ request, session, proxyUrl, gotOptions }: RequestFunctionOptions): Promise<IncomingMessage>;
|
|
269
|
+
/**
|
|
270
|
+
* Encodes and parses response according to the provided content type
|
|
271
|
+
*/
|
|
272
|
+
protected _parseResponse(request: Request, responseStream: IncomingMessage, crawlingContext: Context): Promise<(Awaited<Partial<Context>> & {
|
|
273
|
+
isXml: boolean;
|
|
274
|
+
response: IncomingMessage;
|
|
275
|
+
contentType: {
|
|
276
|
+
type: string;
|
|
277
|
+
encoding: BufferEncoding;
|
|
278
|
+
};
|
|
279
|
+
}) | {
|
|
280
|
+
body: Buffer;
|
|
281
|
+
response: IncomingMessage;
|
|
282
|
+
contentType: {
|
|
283
|
+
type: string;
|
|
284
|
+
encoding: BufferEncoding;
|
|
285
|
+
};
|
|
286
|
+
}>;
|
|
287
|
+
protected _parseHTML(response: IncomingMessage, _isXml: boolean, _crawlingContext: Context): Promise<Partial<Context>>;
|
|
288
|
+
/**
|
|
289
|
+
* Combines the provided `requestOptions` with mandatory (non-overridable) values.
|
|
290
|
+
*/
|
|
291
|
+
protected _getRequestOptions(request: Request, session?: Session, proxyUrl?: string, gotOptions?: OptionsInit): OptionsInit & {
|
|
292
|
+
isStream: true;
|
|
293
|
+
};
|
|
294
|
+
protected _encodeResponse(request: Request, response: IncomingMessage, encoding: BufferEncoding): {
|
|
295
|
+
encoding: BufferEncoding;
|
|
296
|
+
response: IncomingMessage;
|
|
297
|
+
};
|
|
298
|
+
/**
|
|
299
|
+
* Checks and extends supported mime types
|
|
300
|
+
*/
|
|
301
|
+
protected _extendSupportedMimeTypes(additionalMimeTypes: (string | RequestLike | ResponseLike)[]): void;
|
|
302
|
+
/**
|
|
303
|
+
* Handles a request timeout
|
|
304
|
+
*/
|
|
305
|
+
protected _handleRequestTimeout(session?: Session): void;
|
|
306
|
+
private _abortDownloadOfBody;
|
|
307
|
+
/**
|
|
308
|
+
* @internal wraps public utility for mocking purposes
|
|
309
|
+
*/
|
|
310
|
+
private _requestAsBrowser;
|
|
311
|
+
}
|
|
312
|
+
interface RequestFunctionOptions { // shape of the object destructured by `HttpCrawler._requestFunction`
|
|
313
|
+
request: Request; // `Request` type imported from @crawlee/basic
|
|
314
|
+
session?: Session; // optional `Session` type imported from @crawlee/basic
|
|
315
|
+
proxyUrl?: string; // optional
|
|
316
|
+
gotOptions: OptionsInit; // `OptionsInit` type imported from got-scraping
|
|
317
|
+
}
|
|
318
|
+
/**
|
|
319
|
+
* Creates a new {@apilink Router} instance that works based on request labels.
|
|
320
|
+
* This instance can then serve as a `requestHandler` of your {@apilink HttpCrawler}.
|
|
321
|
+
* Defaults to the {@apilink HttpCrawlingContext}.
|
|
322
|
+
*
|
|
323
|
+
* > Serves as a shortcut for using `Router.create<HttpCrawlingContext>()`.
|
|
324
|
+
*
|
|
325
|
+
* ```ts
|
|
326
|
+
* import { HttpCrawler, createHttpRouter } from 'crawlee';
|
|
327
|
+
*
|
|
328
|
+
* const router = createHttpRouter();
|
|
329
|
+
* router.addHandler('label-a', async (ctx) => {
|
|
330
|
+
* ctx.log.info('...');
|
|
331
|
+
* });
|
|
332
|
+
* router.addDefaultHandler(async (ctx) => {
|
|
333
|
+
* ctx.log.info('...');
|
|
334
|
+
* });
|
|
335
|
+
*
|
|
336
|
+
* const crawler = new HttpCrawler({
|
|
337
|
+
* requestHandler: router,
|
|
338
|
+
* });
|
|
339
|
+
* await crawler.run();
|
|
340
|
+
* ```
|
|
341
|
+
*/
|
|
342
|
+
// Ambient declaration: `Context` must extend `HttpCrawlingContext` (which is also its default),
// and the call returns a `RouterHandler<Context>` from @crawlee/basic.
export declare function createHttpRouter<
    Context extends HttpCrawlingContext = HttpCrawlingContext
>(): import("@crawlee/basic").RouterHandler<Context>;
|
|
343
|
+
export {};
|
|
344
|
+
//# sourceMappingURL=http-crawler.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"http-crawler.d.ts","sourceRoot":"","sources":["../../src/internals/http-crawler.ts"],"names":[],"mappings":";;AAEA,OAAO,KAAK,EAER,mBAAmB,EACnB,YAAY,EACZ,cAAc,EACd,eAAe,EACf,kBAAkB,EAClB,OAAO,EACP,OAAO,EACV,MAAM,gBAAgB,CAAC;AACxB,OAAO,EACH,YAAY,EAEZ,gBAAgB,EAIhB,aAAa,EAChB,MAAM,gBAAgB,CAAC;AACxB,OAAO,KAAK,EAAE,SAAS,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAC5D,OAAO,KAAK,EAAE,WAAW,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAG9D,OAAO,KAAK,EAAE,WAAW,EAAiC,QAAQ,IAAI,WAAW,EAAE,cAAc,EAAE,MAAM,cAAc,CAAC;AAGxH,OAAO,KAAK,EAAuB,eAAe,EAAE,MAAM,WAAW,CAAC;AAqBtE,oBAAY,gBAAgB,CACxB,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,UAAU,IACpC,YAAY,CAAC,mBAAmB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC;AAE9D,MAAM,WAAW,kBAAkB,CAAC,OAAO,SAAS,2BAA2B,GAAG,2BAA2B,CAAE,SAAQ,mBAAmB,CAAC,OAAO,CAAC;IAC/I;;;;OAIG;IACH,kBAAkB,CAAC,EAAE,kBAAkB,CAAC,OAAO,CAAC,CAAC,gBAAgB,CAAC,CAAC;IAEnE;;OAEG;IACH,qBAAqB,CAAC,EAAE,MAAM,CAAC;IAE/B;;OAEG;IACH,eAAe,CAAC,EAAE,OAAO,CAAC;IAE1B;;;;OAIG;IACH,kBAAkB,CAAC,EAAE,kBAAkB,CAAC;IAExC;;;;;;;;;;;;OAYG;IACH,kBAAkB,CAAC,EAAE,gBAAgB,CAAC,OAAO,CAAC,EAAE,CAAC;IAEjD;;;;;;;;;;;OAWG;IACH,mBAAmB,CAAC,EAAE,gBAAgB,CAAC,OAAO,CAAC,EAAE,CAAC;IAElD;;;OAGG;IACH,mBAAmB,CAAC,EAAE,MAAM,EAAE,CAAC;IAE/B;;;;;;;;;;OAUG;IACH,uBAAuB,CAAC,EAAE,MAAM,CAAC;IAEjC;;;;;;;;OAQG;IACH,qBAAqB,CAAC,EAAE,MAAM,CAAC;IAE/B;;;;;OAKG;IACH,wBAAwB,CAAC,EAAE,OAAO,CAAC;CACtC;AAED;;GAEG;AACH,oBAAY,gBAAgB,CAAC,OAAO,IAAI,CACpC,eAAe,EAAE,OAAO,EACxB,UAAU,EAAE,WAAW,KACtB,SAAS,CAAC,IAAI,CAAC,CAAC;AAErB,oBAAY,QAAQ,CAChB,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,UAAU,IACxC,gBAAgB,CAAC,mBAAmB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC;AAE9D;;GAEG;AACH,MAAM,WAAW,2BAA2B,CACxC,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,UAAU,EACxC,OAAO,GAAG,WAAW,CAAC,GAAG,CAAC,CACxB,SAAQ,eAAe,CAAC,QAAQ,CAAC;IACnC;;;;;OAKG;IACH,IAAI,EAAE,CAAC,MAAM,GAAG,MAAM,CAAC,CAAC;IAExB;;OAEG;IACH,IAAI,EAAE,QAAQ,CAAC;IAEf;;OAEG;IACH,WAAW,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,cAAc,CAAA;KAAE,CAAC;IACxD,OAAO,EAA
E,OAAO,CAAC;IACjB,QAAQ,EAAE,eAAe,CAAC;IAC1B,WAAW,EAAE,CAAC,eAAe,CAAC,EAAE,OAAO,CAAC,cAAc,CAAC,KAAK,OAAO,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC;CAC5F;AAED,MAAM,WAAW,mBAAmB,CAAC,QAAQ,GAAG,GAAG,EAAE,QAAQ,GAAG,GAAG,CAC/D,SAAQ,2BAA2B,CAAC,QAAQ,EAAE,QAAQ,EAAE,WAAW,CAAC,mBAAmB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC;CAAG;AAEpH,oBAAY,kBAAkB,CAC1B,QAAQ,SAAS,UAAU,GAAG,GAAG,EAAE,2EAA2E;AAC9G,QAAQ,SAAS,UAAU,GAAG,UAAU,IACpC,cAAc,CAAC,mBAAmB,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC;AAEhE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAkEG;AACH,qBAAa,WAAW,CAAC,OAAO,SAAS,2BAA2B,CAAC,GAAG,EAAE,GAAG,EAAE,WAAW,CAAC,OAAO,CAAC,CAAC,CAAE,SAAQ,YAAY,CAAC,OAAO,CAAC;aAoCtD,MAAM;IAnC/E;;;OAGG;IACH,kBAAkB,CAAC,EAAE,kBAAkB,CAAC;IAExC,SAAS,CAAC,+BAA+B,EAAE,MAAM,CAAC;IAClD,SAAS,CAAC,kBAAkB,EAAE,gBAAgB,CAAC,OAAO,CAAC,EAAE,CAAC;IAC1D,SAAS,CAAC,mBAAmB,EAAE,gBAAgB,CAAC,OAAO,CAAC,EAAE,CAAC;IAC3D,SAAS,CAAC,wBAAwB,EAAE,OAAO,CAAC;IAC5C,SAAS,CAAC,uBAAuB,EAAE,MAAM,CAAC;IAC1C,SAAS,CAAC,eAAe,EAAE,OAAO,CAAC;IACnC,SAAS,CAAC,uBAAuB,CAAC,EAAE,MAAM,CAAC;IAC3C,SAAS,CAAC,qBAAqB,CAAC,EAAE,MAAM,CAAC;IACzC,SAAS,CAAC,QAAQ,CAAC,kBAAkB,EAAE,GAAG,CAAC,MAAM,CAAC,CAAC;IAEnD,iBAA0B,YAAY;;;;;;;;;;;;;;;;;;;;;;;;;;;;;MAcpC;IAEF;;OAEG;gBACS,OAAO,GAAE,kBAAkB,CAAC,OAAO,CAAM,EAAoB,MAAM,gBAAkC;IA+EjH;;;;OAIG;IACH,GAAG,CAAC,SAAS,EAAE,gBAAgB;IA8B/B;;OAEG;cACsB,kBAAkB,CAAC,eAAe,EAAE,OAAO;cA4CpD,iBAAiB,CAAC,eAAe,EAAE,OAAO;IA2B1D;;OAEG;IACH,OAAO,CAAC,aAAa;IAmDrB;;;;OAIG;cACa,gBAAgB,CAAC,EAAE,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,UAAU,EAAE,EAAE,sBAAsB,GAAG,OAAO,CAAC,eAAe,CAAC;IAe9H;;OAEG;cACa,cAAc,CAAC,OAAO,EAAE,OAAO,EAAE,cAAc,EAAE,eAAe,EAAE,eAAe,EAAE,OAAO;;;;;;;;;;;;;;;cA8B1F,UAAU,CAAC,QAAQ,EAAE,eAAe,EAAE,MAAM,EAAE,OAAO,EAAE,gBAAgB,EAAE,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;IAM5H;;OAEG;IACH,SAAS,CAAC,kBAAkB,CAAC,OAAO,EAAE,OAAO,EAAE,OAAO,CAAC,EAAE,OAAO,EAAE,QAAQ,CAAC,EAAE,MAAM,EAAE,UAAU,CAAC,EAAE,WAAW;kBACzD,IAAI;;IAkCxD,SAAS,CAAC,eAAe,CAAC,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,eAAe,EAAE,QAAQ,EAAE,cAAc,GAAG;QAC9F,QAAQ,EAAE,cAAc,CAAC;QACzB,QAAQ
,EAAE,eAAe,CAAC;KAC7B;IAqCD;;OAEG;IACH,SAAS,CAAC,yBAAyB,CAAC,mBAAmB,EAAE,CAAC,MAAM,GAAG,WAAW,GAAG,YAAY,CAAC,EAAE;IAgBhG;;OAEG;IACH,SAAS,CAAC,qBAAqB,CAAC,OAAO,CAAC,EAAE,OAAO;IAKjD,OAAO,CAAC,oBAAoB;IAgB5B;;OAEG;IACH,OAAO,CAAC,iBAAiB,CASvB;CACL;AAED,UAAU,sBAAsB;IAC5B,OAAO,EAAE,OAAO,CAAC;IACjB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,WAAW,CAAC;CAC3B;AA2ED;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,wBAAgB,gBAAgB,CAAC,OAAO,SAAS,mBAAmB,GAAG,mBAAmB,qDAEzF"}
|