@crawlee/http 4.0.0-beta.2 → 4.0.0-beta.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -1
- package/internals/file-download.d.ts +46 -33
- package/internals/file-download.d.ts.map +1 -1
- package/internals/file-download.js +85 -73
- package/internals/file-download.js.map +1 -1
- package/internals/http-crawler.d.ts +70 -149
- package/internals/http-crawler.d.ts.map +1 -1
- package/internals/http-crawler.js +178 -287
- package/internals/http-crawler.js.map +1 -1
- package/internals/utils.d.ts +9 -0
- package/internals/utils.d.ts.map +1 -0
- package/internals/utils.js +35 -0
- package/internals/utils.js.map +1 -0
- package/package.json +6 -6
- package/tsconfig.build.tsbuildinfo +0 -1
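Of the files listed above, the hunks reproduced below come from `package/internals/http-crawler.d.ts` (and its source map). The most visible changes are that `HttpCrawlerOptions` and `HttpCrawler` gain `ContextExtension` and `ExtendedContext` type parameters, the got-scraping types (`OptionsInit`, `Method`, the `PlainResponse` alias) disappear from the public surface, and the crawling context is now built from a `LoadedRequest` plus a fetch-style `Response`. The snippet below is a minimal sketch of what the new declarations imply for user code; it is inferred from this diff rather than from released documentation, so treat names and behaviour as provisional.

```ts
// Sketch inferred from the .d.ts hunks below (beta API, not verified against docs).
import { HttpCrawler } from '@crawlee/http';

const crawler = new HttpCrawler({
    async requestHandler({ request, response, body, contentType }) {
        // `request` is a LoadedRequest (loadedUrl is guaranteed to be set) and
        // `response` is a standard fetch-style Response per the new declarations.
        console.log(request.loadedUrl, response.status, contentType.type, body.length);
    },
});

await crawler.run(['https://crawlee.dev']);
```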
package/internals/http-crawler.d.ts
@@ -1,25 +1,13 @@
-import type {
-import
-import type {
-import { BasicCrawler, Configuration, CrawlerExtension } from '@crawlee/basic';
-import type { HttpResponse } from '@crawlee/core';
+import type { BasicCrawlerOptions, CrawlingContext, ErrorHandler, GetUserDataFromRequest, Request as CrawleeRequest, RequestHandler, RequireContextPipeline, RouterRoutes, Session } from '@crawlee/basic';
+import { BasicCrawler, Configuration, ContextPipeline } from '@crawlee/basic';
+import type { LoadedRequest } from '@crawlee/core';
 import type { Awaitable, Dictionary } from '@crawlee/types';
 import { type CheerioRoot } from '@crawlee/utils';
 import type { RequestLike, ResponseLike } from 'content-type';
-// @ts-ignore optional peer dependency or compatibility with es2022
-import type { Method, OptionsInit } from 'got-scraping';
-import { ObjectPredicate } from 'ow';
 import type { JsonValue } from 'type-fest';
-/**
- * TODO exists for BC within HttpCrawler - replace completely with StreamingHttpResponse in 4.0
- * @internal
- */
-export type PlainResponse = Omit<HttpResponse, 'body'> & IncomingMessage & {
-    body?: unknown;
-};
 export type HttpErrorHandler<UserData extends Dictionary = any, // with default to Dictionary we cant use a typed router in untyped crawler
 JSONData extends JsonValue = any> = ErrorHandler<HttpCrawlingContext<UserData, JSONData>>;
-export interface HttpCrawlerOptions<Context extends InternalHttpCrawlingContext = InternalHttpCrawlingContext> extends BasicCrawlerOptions<Context> {
+export interface HttpCrawlerOptions<Context extends InternalHttpCrawlingContext = InternalHttpCrawlingContext, ContextExtension = Dictionary<never>, ExtendedContext extends Context = Context & ContextExtension> extends BasicCrawlerOptions<Context, ContextExtension, ExtendedContext> {
     /**
      * Timeout in which the HTTP request to the resource needs to finish, given in seconds.
      */
@@ -28,20 +16,14 @@ export interface HttpCrawlerOptions<Context extends InternalHttpCrawlingContext
      * If set to true, SSL certificate errors will be ignored.
      */
     ignoreSslErrors?: boolean;
-    /**
-     * If set, this crawler will be configured for all connections to use
-     * [Apify Proxy](https://console.apify.com/proxy) or your own Proxy URLs provided and rotated according to the configuration.
-     * For more information, see the [documentation](https://docs.apify.com/proxy).
-     */
-    proxyConfiguration?: ProxyConfiguration;
     /**
      * Async functions that are sequentially evaluated before the navigation. Good for setting additional cookies
-     * or browser properties before navigation. The function accepts
-     * which
+     * or browser properties before navigation. The function accepts one parameter `crawlingContext`,
+     * which is passed to the `requestAsBrowser()` function the crawler calls to navigate.
     * Example:
     * ```
     * preNavigationHooks: [
-     *     async (crawlingContext
+     *     async (crawlingContext) => {
     *         // ...
     *     },
     * ]
@@ -50,7 +32,7 @@ export interface HttpCrawlerOptions<Context extends InternalHttpCrawlingContext
     * Modyfing `pageOptions` is supported only in Playwright incognito.
     * See {@link PrePageCreateHook}
     */
-    preNavigationHooks?: InternalHttpHook<
+    preNavigationHooks?: InternalHttpHook<CrawlingContext>[];
    /**
     * Async functions that are sequentially evaluated after the navigation. Good for checking if the navigation was successful.
     * The function accepts `crawlingContext` as the only parameter.
@@ -63,7 +45,7 @@ export interface HttpCrawlerOptions<Context extends InternalHttpCrawlingContext
     * ]
     * ```
     */
-    postNavigationHooks?:
+    postNavigationHooks?: ((crawlingContext: CrawlingContextWithReponse) => Awaitable<void>)[];
    /**
     * An array of [MIME types](https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types/Complete_list_of_MIME_types)
     * you want the crawler to load and process. By default, only `text/html` and `application/xhtml+xml` MIME types are supported.
@@ -112,15 +94,24 @@ export interface HttpCrawlerOptions<Context extends InternalHttpCrawlingContext
 /**
  * @internal
  */
-export type InternalHttpHook<Context> = (crawlingContext: Context
+export type InternalHttpHook<Context> = (crawlingContext: Context) => Awaitable<void>;
 export type HttpHook<UserData extends Dictionary = any, // with default to Dictionary we cant use a typed router in untyped crawler
 JSONData extends JsonValue = any> = InternalHttpHook<HttpCrawlingContext<UserData, JSONData>>;
+interface CrawlingContextWithReponse<UserData extends Dictionary = any> extends CrawlingContext<UserData> {
+    /**
+     * The request object that was successfully loaded and navigated to, including the {@link Request.loadedUrl|`loadedUrl`} property.
+     */
+    request: LoadedRequest<CrawleeRequest<UserData>>;
+    /**
+     * The HTTP response object containing status code, headers, and other response metadata.
+     */
+    response: Response;
+}
 /**
  * @internal
  */
 export interface InternalHttpCrawlingContext<UserData extends Dictionary = any, // with default to Dictionary we cant use a typed router in untyped crawler
-JSONData extends JsonValue = any
-Crawler = HttpCrawler<any>> extends CrawlingContext<Crawler, UserData> {
+JSONData extends JsonValue = any> extends CrawlingContextWithReponse<UserData> {
    /**
     * The request body of the web page.
     * The type depends on the `Content-Type` header of the web page:
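The `CrawlingContextWithReponse` interface added above (the `Reponse` spelling is as published) is what both the crawling context and `postNavigationHooks` are now typed against: a `LoadedRequest` plus a fetch-style `Response` instead of the removed `PlainResponse`. A hedged sketch of a hook written against that shape follows; names come from this diff, not from separately verified documentation.

```ts
// Illustrative only; hook and context shapes follow the declarations above.
import { HttpCrawler } from '@crawlee/http';

const crawler = new HttpCrawler({
    postNavigationHooks: [
        async ({ request, response }) => {
            // `response` exposes fetch metadata such as `status` and `headers`,
            // and `request.loadedUrl` is always defined on a LoadedRequest.
            if (response.status >= 400) {
                console.warn(`${request.loadedUrl} answered with HTTP ${response.status}`);
            }
        },
    ],
});
```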
@@ -139,7 +130,6 @@ Crawler = HttpCrawler<any>> extends CrawlingContext<Crawler, UserData> {
         type: string;
         encoding: BufferEncoding;
     };
-    response: PlainResponse;
    /**
     * Wait for an element matching the selector to appear. Timeout is ignored.
     *
@@ -167,7 +157,7 @@ Crawler = HttpCrawler<any>> extends CrawlingContext<Crawler, UserData> {
     */
     parseWithCheerio(selector?: string, timeoutMs?: number): Promise<CheerioRoot>;
 }
-export interface HttpCrawlingContext<UserData extends Dictionary = any, JSONData extends JsonValue = any> extends InternalHttpCrawlingContext<UserData, JSONData
+export interface HttpCrawlingContext<UserData extends Dictionary = any, JSONData extends JsonValue = any> extends InternalHttpCrawlingContext<UserData, JSONData> {
 }
 export type HttpRequestHandler<UserData extends Dictionary = any, // with default to Dictionary we cant use a typed router in untyped crawler
 JSONData extends JsonValue = any> = RequestHandler<HttpCrawlingContext<UserData, JSONData>>;
@@ -192,11 +182,11 @@ JSONData extends JsonValue = any> = RequestHandler<HttpCrawlingContext<UserData,
 *
 * The crawler finishes when there are no more {@link Request} objects to crawl.
 *
- * We can use the `preNavigationHooks` to adjust
+ * We can use the `preNavigationHooks` to adjust the crawling context before the request is made:
 *
 * ```javascript
 * preNavigationHooks: [
- *     (crawlingContext
+ *     (crawlingContext) => {
 *         // ...
 *     },
 * ]
@@ -238,16 +228,10 @@ JSONData extends JsonValue = any> = RequestHandler<HttpCrawlingContext<UserData,
 * ```
 * @category Crawlers
 */
-export declare class HttpCrawler<Context extends InternalHttpCrawlingContext<any, any,
+export declare class HttpCrawler<Context extends InternalHttpCrawlingContext<any, any> = InternalHttpCrawlingContext, ContextExtension = Dictionary<never>, ExtendedContext extends Context = Context & ContextExtension> extends BasicCrawler<Context, ContextExtension, ExtendedContext> {
     readonly config: Configuration;
-
-
-     * Only available if used by the crawler.
-     */
-    proxyConfiguration?: ProxyConfiguration;
-    protected userRequestHandlerTimeoutMillis: number;
-    protected preNavigationHooks: InternalHttpHook<Context>[];
-    protected postNavigationHooks: InternalHttpHook<Context>[];
+    protected preNavigationHooks: InternalHttpHook<CrawlingContext>[];
+    protected postNavigationHooks: ((crawlingContext: CrawlingContextWithReponse) => Awaitable<void>)[];
     protected persistCookiesPerSession: boolean;
     protected navigationTimeoutMillis: number;
     protected ignoreSslErrors: boolean;
@@ -267,8 +251,6 @@ export declare class HttpCrawler<Context extends InternalHttpCrawlingContext<any
         suggestResponseEncoding: import("ow").StringPredicate & import("ow").BasePredicate<string | undefined>;
         // @ts-ignore optional peer dependency or compatibility with es2022
         forceResponseEncoding: import("ow").StringPredicate & import("ow").BasePredicate<string | undefined>;
-        // @ts-ignore optional peer dependency or compatibility with es2022
-        proxyConfiguration: ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
         // @ts-ignore optional peer dependency or compatibility with es2022
         persistCookiesPerSession: import("ow").BooleanPredicate & import("ow").BasePredicate<boolean | undefined>;
         // @ts-ignore optional peer dependency or compatibility with es2022
@@ -279,6 +261,10 @@ export declare class HttpCrawler<Context extends InternalHttpCrawlingContext<any
         preNavigationHooks: import("ow").ArrayPredicate<unknown> & import("ow").BasePredicate<unknown[] | undefined>;
         // @ts-ignore optional peer dependency or compatibility with es2022
         postNavigationHooks: import("ow").ArrayPredicate<unknown> & import("ow").BasePredicate<unknown[] | undefined>;
+        // @ts-ignore optional peer dependency or compatibility with es2022
+        contextPipelineBuilder: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
+        // @ts-ignore optional peer dependency or compatibility with es2022
+        extendContext: import("ow").Predicate<Function> & import("ow").BasePredicate<Function | undefined>;
         // @ts-ignore optional peer dependency or compatibility with es2022
         requestList: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
         // @ts-ignore optional peer dependency or compatibility with es2022
@@ -299,12 +285,16 @@ export declare class HttpCrawler<Context extends InternalHttpCrawlingContext<any
         maxSessionRotations: import("ow").NumberPredicate & import("ow").BasePredicate<number | undefined>;
         // @ts-ignore optional peer dependency or compatibility with es2022
         maxRequestsPerCrawl: import("ow").NumberPredicate & import("ow").BasePredicate<number | undefined>;
+        // @ts-ignore optional peer dependency or compatibility with es2022
+        maxCrawlDepth: import("ow").NumberPredicate & import("ow").BasePredicate<number | undefined>;
         // @ts-ignore optional peer dependency or compatibility with es2022
         autoscaledPoolOptions: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
         // @ts-ignore optional peer dependency or compatibility with es2022
         sessionPoolOptions: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
         // @ts-ignore optional peer dependency or compatibility with es2022
         useSessionPool: import("ow").BooleanPredicate & import("ow").BasePredicate<boolean | undefined>;
+        // @ts-ignore optional peer dependency or compatibility with es2022
+        proxyConfiguration: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
         // @ts-ignore optional peer dependency or compatibility with es2022
         statusMessageLoggingInterval: import("ow").NumberPredicate & import("ow").BasePredicate<number | undefined>;
         // @ts-ignore optional peer dependency or compatibility with es2022
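The validation shape above now lists `contextPipelineBuilder`, `extendContext`, `maxCrawlDepth`, and (moved here) `proxyConfiguration` next to the existing base-crawler predicates, which suggests these are accepted and validated as constructor options. A hedged sketch follows, assuming the predicate names map one-to-one onto option names of the same spelling:

```ts
// Assumed option names based on the ow predicates above; illustration only.
import { ProxyConfiguration } from '@crawlee/core';
import { HttpCrawler } from '@crawlee/http';

const crawler = new HttpCrawler({
    maxCrawlDepth: 2, // presumably caps how many link hops deep the crawl may go
    proxyConfiguration: new ProxyConfiguration({ proxyUrls: ['http://proxy.example.com:8000'] }),
    async requestHandler({ request, enqueueLinks }) {
        console.log(request.url);
        await enqueueLinks();
    },
});
```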
@@ -312,7 +302,7 @@ export declare class HttpCrawler<Context extends InternalHttpCrawlingContext<any
         // @ts-ignore optional peer dependency or compatibility with es2022
         retryOnBlocked: import("ow").BooleanPredicate & import("ow").BasePredicate<boolean | undefined>;
         // @ts-ignore optional peer dependency or compatibility with es2022
-        respectRobotsTxtFile: import("ow").
+        respectRobotsTxtFile: import("ow").AnyPredicate<boolean | object>;
         // @ts-ignore optional peer dependency or compatibility with es2022
         onSkippedRequest: import("ow").Predicate<Function> & import("ow").BasePredicate<Function | undefined>;
         // @ts-ignore optional peer dependency or compatibility with es2022
@@ -335,131 +325,62 @@ export declare class HttpCrawler<Context extends InternalHttpCrawlingContext<any
    /**
     * All `HttpCrawlerOptions` parameters are passed via an options object.
     */
-    constructor(options?: HttpCrawlerOptions<Context>, config?: Configuration);
-
-
-
-
-
-    use(extension: CrawlerExtension): void;
+    constructor(options?: HttpCrawlerOptions<Context, ContextExtension, ExtendedContext> & RequireContextPipeline<InternalHttpCrawlingContext, Context>, config?: Configuration);
+    protected buildContextPipeline(): ContextPipeline<CrawlingContext, InternalHttpCrawlingContext>;
+    private makeHttpRequest;
+    private processHttpResponse;
+    private handleBlockedRequestByContent;
+    protected isRequestBlocked(crawlingContext: InternalHttpCrawlingContext): Promise<string | false>;
    /**
-     *
+     * Returns the `Cookie` header value based on the current context and
+     * any changes that occurred in the navigation hooks.
     */
-    protected
-    protected isRequestBlocked(crawlingContext: Context): Promise<string | false>;
-    protected _handleNavigation(crawlingContext: Context): Promise<void>;
-    /**
-     * Sets the cookie header to `gotOptions` based on the provided request and session headers, as well as any changes that occurred due to hooks.
-     */
-    protected _applyCookies({ session, request }: CrawlingContext, gotOptions: OptionsInit, preHookCookies: string, postHookCookies: string): void;
+    protected _applyCookies({ session, request }: CrawlingContext, preHookCookies: string, postHookCookies: string): string;
    /**
     * Function to make the HTTP request. It performs optimizations
     * on the request such as only downloading the request body if the
     * received content type matches text/html, application/xml, application/xhtml+xml.
     */
-    protected _requestFunction({ request, session, proxyUrl,
+    protected _requestFunction({ request, session, proxyUrl, cookieString, }: RequestFunctionOptions): Promise<Response>;
    /**
     * Encodes and parses response according to the provided content type
     */
-    protected _parseResponse(request:
-
-        response: IncomingMessage;
+    protected _parseResponse(request: CrawleeRequest, response: Response): Promise<{
+        response: Response;
         contentType: {
             type: string;
             encoding: BufferEncoding;
         };
-
-
-
+        body: string;
+    } | {
+        body: Buffer<ArrayBuffer>;
+        response: Response;
         contentType: {
             type: string;
             encoding: BufferEncoding;
         };
-        enqueueLinks: () => Promise<{
-            processedRequests: never[];
-            unprocessedRequests: never[];
-        }>;
     }>;
-    protected _parseHTML(response: IncomingMessage, _isXml: boolean, _crawlingContext: Context): Promise<Partial<Context>>;
    /**
     * Combines the provided `requestOptions` with mandatory (non-overridable) values.
     */
-    protected _getRequestOptions(request:
-
-
-
-
-
-
-
-
-
-
-
-
-
-        // @ts-ignore optional peer dependency or compatibility with es2022
-        timeout?: import("got-scraping").Delays | undefined;
-        prefixUrl?: string | URL | undefined;
-        form?: Record<string, any> | undefined;
-        // @ts-ignore optional peer dependency or compatibility with es2022
-        cookieJar?: import("got-scraping").PromiseCookieJar | import("got-scraping").ToughCookieJar | undefined;
-        signal?: AbortSignal | undefined;
-        ignoreInvalidCookies?: boolean | undefined;
-        // @ts-ignore optional peer dependency or compatibility with es2022
-        searchParams?: string | import("got-scraping").SearchParameters | URLSearchParams | undefined;
-        // @ts-ignore optional peer dependency or compatibility with es2022
-        dnsLookup?: import("cacheable-lookup").default["lookup"] | undefined;
-        // @ts-ignore optional peer dependency or compatibility with es2022
-        dnsCache?: import("cacheable-lookup").default | boolean | undefined;
-        context?: Record<string, unknown> | undefined;
-        // @ts-ignore optional peer dependency or compatibility with es2022
-        followRedirect?: boolean | ((response: import("got-scraping").PlainResponse) => boolean) | undefined;
-        maxRedirects?: number | undefined;
-        // @ts-ignore optional peer dependency or compatibility with es2022
-        cache?: string | import("cacheable-request").StorageAdapter | boolean | undefined;
-        throwHttpErrors?: boolean | undefined;
-        username?: string | undefined;
-        password?: string | undefined;
-        http2?: boolean | undefined;
-        allowGetBody?: boolean | undefined;
-        methodRewriting?: boolean | undefined;
-        // @ts-ignore optional peer dependency or compatibility with es2022
-        dnsLookupIpVersion?: import("got-scraping").DnsLookupIpVersion;
-        // @ts-ignore optional peer dependency or compatibility with es2022
-        parseJson?: import("got-scraping").ParseJsonFunction | undefined;
-        // @ts-ignore optional peer dependency or compatibility with es2022
-        stringifyJson?: import("got-scraping").StringifyJsonFunction | undefined;
-        localAddress?: string | undefined;
-        method?: Method | undefined;
-        // @ts-ignore optional peer dependency or compatibility with es2022
-        createConnection?: import("got-scraping").CreateConnectionFunction | undefined;
-        // @ts-ignore optional peer dependency or compatibility with es2022
-        cacheOptions?: import("got-scraping").CacheOptions | undefined;
-        // @ts-ignore optional peer dependency or compatibility with es2022
-        https?: import("got-scraping").HttpsOptions | undefined;
-        encoding?: BufferEncoding | undefined;
-        resolveBodyOnly?: boolean | undefined;
-        isStream?: boolean | undefined;
-        // @ts-ignore optional peer dependency or compatibility with es2022
-        responseType?: import("got-scraping").ResponseType | undefined;
-        // @ts-ignore optional peer dependency or compatibility with es2022
-        pagination?: import("got-scraping").PaginationOptions<unknown, unknown> | undefined;
-        setHost?: boolean | undefined;
-        maxHeaderSize?: number | undefined;
-        enableUnixSockets?: boolean | undefined;
-    } & {
-        // @ts-ignore optional peer dependency or compatibility with es2022
-        hooks?: Partial<import("got-scraping").Hooks>;
-        // @ts-ignore optional peer dependency or compatibility with es2022
-        retry?: Partial<import("got-scraping").RetryOptions>;
-        // @ts-ignore optional peer dependency or compatibility with es2022
-    } & import("got-scraping").Context & Required<Pick<OptionsInit, "url">> & {
-        isStream: true;
+    protected _getRequestOptions(request: CrawleeRequest, session?: Session, proxyUrl?: string): {
+        url: string;
+        // @ts-ignore optional peer dependency or compatibility with es2022
+        method: import("@crawlee/types").AllowedHttpMethods;
+        proxyUrl: string | undefined;
+        timeout: number;
+        // @ts-ignore optional peer dependency or compatibility with es2022
+        cookieJar: import("tough-cookie").CookieJar | undefined;
+        sessionToken: Session | undefined;
+        headers: Record<string, string> | undefined;
+        https: {
+            rejectUnauthorized: boolean;
+        };
+        body: string | undefined;
     };
-    protected _encodeResponse(request:
+    protected _encodeResponse(request: CrawleeRequest, response: Response, encoding: BufferEncoding): {
         encoding: BufferEncoding;
-        response:
+        response: Response;
     };
    /**
     * Checks and extends supported mime types
@@ -476,10 +397,10 @@ export declare class HttpCrawler<Context extends InternalHttpCrawlingContext<any
     private _requestAsBrowser;
 }
 interface RequestFunctionOptions {
-    request:
+    request: CrawleeRequest;
     session?: Session;
     proxyUrl?: string;
-
+    cookieString?: string;
 }
 /**
  * Creates new {@link Router} instance that works based on request labels.
package/internals/http-crawler.d.ts.map
@@ -1 +1 @@
(one-line generated source map for http-crawler.d.ts replaced by a regenerated one; the machine-generated `mappings` payloads are not reproduced here)
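Taken together, the declaration changes remove the got-scraping surface (`OptionsInit`, `Method`, `gotOptions`, the `PlainResponse` alias) from the public types: pre-navigation hooks are typed as `InternalHttpHook<CrawlingContext>`, and `_applyCookies` now returns a cookie string instead of mutating `gotOptions`. A final hedged sketch of the narrower hook signature; how header changes are ultimately applied in the beta is an assumption here, not something this diff confirms.

```ts
// The hook receives only the crawling context (no `gotOptions` second argument).
import { HttpCrawler } from '@crawlee/http';

const crawler = new HttpCrawler({
    preNavigationHooks: [
        async (crawlingContext) => {
            // Adjust the Request before navigation; header handling is assumed,
            // not confirmed by the declarations above.
            crawlingContext.request.headers = {
                ...crawlingContext.request.headers,
                accept: 'text/html',
            };
        },
    ],
});
```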