@crawlee/http 3.7.1-beta.4 → 3.7.1-beta.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/internals/http-crawler.d.ts +71 -1
- package/package.json +5 -5
|
@@ -1,7 +1,12 @@
|
|
|
1
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
1
2
|
/// <reference types="node" />
|
|
3
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
2
4
|
/// <reference types="node" />
|
|
5
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
3
6
|
/// <reference types="node" />
|
|
7
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
4
8
|
/// <reference types="node" />
|
|
9
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
5
10
|
/// <reference types="node" />
|
|
6
11
|
import type { IncomingMessage } from 'node:http';
|
|
7
12
|
import type { BasicCrawlerOptions, CrawlingContext, ErrorHandler, GetUserDataFromRequest, ProxyConfiguration, Request, RequestHandler, RouterRoutes, Session } from '@crawlee/basic';
|
|
@@ -9,7 +14,7 @@ import { BasicCrawler, CrawlerExtension, Configuration } from '@crawlee/basic';
|
|
|
9
14
|
import type { Awaitable, Dictionary } from '@crawlee/types';
|
|
10
15
|
import * as cheerio from 'cheerio';
|
|
11
16
|
import type { RequestLike, ResponseLike } from 'content-type';
|
|
12
|
-
// @ts-ignore optional peer dependency
|
|
17
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
13
18
|
import type { OptionsInit, Method, PlainResponse } from 'got-scraping';
|
|
14
19
|
import type { JsonValue } from 'type-fest';
|
|
15
20
|
export type HttpErrorHandler<UserData extends Dictionary = any, // with default to Dictionary we cant use a typed router in untyped crawler
|
|
@@ -233,43 +238,81 @@ export declare class HttpCrawler<Context extends InternalHttpCrawlingContext<any
|
|
|
233
238
|
protected ignoreHttpErrorStatusCodes: Set<number>;
|
|
234
239
|
protected readonly supportedMimeTypes: Set<string>;
|
|
235
240
|
protected static optionsShape: {
|
|
241
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
236
242
|
handlePageFunction: import("ow").Predicate<Function> & import("ow").BasePredicate<Function | undefined>;
|
|
243
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
237
244
|
navigationTimeoutSecs: import("ow").NumberPredicate & import("ow").BasePredicate<number | undefined>;
|
|
245
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
238
246
|
ignoreSslErrors: import("ow").BooleanPredicate & import("ow").BasePredicate<boolean | undefined>;
|
|
247
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
239
248
|
additionalMimeTypes: import("ow").ArrayPredicate<string>;
|
|
249
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
240
250
|
suggestResponseEncoding: import("ow").StringPredicate & import("ow").BasePredicate<string | undefined>;
|
|
251
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
241
252
|
forceResponseEncoding: import("ow").StringPredicate & import("ow").BasePredicate<string | undefined>;
|
|
253
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
242
254
|
proxyConfiguration: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
|
|
255
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
243
256
|
persistCookiesPerSession: import("ow").BooleanPredicate & import("ow").BasePredicate<boolean | undefined>;
|
|
257
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
244
258
|
additionalHttpErrorStatusCodes: import("ow").ArrayPredicate<number>;
|
|
259
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
245
260
|
ignoreHttpErrorStatusCodes: import("ow").ArrayPredicate<number>;
|
|
261
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
246
262
|
preNavigationHooks: import("ow").ArrayPredicate<unknown> & import("ow").BasePredicate<unknown[] | undefined>;
|
|
263
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
247
264
|
postNavigationHooks: import("ow").ArrayPredicate<unknown> & import("ow").BasePredicate<unknown[] | undefined>;
|
|
265
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
248
266
|
requestList: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
|
|
267
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
249
268
|
requestQueue: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
|
|
269
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
250
270
|
requestHandler: import("ow").Predicate<Function> & import("ow").BasePredicate<Function | undefined>;
|
|
271
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
251
272
|
handleRequestFunction: import("ow").Predicate<Function> & import("ow").BasePredicate<Function | undefined>;
|
|
273
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
252
274
|
requestHandlerTimeoutSecs: import("ow").NumberPredicate & import("ow").BasePredicate<number | undefined>;
|
|
275
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
253
276
|
handleRequestTimeoutSecs: import("ow").NumberPredicate & import("ow").BasePredicate<number | undefined>;
|
|
277
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
254
278
|
errorHandler: import("ow").Predicate<Function> & import("ow").BasePredicate<Function | undefined>;
|
|
279
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
255
280
|
failedRequestHandler: import("ow").Predicate<Function> & import("ow").BasePredicate<Function | undefined>;
|
|
281
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
256
282
|
handleFailedRequestFunction: import("ow").Predicate<Function> & import("ow").BasePredicate<Function | undefined>;
|
|
283
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
257
284
|
maxRequestRetries: import("ow").NumberPredicate & import("ow").BasePredicate<number | undefined>;
|
|
285
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
258
286
|
sameDomainDelaySecs: import("ow").NumberPredicate & import("ow").BasePredicate<number | undefined>;
|
|
287
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
259
288
|
maxSessionRotations: import("ow").NumberPredicate & import("ow").BasePredicate<number | undefined>;
|
|
289
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
260
290
|
maxRequestsPerCrawl: import("ow").NumberPredicate & import("ow").BasePredicate<number | undefined>;
|
|
291
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
261
292
|
autoscaledPoolOptions: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
|
|
293
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
262
294
|
sessionPoolOptions: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
|
|
295
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
263
296
|
useSessionPool: import("ow").BooleanPredicate & import("ow").BasePredicate<boolean | undefined>;
|
|
297
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
264
298
|
statusMessageLoggingInterval: import("ow").NumberPredicate & import("ow").BasePredicate<number | undefined>;
|
|
299
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
265
300
|
statusMessageCallback: import("ow").Predicate<Function> & import("ow").BasePredicate<Function | undefined>;
|
|
301
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
266
302
|
retryOnBlocked: import("ow").BooleanPredicate & import("ow").BasePredicate<boolean | undefined>;
|
|
303
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
267
304
|
minConcurrency: import("ow").NumberPredicate & import("ow").BasePredicate<number | undefined>;
|
|
305
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
268
306
|
maxConcurrency: import("ow").NumberPredicate & import("ow").BasePredicate<number | undefined>;
|
|
307
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
269
308
|
maxRequestsPerMinute: import("ow").NumberPredicate & import("ow").BasePredicate<number | undefined>;
|
|
309
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
270
310
|
keepAlive: import("ow").BooleanPredicate & import("ow").BasePredicate<boolean | undefined>;
|
|
311
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
271
312
|
log: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
|
|
313
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
272
314
|
experiments: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
|
|
315
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
273
316
|
statisticsOptions: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
|
|
274
317
|
};
|
|
275
318
|
/**
|
|
@@ -325,59 +368,85 @@ export declare class HttpCrawler<Context extends InternalHttpCrawlingContext<any
|
|
|
325
368
|
* Combines the provided `requestOptions` with mandatory (non-overridable) values.
|
|
326
369
|
*/
|
|
327
370
|
protected _getRequestOptions(request: Request, session?: Session, proxyUrl?: string, gotOptions?: OptionsInit): {
|
|
371
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
328
372
|
request?: import("got-scraping", { with: { "resolution-mode": "import" } }).RequestFunction | undefined;
|
|
373
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
329
374
|
agent?: import("got-scraping", { with: { "resolution-mode": "import" } }).Agents | undefined;
|
|
375
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
330
376
|
h2session?: import("http2").ClientHttp2Session | undefined;
|
|
331
377
|
decompress?: boolean | undefined;
|
|
378
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
332
379
|
timeout?: import("got-scraping", { with: { "resolution-mode": "import" } }).Delays | undefined;
|
|
333
380
|
prefixUrl?: string | URL | undefined;
|
|
381
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
334
382
|
body?: string | Buffer | import("stream").Readable | Generator<unknown, any, unknown> | AsyncGenerator<unknown, any, unknown> | import("form-data-encoder", { with: { "resolution-mode": "import" } }).FormDataLike | undefined;
|
|
335
383
|
form?: Record<string, any> | undefined;
|
|
336
384
|
json?: unknown;
|
|
337
385
|
url?: string | URL | undefined;
|
|
386
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
338
387
|
cookieJar?: import("got-scraping", { with: { "resolution-mode": "import" } }).PromiseCookieJar | import("got-scraping", { with: { "resolution-mode": "import" } }).ToughCookieJar | undefined;
|
|
339
388
|
signal?: AbortSignal | undefined;
|
|
340
389
|
ignoreInvalidCookies?: boolean | undefined;
|
|
390
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
341
391
|
searchParams?: string | import("got-scraping", { with: { "resolution-mode": "import" } }).SearchParameters | URLSearchParams | undefined;
|
|
342
392
|
dnsLookup?: {
|
|
393
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
343
394
|
(hostname: string, family: import("cacheable-lookup", { with: { "resolution-mode": "import" } }).IPFamily, callback: (error: NodeJS.ErrnoException | null, address: string, family: import("cacheable-lookup", { with: { "resolution-mode": "import" } }).IPFamily) => void): void;
|
|
395
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
344
396
|
(hostname: string, callback: (error: NodeJS.ErrnoException | null, address: string, family: import("cacheable-lookup", { with: { "resolution-mode": "import" } }).IPFamily) => void): void;
|
|
397
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
345
398
|
(hostname: string, options: import("cacheable-lookup", { with: { "resolution-mode": "import" } }).LookupOptions & {
|
|
346
399
|
all: true;
|
|
400
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
347
401
|
}, callback: (error: NodeJS.ErrnoException | null, result: readonly import("cacheable-lookup", { with: { "resolution-mode": "import" } }).EntryObject[]) => void): void;
|
|
402
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
348
403
|
(hostname: string, options: import("cacheable-lookup", { with: { "resolution-mode": "import" } }).LookupOptions, callback: (error: NodeJS.ErrnoException | null, address: string, family: import("cacheable-lookup", { with: { "resolution-mode": "import" } }).IPFamily) => void): void;
|
|
349
404
|
} | undefined;
|
|
405
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
350
406
|
dnsCache?: boolean | import("cacheable-lookup", { with: { "resolution-mode": "import" } }).default | undefined;
|
|
351
407
|
context?: Record<string, unknown> | undefined;
|
|
352
408
|
followRedirect?: boolean | undefined;
|
|
353
409
|
maxRedirects?: number | undefined;
|
|
410
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
354
411
|
cache?: string | boolean | import("cacheable-request", { with: { "resolution-mode": "import" } }).StorageAdapter | undefined;
|
|
355
412
|
throwHttpErrors?: boolean | undefined;
|
|
356
413
|
username?: string | undefined;
|
|
357
414
|
password?: string | undefined;
|
|
358
415
|
http2?: boolean | undefined;
|
|
359
416
|
allowGetBody?: boolean | undefined;
|
|
417
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
360
418
|
headers?: import("got-scraping", { with: { "resolution-mode": "import" } }).Headers | undefined;
|
|
361
419
|
methodRewriting?: boolean | undefined;
|
|
420
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
362
421
|
dnsLookupIpVersion?: import("got-scraping", { with: { "resolution-mode": "import" } }).DnsLookupIpVersion;
|
|
422
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
363
423
|
parseJson?: import("got-scraping", { with: { "resolution-mode": "import" } }).ParseJsonFunction | undefined;
|
|
424
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
364
425
|
stringifyJson?: import("got-scraping", { with: { "resolution-mode": "import" } }).StringifyJsonFunction | undefined;
|
|
365
426
|
localAddress?: string | undefined;
|
|
366
427
|
method?: Method | undefined;
|
|
428
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
367
429
|
createConnection?: import("got-scraping", { with: { "resolution-mode": "import" } }).CreateConnectionFunction | undefined;
|
|
430
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
368
431
|
cacheOptions?: import("got-scraping", { with: { "resolution-mode": "import" } }).CacheOptions | undefined;
|
|
432
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
369
433
|
https?: import("got-scraping", { with: { "resolution-mode": "import" } }).HttpsOptions | undefined;
|
|
370
434
|
encoding?: BufferEncoding | undefined;
|
|
371
435
|
resolveBodyOnly?: boolean | undefined;
|
|
372
436
|
isStream?: boolean | undefined;
|
|
437
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
373
438
|
responseType?: import("got-scraping", { with: { "resolution-mode": "import" } }).ResponseType | undefined;
|
|
439
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
374
440
|
pagination?: import("got-scraping", { with: { "resolution-mode": "import" } }).PaginationOptions<unknown, unknown> | undefined;
|
|
375
441
|
setHost?: boolean | undefined;
|
|
376
442
|
maxHeaderSize?: number | undefined;
|
|
377
443
|
enableUnixSockets?: boolean | undefined;
|
|
378
444
|
} & {
|
|
445
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
379
446
|
hooks?: Partial<import("got-scraping", { with: { "resolution-mode": "import" } }).Hooks> | undefined;
|
|
447
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
380
448
|
retry?: Partial<import("got-scraping", { with: { "resolution-mode": "import" } }).RetryOptions> | undefined;
|
|
449
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
381
450
|
} & import("got-scraping", { with: { "resolution-mode": "import" } }).Context & {
|
|
382
451
|
isStream: true;
|
|
383
452
|
};
|
|
@@ -429,6 +498,7 @@ interface RequestFunctionOptions {
|
|
|
429
498
|
* await crawler.run();
|
|
430
499
|
* ```
|
|
431
500
|
*/
|
|
501
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
432
502
|
export declare function createHttpRouter<Context extends HttpCrawlingContext = HttpCrawlingContext, UserData extends Dictionary = GetUserDataFromRequest<Context['request']>>(routes?: RouterRoutes<Context, UserData>): import("@crawlee/basic").RouterHandler<Context>;
|
|
433
503
|
export {};
|
|
434
504
|
//# sourceMappingURL=http-crawler.d.ts.map
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@crawlee/http",
|
|
3
|
-
"version": "3.7.1-beta.4",
|
|
3
|
+
"version": "3.7.1-beta.6",
|
|
4
4
|
"description": "The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.",
|
|
5
5
|
"engines": {
|
|
6
6
|
"node": ">=16.0.0"
|
|
@@ -55,9 +55,9 @@
|
|
|
55
55
|
"dependencies": {
|
|
56
56
|
"@apify/timeout": "^0.3.0",
|
|
57
57
|
"@apify/utilities": "^2.7.10",
|
|
58
|
-
"@crawlee/basic": "3.7.1-beta.4",
|
|
59
|
-
"@crawlee/types": "3.7.1-beta.4",
|
|
60
|
-
"@crawlee/utils": "3.7.1-beta.4",
|
|
58
|
+
"@crawlee/basic": "3.7.1-beta.6",
|
|
59
|
+
"@crawlee/types": "3.7.1-beta.6",
|
|
60
|
+
"@crawlee/utils": "3.7.1-beta.6",
|
|
61
61
|
"@types/content-type": "^1.1.5",
|
|
62
62
|
"cheerio": "^1.0.0-rc.12",
|
|
63
63
|
"content-type": "^1.0.4",
|
|
@@ -75,5 +75,5 @@
|
|
|
75
75
|
}
|
|
76
76
|
}
|
|
77
77
|
},
|
|
78
|
-
"gitHead": "
|
|
78
|
+
"gitHead": "01664e58799374a6acc2aa9db397566c1acc69d4"
|
|
79
79
|
}
|