@crawlee/http 3.7.1-beta.4 → 3.7.1-beta.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,12 @@
1
+ // @ts-ignore optional peer dependency or compatibility with es2022
1
2
  /// <reference types="node" />
3
+ // @ts-ignore optional peer dependency or compatibility with es2022
2
4
  /// <reference types="node" />
5
+ // @ts-ignore optional peer dependency or compatibility with es2022
3
6
  /// <reference types="node" />
7
+ // @ts-ignore optional peer dependency or compatibility with es2022
4
8
  /// <reference types="node" />
9
+ // @ts-ignore optional peer dependency or compatibility with es2022
5
10
  /// <reference types="node" />
6
11
  import type { IncomingMessage } from 'node:http';
7
12
  import type { BasicCrawlerOptions, CrawlingContext, ErrorHandler, GetUserDataFromRequest, ProxyConfiguration, Request, RequestHandler, RouterRoutes, Session } from '@crawlee/basic';
@@ -9,7 +14,7 @@ import { BasicCrawler, CrawlerExtension, Configuration } from '@crawlee/basic';
9
14
  import type { Awaitable, Dictionary } from '@crawlee/types';
10
15
  import * as cheerio from 'cheerio';
11
16
  import type { RequestLike, ResponseLike } from 'content-type';
12
- // @ts-ignore optional peer dependency
17
+ // @ts-ignore optional peer dependency or compatibility with es2022
13
18
  import type { OptionsInit, Method, PlainResponse } from 'got-scraping';
14
19
  import type { JsonValue } from 'type-fest';
15
20
  export type HttpErrorHandler<UserData extends Dictionary = any, // with default to Dictionary we cant use a typed router in untyped crawler
@@ -233,43 +238,81 @@ export declare class HttpCrawler<Context extends InternalHttpCrawlingContext<any
233
238
  protected ignoreHttpErrorStatusCodes: Set<number>;
234
239
  protected readonly supportedMimeTypes: Set<string>;
235
240
  protected static optionsShape: {
241
+ // @ts-ignore optional peer dependency or compatibility with es2022
236
242
  handlePageFunction: import("ow").Predicate<Function> & import("ow").BasePredicate<Function | undefined>;
243
+ // @ts-ignore optional peer dependency or compatibility with es2022
237
244
  navigationTimeoutSecs: import("ow").NumberPredicate & import("ow").BasePredicate<number | undefined>;
245
+ // @ts-ignore optional peer dependency or compatibility with es2022
238
246
  ignoreSslErrors: import("ow").BooleanPredicate & import("ow").BasePredicate<boolean | undefined>;
247
+ // @ts-ignore optional peer dependency or compatibility with es2022
239
248
  additionalMimeTypes: import("ow").ArrayPredicate<string>;
249
+ // @ts-ignore optional peer dependency or compatibility with es2022
240
250
  suggestResponseEncoding: import("ow").StringPredicate & import("ow").BasePredicate<string | undefined>;
251
+ // @ts-ignore optional peer dependency or compatibility with es2022
241
252
  forceResponseEncoding: import("ow").StringPredicate & import("ow").BasePredicate<string | undefined>;
253
+ // @ts-ignore optional peer dependency or compatibility with es2022
242
254
  proxyConfiguration: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
255
+ // @ts-ignore optional peer dependency or compatibility with es2022
243
256
  persistCookiesPerSession: import("ow").BooleanPredicate & import("ow").BasePredicate<boolean | undefined>;
257
+ // @ts-ignore optional peer dependency or compatibility with es2022
244
258
  additionalHttpErrorStatusCodes: import("ow").ArrayPredicate<number>;
259
+ // @ts-ignore optional peer dependency or compatibility with es2022
245
260
  ignoreHttpErrorStatusCodes: import("ow").ArrayPredicate<number>;
261
+ // @ts-ignore optional peer dependency or compatibility with es2022
246
262
  preNavigationHooks: import("ow").ArrayPredicate<unknown> & import("ow").BasePredicate<unknown[] | undefined>;
263
+ // @ts-ignore optional peer dependency or compatibility with es2022
247
264
  postNavigationHooks: import("ow").ArrayPredicate<unknown> & import("ow").BasePredicate<unknown[] | undefined>;
265
+ // @ts-ignore optional peer dependency or compatibility with es2022
248
266
  requestList: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
267
+ // @ts-ignore optional peer dependency or compatibility with es2022
249
268
  requestQueue: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
269
+ // @ts-ignore optional peer dependency or compatibility with es2022
250
270
  requestHandler: import("ow").Predicate<Function> & import("ow").BasePredicate<Function | undefined>;
271
+ // @ts-ignore optional peer dependency or compatibility with es2022
251
272
  handleRequestFunction: import("ow").Predicate<Function> & import("ow").BasePredicate<Function | undefined>;
273
+ // @ts-ignore optional peer dependency or compatibility with es2022
252
274
  requestHandlerTimeoutSecs: import("ow").NumberPredicate & import("ow").BasePredicate<number | undefined>;
275
+ // @ts-ignore optional peer dependency or compatibility with es2022
253
276
  handleRequestTimeoutSecs: import("ow").NumberPredicate & import("ow").BasePredicate<number | undefined>;
277
+ // @ts-ignore optional peer dependency or compatibility with es2022
254
278
  errorHandler: import("ow").Predicate<Function> & import("ow").BasePredicate<Function | undefined>;
279
+ // @ts-ignore optional peer dependency or compatibility with es2022
255
280
  failedRequestHandler: import("ow").Predicate<Function> & import("ow").BasePredicate<Function | undefined>;
281
+ // @ts-ignore optional peer dependency or compatibility with es2022
256
282
  handleFailedRequestFunction: import("ow").Predicate<Function> & import("ow").BasePredicate<Function | undefined>;
283
+ // @ts-ignore optional peer dependency or compatibility with es2022
257
284
  maxRequestRetries: import("ow").NumberPredicate & import("ow").BasePredicate<number | undefined>;
285
+ // @ts-ignore optional peer dependency or compatibility with es2022
258
286
  sameDomainDelaySecs: import("ow").NumberPredicate & import("ow").BasePredicate<number | undefined>;
287
+ // @ts-ignore optional peer dependency or compatibility with es2022
259
288
  maxSessionRotations: import("ow").NumberPredicate & import("ow").BasePredicate<number | undefined>;
289
+ // @ts-ignore optional peer dependency or compatibility with es2022
260
290
  maxRequestsPerCrawl: import("ow").NumberPredicate & import("ow").BasePredicate<number | undefined>;
291
+ // @ts-ignore optional peer dependency or compatibility with es2022
261
292
  autoscaledPoolOptions: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
293
+ // @ts-ignore optional peer dependency or compatibility with es2022
262
294
  sessionPoolOptions: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
295
+ // @ts-ignore optional peer dependency or compatibility with es2022
263
296
  useSessionPool: import("ow").BooleanPredicate & import("ow").BasePredicate<boolean | undefined>;
297
+ // @ts-ignore optional peer dependency or compatibility with es2022
264
298
  statusMessageLoggingInterval: import("ow").NumberPredicate & import("ow").BasePredicate<number | undefined>;
299
+ // @ts-ignore optional peer dependency or compatibility with es2022
265
300
  statusMessageCallback: import("ow").Predicate<Function> & import("ow").BasePredicate<Function | undefined>;
301
+ // @ts-ignore optional peer dependency or compatibility with es2022
266
302
  retryOnBlocked: import("ow").BooleanPredicate & import("ow").BasePredicate<boolean | undefined>;
303
+ // @ts-ignore optional peer dependency or compatibility with es2022
267
304
  minConcurrency: import("ow").NumberPredicate & import("ow").BasePredicate<number | undefined>;
305
+ // @ts-ignore optional peer dependency or compatibility with es2022
268
306
  maxConcurrency: import("ow").NumberPredicate & import("ow").BasePredicate<number | undefined>;
307
+ // @ts-ignore optional peer dependency or compatibility with es2022
269
308
  maxRequestsPerMinute: import("ow").NumberPredicate & import("ow").BasePredicate<number | undefined>;
309
+ // @ts-ignore optional peer dependency or compatibility with es2022
270
310
  keepAlive: import("ow").BooleanPredicate & import("ow").BasePredicate<boolean | undefined>;
311
+ // @ts-ignore optional peer dependency or compatibility with es2022
271
312
  log: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
313
+ // @ts-ignore optional peer dependency or compatibility with es2022
272
314
  experiments: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
315
+ // @ts-ignore optional peer dependency or compatibility with es2022
273
316
  statisticsOptions: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
274
317
  };
275
318
  /**
@@ -325,59 +368,85 @@ export declare class HttpCrawler<Context extends InternalHttpCrawlingContext<any
325
368
  * Combines the provided `requestOptions` with mandatory (non-overridable) values.
326
369
  */
327
370
  protected _getRequestOptions(request: Request, session?: Session, proxyUrl?: string, gotOptions?: OptionsInit): {
371
+ // @ts-ignore optional peer dependency or compatibility with es2022
328
372
  request?: import("got-scraping", { with: { "resolution-mode": "import" } }).RequestFunction | undefined;
373
+ // @ts-ignore optional peer dependency or compatibility with es2022
329
374
  agent?: import("got-scraping", { with: { "resolution-mode": "import" } }).Agents | undefined;
375
+ // @ts-ignore optional peer dependency or compatibility with es2022
330
376
  h2session?: import("http2").ClientHttp2Session | undefined;
331
377
  decompress?: boolean | undefined;
378
+ // @ts-ignore optional peer dependency or compatibility with es2022
332
379
  timeout?: import("got-scraping", { with: { "resolution-mode": "import" } }).Delays | undefined;
333
380
  prefixUrl?: string | URL | undefined;
381
+ // @ts-ignore optional peer dependency or compatibility with es2022
334
382
  body?: string | Buffer | import("stream").Readable | Generator<unknown, any, unknown> | AsyncGenerator<unknown, any, unknown> | import("form-data-encoder", { with: { "resolution-mode": "import" } }).FormDataLike | undefined;
335
383
  form?: Record<string, any> | undefined;
336
384
  json?: unknown;
337
385
  url?: string | URL | undefined;
386
+ // @ts-ignore optional peer dependency or compatibility with es2022
338
387
  cookieJar?: import("got-scraping", { with: { "resolution-mode": "import" } }).PromiseCookieJar | import("got-scraping", { with: { "resolution-mode": "import" } }).ToughCookieJar | undefined;
339
388
  signal?: AbortSignal | undefined;
340
389
  ignoreInvalidCookies?: boolean | undefined;
390
+ // @ts-ignore optional peer dependency or compatibility with es2022
341
391
  searchParams?: string | import("got-scraping", { with: { "resolution-mode": "import" } }).SearchParameters | URLSearchParams | undefined;
342
392
  dnsLookup?: {
393
+ // @ts-ignore optional peer dependency or compatibility with es2022
343
394
  (hostname: string, family: import("cacheable-lookup", { with: { "resolution-mode": "import" } }).IPFamily, callback: (error: NodeJS.ErrnoException | null, address: string, family: import("cacheable-lookup", { with: { "resolution-mode": "import" } }).IPFamily) => void): void;
395
+ // @ts-ignore optional peer dependency or compatibility with es2022
344
396
  (hostname: string, callback: (error: NodeJS.ErrnoException | null, address: string, family: import("cacheable-lookup", { with: { "resolution-mode": "import" } }).IPFamily) => void): void;
397
+ // @ts-ignore optional peer dependency or compatibility with es2022
345
398
  (hostname: string, options: import("cacheable-lookup", { with: { "resolution-mode": "import" } }).LookupOptions & {
346
399
  all: true;
400
+ // @ts-ignore optional peer dependency or compatibility with es2022
347
401
  }, callback: (error: NodeJS.ErrnoException | null, result: readonly import("cacheable-lookup", { with: { "resolution-mode": "import" } }).EntryObject[]) => void): void;
402
+ // @ts-ignore optional peer dependency or compatibility with es2022
348
403
  (hostname: string, options: import("cacheable-lookup", { with: { "resolution-mode": "import" } }).LookupOptions, callback: (error: NodeJS.ErrnoException | null, address: string, family: import("cacheable-lookup", { with: { "resolution-mode": "import" } }).IPFamily) => void): void;
349
404
  } | undefined;
405
+ // @ts-ignore optional peer dependency or compatibility with es2022
350
406
  dnsCache?: boolean | import("cacheable-lookup", { with: { "resolution-mode": "import" } }).default | undefined;
351
407
  context?: Record<string, unknown> | undefined;
352
408
  followRedirect?: boolean | undefined;
353
409
  maxRedirects?: number | undefined;
410
+ // @ts-ignore optional peer dependency or compatibility with es2022
354
411
  cache?: string | boolean | import("cacheable-request", { with: { "resolution-mode": "import" } }).StorageAdapter | undefined;
355
412
  throwHttpErrors?: boolean | undefined;
356
413
  username?: string | undefined;
357
414
  password?: string | undefined;
358
415
  http2?: boolean | undefined;
359
416
  allowGetBody?: boolean | undefined;
417
+ // @ts-ignore optional peer dependency or compatibility with es2022
360
418
  headers?: import("got-scraping", { with: { "resolution-mode": "import" } }).Headers | undefined;
361
419
  methodRewriting?: boolean | undefined;
420
+ // @ts-ignore optional peer dependency or compatibility with es2022
362
421
  dnsLookupIpVersion?: import("got-scraping", { with: { "resolution-mode": "import" } }).DnsLookupIpVersion;
422
+ // @ts-ignore optional peer dependency or compatibility with es2022
363
423
  parseJson?: import("got-scraping", { with: { "resolution-mode": "import" } }).ParseJsonFunction | undefined;
424
+ // @ts-ignore optional peer dependency or compatibility with es2022
364
425
  stringifyJson?: import("got-scraping", { with: { "resolution-mode": "import" } }).StringifyJsonFunction | undefined;
365
426
  localAddress?: string | undefined;
366
427
  method?: Method | undefined;
428
+ // @ts-ignore optional peer dependency or compatibility with es2022
367
429
  createConnection?: import("got-scraping", { with: { "resolution-mode": "import" } }).CreateConnectionFunction | undefined;
430
+ // @ts-ignore optional peer dependency or compatibility with es2022
368
431
  cacheOptions?: import("got-scraping", { with: { "resolution-mode": "import" } }).CacheOptions | undefined;
432
+ // @ts-ignore optional peer dependency or compatibility with es2022
369
433
  https?: import("got-scraping", { with: { "resolution-mode": "import" } }).HttpsOptions | undefined;
370
434
  encoding?: BufferEncoding | undefined;
371
435
  resolveBodyOnly?: boolean | undefined;
372
436
  isStream?: boolean | undefined;
437
+ // @ts-ignore optional peer dependency or compatibility with es2022
373
438
  responseType?: import("got-scraping", { with: { "resolution-mode": "import" } }).ResponseType | undefined;
439
+ // @ts-ignore optional peer dependency or compatibility with es2022
374
440
  pagination?: import("got-scraping", { with: { "resolution-mode": "import" } }).PaginationOptions<unknown, unknown> | undefined;
375
441
  setHost?: boolean | undefined;
376
442
  maxHeaderSize?: number | undefined;
377
443
  enableUnixSockets?: boolean | undefined;
378
444
  } & {
445
+ // @ts-ignore optional peer dependency or compatibility with es2022
379
446
  hooks?: Partial<import("got-scraping", { with: { "resolution-mode": "import" } }).Hooks> | undefined;
447
+ // @ts-ignore optional peer dependency or compatibility with es2022
380
448
  retry?: Partial<import("got-scraping", { with: { "resolution-mode": "import" } }).RetryOptions> | undefined;
449
+ // @ts-ignore optional peer dependency or compatibility with es2022
381
450
  } & import("got-scraping", { with: { "resolution-mode": "import" } }).Context & {
382
451
  isStream: true;
383
452
  };
@@ -429,6 +498,7 @@ interface RequestFunctionOptions {
429
498
  * await crawler.run();
430
499
  * ```
431
500
  */
501
+ // @ts-ignore optional peer dependency or compatibility with es2022
432
502
  export declare function createHttpRouter<Context extends HttpCrawlingContext = HttpCrawlingContext, UserData extends Dictionary = GetUserDataFromRequest<Context['request']>>(routes?: RouterRoutes<Context, UserData>): import("@crawlee/basic").RouterHandler<Context>;
433
503
  export {};
434
504
  //# sourceMappingURL=http-crawler.d.ts.map
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@crawlee/http",
3
- "version": "3.7.1-beta.4",
3
+ "version": "3.7.1-beta.6",
4
4
  "description": "The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.",
5
5
  "engines": {
6
6
  "node": ">=16.0.0"
@@ -55,9 +55,9 @@
55
55
  "dependencies": {
56
56
  "@apify/timeout": "^0.3.0",
57
57
  "@apify/utilities": "^2.7.10",
58
- "@crawlee/basic": "3.7.1-beta.4",
59
- "@crawlee/types": "3.7.1-beta.4",
60
- "@crawlee/utils": "3.7.1-beta.4",
58
+ "@crawlee/basic": "3.7.1-beta.6",
59
+ "@crawlee/types": "3.7.1-beta.6",
60
+ "@crawlee/utils": "3.7.1-beta.6",
61
61
  "@types/content-type": "^1.1.5",
62
62
  "cheerio": "^1.0.0-rc.12",
63
63
  "content-type": "^1.0.4",
@@ -75,5 +75,5 @@
75
75
  }
76
76
  }
77
77
  },
78
- "gitHead": "9957e0ac9336718d611936819914cc71e49c2d6c"
78
+ "gitHead": "01664e58799374a6acc2aa9db397566c1acc69d4"
79
79
  }