@isdk/web-fetcher 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. package/README.action.cn.md +469 -0
  2. package/README.action.md +452 -0
  3. package/README.cn.md +147 -0
  4. package/README.engine.cn.md +262 -0
  5. package/README.engine.md +262 -0
  6. package/README.md +147 -0
  7. package/dist/index.d.mts +1603 -0
  8. package/dist/index.d.ts +1603 -0
  9. package/dist/index.js +1 -0
  10. package/dist/index.mjs +1 -0
  11. package/docs/README.md +151 -0
  12. package/docs/_media/LICENSE-MIT +22 -0
  13. package/docs/_media/README.action.md +452 -0
  14. package/docs/_media/README.cn.md +147 -0
  15. package/docs/_media/README.engine.md +262 -0
  16. package/docs/classes/CheerioFetchEngine.md +1447 -0
  17. package/docs/classes/ClickAction.md +533 -0
  18. package/docs/classes/ExtractAction.md +533 -0
  19. package/docs/classes/FetchAction.md +444 -0
  20. package/docs/classes/FetchEngine.md +1230 -0
  21. package/docs/classes/FetchSession.md +111 -0
  22. package/docs/classes/FillAction.md +533 -0
  23. package/docs/classes/GetContentAction.md +533 -0
  24. package/docs/classes/GotoAction.md +537 -0
  25. package/docs/classes/PauseAction.md +533 -0
  26. package/docs/classes/PlaywrightFetchEngine.md +1437 -0
  27. package/docs/classes/SubmitAction.md +533 -0
  28. package/docs/classes/WaitForAction.md +533 -0
  29. package/docs/classes/WebFetcher.md +85 -0
  30. package/docs/enumerations/FetchActionResultStatus.md +40 -0
  31. package/docs/functions/fetchWeb.md +43 -0
  32. package/docs/globals.md +72 -0
  33. package/docs/interfaces/BaseFetchActionProperties.md +83 -0
  34. package/docs/interfaces/BaseFetchCollectorActionProperties.md +145 -0
  35. package/docs/interfaces/BaseFetcherProperties.md +206 -0
  36. package/docs/interfaces/Cookie.md +142 -0
  37. package/docs/interfaces/DispatchedEngineAction.md +60 -0
  38. package/docs/interfaces/ExtractActionProperties.md +113 -0
  39. package/docs/interfaces/FetchActionInContext.md +149 -0
  40. package/docs/interfaces/FetchActionProperties.md +125 -0
  41. package/docs/interfaces/FetchActionResult.md +55 -0
  42. package/docs/interfaces/FetchContext.md +424 -0
  43. package/docs/interfaces/FetchEngineContext.md +328 -0
  44. package/docs/interfaces/FetchMetadata.md +73 -0
  45. package/docs/interfaces/FetchResponse.md +105 -0
  46. package/docs/interfaces/FetchReturnTypeRegistry.md +57 -0
  47. package/docs/interfaces/FetchSite.md +320 -0
  48. package/docs/interfaces/FetcherOptions.md +300 -0
  49. package/docs/interfaces/GotoActionOptions.md +66 -0
  50. package/docs/interfaces/PendingEngineRequest.md +51 -0
  51. package/docs/interfaces/SubmitActionOptions.md +23 -0
  52. package/docs/interfaces/WaitForActionOptions.md +39 -0
  53. package/docs/type-aliases/BaseFetchActionOptions.md +11 -0
  54. package/docs/type-aliases/BaseFetchCollectorOptions.md +11 -0
  55. package/docs/type-aliases/BrowserEngine.md +11 -0
  56. package/docs/type-aliases/FetchActionCapabilities.md +11 -0
  57. package/docs/type-aliases/FetchActionCapabilityMode.md +11 -0
  58. package/docs/type-aliases/FetchActionOptions.md +11 -0
  59. package/docs/type-aliases/FetchEngineAction.md +18 -0
  60. package/docs/type-aliases/FetchEngineType.md +11 -0
  61. package/docs/type-aliases/FetchReturnType.md +11 -0
  62. package/docs/type-aliases/FetchReturnTypeFor.md +17 -0
  63. package/docs/type-aliases/OnFetchPauseCallback.md +23 -0
  64. package/docs/type-aliases/ResourceType.md +11 -0
  65. package/docs/variables/DefaultFetcherProperties.md +11 -0
  66. package/package.json +90 -0
@@ -0,0 +1,1603 @@
1
+ import { CrawlingContext, BasicCrawler, BasicCrawlerOptions, RequestQueue, Cookie, CheerioCrawlingContext, CheerioCrawler, CheerioCrawlerOptions, PlaywrightCrawlingContext, PlaywrightCrawler, PlaywrightCrawlerOptions } from 'crawlee';
2
+ export { Cookie } from 'crawlee';
3
+ import { EventEmitter } from 'events-ex';
4
+
5
+ declare global {
6
+ // eslint-disable-next-line @typescript-eslint/consistent-type-definitions -- It has to be an `interface` so that it can be merged.
7
+ interface SymbolConstructor {
8
+ readonly observable: symbol;
9
+ }
10
+ }
11
+
12
+ /**
13
+ Extract all optional keys from the given type.
14
+
15
+ This is useful when you want to create a new type that contains different type values for the optional keys only.
16
+
17
+ @example
18
+ ```
19
+ import type {OptionalKeysOf, Except} from 'type-fest';
20
+
21
+ interface User {
22
+ name: string;
23
+ surname: string;
24
+
25
+ luckyNumber?: number;
26
+ }
27
+
28
+ const REMOVE_FIELD = Symbol('remove field symbol');
29
+ type UpdateOperation<Entity extends object> = Except<Partial<Entity>, OptionalKeysOf<Entity>> & {
30
+ [Key in OptionalKeysOf<Entity>]?: Entity[Key] | typeof REMOVE_FIELD;
31
+ };
32
+
33
+ const update1: UpdateOperation<User> = {
34
+ name: 'Alice'
35
+ };
36
+
37
+ const update2: UpdateOperation<User> = {
38
+ name: 'Bob',
39
+ luckyNumber: REMOVE_FIELD
40
+ };
41
+ ```
42
+
43
+ @category Utilities
44
+ */
45
+ type OptionalKeysOf<BaseType extends object> =
46
+ BaseType extends unknown // For distributing `BaseType`
47
+ ? (keyof {
48
+ [Key in keyof BaseType as BaseType extends Record<Key, BaseType[Key]> ? never : Key]: never
49
+ }) & (keyof BaseType) // Intersect with `keyof BaseType` to ensure result of `OptionalKeysOf<BaseType>` is always assignable to `keyof BaseType`
50
+ : never; // Should never happen
51
+
52
+ /**
53
+ Extract all required keys from the given type.
54
+
55
+ This is useful when you want to create a new type that contains different type values for the required keys only or use the list of keys for validation purposes, etc...
56
+
57
+ @example
58
+ ```
59
+ import type {RequiredKeysOf} from 'type-fest';
60
+
61
+ declare function createValidation<Entity extends object, Key extends RequiredKeysOf<Entity> = RequiredKeysOf<Entity>>(field: Key, validator: (value: Entity[Key]) => boolean): ValidatorFn;
62
+
63
+ interface User {
64
+ name: string;
65
+ surname: string;
66
+
67
+ luckyNumber?: number;
68
+ }
69
+
70
+ const validator1 = createValidation<User>('name', value => value.length < 25);
71
+ const validator2 = createValidation<User>('surname', value => value.length < 25);
72
+ ```
73
+
74
+ @category Utilities
75
+ */
76
+ type RequiredKeysOf<BaseType extends object> =
77
+ BaseType extends unknown // For distributing `BaseType`
78
+ ? Exclude<keyof BaseType, OptionalKeysOf<BaseType>>
79
+ : never; // Should never happen
80
+
81
+ /**
82
+ Returns a boolean for whether the given type is `never`.
83
+
84
+ @link https://github.com/microsoft/TypeScript/issues/31751#issuecomment-498526919
85
+ @link https://stackoverflow.com/a/53984913/10292952
86
+ @link https://www.zhenghao.io/posts/ts-never
87
+
88
+ Useful in type utilities, such as checking if something does not occur.
89
+
90
+ @example
91
+ ```
92
+ import type {IsNever, And} from 'type-fest';
93
+
94
+ // https://github.com/andnp/SimplyTyped/blob/master/src/types/strings.ts
95
+ type AreStringsEqual<A extends string, B extends string> =
96
+ And<
97
+ IsNever<Exclude<A, B>> extends true ? true : false,
98
+ IsNever<Exclude<B, A>> extends true ? true : false
99
+ >;
100
+
101
+ type EndIfEqual<I extends string, O extends string> =
102
+ AreStringsEqual<I, O> extends true
103
+ ? never
104
+ : void;
105
+
106
+ function endIfEqual<I extends string, O extends string>(input: I, output: O): EndIfEqual<I, O> {
107
+ if (input === output) {
108
+ process.exit(0);
109
+ }
110
+ }
111
+
112
+ endIfEqual('abc', 'abc');
113
+ //=> never
114
+
115
+ endIfEqual('abc', '123');
116
+ //=> void
117
+ ```
118
+
119
+ @category Type Guard
120
+ @category Utilities
121
+ */
122
+ type IsNever<T> = [T] extends [never] ? true : false;
123
+
124
+ /**
125
+ An if-else-like type that resolves depending on whether the given type is `never`.
126
+
127
+ @see {@link IsNever}
128
+
129
+ @example
130
+ ```
131
+ import type {IfNever} from 'type-fest';
132
+
133
+ type ShouldBeTrue = IfNever<never>;
134
+ //=> true
135
+
136
+ type ShouldBeBar = IfNever<'not never', 'foo', 'bar'>;
137
+ //=> 'bar'
138
+ ```
139
+
140
+ @category Type Guard
141
+ @category Utilities
142
+ */
143
+ type IfNever<T, TypeIfNever = true, TypeIfNotNever = false> = (
144
+ IsNever<T> extends true ? TypeIfNever : TypeIfNotNever
145
+ );
146
+
147
+ // Can eventually be replaced with the built-in once this library supports
148
+ // TS5.4+ only. Tracked in https://github.com/sindresorhus/type-fest/issues/848
149
+ type NoInfer<T> = T extends infer U ? U : never;
150
+
151
+ /**
152
+ Returns a boolean for whether the given type is `any`.
153
+
154
+ @link https://stackoverflow.com/a/49928360/1490091
155
+
156
+ Useful in type utilities, such as disallowing `any`s to be passed to a function.
157
+
158
+ @example
159
+ ```
160
+ import type {IsAny} from 'type-fest';
161
+
162
+ const typedObject = {a: 1, b: 2} as const;
163
+ const anyObject: any = {a: 1, b: 2};
164
+
165
+ function get<O extends (IsAny<O> extends true ? {} : Record<string, number>), K extends keyof O = keyof O>(obj: O, key: K) {
166
+ return obj[key];
167
+ }
168
+
169
+ const typedA = get(typedObject, 'a');
170
+ //=> 1
171
+
172
+ const anyA = get(anyObject, 'a');
173
+ //=> any
174
+ ```
175
+
176
+ @category Type Guard
177
+ @category Utilities
178
+ */
179
+ type IsAny<T> = 0 extends 1 & NoInfer<T> ? true : false;
180
+
181
+ /**
182
+ Returns a boolean for whether the two given types are equal.
183
+
184
+ @link https://github.com/microsoft/TypeScript/issues/27024#issuecomment-421529650
185
+ @link https://stackoverflow.com/questions/68961864/how-does-the-equals-work-in-typescript/68963796#68963796
186
+
187
+ Use-cases:
188
+ - If you want to make a conditional branch based on the result of a comparison of two types.
189
+
190
+ @example
191
+ ```
192
+ import type {IsEqual} from 'type-fest';
193
+
194
+ // This type returns a boolean for whether the given array includes the given item.
195
+ // `IsEqual` is used to compare the given array at position 0 and the given item and then return true if they are equal.
196
+ type Includes<Value extends readonly any[], Item> =
197
+ Value extends readonly [Value[0], ...infer rest]
198
+ ? IsEqual<Value[0], Item> extends true
199
+ ? true
200
+ : Includes<rest, Item>
201
+ : false;
202
+ ```
203
+
204
+ @category Type Guard
205
+ @category Utilities
206
+ */
207
+ type IsEqual<A, B> =
208
+ (<G>() => G extends A & G | G ? 1 : 2) extends
209
+ (<G>() => G extends B & G | G ? 1 : 2)
210
+ ? true
211
+ : false;
212
+
213
+ /**
214
+ Useful to flatten the type output to improve type hints shown in editors. And also to transform an interface into a type to aid with assignability.
215
+
216
+ @example
217
+ ```
218
+ import type {Simplify} from 'type-fest';
219
+
220
+ type PositionProps = {
221
+ top: number;
222
+ left: number;
223
+ };
224
+
225
+ type SizeProps = {
226
+ width: number;
227
+ height: number;
228
+ };
229
+
230
+ // In your editor, hovering over `Props` will show a flattened object with all the properties.
231
+ type Props = Simplify<PositionProps & SizeProps>;
232
+ ```
233
+
234
+ Sometimes it is desired to pass a value as a function argument that has a different type. At first inspection it may seem assignable, and then you discover it is not because the `value`'s type definition was defined as an interface. In the following example, `fn` requires an argument of type `Record<string, unknown>`. If the value is defined as a literal, then it is assignable. And if the `value` is defined as type using the `Simplify` utility the value is assignable. But if the `value` is defined as an interface, it is not assignable because the interface is not sealed and elsewhere a non-string property could be added to the interface.
235
+
236
+ If the type definition must be an interface (perhaps it was defined in a third-party npm package), then the `value` can be defined as `const value: Simplify<SomeInterface> = ...`. Then `value` will be assignable to the `fn` argument. Or the `value` can be cast as `Simplify<SomeInterface>` if you can't re-declare the `value`.
237
+
238
+ @example
239
+ ```
240
+ import type {Simplify} from 'type-fest';
241
+
242
+ interface SomeInterface {
243
+ foo: number;
244
+ bar?: string;
245
+ baz: number | undefined;
246
+ }
247
+
248
+ type SomeType = {
249
+ foo: number;
250
+ bar?: string;
251
+ baz: number | undefined;
252
+ };
253
+
254
+ const literal = {foo: 123, bar: 'hello', baz: 456};
255
+ const someType: SomeType = literal;
256
+ const someInterface: SomeInterface = literal;
257
+
258
+ function fn(object: Record<string, unknown>): void {}
259
+
260
+ fn(literal); // Good: literal object type is sealed
261
+ fn(someType); // Good: type is sealed
262
+ fn(someInterface); // Error: Index signature for type 'string' is missing in type 'someInterface'. Because `interface` can be re-opened
263
+ fn(someInterface as Simplify<SomeInterface>); // Good: transform an `interface` into a `type`
264
+ ```
265
+
266
+ @link https://github.com/microsoft/TypeScript/issues/15300
267
+ @see SimplifyDeep
268
+ @category Object
269
+ */
270
+ type Simplify<T> = {[KeyType in keyof T]: T[KeyType]} & {};
271
+
272
+ /**
273
+ Omit any index signatures from the given object type, leaving only explicitly defined properties.
274
+
275
+ This is the counterpart of `PickIndexSignature`.
276
+
277
+ Use-cases:
278
+ - Remove overly permissive signatures from third-party types.
279
+
280
+ This type was taken from this [StackOverflow answer](https://stackoverflow.com/a/68261113/420747).
281
+
282
+ It relies on the fact that an empty object (`{}`) is assignable to an object with just an index signature, like `Record<string, unknown>`, but not to an object with explicitly defined keys, like `Record<'foo' | 'bar', unknown>`.
283
+
284
+ (The actual value type, `unknown`, is irrelevant and could be any type. Only the key type matters.)
285
+
286
+ ```
287
+ const indexed: Record<string, unknown> = {}; // Allowed
288
+
289
+ const keyed: Record<'foo' | 'bar', unknown> = {}; // Error
290
+ // => TS2739: Type '{}' is missing the following properties from type 'Record<"foo" | "bar", unknown>': foo, bar
291
+ ```
292
+
293
+ Instead of causing a type error like the above, you can also use a [conditional type](https://www.typescriptlang.org/docs/handbook/2/conditional-types.html) to test whether a type is assignable to another:
294
+
295
+ ```
296
+ type Indexed = {} extends Record<string, unknown>
297
+ ? '✅ `{}` is assignable to `Record<string, unknown>`'
298
+ : '❌ `{}` is NOT assignable to `Record<string, unknown>`';
299
+ // => '✅ `{}` is assignable to `Record<string, unknown>`'
300
+
301
+ type Keyed = {} extends Record<'foo' | 'bar', unknown>
302
+ ? "✅ `{}` is assignable to `Record<'foo' | 'bar', unknown>`"
303
+ : "❌ `{}` is NOT assignable to `Record<'foo' | 'bar', unknown>`";
304
+ // => "❌ `{}` is NOT assignable to `Record<'foo' | 'bar', unknown>`"
305
+ ```
306
+
307
+ Using a [mapped type](https://www.typescriptlang.org/docs/handbook/2/mapped-types.html#further-exploration), you can then check for each `KeyType` of `ObjectType`...
308
+
309
+ ```
310
+ import type {OmitIndexSignature} from 'type-fest';
311
+
312
+ type OmitIndexSignature<ObjectType> = {
313
+ [KeyType in keyof ObjectType // Map each key of `ObjectType`...
314
+ ]: ObjectType[KeyType]; // ...to its original value, i.e. `OmitIndexSignature<Foo> == Foo`.
315
+ };
316
+ ```
317
+
318
+ ...whether an empty object (`{}`) would be assignable to an object with that `KeyType` (`Record<KeyType, unknown>`)...
319
+
320
+ ```
321
+ import type {OmitIndexSignature} from 'type-fest';
322
+
323
+ type OmitIndexSignature<ObjectType> = {
324
+ [KeyType in keyof ObjectType
325
+ // Is `{}` assignable to `Record<KeyType, unknown>`?
326
+ as {} extends Record<KeyType, unknown>
327
+ ? ... // ✅ `{}` is assignable to `Record<KeyType, unknown>`
328
+ : ... // ❌ `{}` is NOT assignable to `Record<KeyType, unknown>`
329
+ ]: ObjectType[KeyType];
330
+ };
331
+ ```
332
+
333
+ If `{}` is assignable, it means that `KeyType` is an index signature and we want to remove it. If it is not assignable, `KeyType` is a "real" key and we want to keep it.
334
+
335
+ @example
336
+ ```
337
+ import type {OmitIndexSignature} from 'type-fest';
338
+
339
+ interface Example {
340
+ // These index signatures will be removed.
341
+ [x: string]: any
342
+ [x: number]: any
343
+ [x: symbol]: any
344
+ [x: `head-${string}`]: string
345
+ [x: `${string}-tail`]: string
346
+ [x: `head-${string}-tail`]: string
347
+ [x: `${bigint}`]: string
348
+ [x: `embedded-${number}`]: string
349
+
350
+ // These explicitly defined keys will remain.
351
+ foo: 'bar';
352
+ qux?: 'baz';
353
+ }
354
+
355
+ type ExampleWithoutIndexSignatures = OmitIndexSignature<Example>;
356
+ // => { foo: 'bar'; qux?: 'baz' | undefined; }
357
+ ```
358
+
359
+ @see PickIndexSignature
360
+ @category Object
361
+ */
362
+ type OmitIndexSignature<ObjectType> = {
363
+ [KeyType in keyof ObjectType as {} extends Record<KeyType, unknown>
364
+ ? never
365
+ : KeyType]: ObjectType[KeyType];
366
+ };
367
+
368
+ /**
369
+ Pick only index signatures from the given object type, leaving out all explicitly defined properties.
370
+
371
+ This is the counterpart of `OmitIndexSignature`.
372
+
373
+ @example
374
+ ```
375
+ import type {PickIndexSignature} from 'type-fest';
376
+
377
+ declare const symbolKey: unique symbol;
378
+
379
+ type Example = {
380
+ // These index signatures will remain.
381
+ [x: string]: unknown;
382
+ [x: number]: unknown;
383
+ [x: symbol]: unknown;
384
+ [x: `head-${string}`]: string;
385
+ [x: `${string}-tail`]: string;
386
+ [x: `head-${string}-tail`]: string;
387
+ [x: `${bigint}`]: string;
388
+ [x: `embedded-${number}`]: string;
389
+
390
+ // These explicitly defined keys will be removed.
391
+ ['kebab-case-key']: string;
392
+ [symbolKey]: string;
393
+ foo: 'bar';
394
+ qux?: 'baz';
395
+ };
396
+
397
+ type ExampleIndexSignature = PickIndexSignature<Example>;
398
+ // {
399
+ // [x: string]: unknown;
400
+ // [x: number]: unknown;
401
+ // [x: symbol]: unknown;
402
+ // [x: `head-${string}`]: string;
403
+ // [x: `${string}-tail`]: string;
404
+ // [x: `head-${string}-tail`]: string;
405
+ // [x: `${bigint}`]: string;
406
+ // [x: `embedded-${number}`]: string;
407
+ // }
408
+ ```
409
+
410
+ @see OmitIndexSignature
411
+ @category Object
412
+ */
413
+ type PickIndexSignature<ObjectType> = {
414
+ [KeyType in keyof ObjectType as {} extends Record<KeyType, unknown>
415
+ ? KeyType
416
+ : never]: ObjectType[KeyType];
417
+ };
418
+
419
+ // Merges two objects without worrying about index signatures.
420
+ type SimpleMerge<Destination, Source> = {
421
+ [Key in keyof Destination as Key extends keyof Source ? never : Key]: Destination[Key];
422
+ } & Source;
423
+
424
+ /**
425
+ Merge two types into a new type. Keys of the second type override keys of the first type.
426
+
427
+ @example
428
+ ```
429
+ import type {Merge} from 'type-fest';
430
+
431
+ interface Foo {
432
+ [x: string]: unknown;
433
+ [x: number]: unknown;
434
+ foo: string;
435
+ bar: symbol;
436
+ }
437
+
438
+ type Bar = {
439
+ [x: number]: number;
440
+ [x: symbol]: unknown;
441
+ bar: Date;
442
+ baz: boolean;
443
+ };
444
+
445
+ export type FooBar = Merge<Foo, Bar>;
446
+ // => {
447
+ // [x: string]: unknown;
448
+ // [x: number]: number;
449
+ // [x: symbol]: unknown;
450
+ // foo: string;
451
+ // bar: Date;
452
+ // baz: boolean;
453
+ // }
454
+ ```
455
+
456
+ @category Object
457
+ */
458
+ type Merge<Destination, Source> =
459
+ Simplify<
460
+ SimpleMerge<PickIndexSignature<Destination>, PickIndexSignature<Source>>
461
+ & SimpleMerge<OmitIndexSignature<Destination>, OmitIndexSignature<Source>>
462
+ >;
463
+
464
+ /**
465
+ An if-else-like type that resolves depending on whether the given type is `any`.
466
+
467
+ @see {@link IsAny}
468
+
469
+ @example
470
+ ```
471
+ import type {IfAny} from 'type-fest';
472
+
473
+ type ShouldBeTrue = IfAny<any>;
474
+ //=> true
475
+
476
+ type ShouldBeBar = IfAny<'not any', 'foo', 'bar'>;
477
+ //=> 'bar'
478
+ ```
479
+
480
+ @category Type Guard
481
+ @category Utilities
482
+ */
483
+ type IfAny<T, TypeIfAny = true, TypeIfNotAny = false> = (
484
+ IsAny<T> extends true ? TypeIfAny : TypeIfNotAny
485
+ );
486
+
487
+ // Should never happen
488
+
489
+ /**
490
+ An if-else-like type that resolves depending on whether the given type is `any` or `never`.
491
+
492
+ @example
493
+ ```
494
+ // When `T` is NOT `any` or `never` (like `string`) => Returns `IfNotAnyOrNever` branch
495
+ type A = IfNotAnyOrNever<string, 'VALID', 'IS_ANY', 'IS_NEVER'>;
496
+ //=> 'VALID'
497
+
498
+ // When `T` is `any` => Returns `IfAny` branch
499
+ type B = IfNotAnyOrNever<any, 'VALID', 'IS_ANY', 'IS_NEVER'>;
500
+ //=> 'IS_ANY'
501
+
502
+ // When `T` is `never` => Returns `IfNever` branch
503
+ type C = IfNotAnyOrNever<never, 'VALID', 'IS_ANY', 'IS_NEVER'>;
504
+ //=> 'IS_NEVER'
505
+ ```
506
+ */
507
+ type IfNotAnyOrNever<T, IfNotAnyOrNever, IfAny = any, IfNever = never> =
508
+ IsAny<T> extends true
509
+ ? IfAny
510
+ : IsNever<T> extends true
511
+ ? IfNever
512
+ : IfNotAnyOrNever;
513
+
514
+ /**
515
+ Merges user specified options with default options.
516
+
517
+ @example
518
+ ```
519
+ type PathsOptions = {maxRecursionDepth?: number; leavesOnly?: boolean};
520
+ type DefaultPathsOptions = {maxRecursionDepth: 10; leavesOnly: false};
521
+ type SpecifiedOptions = {leavesOnly: true};
522
+
523
+ type Result = ApplyDefaultOptions<PathsOptions, DefaultPathsOptions, SpecifiedOptions>;
524
+ //=> {maxRecursionDepth: 10; leavesOnly: true}
525
+ ```
526
+
527
+ @example
528
+ ```
529
+ // Complains if default values are not provided for optional options
530
+
531
+ type PathsOptions = {maxRecursionDepth?: number; leavesOnly?: boolean};
532
+ type DefaultPathsOptions = {maxRecursionDepth: 10};
533
+ type SpecifiedOptions = {};
534
+
535
+ type Result = ApplyDefaultOptions<PathsOptions, DefaultPathsOptions, SpecifiedOptions>;
536
+ // ~~~~~~~~~~~~~~~~~~~
537
+ // Property 'leavesOnly' is missing in type 'DefaultPathsOptions' but required in type '{ maxRecursionDepth: number; leavesOnly: boolean; }'.
538
+ ```
539
+
540
+ @example
541
+ ```
542
+ // Complains if an option's default type does not conform to the expected type
543
+
544
+ type PathsOptions = {maxRecursionDepth?: number; leavesOnly?: boolean};
545
+ type DefaultPathsOptions = {maxRecursionDepth: 10; leavesOnly: 'no'};
546
+ type SpecifiedOptions = {};
547
+
548
+ type Result = ApplyDefaultOptions<PathsOptions, DefaultPathsOptions, SpecifiedOptions>;
549
+ // ~~~~~~~~~~~~~~~~~~~
550
+ // Types of property 'leavesOnly' are incompatible. Type 'string' is not assignable to type 'boolean'.
551
+ ```
552
+
553
+ @example
554
+ ```
555
+ // Complains if an option's specified type does not conform to the expected type
556
+
557
+ type PathsOptions = {maxRecursionDepth?: number; leavesOnly?: boolean};
558
+ type DefaultPathsOptions = {maxRecursionDepth: 10; leavesOnly: false};
559
+ type SpecifiedOptions = {leavesOnly: 'yes'};
560
+
561
+ type Result = ApplyDefaultOptions<PathsOptions, DefaultPathsOptions, SpecifiedOptions>;
562
+ // ~~~~~~~~~~~~~~~~
563
+ // Types of property 'leavesOnly' are incompatible. Type 'string' is not assignable to type 'boolean'.
564
+ ```
565
+ */
566
+ type ApplyDefaultOptions<
567
+ Options extends object,
568
+ Defaults extends Simplify<Omit<Required<Options>, RequiredKeysOf<Options>> & Partial<Record<RequiredKeysOf<Options>, never>>>,
569
+ SpecifiedOptions extends Options,
570
+ > =
571
+ IfAny<SpecifiedOptions, Defaults,
572
+ IfNever<SpecifiedOptions, Defaults,
573
+ Simplify<Merge<Defaults, {
574
+ [Key in keyof SpecifiedOptions
575
+ as Key extends OptionalKeysOf<Options>
576
+ ? Extract<SpecifiedOptions[Key], undefined> extends never
577
+ ? Key
578
+ : never
579
+ : Key
580
+ ]: SpecifiedOptions[Key]
581
+ }> & Required<Options>> // `& Required<Options>` ensures that `ApplyDefaultOptions<SomeOption, ...>` is always assignable to `Required<SomeOption>`
582
+ >>;
583
+
584
+ /**
585
+ Filter out keys from an object.
586
+
587
+ Returns `never` if `ExcludeType` is strictly equal to `KeyType`.
588
+ Returns `never` if `KeyType` extends `ExcludeType`.
589
+ Returns `KeyType` otherwise.
590
+
591
+ @example
592
+ ```
593
+ type Filtered = Filter<'foo', 'foo'>;
594
+ //=> never
595
+ ```
596
+
597
+ @example
598
+ ```
599
+ type Filtered = Filter<'bar', string>;
600
+ //=> never
601
+ ```
602
+
603
+ @example
604
+ ```
605
+ type Filtered = Filter<'bar', 'foo'>;
606
+ //=> 'bar'
607
+ ```
608
+
609
+ @see {Except}
610
+ */
611
+ type Filter<KeyType, ExcludeType> = IsEqual<KeyType, ExcludeType> extends true ? never : (KeyType extends ExcludeType ? never : KeyType);
612
+
613
+ type ExceptOptions = {
614
+ /**
615
+ Disallow assigning non-specified properties.
616
+
617
+ Note that any omitted properties in the resulting type will be present in autocomplete as `undefined`.
618
+
619
+ @default false
620
+ */
621
+ requireExactProps?: boolean;
622
+ };
623
+
624
+ type DefaultExceptOptions = {
625
+ requireExactProps: false;
626
+ };
627
+
628
+ /**
629
+ Create a type from an object type without certain keys.
630
+
631
+ We recommend setting the `requireExactProps` option to `true`.
632
+
633
+ This type is a stricter version of [`Omit`](https://www.typescriptlang.org/docs/handbook/release-notes/typescript-3-5.html#the-omit-helper-type). The `Omit` type does not restrict the omitted keys to be keys present on the given type, while `Except` does. The benefits of a stricter type are avoiding typos and allowing the compiler to pick up on rename refactors automatically.
634
+
635
+ This type was proposed to the TypeScript team, which declined it, saying they prefer that libraries implement stricter versions of the built-in types ([microsoft/TypeScript#30825](https://github.com/microsoft/TypeScript/issues/30825#issuecomment-523668235)).
636
+
637
+ @example
638
+ ```
639
+ import type {Except} from 'type-fest';
640
+
641
+ type Foo = {
642
+ a: number;
643
+ b: string;
644
+ };
645
+
646
+ type FooWithoutA = Except<Foo, 'a'>;
647
+ //=> {b: string}
648
+
649
+ const fooWithoutA: FooWithoutA = {a: 1, b: '2'};
650
+ //=> errors: 'a' does not exist in type '{ b: string; }'
651
+
652
+ type FooWithoutB = Except<Foo, 'b', {requireExactProps: true}>;
653
+ //=> {a: number} & Partial<Record<"b", never>>
654
+
655
+ const fooWithoutB: FooWithoutB = {a: 1, b: '2'};
656
+ //=> errors at 'b': Type 'string' is not assignable to type 'undefined'.
657
+
658
+ // The `Omit` utility type doesn't work when omitting specific keys from objects containing index signatures.
659
+
660
+ // Consider the following example:
661
+
662
+ type UserData = {
663
+ [metadata: string]: string;
664
+ email: string;
665
+ name: string;
666
+ role: 'admin' | 'user';
667
+ };
668
+
669
+ // `Omit` clearly doesn't behave as expected in this case:
670
+ type PostPayload = Omit<UserData, 'email'>;
671
+ //=> type PostPayload = { [x: string]: string; [x: number]: string; }
672
+
673
+ // In situations like this, `Except` works better.
674
+ // It simply removes the `email` key while preserving all the other keys.
675
+ type PostPayload = Except<UserData, 'email'>;
676
+ //=> type PostPayload = { [x: string]: string; name: string; role: 'admin' | 'user'; }
677
+ ```
678
+
679
+ @category Object
680
+ */
681
+ type Except<ObjectType, KeysType extends keyof ObjectType, Options extends ExceptOptions = {}> =
682
+ _Except<ObjectType, KeysType, ApplyDefaultOptions<ExceptOptions, DefaultExceptOptions, Options>>;
683
+
684
+ type _Except<ObjectType, KeysType extends keyof ObjectType, Options extends Required<ExceptOptions>> = {
685
+ [KeyType in keyof ObjectType as Filter<KeyType, KeysType>]: ObjectType[KeyType];
686
+ } & (Options['requireExactProps'] extends true
687
+ ? Partial<Record<KeysType, never>>
688
+ : {});
689
+
690
+ /**
691
+ Create a type that requires at least one of the given keys. The remaining keys are kept as is.
692
+
693
+ @example
694
+ ```
695
+ import type {RequireAtLeastOne} from 'type-fest';
696
+
697
+ type Responder = {
698
+ text?: () => string;
699
+ json?: () => string;
700
+ secure?: boolean;
701
+ };
702
+
703
+ const responder: RequireAtLeastOne<Responder, 'text' | 'json'> = {
704
+ json: () => '{"message": "ok"}',
705
+ secure: true
706
+ };
707
+ ```
708
+
709
+ @category Object
710
+ */
711
+ type RequireAtLeastOne<
712
+ ObjectType,
713
+ KeysType extends keyof ObjectType = keyof ObjectType,
714
+ > =
715
+ IfNotAnyOrNever<ObjectType,
716
+ IfNever<KeysType,
717
+ never,
718
+ _RequireAtLeastOne<ObjectType, IfAny<KeysType, keyof ObjectType, KeysType>>
719
+ >>;
720
+
721
+ type _RequireAtLeastOne<
722
+ ObjectType,
723
+ KeysType extends keyof ObjectType,
724
+ > = {
725
+ // For each `Key` in `KeysType` make a mapped type:
726
+ [Key in KeysType]-?: Required<Pick<ObjectType, Key>> & // 1. Make `Key`'s type required
727
+ // 2. Make all other keys in `KeysType` optional
728
+ Partial<Pick<ObjectType, Exclude<KeysType, Key>>>;
729
+ }[KeysType] &
730
+ // 3. Add the remaining keys not in `KeysType`
731
+ Except<ObjectType, KeysType>;
732
+
733
+ type ExtractSchema = ExtractObjectSchema | ExtractArraySchema | ExtractValueSchema;
734
+ interface ExtractValueSchema {
735
+ type?: 'string' | 'number' | 'boolean' | 'html';
736
+ selector?: string;
737
+ attribute?: string;
738
+ has?: string;
739
+ exclude?: string;
740
+ }
741
+ interface ExtractArraySchema {
742
+ type: 'array';
743
+ selector: string;
744
+ has?: string;
745
+ exclude?: string;
746
+ items?: ExtractSchema;
747
+ attribute?: string;
748
+ }
749
+ interface ExtractObjectSchema {
750
+ type: 'object';
751
+ selector?: string;
752
+ has?: string;
753
+ exclude?: string;
754
+ properties: {
755
+ [key: string]: ExtractSchema;
756
+ };
757
+ }
758
+
759
+ interface PromiseLock extends Promise<void> {
760
+ release: () => void;
761
+ }
762
+
763
+ /**
764
+ * Options for the {@link FetchEngine.goto} action, allowing configuration of HTTP method, payload, headers, and navigation behavior.
765
+ *
766
+ * @remarks
767
+ * Used when navigating to a URL to specify additional parameters beyond the basic URL.
768
+ *
769
+ * @example
770
+ * ```ts
771
+ * await engine.goto('https://example.com', {
772
+ * method: 'POST',
773
+ * payload: { username: 'user', password: 'pass' },
774
+ * headers: { 'Content-Type': 'application/json' },
775
+ * waitUntil: 'networkidle'
776
+ * });
777
+ * ```
778
+ */
779
+ interface GotoActionOptions {
780
+ method?: 'GET' | 'HEAD' | 'POST' | 'PUT' | 'DELETE' | 'TRACE' | 'OPTIONS' | 'CONNECT' | 'PATCH';
781
+ payload?: any;
782
+ headers?: Record<string, string>;
783
+ waitUntil?: 'load' | 'domcontentloaded' | 'networkidle' | 'commit';
784
+ timeoutMs?: number;
785
+ }
786
+ /**
787
+ * Options for the {@link FetchEngine.waitFor} action, specifying conditions to wait for before continuing.
788
+ *
789
+ * @remarks
790
+ * Controls timing behavior for interactions, allowing waiting for elements, time intervals, or network conditions.
791
+ */
792
+ interface WaitForActionOptions {
793
+ ms?: number;
794
+ selector?: string;
795
+ networkIdle?: boolean;
796
+ }
797
+ /**
798
+ * Options for the {@link FetchEngine.submit} action, configuring form submission behavior.
799
+ *
800
+ * @remarks
801
+ * Specifies encoding type for form submissions, particularly relevant for JSON-based APIs.
802
+ */
803
+ interface SubmitActionOptions {
804
+ enctype?: 'application/x-www-form-urlencoded' | 'application/json' | 'multipart/form-data';
805
+ }
806
+ /**
807
+ * Union type representing all possible engine actions that can be dispatched.
808
+ *
809
+ * @remarks
810
+ * Defines the command structure processed during page interactions. Each action type corresponds to
811
+ * a specific user interaction or navigation command within the action loop architecture.
812
+ */
813
+ type FetchEngineAction = {
814
+ type: 'click';
815
+ selector: string;
816
+ } | {
817
+ type: 'fill';
818
+ selector: string;
819
+ value: string;
820
+ } | {
821
+ type: 'waitFor';
822
+ options?: WaitForActionOptions;
823
+ } | {
824
+ type: 'submit';
825
+ selector?: any;
826
+ options?: SubmitActionOptions;
827
+ } | {
828
+ type: 'getContent';
829
+ } | {
830
+ type: 'navigate';
831
+ url: string;
832
+ opts?: GotoActionOptions;
833
+ } | {
834
+ type: 'extract';
835
+ schema: ExtractSchema;
836
+ } | {
837
+ type: 'pause';
838
+ message?: string;
839
+ } | {
840
+ type: 'dispose';
841
+ };
842
+ /**
843
+ * Represents an action that has been dispatched and is awaiting execution in the active page context.
844
+ *
845
+ * @remarks
846
+ * Connects the action request with its resolution mechanism. Used internally by the action dispatch system
847
+ * to handle promises while maintaining the page context validity window.
848
+ */
849
+ interface DispatchedEngineAction {
850
+ action: FetchEngineAction;
851
+ resolve: (value?: any) => void;
852
+ reject: (reason?: any) => void;
853
+ }
854
+ /**
855
+ * Represents a pending navigation request awaiting resolution.
856
+ *
857
+ * @remarks
858
+ * Tracks navigation requests that have been queued but not yet processed by the request handler.
859
+ */
860
+ interface PendingEngineRequest {
861
+ resolve: (value: any) => void;
862
+ reject: (reason?: any) => void;
863
+ }
864
+ /**
865
+ * Abstract base class for all fetch engines, providing a unified interface for web content fetching and interaction.
866
+ *
867
+ * @remarks
868
+ * The `FetchEngine` class serves as the foundation for concrete engine implementations (e.g., `CheerioFetchEngine`,
869
+ * `PlaywrightFetchEngine`). It abstracts underlying crawling technology and provides a consistent API for navigation,
870
+ * content retrieval, and user interaction.
871
+ *
872
+ * The engine architecture uses an event-driven action loop to bridge Crawlee's stateless request handling with
873
+ * the need for a stateful, sequential API for page interactions. This solves the critical challenge of maintaining
874
+ * page context validity across asynchronous operations.
875
+ *
876
+ * @example
877
+ * ```ts
878
+ * import "./playwright"; // Import to register the Playwright browser engine
879
+ * const engine = await FetchEngine.create(context, { engine: 'browser' });
880
+ * await engine.goto('https://example.com');
881
+ * await engine.fill('#username', 'user');
882
+ * await engine.click('#submit');
883
+ * const response = await engine.getContent();
884
+ * ```
885
+ */
886
+ type AnyFetchEngine = FetchEngine<any, any, any>;
887
+ type AnyFetchEngineCtor = new (...args: any[]) => AnyFetchEngine;
888
+ declare abstract class FetchEngine<TContext extends CrawlingContext = any, TCrawler extends BasicCrawler<TContext> = any, TOptions extends BasicCrawlerOptions<TContext> = any> {
889
+ private static registry;
890
+ /**
891
+ * Registers a fetch engine implementation with the global registry.
892
+ *
893
+ * @param engineClass - The engine class to register
894
+ * @throws {Error} When engine class lacks static `id` or ID is already registered
895
+ *
896
+ * @example
897
+ * ```ts
898
+ * FetchEngine.register(CheerioFetchEngine);
899
+ * ```
900
+ */
901
+ static register(engineClass: AnyFetchEngineCtor): void;
902
+ /**
903
+ * Retrieves a fetch engine implementation by its unique ID.
904
+ *
905
+ * @param id - The ID of the engine to retrieve
906
+ * @returns Engine class if found, otherwise `undefined`
907
+ */
908
+ static get(id: string): AnyFetchEngineCtor | undefined;
909
+ /**
910
+ * Retrieves a fetch engine implementation by execution mode.
911
+ *
912
+ * @param mode - Execution mode (`'http'` or `'browser'`)
913
+ * @returns Engine class if found, otherwise `undefined`
914
+ */
915
+ static getByMode(mode: FetchEngineType): AnyFetchEngineCtor | undefined;
916
+ /**
917
+ * Factory method to create and initialize a fetch engine instance.
918
+ *
919
+ * @param ctx - Fetch engine context
920
+ * @param options - Configuration options
921
+ * @returns Initialized fetch engine instance
922
+ * @throws {Error} When no suitable engine implementation is found
923
+ *
924
+ * @remarks
925
+ * Primary entry point for engine creation. Selects the appropriate implementation based on the `engine` name given in the options or the context.
926
+ */
927
+ static create(ctx: FetchEngineContext, options?: BaseFetcherProperties): Promise<AnyFetchEngine | undefined>;
928
+ /**
929
+ * Unique identifier for the engine implementation.
930
+ *
931
+ * @remarks
932
+ * Must be defined by concrete implementations. Used for registration and lookup in engine registry.
933
+ */
934
+ static readonly id: string;
935
+ /**
936
+ * Execution mode of the engine (`'http'` or `'browser'`).
937
+ *
938
+ * @remarks
939
+ * Must be defined by concrete implementations. Indicates whether engine operates at HTTP level or uses full browser.
940
+ */
941
+ static readonly mode: FetchEngineType;
942
+ protected ctx?: FetchEngineContext;
943
+ protected opts?: BaseFetcherProperties;
944
+ protected crawler?: TCrawler;
945
+ protected isCrawlerReady?: boolean;
946
+ protected requestQueue?: RequestQueue;
947
+ protected hdrs: Record<string, string>;
948
+ protected jar: Cookie[];
949
+ protected pendingRequests: Map<string, PendingEngineRequest>;
950
+ protected requestCounter: number;
951
+ protected actionEmitter: EventEmitter;
952
+ protected isPageActive: boolean;
953
+ protected navigationLock: PromiseLock;
954
+ protected lastResponse?: FetchResponse;
955
+ protected blockedTypes: Set<string>;
956
+ protected _cleanup?(): Promise<void>;
957
+ protected abstract _querySelectorAll(context: any, selector: string): Promise<any[]>;
958
+ protected abstract _extractValue(schema: ExtractValueSchema, context: any): Promise<any>;
959
+ protected _extract(schema: ExtractSchema, context: any): Promise<any>;
960
+ /**
961
+ * Creates the crawler instance for the specific engine implementation.
962
+ * @param options - The final crawler options.
963
+ * @internal
964
+ */
965
+ protected abstract _createCrawler(options: TOptions): TCrawler;
966
+ /**
967
+ * Gets the crawler-specific options from the subclass.
968
+ * @param ctx - The fetch engine context.
969
+ * @internal
970
+ */
971
+ protected abstract _getSpecificCrawlerOptions(ctx: FetchEngineContext): Promise<Partial<TOptions>> | Partial<TOptions>;
972
+ /**
973
+ * Abstract method for building standard [FetchResponse] from Crawlee context.
974
+ *
975
+ * @param context - Crawlee crawling context
976
+ * @returns Promise resolving to [FetchResponse] object
977
+ *
978
+ * @remarks
979
+ * Converts implementation-specific context (Playwright `page` or Cheerio `$`) to standardized response.
980
+ * @internal
981
+ */
982
+ protected abstract buildResponse(context: TContext): Promise<FetchResponse>;
983
+ /**
984
+ * Abstract method for executing action within current page context.
985
+ *
986
+ * @param context - Crawlee crawling context
987
+ * @param action - Action to execute
988
+ * @returns Promise resolving to action result
989
+ *
990
+ * @remarks
991
+ * Handles specific user interactions using underlying technology (Playwright/Cheerio).
992
+ * @internal
993
+ */
994
+ protected abstract executeAction(context: TContext, action: FetchEngineAction): Promise<any>;
995
+ /**
996
+ * Navigates to the specified URL.
997
+ *
998
+ * @param url - Target URL
999
+ * @param params - Navigation options
1000
+ * @returns Promise resolving when navigation completes
1001
+ *
1002
+ * @example
1003
+ * ```ts
1004
+ * await engine.goto('https://example.com');
1005
+ * ```
1006
+ */
1007
+ abstract goto(url: string, params?: GotoActionOptions): Promise<void | FetchResponse>;
1008
+ /**
1009
+ * Waits for specified condition before continuing.
1010
+ *
1011
+ * @param params - Wait conditions
1012
+ * @returns Promise resolving when wait condition is met
1013
+ *
1014
+ * @example
1015
+ * ```ts
1016
+ * await engine.waitFor({ ms: 1000 }); // Wait 1 second
1017
+ * await engine.waitFor({ selector: '#content' }); // Wait for element
1018
+ * ```
1019
+ */
1020
+ waitFor(params?: WaitForActionOptions): Promise<void>;
1021
+ /**
1022
+ * Clicks on element matching selector.
1023
+ *
1024
+ * @param selector - CSS selector of element to click
1025
+ * @returns Promise resolving when click is processed
1026
+ * @throws {Error} When no active page context exists
1027
+ */
1028
+ click(selector: string): Promise<void>;
1029
+ /**
1030
+ * Fills input element with specified value.
1031
+ *
1032
+ * @param selector - CSS selector of input element
1033
+ * @param value - Value to fill
1034
+ * @returns Promise resolving when fill operation completes
1035
+ * @throws {Error} When no active page context exists
1036
+ */
1037
+ fill(selector: string, value: string): Promise<void>;
1038
+ /**
1039
+ * Submits a form.
1040
+ *
1041
+ * @param selector - Optional form/submit button selector
1042
+ * @param options - Submission options
1043
+ * @returns Promise resolving when form is submitted
1044
+ * @throws {Error} When no active page context exists
1045
+ */
1046
+ submit(selector?: any, options?: SubmitActionOptions): Promise<void>;
1047
+ /**
1048
+ * Pauses execution, allowing for manual intervention or inspection.
1049
+ *
1050
+ * @param message - Optional message to display during pause
1051
+ * @returns Promise resolving when execution is resumed
1052
+ * @throws {Error} When no active page context exists
1053
+ */
1054
+ pause(message?: string): Promise<void>;
1055
+ /**
1056
+ * Extracts structured data from the current page content.
1057
+ *
1058
+ * @param schema - An object defining the data to extract.
1059
+ * @returns A promise that resolves to an object with the extracted data.
1060
+ */
1061
+ extract<T>(schema: ExtractSchema): Promise<T>;
1062
+ protected _normalizeSchema(schema: ExtractSchema): ExtractSchema;
1063
+ /**
1064
+ * Gets the unique identifier of this engine implementation.
1065
+ */
1066
+ get id(): string;
1067
+ /**
1068
+ * Gets the execution mode of this engine (`'http'` or `'browser'`).
1069
+ */
1070
+ get mode(): FetchEngineType;
1071
+ /**
1072
+ * Gets the fetch engine context associated with this instance.
1073
+ */
1074
+ get context(): FetchEngineContext | undefined;
1075
+ /**
1076
+ * Initializes the fetch engine with provided context and options.
1077
+ *
1078
+ * @param context - Fetch engine context
1079
+ * @param options - Configuration options
1080
+ * @returns Promise resolving when initialization completes
1081
+ *
1082
+ * @remarks
1083
+ * Sets up internal state and calls implementation-specific `_initialize` method.
1084
+ * Automatically called when creating engine via `FetchEngine.create()`.
1085
+ */
1086
+ initialize(context: FetchEngineContext, options?: BaseFetcherProperties): Promise<void>;
1087
+ cleanup(): Promise<void>;
1088
+ /**
1089
+ * Executes all pending fetch engine actions within the current Crawlee request handler context.
1090
+ *
1091
+ * **Critical Execution Constraint**: This method **MUST** be awaited within the synchronous execution path
1092
+ * of Crawlee's [requestHandler](https://crawlee.dev/js/api/basic-crawler) (before any `await` that yields control back to the event loop).
1093
+ *
1094
+ * ### Why This Constraint Exists
1095
+ * - Crawlee's page context ([PlaywrightCrawler](https://crawlee.dev/js/api/playwright-crawler)'s `page` or [CheerioCrawler](https://crawlee.dev/js/api/cheerio-crawler)'s `$`)
1096
+ * is **only valid during the synchronous execution phase** of the request handler
1097
+ * - After any `await` (e.g., `await page.goto()`), the page context may be destroyed
1098
+ * due to Crawlee's internal resource management
1099
+ *
1100
+ * ### How It Works
1101
+ * 1. Processes all actions queued via {@link dispatchAction} (click, fill, submit, etc.)
1102
+ * 2. Maintains the page context validity window via {@link isPageActive} lifecycle flag
1103
+ * 3. Automatically cleans up event listeners upon completion
1104
+ *
1105
+ * Usage see {@link _sharedRequestHandler}
1106
+ * @see {@link _sharedRequestHandler}
1107
+ * @param context The active Crawlee crawling context containing the page/$ object
1108
+ * @throws {Error} If called outside valid page context window (`!this.isPageActive`)
1109
+ * @internal Engine infrastructure method - not for direct consumer use
1110
+ */
1111
+ protected _executePendingActions(context: TContext): Promise<void>;
1112
+ protected _sharedRequestHandler(context: TContext): Promise<void>;
1113
+ protected _sharedFailedRequestHandler(context: TContext, error?: Error): Promise<void>;
1114
+ protected dispatchAction<T>(action: FetchEngineAction): Promise<T>;
1115
+ private _requestHandler;
1116
+ private _failedRequestHandler;
1117
+ protected _commonCleanup(): Promise<void>;
1118
+ /**
1119
+ * Blocks specified resource types from loading.
1120
+ *
1121
+ * @param types - Resource types to block
1122
+ * @param overwrite - Whether to replace existing blocked types
1123
+ * @returns Number of blocked resource types
1124
+ *
1125
+ * @example
1126
+ * ```ts
1127
+ * await engine.blockResources(['image', 'stylesheet']);
1128
+ * await engine.blockResources(['script'], true); // Replace existing
1129
+ * ```
1130
+ */
1131
+ blockResources(types: ResourceType[], overwrite?: boolean): Promise<number>;
1132
+ /**
1133
+ * Gets content of current page.
1134
+ *
1135
+ * @returns Promise resolving to fetch response
1136
+ * @throws {Error} When no content has been fetched yet
1137
+ */
1138
+ getContent(): Promise<FetchResponse>;
1139
+ /**
1140
+ * Manages HTTP headers for requests with multiple overloads.
1141
+ *
1142
+ * @overload
1143
+ * Gets all headers.
1144
+ * @returns All headers as record
1145
+ *
1146
+ * @overload
1147
+ * Gets specific header value.
1148
+ * @param name - Header name
1149
+ * @returns Header value
1150
+ *
1151
+ * @overload
1152
+ * Sets multiple headers.
1153
+ * @param headers - Headers to set
1154
+ * @param replaced - Whether to replace all existing headers
1155
+ * @returns `true` if successful
1156
+ *
1157
+ * @overload
1158
+ * Sets single header.
1159
+ * @param name - Header name
1160
+ * @param value - Header value or `null` to remove
1161
+ * @returns `true` if successful
1162
+ *
1163
+ * @example
1164
+ * ```ts
1165
+ * const allHeaders = await engine.headers();
1166
+ * const userAgent = await engine.headers('user-agent');
1167
+ * await engine.headers({ 'x-custom': 'value' });
1168
+ * await engine.headers('auth', 'token');
1169
+ * ```
1170
+ */
1171
+ headers(): Promise<Record<string, string>>;
1172
+ headers(name: string): Promise<string>;
1173
+ headers(headers: Record<string, string>, replaced?: boolean): Promise<boolean>;
1174
+ headers(name: string, value: string | null): Promise<boolean>;
1175
+ /**
1176
+ * Manages cookies for current session with multiple overloads.
1177
+ *
1178
+ * @overload
1179
+ * Gets all cookies.
1180
+ * @returns Array of cookies
1181
+ *
1182
+ * @overload
1183
+ * Sets cookies for session.
1184
+ * @param cookies - Cookies to set
1185
+ * @returns `true` if successful
1186
+ *
1187
+ * @example
1188
+ * ```ts
1189
+ * const cookies = await engine.cookies();
1190
+ * await engine.cookies([{ name: 'session', value: '123' }]);
1191
+ * ```
1192
+ */
1193
+ cookies(): Promise<Cookie[]>;
1194
+ cookies(cookies: Cookie[]): Promise<boolean>;
1195
+ /**
1196
+ * Disposes of engine, cleaning up all resources.
1197
+ *
1198
+ * @returns Promise resolving when disposal completes
1199
+ */
1200
+ dispose(): Promise<void>;
1201
+ }
1202
+
1203
+ type FetchReturnType = 'response' | 'context' | 'outputs' | 'any' | 'none';
1204
+ interface FetchReturnTypeRegistry {
1205
+ response: FetchResponse;
1206
+ context: FetchContext;
1207
+ result: FetchActionResult<any> | undefined;
1208
+ outputs: Record<string, any>;
1209
+ any: any;
1210
+ none: void;
1211
+ }
1212
+ type FetchReturnTypeFor<R extends FetchReturnType> = R extends keyof FetchReturnTypeRegistry ? FetchReturnTypeRegistry[R] : never;
1213
+
1214
+ interface FetchActionInContext extends FetchActionProperties {
1215
+ index?: number;
1216
+ error?: Error;
1217
+ depth?: number;
1218
+ }
1219
+ interface BaseFetchContextInteralState {
1220
+ engine?: FetchEngine;
1221
+ [key: string]: any;
1222
+ }
1223
+ interface FetchContextInteralState extends BaseFetchContextInteralState {
1224
+ actionStack?: FetchActionInContext[];
1225
+ actionIndex?: number;
1226
+ }
1227
+ interface FetchEngineContext extends BaseFetcherProperties {
1228
+ id: string;
1229
+ url?: string;
1230
+ finalUrl?: string;
1231
+ lastResponse?: FetchResponse;
1232
+ lastResult?: FetchActionResult;
1233
+ internal: BaseFetchContextInteralState;
1234
+ }
1235
+ interface FetchContext extends FetchEngineContext {
1236
+ currentAction?: FetchActionInContext;
1237
+ outputs: Record<string, any>;
1238
+ execute<R extends FetchReturnType = 'any'>(actionOptions: FetchActionOptions): Promise<FetchActionResult<R>>;
1239
+ action<R extends FetchReturnType = 'any'>(name: string, params?: any, options?: Partial<FetchActionOptions>): Promise<FetchActionResult<R>>;
1240
+ internal: FetchContextInteralState;
1241
+ eventBus: EventEmitter;
1242
+ }
1243
+
1244
+ type CheerioAPI = NonNullable<CheerioCrawlingContext['$']>;
1245
+ type CheerioSelection = ReturnType<CheerioAPI>;
1246
+ type CheerioNode = ReturnType<CheerioSelection['first']>;
1247
+ declare class CheerioFetchEngine extends FetchEngine<CheerioCrawlingContext, CheerioCrawler, CheerioCrawlerOptions> {
1248
+ static readonly id = "cheerio";
1249
+ static readonly mode = "http";
1250
+ protected buildResponse(context: CheerioCrawlingContext): Promise<FetchResponse>;
1251
+ protected _querySelectorAll(context: {
1252
+ $: CheerioAPI;
1253
+ el: CheerioNode;
1254
+ }, selector: string): Promise<any[]>;
1255
+ protected _extractValue(schema: ExtractValueSchema, context: {
1256
+ el: CheerioNode;
1257
+ }): Promise<any>;
1258
+ protected executeAction(context: CheerioCrawlingContext, action: FetchEngineAction): Promise<any>;
1259
+ private _updateStateAfterNavigation;
1260
+ protected _createCrawler(options: CheerioCrawlerOptions): CheerioCrawler;
1261
+ protected _getSpecificCrawlerOptions(ctx: FetchEngineContext): CheerioCrawlerOptions;
1262
+ goto(url: string, params?: GotoActionOptions): Promise<void | FetchResponse>;
1263
+ }
1264
+
1265
+ type Page = NonNullable<PlaywrightCrawlingContext['page']>;
1266
+ type Locator = ReturnType<Page['locator']>;
1267
+ declare class PlaywrightFetchEngine extends FetchEngine<PlaywrightCrawlingContext, PlaywrightCrawler, PlaywrightCrawlerOptions> {
1268
+ static readonly id = "playwright";
1269
+ static readonly mode = "browser";
1270
+ protected buildResponse(context: PlaywrightCrawlingContext): Promise<FetchResponse>;
1271
+ protected _querySelectorAll(context: Locator, selector: string): Promise<any[]>;
1272
+ protected _extractValue(schema: ExtractValueSchema, context: Locator): Promise<any>;
1273
+ protected executeAction(context: PlaywrightCrawlingContext, action: FetchEngineAction): Promise<any>;
1274
+ protected _createCrawler(options: PlaywrightCrawlerOptions): PlaywrightCrawler;
1275
+ protected _getSpecificCrawlerOptions(ctx: FetchEngineContext): Promise<Partial<PlaywrightCrawlerOptions>>;
1276
+ goto(url: string, opts?: GotoActionOptions): Promise<FetchResponse>;
1277
+ }
1278
+
1279
+ declare enum FetchActionResultStatus {
1280
+ /**
1281
+ * The action failed without throwing (typically because failOnError=false); the error details are in the `error` field
1282
+ */
1283
+ Failed = 0,
1284
+ /**
1285
+ * The action completed as expected (even if warnings were produced)
1286
+ */
1287
+ Success = 1,
1288
+ /**
1289
+ * The action was judged not to run / was downgraded to a noop (e.g. the engine does not support it and degradeTo='noop')
1290
+ * When the capability is unsupported and degradeTo='noop': status='skipped', and { code:'capability-not-supported' } is added to warnings
1291
+ */
1292
+ Skipped = 2
1293
+ }
1294
+ type FetchActionCapabilityMode = 'native' | 'simulate' | 'noop';
1295
+ interface FetchActionMeta {
1296
+ id: string;
1297
+ index?: number;
1298
+ engineType?: FetchEngineType;
1299
+ capability?: FetchActionCapabilityMode;
1300
+ response?: FetchResponse;
1301
+ timings?: {
1302
+ start: number;
1303
+ total: number;
1304
+ };
1305
+ retries?: number;
1306
+ }
1307
+ interface FetchActionResult<R extends FetchReturnType = FetchReturnType> {
1308
+ status: FetchActionResultStatus;
1309
+ returnType?: R;
1310
+ result?: FetchReturnTypeFor<R>;
1311
+ error?: Error;
1312
+ meta?: FetchActionMeta;
1313
+ }
1314
+ interface BaseFetchActionProperties {
1315
+ id?: string;
1316
+ name?: string;
1317
+ params?: any;
1318
+ storeAs?: string;
1319
+ failOnError?: boolean;
1320
+ failOnTimeout?: boolean;
1321
+ timeoutMs?: number;
1322
+ maxRetries?: number;
1323
+ [key: string]: any;
1324
+ }
1325
+ type BaseFetchActionOptions = RequireAtLeastOne<BaseFetchActionProperties, 'id' | 'name'>;
1326
+ interface BaseFetchCollectorActionProperties extends BaseFetchActionProperties {
1327
+ activateOn?: string | RegExp | Array<string | RegExp>;
1328
+ deactivateOn?: string | RegExp | Array<string | RegExp>;
1329
+ collectOn?: string | RegExp | Array<string | RegExp>;
1330
+ background?: boolean;
1331
+ }
1332
+ type BaseFetchCollectorOptions = RequireAtLeastOne<BaseFetchCollectorActionProperties, 'id' | 'name'>;
1333
+ interface FetchActionProperties extends BaseFetchActionProperties {
1334
+ collectors?: BaseFetchCollectorOptions[];
1335
+ }
1336
+ type FetchActionOptions = RequireAtLeastOne<FetchActionProperties, 'id' | 'name'>;
1337
+ type FetchActionCapabilities = {
1338
+ [mode in FetchEngineType]?: FetchActionCapabilityMode;
1339
+ };
1340
+ declare abstract class FetchAction {
1341
+ private static registry;
1342
+ static register(actionClass: typeof FetchAction): void;
1343
+ static get(id: string): typeof FetchAction | undefined;
1344
+ static create(id: FetchActionOptions): FetchAction | undefined;
1345
+ static create(id: string): FetchAction | undefined;
1346
+ static has(name: string): boolean;
1347
+ static list(): string[];
1348
+ static id: string;
1349
+ static returnType: FetchReturnType;
1350
+ static capabilities: FetchActionCapabilities;
1351
+ static getCapability(mode?: FetchEngineType): FetchActionCapabilityMode;
1352
+ getCapability(mode?: FetchEngineType): FetchActionCapabilityMode;
1353
+ get id(): string;
1354
+ get returnType(): FetchReturnType;
1355
+ get capabilities(): FetchActionCapabilities;
1356
+ protected onBeforeExec?(context: FetchContext, options?: FetchActionProperties): Promise<void> | void;
1357
+ protected onAfterExec?(context: FetchContext, options?: FetchActionProperties): Promise<void> | void;
1358
+ abstract onExecute(context: FetchContext, options?: FetchActionProperties, eventPayload?: any): Promise<any> | any;
1359
+ protected delegateToEngine(context: FetchContext, method: keyof FetchEngine, ...args: any[]): Promise<any>;
1360
+ protected installCollectors(context: FetchContext, options?: FetchActionProperties): CollectorsRuntime | undefined;
1361
+ /**
1362
+ * Start-of-action lifecycle step.
1363
+ * Responsible for: initializing the stack, setting currentAction, emitting events, and invoking hooks.
1364
+ */
1365
+ beforeExec(context: FetchContext, options?: FetchActionProperties): Promise<{
1366
+ entry: FetchActionInContext;
1367
+ collectors: CollectorsRuntime | undefined;
1368
+ }>;
1369
+ /**
1370
+ * End-of-action lifecycle step.
1371
+ * Responsible for: invoking hooks, assigning lastResult, emitting events, cleaning up the stack, and restoring currentAction.
1372
+ */
1373
+ afterExec(context: FetchContext, options?: BaseFetchCollectorActionProperties, result?: FetchActionResult, scope?: {
1374
+ entry: FetchActionInContext;
1375
+ collectors?: CollectorsRuntime;
1376
+ }): Promise<void>;
1377
+ execute<R extends FetchReturnType = 'any'>(context: FetchContext, options?: FetchActionProperties): Promise<FetchActionResult<R>>;
1378
+ }
1379
+ type CollectorsRuntime = {
1380
+ cleanup: () => void;
1381
+ awaitExecPendings: () => Promise<void>;
1382
+ };
1383
+
1384
+ type FetchEngineType = 'http' | 'browser';
1385
+ type BrowserEngine = 'playwright' | 'puppeteer';
1386
+ type FetchEngineMode = FetchEngineType | 'auto' | string;
1387
+ type ResourceType = 'image' | 'stylesheet' | 'font' | 'script' | 'media' | string;
1388
+ interface BaseFetcherProperties {
1389
+ /**
1390
+ * Fetch mode
1391
+ *
1392
+ * - `http`: fetch over plain HTTP
1393
+ * - `browser`: fetch using a browser
1394
+ * - `auto`: auto uses "smart detection" to choose http or browser; however, if smart is not enabled and the site is not in the site registry, it is equivalent to http.
1395
+ */
1396
+ engine?: FetchEngineMode;
1397
+ enableSmart?: boolean;
1398
+ useSiteRegistry?: boolean;
1399
+ antibot?: boolean;
1400
+ headers?: Record<string, string>;
1401
+ cookies?: Cookie[];
1402
+ reuseCookies?: boolean;
1403
+ throwHttpErrors?: boolean;
1404
+ proxy?: string | string[];
1405
+ blockResources?: ResourceType[];
1406
+ ignoreSslErrors?: boolean;
1407
+ browser?: {
1408
+ /**
1409
+ * Browser engine; defaults to playwright
1410
+ *
1411
+ * - `playwright`: use the Playwright engine
1412
+ * - `puppeteer`: use the Puppeteer engine
1413
+ */
1414
+ engine?: BrowserEngine;
1415
+ headless?: boolean;
1416
+ waitUntil?: 'load' | 'domcontentloaded' | 'networkidle' | 'commit';
1417
+ };
1418
+ http?: {
1419
+ method?: 'GET' | 'POST' | 'PUT' | 'PATCH' | 'DELETE';
1420
+ body?: any;
1421
+ };
1422
+ timeoutMs?: number;
1423
+ maxConcurrency?: number;
1424
+ maxRequestsPerMinute?: number;
1425
+ delayBetweenRequestsMs?: number;
1426
+ retries?: number;
1427
+ sites?: FetchSite[];
1428
+ url?: string;
1429
+ }
1430
+ interface FetchSite extends BaseFetcherProperties {
1431
+ domain: string;
1432
+ pathScope?: string[];
1433
+ meta?: {
1434
+ updatedAt?: number;
1435
+ ttlMs?: number;
1436
+ source?: 'manual' | 'smart';
1437
+ };
1438
+ }
1439
+ type OnFetchPauseCallback = (options: {
1440
+ message?: string;
1441
+ }) => Promise<void>;
1442
+ interface FetcherOptions extends BaseFetcherProperties {
1443
+ actions?: FetchActionOptions[];
1444
+ onPause?: OnFetchPauseCallback;
1445
+ }
1446
+ interface FetchMetadata {
1447
+ mode: FetchEngineType;
1448
+ engine?: BrowserEngine;
1449
+ timings?: {
1450
+ start: number;
1451
+ total: number;
1452
+ ttfb?: number;
1453
+ dns?: number;
1454
+ tcp?: number;
1455
+ firstByte?: number;
1456
+ download?: number;
1457
+ };
1458
+ proxy?: string;
1459
+ [key: string]: any;
1460
+ }
1461
+ interface FetchResponse {
1462
+ url: string;
1463
+ finalUrl: string;
1464
+ statusCode?: number;
1465
+ statusText?: string;
1466
+ headers: Record<string, string>;
1467
+ contentType?: string;
1468
+ body?: string | Buffer<ArrayBufferLike>;
1469
+ html?: string;
1470
+ text?: string;
1471
+ json?: any;
1472
+ cookies?: Cookie[];
1473
+ metadata?: FetchMetadata;
1474
+ }
1475
+ declare const DefaultFetcherProperties: BaseFetcherProperties;
1476
+
1477
+ declare class FetchSession {
1478
+ private options;
1479
+ readonly id: string;
1480
+ readonly context: FetchContext;
1481
+ private closed;
1482
+ constructor(options?: FetcherOptions);
1483
+ /**
1484
+ * Executes a single action
1485
+ */
1486
+ execute<R extends FetchReturnType = 'response'>(actionOptions: FetchActionOptions): Promise<FetchActionResult<R>>;
1487
+ executeAll(actions: FetchActionOptions[]): Promise<{
1488
+ result: FetchResponse | undefined;
1489
+ outputs: Record<string, any>;
1490
+ }>;
1491
+ getOutputs(): Record<string, any>;
1492
+ dispose(): Promise<void>;
1493
+ private ensureEngine;
1494
+ private createContext;
1495
+ }
1496
+
1497
+ declare class WebFetcher {
1498
+ private defaults;
1499
+ constructor(defaults?: FetcherOptions);
1500
+ createSession(options?: FetcherOptions): Promise<FetchSession>;
1501
+ fetch(url: string, options?: FetcherOptions): Promise<{
1502
+ result: FetchResponse | undefined;
1503
+ outputs: Record<string, any>;
1504
+ }>;
1505
+ fetch(options: FetcherOptions): Promise<{
1506
+ result: FetchResponse | undefined;
1507
+ outputs: Record<string, any>;
1508
+ }>;
1509
+ }
1510
+
1511
+ declare class ClickAction extends FetchAction {
1512
+ static id: string;
1513
+ static returnType: "none";
1514
+ static capabilities: {
1515
+ http: "simulate";
1516
+ browser: "native";
1517
+ };
1518
+ onExecute(context: FetchContext, options?: BaseFetchActionProperties): Promise<void>;
1519
+ }
1520
+
1521
+ declare class FillAction extends FetchAction {
1522
+ static id: string;
1523
+ static returnType: "none";
1524
+ static capabilities: {
1525
+ http: "simulate";
1526
+ browser: "native";
1527
+ };
1528
+ onExecute(context: FetchContext, options?: BaseFetchActionProperties): Promise<void>;
1529
+ }
1530
+
1531
+ declare class GetContentAction extends FetchAction {
1532
+ static id: string;
1533
+ static returnType: "response";
1534
+ static capabilities: {
1535
+ http: "native";
1536
+ browser: "native";
1537
+ };
1538
+ onExecute(context: FetchContext, options?: BaseFetchActionProperties): Promise<any>;
1539
+ }
1540
+
1541
+ declare class GotoAction extends FetchAction {
1542
+ static id: string;
1543
+ static returnType: "response";
1544
+ static capabilities: {
1545
+ http: "native";
1546
+ browser: "native";
1547
+ };
1548
+ onExecute(context: FetchContext, options?: BaseFetchActionProperties, eventPayload?: any): Promise<FetchResponse | void>;
1549
+ }
1550
+
1551
+ declare class SubmitAction extends FetchAction {
1552
+ static id: string;
1553
+ static returnType: "none";
1554
+ static capabilities: {
1555
+ http: "simulate";
1556
+ browser: "native";
1557
+ };
1558
+ onExecute(context: FetchContext, options?: BaseFetchActionProperties): Promise<void>;
1559
+ }
1560
+
1561
+ declare class WaitForAction extends FetchAction {
1562
+ static id: string;
1563
+ static returnType: "none";
1564
+ static capabilities: {
1565
+ http: "native";
1566
+ browser: "native";
1567
+ };
1568
+ onExecute(context: FetchContext, options?: BaseFetchActionProperties): Promise<void>;
1569
+ }
1570
+
1571
+ interface ExtractActionProperties extends BaseFetchActionProperties {
1572
+ params: ExtractSchema;
1573
+ }
1574
+ declare class ExtractAction extends FetchAction {
1575
+ static id: string;
1576
+ static returnType: "any";
1577
+ static capabilities: {
1578
+ http: "native";
1579
+ browser: "native";
1580
+ };
1581
+ onExecute(context: FetchContext, options?: ExtractActionProperties): Promise<any>;
1582
+ }
1583
+
1584
+ declare class PauseAction extends FetchAction {
1585
+ static id: string;
1586
+ static capabilities: {
1587
+ http: "native";
1588
+ browser: "native";
1589
+ };
1590
+ static returnType: "none";
1591
+ onExecute(context: FetchContext, options?: BaseFetchActionProperties): Promise<void>;
1592
+ }
1593
+
1594
+ declare function fetchWeb(options: FetcherOptions): Promise<{
1595
+ result: FetchResponse | undefined;
1596
+ outputs: Record<string, any>;
1597
+ }>;
1598
+ declare function fetchWeb(url: string, options?: FetcherOptions): Promise<{
1599
+ result: FetchResponse | undefined;
1600
+ outputs: Record<string, any>;
1601
+ }>;
1602
+
1603
+ export { type BaseFetchActionOptions, type BaseFetchActionProperties, type BaseFetchCollectorActionProperties, type BaseFetchCollectorOptions, type BaseFetcherProperties, type BrowserEngine, CheerioFetchEngine, ClickAction, DefaultFetcherProperties, type DispatchedEngineAction, ExtractAction, type ExtractActionProperties, FetchAction, type FetchActionCapabilities, type FetchActionCapabilityMode, type FetchActionInContext, type FetchActionOptions, type FetchActionProperties, type FetchActionResult, FetchActionResultStatus, type FetchContext, FetchEngine, type FetchEngineAction, type FetchEngineContext, type FetchEngineType, type FetchMetadata, type FetchResponse, type FetchReturnType, type FetchReturnTypeFor, type FetchReturnTypeRegistry, FetchSession, type FetchSite, type FetcherOptions, FillAction, GetContentAction, GotoAction, type GotoActionOptions, type OnFetchPauseCallback, PauseAction, type PendingEngineRequest, PlaywrightFetchEngine, type ResourceType, SubmitAction, type SubmitActionOptions, WaitForAction, type WaitForActionOptions, WebFetcher, fetchWeb };