@cyia/crawl 0.0.11 → 0.0.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/define.d.ts CHANGED
@@ -5,6 +5,12 @@ declare const Value: v.UnionSchema<[v.StringSchema<undefined>, v.ObjectSchema<{
5
5
  readonly key: v.UnionSchema<[v.StringSchema<undefined>, v.ArraySchema<v.StringSchema<undefined>, undefined>], undefined>;
6
6
  }, undefined>], undefined>;
7
7
  export type ValueType = v.InferOutput<typeof Value>;
8
+ declare const OutputP: v.OptionalSchema<v.UnionSchema<[v.StringSchema<undefined>, v.ObjectSchema<{
9
+ readonly key: v.StringSchema<undefined>;
10
+ readonly method: v.PicklistSchema<["push", "flat-push", "define", "merge"], undefined>;
11
+ }, undefined>], undefined>, undefined>;
12
+ type OutputI = v.InferInput<typeof OutputP>;
13
+ export type OutputO = v.InferOutput<typeof OutputP>;
8
14
  export declare const ActionDefine: v.SchemaWithFallback<v.UnionSchema<[v.ObjectSchema<{
9
15
  readonly timeout: v.OptionalSchema<v.NumberSchema<undefined>, undefined>;
10
16
  readonly waitUntil: v.OptionalSchema<v.PicklistSchema<["load", "domcontentloaded", "networkidle0", "networkidle2"], undefined>, "networkidle2">;
@@ -43,6 +49,9 @@ export declare const ActionDefine: v.SchemaWithFallback<v.UnionSchema<[v.ObjectS
43
49
  readonly concurrency: v.OptionalSchema<v.NumberSchema<undefined>, undefined>;
44
50
  }, undefined>, v.ObjectSchema<{
45
51
  readonly mode: v.LiteralSchema<"navigation", undefined>;
52
+ }, undefined>, v.ObjectSchema<{
53
+ readonly mode: v.LiteralSchema<"waitBodyElements", undefined>;
54
+ readonly threshold: v.OptionalSchema<v.NumberSchema<undefined>, undefined>;
46
55
  }, undefined>], undefined>;
47
56
  }, undefined>, v.ObjectSchema<{
48
57
  readonly type: v.LiteralSchema<"click", undefined>;
@@ -68,30 +77,45 @@ export declare const ActionDefine: v.SchemaWithFallback<v.UnionSchema<[v.ObjectS
68
77
  }, undefined>, v.ObjectSchema<{
69
78
  readonly type: v.LiteralSchema<"selector", undefined>;
70
79
  readonly selector: v.StringSchema<undefined>;
71
- readonly output: v.StringSchema<undefined>;
80
+ readonly output: v.OptionalSchema<v.UnionSchema<[v.StringSchema<undefined>, v.ObjectSchema<{
81
+ readonly key: v.StringSchema<undefined>;
82
+ readonly method: v.PicklistSchema<["push", "flat-push", "define", "merge"], undefined>;
83
+ }, undefined>], undefined>, undefined>;
72
84
  readonly multi: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
73
85
  }, undefined>, v.ObjectSchema<{
74
86
  readonly type: v.LiteralSchema<"findData", undefined>;
75
87
  readonly input: v.StringSchema<undefined>;
76
- readonly output: v.StringSchema<undefined>;
88
+ readonly output: v.OptionalSchema<v.UnionSchema<[v.StringSchema<undefined>, v.ObjectSchema<{
89
+ readonly key: v.StringSchema<undefined>;
90
+ readonly method: v.PicklistSchema<["push", "flat-push", "define", "merge"], undefined>;
91
+ }, undefined>], undefined>, undefined>;
77
92
  readonly kind: v.PicklistSchema<["property"], undefined>;
78
93
  readonly key: v.OptionalSchema<v.StringSchema<undefined>, undefined>;
79
94
  }, undefined>, v.ObjectSchema<{
80
95
  readonly type: v.LiteralSchema<"getContent", undefined>;
81
96
  readonly format: v.OptionalSchema<v.PicklistSchema<["html", "text", "markdown"], undefined>, "html">;
82
97
  readonly cleanContent: v.OptionalSchema<v.BooleanSchema<undefined>, undefined>;
83
- readonly output: v.StringSchema<undefined>;
98
+ readonly output: v.OptionalSchema<v.UnionSchema<[v.StringSchema<undefined>, v.ObjectSchema<{
99
+ readonly key: v.StringSchema<undefined>;
100
+ readonly method: v.PicklistSchema<["push", "flat-push", "define", "merge"], undefined>;
101
+ }, undefined>], undefined>, undefined>;
102
+ }, undefined>, v.ObjectSchema<{
103
+ readonly type: v.LiteralSchema<"rawContent", undefined>;
104
+ readonly output: v.OptionalSchema<v.UnionSchema<[v.StringSchema<undefined>, v.ObjectSchema<{
105
+ readonly key: v.StringSchema<undefined>;
106
+ readonly method: v.PicklistSchema<["push", "flat-push", "define", "merge"], undefined>;
107
+ }, undefined>], undefined>, undefined>;
84
108
  }, undefined>, v.GenericSchema<{
85
109
  type: "page";
86
110
  input: string;
87
- output?: string;
111
+ output?: OutputI;
88
112
  actions: v.InferInput<ActionType>[];
89
113
  concurrency?: number;
90
114
  throwError?: boolean;
91
115
  }, {
92
116
  type: "page";
93
117
  input: string;
94
- output?: string;
118
+ output?: OutputO;
95
119
  actions: v.InferOutput<ActionType>[];
96
120
  concurrency: number;
97
121
  throwError: boolean;
@@ -103,16 +127,24 @@ export declare const ActionDefine: v.SchemaWithFallback<v.UnionSchema<[v.ObjectS
103
127
  readonly type: v.StringSchema<undefined>;
104
128
  }, undefined>, undefined>;
105
129
  readonly fn: v.OptionalSchema<v.CustomSchema<(input: WebPage) => Promise<any>, undefined>, undefined>;
130
+ }, undefined>, v.ObjectSchema<{
131
+ readonly type: v.LiteralSchema<"read-variable", undefined>;
132
+ readonly input: v.StringSchema<undefined>;
133
+ }, undefined>, v.ObjectSchema<{
134
+ readonly type: v.LiteralSchema<"evaluate", undefined>;
135
+ readonly fn: v.CustomSchema<(...args: any[]) => any, undefined>;
136
+ readonly args: v.OptionalSchema<v.ArraySchema<v.AnySchema, undefined>, undefined>;
137
+ readonly output: v.OptionalSchema<v.UnionSchema<[v.StringSchema<undefined>, v.ObjectSchema<{
138
+ readonly key: v.StringSchema<undefined>;
139
+ readonly method: v.PicklistSchema<["push", "flat-push", "define", "merge"], undefined>;
140
+ }, undefined>], undefined>, undefined>;
106
141
  }, undefined>], undefined>, (item: v.OutputDataset<{
107
142
  timeout?: number | undefined;
108
143
  waitUntil: "load" | "domcontentloaded" | "networkidle0" | "networkidle2";
109
- url: (string | {
144
+ url: string | {
110
145
  source: "variable";
111
- key: (string | string[] | undefined) & (string | string[]);
112
- } | undefined) & (string | {
113
- source: "variable";
114
- key: (string | string[] | undefined) & (string | string[]);
115
- });
146
+ key: string | string[];
147
+ };
116
148
  type: "goto";
117
149
  } | {
118
150
  width: number;
@@ -144,6 +176,9 @@ export declare const ActionDefine: v.SchemaWithFallback<v.UnionSchema<[v.ObjectS
144
176
  concurrency?: number | undefined;
145
177
  } | {
146
178
  mode: "navigation";
179
+ } | {
180
+ mode: "waitBodyElements";
181
+ threshold?: number | undefined;
147
182
  };
148
183
  } | {
149
184
  type: "click";
@@ -157,13 +192,10 @@ export declare const ActionDefine: v.SchemaWithFallback<v.UnionSchema<[v.ObjectS
157
192
  } | {
158
193
  type: "type";
159
194
  selector: string;
160
- text: (string | {
195
+ text: string | {
161
196
  source: "variable";
162
- key: (string | string[] | undefined) & (string | string[]);
163
- } | undefined) & (string | {
164
- source: "variable";
165
- key: (string | string[] | undefined) & (string | string[]);
166
- });
197
+ key: string | string[];
198
+ };
167
199
  delay?: number | undefined;
168
200
  } | {
169
201
  type: "keypress";
@@ -172,23 +204,38 @@ export declare const ActionDefine: v.SchemaWithFallback<v.UnionSchema<[v.ObjectS
172
204
  } | {
173
205
  type: "selector";
174
206
  selector: string;
175
- output: string;
207
+ output?: string | {
208
+ key: string;
209
+ method: "push" | "flat-push" | "define" | "merge";
210
+ } | undefined;
176
211
  multi: boolean;
177
212
  } | {
178
213
  type: "findData";
179
214
  input: string;
180
- output: string;
215
+ output?: string | {
216
+ key: string;
217
+ method: "push" | "flat-push" | "define" | "merge";
218
+ } | undefined;
181
219
  kind: "property";
182
220
  key?: string | undefined;
183
221
  } | {
184
222
  type: "getContent";
185
223
  format: "html" | "text" | "markdown";
186
224
  cleanContent?: boolean | undefined;
187
- output: string;
225
+ output?: string | {
226
+ key: string;
227
+ method: "push" | "flat-push" | "define" | "merge";
228
+ } | undefined;
229
+ } | {
230
+ type: "rawContent";
231
+ output?: string | {
232
+ key: string;
233
+ method: "push" | "flat-push" | "define" | "merge";
234
+ } | undefined;
188
235
  } | {
189
236
  type: "page";
190
237
  input: string;
191
- output?: string;
238
+ output?: OutputO;
192
239
  actions: v.InferOutput<ActionType>[];
193
240
  concurrency: number;
194
241
  throwError: boolean;
@@ -202,7 +249,18 @@ export declare const ActionDefine: v.SchemaWithFallback<v.UnionSchema<[v.ObjectS
202
249
  [key: string]: unknown;
203
250
  }) | undefined;
204
251
  fn?: ((input: WebPage) => Promise<any>) | undefined;
205
- }, v.NumberIssue | v.BaseIssue<unknown> | v.BooleanIssue | v.StringIssue | v.TupleIssue | v.ObjectIssue | v.LiteralIssue | v.ArrayIssue | v.UnionIssue<v.StringIssue | v.ArrayIssue> | v.UnionIssue<v.StringIssue | v.ObjectIssue | v.LiteralIssue | v.ArrayIssue | v.UnionIssue<v.StringIssue | v.ArrayIssue>> | v.PicklistIssue | v.UnionIssue<v.StringIssue | v.TupleIssue> | v.LooseObjectIssue | v.VariantIssue | v.CustomIssue | v.UnionIssue<v.NumberIssue | v.BaseIssue<unknown> | v.BooleanIssue | v.StringIssue | v.TupleIssue | v.ObjectIssue | v.LiteralIssue | v.ArrayIssue | v.UnionIssue<v.StringIssue | v.ArrayIssue> | v.UnionIssue<v.StringIssue | v.ObjectIssue | v.LiteralIssue | v.ArrayIssue | v.UnionIssue<v.StringIssue | v.ArrayIssue>> | v.PicklistIssue | v.UnionIssue<v.StringIssue | v.TupleIssue> | v.LooseObjectIssue | v.VariantIssue | v.CustomIssue>> | undefined) => {
252
+ } | {
253
+ type: "read-variable";
254
+ input: string;
255
+ } | {
256
+ type: "evaluate";
257
+ fn: (...args: any[]) => any;
258
+ args?: any[] | undefined;
259
+ output?: string | {
260
+ key: string;
261
+ method: "push" | "flat-push" | "define" | "merge";
262
+ } | undefined;
263
+ }, v.NumberIssue | v.BaseIssue<unknown> | v.BooleanIssue | v.StringIssue | v.TupleIssue | v.ObjectIssue | v.LiteralIssue | v.ArrayIssue | v.UnionIssue<v.StringIssue | v.ArrayIssue> | v.UnionIssue<v.StringIssue | v.ObjectIssue | v.LiteralIssue | v.ArrayIssue | v.UnionIssue<v.StringIssue | v.ArrayIssue>> | v.PicklistIssue | v.UnionIssue<v.StringIssue | v.ObjectIssue | v.PicklistIssue> | v.UnionIssue<v.StringIssue | v.TupleIssue> | v.LooseObjectIssue | v.VariantIssue | v.CustomIssue | v.UnionIssue<v.NumberIssue | v.BaseIssue<unknown> | v.BooleanIssue | v.StringIssue | v.TupleIssue | v.ObjectIssue | v.LiteralIssue | v.ArrayIssue | v.UnionIssue<v.StringIssue | v.ArrayIssue> | v.UnionIssue<v.StringIssue | v.ObjectIssue | v.LiteralIssue | v.ArrayIssue | v.UnionIssue<v.StringIssue | v.ArrayIssue>> | v.PicklistIssue | v.UnionIssue<v.StringIssue | v.ObjectIssue | v.PicklistIssue> | v.UnionIssue<v.StringIssue | v.TupleIssue> | v.LooseObjectIssue | v.VariantIssue | v.CustomIssue>> | undefined) => {
206
264
  type: "custom";
207
265
  config: any;
208
266
  }>;
@@ -244,6 +302,9 @@ export declare const ActionListDefine: v.ArraySchema<v.SchemaWithFallback<v.Unio
244
302
  readonly concurrency: v.OptionalSchema<v.NumberSchema<undefined>, undefined>;
245
303
  }, undefined>, v.ObjectSchema<{
246
304
  readonly mode: v.LiteralSchema<"navigation", undefined>;
305
+ }, undefined>, v.ObjectSchema<{
306
+ readonly mode: v.LiteralSchema<"waitBodyElements", undefined>;
307
+ readonly threshold: v.OptionalSchema<v.NumberSchema<undefined>, undefined>;
247
308
  }, undefined>], undefined>;
248
309
  }, undefined>, v.ObjectSchema<{
249
310
  readonly type: v.LiteralSchema<"click", undefined>;
@@ -269,30 +330,45 @@ export declare const ActionListDefine: v.ArraySchema<v.SchemaWithFallback<v.Unio
269
330
  }, undefined>, v.ObjectSchema<{
270
331
  readonly type: v.LiteralSchema<"selector", undefined>;
271
332
  readonly selector: v.StringSchema<undefined>;
272
- readonly output: v.StringSchema<undefined>;
333
+ readonly output: v.OptionalSchema<v.UnionSchema<[v.StringSchema<undefined>, v.ObjectSchema<{
334
+ readonly key: v.StringSchema<undefined>;
335
+ readonly method: v.PicklistSchema<["push", "flat-push", "define", "merge"], undefined>;
336
+ }, undefined>], undefined>, undefined>;
273
337
  readonly multi: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
274
338
  }, undefined>, v.ObjectSchema<{
275
339
  readonly type: v.LiteralSchema<"findData", undefined>;
276
340
  readonly input: v.StringSchema<undefined>;
277
- readonly output: v.StringSchema<undefined>;
341
+ readonly output: v.OptionalSchema<v.UnionSchema<[v.StringSchema<undefined>, v.ObjectSchema<{
342
+ readonly key: v.StringSchema<undefined>;
343
+ readonly method: v.PicklistSchema<["push", "flat-push", "define", "merge"], undefined>;
344
+ }, undefined>], undefined>, undefined>;
278
345
  readonly kind: v.PicklistSchema<["property"], undefined>;
279
346
  readonly key: v.OptionalSchema<v.StringSchema<undefined>, undefined>;
280
347
  }, undefined>, v.ObjectSchema<{
281
348
  readonly type: v.LiteralSchema<"getContent", undefined>;
282
349
  readonly format: v.OptionalSchema<v.PicklistSchema<["html", "text", "markdown"], undefined>, "html">;
283
350
  readonly cleanContent: v.OptionalSchema<v.BooleanSchema<undefined>, undefined>;
284
- readonly output: v.StringSchema<undefined>;
351
+ readonly output: v.OptionalSchema<v.UnionSchema<[v.StringSchema<undefined>, v.ObjectSchema<{
352
+ readonly key: v.StringSchema<undefined>;
353
+ readonly method: v.PicklistSchema<["push", "flat-push", "define", "merge"], undefined>;
354
+ }, undefined>], undefined>, undefined>;
355
+ }, undefined>, v.ObjectSchema<{
356
+ readonly type: v.LiteralSchema<"rawContent", undefined>;
357
+ readonly output: v.OptionalSchema<v.UnionSchema<[v.StringSchema<undefined>, v.ObjectSchema<{
358
+ readonly key: v.StringSchema<undefined>;
359
+ readonly method: v.PicklistSchema<["push", "flat-push", "define", "merge"], undefined>;
360
+ }, undefined>], undefined>, undefined>;
285
361
  }, undefined>, v.GenericSchema<{
286
362
  type: "page";
287
363
  input: string;
288
- output?: string;
364
+ output?: OutputI;
289
365
  actions: v.InferInput<ActionType>[];
290
366
  concurrency?: number;
291
367
  throwError?: boolean;
292
368
  }, {
293
369
  type: "page";
294
370
  input: string;
295
- output?: string;
371
+ output?: OutputO;
296
372
  actions: v.InferOutput<ActionType>[];
297
373
  concurrency: number;
298
374
  throwError: boolean;
@@ -304,16 +380,24 @@ export declare const ActionListDefine: v.ArraySchema<v.SchemaWithFallback<v.Unio
304
380
  readonly type: v.StringSchema<undefined>;
305
381
  }, undefined>, undefined>;
306
382
  readonly fn: v.OptionalSchema<v.CustomSchema<(input: WebPage) => Promise<any>, undefined>, undefined>;
383
+ }, undefined>, v.ObjectSchema<{
384
+ readonly type: v.LiteralSchema<"read-variable", undefined>;
385
+ readonly input: v.StringSchema<undefined>;
386
+ }, undefined>, v.ObjectSchema<{
387
+ readonly type: v.LiteralSchema<"evaluate", undefined>;
388
+ readonly fn: v.CustomSchema<(...args: any[]) => any, undefined>;
389
+ readonly args: v.OptionalSchema<v.ArraySchema<v.AnySchema, undefined>, undefined>;
390
+ readonly output: v.OptionalSchema<v.UnionSchema<[v.StringSchema<undefined>, v.ObjectSchema<{
391
+ readonly key: v.StringSchema<undefined>;
392
+ readonly method: v.PicklistSchema<["push", "flat-push", "define", "merge"], undefined>;
393
+ }, undefined>], undefined>, undefined>;
307
394
  }, undefined>], undefined>, (item: v.OutputDataset<{
308
395
  timeout?: number | undefined;
309
396
  waitUntil: "load" | "domcontentloaded" | "networkidle0" | "networkidle2";
310
- url: (string | {
397
+ url: string | {
311
398
  source: "variable";
312
- key: (string | string[] | undefined) & (string | string[]);
313
- } | undefined) & (string | {
314
- source: "variable";
315
- key: (string | string[] | undefined) & (string | string[]);
316
- });
399
+ key: string | string[];
400
+ };
317
401
  type: "goto";
318
402
  } | {
319
403
  width: number;
@@ -345,6 +429,9 @@ export declare const ActionListDefine: v.ArraySchema<v.SchemaWithFallback<v.Unio
345
429
  concurrency?: number | undefined;
346
430
  } | {
347
431
  mode: "navigation";
432
+ } | {
433
+ mode: "waitBodyElements";
434
+ threshold?: number | undefined;
348
435
  };
349
436
  } | {
350
437
  type: "click";
@@ -358,13 +445,10 @@ export declare const ActionListDefine: v.ArraySchema<v.SchemaWithFallback<v.Unio
358
445
  } | {
359
446
  type: "type";
360
447
  selector: string;
361
- text: (string | {
448
+ text: string | {
362
449
  source: "variable";
363
- key: (string | string[] | undefined) & (string | string[]);
364
- } | undefined) & (string | {
365
- source: "variable";
366
- key: (string | string[] | undefined) & (string | string[]);
367
- });
450
+ key: string | string[];
451
+ };
368
452
  delay?: number | undefined;
369
453
  } | {
370
454
  type: "keypress";
@@ -373,23 +457,38 @@ export declare const ActionListDefine: v.ArraySchema<v.SchemaWithFallback<v.Unio
373
457
  } | {
374
458
  type: "selector";
375
459
  selector: string;
376
- output: string;
460
+ output?: string | {
461
+ key: string;
462
+ method: "push" | "flat-push" | "define" | "merge";
463
+ } | undefined;
377
464
  multi: boolean;
378
465
  } | {
379
466
  type: "findData";
380
467
  input: string;
381
- output: string;
468
+ output?: string | {
469
+ key: string;
470
+ method: "push" | "flat-push" | "define" | "merge";
471
+ } | undefined;
382
472
  kind: "property";
383
473
  key?: string | undefined;
384
474
  } | {
385
475
  type: "getContent";
386
476
  format: "html" | "text" | "markdown";
387
477
  cleanContent?: boolean | undefined;
388
- output: string;
478
+ output?: string | {
479
+ key: string;
480
+ method: "push" | "flat-push" | "define" | "merge";
481
+ } | undefined;
482
+ } | {
483
+ type: "rawContent";
484
+ output?: string | {
485
+ key: string;
486
+ method: "push" | "flat-push" | "define" | "merge";
487
+ } | undefined;
389
488
  } | {
390
489
  type: "page";
391
490
  input: string;
392
- output?: string;
491
+ output?: OutputO;
393
492
  actions: v.InferOutput<ActionType>[];
394
493
  concurrency: number;
395
494
  throwError: boolean;
@@ -403,7 +502,18 @@ export declare const ActionListDefine: v.ArraySchema<v.SchemaWithFallback<v.Unio
403
502
  [key: string]: unknown;
404
503
  }) | undefined;
405
504
  fn?: ((input: WebPage) => Promise<any>) | undefined;
406
- }, v.NumberIssue | v.BaseIssue<unknown> | v.BooleanIssue | v.StringIssue | v.TupleIssue | v.ObjectIssue | v.LiteralIssue | v.ArrayIssue | v.UnionIssue<v.StringIssue | v.ArrayIssue> | v.UnionIssue<v.StringIssue | v.ObjectIssue | v.LiteralIssue | v.ArrayIssue | v.UnionIssue<v.StringIssue | v.ArrayIssue>> | v.PicklistIssue | v.UnionIssue<v.StringIssue | v.TupleIssue> | v.LooseObjectIssue | v.VariantIssue | v.CustomIssue | v.UnionIssue<v.NumberIssue | v.BaseIssue<unknown> | v.BooleanIssue | v.StringIssue | v.TupleIssue | v.ObjectIssue | v.LiteralIssue | v.ArrayIssue | v.UnionIssue<v.StringIssue | v.ArrayIssue> | v.UnionIssue<v.StringIssue | v.ObjectIssue | v.LiteralIssue | v.ArrayIssue | v.UnionIssue<v.StringIssue | v.ArrayIssue>> | v.PicklistIssue | v.UnionIssue<v.StringIssue | v.TupleIssue> | v.LooseObjectIssue | v.VariantIssue | v.CustomIssue>> | undefined) => {
505
+ } | {
506
+ type: "read-variable";
507
+ input: string;
508
+ } | {
509
+ type: "evaluate";
510
+ fn: (...args: any[]) => any;
511
+ args?: any[] | undefined;
512
+ output?: string | {
513
+ key: string;
514
+ method: "push" | "flat-push" | "define" | "merge";
515
+ } | undefined;
516
+ }, v.NumberIssue | v.BaseIssue<unknown> | v.BooleanIssue | v.StringIssue | v.TupleIssue | v.ObjectIssue | v.LiteralIssue | v.ArrayIssue | v.UnionIssue<v.StringIssue | v.ArrayIssue> | v.UnionIssue<v.StringIssue | v.ObjectIssue | v.LiteralIssue | v.ArrayIssue | v.UnionIssue<v.StringIssue | v.ArrayIssue>> | v.PicklistIssue | v.UnionIssue<v.StringIssue | v.ObjectIssue | v.PicklistIssue> | v.UnionIssue<v.StringIssue | v.TupleIssue> | v.LooseObjectIssue | v.VariantIssue | v.CustomIssue | v.UnionIssue<v.NumberIssue | v.BaseIssue<unknown> | v.BooleanIssue | v.StringIssue | v.TupleIssue | v.ObjectIssue | v.LiteralIssue | v.ArrayIssue | v.UnionIssue<v.StringIssue | v.ArrayIssue> | v.UnionIssue<v.StringIssue | v.ObjectIssue | v.LiteralIssue | v.ArrayIssue | v.UnionIssue<v.StringIssue | v.ArrayIssue>> | v.PicklistIssue | v.UnionIssue<v.StringIssue | v.ObjectIssue | v.PicklistIssue> | v.UnionIssue<v.StringIssue | v.TupleIssue> | v.LooseObjectIssue | v.VariantIssue | v.CustomIssue>> | undefined) => {
407
517
  type: "custom";
408
518
  config: any;
409
519
  }>, undefined>;
package/format.d.ts CHANGED
@@ -1,4 +1,16 @@
1
1
  export declare function format(rawHtml: string, options: {
2
2
  cleanContent?: boolean;
3
3
  format: 'html' | 'text' | 'markdown';
4
- }): string | undefined;
4
+ }): string | null | undefined;
5
+ export declare function formatDoc(rawHtml: string): {
6
+ title: string | null | undefined;
7
+ content: string | null | undefined;
8
+ textContent: string | null | undefined;
9
+ length: number | null | undefined;
10
+ excerpt: string | null | undefined;
11
+ byline: string | null | undefined;
12
+ dir: string | null | undefined;
13
+ siteName: string | null | undefined;
14
+ lang: string | null | undefined;
15
+ publishedTime: string | null | undefined;
16
+ } | null;
@@ -0,0 +1,18 @@
1
+ import { WebBrowser } from './init';
2
+ export declare class FullWebRequest {
3
+ #private;
4
+ config: {
5
+ url: string;
6
+ filterLink: (url: string) => boolean;
7
+ };
8
+ browser: WebBrowser;
9
+ dataMap: Map<string, any>;
10
+ constructor(config: {
11
+ url: string;
12
+ filterLink: (url: string) => boolean;
13
+ });
14
+ start(): Promise<Map<string, any>>;
15
+ searchWebOne(url: string, context?: {
16
+ from: string;
17
+ }): Promise<void>;
18
+ }
package/index.d.ts CHANGED
@@ -3,3 +3,4 @@ export * from './define';
3
3
  export * from './page';
4
4
  export * from './download';
5
5
  export * from './format';
6
+ export * from './full-web-request';
package/index.mjs CHANGED
@@ -1,3 +1,3 @@
1
- import ne from"puppeteer-core";import{ElementHandle as W}from"puppeteer-core";import{load as B}from"cheerio";import U from"turndown";function b(a,o){let i=B(a,void 0,!0),t=i("body");if(o.cleanContent&&(t.find("script,style,iframe,footer,br,hr,svg,header,img").remove(),t.find("*").removeAttr("class"),t.find("*").removeAttr("style"),i("*").contents().filter(function(){return this.type==="comment"||this.type==="text"&&!this.data.trim()}).remove(),i("*").contents().filter(function(){return this.type==="text"&&!!this.data.trim()}).text((n,s)=>s.trim())),o.format==="html")return t.html();if(o.format==="text")return t.text();if(o.format==="markdown"){var r=new U;return r.turndown(t.html())}}import{promise as G}from"fastq";var p=class{page;browser;parent;#e={};constructor(o,i,t){this.page=o,this.browser=i,this.parent=t}ab;timeoutId;setMaxTimeout(o){this.ab=new AbortController,this.timeoutId=setTimeout(()=>{this.ab.abort("timeout")},o)}clearTimeout(){clearTimeout(this.timeoutId)}setVariable(o,i){this.#e[o]=i}getVariable(o){return this.#e[o]}#t(o,i){let t,r=!1;for(let n=0;n<i.length;n++){let s=i[n];if(s===".."){if(!o.parent)throw new Error("未找到父级");o=o.parent}else if(r){if(!t||typeof t!="object")throw new Error(`${i}路径下未找到值`);t=t[s]}else t=o.#e[s],r=!0}return t}#o(o){if(typeof o=="string")return o;if(o.source==="variable")return typeof o.key=="string"?this.#e[o.key]:this.#t(this,o.key)}async exeQueue(o){let i;for(let t of o)switch(console.log("准备执行",t),t.type){case"click":await this.page.click(t.selector,{offset:t.offset,delay:t.delay,count:t.count});break;case"type":{await this.page.type(t.selector,this.#o(t.text),{delay:t.delay});break}case"goto":{i=await this.page.goto(this.#o(t.url),{waitUntil:t.waitUntil,signal:this.ab?.signal,timeout:this.browser.getConfig()?.actionTimeout});break}case"setViewport":{i=await this.page.setViewport({width:t.width,height:t.height,isMobile:t.isMobile,isLandscape:t.isLandscape});break}case"wait":{switch(t.config.mode){case"selector":{i=await this.page.waitForSelector(t.config.selector,{visible:t.config.visible,hidden:t.config.hidden,signal:this.ab?.signal,timeout:this.browser.getConfig()?.actionTimeout});break}case"request":{let r=t.config;i=await this.page.waitForRequest(async n=>!(r.urlRegexp&&!r.urlRegexp.test(n.url())||r.method&&r.method!==n.method()),{signal:this.ab?.signal,timeout:this.browser.getConfig()?.actionTimeout});break}case"response":{let r=t.config;i=await this.page.waitForResponse(async n=>!(r.urlRegexp&&!r.urlRegexp.test(n.url())||r.status&&r.status!==n.status()),{signal:this.ab?.signal,timeout:this.browser.getConfig()?.actionTimeout});break}case"networkIdle":{i=await this.page.waitForNetworkIdle({idleTime:t.config.idleTime,concurrency:t.config.concurrency,signal:this.ab?.signal,timeout:this.browser.getConfig()?.actionTimeout});break}case"navigation":{i=await this.page.waitForNavigation({signal:this.ab?.signal,timeout:this.browser.getConfig()?.actionTimeout});break}}break}case"selector":{t.multi?i=this.#e[t.output]=await this.page.$$(t.selector):i=this.#e[t.output]=await this.page.$(t.selector);break}case"keypress":{await this.page.keyboard.press(t.key,{delay:t.delay});break}case"findData":{let r=this.#e[t.input];Array.isArray(r)?t.kind==="property"&&(i=this.#e[t.output]=await Promise.all(r.map(n=>n.getProperty(t.key).then(s=>s.jsonValue())))):r instanceof W&&(i=this.#e[t.output]=r.getProperty(t.key).then(n=>n.jsonValue()));break}case"getContent":{let r=await this.page.content();i=this.#e[t.output]=b(r,{cleanContent:t.cleanContent,format:t.format});break}case"page":{let r=this.#e[t.input],n=Array.isArray(r)?r:[r],s=G(async l=>{console.log("准备执行",l);try{let y=await this.browser.openPage(async u=>(u.setVariable("$item",n[l.index]),u.setVariable("$index",l.index),u.setVariable("$first",l.index===0),u.setVariable("$last",l.index===n.length-1),await u.exeQueue(t.actions)),this);g.push(y)}catch(y){if(t.throwError)throw y;g.push(void 0)}},t.concurrency),m;s.error(l=>{l&&(m=l)});let g=[];for(let l=0;l<n.length;l++)s.push({index:l});if(await s.drained(),t.throwError&&m)throw m;i=g,t.output&&(this.#e[t.output]=i);break}case"setUserAgent":{await this.page.setUserAgent(t.userAgent);break}case"close":{await this.page.close({runBeforeUnload:!1}),this.clearTimeout();break}case"custom":{if(typeof t.fn=="function")i=await t.fn(this);else{let r=this.browser.getCustom(t.config.type);if(!r)throw new Error(`自定义[${t.config.type}]未实现处理`);i=await r(t.config,this)}break}default:break}return i}};import*as e from"valibot";var $=e.picklist(["0","1","2","3","4","5","6","7","8","9","Power","Eject","Abort","Help","Backspace","Tab","Numpad5","NumpadEnter","Enter","\r",`
2
- `,"ShiftLeft","ShiftRight","ControlLeft","ControlRight","AltLeft","AltRight","Pause","CapsLock","Escape","Convert","NonConvert","Space","Numpad9","PageUp","Numpad3","PageDown","End","Numpad1","Home","Numpad7","ArrowLeft","Numpad4","Numpad8","ArrowUp","ArrowRight","Numpad6","Numpad2","ArrowDown","Select","Open","PrintScreen","Insert","Numpad0","Delete","NumpadDecimal","Digit0","Digit1","Digit2","Digit3","Digit4","Digit5","Digit6","Digit7","Digit8","Digit9","KeyA","KeyB","KeyC","KeyD","KeyE","KeyF","KeyG","KeyH","KeyI","KeyJ","KeyK","KeyL","KeyM","KeyN","KeyO","KeyP","KeyQ","KeyR","KeyS","KeyT","KeyU","KeyV","KeyW","KeyX","KeyY","KeyZ","MetaLeft","MetaRight","ContextMenu","NumpadMultiply","NumpadAdd","NumpadSubtract","NumpadDivide","F1","F2","F3","F4","F5","F6","F7","F8","F9","F10","F11","F12","F13","F14","F15","F16","F17","F18","F19","F20","F21","F22","F23","F24","NumLock","ScrollLock","AudioVolumeMute","AudioVolumeDown","AudioVolumeUp","MediaTrackNext","MediaTrackPrevious","MediaStop","MediaPlayPause","Semicolon","Equal","NumpadEqual","Comma","Minus","Period","Slash","Backquote","BracketLeft","Backslash","BracketRight","Quote","AltGraph","Props","Cancel","Clear","Shift","Control","Alt","Accept","ModeChange"," ","Print","Execute","\0","a","b","c","d","e","f","g","h","i","j","k","l","m","n","o","p","q","r","s","t","u","v","w","x","y","z","Meta","*","+","-","/",";","=",",",".","`","[","\\","]","'","Attn","CrSel","ExSel","EraseEof","Play","ZoomOut",")","!","@","#","$","%","^","&","(","A","B","C","D","E","F","G","H","I","J","K","L","M","N","O","P","Q","R","S","T","U","V","W","X","Y","Z",":","<","_",">","?","~","{",",","}",'"',"SoftLeft","SoftRight","Camera","Call","EndCall","VolumeDown","VolumeUp"]),c=e.optional(e.number()),h=e.optional(e.boolean()),H=e.optional(e.string());var q=c,Q=e.pipe(e.string(),e.transform(a=>new RegExp(a))),J=e.pipe(e.tuple([e.string(),e.pipe(e.string())]),e.transform(([a,o])=>new RegExp(a,o))),d=e.union([Q,J]),f=e.string(),w=e.union([e.string(),e.object({source:e.literal("variable"),key:e.union([e.string(),e.array(e.string())])})]),k=e.object({timeout:q,waitUntil:e.optional(e.picklist(["load","domcontentloaded","networkidle0","networkidle2"]),"networkidle2"),url:w,type:e.literal("goto")}),x=e.object({width:e.optional(e.number(),1920),height:e.optional(e.number(),1080),isMobile:e.optional(e.boolean()),isLandscape:e.optional(e.boolean()),type:e.literal("setViewport")}),C=e.object({userAgent:e.string(),type:e.literal("setUserAgent")}),Y=e.object({selector:f,visible:h,hidden:h}),Z=e.object({mode:e.literal("selector"),...Y.entries}),z=e.object({mode:e.literal("request"),urlRegexp:d,method:H}),X=e.object({mode:e.literal("response"),urlRegexp:e.optional(d),status:c}),_=e.object({mode:e.literal("networkIdle"),idleTime:c,concurrency:c}),ee=e.object({mode:e.literal("navigation")}),A=e.object({type:e.literal("wait"),config:e.variant("mode",[Z,z,X,_,ee])}),P=e.object({type:e.literal("click"),selector:f,offset:e.optional(e.object({x:e.number(),y:e.number()})),delay:c,count:c}),T=e.object({type:e.literal("type"),selector:f,text:w,delay:c}),E=e.object({type:e.literal("keypress"),key:$,delay:c}),D=e.object({type:e.literal("selector"),selector:f,output:e.string(),multi:e.optional(e.boolean(),!1)}),I=e.object({type:e.literal("findData"),input:e.string(),output:e.string(),kind:e.picklist(["property"]),key:e.optional(e.string())}),F=e.object({type:e.literal("getContent"),format:e.optional(e.picklist(["html","text","markdown"]),"html"),cleanContent:h,output:e.string()}),S=e.object({type:e.literal("close")}),R=e.object({type:e.literal("custom"),config:e.optional(e.looseObject({type:e.string()})),fn:e.optional(e.custom(Boolean))}),te=e.object({type:e.literal("page"),input:e.string(),output:e.optional(e.string()),concurrency:e.optional(e.number(),2),throwError:e.optional(e.boolean(),!1),actions:e.lazy(()=>e.array(N))}),oe=[...[k,x,C,A,P,T,E,D,I,F,S,R].map(a=>a.entries.type.literal),"page"],N=e.fallback(e.union([k,x,C,A,P,T,E,D,I,F,te,S,R]),a=>{if(oe.includes((a?.value).type))throw new Error(JSON.stringify(a?.issues));return{type:"custom",config:a?.value}}),V=e.array(N),ve=e.object({maxTimeout:c,actionTimeout:c});import*as L from"valibot";import{Browser as ae,computeExecutablePath as se}from"@puppeteer/browsers";import*as M from"fs";import{Browser as K,computeExecutablePath as ie,install as re}from"@puppeteer/browsers";async function j(a){let o=await re({browser:K.CHROME,baseUrl:"https://cdn.npmmirror.com/binaries/chrome-for-testing",...a,unpack:!0})}function ke(a,o){return ie({cacheDir:a,browser:K.CHROME,buildId:o})}import{PUPPETEER_REVISIONS as le}from"puppeteer-core/internal/revisions.js";async function Ne(a){return v.init(a)}var O=le.chrome,v=class a{browser;static async init(o){let i=ae.CHROME,t=se({cacheDir:o.cacheDir,browser:i,buildId:O});M.existsSync(t)||(console.log("准备下载"),await j({cacheDir:o.cacheDir,buildId:O,browser:i}));let r=await ne.launch({...o,executablePath:t});return new a(r)}constructor(o){this.browser=o}#e;#t=new Map;setConfig(o){this.#e=o}getConfig(){return this.#e}registerCustom(o,i){this.#t.set(o,i)}clearCustom(){this.#t.clear()}getCustom(o){return this.#t.get(o)}async openPage(o,i){let t=new p(await this.browser.newPage(),this,i);return this.#e?.maxTimeout&&t.setMaxTimeout(this.#e.maxTimeout),o(t)}runQueue(o,i){let t=L.safeParse(V,o);if(!t.success)throw new Error(`解析配置错误
3
- ${JSON.stringify(t.issues)}`);return this.openPage(async r=>{if(i)for(let n in i)r.setVariable(n,i[n]);return r.exeQueue(t.output)})}};export{N as ActionDefine,V as ActionListDefine,ve as GlobalConfig,v as WebBrowser,p as WebPage,j as download,b as format,ke as getExecutablePath,Ne as init};
1
+ import ye from"puppeteer-core";import{ElementHandle as z}from"puppeteer-core";import{load as v}from"cheerio";import d from"turndown";import{Readability as k}from"@mozilla/readability";import{JSDOM as x}from"jsdom";function A(n,r){if(r.cleanContent){let t=new x(n),i=new k(t.window.document).parse();if(i)switch(r.format){case"html":return i.content;case"text":return i.textContent;case"markdown":{var o=new d;return o.turndown(i.content)}default:throw""}let a=v(n,void 0,!0),s=a("body");if(s.find("script,style,iframe,footer,br,hr,svg,header,img").remove(),s.find("*").removeAttr("class"),s.find("*").removeAttr("style"),a("*").contents().filter(function(){return this.type==="comment"||this.type==="text"&&!this.data.trim()}).remove(),a("*").contents().filter(function(){return this.type==="text"&&!!this.data.trim()}).text((p,u)=>u.trim()),r.format==="html")return s.html();if(r.format==="text")return s.text();if(r.format==="markdown"){var o=new d;return o.turndown(s.html())}}else{if(r.format==="html")return n;if(r.format==="text")return v(n,void 0,!0)("body").text();if(r.format==="markdown"){var o=new d;return o.turndown(n)}}}function C(n){let r=new x(n);return new k(r.window.document,{charThreshold:100}).parse()}import{promise as X}from"fastq";var g=class{page;browser;parent;#e={};constructor(r,o,t){this.page=r,this.browser=o,this.parent=t}ab;timeoutId;setMaxTimeout(r){this.ab=new AbortController,this.timeoutId=setTimeout(()=>{this.ab.abort("timeout")},r)}clearTimeout(){clearTimeout(this.timeoutId)}setVariable(r,o){this.#e[r]=o}getVariable(r){return this.#e[r]}#r(r,o){let t,i=!1;for(let a=0;a<o.length;a++){let s=o[a];if(s===".."){if(!r.parent)throw new Error("未找到父级");r=r.parent}else if(i){if(!t||typeof t!="object")throw new Error(`${o}路径下未找到值`);t=t[s]}else t=r.#e[s],i=!0}return t}#o(r){if(typeof r=="string")return r;if(r.source==="variable")return typeof r.key=="string"?this.#e[r.key]:this.#r(this,r.key)}#t(r,o){if(o)if(typeof o=="string")this.#e[o]=r;else switch(o.method){case"push":{if(!Array.isArray(this.#e[o.key]))throw new Error(`${o.key}不是数组类型`);this.#e[o.key]||=[],this.#e[o.key].push(r);break}case"flat-push":{if(!Array.isArray(this.#e[o.key]))throw new Error(`${o.key}不是数组类型`);if(!Array.isArray(r))throw new Error(`${JSON.stringify(r)}不是数组类型`);this.#e[o.key]||=[],this.#e[o.key].push(...r);break}case"define":{this.#e[o.key]=r;break}case"merge":{if(typeof this.#e[o.key]!="object")throw new Error(`${o.key}不是对象类型`);this.#e[o.key]||={},this.#e[o.key]={...this.#e[o.key],...r};break}default:break}}async exeQueue(r){let o;for(let t of r)switch(console.log("准备执行",t),t.type){case"click":await this.page.click(t.selector,{offset:t.offset,delay:t.delay,count:t.count});break;case"type":{await this.page.type(t.selector,this.#o(t.text),{delay:t.delay});break}case"goto":{o=await this.page.goto(this.#o(t.url),{waitUntil:t.waitUntil,signal:this.ab?.signal,timeout:this.browser.getConfig()?.actionTimeout});break}case"setViewport":{o=await this.page.setViewport({width:t.width,height:t.height,isMobile:t.isMobile,isLandscape:t.isLandscape});break}case"wait":{switch(t.config.mode){case"selector":{o=await this.page.waitForSelector(t.config.selector,{visible:t.config.visible,hidden:t.config.hidden,signal:this.ab?.signal,timeout:this.browser.getConfig()?.actionTimeout});break}case"request":{let i=t.config;o=await this.page.waitForRequest(async a=>!(i.urlRegexp&&!i.urlRegexp.test(a.url())||i.method&&i.method!==a.method()),{signal:this.ab?.signal,timeout:this.browser.getConfig()?.actionTimeout});break}case"response":{let i=t.config;o=await this.page.waitForResponse(async a=>!(i.urlRegexp&&!i.urlRegexp.test(a.url())||i.status&&i.status!==a.status()),{signal:this.ab?.signal,timeout:this.browser.getConfig()?.actionTimeout});break}case"networkIdle":{o=await this.page.waitForNetworkIdle({idleTime:t.config.idleTime,concurrency:t.config.concurrency,signal:this.ab?.signal,timeout:this.browser.getConfig()?.actionTimeout});break}case"navigation":{o=await this.page.waitForNavigation({signal:this.ab?.signal,timeout:this.browser.getConfig()?.actionTimeout});break}case"waitBodyElements":{let i=t.config.threshold||100;o=await this.page.waitForFunction(a=>document.body.querySelectorAll("*").length>=a,{timeout:this.browser.getConfig()?.actionTimeout,signal:this.ab?.signal},i);break}}break}case"selector":{t.multi?this.#t(o=await this.page.$$(t.selector),t.output):this.#t(o=await this.page.$(t.selector),t.output);break}case"keypress":{await this.page.keyboard.press(t.key,{delay:t.delay});break}case"findData":{let i=this.#e[t.input];Array.isArray(i)?t.kind==="property"&&this.#t(o=await Promise.all(i.map(a=>a.getProperty(t.key).then(s=>s.jsonValue()))),t.output):i instanceof z&&this.#t(o=i.getProperty(t.key).then(a=>a.jsonValue()),t.output);break}case"getContent":{let i=await this.page.content();this.#t(o=A(i,{cleanContent:t.cleanContent,format:t.format}),t.output);break}case"rawContent":{let i=await this.page.content();this.#t(o=i,t.output);break}case"page":{let i=this.#e[t.input],a=Array.isArray(i)?i:[i],s=X(async l=>{console.log("准备执行",l);try{let y=await this.browser.openPage(async f=>(f.setVariable("$item",a[l.index]),f.setVariable("$index",l.index),f.setVariable("$first",l.index===0),f.setVariable("$last",l.index===a.length-1),{result:await f.exeQueue(t.actions),page:f}),this);u.push(y)}catch(y){if(t.throwError)throw y;u.push(void 0)}},t.concurrency),p;s.error(l=>{l&&(p=l)});let u=[];for(let l=0;l<a.length;l++)s.push({index:l});if(await s.drained(),t.throwError&&p)throw p;o=u,this.#t(o,t.output);break}case"setUserAgent":{await this.page.setUserAgent({userAgent:t.userAgent});break}case"close":{await this.page.close({runBeforeUnload:!1}),this.clearTimeout();break}case"custom":{if(typeof t.fn=="function")o=await t.fn(this);else{let i=this.browser.getCustom(t.config.type);if(!i)throw new Error(`自定义[${t.config.type}]未实现处理`);o=await i(t.config,this)}break}case"evaluate":{this.#t(o=await this.page.evaluate(t.fn,...t.args??[]),t.output);break}case"read-variable":{o=this.#e[t.input];break}default:break}return o}dispose(){return this.page.close()}};import*as e from"valibot";var ee=e.picklist(["0","1","2","3","4","5","6","7","8","9","Power","Eject","Abort","Help","Backspace","Tab","Numpad5","NumpadEnter","Enter","\r",`
2
+ `,"ShiftLeft","ShiftRight","ControlLeft","ControlRight","AltLeft","AltRight","Pause","CapsLock","Escape","Convert","NonConvert","Space","Numpad9","PageUp","Numpad3","PageDown","End","Numpad1","Home","Numpad7","ArrowLeft","Numpad4","Numpad8","ArrowUp","ArrowRight","Numpad6","Numpad2","ArrowDown","Select","Open","PrintScreen","Insert","Numpad0","Delete","NumpadDecimal","Digit0","Digit1","Digit2","Digit3","Digit4","Digit5","Digit6","Digit7","Digit8","Digit9","KeyA","KeyB","KeyC","KeyD","KeyE","KeyF","KeyG","KeyH","KeyI","KeyJ","KeyK","KeyL","KeyM","KeyN","KeyO","KeyP","KeyQ","KeyR","KeyS","KeyT","KeyU","KeyV","KeyW","KeyX","KeyY","KeyZ","MetaLeft","MetaRight","ContextMenu","NumpadMultiply","NumpadAdd","NumpadSubtract","NumpadDivide","F1","F2","F3","F4","F5","F6","F7","F8","F9","F10","F11","F12","F13","F14","F15","F16","F17","F18","F19","F20","F21","F22","F23","F24","NumLock","ScrollLock","AudioVolumeMute","AudioVolumeDown","AudioVolumeUp","MediaTrackNext","MediaTrackPrevious","MediaStop","MediaPlayPause","Semicolon","Equal","NumpadEqual","Comma","Minus","Period","Slash","Backquote","BracketLeft","Backslash","BracketRight","Quote","AltGraph","Props","Cancel","Clear","Shift","Control","Alt","Accept","ModeChange"," ","Print","Execute","\0","a","b","c","d","e","f","g","h","i","j","k","l","m","n","o","p","q","r","s","t","u","v","w","x","y","z","Meta","*","+","-","/",";","=",",",".","`","[","\\","]","'","Attn","CrSel","ExSel","EraseEof","Play","ZoomOut",")","!","@","#","$","%","^","&","(","A","B","C","D","E","F","G","H","I","J","K","L","M","N","O","P","Q","R","S","T","U","V","W","X","Y","Z",":","<","_",">","?","~","{",",","}",'"',"SoftLeft","SoftRight","Camera","Call","EndCall","VolumeDown","VolumeUp"]),c=e.optional(e.number()),b=e.optional(e.boolean()),te=e.optional(e.string());var re=c,oe=e.pipe(e.string(),e.transform(n=>new RegExp(n))),ie=e.pipe(e.tuple([e.string(),e.pipe(e.string())]),e.transform(([n,r])=>new RegExp(n,r))),E=e.union([oe,ie]),h=e.string(),P=e.union([e.string(),e.object({source:e.literal("variable"),key:e.union([e.string(),e.array(e.string())])})]),m=e.optional(e.union([e.string(),e.object({key:e.string(),method:e.picklist(["push","flat-push","define","merge"])})])),T=e.object({timeout:re,waitUntil:e.optional(e.picklist(["load","domcontentloaded","networkidle0","networkidle2"]),"networkidle2"),url:P,type:e.literal("goto")}),I=e.object({width:e.optional(e.number(),1920),height:e.optional(e.number(),1080),isMobile:e.optional(e.boolean()),isLandscape:e.optional(e.boolean()),type:e.literal("setViewport")}),O=e.object({userAgent:e.string(),type:e.literal("setUserAgent")}),ne=e.object({selector:h,visible:b,hidden:b}),ae=e.object({mode:e.literal("selector"),...ne.entries}),se=e.object({mode:e.literal("request"),urlRegexp:E,method:te}),le=e.object({mode:e.literal("response"),urlRegexp:e.optional(E),status:c}),ce=e.object({mode:e.literal("networkIdle"),idleTime:c,concurrency:c}),ue=e.object({mode:e.literal("waitBodyElements"),threshold:c}),pe=e.object({mode:e.literal("navigation")}),D=e.object({type:e.literal("wait"),config:e.variant("mode",[ae,se,le,ce,pe,ue])}),S=e.object({type:e.literal("click"),selector:h,offset:e.optional(e.object({x:e.number(),y:e.number()})),delay:c,count:c}),R=e.object({type:e.literal("type"),selector:h,text:P,delay:c}),F=e.object({type:e.literal("keypress"),key:ee,delay:c}),V=e.object({type:e.literal("selector"),selector:h,output:m,multi:e.optional(e.boolean(),!1)}),j=e.object({type:e.literal("findData"),input:e.string(),output:m,kind:e.picklist(["property"]),key:e.optional(e.string())}),N=e.object({type:e.literal("getContent"),format:e.optional(e.picklist(["html","text","markdown"]),"html"),cleanContent:b,output:m}),L=e.object({type:e.literal("rawContent"),output:m}),M=e.object({type:e.literal("evaluate"),fn:e.custom(n=>typeof n=="function"),args:e.optional(e.array(e.any())),output:m}),K=e.object({type:e.literal("close")}),U=e.object({type:e.literal("custom"),config:e.optional(e.looseObject({type:e.string()})),fn:e.optional(e.custom(Boolean))}),B=e.object({type:e.literal("read-variable"),input:e.string()}),fe=e.object({type:e.literal("page"),input:e.string(),output:m,concurrency:e.optional(e.number(),2),throwError:e.optional(e.boolean(),!1),actions:e.lazy(()=>e.array(W))}),me=[...[T,I,O,D,S,R,F,V,j,N,L,K,U,B,M].map(n=>n.entries.type.literal),"page"],W=e.fallback(e.union([T,I,O,D,S,R,F,V,j,N,L,fe,K,U,B,M]),n=>{if(me.includes((n?.value).type))throw new Error(JSON.stringify(n?.issues));return{type:"custom",config:n?.value}}),$=e.array(W),Se=e.object({maxTimeout:c,actionTimeout:c});import*as J from"valibot";import{Browser as de,computeExecutablePath as be}from"@puppeteer/browsers";import*as H from"fs";import{Browser as G,computeExecutablePath as ge,install as he}from"@puppeteer/browsers";async function q(n){let r=await he({browser:G.CHROME,baseUrl:"https://cdn.npmmirror.com/binaries/chrome-for-testing",...n,unpack:!0})}function je(n,r){return ge({cacheDir:n,browser:G.CHROME,buildId:r})}import{PUPPETEER_REVISIONS as we}from"puppeteer-core/internal/revisions.js";async function _(n){return w.init(n)}var Q=we.chrome,w=class n{browser;static async init(r){let o=de.CHROME,t=be({cacheDir:r.cacheDir,browser:o,buildId:Q});H.existsSync(t)||(console.log("准备下载"),await q({cacheDir:r.cacheDir,buildId:Q,browser:o}));let i=await ye.launch({...r,executablePath:t});return new n(i)}constructor(r){this.browser=r}#e;#r=new Map;setConfig(r){this.#e=r}getConfig(){return this.#e}registerCustom(r,o){this.#r.set(r,o)}clearCustom(){this.#r.clear()}getCustom(r){return this.#r.get(r)}async openPage(r,o){let t=new g(await this.browser.newPage(),this,o);return this.#e?.maxTimeout&&t.setMaxTimeout(this.#e.maxTimeout),r(t)}runQueue(r,o){let t=J.safeParse($,r);if(!t.success)throw new Error(`解析配置错误
3
+ ${JSON.stringify(t.issues)}`);return this.openPage(async i=>{if(o)for(let a in o)i.setVariable(a,o[a]);return{result:await i.exeQueue(t.output),page:i}})}};import{load as ve}from"cheerio";function Y(n,r){let o=ve(r,{baseURI:n});return o("a").map((i,a)=>o(a).attr("href")?o(a).prop("href"):"").get().filter(Boolean)}var Z=class{config;browser;dataMap=new Map;#e=new Set;constructor(r){this.config=r}async start(){return this.browser=await _({cacheDir:process.cwd(),headless:!1}),this.browser.setConfig({maxTimeout:12e4,actionTimeout:12e4}),await this.searchWebOne(this.config.url,void 0),await this.browser.browser.close(),this.dataMap}async searchWebOne(r,o){let t=await this.browser.runQueue([{type:"setViewport",width:1920,height:1080},{type:"goto",url:r,waitUntil:"networkidle0"},{type:"evaluate",output:"baseURI",fn:()=>window.location.origin},{type:"evaluate",output:"href",fn:()=>window.location.href},{type:"rawContent",output:"data"}]);console.log("解析完成",r);let i=t.page.getVariable("href"),a=t.page.getVariable("baseURI"),s=t.page.getVariable("data"),p=C(s);if(this.#e.add(r),this.#e.add(i),p)this.dataMap.set(r,{requestUrl:r,parsedUrl:i,parent:o?.from,metadata:p,raw:s}),await t.page.dispose();else{this.dataMap.set(r,{requestUrl:r,parsedUrl:i,parent:o?.from,metadata:void 0,raw:s}),await t.page.dispose();return}let u=Y(a,s);u=u.filter(this.config.filterLink);for(let l of u){if(this.#e.has(l)){console.log("已索引,跳过",l);continue}await this.searchWebOne(l,{from:r})}}};export{W as ActionDefine,$ as ActionListDefine,Z as FullWebRequest,Se as GlobalConfig,w as WebBrowser,g as WebPage,q as download,A as format,C as formatDoc,je as getExecutablePath,_ as init};
package/init.d.ts CHANGED
@@ -20,6 +20,9 @@ export declare class WebBrowser {
20
20
  clearCustom(): void;
21
21
  getCustom(key: string): PluginFn | undefined;
22
22
  openPage<T>(fn: (page: WebPage) => Promise<T>, parent?: WebPage): Promise<T>;
23
- runQueue(list: QueueList, input?: Record<string, any>): Promise<any>;
23
+ runQueue(list: QueueList, input?: Record<string, any>): Promise<{
24
+ result: any;
25
+ page: WebPage;
26
+ }>;
24
27
  }
25
28
  export {};
package/package.json CHANGED
@@ -1,20 +1,24 @@
1
1
  {
2
2
  "name": "@cyia/crawl",
3
- "version": "0.0.11",
3
+ "version": "0.0.13",
4
4
  "author": "wszgrcy",
5
5
  "description": "",
6
6
  "dependencies": {
7
- "cheerio": "1.0.0",
8
- "fastq": "1.19.1",
9
- "html-entities": "2.6.0",
10
- "puppeteer-core": "24.6.0",
11
- "valibot": "1.0.0",
12
- "turndown": "^7.2.0"
7
+ "@mozilla/readability": "^0.6.0",
8
+ "cheerio": "^1.2.0",
9
+ "fastq": "^1.20.1",
10
+ "html-entities": "^2.6.0",
11
+ "htmlparser2": "^10.1.0",
12
+ "jsdom": "^27.4.0",
13
+ "puppeteer-core": "^24.36.0",
14
+ "turndown": "^7.2.2",
15
+ "valibot": "^1.2.0"
13
16
  },
14
17
  "exports": {
15
18
  ".": {
16
19
  "types": "./index.d.ts",
17
- "default": "./index.mjs"
20
+ "default": "./index.mjs",
21
+ "node": "./index.mjs"
18
22
  }
19
23
  },
20
24
  "publishConfig": {
package/page.d.ts CHANGED
@@ -15,4 +15,5 @@ export declare class WebPage {
15
15
  setVariable(key: string, value: any): void;
16
16
  getVariable(key: string): any;
17
17
  exeQueue(list: v.InferOutput<typeof ActionDefine>[]): Promise<any>;
18
+ dispose(): Promise<void>;
18
19
  }
@@ -0,0 +1 @@
1
+ export declare function getPageLinks(baseURI: string, content: string): string[];