@cyia/crawl 0.0.11 → 0.0.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/define.d.ts +74 -20
- package/format.d.ts +1 -1
- package/index.mjs +925 -3
- package/package.json +4 -2
package/define.d.ts
CHANGED
|
@@ -5,6 +5,12 @@ declare const Value: v.UnionSchema<[v.StringSchema<undefined>, v.ObjectSchema<{
|
|
|
5
5
|
readonly key: v.UnionSchema<[v.StringSchema<undefined>, v.ArraySchema<v.StringSchema<undefined>, undefined>], undefined>;
|
|
6
6
|
}, undefined>], undefined>;
|
|
7
7
|
export type ValueType = v.InferOutput<typeof Value>;
|
|
8
|
+
declare const OutputP: v.OptionalSchema<v.UnionSchema<[v.StringSchema<undefined>, v.ObjectSchema<{
|
|
9
|
+
readonly key: v.StringSchema<undefined>;
|
|
10
|
+
readonly method: v.PicklistSchema<["push", "flat-push", "define", "merge"], undefined>;
|
|
11
|
+
}, undefined>], undefined>, undefined>;
|
|
12
|
+
type OutputI = v.InferInput<typeof OutputP>;
|
|
13
|
+
export type OutputO = v.InferOutput<typeof OutputP>;
|
|
8
14
|
export declare const ActionDefine: v.SchemaWithFallback<v.UnionSchema<[v.ObjectSchema<{
|
|
9
15
|
readonly timeout: v.OptionalSchema<v.NumberSchema<undefined>, undefined>;
|
|
10
16
|
readonly waitUntil: v.OptionalSchema<v.PicklistSchema<["load", "domcontentloaded", "networkidle0", "networkidle2"], undefined>, "networkidle2">;
|
|
@@ -68,30 +74,39 @@ export declare const ActionDefine: v.SchemaWithFallback<v.UnionSchema<[v.ObjectS
|
|
|
68
74
|
}, undefined>, v.ObjectSchema<{
|
|
69
75
|
readonly type: v.LiteralSchema<"selector", undefined>;
|
|
70
76
|
readonly selector: v.StringSchema<undefined>;
|
|
71
|
-
readonly output: v.StringSchema<undefined
|
|
77
|
+
readonly output: v.OptionalSchema<v.UnionSchema<[v.StringSchema<undefined>, v.ObjectSchema<{
|
|
78
|
+
readonly key: v.StringSchema<undefined>;
|
|
79
|
+
readonly method: v.PicklistSchema<["push", "flat-push", "define", "merge"], undefined>;
|
|
80
|
+
}, undefined>], undefined>, undefined>;
|
|
72
81
|
readonly multi: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
|
|
73
82
|
}, undefined>, v.ObjectSchema<{
|
|
74
83
|
readonly type: v.LiteralSchema<"findData", undefined>;
|
|
75
84
|
readonly input: v.StringSchema<undefined>;
|
|
76
|
-
readonly output: v.StringSchema<undefined
|
|
85
|
+
readonly output: v.OptionalSchema<v.UnionSchema<[v.StringSchema<undefined>, v.ObjectSchema<{
|
|
86
|
+
readonly key: v.StringSchema<undefined>;
|
|
87
|
+
readonly method: v.PicklistSchema<["push", "flat-push", "define", "merge"], undefined>;
|
|
88
|
+
}, undefined>], undefined>, undefined>;
|
|
77
89
|
readonly kind: v.PicklistSchema<["property"], undefined>;
|
|
78
90
|
readonly key: v.OptionalSchema<v.StringSchema<undefined>, undefined>;
|
|
79
91
|
}, undefined>, v.ObjectSchema<{
|
|
80
92
|
readonly type: v.LiteralSchema<"getContent", undefined>;
|
|
81
93
|
readonly format: v.OptionalSchema<v.PicklistSchema<["html", "text", "markdown"], undefined>, "html">;
|
|
82
94
|
readonly cleanContent: v.OptionalSchema<v.BooleanSchema<undefined>, undefined>;
|
|
83
|
-
readonly output: v.StringSchema<undefined
|
|
95
|
+
readonly output: v.OptionalSchema<v.UnionSchema<[v.StringSchema<undefined>, v.ObjectSchema<{
|
|
96
|
+
readonly key: v.StringSchema<undefined>;
|
|
97
|
+
readonly method: v.PicklistSchema<["push", "flat-push", "define", "merge"], undefined>;
|
|
98
|
+
}, undefined>], undefined>, undefined>;
|
|
84
99
|
}, undefined>, v.GenericSchema<{
|
|
85
100
|
type: "page";
|
|
86
101
|
input: string;
|
|
87
|
-
output?:
|
|
102
|
+
output?: OutputI;
|
|
88
103
|
actions: v.InferInput<ActionType>[];
|
|
89
104
|
concurrency?: number;
|
|
90
105
|
throwError?: boolean;
|
|
91
106
|
}, {
|
|
92
107
|
type: "page";
|
|
93
108
|
input: string;
|
|
94
|
-
output?:
|
|
109
|
+
output?: OutputO;
|
|
95
110
|
actions: v.InferOutput<ActionType>[];
|
|
96
111
|
concurrency: number;
|
|
97
112
|
throwError: boolean;
|
|
@@ -103,6 +118,9 @@ export declare const ActionDefine: v.SchemaWithFallback<v.UnionSchema<[v.ObjectS
|
|
|
103
118
|
readonly type: v.StringSchema<undefined>;
|
|
104
119
|
}, undefined>, undefined>;
|
|
105
120
|
readonly fn: v.OptionalSchema<v.CustomSchema<(input: WebPage) => Promise<any>, undefined>, undefined>;
|
|
121
|
+
}, undefined>, v.ObjectSchema<{
|
|
122
|
+
readonly type: v.LiteralSchema<"read-variable", undefined>;
|
|
123
|
+
readonly input: v.StringSchema<undefined>;
|
|
106
124
|
}, undefined>], undefined>, (item: v.OutputDataset<{
|
|
107
125
|
timeout?: number | undefined;
|
|
108
126
|
waitUntil: "load" | "domcontentloaded" | "networkidle0" | "networkidle2";
|
|
@@ -172,23 +190,32 @@ export declare const ActionDefine: v.SchemaWithFallback<v.UnionSchema<[v.ObjectS
|
|
|
172
190
|
} | {
|
|
173
191
|
type: "selector";
|
|
174
192
|
selector: string;
|
|
175
|
-
output
|
|
193
|
+
output?: string | {
|
|
194
|
+
key: string;
|
|
195
|
+
method: "push" | "flat-push" | "define" | "merge";
|
|
196
|
+
} | undefined;
|
|
176
197
|
multi: boolean;
|
|
177
198
|
} | {
|
|
178
199
|
type: "findData";
|
|
179
200
|
input: string;
|
|
180
|
-
output
|
|
201
|
+
output?: string | {
|
|
202
|
+
key: string;
|
|
203
|
+
method: "push" | "flat-push" | "define" | "merge";
|
|
204
|
+
} | undefined;
|
|
181
205
|
kind: "property";
|
|
182
206
|
key?: string | undefined;
|
|
183
207
|
} | {
|
|
184
208
|
type: "getContent";
|
|
185
209
|
format: "html" | "text" | "markdown";
|
|
186
210
|
cleanContent?: boolean | undefined;
|
|
187
|
-
output
|
|
211
|
+
output?: string | {
|
|
212
|
+
key: string;
|
|
213
|
+
method: "push" | "flat-push" | "define" | "merge";
|
|
214
|
+
} | undefined;
|
|
188
215
|
} | {
|
|
189
216
|
type: "page";
|
|
190
217
|
input: string;
|
|
191
|
-
output?:
|
|
218
|
+
output?: OutputO;
|
|
192
219
|
actions: v.InferOutput<ActionType>[];
|
|
193
220
|
concurrency: number;
|
|
194
221
|
throwError: boolean;
|
|
@@ -202,7 +229,10 @@ export declare const ActionDefine: v.SchemaWithFallback<v.UnionSchema<[v.ObjectS
|
|
|
202
229
|
[key: string]: unknown;
|
|
203
230
|
}) | undefined;
|
|
204
231
|
fn?: ((input: WebPage) => Promise<any>) | undefined;
|
|
205
|
-
}
|
|
232
|
+
} | {
|
|
233
|
+
type: "read-variable";
|
|
234
|
+
input: string;
|
|
235
|
+
}, v.NumberIssue | v.BaseIssue<unknown> | v.BooleanIssue | v.StringIssue | v.TupleIssue | v.ObjectIssue | v.LiteralIssue | v.ArrayIssue | v.UnionIssue<v.StringIssue | v.ArrayIssue> | v.UnionIssue<v.StringIssue | v.ObjectIssue | v.LiteralIssue | v.ArrayIssue | v.UnionIssue<v.StringIssue | v.ArrayIssue>> | v.PicklistIssue | v.UnionIssue<v.StringIssue | v.ObjectIssue | v.PicklistIssue> | v.UnionIssue<v.StringIssue | v.TupleIssue> | v.LooseObjectIssue | v.VariantIssue | v.CustomIssue | v.UnionIssue<v.NumberIssue | v.BaseIssue<unknown> | v.BooleanIssue | v.StringIssue | v.TupleIssue | v.ObjectIssue | v.LiteralIssue | v.ArrayIssue | v.UnionIssue<v.StringIssue | v.ArrayIssue> | v.UnionIssue<v.StringIssue | v.ObjectIssue | v.LiteralIssue | v.ArrayIssue | v.UnionIssue<v.StringIssue | v.ArrayIssue>> | v.PicklistIssue | v.UnionIssue<v.StringIssue | v.ObjectIssue | v.PicklistIssue> | v.UnionIssue<v.StringIssue | v.TupleIssue> | v.LooseObjectIssue | v.VariantIssue | v.CustomIssue>> | undefined) => {
|
|
206
236
|
type: "custom";
|
|
207
237
|
config: any;
|
|
208
238
|
}>;
|
|
@@ -269,30 +299,39 @@ export declare const ActionListDefine: v.ArraySchema<v.SchemaWithFallback<v.Unio
|
|
|
269
299
|
}, undefined>, v.ObjectSchema<{
|
|
270
300
|
readonly type: v.LiteralSchema<"selector", undefined>;
|
|
271
301
|
readonly selector: v.StringSchema<undefined>;
|
|
272
|
-
readonly output: v.StringSchema<undefined
|
|
302
|
+
readonly output: v.OptionalSchema<v.UnionSchema<[v.StringSchema<undefined>, v.ObjectSchema<{
|
|
303
|
+
readonly key: v.StringSchema<undefined>;
|
|
304
|
+
readonly method: v.PicklistSchema<["push", "flat-push", "define", "merge"], undefined>;
|
|
305
|
+
}, undefined>], undefined>, undefined>;
|
|
273
306
|
readonly multi: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
|
|
274
307
|
}, undefined>, v.ObjectSchema<{
|
|
275
308
|
readonly type: v.LiteralSchema<"findData", undefined>;
|
|
276
309
|
readonly input: v.StringSchema<undefined>;
|
|
277
|
-
readonly output: v.StringSchema<undefined
|
|
310
|
+
readonly output: v.OptionalSchema<v.UnionSchema<[v.StringSchema<undefined>, v.ObjectSchema<{
|
|
311
|
+
readonly key: v.StringSchema<undefined>;
|
|
312
|
+
readonly method: v.PicklistSchema<["push", "flat-push", "define", "merge"], undefined>;
|
|
313
|
+
}, undefined>], undefined>, undefined>;
|
|
278
314
|
readonly kind: v.PicklistSchema<["property"], undefined>;
|
|
279
315
|
readonly key: v.OptionalSchema<v.StringSchema<undefined>, undefined>;
|
|
280
316
|
}, undefined>, v.ObjectSchema<{
|
|
281
317
|
readonly type: v.LiteralSchema<"getContent", undefined>;
|
|
282
318
|
readonly format: v.OptionalSchema<v.PicklistSchema<["html", "text", "markdown"], undefined>, "html">;
|
|
283
319
|
readonly cleanContent: v.OptionalSchema<v.BooleanSchema<undefined>, undefined>;
|
|
284
|
-
readonly output: v.StringSchema<undefined
|
|
320
|
+
readonly output: v.OptionalSchema<v.UnionSchema<[v.StringSchema<undefined>, v.ObjectSchema<{
|
|
321
|
+
readonly key: v.StringSchema<undefined>;
|
|
322
|
+
readonly method: v.PicklistSchema<["push", "flat-push", "define", "merge"], undefined>;
|
|
323
|
+
}, undefined>], undefined>, undefined>;
|
|
285
324
|
}, undefined>, v.GenericSchema<{
|
|
286
325
|
type: "page";
|
|
287
326
|
input: string;
|
|
288
|
-
output?:
|
|
327
|
+
output?: OutputI;
|
|
289
328
|
actions: v.InferInput<ActionType>[];
|
|
290
329
|
concurrency?: number;
|
|
291
330
|
throwError?: boolean;
|
|
292
331
|
}, {
|
|
293
332
|
type: "page";
|
|
294
333
|
input: string;
|
|
295
|
-
output?:
|
|
334
|
+
output?: OutputO;
|
|
296
335
|
actions: v.InferOutput<ActionType>[];
|
|
297
336
|
concurrency: number;
|
|
298
337
|
throwError: boolean;
|
|
@@ -304,6 +343,9 @@ export declare const ActionListDefine: v.ArraySchema<v.SchemaWithFallback<v.Unio
|
|
|
304
343
|
readonly type: v.StringSchema<undefined>;
|
|
305
344
|
}, undefined>, undefined>;
|
|
306
345
|
readonly fn: v.OptionalSchema<v.CustomSchema<(input: WebPage) => Promise<any>, undefined>, undefined>;
|
|
346
|
+
}, undefined>, v.ObjectSchema<{
|
|
347
|
+
readonly type: v.LiteralSchema<"read-variable", undefined>;
|
|
348
|
+
readonly input: v.StringSchema<undefined>;
|
|
307
349
|
}, undefined>], undefined>, (item: v.OutputDataset<{
|
|
308
350
|
timeout?: number | undefined;
|
|
309
351
|
waitUntil: "load" | "domcontentloaded" | "networkidle0" | "networkidle2";
|
|
@@ -373,23 +415,32 @@ export declare const ActionListDefine: v.ArraySchema<v.SchemaWithFallback<v.Unio
|
|
|
373
415
|
} | {
|
|
374
416
|
type: "selector";
|
|
375
417
|
selector: string;
|
|
376
|
-
output
|
|
418
|
+
output?: string | {
|
|
419
|
+
key: string;
|
|
420
|
+
method: "push" | "flat-push" | "define" | "merge";
|
|
421
|
+
} | undefined;
|
|
377
422
|
multi: boolean;
|
|
378
423
|
} | {
|
|
379
424
|
type: "findData";
|
|
380
425
|
input: string;
|
|
381
|
-
output
|
|
426
|
+
output?: string | {
|
|
427
|
+
key: string;
|
|
428
|
+
method: "push" | "flat-push" | "define" | "merge";
|
|
429
|
+
} | undefined;
|
|
382
430
|
kind: "property";
|
|
383
431
|
key?: string | undefined;
|
|
384
432
|
} | {
|
|
385
433
|
type: "getContent";
|
|
386
434
|
format: "html" | "text" | "markdown";
|
|
387
435
|
cleanContent?: boolean | undefined;
|
|
388
|
-
output
|
|
436
|
+
output?: string | {
|
|
437
|
+
key: string;
|
|
438
|
+
method: "push" | "flat-push" | "define" | "merge";
|
|
439
|
+
} | undefined;
|
|
389
440
|
} | {
|
|
390
441
|
type: "page";
|
|
391
442
|
input: string;
|
|
392
|
-
output?:
|
|
443
|
+
output?: OutputO;
|
|
393
444
|
actions: v.InferOutput<ActionType>[];
|
|
394
445
|
concurrency: number;
|
|
395
446
|
throwError: boolean;
|
|
@@ -403,7 +454,10 @@ export declare const ActionListDefine: v.ArraySchema<v.SchemaWithFallback<v.Unio
|
|
|
403
454
|
[key: string]: unknown;
|
|
404
455
|
}) | undefined;
|
|
405
456
|
fn?: ((input: WebPage) => Promise<any>) | undefined;
|
|
406
|
-
}
|
|
457
|
+
} | {
|
|
458
|
+
type: "read-variable";
|
|
459
|
+
input: string;
|
|
460
|
+
}, v.NumberIssue | v.BaseIssue<unknown> | v.BooleanIssue | v.StringIssue | v.TupleIssue | v.ObjectIssue | v.LiteralIssue | v.ArrayIssue | v.UnionIssue<v.StringIssue | v.ArrayIssue> | v.UnionIssue<v.StringIssue | v.ObjectIssue | v.LiteralIssue | v.ArrayIssue | v.UnionIssue<v.StringIssue | v.ArrayIssue>> | v.PicklistIssue | v.UnionIssue<v.StringIssue | v.ObjectIssue | v.PicklistIssue> | v.UnionIssue<v.StringIssue | v.TupleIssue> | v.LooseObjectIssue | v.VariantIssue | v.CustomIssue | v.UnionIssue<v.NumberIssue | v.BaseIssue<unknown> | v.BooleanIssue | v.StringIssue | v.TupleIssue | v.ObjectIssue | v.LiteralIssue | v.ArrayIssue | v.UnionIssue<v.StringIssue | v.ArrayIssue> | v.UnionIssue<v.StringIssue | v.ObjectIssue | v.LiteralIssue | v.ArrayIssue | v.UnionIssue<v.StringIssue | v.ArrayIssue>> | v.PicklistIssue | v.UnionIssue<v.StringIssue | v.ObjectIssue | v.PicklistIssue> | v.UnionIssue<v.StringIssue | v.TupleIssue> | v.LooseObjectIssue | v.VariantIssue | v.CustomIssue>> | undefined) => {
|
|
407
461
|
type: "custom";
|
|
408
462
|
config: any;
|
|
409
463
|
}>, undefined>;
|
package/format.d.ts
CHANGED
package/index.mjs
CHANGED
|
@@ -1,3 +1,925 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
1
|
+
// src/init.ts
|
|
2
|
+
import puppeteer from "puppeteer-core";
|
|
3
|
+
|
|
4
|
+
// src/page.ts
|
|
5
|
+
import { ElementHandle } from "puppeteer-core";
|
|
6
|
+
|
|
7
|
+
// src/format.ts
|
|
8
|
+
import { load } from "cheerio";
|
|
9
|
+
import TurndownService from "turndown";
|
|
10
|
+
import { Readability } from "@mozilla/readability";
|
|
11
|
+
import { JSDOM } from "jsdom";
|
|
12
|
+
/**
 * Converts a raw HTML document into the requested output format.
 *
 * Behaviour by option:
 * - `cleanContent` falsy: the input is converted as-is ("html" returns the
 *   input string unchanged, "text" returns the text of `<body>`, "markdown"
 *   runs the whole input through Turndown).
 * - `cleanContent` truthy: Readability is tried first. When it yields an
 *   article, the article is returned in the requested format. When it yields
 *   nothing, the document is stripped with cheerio (non-content tags, `class`
 *   and `style` attributes, comments and blank text nodes removed) and the
 *   cleaned `<body>` is returned in the requested format.
 *
 * @param {string} rawHtml HTML source of the page.
 * @param {{ cleanContent?: boolean, format: "html" | "text" | "markdown" }} options
 * @returns {string | null | undefined} Converted content; `undefined` when the
 *   format is not recognised outside the Readability branch.
 * @throws {Error} When Readability produced an article but `options.format`
 *   is not one of the supported formats.
 */
function format(rawHtml, options) {
  // Single place that turns an HTML string into markdown.
  const toMarkdown = (html) => {
    const turndownService = new TurndownService();
    return turndownService.turndown(html);
  };

  if (!options.cleanContent) {
    switch (options.format) {
      case "html":
        return rawHtml;
      case "text": {
        const $ = load(rawHtml, void 0, true);
        return $("body").text();
      }
      case "markdown":
        return toMarkdown(rawHtml);
      default:
        return void 0;
    }
  }

  const dom = new JSDOM(rawHtml);
  const article = new Readability(dom.window.document).parse();
  if (article) {
    switch (options.format) {
      case "html":
        return article.content;
      case "text":
        return article.textContent;
      case "markdown":
        return toMarkdown(article.content);
      default:
        // Previously `throw ""`, which carried neither a message nor a stack.
        throw new Error(`Unsupported content format: ${String(options.format)}`);
    }
  }

  // Readability found no article: fall back to a manual clean-up of <body>.
  const $ = load(rawHtml, void 0, true);
  const $body = $("body");
  $body.find("script,style,iframe,footer,br,hr,svg,header,img").remove();
  $body.find("*").removeAttr("class");
  $body.find("*").removeAttr("style");
  // Drop comments and whitespace-only text nodes.
  $("*").contents().filter(function() {
    return this.type === "comment" || this.type === "text" && !this.data.trim();
  }).remove();
  // Trim the remaining text nodes.
  $("*").contents().filter(function() {
    return this.type === "text" && !!this.data.trim();
  }).text((i, text) => {
    return text.trim();
  });

  switch (options.format) {
    case "html":
      return $body.html();
    case "text":
      return $body.text();
    case "markdown":
      return toMarkdown($body.html());
    default:
      return void 0;
  }
}
|
|
64
|
+
|
|
65
|
+
// src/page.ts
|
|
66
|
+
import { promise as fastq } from "fastq";
|
|
67
|
+
/**
 * Runs a list of declarative actions against a puppeteer-style page and keeps
 * a per-page variable store that actions can read from and write to.
 *
 * Collaborators, as used by this class:
 * - `page`: must provide the page methods called in `exeQueue` (`click`,
 *   `type`, `goto`, `waitFor*`, `$`, `$$`, `content`, ...).
 * - `browser`: must provide `getConfig()`, `openPage(callback, parent)` and
 *   `getCustom(type)`.
 * - `parent`: the `WebPage` that spawned this one, reachable from variable
 *   paths through the `".."` segment.
 */
var WebPage = class {
  page;
  browser;
  parent;
  /** Variable store; keys are variable names. */
  #obj = {};
  /** AbortController created by `setMaxTimeout`, if any. */
  ab;
  /** Handle of the timer armed by `setMaxTimeout`, if any. */
  timeoutId;

  constructor(page, browser, parent) {
    this.page = page;
    this.browser = browser;
    this.parent = parent;
  }

  /**
   * Arms an overall deadline: after `timeout` milliseconds the abort signal
   * handed to the goto and wait actions is aborted with reason "timeout".
   * A deadline armed earlier is cancelled first, so a stale timer cannot
   * abort the new controller ahead of time.
   */
  setMaxTimeout(timeout) {
    clearTimeout(this.timeoutId);
    const controller = new AbortController();
    this.ab = controller;
    this.timeoutId = setTimeout(() => {
      controller.abort("timeout");
    }, timeout);
  }

  /** Cancels the deadline armed by `setMaxTimeout`. */
  clearTimeout() {
    clearTimeout(this.timeoutId);
  }

  /** Stores `value` under `key` in this page's variable store. */
  setVariable(key, value) {
    this.#obj[key] = value;
  }

  /** Returns the variable stored under `key`, or `undefined`. */
  getVariable(key) {
    return this.#obj[key];
  }

  /**
   * Resolves a variable path. Leading `".."` segments walk up to parent
   * pages, the first other segment selects a variable of the page reached,
   * and every further segment indexes into that value.
   *
   * @throws {Error} When `".."` is used on a page without a parent, or when
   *   a segment has to index into a value that is not an object.
   */
  #navigatePath(page, paths) {
    let current = void 0;
    let variableFound = false;
    for (const segment of paths) {
      if (segment === "..") {
        if (!page.parent) {
          throw new Error("未找到父级");
        }
        page = page.parent;
      } else if (variableFound) {
        if (!current || typeof current !== "object") {
          throw new Error(`${paths}路径下未找到值`);
        }
        current = current[segment];
      } else {
        current = page.#obj[segment];
        variableFound = true;
      }
    }
    return current;
  }

  /**
   * Resolves an action value: a plain string is used as-is, and a
   * `{ source: "variable", key }` reference is looked up in the variable
   * store (`key` being a name or a path). Anything else gives `undefined`.
   */
  #getValue(value) {
    if (typeof value === "string") {
      return value;
    }
    if (value.source === "variable") {
      return typeof value.key === "string" ? this.#obj[value.key] : this.#navigatePath(this, value.key);
    }
  }

  /**
   * Writes an action result into the variable store.
   *
   * `output` is either a variable name (plain assignment) or
   * `{ key, method }`, where `method` is:
   * - "push": append `value` to the array stored under `key`;
   * - "flat-push": append every element of the array `value`;
   * - "define": plain assignment;
   * - "merge": shallow-merge `value` into the object stored under `key`.
   *
   * A key that holds nothing yet starts as an empty array or object. The
   * type check used to run before that default was applied, so the first
   * write to a fresh key always threw.
   *
   * @throws {Error} When the existing variable, or `value` for "flat-push",
   *   has the wrong type. The store is left untouched in that case.
   */
  #setOutput(value, output) {
    if (!output) {
      return;
    }
    if (typeof output === "string") {
      this.#obj[output] = value;
      return;
    }
    const { key, method } = output;
    switch (method) {
      case "push": {
        const target = this.#obj[key] ?? [];
        if (!Array.isArray(target)) {
          throw new Error(`${key}不是数组类型`);
        }
        target.push(value);
        this.#obj[key] = target;
        break;
      }
      case "flat-push": {
        const target = this.#obj[key] ?? [];
        if (!Array.isArray(target)) {
          throw new Error(`${key}不是数组类型`);
        }
        if (!Array.isArray(value)) {
          throw new Error(`${JSON.stringify(value)}不是数组类型`);
        }
        target.push(...value);
        this.#obj[key] = target;
        break;
      }
      case "define": {
        this.#obj[key] = value;
        break;
      }
      case "merge": {
        const current = this.#obj[key] ?? {};
        if (typeof current !== "object") {
          throw new Error(`${key}不是对象类型`);
        }
        this.#obj[key] = { ...current, ...value };
        break;
      }
      default:
        break;
    }
  }

  /** Abort signal and per-action timeout shared by the goto and wait actions. */
  #actionOptions() {
    return {
      signal: this.ab?.signal,
      timeout: this.browser.getConfig()?.actionTimeout
    };
  }

  /**
   * Runs `item.actions` once per element of the input variable, each run in
   * a page obtained from `browser.openPage`, with at most `item.concurrency`
   * runs in flight. Each run sees `$item`, `$index`, `$first` and `$last`.
   *
   * @returns {Promise<unknown[]>} One result per input, in input order.
   *   Results are written by index because runs may finish out of order.
   *   A failed run leaves `undefined` unless `item.throwError` is set.
   */
  async #runPages(item) {
    const inputValue = this.#obj[item.input];
    const inputs = Array.isArray(inputValue) ? inputValue : [inputValue];
    const resultList = new Array(inputs.length).fill(void 0);
    const queue = fastq(async (input) => {
      console.log("准备执行", input);
      try {
        resultList[input.index] = await this.browser.openPage(async (page) => {
          page.setVariable("$item", inputs[input.index]);
          page.setVariable("$index", input.index);
          page.setVariable("$first", input.index === 0);
          page.setVariable("$last", input.index === inputs.length - 1);
          return await page.exeQueue(item.actions);
        }, this);
      } catch (error) {
        if (item.throwError) {
          throw error;
        }
        // Best effort: the slot for this input stays undefined.
      }
    }, item.concurrency);
    let queueError;
    queue.error((error) => {
      if (error) {
        queueError = error;
      }
    });
    for (let index = 0; index < inputs.length; index++) {
      queue.push({ index });
    }
    await queue.drained();
    if (item.throwError && queueError) {
      throw queueError;
    }
    return resultList;
  }

  /**
   * Executes the actions in `list` one after another.
   *
   * @param {Array<object>} list Actions, discriminated by `type`.
   * @returns {Promise<unknown>} The value produced by the last action that
   *   produces one. Actions such as click, type and keypress leave the
   *   previous value in place.
   */
  async exeQueue(list) {
    let value;
    for (const item of list) {
      console.log("准备执行", item);
      switch (item.type) {
        case "click":
          await this.page.click(item.selector, {
            offset: item.offset,
            delay: item.delay,
            count: item.count
          });
          break;
        case "type": {
          await this.page.type(item.selector, this.#getValue(item.text), {
            delay: item.delay
          });
          break;
        }
        case "goto": {
          value = await this.page.goto(this.#getValue(item.url), {
            waitUntil: item.waitUntil,
            ...this.#actionOptions()
          });
          break;
        }
        case "setViewport": {
          value = await this.page.setViewport({
            width: item.width,
            height: item.height,
            isMobile: item.isMobile,
            isLandscape: item.isLandscape
          });
          break;
        }
        case "wait": {
          const config = item.config;
          switch (config.mode) {
            case "selector": {
              value = await this.page.waitForSelector(config.selector, {
                visible: config.visible,
                hidden: config.hidden,
                ...this.#actionOptions()
              });
              break;
            }
            case "request": {
              value = await this.page.waitForRequest(async (req) => {
                if (config.urlRegexp && !config.urlRegexp.test(req.url())) {
                  return false;
                }
                if (config.method && config.method !== req.method()) {
                  return false;
                }
                return true;
              }, this.#actionOptions());
              break;
            }
            case "response": {
              value = await this.page.waitForResponse(async (res) => {
                if (config.urlRegexp && !config.urlRegexp.test(res.url())) {
                  return false;
                }
                if (config.status && config.status !== res.status()) {
                  return false;
                }
                return true;
              }, this.#actionOptions());
              break;
            }
            case "networkIdle": {
              value = await this.page.waitForNetworkIdle({
                idleTime: config.idleTime,
                concurrency: config.concurrency,
                ...this.#actionOptions()
              });
              break;
            }
            case "navigation": {
              value = await this.page.waitForNavigation(this.#actionOptions());
              break;
            }
          }
          break;
        }
        case "selector": {
          value = item.multi ? await this.page.$$(item.selector) : await this.page.$(item.selector);
          this.#setOutput(value, item.output);
          break;
        }
        case "keypress": {
          await this.page.keyboard.press(item.key, { delay: item.delay });
          break;
        }
        case "findData": {
          const data = this.#obj[item.input];
          const readProperty = async (el) => {
            const handle = await el.getProperty(item.key);
            return handle.jsonValue();
          };
          if (Array.isArray(data)) {
            if (item.kind === "property") {
              value = await Promise.all(data.map((el) => readProperty(el)));
              this.#setOutput(value, item.output);
            }
          } else if (data instanceof ElementHandle) {
            // Awaited so that the property value is stored, not a pending promise.
            value = await readProperty(data);
            this.#setOutput(value, item.output);
          }
          break;
        }
        case "getContent": {
          const content = await this.page.content();
          value = format(content, { cleanContent: item.cleanContent, format: item.format });
          this.#setOutput(value, item.output);
          break;
        }
        case "page": {
          value = await this.#runPages(item);
          this.#setOutput(value, item.output);
          break;
        }
        case "setUserAgent": {
          await this.page.setUserAgent(item.userAgent);
          break;
        }
        case "close": {
          await this.page.close({ runBeforeUnload: false });
          this.clearTimeout();
          break;
        }
        case "custom": {
          if (typeof item.fn === "function") {
            value = await item.fn(this);
          } else {
            const plugin = this.browser.getCustom(item.config.type);
            if (!plugin) {
              throw new Error(`自定义[${item.config.type}]未实现处理`);
            }
            value = await plugin(item.config, this);
          }
          break;
        }
        case "read-variable": {
          value = this.#obj[item.input];
          break;
        }
        default:
          break;
      }
    }
    return value;
  }
};
|
|
396
|
+
|
|
397
|
+
// src/define.ts
|
|
398
|
+
import * as v from "valibot";
|
|
399
|
+
// Key names accepted by the keypress action; intended to mirror puppeteer's
// `KeyInput` union. The entry between "{" and "}" used to be a second ","
// where `KeyInput` has "|", so the pipe key could not be validated.
var KEYLIST = v.picklist([
  "0", "1", "2", "3", "4", "5", "6", "7", "8", "9",
  "Power", "Eject", "Abort", "Help", "Backspace", "Tab",
  "Numpad5", "NumpadEnter", "Enter", "\r", "\n",
  "ShiftLeft", "ShiftRight", "ControlLeft", "ControlRight", "AltLeft", "AltRight",
  "Pause", "CapsLock", "Escape", "Convert", "NonConvert", "Space",
  "Numpad9", "PageUp", "Numpad3", "PageDown", "End", "Numpad1", "Home", "Numpad7",
  "ArrowLeft", "Numpad4", "Numpad8", "ArrowUp", "ArrowRight", "Numpad6", "Numpad2", "ArrowDown",
  "Select", "Open", "PrintScreen", "Insert", "Numpad0", "Delete", "NumpadDecimal",
  "Digit0", "Digit1", "Digit2", "Digit3", "Digit4", "Digit5", "Digit6", "Digit7", "Digit8", "Digit9",
  "KeyA", "KeyB", "KeyC", "KeyD", "KeyE", "KeyF", "KeyG", "KeyH", "KeyI", "KeyJ", "KeyK", "KeyL", "KeyM",
  "KeyN", "KeyO", "KeyP", "KeyQ", "KeyR", "KeyS", "KeyT", "KeyU", "KeyV", "KeyW", "KeyX", "KeyY", "KeyZ",
  "MetaLeft", "MetaRight", "ContextMenu",
  "NumpadMultiply", "NumpadAdd", "NumpadSubtract", "NumpadDivide",
  "F1", "F2", "F3", "F4", "F5", "F6", "F7", "F8", "F9", "F10", "F11", "F12",
  "F13", "F14", "F15", "F16", "F17", "F18", "F19", "F20", "F21", "F22", "F23", "F24",
  "NumLock", "ScrollLock",
  "AudioVolumeMute", "AudioVolumeDown", "AudioVolumeUp",
  "MediaTrackNext", "MediaTrackPrevious", "MediaStop", "MediaPlayPause",
  "Semicolon", "Equal", "NumpadEqual", "Comma", "Minus", "Period", "Slash", "Backquote",
  "BracketLeft", "Backslash", "BracketRight", "Quote",
  "AltGraph", "Props", "Cancel", "Clear", "Shift", "Control", "Alt", "Accept", "ModeChange",
  " ", "Print", "Execute", "\0",
  "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m",
  "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z",
  "Meta",
  "*", "+", "-", "/", ";", "=", ",", ".", "`", "[", "\\", "]", "'",
  "Attn", "CrSel", "ExSel", "EraseEof", "Play", "ZoomOut",
  ")", "!", "@", "#", "$", "%", "^", "&", "(",
  "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M",
  "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z",
  ":", "<", "_", ">", "?", "~", "{", "|", "}", '"',
  "SoftLeft", "SoftRight", "Camera", "Call", "EndCall", "VolumeDown", "VolumeUp"
]);
|
|
656
|
+
// Shared optional primitive schemas reused by the action definitions in this module.
var OptNumber = v.optional(v.number()),
  OptBoolean = v.optional(v.boolean()),
  OptString = v.optional(v.string()),
  // Timeout fields are plain optional numbers; this alias is the same schema instance as OptNumber.
  TimeoutDefine = OptNumber;
|
|
660
|
+
/**
 * Schema that takes a regular-expression source string and outputs a compiled `RegExp`.
 *
 * `new RegExp` throws on a malformed pattern; that failure is caught here and
 * reported as a validation issue so that `safeParse` returns an unsuccessful
 * result instead of letting a raw `SyntaxError` escape.
 */
var RegExpStr = v.pipe(
  v.string(),
  v.rawTransform(({ dataset, addIssue, NEVER }) => {
    try {
      return new RegExp(dataset.value);
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error);
      addIssue({ message: `Invalid regular expression: ${reason}` });
      return NEVER;
    }
  })
);
|
|
666
|
+
/**
 * Schema that takes a `[pattern, flags]` tuple of strings and outputs a compiled `RegExp`.
 *
 * `new RegExp` throws on a malformed pattern or unsupported flags; that failure
 * is caught here and reported as a validation issue so that `safeParse` returns
 * an unsuccessful result instead of letting a raw `SyntaxError` escape.
 */
var RegexpTup = v.pipe(
  v.tuple([v.string(), v.string()]),
  v.rawTransform(({ dataset, addIssue, NEVER }) => {
    const [pattern, flags] = dataset.value;
    try {
      return new RegExp(pattern, flags);
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error);
      addIssue({ message: `Invalid regular expression: ${reason}` });
      return NEVER;
    }
  })
);
|
|
672
|
+
// A regular expression may be given as a pattern string or as a [pattern, flags] tuple.
var RegexpUni = v.union([RegExpStr, RegexpTup]);

// Selectors are only validated as strings.
var Selector = v.string();

// A value is either a literal string or an object tagged `source: "variable"`
// whose `key` is a string or an array of strings.
var Value = v.union([
  v.string(),
  v.object({
    source: v.literal("variable"),
    key: v.union([v.string(), v.array(v.string())])
  })
]);

// Optional output target: a bare string, or a `{ key, method }` object where
// `method` is one of "push", "flat-push", "define" or "merge".
var OutputP = v.optional(
  v.union([
    v.string(),
    v.object({
      key: v.string(),
      method: v.picklist(["push", "flat-push", "define", "merge"])
    })
  ])
);
|
|
678
|
+
// "goto" action: a url value plus an optional timeout; `waitUntil` defaults to "networkidle2".
var GoToA = v.object({
  timeout: TimeoutDefine,
  waitUntil: v.optional(
    v.picklist(["load", "domcontentloaded", "networkidle0", "networkidle2"]),
    "networkidle2"
  ),
  url: Value,
  type: v.literal("goto")
});

// "setViewport" action: width/height default to 1920x1080; the two flags are optional booleans.
var SetViewportA = v.object({
  width: v.optional(v.number(), 1920),
  height: v.optional(v.number(), 1080),
  isMobile: OptBoolean,
  isLandscape: OptBoolean,
  type: v.literal("setUserAgent") === null ? undefined : v.literal("setViewport")
});

// "setUserAgent" action: requires the user-agent string.
var SetUserAgentA = v.object({
  userAgent: v.string(),
  type: v.literal("setUserAgent")
});
|
|
695
|
+
// Fields shared by selector-based configurations.
var SelectorCommon = v.object({
  selector: Selector,
  visible: OptBoolean,
  hidden: OptBoolean
});

// Wait mode "selector": the SelectorCommon fields plus the mode tag.
var WaitSelector = v.object({
  mode: v.literal("selector"),
  ...SelectorCommon.entries
});

// Wait mode "request": a required URL regexp and an optional method string.
var WaitRequest = v.object({
  mode: v.literal("request"),
  urlRegexp: RegexpUni,
  method: OptString
});

// Wait mode "response": an optional URL regexp and an optional status number.
var WaitResponse = v.object({
  mode: v.literal("response"),
  urlRegexp: v.optional(RegexpUni),
  // method: OptString,
  status: OptNumber
});

// Wait mode "networkIdle": both tuning numbers are optional.
var WaitNetworkIdle = v.object({
  mode: v.literal("networkIdle"),
  idleTime: OptNumber,
  concurrency: OptNumber
});

// Wait mode "navigation": carries no options besides the mode tag.
var WaitNavigation = v.object({
  mode: v.literal("navigation")
});

// "wait" action: `config` is discriminated on its `mode` field.
var WaitA = v.object({
  type: v.literal("wait"),
  config: v.variant("mode", [
    WaitSelector,
    WaitRequest,
    WaitResponse,
    WaitNetworkIdle,
    WaitNavigation
  ])
});
|
|
727
|
+
// "click" action: a selector, an optional {x, y} offset, and optional delay/count numbers.
var ClickA = v.object({
  type: v.literal("click"),
  selector: Selector,
  offset: v.optional(
    v.object({
      x: v.number(),
      y: v.number()
    })
  ),
  delay: OptNumber,
  count: OptNumber
});

// "type" action: enters `text` (a literal or variable reference) at `selector`.
var TypeA = v.object({
  type: v.literal("type"),
  selector: Selector,
  text: Value,
  delay: OptNumber
});

// "keypress" action: `key` must be one of the entries accepted by KEYLIST.
var KeyPress = v.object({
  type: v.literal("keypress"),
  key: KEYLIST,
  delay: OptNumber
});
|
|
745
|
+
// "selector" action: a selector with an optional output target; `multi` defaults to false.
var SelectEl = v.object({
  type: v.literal("selector"),
  // ...SelectorCommon.entries,
  selector: Selector,
  output: OutputP,
  multi: v.optional(v.boolean(), false)
});

// "findData" action: `kind` currently only admits "property"; `key` is optional.
var FindData = v.object({
  type: v.literal("findData"),
  input: v.string(),
  output: OutputP,
  kind: v.picklist(["property"]),
  key: OptString
  // multi: v.optional(v.boolean(), true),
});

// "getContent" action: `format` is "html", "text" or "markdown" and defaults to "html".
var GetContent = v.object({
  type: v.literal("getContent"),
  format: v.optional(v.picklist(["html", "text", "markdown"]), "html"),
  cleanContent: OptBoolean,
  output: OutputP
});
|
|
766
|
+
// "close" action: carries only its type tag.
var CloseA = v.object({
  type: v.literal("close")
});

// "custom" action: optional loose `config` object (must carry a string `type`)
// and an optional `fn` that is checked only with `Boolean`.
var PluginA = v.object({
  type: v.literal("custom"),
  config: v.optional(
    v.looseObject({
      type: v.string()
    })
  ),
  fn: v.optional(v.custom(Boolean))
});

// "read-variable" action: requires an `input` string.
var ReadVariable = v.object({
  type: v.literal("read-variable"),
  input: v.string()
});

// "page" action: nested action list; `concurrency` defaults to 2 and `throwError` to false.
var PageA = v.object({
  type: v.literal("page"),
  input: v.string(),
  output: OutputP,
  concurrency: v.optional(v.number(), 2),
  throwError: v.optional(v.boolean(), false),
  // Lazy because ActionDefine is declared later and itself includes PageA.
  actions: v.lazy(() => v.array(ActionDefine))
});
|
|
786
|
+
// Every built-in action `type` literal, read from each schema's `type` entry.
// "page" is appended by hand rather than read from PageA.
var TypeList = [
  GoToA,
  SetViewportA,
  SetUserAgentA,
  WaitA,
  ClickA,
  TypeA,
  KeyPress,
  SelectEl,
  FindData,
  GetContent,
  // PageA,
  CloseA,
  PluginA,
  ReadVariable
]
  .map((schema) => schema.entries.type.literal)
  .concat("page");
|
|
805
|
+
/**
 * Schema for a single action: a union of all built-in action schemas with a
 * fallback for everything that fails to match.
 *
 * Fallback behaviour:
 * - If the raw input is null/undefined, or its `type` is one of the built-in
 *   types in `TypeList`, the input is a malformed built-in action, so an
 *   `Error` carrying the JSON-serialised validation issues is thrown.
 * - Otherwise the raw input is wrapped as `{ type: "custom", config: <input> }`.
 */
var ActionDefine = v.fallback(
  v.union([
    GoToA,
    SetViewportA,
    SetUserAgentA,
    WaitA,
    ClickA,
    TypeA,
    KeyPress,
    SelectEl,
    FindData,
    GetContent,
    PageA,
    CloseA,
    PluginA,
    ReadVariable
  ]),
  (item) => {
    const raw = item?.value;
    // Guard against null/undefined input: reading `.type` from it would raise
    // an opaque TypeError instead of reporting the validation issues.
    if (raw == null || TypeList.includes(raw.type)) {
      throw new Error(JSON.stringify(item?.issues));
    }
    return { type: "custom", config: raw };
  }
);
|
|
829
|
+
// A queue is an array of actions, each validated by ActionDefine.
var ActionListDefine = v.array(ActionDefine);

// Browser-wide settings; both fields are optional numbers.
var GlobalConfig = v.object({
  maxTimeout: OptNumber,
  actionTimeout: OptNumber
});
|
|
834
|
+
|
|
835
|
+
// src/init.ts
|
|
836
|
+
import * as v2 from "valibot";
|
|
837
|
+
import { Browser as BV, computeExecutablePath as computeExecutablePath2 } from "@puppeteer/browsers";
|
|
838
|
+
import * as fs from "fs";
|
|
839
|
+
|
|
840
|
+
// src/download.ts
|
|
841
|
+
import { Browser, computeExecutablePath, install } from "@puppeteer/browsers";
|
|
842
|
+
/**
 * Installs a browser build through `@puppeteer/browsers` `install`.
 *
 * Defaults to Chrome fetched from the npmmirror chrome-for-testing CDN. Because
 * `options` is spread after the defaults, the caller may override `browser` and
 * `baseUrl`; `unpack` is always forced to `true`.
 *
 * @param options Install options merged over the defaults.
 * @returns A promise that resolves (with no value) once `install` has finished.
 */
async function download(options) {
  await install({
    browser: Browser.CHROME,
    baseUrl: "https://cdn.npmmirror.com/binaries/chrome-for-testing",
    ...options,
    unpack: true
  });
}
|
|
850
|
+
/**
 * Computes the Chrome executable path for a given cache directory and build id.
 *
 * @param dir Cache directory passed to `computeExecutablePath` as `cacheDir`.
 * @param buildId Browser build identifier.
 * @returns Whatever `computeExecutablePath` returns for that query.
 */
function getExecutablePath(dir, buildId) {
  const query = { cacheDir: dir, browser: Browser.CHROME, buildId };
  return computeExecutablePath(query);
}
|
|
853
|
+
|
|
854
|
+
// src/init.ts
|
|
855
|
+
import { PUPPETEER_REVISIONS } from "puppeteer-core/internal/revisions.js";
|
|
856
|
+
/**
 * Convenience wrapper that delegates to `WebBrowser.init`.
 *
 * @param options Forwarded unchanged to `WebBrowser.init`.
 * @returns The value `WebBrowser.init` resolves to.
 */
async function init(options) {
  const instance = await WebBrowser.init(options);
  return instance;
}
|
|
859
|
+
// Chrome build id taken from the `chrome` entry of puppeteer-core's PUPPETEER_REVISIONS.
var { chrome: CHROME_VERSION } = PUPPETEER_REVISIONS;
|
|
860
|
+
/**
 * Wraps a launched puppeteer browser together with a global config object and
 * a registry of custom handlers keyed by type.
 */
var WebBrowser = class _WebBrowser {
  /** Browser handle handed to the constructor. */
  browser;
  /** Last value passed to `setConfig`. */
  #config;
  /** Custom handlers registered through `registerCustom`, keyed by type. */
  #pluginMap = /* @__PURE__ */ new Map();

  /**
   * Resolves the Chrome executable for `CHROME_VERSION` inside `options.cacheDir`,
   * downloads it first when the file does not exist, then launches puppeteer with
   * `options` plus the resolved `executablePath`.
   *
   * @param options Launch options; `cacheDir` is also used to locate/download Chrome.
   * @returns A new `WebBrowser` wrapping the launched browser.
   */
  static async init(options) {
    const browserKind = BV.CHROME;
    const executablePath = computeExecutablePath2({
      cacheDir: options.cacheDir,
      browser: browserKind,
      buildId: CHROME_VERSION
    });
    const alreadyInstalled = fs.existsSync(executablePath);
    if (!alreadyInstalled) {
      console.log("准备下载");
      await download({
        cacheDir: options.cacheDir,
        buildId: CHROME_VERSION,
        browser: browserKind
      });
    }
    const launched = await puppeteer.launch({ ...options, executablePath });
    return new _WebBrowser(launched);
  }

  /** @param browser Browser handle to wrap. */
  constructor(browser) {
    this.browser = browser;
  }

  /** Stores the global configuration. */
  setConfig(config) {
    this.#config = config;
  }

  /** Returns the configuration previously stored with `setConfig`. */
  getConfig() {
    return this.#config;
  }

  /** Registers (or replaces) the custom handler for `type`. */
  registerCustom(type, fn) {
    this.#pluginMap.set(type, fn);
  }

  /** Removes every registered custom handler. */
  clearCustom() {
    this.#pluginMap.clear();
  }

  /** Looks up the custom handler registered under `key`, if any. */
  getCustom(key) {
    return this.#pluginMap.get(key);
  }

  /**
   * Opens a new browser page, wraps it in a `WebPage`, applies the configured
   * `maxTimeout` when it is truthy, and hands the wrapper to `fn`.
   *
   * @param fn Callback receiving the `WebPage`.
   * @param parent Passed through as the third `WebPage` constructor argument.
   * @returns Whatever `fn` returns.
   */
  async openPage(fn, parent) {
    const rawPage = await this.browser.newPage();
    const page = new WebPage(rawPage, this, parent);
    const configuredMax = this.#config?.maxTimeout;
    if (configuredMax) {
      page.setMaxTimeout(configuredMax);
    }
    return fn(page);
  }

  /**
   * Validates `list` against `ActionListDefine` and executes it on a fresh page.
   *
   * @param list Raw action list to validate.
   * @param input Optional object whose enumerable keys are set as page variables
   *   before the queue runs.
   * @returns The promise returned by `openPage`.
   * @throws Error synchronously when `list` fails validation; the message
   *   contains the JSON-serialised issues.
   */
  runQueue(list, input) {
    const parsed = v2.safeParse(ActionListDefine, list);
    if (!parsed.success) {
      throw new Error(`解析配置错误\n${JSON.stringify(parsed.issues)}`);
    }
    const seedVariables = (page) => {
      if (!input) {
        return;
      }
      for (const name in input) {
        page.setVariable(name, input[name]);
      }
    };
    return this.openPage(async (page) => {
      seedVariables(page);
      return page.exeQueue(parsed.output);
    });
  }
};
|
|
915
|
+
export {
|
|
916
|
+
ActionDefine,
|
|
917
|
+
ActionListDefine,
|
|
918
|
+
GlobalConfig,
|
|
919
|
+
WebBrowser,
|
|
920
|
+
WebPage,
|
|
921
|
+
download,
|
|
922
|
+
format,
|
|
923
|
+
getExecutablePath,
|
|
924
|
+
init
|
|
925
|
+
};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@cyia/crawl",
|
|
3
|
-
"version": "0.0.11",
|
|
3
|
+
"version": "0.0.12",
|
|
4
4
|
"author": "wszgrcy",
|
|
5
5
|
"description": "",
|
|
6
6
|
"dependencies": {
|
|
@@ -9,7 +9,9 @@
|
|
|
9
9
|
"html-entities": "2.6.0",
|
|
10
10
|
"puppeteer-core": "24.6.0",
|
|
11
11
|
"valibot": "1.0.0",
|
|
12
|
-
"turndown": "^7.2.0"
|
|
12
|
+
"turndown": "^7.2.0",
|
|
13
|
+
"@mozilla/readability": "^0.6.0",
|
|
14
|
+
"jsdom": "^26.0.0"
|
|
13
15
|
},
|
|
14
16
|
"exports": {
|
|
15
17
|
".": {
|