@cyia/crawl 0.0.12 → 0.0.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/define.d.ts +80 -24
- package/format.d.ts +12 -0
- package/full-web-request.d.ts +18 -0
- package/index.d.ts +1 -0
- package/index.mjs +3 -925
- package/init.d.ts +4 -1
- package/package.json +11 -9
- package/page.d.ts +1 -0
- package/util/get-page-link.d.ts +1 -0
package/define.d.ts
CHANGED
|
@@ -49,6 +49,9 @@ export declare const ActionDefine: v.SchemaWithFallback<v.UnionSchema<[v.ObjectS
|
|
|
49
49
|
readonly concurrency: v.OptionalSchema<v.NumberSchema<undefined>, undefined>;
|
|
50
50
|
}, undefined>, v.ObjectSchema<{
|
|
51
51
|
readonly mode: v.LiteralSchema<"navigation", undefined>;
|
|
52
|
+
}, undefined>, v.ObjectSchema<{
|
|
53
|
+
readonly mode: v.LiteralSchema<"waitBodyElements", undefined>;
|
|
54
|
+
readonly threshold: v.OptionalSchema<v.NumberSchema<undefined>, undefined>;
|
|
52
55
|
}, undefined>], undefined>;
|
|
53
56
|
}, undefined>, v.ObjectSchema<{
|
|
54
57
|
readonly type: v.LiteralSchema<"click", undefined>;
|
|
@@ -96,6 +99,12 @@ export declare const ActionDefine: v.SchemaWithFallback<v.UnionSchema<[v.ObjectS
|
|
|
96
99
|
readonly key: v.StringSchema<undefined>;
|
|
97
100
|
readonly method: v.PicklistSchema<["push", "flat-push", "define", "merge"], undefined>;
|
|
98
101
|
}, undefined>], undefined>, undefined>;
|
|
102
|
+
}, undefined>, v.ObjectSchema<{
|
|
103
|
+
readonly type: v.LiteralSchema<"rawContent", undefined>;
|
|
104
|
+
readonly output: v.OptionalSchema<v.UnionSchema<[v.StringSchema<undefined>, v.ObjectSchema<{
|
|
105
|
+
readonly key: v.StringSchema<undefined>;
|
|
106
|
+
readonly method: v.PicklistSchema<["push", "flat-push", "define", "merge"], undefined>;
|
|
107
|
+
}, undefined>], undefined>, undefined>;
|
|
99
108
|
}, undefined>, v.GenericSchema<{
|
|
100
109
|
type: "page";
|
|
101
110
|
input: string;
|
|
@@ -121,16 +130,21 @@ export declare const ActionDefine: v.SchemaWithFallback<v.UnionSchema<[v.ObjectS
|
|
|
121
130
|
}, undefined>, v.ObjectSchema<{
|
|
122
131
|
readonly type: v.LiteralSchema<"read-variable", undefined>;
|
|
123
132
|
readonly input: v.StringSchema<undefined>;
|
|
133
|
+
}, undefined>, v.ObjectSchema<{
|
|
134
|
+
readonly type: v.LiteralSchema<"evaluate", undefined>;
|
|
135
|
+
readonly fn: v.CustomSchema<(...args: any[]) => any, undefined>;
|
|
136
|
+
readonly args: v.OptionalSchema<v.ArraySchema<v.AnySchema, undefined>, undefined>;
|
|
137
|
+
readonly output: v.OptionalSchema<v.UnionSchema<[v.StringSchema<undefined>, v.ObjectSchema<{
|
|
138
|
+
readonly key: v.StringSchema<undefined>;
|
|
139
|
+
readonly method: v.PicklistSchema<["push", "flat-push", "define", "merge"], undefined>;
|
|
140
|
+
}, undefined>], undefined>, undefined>;
|
|
124
141
|
}, undefined>], undefined>, (item: v.OutputDataset<{
|
|
125
142
|
timeout?: number | undefined;
|
|
126
143
|
waitUntil: "load" | "domcontentloaded" | "networkidle0" | "networkidle2";
|
|
127
|
-
url:
|
|
128
|
-
source: "variable";
|
|
129
|
-
key: (string | string[] | undefined) & (string | string[]);
|
|
130
|
-
} | undefined) & (string | {
|
|
144
|
+
url: string | {
|
|
131
145
|
source: "variable";
|
|
132
|
-
key:
|
|
133
|
-
}
|
|
146
|
+
key: string | string[];
|
|
147
|
+
};
|
|
134
148
|
type: "goto";
|
|
135
149
|
} | {
|
|
136
150
|
width: number;
|
|
@@ -162,6 +176,9 @@ export declare const ActionDefine: v.SchemaWithFallback<v.UnionSchema<[v.ObjectS
|
|
|
162
176
|
concurrency?: number | undefined;
|
|
163
177
|
} | {
|
|
164
178
|
mode: "navigation";
|
|
179
|
+
} | {
|
|
180
|
+
mode: "waitBodyElements";
|
|
181
|
+
threshold?: number | undefined;
|
|
165
182
|
};
|
|
166
183
|
} | {
|
|
167
184
|
type: "click";
|
|
@@ -175,13 +192,10 @@ export declare const ActionDefine: v.SchemaWithFallback<v.UnionSchema<[v.ObjectS
|
|
|
175
192
|
} | {
|
|
176
193
|
type: "type";
|
|
177
194
|
selector: string;
|
|
178
|
-
text:
|
|
179
|
-
source: "variable";
|
|
180
|
-
key: (string | string[] | undefined) & (string | string[]);
|
|
181
|
-
} | undefined) & (string | {
|
|
195
|
+
text: string | {
|
|
182
196
|
source: "variable";
|
|
183
|
-
key:
|
|
184
|
-
}
|
|
197
|
+
key: string | string[];
|
|
198
|
+
};
|
|
185
199
|
delay?: number | undefined;
|
|
186
200
|
} | {
|
|
187
201
|
type: "keypress";
|
|
@@ -212,6 +226,12 @@ export declare const ActionDefine: v.SchemaWithFallback<v.UnionSchema<[v.ObjectS
|
|
|
212
226
|
key: string;
|
|
213
227
|
method: "push" | "flat-push" | "define" | "merge";
|
|
214
228
|
} | undefined;
|
|
229
|
+
} | {
|
|
230
|
+
type: "rawContent";
|
|
231
|
+
output?: string | {
|
|
232
|
+
key: string;
|
|
233
|
+
method: "push" | "flat-push" | "define" | "merge";
|
|
234
|
+
} | undefined;
|
|
215
235
|
} | {
|
|
216
236
|
type: "page";
|
|
217
237
|
input: string;
|
|
@@ -232,6 +252,14 @@ export declare const ActionDefine: v.SchemaWithFallback<v.UnionSchema<[v.ObjectS
|
|
|
232
252
|
} | {
|
|
233
253
|
type: "read-variable";
|
|
234
254
|
input: string;
|
|
255
|
+
} | {
|
|
256
|
+
type: "evaluate";
|
|
257
|
+
fn: (...args: any[]) => any;
|
|
258
|
+
args?: any[] | undefined;
|
|
259
|
+
output?: string | {
|
|
260
|
+
key: string;
|
|
261
|
+
method: "push" | "flat-push" | "define" | "merge";
|
|
262
|
+
} | undefined;
|
|
235
263
|
}, v.NumberIssue | v.BaseIssue<unknown> | v.BooleanIssue | v.StringIssue | v.TupleIssue | v.ObjectIssue | v.LiteralIssue | v.ArrayIssue | v.UnionIssue<v.StringIssue | v.ArrayIssue> | v.UnionIssue<v.StringIssue | v.ObjectIssue | v.LiteralIssue | v.ArrayIssue | v.UnionIssue<v.StringIssue | v.ArrayIssue>> | v.PicklistIssue | v.UnionIssue<v.StringIssue | v.ObjectIssue | v.PicklistIssue> | v.UnionIssue<v.StringIssue | v.TupleIssue> | v.LooseObjectIssue | v.VariantIssue | v.CustomIssue | v.UnionIssue<v.NumberIssue | v.BaseIssue<unknown> | v.BooleanIssue | v.StringIssue | v.TupleIssue | v.ObjectIssue | v.LiteralIssue | v.ArrayIssue | v.UnionIssue<v.StringIssue | v.ArrayIssue> | v.UnionIssue<v.StringIssue | v.ObjectIssue | v.LiteralIssue | v.ArrayIssue | v.UnionIssue<v.StringIssue | v.ArrayIssue>> | v.PicklistIssue | v.UnionIssue<v.StringIssue | v.ObjectIssue | v.PicklistIssue> | v.UnionIssue<v.StringIssue | v.TupleIssue> | v.LooseObjectIssue | v.VariantIssue | v.CustomIssue>> | undefined) => {
|
|
236
264
|
type: "custom";
|
|
237
265
|
config: any;
|
|
@@ -274,6 +302,9 @@ export declare const ActionListDefine: v.ArraySchema<v.SchemaWithFallback<v.Unio
|
|
|
274
302
|
readonly concurrency: v.OptionalSchema<v.NumberSchema<undefined>, undefined>;
|
|
275
303
|
}, undefined>, v.ObjectSchema<{
|
|
276
304
|
readonly mode: v.LiteralSchema<"navigation", undefined>;
|
|
305
|
+
}, undefined>, v.ObjectSchema<{
|
|
306
|
+
readonly mode: v.LiteralSchema<"waitBodyElements", undefined>;
|
|
307
|
+
readonly threshold: v.OptionalSchema<v.NumberSchema<undefined>, undefined>;
|
|
277
308
|
}, undefined>], undefined>;
|
|
278
309
|
}, undefined>, v.ObjectSchema<{
|
|
279
310
|
readonly type: v.LiteralSchema<"click", undefined>;
|
|
@@ -321,6 +352,12 @@ export declare const ActionListDefine: v.ArraySchema<v.SchemaWithFallback<v.Unio
|
|
|
321
352
|
readonly key: v.StringSchema<undefined>;
|
|
322
353
|
readonly method: v.PicklistSchema<["push", "flat-push", "define", "merge"], undefined>;
|
|
323
354
|
}, undefined>], undefined>, undefined>;
|
|
355
|
+
}, undefined>, v.ObjectSchema<{
|
|
356
|
+
readonly type: v.LiteralSchema<"rawContent", undefined>;
|
|
357
|
+
readonly output: v.OptionalSchema<v.UnionSchema<[v.StringSchema<undefined>, v.ObjectSchema<{
|
|
358
|
+
readonly key: v.StringSchema<undefined>;
|
|
359
|
+
readonly method: v.PicklistSchema<["push", "flat-push", "define", "merge"], undefined>;
|
|
360
|
+
}, undefined>], undefined>, undefined>;
|
|
324
361
|
}, undefined>, v.GenericSchema<{
|
|
325
362
|
type: "page";
|
|
326
363
|
input: string;
|
|
@@ -346,16 +383,21 @@ export declare const ActionListDefine: v.ArraySchema<v.SchemaWithFallback<v.Unio
|
|
|
346
383
|
}, undefined>, v.ObjectSchema<{
|
|
347
384
|
readonly type: v.LiteralSchema<"read-variable", undefined>;
|
|
348
385
|
readonly input: v.StringSchema<undefined>;
|
|
386
|
+
}, undefined>, v.ObjectSchema<{
|
|
387
|
+
readonly type: v.LiteralSchema<"evaluate", undefined>;
|
|
388
|
+
readonly fn: v.CustomSchema<(...args: any[]) => any, undefined>;
|
|
389
|
+
readonly args: v.OptionalSchema<v.ArraySchema<v.AnySchema, undefined>, undefined>;
|
|
390
|
+
readonly output: v.OptionalSchema<v.UnionSchema<[v.StringSchema<undefined>, v.ObjectSchema<{
|
|
391
|
+
readonly key: v.StringSchema<undefined>;
|
|
392
|
+
readonly method: v.PicklistSchema<["push", "flat-push", "define", "merge"], undefined>;
|
|
393
|
+
}, undefined>], undefined>, undefined>;
|
|
349
394
|
}, undefined>], undefined>, (item: v.OutputDataset<{
|
|
350
395
|
timeout?: number | undefined;
|
|
351
396
|
waitUntil: "load" | "domcontentloaded" | "networkidle0" | "networkidle2";
|
|
352
|
-
url:
|
|
353
|
-
source: "variable";
|
|
354
|
-
key: (string | string[] | undefined) & (string | string[]);
|
|
355
|
-
} | undefined) & (string | {
|
|
397
|
+
url: string | {
|
|
356
398
|
source: "variable";
|
|
357
|
-
key:
|
|
358
|
-
}
|
|
399
|
+
key: string | string[];
|
|
400
|
+
};
|
|
359
401
|
type: "goto";
|
|
360
402
|
} | {
|
|
361
403
|
width: number;
|
|
@@ -387,6 +429,9 @@ export declare const ActionListDefine: v.ArraySchema<v.SchemaWithFallback<v.Unio
|
|
|
387
429
|
concurrency?: number | undefined;
|
|
388
430
|
} | {
|
|
389
431
|
mode: "navigation";
|
|
432
|
+
} | {
|
|
433
|
+
mode: "waitBodyElements";
|
|
434
|
+
threshold?: number | undefined;
|
|
390
435
|
};
|
|
391
436
|
} | {
|
|
392
437
|
type: "click";
|
|
@@ -400,13 +445,10 @@ export declare const ActionListDefine: v.ArraySchema<v.SchemaWithFallback<v.Unio
|
|
|
400
445
|
} | {
|
|
401
446
|
type: "type";
|
|
402
447
|
selector: string;
|
|
403
|
-
text:
|
|
404
|
-
source: "variable";
|
|
405
|
-
key: (string | string[] | undefined) & (string | string[]);
|
|
406
|
-
} | undefined) & (string | {
|
|
448
|
+
text: string | {
|
|
407
449
|
source: "variable";
|
|
408
|
-
key:
|
|
409
|
-
}
|
|
450
|
+
key: string | string[];
|
|
451
|
+
};
|
|
410
452
|
delay?: number | undefined;
|
|
411
453
|
} | {
|
|
412
454
|
type: "keypress";
|
|
@@ -437,6 +479,12 @@ export declare const ActionListDefine: v.ArraySchema<v.SchemaWithFallback<v.Unio
|
|
|
437
479
|
key: string;
|
|
438
480
|
method: "push" | "flat-push" | "define" | "merge";
|
|
439
481
|
} | undefined;
|
|
482
|
+
} | {
|
|
483
|
+
type: "rawContent";
|
|
484
|
+
output?: string | {
|
|
485
|
+
key: string;
|
|
486
|
+
method: "push" | "flat-push" | "define" | "merge";
|
|
487
|
+
} | undefined;
|
|
440
488
|
} | {
|
|
441
489
|
type: "page";
|
|
442
490
|
input: string;
|
|
@@ -457,6 +505,14 @@ export declare const ActionListDefine: v.ArraySchema<v.SchemaWithFallback<v.Unio
|
|
|
457
505
|
} | {
|
|
458
506
|
type: "read-variable";
|
|
459
507
|
input: string;
|
|
508
|
+
} | {
|
|
509
|
+
type: "evaluate";
|
|
510
|
+
fn: (...args: any[]) => any;
|
|
511
|
+
args?: any[] | undefined;
|
|
512
|
+
output?: string | {
|
|
513
|
+
key: string;
|
|
514
|
+
method: "push" | "flat-push" | "define" | "merge";
|
|
515
|
+
} | undefined;
|
|
460
516
|
}, v.NumberIssue | v.BaseIssue<unknown> | v.BooleanIssue | v.StringIssue | v.TupleIssue | v.ObjectIssue | v.LiteralIssue | v.ArrayIssue | v.UnionIssue<v.StringIssue | v.ArrayIssue> | v.UnionIssue<v.StringIssue | v.ObjectIssue | v.LiteralIssue | v.ArrayIssue | v.UnionIssue<v.StringIssue | v.ArrayIssue>> | v.PicklistIssue | v.UnionIssue<v.StringIssue | v.ObjectIssue | v.PicklistIssue> | v.UnionIssue<v.StringIssue | v.TupleIssue> | v.LooseObjectIssue | v.VariantIssue | v.CustomIssue | v.UnionIssue<v.NumberIssue | v.BaseIssue<unknown> | v.BooleanIssue | v.StringIssue | v.TupleIssue | v.ObjectIssue | v.LiteralIssue | v.ArrayIssue | v.UnionIssue<v.StringIssue | v.ArrayIssue> | v.UnionIssue<v.StringIssue | v.ObjectIssue | v.LiteralIssue | v.ArrayIssue | v.UnionIssue<v.StringIssue | v.ArrayIssue>> | v.PicklistIssue | v.UnionIssue<v.StringIssue | v.ObjectIssue | v.PicklistIssue> | v.UnionIssue<v.StringIssue | v.TupleIssue> | v.LooseObjectIssue | v.VariantIssue | v.CustomIssue>> | undefined) => {
|
|
461
517
|
type: "custom";
|
|
462
518
|
config: any;
|
package/format.d.ts
CHANGED
|
@@ -2,3 +2,15 @@ export declare function format(rawHtml: string, options: {
|
|
|
2
2
|
cleanContent?: boolean;
|
|
3
3
|
format: 'html' | 'text' | 'markdown';
|
|
4
4
|
}): string | null | undefined;
|
|
5
|
+
export declare function formatDoc(rawHtml: string): {
|
|
6
|
+
title: string | null | undefined;
|
|
7
|
+
content: string | null | undefined;
|
|
8
|
+
textContent: string | null | undefined;
|
|
9
|
+
length: number | null | undefined;
|
|
10
|
+
excerpt: string | null | undefined;
|
|
11
|
+
byline: string | null | undefined;
|
|
12
|
+
dir: string | null | undefined;
|
|
13
|
+
siteName: string | null | undefined;
|
|
14
|
+
lang: string | null | undefined;
|
|
15
|
+
publishedTime: string | null | undefined;
|
|
16
|
+
} | null;
|
package/full-web-request.d.ts
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import { WebBrowser } from './init';
|
|
2
|
+
export declare class FullWebRequest {
|
|
3
|
+
#private;
|
|
4
|
+
config: {
|
|
5
|
+
url: string;
|
|
6
|
+
filterLink: (url: string) => boolean;
|
|
7
|
+
};
|
|
8
|
+
browser: WebBrowser;
|
|
9
|
+
dataMap: Map<string, any>;
|
|
10
|
+
constructor(config: {
|
|
11
|
+
url: string;
|
|
12
|
+
filterLink: (url: string) => boolean;
|
|
13
|
+
});
|
|
14
|
+
start(): Promise<Map<string, any>>;
|
|
15
|
+
searchWebOne(url: string, context?: {
|
|
16
|
+
from: string;
|
|
17
|
+
}): Promise<void>;
|
|
18
|
+
}
|
package/index.d.ts
CHANGED
package/index.mjs
CHANGED
|
@@ -1,925 +1,3 @@
|
|
|
1
|
-
|
|
2
|
-
import puppeteer from "puppeteer-core";
|
|
3
|
-
|
|
4
|
-
// src/page.ts
|
|
5
|
-
import { ElementHandle } from "puppeteer-core";
|
|
6
|
-
|
|
7
|
-
// src/format.ts
|
|
8
|
-
import { load } from "cheerio";
|
|
9
|
-
import TurndownService from "turndown";
|
|
10
|
-
import { Readability } from "@mozilla/readability";
|
|
11
|
-
import { JSDOM } from "jsdom";
|
|
12
|
-
function format(rawHtml, options) {
|
|
13
|
-
if (options.cleanContent) {
|
|
14
|
-
let dom = new JSDOM(rawHtml);
|
|
15
|
-
let result = new Readability(dom.window.document).parse();
|
|
16
|
-
if (result) {
|
|
17
|
-
switch (options.format) {
|
|
18
|
-
case "html":
|
|
19
|
-
return result.content;
|
|
20
|
-
case "text":
|
|
21
|
-
return result.textContent;
|
|
22
|
-
case "markdown": {
|
|
23
|
-
var turndownService = new TurndownService();
|
|
24
|
-
return turndownService.turndown(result.content);
|
|
25
|
-
}
|
|
26
|
-
default:
|
|
27
|
-
throw "";
|
|
28
|
-
}
|
|
29
|
-
}
|
|
30
|
-
let $ = load(rawHtml, void 0, true);
|
|
31
|
-
let $body = $("body");
|
|
32
|
-
$body.find("script,style,iframe,footer,br,hr,svg,header,img").remove();
|
|
33
|
-
$body.find("*").removeAttr("class");
|
|
34
|
-
$body.find("*").removeAttr("style");
|
|
35
|
-
$("*").contents().filter(function() {
|
|
36
|
-
return this.type === "comment" || this.type === "text" && !this.data.trim();
|
|
37
|
-
}).remove();
|
|
38
|
-
$("*").contents().filter(function() {
|
|
39
|
-
return this.type === "text" && !!this.data.trim();
|
|
40
|
-
}).text((i, text) => {
|
|
41
|
-
return text.trim();
|
|
42
|
-
});
|
|
43
|
-
if (options.format === "html") {
|
|
44
|
-
return $body.html();
|
|
45
|
-
} else if (options.format === "text") {
|
|
46
|
-
return $body.text();
|
|
47
|
-
} else if (options.format === "markdown") {
|
|
48
|
-
var turndownService = new TurndownService();
|
|
49
|
-
return turndownService.turndown($body.html());
|
|
50
|
-
}
|
|
51
|
-
} else {
|
|
52
|
-
if (options.format === "html") {
|
|
53
|
-
return rawHtml;
|
|
54
|
-
} else if (options.format === "text") {
|
|
55
|
-
let $ = load(rawHtml, void 0, true);
|
|
56
|
-
let $body = $("body");
|
|
57
|
-
return $body.text();
|
|
58
|
-
} else if (options.format === "markdown") {
|
|
59
|
-
var turndownService = new TurndownService();
|
|
60
|
-
return turndownService.turndown(rawHtml);
|
|
61
|
-
}
|
|
62
|
-
}
|
|
63
|
-
}
|
|
64
|
-
|
|
65
|
-
// src/page.ts
|
|
66
|
-
import { promise as fastq } from "fastq";
|
|
67
|
-
var WebPage = class {
|
|
68
|
-
page;
|
|
69
|
-
browser;
|
|
70
|
-
parent;
|
|
71
|
-
#obj = {};
|
|
72
|
-
constructor(page, browser, parent) {
|
|
73
|
-
this.page = page;
|
|
74
|
-
this.browser = browser;
|
|
75
|
-
this.parent = parent;
|
|
76
|
-
}
|
|
77
|
-
ab;
|
|
78
|
-
timeoutId;
|
|
79
|
-
setMaxTimeout(timeout) {
|
|
80
|
-
this.ab = new AbortController();
|
|
81
|
-
this.timeoutId = setTimeout(() => {
|
|
82
|
-
this.ab.abort("timeout");
|
|
83
|
-
}, timeout);
|
|
84
|
-
}
|
|
85
|
-
clearTimeout() {
|
|
86
|
-
clearTimeout(this.timeoutId);
|
|
87
|
-
}
|
|
88
|
-
setVariable(key, value) {
|
|
89
|
-
this.#obj[key] = value;
|
|
90
|
-
}
|
|
91
|
-
getVariable(key) {
|
|
92
|
-
return this.#obj[key];
|
|
93
|
-
}
|
|
94
|
-
#navigatePath(page, paths) {
|
|
95
|
-
let value = void 0;
|
|
96
|
-
let findValue = false;
|
|
97
|
-
for (let i = 0; i < paths.length; i++) {
|
|
98
|
-
const item = paths[i];
|
|
99
|
-
if (item === "..") {
|
|
100
|
-
if (!page.parent) {
|
|
101
|
-
throw new Error("未找到父级");
|
|
102
|
-
}
|
|
103
|
-
page = page.parent;
|
|
104
|
-
} else if (findValue) {
|
|
105
|
-
if (!value || typeof value !== "object") {
|
|
106
|
-
throw new Error(`${paths}路径下未找到值`);
|
|
107
|
-
}
|
|
108
|
-
value = value[item];
|
|
109
|
-
} else {
|
|
110
|
-
value = page.#obj[item];
|
|
111
|
-
findValue = true;
|
|
112
|
-
}
|
|
113
|
-
}
|
|
114
|
-
return value;
|
|
115
|
-
}
|
|
116
|
-
#getValue(value) {
|
|
117
|
-
if (typeof value === "string") {
|
|
118
|
-
return value;
|
|
119
|
-
} else if (value.source === "variable") {
|
|
120
|
-
if (typeof value.key === "string") {
|
|
121
|
-
return this.#obj[value.key];
|
|
122
|
-
} else {
|
|
123
|
-
return this.#navigatePath(this, value.key);
|
|
124
|
-
}
|
|
125
|
-
}
|
|
126
|
-
}
|
|
127
|
-
#setOutput(value, output) {
|
|
128
|
-
if (!output) {
|
|
129
|
-
return;
|
|
130
|
-
}
|
|
131
|
-
if (typeof output === "string") {
|
|
132
|
-
this.#obj[output] = value;
|
|
133
|
-
} else {
|
|
134
|
-
switch (output.method) {
|
|
135
|
-
case "push": {
|
|
136
|
-
if (!Array.isArray(this.#obj[output.key])) {
|
|
137
|
-
throw new Error(`${output.key}不是数组类型`);
|
|
138
|
-
}
|
|
139
|
-
this.#obj[output.key] ||= [];
|
|
140
|
-
this.#obj[output.key].push(value);
|
|
141
|
-
break;
|
|
142
|
-
}
|
|
143
|
-
case "flat-push": {
|
|
144
|
-
if (!Array.isArray(this.#obj[output.key])) {
|
|
145
|
-
throw new Error(`${output.key}不是数组类型`);
|
|
146
|
-
}
|
|
147
|
-
if (!Array.isArray(value)) {
|
|
148
|
-
throw new Error(`${JSON.stringify(value)}不是数组类型`);
|
|
149
|
-
}
|
|
150
|
-
this.#obj[output.key] ||= [];
|
|
151
|
-
this.#obj[output.key].push(...value);
|
|
152
|
-
break;
|
|
153
|
-
}
|
|
154
|
-
case "define": {
|
|
155
|
-
this.#obj[output.key] = value;
|
|
156
|
-
break;
|
|
157
|
-
}
|
|
158
|
-
case "merge": {
|
|
159
|
-
if (typeof this.#obj[output.key] !== "object") {
|
|
160
|
-
throw new Error(`${output.key}不是对象类型`);
|
|
161
|
-
}
|
|
162
|
-
this.#obj[output.key] ||= {};
|
|
163
|
-
this.#obj[output.key] = { ...this.#obj[output.key], ...value };
|
|
164
|
-
break;
|
|
165
|
-
}
|
|
166
|
-
default:
|
|
167
|
-
break;
|
|
168
|
-
}
|
|
169
|
-
}
|
|
170
|
-
}
|
|
171
|
-
async exeQueue(list) {
|
|
172
|
-
let value;
|
|
173
|
-
for (const item of list) {
|
|
174
|
-
console.log("准备执行", item);
|
|
175
|
-
switch (item.type) {
|
|
176
|
-
case "click":
|
|
177
|
-
await this.page.click(item.selector, {
|
|
178
|
-
offset: item.offset,
|
|
179
|
-
delay: item.delay,
|
|
180
|
-
count: item.count
|
|
181
|
-
});
|
|
182
|
-
break;
|
|
183
|
-
case "type": {
|
|
184
|
-
await this.page.type(item.selector, this.#getValue(item.text), {
|
|
185
|
-
delay: item.delay
|
|
186
|
-
});
|
|
187
|
-
break;
|
|
188
|
-
}
|
|
189
|
-
case "goto": {
|
|
190
|
-
value = await this.page.goto(this.#getValue(item.url), {
|
|
191
|
-
waitUntil: item.waitUntil,
|
|
192
|
-
signal: this.ab?.signal,
|
|
193
|
-
timeout: this.browser.getConfig()?.actionTimeout
|
|
194
|
-
});
|
|
195
|
-
break;
|
|
196
|
-
}
|
|
197
|
-
case "setViewport": {
|
|
198
|
-
value = await this.page.setViewport({
|
|
199
|
-
width: item.width,
|
|
200
|
-
height: item.height,
|
|
201
|
-
isMobile: item.isMobile,
|
|
202
|
-
isLandscape: item.isLandscape
|
|
203
|
-
});
|
|
204
|
-
break;
|
|
205
|
-
}
|
|
206
|
-
case "wait": {
|
|
207
|
-
switch (item.config.mode) {
|
|
208
|
-
case "selector": {
|
|
209
|
-
value = await this.page.waitForSelector(item.config.selector, {
|
|
210
|
-
visible: item.config.visible,
|
|
211
|
-
hidden: item.config.hidden,
|
|
212
|
-
signal: this.ab?.signal,
|
|
213
|
-
timeout: this.browser.getConfig()?.actionTimeout
|
|
214
|
-
});
|
|
215
|
-
break;
|
|
216
|
-
}
|
|
217
|
-
case "request": {
|
|
218
|
-
const config = item.config;
|
|
219
|
-
value = await this.page.waitForRequest(
|
|
220
|
-
async (req) => {
|
|
221
|
-
if (config.urlRegexp) {
|
|
222
|
-
let result = config.urlRegexp.test(req.url());
|
|
223
|
-
if (!result) {
|
|
224
|
-
return false;
|
|
225
|
-
}
|
|
226
|
-
}
|
|
227
|
-
if (config.method && config.method !== req.method()) {
|
|
228
|
-
return false;
|
|
229
|
-
}
|
|
230
|
-
return true;
|
|
231
|
-
},
|
|
232
|
-
{
|
|
233
|
-
signal: this.ab?.signal,
|
|
234
|
-
timeout: this.browser.getConfig()?.actionTimeout
|
|
235
|
-
}
|
|
236
|
-
);
|
|
237
|
-
break;
|
|
238
|
-
}
|
|
239
|
-
case "response": {
|
|
240
|
-
const config = item.config;
|
|
241
|
-
value = await this.page.waitForResponse(
|
|
242
|
-
async (res) => {
|
|
243
|
-
if (config.urlRegexp) {
|
|
244
|
-
let result = config.urlRegexp.test(res.url());
|
|
245
|
-
if (!result) {
|
|
246
|
-
return false;
|
|
247
|
-
}
|
|
248
|
-
}
|
|
249
|
-
if (config.status && config.status !== res.status()) {
|
|
250
|
-
return false;
|
|
251
|
-
}
|
|
252
|
-
return true;
|
|
253
|
-
},
|
|
254
|
-
{
|
|
255
|
-
signal: this.ab?.signal,
|
|
256
|
-
timeout: this.browser.getConfig()?.actionTimeout
|
|
257
|
-
}
|
|
258
|
-
);
|
|
259
|
-
break;
|
|
260
|
-
}
|
|
261
|
-
case "networkIdle": {
|
|
262
|
-
value = await this.page.waitForNetworkIdle({
|
|
263
|
-
idleTime: item.config.idleTime,
|
|
264
|
-
concurrency: item.config.concurrency,
|
|
265
|
-
signal: this.ab?.signal,
|
|
266
|
-
timeout: this.browser.getConfig()?.actionTimeout
|
|
267
|
-
});
|
|
268
|
-
break;
|
|
269
|
-
}
|
|
270
|
-
case "navigation": {
|
|
271
|
-
value = await this.page.waitForNavigation({
|
|
272
|
-
signal: this.ab?.signal,
|
|
273
|
-
timeout: this.browser.getConfig()?.actionTimeout
|
|
274
|
-
});
|
|
275
|
-
break;
|
|
276
|
-
}
|
|
277
|
-
}
|
|
278
|
-
break;
|
|
279
|
-
}
|
|
280
|
-
case "selector": {
|
|
281
|
-
if (item.multi) {
|
|
282
|
-
this.#setOutput(value = await this.page.$$(item.selector), item.output);
|
|
283
|
-
} else {
|
|
284
|
-
this.#setOutput(value = await this.page.$(item.selector), item.output);
|
|
285
|
-
}
|
|
286
|
-
break;
|
|
287
|
-
}
|
|
288
|
-
case "keypress": {
|
|
289
|
-
await this.page.keyboard.press(item.key, { delay: item.delay });
|
|
290
|
-
break;
|
|
291
|
-
}
|
|
292
|
-
case "findData": {
|
|
293
|
-
let data = this.#obj[item.input];
|
|
294
|
-
if (Array.isArray(data)) {
|
|
295
|
-
if (item.kind === "property") {
|
|
296
|
-
this.#setOutput(
|
|
297
|
-
value = await Promise.all(
|
|
298
|
-
data.map((el) => {
|
|
299
|
-
return el.getProperty(item.key).then((a) => {
|
|
300
|
-
return a.jsonValue();
|
|
301
|
-
});
|
|
302
|
-
})
|
|
303
|
-
),
|
|
304
|
-
item.output
|
|
305
|
-
);
|
|
306
|
-
}
|
|
307
|
-
} else {
|
|
308
|
-
if (data instanceof ElementHandle) {
|
|
309
|
-
this.#setOutput(
|
|
310
|
-
value = data.getProperty(item.key).then((a) => {
|
|
311
|
-
return a.jsonValue();
|
|
312
|
-
}),
|
|
313
|
-
item.output
|
|
314
|
-
);
|
|
315
|
-
}
|
|
316
|
-
}
|
|
317
|
-
break;
|
|
318
|
-
}
|
|
319
|
-
case "getContent": {
|
|
320
|
-
let content = await this.page.content();
|
|
321
|
-
this.#setOutput(value = format(content, { cleanContent: item.cleanContent, format: item.format }), item.output);
|
|
322
|
-
break;
|
|
323
|
-
}
|
|
324
|
-
case "page": {
|
|
325
|
-
let inputValue = this.#obj[item.input];
|
|
326
|
-
let list2 = Array.isArray(inputValue) ? inputValue : [inputValue];
|
|
327
|
-
let queue = fastq(async (input) => {
|
|
328
|
-
console.log("准备执行", input);
|
|
329
|
-
try {
|
|
330
|
-
let result = await this.browser.openPage(async (page) => {
|
|
331
|
-
page.setVariable("$item", list2[input.index]);
|
|
332
|
-
page.setVariable("$index", input.index);
|
|
333
|
-
page.setVariable("$first", input.index === 0);
|
|
334
|
-
page.setVariable("$last", input.index === list2.length - 1);
|
|
335
|
-
return await page.exeQueue(item.actions);
|
|
336
|
-
}, this);
|
|
337
|
-
resultList.push(result);
|
|
338
|
-
} catch (error) {
|
|
339
|
-
if (item.throwError) {
|
|
340
|
-
throw error;
|
|
341
|
-
} else {
|
|
342
|
-
resultList.push(void 0);
|
|
343
|
-
}
|
|
344
|
-
}
|
|
345
|
-
}, item.concurrency);
|
|
346
|
-
let queueError;
|
|
347
|
-
queue.error((error) => {
|
|
348
|
-
if (error) {
|
|
349
|
-
queueError = error;
|
|
350
|
-
}
|
|
351
|
-
});
|
|
352
|
-
let resultList = [];
|
|
353
|
-
for (let index = 0; index < list2.length; index++) {
|
|
354
|
-
queue.push({ index });
|
|
355
|
-
}
|
|
356
|
-
await queue.drained();
|
|
357
|
-
if (item.throwError && queueError) {
|
|
358
|
-
throw queueError;
|
|
359
|
-
}
|
|
360
|
-
value = resultList;
|
|
361
|
-
this.#setOutput(value, item.output);
|
|
362
|
-
break;
|
|
363
|
-
}
|
|
364
|
-
case "setUserAgent": {
|
|
365
|
-
await this.page.setUserAgent(item.userAgent);
|
|
366
|
-
break;
|
|
367
|
-
}
|
|
368
|
-
case "close": {
|
|
369
|
-
await this.page.close({ runBeforeUnload: false });
|
|
370
|
-
this.clearTimeout();
|
|
371
|
-
break;
|
|
372
|
-
}
|
|
373
|
-
case "custom": {
|
|
374
|
-
if (typeof item.fn === "function") {
|
|
375
|
-
value = await item.fn(this);
|
|
376
|
-
} else {
|
|
377
|
-
let plugin = this.browser.getCustom(item.config.type);
|
|
378
|
-
if (!plugin) {
|
|
379
|
-
throw new Error(`自定义[${item.config.type}]未实现处理`);
|
|
380
|
-
}
|
|
381
|
-
value = await plugin(item.config, this);
|
|
382
|
-
}
|
|
383
|
-
break;
|
|
384
|
-
}
|
|
385
|
-
case "read-variable": {
|
|
386
|
-
value = this.#obj[item.input];
|
|
387
|
-
break;
|
|
388
|
-
}
|
|
389
|
-
default:
|
|
390
|
-
break;
|
|
391
|
-
}
|
|
392
|
-
}
|
|
393
|
-
return value;
|
|
394
|
-
}
|
|
395
|
-
};
|
|
396
|
-
|
|
397
|
-
// src/define.ts
|
|
398
|
-
import * as v from "valibot";
|
|
399
|
-
var KEYLIST = v.picklist([
|
|
400
|
-
"0",
|
|
401
|
-
"1",
|
|
402
|
-
"2",
|
|
403
|
-
"3",
|
|
404
|
-
"4",
|
|
405
|
-
"5",
|
|
406
|
-
"6",
|
|
407
|
-
"7",
|
|
408
|
-
"8",
|
|
409
|
-
"9",
|
|
410
|
-
"Power",
|
|
411
|
-
"Eject",
|
|
412
|
-
"Abort",
|
|
413
|
-
"Help",
|
|
414
|
-
"Backspace",
|
|
415
|
-
"Tab",
|
|
416
|
-
"Numpad5",
|
|
417
|
-
"NumpadEnter",
|
|
418
|
-
"Enter",
|
|
419
|
-
"\r",
|
|
420
|
-
"\n",
|
|
421
|
-
"ShiftLeft",
|
|
422
|
-
"ShiftRight",
|
|
423
|
-
"ControlLeft",
|
|
424
|
-
"ControlRight",
|
|
425
|
-
"AltLeft",
|
|
426
|
-
"AltRight",
|
|
427
|
-
"Pause",
|
|
428
|
-
"CapsLock",
|
|
429
|
-
"Escape",
|
|
430
|
-
"Convert",
|
|
431
|
-
"NonConvert",
|
|
432
|
-
"Space",
|
|
433
|
-
"Numpad9",
|
|
434
|
-
"PageUp",
|
|
435
|
-
"Numpad3",
|
|
436
|
-
"PageDown",
|
|
437
|
-
"End",
|
|
438
|
-
"Numpad1",
|
|
439
|
-
"Home",
|
|
440
|
-
"Numpad7",
|
|
441
|
-
"ArrowLeft",
|
|
442
|
-
"Numpad4",
|
|
443
|
-
"Numpad8",
|
|
444
|
-
"ArrowUp",
|
|
445
|
-
"ArrowRight",
|
|
446
|
-
"Numpad6",
|
|
447
|
-
"Numpad2",
|
|
448
|
-
"ArrowDown",
|
|
449
|
-
"Select",
|
|
450
|
-
"Open",
|
|
451
|
-
"PrintScreen",
|
|
452
|
-
"Insert",
|
|
453
|
-
"Numpad0",
|
|
454
|
-
"Delete",
|
|
455
|
-
"NumpadDecimal",
|
|
456
|
-
"Digit0",
|
|
457
|
-
"Digit1",
|
|
458
|
-
"Digit2",
|
|
459
|
-
"Digit3",
|
|
460
|
-
"Digit4",
|
|
461
|
-
"Digit5",
|
|
462
|
-
"Digit6",
|
|
463
|
-
"Digit7",
|
|
464
|
-
"Digit8",
|
|
465
|
-
"Digit9",
|
|
466
|
-
"KeyA",
|
|
467
|
-
"KeyB",
|
|
468
|
-
"KeyC",
|
|
469
|
-
"KeyD",
|
|
470
|
-
"KeyE",
|
|
471
|
-
"KeyF",
|
|
472
|
-
"KeyG",
|
|
473
|
-
"KeyH",
|
|
474
|
-
"KeyI",
|
|
475
|
-
"KeyJ",
|
|
476
|
-
"KeyK",
|
|
477
|
-
"KeyL",
|
|
478
|
-
"KeyM",
|
|
479
|
-
"KeyN",
|
|
480
|
-
"KeyO",
|
|
481
|
-
"KeyP",
|
|
482
|
-
"KeyQ",
|
|
483
|
-
"KeyR",
|
|
484
|
-
"KeyS",
|
|
485
|
-
"KeyT",
|
|
486
|
-
"KeyU",
|
|
487
|
-
"KeyV",
|
|
488
|
-
"KeyW",
|
|
489
|
-
"KeyX",
|
|
490
|
-
"KeyY",
|
|
491
|
-
"KeyZ",
|
|
492
|
-
"MetaLeft",
|
|
493
|
-
"MetaRight",
|
|
494
|
-
"ContextMenu",
|
|
495
|
-
"NumpadMultiply",
|
|
496
|
-
"NumpadAdd",
|
|
497
|
-
"NumpadSubtract",
|
|
498
|
-
"NumpadDivide",
|
|
499
|
-
"F1",
|
|
500
|
-
"F2",
|
|
501
|
-
"F3",
|
|
502
|
-
"F4",
|
|
503
|
-
"F5",
|
|
504
|
-
"F6",
|
|
505
|
-
"F7",
|
|
506
|
-
"F8",
|
|
507
|
-
"F9",
|
|
508
|
-
"F10",
|
|
509
|
-
"F11",
|
|
510
|
-
"F12",
|
|
511
|
-
"F13",
|
|
512
|
-
"F14",
|
|
513
|
-
"F15",
|
|
514
|
-
"F16",
|
|
515
|
-
"F17",
|
|
516
|
-
"F18",
|
|
517
|
-
"F19",
|
|
518
|
-
"F20",
|
|
519
|
-
"F21",
|
|
520
|
-
"F22",
|
|
521
|
-
"F23",
|
|
522
|
-
"F24",
|
|
523
|
-
"NumLock",
|
|
524
|
-
"ScrollLock",
|
|
525
|
-
"AudioVolumeMute",
|
|
526
|
-
"AudioVolumeDown",
|
|
527
|
-
"AudioVolumeUp",
|
|
528
|
-
"MediaTrackNext",
|
|
529
|
-
"MediaTrackPrevious",
|
|
530
|
-
"MediaStop",
|
|
531
|
-
"MediaPlayPause",
|
|
532
|
-
"Semicolon",
|
|
533
|
-
"Equal",
|
|
534
|
-
"NumpadEqual",
|
|
535
|
-
"Comma",
|
|
536
|
-
"Minus",
|
|
537
|
-
"Period",
|
|
538
|
-
"Slash",
|
|
539
|
-
"Backquote",
|
|
540
|
-
"BracketLeft",
|
|
541
|
-
"Backslash",
|
|
542
|
-
"BracketRight",
|
|
543
|
-
"Quote",
|
|
544
|
-
"AltGraph",
|
|
545
|
-
"Props",
|
|
546
|
-
"Cancel",
|
|
547
|
-
"Clear",
|
|
548
|
-
"Shift",
|
|
549
|
-
"Control",
|
|
550
|
-
"Alt",
|
|
551
|
-
"Accept",
|
|
552
|
-
"ModeChange",
|
|
553
|
-
" ",
|
|
554
|
-
"Print",
|
|
555
|
-
"Execute",
|
|
556
|
-
"\0",
|
|
557
|
-
"a",
|
|
558
|
-
"b",
|
|
559
|
-
"c",
|
|
560
|
-
"d",
|
|
561
|
-
"e",
|
|
562
|
-
"f",
|
|
563
|
-
"g",
|
|
564
|
-
"h",
|
|
565
|
-
"i",
|
|
566
|
-
"j",
|
|
567
|
-
"k",
|
|
568
|
-
"l",
|
|
569
|
-
"m",
|
|
570
|
-
"n",
|
|
571
|
-
"o",
|
|
572
|
-
"p",
|
|
573
|
-
"q",
|
|
574
|
-
"r",
|
|
575
|
-
"s",
|
|
576
|
-
"t",
|
|
577
|
-
"u",
|
|
578
|
-
"v",
|
|
579
|
-
"w",
|
|
580
|
-
"x",
|
|
581
|
-
"y",
|
|
582
|
-
"z",
|
|
583
|
-
"Meta",
|
|
584
|
-
"*",
|
|
585
|
-
"+",
|
|
586
|
-
"-",
|
|
587
|
-
"/",
|
|
588
|
-
";",
|
|
589
|
-
"=",
|
|
590
|
-
",",
|
|
591
|
-
".",
|
|
592
|
-
"`",
|
|
593
|
-
"[",
|
|
594
|
-
"\\",
|
|
595
|
-
"]",
|
|
596
|
-
"'",
|
|
597
|
-
"Attn",
|
|
598
|
-
"CrSel",
|
|
599
|
-
"ExSel",
|
|
600
|
-
"EraseEof",
|
|
601
|
-
"Play",
|
|
602
|
-
"ZoomOut",
|
|
603
|
-
")",
|
|
604
|
-
"!",
|
|
605
|
-
"@",
|
|
606
|
-
"#",
|
|
607
|
-
"$",
|
|
608
|
-
"%",
|
|
609
|
-
"^",
|
|
610
|
-
"&",
|
|
611
|
-
"(",
|
|
612
|
-
"A",
|
|
613
|
-
"B",
|
|
614
|
-
"C",
|
|
615
|
-
"D",
|
|
616
|
-
"E",
|
|
617
|
-
"F",
|
|
618
|
-
"G",
|
|
619
|
-
"H",
|
|
620
|
-
"I",
|
|
621
|
-
"J",
|
|
622
|
-
"K",
|
|
623
|
-
"L",
|
|
624
|
-
"M",
|
|
625
|
-
"N",
|
|
626
|
-
"O",
|
|
627
|
-
"P",
|
|
628
|
-
"Q",
|
|
629
|
-
"R",
|
|
630
|
-
"S",
|
|
631
|
-
"T",
|
|
632
|
-
"U",
|
|
633
|
-
"V",
|
|
634
|
-
"W",
|
|
635
|
-
"X",
|
|
636
|
-
"Y",
|
|
637
|
-
"Z",
|
|
638
|
-
":",
|
|
639
|
-
"<",
|
|
640
|
-
"_",
|
|
641
|
-
">",
|
|
642
|
-
"?",
|
|
643
|
-
"~",
|
|
644
|
-
"{",
|
|
645
|
-
",",
|
|
646
|
-
"}",
|
|
647
|
-
'"',
|
|
648
|
-
"SoftLeft",
|
|
649
|
-
"SoftRight",
|
|
650
|
-
"Camera",
|
|
651
|
-
"Call",
|
|
652
|
-
"EndCall",
|
|
653
|
-
"VolumeDown",
|
|
654
|
-
"VolumeUp"
|
|
655
|
-
]);
|
|
656
|
-
var OptNumber = v.optional(v.number());
|
|
657
|
-
var OptBoolean = v.optional(v.boolean());
|
|
658
|
-
var OptString = v.optional(v.string());
|
|
659
|
-
var TimeoutDefine = OptNumber;
|
|
660
|
-
var RegExpStr = v.pipe(
|
|
661
|
-
v.string(),
|
|
662
|
-
v.transform((input) => {
|
|
663
|
-
return new RegExp(input);
|
|
664
|
-
})
|
|
665
|
-
);
|
|
666
|
-
var RegexpTup = v.pipe(
|
|
667
|
-
v.tuple([v.string(), v.pipe(v.string())]),
|
|
668
|
-
v.transform(([input, flag]) => {
|
|
669
|
-
return new RegExp(input, flag);
|
|
670
|
-
})
|
|
671
|
-
);
|
|
672
|
-
var RegexpUni = v.union([RegExpStr, RegexpTup]);
|
|
673
|
-
var Selector = v.string();
|
|
674
|
-
var Value = v.union([v.string(), v.object({ source: v.literal("variable"), key: v.union([v.string(), v.array(v.string())]) })]);
|
|
675
|
-
var OutputP = v.optional(
|
|
676
|
-
v.union([v.string(), v.object({ key: v.string(), method: v.picklist(["push", "flat-push", "define", "merge"]) })])
|
|
677
|
-
);
|
|
678
|
-
var GoToA = v.object({
|
|
679
|
-
timeout: TimeoutDefine,
|
|
680
|
-
waitUntil: v.optional(v.picklist(["load", "domcontentloaded", "networkidle0", "networkidle2"]), "networkidle2"),
|
|
681
|
-
url: Value,
|
|
682
|
-
type: v.literal("goto")
|
|
683
|
-
});
|
|
684
|
-
var SetViewportA = v.object({
|
|
685
|
-
width: v.optional(v.number(), 1920),
|
|
686
|
-
height: v.optional(v.number(), 1080),
|
|
687
|
-
isMobile: v.optional(v.boolean()),
|
|
688
|
-
isLandscape: v.optional(v.boolean()),
|
|
689
|
-
type: v.literal("setViewport")
|
|
690
|
-
});
|
|
691
|
-
var SetUserAgentA = v.object({
|
|
692
|
-
userAgent: v.string(),
|
|
693
|
-
type: v.literal("setUserAgent")
|
|
694
|
-
});
|
|
695
|
-
var SelectorCommon = v.object({
|
|
696
|
-
selector: Selector,
|
|
697
|
-
visible: OptBoolean,
|
|
698
|
-
hidden: OptBoolean
|
|
699
|
-
});
|
|
700
|
-
var WaitSelector = v.object({
|
|
701
|
-
mode: v.literal("selector"),
|
|
702
|
-
...SelectorCommon.entries
|
|
703
|
-
});
|
|
704
|
-
var WaitRequest = v.object({
|
|
705
|
-
mode: v.literal("request"),
|
|
706
|
-
urlRegexp: RegexpUni,
|
|
707
|
-
method: OptString
|
|
708
|
-
});
|
|
709
|
-
var WaitResponse = v.object({
|
|
710
|
-
mode: v.literal("response"),
|
|
711
|
-
urlRegexp: v.optional(RegexpUni),
|
|
712
|
-
// method: OptString,
|
|
713
|
-
status: OptNumber
|
|
714
|
-
});
|
|
715
|
-
var WaitNetworkIdle = v.object({
|
|
716
|
-
mode: v.literal("networkIdle"),
|
|
717
|
-
idleTime: OptNumber,
|
|
718
|
-
concurrency: OptNumber
|
|
719
|
-
});
|
|
720
|
-
var WaitNavigation = v.object({
|
|
721
|
-
mode: v.literal("navigation")
|
|
722
|
-
});
|
|
723
|
-
var WaitA = v.object({
|
|
724
|
-
type: v.literal("wait"),
|
|
725
|
-
config: v.variant("mode", [WaitSelector, WaitRequest, WaitResponse, WaitNetworkIdle, WaitNavigation])
|
|
726
|
-
});
|
|
727
|
-
var ClickA = v.object({
|
|
728
|
-
type: v.literal("click"),
|
|
729
|
-
selector: Selector,
|
|
730
|
-
offset: v.optional(v.object({ x: v.number(), y: v.number() })),
|
|
731
|
-
delay: OptNumber,
|
|
732
|
-
count: OptNumber
|
|
733
|
-
});
|
|
734
|
-
var TypeA = v.object({
|
|
735
|
-
type: v.literal("type"),
|
|
736
|
-
selector: Selector,
|
|
737
|
-
text: Value,
|
|
738
|
-
delay: OptNumber
|
|
739
|
-
});
|
|
740
|
-
var KeyPress = v.object({
|
|
741
|
-
type: v.literal("keypress"),
|
|
742
|
-
key: KEYLIST,
|
|
743
|
-
delay: OptNumber
|
|
744
|
-
});
|
|
745
|
-
var SelectEl = v.object({
|
|
746
|
-
type: v.literal("selector"),
|
|
747
|
-
// ...SelectorCommon.entries,
|
|
748
|
-
selector: Selector,
|
|
749
|
-
output: OutputP,
|
|
750
|
-
multi: v.optional(v.boolean(), false)
|
|
751
|
-
});
|
|
752
|
-
var FindData = v.object({
|
|
753
|
-
type: v.literal("findData"),
|
|
754
|
-
input: v.string(),
|
|
755
|
-
output: OutputP,
|
|
756
|
-
kind: v.picklist(["property"]),
|
|
757
|
-
key: v.optional(v.string())
|
|
758
|
-
// multi: v.optional(v.boolean(), true),
|
|
759
|
-
});
|
|
760
|
-
var GetContent = v.object({
|
|
761
|
-
type: v.literal("getContent"),
|
|
762
|
-
format: v.optional(v.picklist(["html", "text", "markdown"]), "html"),
|
|
763
|
-
cleanContent: OptBoolean,
|
|
764
|
-
output: OutputP
|
|
765
|
-
});
|
|
766
|
-
var CloseA = v.object({
|
|
767
|
-
type: v.literal("close")
|
|
768
|
-
});
|
|
769
|
-
var PluginA = v.object({
|
|
770
|
-
type: v.literal("custom"),
|
|
771
|
-
config: v.optional(v.looseObject({ type: v.string() })),
|
|
772
|
-
fn: v.optional(v.custom(Boolean))
|
|
773
|
-
});
|
|
774
|
-
var ReadVariable = v.object({
|
|
775
|
-
type: v.literal("read-variable"),
|
|
776
|
-
input: v.string()
|
|
777
|
-
});
|
|
778
|
-
var PageA = v.object({
|
|
779
|
-
type: v.literal("page"),
|
|
780
|
-
input: v.string(),
|
|
781
|
-
output: OutputP,
|
|
782
|
-
concurrency: v.optional(v.number(), 2),
|
|
783
|
-
throwError: v.optional(v.boolean(), false),
|
|
784
|
-
actions: v.lazy(() => v.array(ActionDefine))
|
|
785
|
-
});
|
|
786
|
-
var TypeList = [
|
|
787
|
-
...[
|
|
788
|
-
GoToA,
|
|
789
|
-
SetViewportA,
|
|
790
|
-
SetUserAgentA,
|
|
791
|
-
WaitA,
|
|
792
|
-
ClickA,
|
|
793
|
-
TypeA,
|
|
794
|
-
KeyPress,
|
|
795
|
-
SelectEl,
|
|
796
|
-
FindData,
|
|
797
|
-
GetContent,
|
|
798
|
-
// PageA,
|
|
799
|
-
CloseA,
|
|
800
|
-
PluginA,
|
|
801
|
-
ReadVariable
|
|
802
|
-
].map((item) => item.entries.type.literal),
|
|
803
|
-
"page"
|
|
804
|
-
];
|
|
805
|
-
var ActionDefine = v.fallback(
|
|
806
|
-
v.union([
|
|
807
|
-
GoToA,
|
|
808
|
-
SetViewportA,
|
|
809
|
-
SetUserAgentA,
|
|
810
|
-
WaitA,
|
|
811
|
-
ClickA,
|
|
812
|
-
TypeA,
|
|
813
|
-
KeyPress,
|
|
814
|
-
SelectEl,
|
|
815
|
-
FindData,
|
|
816
|
-
GetContent,
|
|
817
|
-
PageA,
|
|
818
|
-
CloseA,
|
|
819
|
-
PluginA,
|
|
820
|
-
ReadVariable
|
|
821
|
-
]),
|
|
822
|
-
(item) => {
|
|
823
|
-
if (TypeList.includes((item?.value).type)) {
|
|
824
|
-
throw new Error(JSON.stringify(item?.issues));
|
|
825
|
-
}
|
|
826
|
-
return { type: "custom", config: item?.value };
|
|
827
|
-
}
|
|
828
|
-
);
|
|
829
|
-
var ActionListDefine = v.array(ActionDefine);
|
|
830
|
-
var GlobalConfig = v.object({
|
|
831
|
-
maxTimeout: OptNumber,
|
|
832
|
-
actionTimeout: OptNumber
|
|
833
|
-
});
|
|
834
|
-
|
|
835
|
-
// src/init.ts
|
|
836
|
-
import * as v2 from "valibot";
|
|
837
|
-
import { Browser as BV, computeExecutablePath as computeExecutablePath2 } from "@puppeteer/browsers";
|
|
838
|
-
import * as fs from "fs";
|
|
839
|
-
|
|
840
|
-
// src/download.ts
|
|
841
|
-
import { Browser, computeExecutablePath, install } from "@puppeteer/browsers";
|
|
842
|
-
async function download(options) {
|
|
843
|
-
let result = await install({
|
|
844
|
-
browser: Browser.CHROME,
|
|
845
|
-
baseUrl: "https://cdn.npmmirror.com/binaries/chrome-for-testing",
|
|
846
|
-
...options,
|
|
847
|
-
unpack: true
|
|
848
|
-
});
|
|
849
|
-
}
|
|
850
|
-
function getExecutablePath(dir, buildId) {
|
|
851
|
-
return computeExecutablePath({ cacheDir: dir, browser: Browser.CHROME, buildId });
|
|
852
|
-
}
|
|
853
|
-
|
|
854
|
-
// src/init.ts
|
|
855
|
-
import { PUPPETEER_REVISIONS } from "puppeteer-core/internal/revisions.js";
|
|
856
|
-
async function init(options) {
|
|
857
|
-
return WebBrowser.init(options);
|
|
858
|
-
}
|
|
859
|
-
var CHROME_VERSION = PUPPETEER_REVISIONS.chrome;
|
|
860
|
-
var WebBrowser = class _WebBrowser {
|
|
861
|
-
browser;
|
|
862
|
-
static async init(options) {
|
|
863
|
-
let bvType = BV.CHROME;
|
|
864
|
-
let executablePath = computeExecutablePath2({ cacheDir: options.cacheDir, browser: bvType, buildId: CHROME_VERSION });
|
|
865
|
-
if (!fs.existsSync(executablePath)) {
|
|
866
|
-
console.log("准备下载");
|
|
867
|
-
await download({ cacheDir: options.cacheDir, buildId: CHROME_VERSION, browser: bvType });
|
|
868
|
-
}
|
|
869
|
-
const browser = await puppeteer.launch({ ...options, executablePath });
|
|
870
|
-
return new _WebBrowser(browser);
|
|
871
|
-
}
|
|
872
|
-
constructor(browser) {
|
|
873
|
-
this.browser = browser;
|
|
874
|
-
}
|
|
875
|
-
#config;
|
|
876
|
-
#pluginMap = /* @__PURE__ */ new Map();
|
|
877
|
-
setConfig(config) {
|
|
878
|
-
this.#config = config;
|
|
879
|
-
}
|
|
880
|
-
getConfig() {
|
|
881
|
-
return this.#config;
|
|
882
|
-
}
|
|
883
|
-
registerCustom(type, fn) {
|
|
884
|
-
this.#pluginMap.set(type, fn);
|
|
885
|
-
}
|
|
886
|
-
clearCustom() {
|
|
887
|
-
this.#pluginMap.clear();
|
|
888
|
-
}
|
|
889
|
-
getCustom(key) {
|
|
890
|
-
return this.#pluginMap.get(key);
|
|
891
|
-
}
|
|
892
|
-
async openPage(fn, parent) {
|
|
893
|
-
let page = new WebPage(await this.browser.newPage(), this, parent);
|
|
894
|
-
if (this.#config?.maxTimeout) {
|
|
895
|
-
page.setMaxTimeout(this.#config.maxTimeout);
|
|
896
|
-
}
|
|
897
|
-
return fn(page);
|
|
898
|
-
}
|
|
899
|
-
runQueue(list, input) {
|
|
900
|
-
let result = v2.safeParse(ActionListDefine, list);
|
|
901
|
-
if (!result.success) {
|
|
902
|
-
throw new Error(`解析配置错误
|
|
903
|
-
${JSON.stringify(result.issues)}`);
|
|
904
|
-
}
|
|
905
|
-
return this.openPage(async (page) => {
|
|
906
|
-
if (input) {
|
|
907
|
-
for (const key in input) {
|
|
908
|
-
page.setVariable(key, input[key]);
|
|
909
|
-
}
|
|
910
|
-
}
|
|
911
|
-
return page.exeQueue(result.output);
|
|
912
|
-
});
|
|
913
|
-
}
|
|
914
|
-
};
|
|
915
|
-
export {
|
|
916
|
-
ActionDefine,
|
|
917
|
-
ActionListDefine,
|
|
918
|
-
GlobalConfig,
|
|
919
|
-
WebBrowser,
|
|
920
|
-
WebPage,
|
|
921
|
-
download,
|
|
922
|
-
format,
|
|
923
|
-
getExecutablePath,
|
|
924
|
-
init
|
|
925
|
-
};
|
|
1
|
+
import ye from"puppeteer-core";import{ElementHandle as z}from"puppeteer-core";import{load as v}from"cheerio";import d from"turndown";import{Readability as k}from"@mozilla/readability";import{JSDOM as x}from"jsdom";function A(n,r){if(r.cleanContent){let t=new x(n),i=new k(t.window.document).parse();if(i)switch(r.format){case"html":return i.content;case"text":return i.textContent;case"markdown":{var o=new d;return o.turndown(i.content)}default:throw""}let a=v(n,void 0,!0),s=a("body");if(s.find("script,style,iframe,footer,br,hr,svg,header,img").remove(),s.find("*").removeAttr("class"),s.find("*").removeAttr("style"),a("*").contents().filter(function(){return this.type==="comment"||this.type==="text"&&!this.data.trim()}).remove(),a("*").contents().filter(function(){return this.type==="text"&&!!this.data.trim()}).text((p,u)=>u.trim()),r.format==="html")return s.html();if(r.format==="text")return s.text();if(r.format==="markdown"){var o=new d;return o.turndown(s.html())}}else{if(r.format==="html")return n;if(r.format==="text")return v(n,void 0,!0)("body").text();if(r.format==="markdown"){var o=new d;return o.turndown(n)}}}function C(n){let r=new x(n);return new k(r.window.document,{charThreshold:100}).parse()}import{promise as X}from"fastq";var g=class{page;browser;parent;#e={};constructor(r,o,t){this.page=r,this.browser=o,this.parent=t}ab;timeoutId;setMaxTimeout(r){this.ab=new AbortController,this.timeoutId=setTimeout(()=>{this.ab.abort("timeout")},r)}clearTimeout(){clearTimeout(this.timeoutId)}setVariable(r,o){this.#e[r]=o}getVariable(r){return this.#e[r]}#r(r,o){let t,i=!1;for(let a=0;a<o.length;a++){let s=o[a];if(s===".."){if(!r.parent)throw new Error("未找到父级");r=r.parent}else if(i){if(!t||typeof t!="object")throw new Error(`${o}路径下未找到值`);t=t[s]}else t=r.#e[s],i=!0}return t}#o(r){if(typeof r=="string")return r;if(r.source==="variable")return typeof r.key=="string"?this.#e[r.key]:this.#r(this,r.key)}#t(r,o){if(o)if(typeof o=="string")this.#e[o]=r;else 
switch(o.method){case"push":{if(!Array.isArray(this.#e[o.key]))throw new Error(`${o.key}不是数组类型`);this.#e[o.key]||=[],this.#e[o.key].push(r);break}case"flat-push":{if(!Array.isArray(this.#e[o.key]))throw new Error(`${o.key}不是数组类型`);if(!Array.isArray(r))throw new Error(`${JSON.stringify(r)}不是数组类型`);this.#e[o.key]||=[],this.#e[o.key].push(...r);break}case"define":{this.#e[o.key]=r;break}case"merge":{if(typeof this.#e[o.key]!="object")throw new Error(`${o.key}不是对象类型`);this.#e[o.key]||={},this.#e[o.key]={...this.#e[o.key],...r};break}default:break}}async exeQueue(r){let o;for(let t of r)switch(console.log("准备执行",t),t.type){case"click":await this.page.click(t.selector,{offset:t.offset,delay:t.delay,count:t.count});break;case"type":{await this.page.type(t.selector,this.#o(t.text),{delay:t.delay});break}case"goto":{o=await this.page.goto(this.#o(t.url),{waitUntil:t.waitUntil,signal:this.ab?.signal,timeout:this.browser.getConfig()?.actionTimeout});break}case"setViewport":{o=await this.page.setViewport({width:t.width,height:t.height,isMobile:t.isMobile,isLandscape:t.isLandscape});break}case"wait":{switch(t.config.mode){case"selector":{o=await this.page.waitForSelector(t.config.selector,{visible:t.config.visible,hidden:t.config.hidden,signal:this.ab?.signal,timeout:this.browser.getConfig()?.actionTimeout});break}case"request":{let i=t.config;o=await this.page.waitForRequest(async a=>!(i.urlRegexp&&!i.urlRegexp.test(a.url())||i.method&&i.method!==a.method()),{signal:this.ab?.signal,timeout:this.browser.getConfig()?.actionTimeout});break}case"response":{let i=t.config;o=await this.page.waitForResponse(async a=>!(i.urlRegexp&&!i.urlRegexp.test(a.url())||i.status&&i.status!==a.status()),{signal:this.ab?.signal,timeout:this.browser.getConfig()?.actionTimeout});break}case"networkIdle":{o=await 
this.page.waitForNetworkIdle({idleTime:t.config.idleTime,concurrency:t.config.concurrency,signal:this.ab?.signal,timeout:this.browser.getConfig()?.actionTimeout});break}case"navigation":{o=await this.page.waitForNavigation({signal:this.ab?.signal,timeout:this.browser.getConfig()?.actionTimeout});break}case"waitBodyElements":{let i=t.config.threshold||100;o=await this.page.waitForFunction(a=>document.body.querySelectorAll("*").length>=a,{timeout:this.browser.getConfig()?.actionTimeout,signal:this.ab?.signal},i);break}}break}case"selector":{t.multi?this.#t(o=await this.page.$$(t.selector),t.output):this.#t(o=await this.page.$(t.selector),t.output);break}case"keypress":{await this.page.keyboard.press(t.key,{delay:t.delay});break}case"findData":{let i=this.#e[t.input];Array.isArray(i)?t.kind==="property"&&this.#t(o=await Promise.all(i.map(a=>a.getProperty(t.key).then(s=>s.jsonValue()))),t.output):i instanceof z&&this.#t(o=i.getProperty(t.key).then(a=>a.jsonValue()),t.output);break}case"getContent":{let i=await this.page.content();this.#t(o=A(i,{cleanContent:t.cleanContent,format:t.format}),t.output);break}case"rawContent":{let i=await this.page.content();this.#t(o=i,t.output);break}case"page":{let i=this.#e[t.input],a=Array.isArray(i)?i:[i],s=X(async l=>{console.log("准备执行",l);try{let y=await this.browser.openPage(async f=>(f.setVariable("$item",a[l.index]),f.setVariable("$index",l.index),f.setVariable("$first",l.index===0),f.setVariable("$last",l.index===a.length-1),{result:await f.exeQueue(t.actions),page:f}),this);u.push(y)}catch(y){if(t.throwError)throw y;u.push(void 0)}},t.concurrency),p;s.error(l=>{l&&(p=l)});let u=[];for(let l=0;l<a.length;l++)s.push({index:l});if(await s.drained(),t.throwError&&p)throw p;o=u,this.#t(o,t.output);break}case"setUserAgent":{await this.page.setUserAgent({userAgent:t.userAgent});break}case"close":{await this.page.close({runBeforeUnload:!1}),this.clearTimeout();break}case"custom":{if(typeof t.fn=="function")o=await t.fn(this);else{let 
i=this.browser.getCustom(t.config.type);if(!i)throw new Error(`自定义[${t.config.type}]未实现处理`);o=await i(t.config,this)}break}case"evaluate":{this.#t(o=await this.page.evaluate(t.fn,...t.args??[]),t.output);break}case"read-variable":{o=this.#e[t.input];break}default:break}return o}dispose(){return this.page.close()}};import*as e from"valibot";var ee=e.picklist(["0","1","2","3","4","5","6","7","8","9","Power","Eject","Abort","Help","Backspace","Tab","Numpad5","NumpadEnter","Enter","\r",`
|
|
2
|
+
`,"ShiftLeft","ShiftRight","ControlLeft","ControlRight","AltLeft","AltRight","Pause","CapsLock","Escape","Convert","NonConvert","Space","Numpad9","PageUp","Numpad3","PageDown","End","Numpad1","Home","Numpad7","ArrowLeft","Numpad4","Numpad8","ArrowUp","ArrowRight","Numpad6","Numpad2","ArrowDown","Select","Open","PrintScreen","Insert","Numpad0","Delete","NumpadDecimal","Digit0","Digit1","Digit2","Digit3","Digit4","Digit5","Digit6","Digit7","Digit8","Digit9","KeyA","KeyB","KeyC","KeyD","KeyE","KeyF","KeyG","KeyH","KeyI","KeyJ","KeyK","KeyL","KeyM","KeyN","KeyO","KeyP","KeyQ","KeyR","KeyS","KeyT","KeyU","KeyV","KeyW","KeyX","KeyY","KeyZ","MetaLeft","MetaRight","ContextMenu","NumpadMultiply","NumpadAdd","NumpadSubtract","NumpadDivide","F1","F2","F3","F4","F5","F6","F7","F8","F9","F10","F11","F12","F13","F14","F15","F16","F17","F18","F19","F20","F21","F22","F23","F24","NumLock","ScrollLock","AudioVolumeMute","AudioVolumeDown","AudioVolumeUp","MediaTrackNext","MediaTrackPrevious","MediaStop","MediaPlayPause","Semicolon","Equal","NumpadEqual","Comma","Minus","Period","Slash","Backquote","BracketLeft","Backslash","BracketRight","Quote","AltGraph","Props","Cancel","Clear","Shift","Control","Alt","Accept","ModeChange"," ","Print","Execute","\0","a","b","c","d","e","f","g","h","i","j","k","l","m","n","o","p","q","r","s","t","u","v","w","x","y","z","Meta","*","+","-","/",";","=",",",".","`","[","\\","]","'","Attn","CrSel","ExSel","EraseEof","Play","ZoomOut",")","!","@","#","$","%","^","&","(","A","B","C","D","E","F","G","H","I","J","K","L","M","N","O","P","Q","R","S","T","U","V","W","X","Y","Z",":","<","_",">","?","~","{",",","}",'"',"SoftLeft","SoftRight","Camera","Call","EndCall","VolumeDown","VolumeUp"]),c=e.optional(e.number()),b=e.optional(e.boolean()),te=e.optional(e.string());var re=c,oe=e.pipe(e.string(),e.transform(n=>new RegExp(n))),ie=e.pipe(e.tuple([e.string(),e.pipe(e.string())]),e.transform(([n,r])=>new 
RegExp(n,r))),E=e.union([oe,ie]),h=e.string(),P=e.union([e.string(),e.object({source:e.literal("variable"),key:e.union([e.string(),e.array(e.string())])})]),m=e.optional(e.union([e.string(),e.object({key:e.string(),method:e.picklist(["push","flat-push","define","merge"])})])),T=e.object({timeout:re,waitUntil:e.optional(e.picklist(["load","domcontentloaded","networkidle0","networkidle2"]),"networkidle2"),url:P,type:e.literal("goto")}),I=e.object({width:e.optional(e.number(),1920),height:e.optional(e.number(),1080),isMobile:e.optional(e.boolean()),isLandscape:e.optional(e.boolean()),type:e.literal("setViewport")}),O=e.object({userAgent:e.string(),type:e.literal("setUserAgent")}),ne=e.object({selector:h,visible:b,hidden:b}),ae=e.object({mode:e.literal("selector"),...ne.entries}),se=e.object({mode:e.literal("request"),urlRegexp:E,method:te}),le=e.object({mode:e.literal("response"),urlRegexp:e.optional(E),status:c}),ce=e.object({mode:e.literal("networkIdle"),idleTime:c,concurrency:c}),ue=e.object({mode:e.literal("waitBodyElements"),threshold:c}),pe=e.object({mode:e.literal("navigation")}),D=e.object({type:e.literal("wait"),config:e.variant("mode",[ae,se,le,ce,pe,ue])}),S=e.object({type:e.literal("click"),selector:h,offset:e.optional(e.object({x:e.number(),y:e.number()})),delay:c,count:c}),R=e.object({type:e.literal("type"),selector:h,text:P,delay:c}),F=e.object({type:e.literal("keypress"),key:ee,delay:c}),V=e.object({type:e.literal("selector"),selector:h,output:m,multi:e.optional(e.boolean(),!1)}),j=e.object({type:e.literal("findData"),input:e.string(),output:m,kind:e.picklist(["property"]),key:e.optional(e.string())}),N=e.object({type:e.literal("getContent"),format:e.optional(e.picklist(["html","text","markdown"]),"html"),cleanContent:b,output:m}),L=e.object({type:e.literal("rawContent"),output:m}),M=e.object({type:e.literal("evaluate"),fn:e.custom(n=>typeof 
n=="function"),args:e.optional(e.array(e.any())),output:m}),K=e.object({type:e.literal("close")}),U=e.object({type:e.literal("custom"),config:e.optional(e.looseObject({type:e.string()})),fn:e.optional(e.custom(Boolean))}),B=e.object({type:e.literal("read-variable"),input:e.string()}),fe=e.object({type:e.literal("page"),input:e.string(),output:m,concurrency:e.optional(e.number(),2),throwError:e.optional(e.boolean(),!1),actions:e.lazy(()=>e.array(W))}),me=[...[T,I,O,D,S,R,F,V,j,N,L,K,U,B,M].map(n=>n.entries.type.literal),"page"],W=e.fallback(e.union([T,I,O,D,S,R,F,V,j,N,L,fe,K,U,B,M]),n=>{if(me.includes((n?.value).type))throw new Error(JSON.stringify(n?.issues));return{type:"custom",config:n?.value}}),$=e.array(W),Se=e.object({maxTimeout:c,actionTimeout:c});import*as J from"valibot";import{Browser as de,computeExecutablePath as be}from"@puppeteer/browsers";import*as H from"fs";import{Browser as G,computeExecutablePath as ge,install as he}from"@puppeteer/browsers";async function q(n){let r=await he({browser:G.CHROME,baseUrl:"https://cdn.npmmirror.com/binaries/chrome-for-testing",...n,unpack:!0})}function je(n,r){return ge({cacheDir:n,browser:G.CHROME,buildId:r})}import{PUPPETEER_REVISIONS as we}from"puppeteer-core/internal/revisions.js";async function _(n){return w.init(n)}var Q=we.chrome,w=class n{browser;static async init(r){let o=de.CHROME,t=be({cacheDir:r.cacheDir,browser:o,buildId:Q});H.existsSync(t)||(console.log("准备下载"),await q({cacheDir:r.cacheDir,buildId:Q,browser:o}));let i=await ye.launch({...r,executablePath:t});return new n(i)}constructor(r){this.browser=r}#e;#r=new Map;setConfig(r){this.#e=r}getConfig(){return this.#e}registerCustom(r,o){this.#r.set(r,o)}clearCustom(){this.#r.clear()}getCustom(r){return this.#r.get(r)}async openPage(r,o){let t=new g(await this.browser.newPage(),this,o);return this.#e?.maxTimeout&&t.setMaxTimeout(this.#e.maxTimeout),r(t)}runQueue(r,o){let t=J.safeParse($,r);if(!t.success)throw new Error(`解析配置错误
|
|
3
|
+
${JSON.stringify(t.issues)}`);return this.openPage(async i=>{if(o)for(let a in o)i.setVariable(a,o[a]);return{result:await i.exeQueue(t.output),page:i}})}};import{load as ve}from"cheerio";function Y(n,r){let o=ve(r,{baseURI:n});return o("a").map((i,a)=>o(a).attr("href")?o(a).prop("href"):"").get().filter(Boolean)}var Z=class{config;browser;dataMap=new Map;#e=new Set;constructor(r){this.config=r}async start(){return this.browser=await _({cacheDir:process.cwd(),headless:!1}),this.browser.setConfig({maxTimeout:12e4,actionTimeout:12e4}),await this.searchWebOne(this.config.url,void 0),await this.browser.browser.close(),this.dataMap}async searchWebOne(r,o){let t=await this.browser.runQueue([{type:"setViewport",width:1920,height:1080},{type:"goto",url:r,waitUntil:"networkidle0"},{type:"evaluate",output:"baseURI",fn:()=>window.location.origin},{type:"evaluate",output:"href",fn:()=>window.location.href},{type:"rawContent",output:"data"}]);console.log("解析完成",r);let i=t.page.getVariable("href"),a=t.page.getVariable("baseURI"),s=t.page.getVariable("data"),p=C(s);if(this.#e.add(r),this.#e.add(i),p)this.dataMap.set(r,{requestUrl:r,parsedUrl:i,parent:o?.from,metadata:p,raw:s}),await t.page.dispose();else{this.dataMap.set(r,{requestUrl:r,parsedUrl:i,parent:o?.from,metadata:void 0,raw:s}),await t.page.dispose();return}let u=Y(a,s);u=u.filter(this.config.filterLink);for(let l of u){if(this.#e.has(l)){console.log("已索引,跳过",l);continue}await this.searchWebOne(l,{from:r})}}};export{W as ActionDefine,$ as ActionListDefine,Z as FullWebRequest,Se as GlobalConfig,w as WebBrowser,g as WebPage,q as download,A as format,C as formatDoc,je as getExecutablePath,_ as init};
|
package/init.d.ts
CHANGED
|
@@ -20,6 +20,9 @@ export declare class WebBrowser {
|
|
|
20
20
|
clearCustom(): void;
|
|
21
21
|
getCustom(key: string): PluginFn | undefined;
|
|
22
22
|
openPage<T>(fn: (page: WebPage) => Promise<T>, parent?: WebPage): Promise<T>;
|
|
23
|
-
runQueue(list: QueueList, input?: Record<string, any>): Promise<
|
|
23
|
+
runQueue(list: QueueList, input?: Record<string, any>): Promise<{
|
|
24
|
+
result: any;
|
|
25
|
+
page: WebPage;
|
|
26
|
+
}>;
|
|
24
27
|
}
|
|
25
28
|
export {};
|
package/package.json
CHANGED
|
@@ -1,22 +1,24 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@cyia/crawl",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.13",
|
|
4
4
|
"author": "wszgrcy",
|
|
5
5
|
"description": "",
|
|
6
6
|
"dependencies": {
|
|
7
|
-
"cheerio": "1.0.0",
|
|
8
|
-
"fastq": "1.19.1",
|
|
9
|
-
"html-entities": "2.6.0",
|
|
10
|
-
"puppeteer-core": "24.6.0",
|
|
11
|
-
"valibot": "1.0.0",
|
|
12
|
-
"turndown": "^7.2.0",
|
|
13
7
|
"@mozilla/readability": "^0.6.0",
|
|
14
|
-
"
|
|
8
|
+
"cheerio": "^1.2.0",
|
|
9
|
+
"fastq": "^1.20.1",
|
|
10
|
+
"html-entities": "^2.6.0",
|
|
11
|
+
"htmlparser2": "^10.1.0",
|
|
12
|
+
"jsdom": "^27.4.0",
|
|
13
|
+
"puppeteer-core": "^24.36.0",
|
|
14
|
+
"turndown": "^7.2.2",
|
|
15
|
+
"valibot": "^1.2.0"
|
|
15
16
|
},
|
|
16
17
|
"exports": {
|
|
17
18
|
".": {
|
|
18
19
|
"types": "./index.d.ts",
|
|
19
|
-
"default": "./index.mjs"
|
|
20
|
+
"default": "./index.mjs",
|
|
21
|
+
"node": "./index.mjs"
|
|
20
22
|
}
|
|
21
23
|
},
|
|
22
24
|
"publishConfig": {
|
package/page.d.ts
CHANGED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare function getPageLinks(baseURI: string, content: string): string[];
|