@cyia/crawl 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/define.d.ts +394 -0
- package/download.d.ts +3 -0
- package/format.d.ts +1 -0
- package/index.d.ts +4 -0
- package/index.mjs +3 -0
- package/init.d.ts +25 -0
- package/package.json +23 -0
- package/page.d.ts +18 -0
package/define.d.ts
ADDED
|
@@ -0,0 +1,394 @@
|
|
|
1
|
+
import * as v from 'valibot';
|
|
2
|
+
import { WebPage } from './page';
|
|
3
|
+
declare const Value: v.UnionSchema<[v.StringSchema<undefined>, v.ObjectSchema<{
|
|
4
|
+
readonly source: v.LiteralSchema<"variable", undefined>;
|
|
5
|
+
readonly key: v.UnionSchema<[v.StringSchema<undefined>, v.ArraySchema<v.StringSchema<undefined>, undefined>], undefined>;
|
|
6
|
+
}, undefined>], undefined>;
|
|
7
|
+
export type ValueType = v.InferOutput<typeof Value>;
|
|
8
|
+
export declare const ActionDefine: v.SchemaWithFallback<v.UnionSchema<[v.ObjectSchema<{
|
|
9
|
+
readonly timeout: v.OptionalSchema<v.NumberSchema<undefined>, undefined>;
|
|
10
|
+
readonly waitUntil: v.OptionalSchema<v.PicklistSchema<["load", "domcontentloaded", "networkidle0", "networkidle2"], undefined>, "networkidle2">;
|
|
11
|
+
readonly url: v.UnionSchema<[v.StringSchema<undefined>, v.ObjectSchema<{
|
|
12
|
+
readonly source: v.LiteralSchema<"variable", undefined>;
|
|
13
|
+
readonly key: v.UnionSchema<[v.StringSchema<undefined>, v.ArraySchema<v.StringSchema<undefined>, undefined>], undefined>;
|
|
14
|
+
}, undefined>], undefined>;
|
|
15
|
+
readonly type: v.LiteralSchema<"goto", undefined>;
|
|
16
|
+
}, undefined>, v.ObjectSchema<{
|
|
17
|
+
readonly width: v.OptionalSchema<v.NumberSchema<undefined>, 1920>;
|
|
18
|
+
readonly height: v.OptionalSchema<v.NumberSchema<undefined>, 1080>;
|
|
19
|
+
readonly isMobile: v.OptionalSchema<v.BooleanSchema<undefined>, undefined>;
|
|
20
|
+
readonly isLandscape: v.OptionalSchema<v.BooleanSchema<undefined>, undefined>;
|
|
21
|
+
readonly type: v.LiteralSchema<"setViewport", undefined>;
|
|
22
|
+
}, undefined>, v.ObjectSchema<{
|
|
23
|
+
readonly userAgent: v.StringSchema<undefined>;
|
|
24
|
+
readonly type: v.LiteralSchema<"setUserAgent", undefined>;
|
|
25
|
+
}, undefined>, v.ObjectSchema<{
|
|
26
|
+
readonly type: v.LiteralSchema<"wait", undefined>;
|
|
27
|
+
readonly config: v.VariantSchema<"mode", [v.ObjectSchema<{
|
|
28
|
+
readonly selector: v.StringSchema<undefined>;
|
|
29
|
+
readonly visible: v.OptionalSchema<v.BooleanSchema<undefined>, undefined>;
|
|
30
|
+
readonly hidden: v.OptionalSchema<v.BooleanSchema<undefined>, undefined>;
|
|
31
|
+
readonly mode: v.LiteralSchema<"selector", undefined>;
|
|
32
|
+
}, undefined>, v.ObjectSchema<{
|
|
33
|
+
readonly mode: v.LiteralSchema<"request", undefined>;
|
|
34
|
+
readonly urlRegexp: v.UnionSchema<[v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TransformAction<string, RegExp>]>, v.SchemaWithPipe<readonly [v.TupleSchema<[v.StringSchema<undefined>, v.SchemaWithPipe<readonly [v.StringSchema<undefined>]>], undefined>, v.TransformAction<[string, string], RegExp>]>], undefined>;
|
|
35
|
+
readonly method: v.OptionalSchema<v.StringSchema<undefined>, undefined>;
|
|
36
|
+
}, undefined>, v.ObjectSchema<{
|
|
37
|
+
readonly mode: v.LiteralSchema<"response", undefined>;
|
|
38
|
+
readonly urlRegexp: v.OptionalSchema<v.UnionSchema<[v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TransformAction<string, RegExp>]>, v.SchemaWithPipe<readonly [v.TupleSchema<[v.StringSchema<undefined>, v.SchemaWithPipe<readonly [v.StringSchema<undefined>]>], undefined>, v.TransformAction<[string, string], RegExp>]>], undefined>, undefined>;
|
|
39
|
+
readonly status: v.OptionalSchema<v.NumberSchema<undefined>, undefined>;
|
|
40
|
+
}, undefined>, v.ObjectSchema<{
|
|
41
|
+
readonly mode: v.LiteralSchema<"networkIdle", undefined>;
|
|
42
|
+
readonly idleTime: v.OptionalSchema<v.NumberSchema<undefined>, undefined>;
|
|
43
|
+
readonly concurrency: v.OptionalSchema<v.NumberSchema<undefined>, undefined>;
|
|
44
|
+
}, undefined>, v.ObjectSchema<{
|
|
45
|
+
readonly mode: v.LiteralSchema<"navigation", undefined>;
|
|
46
|
+
}, undefined>], undefined>;
|
|
47
|
+
}, undefined>, v.ObjectSchema<{
|
|
48
|
+
readonly type: v.LiteralSchema<"click", undefined>;
|
|
49
|
+
readonly selector: v.StringSchema<undefined>;
|
|
50
|
+
readonly offset: v.OptionalSchema<v.ObjectSchema<{
|
|
51
|
+
readonly x: v.NumberSchema<undefined>;
|
|
52
|
+
readonly y: v.NumberSchema<undefined>;
|
|
53
|
+
}, undefined>, undefined>;
|
|
54
|
+
readonly delay: v.OptionalSchema<v.NumberSchema<undefined>, undefined>;
|
|
55
|
+
readonly count: v.OptionalSchema<v.NumberSchema<undefined>, undefined>;
|
|
56
|
+
}, undefined>, v.ObjectSchema<{
|
|
57
|
+
readonly type: v.LiteralSchema<"type", undefined>;
|
|
58
|
+
readonly selector: v.StringSchema<undefined>;
|
|
59
|
+
readonly text: v.StringSchema<undefined>;
|
|
60
|
+
readonly delay: v.OptionalSchema<v.NumberSchema<undefined>, undefined>;
|
|
61
|
+
}, undefined>, v.ObjectSchema<{
|
|
62
|
+
readonly type: v.LiteralSchema<"keypress", undefined>;
|
|
63
|
+
readonly key: v.PicklistSchema<["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "Power", "Eject", "Abort", "Help", "Backspace", "Tab", "Numpad5", "NumpadEnter", "Enter", "\r", "\n", "ShiftLeft", "ShiftRight", "ControlLeft", "ControlRight", "AltLeft", "AltRight", "Pause", "CapsLock", "Escape", "Convert", "NonConvert", "Space", "Numpad9", "PageUp", "Numpad3", "PageDown", "End", "Numpad1", "Home", "Numpad7", "ArrowLeft", "Numpad4", "Numpad8", "ArrowUp", "ArrowRight", "Numpad6", "Numpad2", "ArrowDown", "Select", "Open", "PrintScreen", "Insert", "Numpad0", "Delete", "NumpadDecimal", "Digit0", "Digit1", "Digit2", "Digit3", "Digit4", "Digit5", "Digit6", "Digit7", "Digit8", "Digit9", "KeyA", "KeyB", "KeyC", "KeyD", "KeyE", "KeyF", "KeyG", "KeyH", "KeyI", "KeyJ", "KeyK", "KeyL", "KeyM", "KeyN", "KeyO", "KeyP", "KeyQ", "KeyR", "KeyS", "KeyT", "KeyU", "KeyV", "KeyW", "KeyX", "KeyY", "KeyZ", "MetaLeft", "MetaRight", "ContextMenu", "NumpadMultiply", "NumpadAdd", "NumpadSubtract", "NumpadDivide", "F1", "F2", "F3", "F4", "F5", "F6", "F7", "F8", "F9", "F10", "F11", "F12", "F13", "F14", "F15", "F16", "F17", "F18", "F19", "F20", "F21", "F22", "F23", "F24", "NumLock", "ScrollLock", "AudioVolumeMute", "AudioVolumeDown", "AudioVolumeUp", "MediaTrackNext", "MediaTrackPrevious", "MediaStop", "MediaPlayPause", "Semicolon", "Equal", "NumpadEqual", "Comma", "Minus", "Period", "Slash", "Backquote", "BracketLeft", "Backslash", "BracketRight", "Quote", "AltGraph", "Props", "Cancel", "Clear", "Shift", "Control", "Alt", "Accept", "ModeChange", " ", "Print", "Execute", "\0", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "Meta", "*", "+", "-", "/", ";", "=", ",", ".", "`", "[", "\\", "]", "'", "Attn", "CrSel", "ExSel", "EraseEof", "Play", "ZoomOut", ")", "!", "@", "#", "$", "%", "^", "&", "(", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", 
"Y", "Z", ":", "<", "_", ">", "?", "~", "{", ",", "}", "\"", "SoftLeft", "SoftRight", "Camera", "Call", "EndCall", "VolumeDown", "VolumeUp"], undefined>;
|
|
64
|
+
readonly delay: v.OptionalSchema<v.NumberSchema<undefined>, undefined>;
|
|
65
|
+
}, undefined>, v.ObjectSchema<{
|
|
66
|
+
readonly type: v.LiteralSchema<"selector", undefined>;
|
|
67
|
+
readonly selector: v.StringSchema<undefined>;
|
|
68
|
+
readonly output: v.StringSchema<undefined>;
|
|
69
|
+
readonly multi: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
|
|
70
|
+
}, undefined>, v.ObjectSchema<{
|
|
71
|
+
readonly type: v.LiteralSchema<"findData", undefined>;
|
|
72
|
+
readonly input: v.StringSchema<undefined>;
|
|
73
|
+
readonly output: v.StringSchema<undefined>;
|
|
74
|
+
readonly kind: v.PicklistSchema<["property"], undefined>;
|
|
75
|
+
readonly key: v.OptionalSchema<v.StringSchema<undefined>, undefined>;
|
|
76
|
+
}, undefined>, v.ObjectSchema<{
|
|
77
|
+
readonly type: v.LiteralSchema<"getContent", undefined>;
|
|
78
|
+
readonly format: v.OptionalSchema<v.PicklistSchema<["html", "text"], undefined>, "html">;
|
|
79
|
+
readonly cleanContent: v.OptionalSchema<v.BooleanSchema<undefined>, undefined>;
|
|
80
|
+
readonly output: v.StringSchema<undefined>;
|
|
81
|
+
}, undefined>, v.GenericSchema<{
|
|
82
|
+
type: "page";
|
|
83
|
+
input: string;
|
|
84
|
+
actions: v.InferInput<ActionType>[];
|
|
85
|
+
concurrency?: number;
|
|
86
|
+
throwError?: boolean;
|
|
87
|
+
}, {
|
|
88
|
+
type: "page";
|
|
89
|
+
input: string;
|
|
90
|
+
actions: v.InferOutput<ActionType>[];
|
|
91
|
+
concurrency: number;
|
|
92
|
+
throwError: boolean;
|
|
93
|
+
}>, v.ObjectSchema<{
|
|
94
|
+
readonly type: v.LiteralSchema<"close", undefined>;
|
|
95
|
+
}, undefined>, v.ObjectSchema<{
|
|
96
|
+
readonly type: v.LiteralSchema<"custom", undefined>;
|
|
97
|
+
readonly config: v.OptionalSchema<v.LooseObjectSchema<{
|
|
98
|
+
readonly type: v.StringSchema<undefined>;
|
|
99
|
+
}, undefined>, undefined>;
|
|
100
|
+
readonly fn: v.OptionalSchema<v.CustomSchema<(input: WebPage) => Promise<any>, undefined>, undefined>;
|
|
101
|
+
}, undefined>], undefined>, (item: v.OutputDataset<{
|
|
102
|
+
timeout?: number | undefined;
|
|
103
|
+
waitUntil: "load" | "domcontentloaded" | "networkidle0" | "networkidle2";
|
|
104
|
+
url: (string | {
|
|
105
|
+
source: "variable";
|
|
106
|
+
key: (string | string[] | undefined) & (string | string[]);
|
|
107
|
+
} | undefined) & (string | {
|
|
108
|
+
source: "variable";
|
|
109
|
+
key: (string | string[] | undefined) & (string | string[]);
|
|
110
|
+
});
|
|
111
|
+
type: "goto";
|
|
112
|
+
} | {
|
|
113
|
+
width: number;
|
|
114
|
+
height: number;
|
|
115
|
+
isMobile?: boolean | undefined;
|
|
116
|
+
isLandscape?: boolean | undefined;
|
|
117
|
+
type: "setViewport";
|
|
118
|
+
} | {
|
|
119
|
+
userAgent: string;
|
|
120
|
+
type: "setUserAgent";
|
|
121
|
+
} | {
|
|
122
|
+
type: "wait";
|
|
123
|
+
config: {
|
|
124
|
+
selector: string;
|
|
125
|
+
visible?: boolean | undefined;
|
|
126
|
+
hidden?: boolean | undefined;
|
|
127
|
+
mode: "selector";
|
|
128
|
+
} | {
|
|
129
|
+
mode: "request";
|
|
130
|
+
urlRegexp: RegExp;
|
|
131
|
+
method?: string | undefined;
|
|
132
|
+
} | {
|
|
133
|
+
mode: "response";
|
|
134
|
+
urlRegexp?: RegExp | undefined;
|
|
135
|
+
status?: number | undefined;
|
|
136
|
+
} | {
|
|
137
|
+
mode: "networkIdle";
|
|
138
|
+
idleTime?: number | undefined;
|
|
139
|
+
concurrency?: number | undefined;
|
|
140
|
+
} | {
|
|
141
|
+
mode: "navigation";
|
|
142
|
+
};
|
|
143
|
+
} | {
|
|
144
|
+
type: "click";
|
|
145
|
+
selector: string;
|
|
146
|
+
offset?: {
|
|
147
|
+
x: number;
|
|
148
|
+
y: number;
|
|
149
|
+
} | undefined;
|
|
150
|
+
delay?: number | undefined;
|
|
151
|
+
count?: number | undefined;
|
|
152
|
+
} | {
|
|
153
|
+
type: "type";
|
|
154
|
+
selector: string;
|
|
155
|
+
text: string;
|
|
156
|
+
delay?: number | undefined;
|
|
157
|
+
} | {
|
|
158
|
+
type: "keypress";
|
|
159
|
+
key: "." | "#" | ":" | ">" | "+" | "~" | "[" | "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z" | "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z" | "0" | "*" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" | "Power" | "Eject" | "Abort" | "Help" | "Backspace" | "Tab" | "Numpad5" | "NumpadEnter" | "Enter" | "\r" | "\n" | "ShiftLeft" | "ShiftRight" | "ControlLeft" | "ControlRight" | "AltLeft" | "AltRight" | "Pause" | "CapsLock" | "Escape" | "Convert" | "NonConvert" | "Space" | "Numpad9" | "PageUp" | "Numpad3" | "PageDown" | "End" | "Numpad1" | "Home" | "Numpad7" | "ArrowLeft" | "Numpad4" | "Numpad8" | "ArrowUp" | "ArrowRight" | "Numpad6" | "Numpad2" | "ArrowDown" | "Select" | "Open" | "PrintScreen" | "Insert" | "Numpad0" | "Delete" | "NumpadDecimal" | "Digit0" | "Digit1" | "Digit2" | "Digit3" | "Digit4" | "Digit5" | "Digit6" | "Digit7" | "Digit8" | "Digit9" | "KeyA" | "KeyB" | "KeyC" | "KeyD" | "KeyE" | "KeyF" | "KeyG" | "KeyH" | "KeyI" | "KeyJ" | "KeyK" | "KeyL" | "KeyM" | "KeyN" | "KeyO" | "KeyP" | "KeyQ" | "KeyR" | "KeyS" | "KeyT" | "KeyU" | "KeyV" | "KeyW" | "KeyX" | "KeyY" | "KeyZ" | "MetaLeft" | "MetaRight" | "ContextMenu" | "NumpadMultiply" | "NumpadAdd" | "NumpadSubtract" | "NumpadDivide" | "F1" | "F2" | "F3" | "F4" | "F5" | "F6" | "F7" | "F8" | "F9" | "F10" | "F11" | "F12" | "F13" | "F14" | "F15" | "F16" | "F17" | "F18" | "F19" | "F20" | "F21" | "F22" | "F23" | "F24" | "NumLock" | "ScrollLock" | "AudioVolumeMute" | "AudioVolumeDown" | "AudioVolumeUp" | "MediaTrackNext" | "MediaTrackPrevious" | "MediaStop" | "MediaPlayPause" | "Semicolon" | "Equal" | "NumpadEqual" | "Comma" | "Minus" | "Period" | "Slash" | "Backquote" | "BracketLeft" | "Backslash" | "BracketRight" | "Quote" | "AltGraph" | "Props" | "Cancel" | "Clear" | "Shift" | "Control" | "Alt" 
| "Accept" | "ModeChange" | " " | "Print" | "Execute" | "\0" | "Meta" | "-" | "/" | ";" | "=" | "," | "`" | "\\" | "]" | "'" | "Attn" | "CrSel" | "ExSel" | "EraseEof" | "Play" | "ZoomOut" | ")" | "!" | "@" | "$" | "%" | "^" | "&" | "(" | "<" | "_" | "?" | "{" | "}" | "\"" | "SoftLeft" | "SoftRight" | "Camera" | "Call" | "EndCall" | "VolumeDown" | "VolumeUp";
|
|
160
|
+
delay?: number | undefined;
|
|
161
|
+
} | {
|
|
162
|
+
type: "selector";
|
|
163
|
+
selector: string;
|
|
164
|
+
output: string;
|
|
165
|
+
multi: boolean;
|
|
166
|
+
} | {
|
|
167
|
+
type: "findData";
|
|
168
|
+
input: string;
|
|
169
|
+
output: string;
|
|
170
|
+
kind: "property";
|
|
171
|
+
key?: string | undefined;
|
|
172
|
+
} | {
|
|
173
|
+
type: "getContent";
|
|
174
|
+
format: "text" | "html";
|
|
175
|
+
cleanContent?: boolean | undefined;
|
|
176
|
+
output: string;
|
|
177
|
+
} | {
|
|
178
|
+
type: "page";
|
|
179
|
+
input: string;
|
|
180
|
+
actions: v.InferOutput<ActionType>[];
|
|
181
|
+
concurrency: number;
|
|
182
|
+
throwError: boolean;
|
|
183
|
+
} | {
|
|
184
|
+
type: "close";
|
|
185
|
+
} | {
|
|
186
|
+
type: "custom";
|
|
187
|
+
config?: ({
|
|
188
|
+
type: string;
|
|
189
|
+
} & {
|
|
190
|
+
[key: string]: unknown;
|
|
191
|
+
}) | undefined;
|
|
192
|
+
fn?: ((input: WebPage) => Promise<any>) | undefined;
|
|
193
|
+
}, v.NumberIssue | v.BaseIssue<unknown> | v.BooleanIssue | v.StringIssue | v.TupleIssue | v.ObjectIssue | v.LiteralIssue | v.ArrayIssue | v.UnionIssue<v.StringIssue | v.ArrayIssue> | v.UnionIssue<v.StringIssue | v.ObjectIssue | v.LiteralIssue | v.ArrayIssue | v.UnionIssue<v.StringIssue | v.ArrayIssue>> | v.PicklistIssue | v.UnionIssue<v.StringIssue | v.TupleIssue> | v.LooseObjectIssue | v.VariantIssue | v.CustomIssue | v.UnionIssue<v.NumberIssue | v.BaseIssue<unknown> | v.BooleanIssue | v.StringIssue | v.TupleIssue | v.ObjectIssue | v.LiteralIssue | v.ArrayIssue | v.UnionIssue<v.StringIssue | v.ArrayIssue> | v.UnionIssue<v.StringIssue | v.ObjectIssue | v.LiteralIssue | v.ArrayIssue | v.UnionIssue<v.StringIssue | v.ArrayIssue>> | v.PicklistIssue | v.UnionIssue<v.StringIssue | v.TupleIssue> | v.LooseObjectIssue | v.VariantIssue | v.CustomIssue>> | undefined) => {
|
|
194
|
+
type: "custom";
|
|
195
|
+
config: any;
|
|
196
|
+
}>;
|
|
197
|
+
export declare const ActionListDefine: v.ArraySchema<v.SchemaWithFallback<v.UnionSchema<[v.ObjectSchema<{
|
|
198
|
+
readonly timeout: v.OptionalSchema<v.NumberSchema<undefined>, undefined>;
|
|
199
|
+
readonly waitUntil: v.OptionalSchema<v.PicklistSchema<["load", "domcontentloaded", "networkidle0", "networkidle2"], undefined>, "networkidle2">;
|
|
200
|
+
readonly url: v.UnionSchema<[v.StringSchema<undefined>, v.ObjectSchema<{
|
|
201
|
+
readonly source: v.LiteralSchema<"variable", undefined>;
|
|
202
|
+
readonly key: v.UnionSchema<[v.StringSchema<undefined>, v.ArraySchema<v.StringSchema<undefined>, undefined>], undefined>;
|
|
203
|
+
}, undefined>], undefined>;
|
|
204
|
+
readonly type: v.LiteralSchema<"goto", undefined>;
|
|
205
|
+
}, undefined>, v.ObjectSchema<{
|
|
206
|
+
readonly width: v.OptionalSchema<v.NumberSchema<undefined>, 1920>;
|
|
207
|
+
readonly height: v.OptionalSchema<v.NumberSchema<undefined>, 1080>;
|
|
208
|
+
readonly isMobile: v.OptionalSchema<v.BooleanSchema<undefined>, undefined>;
|
|
209
|
+
readonly isLandscape: v.OptionalSchema<v.BooleanSchema<undefined>, undefined>;
|
|
210
|
+
readonly type: v.LiteralSchema<"setViewport", undefined>;
|
|
211
|
+
}, undefined>, v.ObjectSchema<{
|
|
212
|
+
readonly userAgent: v.StringSchema<undefined>;
|
|
213
|
+
readonly type: v.LiteralSchema<"setUserAgent", undefined>;
|
|
214
|
+
}, undefined>, v.ObjectSchema<{
|
|
215
|
+
readonly type: v.LiteralSchema<"wait", undefined>;
|
|
216
|
+
readonly config: v.VariantSchema<"mode", [v.ObjectSchema<{
|
|
217
|
+
readonly selector: v.StringSchema<undefined>;
|
|
218
|
+
readonly visible: v.OptionalSchema<v.BooleanSchema<undefined>, undefined>;
|
|
219
|
+
readonly hidden: v.OptionalSchema<v.BooleanSchema<undefined>, undefined>;
|
|
220
|
+
readonly mode: v.LiteralSchema<"selector", undefined>;
|
|
221
|
+
}, undefined>, v.ObjectSchema<{
|
|
222
|
+
readonly mode: v.LiteralSchema<"request", undefined>;
|
|
223
|
+
readonly urlRegexp: v.UnionSchema<[v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TransformAction<string, RegExp>]>, v.SchemaWithPipe<readonly [v.TupleSchema<[v.StringSchema<undefined>, v.SchemaWithPipe<readonly [v.StringSchema<undefined>]>], undefined>, v.TransformAction<[string, string], RegExp>]>], undefined>;
|
|
224
|
+
readonly method: v.OptionalSchema<v.StringSchema<undefined>, undefined>;
|
|
225
|
+
}, undefined>, v.ObjectSchema<{
|
|
226
|
+
readonly mode: v.LiteralSchema<"response", undefined>;
|
|
227
|
+
readonly urlRegexp: v.OptionalSchema<v.UnionSchema<[v.SchemaWithPipe<readonly [v.StringSchema<undefined>, v.TransformAction<string, RegExp>]>, v.SchemaWithPipe<readonly [v.TupleSchema<[v.StringSchema<undefined>, v.SchemaWithPipe<readonly [v.StringSchema<undefined>]>], undefined>, v.TransformAction<[string, string], RegExp>]>], undefined>, undefined>;
|
|
228
|
+
readonly status: v.OptionalSchema<v.NumberSchema<undefined>, undefined>;
|
|
229
|
+
}, undefined>, v.ObjectSchema<{
|
|
230
|
+
readonly mode: v.LiteralSchema<"networkIdle", undefined>;
|
|
231
|
+
readonly idleTime: v.OptionalSchema<v.NumberSchema<undefined>, undefined>;
|
|
232
|
+
readonly concurrency: v.OptionalSchema<v.NumberSchema<undefined>, undefined>;
|
|
233
|
+
}, undefined>, v.ObjectSchema<{
|
|
234
|
+
readonly mode: v.LiteralSchema<"navigation", undefined>;
|
|
235
|
+
}, undefined>], undefined>;
|
|
236
|
+
}, undefined>, v.ObjectSchema<{
|
|
237
|
+
readonly type: v.LiteralSchema<"click", undefined>;
|
|
238
|
+
readonly selector: v.StringSchema<undefined>;
|
|
239
|
+
readonly offset: v.OptionalSchema<v.ObjectSchema<{
|
|
240
|
+
readonly x: v.NumberSchema<undefined>;
|
|
241
|
+
readonly y: v.NumberSchema<undefined>;
|
|
242
|
+
}, undefined>, undefined>;
|
|
243
|
+
readonly delay: v.OptionalSchema<v.NumberSchema<undefined>, undefined>;
|
|
244
|
+
readonly count: v.OptionalSchema<v.NumberSchema<undefined>, undefined>;
|
|
245
|
+
}, undefined>, v.ObjectSchema<{
|
|
246
|
+
readonly type: v.LiteralSchema<"type", undefined>;
|
|
247
|
+
readonly selector: v.StringSchema<undefined>;
|
|
248
|
+
readonly text: v.StringSchema<undefined>;
|
|
249
|
+
readonly delay: v.OptionalSchema<v.NumberSchema<undefined>, undefined>;
|
|
250
|
+
}, undefined>, v.ObjectSchema<{
|
|
251
|
+
readonly type: v.LiteralSchema<"keypress", undefined>;
|
|
252
|
+
readonly key: v.PicklistSchema<["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "Power", "Eject", "Abort", "Help", "Backspace", "Tab", "Numpad5", "NumpadEnter", "Enter", "\r", "\n", "ShiftLeft", "ShiftRight", "ControlLeft", "ControlRight", "AltLeft", "AltRight", "Pause", "CapsLock", "Escape", "Convert", "NonConvert", "Space", "Numpad9", "PageUp", "Numpad3", "PageDown", "End", "Numpad1", "Home", "Numpad7", "ArrowLeft", "Numpad4", "Numpad8", "ArrowUp", "ArrowRight", "Numpad6", "Numpad2", "ArrowDown", "Select", "Open", "PrintScreen", "Insert", "Numpad0", "Delete", "NumpadDecimal", "Digit0", "Digit1", "Digit2", "Digit3", "Digit4", "Digit5", "Digit6", "Digit7", "Digit8", "Digit9", "KeyA", "KeyB", "KeyC", "KeyD", "KeyE", "KeyF", "KeyG", "KeyH", "KeyI", "KeyJ", "KeyK", "KeyL", "KeyM", "KeyN", "KeyO", "KeyP", "KeyQ", "KeyR", "KeyS", "KeyT", "KeyU", "KeyV", "KeyW", "KeyX", "KeyY", "KeyZ", "MetaLeft", "MetaRight", "ContextMenu", "NumpadMultiply", "NumpadAdd", "NumpadSubtract", "NumpadDivide", "F1", "F2", "F3", "F4", "F5", "F6", "F7", "F8", "F9", "F10", "F11", "F12", "F13", "F14", "F15", "F16", "F17", "F18", "F19", "F20", "F21", "F22", "F23", "F24", "NumLock", "ScrollLock", "AudioVolumeMute", "AudioVolumeDown", "AudioVolumeUp", "MediaTrackNext", "MediaTrackPrevious", "MediaStop", "MediaPlayPause", "Semicolon", "Equal", "NumpadEqual", "Comma", "Minus", "Period", "Slash", "Backquote", "BracketLeft", "Backslash", "BracketRight", "Quote", "AltGraph", "Props", "Cancel", "Clear", "Shift", "Control", "Alt", "Accept", "ModeChange", " ", "Print", "Execute", "\0", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "Meta", "*", "+", "-", "/", ";", "=", ",", ".", "`", "[", "\\", "]", "'", "Attn", "CrSel", "ExSel", "EraseEof", "Play", "ZoomOut", ")", "!", "@", "#", "$", "%", "^", "&", "(", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", 
"Y", "Z", ":", "<", "_", ">", "?", "~", "{", ",", "}", "\"", "SoftLeft", "SoftRight", "Camera", "Call", "EndCall", "VolumeDown", "VolumeUp"], undefined>;
|
|
253
|
+
readonly delay: v.OptionalSchema<v.NumberSchema<undefined>, undefined>;
|
|
254
|
+
}, undefined>, v.ObjectSchema<{
|
|
255
|
+
readonly type: v.LiteralSchema<"selector", undefined>;
|
|
256
|
+
readonly selector: v.StringSchema<undefined>;
|
|
257
|
+
readonly output: v.StringSchema<undefined>;
|
|
258
|
+
readonly multi: v.OptionalSchema<v.BooleanSchema<undefined>, false>;
|
|
259
|
+
}, undefined>, v.ObjectSchema<{
|
|
260
|
+
readonly type: v.LiteralSchema<"findData", undefined>;
|
|
261
|
+
readonly input: v.StringSchema<undefined>;
|
|
262
|
+
readonly output: v.StringSchema<undefined>;
|
|
263
|
+
readonly kind: v.PicklistSchema<["property"], undefined>;
|
|
264
|
+
readonly key: v.OptionalSchema<v.StringSchema<undefined>, undefined>;
|
|
265
|
+
}, undefined>, v.ObjectSchema<{
|
|
266
|
+
readonly type: v.LiteralSchema<"getContent", undefined>;
|
|
267
|
+
readonly format: v.OptionalSchema<v.PicklistSchema<["html", "text"], undefined>, "html">;
|
|
268
|
+
readonly cleanContent: v.OptionalSchema<v.BooleanSchema<undefined>, undefined>;
|
|
269
|
+
readonly output: v.StringSchema<undefined>;
|
|
270
|
+
}, undefined>, v.GenericSchema<{
|
|
271
|
+
type: "page";
|
|
272
|
+
input: string;
|
|
273
|
+
actions: v.InferInput<ActionType>[];
|
|
274
|
+
concurrency?: number;
|
|
275
|
+
throwError?: boolean;
|
|
276
|
+
}, {
|
|
277
|
+
type: "page";
|
|
278
|
+
input: string;
|
|
279
|
+
actions: v.InferOutput<ActionType>[];
|
|
280
|
+
concurrency: number;
|
|
281
|
+
throwError: boolean;
|
|
282
|
+
}>, v.ObjectSchema<{
|
|
283
|
+
readonly type: v.LiteralSchema<"close", undefined>;
|
|
284
|
+
}, undefined>, v.ObjectSchema<{
|
|
285
|
+
readonly type: v.LiteralSchema<"custom", undefined>;
|
|
286
|
+
readonly config: v.OptionalSchema<v.LooseObjectSchema<{
|
|
287
|
+
readonly type: v.StringSchema<undefined>;
|
|
288
|
+
}, undefined>, undefined>;
|
|
289
|
+
readonly fn: v.OptionalSchema<v.CustomSchema<(input: WebPage) => Promise<any>, undefined>, undefined>;
|
|
290
|
+
}, undefined>], undefined>, (item: v.OutputDataset<{
|
|
291
|
+
timeout?: number | undefined;
|
|
292
|
+
waitUntil: "load" | "domcontentloaded" | "networkidle0" | "networkidle2";
|
|
293
|
+
url: (string | {
|
|
294
|
+
source: "variable";
|
|
295
|
+
key: (string | string[] | undefined) & (string | string[]);
|
|
296
|
+
} | undefined) & (string | {
|
|
297
|
+
source: "variable";
|
|
298
|
+
key: (string | string[] | undefined) & (string | string[]);
|
|
299
|
+
});
|
|
300
|
+
type: "goto";
|
|
301
|
+
} | {
|
|
302
|
+
width: number;
|
|
303
|
+
height: number;
|
|
304
|
+
isMobile?: boolean | undefined;
|
|
305
|
+
isLandscape?: boolean | undefined;
|
|
306
|
+
type: "setViewport";
|
|
307
|
+
} | {
|
|
308
|
+
userAgent: string;
|
|
309
|
+
type: "setUserAgent";
|
|
310
|
+
} | {
|
|
311
|
+
type: "wait";
|
|
312
|
+
config: {
|
|
313
|
+
selector: string;
|
|
314
|
+
visible?: boolean | undefined;
|
|
315
|
+
hidden?: boolean | undefined;
|
|
316
|
+
mode: "selector";
|
|
317
|
+
} | {
|
|
318
|
+
mode: "request";
|
|
319
|
+
urlRegexp: RegExp;
|
|
320
|
+
method?: string | undefined;
|
|
321
|
+
} | {
|
|
322
|
+
mode: "response";
|
|
323
|
+
urlRegexp?: RegExp | undefined;
|
|
324
|
+
status?: number | undefined;
|
|
325
|
+
} | {
|
|
326
|
+
mode: "networkIdle";
|
|
327
|
+
idleTime?: number | undefined;
|
|
328
|
+
concurrency?: number | undefined;
|
|
329
|
+
} | {
|
|
330
|
+
mode: "navigation";
|
|
331
|
+
};
|
|
332
|
+
} | {
|
|
333
|
+
type: "click";
|
|
334
|
+
selector: string;
|
|
335
|
+
offset?: {
|
|
336
|
+
x: number;
|
|
337
|
+
y: number;
|
|
338
|
+
} | undefined;
|
|
339
|
+
delay?: number | undefined;
|
|
340
|
+
count?: number | undefined;
|
|
341
|
+
} | {
|
|
342
|
+
type: "type";
|
|
343
|
+
selector: string;
|
|
344
|
+
text: string;
|
|
345
|
+
delay?: number | undefined;
|
|
346
|
+
} | {
|
|
347
|
+
type: "keypress";
|
|
348
|
+
key: "." | "#" | ":" | ">" | "+" | "~" | "[" | "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z" | "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z" | "0" | "*" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" | "Power" | "Eject" | "Abort" | "Help" | "Backspace" | "Tab" | "Numpad5" | "NumpadEnter" | "Enter" | "\r" | "\n" | "ShiftLeft" | "ShiftRight" | "ControlLeft" | "ControlRight" | "AltLeft" | "AltRight" | "Pause" | "CapsLock" | "Escape" | "Convert" | "NonConvert" | "Space" | "Numpad9" | "PageUp" | "Numpad3" | "PageDown" | "End" | "Numpad1" | "Home" | "Numpad7" | "ArrowLeft" | "Numpad4" | "Numpad8" | "ArrowUp" | "ArrowRight" | "Numpad6" | "Numpad2" | "ArrowDown" | "Select" | "Open" | "PrintScreen" | "Insert" | "Numpad0" | "Delete" | "NumpadDecimal" | "Digit0" | "Digit1" | "Digit2" | "Digit3" | "Digit4" | "Digit5" | "Digit6" | "Digit7" | "Digit8" | "Digit9" | "KeyA" | "KeyB" | "KeyC" | "KeyD" | "KeyE" | "KeyF" | "KeyG" | "KeyH" | "KeyI" | "KeyJ" | "KeyK" | "KeyL" | "KeyM" | "KeyN" | "KeyO" | "KeyP" | "KeyQ" | "KeyR" | "KeyS" | "KeyT" | "KeyU" | "KeyV" | "KeyW" | "KeyX" | "KeyY" | "KeyZ" | "MetaLeft" | "MetaRight" | "ContextMenu" | "NumpadMultiply" | "NumpadAdd" | "NumpadSubtract" | "NumpadDivide" | "F1" | "F2" | "F3" | "F4" | "F5" | "F6" | "F7" | "F8" | "F9" | "F10" | "F11" | "F12" | "F13" | "F14" | "F15" | "F16" | "F17" | "F18" | "F19" | "F20" | "F21" | "F22" | "F23" | "F24" | "NumLock" | "ScrollLock" | "AudioVolumeMute" | "AudioVolumeDown" | "AudioVolumeUp" | "MediaTrackNext" | "MediaTrackPrevious" | "MediaStop" | "MediaPlayPause" | "Semicolon" | "Equal" | "NumpadEqual" | "Comma" | "Minus" | "Period" | "Slash" | "Backquote" | "BracketLeft" | "Backslash" | "BracketRight" | "Quote" | "AltGraph" | "Props" | "Cancel" | "Clear" | "Shift" | "Control" | "Alt" 
| "Accept" | "ModeChange" | " " | "Print" | "Execute" | "\0" | "Meta" | "-" | "/" | ";" | "=" | "," | "`" | "\\" | "]" | "'" | "Attn" | "CrSel" | "ExSel" | "EraseEof" | "Play" | "ZoomOut" | ")" | "!" | "@" | "$" | "%" | "^" | "&" | "(" | "<" | "_" | "?" | "{" | "}" | "\"" | "SoftLeft" | "SoftRight" | "Camera" | "Call" | "EndCall" | "VolumeDown" | "VolumeUp";
|
|
349
|
+
delay?: number | undefined;
|
|
350
|
+
} | {
|
|
351
|
+
type: "selector";
|
|
352
|
+
selector: string;
|
|
353
|
+
output: string;
|
|
354
|
+
multi: boolean;
|
|
355
|
+
} | {
|
|
356
|
+
type: "findData";
|
|
357
|
+
input: string;
|
|
358
|
+
output: string;
|
|
359
|
+
kind: "property";
|
|
360
|
+
key?: string | undefined;
|
|
361
|
+
} | {
|
|
362
|
+
type: "getContent";
|
|
363
|
+
format: "text" | "html";
|
|
364
|
+
cleanContent?: boolean | undefined;
|
|
365
|
+
output: string;
|
|
366
|
+
} | {
|
|
367
|
+
type: "page";
|
|
368
|
+
input: string;
|
|
369
|
+
actions: v.InferOutput<ActionType>[];
|
|
370
|
+
concurrency: number;
|
|
371
|
+
throwError: boolean;
|
|
372
|
+
} | {
|
|
373
|
+
type: "close";
|
|
374
|
+
} | {
|
|
375
|
+
type: "custom";
|
|
376
|
+
config?: ({
|
|
377
|
+
type: string;
|
|
378
|
+
} & {
|
|
379
|
+
[key: string]: unknown;
|
|
380
|
+
}) | undefined;
|
|
381
|
+
fn?: ((input: WebPage) => Promise<any>) | undefined;
|
|
382
|
+
}, v.NumberIssue | v.BaseIssue<unknown> | v.BooleanIssue | v.StringIssue | v.TupleIssue | v.ObjectIssue | v.LiteralIssue | v.ArrayIssue | v.UnionIssue<v.StringIssue | v.ArrayIssue> | v.UnionIssue<v.StringIssue | v.ObjectIssue | v.LiteralIssue | v.ArrayIssue | v.UnionIssue<v.StringIssue | v.ArrayIssue>> | v.PicklistIssue | v.UnionIssue<v.StringIssue | v.TupleIssue> | v.LooseObjectIssue | v.VariantIssue | v.CustomIssue | v.UnionIssue<v.NumberIssue | v.BaseIssue<unknown> | v.BooleanIssue | v.StringIssue | v.TupleIssue | v.ObjectIssue | v.LiteralIssue | v.ArrayIssue | v.UnionIssue<v.StringIssue | v.ArrayIssue> | v.UnionIssue<v.StringIssue | v.ObjectIssue | v.LiteralIssue | v.ArrayIssue | v.UnionIssue<v.StringIssue | v.ArrayIssue>> | v.PicklistIssue | v.UnionIssue<v.StringIssue | v.TupleIssue> | v.LooseObjectIssue | v.VariantIssue | v.CustomIssue>> | undefined) => {
|
|
383
|
+
type: "custom";
|
|
384
|
+
config: any;
|
|
385
|
+
}>, undefined>;
|
|
386
|
+
export type ActionType = typeof ActionDefine;
|
|
387
|
+
export type QueueList = v.InferInput<typeof ActionDefine>[];
|
|
388
|
+
export declare const GlobalConfig: v.ObjectSchema<{
|
|
389
|
+
readonly maxTimeout: v.OptionalSchema<v.NumberSchema<undefined>, undefined>;
|
|
390
|
+
readonly actionTimeout: v.OptionalSchema<v.NumberSchema<undefined>, undefined>;
|
|
391
|
+
}, undefined>;
|
|
392
|
+
export type GlobalConfigType = v.InferOutput<typeof GlobalConfig>;
|
|
393
|
+
export type GlobalConfigInputType = v.InferInput<typeof GlobalConfig>;
|
|
394
|
+
export {};
|
package/download.d.ts
ADDED
package/format.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare function format(rawHtml: string, returnText?: boolean): string;
|
package/index.d.ts
ADDED
package/index.mjs
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
1
|
+
/**
 * Runtime entry point of `@cyia/crawl` (readable form of the published bundle).
 *
 * Exposes:
 *  - `WebBrowser` / `init`: launch Chrome through puppeteer-core, downloading it first when missing.
 *  - `WebPage`: executes a validated list of actions against one page.
 *  - `ActionDefine`, `ActionListDefine`, `GlobalConfig`: valibot schemas.
 *  - `download`, `getExecutablePath`: Chrome installation helpers.
 *
 * Behavioural fixes relative to the previous bundle:
 *  - `findData` now awaits the property value of a single element handle
 *    instead of storing a pending Promise in the variable table.
 *  - `goto` honours its own `timeout` and only falls back to the global `actionTimeout`.
 *  - The `page` action really runs up to `concurrency` child pages in parallel
 *    (they used to be awaited one after another).
 *  - The schema fallback and the `custom` action no longer raise a `TypeError`
 *    when the raw action value or its `config` is missing.
 *  - The key picklist contained `","` twice; the second occurrence now is `"|"`.
 */
import puppeteer, { ElementHandle } from "puppeteer-core";
import { PUPPETEER_REVISIONS } from "puppeteer-core/internal/revisions.js";
import { Browser, computeExecutablePath, install } from "@puppeteer/browsers";
import { load } from "cheerio";
import { promise as createQueue } from "fastq";
import * as fs from "fs";
import * as v from "valibot";

/**
 * Reduces an HTML document to its content.
 *
 * @param rawHtml HTML source.
 * @param returnText When truthy return the body's text, otherwise its inner HTML.
 * @returns Cleaned text or HTML of `<body>`.
 */
function format(rawHtml, returnText) {
  const $ = load(rawHtml, undefined, true);
  const body = $("body");

  body.find("script,style,iframe,footer,br,hr,svg").remove();
  body.find("*").removeAttr("class");
  body.find("*").removeAttr("style");

  // Drop comments and whitespace-only text nodes everywhere in the document.
  $("*")
    .contents()
    .filter(function () {
      return this.type === "comment" || (this.type === "text" && !this.data.trim());
    })
    .remove();

  // Trim the remaining text nodes.
  $("*")
    .contents()
    .filter(function () {
      return this.type === "text" && !!this.data.trim();
    })
    .text((_index, text) => text.trim());

  return returnText ? body.text() : body.html();
}

/** Wraps one puppeteer page together with its variable table and executes action queues on it. */
class WebPage {
  page;
  browser;
  parent;
  /** Variables written by actions (`output`) and read by later ones (`input`). */
  #variables = {};

  /**
   * @param page Puppeteer page.
   * @param browser Owning `WebBrowser`.
   * @param parent Page whose `page` action opened this one, if any.
   */
  constructor(page, browser, parent) {
    this.page = page;
    this.browser = browser;
    this.parent = parent;
  }

  ab;
  timeoutId;

  /** Creates a fresh AbortController and aborts it with reason "timeout" after `timeout` ms. */
  setMaxTimeout(timeout) {
    this.ab = new AbortController();
    this.timeoutId = setTimeout(() => {
      this.ab.abort("timeout");
    }, timeout);
  }

  /** Cancels the timer armed by `setMaxTimeout`. */
  clearTimeout() {
    clearTimeout(this.timeoutId);
  }

  setVariable(key, value) {
    this.#variables[key] = value;
  }

  getVariable(key) {
    return this.#variables[key];
  }

  /**
   * Resolves a path such as `["..", "$item", "href"]`.
   * Each `".."` moves to the parent page, the first other segment names a
   * variable of the current page and the remaining segments index into it.
   *
   * @throws Error when a parent is missing or an intermediate value is not an object.
   */
  #resolvePath(start, path) {
    let owner = start;
    let value;
    let variableRead = false;
    for (const segment of path) {
      if (segment === "..") {
        if (!owner.parent) {
          throw new Error("未找到父级");
        }
        owner = owner.parent;
      } else if (variableRead) {
        if (!value || typeof value !== "object") {
          throw new Error(`${path}路径下未找到值`);
        }
        value = value[segment];
      } else {
        value = owner.#variables[segment];
        variableRead = true;
      }
    }
    return value;
  }

  /** Turns a `Value` (literal string or variable reference) into its concrete value. */
  #resolveValue(value) {
    if (typeof value === "string") {
      return value;
    }
    if (value.source === "variable") {
      return typeof value.key === "string"
        ? this.#variables[value.key]
        : this.#resolvePath(this, value.key);
    }
    return undefined;
  }

  /** Abort signal and timeout shared by every waiting puppeteer call. */
  #waitOptions() {
    return {
      signal: this.ab?.signal,
      timeout: this.browser.getConfig()?.actionTimeout,
    };
  }

  /** Executes the `wait` action for the given (already validated) config. */
  async #wait(config) {
    switch (config.mode) {
      case "selector":
        return this.page.waitForSelector(config.selector, {
          visible: config.visible,
          hidden: config.hidden,
          ...this.#waitOptions(),
        });
      case "request":
        return this.page.waitForRequest(async (request) => {
          if (config.urlRegexp && !config.urlRegexp.test(request.url())) return false;
          if (config.method && config.method !== request.method()) return false;
          return true;
        }, this.#waitOptions());
      case "response":
        return this.page.waitForResponse(async (response) => {
          if (config.urlRegexp && !config.urlRegexp.test(response.url())) return false;
          if (config.status && config.status !== response.status()) return false;
          return true;
        }, this.#waitOptions());
      case "networkIdle":
        return this.page.waitForNetworkIdle({
          idleTime: config.idleTime,
          concurrency: config.concurrency,
          ...this.#waitOptions(),
        });
      case "navigation":
        return this.page.waitForNavigation(this.#waitOptions());
      default:
        return undefined;
    }
  }

  /**
   * Runs `action.actions` once per item of the input variable, each in its own
   * child page, with at most `action.concurrency` pages active at a time.
   *
   * @returns One entry per item, in item order; `undefined` for items that failed.
   * @throws The most recently reported failure when `action.throwError` is set.
   */
  async #runChildPages(action) {
    const source = this.#variables[action.input];
    const items = Array.isArray(source) ? source : [source];
    let lastError;

    const queue = createQueue(
      (index) =>
        this.browser.openPage(async (child) => {
          child.setVariable("$item", items[index]);
          child.setVariable("$index", index);
          child.setVariable("$first", index === 0);
          child.setVariable("$last", index === items.length - 1);
          return child.exeQueue(action.actions);
        }, this),
      action.concurrency,
    );
    queue.error((error) => {
      if (error) lastError = error;
    });

    // Enqueue everything up front so the queue can apply its concurrency limit;
    // awaiting each push in turn would serialize the pages.
    const results = await Promise.all(
      items.map((_item, index) => queue.push(index).catch(() => undefined)),
    );
    if (action.throwError && lastError) {
      throw lastError;
    }
    return results;
  }

  /**
   * Executes the actions in order.
   *
   * @param list Validated actions (output of `ActionDefine`).
   * @returns The value produced by the last action that yields one.
   */
  async exeQueue(list) {
    let result;
    for (const action of list) {
      console.log("准备执行", action);
      switch (action.type) {
        case "click":
          await this.page.click(action.selector, {
            offset: action.offset,
            delay: action.delay,
            count: action.count,
          });
          break;
        case "type":
          await this.page.type(action.selector, action.text, { delay: action.delay });
          break;
        case "goto": {
          const shared = this.#waitOptions();
          result = await this.page.goto(this.#resolveValue(action.url), {
            waitUntil: action.waitUntil,
            signal: shared.signal,
            // A per-action timeout takes precedence over the global one.
            timeout: action.timeout ?? shared.timeout,
          });
          break;
        }
        case "setViewport":
          result = await this.page.setViewport({
            width: action.width,
            height: action.height,
            isMobile: action.isMobile,
            isLandscape: action.isLandscape,
          });
          break;
        case "wait":
          result = await this.#wait(action.config);
          break;
        case "selector": {
          const found = action.multi
            ? await this.page.$$(action.selector)
            : await this.page.$(action.selector);
          result = this.#variables[action.output] = found;
          break;
        }
        case "keypress":
          await this.page.keyboard.press(action.key, { delay: action.delay });
          break;
        case "findData": {
          const source = this.#variables[action.input];
          const readProperty = async (handle) =>
            (await handle.getProperty(action.key)).jsonValue();
          if (Array.isArray(source)) {
            if (action.kind === "property") {
              result = this.#variables[action.output] = await Promise.all(
                source.map((handle) => readProperty(handle)),
              );
            }
          } else if (source instanceof ElementHandle) {
            // Await here so the variable holds the value, not a pending Promise.
            result = this.#variables[action.output] = await readProperty(source);
          }
          break;
        }
        case "getContent": {
          const html = await this.page.content();
          result = this.#variables[action.output] = action.cleanContent
            ? format(html, action.format === "text")
            : html;
          break;
        }
        case "page":
          result = await this.#runChildPages(action);
          break;
        case "setUserAgent":
          await this.page.setUserAgent(action.userAgent);
          break;
        case "close":
          await this.page.close({ runBeforeUnload: false });
          this.clearTimeout();
          break;
        case "custom": {
          if (typeof action.fn === "function") {
            result = await action.fn(this);
          } else {
            const pluginType = action.config?.type;
            const plugin = this.browser.getCustom(pluginType);
            if (!plugin) {
              throw new Error(`自定义[${pluginType}]未实现处理`);
            }
            result = await plugin(action.config, this);
          }
          break;
        }
        default:
          break;
      }
    }
    return result;
  }
}

/** Keys accepted by the `keypress` action. */
const KeyInput = v.picklist([
  "0", "1", "2", "3", "4", "5", "6", "7", "8", "9",
  "Power", "Eject", "Abort", "Help", "Backspace", "Tab", "Numpad5", "NumpadEnter", "Enter", "\r", "\n",
  "ShiftLeft", "ShiftRight", "ControlLeft", "ControlRight", "AltLeft", "AltRight",
  "Pause", "CapsLock", "Escape", "Convert", "NonConvert", "Space",
  "Numpad9", "PageUp", "Numpad3", "PageDown", "End", "Numpad1", "Home", "Numpad7",
  "ArrowLeft", "Numpad4", "Numpad8", "ArrowUp", "ArrowRight", "Numpad6", "Numpad2", "ArrowDown",
  "Select", "Open", "PrintScreen", "Insert", "Numpad0", "Delete", "NumpadDecimal",
  "Digit0", "Digit1", "Digit2", "Digit3", "Digit4", "Digit5", "Digit6", "Digit7", "Digit8", "Digit9",
  "KeyA", "KeyB", "KeyC", "KeyD", "KeyE", "KeyF", "KeyG", "KeyH", "KeyI", "KeyJ", "KeyK", "KeyL", "KeyM",
  "KeyN", "KeyO", "KeyP", "KeyQ", "KeyR", "KeyS", "KeyT", "KeyU", "KeyV", "KeyW", "KeyX", "KeyY", "KeyZ",
  "MetaLeft", "MetaRight", "ContextMenu",
  "NumpadMultiply", "NumpadAdd", "NumpadSubtract", "NumpadDivide",
  "F1", "F2", "F3", "F4", "F5", "F6", "F7", "F8", "F9", "F10", "F11", "F12",
  "F13", "F14", "F15", "F16", "F17", "F18", "F19", "F20", "F21", "F22", "F23", "F24",
  "NumLock", "ScrollLock",
  "AudioVolumeMute", "AudioVolumeDown", "AudioVolumeUp",
  "MediaTrackNext", "MediaTrackPrevious", "MediaStop", "MediaPlayPause",
  "Semicolon", "Equal", "NumpadEqual", "Comma", "Minus", "Period", "Slash", "Backquote",
  "BracketLeft", "Backslash", "BracketRight", "Quote", "AltGraph", "Props", "Cancel", "Clear",
  "Shift", "Control", "Alt", "Accept", "ModeChange", " ", "Print", "Execute", "\0",
  "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m",
  "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z",
  "Meta", "*", "+", "-", "/", ";", "=", ",", ".", "`", "[", "\\", "]", "'",
  "Attn", "CrSel", "ExSel", "EraseEof", "Play", "ZoomOut",
  ")", "!", "@", "#", "$", "%", "^", "&", "(",
  "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M",
  "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z",
  // NOTE(review): this slot previously repeated ","; Puppeteer's KeyInput has "|" between "{" and "}".
  ":", "<", "_", ">", "?", "~", "{", "|", "}", '"',
  "SoftLeft", "SoftRight", "Camera", "Call", "EndCall", "VolumeDown", "VolumeUp",
]);

const OptionalNumber = v.optional(v.number());
const OptionalBoolean = v.optional(v.boolean());
const OptionalString = v.optional(v.string());
const Timeout = OptionalNumber;

/** A regular expression given either as a source string or as a `[source, flags]` tuple. */
const RegExpFromString = v.pipe(
  v.string(),
  v.transform((source) => new RegExp(source)),
);
const RegExpFromTuple = v.pipe(
  v.tuple([v.string(), v.pipe(v.string())]),
  v.transform(([source, flags]) => new RegExp(source, flags)),
);
const RegExpDefine = v.union([RegExpFromString, RegExpFromTuple]);

const Selector = v.string();

/** A literal string or a reference to a page variable (optionally a path, see `WebPage`). */
const Value = v.union([
  v.string(),
  v.object({
    source: v.literal("variable"),
    key: v.union([v.string(), v.array(v.string())]),
  }),
]);

const GotoAction = v.object({
  timeout: Timeout,
  waitUntil: v.optional(
    v.picklist(["load", "domcontentloaded", "networkidle0", "networkidle2"]),
    "networkidle2",
  ),
  url: Value,
  type: v.literal("goto"),
});

const SetViewportAction = v.object({
  width: v.optional(v.number(), 1920),
  height: v.optional(v.number(), 1080),
  isMobile: v.optional(v.boolean()),
  isLandscape: v.optional(v.boolean()),
  type: v.literal("setViewport"),
});

const SetUserAgentAction = v.object({
  userAgent: v.string(),
  type: v.literal("setUserAgent"),
});

const SelectorTarget = v.object({
  selector: Selector,
  visible: OptionalBoolean,
  hidden: OptionalBoolean,
});
const WaitSelector = v.object({ mode: v.literal("selector"), ...SelectorTarget.entries });
const WaitRequest = v.object({
  mode: v.literal("request"),
  urlRegexp: RegExpDefine,
  method: OptionalString,
});
const WaitResponse = v.object({
  mode: v.literal("response"),
  urlRegexp: v.optional(RegExpDefine),
  status: OptionalNumber,
});
const WaitNetworkIdle = v.object({
  mode: v.literal("networkIdle"),
  idleTime: OptionalNumber,
  concurrency: OptionalNumber,
});
const WaitNavigation = v.object({ mode: v.literal("navigation") });
const WaitAction = v.object({
  type: v.literal("wait"),
  config: v.variant("mode", [
    WaitSelector,
    WaitRequest,
    WaitResponse,
    WaitNetworkIdle,
    WaitNavigation,
  ]),
});

const ClickAction = v.object({
  type: v.literal("click"),
  selector: Selector,
  offset: v.optional(v.object({ x: v.number(), y: v.number() })),
  delay: OptionalNumber,
  count: OptionalNumber,
});

const TypeAction = v.object({
  type: v.literal("type"),
  selector: Selector,
  text: v.string(),
  delay: OptionalNumber,
});

const KeypressAction = v.object({
  type: v.literal("keypress"),
  key: KeyInput,
  delay: OptionalNumber,
});

const SelectorAction = v.object({
  type: v.literal("selector"),
  selector: Selector,
  output: v.string(),
  multi: v.optional(v.boolean(), false),
});

const FindDataAction = v.object({
  type: v.literal("findData"),
  input: v.string(),
  output: v.string(),
  kind: v.picklist(["property"]),
  key: v.optional(v.string()),
});

const GetContentAction = v.object({
  type: v.literal("getContent"),
  format: v.optional(v.picklist(["html", "text"]), "html"),
  cleanContent: OptionalBoolean,
  output: v.string(),
});

const CloseAction = v.object({ type: v.literal("close") });

const CustomAction = v.object({
  type: v.literal("custom"),
  config: v.optional(v.looseObject({ type: v.string() })),
  fn: v.optional(v.custom(Boolean)),
});

const PageAction = v.object({
  type: v.literal("page"),
  input: v.string(),
  concurrency: v.optional(v.number(), 2),
  throwError: v.optional(v.boolean(), false),
  // Lazy because the action list is recursive.
  actions: v.lazy(() => v.array(ActionDefine)),
});

/** `type` literals of every built-in action. */
const builtInActionTypes = [
  ...[
    GotoAction,
    SetViewportAction,
    SetUserAgentAction,
    WaitAction,
    ClickAction,
    TypeAction,
    KeypressAction,
    SelectorAction,
    FindDataAction,
    GetContentAction,
    CloseAction,
    CustomAction,
  ].map((schema) => schema.entries.type.literal),
  "page",
];

/**
 * Schema of a single action.
 * A value that fails validation but carries a built-in `type` is rejected with
 * the collected issues; any other value is treated as the config of a
 * registered custom handler.
 */
const ActionDefine = v.fallback(
  v.union([
    GotoAction,
    SetViewportAction,
    SetUserAgentAction,
    WaitAction,
    ClickAction,
    TypeAction,
    KeypressAction,
    SelectorAction,
    FindDataAction,
    GetContentAction,
    PageAction,
    CloseAction,
    CustomAction,
  ]),
  (dataset) => {
    // Optional chaining: the raw value may be null/undefined or a primitive.
    if (builtInActionTypes.includes(dataset?.value?.type)) {
      throw new Error(JSON.stringify(dataset?.issues));
    }
    return { type: "custom", config: dataset?.value };
  },
);

const ActionListDefine = v.array(ActionDefine);

const GlobalConfig = v.object({
  maxTimeout: OptionalNumber,
  actionTimeout: OptionalNumber,
});

/**
 * Installs Chrome through `@puppeteer/browsers`, by default from the npmmirror CDN.
 *
 * @param options Install options; they override the defaults (`browser`, `baseUrl`).
 */
async function download(options) {
  await install({
    browser: Browser.CHROME,
    baseUrl: "https://cdn.npmmirror.com/binaries/chrome-for-testing",
    ...options,
    unpack: true,
  });
}

/** Computes the Chrome executable path for `buildId` inside `cacheDir`. */
function getExecutablePath(cacheDir, buildId) {
  return computeExecutablePath({ cacheDir, browser: Browser.CHROME, buildId });
}

/** Shorthand for `WebBrowser.init`. */
async function init(options) {
  return WebBrowser.init(options);
}

/** Chrome build that matches the installed puppeteer-core. */
const CHROME_BUILD_ID = PUPPETEER_REVISIONS.chrome;

/** Owns the puppeteer browser, the global config and the custom action handlers. */
class WebBrowser {
  browser;

  /**
   * Makes sure Chrome is present under `options.cacheDir` (downloading it when
   * the executable does not exist) and launches it.
   *
   * @param options puppeteer launch options plus `cacheDir`.
   */
  static async init(options) {
    const browserKind = Browser.CHROME;
    const executablePath = computeExecutablePath({
      cacheDir: options.cacheDir,
      browser: browserKind,
      buildId: CHROME_BUILD_ID,
    });
    if (!fs.existsSync(executablePath)) {
      console.log("准备下载");
      await download({
        cacheDir: options.cacheDir,
        buildId: CHROME_BUILD_ID,
        browser: browserKind,
      });
    }
    const launched = await puppeteer.launch({ ...options, executablePath });
    return new WebBrowser(launched);
  }

  constructor(browser) {
    this.browser = browser;
  }

  #config;
  #customHandlers = new Map();

  setConfig(config) {
    this.#config = config;
  }

  getConfig() {
    return this.#config;
  }

  /** Registers the handler used for `custom` actions whose config has this `type`. */
  registerCustom(type, fn) {
    this.#customHandlers.set(type, fn);
  }

  clearCustom() {
    this.#customHandlers.clear();
  }

  getCustom(key) {
    return this.#customHandlers.get(key);
  }

  /**
   * Opens a new page, arms the global `maxTimeout` when configured and hands the
   * wrapped page to `fn`.
   *
   * @returns Whatever `fn` returns.
   */
  async openPage(fn, parent) {
    const page = new WebPage(await this.browser.newPage(), this, parent);
    if (this.#config?.maxTimeout) {
      page.setMaxTimeout(this.#config.maxTimeout);
    }
    return fn(page);
  }

  /**
   * Validates `list` and executes it in a new page.
   *
   * @param list Raw action list.
   * @param input Variables to define on the page before execution.
   * @throws Error when the list does not match `ActionListDefine`.
   */
  runQueue(list, input) {
    const parsed = v.safeParse(ActionListDefine, list);
    if (!parsed.success) {
      throw new Error(`解析配置错误\n${JSON.stringify(parsed.issues)}`);
    }
    return this.openPage(async (page) => {
      if (input) {
        for (const key in input) {
          page.setVariable(key, input[key]);
        }
      }
      return page.exeQueue(parsed.output);
    });
  }
}

export {
  ActionDefine,
  ActionListDefine,
  GlobalConfig,
  WebBrowser,
  WebPage,
  download,
  getExecutablePath,
  init,
};
|
package/init.d.ts
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import { Browser, LaunchOptions } from 'puppeteer-core';
|
|
2
|
+
import { WebPage } from './page';
|
|
3
|
+
import { GlobalConfigInputType, QueueList } from './define';
|
|
4
|
+
/** puppeteer-core launch options extended with the browser cache location. */
type InitOptions = LaunchOptions & {
|
|
5
|
+
/** Directory in which the Chrome executable is looked up and, if absent, downloaded. */
cacheDir: string;
|
|
6
|
+
};
|
|
7
|
+
/**
 * Convenience wrapper that delegates to `WebBrowser.init`.
 *
 * @param options - Launch options including `cacheDir`.
 * @returns The launched `WebBrowser`.
 */
export declare function init(options: InitOptions): Promise<WebBrowser>;
|
|
8
|
+
/**
 * Handler for a `custom` action: receives the action's `config` object and the
 * page executing the queue; its resolved value becomes the action's result.
 */
type PluginFn = (input: any, page: WebPage) => Promise<any>;
|
|
9
|
+
/**
 * Owns the puppeteer browser, the global configuration and the registry of
 * custom action handlers, and opens `WebPage` instances.
 */
export declare class WebBrowser {
|
|
10
|
+
#private;
|
|
11
|
+
/** The underlying puppeteer-core browser. */
browser: Browser;
|
|
12
|
+
/**
 * Computes the Chrome executable path inside `options.cacheDir`, downloads
 * Chrome when that file does not exist, then launches it with `options`.
 */
static init(options: InitOptions): Promise<WebBrowser>;
|
|
13
|
+
constructor(browser: Browser);
|
|
14
|
+
/** Stores the global configuration as given (the implementation does not validate it). */
setConfig(config: GlobalConfigInputType): void;
|
|
15
|
+
/** Returns the configuration passed to `setConfig`, or `undefined` when none was set. */
getConfig(): {
|
|
16
|
+
maxTimeout?: number | undefined;
|
|
17
|
+
actionTimeout?: number | undefined;
|
|
18
|
+
} | undefined;
|
|
19
|
+
/** Registers `fn` as the handler for `custom` actions whose config `type` equals `type`. */
registerCustom(type: string, fn: PluginFn): void;
|
|
20
|
+
/** Removes every registered custom handler. */
clearCustom(): void;
|
|
21
|
+
/** Looks up the handler registered under `key`. */
getCustom(key: string): PluginFn | undefined;
|
|
22
|
+
/**
 * Opens a new browser page, wraps it in a `WebPage` (arming `maxTimeout` when
 * it is configured) and resolves with the result of `fn`.
 */
openPage<T>(fn: (page: WebPage) => Promise<T>, parent?: WebPage): Promise<T>;
|
|
23
|
+
/**
 * Validates `list`, opens a page, defines every entry of `input` as a page
 * variable and executes the actions. Throws an `Error` when validation fails.
 */
runQueue(list: QueueList, input?: Record<string, any>): Promise<any>;
|
|
24
|
+
}
|
|
25
|
+
export {};
|
package/package.json
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@cyia/crawl",
|
|
3
|
+
"version": "0.0.1",
|
|
4
|
+
"author": "wszgrcy",
|
|
5
|
+
"description": "",
|
|
6
|
+
"dependencies": {
|
|
7
|
+
"cheerio": "1.0.0",
|
|
8
|
+
"fastq": "1.19.1",
|
|
9
|
+
"html-entities": "2.6.0",
|
|
10
|
+
"puppeteer-core": "24.6.0",
|
|
11
|
+
"valibot": "1.0.0"
|
|
12
|
+
},
|
|
13
|
+
"exports": {
|
|
14
|
+
".": {
|
|
15
|
+
"types": "./index.d.ts",
|
|
16
|
+
"default": "./index.mjs"
|
|
17
|
+
}
|
|
18
|
+
},
|
|
19
|
+
"publishConfig": {
|
|
20
|
+
"access": "public",
|
|
21
|
+
"registry": "https://registry.npmjs.org"
|
|
22
|
+
}
|
|
23
|
+
}
|
package/page.d.ts
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import { Page } from 'puppeteer-core';
|
|
2
|
+
import * as v from 'valibot';
|
|
3
|
+
import { WebBrowser } from './init';
|
|
4
|
+
import { ActionDefine } from './define';
|
|
5
|
+
/**
 * Wraps one puppeteer page together with a private variable table and runs
 * action queues against it.
 */
export declare class WebPage {
|
|
6
|
+
#private;
|
|
7
|
+
/** The underlying puppeteer-core page. */
page: Page;
|
|
8
|
+
/** The `WebBrowser` that opened this page. */
browser: WebBrowser;
|
|
9
|
+
/** The page that opened this one, if any; variable paths use `".."` to reach it. */
parent: WebPage | undefined;
|
|
10
|
+
constructor(page: Page, browser: WebBrowser, parent?: WebPage);
|
|
11
|
+
/** Created by `setMaxTimeout`; its signal is passed to the `goto` and `wait` actions. */
ab?: AbortController;
|
|
12
|
+
/** Handle of the timer armed by `setMaxTimeout`. */
timeoutId: any;
|
|
13
|
+
/** Creates a new AbortController and aborts it with reason `"timeout"` after `timeout` milliseconds. */
setMaxTimeout(timeout: number): void;
|
|
14
|
+
/** Cancels the timer armed by `setMaxTimeout`. */
clearTimeout(): void;
|
|
15
|
+
/** Defines or overwrites a page variable. */
setVariable(key: string, value: any): void;
|
|
16
|
+
/** Reads a page variable; `undefined` when it was never set. */
getVariable(key: string): any;
|
|
17
|
+
/**
 * Executes the validated actions in order and resolves with the value
 * produced by the last action that yields a result.
 */
exeQueue(list: v.InferOutput<typeof ActionDefine>[]): Promise<any>;
|
|
18
|
+
}
|