@letsscrapedata/scraper 0.0.76 → 0.0.78
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +1 -1
- package/dist/index.d.cts +1341 -3
- package/dist/index.d.ts +1341 -3
- package/dist/index.js +1 -1
- package/package.json +3 -2
- package/readme.md +1 -0
package/dist/index.d.ts
CHANGED
|
@@ -1,7 +1,12 @@
|
|
|
1
|
-
import { BrowserControllerType, LsdBrowserType, LsdLaunchOptions, LsdConnectOptions,
|
|
1
|
+
import { BrowserStateData, LsdPage, LsdApiContext, BrowserControllerType, LsdBrowserType, LsdLaunchOptions, LsdConnectOptions, ResponseInterceptionItem, LsdElement, CheerioPage } from '@letsscrapedata/controller';
|
|
2
2
|
import { Proxy } from '@letsscrapedata/proxy';
|
|
3
3
|
import { LogFunction } from '@letsscrapedata/utils';
|
|
4
|
+
import EventEmitter from 'node:events';
|
|
4
5
|
|
|
6
|
+
interface XmlAttrCfg {
|
|
7
|
+
name: string;
|
|
8
|
+
value: string;
|
|
9
|
+
}
|
|
5
10
|
interface XmlParaCfg {
|
|
6
11
|
paraname: string;
|
|
7
12
|
name: string;
|
|
@@ -15,6 +20,97 @@ interface XmlParaCfg {
|
|
|
15
20
|
pattern: string;
|
|
16
21
|
alert: string;
|
|
17
22
|
}
|
|
23
|
+
interface XmlElementCfg {
|
|
24
|
+
loc: string;
|
|
25
|
+
source: string;
|
|
26
|
+
absolute: boolean;
|
|
27
|
+
}
|
|
28
|
+
interface XmlIframeCfg {
|
|
29
|
+
srcprefix: string;
|
|
30
|
+
loc: string;
|
|
31
|
+
}
|
|
32
|
+
interface XmlTemplstrCfg {
|
|
33
|
+
templ: string;
|
|
34
|
+
}
|
|
35
|
+
interface XmlElecontentAttrCfg {
|
|
36
|
+
type: string;
|
|
37
|
+
attrname: string;
|
|
38
|
+
attrname2: string;
|
|
39
|
+
attrname3: string;
|
|
40
|
+
attrname4: string;
|
|
41
|
+
trim: boolean;
|
|
42
|
+
pattern: string;
|
|
43
|
+
boolattr: boolean;
|
|
44
|
+
multieles: boolean;
|
|
45
|
+
join: string;
|
|
46
|
+
line: boolean;
|
|
47
|
+
absolute: boolean;
|
|
48
|
+
}
|
|
49
|
+
interface XmlElecontentImgbase64Cfg {
|
|
50
|
+
type: string;
|
|
51
|
+
}
|
|
52
|
+
interface XmlElecontentInnerhtmlCfg {
|
|
53
|
+
type: string;
|
|
54
|
+
}
|
|
55
|
+
interface XmlElecontentLengthCfg {
|
|
56
|
+
type: string;
|
|
57
|
+
}
|
|
58
|
+
interface XmlElecontentOuterhtmlCfg {
|
|
59
|
+
type: string;
|
|
60
|
+
}
|
|
61
|
+
interface XmlElecontentTextCfg {
|
|
62
|
+
type: string;
|
|
63
|
+
trim: boolean;
|
|
64
|
+
multieles: boolean;
|
|
65
|
+
join: string;
|
|
66
|
+
line: boolean;
|
|
67
|
+
top: boolean;
|
|
68
|
+
}
|
|
69
|
+
interface XmlDecodefontsvgCfg {
|
|
70
|
+
outtype: string;
|
|
71
|
+
}
|
|
72
|
+
interface XmlDecodefontttfCfg {
|
|
73
|
+
fflocs: string;
|
|
74
|
+
intype: string;
|
|
75
|
+
outtype: string;
|
|
76
|
+
}
|
|
77
|
+
interface XmlOcrcfgApiCfg {
|
|
78
|
+
type: string;
|
|
79
|
+
nlchars: string;
|
|
80
|
+
seperator: boolean;
|
|
81
|
+
lang: string;
|
|
82
|
+
}
|
|
83
|
+
interface XmlOcrcfgTesseractCfg {
|
|
84
|
+
type: string;
|
|
85
|
+
nlchars: string;
|
|
86
|
+
seperator: boolean;
|
|
87
|
+
lang: string;
|
|
88
|
+
}
|
|
89
|
+
interface XmlActionApiCfg {
|
|
90
|
+
type: string;
|
|
91
|
+
method: string;
|
|
92
|
+
url: string;
|
|
93
|
+
encodeuri: boolean;
|
|
94
|
+
session: boolean;
|
|
95
|
+
headerssource: string;
|
|
96
|
+
headers: string;
|
|
97
|
+
referer: string;
|
|
98
|
+
proxy: boolean;
|
|
99
|
+
timeout: string;
|
|
100
|
+
context: string;
|
|
101
|
+
data: string;
|
|
102
|
+
datatype: string;
|
|
103
|
+
form: boolean;
|
|
104
|
+
path: string;
|
|
105
|
+
diskcache: boolean;
|
|
106
|
+
cache: boolean;
|
|
107
|
+
varname: string;
|
|
108
|
+
errname: string;
|
|
109
|
+
valerrname: string;
|
|
110
|
+
pattern: string;
|
|
111
|
+
flags: string;
|
|
112
|
+
id: string;
|
|
113
|
+
}
|
|
18
114
|
interface XmlActionBreakCfg {
|
|
19
115
|
type: string;
|
|
20
116
|
id: string;
|
|
@@ -57,6 +153,21 @@ interface XmlActionMiscCfg {
|
|
|
57
153
|
type: string;
|
|
58
154
|
id: string;
|
|
59
155
|
}
|
|
156
|
+
interface XmlMiscDelsyncdbdataCfg {
|
|
157
|
+
type: string;
|
|
158
|
+
tids: string;
|
|
159
|
+
retention: string;
|
|
160
|
+
minretention: string;
|
|
161
|
+
}
|
|
162
|
+
interface XmlMiscExtractdataCfg {
|
|
163
|
+
type: string;
|
|
164
|
+
name: string;
|
|
165
|
+
method: string;
|
|
166
|
+
paras: boolean;
|
|
167
|
+
execdata: boolean;
|
|
168
|
+
subtasks: boolean;
|
|
169
|
+
credits: boolean;
|
|
170
|
+
}
|
|
60
171
|
interface XmlActionExitCfg {
|
|
61
172
|
type: string;
|
|
62
173
|
errname: string;
|
|
@@ -67,6 +178,109 @@ interface XmlActionExtractCfg {
|
|
|
67
178
|
tabname: string;
|
|
68
179
|
id: string;
|
|
69
180
|
}
|
|
181
|
+
interface XmlActionExtractArrayCfg {
|
|
182
|
+
type: string;
|
|
183
|
+
subtype: string;
|
|
184
|
+
list: string;
|
|
185
|
+
requrl: string;
|
|
186
|
+
subkeys: string;
|
|
187
|
+
keys: string;
|
|
188
|
+
tabname: string;
|
|
189
|
+
varname: string;
|
|
190
|
+
idxname: string;
|
|
191
|
+
maxloops: string;
|
|
192
|
+
errname: string;
|
|
193
|
+
id: string;
|
|
194
|
+
}
|
|
195
|
+
interface XmlActionExtractScriptCfg {
|
|
196
|
+
type: string;
|
|
197
|
+
subtype: string;
|
|
198
|
+
desc: string;
|
|
199
|
+
base64: boolean;
|
|
200
|
+
html: boolean;
|
|
201
|
+
isolated: boolean;
|
|
202
|
+
tabname: string;
|
|
203
|
+
maxloops: string;
|
|
204
|
+
errname: string;
|
|
205
|
+
id: string;
|
|
206
|
+
}
|
|
207
|
+
interface XmlActionExtractTableCfg {
|
|
208
|
+
type: string;
|
|
209
|
+
orientation: string;
|
|
210
|
+
check: boolean;
|
|
211
|
+
keyformat: string;
|
|
212
|
+
tabname: string;
|
|
213
|
+
maxloops: string;
|
|
214
|
+
errname: string;
|
|
215
|
+
id: string;
|
|
216
|
+
}
|
|
217
|
+
interface XmlColumnElementCfg {
|
|
218
|
+
type: string;
|
|
219
|
+
colname: string;
|
|
220
|
+
nickname: string;
|
|
221
|
+
completed: boolean;
|
|
222
|
+
setvar: boolean;
|
|
223
|
+
datatype: string;
|
|
224
|
+
errname: string;
|
|
225
|
+
valerrname: string;
|
|
226
|
+
pattern: string;
|
|
227
|
+
flags: string;
|
|
228
|
+
id: string;
|
|
229
|
+
discarded: boolean;
|
|
230
|
+
}
|
|
231
|
+
interface XmlColumnPropertyCfg {
|
|
232
|
+
type: string;
|
|
233
|
+
subkeys: string;
|
|
234
|
+
colname: string;
|
|
235
|
+
nickname: string;
|
|
236
|
+
completed: boolean;
|
|
237
|
+
setvar: boolean;
|
|
238
|
+
datatype: string;
|
|
239
|
+
valerrname: string;
|
|
240
|
+
pattern: string;
|
|
241
|
+
flags: string;
|
|
242
|
+
id: string;
|
|
243
|
+
discarded: boolean;
|
|
244
|
+
}
|
|
245
|
+
interface XmlColumnOcrCfg {
|
|
246
|
+
type: string;
|
|
247
|
+
colname: string;
|
|
248
|
+
nickname: string;
|
|
249
|
+
completed: boolean;
|
|
250
|
+
setvar: boolean;
|
|
251
|
+
datatype: string;
|
|
252
|
+
valerrname: string;
|
|
253
|
+
pattern: string;
|
|
254
|
+
flags: string;
|
|
255
|
+
id: string;
|
|
256
|
+
discarded: boolean;
|
|
257
|
+
}
|
|
258
|
+
interface XmlColumnSubtaskCfg {
|
|
259
|
+
type: string;
|
|
260
|
+
colname: string;
|
|
261
|
+
nickname: string;
|
|
262
|
+
completed: boolean;
|
|
263
|
+
setvar: boolean;
|
|
264
|
+
datatype: string;
|
|
265
|
+
valerrname: string;
|
|
266
|
+
pattern: string;
|
|
267
|
+
flags: string;
|
|
268
|
+
id: string;
|
|
269
|
+
discarded: boolean;
|
|
270
|
+
}
|
|
271
|
+
interface XmlColumnTemplstrCfg {
|
|
272
|
+
type: string;
|
|
273
|
+
colname: string;
|
|
274
|
+
nickname: string;
|
|
275
|
+
completed: boolean;
|
|
276
|
+
setvar: boolean;
|
|
277
|
+
datatype: string;
|
|
278
|
+
valerrname: string;
|
|
279
|
+
pattern: string;
|
|
280
|
+
flags: string;
|
|
281
|
+
id: string;
|
|
282
|
+
discarded: boolean;
|
|
283
|
+
}
|
|
70
284
|
interface XmlActionGotoCfg {
|
|
71
285
|
type: string;
|
|
72
286
|
url: string;
|
|
@@ -102,6 +316,15 @@ interface XmlActionIfelseCfg {
|
|
|
102
316
|
type: string;
|
|
103
317
|
id: string;
|
|
104
318
|
}
|
|
319
|
+
interface XmlConditionElementCfg {
|
|
320
|
+
type: string;
|
|
321
|
+
}
|
|
322
|
+
interface XmlConditionElseCfg {
|
|
323
|
+
type: string;
|
|
324
|
+
}
|
|
325
|
+
interface XmlConditionTemplstrCfg {
|
|
326
|
+
type: string;
|
|
327
|
+
}
|
|
105
328
|
interface XmlActionInputCfg {
|
|
106
329
|
type: string;
|
|
107
330
|
content: string;
|
|
@@ -132,6 +355,61 @@ interface XmlActionInterceptSetCfg {
|
|
|
132
355
|
type: string;
|
|
133
356
|
subtype: string;
|
|
134
357
|
}
|
|
358
|
+
interface XmlRequestAbortCfg {
|
|
359
|
+
type: string;
|
|
360
|
+
method: string;
|
|
361
|
+
url: string;
|
|
362
|
+
resourcetype: string;
|
|
363
|
+
postdata: string;
|
|
364
|
+
}
|
|
365
|
+
interface XmlResponseCacheCfg {
|
|
366
|
+
type: string;
|
|
367
|
+
method: string;
|
|
368
|
+
url: string;
|
|
369
|
+
resourcetype: string;
|
|
370
|
+
postdata: string;
|
|
371
|
+
minsize: string;
|
|
372
|
+
maxsize: string;
|
|
373
|
+
contenttype: string;
|
|
374
|
+
responses: boolean;
|
|
375
|
+
tabname: string;
|
|
376
|
+
requestheaders: boolean;
|
|
377
|
+
responseheaders: boolean;
|
|
378
|
+
}
|
|
379
|
+
interface XmlRequestDataCfg {
|
|
380
|
+
type: string;
|
|
381
|
+
subkeys: string;
|
|
382
|
+
value: string;
|
|
383
|
+
datatype: string;
|
|
384
|
+
}
|
|
385
|
+
interface XmlRequestHeaderCfg {
|
|
386
|
+
type: string;
|
|
387
|
+
name: string;
|
|
388
|
+
value: string;
|
|
389
|
+
}
|
|
390
|
+
interface XmlResponseSaveCfg {
|
|
391
|
+
type: string;
|
|
392
|
+
method: string;
|
|
393
|
+
url: string;
|
|
394
|
+
resourcetype: string;
|
|
395
|
+
postdata: string;
|
|
396
|
+
minsize: string;
|
|
397
|
+
maxsize: string;
|
|
398
|
+
basedir: string;
|
|
399
|
+
pathtype: string;
|
|
400
|
+
hashmethod: string;
|
|
401
|
+
hostnameparts: string;
|
|
402
|
+
extname: string;
|
|
403
|
+
validextnames: string;
|
|
404
|
+
params: string;
|
|
405
|
+
encoding: string;
|
|
406
|
+
tabname: string;
|
|
407
|
+
}
|
|
408
|
+
interface XmlResponseStatuscodeCfg {
|
|
409
|
+
type: string;
|
|
410
|
+
codes: string;
|
|
411
|
+
errname: string;
|
|
412
|
+
}
|
|
135
413
|
interface XmlActionLoopdowhileElementCfg {
|
|
136
414
|
type: string;
|
|
137
415
|
subtype: string;
|
|
@@ -264,6 +542,13 @@ interface XmlActionSelectCfg {
|
|
|
264
542
|
pv2: string;
|
|
265
543
|
id: string;
|
|
266
544
|
}
|
|
545
|
+
interface XmlDbqueryCfg {
|
|
546
|
+
dbname: string;
|
|
547
|
+
sql: string;
|
|
548
|
+
sqlparas: string;
|
|
549
|
+
parasplit: string;
|
|
550
|
+
join: string;
|
|
551
|
+
}
|
|
267
552
|
interface XmlActionSetvarDbqueryCfg {
|
|
268
553
|
type: string;
|
|
269
554
|
subtype: string;
|
|
@@ -287,6 +572,10 @@ interface XmlActionSetvarElementCfg {
|
|
|
287
572
|
path: string;
|
|
288
573
|
id: string;
|
|
289
574
|
}
|
|
575
|
+
interface XmlFileCfg {
|
|
576
|
+
path: string;
|
|
577
|
+
encoding: string;
|
|
578
|
+
}
|
|
290
579
|
interface XmlActionSetvarFileCfg {
|
|
291
580
|
type: string;
|
|
292
581
|
subtype: string;
|
|
@@ -343,6 +632,14 @@ interface XmlActionSetvarTemplstrCfg {
|
|
|
343
632
|
path: string;
|
|
344
633
|
id: string;
|
|
345
634
|
}
|
|
635
|
+
interface XmlSubtaskCfg {
|
|
636
|
+
tid: string;
|
|
637
|
+
parasstr: string;
|
|
638
|
+
idx: string;
|
|
639
|
+
minlen: string;
|
|
640
|
+
errname: string;
|
|
641
|
+
popuppage: boolean;
|
|
642
|
+
}
|
|
346
643
|
interface XmlActionSubtaskCfg {
|
|
347
644
|
type: string;
|
|
348
645
|
subtasks: string;
|
|
@@ -383,6 +680,16 @@ interface XmlFontsvgCfg {
|
|
|
383
680
|
csmaptype: string;
|
|
384
681
|
bsfilter: string;
|
|
385
682
|
}
|
|
683
|
+
interface XmlFontttfCfg {
|
|
684
|
+
exloc: string;
|
|
685
|
+
inloc: string;
|
|
686
|
+
minuc: string;
|
|
687
|
+
maxuc: string;
|
|
688
|
+
startidx: string;
|
|
689
|
+
fsfilter: string;
|
|
690
|
+
fufilter: string;
|
|
691
|
+
parsetype: string;
|
|
692
|
+
}
|
|
386
693
|
interface XmlFontselectorCfg {
|
|
387
694
|
name: string;
|
|
388
695
|
fontfamily: string;
|
|
@@ -400,7 +707,621 @@ interface XmlFontcharsCfg {
|
|
|
400
707
|
name: string;
|
|
401
708
|
chars: string;
|
|
402
709
|
}
|
|
403
|
-
|
|
710
|
+
interface XmlMyfunCfg {
|
|
711
|
+
name: string;
|
|
712
|
+
desc: string;
|
|
713
|
+
base64: boolean;
|
|
714
|
+
script: string;
|
|
715
|
+
}
|
|
716
|
+
interface XmlMyfunCfg {
|
|
717
|
+
name: string;
|
|
718
|
+
arg2: string;
|
|
719
|
+
arg3: string;
|
|
720
|
+
arg4: string;
|
|
721
|
+
arg5: string;
|
|
722
|
+
funbody: string;
|
|
723
|
+
}
|
|
724
|
+
interface XmlCaptchaFuncaptchaCfg {
|
|
725
|
+
type: string;
|
|
726
|
+
}
|
|
727
|
+
interface XmlCaptchaGeetestCfg {
|
|
728
|
+
type: string;
|
|
729
|
+
}
|
|
730
|
+
interface XmlCaptchaKeycaptchaCfg {
|
|
731
|
+
type: string;
|
|
732
|
+
}
|
|
733
|
+
interface XmlCaptchaMtcaptchaCfg {
|
|
734
|
+
type: string;
|
|
735
|
+
}
|
|
736
|
+
interface XmlCaptchaRecaptchaCfg {
|
|
737
|
+
type: string;
|
|
738
|
+
minscore: string;
|
|
739
|
+
}
|
|
740
|
+
interface XmlCaptchaTurnstileCfg {
|
|
741
|
+
type: string;
|
|
742
|
+
}
|
|
743
|
+
interface XmlImageElementCfg {
|
|
744
|
+
type: string;
|
|
745
|
+
}
|
|
746
|
+
interface XmlCommentElementCfg {
|
|
747
|
+
type: string;
|
|
748
|
+
attr: string;
|
|
749
|
+
}
|
|
750
|
+
interface XmlInputElementCfg {
|
|
751
|
+
type: string;
|
|
752
|
+
enter: boolean;
|
|
753
|
+
}
|
|
754
|
+
interface XmlSubmitElementCfg {
|
|
755
|
+
type: string;
|
|
756
|
+
}
|
|
757
|
+
interface XmlCheckResultCfg {
|
|
758
|
+
type: string;
|
|
759
|
+
attr: string;
|
|
760
|
+
failedstr: string;
|
|
761
|
+
}
|
|
762
|
+
interface XmlCaptchaAmazonCfg {
|
|
763
|
+
type: string;
|
|
764
|
+
}
|
|
765
|
+
interface XmlCaptchaTextCfg {
|
|
766
|
+
type: string;
|
|
767
|
+
case: boolean;
|
|
768
|
+
lang: string;
|
|
769
|
+
math: boolean;
|
|
770
|
+
maxlen: string;
|
|
771
|
+
minlen: string;
|
|
772
|
+
question: boolean;
|
|
773
|
+
space: boolean;
|
|
774
|
+
texttype: string;
|
|
775
|
+
}
|
|
776
|
+
interface XmlCaptchaCoordinateCfg {
|
|
777
|
+
type: string;
|
|
778
|
+
lang: string;
|
|
779
|
+
maxclicks: string;
|
|
780
|
+
minclicks: string;
|
|
781
|
+
}
|
|
782
|
+
interface XmlTransformCfg {
|
|
783
|
+
defaultval: string;
|
|
784
|
+
usevar: boolean;
|
|
785
|
+
}
|
|
786
|
+
interface XmlFunCGethtmlCfg {
|
|
787
|
+
type: string;
|
|
788
|
+
subtype: string;
|
|
789
|
+
loc: string;
|
|
790
|
+
}
|
|
791
|
+
interface XmlFunCGettextCfg {
|
|
792
|
+
type: string;
|
|
793
|
+
subtype: string;
|
|
794
|
+
loc: string;
|
|
795
|
+
}
|
|
796
|
+
interface XmlFunCHtmlCfg {
|
|
797
|
+
type: string;
|
|
798
|
+
subtype: string;
|
|
799
|
+
loc: string;
|
|
800
|
+
content: string;
|
|
801
|
+
}
|
|
802
|
+
interface XmlFunCRemoveCfg {
|
|
803
|
+
type: string;
|
|
804
|
+
subtype: string;
|
|
805
|
+
loc: string;
|
|
806
|
+
}
|
|
807
|
+
interface XmlFunCTextCfg {
|
|
808
|
+
type: string;
|
|
809
|
+
subtype: string;
|
|
810
|
+
loc: string;
|
|
811
|
+
content: string;
|
|
812
|
+
}
|
|
813
|
+
interface XmlFunCeilCfg {
|
|
814
|
+
type: string;
|
|
815
|
+
}
|
|
816
|
+
interface XmlFunClosingsubstrCfg {
|
|
817
|
+
type: string;
|
|
818
|
+
openchar: string;
|
|
819
|
+
startstr: string;
|
|
820
|
+
position: string;
|
|
821
|
+
}
|
|
822
|
+
interface XmlFunCompressCfg {
|
|
823
|
+
type: string;
|
|
824
|
+
method: string;
|
|
825
|
+
flush: string;
|
|
826
|
+
finishflush: string;
|
|
827
|
+
chunksize: string;
|
|
828
|
+
windowbits: string;
|
|
829
|
+
level: string;
|
|
830
|
+
memlevel: string;
|
|
831
|
+
strategy: string;
|
|
832
|
+
sourceencoding: string;
|
|
833
|
+
targetencoding: string;
|
|
834
|
+
}
|
|
835
|
+
interface XmlFunConcatCfg {
|
|
836
|
+
type: string;
|
|
837
|
+
str1: string;
|
|
838
|
+
str2: string;
|
|
839
|
+
str3: string;
|
|
840
|
+
str4: string;
|
|
841
|
+
str5: string;
|
|
842
|
+
}
|
|
843
|
+
interface XmlFunConvertencodingCfg {
|
|
844
|
+
type: string;
|
|
845
|
+
sourceencoding: string;
|
|
846
|
+
targetencoding: string;
|
|
847
|
+
start: string;
|
|
848
|
+
end: string;
|
|
849
|
+
}
|
|
850
|
+
interface XmlFunDecompressCfg {
|
|
851
|
+
type: string;
|
|
852
|
+
method: string;
|
|
853
|
+
flush: string;
|
|
854
|
+
finishflush: string;
|
|
855
|
+
chunksize: string;
|
|
856
|
+
windowbits: string;
|
|
857
|
+
sourceencoding: string;
|
|
858
|
+
targetencoding: string;
|
|
859
|
+
start: string;
|
|
860
|
+
end: string;
|
|
861
|
+
}
|
|
862
|
+
interface XmlFunDecodeCfg {
|
|
863
|
+
type: string;
|
|
864
|
+
str1: string;
|
|
865
|
+
val1: string;
|
|
866
|
+
str2: string;
|
|
867
|
+
val2: string;
|
|
868
|
+
str3: string;
|
|
869
|
+
val3: string;
|
|
870
|
+
str4: string;
|
|
871
|
+
val4: string;
|
|
872
|
+
str5: string;
|
|
873
|
+
val5: string;
|
|
874
|
+
defaultval: string;
|
|
875
|
+
}
|
|
876
|
+
interface XmlFunFloorCfg {
|
|
877
|
+
type: string;
|
|
878
|
+
}
|
|
879
|
+
interface XmlFunHashcodeCfg {
|
|
880
|
+
type: string;
|
|
881
|
+
hashmethod: string;
|
|
882
|
+
}
|
|
883
|
+
interface XmlFunIncludesCfg {
|
|
884
|
+
type: string;
|
|
885
|
+
searchstr: string;
|
|
886
|
+
}
|
|
887
|
+
interface XmlFunInsertCfg {
|
|
888
|
+
type: string;
|
|
889
|
+
str: string;
|
|
890
|
+
}
|
|
891
|
+
interface XmlFunItemstoobjCfg {
|
|
892
|
+
type: string;
|
|
893
|
+
split: string;
|
|
894
|
+
kvsplit: string;
|
|
895
|
+
keytrim: boolean;
|
|
896
|
+
keynows: boolean;
|
|
897
|
+
valuetrim: boolean;
|
|
898
|
+
valuenows: boolean;
|
|
899
|
+
}
|
|
900
|
+
interface XmlFunJsonparseCfg {
|
|
901
|
+
type: string;
|
|
902
|
+
key1: string;
|
|
903
|
+
key2: string;
|
|
904
|
+
key3: string;
|
|
905
|
+
key4: string;
|
|
906
|
+
key5: string;
|
|
907
|
+
key6: string;
|
|
908
|
+
keys: boolean;
|
|
909
|
+
length: boolean;
|
|
910
|
+
srctype: string;
|
|
911
|
+
}
|
|
912
|
+
interface XmlFunLengthCfg {
|
|
913
|
+
type: string;
|
|
914
|
+
}
|
|
915
|
+
interface XmlFunMatchallCfg {
|
|
916
|
+
type: string;
|
|
917
|
+
pattern: string;
|
|
918
|
+
flags: string;
|
|
919
|
+
filter: string;
|
|
920
|
+
start: string;
|
|
921
|
+
end: string;
|
|
922
|
+
resulttype: string;
|
|
923
|
+
join: string;
|
|
924
|
+
}
|
|
925
|
+
interface XmlFunMaxCfg {
|
|
926
|
+
type: string;
|
|
927
|
+
split: string;
|
|
928
|
+
}
|
|
929
|
+
interface XmlFunMinCfg {
|
|
930
|
+
type: string;
|
|
931
|
+
split: string;
|
|
932
|
+
}
|
|
933
|
+
interface XmlFunNumeqCfg {
|
|
934
|
+
type: string;
|
|
935
|
+
num: string;
|
|
936
|
+
}
|
|
937
|
+
interface XmlFunNumgeCfg {
|
|
938
|
+
type: string;
|
|
939
|
+
num: string;
|
|
940
|
+
}
|
|
941
|
+
interface XmlFunNumgtCfg {
|
|
942
|
+
type: string;
|
|
943
|
+
num: string;
|
|
944
|
+
}
|
|
945
|
+
interface XmlFunNumleCfg {
|
|
946
|
+
type: string;
|
|
947
|
+
num: string;
|
|
948
|
+
}
|
|
949
|
+
interface XmlFunNumltCfg {
|
|
950
|
+
type: string;
|
|
951
|
+
num: string;
|
|
952
|
+
}
|
|
953
|
+
interface XmlFunNotincludesCfg {
|
|
954
|
+
type: string;
|
|
955
|
+
searchstr: string;
|
|
956
|
+
}
|
|
957
|
+
interface XmlFunPadendCfg {
|
|
958
|
+
type: string;
|
|
959
|
+
targetlen: string;
|
|
960
|
+
padstr: string;
|
|
961
|
+
}
|
|
962
|
+
interface XmlFunPadstartCfg {
|
|
963
|
+
type: string;
|
|
964
|
+
targetlen: string;
|
|
965
|
+
padstr: string;
|
|
966
|
+
}
|
|
967
|
+
interface XmlFunParsefloatCfg {
|
|
968
|
+
type: string;
|
|
969
|
+
}
|
|
970
|
+
interface XmlFunParseintCfg {
|
|
971
|
+
type: string;
|
|
972
|
+
}
|
|
973
|
+
interface XmlFunRandomarryidxesCfg {
|
|
974
|
+
type: string;
|
|
975
|
+
join: string;
|
|
976
|
+
}
|
|
977
|
+
interface XmlFunReplaceCfg {
|
|
978
|
+
type: string;
|
|
979
|
+
substr: string;
|
|
980
|
+
newstr: string;
|
|
981
|
+
regexp: boolean;
|
|
982
|
+
flags: string;
|
|
983
|
+
}
|
|
984
|
+
interface XmlFunRoundCfg {
|
|
985
|
+
type: string;
|
|
986
|
+
}
|
|
987
|
+
interface XmlFunSetcomplementCfg {
|
|
988
|
+
type: string;
|
|
989
|
+
array2: string;
|
|
990
|
+
split1: string;
|
|
991
|
+
split2: string;
|
|
992
|
+
sorttype: string;
|
|
993
|
+
valtype: string;
|
|
994
|
+
limit: string;
|
|
995
|
+
join: string;
|
|
996
|
+
}
|
|
997
|
+
interface XmlFunSetdifferenceCfg {
|
|
998
|
+
type: string;
|
|
999
|
+
array2: string;
|
|
1000
|
+
split1: string;
|
|
1001
|
+
split2: string;
|
|
1002
|
+
sorttype: string;
|
|
1003
|
+
valtype: string;
|
|
1004
|
+
limit: string;
|
|
1005
|
+
join: string;
|
|
1006
|
+
}
|
|
1007
|
+
interface XmlFunSetintersectionCfg {
|
|
1008
|
+
type: string;
|
|
1009
|
+
array2: string;
|
|
1010
|
+
split1: string;
|
|
1011
|
+
split2: string;
|
|
1012
|
+
sorttype: string;
|
|
1013
|
+
valtype: string;
|
|
1014
|
+
limit: string;
|
|
1015
|
+
join: string;
|
|
1016
|
+
}
|
|
1017
|
+
interface XmlFunSetunionCfg {
|
|
1018
|
+
type: string;
|
|
1019
|
+
array2: string;
|
|
1020
|
+
split1: string;
|
|
1021
|
+
split2: string;
|
|
1022
|
+
sorttype: string;
|
|
1023
|
+
valtype: string;
|
|
1024
|
+
limit: string;
|
|
1025
|
+
join: string;
|
|
1026
|
+
}
|
|
1027
|
+
interface XmlFunSliceCfg {
|
|
1028
|
+
type: string;
|
|
1029
|
+
beginidx: string;
|
|
1030
|
+
endidx: string;
|
|
1031
|
+
}
|
|
1032
|
+
interface XmlFunShuffleCfg {
|
|
1033
|
+
type: string;
|
|
1034
|
+
split: string;
|
|
1035
|
+
}
|
|
1036
|
+
interface XmlFunSortCfg {
|
|
1037
|
+
type: string;
|
|
1038
|
+
split: string;
|
|
1039
|
+
valtype: string;
|
|
1040
|
+
sorttype: string;
|
|
1041
|
+
join: string;
|
|
1042
|
+
}
|
|
1043
|
+
interface XmlFunSpecialconvertCfg {
|
|
1044
|
+
type: string;
|
|
1045
|
+
method: string;
|
|
1046
|
+
para1: string;
|
|
1047
|
+
para2: string;
|
|
1048
|
+
para3: string;
|
|
1049
|
+
para4: string;
|
|
1050
|
+
para5: string;
|
|
1051
|
+
}
|
|
1052
|
+
interface XmlFunSplitCfg {
|
|
1053
|
+
type: string;
|
|
1054
|
+
split: string;
|
|
1055
|
+
limit: string;
|
|
1056
|
+
filter: string;
|
|
1057
|
+
start: string;
|
|
1058
|
+
end: string;
|
|
1059
|
+
resulttype: string;
|
|
1060
|
+
val: string;
|
|
1061
|
+
insertstr: string;
|
|
1062
|
+
join: string;
|
|
1063
|
+
}
|
|
1064
|
+
interface XmlFunStreqCfg {
|
|
1065
|
+
type: string;
|
|
1066
|
+
str: string;
|
|
1067
|
+
}
|
|
1068
|
+
interface XmlFunSubstrafterCfg {
|
|
1069
|
+
type: string;
|
|
1070
|
+
substr: string;
|
|
1071
|
+
position: string;
|
|
1072
|
+
last: boolean;
|
|
1073
|
+
include: boolean;
|
|
1074
|
+
}
|
|
1075
|
+
interface XmlFunSubstrbeforeCfg {
|
|
1076
|
+
type: string;
|
|
1077
|
+
substr: string;
|
|
1078
|
+
position: string;
|
|
1079
|
+
last: boolean;
|
|
1080
|
+
include: boolean;
|
|
1081
|
+
}
|
|
1082
|
+
interface XmlFunToboolCfg {
|
|
1083
|
+
type: string;
|
|
1084
|
+
}
|
|
1085
|
+
interface XmlFunTolowercaseCfg {
|
|
1086
|
+
type: string;
|
|
1087
|
+
}
|
|
1088
|
+
interface XmlFunTonumCfg {
|
|
1089
|
+
type: string;
|
|
1090
|
+
}
|
|
1091
|
+
interface XmlFunTouppercaseCfg {
|
|
1092
|
+
type: string;
|
|
1093
|
+
}
|
|
1094
|
+
interface XmlFunTrimCfg {
|
|
1095
|
+
type: string;
|
|
1096
|
+
}
|
|
1097
|
+
interface XmlFunTrimendCfg {
|
|
1098
|
+
type: string;
|
|
1099
|
+
}
|
|
1100
|
+
interface XmlFunTrimstartCfg {
|
|
1101
|
+
type: string;
|
|
1102
|
+
}
|
|
1103
|
+
interface XmlFunUniqCfg {
|
|
1104
|
+
type: string;
|
|
1105
|
+
split: string;
|
|
1106
|
+
join: string;
|
|
1107
|
+
}
|
|
1108
|
+
interface XmlFunUrldelparamsCfg {
|
|
1109
|
+
type: string;
|
|
1110
|
+
param1: string;
|
|
1111
|
+
param2: string;
|
|
1112
|
+
param3: string;
|
|
1113
|
+
}
|
|
1114
|
+
interface XmlFunUrldecodeCfg {
|
|
1115
|
+
type: string;
|
|
1116
|
+
method: string;
|
|
1117
|
+
encoding: string;
|
|
1118
|
+
percent: boolean;
|
|
1119
|
+
}
|
|
1120
|
+
interface XmlFunUrlencodeCfg {
|
|
1121
|
+
type: string;
|
|
1122
|
+
method: string;
|
|
1123
|
+
encoding: string;
|
|
1124
|
+
percent: boolean;
|
|
1125
|
+
}
|
|
1126
|
+
interface XmlFunUrlgetCfg {
|
|
1127
|
+
type: string;
|
|
1128
|
+
attrorpath: string;
|
|
1129
|
+
hashmethod: string;
|
|
1130
|
+
hostnameparts: string;
|
|
1131
|
+
params: string;
|
|
1132
|
+
param: string;
|
|
1133
|
+
extname: string;
|
|
1134
|
+
validextnames: string;
|
|
1135
|
+
title: string;
|
|
1136
|
+
}
|
|
1137
|
+
interface XmlFunUrlsetparamsCfg {
|
|
1138
|
+
type: string;
|
|
1139
|
+
param1: string;
|
|
1140
|
+
val1: string;
|
|
1141
|
+
param2: string;
|
|
1142
|
+
val2: string;
|
|
1143
|
+
param3: string;
|
|
1144
|
+
val3: string;
|
|
1145
|
+
replace: boolean;
|
|
1146
|
+
}
|
|
1147
|
+
interface XmlFunMyfunCfg {
|
|
1148
|
+
type: string;
|
|
1149
|
+
name: string;
|
|
1150
|
+
arg1: string;
|
|
1151
|
+
arg2: string;
|
|
1152
|
+
arg3: string;
|
|
1153
|
+
arg4: string;
|
|
1154
|
+
arg5: string;
|
|
1155
|
+
}
|
|
1156
|
+
interface XmlGetAddedtaskparasCfg {
|
|
1157
|
+
type: string;
|
|
1158
|
+
tid: string;
|
|
1159
|
+
limit: string;
|
|
1160
|
+
join: string;
|
|
1161
|
+
interval: string;
|
|
1162
|
+
}
|
|
1163
|
+
interface XmlGetContentCfg {
|
|
1164
|
+
type: string;
|
|
1165
|
+
}
|
|
1166
|
+
interface XmlGetCookiesCfg {
|
|
1167
|
+
type: string;
|
|
1168
|
+
urls: string;
|
|
1169
|
+
domain: string;
|
|
1170
|
+
name: string;
|
|
1171
|
+
path: string;
|
|
1172
|
+
value: boolean;
|
|
1173
|
+
}
|
|
1174
|
+
interface XmlGetDatetimeCfg {
|
|
1175
|
+
type: string;
|
|
1176
|
+
format: string;
|
|
1177
|
+
}
|
|
1178
|
+
interface XmlGetExecedtaskparasCfg {
|
|
1179
|
+
type: string;
|
|
1180
|
+
tid: string;
|
|
1181
|
+
limit: string;
|
|
1182
|
+
join: string;
|
|
1183
|
+
interval: string;
|
|
1184
|
+
}
|
|
1185
|
+
interface XmlGetFileCfg {
|
|
1186
|
+
type: string;
|
|
1187
|
+
url: string;
|
|
1188
|
+
path: string;
|
|
1189
|
+
basedir: string;
|
|
1190
|
+
pathtype: string;
|
|
1191
|
+
hashmethod: string;
|
|
1192
|
+
hostnameparts: string;
|
|
1193
|
+
params: string;
|
|
1194
|
+
extname: string;
|
|
1195
|
+
validextnames: string;
|
|
1196
|
+
pathvarname: string;
|
|
1197
|
+
proxy: boolean;
|
|
1198
|
+
headers: string;
|
|
1199
|
+
referer: string;
|
|
1200
|
+
setvar: boolean;
|
|
1201
|
+
}
|
|
1202
|
+
interface XmlHttpbrowserCfg {
|
|
1203
|
+
name: string;
|
|
1204
|
+
minversion: string;
|
|
1205
|
+
maxversion: string;
|
|
1206
|
+
httpversion: string;
|
|
1207
|
+
}
|
|
1208
|
+
interface XmlHttpdeviceCfg {
|
|
1209
|
+
device: string;
|
|
1210
|
+
}
|
|
1211
|
+
interface XmlHttpheaderCfg {
|
|
1212
|
+
name: string;
|
|
1213
|
+
source: string;
|
|
1214
|
+
value: string;
|
|
1215
|
+
delete: boolean;
|
|
1216
|
+
}
|
|
1217
|
+
interface XmlHttposCfg {
|
|
1218
|
+
os: string;
|
|
1219
|
+
}
|
|
1220
|
+
interface XmlGetHttpheadersCfg {
|
|
1221
|
+
type: string;
|
|
1222
|
+
headermethod: string;
|
|
1223
|
+
origheaders: string;
|
|
1224
|
+
browserlist: string;
|
|
1225
|
+
httpversion: string;
|
|
1226
|
+
}
|
|
1227
|
+
interface XmlGetMhtmlCfg {
|
|
1228
|
+
type: string;
|
|
1229
|
+
path: string;
|
|
1230
|
+
basedir: string;
|
|
1231
|
+
pathtype: string;
|
|
1232
|
+
hashmethod: string;
|
|
1233
|
+
hostnameparts: string;
|
|
1234
|
+
pathvarname: string;
|
|
1235
|
+
}
|
|
1236
|
+
interface XmlGetPdfCfg {
|
|
1237
|
+
type: string;
|
|
1238
|
+
path: string;
|
|
1239
|
+
basedir: string;
|
|
1240
|
+
pathtype: string;
|
|
1241
|
+
hashmethod: string;
|
|
1242
|
+
hostnameparts: string;
|
|
1243
|
+
scale: string;
|
|
1244
|
+
displayheaderfooter: boolean;
|
|
1245
|
+
headertemplate: string;
|
|
1246
|
+
footertemplate: string;
|
|
1247
|
+
printbackground: boolean;
|
|
1248
|
+
landscape: boolean;
|
|
1249
|
+
pageranges: string;
|
|
1250
|
+
format: string;
|
|
1251
|
+
width: string;
|
|
1252
|
+
height: string;
|
|
1253
|
+
top: string;
|
|
1254
|
+
right: string;
|
|
1255
|
+
bottom: string;
|
|
1256
|
+
left: string;
|
|
1257
|
+
screen: boolean;
|
|
1258
|
+
onepage: boolean;
|
|
1259
|
+
hmargin: string;
|
|
1260
|
+
setvar: boolean;
|
|
1261
|
+
pathvarname: string;
|
|
1262
|
+
}
|
|
1263
|
+
interface XmlGetQueuedcntwithparasCfg {
|
|
1264
|
+
type: string;
|
|
1265
|
+
tid: string;
|
|
1266
|
+
parasstr: string;
|
|
1267
|
+
operator: string;
|
|
1268
|
+
}
|
|
1269
|
+
interface XmlGetQueuedtaskparasCfg {
|
|
1270
|
+
type: string;
|
|
1271
|
+
tid: string;
|
|
1272
|
+
limit: string;
|
|
1273
|
+
join: string;
|
|
1274
|
+
interval: string;
|
|
1275
|
+
}
|
|
1276
|
+
interface XmlGetRandomCfg {
|
|
1277
|
+
type: string;
|
|
1278
|
+
min: string;
|
|
1279
|
+
max: string;
|
|
1280
|
+
}
|
|
1281
|
+
interface XmlGetResponseCfg {
|
|
1282
|
+
type: string;
|
|
1283
|
+
requrl: string;
|
|
1284
|
+
idx: string;
|
|
1285
|
+
length: boolean;
|
|
1286
|
+
}
|
|
1287
|
+
interface XmlGetScreenshotCfg {
|
|
1288
|
+
type: string;
|
|
1289
|
+
basedir: string;
|
|
1290
|
+
path: string;
|
|
1291
|
+
extname: string;
|
|
1292
|
+
quality: string;
|
|
1293
|
+
fullpage: boolean;
|
|
1294
|
+
x: string;
|
|
1295
|
+
y: string;
|
|
1296
|
+
width: string;
|
|
1297
|
+
height: string;
|
|
1298
|
+
omitbackground: boolean;
|
|
1299
|
+
setvar: boolean;
|
|
1300
|
+
pathvarname: string;
|
|
1301
|
+
}
|
|
1302
|
+
interface XmlGetSpecialCfg {
|
|
1303
|
+
type: string;
|
|
1304
|
+
method: string;
|
|
1305
|
+
parasstr: string;
|
|
1306
|
+
}
|
|
1307
|
+
interface XmlGetTitleCfg {
|
|
1308
|
+
type: string;
|
|
1309
|
+
aspath: boolean;
|
|
1310
|
+
}
|
|
1311
|
+
interface XmlGetWindowCfg {
|
|
1312
|
+
type: string;
|
|
1313
|
+
key1: string;
|
|
1314
|
+
key2: string;
|
|
1315
|
+
key3: string;
|
|
1316
|
+
key4: string;
|
|
1317
|
+
key5: string;
|
|
1318
|
+
key6: string;
|
|
1319
|
+
keys: boolean;
|
|
1320
|
+
length: boolean;
|
|
1321
|
+
}
|
|
1322
|
+
type XmlActionConfig = XmlActionApiCfg | XmlActionBreakCfg | XmlActionCaptchaCfg | XmlActionClickCfg | XmlActionContinueCfg | XmlActionMiscCfg | XmlActionExitCfg | XmlActionExtractCfg | XmlActionExtractArrayCfg | XmlActionExtractScriptCfg | XmlActionExtractTableCfg | XmlActionGotoCfg | XmlActionHoverCfg | XmlActionIfelseCfg | XmlActionInputCfg | XmlActionInterceptClearCfg | XmlActionInterceptSetCfg | XmlActionLoopdowhileElementCfg | XmlActionLoopdowhileTemplstrCfg | XmlActionLoopforCfg | XmlActionLoopinelesCfg | XmlActionLoopinstrCfg | XmlActionScrollByCfg | XmlActionScrollIntoviewCfg | XmlActionScrollToCfg | XmlActionSelectCfg | XmlActionSetvarDbqueryCfg | XmlActionSetvarElementCfg | XmlActionSetvarFileCfg | XmlActionSetvarGetCfg | XmlActionSetvarOcrCfg | XmlActionSetvarSubtaskCfg | XmlActionSetvarTemplstrCfg | XmlActionSubtaskCfg | XmlActionWaitElementCfg | XmlActionWaitNavigationCfg | XmlActionWaitSleepCfg;
|
|
1323
|
+
type XmlFunConfig = XmlFunCGethtmlCfg | XmlFunCGettextCfg | XmlFunCHtmlCfg | XmlFunCRemoveCfg | XmlFunCTextCfg | XmlFunCeilCfg | XmlFunClosingsubstrCfg | XmlFunCompressCfg | XmlFunConcatCfg | XmlFunConvertencodingCfg | XmlFunDecompressCfg | XmlFunDecodeCfg | XmlFunFloorCfg | XmlFunHashcodeCfg | XmlFunIncludesCfg | XmlFunInsertCfg | XmlFunItemstoobjCfg | XmlFunJsonparseCfg | XmlFunLengthCfg | XmlFunMatchallCfg | XmlFunMaxCfg | XmlFunMinCfg | XmlFunNumeqCfg | XmlFunNumgeCfg | XmlFunNumgtCfg | XmlFunNumleCfg | XmlFunNumltCfg | XmlFunNotincludesCfg | XmlFunPadendCfg | XmlFunPadstartCfg | XmlFunParsefloatCfg | XmlFunParseintCfg | XmlFunRandomarryidxesCfg | XmlFunReplaceCfg | XmlFunRoundCfg | XmlFunSetcomplementCfg | XmlFunSetdifferenceCfg | XmlFunSetintersectionCfg | XmlFunSetunionCfg | XmlFunSliceCfg | XmlFunShuffleCfg | XmlFunSortCfg | XmlFunSpecialconvertCfg | XmlFunSplitCfg | XmlFunStreqCfg | XmlFunSubstrafterCfg | XmlFunSubstrbeforeCfg | XmlFunToboolCfg | XmlFunTolowercaseCfg | XmlFunTonumCfg | XmlFunTouppercaseCfg | XmlFunTrimCfg | XmlFunTrimendCfg | XmlFunTrimstartCfg | XmlFunUniqCfg | XmlFunUrldelparamsCfg | XmlFunUrldecodeCfg | XmlFunUrlencodeCfg | XmlFunUrlgetCfg | XmlFunUrlsetparamsCfg | XmlFunMyfunCfg;
|
|
1324
|
+
/**
 * Union of every `XmlGet*Cfg` shape declared in this file.
 * This is the `getCfg` parameter type of `GetPara.getPara`,
 * `GetPara.getParaInElement` and `GetPara.getParaInPage`.
 */
type XmlGetConfig =
  | XmlGetAddedtaskparasCfg
  | XmlGetContentCfg
  | XmlGetCookiesCfg
  | XmlGetDatetimeCfg
  | XmlGetExecedtaskparasCfg
  | XmlGetFileCfg
  | XmlGetHttpheadersCfg
  | XmlGetMhtmlCfg
  | XmlGetPdfCfg
  | XmlGetQueuedcntwithparasCfg
  | XmlGetQueuedtaskparasCfg
  | XmlGetRandomCfg
  | XmlGetResponseCfg
  | XmlGetScreenshotCfg
  | XmlGetSpecialCfg
  | XmlGetTitleCfg
  | XmlGetWindowCfg;
|
|
404
1325
|
|
|
405
1326
|
/** String-literal union naming the token-style captcha kinds. */
type TokenCaptchaType =
  | "amazon"
  | "funcaptcha"
  | "geetest"
  | "keycaptcha"
  | "mtcaptcha"
  | "recaptcha"
  | "turnstile";
|
|
406
1327
|
/** String-literal union naming the recognition-style captcha kinds. */
type RecognitionCaptchaType =
  | "text"
  | "coordinate"
  | "grid"
  | "slider"
  | "rotation";
|
|
@@ -419,7 +1340,39 @@ interface ScraperStateData extends BrowserStateData {
|
|
|
419
1340
|
*/
|
|
420
1341
|
userData: Record<string, string>;
|
|
421
1342
|
}
|
|
1343
|
+
/**
 * Allowed entries of `LoginOptions.loginWhens`.
 * NOTE(review): presumably selects whether login handling applies to the
 * current page or to a popup page — confirm against the implementation.
 */
type LoginWhen =
  | "current"
  | "popup";
|
|
422
1344
|
/**
 * String-keyed dictionary of string values holding a task's input parameters
 * (the `inParas` argument of the `TaskScraper` constructor).
 */
type InParas = { [paraName: string]: string };
|
|
1345
|
+
/**
 * Loosely typed configuration object for an XML element: string keys, values of any type.
 * (A union that contains `any` collapses to `any`, so `string | boolean | any` is exactly `any`.)
 */
type XmlElementConfig = Record<string, any>;
|
|
1346
|
+
/**
 * Loosely typed configuration object for an element tag: string keys, values of any type.
 * (A union that contains `any` collapses to `any`, so `string | boolean | any` is exactly `any`.)
 */
type ElementTagConfig = Record<string, any>;
|
|
1347
|
+
/**
 * Loosely typed configuration object for an action: string keys, values of any type.
 * (A union that contains `any` collapses to `any`, so `string | boolean | any` is exactly `any`.)
 */
type ActionConfig = Record<string, any>;
|
|
1348
|
+
interface PageMatchInfo {
|
|
1349
|
+
/**
|
|
1350
|
+
* * eurl begins with http: urls = eurl.split(","); matchedFlag = href.startsWith(urls[0]) && urls.slice(1).every(item => href.includes(item));
|
|
1351
|
+
* * eurl doesn't begin with http: matchedFlag = href.match(new RegExp(eurl))
|
|
1352
|
+
* @default ""
|
|
1353
|
+
*/
|
|
1354
|
+
eurl: string;
|
|
1355
|
+
/**
|
|
1356
|
+
* @default ""
|
|
1357
|
+
*/
|
|
1358
|
+
eloc: string;
|
|
1359
|
+
/**
|
|
1360
|
+
* @default ""
|
|
1361
|
+
*/
|
|
1362
|
+
pn1: string;
|
|
1363
|
+
/**
|
|
1364
|
+
* @default ""
|
|
1365
|
+
*/
|
|
1366
|
+
pv1: string;
|
|
1367
|
+
/**
|
|
1368
|
+
* @default ""
|
|
1369
|
+
*/
|
|
1370
|
+
pn2: string;
|
|
1371
|
+
/**
|
|
1372
|
+
* @default ""
|
|
1373
|
+
*/
|
|
1374
|
+
pv2: string;
|
|
1375
|
+
}
|
|
423
1376
|
interface FontttfConfig {
|
|
424
1377
|
exloc: string;
|
|
425
1378
|
inloc: string;
|
|
@@ -442,7 +1395,17 @@ interface FontsConfig {
|
|
|
442
1395
|
fontsvgCfg?: XmlFontsvgCfg;
|
|
443
1396
|
fontttfConfig?: FontttfConfig;
|
|
444
1397
|
}
|
|
1398
|
+
/**
 * Captcha-related options; this is the type of `TaskOptions._captchaOptions`.
 */
interface CaptchaOptions {
|
|
1399
|
+
/** Required list of captcha types. */
captchaTypes: CaptchaType[];
|
|
1400
|
+
/**
 * Optional list of page-match rules (see `PageMatchInfo`).
 * NOTE(review): presumably identifies the pages on which a captcha is expected — confirm against the implementation.
 */
pageMatchInfos?: PageMatchInfo[];
|
|
1401
|
+
}
|
|
1402
|
+
/**
 * Login-related options; this is the type of `TaskOptions._loginOptions`.
 */
interface LoginOptions {
|
|
1403
|
+
/** Required list of `LoginWhen` values ("current" and/or "popup"). */
loginWhens: LoginWhen[];
|
|
1404
|
+
/**
 * Required list of page-match rules (see `PageMatchInfo`).
 * NOTE(review): presumably identifies login pages — confirm against the implementation.
 */
pageMatchInfos: PageMatchInfo[];
|
|
1405
|
+
/** Optional string-to-string dictionary of authentication data (see `AuthInfo`). */
authInfo?: AuthInfo;
|
|
1406
|
+
}
|
|
445
1407
|
/** Where elements are taken from: the live browser page or a cheerio-parsed document. */
type ElementSource =
  | "browser"
  | "cheerio";
|
|
1408
|
+
/** `ElementSource` extended with the extra literal "default". */
type ElementSourceExt =
  | ElementSource
  | "default";
|
|
446
1409
|
interface TemplateInScraper {
|
|
447
1410
|
templateId: TemplateId;
|
|
448
1411
|
domainId: DomainId;
|
|
@@ -457,12 +1420,19 @@ interface TemplateInScraper {
|
|
|
457
1420
|
configDetail: string;
|
|
458
1421
|
capName?: string;
|
|
459
1422
|
}
|
|
1423
|
+
/**
 * Bundle of template data passed as the first argument of the `TaskScraper` constructor.
 */
interface TemplateConfig {
|
|
1424
|
+
/** The template record itself. */
template: TemplateInScraper;
|
|
1425
|
+
/** Action configurations of the template. */
actionConfigs: XmlActionConfig[];
|
|
1426
|
+
/** Fonts configuration, or `null` when there is none. */
fontsConfig: FontsConfig | null;
|
|
1427
|
+
/**
 * String-to-string dictionary of "myfun" scripts.
 * NOTE(review): presumably keyed by function name with the script source as value — confirm.
 */
myfunScripts: Record<string, string>;
|
|
1428
|
+
}
|
|
460
1429
|
/** String-keyed dictionary of string attribute values; returned by `TaskParser.getAttrsInXml`. */
type AttrsInXml = { [attrName: string]: string };
|
|
461
1430
|
/**
 * String-to-string `Map` describing the columns of one data table.
 * `TaskParser.getDatableMapFromJsonCfg` returns a `Map<string, DatatableColumnMap>`.
 * NOTE(review): the meaning of keys and values is not visible in this declaration file — confirm.
 */
type DatatableColumnMap = Map<string, string>;
|
|
462
1431
|
interface ParsedTemplate {
|
|
463
1432
|
actionConfigs: XmlActionConfig[];
|
|
464
1433
|
paraCfgs: XmlParaCfg[];
|
|
465
1434
|
fontsConfig: FontsConfig | null;
|
|
1435
|
+
myfunScripts: Record<string, string>;
|
|
466
1436
|
attrsInXml: AttrsInXml;
|
|
467
1437
|
captchaTypes: CaptchaType[];
|
|
468
1438
|
lastUsedTime: number;
|
|
@@ -471,6 +1441,8 @@ interface ParsedTemplate {
|
|
|
471
1441
|
template?: TemplateInScraper;
|
|
472
1442
|
}
|
|
473
1443
|
/**
 * `ParsedTemplate` with every optional property made required
 * (for example `template`, which is optional on `ParsedTemplate`).
 */
type ParsedTemplateExt = Required<ParsedTemplate>;
|
|
1444
|
+
/**
 * String-keyed dictionary of string values carrying authentication data
 * (used by `LoginOptions.authInfo`, `TaskOptions.authInfo` and `ParasObject.authInfo`).
 */
type AuthInfo = { [key: string]: string };
|
|
1445
|
+
/**
 * String-keyed dictionary of string values carrying user data
 * (used by `ParasObject.userData` and `ExtractScriptInData.vars.userData`).
 */
type UserData = { [key: string]: string };
|
|
474
1446
|
/**
|
|
475
1447
|
* Network context used to execute the task
|
|
476
1448
|
*/
|
|
@@ -501,6 +1473,44 @@ interface TaskNetworkContext {
|
|
|
501
1473
|
*/
|
|
502
1474
|
standaloneApiContext: LsdApiContext | null;
|
|
503
1475
|
}
|
|
1476
|
+
interface TaskOptions {
|
|
1477
|
+
performOneTask?: PerformOneTask;
|
|
1478
|
+
/**
|
|
1479
|
+
* @default 0
|
|
1480
|
+
*/
|
|
1481
|
+
taskId?: number;
|
|
1482
|
+
/**
|
|
1483
|
+
* @default []
|
|
1484
|
+
*/
|
|
1485
|
+
skipActions?: string[];
|
|
1486
|
+
/**
|
|
1487
|
+
* @default []
|
|
1488
|
+
*/
|
|
1489
|
+
extractColumns?: string[];
|
|
1490
|
+
/**
|
|
1491
|
+
* @default "1.0"
|
|
1492
|
+
*/
|
|
1493
|
+
version?: string;
|
|
1494
|
+
/**
|
|
1495
|
+
* @default "::"
|
|
1496
|
+
*/
|
|
1497
|
+
splitStr?: string;
|
|
1498
|
+
/**
|
|
1499
|
+
* @default template.defaultElementSource
|
|
1500
|
+
*/
|
|
1501
|
+
defaultElementSource?: ElementSource;
|
|
1502
|
+
/**
|
|
1503
|
+
* @default { cookies: [], localStorage: [], headers: {}, userData: {} }
|
|
1504
|
+
*/
|
|
1505
|
+
stateData?: ScraperStateData;
|
|
1506
|
+
/**
|
|
1507
|
+
* @default {}
|
|
1508
|
+
*/
|
|
1509
|
+
authInfo?: AuthInfo;
|
|
1510
|
+
_mine?: boolean;
|
|
1511
|
+
_captchaOptions?: CaptchaOptions;
|
|
1512
|
+
_loginOptions?: LoginOptions;
|
|
1513
|
+
}
|
|
504
1514
|
/** One extracted record: string-keyed dictionary of string values. */
type DataRecord = { [columnName: string]: string };
|
|
505
1515
|
/**
 * Extracted data: string-keyed dictionary whose values are arrays of `DataRecord`.
 * NOTE(review): keys are presumably data-table names — confirm.
 */
type ExecData = { [name: string]: DataRecord[] };
|
|
506
1516
|
interface Subtask {
|
|
@@ -535,6 +1545,22 @@ interface TaskResult {
|
|
|
535
1545
|
misc?: TaskMisc;
|
|
536
1546
|
}
|
|
537
1547
|
/** Task type accepted by `performOneTask` / `PerformOneTask` (optional `taskType` parameter). */
type TaskType =
  | "indAsync"
  | "indSync"
  | "memSync";
|
|
1548
|
+
/**
 * Call signature of a function that executes a single task and resolves to its `TaskResult`.
 * It can be supplied through `TaskOptions.performOneTask`.
 *
 * The parameter list mirrors the declared `performOneTask` function; parameter names in a
 * function type are documentation only, so `taskNetworkContext` is spelled here exactly
 * as in that declaration.
 */
type PerformOneTask = (templateId: TemplateId, parasStr: string, taskNetworkContext: TaskNetworkContext, taskType?: TaskType, xmlStr?: string, taskId?: number, useNickName?: boolean) => Promise<TaskResult>;
|
|
1549
|
+
interface TemplateManagerOptions {
|
|
1550
|
+
/**
|
|
1551
|
+
* templateFilename: template${templateId}.xml
|
|
1552
|
+
* @default ./template/
|
|
1553
|
+
*/
|
|
1554
|
+
templateDir?: string;
|
|
1555
|
+
/**
|
|
1556
|
+
* template: GET ${apiBasePath}/template?templateId=xxx&code=xxx
|
|
1557
|
+
* @default "https://web.letsscrapedata.com/api/nologin/"
|
|
1558
|
+
*/
|
|
1559
|
+
apiBasePath?: string;
|
|
1560
|
+
}
|
|
1561
|
+
/**
 * Contract of a template provider.
 * The interface name (`TemplateManger`) is exported under this spelling and is therefore kept as is.
 */
interface TemplateManger {
|
|
1562
|
+
/**
 * Resolves to the parsed template for the given template id.
 * @param templateId numeric id of the template
 * @param code optional code string. NOTE(review): presumably the access code sent to the template API — confirm.
 */
getTemplateConfig(templateId: number, code?: string): Promise<ParsedTemplate>;
|
|
1563
|
+
}
|
|
538
1564
|
interface TemplateTasks {
|
|
539
1565
|
tid: number;
|
|
540
1566
|
parasstrs: string[];
|
|
@@ -592,6 +1618,11 @@ interface ScraperConfig {
|
|
|
592
1618
|
* @default false
|
|
593
1619
|
*/
|
|
594
1620
|
loadUnfinishedTasks?: boolean;
|
|
1621
|
+
/**
|
|
1622
|
+
* unit: minutes
|
|
1623
|
+
* @default 0
|
|
1624
|
+
*/
|
|
1625
|
+
loadFailedTasksInterval?: number;
|
|
595
1626
|
/**
|
|
596
1627
|
* @default "", which will use current directory of process + "/data/"
|
|
597
1628
|
* if not empty, baseDir must be an absolute path, and the directory must exist and have read and write permissions.
|
|
@@ -685,8 +1716,40 @@ interface ScraperConfig {
|
|
|
685
1716
|
*/
|
|
686
1717
|
columnSeperator?: string;
|
|
687
1718
|
}
|
|
1719
|
+
/**
 * Declares no members.
 * NOTE(review): an empty interface is satisfied by any non-nullish value, so this type
 * gives no checking at all; presumably a placeholder — confirm the intended members.
 */
interface SimpleScheduler {
|
|
1720
|
+
}
|
|
1721
|
+
interface MyFunInData {
|
|
1722
|
+
origStr: string;
|
|
1723
|
+
arg1: string;
|
|
1724
|
+
arg2: string;
|
|
1725
|
+
arg3: string;
|
|
1726
|
+
arg4: string;
|
|
1727
|
+
arg5: string;
|
|
1728
|
+
}
|
|
1729
|
+
/**
 * Result type paired with `MyFunInData`.
 * Declared as the primitive `string`; the literal type `"string"` would admit only the
 * exact text "string" and reject every other result value.
 * NOTE(review): presumably the value returned by a user-defined "myfun" script — confirm.
 */
type MyFunResult = string;
|
|
1730
|
+
interface ExtractScriptInData {
|
|
1731
|
+
vars: {
|
|
1732
|
+
inParas: InParas;
|
|
1733
|
+
userData: UserData;
|
|
1734
|
+
[key: string]: any;
|
|
1735
|
+
};
|
|
1736
|
+
responses: ResponseInterceptionItem[];
|
|
1737
|
+
tabName: string;
|
|
1738
|
+
maxLoops: number;
|
|
1739
|
+
errName: string;
|
|
1740
|
+
execData: ExecData;
|
|
1741
|
+
html?: string;
|
|
1742
|
+
}
|
|
1743
|
+
/**
 * Result counterpart of `ExtractScriptInData`.
 */
interface ExtractScriptResult {
|
|
1744
|
+
/** Extracted data (required). */
execData: ExecData;
|
|
1745
|
+
/** Optional error name. NOTE(review): presumably set only when the script reports a failure — confirm. */
errName?: string;
|
|
1746
|
+
}
|
|
688
1747
|
|
|
689
1748
|
/**
 * Takes a `LogFunction` (from `@letsscrapedata/utils`) and returns a boolean.
 * NOTE(review): presumably installs `logFun` as the logger used by this package and
 * returns whether that succeeded — the implementation is not visible here; confirm.
 */
declare function setScraperLogFun(logFun: LogFunction): boolean;
|
|
1749
|
+
/**
 * Logging helper: accepts any number of arguments of any type and resolves with no value.
 * NOTE(review): the name suggests debug level — confirm.
 */
declare function logdbg(...args: any[]): Promise<void>;
|
|
1750
|
+
/**
 * Logging helper: accepts any number of arguments of any type and resolves with no value.
 * NOTE(review): the name suggests info level — confirm.
 */
declare function loginfo(...args: any[]): Promise<void>;
|
|
1751
|
+
/**
 * Logging helper: accepts any number of arguments of any type and resolves with no value.
 * NOTE(review): the name suggests warning level — confirm.
 */
declare function logwarn(...args: any[]): Promise<void>;
|
|
1752
|
+
/**
 * Logging helper: accepts any number of arguments of any type and resolves with no value.
 * NOTE(review): the name suggests error level — confirm.
 */
declare function logerr(...args: any[]): Promise<void>;
|
|
690
1753
|
|
|
691
1754
|
/** 修改node_modules/xml2js/lib/parser.js文件,添加如下内容(根据tagName自动添加type和subtype属性,如action_setvar_element添加 type="setvar" subtype="element"):
|
|
692
1755
|
//////// start of LSD added by Joe ////////////////////////////////////////////////////////////////////////////////////////////
|
|
@@ -719,6 +1782,7 @@ declare class TaskParser {
|
|
|
719
1782
|
static convertXmlToJson(xmlStr: string, defaultCfgFlag?: boolean): Promise<any>;
|
|
720
1783
|
static getPartOfJsonCfg(jsonCfg: any, partName: string, optional?: boolean): any;
|
|
721
1784
|
static getParaCfgsFromJsonCfg(jsonCfg: any): XmlParaCfg[];
|
|
1785
|
+
static getMyfunCfgsFromJsonCfg(jsonCfg: any): XmlMyfunCfg[];
|
|
722
1786
|
static getAttrsInXml(jsonCfg: any): AttrsInXml;
|
|
723
1787
|
static getCaptchTypes(jsonCfg: any): CaptchaType[];
|
|
724
1788
|
static getDatableMapFromJsonCfg(jsonCfg: any): Map<string, DatatableColumnMap> | null;
|
|
@@ -733,9 +1797,283 @@ declare class TemplateManagerInScraper {
|
|
|
733
1797
|
static clearTemplateConfig(templateId?: number): boolean;
|
|
734
1798
|
}
|
|
735
1799
|
|
|
1800
|
+
declare class SysParas {
|
|
1801
|
+
#private;
|
|
1802
|
+
constructor(taskContext: any, templateId: number, taskId: number, version: string);
|
|
1803
|
+
get version(): string;
|
|
1804
|
+
get templateId(): number;
|
|
1805
|
+
get taskId(): number;
|
|
1806
|
+
get url(): any;
|
|
1807
|
+
get hasPopupPage(): "0" | "1";
|
|
1808
|
+
get subtaskNum(): string;
|
|
1809
|
+
get datatableNum(): string;
|
|
1810
|
+
get responses(): string;
|
|
1811
|
+
get currentDate(): string;
|
|
1812
|
+
get currentDateTime(): string;
|
|
1813
|
+
get currentIsoTime(): string;
|
|
1814
|
+
get currentLocalTime(): string;
|
|
1815
|
+
get currentUtcTime(): string;
|
|
1816
|
+
get currentTime(): string;
|
|
1817
|
+
get currentUnixMs(): string;
|
|
1818
|
+
get currentUnixSec(): string;
|
|
1819
|
+
get random10(): string;
|
|
1820
|
+
get random100(): string;
|
|
1821
|
+
get random1000(): string;
|
|
1822
|
+
get random10000(): string;
|
|
1823
|
+
get uuidv1(): string;
|
|
1824
|
+
get uuidv4(): string;
|
|
1825
|
+
}
|
|
1826
|
+
|
|
1827
|
+
interface ParasObject {
|
|
1828
|
+
inParas: InParas;
|
|
1829
|
+
authInfo: AuthInfo;
|
|
1830
|
+
sysParas: SysParas;
|
|
1831
|
+
userData: UserData;
|
|
1832
|
+
[key: string]: any;
|
|
1833
|
+
}
|
|
1834
|
+
/** Entry type of `TaskContext.loopsStack`: an element, a string or a number. */
type LoopElement =
  | LsdElement
  | string
  | number;
|
|
1835
|
+
/**
|
|
1836
|
+
* Task Context used to execute the task:
|
|
1837
|
+
* * network context
|
|
1838
|
+
* * popup page
|
|
1839
|
+
* * stacks: loop/browser elements/cheerio elements
|
|
1840
|
+
* * state data
|
|
1841
|
+
* * task result
|
|
1842
|
+
*/
|
|
1843
|
+
interface TaskContext {
|
|
1844
|
+
/**
|
|
1845
|
+
* page and all LsdApiContexts(if not null) share this proxy
|
|
1846
|
+
*/
|
|
1847
|
+
proxy: Proxy | null;
|
|
1848
|
+
/**
|
|
1849
|
+
* page that is used to open web pages, null if task does not need open web pages
|
|
1850
|
+
*/
|
|
1851
|
+
page: LsdPage | null;
|
|
1852
|
+
/**
|
|
1853
|
+
* browserContext's LsdApiContext that shares the state data between tasks, which use the pages in the same browserContext
|
|
1854
|
+
* * null if not browserContext
|
|
1855
|
+
*/
|
|
1856
|
+
browserApiContext: LsdApiContext | null;
|
|
1857
|
+
/**
|
|
1858
|
+
* standalone LsdApiContext that shares the state data between tasks, which use this LsdApiContext
|
|
1859
|
+
*/
|
|
1860
|
+
standaloneApiContext: LsdApiContext | null;
|
|
1861
|
+
/**
|
|
1862
|
+
* LsdApiContext that state data is binded to, which is equal to:
|
|
1863
|
+
* * browserApiContext if browserApiContext is not null
|
|
1864
|
+
* * standaloneApiContext if browserApiContext is null
|
|
1865
|
+
*/
|
|
1866
|
+
stateApiContext: LsdApiContext | null;
|
|
1867
|
+
/**
|
|
1868
|
+
* LsdApiContext that is created when it is first used.
|
|
1869
|
+
*/
|
|
1870
|
+
taskApiContext: LsdApiContext | null;
|
|
1871
|
+
/**
|
|
1872
|
+
* the last popup page of page, which is used to execute subtask:
|
|
1873
|
+
* * popupPage is always null if page is null
|
|
1874
|
+
*/
|
|
1875
|
+
popupPage: LsdPage | null;
|
|
1876
|
+
/**
|
|
1877
|
+
* browser elements stack(elementSource is browser)
|
|
1878
|
+
*/
|
|
1879
|
+
browserElesStack: LsdElement[];
|
|
1880
|
+
/**
|
|
1881
|
+
* CheeroPage that is used to extract data, which is created or refreshed using the page.content or paras variable("cheerioHtml" or "cheerioXml")
|
|
1882
|
+
*/
|
|
1883
|
+
cheerioPage: CheerioPage | null;
|
|
1884
|
+
/**
|
|
1885
|
+
* cheerio elements stack(elementSource is cheerio)
|
|
1886
|
+
*/
|
|
1887
|
+
cheerioElesStack: LsdElement[];
|
|
1888
|
+
/**
|
|
1889
|
+
* stack of all types of loops
|
|
1890
|
+
*/
|
|
1891
|
+
loopsStack: LoopElement[];
|
|
1892
|
+
/**
|
|
1893
|
+
* whether action "continue" is launched and not processed
|
|
1894
|
+
*/
|
|
1895
|
+
continueFlag: boolean;
|
|
1896
|
+
/**
|
|
1897
|
+
* whether action "break" is launched and not processed
|
|
1898
|
+
*/
|
|
1899
|
+
breakFlag: boolean;
|
|
1900
|
+
origStateData: ScraperStateData;
|
|
1901
|
+
newStateData: ScraperStateData | null;
|
|
1902
|
+
execData: ExecData;
|
|
1903
|
+
subtasks: Subtask[];
|
|
1904
|
+
credits: number;
|
|
1905
|
+
subtaskResults: TaskResult[];
|
|
1906
|
+
}
|
|
1907
|
+
|
|
1908
|
+
/**
 * Names of the selectable API contexts. The "browser", "standalone", "state" and "task"
 * members correspond to the `*ApiContext` properties declared on `TaskContext`.
 */
type ApiContextType =
  | "default"
  | "fetch"
  | "browser"
  | "standalone"
  | "state"
  | "task";
|
|
1909
|
+
type GetAddedTaskParasFun = (getCfg: XmlGetAddedtaskparasCfg) => Promise<string>;
|
|
1910
|
+
type GetExecedTaskParasFun = (getCfg: XmlGetExecedtaskparasCfg) => Promise<string>;
|
|
1911
|
+
type GetQueuedCntWithParasCfgFun = (getCfg: XmlGetQueuedcntwithparasCfg) => Promise<string>;
|
|
1912
|
+
type GetQueuedTaskParasFun = (getCfg: XmlGetQueuedtaskparasCfg) => Promise<string>;
|
|
1913
|
+
type GetDataFromServerFunc = (method: "get", requestUrl: string, data: any, options: any) => Promise<string>;
|
|
1914
|
+
interface GetOtherFunObj {
|
|
1915
|
+
getAddedTaskParasFun?: GetAddedTaskParasFun;
|
|
1916
|
+
getExecedTaskParasFun?: GetExecedTaskParasFun;
|
|
1917
|
+
getQueuedCntWithParasCfgFun?: GetQueuedCntWithParasCfgFun;
|
|
1918
|
+
getQueuedTaskParasFun?: GetQueuedTaskParasFun;
|
|
1919
|
+
getDataFromServerFun?: GetDataFromServerFunc;
|
|
1920
|
+
}
|
|
1921
|
+
type GetSpecialFun = (getCfg: XmlGetSpecialCfg, otherInfo: any) => Promise<string>;
|
|
1922
|
+
type GetSpecialFunObj = Record<string, GetSpecialFun>;
|
|
1923
|
+
declare class GetPara {
|
|
1924
|
+
static getSpecialFunObj: GetSpecialFunObj;
|
|
1925
|
+
static setGetSpecialFunObj(getSpecialFunObj: GetSpecialFunObj): boolean;
|
|
1926
|
+
static getOtherFunObj: GetOtherFunObj;
|
|
1927
|
+
static setGetOtherFunObj(getOtherFunObj: GetOtherFunObj): boolean;
|
|
1928
|
+
static noNeedToReturnContent: string;
|
|
1929
|
+
static getPara(getCfg: XmlGetConfig, otherInfo: any): Promise<string>;
|
|
1930
|
+
static getParaInElement(getCfg: XmlGetConfig, element: LsdElement, otherInfo: any): Promise<string>;
|
|
1931
|
+
static getParaInPage(getCfg: XmlGetConfig, page: LsdPage, otherInfo: any): Promise<string>;
|
|
1932
|
+
static getContent(page: LsdPage): Promise<string>;
|
|
1933
|
+
static getCookies(page: LsdPage, getCfg: XmlGetCookiesCfg): Promise<string>;
|
|
1934
|
+
static getDatetime(getCfg: XmlGetDatetimeCfg): string;
|
|
1935
|
+
static getFile(getCfg: XmlGetFileCfg, otherInfo: any): Promise<string>;
|
|
1936
|
+
static _genHeaderMethod: Record<string, any>;
|
|
1937
|
+
static hasSameOrigin(sourceUrl: string, targetUrl: string, sourceType?: string): boolean;
|
|
1938
|
+
static getHttpHeaders(getCfg: XmlGetHttpheadersCfg, otherInfo: any): Promise<string>;
|
|
1939
|
+
static getMhtml(page: LsdPage, getCfg: XmlGetMhtmlCfg, paras: ParasObject): Promise<string>;
|
|
1940
|
+
static _getScreenSize(origSize: string): string;
|
|
1941
|
+
static getPdf(page: LsdPage, getCfg: XmlGetPdfCfg, paras: ParasObject): Promise<string>;
|
|
1942
|
+
static getRandom(getCfg: XmlGetRandomCfg): string;
|
|
1943
|
+
static getResponse(getCfg: XmlGetResponseCfg, otherInfo: any): Promise<string>;
|
|
1944
|
+
static getScreenshot(pageOrElement: LsdPage | LsdElement, getCfg: XmlGetScreenshotCfg, paras: ParasObject): Promise<string>;
|
|
1945
|
+
static getSpecial(getCfg: XmlGetSpecialCfg, otherInfo: any): Promise<string>;
|
|
1946
|
+
static getTitle(page: LsdPage, getCfg: XmlGetTitleCfg): Promise<string>;
|
|
1947
|
+
static getWindow(page: LsdPage, getCfg: XmlGetWindowCfg): Promise<string>;
|
|
1948
|
+
}
|
|
1949
|
+
|
|
1950
|
+
type SpecialConvertFun = (origStr: string, funCfg: XmlFunSpecialconvertCfg) => string;
|
|
1951
|
+
type SpecialConvertFunObj = Record<string, SpecialConvertFun>;
|
|
1952
|
+
/**
 * Configuration object handed to the `LsdString` transform helpers.
 * NOTE(review): declared as `any`, so no member access on it is type-checked; this file
 * also declares the `XmlFunConfig` union, which may be the intended precise type — confirm.
 */
type FunctionConfig = any;
|
|
1953
|
+
/**
 * Callback accepted by `LsdString.setGetStdFunScript`: takes a name and resolves to a string.
 * NOTE(review): presumably resolves to the script source of the named standard function — confirm.
 */
type GetStdFunScriptFun = {
  (name: string): Promise<string>;
};
|
|
1954
|
+
declare class LsdString {
|
|
1955
|
+
#private;
|
|
1956
|
+
static specialConvertFunObj: SpecialConvertFunObj;
|
|
1957
|
+
static setSpecialConvertFunObj(specialConvertFunObj: SpecialConvertFunObj): boolean;
|
|
1958
|
+
static transformStr(origStr: string, funCfgs: FunctionConfig[], defaultVal: string, myfunScripts: Record<string, string>): Promise<string>;
|
|
1959
|
+
static setGetStdFunScript(fun: GetStdFunScriptFun): boolean;
|
|
1960
|
+
static __getElementBySelector($: any, loc: string, idx?: number): any;
|
|
1961
|
+
static __cheerioOptions: {
|
|
1962
|
+
xmlMode: boolean;
|
|
1963
|
+
decodeEntities: boolean;
|
|
1964
|
+
};
|
|
1965
|
+
static _c_attr(origStr: string, funCfg: FunctionConfig): any;
|
|
1966
|
+
static _c_data(origStr: string, funCfg: FunctionConfig): any;
|
|
1967
|
+
static _c_html(origStr: string, funCfg: FunctionConfig): any;
|
|
1968
|
+
static _c_length(origStr: string, funCfg: FunctionConfig): string;
|
|
1969
|
+
static _c_text(origStr: string, funCfg: FunctionConfig): any;
|
|
1970
|
+
static _ceil(origStr: string): string;
|
|
1971
|
+
/**
 * NOTE(review): the declared return type is `string | -1`, so a numeric `-1` can come back
 * where the sibling helpers return strings; callers must handle that sentinel explicitly.
 */
static _closingsubstr(origStr: string, funCfg: FunctionConfig): string | -1;
|
|
1972
|
+
static __getCompressOptions(funCfg: FunctionConfig, compressFlag: boolean): {} | undefined;
|
|
1973
|
+
static _compress(origStr: string, funCfg: FunctionConfig): string;
|
|
1974
|
+
static _decompress(origStr: string, funCfg: FunctionConfig): string;
|
|
1975
|
+
static _concat(origStr: string, funCfg: FunctionConfig): string;
|
|
1976
|
+
static _specialconvert(origStr: string, funCfg: FunctionConfig): string;
|
|
1977
|
+
static _decode(origStr: string, funCfg: FunctionConfig): any;
|
|
1978
|
+
static __convertBufferToString(buffer: Buffer, encoding: BufferEncoding, startStr: string, endStr: string): string;
|
|
1979
|
+
static _convertencoding(origStr: string, funCfg: FunctionConfig): string;
|
|
1980
|
+
static _floor(origStr: string): string;
|
|
1981
|
+
static _hashcode(origStr: string, funCfg: FunctionConfig): string;
|
|
1982
|
+
static _includes(origStr: string, funCfg: FunctionConfig): string;
|
|
1983
|
+
static _insert(origStr: string, funCfg: FunctionConfig): string;
|
|
1984
|
+
static _itemstoobj(origStr: string, funCfg: FunctionConfig): string;
|
|
1985
|
+
static _jsonparse(origStr: string, funCfg: FunctionConfig): string;
|
|
1986
|
+
static _length(origStr: string): string;
|
|
1987
|
+
static _matchall(origStr: string, funCfg: FunctionConfig): string;
|
|
1988
|
+
static _max(origStr: string, funCfg: FunctionConfig): string;
|
|
1989
|
+
static _min(origStr: string, funCfg: FunctionConfig): string;
|
|
1990
|
+
static _numeq(origStr: string, funCfg: FunctionConfig): "0" | "1";
|
|
1991
|
+
static _numge(origStr: string, funCfg: FunctionConfig): "0" | "1";
|
|
1992
|
+
static _numgt(origStr: string, funCfg: FunctionConfig): "0" | "1";
|
|
1993
|
+
static _numle(origStr: string, funCfg: FunctionConfig): "0" | "1";
|
|
1994
|
+
static _numlt(origStr: string, funCfg: FunctionConfig): "0" | "1";
|
|
1995
|
+
static _notincludes(origStr: string, funCfg: FunctionConfig): string;
|
|
1996
|
+
static _padEnd(origStr: string, funCfg: FunctionConfig): string;
|
|
1997
|
+
static _padStart(origStr: string, funCfg: FunctionConfig): string;
|
|
1998
|
+
static _parseFloat(origStr: string): string;
|
|
1999
|
+
static _parseInt(origStr: string): string;
|
|
2000
|
+
static _randomarryidxes(origStr: string, funCfg: FunctionConfig): string;
|
|
2001
|
+
static _replace(origStr: string, funCfg: FunctionConfig): string;
|
|
2002
|
+
static _round(origStr: string): string;
|
|
2003
|
+
static __sort(origArr: string[], valtype: string, sorttype: string): string[];
|
|
2004
|
+
static _setcomplement(origStr: string, funCfg: FunctionConfig): string;
|
|
2005
|
+
static _setdifference(origStr: string, funCfg: FunctionConfig): string;
|
|
2006
|
+
static _setintersection(origStr: string, funCfg: FunctionConfig): string;
|
|
2007
|
+
static _setunion(origStr: string, funCfg: FunctionConfig): string;
|
|
2008
|
+
static _shuffle(origStr: string, funCfg: FunctionConfig): string;
|
|
2009
|
+
static _slice(origStr: string, funCfg: FunctionConfig): string;
|
|
2010
|
+
static _sort(origStr: string, funCfg: FunctionConfig): string;
|
|
2011
|
+
static _split(origStr: string, funCfg: FunctionConfig): string;
|
|
2012
|
+
static _streq(origStr: string, funCfg: FunctionConfig): "0" | "1";
|
|
2013
|
+
static _substrAfter(origStr: string, funCfg: FunctionConfig): string;
|
|
2014
|
+
static _substrBefore(origStr: string, funCfg: FunctionConfig): string;
|
|
2015
|
+
/**
 * NOTE(review): the declared return type is `string | false`, so a boolean `false` can come
 * back where the sibling helpers return strings; callers must handle that case explicitly.
 */
static _toBool(origStr: string): string | false;
|
|
2016
|
+
static _toLowerCase(origStr: string): string;
|
|
2017
|
+
static _toNum(origStr: string): string;
|
|
2018
|
+
static _toUpperCase(origStr: string): string;
|
|
2019
|
+
static _trim(origStr: string): string;
|
|
2020
|
+
static _trimEnd(origStr: string): string;
|
|
2021
|
+
static _trimStart(origStr: string): string;
|
|
2022
|
+
static _uniq(origStr: string, funCfg: FunctionConfig): string;
|
|
2023
|
+
static _urlencode(origStr: string, funCfg: FunctionConfig): string;
|
|
2024
|
+
static _urldecode(origStr: string, funCfg: FunctionConfig): string;
|
|
2025
|
+
static _urldelparams(origStr: string, funCfg: FunctionConfig): string;
|
|
2026
|
+
static _urlget(origStr: string, funCfg: FunctionConfig): string;
|
|
2027
|
+
static __setUrlParam(url: URL, param: string, val: string, replaceFlag: boolean): boolean;
|
|
2028
|
+
static _urlsetparams(origStr: string, funCfg: FunctionConfig): string;
|
|
2029
|
+
static _funPerformers: Record<string, Function>;
|
|
2030
|
+
}
|
|
2031
|
+
/**
 * Takes an original string plus a list of function configurations and resolves to a string.
 * The parameter list parallels `LsdString.transformStr`, with `defaultVal` and
 * `myFunPerformers` optional here.
 * NOTE(review): what the resolved string contains (the transformed value, or an expression
 * describing the transformation, as the name may suggest) is not visible in this
 * declaration file — confirm against the implementation.
 */
declare function getTransformExpStr(origStr: string, funCfgs: any[], defaultVal?: string, myFunPerformers?: Record<string, string>): Promise<string>;
|
|
2032
|
+
|
|
2033
|
+
declare class TaskScraper extends EventEmitter {
|
|
2034
|
+
#private;
|
|
2035
|
+
_fontsConfig: FontsConfig | null;
|
|
2036
|
+
_templateId: TemplateId;
|
|
2037
|
+
_taskId: number;
|
|
2038
|
+
_taskContext: TaskContext;
|
|
2039
|
+
_paras: ParasObject;
|
|
2040
|
+
constructor(templateConfig: TemplateConfig, inParas: InParas, taskNetworkContext: TaskNetworkContext, taskOptions?: TaskOptions);
|
|
2041
|
+
_procErrname(errName: string, scenarioName: string): boolean;
|
|
2042
|
+
_getCfgStrAttr(cfg: XmlElementConfig, cfgAttrName: string, transTemplFlag?: boolean, mustFlag?: boolean): string;
|
|
2043
|
+
_getCfgBoolAttr(cfg: XmlElementConfig, cfgAttrName: string): boolean;
|
|
2044
|
+
_getCfgTextContext(cfg: XmlElementConfig, mustFlag?: boolean): string;
|
|
2045
|
+
_getCfgChildrenOf(parentCfg: XmlElementConfig, cfgType: string, mustFlag?: boolean): XmlElementConfig[];
|
|
2046
|
+
/** Steps of an operation that involves login, captcha solving and data-page verification:
|
|
2047
|
+
* 0. Related actions and assumptions:
|
|
2048
|
+
* 0.1. Page-operation actions that carry login/captcha/pageMatchInfo attributes: click/goto [/input/scroll/select]
|
|
2049
|
+
* 0.2. Assumption when a popup is involved: these events fire in order: original page.popup -> LsdPage.pagePopup -> this.popup
|
|
2050
|
+
* 0.3. Other assumption: one complete page operation pops up at most one data page (i.e. step 2.3 runs at most once)
|
|
2051
|
+
* 1. Before the page operation, procBeforePageOperation:
|
|
2052
|
+
* 1.1. Set scraperTask.actionCfg
|
|
2053
|
+
* 1.2. If a popup page is involved: use setTimeout to send a timeout event after a delay
|
|
2054
|
+
* 2. Page operation and related processing (scrollBy may consist of several operations, so each step below may run several times; in all other cases each runs only once)
|
|
2055
|
+
* 2.1. Perform the actual page operation
|
|
2056
|
+
* 2.2. After the page operation, procAfterPageOperation: listen for the TEEV_NEWPAGE_execId event (when a popup is involved; see the assumptions)
|
|
2057
|
+
* 2.3. If the expected data page popped up in the meantime (actCfg.popupsubtask):
|
|
2058
|
+
* 2.3.1. Replace the current data page (false):
|
|
2059
|
+
* 2.3.2. Save popupPage (true): kept for the synchronous subtask executed later
|
|
2060
|
+
* 2.4. Process the current page, processLoginAndCaptchaPage: login, captcha solving, and checking whether the current page is the expected page
|
|
2061
|
+
* 3. Clear scraperTask.actionCfg
|
|
2062
|
+
*/
|
|
2063
|
+
processNewPageLoginCaptcha(page: LsdPage, actCfg: ActionConfig, pageOperateFun: Function, pageOperateArgs: any[], actType?: string): Promise<boolean>;
|
|
2064
|
+
_performGetstatedataInMisc(miscCfg: XmlElementConfig): Promise<void>;
|
|
2065
|
+
run(): Promise<TaskResult>;
|
|
2066
|
+
__performMiscAction(actCfg: ActionConfig): Promise<boolean>;
|
|
2067
|
+
__processDbquery(actCfg: ActionConfig, defaultVal: string): Promise<string>;
|
|
2068
|
+
__processBrowserOcrCfg(ocrCfg: XmlElementConfig, $browserEles: LsdElement[]): Promise<string>;
|
|
2069
|
+
__decodeFontInContent(content: string, decodefontsvgCfg: XmlElementConfig | null, decodefontttfCfg: XmlElementConfig | null): Promise<string>;
|
|
2070
|
+
processLoginAndCaptchaPage(page: LsdPage, pageType: string): Promise<boolean>;
|
|
2071
|
+
__whenTaskEndFun(): Promise<boolean>;
|
|
2072
|
+
}
|
|
2073
|
+
|
|
736
2074
|
/**
 * Executes one task and resolves to its `TaskResult`.
 * @param templateId numeric template id
 * @param parasStr task parameters as a single string. NOTE(review): the encoding/separator is not visible here — confirm.
 * @param taskNetworkContext network context used to execute the task
 * @param taskType optional task type ("indAsync" | "indSync" | "memSync")
 * @param xmlStr optional XML string. NOTE(review): presumably the template XML to use instead of loading it by id — confirm.
 * @param taskId optional numeric task id
 * @param useNickName optional flag. NOTE(review): meaning not visible in this declaration file — confirm.
 */
declare function performOneTask(templateId: number, parasStr: string, taskNetworkContext: TaskNetworkContext, taskType?: TaskType, xmlStr?: string, taskId?: number, useNickName?: boolean): Promise<TaskResult>;
|
|
737
2075
|
|
|
738
2076
|
/**
 * Takes a `ScraperConfig` and resolves to a boolean.
 * NOTE(review): presumably applies `config` to the running scraper and resolves to whether
 * that succeeded — the implementation is not visible here; confirm.
 */
declare function updateScraperConfig(config: ScraperConfig): Promise<boolean>;
|
|
739
2077
|
/**
 * Package entry point: takes an optional list of `TemplateTasks` and an optional
 * `ScraperConfig`, and resolves to a boolean.
 * NOTE(review): presumably starts the scraper with the given tasks and configuration and
 * resolves to a success flag — the implementation is not visible here; confirm.
 */
declare function scraper(newTasks?: TemplateTasks[], config?: ScraperConfig): Promise<boolean>;
|
|
740
2078
|
|
|
741
|
-
export { type AttrsInXml, type BrowserConfig, type ExecData, type ParsedTemplate, type ScraperConfig, TaskParser, TemplateManagerInScraper, type TemplatePara, type TemplateTasks, performOneTask, scraper, setScraperLogFun, updateScraperConfig };
|
|
2079
|
+
export { type ActionConfig, type ApiContextType, type AttrsInXml, type AuthInfo, type BrowserConfig, type CaptchaOptions, type DataFileFormat, type DataRecord, type DatatableColumnMap, type DomainId, type ElementSource, type ElementSourceExt, type ElementTagConfig, type ExecData, type ExtractScriptInData, type ExtractScriptResult, type FontsConfig, type FontttfConfig, type GetAddedTaskParasFun, type GetDataFromServerFunc, type GetExecedTaskParasFun, type GetOtherFunObj, GetPara, type GetQueuedCntWithParasCfgFun, type GetQueuedTaskParasFun, type GetSpecialFun, type GetSpecialFunObj, type GetStdFunScriptFun, type HttpHeaders, type InParas, type LoginOptions, type LoginWhen, LsdString, type MyFunInData, type MyFunResult, type PageMatchInfo, type ParsedTemplate, type ParsedTemplateExt, type PerformOneTask, type ScraperConfig, type ScraperStateData, type SimpleScheduler, type SpecialConvertFun, type SpecialConvertFunObj, type Subtask, type TaskData, type TaskMisc, type TaskNetworkContext, type TaskOptions, TaskParser, type TaskResult, TaskScraper, type TaskType, type TemplateConfig, type TemplateId, type TemplateInScraper, TemplateManagerInScraper, type TemplateManagerOptions, type TemplateManger, type TemplatePara, type TemplateTasks, type UserData, type XmlActionApiCfg, type XmlActionBreakCfg, type XmlActionCaptchaCfg, type XmlActionClickCfg, type XmlActionConfig, type XmlActionContinueCfg, type XmlActionExitCfg, type XmlActionExtractArrayCfg, type XmlActionExtractCfg, type XmlActionExtractScriptCfg, type XmlActionExtractTableCfg, type XmlActionGotoCfg, type XmlActionHoverCfg, type XmlActionIfelseCfg, type XmlActionInputCfg, type XmlActionInterceptClearCfg, type XmlActionInterceptSetCfg, type XmlActionLoopdowhileElementCfg, type XmlActionLoopdowhileTemplstrCfg, type XmlActionLoopforCfg, type XmlActionLoopinelesCfg, type XmlActionLoopinstrCfg, type XmlActionMiscCfg, type XmlActionScrollByCfg, type XmlActionScrollIntoviewCfg, type XmlActionScrollToCfg, type 
XmlActionSelectCfg, type XmlActionSetvarDbqueryCfg, type XmlActionSetvarElementCfg, type XmlActionSetvarFileCfg, type XmlActionSetvarGetCfg, type XmlActionSetvarOcrCfg, type XmlActionSetvarSubtaskCfg, type XmlActionSetvarTemplstrCfg, type XmlActionSubtaskCfg, type XmlActionWaitElementCfg, type XmlActionWaitNavigationCfg, type XmlActionWaitSleepCfg, type XmlAttrCfg, type XmlCaptchaAmazonCfg, type XmlCaptchaCoordinateCfg, type XmlCaptchaFuncaptchaCfg, type XmlCaptchaGeetestCfg, type XmlCaptchaKeycaptchaCfg, type XmlCaptchaMtcaptchaCfg, type XmlCaptchaRecaptchaCfg, type XmlCaptchaTextCfg, type XmlCaptchaTurnstileCfg, type XmlCheckResultCfg, type XmlColumnElementCfg, type XmlColumnOcrCfg, type XmlColumnPropertyCfg, type XmlColumnSubtaskCfg, type XmlColumnTemplstrCfg, type XmlCommentElementCfg, type XmlConditionElementCfg, type XmlConditionElseCfg, type XmlConditionTemplstrCfg, type XmlDbqueryCfg, type XmlDecodefontsvgCfg, type XmlDecodefontttfCfg, type XmlElecontentAttrCfg, type XmlElecontentImgbase64Cfg, type XmlElecontentInnerhtmlCfg, type XmlElecontentLengthCfg, type XmlElecontentOuterhtmlCfg, type XmlElecontentTextCfg, type XmlElementCfg, type XmlElementConfig, type XmlFileCfg, type XmlFontcharsCfg, type XmlFontcodesCfg, type XmlFontfamilyCfg, type XmlFontselectorCfg, type XmlFontsvgCfg, type XmlFontttfCfg, type XmlFunCGethtmlCfg, type XmlFunCGettextCfg, type XmlFunCHtmlCfg, type XmlFunCRemoveCfg, type XmlFunCTextCfg, type XmlFunCeilCfg, type XmlFunClosingsubstrCfg, type XmlFunCompressCfg, type XmlFunConcatCfg, type XmlFunConfig, type XmlFunConvertencodingCfg, type XmlFunDecodeCfg, type XmlFunDecompressCfg, type XmlFunFloorCfg, type XmlFunHashcodeCfg, type XmlFunIncludesCfg, type XmlFunInsertCfg, type XmlFunItemstoobjCfg, type XmlFunJsonparseCfg, type XmlFunLengthCfg, type XmlFunMatchallCfg, type XmlFunMaxCfg, type XmlFunMinCfg, type XmlFunMyfunCfg, type XmlFunNotincludesCfg, type XmlFunNumeqCfg, type XmlFunNumgeCfg, type XmlFunNumgtCfg, type XmlFunNumleCfg, type 
XmlFunNumltCfg, type XmlFunPadendCfg, type XmlFunPadstartCfg, type XmlFunParsefloatCfg, type XmlFunParseintCfg, type XmlFunRandomarryidxesCfg, type XmlFunReplaceCfg, type XmlFunRoundCfg, type XmlFunSetcomplementCfg, type XmlFunSetdifferenceCfg, type XmlFunSetintersectionCfg, type XmlFunSetunionCfg, type XmlFunShuffleCfg, type XmlFunSliceCfg, type XmlFunSortCfg, type XmlFunSpecialconvertCfg, type XmlFunSplitCfg, type XmlFunStreqCfg, type XmlFunSubstrafterCfg, type XmlFunSubstrbeforeCfg, type XmlFunToboolCfg, type XmlFunTolowercaseCfg, type XmlFunTonumCfg, type XmlFunTouppercaseCfg, type XmlFunTrimCfg, type XmlFunTrimendCfg, type XmlFunTrimstartCfg, type XmlFunUniqCfg, type XmlFunUrldecodeCfg, type XmlFunUrldelparamsCfg, type XmlFunUrlencodeCfg, type XmlFunUrlgetCfg, type XmlFunUrlsetparamsCfg, type XmlGetAddedtaskparasCfg, type XmlGetConfig, type XmlGetContentCfg, type XmlGetCookiesCfg, type XmlGetDatetimeCfg, type XmlGetExecedtaskparasCfg, type XmlGetFileCfg, type XmlGetHttpheadersCfg, type XmlGetMhtmlCfg, type XmlGetPdfCfg, type XmlGetQueuedcntwithparasCfg, type XmlGetQueuedtaskparasCfg, type XmlGetRandomCfg, type XmlGetResponseCfg, type XmlGetScreenshotCfg, type XmlGetSpecialCfg, type XmlGetTitleCfg, type XmlGetWindowCfg, type XmlHttpbrowserCfg, type XmlHttpdeviceCfg, type XmlHttpheaderCfg, type XmlHttposCfg, type XmlIframeCfg, type XmlImageElementCfg, type XmlInputElementCfg, type XmlMiscDelsyncdbdataCfg, type XmlMiscExtractdataCfg, type XmlMyfunCfg, type XmlOcrcfgApiCfg, type XmlOcrcfgTesseractCfg, type XmlParaCfg, type XmlRequestAbortCfg, type XmlRequestDataCfg, type XmlRequestHeaderCfg, type XmlResponseCacheCfg, type XmlResponseSaveCfg, type XmlResponseStatuscodeCfg, type XmlSubmitElementCfg, type XmlSubtaskCfg, type XmlTemplstrCfg, type XmlTransformCfg, getTransformExpStr, logdbg, logerr, loginfo, logwarn, performOneTask, scraper, setScraperLogFun, updateScraperConfig };
|