@letsscrapedata/scraper 0.0.76 → 0.0.78

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -1,7 +1,12 @@
1
- import { BrowserControllerType, LsdBrowserType, LsdLaunchOptions, LsdConnectOptions, LsdPage, LsdApiContext, BrowserStateData } from '@letsscrapedata/controller';
1
+ import { BrowserStateData, LsdPage, LsdApiContext, BrowserControllerType, LsdBrowserType, LsdLaunchOptions, LsdConnectOptions, ResponseInterceptionItem, LsdElement, CheerioPage } from '@letsscrapedata/controller';
2
2
  import { Proxy } from '@letsscrapedata/proxy';
3
3
  import { LogFunction } from '@letsscrapedata/utils';
4
+ import EventEmitter from 'node:events';
4
5
 
6
+ interface XmlAttrCfg {
7
+ name: string;
8
+ value: string;
9
+ }
5
10
  interface XmlParaCfg {
6
11
  paraname: string;
7
12
  name: string;
@@ -15,6 +20,97 @@ interface XmlParaCfg {
15
20
  pattern: string;
16
21
  alert: string;
17
22
  }
23
+ interface XmlElementCfg {
24
+ loc: string;
25
+ source: string;
26
+ absolute: boolean;
27
+ }
28
+ interface XmlIframeCfg {
29
+ srcprefix: string;
30
+ loc: string;
31
+ }
32
+ interface XmlTemplstrCfg {
33
+ templ: string;
34
+ }
35
+ interface XmlElecontentAttrCfg {
36
+ type: string;
37
+ attrname: string;
38
+ attrname2: string;
39
+ attrname3: string;
40
+ attrname4: string;
41
+ trim: boolean;
42
+ pattern: string;
43
+ boolattr: boolean;
44
+ multieles: boolean;
45
+ join: string;
46
+ line: boolean;
47
+ absolute: boolean;
48
+ }
49
+ interface XmlElecontentImgbase64Cfg {
50
+ type: string;
51
+ }
52
+ interface XmlElecontentInnerhtmlCfg {
53
+ type: string;
54
+ }
55
+ interface XmlElecontentLengthCfg {
56
+ type: string;
57
+ }
58
+ interface XmlElecontentOuterhtmlCfg {
59
+ type: string;
60
+ }
61
+ interface XmlElecontentTextCfg {
62
+ type: string;
63
+ trim: boolean;
64
+ multieles: boolean;
65
+ join: string;
66
+ line: boolean;
67
+ top: boolean;
68
+ }
69
+ interface XmlDecodefontsvgCfg {
70
+ outtype: string;
71
+ }
72
+ interface XmlDecodefontttfCfg {
73
+ fflocs: string;
74
+ intype: string;
75
+ outtype: string;
76
+ }
77
+ interface XmlOcrcfgApiCfg {
78
+ type: string;
79
+ nlchars: string;
80
+ seperator: boolean;
81
+ lang: string;
82
+ }
83
+ interface XmlOcrcfgTesseractCfg {
84
+ type: string;
85
+ nlchars: string;
86
+ seperator: boolean;
87
+ lang: string;
88
+ }
89
+ interface XmlActionApiCfg {
90
+ type: string;
91
+ method: string;
92
+ url: string;
93
+ encodeuri: boolean;
94
+ session: boolean;
95
+ headerssource: string;
96
+ headers: string;
97
+ referer: string;
98
+ proxy: boolean;
99
+ timeout: string;
100
+ context: string;
101
+ data: string;
102
+ datatype: string;
103
+ form: boolean;
104
+ path: string;
105
+ diskcache: boolean;
106
+ cache: boolean;
107
+ varname: string;
108
+ errname: string;
109
+ valerrname: string;
110
+ pattern: string;
111
+ flags: string;
112
+ id: string;
113
+ }
18
114
  interface XmlActionBreakCfg {
19
115
  type: string;
20
116
  id: string;
@@ -57,6 +153,21 @@ interface XmlActionMiscCfg {
57
153
  type: string;
58
154
  id: string;
59
155
  }
156
+ interface XmlMiscDelsyncdbdataCfg {
157
+ type: string;
158
+ tids: string;
159
+ retention: string;
160
+ minretention: string;
161
+ }
162
+ interface XmlMiscExtractdataCfg {
163
+ type: string;
164
+ name: string;
165
+ method: string;
166
+ paras: boolean;
167
+ execdata: boolean;
168
+ subtasks: boolean;
169
+ credits: boolean;
170
+ }
60
171
  interface XmlActionExitCfg {
61
172
  type: string;
62
173
  errname: string;
@@ -67,6 +178,109 @@ interface XmlActionExtractCfg {
67
178
  tabname: string;
68
179
  id: string;
69
180
  }
181
+ interface XmlActionExtractArrayCfg {
182
+ type: string;
183
+ subtype: string;
184
+ list: string;
185
+ requrl: string;
186
+ subkeys: string;
187
+ keys: string;
188
+ tabname: string;
189
+ varname: string;
190
+ idxname: string;
191
+ maxloops: string;
192
+ errname: string;
193
+ id: string;
194
+ }
195
+ interface XmlActionExtractScriptCfg {
196
+ type: string;
197
+ subtype: string;
198
+ desc: string;
199
+ base64: boolean;
200
+ html: boolean;
201
+ isolated: boolean;
202
+ tabname: string;
203
+ maxloops: string;
204
+ errname: string;
205
+ id: string;
206
+ }
207
+ interface XmlActionExtractTableCfg {
208
+ type: string;
209
+ orientation: string;
210
+ check: boolean;
211
+ keyformat: string;
212
+ tabname: string;
213
+ maxloops: string;
214
+ errname: string;
215
+ id: string;
216
+ }
217
+ interface XmlColumnElementCfg {
218
+ type: string;
219
+ colname: string;
220
+ nickname: string;
221
+ completed: boolean;
222
+ setvar: boolean;
223
+ datatype: string;
224
+ errname: string;
225
+ valerrname: string;
226
+ pattern: string;
227
+ flags: string;
228
+ id: string;
229
+ discarded: boolean;
230
+ }
231
+ interface XmlColumnPropertyCfg {
232
+ type: string;
233
+ subkeys: string;
234
+ colname: string;
235
+ nickname: string;
236
+ completed: boolean;
237
+ setvar: boolean;
238
+ datatype: string;
239
+ valerrname: string;
240
+ pattern: string;
241
+ flags: string;
242
+ id: string;
243
+ discarded: boolean;
244
+ }
245
+ interface XmlColumnOcrCfg {
246
+ type: string;
247
+ colname: string;
248
+ nickname: string;
249
+ completed: boolean;
250
+ setvar: boolean;
251
+ datatype: string;
252
+ valerrname: string;
253
+ pattern: string;
254
+ flags: string;
255
+ id: string;
256
+ discarded: boolean;
257
+ }
258
+ interface XmlColumnSubtaskCfg {
259
+ type: string;
260
+ colname: string;
261
+ nickname: string;
262
+ completed: boolean;
263
+ setvar: boolean;
264
+ datatype: string;
265
+ valerrname: string;
266
+ pattern: string;
267
+ flags: string;
268
+ id: string;
269
+ discarded: boolean;
270
+ }
271
+ interface XmlColumnTemplstrCfg {
272
+ type: string;
273
+ colname: string;
274
+ nickname: string;
275
+ completed: boolean;
276
+ setvar: boolean;
277
+ datatype: string;
278
+ valerrname: string;
279
+ pattern: string;
280
+ flags: string;
281
+ id: string;
282
+ discarded: boolean;
283
+ }
70
284
  interface XmlActionGotoCfg {
71
285
  type: string;
72
286
  url: string;
@@ -102,6 +316,15 @@ interface XmlActionIfelseCfg {
102
316
  type: string;
103
317
  id: string;
104
318
  }
319
+ interface XmlConditionElementCfg {
320
+ type: string;
321
+ }
322
+ interface XmlConditionElseCfg {
323
+ type: string;
324
+ }
325
+ interface XmlConditionTemplstrCfg {
326
+ type: string;
327
+ }
105
328
  interface XmlActionInputCfg {
106
329
  type: string;
107
330
  content: string;
@@ -132,6 +355,61 @@ interface XmlActionInterceptSetCfg {
132
355
  type: string;
133
356
  subtype: string;
134
357
  }
358
+ interface XmlRequestAbortCfg {
359
+ type: string;
360
+ method: string;
361
+ url: string;
362
+ resourcetype: string;
363
+ postdata: string;
364
+ }
365
+ interface XmlResponseCacheCfg {
366
+ type: string;
367
+ method: string;
368
+ url: string;
369
+ resourcetype: string;
370
+ postdata: string;
371
+ minsize: string;
372
+ maxsize: string;
373
+ contenttype: string;
374
+ responses: boolean;
375
+ tabname: string;
376
+ requestheaders: boolean;
377
+ responseheaders: boolean;
378
+ }
379
+ interface XmlRequestDataCfg {
380
+ type: string;
381
+ subkeys: string;
382
+ value: string;
383
+ datatype: string;
384
+ }
385
+ interface XmlRequestHeaderCfg {
386
+ type: string;
387
+ name: string;
388
+ value: string;
389
+ }
390
+ interface XmlResponseSaveCfg {
391
+ type: string;
392
+ method: string;
393
+ url: string;
394
+ resourcetype: string;
395
+ postdata: string;
396
+ minsize: string;
397
+ maxsize: string;
398
+ basedir: string;
399
+ pathtype: string;
400
+ hashmethod: string;
401
+ hostnameparts: string;
402
+ extname: string;
403
+ validextnames: string;
404
+ params: string;
405
+ encoding: string;
406
+ tabname: string;
407
+ }
408
+ interface XmlResponseStatuscodeCfg {
409
+ type: string;
410
+ codes: string;
411
+ errname: string;
412
+ }
135
413
  interface XmlActionLoopdowhileElementCfg {
136
414
  type: string;
137
415
  subtype: string;
@@ -264,6 +542,13 @@ interface XmlActionSelectCfg {
264
542
  pv2: string;
265
543
  id: string;
266
544
  }
545
+ interface XmlDbqueryCfg {
546
+ dbname: string;
547
+ sql: string;
548
+ sqlparas: string;
549
+ parasplit: string;
550
+ join: string;
551
+ }
267
552
  interface XmlActionSetvarDbqueryCfg {
268
553
  type: string;
269
554
  subtype: string;
@@ -287,6 +572,10 @@ interface XmlActionSetvarElementCfg {
287
572
  path: string;
288
573
  id: string;
289
574
  }
575
+ interface XmlFileCfg {
576
+ path: string;
577
+ encoding: string;
578
+ }
290
579
  interface XmlActionSetvarFileCfg {
291
580
  type: string;
292
581
  subtype: string;
@@ -343,6 +632,14 @@ interface XmlActionSetvarTemplstrCfg {
343
632
  path: string;
344
633
  id: string;
345
634
  }
635
+ interface XmlSubtaskCfg {
636
+ tid: string;
637
+ parasstr: string;
638
+ idx: string;
639
+ minlen: string;
640
+ errname: string;
641
+ popuppage: boolean;
642
+ }
346
643
  interface XmlActionSubtaskCfg {
347
644
  type: string;
348
645
  subtasks: string;
@@ -383,6 +680,16 @@ interface XmlFontsvgCfg {
383
680
  csmaptype: string;
384
681
  bsfilter: string;
385
682
  }
683
+ interface XmlFontttfCfg {
684
+ exloc: string;
685
+ inloc: string;
686
+ minuc: string;
687
+ maxuc: string;
688
+ startidx: string;
689
+ fsfilter: string;
690
+ fufilter: string;
691
+ parsetype: string;
692
+ }
386
693
  interface XmlFontselectorCfg {
387
694
  name: string;
388
695
  fontfamily: string;
@@ -400,7 +707,621 @@ interface XmlFontcharsCfg {
400
707
  name: string;
401
708
  chars: string;
402
709
  }
403
- type XmlActionConfig = XmlActionBreakCfg | XmlActionCaptchaCfg | XmlActionClickCfg | XmlActionContinueCfg | XmlActionMiscCfg | XmlActionExitCfg | XmlActionExtractCfg | XmlActionGotoCfg | XmlActionHoverCfg | XmlActionIfelseCfg | XmlActionInputCfg | XmlActionInterceptClearCfg | XmlActionInterceptSetCfg | XmlActionLoopdowhileElementCfg | XmlActionLoopdowhileTemplstrCfg | XmlActionLoopforCfg | XmlActionLoopinelesCfg | XmlActionLoopinstrCfg | XmlActionScrollByCfg | XmlActionScrollIntoviewCfg | XmlActionScrollToCfg | XmlActionSelectCfg | XmlActionSetvarDbqueryCfg | XmlActionSetvarElementCfg | XmlActionSetvarFileCfg | XmlActionSetvarGetCfg | XmlActionSetvarOcrCfg | XmlActionSetvarSubtaskCfg | XmlActionSetvarTemplstrCfg | XmlActionSubtaskCfg | XmlActionWaitElementCfg | XmlActionWaitNavigationCfg | XmlActionWaitSleepCfg;
710
+ interface XmlMyfunCfg {
711
+ name: string;
712
+ desc: string;
713
+ base64: boolean;
714
+ script: string;
715
+ }
716
+ interface XmlMyfunCfg {
717
+ name: string;
718
+ arg2: string;
719
+ arg3: string;
720
+ arg4: string;
721
+ arg5: string;
722
+ funbody: string;
723
+ }
724
+ interface XmlCaptchaFuncaptchaCfg {
725
+ type: string;
726
+ }
727
+ interface XmlCaptchaGeetestCfg {
728
+ type: string;
729
+ }
730
+ interface XmlCaptchaKeycaptchaCfg {
731
+ type: string;
732
+ }
733
+ interface XmlCaptchaMtcaptchaCfg {
734
+ type: string;
735
+ }
736
+ interface XmlCaptchaRecaptchaCfg {
737
+ type: string;
738
+ minscore: string;
739
+ }
740
+ interface XmlCaptchaTurnstileCfg {
741
+ type: string;
742
+ }
743
+ interface XmlImageElementCfg {
744
+ type: string;
745
+ }
746
+ interface XmlCommentElementCfg {
747
+ type: string;
748
+ attr: string;
749
+ }
750
+ interface XmlInputElementCfg {
751
+ type: string;
752
+ enter: boolean;
753
+ }
754
+ interface XmlSubmitElementCfg {
755
+ type: string;
756
+ }
757
+ interface XmlCheckResultCfg {
758
+ type: string;
759
+ attr: string;
760
+ failedstr: string;
761
+ }
762
+ interface XmlCaptchaAmazonCfg {
763
+ type: string;
764
+ }
765
+ interface XmlCaptchaTextCfg {
766
+ type: string;
767
+ case: boolean;
768
+ lang: string;
769
+ math: boolean;
770
+ maxlen: string;
771
+ minlen: string;
772
+ question: boolean;
773
+ space: boolean;
774
+ texttype: string;
775
+ }
776
+ interface XmlCaptchaCoordinateCfg {
777
+ type: string;
778
+ lang: string;
779
+ maxclicks: string;
780
+ minclicks: string;
781
+ }
782
+ interface XmlTransformCfg {
783
+ defaultval: string;
784
+ usevar: boolean;
785
+ }
786
+ interface XmlFunCGethtmlCfg {
787
+ type: string;
788
+ subtype: string;
789
+ loc: string;
790
+ }
791
+ interface XmlFunCGettextCfg {
792
+ type: string;
793
+ subtype: string;
794
+ loc: string;
795
+ }
796
+ interface XmlFunCHtmlCfg {
797
+ type: string;
798
+ subtype: string;
799
+ loc: string;
800
+ content: string;
801
+ }
802
+ interface XmlFunCRemoveCfg {
803
+ type: string;
804
+ subtype: string;
805
+ loc: string;
806
+ }
807
+ interface XmlFunCTextCfg {
808
+ type: string;
809
+ subtype: string;
810
+ loc: string;
811
+ content: string;
812
+ }
813
+ interface XmlFunCeilCfg {
814
+ type: string;
815
+ }
816
+ interface XmlFunClosingsubstrCfg {
817
+ type: string;
818
+ openchar: string;
819
+ startstr: string;
820
+ position: string;
821
+ }
822
+ interface XmlFunCompressCfg {
823
+ type: string;
824
+ method: string;
825
+ flush: string;
826
+ finishflush: string;
827
+ chunksize: string;
828
+ windowbits: string;
829
+ level: string;
830
+ memlevel: string;
831
+ strategy: string;
832
+ sourceencoding: string;
833
+ targetencoding: string;
834
+ }
835
+ interface XmlFunConcatCfg {
836
+ type: string;
837
+ str1: string;
838
+ str2: string;
839
+ str3: string;
840
+ str4: string;
841
+ str5: string;
842
+ }
843
+ interface XmlFunConvertencodingCfg {
844
+ type: string;
845
+ sourceencoding: string;
846
+ targetencoding: string;
847
+ start: string;
848
+ end: string;
849
+ }
850
+ interface XmlFunDecompressCfg {
851
+ type: string;
852
+ method: string;
853
+ flush: string;
854
+ finishflush: string;
855
+ chunksize: string;
856
+ windowbits: string;
857
+ sourceencoding: string;
858
+ targetencoding: string;
859
+ start: string;
860
+ end: string;
861
+ }
862
+ interface XmlFunDecodeCfg {
863
+ type: string;
864
+ str1: string;
865
+ val1: string;
866
+ str2: string;
867
+ val2: string;
868
+ str3: string;
869
+ val3: string;
870
+ str4: string;
871
+ val4: string;
872
+ str5: string;
873
+ val5: string;
874
+ defaultval: string;
875
+ }
876
+ interface XmlFunFloorCfg {
877
+ type: string;
878
+ }
879
+ interface XmlFunHashcodeCfg {
880
+ type: string;
881
+ hashmethod: string;
882
+ }
883
+ interface XmlFunIncludesCfg {
884
+ type: string;
885
+ searchstr: string;
886
+ }
887
+ interface XmlFunInsertCfg {
888
+ type: string;
889
+ str: string;
890
+ }
891
+ interface XmlFunItemstoobjCfg {
892
+ type: string;
893
+ split: string;
894
+ kvsplit: string;
895
+ keytrim: boolean;
896
+ keynows: boolean;
897
+ valuetrim: boolean;
898
+ valuenows: boolean;
899
+ }
900
+ interface XmlFunJsonparseCfg {
901
+ type: string;
902
+ key1: string;
903
+ key2: string;
904
+ key3: string;
905
+ key4: string;
906
+ key5: string;
907
+ key6: string;
908
+ keys: boolean;
909
+ length: boolean;
910
+ srctype: string;
911
+ }
912
+ interface XmlFunLengthCfg {
913
+ type: string;
914
+ }
915
+ interface XmlFunMatchallCfg {
916
+ type: string;
917
+ pattern: string;
918
+ flags: string;
919
+ filter: string;
920
+ start: string;
921
+ end: string;
922
+ resulttype: string;
923
+ join: string;
924
+ }
925
+ interface XmlFunMaxCfg {
926
+ type: string;
927
+ split: string;
928
+ }
929
+ interface XmlFunMinCfg {
930
+ type: string;
931
+ split: string;
932
+ }
933
+ interface XmlFunNumeqCfg {
934
+ type: string;
935
+ num: string;
936
+ }
937
+ interface XmlFunNumgeCfg {
938
+ type: string;
939
+ num: string;
940
+ }
941
+ interface XmlFunNumgtCfg {
942
+ type: string;
943
+ num: string;
944
+ }
945
+ interface XmlFunNumleCfg {
946
+ type: string;
947
+ num: string;
948
+ }
949
+ interface XmlFunNumltCfg {
950
+ type: string;
951
+ num: string;
952
+ }
953
+ interface XmlFunNotincludesCfg {
954
+ type: string;
955
+ searchstr: string;
956
+ }
957
+ interface XmlFunPadendCfg {
958
+ type: string;
959
+ targetlen: string;
960
+ padstr: string;
961
+ }
962
+ interface XmlFunPadstartCfg {
963
+ type: string;
964
+ targetlen: string;
965
+ padstr: string;
966
+ }
967
+ interface XmlFunParsefloatCfg {
968
+ type: string;
969
+ }
970
+ interface XmlFunParseintCfg {
971
+ type: string;
972
+ }
973
+ interface XmlFunRandomarryidxesCfg {
974
+ type: string;
975
+ join: string;
976
+ }
977
+ interface XmlFunReplaceCfg {
978
+ type: string;
979
+ substr: string;
980
+ newstr: string;
981
+ regexp: boolean;
982
+ flags: string;
983
+ }
984
+ interface XmlFunRoundCfg {
985
+ type: string;
986
+ }
987
+ interface XmlFunSetcomplementCfg {
988
+ type: string;
989
+ array2: string;
990
+ split1: string;
991
+ split2: string;
992
+ sorttype: string;
993
+ valtype: string;
994
+ limit: string;
995
+ join: string;
996
+ }
997
+ interface XmlFunSetdifferenceCfg {
998
+ type: string;
999
+ array2: string;
1000
+ split1: string;
1001
+ split2: string;
1002
+ sorttype: string;
1003
+ valtype: string;
1004
+ limit: string;
1005
+ join: string;
1006
+ }
1007
+ interface XmlFunSetintersectionCfg {
1008
+ type: string;
1009
+ array2: string;
1010
+ split1: string;
1011
+ split2: string;
1012
+ sorttype: string;
1013
+ valtype: string;
1014
+ limit: string;
1015
+ join: string;
1016
+ }
1017
+ interface XmlFunSetunionCfg {
1018
+ type: string;
1019
+ array2: string;
1020
+ split1: string;
1021
+ split2: string;
1022
+ sorttype: string;
1023
+ valtype: string;
1024
+ limit: string;
1025
+ join: string;
1026
+ }
1027
+ interface XmlFunSliceCfg {
1028
+ type: string;
1029
+ beginidx: string;
1030
+ endidx: string;
1031
+ }
1032
+ interface XmlFunShuffleCfg {
1033
+ type: string;
1034
+ split: string;
1035
+ }
1036
+ interface XmlFunSortCfg {
1037
+ type: string;
1038
+ split: string;
1039
+ valtype: string;
1040
+ sorttype: string;
1041
+ join: string;
1042
+ }
1043
+ interface XmlFunSpecialconvertCfg {
1044
+ type: string;
1045
+ method: string;
1046
+ para1: string;
1047
+ para2: string;
1048
+ para3: string;
1049
+ para4: string;
1050
+ para5: string;
1051
+ }
1052
+ interface XmlFunSplitCfg {
1053
+ type: string;
1054
+ split: string;
1055
+ limit: string;
1056
+ filter: string;
1057
+ start: string;
1058
+ end: string;
1059
+ resulttype: string;
1060
+ val: string;
1061
+ insertstr: string;
1062
+ join: string;
1063
+ }
1064
+ interface XmlFunStreqCfg {
1065
+ type: string;
1066
+ str: string;
1067
+ }
1068
+ interface XmlFunSubstrafterCfg {
1069
+ type: string;
1070
+ substr: string;
1071
+ position: string;
1072
+ last: boolean;
1073
+ include: boolean;
1074
+ }
1075
+ interface XmlFunSubstrbeforeCfg {
1076
+ type: string;
1077
+ substr: string;
1078
+ position: string;
1079
+ last: boolean;
1080
+ include: boolean;
1081
+ }
1082
+ interface XmlFunToboolCfg {
1083
+ type: string;
1084
+ }
1085
+ interface XmlFunTolowercaseCfg {
1086
+ type: string;
1087
+ }
1088
+ interface XmlFunTonumCfg {
1089
+ type: string;
1090
+ }
1091
+ interface XmlFunTouppercaseCfg {
1092
+ type: string;
1093
+ }
1094
+ interface XmlFunTrimCfg {
1095
+ type: string;
1096
+ }
1097
+ interface XmlFunTrimendCfg {
1098
+ type: string;
1099
+ }
1100
+ interface XmlFunTrimstartCfg {
1101
+ type: string;
1102
+ }
1103
+ interface XmlFunUniqCfg {
1104
+ type: string;
1105
+ split: string;
1106
+ join: string;
1107
+ }
1108
+ interface XmlFunUrldelparamsCfg {
1109
+ type: string;
1110
+ param1: string;
1111
+ param2: string;
1112
+ param3: string;
1113
+ }
1114
+ interface XmlFunUrldecodeCfg {
1115
+ type: string;
1116
+ method: string;
1117
+ encoding: string;
1118
+ percent: boolean;
1119
+ }
1120
+ interface XmlFunUrlencodeCfg {
1121
+ type: string;
1122
+ method: string;
1123
+ encoding: string;
1124
+ percent: boolean;
1125
+ }
1126
+ interface XmlFunUrlgetCfg {
1127
+ type: string;
1128
+ attrorpath: string;
1129
+ hashmethod: string;
1130
+ hostnameparts: string;
1131
+ params: string;
1132
+ param: string;
1133
+ extname: string;
1134
+ validextnames: string;
1135
+ title: string;
1136
+ }
1137
+ interface XmlFunUrlsetparamsCfg {
1138
+ type: string;
1139
+ param1: string;
1140
+ val1: string;
1141
+ param2: string;
1142
+ val2: string;
1143
+ param3: string;
1144
+ val3: string;
1145
+ replace: boolean;
1146
+ }
1147
+ interface XmlFunMyfunCfg {
1148
+ type: string;
1149
+ name: string;
1150
+ arg1: string;
1151
+ arg2: string;
1152
+ arg3: string;
1153
+ arg4: string;
1154
+ arg5: string;
1155
+ }
1156
+ interface XmlGetAddedtaskparasCfg {
1157
+ type: string;
1158
+ tid: string;
1159
+ limit: string;
1160
+ join: string;
1161
+ interval: string;
1162
+ }
1163
+ interface XmlGetContentCfg {
1164
+ type: string;
1165
+ }
1166
+ interface XmlGetCookiesCfg {
1167
+ type: string;
1168
+ urls: string;
1169
+ domain: string;
1170
+ name: string;
1171
+ path: string;
1172
+ value: boolean;
1173
+ }
1174
+ interface XmlGetDatetimeCfg {
1175
+ type: string;
1176
+ format: string;
1177
+ }
1178
+ interface XmlGetExecedtaskparasCfg {
1179
+ type: string;
1180
+ tid: string;
1181
+ limit: string;
1182
+ join: string;
1183
+ interval: string;
1184
+ }
1185
+ interface XmlGetFileCfg {
1186
+ type: string;
1187
+ url: string;
1188
+ path: string;
1189
+ basedir: string;
1190
+ pathtype: string;
1191
+ hashmethod: string;
1192
+ hostnameparts: string;
1193
+ params: string;
1194
+ extname: string;
1195
+ validextnames: string;
1196
+ pathvarname: string;
1197
+ proxy: boolean;
1198
+ headers: string;
1199
+ referer: string;
1200
+ setvar: boolean;
1201
+ }
1202
+ interface XmlHttpbrowserCfg {
1203
+ name: string;
1204
+ minversion: string;
1205
+ maxversion: string;
1206
+ httpversion: string;
1207
+ }
1208
+ interface XmlHttpdeviceCfg {
1209
+ device: string;
1210
+ }
1211
+ interface XmlHttpheaderCfg {
1212
+ name: string;
1213
+ source: string;
1214
+ value: string;
1215
+ delete: boolean;
1216
+ }
1217
+ interface XmlHttposCfg {
1218
+ os: string;
1219
+ }
1220
+ interface XmlGetHttpheadersCfg {
1221
+ type: string;
1222
+ headermethod: string;
1223
+ origheaders: string;
1224
+ browserlist: string;
1225
+ httpversion: string;
1226
+ }
1227
+ interface XmlGetMhtmlCfg {
1228
+ type: string;
1229
+ path: string;
1230
+ basedir: string;
1231
+ pathtype: string;
1232
+ hashmethod: string;
1233
+ hostnameparts: string;
1234
+ pathvarname: string;
1235
+ }
1236
+ interface XmlGetPdfCfg {
1237
+ type: string;
1238
+ path: string;
1239
+ basedir: string;
1240
+ pathtype: string;
1241
+ hashmethod: string;
1242
+ hostnameparts: string;
1243
+ scale: string;
1244
+ displayheaderfooter: boolean;
1245
+ headertemplate: string;
1246
+ footertemplate: string;
1247
+ printbackground: boolean;
1248
+ landscape: boolean;
1249
+ pageranges: string;
1250
+ format: string;
1251
+ width: string;
1252
+ height: string;
1253
+ top: string;
1254
+ right: string;
1255
+ bottom: string;
1256
+ left: string;
1257
+ screen: boolean;
1258
+ onepage: boolean;
1259
+ hmargin: string;
1260
+ setvar: boolean;
1261
+ pathvarname: string;
1262
+ }
1263
+ interface XmlGetQueuedcntwithparasCfg {
1264
+ type: string;
1265
+ tid: string;
1266
+ parasstr: string;
1267
+ operator: string;
1268
+ }
1269
+ interface XmlGetQueuedtaskparasCfg {
1270
+ type: string;
1271
+ tid: string;
1272
+ limit: string;
1273
+ join: string;
1274
+ interval: string;
1275
+ }
1276
+ interface XmlGetRandomCfg {
1277
+ type: string;
1278
+ min: string;
1279
+ max: string;
1280
+ }
1281
+ interface XmlGetResponseCfg {
1282
+ type: string;
1283
+ requrl: string;
1284
+ idx: string;
1285
+ length: boolean;
1286
+ }
1287
+ interface XmlGetScreenshotCfg {
1288
+ type: string;
1289
+ basedir: string;
1290
+ path: string;
1291
+ extname: string;
1292
+ quality: string;
1293
+ fullpage: boolean;
1294
+ x: string;
1295
+ y: string;
1296
+ width: string;
1297
+ height: string;
1298
+ omitbackground: boolean;
1299
+ setvar: boolean;
1300
+ pathvarname: string;
1301
+ }
1302
+ interface XmlGetSpecialCfg {
1303
+ type: string;
1304
+ method: string;
1305
+ parasstr: string;
1306
+ }
1307
+ interface XmlGetTitleCfg {
1308
+ type: string;
1309
+ aspath: boolean;
1310
+ }
1311
+ interface XmlGetWindowCfg {
1312
+ type: string;
1313
+ key1: string;
1314
+ key2: string;
1315
+ key3: string;
1316
+ key4: string;
1317
+ key5: string;
1318
+ key6: string;
1319
+ keys: boolean;
1320
+ length: boolean;
1321
+ }
1322
+ type XmlActionConfig = XmlActionApiCfg | XmlActionBreakCfg | XmlActionCaptchaCfg | XmlActionClickCfg | XmlActionContinueCfg | XmlActionMiscCfg | XmlActionExitCfg | XmlActionExtractCfg | XmlActionExtractArrayCfg | XmlActionExtractScriptCfg | XmlActionExtractTableCfg | XmlActionGotoCfg | XmlActionHoverCfg | XmlActionIfelseCfg | XmlActionInputCfg | XmlActionInterceptClearCfg | XmlActionInterceptSetCfg | XmlActionLoopdowhileElementCfg | XmlActionLoopdowhileTemplstrCfg | XmlActionLoopforCfg | XmlActionLoopinelesCfg | XmlActionLoopinstrCfg | XmlActionScrollByCfg | XmlActionScrollIntoviewCfg | XmlActionScrollToCfg | XmlActionSelectCfg | XmlActionSetvarDbqueryCfg | XmlActionSetvarElementCfg | XmlActionSetvarFileCfg | XmlActionSetvarGetCfg | XmlActionSetvarOcrCfg | XmlActionSetvarSubtaskCfg | XmlActionSetvarTemplstrCfg | XmlActionSubtaskCfg | XmlActionWaitElementCfg | XmlActionWaitNavigationCfg | XmlActionWaitSleepCfg;
1323
+ type XmlFunConfig = XmlFunCGethtmlCfg | XmlFunCGettextCfg | XmlFunCHtmlCfg | XmlFunCRemoveCfg | XmlFunCTextCfg | XmlFunCeilCfg | XmlFunClosingsubstrCfg | XmlFunCompressCfg | XmlFunConcatCfg | XmlFunConvertencodingCfg | XmlFunDecompressCfg | XmlFunDecodeCfg | XmlFunFloorCfg | XmlFunHashcodeCfg | XmlFunIncludesCfg | XmlFunInsertCfg | XmlFunItemstoobjCfg | XmlFunJsonparseCfg | XmlFunLengthCfg | XmlFunMatchallCfg | XmlFunMaxCfg | XmlFunMinCfg | XmlFunNumeqCfg | XmlFunNumgeCfg | XmlFunNumgtCfg | XmlFunNumleCfg | XmlFunNumltCfg | XmlFunNotincludesCfg | XmlFunPadendCfg | XmlFunPadstartCfg | XmlFunParsefloatCfg | XmlFunParseintCfg | XmlFunRandomarryidxesCfg | XmlFunReplaceCfg | XmlFunRoundCfg | XmlFunSetcomplementCfg | XmlFunSetdifferenceCfg | XmlFunSetintersectionCfg | XmlFunSetunionCfg | XmlFunSliceCfg | XmlFunShuffleCfg | XmlFunSortCfg | XmlFunSpecialconvertCfg | XmlFunSplitCfg | XmlFunStreqCfg | XmlFunSubstrafterCfg | XmlFunSubstrbeforeCfg | XmlFunToboolCfg | XmlFunTolowercaseCfg | XmlFunTonumCfg | XmlFunTouppercaseCfg | XmlFunTrimCfg | XmlFunTrimendCfg | XmlFunTrimstartCfg | XmlFunUniqCfg | XmlFunUrldelparamsCfg | XmlFunUrldecodeCfg | XmlFunUrlencodeCfg | XmlFunUrlgetCfg | XmlFunUrlsetparamsCfg | XmlFunMyfunCfg;
1324
+ type XmlGetConfig = XmlGetAddedtaskparasCfg | XmlGetContentCfg | XmlGetCookiesCfg | XmlGetDatetimeCfg | XmlGetExecedtaskparasCfg | XmlGetFileCfg | XmlGetHttpheadersCfg | XmlGetMhtmlCfg | XmlGetPdfCfg | XmlGetQueuedcntwithparasCfg | XmlGetQueuedtaskparasCfg | XmlGetRandomCfg | XmlGetResponseCfg | XmlGetScreenshotCfg | XmlGetSpecialCfg | XmlGetTitleCfg | XmlGetWindowCfg;
404
1325
 
405
1326
  type TokenCaptchaType = "amazon" | "funcaptcha" | "geetest" | "keycaptcha" | "mtcaptcha" | "recaptcha" | "turnstile";
406
1327
  type RecognitionCaptchaType = "text" | "coordinate" | "grid" | "slider" | "rotation";
@@ -419,7 +1340,39 @@ interface ScraperStateData extends BrowserStateData {
419
1340
  */
420
1341
  userData: Record<string, string>;
421
1342
  }
1343
+ type LoginWhen = "current" | "popup";
422
1344
  type InParas = Record<string, string>;
1345
+ type XmlElementConfig = Record<string, string | boolean | any>;
1346
+ type ElementTagConfig = Record<string, string | boolean | any>;
1347
+ type ActionConfig = Record<string, string | boolean | any>;
1348
+ interface PageMatchInfo {
1349
+ /**
1350
+ * * eurl begins with http: urls = eurl.split(","); matchedFlag = href.startsWith(urls[0]) && urls.slice(1).every(item => href.includes(item));
1351
+ * * eurl doesn't begin with http: matchedFlag = href.match(new RegExp(eurl))
1352
+ * @default ""
1353
+ */
1354
+ eurl: string;
1355
+ /**
1356
+ * @default ""
1357
+ */
1358
+ eloc: string;
1359
+ /**
1360
+ * @default ""
1361
+ */
1362
+ pn1: string;
1363
+ /**
1364
+ * @default ""
1365
+ */
1366
+ pv1: string;
1367
+ /**
1368
+ * @default ""
1369
+ */
1370
+ pn2: string;
1371
+ /**
1372
+ * @default ""
1373
+ */
1374
+ pv2: string;
1375
+ }
423
1376
  interface FontttfConfig {
424
1377
  exloc: string;
425
1378
  inloc: string;
@@ -442,7 +1395,17 @@ interface FontsConfig {
442
1395
  fontsvgCfg?: XmlFontsvgCfg;
443
1396
  fontttfConfig?: FontttfConfig;
444
1397
  }
1398
+ interface CaptchaOptions {
1399
+ captchaTypes: CaptchaType[];
1400
+ pageMatchInfos?: PageMatchInfo[];
1401
+ }
1402
+ interface LoginOptions {
1403
+ loginWhens: LoginWhen[];
1404
+ pageMatchInfos: PageMatchInfo[];
1405
+ authInfo?: AuthInfo;
1406
+ }
445
1407
  type ElementSource = "browser" | "cheerio";
1408
+ type ElementSourceExt = ElementSource | "default";
446
1409
  interface TemplateInScraper {
447
1410
  templateId: TemplateId;
448
1411
  domainId: DomainId;
@@ -457,12 +1420,19 @@ interface TemplateInScraper {
457
1420
  configDetail: string;
458
1421
  capName?: string;
459
1422
  }
1423
+ interface TemplateConfig {
1424
+ template: TemplateInScraper;
1425
+ actionConfigs: XmlActionConfig[];
1426
+ fontsConfig: FontsConfig | null;
1427
+ myfunScripts: Record<string, string>;
1428
+ }
460
1429
  type AttrsInXml = Record<string, string>;
461
1430
  type DatatableColumnMap = Map<string, string>;
462
1431
  interface ParsedTemplate {
463
1432
  actionConfigs: XmlActionConfig[];
464
1433
  paraCfgs: XmlParaCfg[];
465
1434
  fontsConfig: FontsConfig | null;
1435
+ myfunScripts: Record<string, string>;
466
1436
  attrsInXml: AttrsInXml;
467
1437
  captchaTypes: CaptchaType[];
468
1438
  lastUsedTime: number;
@@ -471,6 +1441,8 @@ interface ParsedTemplate {
471
1441
  template?: TemplateInScraper;
472
1442
  }
473
1443
  type ParsedTemplateExt = Required<ParsedTemplate>;
1444
+ type AuthInfo = Record<string, string>;
1445
+ type UserData = Record<string, string>;
474
1446
  /**
475
1447
  * Network context used to execute the task
476
1448
  */
@@ -501,6 +1473,44 @@ interface TaskNetworkContext {
501
1473
  */
502
1474
  standaloneApiContext: LsdApiContext | null;
503
1475
  }
1476
+ interface TaskOptions {
1477
+ performOneTask?: PerformOneTask;
1478
+ /**
1479
+ * @default 0
1480
+ */
1481
+ taskId?: number;
1482
+ /**
1483
+ * @default []
1484
+ */
1485
+ skipActions?: string[];
1486
+ /**
1487
+ * @default []
1488
+ */
1489
+ extractColumns?: string[];
1490
+ /**
1491
+ * @default "1.0"
1492
+ */
1493
+ version?: string;
1494
+ /**
1495
+ * @default "::"
1496
+ */
1497
+ splitStr?: string;
1498
+ /**
1499
+ * @default template.defaultElementSource
1500
+ */
1501
+ defaultElementSource?: ElementSource;
1502
+ /**
1503
+ * @default { cookies: [], localStorage: [], headers: {}, userData: {} }
1504
+ */
1505
+ stateData?: ScraperStateData;
1506
+ /**
1507
+ * @default {}
1508
+ */
1509
+ authInfo?: AuthInfo;
1510
+ _mine?: boolean;
1511
+ _captchaOptions?: CaptchaOptions;
1512
+ _loginOptions?: LoginOptions;
1513
+ }
504
1514
  type DataRecord = Record<string, string>;
505
1515
  type ExecData = Record<string, DataRecord[]>;
506
1516
  interface Subtask {
@@ -535,6 +1545,22 @@ interface TaskResult {
535
1545
  misc?: TaskMisc;
536
1546
  }
537
1547
  type TaskType = "indAsync" | "indSync" | "memSync";
1548
+ type PerformOneTask = (templateId: TemplateId, parasStr: string, taskNetworContext: TaskNetworkContext, taskType?: TaskType, xmlStr?: string, taskId?: number, useNickName?: boolean) => Promise<TaskResult>;
1549
+ interface TemplateManagerOptions {
1550
+ /**
1551
+ * templateFilename: template${templateId}.xml
1552
+ * @default ./template/
1553
+ */
1554
+ templateDir?: string;
1555
+ /**
1556
+ * template: GET ${apiBasePath}/template?templateId=xxx&code=xxx
1557
+ * @default "https://web.letsscrapedata.com/api/nologin/"
1558
+ */
1559
+ apiBasePath?: string;
1560
+ }
1561
+ interface TemplateManger {
1562
+ getTemplateConfig(templatedId: number, code?: string): Promise<ParsedTemplate>;
1563
+ }
538
1564
  interface TemplateTasks {
539
1565
  tid: number;
540
1566
  parasstrs: string[];
@@ -592,6 +1618,11 @@ interface ScraperConfig {
592
1618
  * @default false
593
1619
  */
594
1620
  loadUnfinishedTasks?: boolean;
1621
+ /**
1622
+ * unit: minutes
1623
+ * @default 0
1624
+ */
1625
+ loadFailedTasksInterval?: number;
595
1626
  /**
596
1627
  * @default "", which will use current directory of process + "/data/"
597
1628
  * if not empty, baseDir must be an absolute path, and the directory must exist and have read and write permissions.
@@ -685,8 +1716,40 @@ interface ScraperConfig {
685
1716
  */
686
1717
  columnSeperator?: string;
687
1718
  }
1719
/** Empty interface: it declares no members, so any non-nullish value is assignable to it. Intent is not visible here. */
+ interface SimpleScheduler {
1720
+ }
1721
/**
 * Input object with one original string and five string arguments, all required.
 * Presumably the data handed to a user-defined "myfun" script; confirm against the implementation.
 */
+ interface MyFunInData {
1722
+ origStr: string;
1723
+ arg1: string;
1724
+ arg2: string;
1725
+ arg3: string;
1726
+ arg4: string;
1727
+ arg5: string;
1728
+ }
1729
/**
 * NOTE(review): this is the string LITERAL type "string" (only the exact value "string" is assignable),
 * not the primitive type `string`. That is probably unintended; confirm in the source this
 * declaration is generated from before relying on it.
 */
+ type MyFunResult = "string";
1730
/**
 * Input object for an extract script (role inferred from the name; confirm against the implementation).
 * `vars` always carries `inParas` and `userData` and may carry any further keys; `html` is the only optional member.
 */
+ interface ExtractScriptInData {
1731
+ vars: {
1732
+ inParas: InParas;
1733
+ userData: UserData;
1734
+ [key: string]: any;
1735
+ };
1736
+ responses: ResponseInterceptionItem[];
1737
+ tabName: string;
1738
+ maxLoops: number;
1739
+ errName: string;
1740
+ execData: ExecData;
1741
+ html?: string;
1742
+ }
1743
/** Result object of an extract script: `execData` is required, `errName` is optional. */
+ interface ExtractScriptResult {
1744
+ execData: ExecData;
1745
+ errName?: string;
1746
+ }
688
1747
 
689
1748
  /** Registers `logFun` as the scraper's log function and returns a boolean (its meaning is not visible in this declaration file). */
  declare function setScraperLogFun(logFun: LogFunction): boolean;
1749
/**
 * Four logging helpers with identical signatures: each accepts any number of arguments of any type
 * and returns a promise that resolves with no value. The level of each helper (debug, info, warning,
 * error) is inferred from its name only.
 */
+ declare function logdbg(...args: any[]): Promise<void>;
1750
+ declare function loginfo(...args: any[]): Promise<void>;
1751
+ declare function logwarn(...args: any[]): Promise<void>;
1752
+ declare function logerr(...args: any[]): Promise<void>;
690
1753
 
691
1754
  /** 修改node_modules/xml2js/lib/parser.js文件,添加如下内容(根据tagName自动添加type和subtype属性,如action_setvar_element添加 type="setvar" subtype="element"):
692
1755
  //////// start of LSD added by Joe ////////////////////////////////////////////////////////////////////////////////////////////
@@ -719,6 +1782,7 @@ declare class TaskParser {
719
1782
  static convertXmlToJson(xmlStr: string, defaultCfgFlag?: boolean): Promise<any>;
720
1783
  static getPartOfJsonCfg(jsonCfg: any, partName: string, optional?: boolean): any;
721
1784
  static getParaCfgsFromJsonCfg(jsonCfg: any): XmlParaCfg[];
1785
+ static getMyfunCfgsFromJsonCfg(jsonCfg: any): XmlMyfunCfg[];
722
1786
  static getAttrsInXml(jsonCfg: any): AttrsInXml;
723
1787
  static getCaptchTypes(jsonCfg: any): CaptchaType[];
724
1788
  static getDatableMapFromJsonCfg(jsonCfg: any): Map<string, DatatableColumnMap> | null;
@@ -733,9 +1797,283 @@ declare class TemplateManagerInScraper {
733
1797
  static clearTemplateConfig(templateId?: number): boolean;
734
1798
  }
735
1799
 
1800
/**
 * Read-only bag of system parameters exposed through getters; it has private state (`#private`).
 * Apart from `templateId` and `taskId` (numbers) and `url` (any), every getter is typed as a string;
 * `hasPopupPage` is narrowed to "0" | "1".
 * NOTE(review): `taskContext` and the `url` getter are typed `any`; a TaskContext interface is
 * declared in this file, but whether it is the intended type cannot be confirmed from here.
 */
+ declare class SysParas {
1801
+ #private;
1802
+ constructor(taskContext: any, templateId: number, taskId: number, version: string);
1803
+ get version(): string;
1804
+ get templateId(): number;
1805
+ get taskId(): number;
1806
+ get url(): any;
1807
+ get hasPopupPage(): "0" | "1";
1808
+ get subtaskNum(): string;
1809
+ get datatableNum(): string;
1810
+ get responses(): string;
1811
+ get currentDate(): string;
1812
+ get currentDateTime(): string;
1813
+ get currentIsoTime(): string;
1814
+ get currentLocalTime(): string;
1815
+ get currentUtcTime(): string;
1816
+ get currentTime(): string;
1817
+ get currentUnixMs(): string;
1818
+ get currentUnixSec(): string;
1819
+ get random10(): string;
1820
+ get random100(): string;
1821
+ get random1000(): string;
1822
+ get random10000(): string;
1823
+ get uuidv1(): string;
1824
+ get uuidv4(): string;
1825
+ }
1826
+
1827
/**
 * Parameter container with four required members (inParas, authInfo, sysParas, userData).
 * The index signature additionally allows any other string key with a value of any type.
 */
+ interface ParasObject {
1828
+ inParas: InParas;
1829
+ authInfo: AuthInfo;
1830
+ sysParas: SysParas;
1831
+ userData: UserData;
1832
+ [key: string]: any;
1833
+ }
1834
/** One entry of TaskContext.loopsStack: an LsdElement, a string, or a number. */
+ type LoopElement = LsdElement | string | number;
1835
+ /**
1836
+ * Task context used to execute a task; it groups:
1837
+ * * network context
1838
+ * * popup page
1839
+ * * stacks: loop/browser elements/cheerio elements
1840
+ * * state data
1841
+ * * task result
1842
+ */
1843
+ interface TaskContext {
1844
+ /**
1845
+ * page and all non-null LsdApiContexts share this proxy
1846
+ */
1847
+ proxy: Proxy | null;
1848
+ /**
1849
+ * page used to open web pages; null if the task does not need to open web pages
1850
+ */
1851
+ page: LsdPage | null;
1852
+ /**
1853
+ * browserContext's LsdApiContext, which shares the state data between the tasks that use pages in the same browserContext
1854
+ * * null if there is no browserContext
1855
+ */
1856
+ browserApiContext: LsdApiContext | null;
1857
+ /**
1858
+ * standalone LsdApiContext, which shares the state data between the tasks that use this LsdApiContext
1859
+ */
1860
+ standaloneApiContext: LsdApiContext | null;
1861
+ /**
1862
+ * LsdApiContext that the state data is bound to; it is equal to:
1863
+ * * browserApiContext if browserApiContext is not null
1864
+ * * standaloneApiContext if browserApiContext is null
1865
+ */
1866
+ stateApiContext: LsdApiContext | null;
1867
+ /**
1868
+ * LsdApiContext that is created when it is first used.
1869
+ */
1870
+ taskApiContext: LsdApiContext | null;
1871
+ /**
1872
+ * the last popup page of page, which is used to execute a subtask:
1873
+ * * popupPage is always null if page is null
1874
+ */
1875
+ popupPage: LsdPage | null;
1876
+ /**
1877
+ * browser elements stack (elementSource is browser)
1878
+ */
1879
+ browserElesStack: LsdElement[];
1880
+ /**
1881
+ * CheerioPage used to extract data; it is created or refreshed from page.content or from a paras variable ("cheerioHtml" or "cheerioXml")
1882
+ */
1883
+ cheerioPage: CheerioPage | null;
1884
+ /**
1885
+ * cheerio elements stack (elementSource is cheerio)
1886
+ */
1887
+ cheerioElesStack: LsdElement[];
1888
+ /**
1889
+ * stack of all types of loops
1890
+ */
1891
+ loopsStack: LoopElement[];
1892
+ /**
1893
+ * whether a "continue" action has been raised and not yet processed
1894
+ */
1895
+ continueFlag: boolean;
1896
+ /**
1897
+ * whether a "break" action has been raised and not yet processed
1898
+ */
1899
+ breakFlag: boolean;
1900
+ origStateData: ScraperStateData;
1901
+ newStateData: ScraperStateData | null;
1902
+ execData: ExecData;
1903
+ subtasks: Subtask[];
1904
+ credits: number;
1905
+ subtaskResults: TaskResult[];
1906
+ }
1907
+
1908
/**
 * The six accepted API-context selector literals. "browser", "standalone", "state" and "task" match the
 * names of the *ApiContext members of TaskContext; the meaning of "default" and "fetch" is not visible here.
 */
+ type ApiContextType = "default" | "fetch" | "browser" | "standalone" | "state" | "task";
1909
/**
 * Callback signatures collected by GetOtherFunObj. The first four take one XML "get" config object and
 * resolve to a string. GetDataFromServerFunc takes a method (only the literal "get" is allowed), a request
 * URL, and untyped data/options, and also resolves to a string.
 * NOTE(review): the last alias ends in "Func" while its siblings end in "Fun"; it is exported under that name.
 */
+ type GetAddedTaskParasFun = (getCfg: XmlGetAddedtaskparasCfg) => Promise<string>;
1910
+ type GetExecedTaskParasFun = (getCfg: XmlGetExecedtaskparasCfg) => Promise<string>;
1911
+ type GetQueuedCntWithParasCfgFun = (getCfg: XmlGetQueuedcntwithparasCfg) => Promise<string>;
1912
+ type GetQueuedTaskParasFun = (getCfg: XmlGetQueuedtaskparasCfg) => Promise<string>;
1913
+ type GetDataFromServerFunc = (method: "get", requestUrl: string, data: any, options: any) => Promise<string>;
1914
/** Bundle of optional callbacks, one per Get*Fun alias; it is the type of GetPara.getOtherFunObj and of the argument to GetPara.setGetOtherFunObj. */
+ interface GetOtherFunObj {
1915
+ getAddedTaskParasFun?: GetAddedTaskParasFun;
1916
+ getExecedTaskParasFun?: GetExecedTaskParasFun;
1917
+ getQueuedCntWithParasCfgFun?: GetQueuedCntWithParasCfgFun;
1918
+ getQueuedTaskParasFun?: GetQueuedTaskParasFun;
1919
+ getDataFromServerFun?: GetDataFromServerFunc;
1920
+ }
1921
/** Callback that takes an XmlGetSpecialCfg plus untyped extra info and resolves to a string. */
+ type GetSpecialFun = (getCfg: XmlGetSpecialCfg, otherInfo: any) => Promise<string>;
1922
/** Dictionary of GetSpecialFun callbacks keyed by string (presumably the special getter's name; confirm). */
+ type GetSpecialFunObj = Record<string, GetSpecialFun>;
1923
/**
 * Static-only collection of "get" helpers; every member is static.
 * Most getters resolve asynchronously to a string. `getDatetime`, `getRandom` and `_getScreenSize`
 * return a string synchronously, and `hasSameOrigin` returns a boolean.
 * The two `set...FunObj` methods take the callback objects that are also exposed as the static
 * `getSpecialFunObj` / `getOtherFunObj` members, and return a boolean.
 * NOTE(review): `otherInfo` is typed `any` throughout, so its expected shape is not visible here.
 */
+ declare class GetPara {
1924
+ static getSpecialFunObj: GetSpecialFunObj;
1925
+ static setGetSpecialFunObj(getSpecialFunObj: GetSpecialFunObj): boolean;
1926
+ static getOtherFunObj: GetOtherFunObj;
1927
+ static setGetOtherFunObj(getOtherFunObj: GetOtherFunObj): boolean;
1928
+ static noNeedToReturnContent: string;
1929
+ static getPara(getCfg: XmlGetConfig, otherInfo: any): Promise<string>;
1930
+ static getParaInElement(getCfg: XmlGetConfig, element: LsdElement, otherInfo: any): Promise<string>;
1931
+ static getParaInPage(getCfg: XmlGetConfig, page: LsdPage, otherInfo: any): Promise<string>;
1932
+ static getContent(page: LsdPage): Promise<string>;
1933
+ static getCookies(page: LsdPage, getCfg: XmlGetCookiesCfg): Promise<string>;
1934
+ static getDatetime(getCfg: XmlGetDatetimeCfg): string;
1935
+ static getFile(getCfg: XmlGetFileCfg, otherInfo: any): Promise<string>;
1936
+ static _genHeaderMethod: Record<string, any>;
1937
+ static hasSameOrigin(sourceUrl: string, targetUrl: string, sourceType?: string): boolean;
1938
+ static getHttpHeaders(getCfg: XmlGetHttpheadersCfg, otherInfo: any): Promise<string>;
1939
+ static getMhtml(page: LsdPage, getCfg: XmlGetMhtmlCfg, paras: ParasObject): Promise<string>;
1940
+ static _getScreenSize(origSize: string): string;
1941
+ static getPdf(page: LsdPage, getCfg: XmlGetPdfCfg, paras: ParasObject): Promise<string>;
1942
+ static getRandom(getCfg: XmlGetRandomCfg): string;
1943
+ static getResponse(getCfg: XmlGetResponseCfg, otherInfo: any): Promise<string>;
1944
+ static getScreenshot(pageOrElement: LsdPage | LsdElement, getCfg: XmlGetScreenshotCfg, paras: ParasObject): Promise<string>;
1945
+ static getSpecial(getCfg: XmlGetSpecialCfg, otherInfo: any): Promise<string>;
1946
+ static getTitle(page: LsdPage, getCfg: XmlGetTitleCfg): Promise<string>;
1947
+ static getWindow(page: LsdPage, getCfg: XmlGetWindowCfg): Promise<string>;
1948
+ }
1949
+
1950
/** Synchronous converter: takes the original string and an XmlFunSpecialconvertCfg and returns a string. */
+ type SpecialConvertFun = (origStr: string, funCfg: XmlFunSpecialconvertCfg) => string;
1951
/** Dictionary of SpecialConvertFun converters keyed by string. */
+ type SpecialConvertFunObj = Record<string, SpecialConvertFun>;
1952
/** NOTE(review): alias of `any`, so every `funCfg: FunctionConfig` parameter in this file is unchecked. */
+ type FunctionConfig = any;
1953
/** Callback that takes a name and resolves to a string (presumably the script text of a standard function; confirm). */
+ type GetStdFunScriptFun = (name: string) => Promise<string>;
1954
/**
 * Static-only collection of string transformation helpers; it also has private state (`#private`).
 * `transformStr` is the asynchronous entry point: it takes the original string, a list of function
 * configs, a default value and a name-to-script map, and resolves to a string.
 * The `_xxx` members are individual synchronous helpers taking the original string and, for most,
 * one function config; the `__xxx` members are lower-level helpers with their own signatures.
 * NOTE(review): `FunctionConfig` is `any`, so the config shape each helper expects is not visible here.
 */
+ declare class LsdString {
1955
+ #private;
1956
+ static specialConvertFunObj: SpecialConvertFunObj;
1957
+ static setSpecialConvertFunObj(specialConvertFunObj: SpecialConvertFunObj): boolean;
1958
+ static transformStr(origStr: string, funCfgs: FunctionConfig[], defaultVal: string, myfunScripts: Record<string, string>): Promise<string>;
1959
+ static setGetStdFunScript(fun: GetStdFunScriptFun): boolean;
1960
+ static __getElementBySelector($: any, loc: string, idx?: number): any;
1961
+ static __cheerioOptions: {
1962
+ xmlMode: boolean;
1963
+ decodeEntities: boolean;
1964
+ };
1965
+ static _c_attr(origStr: string, funCfg: FunctionConfig): any;
1966
+ static _c_data(origStr: string, funCfg: FunctionConfig): any;
1967
+ static _c_html(origStr: string, funCfg: FunctionConfig): any;
1968
+ static _c_length(origStr: string, funCfg: FunctionConfig): string;
1969
+ static _c_text(origStr: string, funCfg: FunctionConfig): any;
1970
+ static _ceil(origStr: string): string;
/* NOTE(review): unlike its siblings this may return the number -1 instead of a string; callers must handle both. */
1971
+ static _closingsubstr(origStr: string, funCfg: FunctionConfig): string | -1;
1972
+ static __getCompressOptions(funCfg: FunctionConfig, compressFlag: boolean): {} | undefined;
1973
+ static _compress(origStr: string, funCfg: FunctionConfig): string;
1974
+ static _decompress(origStr: string, funCfg: FunctionConfig): string;
1975
+ static _concat(origStr: string, funCfg: FunctionConfig): string;
1976
+ static _specialconvert(origStr: string, funCfg: FunctionConfig): string;
1977
+ static _decode(origStr: string, funCfg: FunctionConfig): any;
1978
+ static __convertBufferToString(buffer: Buffer, encoding: BufferEncoding, startStr: string, endStr: string): string;
1979
+ static _convertencoding(origStr: string, funCfg: FunctionConfig): string;
1980
+ static _floor(origStr: string): string;
1981
+ static _hashcode(origStr: string, funCfg: FunctionConfig): string;
1982
+ static _includes(origStr: string, funCfg: FunctionConfig): string;
1983
+ static _insert(origStr: string, funCfg: FunctionConfig): string;
1984
+ static _itemstoobj(origStr: string, funCfg: FunctionConfig): string;
1985
+ static _jsonparse(origStr: string, funCfg: FunctionConfig): string;
1986
+ static _length(origStr: string): string;
1987
+ static _matchall(origStr: string, funCfg: FunctionConfig): string;
1988
+ static _max(origStr: string, funCfg: FunctionConfig): string;
1989
+ static _min(origStr: string, funCfg: FunctionConfig): string;
1990
+ static _numeq(origStr: string, funCfg: FunctionConfig): "0" | "1";
1991
+ static _numge(origStr: string, funCfg: FunctionConfig): "0" | "1";
1992
+ static _numgt(origStr: string, funCfg: FunctionConfig): "0" | "1";
1993
+ static _numle(origStr: string, funCfg: FunctionConfig): "0" | "1";
1994
+ static _numlt(origStr: string, funCfg: FunctionConfig): "0" | "1";
1995
+ static _notincludes(origStr: string, funCfg: FunctionConfig): string;
1996
+ static _padEnd(origStr: string, funCfg: FunctionConfig): string;
1997
+ static _padStart(origStr: string, funCfg: FunctionConfig): string;
1998
+ static _parseFloat(origStr: string): string;
1999
+ static _parseInt(origStr: string): string;
2000
+ static _randomarryidxes(origStr: string, funCfg: FunctionConfig): string;
2001
+ static _replace(origStr: string, funCfg: FunctionConfig): string;
2002
+ static _round(origStr: string): string;
2003
+ static __sort(origArr: string[], valtype: string, sorttype: string): string[];
2004
+ static _setcomplement(origStr: string, funCfg: FunctionConfig): string;
2005
+ static _setdifference(origStr: string, funCfg: FunctionConfig): string;
2006
+ static _setintersection(origStr: string, funCfg: FunctionConfig): string;
2007
+ static _setunion(origStr: string, funCfg: FunctionConfig): string;
2008
+ static _shuffle(origStr: string, funCfg: FunctionConfig): string;
2009
+ static _slice(origStr: string, funCfg: FunctionConfig): string;
2010
+ static _sort(origStr: string, funCfg: FunctionConfig): string;
2011
+ static _split(origStr: string, funCfg: FunctionConfig): string;
2012
+ static _streq(origStr: string, funCfg: FunctionConfig): "0" | "1";
2013
+ static _substrAfter(origStr: string, funCfg: FunctionConfig): string;
2014
+ static _substrBefore(origStr: string, funCfg: FunctionConfig): string;
/* NOTE(review): may return the boolean false instead of a string, unlike the "0" | "1" helpers above. */
2015
+ static _toBool(origStr: string): string | false;
2016
+ static _toLowerCase(origStr: string): string;
2017
+ static _toNum(origStr: string): string;
2018
+ static _toUpperCase(origStr: string): string;
2019
+ static _trim(origStr: string): string;
2020
+ static _trimEnd(origStr: string): string;
2021
+ static _trimStart(origStr: string): string;
2022
+ static _uniq(origStr: string, funCfg: FunctionConfig): string;
2023
+ static _urlencode(origStr: string, funCfg: FunctionConfig): string;
2024
+ static _urldecode(origStr: string, funCfg: FunctionConfig): string;
2025
+ static _urldelparams(origStr: string, funCfg: FunctionConfig): string;
2026
+ static _urlget(origStr: string, funCfg: FunctionConfig): string;
2027
+ static __setUrlParam(url: URL, param: string, val: string, replaceFlag: boolean): boolean;
2028
+ static _urlsetparams(origStr: string, funCfg: FunctionConfig): string;
/* NOTE(review): values are typed with the broad `Function` type; presumably a name-to-helper dispatch table (confirm). */
2029
+ static _funPerformers: Record<string, Function>;
2030
+ }
2031
/**
 * Takes an original string and a list of untyped function configs and resolves to a string.
 * `defaultVal` and `myFunPerformers` are optional. The parameter list parallels LsdString.transformStr,
 * but whether this function delegates to it is not visible in this declaration file.
 */
+ declare function getTransformExpStr(origStr: string, funCfgs: any[], defaultVal?: string, myFunPerformers?: Record<string, string>): Promise<string>;
2032
+
2033
/**
 * Event-emitting class that is constructed from a template config, input parameters, a network context
 * and optional TaskOptions. `run()` resolves to a TaskResult.
 * It exposes the underscore-prefixed fields and helper methods declared below and keeps further state private (`#private`).
 */
+ declare class TaskScraper extends EventEmitter {
2034
+ #private;
2035
+ _fontsConfig: FontsConfig | null;
2036
+ _templateId: TemplateId;
2037
+ _taskId: number;
2038
+ _taskContext: TaskContext;
2039
+ _paras: ParasObject;
2040
+ constructor(templateConfig: TemplateConfig, inParas: InParas, taskNetworkContext: TaskNetworkContext, taskOptions?: TaskOptions);
2041
+ _procErrname(errName: string, scenarioName: string): boolean;
2042
+ _getCfgStrAttr(cfg: XmlElementConfig, cfgAttrName: string, transTemplFlag?: boolean, mustFlag?: boolean): string;
2043
+ _getCfgBoolAttr(cfg: XmlElementConfig, cfgAttrName: string): boolean;
2044
+ _getCfgTextContext(cfg: XmlElementConfig, mustFlag?: boolean): string;
2045
+ _getCfgChildrenOf(parentCfg: XmlElementConfig, cfgType: string, mustFlag?: boolean): XmlElementConfig[];
2046
+ /** Steps for page operations that involve login, captcha solving, and data-page verification:
2047
+ * 0. Related actions and assumptions:
2048
+ * 0.1. Page-operation actions that carry the login/captcha/pageMatchInfo attributes: click/goto [/input/scroll/select]
2049
+ * 0.2. Assumption for popups: the events fire in this order: original page.popup -> LsdPage.pagePopup -> this.popup
2050
+ * 0.3. Other assumption: one complete page operation pops up at most one data page (i.e. step 2.3 runs at most once)
2051
+ * 1. Before the page operation (procBeforePageOperation):
2052
+ * 1.1. Set scraperTask.actionCfg
2053
+ * 1.2. If a popup page is involved: use setTimeout to emit a timeout event
2054
+ * 2. Page operation and related processing (scrollBy may consist of several operations, so each step below may run several times; in all other cases they run once)
2055
+ * 2.1. Perform the actual page operation
2056
+ * 2.2. After the page operation (procAfterPageOperation): listen for the TEEV_NEWPAGE_execId event (when a popup is involved; see the assumptions)
2057
+ * 2.3. If the expected data page popped up in the meantime (actCfg.popupsubtask):
2058
+ * 2.3.1. Replace the current data page (false):
2059
+ * 2.3.2. Save popupPage (true): used later to run the synchronous subtask
2060
+ * 2.4. Process the current page (processLoginAndCaptchaPage): login, captcha, and whether the current page is the expected page
2061
+ * 3. Clear scraperTask.actionCfg
2062
+ */
2063
+ processNewPageLoginCaptcha(page: LsdPage, actCfg: ActionConfig, pageOperateFun: Function, pageOperateArgs: any[], actType?: string): Promise<boolean>;
2064
+ _performGetstatedataInMisc(miscCfg: XmlElementConfig): Promise<void>;
2065
+ run(): Promise<TaskResult>;
2066
+ __performMiscAction(actCfg: ActionConfig): Promise<boolean>;
2067
+ __processDbquery(actCfg: ActionConfig, defaultVal: string): Promise<string>;
2068
+ __processBrowserOcrCfg(ocrCfg: XmlElementConfig, $browserEles: LsdElement[]): Promise<string>;
2069
+ __decodeFontInContent(content: string, decodefontsvgCfg: XmlElementConfig | null, decodefontttfCfg: XmlElementConfig | null): Promise<string>;
2070
+ processLoginAndCaptchaPage(page: LsdPage, pageType: string): Promise<boolean>;
2071
+ __whenTaskEndFun(): Promise<boolean>;
2072
+ }
2073
+
736
2074
  /**
   * Performs one task for the given template id and parameter string within the given network context,
   * and resolves to its TaskResult. The last four parameters are optional.
   * The parameter list matches the PerformOneTask type alias declared in this file, except that
   * `templateId` is typed `number` here and `TemplateId` there.
   */
  declare function performOneTask(templateId: number, parasStr: string, taskNetworkContext: TaskNetworkContext, taskType?: TaskType, xmlStr?: string, taskId?: number, useNickName?: boolean): Promise<TaskResult>;
737
2075
 
738
2076
  /** Takes a ScraperConfig and resolves to a boolean (presumably whether the update succeeded; confirm). */
  declare function updateScraperConfig(config: ScraperConfig): Promise<boolean>;
739
2077
  /** Entry point that accepts optional new tasks and an optional ScraperConfig, and resolves to a boolean. */
  declare function scraper(newTasks?: TemplateTasks[], config?: ScraperConfig): Promise<boolean>;
740
2078
 
741
- export { type AttrsInXml, type BrowserConfig, type ExecData, type ParsedTemplate, type ScraperConfig, TaskParser, TemplateManagerInScraper, type TemplatePara, type TemplateTasks, performOneTask, scraper, setScraperLogFun, updateScraperConfig };
2079
+ export { type ActionConfig, type ApiContextType, type AttrsInXml, type AuthInfo, type BrowserConfig, type CaptchaOptions, type DataFileFormat, type DataRecord, type DatatableColumnMap, type DomainId, type ElementSource, type ElementSourceExt, type ElementTagConfig, type ExecData, type ExtractScriptInData, type ExtractScriptResult, type FontsConfig, type FontttfConfig, type GetAddedTaskParasFun, type GetDataFromServerFunc, type GetExecedTaskParasFun, type GetOtherFunObj, GetPara, type GetQueuedCntWithParasCfgFun, type GetQueuedTaskParasFun, type GetSpecialFun, type GetSpecialFunObj, type GetStdFunScriptFun, type HttpHeaders, type InParas, type LoginOptions, type LoginWhen, LsdString, type MyFunInData, type MyFunResult, type PageMatchInfo, type ParsedTemplate, type ParsedTemplateExt, type PerformOneTask, type ScraperConfig, type ScraperStateData, type SimpleScheduler, type SpecialConvertFun, type SpecialConvertFunObj, type Subtask, type TaskData, type TaskMisc, type TaskNetworkContext, type TaskOptions, TaskParser, type TaskResult, TaskScraper, type TaskType, type TemplateConfig, type TemplateId, type TemplateInScraper, TemplateManagerInScraper, type TemplateManagerOptions, type TemplateManger, type TemplatePara, type TemplateTasks, type UserData, type XmlActionApiCfg, type XmlActionBreakCfg, type XmlActionCaptchaCfg, type XmlActionClickCfg, type XmlActionConfig, type XmlActionContinueCfg, type XmlActionExitCfg, type XmlActionExtractArrayCfg, type XmlActionExtractCfg, type XmlActionExtractScriptCfg, type XmlActionExtractTableCfg, type XmlActionGotoCfg, type XmlActionHoverCfg, type XmlActionIfelseCfg, type XmlActionInputCfg, type XmlActionInterceptClearCfg, type XmlActionInterceptSetCfg, type XmlActionLoopdowhileElementCfg, type XmlActionLoopdowhileTemplstrCfg, type XmlActionLoopforCfg, type XmlActionLoopinelesCfg, type XmlActionLoopinstrCfg, type XmlActionMiscCfg, type XmlActionScrollByCfg, type XmlActionScrollIntoviewCfg, type XmlActionScrollToCfg, type 
XmlActionSelectCfg, type XmlActionSetvarDbqueryCfg, type XmlActionSetvarElementCfg, type XmlActionSetvarFileCfg, type XmlActionSetvarGetCfg, type XmlActionSetvarOcrCfg, type XmlActionSetvarSubtaskCfg, type XmlActionSetvarTemplstrCfg, type XmlActionSubtaskCfg, type XmlActionWaitElementCfg, type XmlActionWaitNavigationCfg, type XmlActionWaitSleepCfg, type XmlAttrCfg, type XmlCaptchaAmazonCfg, type XmlCaptchaCoordinateCfg, type XmlCaptchaFuncaptchaCfg, type XmlCaptchaGeetestCfg, type XmlCaptchaKeycaptchaCfg, type XmlCaptchaMtcaptchaCfg, type XmlCaptchaRecaptchaCfg, type XmlCaptchaTextCfg, type XmlCaptchaTurnstileCfg, type XmlCheckResultCfg, type XmlColumnElementCfg, type XmlColumnOcrCfg, type XmlColumnPropertyCfg, type XmlColumnSubtaskCfg, type XmlColumnTemplstrCfg, type XmlCommentElementCfg, type XmlConditionElementCfg, type XmlConditionElseCfg, type XmlConditionTemplstrCfg, type XmlDbqueryCfg, type XmlDecodefontsvgCfg, type XmlDecodefontttfCfg, type XmlElecontentAttrCfg, type XmlElecontentImgbase64Cfg, type XmlElecontentInnerhtmlCfg, type XmlElecontentLengthCfg, type XmlElecontentOuterhtmlCfg, type XmlElecontentTextCfg, type XmlElementCfg, type XmlElementConfig, type XmlFileCfg, type XmlFontcharsCfg, type XmlFontcodesCfg, type XmlFontfamilyCfg, type XmlFontselectorCfg, type XmlFontsvgCfg, type XmlFontttfCfg, type XmlFunCGethtmlCfg, type XmlFunCGettextCfg, type XmlFunCHtmlCfg, type XmlFunCRemoveCfg, type XmlFunCTextCfg, type XmlFunCeilCfg, type XmlFunClosingsubstrCfg, type XmlFunCompressCfg, type XmlFunConcatCfg, type XmlFunConfig, type XmlFunConvertencodingCfg, type XmlFunDecodeCfg, type XmlFunDecompressCfg, type XmlFunFloorCfg, type XmlFunHashcodeCfg, type XmlFunIncludesCfg, type XmlFunInsertCfg, type XmlFunItemstoobjCfg, type XmlFunJsonparseCfg, type XmlFunLengthCfg, type XmlFunMatchallCfg, type XmlFunMaxCfg, type XmlFunMinCfg, type XmlFunMyfunCfg, type XmlFunNotincludesCfg, type XmlFunNumeqCfg, type XmlFunNumgeCfg, type XmlFunNumgtCfg, type XmlFunNumleCfg, type 
XmlFunNumltCfg, type XmlFunPadendCfg, type XmlFunPadstartCfg, type XmlFunParsefloatCfg, type XmlFunParseintCfg, type XmlFunRandomarryidxesCfg, type XmlFunReplaceCfg, type XmlFunRoundCfg, type XmlFunSetcomplementCfg, type XmlFunSetdifferenceCfg, type XmlFunSetintersectionCfg, type XmlFunSetunionCfg, type XmlFunShuffleCfg, type XmlFunSliceCfg, type XmlFunSortCfg, type XmlFunSpecialconvertCfg, type XmlFunSplitCfg, type XmlFunStreqCfg, type XmlFunSubstrafterCfg, type XmlFunSubstrbeforeCfg, type XmlFunToboolCfg, type XmlFunTolowercaseCfg, type XmlFunTonumCfg, type XmlFunTouppercaseCfg, type XmlFunTrimCfg, type XmlFunTrimendCfg, type XmlFunTrimstartCfg, type XmlFunUniqCfg, type XmlFunUrldecodeCfg, type XmlFunUrldelparamsCfg, type XmlFunUrlencodeCfg, type XmlFunUrlgetCfg, type XmlFunUrlsetparamsCfg, type XmlGetAddedtaskparasCfg, type XmlGetConfig, type XmlGetContentCfg, type XmlGetCookiesCfg, type XmlGetDatetimeCfg, type XmlGetExecedtaskparasCfg, type XmlGetFileCfg, type XmlGetHttpheadersCfg, type XmlGetMhtmlCfg, type XmlGetPdfCfg, type XmlGetQueuedcntwithparasCfg, type XmlGetQueuedtaskparasCfg, type XmlGetRandomCfg, type XmlGetResponseCfg, type XmlGetScreenshotCfg, type XmlGetSpecialCfg, type XmlGetTitleCfg, type XmlGetWindowCfg, type XmlHttpbrowserCfg, type XmlHttpdeviceCfg, type XmlHttpheaderCfg, type XmlHttposCfg, type XmlIframeCfg, type XmlImageElementCfg, type XmlInputElementCfg, type XmlMiscDelsyncdbdataCfg, type XmlMiscExtractdataCfg, type XmlMyfunCfg, type XmlOcrcfgApiCfg, type XmlOcrcfgTesseractCfg, type XmlParaCfg, type XmlRequestAbortCfg, type XmlRequestDataCfg, type XmlRequestHeaderCfg, type XmlResponseCacheCfg, type XmlResponseSaveCfg, type XmlResponseStatuscodeCfg, type XmlSubmitElementCfg, type XmlSubtaskCfg, type XmlTemplstrCfg, type XmlTransformCfg, getTransformExpStr, logdbg, logerr, loginfo, logwarn, performOneTask, scraper, setScraperLogFun, updateScraperConfig };