pyxllib 0.3.96__py3-none-any.whl → 0.3.197__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (306) hide show
  1. pyxllib/algo/geo.py +12 -0
  2. pyxllib/algo/intervals.py +1 -1
  3. pyxllib/algo/matcher.py +78 -0
  4. pyxllib/algo/pupil.py +187 -19
  5. pyxllib/algo/specialist.py +2 -1
  6. pyxllib/algo/stat.py +38 -2
  7. {pyxlpr → pyxllib/autogui}/__init__.py +1 -1
  8. pyxllib/autogui/activewin.py +246 -0
  9. pyxllib/autogui/all.py +9 -0
  10. pyxllib/{ext/autogui → autogui}/autogui.py +40 -11
  11. pyxllib/autogui/uiautolib.py +362 -0
  12. pyxllib/autogui/wechat.py +827 -0
  13. pyxllib/autogui/wechat_msg.py +421 -0
  14. pyxllib/autogui/wxautolib.py +84 -0
  15. pyxllib/cv/slidercaptcha.py +137 -0
  16. pyxllib/data/echarts.py +123 -12
  17. pyxllib/data/jsonlib.py +89 -0
  18. pyxllib/data/pglib.py +514 -30
  19. pyxllib/data/sqlite.py +231 -4
  20. pyxllib/ext/JLineViewer.py +14 -1
  21. pyxllib/ext/drissionlib.py +277 -0
  22. pyxllib/ext/kq5034lib.py +0 -1594
  23. pyxllib/ext/robustprocfile.py +497 -0
  24. pyxllib/ext/unixlib.py +6 -5
  25. pyxllib/ext/utools.py +108 -95
  26. pyxllib/ext/webhook.py +32 -14
  27. pyxllib/ext/wjxlib.py +88 -0
  28. pyxllib/ext/wpsapi.py +124 -0
  29. pyxllib/ext/xlwork.py +9 -0
  30. pyxllib/ext/yuquelib.py +1003 -71
  31. pyxllib/file/docxlib.py +1 -1
  32. pyxllib/file/libreoffice.py +165 -0
  33. pyxllib/file/movielib.py +9 -0
  34. pyxllib/file/packlib/__init__.py +112 -75
  35. pyxllib/file/pdflib.py +1 -1
  36. pyxllib/file/pupil.py +1 -1
  37. pyxllib/file/specialist/dirlib.py +1 -1
  38. pyxllib/file/specialist/download.py +10 -3
  39. pyxllib/file/specialist/filelib.py +266 -55
  40. pyxllib/file/xlsxlib.py +205 -50
  41. pyxllib/file/xlsyncfile.py +341 -0
  42. pyxllib/prog/cachetools.py +64 -0
  43. pyxllib/prog/filelock.py +42 -0
  44. pyxllib/prog/multiprogs.py +940 -0
  45. pyxllib/prog/newbie.py +9 -2
  46. pyxllib/prog/pupil.py +129 -60
  47. pyxllib/prog/specialist/__init__.py +176 -2
  48. pyxllib/prog/specialist/bc.py +5 -2
  49. pyxllib/prog/specialist/browser.py +11 -2
  50. pyxllib/prog/specialist/datetime.py +68 -0
  51. pyxllib/prog/specialist/tictoc.py +12 -13
  52. pyxllib/prog/specialist/xllog.py +5 -5
  53. pyxllib/prog/xlosenv.py +7 -0
  54. pyxllib/text/airscript.js +744 -0
  55. pyxllib/text/charclasslib.py +17 -5
  56. pyxllib/text/jiebalib.py +6 -3
  57. pyxllib/text/jinjalib.py +32 -0
  58. pyxllib/text/jsa_ai_prompt.md +271 -0
  59. pyxllib/text/jscode.py +159 -4
  60. pyxllib/text/nestenv.py +1 -1
  61. pyxllib/text/newbie.py +12 -0
  62. pyxllib/text/pupil/common.py +26 -0
  63. pyxllib/text/specialist/ptag.py +2 -2
  64. pyxllib/text/templates/echart_base.html +11 -0
  65. pyxllib/text/templates/highlight_code.html +17 -0
  66. pyxllib/text/templates/latex_editor.html +103 -0
  67. pyxllib/text/xmllib.py +76 -14
  68. pyxllib/xl.py +2 -1
  69. pyxllib-0.3.197.dist-info/METADATA +48 -0
  70. pyxllib-0.3.197.dist-info/RECORD +126 -0
  71. {pyxllib-0.3.96.dist-info → pyxllib-0.3.197.dist-info}/WHEEL +1 -2
  72. pyxllib/ext/autogui/__init__.py +0 -8
  73. pyxllib-0.3.96.dist-info/METADATA +0 -51
  74. pyxllib-0.3.96.dist-info/RECORD +0 -333
  75. pyxllib-0.3.96.dist-info/top_level.txt +0 -2
  76. pyxlpr/ai/__init__.py +0 -5
  77. pyxlpr/ai/clientlib.py +0 -1281
  78. pyxlpr/ai/specialist.py +0 -286
  79. pyxlpr/ai/torch_app.py +0 -172
  80. pyxlpr/ai/xlpaddle.py +0 -655
  81. pyxlpr/ai/xltorch.py +0 -705
  82. pyxlpr/data/__init__.py +0 -11
  83. pyxlpr/data/coco.py +0 -1325
  84. pyxlpr/data/datacls.py +0 -365
  85. pyxlpr/data/datasets.py +0 -200
  86. pyxlpr/data/gptlib.py +0 -1291
  87. pyxlpr/data/icdar/__init__.py +0 -96
  88. pyxlpr/data/icdar/deteval.py +0 -377
  89. pyxlpr/data/icdar/icdar2013.py +0 -341
  90. pyxlpr/data/icdar/iou.py +0 -340
  91. pyxlpr/data/icdar/rrc_evaluation_funcs_1_1.py +0 -463
  92. pyxlpr/data/imtextline.py +0 -473
  93. pyxlpr/data/labelme.py +0 -866
  94. pyxlpr/data/removeline.py +0 -179
  95. pyxlpr/data/specialist.py +0 -57
  96. pyxlpr/eval/__init__.py +0 -85
  97. pyxlpr/paddleocr.py +0 -776
  98. pyxlpr/ppocr/__init__.py +0 -15
  99. pyxlpr/ppocr/configs/rec/multi_language/generate_multi_language_configs.py +0 -226
  100. pyxlpr/ppocr/data/__init__.py +0 -135
  101. pyxlpr/ppocr/data/imaug/ColorJitter.py +0 -26
  102. pyxlpr/ppocr/data/imaug/__init__.py +0 -67
  103. pyxlpr/ppocr/data/imaug/copy_paste.py +0 -170
  104. pyxlpr/ppocr/data/imaug/east_process.py +0 -437
  105. pyxlpr/ppocr/data/imaug/gen_table_mask.py +0 -244
  106. pyxlpr/ppocr/data/imaug/iaa_augment.py +0 -114
  107. pyxlpr/ppocr/data/imaug/label_ops.py +0 -789
  108. pyxlpr/ppocr/data/imaug/make_border_map.py +0 -184
  109. pyxlpr/ppocr/data/imaug/make_pse_gt.py +0 -106
  110. pyxlpr/ppocr/data/imaug/make_shrink_map.py +0 -126
  111. pyxlpr/ppocr/data/imaug/operators.py +0 -433
  112. pyxlpr/ppocr/data/imaug/pg_process.py +0 -906
  113. pyxlpr/ppocr/data/imaug/randaugment.py +0 -143
  114. pyxlpr/ppocr/data/imaug/random_crop_data.py +0 -239
  115. pyxlpr/ppocr/data/imaug/rec_img_aug.py +0 -533
  116. pyxlpr/ppocr/data/imaug/sast_process.py +0 -777
  117. pyxlpr/ppocr/data/imaug/text_image_aug/__init__.py +0 -17
  118. pyxlpr/ppocr/data/imaug/text_image_aug/augment.py +0 -120
  119. pyxlpr/ppocr/data/imaug/text_image_aug/warp_mls.py +0 -168
  120. pyxlpr/ppocr/data/lmdb_dataset.py +0 -115
  121. pyxlpr/ppocr/data/pgnet_dataset.py +0 -104
  122. pyxlpr/ppocr/data/pubtab_dataset.py +0 -107
  123. pyxlpr/ppocr/data/simple_dataset.py +0 -372
  124. pyxlpr/ppocr/losses/__init__.py +0 -61
  125. pyxlpr/ppocr/losses/ace_loss.py +0 -52
  126. pyxlpr/ppocr/losses/basic_loss.py +0 -135
  127. pyxlpr/ppocr/losses/center_loss.py +0 -88
  128. pyxlpr/ppocr/losses/cls_loss.py +0 -30
  129. pyxlpr/ppocr/losses/combined_loss.py +0 -67
  130. pyxlpr/ppocr/losses/det_basic_loss.py +0 -208
  131. pyxlpr/ppocr/losses/det_db_loss.py +0 -80
  132. pyxlpr/ppocr/losses/det_east_loss.py +0 -63
  133. pyxlpr/ppocr/losses/det_pse_loss.py +0 -149
  134. pyxlpr/ppocr/losses/det_sast_loss.py +0 -121
  135. pyxlpr/ppocr/losses/distillation_loss.py +0 -272
  136. pyxlpr/ppocr/losses/e2e_pg_loss.py +0 -140
  137. pyxlpr/ppocr/losses/kie_sdmgr_loss.py +0 -113
  138. pyxlpr/ppocr/losses/rec_aster_loss.py +0 -99
  139. pyxlpr/ppocr/losses/rec_att_loss.py +0 -39
  140. pyxlpr/ppocr/losses/rec_ctc_loss.py +0 -44
  141. pyxlpr/ppocr/losses/rec_enhanced_ctc_loss.py +0 -70
  142. pyxlpr/ppocr/losses/rec_nrtr_loss.py +0 -30
  143. pyxlpr/ppocr/losses/rec_sar_loss.py +0 -28
  144. pyxlpr/ppocr/losses/rec_srn_loss.py +0 -47
  145. pyxlpr/ppocr/losses/table_att_loss.py +0 -109
  146. pyxlpr/ppocr/metrics/__init__.py +0 -44
  147. pyxlpr/ppocr/metrics/cls_metric.py +0 -45
  148. pyxlpr/ppocr/metrics/det_metric.py +0 -82
  149. pyxlpr/ppocr/metrics/distillation_metric.py +0 -73
  150. pyxlpr/ppocr/metrics/e2e_metric.py +0 -86
  151. pyxlpr/ppocr/metrics/eval_det_iou.py +0 -274
  152. pyxlpr/ppocr/metrics/kie_metric.py +0 -70
  153. pyxlpr/ppocr/metrics/rec_metric.py +0 -75
  154. pyxlpr/ppocr/metrics/table_metric.py +0 -50
  155. pyxlpr/ppocr/modeling/architectures/__init__.py +0 -32
  156. pyxlpr/ppocr/modeling/architectures/base_model.py +0 -88
  157. pyxlpr/ppocr/modeling/architectures/distillation_model.py +0 -60
  158. pyxlpr/ppocr/modeling/backbones/__init__.py +0 -54
  159. pyxlpr/ppocr/modeling/backbones/det_mobilenet_v3.py +0 -268
  160. pyxlpr/ppocr/modeling/backbones/det_resnet_vd.py +0 -246
  161. pyxlpr/ppocr/modeling/backbones/det_resnet_vd_sast.py +0 -285
  162. pyxlpr/ppocr/modeling/backbones/e2e_resnet_vd_pg.py +0 -265
  163. pyxlpr/ppocr/modeling/backbones/kie_unet_sdmgr.py +0 -186
  164. pyxlpr/ppocr/modeling/backbones/rec_mobilenet_v3.py +0 -138
  165. pyxlpr/ppocr/modeling/backbones/rec_mv1_enhance.py +0 -258
  166. pyxlpr/ppocr/modeling/backbones/rec_nrtr_mtb.py +0 -48
  167. pyxlpr/ppocr/modeling/backbones/rec_resnet_31.py +0 -210
  168. pyxlpr/ppocr/modeling/backbones/rec_resnet_aster.py +0 -143
  169. pyxlpr/ppocr/modeling/backbones/rec_resnet_fpn.py +0 -307
  170. pyxlpr/ppocr/modeling/backbones/rec_resnet_vd.py +0 -286
  171. pyxlpr/ppocr/modeling/heads/__init__.py +0 -54
  172. pyxlpr/ppocr/modeling/heads/cls_head.py +0 -52
  173. pyxlpr/ppocr/modeling/heads/det_db_head.py +0 -118
  174. pyxlpr/ppocr/modeling/heads/det_east_head.py +0 -121
  175. pyxlpr/ppocr/modeling/heads/det_pse_head.py +0 -37
  176. pyxlpr/ppocr/modeling/heads/det_sast_head.py +0 -128
  177. pyxlpr/ppocr/modeling/heads/e2e_pg_head.py +0 -253
  178. pyxlpr/ppocr/modeling/heads/kie_sdmgr_head.py +0 -206
  179. pyxlpr/ppocr/modeling/heads/multiheadAttention.py +0 -163
  180. pyxlpr/ppocr/modeling/heads/rec_aster_head.py +0 -393
  181. pyxlpr/ppocr/modeling/heads/rec_att_head.py +0 -202
  182. pyxlpr/ppocr/modeling/heads/rec_ctc_head.py +0 -88
  183. pyxlpr/ppocr/modeling/heads/rec_nrtr_head.py +0 -826
  184. pyxlpr/ppocr/modeling/heads/rec_sar_head.py +0 -402
  185. pyxlpr/ppocr/modeling/heads/rec_srn_head.py +0 -280
  186. pyxlpr/ppocr/modeling/heads/self_attention.py +0 -406
  187. pyxlpr/ppocr/modeling/heads/table_att_head.py +0 -246
  188. pyxlpr/ppocr/modeling/necks/__init__.py +0 -32
  189. pyxlpr/ppocr/modeling/necks/db_fpn.py +0 -111
  190. pyxlpr/ppocr/modeling/necks/east_fpn.py +0 -188
  191. pyxlpr/ppocr/modeling/necks/fpn.py +0 -138
  192. pyxlpr/ppocr/modeling/necks/pg_fpn.py +0 -314
  193. pyxlpr/ppocr/modeling/necks/rnn.py +0 -92
  194. pyxlpr/ppocr/modeling/necks/sast_fpn.py +0 -284
  195. pyxlpr/ppocr/modeling/necks/table_fpn.py +0 -110
  196. pyxlpr/ppocr/modeling/transforms/__init__.py +0 -28
  197. pyxlpr/ppocr/modeling/transforms/stn.py +0 -135
  198. pyxlpr/ppocr/modeling/transforms/tps.py +0 -308
  199. pyxlpr/ppocr/modeling/transforms/tps_spatial_transformer.py +0 -156
  200. pyxlpr/ppocr/optimizer/__init__.py +0 -61
  201. pyxlpr/ppocr/optimizer/learning_rate.py +0 -228
  202. pyxlpr/ppocr/optimizer/lr_scheduler.py +0 -49
  203. pyxlpr/ppocr/optimizer/optimizer.py +0 -160
  204. pyxlpr/ppocr/optimizer/regularizer.py +0 -52
  205. pyxlpr/ppocr/postprocess/__init__.py +0 -55
  206. pyxlpr/ppocr/postprocess/cls_postprocess.py +0 -33
  207. pyxlpr/ppocr/postprocess/db_postprocess.py +0 -234
  208. pyxlpr/ppocr/postprocess/east_postprocess.py +0 -143
  209. pyxlpr/ppocr/postprocess/locality_aware_nms.py +0 -200
  210. pyxlpr/ppocr/postprocess/pg_postprocess.py +0 -52
  211. pyxlpr/ppocr/postprocess/pse_postprocess/__init__.py +0 -15
  212. pyxlpr/ppocr/postprocess/pse_postprocess/pse/__init__.py +0 -29
  213. pyxlpr/ppocr/postprocess/pse_postprocess/pse/setup.py +0 -14
  214. pyxlpr/ppocr/postprocess/pse_postprocess/pse_postprocess.py +0 -118
  215. pyxlpr/ppocr/postprocess/rec_postprocess.py +0 -654
  216. pyxlpr/ppocr/postprocess/sast_postprocess.py +0 -355
  217. pyxlpr/ppocr/tools/__init__.py +0 -14
  218. pyxlpr/ppocr/tools/eval.py +0 -83
  219. pyxlpr/ppocr/tools/export_center.py +0 -77
  220. pyxlpr/ppocr/tools/export_model.py +0 -129
  221. pyxlpr/ppocr/tools/infer/predict_cls.py +0 -151
  222. pyxlpr/ppocr/tools/infer/predict_det.py +0 -300
  223. pyxlpr/ppocr/tools/infer/predict_e2e.py +0 -169
  224. pyxlpr/ppocr/tools/infer/predict_rec.py +0 -414
  225. pyxlpr/ppocr/tools/infer/predict_system.py +0 -204
  226. pyxlpr/ppocr/tools/infer/utility.py +0 -629
  227. pyxlpr/ppocr/tools/infer_cls.py +0 -83
  228. pyxlpr/ppocr/tools/infer_det.py +0 -134
  229. pyxlpr/ppocr/tools/infer_e2e.py +0 -122
  230. pyxlpr/ppocr/tools/infer_kie.py +0 -153
  231. pyxlpr/ppocr/tools/infer_rec.py +0 -146
  232. pyxlpr/ppocr/tools/infer_table.py +0 -107
  233. pyxlpr/ppocr/tools/program.py +0 -596
  234. pyxlpr/ppocr/tools/test_hubserving.py +0 -117
  235. pyxlpr/ppocr/tools/train.py +0 -163
  236. pyxlpr/ppocr/tools/xlprog.py +0 -748
  237. pyxlpr/ppocr/utils/EN_symbol_dict.txt +0 -94
  238. pyxlpr/ppocr/utils/__init__.py +0 -24
  239. pyxlpr/ppocr/utils/dict/ar_dict.txt +0 -117
  240. pyxlpr/ppocr/utils/dict/arabic_dict.txt +0 -162
  241. pyxlpr/ppocr/utils/dict/be_dict.txt +0 -145
  242. pyxlpr/ppocr/utils/dict/bg_dict.txt +0 -140
  243. pyxlpr/ppocr/utils/dict/chinese_cht_dict.txt +0 -8421
  244. pyxlpr/ppocr/utils/dict/cyrillic_dict.txt +0 -163
  245. pyxlpr/ppocr/utils/dict/devanagari_dict.txt +0 -167
  246. pyxlpr/ppocr/utils/dict/en_dict.txt +0 -63
  247. pyxlpr/ppocr/utils/dict/fa_dict.txt +0 -136
  248. pyxlpr/ppocr/utils/dict/french_dict.txt +0 -136
  249. pyxlpr/ppocr/utils/dict/german_dict.txt +0 -143
  250. pyxlpr/ppocr/utils/dict/hi_dict.txt +0 -162
  251. pyxlpr/ppocr/utils/dict/it_dict.txt +0 -118
  252. pyxlpr/ppocr/utils/dict/japan_dict.txt +0 -4399
  253. pyxlpr/ppocr/utils/dict/ka_dict.txt +0 -153
  254. pyxlpr/ppocr/utils/dict/korean_dict.txt +0 -3688
  255. pyxlpr/ppocr/utils/dict/latin_dict.txt +0 -185
  256. pyxlpr/ppocr/utils/dict/mr_dict.txt +0 -153
  257. pyxlpr/ppocr/utils/dict/ne_dict.txt +0 -153
  258. pyxlpr/ppocr/utils/dict/oc_dict.txt +0 -96
  259. pyxlpr/ppocr/utils/dict/pu_dict.txt +0 -130
  260. pyxlpr/ppocr/utils/dict/rs_dict.txt +0 -91
  261. pyxlpr/ppocr/utils/dict/rsc_dict.txt +0 -134
  262. pyxlpr/ppocr/utils/dict/ru_dict.txt +0 -125
  263. pyxlpr/ppocr/utils/dict/ta_dict.txt +0 -128
  264. pyxlpr/ppocr/utils/dict/table_dict.txt +0 -277
  265. pyxlpr/ppocr/utils/dict/table_structure_dict.txt +0 -2759
  266. pyxlpr/ppocr/utils/dict/te_dict.txt +0 -151
  267. pyxlpr/ppocr/utils/dict/ug_dict.txt +0 -114
  268. pyxlpr/ppocr/utils/dict/uk_dict.txt +0 -142
  269. pyxlpr/ppocr/utils/dict/ur_dict.txt +0 -137
  270. pyxlpr/ppocr/utils/dict/xi_dict.txt +0 -110
  271. pyxlpr/ppocr/utils/dict90.txt +0 -90
  272. pyxlpr/ppocr/utils/e2e_metric/Deteval.py +0 -574
  273. pyxlpr/ppocr/utils/e2e_metric/polygon_fast.py +0 -83
  274. pyxlpr/ppocr/utils/e2e_utils/extract_batchsize.py +0 -87
  275. pyxlpr/ppocr/utils/e2e_utils/extract_textpoint_fast.py +0 -457
  276. pyxlpr/ppocr/utils/e2e_utils/extract_textpoint_slow.py +0 -592
  277. pyxlpr/ppocr/utils/e2e_utils/pgnet_pp_utils.py +0 -162
  278. pyxlpr/ppocr/utils/e2e_utils/visual.py +0 -162
  279. pyxlpr/ppocr/utils/en_dict.txt +0 -95
  280. pyxlpr/ppocr/utils/gen_label.py +0 -81
  281. pyxlpr/ppocr/utils/ic15_dict.txt +0 -36
  282. pyxlpr/ppocr/utils/iou.py +0 -54
  283. pyxlpr/ppocr/utils/logging.py +0 -69
  284. pyxlpr/ppocr/utils/network.py +0 -84
  285. pyxlpr/ppocr/utils/ppocr_keys_v1.txt +0 -6623
  286. pyxlpr/ppocr/utils/profiler.py +0 -110
  287. pyxlpr/ppocr/utils/save_load.py +0 -150
  288. pyxlpr/ppocr/utils/stats.py +0 -72
  289. pyxlpr/ppocr/utils/utility.py +0 -80
  290. pyxlpr/ppstructure/__init__.py +0 -13
  291. pyxlpr/ppstructure/predict_system.py +0 -187
  292. pyxlpr/ppstructure/table/__init__.py +0 -13
  293. pyxlpr/ppstructure/table/eval_table.py +0 -72
  294. pyxlpr/ppstructure/table/matcher.py +0 -192
  295. pyxlpr/ppstructure/table/predict_structure.py +0 -136
  296. pyxlpr/ppstructure/table/predict_table.py +0 -221
  297. pyxlpr/ppstructure/table/table_metric/__init__.py +0 -16
  298. pyxlpr/ppstructure/table/table_metric/parallel.py +0 -51
  299. pyxlpr/ppstructure/table/table_metric/table_metric.py +0 -247
  300. pyxlpr/ppstructure/table/tablepyxl/__init__.py +0 -13
  301. pyxlpr/ppstructure/table/tablepyxl/style.py +0 -283
  302. pyxlpr/ppstructure/table/tablepyxl/tablepyxl.py +0 -118
  303. pyxlpr/ppstructure/utility.py +0 -71
  304. pyxlpr/xlai.py +0 -10
  305. /pyxllib/{ext/autogui → autogui}/virtualkey.py +0 -0
  306. {pyxllib-0.3.96.dist-info → pyxllib-0.3.197.dist-info/licenses}/LICENSE +0 -0
pyxllib/data/sqlite.py CHANGED
@@ -4,16 +4,189 @@
4
4
  # @Email : 877362867@qq.com
5
5
  # @Date : 2022/04/12 08:59
6
6
 
7
+ import copy
7
8
  import json
8
9
  import re
9
10
  import sqlite3
11
+ import warnings
12
+ from collections import defaultdict
10
13
 
11
14
  import pandas as pd
12
15
 
16
+ # 旧版的pandas警告
17
+ warnings.filterwarnings('ignore', message="pandas only support SQLAlchemy connectable")
18
+ # Newer pandas versions word the same warning with an extra 's' ("supports")
19
+ warnings.filterwarnings('ignore', message="pandas only supports SQLAlchemy connectable")
20
+
21
+
22
class SqlBuilder:
    """Fluent builder that assembles SQL statements as plain strings.

    Component methods (``select``, ``where``, ``order_by`` ...) collect
    fragments and return ``self`` so calls can be chained; the ``build_*``
    methods render the collected fragments into SQL text, one clause per line.

    NOTE: fragments and values are interpolated into the SQL text verbatim
    (no escaping, no bound parameters), so callers must not pass untrusted
    input.
    """

    def __init__(self, table=''):
        """
        :param table: name of the table the statements operate on
        """
        self.table = table
        self._select = []
        self._set = []
        self._where = []
        self._order_by = []
        self._group_by = []
        self._limit = None  # maximum number of rows to read
        self._offset = None  # index of the first row to read

    def copy(self):
        """Return a deep copy of the builder in its current state."""
        return copy.deepcopy(self)

    def __1_组件(self):
        # Section marker only: the methods below add statement components.
        pass

    def from_table(self, table):
        """Replace the target table."""
        self.table = table
        return self

    def select(self, *columns):
        """Append columns/expressions to the SELECT list."""
        self._select.extend(columns)
        return self

    def set(self, *columns):
        """Append assignment fragments (e.g. ``"a = 1"``) for UPDATE ... SET."""
        self._set.extend(columns)
        return self

    def where(self, condition):
        """Append one condition (str) or several (list/tuple); all are AND-ed.

        :raises ValueError: if ``condition`` is neither a str nor a list/tuple
        """
        if isinstance(condition, (list, tuple)):
            self._where.extend(condition)
        elif isinstance(condition, str):
            self._where.append(condition)
        else:
            raise ValueError(f'不支持的where条件类型{type(condition)}')

        return self

    def where_dict_match(self, items):
        """Require every column named by a key to equal the mapped value.

        String values are wrapped in single quotes, int/float values are
        written as-is.

        :param dict items: column name -> required value
        :raises TypeError: if a value is not a str, int or float
        :return: self, so the call can be chained like the other components
        """
        for k, v in items.items():
            if isinstance(v, str):
                self._where.append(f"{k} = '{v}'")
            elif isinstance(v, (int, float)):
                self._where.append(f"{k} = {v}")
            else:
                raise TypeError(f'unsupported value type for column {k}: {type(v)}')
        return self

    def where_in(self, column, values):
        """Require ``column`` to be one of ``values``.

        :param values: None (no condition is added), a single str, or an
            iterable of values; every value is rendered as a quoted string
        """
        if values is None:
            return self

        if isinstance(values, str):
            values = [values]
        # Materialise once so one-shot iterables can be both counted and rendered.
        quoted = [f"'{str(value)}'" for value in values]
        if len(quoted) == 1:
            # A single value becomes a plain equality test on that whole value.
            self._where.append(f"{column} = {quoted[0]}")
        else:
            self._where.append(f"{column} IN ({', '.join(quoted)})")
        return self

    def where_or(self, *conditions):
        """Add the given conditions, OR-ed together, as one parenthesised condition."""
        self._where.append(f"({' OR '.join(conditions)})")
        return self

    def where_mod(self, column, divisor, remainder):
        """Keep rows whose ``column`` value modulo ``divisor`` equals ``remainder``."""
        condition = f"({column} % {divisor} = {remainder})"
        self._where.append(condition)
        return self

    def where_mod2(self, desc):
        """Shorthand for :meth:`where_mod` using a compact description.

        :param desc: e.g. ``'id%2=1'``; an empty value adds no condition

        TODO(original author): probably redundant, plain ``.where`` can
        express the same condition.
        """
        if not desc:
            # Return self (not None) so an empty description keeps the chain intact.
            return self
        column, divisor_remainder = desc.split('%')
        divisor, remainder = map(int, divisor_remainder.split('='))
        return self.where_mod(column, divisor, remainder)

    def group_by(self, *columns):
        """Append GROUP BY columns."""
        self._group_by.extend(columns)
        return self

    def order_by(self, *columns):
        """Append ORDER BY terms."""
        self._order_by.extend(columns)
        return self

    def limit(self, limit, offset=None):
        """Set LIMIT and (optionally) OFFSET."""
        self._limit = limit
        self._offset = offset
        return self

    def __2_build_初级命令(self):
        # Section marker only: basic statement builders follow.
        pass

    def _where_clause(self):
        """Return the rendered WHERE clause as a one-item list, or [] without conditions."""
        if self._where:
            return [f"WHERE {' AND '.join(self._where)}"]
        return []

    def build_select(self, *columns):
        """Render a SELECT statement.

        :param columns: extra columns for this statement only; they follow the
            columns registered through :meth:`select`. Without any column the
            statement selects ``*``.
        """
        columns = [*self._select, *columns]

        sql = [f"SELECT {', '.join(columns) or '*'}", f"FROM {self.table}"]
        sql.extend(self._where_clause())
        if self._group_by:
            sql.append(f"GROUP BY {', '.join(self._group_by)}")
        if self._order_by:
            sql.append(f"ORDER BY {', '.join(self._order_by)}")
        if self._limit is not None:
            limit_clause = f"LIMIT {self._limit}"
            if self._offset is not None:
                limit_clause += f" OFFSET {self._offset}"
            sql.append(limit_clause)
        return '\n'.join(sql)

    def build_count(self):
        """Render ``SELECT COUNT(*)`` with the current WHERE / GROUP BY parts."""
        sql = ["SELECT COUNT(*)", f"FROM {self.table}"]
        sql.extend(self._where_clause())
        if self._group_by:
            sql.append(f"GROUP BY {', '.join(self._group_by)}")
        return '\n'.join(sql)

    def build_update(self):
        """Render an UPDATE statement from the SET and WHERE parts."""
        sql = [f"UPDATE {self.table}"]
        if self._set:
            sql.append(f"SET {', '.join(self._set)}")
        sql.extend(self._where_clause())
        return '\n'.join(sql)

    def __3_build_中级命令(self):
        # Section marker only: composite statement builders follow.
        pass

    def build_check_data_type(self, column):
        """Render a query against ``information_schema.columns`` for the data type of ``column``."""
        sql = SqlBuilder('information_schema.columns')
        sql.select("data_type")
        sql.where(f"table_name='{self.table}' AND column_name='{column}'")
        return sql.build_select()

    def build_group_count(self, columns, count_column_name='cnt'):
        """Render a query counting rows per distinct value of ``columns``.

        The current WHERE conditions are reused; results are ordered by the
        count, largest first.

        :param columns: one column expression (str) or a list/tuple of columns
        :param count_column_name: alias of the count column
        """
        sql = SqlBuilder(self.table)
        if isinstance(columns, (list, tuple)):
            columns = ', '.join(columns)
        sql.select(columns, f"COUNT(*) {count_column_name}")
        sql.group_by(columns)
        sql.order_by(f'{count_column_name} DESC')
        sql._where = self._where.copy()
        return sql.build_select()
182
+
13
183
 
14
184
  class SqlBase:
15
185
  """ Sql语法通用的功能 """
16
186
 
187
def __init__(self, *args, **kwargs):
    """Create the cache of deferred statements.

    Positional and keyword arguments are accepted but not used here.
    """
    # SQL text -> list of parameter sets queued for a later bulk execution.
    self._commit_cache = defaultdict(list)
189
+
17
190
  def __1_库(self):
18
191
  pass
19
192
 
@@ -81,6 +254,12 @@ class SqlBase:
81
254
  cols = ','.join(map(str, cols))
82
255
  self.execute(f'CREATE INDEX {index_name} ON {table_name}({cols})')
83
256
 
257
def create_index2(self, table_name, cols):
    """Create a simple index whose name is generated automatically.

    The name is ``idx_<table>_<col1>_<col2>...``; every run of characters
    that cannot appear in a plain identifier (commas, spaces, dots,
    parentheses ...) is collapsed to one underscore, so a column string
    such as ``"a, b"`` still yields a usable index name.

    :param table_name: table to index
    :param cols: one comma separated string, or an iterable of column names
    """
    if not isinstance(cols, str):
        cols = ','.join(map(str, cols))
    index_name = re.sub(r'\W+', '_', f'idx_{table_name}_{cols}').strip('_')
    self.execute(f'CREATE INDEX {index_name} ON {table_name}({cols})')
262
+
84
263
  def keep_top_n_rows(self, table_name, num, col_name='id'):
85
264
  """ 只保留一小部分数据,常用来做lite、demo数据示例文件
86
265
 
@@ -109,7 +288,10 @@ class SqlBase:
109
288
 
110
289
  def exec2one(self, *args, **kwargs):
111
290
  """ 获得第1行的值 """
112
- return self.execute(*args, **kwargs).fetchone()[0]
291
+ try:
292
+ return self.execute(*args, **kwargs).fetchone()[0]
293
+ except TypeError:
294
+ return None
113
295
 
114
296
  def exec2row(self, *args, **kwargs):
115
297
  """ 获得第1行的值 """
@@ -149,6 +331,34 @@ class SqlBase:
149
331
  def __5_增删改查(self):
150
332
  pass
151
333
 
334
def commit_base(self, commit_type, query, params=None):
    """Execute or queue one statement according to ``commit_type``.

    :param commit_type:
        -1, only queue ``params`` under ``query`` in the local cache; nothing
            is sent until ``commit_all`` runs
        falsy (e.g. False), execute now inside the open transaction without
            committing; every call still talks to the database, which can
            be slow
        truthy (e.g. True), execute now and commit immediately, i.e. each
            statement is its own transaction
    :param query: SQL text
    :param params: parameters for the statement
    """
    if commit_type == -1:
        self._commit_cache[query].append(params)
        return

    # Any other value is interpreted by truthiness, so flags such as 0/1
    # are honoured instead of silently dropping the statement.
    self.execute(query, params)
    if commit_type:
        self.commit()
348
+
349
def commit_all(self):
    """Flush the statements queued by ``commit_base(-1, ...)`` and commit.

    With an empty cache this is a plain ``commit()``. Otherwise every cached
    SQL text is run once through ``executemany`` with all parameter sets
    queued for it and committed, after which the cache is reset.
    """
    if not self._commit_cache:
        self.commit()
        return

    for query, params in self._commit_cache.items():
        cur = self.cursor()
        try:
            cur.executemany(query, params)
        finally:
            # Release the cursor even when the batch fails.
            cur.close()
        self.commit()

    self._commit_cache = defaultdict(list)
361
+
152
362
  def update_row(self, table_name, cols, where, *, commit=False):
153
363
  """ 【改】更新数据
154
364
 
@@ -164,9 +374,22 @@ class SqlBase:
164
374
  kvs = ','.join([f'{k}=%s' for k in cols.keys()])
165
375
  ops = ' AND '.join([f'{k}=%s' for k in where.keys()])
166
376
  vals = list(cols.values()) + list(where.values())
167
- self.execute(f'UPDATE {table_name} SET {kvs} WHERE {ops}', self.cvt_types(vals))
168
- if commit:
169
- self.commit()
377
+ self.commit_base(commit,
378
+ f'UPDATE {table_name} SET {kvs} WHERE {ops}',
379
+ self.cvt_types(vals))
380
+
381
def delete_row(self, table_name, where, *, commit=False):
    """[Delete] Remove the rows matching every column/value pair in ``where``.

    :param table_name: table to delete from
    :param dict where: column name -> value a row must have to be deleted
    :param commit: forwarded to ``commit_base``; committing less often is
        recommended because frequent commits hurt performance badly
    """
    conditions = []
    values = []
    for column, value in where.items():
        conditions.append(f'{column}=%s')
        values.append(value)
    statement = f"DELETE FROM {table_name} WHERE {' AND '.join(conditions)}"
    self.commit_base(commit, statement, self.cvt_types(values))
170
393
 
171
394
  def select_col(self, table_name, col):
172
395
  """ 获得一列数据,常使用的功能,所以做了一个封装
@@ -238,6 +461,10 @@ class SqlBase:
238
461
  print("No 'FROM' keyword found in the data query.")
239
462
  return 0
240
463
 
464
def get_column_data_type(self, table_name, col_name):
    """Return the data type of one column of a table.

    Runs the ``information_schema.columns`` lookup produced by
    ``SqlBuilder.build_check_data_type`` through ``exec2one``.
    """
    builder = SqlBuilder(table_name)
    query = builder.build_check_data_type(col_name)
    return self.exec2one(query)
467
+
241
468
 
242
469
  class Connection(sqlite3.Connection, SqlBase):
243
470
  """
@@ -390,9 +390,22 @@ class JLineViewer(QMainWindow):
390
390
  if parent is None:
391
391
  parent = QStandardItemModel()
392
392
  parent.setHorizontalHeaderLabels(['Key', 'Value'])
393
+
394
+ # if isinstance(data, dict):
395
+ # for key, value in data.items():
396
+ # self.dataToModel(key, value, parent)
397
+
398
+ # 判断数据类型,并相应处理
393
399
  if isinstance(data, dict):
394
400
  for key, value in data.items():
395
401
  self.dataToModel(key, value, parent)
402
+ elif isinstance(data, list):
403
+ # 处理列表:创建一个无key的父项,将列表元素作为子项添加
404
+ self.dataToModel("List", data, parent)
405
+ else:
406
+ # 处理基本数据类型:创建一个单独的条目
407
+ self.dataToModel("Value", data, parent)
408
+
396
409
  return parent
397
410
 
398
411
  def dataToModel(self, key, value, parent):
@@ -476,7 +489,7 @@ def start_jlineviewer(fname=None):
476
489
  if isinstance(fname, list): # 可以输入一个list字典数据,会转存到临时目录里查看
477
490
  tempfile = XlPath.tempfile(suffix='.jsonl')
478
491
  tempfile.write_jsonl(fname)
479
- fname = tempfile
492
+ fname = tempfile.as_posix()
480
493
  if fname:
481
494
  ex.showDialog(fname=fname)
482
495
  sys.exit(app.exec_())
@@ -0,0 +1,277 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # @Author : 陈坤泽
4
+ # @Email : 877362867@qq.com
5
+ # @Date : 2024/04/12
6
+
7
+ import re
8
+ from urllib.parse import unquote
9
+ import tempfile
10
+ import time
11
+ from urllib.parse import urlparse
12
+
13
+ from deprecated import deprecated
14
+ from loguru import logger
15
+ import DrissionPage
16
+ from DrissionPage import ChromiumPage, Chromium
17
+ from DrissionPage._pages.chromium_base import ChromiumBase
18
+ from DrissionPage._pages.chromium_tab import ChromiumTab
19
+ import DrissionPage.errors
20
+
21
+ from pyxllib.prog.pupil import inject_members
22
+ from pyxllib.text.pupil import strfind
23
+ from pyxllib.file.specialist import GetEtag
24
+
25
+
26
@deprecated(reason='get_dp_page逻辑不太对,请换用get_dp_tab')
def get_dp_page(dp_page=None) -> 'XlPage':
    """Resolve ``dp_page`` to a browser/page object (deprecated, use ``get_dp_tab``).

    :param dp_page:
        None (default), a fresh ``Chromium()`` object is returned
        Chromium instance, returned unchanged
        ChromiumTab instance, its ``.page`` is returned
        func(tab), ``.page`` of the first tab the predicate accepts, or of a
            newly opened tab when no tab matches
        True, ``.page`` of a newly opened tab
        str, ``.page`` of a new tab opened on that url
    """
    if isinstance(dp_page, Chromium):
        return dp_page
    if isinstance(dp_page, ChromiumTab):
        return dp_page.page
    if callable(dp_page):
        browser = Chromium()
        for candidate in browser.get_tabs():
            if dp_page(candidate):
                return candidate.page
        return browser.new_tab().page
    if dp_page is True:
        return Chromium().new_tab().page
    if isinstance(dp_page, str):
        return Chromium().new_tab(dp_page).page
    return Chromium()
53
+
54
+
55
def get_dp_tab(dp_tab=None) -> 'XlTab':
    """Resolve ``dp_tab`` to a browser tab.

    :param dp_tab:
        None (default), ``latest_tab`` of a ``Chromium()`` object
        Chromium instance, its ``latest_tab``
        ChromiumTab instance, returned unchanged
        func(tab), the first tab the predicate accepts, or a newly opened
            tab when no tab matches
        True, a newly opened tab
        str, an existing tab whose url equals the string is reused;
            otherwise a new tab is opened and, while it still shows
            ``about:blank``, navigated to the url
    """
    if isinstance(dp_tab, Chromium):
        return dp_tab.latest_tab
    if isinstance(dp_tab, ChromiumTab):
        return dp_tab
    if callable(dp_tab):
        browser = Chromium()
        for candidate in browser.get_tabs():
            if dp_tab(candidate):
                return candidate
        return browser.new_tab()
    if dp_tab is True:
        return Chromium().new_tab()
    if isinstance(dp_tab, str):
        target_url = dp_tab

        def _same_url(tab):
            # Tab reuse: accept a tab that already shows the requested url.
            return tab if tab.url == target_url else None

        found = get_dp_tab(_same_url)
        if found.url == 'about:blank':
            found.get(target_url)
        return found
    return Chromium().latest_tab
88
+
89
+
90
def get_latest_not_dev_tab(browser=None):
    """Return the first tab that is not a DevTools or extension page.

    The developer tools window counts as a tab of its own, so such tabs are
    skipped. Tabs are examined in the order ``browser.get_tabs()`` yields
    them (presumably newest first; confirm against DrissionPage).

    :param browser: browser object to inspect; a ``Chromium()`` object is
        created when omitted
    :return: the tab, or None when every tab is skipped
    """
    browser = Chromium() if browser is None else browser
    internal_markers = ['devtools://', 'chrome-extension://']
    for tab in browser.get_tabs():
        # strfind presumably yields -1 when none of the markers occurs in the url.
        if strfind(tab.url, internal_markers) == -1:
            return tab
    return None
99
+
100
+
101
class XlChromiumBase(ChromiumBase):
    """Extra helper methods, injected into DrissionPage's ``ChromiumBase`` right after the class body."""

    def get2(self, url, show_errmsg=False, retry=None, interval=None):
        """Navigate to ``url`` and force a refresh afterwards.

        Workaround dated 2024-04-18 for DrissionPage 4.0.4.21, whose built-in
        ``get`` sometimes did not actually reload the url.

        :return: None when the current url already equals ``url`` (nothing is
            done), otherwise self
        """
        old_url = self.url
        if old_url == url:
            # page.refresh()
            return

        ele = self.active_ele
        self.get(url, show_errmsg=show_errmsg, retry=retry, interval=interval)
        try:  # if the new page loaded, the old element should in theory be stale
            # NOTE(review): evaluating the bare name does not query the element,
            # so it cannot raise ElementLostError; in practice the refresh below
            # always runs. Left unchanged pending confirmation of the intended probe.
            ele
            self.refresh()  # no error: force the page to update
            return self
        except DrissionPage.errors.ElementLostError:
            return self

    def get_download_files(self: Chromium):
        """Return the entries listed on the ``chrome://downloads/`` page.

        (Author's note of 2024-12-05: originally written for page file
        downloads; DrissionPage has a simpler solution for that, so this is
        kept mainly as a reference for parsing the downloads page.)

        A temporary tab is opened for the downloads page and always closed
        again, even when parsing fails.

        :return: list of ``{'file': <local path>, 'url': <source url>}`` dicts

        todo the list shown by default is probably incomplete; scrolling down
            to load more entries could be added
        """
        files = []
        page2 = Chromium().new_tab('chrome://downloads/')
        try:
            items = page2('tag:downloads-manager', timeout=1).sr('#mainContainer')('#downloadsList').eles(
                'tag:downloads-item')
            for item in items:
                # The icon url carries the local file path in its ``path=`` query part.
                loc = unquote(item.sr('tag:img').attr('src').replace('+', ' '))
                file = re.search(r'path=(.+?)(&scale=(\d+(\.\d+)?)x)?$', loc).group(1)

                files.append({
                    'file': file,
                    'url': unquote(item.sr('#url').attr('href'))
                })
        finally:
            page2.close()

        return files

    def wait_page_not_change(self, interval=3):
        """Block until the page html stops changing.

        :param interval: seconds between two snapshots; the page counts as
            stable once two consecutive snapshots have the same etag
        :return: the html of the snapshot taken before the final, identical one
        """
        last_html, last_etag = None, None
        while True:
            html = self.html
            etag = GetEtag.from_text(html)
            if etag == last_etag:
                break

            last_html, last_etag = html, etag
            time.sleep(interval)
        return last_html

    def action_type(self, ele, text, clear=True):
        """Type ``text`` into ``ele`` through an action chain.

        Mainly for date widgets: per the author, ``ele.input`` may display
        the text without triggering the page's js handlers, so the input is
        performed as click + keystrokes instead.

        :param clear: select the existing content with Ctrl+A first so the
            typed text replaces it
        """
        from DrissionPage.common import Keys
        if clear:
            self.actions.click(ele).key_down(Keys.CTRL).type('a').key_up(Keys.CTRL).type(text)
        else:
            self.actions.click(ele).type(text)


inject_members(XlChromiumBase, ChromiumBase)
178
+
179
+
180
class XlPage(XlChromiumBase, ChromiumTab, Chromium):
    """Type marker only, without behaviour of its own.

    The helper methods are monkey-patched in; this class lets an IDE jump
    to their definitions.
    """
183
+
184
+
185
class XlTab(XlChromiumBase, ChromiumTab, Chromium):
    """Type marker for tab objects; declares no members of its own."""
187
+
188
+
189
def wait_page_not_change(page, interval=3):
    """Function form of ``page.wait_page_not_change``.

    :param page: object providing a ``wait_page_not_change(interval)`` method
    :param interval: seconds between two content snapshots
    :return: whatever the method returns (previously the result was discarded)
    """
    return page.wait_page_not_change(interval)
191
+
192
+
193
class DpWebBase:
    """Base class for crawler tools built on DrissionPage."""

    def __init__(self, url=None, *, base_url=None):
        """Attach to the browser and open a new tab.

        :param url: page to open in the new tab
        :param base_url: url used to recognise tabs of the same site; defaults
            to ``scheme://netloc`` of ``url``. It stays empty when ``url`` is
            missing or has no scheme/host, which turns
            ``close_if_exceeds_min_tabs`` into a no-op.
        """
        self.browser = Chromium()
        self.browser.set.download_path(tempfile.gettempdir())

        # Derive the site root only from a usable url: urlparse(None) yields
        # bytes fields and a scheme-less url yields empty ones, either of
        # which would format into a meaningless but truthy base url.
        root_url = ''
        if url:
            parsed_url = urlparse(url)
            if parsed_url.scheme and parsed_url.netloc:
                root_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
        self.tab: XlTab = self.browser.new_tab(url)
        self.base_url = base_url or root_url  # used later to find tabs of the same site

    def close_if_exceeds_min_tabs(self, min_tabs_to_keep=1):
        """Close this object's tab when more than ``min_tabs_to_keep`` tabs match ``base_url``."""
        # Deliberately best-effort: when triggered during interpreter shutdown
        # this may fail with e.g. "ImportError: sys.meta_path is None, Python
        # is likely shutting down".
        try:
            if self.tab and self.base_url and len(self.browser.get_tabs(url=self.base_url)) > min_tabs_to_keep:
                self.tab.close()
        except Exception:
            pass

    # Cleanup through __del__ proved unreliable in practice, so it is left to
    # an explicit call instead:
    # def __del__(self):
    #     """ Each task gets a new tab; on exit, drop it if the site has other tabs open """
    #     self.close_if_exceeds_min_tabs()
217
+
218
+
219
def close_duplicate_tabs(browser=None):
    """Close duplicate browser tabs, keeping one tab per domain.

    Tabs are walked in the order ``get_tabs()`` returns them (newest to
    oldest, according to the original author); the first tab of each domain
    is kept and later tabs of the same domain are closed. If more than one
    tab remains afterwards, ``chrome://newtab`` tabs are closed as well.

    :param browser: browser object to clean up; a ``Chromium()`` object is
        created when omitted
    """
    # 1 Setup
    browser = Chromium() if browser is None else browser

    # Author's notes (2025-01-15 / 2025-02-04): get_tabs() can time out,
    # apparently when the browser was updated but not yet restarted; in that
    # case the whole browser is shut down instead.
    try:
        tabs = browser.get_tabs()
    except TimeoutError:
        logger.warning(
            'browser.get_tabs()运行报错,请清查浏览器是否已更新但没有重启。本次将browser.quit()退出整个浏览器。')
        browser.quit()
        return

    # 2 First pass: keep the first tab of every domain, close the repeats
    kept_domains = set()
    for tab in tabs:
        domain = urlparse(tab.url).netloc
        if domain not in kept_domains:
            kept_domains.add(domain)
            continue
        tab.close()

    # 3 Second pass: with several tabs left, blank new-tab pages go too
    leftovers = browser.get_tabs()
    if len(leftovers) <= 1:
        return
    for tab in leftovers:
        if tab.url.startswith('chrome://newtab'):
            tab.close()
259
+
260
+
261
def dp_check_quit():
    """Quit the browser when nothing but a blank new-tab page is left.

    The browser is also quit when it reports no tabs at all, or when listing
    the tabs times out.
    """
    browser = Chromium()
    try:
        tabs = browser.get_tabs()
    except TimeoutError:
        logger.warning('browser.get_tabs()运行报错,浏览器可能已更新但没有重启。将退出浏览器。')
        browser.quit()
        return

    if len(tabs) == 0:
        # No tab at all: nothing is left to keep the browser open for.
        browser.quit()
        return

    only_blank_left = len(tabs) == 1 and tabs[0].url.startswith('chrome://newtab')
    if only_blank_left:
        browser.quit()