pyxllib 0.3.96__py3-none-any.whl → 0.3.200__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (358) hide show
  1. pyxllib/__init__.py +21 -21
  2. pyxllib/algo/__init__.py +8 -8
  3. pyxllib/algo/disjoint.py +54 -54
  4. pyxllib/algo/geo.py +541 -529
  5. pyxllib/algo/intervals.py +964 -964
  6. pyxllib/algo/matcher.py +389 -311
  7. pyxllib/algo/newbie.py +166 -166
  8. pyxllib/algo/pupil.py +629 -461
  9. pyxllib/algo/shapelylib.py +67 -67
  10. pyxllib/algo/specialist.py +241 -240
  11. pyxllib/algo/stat.py +494 -458
  12. pyxllib/algo/treelib.py +149 -149
  13. pyxllib/algo/unitlib.py +66 -66
  14. {pyxlpr → pyxllib/autogui}/__init__.py +5 -5
  15. pyxllib/autogui/activewin.py +246 -0
  16. pyxllib/autogui/all.py +9 -0
  17. pyxllib/{ext/autogui → autogui}/autogui.py +852 -823
  18. pyxllib/autogui/uiautolib.py +362 -0
  19. pyxllib/{ext/autogui → autogui}/virtualkey.py +102 -102
  20. pyxllib/autogui/wechat.py +827 -0
  21. pyxllib/autogui/wechat_msg.py +421 -0
  22. pyxllib/autogui/wxautolib.py +84 -0
  23. pyxllib/cv/__init__.py +5 -5
  24. pyxllib/cv/expert.py +267 -267
  25. pyxllib/cv/imfile.py +159 -159
  26. pyxllib/cv/imhash.py +39 -39
  27. pyxllib/cv/pupil.py +9 -9
  28. pyxllib/cv/rgbfmt.py +1525 -1525
  29. pyxllib/cv/slidercaptcha.py +137 -0
  30. pyxllib/cv/trackbartools.py +251 -251
  31. pyxllib/cv/xlcvlib.py +1040 -1040
  32. pyxllib/cv/xlpillib.py +423 -423
  33. pyxllib/data/echarts.py +240 -129
  34. pyxllib/data/jsonlib.py +89 -0
  35. pyxllib/data/oss.py +72 -72
  36. pyxllib/data/pglib.py +1127 -643
  37. pyxllib/data/sqlite.py +568 -341
  38. pyxllib/data/sqllib.py +297 -297
  39. pyxllib/ext/JLineViewer.py +505 -492
  40. pyxllib/ext/__init__.py +6 -6
  41. pyxllib/ext/demolib.py +246 -246
  42. pyxllib/ext/drissionlib.py +277 -0
  43. pyxllib/ext/kq5034lib.py +12 -1606
  44. pyxllib/ext/old.py +663 -663
  45. pyxllib/ext/qt.py +449 -449
  46. pyxllib/ext/robustprocfile.py +497 -0
  47. pyxllib/ext/seleniumlib.py +76 -76
  48. pyxllib/ext/tk.py +173 -173
  49. pyxllib/ext/unixlib.py +827 -826
  50. pyxllib/ext/utools.py +351 -338
  51. pyxllib/ext/webhook.py +124 -101
  52. pyxllib/ext/win32lib.py +40 -40
  53. pyxllib/ext/wjxlib.py +88 -0
  54. pyxllib/ext/wpsapi.py +124 -0
  55. pyxllib/ext/xlwork.py +9 -0
  56. pyxllib/ext/yuquelib.py +1105 -173
  57. pyxllib/file/__init__.py +17 -17
  58. pyxllib/file/docxlib.py +761 -761
  59. pyxllib/file/gitlib.py +309 -309
  60. pyxllib/file/libreoffice.py +165 -0
  61. pyxllib/file/movielib.py +148 -139
  62. pyxllib/file/newbie.py +10 -10
  63. pyxllib/file/onenotelib.py +1469 -1469
  64. pyxllib/file/packlib/__init__.py +330 -293
  65. pyxllib/file/packlib/zipfile.py +2441 -2441
  66. pyxllib/file/pdflib.py +426 -426
  67. pyxllib/file/pupil.py +185 -185
  68. pyxllib/file/specialist/__init__.py +685 -685
  69. pyxllib/file/specialist/dirlib.py +799 -799
  70. pyxllib/file/specialist/download.py +193 -186
  71. pyxllib/file/specialist/filelib.py +2829 -2618
  72. pyxllib/file/xlsxlib.py +3131 -2976
  73. pyxllib/file/xlsyncfile.py +341 -0
  74. pyxllib/prog/__init__.py +5 -5
  75. pyxllib/prog/cachetools.py +64 -0
  76. pyxllib/prog/deprecatedlib.py +233 -233
  77. pyxllib/prog/filelock.py +42 -0
  78. pyxllib/prog/ipyexec.py +253 -253
  79. pyxllib/prog/multiprogs.py +940 -0
  80. pyxllib/prog/newbie.py +451 -444
  81. pyxllib/prog/pupil.py +1197 -1128
  82. pyxllib/prog/sitepackages.py +33 -33
  83. pyxllib/prog/specialist/__init__.py +391 -217
  84. pyxllib/prog/specialist/bc.py +203 -200
  85. pyxllib/prog/specialist/browser.py +497 -488
  86. pyxllib/prog/specialist/common.py +347 -347
  87. pyxllib/prog/specialist/datetime.py +199 -131
  88. pyxllib/prog/specialist/tictoc.py +240 -241
  89. pyxllib/prog/specialist/xllog.py +180 -180
  90. pyxllib/prog/xlosenv.py +108 -101
  91. pyxllib/stdlib/__init__.py +17 -17
  92. pyxllib/stdlib/tablepyxl/__init__.py +10 -10
  93. pyxllib/stdlib/tablepyxl/style.py +303 -303
  94. pyxllib/stdlib/tablepyxl/tablepyxl.py +130 -130
  95. pyxllib/text/__init__.py +8 -8
  96. pyxllib/text/ahocorasick.py +39 -39
  97. pyxllib/text/airscript.js +744 -0
  98. pyxllib/text/charclasslib.py +121 -109
  99. pyxllib/text/jiebalib.py +267 -264
  100. pyxllib/text/jinjalib.py +32 -0
  101. pyxllib/text/jsa_ai_prompt.md +271 -0
  102. pyxllib/text/jscode.py +922 -767
  103. pyxllib/text/latex/__init__.py +158 -158
  104. pyxllib/text/levenshtein.py +303 -303
  105. pyxllib/text/nestenv.py +1215 -1215
  106. pyxllib/text/newbie.py +300 -288
  107. pyxllib/text/pupil/__init__.py +8 -8
  108. pyxllib/text/pupil/common.py +1121 -1095
  109. pyxllib/text/pupil/xlalign.py +326 -326
  110. pyxllib/text/pycode.py +47 -47
  111. pyxllib/text/specialist/__init__.py +8 -8
  112. pyxllib/text/specialist/common.py +112 -112
  113. pyxllib/text/specialist/ptag.py +186 -186
  114. pyxllib/text/spellchecker.py +172 -172
  115. pyxllib/text/templates/echart_base.html +11 -0
  116. pyxllib/text/templates/highlight_code.html +17 -0
  117. pyxllib/text/templates/latex_editor.html +103 -0
  118. pyxllib/text/vbacode.py +17 -17
  119. pyxllib/text/xmllib.py +747 -685
  120. pyxllib/xl.py +42 -38
  121. pyxllib/xlcv.py +17 -17
  122. pyxllib-0.3.200.dist-info/METADATA +48 -0
  123. pyxllib-0.3.200.dist-info/RECORD +126 -0
  124. {pyxllib-0.3.96.dist-info → pyxllib-0.3.200.dist-info}/WHEEL +1 -2
  125. {pyxllib-0.3.96.dist-info → pyxllib-0.3.200.dist-info/licenses}/LICENSE +190 -190
  126. pyxllib/ext/autogui/__init__.py +0 -8
  127. pyxllib-0.3.96.dist-info/METADATA +0 -51
  128. pyxllib-0.3.96.dist-info/RECORD +0 -333
  129. pyxllib-0.3.96.dist-info/top_level.txt +0 -2
  130. pyxlpr/ai/__init__.py +0 -5
  131. pyxlpr/ai/clientlib.py +0 -1281
  132. pyxlpr/ai/specialist.py +0 -286
  133. pyxlpr/ai/torch_app.py +0 -172
  134. pyxlpr/ai/xlpaddle.py +0 -655
  135. pyxlpr/ai/xltorch.py +0 -705
  136. pyxlpr/data/__init__.py +0 -11
  137. pyxlpr/data/coco.py +0 -1325
  138. pyxlpr/data/datacls.py +0 -365
  139. pyxlpr/data/datasets.py +0 -200
  140. pyxlpr/data/gptlib.py +0 -1291
  141. pyxlpr/data/icdar/__init__.py +0 -96
  142. pyxlpr/data/icdar/deteval.py +0 -377
  143. pyxlpr/data/icdar/icdar2013.py +0 -341
  144. pyxlpr/data/icdar/iou.py +0 -340
  145. pyxlpr/data/icdar/rrc_evaluation_funcs_1_1.py +0 -463
  146. pyxlpr/data/imtextline.py +0 -473
  147. pyxlpr/data/labelme.py +0 -866
  148. pyxlpr/data/removeline.py +0 -179
  149. pyxlpr/data/specialist.py +0 -57
  150. pyxlpr/eval/__init__.py +0 -85
  151. pyxlpr/paddleocr.py +0 -776
  152. pyxlpr/ppocr/__init__.py +0 -15
  153. pyxlpr/ppocr/configs/rec/multi_language/generate_multi_language_configs.py +0 -226
  154. pyxlpr/ppocr/data/__init__.py +0 -135
  155. pyxlpr/ppocr/data/imaug/ColorJitter.py +0 -26
  156. pyxlpr/ppocr/data/imaug/__init__.py +0 -67
  157. pyxlpr/ppocr/data/imaug/copy_paste.py +0 -170
  158. pyxlpr/ppocr/data/imaug/east_process.py +0 -437
  159. pyxlpr/ppocr/data/imaug/gen_table_mask.py +0 -244
  160. pyxlpr/ppocr/data/imaug/iaa_augment.py +0 -114
  161. pyxlpr/ppocr/data/imaug/label_ops.py +0 -789
  162. pyxlpr/ppocr/data/imaug/make_border_map.py +0 -184
  163. pyxlpr/ppocr/data/imaug/make_pse_gt.py +0 -106
  164. pyxlpr/ppocr/data/imaug/make_shrink_map.py +0 -126
  165. pyxlpr/ppocr/data/imaug/operators.py +0 -433
  166. pyxlpr/ppocr/data/imaug/pg_process.py +0 -906
  167. pyxlpr/ppocr/data/imaug/randaugment.py +0 -143
  168. pyxlpr/ppocr/data/imaug/random_crop_data.py +0 -239
  169. pyxlpr/ppocr/data/imaug/rec_img_aug.py +0 -533
  170. pyxlpr/ppocr/data/imaug/sast_process.py +0 -777
  171. pyxlpr/ppocr/data/imaug/text_image_aug/__init__.py +0 -17
  172. pyxlpr/ppocr/data/imaug/text_image_aug/augment.py +0 -120
  173. pyxlpr/ppocr/data/imaug/text_image_aug/warp_mls.py +0 -168
  174. pyxlpr/ppocr/data/lmdb_dataset.py +0 -115
  175. pyxlpr/ppocr/data/pgnet_dataset.py +0 -104
  176. pyxlpr/ppocr/data/pubtab_dataset.py +0 -107
  177. pyxlpr/ppocr/data/simple_dataset.py +0 -372
  178. pyxlpr/ppocr/losses/__init__.py +0 -61
  179. pyxlpr/ppocr/losses/ace_loss.py +0 -52
  180. pyxlpr/ppocr/losses/basic_loss.py +0 -135
  181. pyxlpr/ppocr/losses/center_loss.py +0 -88
  182. pyxlpr/ppocr/losses/cls_loss.py +0 -30
  183. pyxlpr/ppocr/losses/combined_loss.py +0 -67
  184. pyxlpr/ppocr/losses/det_basic_loss.py +0 -208
  185. pyxlpr/ppocr/losses/det_db_loss.py +0 -80
  186. pyxlpr/ppocr/losses/det_east_loss.py +0 -63
  187. pyxlpr/ppocr/losses/det_pse_loss.py +0 -149
  188. pyxlpr/ppocr/losses/det_sast_loss.py +0 -121
  189. pyxlpr/ppocr/losses/distillation_loss.py +0 -272
  190. pyxlpr/ppocr/losses/e2e_pg_loss.py +0 -140
  191. pyxlpr/ppocr/losses/kie_sdmgr_loss.py +0 -113
  192. pyxlpr/ppocr/losses/rec_aster_loss.py +0 -99
  193. pyxlpr/ppocr/losses/rec_att_loss.py +0 -39
  194. pyxlpr/ppocr/losses/rec_ctc_loss.py +0 -44
  195. pyxlpr/ppocr/losses/rec_enhanced_ctc_loss.py +0 -70
  196. pyxlpr/ppocr/losses/rec_nrtr_loss.py +0 -30
  197. pyxlpr/ppocr/losses/rec_sar_loss.py +0 -28
  198. pyxlpr/ppocr/losses/rec_srn_loss.py +0 -47
  199. pyxlpr/ppocr/losses/table_att_loss.py +0 -109
  200. pyxlpr/ppocr/metrics/__init__.py +0 -44
  201. pyxlpr/ppocr/metrics/cls_metric.py +0 -45
  202. pyxlpr/ppocr/metrics/det_metric.py +0 -82
  203. pyxlpr/ppocr/metrics/distillation_metric.py +0 -73
  204. pyxlpr/ppocr/metrics/e2e_metric.py +0 -86
  205. pyxlpr/ppocr/metrics/eval_det_iou.py +0 -274
  206. pyxlpr/ppocr/metrics/kie_metric.py +0 -70
  207. pyxlpr/ppocr/metrics/rec_metric.py +0 -75
  208. pyxlpr/ppocr/metrics/table_metric.py +0 -50
  209. pyxlpr/ppocr/modeling/architectures/__init__.py +0 -32
  210. pyxlpr/ppocr/modeling/architectures/base_model.py +0 -88
  211. pyxlpr/ppocr/modeling/architectures/distillation_model.py +0 -60
  212. pyxlpr/ppocr/modeling/backbones/__init__.py +0 -54
  213. pyxlpr/ppocr/modeling/backbones/det_mobilenet_v3.py +0 -268
  214. pyxlpr/ppocr/modeling/backbones/det_resnet_vd.py +0 -246
  215. pyxlpr/ppocr/modeling/backbones/det_resnet_vd_sast.py +0 -285
  216. pyxlpr/ppocr/modeling/backbones/e2e_resnet_vd_pg.py +0 -265
  217. pyxlpr/ppocr/modeling/backbones/kie_unet_sdmgr.py +0 -186
  218. pyxlpr/ppocr/modeling/backbones/rec_mobilenet_v3.py +0 -138
  219. pyxlpr/ppocr/modeling/backbones/rec_mv1_enhance.py +0 -258
  220. pyxlpr/ppocr/modeling/backbones/rec_nrtr_mtb.py +0 -48
  221. pyxlpr/ppocr/modeling/backbones/rec_resnet_31.py +0 -210
  222. pyxlpr/ppocr/modeling/backbones/rec_resnet_aster.py +0 -143
  223. pyxlpr/ppocr/modeling/backbones/rec_resnet_fpn.py +0 -307
  224. pyxlpr/ppocr/modeling/backbones/rec_resnet_vd.py +0 -286
  225. pyxlpr/ppocr/modeling/heads/__init__.py +0 -54
  226. pyxlpr/ppocr/modeling/heads/cls_head.py +0 -52
  227. pyxlpr/ppocr/modeling/heads/det_db_head.py +0 -118
  228. pyxlpr/ppocr/modeling/heads/det_east_head.py +0 -121
  229. pyxlpr/ppocr/modeling/heads/det_pse_head.py +0 -37
  230. pyxlpr/ppocr/modeling/heads/det_sast_head.py +0 -128
  231. pyxlpr/ppocr/modeling/heads/e2e_pg_head.py +0 -253
  232. pyxlpr/ppocr/modeling/heads/kie_sdmgr_head.py +0 -206
  233. pyxlpr/ppocr/modeling/heads/multiheadAttention.py +0 -163
  234. pyxlpr/ppocr/modeling/heads/rec_aster_head.py +0 -393
  235. pyxlpr/ppocr/modeling/heads/rec_att_head.py +0 -202
  236. pyxlpr/ppocr/modeling/heads/rec_ctc_head.py +0 -88
  237. pyxlpr/ppocr/modeling/heads/rec_nrtr_head.py +0 -826
  238. pyxlpr/ppocr/modeling/heads/rec_sar_head.py +0 -402
  239. pyxlpr/ppocr/modeling/heads/rec_srn_head.py +0 -280
  240. pyxlpr/ppocr/modeling/heads/self_attention.py +0 -406
  241. pyxlpr/ppocr/modeling/heads/table_att_head.py +0 -246
  242. pyxlpr/ppocr/modeling/necks/__init__.py +0 -32
  243. pyxlpr/ppocr/modeling/necks/db_fpn.py +0 -111
  244. pyxlpr/ppocr/modeling/necks/east_fpn.py +0 -188
  245. pyxlpr/ppocr/modeling/necks/fpn.py +0 -138
  246. pyxlpr/ppocr/modeling/necks/pg_fpn.py +0 -314
  247. pyxlpr/ppocr/modeling/necks/rnn.py +0 -92
  248. pyxlpr/ppocr/modeling/necks/sast_fpn.py +0 -284
  249. pyxlpr/ppocr/modeling/necks/table_fpn.py +0 -110
  250. pyxlpr/ppocr/modeling/transforms/__init__.py +0 -28
  251. pyxlpr/ppocr/modeling/transforms/stn.py +0 -135
  252. pyxlpr/ppocr/modeling/transforms/tps.py +0 -308
  253. pyxlpr/ppocr/modeling/transforms/tps_spatial_transformer.py +0 -156
  254. pyxlpr/ppocr/optimizer/__init__.py +0 -61
  255. pyxlpr/ppocr/optimizer/learning_rate.py +0 -228
  256. pyxlpr/ppocr/optimizer/lr_scheduler.py +0 -49
  257. pyxlpr/ppocr/optimizer/optimizer.py +0 -160
  258. pyxlpr/ppocr/optimizer/regularizer.py +0 -52
  259. pyxlpr/ppocr/postprocess/__init__.py +0 -55
  260. pyxlpr/ppocr/postprocess/cls_postprocess.py +0 -33
  261. pyxlpr/ppocr/postprocess/db_postprocess.py +0 -234
  262. pyxlpr/ppocr/postprocess/east_postprocess.py +0 -143
  263. pyxlpr/ppocr/postprocess/locality_aware_nms.py +0 -200
  264. pyxlpr/ppocr/postprocess/pg_postprocess.py +0 -52
  265. pyxlpr/ppocr/postprocess/pse_postprocess/__init__.py +0 -15
  266. pyxlpr/ppocr/postprocess/pse_postprocess/pse/__init__.py +0 -29
  267. pyxlpr/ppocr/postprocess/pse_postprocess/pse/setup.py +0 -14
  268. pyxlpr/ppocr/postprocess/pse_postprocess/pse_postprocess.py +0 -118
  269. pyxlpr/ppocr/postprocess/rec_postprocess.py +0 -654
  270. pyxlpr/ppocr/postprocess/sast_postprocess.py +0 -355
  271. pyxlpr/ppocr/tools/__init__.py +0 -14
  272. pyxlpr/ppocr/tools/eval.py +0 -83
  273. pyxlpr/ppocr/tools/export_center.py +0 -77
  274. pyxlpr/ppocr/tools/export_model.py +0 -129
  275. pyxlpr/ppocr/tools/infer/predict_cls.py +0 -151
  276. pyxlpr/ppocr/tools/infer/predict_det.py +0 -300
  277. pyxlpr/ppocr/tools/infer/predict_e2e.py +0 -169
  278. pyxlpr/ppocr/tools/infer/predict_rec.py +0 -414
  279. pyxlpr/ppocr/tools/infer/predict_system.py +0 -204
  280. pyxlpr/ppocr/tools/infer/utility.py +0 -629
  281. pyxlpr/ppocr/tools/infer_cls.py +0 -83
  282. pyxlpr/ppocr/tools/infer_det.py +0 -134
  283. pyxlpr/ppocr/tools/infer_e2e.py +0 -122
  284. pyxlpr/ppocr/tools/infer_kie.py +0 -153
  285. pyxlpr/ppocr/tools/infer_rec.py +0 -146
  286. pyxlpr/ppocr/tools/infer_table.py +0 -107
  287. pyxlpr/ppocr/tools/program.py +0 -596
  288. pyxlpr/ppocr/tools/test_hubserving.py +0 -117
  289. pyxlpr/ppocr/tools/train.py +0 -163
  290. pyxlpr/ppocr/tools/xlprog.py +0 -748
  291. pyxlpr/ppocr/utils/EN_symbol_dict.txt +0 -94
  292. pyxlpr/ppocr/utils/__init__.py +0 -24
  293. pyxlpr/ppocr/utils/dict/ar_dict.txt +0 -117
  294. pyxlpr/ppocr/utils/dict/arabic_dict.txt +0 -162
  295. pyxlpr/ppocr/utils/dict/be_dict.txt +0 -145
  296. pyxlpr/ppocr/utils/dict/bg_dict.txt +0 -140
  297. pyxlpr/ppocr/utils/dict/chinese_cht_dict.txt +0 -8421
  298. pyxlpr/ppocr/utils/dict/cyrillic_dict.txt +0 -163
  299. pyxlpr/ppocr/utils/dict/devanagari_dict.txt +0 -167
  300. pyxlpr/ppocr/utils/dict/en_dict.txt +0 -63
  301. pyxlpr/ppocr/utils/dict/fa_dict.txt +0 -136
  302. pyxlpr/ppocr/utils/dict/french_dict.txt +0 -136
  303. pyxlpr/ppocr/utils/dict/german_dict.txt +0 -143
  304. pyxlpr/ppocr/utils/dict/hi_dict.txt +0 -162
  305. pyxlpr/ppocr/utils/dict/it_dict.txt +0 -118
  306. pyxlpr/ppocr/utils/dict/japan_dict.txt +0 -4399
  307. pyxlpr/ppocr/utils/dict/ka_dict.txt +0 -153
  308. pyxlpr/ppocr/utils/dict/korean_dict.txt +0 -3688
  309. pyxlpr/ppocr/utils/dict/latin_dict.txt +0 -185
  310. pyxlpr/ppocr/utils/dict/mr_dict.txt +0 -153
  311. pyxlpr/ppocr/utils/dict/ne_dict.txt +0 -153
  312. pyxlpr/ppocr/utils/dict/oc_dict.txt +0 -96
  313. pyxlpr/ppocr/utils/dict/pu_dict.txt +0 -130
  314. pyxlpr/ppocr/utils/dict/rs_dict.txt +0 -91
  315. pyxlpr/ppocr/utils/dict/rsc_dict.txt +0 -134
  316. pyxlpr/ppocr/utils/dict/ru_dict.txt +0 -125
  317. pyxlpr/ppocr/utils/dict/ta_dict.txt +0 -128
  318. pyxlpr/ppocr/utils/dict/table_dict.txt +0 -277
  319. pyxlpr/ppocr/utils/dict/table_structure_dict.txt +0 -2759
  320. pyxlpr/ppocr/utils/dict/te_dict.txt +0 -151
  321. pyxlpr/ppocr/utils/dict/ug_dict.txt +0 -114
  322. pyxlpr/ppocr/utils/dict/uk_dict.txt +0 -142
  323. pyxlpr/ppocr/utils/dict/ur_dict.txt +0 -137
  324. pyxlpr/ppocr/utils/dict/xi_dict.txt +0 -110
  325. pyxlpr/ppocr/utils/dict90.txt +0 -90
  326. pyxlpr/ppocr/utils/e2e_metric/Deteval.py +0 -574
  327. pyxlpr/ppocr/utils/e2e_metric/polygon_fast.py +0 -83
  328. pyxlpr/ppocr/utils/e2e_utils/extract_batchsize.py +0 -87
  329. pyxlpr/ppocr/utils/e2e_utils/extract_textpoint_fast.py +0 -457
  330. pyxlpr/ppocr/utils/e2e_utils/extract_textpoint_slow.py +0 -592
  331. pyxlpr/ppocr/utils/e2e_utils/pgnet_pp_utils.py +0 -162
  332. pyxlpr/ppocr/utils/e2e_utils/visual.py +0 -162
  333. pyxlpr/ppocr/utils/en_dict.txt +0 -95
  334. pyxlpr/ppocr/utils/gen_label.py +0 -81
  335. pyxlpr/ppocr/utils/ic15_dict.txt +0 -36
  336. pyxlpr/ppocr/utils/iou.py +0 -54
  337. pyxlpr/ppocr/utils/logging.py +0 -69
  338. pyxlpr/ppocr/utils/network.py +0 -84
  339. pyxlpr/ppocr/utils/ppocr_keys_v1.txt +0 -6623
  340. pyxlpr/ppocr/utils/profiler.py +0 -110
  341. pyxlpr/ppocr/utils/save_load.py +0 -150
  342. pyxlpr/ppocr/utils/stats.py +0 -72
  343. pyxlpr/ppocr/utils/utility.py +0 -80
  344. pyxlpr/ppstructure/__init__.py +0 -13
  345. pyxlpr/ppstructure/predict_system.py +0 -187
  346. pyxlpr/ppstructure/table/__init__.py +0 -13
  347. pyxlpr/ppstructure/table/eval_table.py +0 -72
  348. pyxlpr/ppstructure/table/matcher.py +0 -192
  349. pyxlpr/ppstructure/table/predict_structure.py +0 -136
  350. pyxlpr/ppstructure/table/predict_table.py +0 -221
  351. pyxlpr/ppstructure/table/table_metric/__init__.py +0 -16
  352. pyxlpr/ppstructure/table/table_metric/parallel.py +0 -51
  353. pyxlpr/ppstructure/table/table_metric/table_metric.py +0 -247
  354. pyxlpr/ppstructure/table/tablepyxl/__init__.py +0 -13
  355. pyxlpr/ppstructure/table/tablepyxl/style.py +0 -283
  356. pyxlpr/ppstructure/table/tablepyxl/tablepyxl.py +0 -118
  357. pyxlpr/ppstructure/utility.py +0 -71
  358. pyxlpr/xlai.py +0 -10
pyxllib/data/sqlite.py CHANGED
@@ -1,341 +1,568 @@
1
- #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
- # @Author : 陈坤泽
4
- # @Email : 877362867@qq.com
5
- # @Date : 2022/04/12 08:59
6
-
7
- import json
8
- import re
9
- import sqlite3
10
-
11
- import pandas as pd
12
-
13
-
14
class SqlBase:
    """Driver-independent SQL helpers shared by concrete connection classes.

    The class itself defines neither ``execute`` nor ``commit``; it relies on
    the concrete connection it is mixed into to supply them, and on subclasses
    to override the methods that raise ``NotImplementedError``.
    """

    # Parameter marker used when building parameterised statements.
    # '%s' is the historical default of this class; a subclass whose driver
    # uses a different marker (sqlite3 uses '?') overrides this attribute.
    _placeholder = '%s'

    def __1_库(self):
        """Section marker: database-level helpers."""

    def __2_表格(self):
        """Section marker: table-level helpers."""

    def get_table_names(self):
        """Return the names of all tables (must be provided by a subclass)."""
        raise NotImplementedError

    def create_table(self, table_name, column_descs):
        """[DDL create] Create a new table.

        :param table_name: name of the table to create
        :param column_descs:
            str, an ordinary column description such as 'c1 text, c2 blob'
            dict, column name -> sample value; the sample is passed to
                ``autotype`` to choose the column type
        """
        # 1 infer the column types from the sample values
        if not isinstance(column_descs, str):
            column_descs = ','.join(f'{k} {self.autotype(v)}' for k, v in column_descs.items())

        # 2 create the table
        self.execute(f'CREATE TABLE {table_name}({column_descs})')

    def get_column_names(self, table_name):
        """Return all column names of a table (must be provided by a subclass)."""
        raise NotImplementedError

    def ensure_column(self, table_name, col_name, col_type='', *, col_ref_val=None, **kwargs):
        """[DDL alter] Add a column unless it already exists.

        :param table_name: table to alter
        :param col_name: column that must exist afterwards
        :param col_type: explicit column type; falls back to 'text' when empty
        :param col_ref_val: sample value; when given, ``autotype`` decides the
            column type and overrides ``col_type``
        """
        if col_name in self.get_column_names(table_name):
            return

        # Compare against None so that falsy samples (0, 0.0, '', False)
        # still take part in type inference.
        if col_ref_val is not None:
            col_type = self.autotype(col_ref_val)
        if not col_type:
            col_type = 'text'

        self.execute(f'ALTER TABLE {table_name} ADD COLUMN {col_name} {col_type}')

    def delete_column(self, table_name, col_name):
        """Drop a column from a table, doing nothing when it does not exist.

        The raw statement is simply ``ALTER TABLE {table_name} DROP COLUMN {col_name}``;
        this wrapper only adds the existence check.
        """
        if col_name in self.get_column_names(table_name):
            self.execute(f'ALTER TABLE {table_name} DROP COLUMN {col_name}')

    def create_index(self, index_name, table_name, cols):
        """Create an index on ``cols`` (comma-separated str, or an iterable of names)."""
        if not isinstance(cols, str):
            cols = ','.join(map(str, cols))
        self.execute(f'CREATE INDEX {index_name} ON {table_name}({cols})')

    def keep_top_n_rows(self, table_name, num, col_name='id'):
        """Keep only a small part of the data, e.g. to build lite/demo files.

        Rows whose ``col_name`` value is not among the first ``num`` values
        returned by an un-ordered SELECT are deleted, then the change is committed.

        :param col_name: column used to identify the rows to keep
        """
        self.execute(f'DELETE FROM {table_name} WHERE {col_name} NOT IN'
                     f'(SELECT {col_name} FROM {table_name} LIMIT {num})')
        self.commit()

    def clear_table(self, table_name):
        """[DML delete] Remove every row of the table (no commit is issued)."""
        self.execute(f'DELETE FROM {table_name}')

    def count_all_table_rows(self):
        """Return ``[[table_name, row_count], ...]`` sorted by row count, largest first."""
        ls = [[name, self.execute(f'SELECT count(*) FROM {name}').fetchone()[0]]
              for name in self.get_table_names()]
        ls.sort(key=lambda x: -x[1])
        return ls

    # The original (misspelled) public name is kept for existing callers.
    count_all_talbe_rows = count_all_table_rows

    def __3_execute(self):
        """Section marker: execute helpers."""

    def exec2one(self, *args, **kwargs):
        """Return the first column of the first row, or None when there is no row."""
        row = self.execute(*args, **kwargs).fetchone()
        return None if row is None else row[0]

    def exec2row(self, *args, **kwargs):
        """Return the first row of the result (whatever ``fetchone`` returns)."""
        return self.execute(*args, **kwargs).fetchone()

    def exec2col(self, *args, **kwargs):
        """Return the first column of every row as a list."""
        return [row[0] for row in self.execute(*args, **kwargs).fetchall()]

    # backward-compatible alias of the old name
    exec_col = exec2col

    def exec2df(self, *args, **kwargs):
        """Run the query through ``pandas.read_sql`` and return a DataFrame.

        Positional arguments are placed before the connection, so the first
        one is the SQL text.
        """
        return pd.read_sql(*args, self, **kwargs)

    def __4_数据类型(self):
        """Section marker: data types."""

    @classmethod
    def cvt_type(cls, val):
        """Convert a Python object into a value that can be stored (subclass hook)."""
        raise NotImplementedError

    @classmethod
    def cvt_types(cls, vals):
        """Apply ``cvt_type`` to every value and return the results as a list."""
        return [cls.cvt_type(v) for v in vals]

    @classmethod
    def autotype(cls, val):
        """Decide which column type a Python object should be stored as (subclass hook)."""
        raise NotImplementedError

    def __5_增删改查(self):
        """Section marker: insert / delete / update / select."""

    def update_row(self, table_name, cols, where, *, commit=False):
        """[Update] Update rows matching ``where``.

        Despite the name, every row that satisfies ``where`` is updated.

        :param dict cols: column -> new value
        :param dict where: column -> value that a row must equal to be updated
        :param commit: commit right after the statement; committing on every
            call is slow, so it is off by default

        >> xldb.update_row('xlapi', {'input': d}, {'id': x['id']})
        """
        mark = self._placeholder
        kvs = ','.join(f'{k}={mark}' for k in cols)
        ops = ' AND '.join(f'{k}={mark}' for k in where)
        vals = list(cols.values()) + list(where.values())
        self.execute(f'UPDATE {table_name} SET {kvs} WHERE {ops}', self.cvt_types(vals))
        if commit:
            self.commit()

    def select_col(self, table_name, col):
        """Return all values of one column as a list."""
        return self.exec2col(f'SELECT {col} FROM {table_name}')

    def group_count(self, table_name, cols, count_column_name='cnt'):
        """[Select] Count how often each combination of ``cols`` occurs.

        The result is ordered by the count, largest first.

        :param str|list cols: comma-separated string such as
            ``con.group_count('gpu_trace', 'host_name,total_memory')``,
            or a list of names that is joined automatically
        :return: DataFrame with one column per grouped column plus the count column
        """
        if not isinstance(cols, str):
            cols = ','.join(map(str, cols))
        sql = f'SELECT {cols}, COUNT(*) {count_column_name} FROM {table_name} ' \
              f'GROUP BY {cols} ORDER BY {count_column_name} DESC'
        records = self.execute(sql).fetchall()
        # Strip the names so that 'a, b' labels the columns 'a' and 'b'
        # instead of 'a' and ' b'.
        names = [c.strip() for c in cols.split(',')]
        return pd.DataFrame.from_records(records, columns=names + [count_column_name])

    def get_count_by_altering_query(self, data_query: str) -> int:
        """Count the rows of a SELECT query by rewriting it into a COUNT query.

        Everything before the first ``FROM`` keyword (matched case-insensitively
        on word boundaries) is replaced with ``SELECT COUNT(*) ``; the rewritten
        statement is executed and its first value returned.

        :param data_query: the original SQL SELECT statement
        :return: the row count; 0 when no FROM keyword is found, when the
            rewritten query fails (the error is printed), or when it yields no row

        >> query = "SELECT id, name FROM users WHERE active = True"
        >> count = sql.get_count_by_altering_query(query)

        Notes:
        - The input is assumed to be a valid SELECT statement.
        - Only the FIRST ``FROM`` is considered, so a select list that itself
          contains FROM (sub-queries, ``EXTRACT(... FROM ...)``) is cut in the
          wrong place; make sure the rewritten query is still valid for
          complex statements.
        """
        match = re.search(r'\bFROM\b', data_query, flags=re.IGNORECASE)
        if not match:
            print("No 'FROM' keyword found in the data query.")
            return 0

        count_query = 'SELECT COUNT(*) ' + data_query[match.start():]
        try:
            result = self.execute(count_query).fetchone()
            return result[0] if result else 0
        except Exception as e:  # best effort by design: report and fall back to 0
            print(f"Error executing count query: {e}")
            return 0
240
-
241
-
242
class Connection(sqlite3.Connection, SqlBase):
    """sqlite3 connection extended with the SqlBase helpers.

    Statement families, for orientation:

    DDL - data definition language
        CREATE  create a new table, view or other database object
        ALTER   modify an existing database object such as a table
        DROP    remove a table, view or other database object
    DML - data manipulation language
        INSERT  create a record
        UPDATE  modify records
        DELETE  remove records
    DQL - data query language
        SELECT  retrieve records from one or more tables
    """

    # sqlite3 binds parameters with '?' markers (it rejects '%s').
    _placeholder = '?'

    def __1_库(self):
        """Section marker: database-level helpers."""

    def vacuum(self):
        """Run VACUUM so that space freed by deletions is actually released.

        The statement is executed directly; no commit is issued.
        """
        self.execute('vacuum')

    def __2_表格(self):
        """Section marker: table-level helpers."""

    def get_table_names(self):
        """Return the names of all tables listed in ``sqlite_master``."""
        return [x[0] for x in self.execute("SELECT name FROM sqlite_master WHERE type='table'")]

    def has_table(self, table_name):
        """Return True when a table with exactly this name exists."""
        # The name is bound as a parameter, so quotes inside it cannot
        # alter the statement.
        res = self.execute("SELECT name FROM sqlite_master WHERE type='table' AND name=?",
                           (table_name,)).fetchone()
        return bool(res)

    def get_column_names(self, table_name):
        """[Select] Return the column names reported by ``PRAGMA table_info``."""
        return [item[1] for item in self.execute(f'PRAGMA table_info({table_name})')]

    def __3_execute(self):
        """Section marker: execute helpers."""

    def exec2nametuple(self, *args, **kwargs):
        """Execute on a fresh cursor whose rows are ``sqlite3.Row`` objects."""
        cur = self.cursor()
        cur.row_factory = sqlite3.Row
        return cur.execute(*args, **kwargs)

    def exec2dict(self, *args, **kwargs):
        """Execute on a fresh cursor whose rows are dicts keyed by column name."""

        def dict_factory(cursor, row):
            return {col[0]: row[idx] for idx, col in enumerate(cursor.description)}

        # TODO (from the original author): the cursor is never closed explicitly;
        # unclear whether an error could leave a transaction open and block
        # later schema changes.
        cur = self.cursor()
        cur.row_factory = dict_factory
        return cur.execute(*args, **kwargs)

    # backward-compatible alias of the old name
    exec_dict = exec2dict

    def __4_数据类型(self):
        """Section marker: data types."""

    @classmethod
    def cvt_type(cls, val):
        """Serialise dict/list/tuple values to JSON text; return anything else unchanged."""
        if isinstance(val, (dict, list, tuple)):
            val = json.dumps(val, ensure_ascii=False)
        return val

    @classmethod
    def autotype(cls, val):
        """Map a sample value to a column type: 'text', 'integer' or 'real'."""
        if isinstance(val, str):
            return 'text'
        elif isinstance(val, (int, bool)):
            return 'integer'
        elif isinstance(val, float):
            return 'real'
        else:  # dict, list, ... are stored as text (see cvt_type)
            return 'text'

    def __5_增删改查(self):
        """Section marker: insert / delete / update / select."""

    def insert_row(self, table_name, cols, if_exists='IGNORE'):
        """[Insert] Insert one row.

        :param table_name: target table
        :param cols: dict of column name -> value to insert
        :param if_exists: conflict keyword placed after ``INSERT OR``
            IGNORE, skip the row
            REPLACE, replace the existing row

        With the default ``OR IGNORE`` duplicate rows are skipped silently.
        """
        ks = ','.join(cols.keys())
        vs = ','.join('?' * len(cols))
        self.execute(f'INSERT OR {if_exists} INTO {table_name}({ks}) VALUES ({vs})',
                     self.cvt_types(cols.values()))
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # @Author : 陈坤泽
4
+ # @Email : 877362867@qq.com
5
+ # @Date : 2022/04/12 08:59
6
+
7
+ import copy
8
+ import json
9
+ import re
10
+ import sqlite3
11
+ import warnings
12
+ from collections import defaultdict
13
+
14
+ import pandas as pd
15
+
16
+ # 旧版的pandas警告
17
+ warnings.filterwarnings('ignore', message="pandas only support SQLAlchemy connectable")
18
+ # 新版的pandas警告多了个's‘
19
+ warnings.filterwarnings('ignore', message="pandas only supports SQLAlchemy connectable")
20
+
21
+
22
+ class SqlBuilder:
23
+ def __init__(self, table=''):
24
+ self.table = table
25
+ self._select = []
26
+ self._set = []
27
+ self._where = []
28
+ self._order_by = []
29
+ self._group_by = []
30
+ self._limit = None # 限制最多读多少条数据
31
+ self._offset = None # 从第几条数据开始读
32
+
33
+ def copy(self):
34
+ # 拷贝一个当前状态的副本sql
35
+ return copy.deepcopy(self)
36
+
37
+ def __1_组件(self):
38
+ pass
39
+
40
+ def from_table(self, table):
41
+ self.table = table
42
+ return self
43
+
44
+ def select(self, *columns):
45
+ self._select.extend(columns)
46
+ return self
47
+
48
+ def set(self, *columns):
49
+ self._set.extend(columns)
50
+ return self
51
+
52
+ def where(self, condition):
53
+ if isinstance(condition, (list, tuple)):
54
+ self._where.extend(condition)
55
+ elif isinstance(condition, str):
56
+ self._where.append(condition)
57
+ else:
58
+ raise ValueError(f'不支持的where条件类型{type(condition)}')
59
+
60
+ return self
61
+
62
+ def where_dict_match(self, items):
63
+ """ 输入一个字典,要求每个key的字段对应上具体的某个取值value """
64
+ for k, v in items.items():
65
+ if isinstance(v, str):
66
+ self._where.append(f"{k} = '{v}'")
67
+ elif isinstance(v, (int, float)):
68
+ self._where.append(f"{k} = {v}")
69
+ else:
70
+ raise TypeError
71
+
72
+ def where_in(self, column, values):
73
+ if values is None:
74
+ return self
75
+
76
+ if isinstance(values, str):
77
+ values = [values]
78
+ values_str = ', '.join(f"'{str(value)}'" for value in values)
79
+ if len(values) == 1:
80
+ self._where.append(f"{column} = {values_str[0]}")
81
+ else:
82
+ self._where.append(f"{column} IN ({values_str})")
83
+ return self
84
+
85
+ def where_or(self, *conditions):
86
+ """ 输入的这一批条件,作为OR组合后成为一个整体条件
87
+ """
88
+ self._where.append(f"({' OR '.join(conditions)})")
89
+ return self
90
+
91
+ def where_mod(self, column, divisor, remainder):
92
+ """ 输入的column列的值对divisor取余,筛选出余数为remainder的记录
93
+ """
94
+ condition = f"({column} % {divisor} = {remainder})"
95
+ self._where.append(condition)
96
+ return self
97
+
98
+ def where_mod2(self, desc):
99
+ """ 使用一种特殊的格式化标记来设置规则
100
+
101
+ :param desc: 例如 'id%2=1'
102
+
103
+ todo 我好像傻了,画蛇添足,这个功能好像直接用 .where 就能完成~
104
+ """
105
+ if not desc:
106
+ return
107
+ column, divisor_remainder = desc.split('%')
108
+ divisor, remainder = map(int, divisor_remainder.split('='))
109
+ return self.where_mod(column, divisor, remainder)
110
+
111
+ def group_by(self, *columns):
112
+ self._group_by.extend(columns)
113
+ return self
114
+
115
+ def order_by(self, *columns):
116
+ self._order_by.extend(columns)
117
+ return self
118
+
119
+ def limit(self, limit, offset=None):
120
+ self._limit = limit
121
+ self._offset = offset
122
+ return self
123
+
124
+ def __2_build_初级命令(self):
125
+ pass
126
+
127
+ def build_select(self, *columns):
128
+ if columns:
129
+ columns = self._select + list(columns)
130
+ else:
131
+ columns = self._select
132
+
133
+ sql = [f"SELECT {', '.join(columns) or '*'}", f"FROM {self.table}"]
134
+ if self._where:
135
+ sql.append(f"WHERE {' AND '.join(self._where)}")
136
+ if self._group_by:
137
+ sql.append(f"GROUP BY {', '.join(self._group_by)}")
138
+ if self._order_by:
139
+ sql.append(f"ORDER BY {', '.join(self._order_by)}")
140
+ if self._limit is not None:
141
+ limit_clause = f"LIMIT {self._limit}"
142
+ if self._offset is not None:
143
+ limit_clause += f" OFFSET {self._offset}"
144
+ sql.append(limit_clause)
145
+ return '\n'.join(sql)
146
+
147
def build_count(self):
    """ Assemble a ``SELECT COUNT(*)`` statement using the stored WHERE and GROUP BY parts.

    ORDER BY, LIMIT and OFFSET are not included.

    :return: the statement text, one clause per line
    """
    clauses = ["SELECT COUNT(*)", f"FROM {self.table}"]
    if self._where:
        clauses += [f"WHERE {' AND '.join(self._where)}"]
    if self._group_by:
        clauses += [f"GROUP BY {', '.join(self._group_by)}"]
    return '\n'.join(clauses)
154
+
155
def build_update(self):
    """ Assemble an UPDATE statement from the stored SET assignments and WHERE terms.

    :return: the statement text, one clause per line. The SET and WHERE lines
        are simply left out when the corresponding list is empty.
    """
    clauses = [f"UPDATE {self.table}"]
    if self._set:
        clauses += [f"SET {', '.join(self._set)}"]
    if self._where:
        clauses += [f"WHERE {' AND '.join(self._where)}"]
    return '\n'.join(clauses)
162
+
163
def __3_build_中级命令(self):
    # Empty placeholder: the body does nothing. The name reads
    # "3: build - intermediate commands", so this is presumably a section
    # marker for the build_* methods defined after it.
    pass
165
+
166
def build_check_data_type(self, column):
    """ Build a query that reads the data type of ``column`` of this table.

    The query selects ``data_type`` from ``information_schema.columns``,
    filtered by this builder's table name and the given column name.

    :param column: name of the column to inspect
    :return: the SELECT statement text
    """
    condition = f"table_name='{self.table}' AND column_name='{column}'"
    lookup = SqlBuilder('information_schema.columns')
    lookup.select("data_type")
    lookup.where(condition)
    return lookup.build_select()
172
+
173
def build_group_count(self, columns, count_column_name='cnt'):
    """ Build a query that counts rows per group, most frequent group first.

    The query is built on a fresh builder for the same table and takes over a
    copy of this builder's WHERE terms.

    :param columns: a column string, or a list/tuple of column names that is
        joined with ', '
    :param count_column_name: alias given to the COUNT(*) column, also used
        for the descending ORDER BY
    :return: the SELECT statement text
    """
    group_cols = ', '.join(columns) if isinstance(columns, (list, tuple)) else columns

    counter = SqlBuilder(self.table)
    counter.select(group_cols, f"COUNT(*) {count_column_name}")
    counter.group_by(group_cols)
    counter.order_by(f'{count_column_name} DESC')
    counter._where = self._where.copy()
    return counter.build_select()
182
+
183
+
184
class SqlBase:
    """ Functionality shared by the SQL connection wrappers.

    The methods call ``self.execute``, ``self.commit`` and ``self.cursor``,
    none of which is defined here, so this class has to be combined with a
    class that supplies them. ``get_table_names``, ``get_column_names``,
    ``cvt_type`` and ``autotype`` raise NotImplementedError and must be
    overridden.
    """

    def __init__(self, *args, **kwargs):
        # query text -> list of parameter sets queued by commit_base(-1, ...)
        self._commit_cache = defaultdict(list)

    def __1_库(self):
        # Empty placeholder; presumably a section marker ("1: database").
        pass

    def __2_表格(self):
        # Empty placeholder; presumably a section marker ("2: tables").
        pass

    def get_table_names(self):
        """ Return the names of all tables. Must be implemented by the subclass. """
        raise NotImplementedError

    def create_table(self, table_name, column_descs):
        """ [DDL create] Create a new table.

        :param table_name: name of the table to create
        :param column_descs:
            str, a regular column description such as 'c1 text, c2 blob'
            dict, key is the column name and value is a sample value whose
                storage type is chosen through ``autotype``
        """
        # 1 derive the column description from sample values when needed
        if not isinstance(column_descs, str):
            column_descs = ','.join(f'{k} {self.autotype(v)}' for k, v in column_descs.items())

        # 2 create the table
        self.execute(f'CREATE TABLE {table_name}({column_descs})')

    def get_column_names(self, table_name):
        """ Return all column names of a table. Must be implemented by the subclass. """
        raise NotImplementedError

    def ensure_column(self, table_name, col_name, col_type='', *, col_ref_val=None, **kwargs):
        """ [DDL alter] Add a column unless it already exists.

        :param table_name: table to alter
        :param col_name: column to add
        :param col_type: explicit column type; 'text' is used when neither a
            type nor a reference value is given
        :param col_ref_val: sample value; when given, the column type is
            derived from it through ``autotype`` and replaces ``col_type``
        """
        if col_name in self.get_column_names(table_name):
            return

        # Compare with None: falsy samples such as 0, 0.0, False or '' are
        # valid reference values and must not fall back to 'text'.
        if col_ref_val is not None:
            col_type = self.autotype(col_ref_val)
        if not col_type:
            col_type = 'text'

        self.execute(f'ALTER TABLE {table_name} ADD COLUMN {col_name} {col_type}')

    def delete_column(self, table_name, col_name):
        """ Drop a column of a table, but only if the column exists.

        The plain statement is ``ALTER TABLE {table_name} DROP COLUMN {col_name}``;
        this wrapper only adds the existence check for robustness.
        """
        if col_name in self.get_column_names(table_name):
            self.execute(f'ALTER TABLE {table_name} DROP COLUMN {col_name}')

    def create_index(self, index_name, table_name, cols):
        """ Create an index with an explicit name.

        :param cols: column string, or an iterable of columns joined with ','
        """
        if not isinstance(cols, str):
            cols = ','.join(map(str, cols))
        self.execute(f'CREATE INDEX {index_name} ON {table_name}({cols})')

    def create_index2(self, table_name, cols):
        """ Create a simple index whose name is generated from table and columns.

        :param cols: column string, or an iterable of columns joined with ','
        """
        if not isinstance(cols, str):
            cols = ','.join(map(str, cols))
        # Collapse every run of non-word characters to '_' so that input such
        # as 'a, b' still yields a valid identifier; for 'a,b' the result is
        # the same name as before (idx_<table>_a_b).
        index_name = re.sub(r'\W+', '_', f'idx_{table_name}_{cols}').strip('_')
        self.execute(f'CREATE INDEX {index_name} ON {table_name}({cols})')

    def keep_top_n_rows(self, table_name, num, col_name='id'):
        """ Keep only a small part of the rows, e.g. to produce lite/demo data files.

        Deletes every row whose ``col_name`` value is not among the first
        ``num`` values returned by the table, then commits.

        :param col_name: the column used to identify the rows to keep
        """
        self.execute(f'DELETE FROM {table_name} WHERE {col_name} NOT IN'
                     f'(SELECT {col_name} FROM {table_name} LIMIT {num})')
        self.commit()

    def clear_table(self, table_name):
        """ Remove all rows of a table (no commit is issued here). """
        self.execute(f'DELETE FROM {table_name}')

    def count_all_talbe_rows(self):
        """ Count the rows of every table.

        :return: list of [table_name, row_count], largest table first
        """
        ls = [[name, self.execute(f'SELECT count(*) FROM {name}').fetchone()[0]]
              for name in self.get_table_names()]
        ls.sort(key=lambda x: -x[1])
        return ls

    # Correctly spelled name; the misspelled original is kept for existing callers.
    count_all_table_rows = count_all_talbe_rows

    def __3_execute(self):
        # Empty placeholder; presumably a section marker ("3: execute").
        pass

    def exec2one(self, *args, **kwargs):
        """ Execute and return the first column of the first row, or None if there is no row. """
        # Only the "no row" case maps to None; a TypeError raised by execute
        # itself (e.g. bad arguments) is no longer swallowed.
        row = self.execute(*args, **kwargs).fetchone()
        return None if row is None else row[0]

    def exec2row(self, *args, **kwargs):
        """ Execute and return the first row (whatever ``fetchone`` returns). """
        return self.execute(*args, **kwargs).fetchone()

    def exec2col(self, *args, **kwargs):
        """ Execute and return the first column of all rows as a list. """
        return [row[0] for row in self.execute(*args, **kwargs).fetchall()]

    # kept for compatibility with the old interface
    exec_col = exec2col

    def exec2df(self, *args, **kwargs):
        """ Execute through ``pd.read_sql`` (with self as the connection) and return a DataFrame. """
        return pd.read_sql(*args, self, **kwargs)

    def __4_数据类型(self):
        # Empty placeholder; presumably a section marker ("4: data types").
        pass

    @classmethod
    def cvt_type(cls, val):
        """ Convert a Python object into a form that can be stored in SQL.

        Must be implemented by the subclass.
        """
        raise NotImplementedError

    @classmethod
    def cvt_types(cls, vals):
        """ Apply ``cvt_type`` to every value and return the results as a list. """
        return [cls.cvt_type(v) for v in vals]

    @classmethod
    def autotype(cls, val):
        """ Decide which SQL type a Python object should be stored as.

        Must be implemented by the subclass.
        """
        raise NotImplementedError

    def __5_增删改查(self):
        # Empty placeholder; presumably a section marker ("5: CRUD").
        pass

    def commit_base(self, commit_type, query, params=None):
        """ Run or queue one statement according to the commit strategy.

        :param commit_type:
            -1, only queue the parameters locally; they are sent by commit_all()
            False (or any other falsy value), execute now without committing
            True (or any other truthy value), execute now and commit immediately
        :param query: statement text
        :param params: parameters for the statement
        """
        if commit_type == -1:
            self._commit_cache[query].append(params)
            return

        # Truthiness instead of `is True` / `is False`, so that 0, 1 or None
        # no longer cause the statement to be dropped silently.
        self.execute(query, params)
        if commit_type:
            self.commit()

    def commit_all(self):
        """ Send every queued statement with ``executemany`` and commit.

        With an empty queue this is a plain commit. The queue is reset after
        all statements have been sent.
        """
        if not self._commit_cache:
            self.commit()
            return

        for query, params in self._commit_cache.items():
            cur = self.cursor()
            try:
                cur.executemany(query, params)
            finally:
                # release the cursor even when the batch fails
                cur.close()
            self.commit()

        self._commit_cache = defaultdict(list)

    def update_row(self, table_name, cols, where, *, commit=False):
        """ [Update] Update data.

        Despite the name, every row matching ``where`` is updated.
        The statement uses ``%s`` placeholders.

        :param dict cols: columns to update and their new values
        :param dict where: column/value pairs (combined with AND) selecting the rows
        :param commit: passed to commit_base; committing less often is advised,
            frequent commits cost a lot of performance

        >> xldb.update('xlapi', {'input': d}, {'id': x['id']})
        """
        kvs = ','.join(f'{k}=%s' for k in cols)
        ops = ' AND '.join(f'{k}=%s' for k in where)
        vals = list(cols.values()) + list(where.values())
        self.commit_base(commit,
                         f'UPDATE {table_name} SET {kvs} WHERE {ops}',
                         self.cvt_types(vals))

    def delete_row(self, table_name, where, *, commit=False):
        """ [Delete] Delete data.

        The statement uses ``%s`` placeholders.

        :param dict where: column/value pairs (combined with AND) selecting the rows
        :param commit: passed to commit_base; committing less often is advised,
            frequent commits cost a lot of performance
        """
        ops = ' AND '.join(f'{k}=%s' for k in where)
        vals = list(where.values())
        self.commit_base(commit,
                         f'DELETE FROM {table_name} WHERE {ops}',
                         self.cvt_types(vals))

    def select_col(self, table_name, col):
        """ Return all values of one column as a list (thin wrapper around exec2col). """
        return self.exec2col(f'SELECT {col} FROM {table_name}')

    def group_count(self, table_name, cols, count_column_name='cnt'):
        """ [Query] Count how often each combination of values occurs.

        Groups ``table_name`` by ``cols`` and orders the groups by their
        count, most frequent first.

        :param str|list cols: comma separated string, for example
            con.group_count('gpu_trace', 'host_name,total_memory')
            A list works as well and is joined automatically.
        :param count_column_name: name of the count column
        :return: pandas.DataFrame with the group columns plus the count column
        """
        if isinstance(cols, str):
            # strip so that 'a, b' does not give a column literally named ' b'
            names = [c.strip() for c in cols.split(',')]
        else:
            # take the names from the items themselves instead of re-splitting
            # the joined text
            names = [str(c) for c in cols]
            cols = ','.join(names)

        sql = f'SELECT {cols}, COUNT(*) {count_column_name} FROM {table_name} ' \
              f'GROUP BY {cols} ORDER BY {count_column_name} DESC'
        records = self.execute(sql).fetchall()
        return pd.DataFrame.from_records(records, columns=names + [count_column_name])

    def get_count_by_altering_query(self, data_query: str) -> int:
        """ Get a row count by rewriting a SELECT query into a COUNT query.

        Everything before the first standalone FROM keyword (case-insensitive)
        is replaced by ``SELECT COUNT(*)``, the rewritten query is executed and
        the first value of the first row is returned. This is meant for
        estimating the amount of data before fetching it.

        :param data_query: the original SQL SELECT query
        :return: the count; 0 when no FROM keyword is found, when the query
            yields no row, or when executing it fails (a message is printed
            in the first and last case)

        Example:
            >> query = "SELECT id, name FROM users WHERE active = True"
            >> count = sql.get_count_by_altering_query(query)

        Notes:
            - The input is assumed to be a valid SELECT query.
            - Only the first FROM is considered, so queries with sub-queries
              in the select list, special functions or complex JOINs may not
              convert into a valid or equivalent COUNT query; verify those.
        """
        match = re.search(r'\bFROM\b', data_query, flags=re.IGNORECASE)
        if not match:
            print("No 'FROM' keyword found in the data query.")
            return 0

        count_query = 'SELECT COUNT(*) ' + data_query[match.start():]
        try:
            result = self.execute(count_query).fetchone()
            return result[0] if result else 0
        except Exception as e:
            # deliberate best effort: report the problem and fall back to 0
            print(f"Error executing count query: {e}")
            return 0

    def get_column_data_type(self, table_name, col_name):
        """ Return the data type of one column, using SqlBuilder.build_check_data_type. """
        return self.exec2one(SqlBuilder(table_name).build_check_data_type(col_name))
467
+
468
+
469
+ class Connection(sqlite3.Connection, SqlBase):
470
+ """
471
+ DDL - 数据定义语言
472
+ CREATE 创建一个新的表,一个表的视图,或者数据库中的其他对象。
473
+ ALTER 修改数据库中的某个已有的数据库对象,比如一个表。
474
+ DROP 删除整个表,或者表的视图,或者数据库中的其他对象。
475
+ DML - 数据操作语言
476
+ INSERT 创建一条记录。
477
+ UPDATE 修改记录。
478
+ DELETE 删除记录。
479
+ DQL - 数据查询语言
480
+ SELECT 从一个或多个表中检索某些记录。
481
+ """
482
+
483
def __1_库(self):
    # Empty placeholder: the body does nothing. The name reads "1: database",
    # so this is presumably a section marker for the methods that follow.
    pass
485
+
486
def vacuum(self):
    """ Run VACUUM on the database.

    Deleting data does not shrink the file by itself; VACUUM is what actually
    reduces the space the file occupies.
    """
    statement = 'vacuum'
    self.execute(statement)  # no commit needed
489
+
490
def __2_表格(self):
    # Empty placeholder: the body does nothing. The name reads "2: tables",
    # so this is presumably a section marker for the methods that follow.
    pass
492
+
493
def get_table_names(self):
    """ Return the names of all tables listed in sqlite_master. """
    names = []
    for row in self.execute("SELECT name FROM sqlite_master WHERE type='table'"):
        names.append(row[0])
    return names
496
+
497
def has_table(self, table_name):
    """ Tell whether a table with the given name is listed in sqlite_master.

    :param table_name: table name to look for
    :return: True if such a table exists, otherwise False
    """
    # The name is bound as a parameter instead of being pasted into the SQL
    # text, so names containing a quote character neither break the statement
    # nor alter it.
    query = "SELECT name FROM sqlite_master WHERE type='table' AND name=?"
    return self.execute(query, (table_name,)).fetchone() is not None
500
+
501
def get_column_names(self, table_name):
    """ [Query] Return the column names of a table.

    Reads ``PRAGMA table_info`` and collects the second field of every row.
    """
    names = []
    for item in self.execute(f'PRAGMA table_info({table_name})'):
        names.append(item[1])
    return names
506
+
507
def __3_execute(self):
    # Empty placeholder: the body does nothing. Presumably a section marker
    # for the execute-style helpers that follow.
    pass
509
+
510
def exec2nametuple(self, *args, **kwargs):
    """ Execute on a new cursor whose rows are ``sqlite3.Row`` objects.

    All arguments are forwarded to the cursor's ``execute``.

    :return: whatever the cursor's ``execute`` returns
    """
    row_cursor = self.cursor()
    row_cursor.row_factory = sqlite3.Row
    result = row_cursor.execute(*args, **kwargs)
    return result
514
+
515
def exec2dict(self, *args, **kwargs):
    """ Like execute, but every returned row is a dict keyed by column name.

    All arguments are forwarded to the cursor's ``execute``.
    """

    def row_to_dict(cursor, row):
        # the first item of each description entry is the column name
        return {col[0]: value for col, value in zip(cursor.description, row)}

    # todo (original author's note): the cursor is never closed here - is that
    #  a problem? Could a failure leave a transaction open and block schema
    #  changes? Is there an auto-close mechanism?
    dict_cursor = self.cursor()
    dict_cursor.row_factory = row_to_dict
    return dict_cursor.execute(*args, **kwargs)

# kept for compatibility with older versions
exec_dict = exec2dict
530
+
531
def __4_数据类型(self):
    # Empty placeholder: the body does nothing. The name reads "4: data types",
    # so this is presumably a section marker for the methods that follow.
    pass
533
+
534
@classmethod
def cvt_type(cls, val):
    """ Convert a value into a form that can be stored.

    dict, list and tuple values are serialised to a JSON string (non-ASCII
    characters are kept as they are); every other value is returned unchanged.
    """
    needs_json = isinstance(val, (dict, list, tuple))
    return json.dumps(val, ensure_ascii=False) if needs_json else val
539
+
540
@classmethod
def autotype(cls, val):
    """ Pick the column type used to store a Python value.

    int and bool map to 'integer', float maps to 'real', and everything else
    (str, as well as dict, list and other objects, which can be stored as
    text via json.dumps or str) maps to 'text'.
    """
    if isinstance(val, (int, bool)):
        return 'integer'
    if isinstance(val, float):
        return 'real'
    return 'text'
550
+
551
def __5_增删改查(self):
    # Empty placeholder: the body does nothing. The name reads
    # "5: insert/delete/update/query", so this is presumably a section marker
    # for the methods that follow.
    pass
553
+
554
+ def insert_row(self, table_name, cols, if_exists='IGNORE'):
555
+ """ 【增】插入新数据
556
+
557
+ :param table_name:
558
+ :param cols: 一般是用字典表示的要插入的值
559
+ :param if_exists: 如果已存在的处理策略
560
+ IGNORE,跳过
561
+ REPLACE,替换
562
+ :return:
563
+
564
+ 注意加了 OR IGNORE,支持重复数据自动忽略插入
565
+ """
566
+ ks = ','.join(cols.keys())
567
+ vs = ','.join('?' * (len(cols.keys())))
568
+ self.execute(f'INSERT OR {if_exists} INTO {table_name}({ks}) VALUES ({vs})', self.cvt_types(cols.values()))