pyxllib 0.3.96__py3-none-any.whl → 0.3.200__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (358)
  1. pyxllib/__init__.py +21 -21
  2. pyxllib/algo/__init__.py +8 -8
  3. pyxllib/algo/disjoint.py +54 -54
  4. pyxllib/algo/geo.py +541 -529
  5. pyxllib/algo/intervals.py +964 -964
  6. pyxllib/algo/matcher.py +389 -311
  7. pyxllib/algo/newbie.py +166 -166
  8. pyxllib/algo/pupil.py +629 -461
  9. pyxllib/algo/shapelylib.py +67 -67
  10. pyxllib/algo/specialist.py +241 -240
  11. pyxllib/algo/stat.py +494 -458
  12. pyxllib/algo/treelib.py +149 -149
  13. pyxllib/algo/unitlib.py +66 -66
  14. {pyxlpr → pyxllib/autogui}/__init__.py +5 -5
  15. pyxllib/autogui/activewin.py +246 -0
  16. pyxllib/autogui/all.py +9 -0
  17. pyxllib/{ext/autogui → autogui}/autogui.py +852 -823
  18. pyxllib/autogui/uiautolib.py +362 -0
  19. pyxllib/{ext/autogui → autogui}/virtualkey.py +102 -102
  20. pyxllib/autogui/wechat.py +827 -0
  21. pyxllib/autogui/wechat_msg.py +421 -0
  22. pyxllib/autogui/wxautolib.py +84 -0
  23. pyxllib/cv/__init__.py +5 -5
  24. pyxllib/cv/expert.py +267 -267
  25. pyxllib/cv/imfile.py +159 -159
  26. pyxllib/cv/imhash.py +39 -39
  27. pyxllib/cv/pupil.py +9 -9
  28. pyxllib/cv/rgbfmt.py +1525 -1525
  29. pyxllib/cv/slidercaptcha.py +137 -0
  30. pyxllib/cv/trackbartools.py +251 -251
  31. pyxllib/cv/xlcvlib.py +1040 -1040
  32. pyxllib/cv/xlpillib.py +423 -423
  33. pyxllib/data/echarts.py +240 -129
  34. pyxllib/data/jsonlib.py +89 -0
  35. pyxllib/data/oss.py +72 -72
  36. pyxllib/data/pglib.py +1127 -643
  37. pyxllib/data/sqlite.py +568 -341
  38. pyxllib/data/sqllib.py +297 -297
  39. pyxllib/ext/JLineViewer.py +505 -492
  40. pyxllib/ext/__init__.py +6 -6
  41. pyxllib/ext/demolib.py +246 -246
  42. pyxllib/ext/drissionlib.py +277 -0
  43. pyxllib/ext/kq5034lib.py +12 -1606
  44. pyxllib/ext/old.py +663 -663
  45. pyxllib/ext/qt.py +449 -449
  46. pyxllib/ext/robustprocfile.py +497 -0
  47. pyxllib/ext/seleniumlib.py +76 -76
  48. pyxllib/ext/tk.py +173 -173
  49. pyxllib/ext/unixlib.py +827 -826
  50. pyxllib/ext/utools.py +351 -338
  51. pyxllib/ext/webhook.py +124 -101
  52. pyxllib/ext/win32lib.py +40 -40
  53. pyxllib/ext/wjxlib.py +88 -0
  54. pyxllib/ext/wpsapi.py +124 -0
  55. pyxllib/ext/xlwork.py +9 -0
  56. pyxllib/ext/yuquelib.py +1105 -173
  57. pyxllib/file/__init__.py +17 -17
  58. pyxllib/file/docxlib.py +761 -761
  59. pyxllib/file/gitlib.py +309 -309
  60. pyxllib/file/libreoffice.py +165 -0
  61. pyxllib/file/movielib.py +148 -139
  62. pyxllib/file/newbie.py +10 -10
  63. pyxllib/file/onenotelib.py +1469 -1469
  64. pyxllib/file/packlib/__init__.py +330 -293
  65. pyxllib/file/packlib/zipfile.py +2441 -2441
  66. pyxllib/file/pdflib.py +426 -426
  67. pyxllib/file/pupil.py +185 -185
  68. pyxllib/file/specialist/__init__.py +685 -685
  69. pyxllib/file/specialist/dirlib.py +799 -799
  70. pyxllib/file/specialist/download.py +193 -186
  71. pyxllib/file/specialist/filelib.py +2829 -2618
  72. pyxllib/file/xlsxlib.py +3131 -2976
  73. pyxllib/file/xlsyncfile.py +341 -0
  74. pyxllib/prog/__init__.py +5 -5
  75. pyxllib/prog/cachetools.py +64 -0
  76. pyxllib/prog/deprecatedlib.py +233 -233
  77. pyxllib/prog/filelock.py +42 -0
  78. pyxllib/prog/ipyexec.py +253 -253
  79. pyxllib/prog/multiprogs.py +940 -0
  80. pyxllib/prog/newbie.py +451 -444
  81. pyxllib/prog/pupil.py +1197 -1128
  82. pyxllib/prog/sitepackages.py +33 -33
  83. pyxllib/prog/specialist/__init__.py +391 -217
  84. pyxllib/prog/specialist/bc.py +203 -200
  85. pyxllib/prog/specialist/browser.py +497 -488
  86. pyxllib/prog/specialist/common.py +347 -347
  87. pyxllib/prog/specialist/datetime.py +199 -131
  88. pyxllib/prog/specialist/tictoc.py +240 -241
  89. pyxllib/prog/specialist/xllog.py +180 -180
  90. pyxllib/prog/xlosenv.py +108 -101
  91. pyxllib/stdlib/__init__.py +17 -17
  92. pyxllib/stdlib/tablepyxl/__init__.py +10 -10
  93. pyxllib/stdlib/tablepyxl/style.py +303 -303
  94. pyxllib/stdlib/tablepyxl/tablepyxl.py +130 -130
  95. pyxllib/text/__init__.py +8 -8
  96. pyxllib/text/ahocorasick.py +39 -39
  97. pyxllib/text/airscript.js +744 -0
  98. pyxllib/text/charclasslib.py +121 -109
  99. pyxllib/text/jiebalib.py +267 -264
  100. pyxllib/text/jinjalib.py +32 -0
  101. pyxllib/text/jsa_ai_prompt.md +271 -0
  102. pyxllib/text/jscode.py +922 -767
  103. pyxllib/text/latex/__init__.py +158 -158
  104. pyxllib/text/levenshtein.py +303 -303
  105. pyxllib/text/nestenv.py +1215 -1215
  106. pyxllib/text/newbie.py +300 -288
  107. pyxllib/text/pupil/__init__.py +8 -8
  108. pyxllib/text/pupil/common.py +1121 -1095
  109. pyxllib/text/pupil/xlalign.py +326 -326
  110. pyxllib/text/pycode.py +47 -47
  111. pyxllib/text/specialist/__init__.py +8 -8
  112. pyxllib/text/specialist/common.py +112 -112
  113. pyxllib/text/specialist/ptag.py +186 -186
  114. pyxllib/text/spellchecker.py +172 -172
  115. pyxllib/text/templates/echart_base.html +11 -0
  116. pyxllib/text/templates/highlight_code.html +17 -0
  117. pyxllib/text/templates/latex_editor.html +103 -0
  118. pyxllib/text/vbacode.py +17 -17
  119. pyxllib/text/xmllib.py +747 -685
  120. pyxllib/xl.py +42 -38
  121. pyxllib/xlcv.py +17 -17
  122. pyxllib-0.3.200.dist-info/METADATA +48 -0
  123. pyxllib-0.3.200.dist-info/RECORD +126 -0
  124. {pyxllib-0.3.96.dist-info → pyxllib-0.3.200.dist-info}/WHEEL +1 -2
  125. {pyxllib-0.3.96.dist-info → pyxllib-0.3.200.dist-info/licenses}/LICENSE +190 -190
  126. pyxllib/ext/autogui/__init__.py +0 -8
  127. pyxllib-0.3.96.dist-info/METADATA +0 -51
  128. pyxllib-0.3.96.dist-info/RECORD +0 -333
  129. pyxllib-0.3.96.dist-info/top_level.txt +0 -2
  130. pyxlpr/ai/__init__.py +0 -5
  131. pyxlpr/ai/clientlib.py +0 -1281
  132. pyxlpr/ai/specialist.py +0 -286
  133. pyxlpr/ai/torch_app.py +0 -172
  134. pyxlpr/ai/xlpaddle.py +0 -655
  135. pyxlpr/ai/xltorch.py +0 -705
  136. pyxlpr/data/__init__.py +0 -11
  137. pyxlpr/data/coco.py +0 -1325
  138. pyxlpr/data/datacls.py +0 -365
  139. pyxlpr/data/datasets.py +0 -200
  140. pyxlpr/data/gptlib.py +0 -1291
  141. pyxlpr/data/icdar/__init__.py +0 -96
  142. pyxlpr/data/icdar/deteval.py +0 -377
  143. pyxlpr/data/icdar/icdar2013.py +0 -341
  144. pyxlpr/data/icdar/iou.py +0 -340
  145. pyxlpr/data/icdar/rrc_evaluation_funcs_1_1.py +0 -463
  146. pyxlpr/data/imtextline.py +0 -473
  147. pyxlpr/data/labelme.py +0 -866
  148. pyxlpr/data/removeline.py +0 -179
  149. pyxlpr/data/specialist.py +0 -57
  150. pyxlpr/eval/__init__.py +0 -85
  151. pyxlpr/paddleocr.py +0 -776
  152. pyxlpr/ppocr/__init__.py +0 -15
  153. pyxlpr/ppocr/configs/rec/multi_language/generate_multi_language_configs.py +0 -226
  154. pyxlpr/ppocr/data/__init__.py +0 -135
  155. pyxlpr/ppocr/data/imaug/ColorJitter.py +0 -26
  156. pyxlpr/ppocr/data/imaug/__init__.py +0 -67
  157. pyxlpr/ppocr/data/imaug/copy_paste.py +0 -170
  158. pyxlpr/ppocr/data/imaug/east_process.py +0 -437
  159. pyxlpr/ppocr/data/imaug/gen_table_mask.py +0 -244
  160. pyxlpr/ppocr/data/imaug/iaa_augment.py +0 -114
  161. pyxlpr/ppocr/data/imaug/label_ops.py +0 -789
  162. pyxlpr/ppocr/data/imaug/make_border_map.py +0 -184
  163. pyxlpr/ppocr/data/imaug/make_pse_gt.py +0 -106
  164. pyxlpr/ppocr/data/imaug/make_shrink_map.py +0 -126
  165. pyxlpr/ppocr/data/imaug/operators.py +0 -433
  166. pyxlpr/ppocr/data/imaug/pg_process.py +0 -906
  167. pyxlpr/ppocr/data/imaug/randaugment.py +0 -143
  168. pyxlpr/ppocr/data/imaug/random_crop_data.py +0 -239
  169. pyxlpr/ppocr/data/imaug/rec_img_aug.py +0 -533
  170. pyxlpr/ppocr/data/imaug/sast_process.py +0 -777
  171. pyxlpr/ppocr/data/imaug/text_image_aug/__init__.py +0 -17
  172. pyxlpr/ppocr/data/imaug/text_image_aug/augment.py +0 -120
  173. pyxlpr/ppocr/data/imaug/text_image_aug/warp_mls.py +0 -168
  174. pyxlpr/ppocr/data/lmdb_dataset.py +0 -115
  175. pyxlpr/ppocr/data/pgnet_dataset.py +0 -104
  176. pyxlpr/ppocr/data/pubtab_dataset.py +0 -107
  177. pyxlpr/ppocr/data/simple_dataset.py +0 -372
  178. pyxlpr/ppocr/losses/__init__.py +0 -61
  179. pyxlpr/ppocr/losses/ace_loss.py +0 -52
  180. pyxlpr/ppocr/losses/basic_loss.py +0 -135
  181. pyxlpr/ppocr/losses/center_loss.py +0 -88
  182. pyxlpr/ppocr/losses/cls_loss.py +0 -30
  183. pyxlpr/ppocr/losses/combined_loss.py +0 -67
  184. pyxlpr/ppocr/losses/det_basic_loss.py +0 -208
  185. pyxlpr/ppocr/losses/det_db_loss.py +0 -80
  186. pyxlpr/ppocr/losses/det_east_loss.py +0 -63
  187. pyxlpr/ppocr/losses/det_pse_loss.py +0 -149
  188. pyxlpr/ppocr/losses/det_sast_loss.py +0 -121
  189. pyxlpr/ppocr/losses/distillation_loss.py +0 -272
  190. pyxlpr/ppocr/losses/e2e_pg_loss.py +0 -140
  191. pyxlpr/ppocr/losses/kie_sdmgr_loss.py +0 -113
  192. pyxlpr/ppocr/losses/rec_aster_loss.py +0 -99
  193. pyxlpr/ppocr/losses/rec_att_loss.py +0 -39
  194. pyxlpr/ppocr/losses/rec_ctc_loss.py +0 -44
  195. pyxlpr/ppocr/losses/rec_enhanced_ctc_loss.py +0 -70
  196. pyxlpr/ppocr/losses/rec_nrtr_loss.py +0 -30
  197. pyxlpr/ppocr/losses/rec_sar_loss.py +0 -28
  198. pyxlpr/ppocr/losses/rec_srn_loss.py +0 -47
  199. pyxlpr/ppocr/losses/table_att_loss.py +0 -109
  200. pyxlpr/ppocr/metrics/__init__.py +0 -44
  201. pyxlpr/ppocr/metrics/cls_metric.py +0 -45
  202. pyxlpr/ppocr/metrics/det_metric.py +0 -82
  203. pyxlpr/ppocr/metrics/distillation_metric.py +0 -73
  204. pyxlpr/ppocr/metrics/e2e_metric.py +0 -86
  205. pyxlpr/ppocr/metrics/eval_det_iou.py +0 -274
  206. pyxlpr/ppocr/metrics/kie_metric.py +0 -70
  207. pyxlpr/ppocr/metrics/rec_metric.py +0 -75
  208. pyxlpr/ppocr/metrics/table_metric.py +0 -50
  209. pyxlpr/ppocr/modeling/architectures/__init__.py +0 -32
  210. pyxlpr/ppocr/modeling/architectures/base_model.py +0 -88
  211. pyxlpr/ppocr/modeling/architectures/distillation_model.py +0 -60
  212. pyxlpr/ppocr/modeling/backbones/__init__.py +0 -54
  213. pyxlpr/ppocr/modeling/backbones/det_mobilenet_v3.py +0 -268
  214. pyxlpr/ppocr/modeling/backbones/det_resnet_vd.py +0 -246
  215. pyxlpr/ppocr/modeling/backbones/det_resnet_vd_sast.py +0 -285
  216. pyxlpr/ppocr/modeling/backbones/e2e_resnet_vd_pg.py +0 -265
  217. pyxlpr/ppocr/modeling/backbones/kie_unet_sdmgr.py +0 -186
  218. pyxlpr/ppocr/modeling/backbones/rec_mobilenet_v3.py +0 -138
  219. pyxlpr/ppocr/modeling/backbones/rec_mv1_enhance.py +0 -258
  220. pyxlpr/ppocr/modeling/backbones/rec_nrtr_mtb.py +0 -48
  221. pyxlpr/ppocr/modeling/backbones/rec_resnet_31.py +0 -210
  222. pyxlpr/ppocr/modeling/backbones/rec_resnet_aster.py +0 -143
  223. pyxlpr/ppocr/modeling/backbones/rec_resnet_fpn.py +0 -307
  224. pyxlpr/ppocr/modeling/backbones/rec_resnet_vd.py +0 -286
  225. pyxlpr/ppocr/modeling/heads/__init__.py +0 -54
  226. pyxlpr/ppocr/modeling/heads/cls_head.py +0 -52
  227. pyxlpr/ppocr/modeling/heads/det_db_head.py +0 -118
  228. pyxlpr/ppocr/modeling/heads/det_east_head.py +0 -121
  229. pyxlpr/ppocr/modeling/heads/det_pse_head.py +0 -37
  230. pyxlpr/ppocr/modeling/heads/det_sast_head.py +0 -128
  231. pyxlpr/ppocr/modeling/heads/e2e_pg_head.py +0 -253
  232. pyxlpr/ppocr/modeling/heads/kie_sdmgr_head.py +0 -206
  233. pyxlpr/ppocr/modeling/heads/multiheadAttention.py +0 -163
  234. pyxlpr/ppocr/modeling/heads/rec_aster_head.py +0 -393
  235. pyxlpr/ppocr/modeling/heads/rec_att_head.py +0 -202
  236. pyxlpr/ppocr/modeling/heads/rec_ctc_head.py +0 -88
  237. pyxlpr/ppocr/modeling/heads/rec_nrtr_head.py +0 -826
  238. pyxlpr/ppocr/modeling/heads/rec_sar_head.py +0 -402
  239. pyxlpr/ppocr/modeling/heads/rec_srn_head.py +0 -280
  240. pyxlpr/ppocr/modeling/heads/self_attention.py +0 -406
  241. pyxlpr/ppocr/modeling/heads/table_att_head.py +0 -246
  242. pyxlpr/ppocr/modeling/necks/__init__.py +0 -32
  243. pyxlpr/ppocr/modeling/necks/db_fpn.py +0 -111
  244. pyxlpr/ppocr/modeling/necks/east_fpn.py +0 -188
  245. pyxlpr/ppocr/modeling/necks/fpn.py +0 -138
  246. pyxlpr/ppocr/modeling/necks/pg_fpn.py +0 -314
  247. pyxlpr/ppocr/modeling/necks/rnn.py +0 -92
  248. pyxlpr/ppocr/modeling/necks/sast_fpn.py +0 -284
  249. pyxlpr/ppocr/modeling/necks/table_fpn.py +0 -110
  250. pyxlpr/ppocr/modeling/transforms/__init__.py +0 -28
  251. pyxlpr/ppocr/modeling/transforms/stn.py +0 -135
  252. pyxlpr/ppocr/modeling/transforms/tps.py +0 -308
  253. pyxlpr/ppocr/modeling/transforms/tps_spatial_transformer.py +0 -156
  254. pyxlpr/ppocr/optimizer/__init__.py +0 -61
  255. pyxlpr/ppocr/optimizer/learning_rate.py +0 -228
  256. pyxlpr/ppocr/optimizer/lr_scheduler.py +0 -49
  257. pyxlpr/ppocr/optimizer/optimizer.py +0 -160
  258. pyxlpr/ppocr/optimizer/regularizer.py +0 -52
  259. pyxlpr/ppocr/postprocess/__init__.py +0 -55
  260. pyxlpr/ppocr/postprocess/cls_postprocess.py +0 -33
  261. pyxlpr/ppocr/postprocess/db_postprocess.py +0 -234
  262. pyxlpr/ppocr/postprocess/east_postprocess.py +0 -143
  263. pyxlpr/ppocr/postprocess/locality_aware_nms.py +0 -200
  264. pyxlpr/ppocr/postprocess/pg_postprocess.py +0 -52
  265. pyxlpr/ppocr/postprocess/pse_postprocess/__init__.py +0 -15
  266. pyxlpr/ppocr/postprocess/pse_postprocess/pse/__init__.py +0 -29
  267. pyxlpr/ppocr/postprocess/pse_postprocess/pse/setup.py +0 -14
  268. pyxlpr/ppocr/postprocess/pse_postprocess/pse_postprocess.py +0 -118
  269. pyxlpr/ppocr/postprocess/rec_postprocess.py +0 -654
  270. pyxlpr/ppocr/postprocess/sast_postprocess.py +0 -355
  271. pyxlpr/ppocr/tools/__init__.py +0 -14
  272. pyxlpr/ppocr/tools/eval.py +0 -83
  273. pyxlpr/ppocr/tools/export_center.py +0 -77
  274. pyxlpr/ppocr/tools/export_model.py +0 -129
  275. pyxlpr/ppocr/tools/infer/predict_cls.py +0 -151
  276. pyxlpr/ppocr/tools/infer/predict_det.py +0 -300
  277. pyxlpr/ppocr/tools/infer/predict_e2e.py +0 -169
  278. pyxlpr/ppocr/tools/infer/predict_rec.py +0 -414
  279. pyxlpr/ppocr/tools/infer/predict_system.py +0 -204
  280. pyxlpr/ppocr/tools/infer/utility.py +0 -629
  281. pyxlpr/ppocr/tools/infer_cls.py +0 -83
  282. pyxlpr/ppocr/tools/infer_det.py +0 -134
  283. pyxlpr/ppocr/tools/infer_e2e.py +0 -122
  284. pyxlpr/ppocr/tools/infer_kie.py +0 -153
  285. pyxlpr/ppocr/tools/infer_rec.py +0 -146
  286. pyxlpr/ppocr/tools/infer_table.py +0 -107
  287. pyxlpr/ppocr/tools/program.py +0 -596
  288. pyxlpr/ppocr/tools/test_hubserving.py +0 -117
  289. pyxlpr/ppocr/tools/train.py +0 -163
  290. pyxlpr/ppocr/tools/xlprog.py +0 -748
  291. pyxlpr/ppocr/utils/EN_symbol_dict.txt +0 -94
  292. pyxlpr/ppocr/utils/__init__.py +0 -24
  293. pyxlpr/ppocr/utils/dict/ar_dict.txt +0 -117
  294. pyxlpr/ppocr/utils/dict/arabic_dict.txt +0 -162
  295. pyxlpr/ppocr/utils/dict/be_dict.txt +0 -145
  296. pyxlpr/ppocr/utils/dict/bg_dict.txt +0 -140
  297. pyxlpr/ppocr/utils/dict/chinese_cht_dict.txt +0 -8421
  298. pyxlpr/ppocr/utils/dict/cyrillic_dict.txt +0 -163
  299. pyxlpr/ppocr/utils/dict/devanagari_dict.txt +0 -167
  300. pyxlpr/ppocr/utils/dict/en_dict.txt +0 -63
  301. pyxlpr/ppocr/utils/dict/fa_dict.txt +0 -136
  302. pyxlpr/ppocr/utils/dict/french_dict.txt +0 -136
  303. pyxlpr/ppocr/utils/dict/german_dict.txt +0 -143
  304. pyxlpr/ppocr/utils/dict/hi_dict.txt +0 -162
  305. pyxlpr/ppocr/utils/dict/it_dict.txt +0 -118
  306. pyxlpr/ppocr/utils/dict/japan_dict.txt +0 -4399
  307. pyxlpr/ppocr/utils/dict/ka_dict.txt +0 -153
  308. pyxlpr/ppocr/utils/dict/korean_dict.txt +0 -3688
  309. pyxlpr/ppocr/utils/dict/latin_dict.txt +0 -185
  310. pyxlpr/ppocr/utils/dict/mr_dict.txt +0 -153
  311. pyxlpr/ppocr/utils/dict/ne_dict.txt +0 -153
  312. pyxlpr/ppocr/utils/dict/oc_dict.txt +0 -96
  313. pyxlpr/ppocr/utils/dict/pu_dict.txt +0 -130
  314. pyxlpr/ppocr/utils/dict/rs_dict.txt +0 -91
  315. pyxlpr/ppocr/utils/dict/rsc_dict.txt +0 -134
  316. pyxlpr/ppocr/utils/dict/ru_dict.txt +0 -125
  317. pyxlpr/ppocr/utils/dict/ta_dict.txt +0 -128
  318. pyxlpr/ppocr/utils/dict/table_dict.txt +0 -277
  319. pyxlpr/ppocr/utils/dict/table_structure_dict.txt +0 -2759
  320. pyxlpr/ppocr/utils/dict/te_dict.txt +0 -151
  321. pyxlpr/ppocr/utils/dict/ug_dict.txt +0 -114
  322. pyxlpr/ppocr/utils/dict/uk_dict.txt +0 -142
  323. pyxlpr/ppocr/utils/dict/ur_dict.txt +0 -137
  324. pyxlpr/ppocr/utils/dict/xi_dict.txt +0 -110
  325. pyxlpr/ppocr/utils/dict90.txt +0 -90
  326. pyxlpr/ppocr/utils/e2e_metric/Deteval.py +0 -574
  327. pyxlpr/ppocr/utils/e2e_metric/polygon_fast.py +0 -83
  328. pyxlpr/ppocr/utils/e2e_utils/extract_batchsize.py +0 -87
  329. pyxlpr/ppocr/utils/e2e_utils/extract_textpoint_fast.py +0 -457
  330. pyxlpr/ppocr/utils/e2e_utils/extract_textpoint_slow.py +0 -592
  331. pyxlpr/ppocr/utils/e2e_utils/pgnet_pp_utils.py +0 -162
  332. pyxlpr/ppocr/utils/e2e_utils/visual.py +0 -162
  333. pyxlpr/ppocr/utils/en_dict.txt +0 -95
  334. pyxlpr/ppocr/utils/gen_label.py +0 -81
  335. pyxlpr/ppocr/utils/ic15_dict.txt +0 -36
  336. pyxlpr/ppocr/utils/iou.py +0 -54
  337. pyxlpr/ppocr/utils/logging.py +0 -69
  338. pyxlpr/ppocr/utils/network.py +0 -84
  339. pyxlpr/ppocr/utils/ppocr_keys_v1.txt +0 -6623
  340. pyxlpr/ppocr/utils/profiler.py +0 -110
  341. pyxlpr/ppocr/utils/save_load.py +0 -150
  342. pyxlpr/ppocr/utils/stats.py +0 -72
  343. pyxlpr/ppocr/utils/utility.py +0 -80
  344. pyxlpr/ppstructure/__init__.py +0 -13
  345. pyxlpr/ppstructure/predict_system.py +0 -187
  346. pyxlpr/ppstructure/table/__init__.py +0 -13
  347. pyxlpr/ppstructure/table/eval_table.py +0 -72
  348. pyxlpr/ppstructure/table/matcher.py +0 -192
  349. pyxlpr/ppstructure/table/predict_structure.py +0 -136
  350. pyxlpr/ppstructure/table/predict_table.py +0 -221
  351. pyxlpr/ppstructure/table/table_metric/__init__.py +0 -16
  352. pyxlpr/ppstructure/table/table_metric/parallel.py +0 -51
  353. pyxlpr/ppstructure/table/table_metric/table_metric.py +0 -247
  354. pyxlpr/ppstructure/table/tablepyxl/__init__.py +0 -13
  355. pyxlpr/ppstructure/table/tablepyxl/style.py +0 -283
  356. pyxlpr/ppstructure/table/tablepyxl/tablepyxl.py +0 -118
  357. pyxlpr/ppstructure/utility.py +0 -71
  358. pyxlpr/xlai.py +0 -10
pyxllib/text/jiebalib.py CHANGED
@@ -1,264 +1,267 @@
- #!/usr/bin/env python3
- # -*- coding: utf-8 -*-
- # @Author : 陈坤泽
- # @Email : 877362867@qq.com
- # @Date : 2023/11/05
-
- """ Text processing utilities based on the jieba library """
-
- from collections import Counter
- import re
-
- from tqdm import tqdm
- import pandas as pd
-
- import jieba
- import jieba.posseg as pseg
-
- from pyxllib.prog.pupil import DictTool, run_once
- from pyxllib.file.specialist import XlPath
- from pyxllib.algo.stat import update_dataframes_to_excel
-
-
- def jieba_add_words(words):
-     for w in words:
-         jieba.add_word(w)
-
-
- def jieba_del_words(words):
-     for w in words:
-         jieba.del_word(w)
-
-
- @run_once('str')
- def jieba_cut(text):
-     return tuple(jieba.cut(text))
-
-
- @run_once('str')
- def pseg_cut(text):
-     return tuple(pseg.cut(text))
-
-
- def _count_word_frequency(texts, function_word=True):
-     """ Count keyword frequencies (mainly as a helper for computing tf-idf)
-
-     :param texts: list of input strings
-     :param function_word: whether to count function words
-     :return: a dict
-         key: the segmented word
-         values: [x, y], where x is the total frequency and y is the number of documents the word appears in
-
-     >>> _count_word_frequency(['正正正正', '正反正', '反反反反'])
-     {'正正': [1, 1], '反反': [2, 1]}
-
-     Result without the POS filtering: {'正正': [2, 1], '正': [1, 1], '反正': [1, 1], '反反': [2, 1]}
-     """
-
-     d = dict()
-     for text in tqdm(texts, '词频统计'):
-         wordflags = list(pseg.cut(text))
-         words = set()
-         for word, flag in wordflags:
-             # function words are not recorded
-             if (not function_word) and flag in ('uj', 'd', 'p', 'c', 'u', 'xc'):
-                 continue
-             words.add(word)
-             if word not in d:
-                 d[word] = [0, 0]
-             d[word][0] += 1
-         for word in words:
-             d[word][1] += 1
-     return d
-
-
- def analyse_tf_idf(texts, outfile=None, sheet_name='tf-idf', *, function_word=True):
-     """ Analyse tf-idf
-
-     :param list[str] texts: text content of multiple documents
-     :return: a DataFrame
-
-     jieba may ship something built in for this algorithm, but it is easy enough to write ourselves
-     Note that the tf-idf returned here is scaled up by the total frequency, so the displayed numbers are larger and easier to read
-     """
-     from math import log10
-
-     frequency = _count_word_frequency(texts, function_word)
-     DictTool.isub(frequency, [' ', '\t', '\n'])
-
-     n = len(texts)
-     sum_frequency = sum([v[0] for v in frequency.values()])
-
-     li = []
-     for k, v in frequency.items():
-         idf = log10(n / v[1])
-         # idf = 1
-         li.append([k, v[0], v[0] / sum_frequency, v[1], idf, v[0] * idf])
-     df = pd.DataFrame.from_records(li, columns=('词汇', '频数', '频率', '出现该词文章数', 'idf', 'tf-idf'))
-     df.sort_values(by='tf-idf', ascending=False, inplace=True)
-
-     if outfile:
-         update_dataframes_to_excel(outfile, {sheet_name: df})
-
-     return df
-
-
- class TextClassifier:
-     def __init__(self, texts=None):
-         """ Text classifier
-
-         :param list[str] texts: text contents
-         """
-
-         self.texts = []
-         self.tfidf = {}
-         self.vecs = []  # vectorized representation of each text
-         self.default_tfidf = 1  # if tf-idf has not been computed, every word defaults to weight 1
-
-         if texts:
-             for text in texts:
-                 self.texts.append(text)
-
-     def get_text_tf(self, text, *,
-                     function_word_weight=0.2,
-                     normalize=True,
-                     ingore_words=(' ', '\t', '\n'),
-                     add_flag=False):
-         """ The algorithm for extracting keywords from text can be customized here
-
-         :param function_word_weight: custom weight for function words, usually a small decimal to lower their weight
-
-         Typically one customizes filtering rules here, e.g. filtering out certain parts of speech or specific words
-         """
-         ct = Counter()
-
-         # 1 Initial segmentation, and whether to filter function words
-         wordflags = list(pseg_cut(text))
-         for word, flag in wordflags:
-             if flag in ('uj', 'd', 'p', 'c', 'u', 'xc', 'x'):
-                 if add_flag:
-                     ct[word + ',' + flag] += function_word_weight
-                 else:
-                     ct[word] += function_word_weight
-             else:
-                 if add_flag:
-                     ct[word + ',' + flag] += 1
-                 else:
-                     ct[word] += 1
-
-         # 2 Normalize certain words
-         if normalize:
-             ct2 = Counter()
-             for k, v in ct.items():
-                 # normalization rules for particular words can also be configured here
-                 k = re.sub(r'\d', '0', k)  # replace all digits with 0
-                 ct2[k] += v
-             ct = ct2
-
-         # 3 Filter out some words
-         if ingore_words:
-             for k in ingore_words:
-                 if k in ct:
-                     del ct[k]
-
-         return ct
-
-     def compute_tfidf(self, outfile=None, sheet_name='tf-idf', normalize=False, function_word_weight=0.2, add_flag=False):
-         """ Recompute the tf-idf table """
-         from math import log10
-
-         # 1 Count frequencies and the number of documents containing each word
-         d = dict()
-         for text in tqdm(self.texts, '词频统计'):
-             ct = self.get_text_tf(text, normalize=normalize, function_word_weight=function_word_weight, add_flag=add_flag)
-             for k, v in ct.items():
-                 if k not in d:
-                     d[k] = [0, 0]
-                 d[k] = [d[k][0] + v, d[k][1] + 1]
-
-         # 2 Compute tf-idf
-         n = len(self.texts)
-         sum_tf = sum([v[0] for v in d.values()])
-         ls = []
-         for k, v in d.items():
-             idf = log10(n / v[1])
-             # idf = 1
-             ls.append([k, v[0], v[0] / sum_tf, v[1], idf, v[0] * idf])
-
-         df = pd.DataFrame.from_records(ls, columns=('词汇', '频数', '频率', '出现该词文章数', 'idf', 'tf-idf'))
-         df.sort_values(by='tf-idf', ascending=False, inplace=True)
-
-         # 3 Save to file
-         if outfile:
-             update_dataframes_to_excel(outfile, {sheet_name: df})
-
-         self.tfidf = {row['词汇']: row['tf-idf'] for idx, row in df.iterrows()}
-         self.default_tfidf = df.loc[len(df) - 1]['tf-idf']  # the last row's weight is the default for unseen words
-
-         return df
-
-     def normalization(self, d):
-         """ Vector normalization
-
-         Takes a vector represented as a dict-like structure and normalizes it to unit length
-         """
-         length = sum([v * v for v in d.values()]) ** 0.5  # vector length
-         return {k: v / length for k, v in d.items()}
-
-     def get_text_vec(self, text):
-         """ Get the vectorized representation of a text
-
-         :param str text: text content
-         """
-         ct = self.get_text_tf(text)
-         vec = {k: v * self.tfidf.get(k, self.default_tfidf) for k, v in ct.items()}
-         vec = self.normalization(vec)
-         return vec
-
-     def compute_vecs(self):
-         """ Recompute the vectorized representations """
-         vecs = []
-         for text in tqdm(self.texts, desc='query向量化'):
-             vecs.append(self.get_text_vec(text))
-         self.vecs = vecs
-         return vecs
-
-     def cosine_similar(self, x, y):
-         """ Cosine similarity of two vectors; the larger the value, the more similar they are
-
-         This is simplified to just the dot product, so make sure the inputs are unit-length vectors
-         Note that x and y are both stored in sparse form, passed in as dict structures
-         """
-         keys = x.keys() & y.keys()  # keys shared by x and y
-         return sum([x[k] * y[k] for k in keys])
-
-     def find_similar_vec(self, x, maxn=10):
-         """ Find the vectors most similar to x; returns indices and similarities
-
-         :param x: the query object
-         :param maxn: return the top maxn most similar objects
-         """
-         if isinstance(x, str):
-             x = self.get_text_vec(x)
-
-         # todo Parallelize? Could also vectorize, but the vectors are sparse matrices, which take a lot of space
-         sims = [(i, self.cosine_similar(x, v)) for i, v in enumerate(self.vecs)]
-         sims.sort(key=lambda x: x[1], reverse=True)
-         return sims[:maxn]
-
-     def refine_vecs(self):
-         """ Prune the vector data """
-         # 1 Compute each vector's length
-         vecs = []
-         for vec in tqdm(self.vecs, '优化向量'):
-             vec = [(k, v) for k, v in vec.items()]
-             vec.sort(key=lambda x: x[1], reverse=True)
-             vec2 = {}
-             for k, v in vec:
-                 if v < 0.0001:
-                     break
-                 vec2[k] = round(v, 4)
-             vecs.append(vec2)
-
-         self.vecs = vecs
-         return self.vecs
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+ # @Author : 陈坤泽
+ # @Email : 877362867@qq.com
+ # @Date : 2023/11/05
+
+ """ Text processing utilities based on the jieba library """
+
+ from collections import Counter
+ import re
+
+ from tqdm import tqdm
+ import pandas as pd
+
+ import jieba
+ import jieba.posseg as pseg
+ from simhash import Simhash
+
+ from pyxllib.prog.pupil import DictTool, run_once
+ from pyxllib.file.specialist import XlPath
+ from pyxllib.algo.stat import update_dataframes_to_excel
+
+
+ def jieba_add_words(words):
+     for w in words:
+         jieba.add_word(w)
+
+
+ def jieba_del_words(words):
+     for w in words:
+         jieba.del_word(w)
+
+
+ @run_once('str')
+ def jieba_cut(text):
+     return tuple(jieba.cut(text))
+
+
+ @run_once('str')
+ def pseg_cut(text):
+     return tuple(pseg.cut(text))
+
+
+ def _count_word_frequency(texts, function_word=True):
+     """ Count keyword frequencies (mainly as a helper for computing tf-idf)
+
+     :param texts: list of input strings
+     :param function_word: whether to count function words
+     :return: a dict
+         key: the segmented word
+         values: [x, y], where x is the total frequency and y is the number of documents the word appears in
+
+     >>> _count_word_frequency(['正正正正', '正反正', '反反反反'])
+     {'正正': [1, 1], '反反': [2, 1]}
+
+     Result without the POS filtering: {'正正': [2, 1], '正': [1, 1], '反正': [1, 1], '反反': [2, 1]}
+     """
+
+     d = dict()
+     for text in tqdm(texts, '词频统计'):
+         wordflags = list(pseg.cut(text))
+         words = set()
+         for word, flag in wordflags:
+             # function words are not recorded
+             if (not function_word) and flag in ('uj', 'd', 'p', 'c', 'u', 'xc'):
+                 continue
+             words.add(word)
+             if word not in d:
+                 d[word] = [0, 0]
+             d[word][0] += 1
+         for word in words:
+             d[word][1] += 1
+     return d
+
+
+ def analyse_tf_idf(texts, outfile=None, sheet_name='tf-idf', *, function_word=True):
+     """ Analyse tf-idf values
+
+     :param list[str] texts: text content of multiple documents
+     :return: a DataFrame
+
+     jieba may ship something built in for this algorithm, but it is easy enough to write ourselves
+     Note that the tf-idf returned here is scaled up by the total frequency, so the displayed numbers are larger and easier to read
+     """
+     from math import log10
+
+     frequency = _count_word_frequency(texts, function_word)
+     DictTool.isub(frequency, [' ', '\t', '\n'])
+
+     n = len(texts)
+     sum_frequency = sum([v[0] for v in frequency.values()])
+
+     li = []
+     for k, v in frequency.items():
+         idf = log10(n / v[1])
+         # idf = 1
+         li.append([k, v[0], v[0] / sum_frequency, v[1], idf, v[0] * idf])
+     df = pd.DataFrame.from_records(li, columns=('词汇', '频数', '频率', '出现该词文章数', 'idf', 'tf-idf'))
+     df.sort_values(by='tf-idf', ascending=False, inplace=True)
+
+     if outfile:
+         update_dataframes_to_excel(outfile, {sheet_name: df})
+
+     return df
+
+
+ class TextClassifier:
+     def __init__(self, texts=None):
+         """ Text classifier
+
+         :param list[str] texts: text contents
+         """
+
+         self.texts = []
+         self.tfidf = {}
+         self.vecs = []  # vectorized representation of each text
+         self.default_tfidf = 1  # if tf-idf has not been computed, every word defaults to weight 1
+
+         if texts:
+             for text in texts:
+                 self.texts.append(text)
+
+     def get_text_tf(self, text, *,
+                     function_word_weight=0.2,
+                     normalize=True,
+                     ingore_words=(' ', '\t', '\n'),
+                     add_flag=False):
+         """ The algorithm for extracting keywords from text can be customized here
+
+         :param function_word_weight: custom weight for function words, usually a small decimal to lower their weight
+
+         Typically one customizes filtering rules here, e.g. filtering out certain parts of speech or specific words
+         """
+         ct = Counter()
+
+         # 1 Initial segmentation, and whether to filter function words
+         wordflags = list(pseg_cut(text))
+         for word, flag in wordflags:
+             if flag in ('uj', 'd', 'p', 'c', 'u', 'xc', 'x'):
+                 if add_flag:
+                     ct[word + ',' + flag] += function_word_weight
+                 else:
+                     ct[word] += function_word_weight
+             else:
+                 if add_flag:
+                     ct[word + ',' + flag] += 1
+                 else:
+                     ct[word] += 1
+
+         # 2 Normalize certain words
+         if normalize:
+             ct2 = Counter()
+             for k, v in ct.items():
+                 # normalization rules for particular words can also be configured here
+                 k = re.sub(r'\d', '0', k)  # replace all digits with 0
+                 ct2[k] += v
+             ct = ct2
+
+         # 3 Filter out some words
+         if ingore_words:
+             for k in ingore_words:
+                 if k in ct:
+                     del ct[k]
+
+         return ct
+
+     def compute_tfidf(self, outfile=None, sheet_name='tf-idf', normalize=False, function_word_weight=0.2,
+                       add_flag=False):
+         """ Recompute the tf-idf table """
+         from math import log10
+
+         # 1 Count frequencies and the number of documents containing each word
+         d = dict()
+         for text in tqdm(self.texts, '词频统计'):
+             ct = self.get_text_tf(text, normalize=normalize, function_word_weight=function_word_weight,
+                                   add_flag=add_flag)
+             for k, v in ct.items():
+                 if k not in d:
+                     d[k] = [0, 0]
+                 d[k] = [d[k][0] + v, d[k][1] + 1]
+
+         # 2 Compute tf-idf
+         n = len(self.texts)
+         sum_tf = sum([v[0] for v in d.values()])
+         ls = []
+         for k, v in d.items():
+             idf = log10(n / v[1])
+             # idf = 1
+             ls.append([k, v[0], v[0] / sum_tf, v[1], idf, v[0] * idf])
+
+         df = pd.DataFrame.from_records(ls, columns=('词汇', '频数', '频率', '出现该词文章数', 'idf', 'tf-idf'))
+         df.sort_values(by='tf-idf', ascending=False, inplace=True)
+
+         # 3 Save to file
+         if outfile:
+             update_dataframes_to_excel(outfile, {sheet_name: df})
+
+         self.tfidf = {row['词汇']: row['tf-idf'] for idx, row in df.iterrows()}
+         self.default_tfidf = df.loc[len(df) - 1]['tf-idf']  # the last row's weight is the default for unseen words
+
+         return df
+
+     def normalization(self, d):
+         """ Vector normalization
+
+         Takes a vector represented as a dict-like structure and normalizes it to unit length
+         """
+         length = sum([v * v for v in d.values()]) ** 0.5  # vector length
+         return {k: v / length for k, v in d.items()}
+
+     def get_text_vec(self, text):
+         """ Get the vectorized representation of a text
+
+         :param str text: text content
+         """
+         ct = self.get_text_tf(text)
+         vec = {k: v * self.tfidf.get(k, self.default_tfidf) for k, v in ct.items()}
+         vec = self.normalization(vec)
+         return vec
+
+     def compute_vecs(self):
+         """ Recompute the vectorized representations """
+         vecs = []
+         for text in tqdm(self.texts, desc='query向量化'):
+             vecs.append(self.get_text_vec(text))
+         self.vecs = vecs
+         return vecs
+
+     def cosine_similar(self, x, y):
+         """ Cosine similarity of two vectors; the larger the value, the more similar they are
+
+         This is simplified to just the dot product, so make sure the inputs are unit-length vectors
+         Note that x and y are both stored in sparse form, passed in as dict structures
+         """
+         keys = x.keys() & y.keys()  # keys shared by x and y
+         return sum([x[k] * y[k] for k in keys])
+
+     def find_similar_vec(self, x, maxn=10):
+         """ Find the vectors most similar to x; returns indices and similarities
+
+         :param x: the query object
+         :param maxn: return the top maxn most similar objects
+         """
+         if isinstance(x, str):
+             x = self.get_text_vec(x)
+
+         # todo Parallelize? Could also vectorize, but the vectors are sparse matrices, which take a lot of space
+         sims = [(i, self.cosine_similar(x, v)) for i, v in enumerate(self.vecs)]
+         sims.sort(key=lambda x: x[1], reverse=True)
+         return sims[:maxn]
+
+     def refine_vecs(self):
+         """ Prune the vector data, dropping dimensions with weight below 0.0001 """
+         # 1 Compute each vector's length
+         vecs = []
+         for vec in tqdm(self.vecs, '优化向量'):
+             vec = [(k, v) for k, v in vec.items()]
+             vec.sort(key=lambda x: x[1], reverse=True)
+             vec2 = {}
+             for k, v in vec:
+                 if v < 0.0001:
+                     break
+                 vec2[k] = round(v, 4)
+             vecs.append(vec2)
+
+         self.vecs = vecs
+         return self.vecs
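Usage note: the TextClassifier above is driven in three steps — load texts, compute the tf-idf table, vectorize, then query with find_similar_vec. A minimal sketch based only on the methods visible in this diff; the sample strings are illustrative, not taken from the package:

    from pyxllib.text.jiebalib import TextClassifier

    # hypothetical mini-corpus; any list of strings works
    texts = ['今天天气很好', '明天天气也不错', '股票市场大幅下跌']

    tc = TextClassifier(texts)
    tc.compute_tfidf()   # build the tf-idf weight table from the corpus
    tc.compute_vecs()    # turn each text into a unit-length sparse vector (a dict)
    tc.refine_vecs()     # optional: drop dimensions with weight below 0.0001

    # indices and cosine similarities of the corpus texts closest to the query
    for idx, sim in tc.find_similar_vec('天气好吗', maxn=2):
        print(idx, round(sim, 3), texts[idx])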
pyxllib/text/jinjalib.py ADDED
@@ -0,0 +1,32 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+ # @Author : 陈坤泽
+ # @Email : 877362867@qq.com
+ # @Date : 2024/05/26
+
+ from pyxllib.prog.pupil import check_install_package
+
+ # a template engine library
+ check_install_package('jinja2')
+
+ import jinja2
+ from jinja2 import Template, Environment
+
+ from pyxllib.file.specialist import XlPath
+
+
+ def set_template(s, *args, **kwargs):
+     """ todo Is this name too likely to collide? """
+     return Template(s.strip(), *args, **kwargs)
+
+
+ def set_meta_template(s, meta_start='[[', meta_end=']]', **kwargs):
+     """ Supports pre-rendering with a custom delimiter format first, then returning a standard template """
+     t = Template(s.strip(), variable_start_string=meta_start,
+                  variable_end_string=meta_end).render(**kwargs)
+     return Template(t)
+
+
+ def get_jinja_template(name, **kwargs):
+     template = Environment(**kwargs).from_string((XlPath(__file__).parent / f'templates/{name}').read_text())
+     return template
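Usage note: set_meta_template does two-phase rendering — the first pass substitutes the [[ ]] placeholders while leaving standard {{ }} variables as literal text for the final render. A minimal sketch, assuming jinja2 is installed; the template strings are illustrative:

    from pyxllib.text.jinjalib import set_template, set_meta_template

    # single-phase: a plain jinja2 Template with surrounding whitespace stripped
    t = set_template('Hello, {{ name }}!')
    print(t.render(name='world'))    # Hello, world!

    # two-phase: [[ greeting ]] is filled now; {{ name }} survives to the final render
    t2 = set_meta_template('[[ greeting ]], {{ name }}!', greeting='Hi')
    print(t2.render(name='world'))   # Hi, world!

get_jinja_template loads one of the files bundled under pyxllib/text/templates/, e.g. get_jinja_template('echart_base.html'), matching the template files added elsewhere in this release.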