pyxllib 0.3.96__py3-none-any.whl → 0.3.200__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (358) hide show
  1. pyxllib/__init__.py +21 -21
  2. pyxllib/algo/__init__.py +8 -8
  3. pyxllib/algo/disjoint.py +54 -54
  4. pyxllib/algo/geo.py +541 -529
  5. pyxllib/algo/intervals.py +964 -964
  6. pyxllib/algo/matcher.py +389 -311
  7. pyxllib/algo/newbie.py +166 -166
  8. pyxllib/algo/pupil.py +629 -461
  9. pyxllib/algo/shapelylib.py +67 -67
  10. pyxllib/algo/specialist.py +241 -240
  11. pyxllib/algo/stat.py +494 -458
  12. pyxllib/algo/treelib.py +149 -149
  13. pyxllib/algo/unitlib.py +66 -66
  14. {pyxlpr → pyxllib/autogui}/__init__.py +5 -5
  15. pyxllib/autogui/activewin.py +246 -0
  16. pyxllib/autogui/all.py +9 -0
  17. pyxllib/{ext/autogui → autogui}/autogui.py +852 -823
  18. pyxllib/autogui/uiautolib.py +362 -0
  19. pyxllib/{ext/autogui → autogui}/virtualkey.py +102 -102
  20. pyxllib/autogui/wechat.py +827 -0
  21. pyxllib/autogui/wechat_msg.py +421 -0
  22. pyxllib/autogui/wxautolib.py +84 -0
  23. pyxllib/cv/__init__.py +5 -5
  24. pyxllib/cv/expert.py +267 -267
  25. pyxllib/cv/imfile.py +159 -159
  26. pyxllib/cv/imhash.py +39 -39
  27. pyxllib/cv/pupil.py +9 -9
  28. pyxllib/cv/rgbfmt.py +1525 -1525
  29. pyxllib/cv/slidercaptcha.py +137 -0
  30. pyxllib/cv/trackbartools.py +251 -251
  31. pyxllib/cv/xlcvlib.py +1040 -1040
  32. pyxllib/cv/xlpillib.py +423 -423
  33. pyxllib/data/echarts.py +240 -129
  34. pyxllib/data/jsonlib.py +89 -0
  35. pyxllib/data/oss.py +72 -72
  36. pyxllib/data/pglib.py +1127 -643
  37. pyxllib/data/sqlite.py +568 -341
  38. pyxllib/data/sqllib.py +297 -297
  39. pyxllib/ext/JLineViewer.py +505 -492
  40. pyxllib/ext/__init__.py +6 -6
  41. pyxllib/ext/demolib.py +246 -246
  42. pyxllib/ext/drissionlib.py +277 -0
  43. pyxllib/ext/kq5034lib.py +12 -1606
  44. pyxllib/ext/old.py +663 -663
  45. pyxllib/ext/qt.py +449 -449
  46. pyxllib/ext/robustprocfile.py +497 -0
  47. pyxllib/ext/seleniumlib.py +76 -76
  48. pyxllib/ext/tk.py +173 -173
  49. pyxllib/ext/unixlib.py +827 -826
  50. pyxllib/ext/utools.py +351 -338
  51. pyxllib/ext/webhook.py +124 -101
  52. pyxllib/ext/win32lib.py +40 -40
  53. pyxllib/ext/wjxlib.py +88 -0
  54. pyxllib/ext/wpsapi.py +124 -0
  55. pyxllib/ext/xlwork.py +9 -0
  56. pyxllib/ext/yuquelib.py +1105 -173
  57. pyxllib/file/__init__.py +17 -17
  58. pyxllib/file/docxlib.py +761 -761
  59. pyxllib/file/gitlib.py +309 -309
  60. pyxllib/file/libreoffice.py +165 -0
  61. pyxllib/file/movielib.py +148 -139
  62. pyxllib/file/newbie.py +10 -10
  63. pyxllib/file/onenotelib.py +1469 -1469
  64. pyxllib/file/packlib/__init__.py +330 -293
  65. pyxllib/file/packlib/zipfile.py +2441 -2441
  66. pyxllib/file/pdflib.py +426 -426
  67. pyxllib/file/pupil.py +185 -185
  68. pyxllib/file/specialist/__init__.py +685 -685
  69. pyxllib/file/specialist/dirlib.py +799 -799
  70. pyxllib/file/specialist/download.py +193 -186
  71. pyxllib/file/specialist/filelib.py +2829 -2618
  72. pyxllib/file/xlsxlib.py +3131 -2976
  73. pyxllib/file/xlsyncfile.py +341 -0
  74. pyxllib/prog/__init__.py +5 -5
  75. pyxllib/prog/cachetools.py +64 -0
  76. pyxllib/prog/deprecatedlib.py +233 -233
  77. pyxllib/prog/filelock.py +42 -0
  78. pyxllib/prog/ipyexec.py +253 -253
  79. pyxllib/prog/multiprogs.py +940 -0
  80. pyxllib/prog/newbie.py +451 -444
  81. pyxllib/prog/pupil.py +1197 -1128
  82. pyxllib/prog/sitepackages.py +33 -33
  83. pyxllib/prog/specialist/__init__.py +391 -217
  84. pyxllib/prog/specialist/bc.py +203 -200
  85. pyxllib/prog/specialist/browser.py +497 -488
  86. pyxllib/prog/specialist/common.py +347 -347
  87. pyxllib/prog/specialist/datetime.py +199 -131
  88. pyxllib/prog/specialist/tictoc.py +240 -241
  89. pyxllib/prog/specialist/xllog.py +180 -180
  90. pyxllib/prog/xlosenv.py +108 -101
  91. pyxllib/stdlib/__init__.py +17 -17
  92. pyxllib/stdlib/tablepyxl/__init__.py +10 -10
  93. pyxllib/stdlib/tablepyxl/style.py +303 -303
  94. pyxllib/stdlib/tablepyxl/tablepyxl.py +130 -130
  95. pyxllib/text/__init__.py +8 -8
  96. pyxllib/text/ahocorasick.py +39 -39
  97. pyxllib/text/airscript.js +744 -0
  98. pyxllib/text/charclasslib.py +121 -109
  99. pyxllib/text/jiebalib.py +267 -264
  100. pyxllib/text/jinjalib.py +32 -0
  101. pyxllib/text/jsa_ai_prompt.md +271 -0
  102. pyxllib/text/jscode.py +922 -767
  103. pyxllib/text/latex/__init__.py +158 -158
  104. pyxllib/text/levenshtein.py +303 -303
  105. pyxllib/text/nestenv.py +1215 -1215
  106. pyxllib/text/newbie.py +300 -288
  107. pyxllib/text/pupil/__init__.py +8 -8
  108. pyxllib/text/pupil/common.py +1121 -1095
  109. pyxllib/text/pupil/xlalign.py +326 -326
  110. pyxllib/text/pycode.py +47 -47
  111. pyxllib/text/specialist/__init__.py +8 -8
  112. pyxllib/text/specialist/common.py +112 -112
  113. pyxllib/text/specialist/ptag.py +186 -186
  114. pyxllib/text/spellchecker.py +172 -172
  115. pyxllib/text/templates/echart_base.html +11 -0
  116. pyxllib/text/templates/highlight_code.html +17 -0
  117. pyxllib/text/templates/latex_editor.html +103 -0
  118. pyxllib/text/vbacode.py +17 -17
  119. pyxllib/text/xmllib.py +747 -685
  120. pyxllib/xl.py +42 -38
  121. pyxllib/xlcv.py +17 -17
  122. pyxllib-0.3.200.dist-info/METADATA +48 -0
  123. pyxllib-0.3.200.dist-info/RECORD +126 -0
  124. {pyxllib-0.3.96.dist-info → pyxllib-0.3.200.dist-info}/WHEEL +1 -2
  125. {pyxllib-0.3.96.dist-info → pyxllib-0.3.200.dist-info/licenses}/LICENSE +190 -190
  126. pyxllib/ext/autogui/__init__.py +0 -8
  127. pyxllib-0.3.96.dist-info/METADATA +0 -51
  128. pyxllib-0.3.96.dist-info/RECORD +0 -333
  129. pyxllib-0.3.96.dist-info/top_level.txt +0 -2
  130. pyxlpr/ai/__init__.py +0 -5
  131. pyxlpr/ai/clientlib.py +0 -1281
  132. pyxlpr/ai/specialist.py +0 -286
  133. pyxlpr/ai/torch_app.py +0 -172
  134. pyxlpr/ai/xlpaddle.py +0 -655
  135. pyxlpr/ai/xltorch.py +0 -705
  136. pyxlpr/data/__init__.py +0 -11
  137. pyxlpr/data/coco.py +0 -1325
  138. pyxlpr/data/datacls.py +0 -365
  139. pyxlpr/data/datasets.py +0 -200
  140. pyxlpr/data/gptlib.py +0 -1291
  141. pyxlpr/data/icdar/__init__.py +0 -96
  142. pyxlpr/data/icdar/deteval.py +0 -377
  143. pyxlpr/data/icdar/icdar2013.py +0 -341
  144. pyxlpr/data/icdar/iou.py +0 -340
  145. pyxlpr/data/icdar/rrc_evaluation_funcs_1_1.py +0 -463
  146. pyxlpr/data/imtextline.py +0 -473
  147. pyxlpr/data/labelme.py +0 -866
  148. pyxlpr/data/removeline.py +0 -179
  149. pyxlpr/data/specialist.py +0 -57
  150. pyxlpr/eval/__init__.py +0 -85
  151. pyxlpr/paddleocr.py +0 -776
  152. pyxlpr/ppocr/__init__.py +0 -15
  153. pyxlpr/ppocr/configs/rec/multi_language/generate_multi_language_configs.py +0 -226
  154. pyxlpr/ppocr/data/__init__.py +0 -135
  155. pyxlpr/ppocr/data/imaug/ColorJitter.py +0 -26
  156. pyxlpr/ppocr/data/imaug/__init__.py +0 -67
  157. pyxlpr/ppocr/data/imaug/copy_paste.py +0 -170
  158. pyxlpr/ppocr/data/imaug/east_process.py +0 -437
  159. pyxlpr/ppocr/data/imaug/gen_table_mask.py +0 -244
  160. pyxlpr/ppocr/data/imaug/iaa_augment.py +0 -114
  161. pyxlpr/ppocr/data/imaug/label_ops.py +0 -789
  162. pyxlpr/ppocr/data/imaug/make_border_map.py +0 -184
  163. pyxlpr/ppocr/data/imaug/make_pse_gt.py +0 -106
  164. pyxlpr/ppocr/data/imaug/make_shrink_map.py +0 -126
  165. pyxlpr/ppocr/data/imaug/operators.py +0 -433
  166. pyxlpr/ppocr/data/imaug/pg_process.py +0 -906
  167. pyxlpr/ppocr/data/imaug/randaugment.py +0 -143
  168. pyxlpr/ppocr/data/imaug/random_crop_data.py +0 -239
  169. pyxlpr/ppocr/data/imaug/rec_img_aug.py +0 -533
  170. pyxlpr/ppocr/data/imaug/sast_process.py +0 -777
  171. pyxlpr/ppocr/data/imaug/text_image_aug/__init__.py +0 -17
  172. pyxlpr/ppocr/data/imaug/text_image_aug/augment.py +0 -120
  173. pyxlpr/ppocr/data/imaug/text_image_aug/warp_mls.py +0 -168
  174. pyxlpr/ppocr/data/lmdb_dataset.py +0 -115
  175. pyxlpr/ppocr/data/pgnet_dataset.py +0 -104
  176. pyxlpr/ppocr/data/pubtab_dataset.py +0 -107
  177. pyxlpr/ppocr/data/simple_dataset.py +0 -372
  178. pyxlpr/ppocr/losses/__init__.py +0 -61
  179. pyxlpr/ppocr/losses/ace_loss.py +0 -52
  180. pyxlpr/ppocr/losses/basic_loss.py +0 -135
  181. pyxlpr/ppocr/losses/center_loss.py +0 -88
  182. pyxlpr/ppocr/losses/cls_loss.py +0 -30
  183. pyxlpr/ppocr/losses/combined_loss.py +0 -67
  184. pyxlpr/ppocr/losses/det_basic_loss.py +0 -208
  185. pyxlpr/ppocr/losses/det_db_loss.py +0 -80
  186. pyxlpr/ppocr/losses/det_east_loss.py +0 -63
  187. pyxlpr/ppocr/losses/det_pse_loss.py +0 -149
  188. pyxlpr/ppocr/losses/det_sast_loss.py +0 -121
  189. pyxlpr/ppocr/losses/distillation_loss.py +0 -272
  190. pyxlpr/ppocr/losses/e2e_pg_loss.py +0 -140
  191. pyxlpr/ppocr/losses/kie_sdmgr_loss.py +0 -113
  192. pyxlpr/ppocr/losses/rec_aster_loss.py +0 -99
  193. pyxlpr/ppocr/losses/rec_att_loss.py +0 -39
  194. pyxlpr/ppocr/losses/rec_ctc_loss.py +0 -44
  195. pyxlpr/ppocr/losses/rec_enhanced_ctc_loss.py +0 -70
  196. pyxlpr/ppocr/losses/rec_nrtr_loss.py +0 -30
  197. pyxlpr/ppocr/losses/rec_sar_loss.py +0 -28
  198. pyxlpr/ppocr/losses/rec_srn_loss.py +0 -47
  199. pyxlpr/ppocr/losses/table_att_loss.py +0 -109
  200. pyxlpr/ppocr/metrics/__init__.py +0 -44
  201. pyxlpr/ppocr/metrics/cls_metric.py +0 -45
  202. pyxlpr/ppocr/metrics/det_metric.py +0 -82
  203. pyxlpr/ppocr/metrics/distillation_metric.py +0 -73
  204. pyxlpr/ppocr/metrics/e2e_metric.py +0 -86
  205. pyxlpr/ppocr/metrics/eval_det_iou.py +0 -274
  206. pyxlpr/ppocr/metrics/kie_metric.py +0 -70
  207. pyxlpr/ppocr/metrics/rec_metric.py +0 -75
  208. pyxlpr/ppocr/metrics/table_metric.py +0 -50
  209. pyxlpr/ppocr/modeling/architectures/__init__.py +0 -32
  210. pyxlpr/ppocr/modeling/architectures/base_model.py +0 -88
  211. pyxlpr/ppocr/modeling/architectures/distillation_model.py +0 -60
  212. pyxlpr/ppocr/modeling/backbones/__init__.py +0 -54
  213. pyxlpr/ppocr/modeling/backbones/det_mobilenet_v3.py +0 -268
  214. pyxlpr/ppocr/modeling/backbones/det_resnet_vd.py +0 -246
  215. pyxlpr/ppocr/modeling/backbones/det_resnet_vd_sast.py +0 -285
  216. pyxlpr/ppocr/modeling/backbones/e2e_resnet_vd_pg.py +0 -265
  217. pyxlpr/ppocr/modeling/backbones/kie_unet_sdmgr.py +0 -186
  218. pyxlpr/ppocr/modeling/backbones/rec_mobilenet_v3.py +0 -138
  219. pyxlpr/ppocr/modeling/backbones/rec_mv1_enhance.py +0 -258
  220. pyxlpr/ppocr/modeling/backbones/rec_nrtr_mtb.py +0 -48
  221. pyxlpr/ppocr/modeling/backbones/rec_resnet_31.py +0 -210
  222. pyxlpr/ppocr/modeling/backbones/rec_resnet_aster.py +0 -143
  223. pyxlpr/ppocr/modeling/backbones/rec_resnet_fpn.py +0 -307
  224. pyxlpr/ppocr/modeling/backbones/rec_resnet_vd.py +0 -286
  225. pyxlpr/ppocr/modeling/heads/__init__.py +0 -54
  226. pyxlpr/ppocr/modeling/heads/cls_head.py +0 -52
  227. pyxlpr/ppocr/modeling/heads/det_db_head.py +0 -118
  228. pyxlpr/ppocr/modeling/heads/det_east_head.py +0 -121
  229. pyxlpr/ppocr/modeling/heads/det_pse_head.py +0 -37
  230. pyxlpr/ppocr/modeling/heads/det_sast_head.py +0 -128
  231. pyxlpr/ppocr/modeling/heads/e2e_pg_head.py +0 -253
  232. pyxlpr/ppocr/modeling/heads/kie_sdmgr_head.py +0 -206
  233. pyxlpr/ppocr/modeling/heads/multiheadAttention.py +0 -163
  234. pyxlpr/ppocr/modeling/heads/rec_aster_head.py +0 -393
  235. pyxlpr/ppocr/modeling/heads/rec_att_head.py +0 -202
  236. pyxlpr/ppocr/modeling/heads/rec_ctc_head.py +0 -88
  237. pyxlpr/ppocr/modeling/heads/rec_nrtr_head.py +0 -826
  238. pyxlpr/ppocr/modeling/heads/rec_sar_head.py +0 -402
  239. pyxlpr/ppocr/modeling/heads/rec_srn_head.py +0 -280
  240. pyxlpr/ppocr/modeling/heads/self_attention.py +0 -406
  241. pyxlpr/ppocr/modeling/heads/table_att_head.py +0 -246
  242. pyxlpr/ppocr/modeling/necks/__init__.py +0 -32
  243. pyxlpr/ppocr/modeling/necks/db_fpn.py +0 -111
  244. pyxlpr/ppocr/modeling/necks/east_fpn.py +0 -188
  245. pyxlpr/ppocr/modeling/necks/fpn.py +0 -138
  246. pyxlpr/ppocr/modeling/necks/pg_fpn.py +0 -314
  247. pyxlpr/ppocr/modeling/necks/rnn.py +0 -92
  248. pyxlpr/ppocr/modeling/necks/sast_fpn.py +0 -284
  249. pyxlpr/ppocr/modeling/necks/table_fpn.py +0 -110
  250. pyxlpr/ppocr/modeling/transforms/__init__.py +0 -28
  251. pyxlpr/ppocr/modeling/transforms/stn.py +0 -135
  252. pyxlpr/ppocr/modeling/transforms/tps.py +0 -308
  253. pyxlpr/ppocr/modeling/transforms/tps_spatial_transformer.py +0 -156
  254. pyxlpr/ppocr/optimizer/__init__.py +0 -61
  255. pyxlpr/ppocr/optimizer/learning_rate.py +0 -228
  256. pyxlpr/ppocr/optimizer/lr_scheduler.py +0 -49
  257. pyxlpr/ppocr/optimizer/optimizer.py +0 -160
  258. pyxlpr/ppocr/optimizer/regularizer.py +0 -52
  259. pyxlpr/ppocr/postprocess/__init__.py +0 -55
  260. pyxlpr/ppocr/postprocess/cls_postprocess.py +0 -33
  261. pyxlpr/ppocr/postprocess/db_postprocess.py +0 -234
  262. pyxlpr/ppocr/postprocess/east_postprocess.py +0 -143
  263. pyxlpr/ppocr/postprocess/locality_aware_nms.py +0 -200
  264. pyxlpr/ppocr/postprocess/pg_postprocess.py +0 -52
  265. pyxlpr/ppocr/postprocess/pse_postprocess/__init__.py +0 -15
  266. pyxlpr/ppocr/postprocess/pse_postprocess/pse/__init__.py +0 -29
  267. pyxlpr/ppocr/postprocess/pse_postprocess/pse/setup.py +0 -14
  268. pyxlpr/ppocr/postprocess/pse_postprocess/pse_postprocess.py +0 -118
  269. pyxlpr/ppocr/postprocess/rec_postprocess.py +0 -654
  270. pyxlpr/ppocr/postprocess/sast_postprocess.py +0 -355
  271. pyxlpr/ppocr/tools/__init__.py +0 -14
  272. pyxlpr/ppocr/tools/eval.py +0 -83
  273. pyxlpr/ppocr/tools/export_center.py +0 -77
  274. pyxlpr/ppocr/tools/export_model.py +0 -129
  275. pyxlpr/ppocr/tools/infer/predict_cls.py +0 -151
  276. pyxlpr/ppocr/tools/infer/predict_det.py +0 -300
  277. pyxlpr/ppocr/tools/infer/predict_e2e.py +0 -169
  278. pyxlpr/ppocr/tools/infer/predict_rec.py +0 -414
  279. pyxlpr/ppocr/tools/infer/predict_system.py +0 -204
  280. pyxlpr/ppocr/tools/infer/utility.py +0 -629
  281. pyxlpr/ppocr/tools/infer_cls.py +0 -83
  282. pyxlpr/ppocr/tools/infer_det.py +0 -134
  283. pyxlpr/ppocr/tools/infer_e2e.py +0 -122
  284. pyxlpr/ppocr/tools/infer_kie.py +0 -153
  285. pyxlpr/ppocr/tools/infer_rec.py +0 -146
  286. pyxlpr/ppocr/tools/infer_table.py +0 -107
  287. pyxlpr/ppocr/tools/program.py +0 -596
  288. pyxlpr/ppocr/tools/test_hubserving.py +0 -117
  289. pyxlpr/ppocr/tools/train.py +0 -163
  290. pyxlpr/ppocr/tools/xlprog.py +0 -748
  291. pyxlpr/ppocr/utils/EN_symbol_dict.txt +0 -94
  292. pyxlpr/ppocr/utils/__init__.py +0 -24
  293. pyxlpr/ppocr/utils/dict/ar_dict.txt +0 -117
  294. pyxlpr/ppocr/utils/dict/arabic_dict.txt +0 -162
  295. pyxlpr/ppocr/utils/dict/be_dict.txt +0 -145
  296. pyxlpr/ppocr/utils/dict/bg_dict.txt +0 -140
  297. pyxlpr/ppocr/utils/dict/chinese_cht_dict.txt +0 -8421
  298. pyxlpr/ppocr/utils/dict/cyrillic_dict.txt +0 -163
  299. pyxlpr/ppocr/utils/dict/devanagari_dict.txt +0 -167
  300. pyxlpr/ppocr/utils/dict/en_dict.txt +0 -63
  301. pyxlpr/ppocr/utils/dict/fa_dict.txt +0 -136
  302. pyxlpr/ppocr/utils/dict/french_dict.txt +0 -136
  303. pyxlpr/ppocr/utils/dict/german_dict.txt +0 -143
  304. pyxlpr/ppocr/utils/dict/hi_dict.txt +0 -162
  305. pyxlpr/ppocr/utils/dict/it_dict.txt +0 -118
  306. pyxlpr/ppocr/utils/dict/japan_dict.txt +0 -4399
  307. pyxlpr/ppocr/utils/dict/ka_dict.txt +0 -153
  308. pyxlpr/ppocr/utils/dict/korean_dict.txt +0 -3688
  309. pyxlpr/ppocr/utils/dict/latin_dict.txt +0 -185
  310. pyxlpr/ppocr/utils/dict/mr_dict.txt +0 -153
  311. pyxlpr/ppocr/utils/dict/ne_dict.txt +0 -153
  312. pyxlpr/ppocr/utils/dict/oc_dict.txt +0 -96
  313. pyxlpr/ppocr/utils/dict/pu_dict.txt +0 -130
  314. pyxlpr/ppocr/utils/dict/rs_dict.txt +0 -91
  315. pyxlpr/ppocr/utils/dict/rsc_dict.txt +0 -134
  316. pyxlpr/ppocr/utils/dict/ru_dict.txt +0 -125
  317. pyxlpr/ppocr/utils/dict/ta_dict.txt +0 -128
  318. pyxlpr/ppocr/utils/dict/table_dict.txt +0 -277
  319. pyxlpr/ppocr/utils/dict/table_structure_dict.txt +0 -2759
  320. pyxlpr/ppocr/utils/dict/te_dict.txt +0 -151
  321. pyxlpr/ppocr/utils/dict/ug_dict.txt +0 -114
  322. pyxlpr/ppocr/utils/dict/uk_dict.txt +0 -142
  323. pyxlpr/ppocr/utils/dict/ur_dict.txt +0 -137
  324. pyxlpr/ppocr/utils/dict/xi_dict.txt +0 -110
  325. pyxlpr/ppocr/utils/dict90.txt +0 -90
  326. pyxlpr/ppocr/utils/e2e_metric/Deteval.py +0 -574
  327. pyxlpr/ppocr/utils/e2e_metric/polygon_fast.py +0 -83
  328. pyxlpr/ppocr/utils/e2e_utils/extract_batchsize.py +0 -87
  329. pyxlpr/ppocr/utils/e2e_utils/extract_textpoint_fast.py +0 -457
  330. pyxlpr/ppocr/utils/e2e_utils/extract_textpoint_slow.py +0 -592
  331. pyxlpr/ppocr/utils/e2e_utils/pgnet_pp_utils.py +0 -162
  332. pyxlpr/ppocr/utils/e2e_utils/visual.py +0 -162
  333. pyxlpr/ppocr/utils/en_dict.txt +0 -95
  334. pyxlpr/ppocr/utils/gen_label.py +0 -81
  335. pyxlpr/ppocr/utils/ic15_dict.txt +0 -36
  336. pyxlpr/ppocr/utils/iou.py +0 -54
  337. pyxlpr/ppocr/utils/logging.py +0 -69
  338. pyxlpr/ppocr/utils/network.py +0 -84
  339. pyxlpr/ppocr/utils/ppocr_keys_v1.txt +0 -6623
  340. pyxlpr/ppocr/utils/profiler.py +0 -110
  341. pyxlpr/ppocr/utils/save_load.py +0 -150
  342. pyxlpr/ppocr/utils/stats.py +0 -72
  343. pyxlpr/ppocr/utils/utility.py +0 -80
  344. pyxlpr/ppstructure/__init__.py +0 -13
  345. pyxlpr/ppstructure/predict_system.py +0 -187
  346. pyxlpr/ppstructure/table/__init__.py +0 -13
  347. pyxlpr/ppstructure/table/eval_table.py +0 -72
  348. pyxlpr/ppstructure/table/matcher.py +0 -192
  349. pyxlpr/ppstructure/table/predict_structure.py +0 -136
  350. pyxlpr/ppstructure/table/predict_table.py +0 -221
  351. pyxlpr/ppstructure/table/table_metric/__init__.py +0 -16
  352. pyxlpr/ppstructure/table/table_metric/parallel.py +0 -51
  353. pyxlpr/ppstructure/table/table_metric/table_metric.py +0 -247
  354. pyxlpr/ppstructure/table/tablepyxl/__init__.py +0 -13
  355. pyxlpr/ppstructure/table/tablepyxl/style.py +0 -283
  356. pyxlpr/ppstructure/table/tablepyxl/tablepyxl.py +0 -118
  357. pyxlpr/ppstructure/utility.py +0 -71
  358. pyxlpr/xlai.py +0 -10
@@ -1,72 +0,0 @@
1
- # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
- import os
15
- import sys
16
- __dir__ = os.path.dirname(os.path.abspath(__file__))
17
- sys.path.append(__dir__)
18
- sys.path.append(os.path.abspath(os.path.join(__dir__, '../..')))
19
-
20
- import cv2
21
- import json
22
- from tqdm import tqdm
23
- from pyxlpr.ppstructure.table.table_metric import TEDS
24
- from pyxlpr.ppstructure.table.predict_table import TableSystem
25
- from pyxlpr.ppstructure.utility import init_args
26
- from pyxlpr.ppocr.utils.logging import get_logger
27
-
28
- logger = get_logger()
29
-
30
-
31
- def parse_args():
32
- parser = init_args()
33
- parser.add_argument("--gt_path", type=str)
34
- return parser.parse_args()
35
-
36
- def main(gt_path, img_root, args):
37
- teds = TEDS(n_jobs=16)
38
-
39
- text_sys = TableSystem(args)
40
- jsons_gt = json.load(open(gt_path)) # gt
41
- pred_htmls = []
42
- gt_htmls = []
43
- for img_name in tqdm(jsons_gt):
44
- # read image
45
- img = cv2.imread(os.path.join(img_root,img_name))
46
- pred_html = text_sys(img)
47
- pred_htmls.append(pred_html)
48
-
49
- gt_structures, gt_bboxes, gt_contents = jsons_gt[img_name]
50
- gt_html, gt = get_gt_html(gt_structures, gt_contents)
51
- gt_htmls.append(gt_html)
52
- scores = teds.batch_evaluate_html(gt_htmls, pred_htmls)
53
- logger.info('teds:', sum(scores) / len(scores))
54
-
55
-
56
- def get_gt_html(gt_structures, gt_contents):
57
- end_html = []
58
- td_index = 0
59
- for tag in gt_structures:
60
- if '</td>' in tag:
61
- if gt_contents[td_index] != []:
62
- end_html.extend(gt_contents[td_index])
63
- end_html.append(tag)
64
- td_index += 1
65
- else:
66
- end_html.append(tag)
67
- return ''.join(end_html), end_html
68
-
69
-
70
- if __name__ == '__main__':
71
- args = parse_args()
72
- main(args.gt_path,args.image_dir, args)
@@ -1,192 +0,0 @@
1
- import json
2
- def distance(box_1, box_2):
3
- x1, y1, x2, y2 = box_1
4
- x3, y3, x4, y4 = box_2
5
- dis = abs(x3 - x1) + abs(y3 - y1) + abs(x4- x2) + abs(y4 - y2)
6
- dis_2 = abs(x3 - x1) + abs(y3 - y1)
7
- dis_3 = abs(x4- x2) + abs(y4 - y2)
8
- return dis + min(dis_2, dis_3)
9
-
10
- def compute_iou(rec1, rec2):
11
- """
12
- computing IoU
13
- :param rec1: (y0, x0, y1, x1), which reflects
14
- (top, left, bottom, right)
15
- :param rec2: (y0, x0, y1, x1)
16
- :return: scala value of IoU
17
- """
18
- # computing area of each rectangles
19
- S_rec1 = (rec1[2] - rec1[0]) * (rec1[3] - rec1[1])
20
- S_rec2 = (rec2[2] - rec2[0]) * (rec2[3] - rec2[1])
21
-
22
- # computing the sum_area
23
- sum_area = S_rec1 + S_rec2
24
-
25
- # find the each edge of intersect rectangle
26
- left_line = max(rec1[1], rec2[1])
27
- right_line = min(rec1[3], rec2[3])
28
- top_line = max(rec1[0], rec2[0])
29
- bottom_line = min(rec1[2], rec2[2])
30
-
31
- # judge if there is an intersect
32
- if left_line >= right_line or top_line >= bottom_line:
33
- return 0.0
34
- else:
35
- intersect = (right_line - left_line) * (bottom_line - top_line)
36
- return (intersect / (sum_area - intersect))*1.0
37
-
38
-
39
-
40
- def matcher_merge(ocr_bboxes, pred_bboxes):
41
- all_dis = []
42
- ious = []
43
- matched = {}
44
- for i, gt_box in enumerate(ocr_bboxes):
45
- distances = []
46
- for j, pred_box in enumerate(pred_bboxes):
47
- # compute l1 distence and IOU between two boxes
48
- distances.append((distance(gt_box, pred_box), 1. - compute_iou(gt_box, pred_box)))
49
- sorted_distances = distances.copy()
50
- # select nearest cell
51
- sorted_distances = sorted(sorted_distances, key = lambda item: (item[1], item[0]))
52
- if distances.index(sorted_distances[0]) not in matched.keys():
53
- matched[distances.index(sorted_distances[0])] = [i]
54
- else:
55
- matched[distances.index(sorted_distances[0])].append(i)
56
- return matched#, sum(ious) / len(ious)
57
-
58
- def complex_num(pred_bboxes):
59
- complex_nums = []
60
- for bbox in pred_bboxes:
61
- distances = []
62
- temp_ious = []
63
- for pred_bbox in pred_bboxes:
64
- if bbox != pred_bbox:
65
- distances.append(distance(bbox, pred_bbox))
66
- temp_ious.append(compute_iou(bbox, pred_bbox))
67
- complex_nums.append(temp_ious[distances.index(min(distances))])
68
- return sum(complex_nums) / len(complex_nums)
69
-
70
- def get_rows(pred_bboxes):
71
- pre_bbox = pred_bboxes[0]
72
- res = []
73
- step = 0
74
- for i in range(len(pred_bboxes)):
75
- bbox = pred_bboxes[i]
76
- if bbox[1] - pre_bbox[1] > 2 or bbox[0] - pre_bbox[0] < 0:
77
- break
78
- else:
79
- res.append(bbox)
80
- step += 1
81
- for i in range(step):
82
- pred_bboxes.pop(0)
83
- return res, pred_bboxes
84
- def refine_rows(pred_bboxes): # 微调整行的框,使在一条水平线上
85
- ys_1 = []
86
- ys_2 = []
87
- for box in pred_bboxes:
88
- ys_1.append(box[1])
89
- ys_2.append(box[3])
90
- min_y_1 = sum(ys_1) / len(ys_1)
91
- min_y_2 = sum(ys_2) / len(ys_2)
92
- re_boxes = []
93
- for box in pred_bboxes:
94
- box[1] = min_y_1
95
- box[3] = min_y_2
96
- re_boxes.append(box)
97
- return re_boxes
98
-
99
- def matcher_refine_row(gt_bboxes, pred_bboxes):
100
- before_refine_pred_bboxes = pred_bboxes.copy()
101
- pred_bboxes = []
102
- while(len(before_refine_pred_bboxes) != 0):
103
- row_bboxes, before_refine_pred_bboxes = get_rows(before_refine_pred_bboxes)
104
- print(row_bboxes)
105
- pred_bboxes.extend(refine_rows(row_bboxes))
106
- all_dis = []
107
- ious = []
108
- matched = {}
109
- for i, gt_box in enumerate(gt_bboxes):
110
- distances = []
111
- #temp_ious = []
112
- for j, pred_box in enumerate(pred_bboxes):
113
- distances.append(distance(gt_box, pred_box))
114
- #temp_ious.append(compute_iou(gt_box, pred_box))
115
- #all_dis.append(min(distances))
116
- #ious.append(temp_ious[distances.index(min(distances))])
117
- if distances.index(min(distances)) not in matched.keys():
118
- matched[distances.index(min(distances))] = [i]
119
- else:
120
- matched[distances.index(min(distances))].append(i)
121
- return matched#, sum(ious) / len(ious)
122
-
123
-
124
-
125
- #先挑选出一行,再进行匹配
126
- def matcher_structure_1(gt_bboxes, pred_bboxes_rows, pred_bboxes):
127
- gt_box_index = 0
128
- delete_gt_bboxes = gt_bboxes.copy()
129
- match_bboxes_ready = []
130
- matched = {}
131
- while(len(delete_gt_bboxes) != 0):
132
- row_bboxes, delete_gt_bboxes = get_rows(delete_gt_bboxes)
133
- row_bboxes = sorted(row_bboxes, key = lambda key: key[0])
134
- if len(pred_bboxes_rows) > 0:
135
- match_bboxes_ready.extend(pred_bboxes_rows.pop(0))
136
- print(row_bboxes)
137
- for i, gt_box in enumerate(row_bboxes):
138
- #print(gt_box)
139
- pred_distances = []
140
- distances = []
141
- for pred_bbox in pred_bboxes:
142
- pred_distances.append(distance(gt_box, pred_bbox))
143
- for j, pred_box in enumerate(match_bboxes_ready):
144
- distances.append(distance(gt_box, pred_box))
145
- index = pred_distances.index(min(distances))
146
- #print('index', index)
147
- if index not in matched.keys():
148
- matched[index] = [gt_box_index]
149
- else:
150
- matched[index].append(gt_box_index)
151
- gt_box_index += 1
152
- return matched
153
-
154
- def matcher_structure(gt_bboxes, pred_bboxes_rows, pred_bboxes):
155
- '''
156
- gt_bboxes: 排序后
157
- pred_bboxes:
158
- '''
159
- pre_bbox = gt_bboxes[0]
160
- matched = {}
161
- match_bboxes_ready = []
162
- match_bboxes_ready.extend(pred_bboxes_rows.pop(0))
163
- for i, gt_box in enumerate(gt_bboxes):
164
-
165
- pred_distances = []
166
- for pred_bbox in pred_bboxes:
167
- pred_distances.append(distance(gt_box, pred_bbox))
168
- distances = []
169
- gap_pre = gt_box[1] - pre_bbox[1]
170
- gap_pre_1 = gt_box[0] - pre_bbox[2]
171
- #print(gap_pre, len(pred_bboxes_rows))
172
- if (gap_pre_1 < 0 and len(pred_bboxes_rows) > 0):
173
- match_bboxes_ready.extend(pred_bboxes_rows.pop(0))
174
- if len(pred_bboxes_rows) == 1:
175
- match_bboxes_ready.extend(pred_bboxes_rows.pop(0))
176
- if len(match_bboxes_ready) == 0 and len(pred_bboxes_rows) > 0:
177
- match_bboxes_ready.extend(pred_bboxes_rows.pop(0))
178
- if len(match_bboxes_ready) == 0 and len(pred_bboxes_rows) == 0:
179
- break
180
- #print(match_bboxes_ready)
181
- for j, pred_box in enumerate(match_bboxes_ready):
182
- distances.append(distance(gt_box, pred_box))
183
- index = pred_distances.index(min(distances))
184
- #print(gt_box, index)
185
- #match_bboxes_ready.pop(distances.index(min(distances)))
186
- print(gt_box, match_bboxes_ready[distances.index(min(distances))])
187
- if index not in matched.keys():
188
- matched[index] = [i]
189
- else:
190
- matched[index].append(i)
191
- pre_bbox = gt_box
192
- return matched
@@ -1,136 +0,0 @@
1
- # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
- import os
15
- import sys
16
-
17
- __dir__ = os.path.dirname(os.path.abspath(__file__))
18
- sys.path.append(__dir__)
19
- sys.path.append(os.path.abspath(os.path.join(__dir__, '../..')))
20
-
21
- os.environ["FLAGS_allocator_strategy"] = 'auto_growth'
22
-
23
- import cv2
24
- import numpy as np
25
- import time
26
-
27
- import pyxlpr.ppocr.tools.infer.utility as utility
28
- from pyxlpr.ppocr.data import create_operators, transform
29
- from pyxlpr.ppocr.postprocess import build_post_process
30
- from pyxlpr.ppocr.utils.logging import get_logger
31
- from pyxlpr.ppocr.utils.utility import get_image_file_list, check_and_read_gif
32
- from pyxlpr.ppstructure.utility import parse_args
33
-
34
- logger = get_logger()
35
-
36
-
37
- class TableStructurer(object):
38
- def __init__(self, args):
39
- pre_process_list = [{
40
- 'ResizeTableImage': {
41
- 'max_len': args.table_max_len
42
- }
43
- }, {
44
- 'NormalizeImage': {
45
- 'std': [0.229, 0.224, 0.225],
46
- 'mean': [0.485, 0.456, 0.406],
47
- 'scale': '1./255.',
48
- 'order': 'hwc'
49
- }
50
- }, {
51
- 'PaddingTableImage': None
52
- }, {
53
- 'ToCHWImage': None
54
- }, {
55
- 'KeepKeys': {
56
- 'keep_keys': ['image']
57
- }
58
- }]
59
- postprocess_params = {
60
- 'name': 'TableLabelDecode',
61
- "character_type": args.table_char_type,
62
- "character_dict_path": args.table_char_dict_path,
63
- }
64
-
65
- self.preprocess_op = create_operators(pre_process_list)
66
- self.postprocess_op = build_post_process(postprocess_params)
67
- self.predictor, self.input_tensor, self.output_tensors, self.config = \
68
- utility.create_predictor(args, 'table', logger)
69
-
70
- def __call__(self, img):
71
- ori_im = img.copy()
72
- data = {'image': img}
73
- data = transform(data, self.preprocess_op)
74
- img = data[0]
75
- if img is None:
76
- return None, 0
77
- img = np.expand_dims(img, axis=0)
78
- img = img.copy()
79
- starttime = time.time()
80
-
81
- self.input_tensor.copy_from_cpu(img)
82
- self.predictor.run()
83
- outputs = []
84
- for output_tensor in self.output_tensors:
85
- output = output_tensor.copy_to_cpu()
86
- outputs.append(output)
87
-
88
- preds = {}
89
- preds['structure_probs'] = outputs[1]
90
- preds['loc_preds'] = outputs[0]
91
-
92
- post_result = self.postprocess_op(preds)
93
-
94
- structure_str_list = post_result['structure_str_list']
95
- res_loc = post_result['res_loc']
96
- imgh, imgw = ori_im.shape[0:2]
97
- res_loc_final = []
98
- for rno in range(len(res_loc[0])):
99
- x0, y0, x1, y1 = res_loc[0][rno]
100
- left = max(int(imgw * x0), 0)
101
- top = max(int(imgh * y0), 0)
102
- right = min(int(imgw * x1), imgw - 1)
103
- bottom = min(int(imgh * y1), imgh - 1)
104
- res_loc_final.append([left, top, right, bottom])
105
-
106
- structure_str_list = structure_str_list[0][:-1]
107
- structure_str_list = ['<html>', '<body>', '<table>'] + structure_str_list + ['</table>', '</body>', '</html>']
108
-
109
- elapse = time.time() - starttime
110
- return (structure_str_list, res_loc_final), elapse
111
-
112
-
113
- def main(args):
114
- image_file_list = get_image_file_list(args.image_dir)
115
- table_structurer = TableStructurer(args)
116
- count = 0
117
- total_time = 0
118
- for image_file in image_file_list:
119
- img, flag = check_and_read_gif(image_file)
120
- if not flag:
121
- img = cv2.imread(image_file)
122
- if img is None:
123
- logger.info("error in loading image:{}".format(image_file))
124
- continue
125
- structure_res, elapse = table_structurer(img)
126
-
127
- logger.info("result: {}".format(structure_res))
128
-
129
- if count > 0:
130
- total_time += elapse
131
- count += 1
132
- logger.info("Predict time of {}: {}".format(image_file, elapse))
133
-
134
-
135
- if __name__ == "__main__":
136
- main(parse_args())
@@ -1,221 +0,0 @@
1
- # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
-
15
- import os
16
- import sys
17
- import subprocess
18
-
19
- __dir__ = os.path.dirname(os.path.abspath(__file__))
20
- sys.path.append(__dir__)
21
- sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))
22
- sys.path.append(os.path.abspath(os.path.join(__dir__, '../..')))
23
-
24
- os.environ["FLAGS_allocator_strategy"] = 'auto_growth'
25
- import cv2
26
- import copy
27
- import numpy as np
28
- import time
29
- import pyxlpr.ppocr.tools.infer.predict_rec as predict_rec
30
- import pyxlpr.ppocr.tools.infer.predict_det as predict_det
31
- from pyxlpr.ppocr.utils.utility import get_image_file_list, check_and_read_gif
32
- from pyxlpr.ppocr.utils.logging import get_logger
33
- from pyxlpr.ppstructure.table.matcher import distance, compute_iou
34
- from pyxlpr.ppstructure.utility import parse_args
35
- import pyxlpr.ppstructure.table.predict_structure as predict_strture
36
-
37
- logger = get_logger()
38
-
39
-
40
- def expand(pix, det_box, shape):
41
- x0, y0, x1, y1 = det_box
42
- # print(shape)
43
- h, w, c = shape
44
- tmp_x0 = x0 - pix
45
- tmp_x1 = x1 + pix
46
- tmp_y0 = y0 - pix
47
- tmp_y1 = y1 + pix
48
- x0_ = tmp_x0 if tmp_x0 >= 0 else 0
49
- x1_ = tmp_x1 if tmp_x1 <= w else w
50
- y0_ = tmp_y0 if tmp_y0 >= 0 else 0
51
- y1_ = tmp_y1 if tmp_y1 <= h else h
52
- return x0_, y0_, x1_, y1_
53
-
54
-
55
- class TableSystem(object):
56
- def __init__(self, args, text_detector=None, text_recognizer=None):
57
- self.text_detector = predict_det.TextDetector(args) if text_detector is None else text_detector
58
- self.text_recognizer = predict_rec.TextRecognizer(args) if text_recognizer is None else text_recognizer
59
- self.table_structurer = predict_strture.TableStructurer(args)
60
-
61
- def __call__(self, img):
62
- ori_im = img.copy()
63
- structure_res, elapse = self.table_structurer(copy.deepcopy(img))
64
- dt_boxes, elapse = self.text_detector(copy.deepcopy(img))
65
- dt_boxes = sorted_boxes(dt_boxes)
66
-
67
- r_boxes = []
68
- for box in dt_boxes:
69
- x_min = box[:, 0].min() - 1
70
- x_max = box[:, 0].max() + 1
71
- y_min = box[:, 1].min() - 1
72
- y_max = box[:, 1].max() + 1
73
- box = [x_min, y_min, x_max, y_max]
74
- r_boxes.append(box)
75
- dt_boxes = np.array(r_boxes)
76
-
77
- logger.debug("dt_boxes num : {}, elapse : {}".format(
78
- len(dt_boxes), elapse))
79
- if dt_boxes is None:
80
- return None, None
81
- img_crop_list = []
82
-
83
- for i in range(len(dt_boxes)):
84
- det_box = dt_boxes[i]
85
- x0, y0, x1, y1 = expand(2, det_box, ori_im.shape)
86
- text_rect = ori_im[int(y0):int(y1), int(x0):int(x1), :]
87
- img_crop_list.append(text_rect)
88
- rec_res, elapse = self.text_recognizer(img_crop_list)
89
- logger.debug("rec_res num : {}, elapse : {}".format(
90
- len(rec_res), elapse))
91
-
92
- pred_html, pred = self.rebuild_table(structure_res, dt_boxes, rec_res)
93
- return pred_html
94
-
95
- def rebuild_table(self, structure_res, dt_boxes, rec_res):
96
- pred_structures, pred_bboxes = structure_res
97
- matched_index = self.match_result(dt_boxes, pred_bboxes)
98
- pred_html, pred = self.get_pred_html(pred_structures, matched_index, rec_res)
99
- return pred_html, pred
100
-
101
- def match_result(self, dt_boxes, pred_bboxes):
102
- matched = {}
103
- for i, gt_box in enumerate(dt_boxes):
104
- # gt_box = [np.min(gt_box[:, 0]), np.min(gt_box[:, 1]), np.max(gt_box[:, 0]), np.max(gt_box[:, 1])]
105
- distances = []
106
- for j, pred_box in enumerate(pred_bboxes):
107
- distances.append(
108
- (distance(gt_box, pred_box), 1. - compute_iou(gt_box, pred_box))) # 获取两两cell之间的L1距离和 1- IOU
109
- sorted_distances = distances.copy()
110
- # 根据距离和IOU挑选最"近"的cell
111
- sorted_distances = sorted(sorted_distances, key=lambda item: (item[1], item[0]))
112
- if distances.index(sorted_distances[0]) not in matched.keys():
113
- matched[distances.index(sorted_distances[0])] = [i]
114
- else:
115
- matched[distances.index(sorted_distances[0])].append(i)
116
- return matched
117
-
118
- def get_pred_html(self, pred_structures, matched_index, ocr_contents):
119
- end_html = []
120
- td_index = 0
121
- for tag in pred_structures:
122
- if '</td>' in tag:
123
- if td_index in matched_index.keys():
124
- b_with = False
125
- if '<b>' in ocr_contents[matched_index[td_index][0]] and len(matched_index[td_index]) > 1:
126
- b_with = True
127
- end_html.extend('<b>')
128
- for i, td_index_index in enumerate(matched_index[td_index]):
129
- content = ocr_contents[td_index_index][0]
130
- if len(matched_index[td_index]) > 1:
131
- if len(content) == 0:
132
- continue
133
- if content[0] == ' ':
134
- content = content[1:]
135
- if '<b>' in content:
136
- content = content[3:]
137
- if '</b>' in content:
138
- content = content[:-4]
139
- if len(content) == 0:
140
- continue
141
- if i != len(matched_index[td_index]) - 1 and ' ' != content[-1]:
142
- content += ' '
143
- end_html.extend(content)
144
- if b_with:
145
- end_html.extend('</b>')
146
-
147
- end_html.append(tag)
148
- td_index += 1
149
- else:
150
- end_html.append(tag)
151
- return ''.join(end_html), end_html
152
-
153
-
154
- def sorted_boxes(dt_boxes):
155
- """
156
- Sort text boxes in order from top to bottom, left to right
157
- args:
158
- dt_boxes(array):detected text boxes with shape [4, 2]
159
- return:
160
- sorted boxes(array) with shape [4, 2]
161
- """
162
- num_boxes = dt_boxes.shape[0]
163
- sorted_boxes = sorted(dt_boxes, key=lambda x: (x[0][1], x[0][0]))
164
- _boxes = list(sorted_boxes)
165
-
166
- for i in range(num_boxes - 1):
167
- if abs(_boxes[i + 1][0][1] - _boxes[i][0][1]) < 10 and \
168
- (_boxes[i + 1][0][0] < _boxes[i][0][0]):
169
- tmp = _boxes[i]
170
- _boxes[i] = _boxes[i + 1]
171
- _boxes[i + 1] = tmp
172
- return _boxes
173
-
174
-
175
- def to_excel(html_table, excel_path):
176
- from tablepyxl import tablepyxl
177
- tablepyxl.document_to_xl(html_table, excel_path)
178
-
179
-
180
- def main(args):
181
- image_file_list = get_image_file_list(args.image_dir)
182
- image_file_list = image_file_list[args.process_id::args.total_process_num]
183
- os.makedirs(args.output, exist_ok=True)
184
-
185
- text_sys = TableSystem(args)
186
- img_num = len(image_file_list)
187
- for i, image_file in enumerate(image_file_list):
188
- logger.info("[{}/{}] {}".format(i, img_num, image_file))
189
- img, flag = check_and_read_gif(image_file)
190
- excel_path = os.path.join(args.output, os.path.basename(image_file).split('.')[0] + '.xlsx')
191
- if not flag:
192
- img = cv2.imread(image_file)
193
- if img is None:
194
- logger.error("error in loading image:{}".format(image_file))
195
- continue
196
- starttime = time.time()
197
- pred_html = text_sys(img)
198
-
199
- to_excel(pred_html, excel_path)
200
- logger.info('excel saved to {}'.format(excel_path))
201
- logger.info(pred_html)
202
- elapse = time.time() - starttime
203
- logger.info("Predict time : {:.3f}s".format(elapse))
204
-
205
-
206
- if __name__ == "__main__":
207
- args = parse_args()
208
- if args.use_mp:
209
- p_list = []
210
- total_process_num = args.total_process_num
211
- for process_id in range(total_process_num):
212
- cmd = [sys.executable, "-u"] + sys.argv + [
213
- "--process_id={}".format(process_id),
214
- "--use_mp={}".format(False)
215
- ]
216
- p = subprocess.Popen(cmd, stdout=sys.stdout, stderr=sys.stdout)
217
- p_list.append(p)
218
- for p in p_list:
219
- p.wait()
220
- else:
221
- main(args)
@@ -1,16 +0,0 @@
1
- # copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
-
15
- __all__ = ['TEDS']
16
- from .table_metric import TEDS
@@ -1,51 +0,0 @@
1
- from tqdm import tqdm
2
- from concurrent.futures import ProcessPoolExecutor, as_completed
3
-
4
-
5
- def parallel_process(array, function, n_jobs=16, use_kwargs=False, front_num=0):
6
- """
7
- A parallel version of the map function with a progress bar.
8
- Args:
9
- array (array-like): An array to iterate over.
10
- function (function): A python function to apply to the elements of array
11
- n_jobs (int, default=16): The number of cores to use
12
- use_kwargs (boolean, default=False): Whether to consider the elements of array as dictionaries of
13
- keyword arguments to function
14
- front_num (int, default=3): The number of iterations to run serially before kicking off the parallel job.
15
- Useful for catching bugs
16
- Returns:
17
- [function(array[0]), function(array[1]), ...]
18
- """
19
- # We run the first few iterations serially to catch bugs
20
- if front_num > 0:
21
- front = [function(**a) if use_kwargs else function(a)
22
- for a in array[:front_num]]
23
- else:
24
- front = []
25
- # If we set n_jobs to 1, just run a list comprehension. This is useful for benchmarking and debugging.
26
- if n_jobs == 1:
27
- return front + [function(**a) if use_kwargs else function(a) for a in tqdm(array[front_num:])]
28
- # Assemble the workers
29
- with ProcessPoolExecutor(max_workers=n_jobs) as pool:
30
- # Pass the elements of array into function
31
- if use_kwargs:
32
- futures = [pool.submit(function, **a) for a in array[front_num:]]
33
- else:
34
- futures = [pool.submit(function, a) for a in array[front_num:]]
35
- kwargs = {
36
- 'total': len(futures),
37
- 'unit': 'it',
38
- 'unit_scale': True,
39
- 'leave': True
40
- }
41
- # Print out the progress as tasks complete
42
- for f in tqdm(as_completed(futures), **kwargs):
43
- pass
44
- out = []
45
- # Get the results from the futures.
46
- for i, future in tqdm(enumerate(futures)):
47
- try:
48
- out.append(future.result())
49
- except Exception as e:
50
- out.append(e)
51
- return front + out