pyxllib 0.3.96__py3-none-any.whl → 0.3.197__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (306) hide show
  1. pyxllib/algo/geo.py +12 -0
  2. pyxllib/algo/intervals.py +1 -1
  3. pyxllib/algo/matcher.py +78 -0
  4. pyxllib/algo/pupil.py +187 -19
  5. pyxllib/algo/specialist.py +2 -1
  6. pyxllib/algo/stat.py +38 -2
  7. {pyxlpr → pyxllib/autogui}/__init__.py +1 -1
  8. pyxllib/autogui/activewin.py +246 -0
  9. pyxllib/autogui/all.py +9 -0
  10. pyxllib/{ext/autogui → autogui}/autogui.py +40 -11
  11. pyxllib/autogui/uiautolib.py +362 -0
  12. pyxllib/autogui/wechat.py +827 -0
  13. pyxllib/autogui/wechat_msg.py +421 -0
  14. pyxllib/autogui/wxautolib.py +84 -0
  15. pyxllib/cv/slidercaptcha.py +137 -0
  16. pyxllib/data/echarts.py +123 -12
  17. pyxllib/data/jsonlib.py +89 -0
  18. pyxllib/data/pglib.py +514 -30
  19. pyxllib/data/sqlite.py +231 -4
  20. pyxllib/ext/JLineViewer.py +14 -1
  21. pyxllib/ext/drissionlib.py +277 -0
  22. pyxllib/ext/kq5034lib.py +0 -1594
  23. pyxllib/ext/robustprocfile.py +497 -0
  24. pyxllib/ext/unixlib.py +6 -5
  25. pyxllib/ext/utools.py +108 -95
  26. pyxllib/ext/webhook.py +32 -14
  27. pyxllib/ext/wjxlib.py +88 -0
  28. pyxllib/ext/wpsapi.py +124 -0
  29. pyxllib/ext/xlwork.py +9 -0
  30. pyxllib/ext/yuquelib.py +1003 -71
  31. pyxllib/file/docxlib.py +1 -1
  32. pyxllib/file/libreoffice.py +165 -0
  33. pyxllib/file/movielib.py +9 -0
  34. pyxllib/file/packlib/__init__.py +112 -75
  35. pyxllib/file/pdflib.py +1 -1
  36. pyxllib/file/pupil.py +1 -1
  37. pyxllib/file/specialist/dirlib.py +1 -1
  38. pyxllib/file/specialist/download.py +10 -3
  39. pyxllib/file/specialist/filelib.py +266 -55
  40. pyxllib/file/xlsxlib.py +205 -50
  41. pyxllib/file/xlsyncfile.py +341 -0
  42. pyxllib/prog/cachetools.py +64 -0
  43. pyxllib/prog/filelock.py +42 -0
  44. pyxllib/prog/multiprogs.py +940 -0
  45. pyxllib/prog/newbie.py +9 -2
  46. pyxllib/prog/pupil.py +129 -60
  47. pyxllib/prog/specialist/__init__.py +176 -2
  48. pyxllib/prog/specialist/bc.py +5 -2
  49. pyxllib/prog/specialist/browser.py +11 -2
  50. pyxllib/prog/specialist/datetime.py +68 -0
  51. pyxllib/prog/specialist/tictoc.py +12 -13
  52. pyxllib/prog/specialist/xllog.py +5 -5
  53. pyxllib/prog/xlosenv.py +7 -0
  54. pyxllib/text/airscript.js +744 -0
  55. pyxllib/text/charclasslib.py +17 -5
  56. pyxllib/text/jiebalib.py +6 -3
  57. pyxllib/text/jinjalib.py +32 -0
  58. pyxllib/text/jsa_ai_prompt.md +271 -0
  59. pyxllib/text/jscode.py +159 -4
  60. pyxllib/text/nestenv.py +1 -1
  61. pyxllib/text/newbie.py +12 -0
  62. pyxllib/text/pupil/common.py +26 -0
  63. pyxllib/text/specialist/ptag.py +2 -2
  64. pyxllib/text/templates/echart_base.html +11 -0
  65. pyxllib/text/templates/highlight_code.html +17 -0
  66. pyxllib/text/templates/latex_editor.html +103 -0
  67. pyxllib/text/xmllib.py +76 -14
  68. pyxllib/xl.py +2 -1
  69. pyxllib-0.3.197.dist-info/METADATA +48 -0
  70. pyxllib-0.3.197.dist-info/RECORD +126 -0
  71. {pyxllib-0.3.96.dist-info → pyxllib-0.3.197.dist-info}/WHEEL +1 -2
  72. pyxllib/ext/autogui/__init__.py +0 -8
  73. pyxllib-0.3.96.dist-info/METADATA +0 -51
  74. pyxllib-0.3.96.dist-info/RECORD +0 -333
  75. pyxllib-0.3.96.dist-info/top_level.txt +0 -2
  76. pyxlpr/ai/__init__.py +0 -5
  77. pyxlpr/ai/clientlib.py +0 -1281
  78. pyxlpr/ai/specialist.py +0 -286
  79. pyxlpr/ai/torch_app.py +0 -172
  80. pyxlpr/ai/xlpaddle.py +0 -655
  81. pyxlpr/ai/xltorch.py +0 -705
  82. pyxlpr/data/__init__.py +0 -11
  83. pyxlpr/data/coco.py +0 -1325
  84. pyxlpr/data/datacls.py +0 -365
  85. pyxlpr/data/datasets.py +0 -200
  86. pyxlpr/data/gptlib.py +0 -1291
  87. pyxlpr/data/icdar/__init__.py +0 -96
  88. pyxlpr/data/icdar/deteval.py +0 -377
  89. pyxlpr/data/icdar/icdar2013.py +0 -341
  90. pyxlpr/data/icdar/iou.py +0 -340
  91. pyxlpr/data/icdar/rrc_evaluation_funcs_1_1.py +0 -463
  92. pyxlpr/data/imtextline.py +0 -473
  93. pyxlpr/data/labelme.py +0 -866
  94. pyxlpr/data/removeline.py +0 -179
  95. pyxlpr/data/specialist.py +0 -57
  96. pyxlpr/eval/__init__.py +0 -85
  97. pyxlpr/paddleocr.py +0 -776
  98. pyxlpr/ppocr/__init__.py +0 -15
  99. pyxlpr/ppocr/configs/rec/multi_language/generate_multi_language_configs.py +0 -226
  100. pyxlpr/ppocr/data/__init__.py +0 -135
  101. pyxlpr/ppocr/data/imaug/ColorJitter.py +0 -26
  102. pyxlpr/ppocr/data/imaug/__init__.py +0 -67
  103. pyxlpr/ppocr/data/imaug/copy_paste.py +0 -170
  104. pyxlpr/ppocr/data/imaug/east_process.py +0 -437
  105. pyxlpr/ppocr/data/imaug/gen_table_mask.py +0 -244
  106. pyxlpr/ppocr/data/imaug/iaa_augment.py +0 -114
  107. pyxlpr/ppocr/data/imaug/label_ops.py +0 -789
  108. pyxlpr/ppocr/data/imaug/make_border_map.py +0 -184
  109. pyxlpr/ppocr/data/imaug/make_pse_gt.py +0 -106
  110. pyxlpr/ppocr/data/imaug/make_shrink_map.py +0 -126
  111. pyxlpr/ppocr/data/imaug/operators.py +0 -433
  112. pyxlpr/ppocr/data/imaug/pg_process.py +0 -906
  113. pyxlpr/ppocr/data/imaug/randaugment.py +0 -143
  114. pyxlpr/ppocr/data/imaug/random_crop_data.py +0 -239
  115. pyxlpr/ppocr/data/imaug/rec_img_aug.py +0 -533
  116. pyxlpr/ppocr/data/imaug/sast_process.py +0 -777
  117. pyxlpr/ppocr/data/imaug/text_image_aug/__init__.py +0 -17
  118. pyxlpr/ppocr/data/imaug/text_image_aug/augment.py +0 -120
  119. pyxlpr/ppocr/data/imaug/text_image_aug/warp_mls.py +0 -168
  120. pyxlpr/ppocr/data/lmdb_dataset.py +0 -115
  121. pyxlpr/ppocr/data/pgnet_dataset.py +0 -104
  122. pyxlpr/ppocr/data/pubtab_dataset.py +0 -107
  123. pyxlpr/ppocr/data/simple_dataset.py +0 -372
  124. pyxlpr/ppocr/losses/__init__.py +0 -61
  125. pyxlpr/ppocr/losses/ace_loss.py +0 -52
  126. pyxlpr/ppocr/losses/basic_loss.py +0 -135
  127. pyxlpr/ppocr/losses/center_loss.py +0 -88
  128. pyxlpr/ppocr/losses/cls_loss.py +0 -30
  129. pyxlpr/ppocr/losses/combined_loss.py +0 -67
  130. pyxlpr/ppocr/losses/det_basic_loss.py +0 -208
  131. pyxlpr/ppocr/losses/det_db_loss.py +0 -80
  132. pyxlpr/ppocr/losses/det_east_loss.py +0 -63
  133. pyxlpr/ppocr/losses/det_pse_loss.py +0 -149
  134. pyxlpr/ppocr/losses/det_sast_loss.py +0 -121
  135. pyxlpr/ppocr/losses/distillation_loss.py +0 -272
  136. pyxlpr/ppocr/losses/e2e_pg_loss.py +0 -140
  137. pyxlpr/ppocr/losses/kie_sdmgr_loss.py +0 -113
  138. pyxlpr/ppocr/losses/rec_aster_loss.py +0 -99
  139. pyxlpr/ppocr/losses/rec_att_loss.py +0 -39
  140. pyxlpr/ppocr/losses/rec_ctc_loss.py +0 -44
  141. pyxlpr/ppocr/losses/rec_enhanced_ctc_loss.py +0 -70
  142. pyxlpr/ppocr/losses/rec_nrtr_loss.py +0 -30
  143. pyxlpr/ppocr/losses/rec_sar_loss.py +0 -28
  144. pyxlpr/ppocr/losses/rec_srn_loss.py +0 -47
  145. pyxlpr/ppocr/losses/table_att_loss.py +0 -109
  146. pyxlpr/ppocr/metrics/__init__.py +0 -44
  147. pyxlpr/ppocr/metrics/cls_metric.py +0 -45
  148. pyxlpr/ppocr/metrics/det_metric.py +0 -82
  149. pyxlpr/ppocr/metrics/distillation_metric.py +0 -73
  150. pyxlpr/ppocr/metrics/e2e_metric.py +0 -86
  151. pyxlpr/ppocr/metrics/eval_det_iou.py +0 -274
  152. pyxlpr/ppocr/metrics/kie_metric.py +0 -70
  153. pyxlpr/ppocr/metrics/rec_metric.py +0 -75
  154. pyxlpr/ppocr/metrics/table_metric.py +0 -50
  155. pyxlpr/ppocr/modeling/architectures/__init__.py +0 -32
  156. pyxlpr/ppocr/modeling/architectures/base_model.py +0 -88
  157. pyxlpr/ppocr/modeling/architectures/distillation_model.py +0 -60
  158. pyxlpr/ppocr/modeling/backbones/__init__.py +0 -54
  159. pyxlpr/ppocr/modeling/backbones/det_mobilenet_v3.py +0 -268
  160. pyxlpr/ppocr/modeling/backbones/det_resnet_vd.py +0 -246
  161. pyxlpr/ppocr/modeling/backbones/det_resnet_vd_sast.py +0 -285
  162. pyxlpr/ppocr/modeling/backbones/e2e_resnet_vd_pg.py +0 -265
  163. pyxlpr/ppocr/modeling/backbones/kie_unet_sdmgr.py +0 -186
  164. pyxlpr/ppocr/modeling/backbones/rec_mobilenet_v3.py +0 -138
  165. pyxlpr/ppocr/modeling/backbones/rec_mv1_enhance.py +0 -258
  166. pyxlpr/ppocr/modeling/backbones/rec_nrtr_mtb.py +0 -48
  167. pyxlpr/ppocr/modeling/backbones/rec_resnet_31.py +0 -210
  168. pyxlpr/ppocr/modeling/backbones/rec_resnet_aster.py +0 -143
  169. pyxlpr/ppocr/modeling/backbones/rec_resnet_fpn.py +0 -307
  170. pyxlpr/ppocr/modeling/backbones/rec_resnet_vd.py +0 -286
  171. pyxlpr/ppocr/modeling/heads/__init__.py +0 -54
  172. pyxlpr/ppocr/modeling/heads/cls_head.py +0 -52
  173. pyxlpr/ppocr/modeling/heads/det_db_head.py +0 -118
  174. pyxlpr/ppocr/modeling/heads/det_east_head.py +0 -121
  175. pyxlpr/ppocr/modeling/heads/det_pse_head.py +0 -37
  176. pyxlpr/ppocr/modeling/heads/det_sast_head.py +0 -128
  177. pyxlpr/ppocr/modeling/heads/e2e_pg_head.py +0 -253
  178. pyxlpr/ppocr/modeling/heads/kie_sdmgr_head.py +0 -206
  179. pyxlpr/ppocr/modeling/heads/multiheadAttention.py +0 -163
  180. pyxlpr/ppocr/modeling/heads/rec_aster_head.py +0 -393
  181. pyxlpr/ppocr/modeling/heads/rec_att_head.py +0 -202
  182. pyxlpr/ppocr/modeling/heads/rec_ctc_head.py +0 -88
  183. pyxlpr/ppocr/modeling/heads/rec_nrtr_head.py +0 -826
  184. pyxlpr/ppocr/modeling/heads/rec_sar_head.py +0 -402
  185. pyxlpr/ppocr/modeling/heads/rec_srn_head.py +0 -280
  186. pyxlpr/ppocr/modeling/heads/self_attention.py +0 -406
  187. pyxlpr/ppocr/modeling/heads/table_att_head.py +0 -246
  188. pyxlpr/ppocr/modeling/necks/__init__.py +0 -32
  189. pyxlpr/ppocr/modeling/necks/db_fpn.py +0 -111
  190. pyxlpr/ppocr/modeling/necks/east_fpn.py +0 -188
  191. pyxlpr/ppocr/modeling/necks/fpn.py +0 -138
  192. pyxlpr/ppocr/modeling/necks/pg_fpn.py +0 -314
  193. pyxlpr/ppocr/modeling/necks/rnn.py +0 -92
  194. pyxlpr/ppocr/modeling/necks/sast_fpn.py +0 -284
  195. pyxlpr/ppocr/modeling/necks/table_fpn.py +0 -110
  196. pyxlpr/ppocr/modeling/transforms/__init__.py +0 -28
  197. pyxlpr/ppocr/modeling/transforms/stn.py +0 -135
  198. pyxlpr/ppocr/modeling/transforms/tps.py +0 -308
  199. pyxlpr/ppocr/modeling/transforms/tps_spatial_transformer.py +0 -156
  200. pyxlpr/ppocr/optimizer/__init__.py +0 -61
  201. pyxlpr/ppocr/optimizer/learning_rate.py +0 -228
  202. pyxlpr/ppocr/optimizer/lr_scheduler.py +0 -49
  203. pyxlpr/ppocr/optimizer/optimizer.py +0 -160
  204. pyxlpr/ppocr/optimizer/regularizer.py +0 -52
  205. pyxlpr/ppocr/postprocess/__init__.py +0 -55
  206. pyxlpr/ppocr/postprocess/cls_postprocess.py +0 -33
  207. pyxlpr/ppocr/postprocess/db_postprocess.py +0 -234
  208. pyxlpr/ppocr/postprocess/east_postprocess.py +0 -143
  209. pyxlpr/ppocr/postprocess/locality_aware_nms.py +0 -200
  210. pyxlpr/ppocr/postprocess/pg_postprocess.py +0 -52
  211. pyxlpr/ppocr/postprocess/pse_postprocess/__init__.py +0 -15
  212. pyxlpr/ppocr/postprocess/pse_postprocess/pse/__init__.py +0 -29
  213. pyxlpr/ppocr/postprocess/pse_postprocess/pse/setup.py +0 -14
  214. pyxlpr/ppocr/postprocess/pse_postprocess/pse_postprocess.py +0 -118
  215. pyxlpr/ppocr/postprocess/rec_postprocess.py +0 -654
  216. pyxlpr/ppocr/postprocess/sast_postprocess.py +0 -355
  217. pyxlpr/ppocr/tools/__init__.py +0 -14
  218. pyxlpr/ppocr/tools/eval.py +0 -83
  219. pyxlpr/ppocr/tools/export_center.py +0 -77
  220. pyxlpr/ppocr/tools/export_model.py +0 -129
  221. pyxlpr/ppocr/tools/infer/predict_cls.py +0 -151
  222. pyxlpr/ppocr/tools/infer/predict_det.py +0 -300
  223. pyxlpr/ppocr/tools/infer/predict_e2e.py +0 -169
  224. pyxlpr/ppocr/tools/infer/predict_rec.py +0 -414
  225. pyxlpr/ppocr/tools/infer/predict_system.py +0 -204
  226. pyxlpr/ppocr/tools/infer/utility.py +0 -629
  227. pyxlpr/ppocr/tools/infer_cls.py +0 -83
  228. pyxlpr/ppocr/tools/infer_det.py +0 -134
  229. pyxlpr/ppocr/tools/infer_e2e.py +0 -122
  230. pyxlpr/ppocr/tools/infer_kie.py +0 -153
  231. pyxlpr/ppocr/tools/infer_rec.py +0 -146
  232. pyxlpr/ppocr/tools/infer_table.py +0 -107
  233. pyxlpr/ppocr/tools/program.py +0 -596
  234. pyxlpr/ppocr/tools/test_hubserving.py +0 -117
  235. pyxlpr/ppocr/tools/train.py +0 -163
  236. pyxlpr/ppocr/tools/xlprog.py +0 -748
  237. pyxlpr/ppocr/utils/EN_symbol_dict.txt +0 -94
  238. pyxlpr/ppocr/utils/__init__.py +0 -24
  239. pyxlpr/ppocr/utils/dict/ar_dict.txt +0 -117
  240. pyxlpr/ppocr/utils/dict/arabic_dict.txt +0 -162
  241. pyxlpr/ppocr/utils/dict/be_dict.txt +0 -145
  242. pyxlpr/ppocr/utils/dict/bg_dict.txt +0 -140
  243. pyxlpr/ppocr/utils/dict/chinese_cht_dict.txt +0 -8421
  244. pyxlpr/ppocr/utils/dict/cyrillic_dict.txt +0 -163
  245. pyxlpr/ppocr/utils/dict/devanagari_dict.txt +0 -167
  246. pyxlpr/ppocr/utils/dict/en_dict.txt +0 -63
  247. pyxlpr/ppocr/utils/dict/fa_dict.txt +0 -136
  248. pyxlpr/ppocr/utils/dict/french_dict.txt +0 -136
  249. pyxlpr/ppocr/utils/dict/german_dict.txt +0 -143
  250. pyxlpr/ppocr/utils/dict/hi_dict.txt +0 -162
  251. pyxlpr/ppocr/utils/dict/it_dict.txt +0 -118
  252. pyxlpr/ppocr/utils/dict/japan_dict.txt +0 -4399
  253. pyxlpr/ppocr/utils/dict/ka_dict.txt +0 -153
  254. pyxlpr/ppocr/utils/dict/korean_dict.txt +0 -3688
  255. pyxlpr/ppocr/utils/dict/latin_dict.txt +0 -185
  256. pyxlpr/ppocr/utils/dict/mr_dict.txt +0 -153
  257. pyxlpr/ppocr/utils/dict/ne_dict.txt +0 -153
  258. pyxlpr/ppocr/utils/dict/oc_dict.txt +0 -96
  259. pyxlpr/ppocr/utils/dict/pu_dict.txt +0 -130
  260. pyxlpr/ppocr/utils/dict/rs_dict.txt +0 -91
  261. pyxlpr/ppocr/utils/dict/rsc_dict.txt +0 -134
  262. pyxlpr/ppocr/utils/dict/ru_dict.txt +0 -125
  263. pyxlpr/ppocr/utils/dict/ta_dict.txt +0 -128
  264. pyxlpr/ppocr/utils/dict/table_dict.txt +0 -277
  265. pyxlpr/ppocr/utils/dict/table_structure_dict.txt +0 -2759
  266. pyxlpr/ppocr/utils/dict/te_dict.txt +0 -151
  267. pyxlpr/ppocr/utils/dict/ug_dict.txt +0 -114
  268. pyxlpr/ppocr/utils/dict/uk_dict.txt +0 -142
  269. pyxlpr/ppocr/utils/dict/ur_dict.txt +0 -137
  270. pyxlpr/ppocr/utils/dict/xi_dict.txt +0 -110
  271. pyxlpr/ppocr/utils/dict90.txt +0 -90
  272. pyxlpr/ppocr/utils/e2e_metric/Deteval.py +0 -574
  273. pyxlpr/ppocr/utils/e2e_metric/polygon_fast.py +0 -83
  274. pyxlpr/ppocr/utils/e2e_utils/extract_batchsize.py +0 -87
  275. pyxlpr/ppocr/utils/e2e_utils/extract_textpoint_fast.py +0 -457
  276. pyxlpr/ppocr/utils/e2e_utils/extract_textpoint_slow.py +0 -592
  277. pyxlpr/ppocr/utils/e2e_utils/pgnet_pp_utils.py +0 -162
  278. pyxlpr/ppocr/utils/e2e_utils/visual.py +0 -162
  279. pyxlpr/ppocr/utils/en_dict.txt +0 -95
  280. pyxlpr/ppocr/utils/gen_label.py +0 -81
  281. pyxlpr/ppocr/utils/ic15_dict.txt +0 -36
  282. pyxlpr/ppocr/utils/iou.py +0 -54
  283. pyxlpr/ppocr/utils/logging.py +0 -69
  284. pyxlpr/ppocr/utils/network.py +0 -84
  285. pyxlpr/ppocr/utils/ppocr_keys_v1.txt +0 -6623
  286. pyxlpr/ppocr/utils/profiler.py +0 -110
  287. pyxlpr/ppocr/utils/save_load.py +0 -150
  288. pyxlpr/ppocr/utils/stats.py +0 -72
  289. pyxlpr/ppocr/utils/utility.py +0 -80
  290. pyxlpr/ppstructure/__init__.py +0 -13
  291. pyxlpr/ppstructure/predict_system.py +0 -187
  292. pyxlpr/ppstructure/table/__init__.py +0 -13
  293. pyxlpr/ppstructure/table/eval_table.py +0 -72
  294. pyxlpr/ppstructure/table/matcher.py +0 -192
  295. pyxlpr/ppstructure/table/predict_structure.py +0 -136
  296. pyxlpr/ppstructure/table/predict_table.py +0 -221
  297. pyxlpr/ppstructure/table/table_metric/__init__.py +0 -16
  298. pyxlpr/ppstructure/table/table_metric/parallel.py +0 -51
  299. pyxlpr/ppstructure/table/table_metric/table_metric.py +0 -247
  300. pyxlpr/ppstructure/table/tablepyxl/__init__.py +0 -13
  301. pyxlpr/ppstructure/table/tablepyxl/style.py +0 -283
  302. pyxlpr/ppstructure/table/tablepyxl/tablepyxl.py +0 -118
  303. pyxlpr/ppstructure/utility.py +0 -71
  304. pyxlpr/xlai.py +0 -10
  305. /pyxllib/{ext/autogui → autogui}/virtualkey.py +0 -0
  306. {pyxllib-0.3.96.dist-info → pyxllib-0.3.197.dist-info/licenses}/LICENSE +0 -0
pyxlpr/data/imtextline.py DELETED
@@ -1,473 +0,0 @@
1
- #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
- # @Author : 陈坤泽
4
- # @Email : 877362867@qq.com
5
- # @Date : 2020/11/17
6
-
7
- """ 图片文本行标注相关处理
8
- """
9
-
10
- from pyxllib.xlcv import *
11
-
12
- from functools import reduce
13
-
14
- from shapely.geometry import MultiPolygon
15
-
16
- from pyxllib.algo.geo import split_vector_interval
17
- from pyxllib.algo.disjoint import disjoint_set
18
- from pyxllib.algo.shapelylib import ShapelyPolygon
19
-
20
-
21
class TextlineShape:
    """ A single text-line annotation, stored as a polygon plus its bounding box

    The base class only offers geometric relations between boxes;
    subclass it to attach further behaviour.
    """

    def __init__(self, box, *, shrink_bound=False):
        """
        :param box: any value that ``ShapelyPolygon.gen`` can turn into a polygon
        :param shrink_bound: strongly slanted text boxes need special care, a shrunken
            bounding rectangle describes them more accurately.
            With this flag the bounds are rescaled (via ``bound_scale``) by the ratio
            ``polygon area / bounding-rectangle area``, so a slanted box can be
            treated like an ordinary rectangle afterwards.

        Detailed docs: https://www.yuque.com/xlpr/pyxllib/textlineshape
        """
        self.polygon = ShapelyPolygon.gen(box)
        self.bounds = self.polygon.bounds
        if shrink_bound:
            b = self.bounds
            total_area = (b[2] - b[0]) * (b[3] - b[1])
            # A degenerate (zero-area) bounding rectangle has no meaningful ratio,
            # so it is left untouched instead of dividing by zero.
            if total_area:
                self.bounds = bound_scale(self.bounds, self.polygon.area / total_area)

        self.minx, self.maxx = self.bounds[0], self.bounds[2]
        self.width = self.maxx - self.minx
        self.miny, self.maxy = self.bounds[1], self.bounds[3]
        self.height = self.maxy - self.miny
        self.centroid = self.polygon.centroid

    def in_the_same_line(self, other):
        """ Whether both boxes sit on the same (horizontal) text line

        True when either centroid lies strictly inside the vertical extent of the other box.
        """
        return bool(other.miny < self.centroid.y < other.maxy
                    or self.miny < other.centroid.y < self.maxy)

    def in_the_same_column(self, other):
        """ Column counterpart of ``in_the_same_line``: compares centroid x with the horizontal extent """
        return bool(other.minx < self.centroid.x < other.maxx
                    or self.minx < other.centroid.x < self.maxx)

    def is_lr_intersect(self, other, gap=5):
        """ Horizontal overlap test

        True when the left or the right edge of ``self`` falls inside the horizontal
        extent of ``other`` widened by ``gap`` on both sides.
        """
        lo, hi = other.minx - gap, other.maxx + gap
        return bool(lo <= self.minx <= hi or lo <= self.maxx <= hi)

    def is_tb_intersect(self, other, gap=5):
        """ Vertical overlap test

        True when the top or the bottom edge of ``self`` falls inside the vertical
        extent of ``other`` widened by ``gap`` on both sides.
        """
        # A data-dependent gap (e.g. min(50, self.height / 2, other.height / 2)) was a rule of
        # one specific project and deliberately does not belong in this generic version.
        lo, hi = other.miny - gap, other.maxy + gap
        return bool(lo <= self.miny <= hi or lo <= self.maxy <= hi)

    def is_intersect(self, other):
        """ Whether the two polygons intersect

        :param other: another TextlineShape, or a raw geometry accepted by ``polygon.intersects``
        """
        # ``merge`` passes TextlineShape objects, so unwrap to the underlying polygon;
        # objects without a ``polygon`` attribute are forwarded unchanged.
        return self.polygon.intersects(getattr(other, 'polygon', other))

    def __add__(self, other):
        """ Merge two text lines into the bounding rectangle that covers both polygons """
        box = rect2polygon(MultiPolygon([self.polygon, other.polygon]).bounds)
        return TextlineShape(box)

    def __lt__(self, other):
        """ Ordering rule of boxes: left-to-right inside one line, otherwise top-to-bottom """
        if self.in_the_same_line(other):
            return self.centroid.x < other.centroid.x
        return self.centroid.y < other.centroid.y

    @classmethod
    def merge(cls, shapes):
        """ Merge the intersecting shapes that belong to one image

        :param shapes: iterable of TextlineShape
        :return: list with one merged shape per group of mutually connected shapes
        """
        # 1 Group the boxes that intersect each other
        shape_groups = disjoint_set(shapes, lambda x, y: x.is_intersect(y))

        # 2 Fold every group (in reading order) into a single shape
        return [reduce(lambda x, y: x + y, sorted(group)) for group in shape_groups]
116
-
117
-
118
def im_textline_split(im, maxsplit=None, minwidth=3):
    """ Baseline splitter: fairly clean images, dark text on a white background, no strong slant

    Data of special tasks usually needs a customised variant of this function.

    :param im: image accepted by ``xlcv.read`` (read with flag 0, presumably grayscale)
    :param maxsplit: forwarded to ``split_vector_interval``
    :param minwidth: forwarded to ``split_vector_interval``
    :return: whatever ``split_vector_interval`` returns for the per-column ink counts
    """
    gray = xlcv.read(im, 0)
    # Clean images can be binarised directly against their mean value. This is demanding on
    # the binarised image: a blank column must contain practically no text pixel at all.
    ink_mask = gray < np.mean(gray)
    column_ink = ink_mask.sum(axis=0)
    return split_vector_interval(column_ink, maxsplit=maxsplit, minwidth=minwidth)
129
-
130
-
131
def merge_labels_by_widths(labels, widths, sep=' '):
    """ Merge a group of labels into len(widths) strings, guided by the weight of each width

    Companion of the image splitting helpers: the text usually has to be divided
    the same way the image was cut.

    The algorithm can also be used for splitting: to cut 'abcdefg' into two parts
    weighted [20, 30], first turn the string into a list of single characters
    ['a', 'b', 'c', 'd', 'e', 'f', 'g'].

    :param labels: a sequence of strings; it is never modified
    :param widths: a sequence of reference widths (only their proportions matter)
    :param sep: separator used whenever labels are concatenated
    :return: list of len(widths) strings matching the reference widths as well as possible

    >>> merge_labels_by_widths(['aa', 'bbb', 'c', 'ccc'], [10,10,20])
    ['aa', 'bbb', 'c ccc']
    >>> merge_labels_by_widths(['a', 'a', 'b', 'b'], [13, 10, 10])
    ['a a', 'b', 'b']
    >>> merge_labels_by_widths(['a', 'a', 'b', 'b'], [10, 10, 10])
    ['a', 'a', 'b b']
    >>> merge_labels_by_widths(['a', 'b', 'c'], [11, 12, 13])
    ['a', 'b', 'c']
    >>> merge_labels_by_widths(['a'], [10, 12])  # fewer labels than widths: padded with ''
    ['a', '']
    >>> merge_labels_by_widths([''], [10, 12])
    ['', '']

    TODO the implementation still feels a bit messy, there may be room for improvement
    """
    # 1 Bring both sides to the same scale
    # Work on a private copy: the padding below must not leak into the caller's list.
    labels = list(labels)
    if len(labels) < len(widths):
        labels += [''] * (len(widths) - len(labels))
    label_widths = [strwidth(x) for x in labels]
    n_label = len(labels)
    assert sum(widths), 'widths必须要有权重值'
    r = sum(label_widths) / sum(widths)
    widths = [r * w for w in widths]

    # 2 Greedy merge
    need_merge = n_label - len(widths)  # how many joins are required in total
    i, k, new_labels = 0, 0, []  # i: next unused label, k: joins done so far
    for w in widths:
        if k < need_merge:
            label_width = label_widths[i]
            j = i + 1
            # Keep absorbing the next label while that moves the total closer to the target width
            while j < n_label and k < need_merge and abs(label_width + label_widths[j] - w) < abs(label_width - w):
                label_width += label_widths[j]
                j += 1
                k += 1
            new_labels.append(sep.join(labels[i:j]))
            i = j
        elif k == need_merge:
            # All required joins are done: the remaining labels map one-to-one
            new_labels += labels[i:]
            i = n_label
            break
    # Labels that are still unused are all appended to the last entry (with the caller's separator)
    if i < n_label:
        new_labels[-1] = sep.join([new_labels[-1]] + labels[i:])

    return new_labels
190
-
191
-
192
class TextlineAnnotation(TextlineShape):
    """ A text-line annotation in coco format """

    def __init__(self, anno):
        """
        :param anno: coco annotation dict; its 'bbox' ([x, y, w, h]) defines the shape
        """
        super().__init__(xywh2ltrb(anno['bbox']))
        self.anno = anno

    def __add__(self, other):
        """ Merge two coco annotations

        The attributes of ``self`` are used as the base; neither operand is modified.
        """
        anno, anno2 = self.anno.copy(), other.anno

        # bbox after merging
        anno['bbox'] = ltrb2xywh(MultiPolygon([self.polygon, other.polygon]).bounds)

        # Merge the segmentation. ``anno`` is only a shallow copy, so a fresh list is built
        # rather than extending in place, which would silently alter self.anno.
        segmentation2 = anno2.get('segmentation')
        if segmentation2:
            anno['segmentation'] = (anno.get('segmentation') or []) + segmentation2

        # Merge the label
        if 'label' in anno or 'label' in anno2:
            text = anno2.get('label', '')
            if text:
                text = ' ' + text
            anno['label'] = anno.get('label', '') + text

        return TextlineAnnotation(anno)

    @classmethod
    def merge(cls, annotations):
        """ Merge annotations that lie on the same text line and are close to / overlap each other

        :param annotations: coco annotations of one image
        :return: list of merged annotation dicts
        """
        # 1 Convert to shapes
        shapes = [cls(x) for x in annotations]

        # 2 Group the boxes
        shape_groups = disjoint_set(shapes, lambda x, y: x.in_the_same_line(y) and x.is_lr_intersect(y))

        # 3 Merge every group in reading order
        new_shapes = [reduce(lambda x, y: x + y, sorted(group)) for group in shape_groups]

        # 4 Convert back to annotations
        return [x.anno for x in new_shapes]

    @classmethod
    def split(cls, im, annotations, split_func=im_textline_split):
        """ Split the text-line annotations of image ``im`` at blank background columns

        :param im: image data
        :param annotations: annotations in coco format
        :param split_func: projection analysis function applied to each cropped box;
            it has to return the column intervals that contain text

        The label text is split along with the box.
        NOTE(review): an annotation that yields several spans must carry a 'label' key,
        it is read unconditionally on that path.

        :return:
            the new list of annotations
            Note that some images cannot be processed properly; [] is returned then and
            it is advisable to discard such an image.
        """
        new_annos = []
        for anno in annotations:
            x, _, w, _ = anno['bbox']
            ltrb = xywh2ltrb(anno['bbox'])
            _, t, _, b = ltrb
            subim = xlcv.get_sub(im, ltrb)
            spans = split_func(subim)
            # Widen every span by 3px on both sides (clamped to the box) and
            # convert to absolute coordinates of the whole image
            spans = [[x + max(span[0] - 3, 0), x + min(span[1] + 3, w)] for span in spans]

            if len(spans) == 0:
                # Special case: most likely the box is misplaced, the quality is insufficient.
                # All annotations of the image are dropped, an empty result is returned.
                return []
            elif len(spans) == 1:
                l, r = spans[0]
                a = copy.copy(anno)
                a['bbox'] = ltrb2xywh([l, t, r, b])
                new_annos.append(a)
            else:  # several segments were found
                # The label has to be divided as well
                labels = anno['label'].split()
                if len(labels) > len(spans):
                    labels = merge_labels_by_widths(labels, [(span[1] - span[0]) for span in spans])
                elif len(labels) < len(spans):
                    # Every occurrence of "fewer labels than spans" should be inspected
                    get_xllog().warning(DPrint.format({'$异常': 'len(labels)<len(spans)',
                                                       'labels': labels, 'spans': spans}))
                    # Keep the original box for now
                    new_annos.append(anno)
                    continue

                for span, label in zip(spans, labels):
                    l, r = span
                    a = copy.copy(anno)
                    a['bbox'] = ltrb2xywh([l, t, r, b])
                    a['label'] = label
                    new_annos.append(a)

        return new_annos
296
-
297
-
298
class TextlineSpliter:
    """ Projection based splitter for text-line images and their label files

    TextString2016 and Casia can basically use it as is.
    """

    @classmethod
    def spliter(cls, im, maxsplit=None, minwidth=3):
        """ (Core interface) for fairly clean images: dark text on white, no strong slant
        Other situations have to override this method, see EnglishWordTLS

        :param im: path of the input image, or an np.ndarray matrix
        :param maxsplit: maximum number of splits, i.e. the maximum number of sub intervals
            When not set, every position that satisfies the condition is cut
        :param minwidth: minimum width of each split position
        :return: [(l, r), (l, r), ...] left/right interval of each piece of text

        Detailed docs: https://www.yuque.com/xlpr/data/cx6xm5
        """
        img = xlcv.read(im, 0)
        m = np.mean(img)
        # Clean images can be binarised directly against their mean value. This is demanding on
        # the binarised image: a blank column must contain practically no text pixel at all.
        bi = img < m
        # Subtracting 2 makes columns with at most two dark pixels non-positive
        vec = bi.sum(axis=0) - 2
        return split_vector_interval(vec, maxsplit=maxsplit, minwidth=minwidth)

    @classmethod
    def split_img(cls, file, maxsplit=None, minwidth=3):
        """
        :param file: np.ndarray image, pil image, or image path
        :param maxsplit: see ``spliter``
        :param minwidth: see ``spliter``
        :return: list of the cut images (column slices of the image that was read)
        """
        img = xlcv.read(file)
        vec = cls.spliter(img, maxsplit, minwidth)
        # r is included in the slice
        return [img[:, l:r + 1] for l, r in vec]

    @classmethod
    def spliter_img(cls, file, maxsplit=None, minwidth=3):
        """ Visualisation: draw the split result of one image, for testing
        :param file: image accepted by ``xlcv.read``, e.g. an absolute image path
        :return: image with a vertical line at every interval border
        """
        im = xlcv.read(file, 0)
        cols = cls.spliter(im, maxsplit=maxsplit, minwidth=minwidth)

        # One full-height vertical line per interval border: l0, r0, l1, r1, ...
        lines = [[c, 0, c, im.shape[0] - 1] for c in np.array(cols, dtype=int).reshape(-1)]
        # Borders of even intervals are drawn red
        im2 = xlcv.lines(im, lines[::4], [0, 0, 255])
        im2 = xlcv.lines(im2, lines[1::4], [0, 0, 255])
        # Borders of odd intervals are drawn blue
        im2 = xlcv.lines(im2, lines[2::4], [255, 0, 0])
        im2 = xlcv.lines(im2, lines[3::4], [255, 0, 0])

        return im2

    @classmethod
    def show_spliter_imgs(cls, dir_state, *, save=None, show=True):
        """ Run ``spliter_img`` over a selection of images through ``ImagesDir.debug_func``

        :param dir_state: the images to process (e.g. a random sample of 10 images)
        :param save: where the results are stored
        :param show: whether to imshow the result images
        """
        ImagesDir.debug_func(dir_state,
                             lambda img_file: cls.spliter_img(img_file, maxsplit=None, minwidth=3),
                             save=save,
                             show=show)

    @classmethod
    def relabel_labelfile(cls, p, maxsplit=None, minwidth=3, imgdir='images'):
        """ Process all images listed in one label file and append a column with the interval coordinates

        :param p: label file; every line starts with the image name, optionally followed by its text
        :param imgdir: sub directory (next to ``p``) that holds the images

        The result is written next to ``p`` with '+text_interval-minw={minwidth}' added to the stem.
        Blank lines are skipped, in the same way as in ``split_labelfiles``.
        """
        res = []
        for line in p.read().splitlines():
            fields = line.split(maxsplit=1)
            if not fields:
                # A blank line has no image name; indexing it would raise IndexError
                continue
            im = xlcv.read(p.parent / f'{imgdir}/{fields[0]}', 0)
            cols = cls.spliter(im, maxsplit, minwidth)
            fields.append(' '.join(map(str, np.array(cols, dtype=int).reshape(-1))))
            res.append('\t'.join(fields))
        content = '\n'.join(res)
        p.with_stem(p.stem + f'+text_interval-minw={minwidth}').write(content, if_exists='replace')

    @classmethod
    def relabel_labelfiles(cls, root, maxsplit=None, minwidth=3, imgdir='images'):
        """ Relabel val.txt, test.txt and train.txt below ``root``
        :param root: root directory
        :param imgdir: name of the sub directory that holds the images
        :return:
        """
        root = Dir(root)
        for name in ('val.txt', 'test.txt', 'train.txt'):
            cls.relabel_labelfile(root / name, maxsplit, minwidth, imgdir)

    @classmethod
    def split_labelfiles(cls, src, dst, minwidth=3, imgdir='images'):
        """ Cut the images of the label files in ``src`` word by word and write images and labels to ``dst``

        :param src: source root directory
        :param dst: target root directory
        :param minwidth: see ``spliter``
        :param imgdir: name of the sub directory that holds the images
        """

        def func(name):
            """ Convert all images listed in one label file

            p       path of the original .txt label file
            p_im    path of the original image
            q       path of the .txt label file after cutting
            q_im    path of the image after cutting
            """
            p, q = File(name, src), File(name, dst)
            if not p:
                return
            res = []
            for line in p.read().splitlines():
                # Image file and the words to cut
                line = line.split(maxsplit=1)
                if len(line) < 2:
                    continue

                p_im = File(p.parent / f'{imgdir}/{line[0]}')
                words = line[1].split()

                if len(words) < 2:
                    # A single word: the image is copied unchanged
                    q_im = File(f'{imgdir}/{p_im.name}', dst)
                    p_im.copy(q_im)
                    res.append(f'{q_im.name}\t{words[0]}')
                else:
                    # Cut the image
                    imgs = cls.split_img(p_im, len(words), minwidth)
                    # Generate the new labels
                    for k, im in enumerate(imgs):
                        q_im = File(f'{imgdir}/{p_im.stem}_{k}', dst, suffix=p_im.suffix)
                        xlcv.write(im, q_im, if_exists='replace')
                        res.append(f'{q_im.name}\t{words[k]}')
            content = '\n'.join(res)
            q.write(content, if_exists='replace')

        src, dst = Dir(src), Dir(dst)
        for name in ['val.txt', 'test.txt', 'train.txt']:
            func(name)
431
-
432
-
433
class EnglishWordTLS(TextlineSpliter):
    """ Splitter variant for dark text on white images that contain noise """

    @classmethod
    def spliter(cls, img, maxsplit=None, minwidth=3):
        """ Same interface as TextlineSpliter.spliter
        This version targets dark-on-white images disturbed by noise
        """
        gray = xlcv.read(img, 0)
        height, _ = gray.shape
        # Only the middle third (vertically) of the image is used
        middle_band = gray[int(height / 3):int(2 * height / 3)]
        column_mean = middle_band.mean(axis=0)
        # Columns darker than the average become positive; a constant 5 is added on top.
        # NOTE(review): the original remark says the offset compensates the many dark noise
        # dots of the background - confirm the intended sign of the offset.
        profile = column_mean.mean() - column_mean + 5
        return split_vector_interval(profile, maxsplit=maxsplit, minwidth=minwidth)
444
-
445
-
446
class TLSMain:
    """ Ad-hoc entry points that run the splitters on specific datasets (paths are hard-coded) """

    def textstring2016(self):
        """ Relabel the TextString2016 label files """
        dataset_root = r'D:\datasets\TextString2016'
        TextlineSpliter.relabel_labelfiles(dataset_root, minwidth=3)

    def casia(self):
        """ Relabel the three CASIA-HWDB2.x line datasets, relative to their common parent directory """
        os.chdir('/home/datasets/textGroup/casia/offlinehw/CASIA-HWDB2.x_pngImg_line')
        subsets = ('CASIA-HWDB2.0_savePTTSImg_line',
                   'CASIA-HWDB2.1_savePTTSImg_line',
                   'CASIA-HWDB2.2_savePTTSImg_line')
        for subset in subsets:
            TextlineSpliter.relabel_labelfiles(subset, minwidth=3)

    def english_word(self):
        """ Relabel the english-word dataset with the noise tolerant splitter """
        EnglishWordTLS.relabel_labelfiles(r'D:\datasets\english-word', minwidth=10, imgdir='total')

    def sroie(self):
        """ Visualise the split result of 10 sampled SROIE2019 images; results are saved, not shown """
        path = Dir('SROIE2019/task1train_626p_repo/task1train_626p_patch/')
        root = Dir(path, '/home/datasets/textGroup')
        sampled = root.select('images/*.png').sample(10)
        target = File(path / 'temp', '/home/datasets/textGroup')
        TextlineSpliter.show_spliter_imgs(sampled, save=target, show=False)
469
-
470
-
471
if __name__ == '__main__':
    # Placeholder entry point: nothing is executed by default, the empty body is only
    # wrapped in a TicToc context.
    with TicToc(__name__):
        pass