pyxllib 0.3.60.2__tar.gz → 0.3.61__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pyxllib-0.3.60.2/pyxllib.egg-info → pyxllib-0.3.61}/PKG-INFO +1 -1
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/algo/stat.py +1 -4
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/file/specialist/__init__.py +174 -33
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/file/specialist/filelib.py +163 -44
- {pyxllib-0.3.60.2 → pyxllib-0.3.61/pyxllib.egg-info}/PKG-INFO +1 -1
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/data/coco.py +2 -2
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/data/gptlib.py +92 -83
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/setup.py +1 -1
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/LICENSE +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/MANIFEST.in +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/README.md +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/__init__.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/algo/__init__.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/algo/disjoint.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/algo/geo.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/algo/intervals.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/algo/newbie.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/algo/pupil.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/algo/shapelylib.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/algo/specialist.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/algo/treelib.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/algo/unitlib.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/cv/__init__.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/cv/expert.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/cv/imfile.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/cv/imhash.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/cv/pupil.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/cv/rgbfmt.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/cv/trackbartools.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/cv/xlcvlib.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/cv/xlpillib.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/data/__init__.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/data/echarts.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/data/oss.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/data/pglib.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/data/sqlite.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/data/sqllib.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/ext/JLineViewer.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/ext/__init__.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/ext/autogui/__init__.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/ext/autogui/autogui.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/ext/autogui/virtualkey.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/ext/demolib.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/ext/kq5034lib.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/ext/old.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/ext/qt.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/ext/seleniumlib.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/ext/tk.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/ext/unixlib.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/ext/utools.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/ext/webhook.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/ext/win32lib.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/file/__init__.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/file/docxlib.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/file/gitlib.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/file/movielib.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/file/newbie.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/file/onenotelib.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/file/packlib/__init__.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/file/packlib/zipfile.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/file/pdflib.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/file/pupil.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/file/specialist/dirlib.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/file/specialist/download.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/file/xlsxlib.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/prog/__init__.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/prog/deprecatedlib.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/prog/newbie.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/prog/pupil.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/prog/sitepackages.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/prog/specialist/__init__.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/prog/specialist/bc.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/prog/specialist/browser.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/prog/specialist/common.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/prog/specialist/datetime.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/prog/specialist/tictoc.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/prog/specialist/xllog.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/prog/xlosenv.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/stdlib/__init__.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/stdlib/tablepyxl/__init__.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/stdlib/tablepyxl/style.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/stdlib/tablepyxl/tablepyxl.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/text/__init__.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/text/ahocorasick.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/text/latex/__init__.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/text/levenshtein.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/text/nestenv.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/text/newbie.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/text/pupil/__init__.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/text/pupil/common.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/text/pupil/xlalign.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/text/pycode.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/text/specialist/__init__.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/text/specialist/common.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/text/specialist/ptag.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/text/spellchecker.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/text/xmllib.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/xl.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib/xlcv.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib.egg-info/SOURCES.txt +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib.egg-info/dependency_links.txt +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib.egg-info/requires.txt +4 -4
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxllib.egg-info/top_level.txt +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/__init__.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ai/__init__.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ai/clientlib.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ai/specialist.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ai/torch_app.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ai/xlpaddle.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ai/xltorch.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/data/__init__.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/data/datacls.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/data/datasets.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/data/icdar/__init__.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/data/icdar/deteval.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/data/icdar/icdar2013.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/data/icdar/iou.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/data/icdar/rrc_evaluation_funcs_1_1.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/data/imtextline.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/data/labelme.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/data/removeline.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/data/specialist.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/eval/__init__.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/paddleocr.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/__init__.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/configs/rec/multi_language/generate_multi_language_configs.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/data/__init__.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/data/imaug/ColorJitter.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/data/imaug/__init__.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/data/imaug/copy_paste.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/data/imaug/east_process.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/data/imaug/gen_table_mask.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/data/imaug/iaa_augment.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/data/imaug/label_ops.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/data/imaug/make_border_map.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/data/imaug/make_pse_gt.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/data/imaug/make_shrink_map.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/data/imaug/operators.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/data/imaug/pg_process.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/data/imaug/randaugment.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/data/imaug/random_crop_data.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/data/imaug/rec_img_aug.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/data/imaug/sast_process.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/data/imaug/text_image_aug/__init__.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/data/imaug/text_image_aug/augment.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/data/imaug/text_image_aug/warp_mls.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/data/lmdb_dataset.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/data/pgnet_dataset.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/data/pubtab_dataset.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/data/simple_dataset.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/losses/__init__.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/losses/ace_loss.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/losses/basic_loss.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/losses/center_loss.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/losses/cls_loss.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/losses/combined_loss.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/losses/det_basic_loss.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/losses/det_db_loss.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/losses/det_east_loss.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/losses/det_pse_loss.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/losses/det_sast_loss.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/losses/distillation_loss.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/losses/e2e_pg_loss.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/losses/kie_sdmgr_loss.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/losses/rec_aster_loss.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/losses/rec_att_loss.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/losses/rec_ctc_loss.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/losses/rec_enhanced_ctc_loss.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/losses/rec_nrtr_loss.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/losses/rec_sar_loss.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/losses/rec_srn_loss.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/losses/table_att_loss.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/metrics/__init__.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/metrics/cls_metric.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/metrics/det_metric.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/metrics/distillation_metric.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/metrics/e2e_metric.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/metrics/eval_det_iou.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/metrics/kie_metric.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/metrics/rec_metric.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/metrics/table_metric.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/modeling/architectures/__init__.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/modeling/architectures/base_model.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/modeling/architectures/distillation_model.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/modeling/backbones/__init__.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/modeling/backbones/det_mobilenet_v3.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/modeling/backbones/det_resnet_vd.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/modeling/backbones/det_resnet_vd_sast.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/modeling/backbones/e2e_resnet_vd_pg.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/modeling/backbones/kie_unet_sdmgr.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/modeling/backbones/rec_mobilenet_v3.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/modeling/backbones/rec_mv1_enhance.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/modeling/backbones/rec_nrtr_mtb.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/modeling/backbones/rec_resnet_31.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/modeling/backbones/rec_resnet_aster.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/modeling/backbones/rec_resnet_fpn.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/modeling/backbones/rec_resnet_vd.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/modeling/heads/__init__.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/modeling/heads/cls_head.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/modeling/heads/det_db_head.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/modeling/heads/det_east_head.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/modeling/heads/det_pse_head.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/modeling/heads/det_sast_head.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/modeling/heads/e2e_pg_head.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/modeling/heads/kie_sdmgr_head.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/modeling/heads/multiheadAttention.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/modeling/heads/rec_aster_head.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/modeling/heads/rec_att_head.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/modeling/heads/rec_ctc_head.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/modeling/heads/rec_nrtr_head.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/modeling/heads/rec_sar_head.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/modeling/heads/rec_srn_head.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/modeling/heads/self_attention.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/modeling/heads/table_att_head.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/modeling/necks/__init__.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/modeling/necks/db_fpn.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/modeling/necks/east_fpn.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/modeling/necks/fpn.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/modeling/necks/pg_fpn.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/modeling/necks/rnn.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/modeling/necks/sast_fpn.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/modeling/necks/table_fpn.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/modeling/transforms/__init__.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/modeling/transforms/stn.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/modeling/transforms/tps.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/modeling/transforms/tps_spatial_transformer.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/optimizer/__init__.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/optimizer/learning_rate.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/optimizer/lr_scheduler.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/optimizer/optimizer.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/optimizer/regularizer.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/postprocess/__init__.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/postprocess/cls_postprocess.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/postprocess/db_postprocess.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/postprocess/east_postprocess.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/postprocess/locality_aware_nms.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/postprocess/pg_postprocess.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/postprocess/pse_postprocess/__init__.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/postprocess/pse_postprocess/pse/__init__.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/postprocess/pse_postprocess/pse/setup.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/postprocess/pse_postprocess/pse_postprocess.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/postprocess/rec_postprocess.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/postprocess/sast_postprocess.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/tools/__init__.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/tools/eval.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/tools/export_center.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/tools/export_model.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/tools/infer/predict_cls.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/tools/infer/predict_det.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/tools/infer/predict_e2e.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/tools/infer/predict_rec.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/tools/infer/predict_system.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/tools/infer/utility.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/tools/infer_cls.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/tools/infer_det.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/tools/infer_e2e.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/tools/infer_kie.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/tools/infer_rec.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/tools/infer_table.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/tools/program.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/tools/test_hubserving.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/tools/train.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/tools/xlprog.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/utils/EN_symbol_dict.txt +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/utils/__init__.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/utils/dict/ar_dict.txt +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/utils/dict/arabic_dict.txt +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/utils/dict/be_dict.txt +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/utils/dict/bg_dict.txt +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/utils/dict/chinese_cht_dict.txt +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/utils/dict/cyrillic_dict.txt +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/utils/dict/devanagari_dict.txt +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/utils/dict/en_dict.txt +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/utils/dict/fa_dict.txt +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/utils/dict/french_dict.txt +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/utils/dict/german_dict.txt +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/utils/dict/hi_dict.txt +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/utils/dict/it_dict.txt +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/utils/dict/japan_dict.txt +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/utils/dict/ka_dict.txt +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/utils/dict/korean_dict.txt +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/utils/dict/latin_dict.txt +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/utils/dict/mr_dict.txt +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/utils/dict/ne_dict.txt +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/utils/dict/oc_dict.txt +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/utils/dict/pu_dict.txt +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/utils/dict/rs_dict.txt +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/utils/dict/rsc_dict.txt +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/utils/dict/ru_dict.txt +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/utils/dict/ta_dict.txt +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/utils/dict/table_dict.txt +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/utils/dict/table_structure_dict.txt +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/utils/dict/te_dict.txt +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/utils/dict/ug_dict.txt +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/utils/dict/uk_dict.txt +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/utils/dict/ur_dict.txt +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/utils/dict/xi_dict.txt +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/utils/dict90.txt +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/utils/e2e_metric/Deteval.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/utils/e2e_metric/polygon_fast.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/utils/e2e_utils/extract_batchsize.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/utils/e2e_utils/extract_textpoint_fast.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/utils/e2e_utils/extract_textpoint_slow.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/utils/e2e_utils/pgnet_pp_utils.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/utils/e2e_utils/visual.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/utils/en_dict.txt +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/utils/gen_label.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/utils/ic15_dict.txt +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/utils/iou.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/utils/logging.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/utils/network.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/utils/ppocr_keys_v1.txt +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/utils/profiler.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/utils/save_load.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/utils/stats.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppocr/utils/utility.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppstructure/__init__.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppstructure/predict_system.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppstructure/table/__init__.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppstructure/table/eval_table.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppstructure/table/matcher.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppstructure/table/predict_structure.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppstructure/table/predict_table.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppstructure/table/table_metric/__init__.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppstructure/table/table_metric/parallel.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppstructure/table/table_metric/table_metric.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppstructure/table/tablepyxl/__init__.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppstructure/table/tablepyxl/style.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppstructure/table/tablepyxl/tablepyxl.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/ppstructure/utility.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/pyxlpr/xlai.py +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/requirements.txt +0 -0
- {pyxllib-0.3.60.2 → pyxllib-0.3.61}/setup.cfg +0 -0
@@ -205,10 +205,7 @@ def write_dataframes_to_excel(outfile, dataframes, order_mode='序号'):
|
|
205
205
|
if start == 1:
|
206
206
|
start = 0
|
207
207
|
for col_num, value in enumerate(df.columns, start=start):
|
208
|
-
|
209
|
-
writer.sheets[sheet_name].write(0, col_num, value, head_format)
|
210
|
-
except IndexError: # 有bug,先跳过
|
211
|
-
pass
|
208
|
+
writer.sheets[sheet_name].write(0, col_num, value, head_format)
|
212
209
|
|
213
210
|
|
214
211
|
def read_dataframes_from_excel(infile):
|
@@ -4,6 +4,9 @@
|
|
4
4
|
# @Email : 877362867@qq.com
|
5
5
|
# @Date : 2021/06/06 17:46
|
6
6
|
|
7
|
+
from itertools import islice
|
8
|
+
import multiprocessing
|
9
|
+
import multiprocessing.dummy
|
7
10
|
|
8
11
|
from pyxllib.file.specialist.filelib import *
|
9
12
|
from pyxllib.file.specialist.dirlib import *
|
@@ -50,15 +53,42 @@ class JsonlDataFile:
|
|
50
53
|
# 只读取部分数据
|
51
54
|
self.read_partial_records(num_records)
|
52
55
|
|
56
|
+
def __len__(self):
|
57
|
+
return len(self.records)
|
58
|
+
|
59
|
+
def yield_record(self, start=0, end=None, step=1, batch_size=None):
|
60
|
+
""" 返回指定区间的记录
|
61
|
+
|
62
|
+
:param int start: 起始记录索引,默认为0
|
63
|
+
:param int end: 结束记录索引,默认为None(读取到记录末尾)
|
64
|
+
:param int step: 步长,默认为1
|
65
|
+
:param int batch_size: 每批返回的记录数,如果为None,则逐记录返回
|
66
|
+
"""
|
67
|
+
total_records = len(self.records) # 获取总记录数
|
68
|
+
|
69
|
+
# 处理负索引
|
70
|
+
if start < 0 or (end is not None and end < 0):
|
71
|
+
if start < 0:
|
72
|
+
start = total_records + start
|
73
|
+
if end is not None and end < 0:
|
74
|
+
end = total_records + end
|
75
|
+
|
76
|
+
iterator = islice(self.records, start, end, step)
|
77
|
+
while True:
|
78
|
+
batch = list(islice(iterator, batch_size))
|
79
|
+
if not batch:
|
80
|
+
break
|
81
|
+
if batch_size is None:
|
82
|
+
yield from batch
|
83
|
+
else:
|
84
|
+
yield batch
|
85
|
+
|
53
86
|
def read_partial_records(self, num_records):
|
54
87
|
""" 从jsonl文件中只读取指定数量的记录 """
|
55
88
|
if self.infile and self.infile.is_file():
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
if not line:
|
60
|
-
break # 如果已经读完文件,跳出循环
|
61
|
-
self.records.append(json.loads(line))
|
89
|
+
lines = next(self.infile.yield_line(batch_size=num_records))
|
90
|
+
for line in lines:
|
91
|
+
self.records.append(json.loads(line))
|
62
92
|
|
63
93
|
def save(self, outfile=None, ensure_ascii=False):
|
64
94
|
""" 将当前数据保存到指定的jsonl文件中 """
|
@@ -208,31 +238,24 @@ class JsonlDataFile:
|
|
208
238
|
self.records += other.records
|
209
239
|
return self
|
210
240
|
|
211
|
-
def
|
241
|
+
def process_each_record(self, func, *, inplace=False, print_mode=0, threads_num=1):
|
212
242
|
""" 对records中的每个record应用函数func,可以选择是否在原地修改,以及是否显示进度条
|
213
243
|
|
214
244
|
:param function func: 对record进行处理的函数,应接受一个record作为参数并返回处理后的record,如果返回None则删除该record
|
215
245
|
:param bool inplace: 是否在原地修改records,如果为False(默认),则创建新的JsonlDataFile并返回
|
216
246
|
:param int print_mode: 是否显示处理过程的进度条,0表示不显示(默认),1表示显示
|
217
247
|
:return JsonlDataFile or None: 如果inplace为False,则返回新的JsonlDataFile,否则返回None
|
248
|
+
:param int threads_num: 线程数,默认为1,即单线程
|
218
249
|
|
219
250
|
遍历self.records,对每个record执行func函数,如果func返回None,则不包含该record到新的records中。
|
220
|
-
|
221
|
-
>>> data_file = JsonlDataFile()
|
222
|
-
>>> data_file.records = [{'a': 1}, {'b': 2}, {'c': 3}]
|
223
|
-
>>> func = lambda x: {k: v * 2 for k, v in x.items()} if 'a' in x else None # 定义一个只处理含有'a'的record并将其值翻倍的函数
|
224
|
-
>>> new_data_file = data_file.apply_function_to_records(func, print_mode=1)
|
225
|
-
>>> new_data_file.records
|
226
|
-
[{'a': 2}]
|
227
|
-
>>> data_file.records # 原始的data_file并没有被修改
|
228
|
-
[{'a': 1}, {'b': 2}, {'c': 3}]
|
229
251
|
"""
|
230
|
-
|
231
|
-
|
232
|
-
|
252
|
+
with multiprocessing.dummy.Pool(threads_num) as executor:
|
253
|
+
if print_mode == 1:
|
254
|
+
results = tqdm(executor.imap(func, self.records), total=len(self.records))
|
255
|
+
else:
|
256
|
+
results = executor.imap(func, self.records)
|
233
257
|
|
234
|
-
|
235
|
-
new_records = [func(record) for record in records]
|
258
|
+
new_records = list(results)
|
236
259
|
|
237
260
|
if inplace:
|
238
261
|
self.records = new_records
|
@@ -242,38 +265,156 @@ class JsonlDataFile:
|
|
242
265
|
new_data_file.records = new_records
|
243
266
|
return new_data_file
|
244
267
|
|
268
|
+
def update_each_record(self, func, print_mode=0):
|
269
|
+
""" 遍历并对原始数据进行更改 """
|
270
|
+
self.process_each_record(func, inplace=True, print_mode=print_mode)
|
271
|
+
|
245
272
|
|
246
273
|
class JsonlDataDir:
|
247
|
-
|
274
|
+
""" 注意这个类开发目标,应该是尽量去模拟JsonDataFile,让下游工作更好衔接统一 """
|
275
|
+
|
276
|
+
def __init__(self, root):
|
248
277
|
""" 一般用来处理较大的jsonl文件,将其该放到一个目录里,拆分成多个jsonl文件
|
249
278
|
|
250
279
|
注意待处理的文件名是依照 01.jsonl, 02.jsonl,... 的格式识别的,不要改动这个规则
|
251
280
|
"""
|
252
|
-
self.
|
281
|
+
self.root = XlPath(root)
|
253
282
|
self.files = []
|
254
|
-
for f in self.
|
255
|
-
if re.match(r'
|
283
|
+
for f in self.root.glob_files('*.jsonl'):
|
284
|
+
if re.match(r'_?\d+$', f.stem): # 目前先用'_?'兼容旧版,但以后应该固定只匹配_\d+
|
256
285
|
self.files.append(f)
|
257
286
|
|
287
|
+
def __bool__(self):
|
288
|
+
if self.root.is_dir() and self.files:
|
289
|
+
return True
|
290
|
+
else:
|
291
|
+
return False
|
292
|
+
|
293
|
+
def count_records(self):
|
294
|
+
total = 0
|
295
|
+
for f in self.files:
|
296
|
+
total += len(JsonlDataFile(f).records)
|
297
|
+
return total
|
298
|
+
|
258
299
|
def check(self):
|
300
|
+
""" 检查一些数据状态 """
|
259
301
|
print('文件数:', len(self.files))
|
260
302
|
|
261
303
|
@classmethod
|
262
|
-
def init_from_file(cls, file, lines_per_file=
|
304
|
+
def init_from_file(cls, file, lines_per_file=10000):
|
263
305
|
""" 从一个jsonl文件初始化一个JsonlDataDir对象 """
|
264
306
|
file = XlPath(file)
|
265
307
|
dst_dir = file.parent / file.stem
|
266
|
-
if not dst_dir.is_dir():
|
308
|
+
if not dst_dir.is_dir() and file.is_file():
|
267
309
|
file.split_to_dir(lines_per_file, dst_dir)
|
268
310
|
c = cls(dst_dir)
|
269
311
|
return c
|
270
312
|
|
271
|
-
def
|
272
|
-
"""
|
313
|
+
def rearrange(self, lines_per_file=10000):
|
314
|
+
""" 重新整理划分文件
|
315
|
+
|
316
|
+
:param int lines_per_file: 每个文件的行数
|
317
|
+
"""
|
318
|
+
output_dir = self.root
|
319
|
+
|
320
|
+
# 使用临时文件名前缀,以便在处理完成后更改为最终的文件名
|
321
|
+
temp_prefix = 'temp_'
|
322
|
+
|
323
|
+
new_file_count = 1
|
324
|
+
new_file = None
|
325
|
+
line_count = 0
|
326
|
+
|
327
|
+
# 计算总行数以确定文件名的前导零数量
|
328
|
+
total_lines = sum(1 for file in self.files for _ in file.open('r', encoding='utf-8'))
|
329
|
+
num_digits = len(str((total_lines + lines_per_file - 1) // lines_per_file))
|
330
|
+
|
331
|
+
for file in self.files:
|
332
|
+
with file.open('r', encoding='utf-8') as f:
|
333
|
+
for line in f:
|
334
|
+
if line_count == 0:
|
335
|
+
if new_file is not None:
|
336
|
+
new_file.close()
|
337
|
+
new_file_name = f'{temp_prefix}{new_file_count:0{num_digits}d}.jsonl'
|
338
|
+
new_file_path = output_dir / new_file_name
|
339
|
+
new_file = new_file_path.open('w', encoding='utf-8')
|
340
|
+
new_file_count += 1
|
341
|
+
|
342
|
+
new_file.write(line)
|
343
|
+
line_count += 1
|
344
|
+
|
345
|
+
if line_count == lines_per_file:
|
346
|
+
line_count = 0
|
347
|
+
|
348
|
+
if new_file is not None:
|
349
|
+
new_file.close()
|
350
|
+
|
351
|
+
# 删除旧文件
|
352
|
+
for file in self.files:
|
353
|
+
os.remove(file)
|
354
|
+
|
355
|
+
# 将临时文件名更改为最终的文件名
|
356
|
+
for temp_file in output_dir.glob(f'{temp_prefix}*.jsonl'):
|
357
|
+
final_name = temp_file.name[len(temp_prefix):]
|
358
|
+
temp_file.rename(output_dir / final_name)
|
359
|
+
|
360
|
+
def yield_record(self, batch_size=None):
|
361
|
+
""" 返回数据记录
|
362
|
+
|
363
|
+
:param int batch_size: 每批返回的记录数,如果为None,则逐条返回
|
273
364
|
"""
|
274
|
-
n = len(self.files)
|
275
365
|
for i, file in enumerate(self.files):
|
276
|
-
|
366
|
+
data = file.read_jsonl()
|
367
|
+
iterator = iter(data)
|
368
|
+
while True:
|
369
|
+
batch = list(islice(iterator, batch_size))
|
370
|
+
if not batch:
|
371
|
+
break
|
372
|
+
if batch_size is None:
|
373
|
+
yield from batch
|
374
|
+
else:
|
375
|
+
yield batch
|
376
|
+
|
377
|
+
def process_each_record(self, func, *, inplace=False,
|
378
|
+
print_mode=1, desc=None,
|
379
|
+
processes_num=1, threads_num=1):
|
380
|
+
""" 封装的对每个record进行操作的函数
|
381
|
+
|
382
|
+
:param int processes_num: 进程数,每个文件为单独一个进程
|
383
|
+
:param int threads_num: 线程数,每个文件处理的时候使用几个线程
|
384
|
+
"""
|
385
|
+
for i, file in tqdm(enumerate(self.files), desc=desc, disable=not print_mode):
|
277
386
|
data_file = JsonlDataFile(file)
|
278
|
-
data_file.
|
279
|
-
|
387
|
+
data_file.process_each_record(func, inplace=inplace,
|
388
|
+
threads_num=threads_num,
|
389
|
+
print_mode=print_mode > 1)
|
390
|
+
if inplace:
|
391
|
+
data_file.save(file)
|
392
|
+
|
393
|
+
def update_each_record(self, func, desc=None):
|
394
|
+
""" 封装的对每个record进行操作的函数
|
395
|
+
"""
|
396
|
+
self.process_each_record(func, inplace=True, desc=desc)
|
397
|
+
|
398
|
+
def process_each_file(self, func, *, desc=None):
|
399
|
+
for i, file in tqdm(enumerate(self.files), desc=desc):
|
400
|
+
func(file)
|
401
|
+
|
402
|
+
def process_each_records(self, func, *, inplace=False, desc=None):
|
403
|
+
for i, file in tqdm(enumerate(self.files), desc=desc):
|
404
|
+
records = XlPath(file).read_jsonl()
|
405
|
+
new_records = func(records) # 如果使用inplace,那么需要函数配套返回新的records
|
406
|
+
if inplace:
|
407
|
+
XlPath(file).write_jsonl(new_records)
|
408
|
+
|
409
|
+
def save(self, dst_path=None):
|
410
|
+
""" 将数据合并到一个jsonl文件中 """
|
411
|
+
if not dst_path:
|
412
|
+
dst_path = self.root.parent / f'{self.root.name}.jsonl'
|
413
|
+
dst_path = XlPath(dst_path)
|
414
|
+
dst_path.parent.mkdir(parents=True, exist_ok=True)
|
415
|
+
with dst_path.open('w', encoding='utf8') as f:
|
416
|
+
for file in tqdm(self.files, desc=f'合并文件并保存 {dst_path.name}'):
|
417
|
+
with file.open('r', encoding='utf8') as f2:
|
418
|
+
for line in f2:
|
419
|
+
if line.strip(): # 不存储空行
|
420
|
+
f.write(line)
|
@@ -21,6 +21,8 @@ import tempfile
|
|
21
21
|
import ujson
|
22
22
|
from collections import defaultdict, Counter
|
23
23
|
import math
|
24
|
+
from itertools import islice
|
25
|
+
import datetime
|
24
26
|
|
25
27
|
# import chardet
|
26
28
|
import charset_normalizer
|
@@ -916,22 +918,57 @@ class XlPath(type(pathlib.Path())):
|
|
916
918
|
# 判断路径字符串是否包含相对路径字符串
|
917
919
|
return item_str.startswith(abs_path_str) or abs_path_str == item_str
|
918
920
|
|
919
|
-
def
|
920
|
-
"""
|
921
|
-
|
922
|
-
|
921
|
+
def get_total_lines(self, encoding='utf-8', skip_blank=False):
|
922
|
+
""" 统计文件的行数(注意会统计空行,所以在某些场合可能与预期理解的条目数不太一致)
|
923
|
+
|
924
|
+
:param str encoding: 文件编码,默认为'utf-8'
|
925
|
+
:param bool skip_blank: 是否跳过空白行,默认为True
|
926
|
+
:return: 文件的行数
|
927
|
+
"""
|
928
|
+
line_count = 0
|
929
|
+
with open(self, 'r', encoding=encoding) as file:
|
930
|
+
for line in file:
|
931
|
+
if skip_blank and not line.strip(): # 跳过空白行
|
932
|
+
continue
|
933
|
+
line_count += 1
|
934
|
+
return line_count
|
935
|
+
|
936
|
+
def yield_line(self, start=0, end=None, step=1, batch_size=None, encoding='utf-8'):
|
937
|
+
""" 返回指定区间的文件行
|
938
|
+
|
939
|
+
:param int start: 起始行,默认为0
|
940
|
+
:param int end: 结束行,默认为None(读取到文件末尾)
|
941
|
+
:param int step: 步长,默认为1
|
942
|
+
:param int batch_size: 每批返回的行数,如果为None,则逐行返回
|
943
|
+
"""
|
944
|
+
total_lines = None # 使用局部变量缓存总行数
|
945
|
+
# 处理负索引
|
946
|
+
if start < 0 or (end is not None and end < 0):
|
947
|
+
total_lines = total_lines or self.get_total_lines()
|
948
|
+
if start < 0:
|
949
|
+
start = total_lines + start
|
950
|
+
if end is not None and end < 0:
|
951
|
+
end = total_lines + end
|
952
|
+
|
953
|
+
with open(self, 'r', encoding=encoding) as file:
|
954
|
+
iterator = islice(file, start, end, step)
|
923
955
|
while True:
|
924
|
-
|
925
|
-
if not
|
956
|
+
batch = list(islice(iterator, batch_size))
|
957
|
+
if not batch:
|
926
958
|
break
|
927
|
-
|
928
|
-
|
959
|
+
batch = [line.rstrip('\n') for line in batch] # 删除每行末尾的换行符
|
960
|
+
if batch_size is None:
|
961
|
+
yield from batch
|
962
|
+
else:
|
963
|
+
yield batch
|
929
964
|
|
930
|
-
def split_to_dir(self, lines_per_file, dst_dir=None, encoding='utf-8'
|
965
|
+
def split_to_dir(self, lines_per_file, dst_dir=None, encoding='utf-8',
|
966
|
+
filename_template="_{index}{suffix}"):
|
931
967
|
""" 将文件按行拆分到多个子文件中
|
932
968
|
|
933
969
|
:param int lines_per_file: 打算拆分的每个新文件的行数
|
934
|
-
:param str dst_dir: 目标目录,未输入的时候,输出到同stem
|
970
|
+
:param str dst_dir: 目标目录,未输入的时候,输出到同stem名的目录下
|
971
|
+
:param str filename_template: 文件名模板,可以包含 {stem}, {index} 和 {suffix} 占位符
|
935
972
|
:return list: 拆分的文件路径列表
|
936
973
|
拆分后文件名类似如下: 01.jsonl, 02.jsonl, ...
|
937
974
|
"""
|
@@ -950,7 +987,6 @@ class XlPath(type(pathlib.Path())):
|
|
950
987
|
|
951
988
|
# 2 拆分文件
|
952
989
|
split_files = [] # 用于保存拆分的文件路径
|
953
|
-
tmp_files = [] # 用于保存临时文件路径
|
954
990
|
outfile = None
|
955
991
|
filename_format = "{:04d}"
|
956
992
|
outfile_index = 0
|
@@ -964,7 +1000,6 @@ class XlPath(type(pathlib.Path())):
|
|
964
1000
|
outfile.close()
|
965
1001
|
outfile_path = dst_dir / f"{self.stem}_{filename_format.format(outfile_index)}{suffix}"
|
966
1002
|
outfile = open(outfile_path, 'w', encoding='utf-8')
|
967
|
-
tmp_files.append(str(outfile_path))
|
968
1003
|
split_files.append(outfile_path) # 先占位,后面再填充
|
969
1004
|
outfile_index += 1
|
970
1005
|
outfile.write(line)
|
@@ -976,13 +1011,54 @@ class XlPath(type(pathlib.Path())):
|
|
976
1011
|
# 3 重新设置文件名的对齐宽度
|
977
1012
|
new_filename_format = "{:0" + str(len(str(len(split_files)))) + "d}"
|
978
1013
|
for i, old_file in enumerate(split_files):
|
979
|
-
new_name = dst_dir /
|
1014
|
+
new_name = dst_dir / filename_template.format(stem=self.stem,
|
1015
|
+
index=new_filename_format.format(i),
|
1016
|
+
suffix=suffix)
|
980
1017
|
os.rename(old_file, new_name)
|
981
1018
|
split_files[i] = new_name
|
982
1019
|
|
983
1020
|
# 返回拆分的文件路径列表
|
984
1021
|
return split_files
|
985
1022
|
|
1023
|
+
def merge_from_files(self, files,
|
1024
|
+
ignore_empty_lines_between_files=False,
|
1025
|
+
encoding='utf-8'):
|
1026
|
+
""" 将多个文件合并到一个文件中
|
1027
|
+
|
1028
|
+
:param list files: 要合并的文件列表
|
1029
|
+
:param bool ignore_empty_lines_between_files: 是否忽略文件间的空行
|
1030
|
+
:param str encoding: 文件编码,默认为'utf-8'
|
1031
|
+
:return XlPath: 合并后的文件路径
|
1032
|
+
"""
|
1033
|
+
# 合并文件
|
1034
|
+
prev_line_end_with_newline = True # 记录上一次text的最后一个字符是否为'\n'
|
1035
|
+
with open(self, 'w', encoding=encoding) as outfile:
|
1036
|
+
for i, file in enumerate(files):
|
1037
|
+
file = XlPath(file)
|
1038
|
+
text = file.read_text(encoding=encoding)
|
1039
|
+
if ignore_empty_lines_between_files:
|
1040
|
+
text = text.rstrip('\n')
|
1041
|
+
if i > 0 and not prev_line_end_with_newline and text != '':
|
1042
|
+
outfile.write('\n')
|
1043
|
+
outfile.write(text)
|
1044
|
+
prev_line_end_with_newline = text.endswith('\n')
|
1045
|
+
|
1046
|
+
def merge_from_dir(self, src_dir, filename_template="_{index}{suffix}", encoding='utf-8'):
|
1047
|
+
""" 将目录中的多个文件合并到一个文件中
|
1048
|
+
|
1049
|
+
:param str src_dir: 要合并的文件所在的目录
|
1050
|
+
:param str filename_template: 文件名模板,可以包含 {stem}, {index} 和 {suffix} 占位符
|
1051
|
+
:param str encoding: 文件编码,默认为'utf-8'
|
1052
|
+
:return XlPath: 合并后的文件路径
|
1053
|
+
"""
|
1054
|
+
src_dir = XlPath(src_dir)
|
1055
|
+
stem = src_dir.name
|
1056
|
+
|
1057
|
+
pattern = filename_template.format(stem=stem, index="(\d+)", suffix=".*")
|
1058
|
+
files = [file for file in src_dir.iterdir() if re.match(pattern, file.name)] # 获取目录中符合模式的文件
|
1059
|
+
|
1060
|
+
self.merge_from_files(files, ignore_empty_lines_between_files=True, encoding=encoding)
|
1061
|
+
|
986
1062
|
def __1_read_write(self):
|
987
1063
|
""" 参考标准库的
|
988
1064
|
read_bytes、read_text
|
@@ -1089,7 +1165,7 @@ class XlPath(type(pathlib.Path())):
|
|
1089
1165
|
def write_jsonl(self, list_data, ensure_ascii=False):
|
1090
1166
|
""" 由于这种格式主要是跟商汤这边对接,就尽量跟它们的格式进行兼容 """
|
1091
1167
|
content = '\n'.join([json.dumps(x, ensure_ascii=ensure_ascii) for x in list_data])
|
1092
|
-
self.write_text_unix(content
|
1168
|
+
self.write_text_unix(content)
|
1093
1169
|
|
1094
1170
|
def read_csv(self, encoding='utf8', *, errors='strict', return_mode: bool = False,
|
1095
1171
|
delimiter=',', quotechar='"', **kwargs):
|
@@ -1551,11 +1627,49 @@ class XlPath(type(pathlib.Path())):
|
|
1551
1627
|
else:
|
1552
1628
|
return msg
|
1553
1629
|
|
1554
|
-
def check_summary(self, print_mode=False,
|
1630
|
+
def check_summary(self, print_mode=True, return_mode=False, **kwargs):
|
1631
|
+
if self.is_dir():
|
1632
|
+
res = self._check_dir_summary(print_mode, **kwargs)
|
1633
|
+
elif self.is_file():
|
1634
|
+
res = self._check_file_summary(print_mode, **kwargs)
|
1635
|
+
else:
|
1636
|
+
res = '文件不存在'
|
1637
|
+
print(res)
|
1638
|
+
|
1639
|
+
if return_mode:
|
1640
|
+
return res
|
1641
|
+
|
1642
|
+
def _check_file_summary(self, print_mode=True, **kwargs):
|
1643
|
+
""" 对文件进行通用的状态检查
|
1644
|
+
|
1645
|
+
:param bool print_mode: 是否将统计信息打印到控制台
|
1646
|
+
:return dict: 文件的统计信息
|
1647
|
+
"""
|
1648
|
+
file_summary = {}
|
1649
|
+
|
1650
|
+
# 文件大小
|
1651
|
+
file_summary['文件大小'] = self.size(human_readable=True)
|
1652
|
+
|
1653
|
+
# 文件行数
|
1654
|
+
file_summary['文件行数'] = self.get_total_lines()
|
1655
|
+
|
1656
|
+
# 文件修改时间
|
1657
|
+
mod_time_str = datetime.datetime.fromtimestamp(self.mtime()).strftime('%Y-%m-%d %H:%M:%S')
|
1658
|
+
file_summary['修改时间'] = mod_time_str
|
1659
|
+
|
1660
|
+
# 如果print_mode为True,则将统计信息打印到控制台
|
1661
|
+
if print_mode:
|
1662
|
+
for key, value in file_summary.items():
|
1663
|
+
print(f"{key}: {value}")
|
1664
|
+
|
1665
|
+
return file_summary
|
1666
|
+
|
1667
|
+
def _check_dir_summary(self, print_mode=True, hash_func=None, run_mode=99):
|
1555
1668
|
""" 对文件夹情况进行通用的状态检查
|
1556
1669
|
|
1557
1670
|
:param hash_func: 可以传入自定义的hash函数,用于第四块的重复文件运算
|
1558
1671
|
其实默认的get_etag就没啥问题,只是有时候为了性能考虑,可能会传入一个支持,提前有缓存知道etag的函数
|
1672
|
+
:param int run_mode: 只运行编号内的功能
|
1559
1673
|
"""
|
1560
1674
|
if not self.is_dir():
|
1561
1675
|
return ''
|
@@ -1567,43 +1681,48 @@ class XlPath(type(pathlib.Path())):
|
|
1567
1681
|
|
1568
1682
|
# 一 目录大小,二 各后缀文件大小
|
1569
1683
|
msg = []
|
1570
|
-
|
1571
|
-
|
1684
|
+
if run_mode >= 1: # 1和2目前是绑定一起运行的
|
1685
|
+
printf('【' + self.as_posix() + '】目录检查')
|
1686
|
+
printf('\n'.join(self.check_size('list')))
|
1572
1687
|
|
1573
1688
|
# 三 重名文件
|
1574
|
-
|
1575
|
-
|
1689
|
+
if run_mode >= 3:
|
1690
|
+
printf('\n三、重名文件(忽略大小写,跨目录检查name重复情况)')
|
1691
|
+
printf('\n'.join(self.check_repeat_name_files(print_mode=False)))
|
1576
1692
|
|
1577
1693
|
# 四 重复文件
|
1578
|
-
|
1579
|
-
|
1694
|
+
if run_mode >= 4:
|
1695
|
+
printf('\n四、重复文件(etag相同)')
|
1696
|
+
printf('\n'.join(self.check_repeat_files(print_mode=False, hash_func=hash_func)))
|
1580
1697
|
|
1581
1698
|
# 五 错误扩展名
|
1582
|
-
|
1583
|
-
|
1584
|
-
|
1699
|
+
if run_mode >= 5:
|
1700
|
+
printf('\n五、错误扩展名')
|
1701
|
+
for i, (f1, suffix2) in enumerate(self.xglob_faker_suffix_files('**/*'), start=1):
|
1702
|
+
printf(f'{i}、{f1.relpath(self)} -> {suffix2}')
|
1585
1703
|
|
1586
1704
|
# 六 文件配对
|
1587
|
-
|
1588
|
-
|
1589
|
-
|
1590
|
-
|
1591
|
-
|
1592
|
-
|
1593
|
-
|
1594
|
-
|
1595
|
-
|
1596
|
-
|
1597
|
-
|
1598
|
-
|
1599
|
-
|
1600
|
-
|
1601
|
-
|
1602
|
-
|
1603
|
-
|
1604
|
-
|
1605
|
-
|
1606
|
-
|
1705
|
+
if run_mode >= 6:
|
1706
|
+
printf('\n六、文件配对(检查每个目录里stem名称是否配对,列出文件组成不单一的目录结构,请重点检查落单未配对的情况)')
|
1707
|
+
prompt = False
|
1708
|
+
for root, dirs, files in os.walk(self):
|
1709
|
+
suffix_counts = defaultdict(list)
|
1710
|
+
for file in files:
|
1711
|
+
stem, suffix = os.path.splitext(file)
|
1712
|
+
suffix_counts[stem].append(suffix)
|
1713
|
+
suffix_counts = {k: tuple(sorted(v)) for k, v in suffix_counts.items()}
|
1714
|
+
suffix_counts2 = {v: k for k, v in suffix_counts.items()} # 反向存储,如果有重复v会进行覆盖
|
1715
|
+
ct = Counter(suffix_counts.values())
|
1716
|
+
if len(ct.keys()) > 1:
|
1717
|
+
printf(root)
|
1718
|
+
for k, v in ct.most_common():
|
1719
|
+
tag = f'\t{k}: {v}'
|
1720
|
+
if v == 1:
|
1721
|
+
tag += f',{suffix_counts2[k]}'
|
1722
|
+
if len(k) > 1 and not prompt:
|
1723
|
+
tag += f'\t标记注解:有{v}组stem相同文件,配套有{k}这些后缀。其他标记同理。'
|
1724
|
+
prompt = True
|
1725
|
+
printf(tag)
|
1607
1726
|
|
1608
1727
|
return '\n'.join(msg)
|
1609
1728
|
|
@@ -41,7 +41,7 @@ from pyxllib.prog.pupil import DictTool
|
|
41
41
|
from pyxllib.prog.specialist import mtqdm
|
42
42
|
from pyxllib.algo.pupil import Groups, make_index_function, matchpairs
|
43
43
|
from pyxllib.algo.geo import rect_bounds, rect2polygon, reshape_coords, ltrb2xywh, xywh2ltrb, ComputeIou
|
44
|
-
from pyxllib.algo.stat import
|
44
|
+
from pyxllib.algo.stat import write_dataframes_to_excel
|
45
45
|
from pyxllib.file.specialist import PathGroups, XlPath
|
46
46
|
from pyxllib.prog.specialist import get_xllog
|
47
47
|
from pyxlpr.data.icdar import IcdarEval
|
@@ -1272,7 +1272,7 @@ class CocoMatch(CocoParser, CocoMatchBase):
|
|
1272
1272
|
print(self.parse_dt_score())
|
1273
1273
|
|
1274
1274
|
def to_excel(self, savepath, *, segmentation=False):
|
1275
|
-
|
1275
|
+
write_dataframes_to_excel(savepath,
|
1276
1276
|
{'images': self.images,
|
1277
1277
|
'categories': self.categories,
|
1278
1278
|
'match_anns': self.match_anns})
|