pyxllib 0.3.61__tar.gz → 0.3.62__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pyxllib-0.3.61/pyxllib.egg-info → pyxllib-0.3.62}/PKG-INFO +1 -1
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/file/specialist/__init__.py +6 -3
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/file/xlsxlib.py +368 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62/pyxllib.egg-info}/PKG-INFO +1 -1
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/data/gptlib.py +26 -5
- {pyxllib-0.3.61 → pyxllib-0.3.62}/setup.py +1 -1
- {pyxllib-0.3.61 → pyxllib-0.3.62}/LICENSE +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/MANIFEST.in +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/README.md +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/__init__.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/algo/__init__.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/algo/disjoint.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/algo/geo.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/algo/intervals.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/algo/newbie.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/algo/pupil.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/algo/shapelylib.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/algo/specialist.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/algo/stat.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/algo/treelib.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/algo/unitlib.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/cv/__init__.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/cv/expert.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/cv/imfile.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/cv/imhash.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/cv/pupil.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/cv/rgbfmt.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/cv/trackbartools.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/cv/xlcvlib.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/cv/xlpillib.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/data/__init__.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/data/echarts.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/data/oss.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/data/pglib.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/data/sqlite.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/data/sqllib.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/ext/JLineViewer.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/ext/__init__.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/ext/autogui/__init__.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/ext/autogui/autogui.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/ext/autogui/virtualkey.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/ext/demolib.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/ext/kq5034lib.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/ext/old.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/ext/qt.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/ext/seleniumlib.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/ext/tk.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/ext/unixlib.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/ext/utools.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/ext/webhook.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/ext/win32lib.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/file/__init__.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/file/docxlib.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/file/gitlib.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/file/movielib.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/file/newbie.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/file/onenotelib.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/file/packlib/__init__.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/file/packlib/zipfile.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/file/pdflib.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/file/pupil.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/file/specialist/dirlib.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/file/specialist/download.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/file/specialist/filelib.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/prog/__init__.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/prog/deprecatedlib.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/prog/newbie.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/prog/pupil.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/prog/sitepackages.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/prog/specialist/__init__.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/prog/specialist/bc.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/prog/specialist/browser.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/prog/specialist/common.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/prog/specialist/datetime.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/prog/specialist/tictoc.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/prog/specialist/xllog.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/prog/xlosenv.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/stdlib/__init__.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/stdlib/tablepyxl/__init__.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/stdlib/tablepyxl/style.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/stdlib/tablepyxl/tablepyxl.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/text/__init__.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/text/ahocorasick.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/text/latex/__init__.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/text/levenshtein.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/text/nestenv.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/text/newbie.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/text/pupil/__init__.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/text/pupil/common.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/text/pupil/xlalign.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/text/pycode.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/text/specialist/__init__.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/text/specialist/common.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/text/specialist/ptag.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/text/spellchecker.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/text/xmllib.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/xl.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib/xlcv.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib.egg-info/SOURCES.txt +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib.egg-info/dependency_links.txt +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib.egg-info/requires.txt +4 -4
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxllib.egg-info/top_level.txt +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/__init__.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ai/__init__.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ai/clientlib.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ai/specialist.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ai/torch_app.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ai/xlpaddle.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ai/xltorch.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/data/__init__.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/data/coco.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/data/datacls.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/data/datasets.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/data/icdar/__init__.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/data/icdar/deteval.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/data/icdar/icdar2013.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/data/icdar/iou.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/data/icdar/rrc_evaluation_funcs_1_1.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/data/imtextline.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/data/labelme.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/data/removeline.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/data/specialist.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/eval/__init__.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/paddleocr.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/__init__.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/configs/rec/multi_language/generate_multi_language_configs.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/data/__init__.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/data/imaug/ColorJitter.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/data/imaug/__init__.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/data/imaug/copy_paste.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/data/imaug/east_process.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/data/imaug/gen_table_mask.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/data/imaug/iaa_augment.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/data/imaug/label_ops.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/data/imaug/make_border_map.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/data/imaug/make_pse_gt.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/data/imaug/make_shrink_map.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/data/imaug/operators.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/data/imaug/pg_process.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/data/imaug/randaugment.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/data/imaug/random_crop_data.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/data/imaug/rec_img_aug.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/data/imaug/sast_process.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/data/imaug/text_image_aug/__init__.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/data/imaug/text_image_aug/augment.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/data/imaug/text_image_aug/warp_mls.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/data/lmdb_dataset.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/data/pgnet_dataset.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/data/pubtab_dataset.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/data/simple_dataset.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/losses/__init__.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/losses/ace_loss.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/losses/basic_loss.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/losses/center_loss.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/losses/cls_loss.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/losses/combined_loss.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/losses/det_basic_loss.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/losses/det_db_loss.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/losses/det_east_loss.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/losses/det_pse_loss.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/losses/det_sast_loss.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/losses/distillation_loss.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/losses/e2e_pg_loss.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/losses/kie_sdmgr_loss.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/losses/rec_aster_loss.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/losses/rec_att_loss.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/losses/rec_ctc_loss.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/losses/rec_enhanced_ctc_loss.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/losses/rec_nrtr_loss.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/losses/rec_sar_loss.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/losses/rec_srn_loss.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/losses/table_att_loss.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/metrics/__init__.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/metrics/cls_metric.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/metrics/det_metric.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/metrics/distillation_metric.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/metrics/e2e_metric.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/metrics/eval_det_iou.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/metrics/kie_metric.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/metrics/rec_metric.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/metrics/table_metric.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/modeling/architectures/__init__.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/modeling/architectures/base_model.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/modeling/architectures/distillation_model.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/modeling/backbones/__init__.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/modeling/backbones/det_mobilenet_v3.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/modeling/backbones/det_resnet_vd.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/modeling/backbones/det_resnet_vd_sast.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/modeling/backbones/e2e_resnet_vd_pg.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/modeling/backbones/kie_unet_sdmgr.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/modeling/backbones/rec_mobilenet_v3.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/modeling/backbones/rec_mv1_enhance.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/modeling/backbones/rec_nrtr_mtb.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/modeling/backbones/rec_resnet_31.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/modeling/backbones/rec_resnet_aster.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/modeling/backbones/rec_resnet_fpn.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/modeling/backbones/rec_resnet_vd.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/modeling/heads/__init__.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/modeling/heads/cls_head.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/modeling/heads/det_db_head.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/modeling/heads/det_east_head.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/modeling/heads/det_pse_head.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/modeling/heads/det_sast_head.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/modeling/heads/e2e_pg_head.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/modeling/heads/kie_sdmgr_head.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/modeling/heads/multiheadAttention.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/modeling/heads/rec_aster_head.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/modeling/heads/rec_att_head.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/modeling/heads/rec_ctc_head.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/modeling/heads/rec_nrtr_head.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/modeling/heads/rec_sar_head.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/modeling/heads/rec_srn_head.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/modeling/heads/self_attention.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/modeling/heads/table_att_head.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/modeling/necks/__init__.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/modeling/necks/db_fpn.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/modeling/necks/east_fpn.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/modeling/necks/fpn.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/modeling/necks/pg_fpn.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/modeling/necks/rnn.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/modeling/necks/sast_fpn.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/modeling/necks/table_fpn.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/modeling/transforms/__init__.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/modeling/transforms/stn.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/modeling/transforms/tps.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/modeling/transforms/tps_spatial_transformer.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/optimizer/__init__.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/optimizer/learning_rate.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/optimizer/lr_scheduler.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/optimizer/optimizer.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/optimizer/regularizer.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/postprocess/__init__.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/postprocess/cls_postprocess.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/postprocess/db_postprocess.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/postprocess/east_postprocess.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/postprocess/locality_aware_nms.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/postprocess/pg_postprocess.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/postprocess/pse_postprocess/__init__.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/postprocess/pse_postprocess/pse/__init__.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/postprocess/pse_postprocess/pse/setup.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/postprocess/pse_postprocess/pse_postprocess.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/postprocess/rec_postprocess.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/postprocess/sast_postprocess.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/tools/__init__.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/tools/eval.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/tools/export_center.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/tools/export_model.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/tools/infer/predict_cls.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/tools/infer/predict_det.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/tools/infer/predict_e2e.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/tools/infer/predict_rec.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/tools/infer/predict_system.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/tools/infer/utility.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/tools/infer_cls.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/tools/infer_det.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/tools/infer_e2e.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/tools/infer_kie.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/tools/infer_rec.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/tools/infer_table.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/tools/program.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/tools/test_hubserving.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/tools/train.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/tools/xlprog.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/utils/EN_symbol_dict.txt +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/utils/__init__.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/utils/dict/ar_dict.txt +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/utils/dict/arabic_dict.txt +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/utils/dict/be_dict.txt +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/utils/dict/bg_dict.txt +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/utils/dict/chinese_cht_dict.txt +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/utils/dict/cyrillic_dict.txt +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/utils/dict/devanagari_dict.txt +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/utils/dict/en_dict.txt +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/utils/dict/fa_dict.txt +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/utils/dict/french_dict.txt +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/utils/dict/german_dict.txt +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/utils/dict/hi_dict.txt +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/utils/dict/it_dict.txt +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/utils/dict/japan_dict.txt +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/utils/dict/ka_dict.txt +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/utils/dict/korean_dict.txt +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/utils/dict/latin_dict.txt +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/utils/dict/mr_dict.txt +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/utils/dict/ne_dict.txt +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/utils/dict/oc_dict.txt +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/utils/dict/pu_dict.txt +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/utils/dict/rs_dict.txt +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/utils/dict/rsc_dict.txt +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/utils/dict/ru_dict.txt +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/utils/dict/ta_dict.txt +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/utils/dict/table_dict.txt +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/utils/dict/table_structure_dict.txt +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/utils/dict/te_dict.txt +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/utils/dict/ug_dict.txt +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/utils/dict/uk_dict.txt +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/utils/dict/ur_dict.txt +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/utils/dict/xi_dict.txt +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/utils/dict90.txt +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/utils/e2e_metric/Deteval.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/utils/e2e_metric/polygon_fast.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/utils/e2e_utils/extract_batchsize.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/utils/e2e_utils/extract_textpoint_fast.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/utils/e2e_utils/extract_textpoint_slow.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/utils/e2e_utils/pgnet_pp_utils.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/utils/e2e_utils/visual.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/utils/en_dict.txt +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/utils/gen_label.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/utils/ic15_dict.txt +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/utils/iou.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/utils/logging.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/utils/network.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/utils/ppocr_keys_v1.txt +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/utils/profiler.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/utils/save_load.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/utils/stats.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppocr/utils/utility.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppstructure/__init__.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppstructure/predict_system.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppstructure/table/__init__.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppstructure/table/eval_table.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppstructure/table/matcher.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppstructure/table/predict_structure.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppstructure/table/predict_table.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppstructure/table/table_metric/__init__.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppstructure/table/table_metric/parallel.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppstructure/table/table_metric/table_metric.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppstructure/table/tablepyxl/__init__.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppstructure/table/tablepyxl/style.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppstructure/table/tablepyxl/tablepyxl.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/ppstructure/utility.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/pyxlpr/xlai.py +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/requirements.txt +0 -0
- {pyxllib-0.3.61 → pyxllib-0.3.62}/setup.cfg +0 -0
@@ -86,9 +86,12 @@ class JsonlDataFile:
|
|
86
86
|
def read_partial_records(self, num_records):
    """Load only the first batch of records from the jsonl file.

    Nothing happens when ``self.infile`` is unset or is not an existing
    file.  Otherwise the first batch produced by
    ``self.infile.yield_line(batch_size=num_records)`` is taken and every
    line of it is JSON-decoded and appended to ``self.records``.  When no
    batch is produced at all, ``self.records`` is reset to an empty list.

    :param num_records: batch size forwarded to ``yield_line``
    """
    source = self.infile
    if not (source and source.is_file()):
        return

    try:
        first_batch = next(source.yield_line(batch_size=num_records))
    except StopIteration:
        # No batch available: the record list is emptied.
        self.records = []
        return

    for raw_line in first_batch:
        self.records.append(json.loads(raw_line))
|
92
95
|
|
93
96
|
def save(self, outfile=None, ensure_ascii=False):
|
94
97
|
""" 将当前数据保存到指定的jsonl文件中 """
|
@@ -16,9 +16,11 @@ check_install_package('xlrd2')
|
|
16
16
|
check_install_package('yattag')
|
17
17
|
check_install_package('jsonpickle')
|
18
18
|
|
19
|
+
import random
|
19
20
|
import datetime
|
20
21
|
import json
|
21
22
|
import re
|
23
|
+
from pathlib import Path
|
22
24
|
|
23
25
|
import openpyxl
|
24
26
|
from openpyxl.cell.cell import MergedCell
|
@@ -26,6 +28,7 @@ from openpyxl.styles import Font
|
|
26
28
|
from openpyxl.utils.cell import get_column_letter, column_index_from_string
|
27
29
|
import pandas as pd
|
28
30
|
import jsonpickle
|
31
|
+
from collections import Counter, OrderedDict
|
29
32
|
|
30
33
|
from pyxllib.prog.pupil import inject_members, dprint, xlmd5
|
31
34
|
from pyxllib.prog.specialist import browser
|
@@ -1135,6 +1138,60 @@ class XlWorkbook(openpyxl.Workbook):
|
|
1135
1138
|
""" 基于to_json计算的md5,一般用来判断不同workbook间是否相同 """
|
1136
1139
|
return xlmd5(json.dumps(self.to_json(reduction_degree)))
|
1137
1140
|
|
1141
|
+
def extract_summary(self):
    """Build a summary dictionary describing every sheet of the workbook.

    :return dict: ``fileName`` (base name of ``self.path``, or None when
        ``self.path`` is empty), ``sheetNames`` and ``sheets`` -- one
        summary dict per entry of ``self._sheets``.
    """
    # NOTE(review): assumes ``self.path`` holds the workbook's file path -- confirm where it is set.
    sheet_summaries = []

    # _sheets is walked on purpose so that non-data sheets are listed as well.
    for sheet in self._sheets:
        if not isinstance(sheet, openpyxl.worksheet.worksheet.Worksheet):
            # Any other sheet kind only gets a basic entry; its class name serves as the type.
            entry = {
                "sheetName": sheet.title,
                "sheetType": sheet.__class__.__name__,
            }
        else:
            # Standard worksheet: reuse the range / header extraction helpers.
            used_range = find_used_range_optimized(sheet)
            if not used_range:
                entry = {
                    "sheetName": sheet.title,
                    "sheetType": "DialogOrMacroSheet",
                    "usedRange": None,
                }
            else:
                header_range, data_range = split_header_and_data(sheet, used_range)
                header_structure = extract_header_structure(sheet, header_range)
                field_summaries = extract_field_summaries(sheet, header_range, data_range)

                entry = {
                    "sheetName": sheet.title,
                    "sheetType": "Worksheet",
                    "usedRange": used_range,
                    "headerRange": header_range,
                    "header": header_structure,
                    'dataRange': data_range,
                }
                if field_summaries:
                    entry['data'] = field_summaries
                else:
                    # No readable data: most likely a pivot table whose
                    # values are computed and cannot be read here.
                    entry['sheetType'] = 'PivotTable'

        sheet_summaries.append(entry)

    return {
        "fileName": Path(self.path).name if self.path else None,
        "sheetNames": self.sheetnames,
        "sheets": sheet_summaries,
    }
|
1194
|
+
|
1138
1195
|
|
1139
1196
|
# Presumably makes the members defined on XlWorkbook available on
# openpyxl.Workbook itself (excel2md5 calls to_md5 on a plain
# load_workbook() result) -- TODO confirm against inject_members.
inject_members(XlWorkbook, openpyxl.Workbook)
|
1140
1197
|
|
@@ -1142,3 +1199,314 @@ inject_members(XlWorkbook, openpyxl.Workbook)
|
|
1142
1199
|
def excel2md5(file, reduction_degree=1):
    """Load the workbook at ``file`` and return the result of its ``to_md5``.

    :param file: passed straight to ``openpyxl.load_workbook``
    :param reduction_degree: forwarded unchanged to ``to_md5``
    """
    return openpyxl.load_workbook(file).to_md5(reduction_degree)
|
1202
|
+
|
1203
|
+
|
1204
|
+
def __提取表格摘要信息():
|
1205
|
+
""" Placeholder without a body; the name reads "extract table summary info" and it presumably only marks the start of the helper section that follows. """
|
1206
|
+
|
1207
|
+
|
1208
|
+
def _parse_cell_reference(ref):
    """Split one side of a range address into ``(column_index, row_number)``.

    Either element is None when that part is absent; both are None when the
    column part is not a valid column name.
    """
    # Absolute markers ('$') and surrounding blanks carry no position info.
    ref = ref.strip().replace('$', '')

    # A bare row number such as '3'.
    if ref.isdigit():
        return None, int(ref)

    try:
        column = column_index_from_string(ref.rstrip('1234567890'))
    except ValueError:
        # Not a usable column name: the whole reference is treated as unknown.
        return None, None

    digits = ''.join(filter(str.isdigit, ref))
    return column, (int(digits) if digits else None)


def parse_range_address(address):
    """ Parse a cell range address.

    Absolute references ('$A$1:$B$3', '$1:$3') and references padded with
    blanks are accepted and parsed like their plain counterparts.

    :param str address: range address such as 'A1', 'A1:B3', '1:3', 'A:B'.
    :return dict: a dict with the keys 'left', 'top', 'right', 'bottom';
        a bound that the address does not specify is None.
    """
    parts = address.split(":")
    start_ref = parts[0]
    # A single reference ('1', 'A', 'B2') describes a range whose end equals its start.
    end_ref = parts[1] if len(parts) > 1 else start_ref

    left, top = _parse_cell_reference(start_ref)
    right, bottom = _parse_cell_reference(end_ref)

    return {"left": left, "top": top, "right": right, "bottom": bottom}
|
1251
|
+
|
1252
|
+
|
1253
|
+
def build_range_address(left=None, top=None, right=None, bottom=None):
    """ Assemble a cell range address from its bounds.

    Each corner is the column letter (when the column bound is truthy)
    followed by the row number (when the row bound is truthy).

    :return str: an address such as 'A1', 'A1:B3', '1:3', 'A:B'; a single
        corner when both corners are equal or one of them is empty.
    """

    def corner(col, row):
        letters = get_column_letter(col) if col else ''
        number = row if row else ''
        return f"{letters}{number}"

    first = corner(left, top)
    last = corner(right, bottom)

    # Only two distinct, non-empty corners form a real 'start:end' range.
    if first and last and first != last:
        return f"{first}:{last}"
    return first or last
|
1269
|
+
|
1270
|
+
|
1271
|
+
def find_used_range_optimized(ws):
    """Locate the bounding range of all non-empty cells in a worksheet.

    The sheet is assumed to hold a single data table; detecting several
    separate tables is not attempted.

    The sheet is scanned exactly once: every cell whose value is not ``None``
    widens the bounding box. This replaces four separate scans (two of them
    column-wise) that also materialised every row and column into lists.

    :param ws: worksheet object providing ``iter_rows()`` whose cells expose
        ``value``, ``row`` and ``column``
    :return str: address built by ``build_range_address`` from the bounding
        box; all four bounds are ``None`` when no cell has a value
    """
    left = top = right = bottom = None

    for row in ws.iter_rows():
        for cell in row:
            if cell.value is None:
                continue
            row_no, col_no = cell.row, cell.column
            if top is None or row_no < top:
                top = row_no
            if bottom is None or row_no > bottom:
                bottom = row_no
            if left is None or col_no < left:
                left = col_no
            if right is None or col_no > right:
                right = col_no

    return build_range_address(left=left, top=top, right=right, bottom=bottom)
|
1308
|
+
|
1309
|
+
|
1310
|
+
def is_string_type(value):
    """Return True when ``value`` is neither date-like nor number-like.

    Two conversions are attempted in order: ``pd.to_datetime`` and ``float``.
    If either one succeeds the value is treated as a date or a number and the
    function returns False; only when both fail is the value considered text.

    :param value: any cell value
    :return bool: True if both conversions raise, False otherwise
    """
    probes = (
        # Date check comes first.
        (lambda v: pd.to_datetime(v, errors='raise'), (ValueError, TypeError, OverflowError)),
        # Then the numeric check.
        (float, (ValueError, TypeError)),
    )
    for convert, expected_errors in probes:
        try:
            convert(value)
        except expected_errors:
            continue
        return False
    return True
|
1325
|
+
|
1326
|
+
|
1327
|
+
def score_row(row):
    """Score how header-like a row of cells looks.

    Cells without a value are ignored. Every other cell contributes +1 when
    ``is_string_type`` says its value is text and -1 otherwise, plus an extra
    0.5 when its fill colour index differs from 'FFFFFFFF' or any of its four
    border sides has a style.

    :param row: iterable of cells exposing ``value``, ``fill`` and ``border``
    :return: accumulated score (int or float)
    """
    total = 0
    for cell in row:
        content = cell.value
        if content is None:
            continue

        total += 1 if is_string_type(content) else -1

        # Fill colour is checked first; borders are only inspected when the
        # fill test did not already grant the bonus.
        decorated = cell.fill.start_color.index != 'FFFFFFFF'
        if not decorated:
            sides = cell.border
            decorated = bool(sides.left.style or sides.right.style or
                             sides.top.style or sides.bottom.style)
        if decorated:
            total += 0.5
    return total
|
1342
|
+
|
1343
|
+
|
1344
|
+
def find_header_row(ws, used_range, max_rows_to_check=10):
    """Find the header area at the top of a used range.

    The first ``max_rows_to_check`` rows of ``used_range`` (fewer if the range
    is shorter) are scored with ``score_row``. Each score is then increased by
    its drop relative to the following row (the last inspected row gets no
    such bonus), and the row with the highest weighted score is taken as the
    last header row.

    :param ws: worksheet providing ``iter_rows``
    :param used_range: address of the used range, parsed by ``parse_range_address``
    :param max_rows_to_check: upper limit on how many leading rows are scored
    :return str: address spanning from the first row of ``used_range`` down to
        the detected header row, over the same columns
    """
    bounds = parse_range_address(used_range)
    first_row = bounds['top']
    first_col, last_col = bounds['left'], bounds['right']

    # Never look at more rows than the range actually has.
    span = min(bounds['bottom'] - first_row + 1, max_rows_to_check)

    scores = [
        score_row(cells)
        for cells in ws.iter_rows(min_row=first_row, max_row=first_row + span - 1,
                                  min_col=first_col, max_col=last_col)
    ]

    # Weighted score = own score + (own score - next row's score).
    final_index = len(scores) - 1
    weighted = []
    for idx, current in enumerate(scores):
        drop = current - scores[idx + 1] if idx < final_index else 0
        weighted.append(current + drop)

    # First occurrence of the maximum wins, converted back to a sheet row number.
    best_row = weighted.index(max(weighted)) + first_row

    # Everything from the top of the used range down to that row is the header.
    return build_range_address(left=first_col, top=first_row,
                               right=last_col, bottom=best_row)
|
1373
|
+
|
1374
|
+
|
1375
|
+
def split_header_and_data(ws, used_range, max_rows_to_check=10):
    """Split a used range into a header range and a data range.

    :param ws: worksheet, forwarded to ``find_header_row``
    :param used_range: address of the whole used range
    :param max_rows_to_check: forwarded to ``find_header_row``
    :return tuple: ``(header_range, data_range)`` address strings; the data
        range starts on the row right below the header and ends at the bottom
        of ``used_range``, over the columns of ``used_range``
    """
    header_range = find_header_row(ws, used_range, max_rows_to_check)
    header_bottom = parse_range_address(header_range)['bottom']
    whole = parse_range_address(used_range)

    data_range = build_range_address(
        left=whole['left'],
        top=header_bottom + 1,
        right=whole['right'],
        bottom=whole['bottom'],
    )
    return header_range, data_range
|
1385
|
+
|
1386
|
+
|
1387
|
+
def extract_header_structure(ws, header_range):
    """Extract the header layout, taking merged cells into account.

    Merged ranges that overlap the header range are recorded under their full
    range address with the value of their top-left cell. Remaining cells of
    the header range that carry a truthy value are recorded under their own
    coordinate.

    Merged ranges are required to overlap the header range in columns as well
    as in rows. Checking rows alone also picked up merged areas located in
    columns outside the header. A missing column bound (``None``) disables
    that side of the column check.

    :param ws: worksheet providing ``merged_cells.ranges``, ``cell`` and ``iter_rows``
    :param header_range: address of the header area
    :return dict: mapping of address string -> header value
    """
    details = parse_range_address(header_range)
    top, bottom = details['top'], details['bottom']
    left, right = details['left'], details['right']

    header_structure = {}
    merged_addresses = set()

    # Merged cells first.
    for merged_range in ws.merged_cells.ranges:
        # The original code reads bounds as (min_col, min_row, max_col, max_row).
        min_col, min_row, max_col, max_row = merged_range.bounds

        # Skip ranges that do not intersect the header rows ...
        if min_row > bottom or max_row < top:
            continue
        # ... or the header columns (only checked where a bound is known).
        if left is not None and max_col < left:
            continue
        if right is not None and min_col > right:
            continue

        top_left_cell = ws.cell(row=min_row, column=min_col)
        address = build_range_address(left=min_col, top=min_row,
                                      right=max_col, bottom=max_row)
        header_structure[address] = top_left_cell.value

        # Remember every covered position so it is not reported again below.
        for row_no in range(min_row, max_row + 1):
            for col_no in range(min_col, max_col + 1):
                merged_addresses.add((row_no, col_no))

    # Then the cells that are not part of any recorded merged range.
    for row in ws.iter_rows(min_row=top, max_row=bottom, min_col=left, max_col=right):
        for cell in row:
            if (cell.row, cell.column) not in merged_addresses and cell.value:
                header_structure[cell.coordinate] = cell.value

    return header_structure
|
1416
|
+
|
1417
|
+
|
1418
|
+
def determine_field_type_and_summary(ws, col, start_row, end_row):
    """Summarise the values of one column over a row span.

    Up to 10 randomly chosen cells (kept in sheet order) provide the number
    formats and sample values; the value range is computed over every cell
    of the span.

    Changes compared with the previous implementation:

    * ``bool`` values no longer count as numbers (``bool`` is a subclass of
      ``int``, so a True/False column used to report a "numeric" range).
    * Date ranges are compared on calendar dates, so a column mixing
      ``datetime.date`` and ``datetime.datetime`` no longer raises
      ``TypeError``; the reported range is formatted as dates anyway.
    * Number formats are ordered with ``Counter.most_common()``, which gives
      the same order as sorting the distinct formats by ``list.count``.

    :param ws: worksheet providing ``iter_rows``
    :param col: column index to inspect
    :param start_row: first row of the span
    :param end_row: last row of the span
    :return dict: with keys
        ``number_formats`` - distinct formats of the sampled cells, most frequent first;
        ``sample_values`` - up to 5 most frequent sampled values, dates as
        '%Y-%m-%d', times as '%H:%M:%S', strings longer than 20 characters
        cut to 17 characters plus '...';
        ``numeric_range`` - ``(min, max)`` of the numbers in the span, else of
        the dates, else of the times (dates/times formatted as above), or
        ``None`` when the span has none of them
    """
    rows = list(ws.iter_rows(min_col=col, max_col=col, min_row=start_row, max_row=end_row))

    # Draw at most 10 cells and keep them in their original order.
    sample_indices = sorted(random.sample(range(len(rows)), min(10, len(rows))))

    number_formats = []
    sample_values = []
    for index in sample_indices:
        cell = rows[index][0]
        number_formats.append(cell.number_format)

        value = cell.value
        # datetime.datetime is a subclass of datetime.date, so one check covers both.
        if isinstance(value, datetime.date):
            sample_values.append(value.strftime('%Y-%m-%d'))
        elif isinstance(value, datetime.time):
            sample_values.append(value.strftime('%H:%M:%S'))
        else:
            sample_values.append(value)

    # Collect range candidates from the whole span, not just the sample.
    numeric_values = []
    date_values = []
    time_values = []
    for row in rows:
        value = row[0].value
        if isinstance(value, bool):
            # Booleans are ints in Python but are not numeric data here.
            continue
        if isinstance(value, (int, float)):
            numeric_values.append(value)
        elif isinstance(value, datetime.datetime):
            # Reduce to the calendar date so it compares with plain dates.
            date_values.append(value.date())
        elif isinstance(value, datetime.date):
            date_values.append(value)
        elif isinstance(value, datetime.time):
            time_values.append(value)

    # Keep the 5 most frequent sampled values, shortening long strings.
    truncated_values = []
    for value, _ in Counter(sample_values).most_common(5):
        if isinstance(value, str) and len(value) > 20:
            truncated_values.append(value[:17] + '...')
        else:
            truncated_values.append(value)

    # Numbers take precedence over dates, dates over times.
    if numeric_values:
        value_range = (min(numeric_values), max(numeric_values))
    elif date_values:
        value_range = (min(date_values).strftime('%Y-%m-%d'),
                       max(date_values).strftime('%Y-%m-%d'))
    elif time_values:
        value_range = (min(time_values).strftime('%H:%M:%S'),
                       max(time_values).strftime('%H:%M:%S'))
    else:
        value_range = None

    return {
        "number_formats": [fmt for fmt, _ in Counter(number_formats).most_common()],
        "sample_values": truncated_values,
        "numeric_range": value_range,
    }
|
1488
|
+
|
1489
|
+
|
1490
|
+
def extract_field_summaries(ws, header_range, data_range):
    """Build a summary for every field named in the last header row.

    Only the bottom row of the header range is read, by restricting
    ``iter_cols`` to that single row. The previous code iterated columns from
    the first sheet row and indexed them with ``bottom - top``, which selects
    the wrong cell whenever the header does not start on row 1 (and walked
    every row of the sheet for each column).

    :param ws: worksheet providing ``iter_cols``
    :param header_range: address of the header area
    :param data_range: address of the data area; only its bottom row is used
    :return dict: header value -> summary from ``determine_field_type_and_summary``;
        header cells with a falsy value are skipped
    """
    header_details = parse_range_address(header_range)
    data_details = parse_range_address(data_range)
    header_bottom = header_details['bottom']

    field_summaries = {}
    for col in ws.iter_cols(min_col=header_details['left'], max_col=header_details['right'],
                            min_row=header_bottom, max_row=header_bottom):
        header_cell = col[0]  # the only cell yielded: the one on the header's bottom row
        if header_cell.value:
            # Data is summarised from the row below the header to the end of the data range.
            field_summaries[header_cell.value] = determine_field_type_and_summary(
                ws, header_cell.column, header_bottom + 1, data_details['bottom']
            )

    return field_summaries
|
1504
|
+
|
1505
|
+
|
1506
|
+
def extract_workbook_summary(file_path):
    """Load an Excel workbook and return its summary.

    The summary comes from ``extract_summary()`` on the loaded workbook.
    NOTE(review): that method is not defined in this chunk - presumably it is
    attached to the workbook class elsewhere in this package; confirm.

    :param file_path: path of the workbook file
    :return: the summary object with an added ``'fileName'`` entry holding the
        file's base name
    """
    summary = openpyxl.load_workbook(file_path).extract_summary()
    summary['fileName'] = Path(file_path).name
    return summary
|
@@ -3,7 +3,7 @@
|
|
3
3
|
# @Author : 陈坤泽
|
4
4
|
# @Email : 877362867@qq.com
|
5
5
|
# @Date : 2023/07/13 14:26
|
6
|
-
|
6
|
+
|
7
7
|
|
8
8
|
from pyxllib.prog.pupil import check_install_package
|
9
9
|
from joblib import Parallel, delayed
|
@@ -15,8 +15,10 @@ import json
|
|
15
15
|
import re
|
16
16
|
import html
|
17
17
|
import random
|
18
|
+
import copy
|
18
19
|
from urllib.parse import unquote
|
19
20
|
from collections import OrderedDict
|
21
|
+
from collections import Counter
|
20
22
|
|
21
23
|
import pandas as pd
|
22
24
|
from transformers import GPT2TokenizerFast
|
@@ -232,7 +234,7 @@ class GptChatJsonl(JsonlDataFile):
|
|
232
234
|
texts[i]['content'] = text['content'].strip()
|
233
235
|
|
234
236
|
# 3 添加会话conversation
|
235
|
-
item = {'id': record_id or self.start_id,
|
237
|
+
item = {'id': str(record_id or self.start_id), # 要转成字符串类型,不然容易出问题
|
236
238
|
'text': texts,
|
237
239
|
'first_text_length': len(texts[0]['content'])}
|
238
240
|
if extra:
|
@@ -623,7 +625,7 @@ class GptChatDir:
|
|
623
625
|
if not dir_path.is_dir():
|
624
626
|
dir_path.mkdir(parents=True, exist_ok=True)
|
625
627
|
|
626
|
-
def
|
628
|
+
def summary_records(self):
|
627
629
|
""" 一些统计信息 """
|
628
630
|
# 1 chat信息
|
629
631
|
gcd1 = self.chatted_dir or self.chat_dir
|
@@ -657,6 +659,22 @@ class GptChatDir:
|
|
657
659
|
# gtj = GptTrainJsonl(self.train_file)
|
658
660
|
# gtj.analyze_text_length()
|
659
661
|
|
662
|
+
def summary_downloads(self):
    """Print statistics about the downloaded files.

    First the number of files found (recursively) under each directory of
    ``self.download_files_dir``, then the size of every such file.
    """
    print('【每个目录文件数量】')
    counts_per_dir = [len(list(folder.rglob_files()))
                      for folder in self.download_files_dir.glob_dirs()]
    print(ValuesStat(counts_per_dir).summary())
    print(Counter(counts_per_dir))

    print('【每个文件大小】')
    # The directories are listed a second time, as in the original flow.
    all_sizes = [item.size()
                 for folder in self.download_files_dir.glob_dirs()
                 for item in folder.rglob_files()]
    print(ValuesStat(all_sizes).summary())
|
677
|
+
|
660
678
|
def create_chat(self):
|
661
679
|
""" 生成chat数据,具体内容方式跟业务有关 """
|
662
680
|
raise NotImplementedError
|
@@ -779,11 +797,14 @@ class GptChatDir:
|
|
779
797
|
# pl = Parallel(n_jobs=n_jobs, backend='threading', timeout=5)
|
780
798
|
# pl(delayed(func)(x) for x in tqdm(gcj1.records))
|
781
799
|
|
782
|
-
def create_verify(self, n_jobs=1):
|
800
|
+
def create_verify(self, n_jobs=1, num_records=None):
|
801
|
+
"""
|
802
|
+
:param num_records: 每个文件最多提取多少条目,用于小批量运行调试
|
803
|
+
"""
|
783
804
|
print('【create_verify】得到更准确或精确后处理的验证集')
|
784
805
|
n = len(self.post_dir.files)
|
785
806
|
for i, post_file in enumerate(self.post_dir.files):
|
786
|
-
gcj = GptChatJsonl(post_file)
|
807
|
+
gcj = GptChatJsonl(post_file, num_records=num_records)
|
787
808
|
gcj2 = GptChatJsonl()
|
788
809
|
for x in tqdm(gcj.records, desc=f'第{i + 1}/{n}个文件'):
|
789
810
|
y = self.post2verify_record(x)
|
@@ -30,7 +30,7 @@ _dir = Path(__file__).parent
|
|
30
30
|
|
31
31
|
setup(
|
32
32
|
name='pyxllib', # pip 安装时用的名字
|
33
|
-
version='0.3.
|
33
|
+
version='0.3.62', # 当前版本,每次更新上传到pypi都需要修改; 第4位版本号一般是修紧急bug
|
34
34
|
author='code4101',
|
35
35
|
author_email='877362867@qq.com',
|
36
36
|
url='https://github.com/XLPRUtils/pyxllib',
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|