deepdoctection 0.29__tar.gz → 0.31__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of deepdoctection might be problematic. Click here for more details.
- {deepdoctection-0.29 → deepdoctection-0.31}/PKG-INFO +39 -61
- {deepdoctection-0.29 → deepdoctection-0.31}/README.md +3 -1
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/__init__.py +6 -2
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/analyzer/dd.py +13 -8
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/dataflow/base.py +0 -19
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/dataflow/custom.py +6 -5
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/dataflow/custom_serialize.py +20 -5
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/dataflow/parallel_map.py +22 -17
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/dataflow/serialize.py +5 -4
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/dataflow/stats.py +5 -5
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/datapoint/annotation.py +35 -14
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/datapoint/box.py +9 -6
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/datapoint/convert.py +3 -1
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/datapoint/image.py +66 -29
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/datapoint/view.py +62 -24
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/datasets/adapter.py +4 -5
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/datasets/base.py +87 -14
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/datasets/dataflow_builder.py +1 -1
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/datasets/info.py +2 -2
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/datasets/instances/fintabnet.py +3 -3
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/datasets/instances/layouttest.py +2 -7
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/datasets/instances/pubtabnet.py +3 -3
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/eval/accmetric.py +7 -5
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/eval/base.py +5 -4
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/eval/eval.py +9 -7
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/eval/tedsmetric.py +9 -3
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/eval/tp_eval_callback.py +8 -7
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/base.py +39 -13
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/d2detect.py +164 -64
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/deskew.py +32 -7
- deepdoctection-0.31/deepdoctection/extern/doctrocr.py +532 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/fastlang.py +45 -7
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/hfdetr.py +90 -33
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/hflayoutlm.py +109 -22
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/model.py +30 -11
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/pdftext.py +2 -1
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/pt/ptutils.py +3 -2
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/tessocr.py +134 -22
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/texocr.py +4 -2
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/tp/tpcompat.py +4 -4
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/preproc.py +2 -7
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/tpdetect.py +50 -23
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/mapper/d2struct.py +1 -1
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/mapper/hfstruct.py +1 -1
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/mapper/laylmstruct.py +1 -1
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/mapper/maputils.py +19 -5
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/mapper/prodigystruct.py +15 -13
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/mapper/pubstruct.py +10 -10
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/mapper/tpstruct.py +1 -1
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/pipe/anngen.py +35 -8
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/pipe/base.py +53 -19
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/pipe/cell.py +29 -8
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/pipe/common.py +12 -4
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/pipe/doctectionpipe.py +4 -3
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/pipe/language.py +3 -2
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/pipe/layout.py +3 -2
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/pipe/lm.py +2 -2
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/pipe/order.py +67 -39
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/pipe/refine.py +18 -10
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/pipe/segment.py +34 -20
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/pipe/text.py +14 -8
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/pipe/transform.py +16 -8
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/train/d2_frcnn_train.py +17 -14
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/train/hf_detr_train.py +13 -9
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/train/hf_layoutlm_train.py +31 -19
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/utils/__init__.py +3 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/utils/concurrency.py +1 -1
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/utils/context.py +5 -5
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/utils/develop.py +2 -2
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/utils/env_info.py +64 -27
- deepdoctection-0.31/deepdoctection/utils/error.py +84 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/utils/file_utils.py +28 -17
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/utils/fs.py +16 -14
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/utils/logger.py +43 -19
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/utils/pdf_utils.py +14 -7
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/utils/settings.py +5 -1
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/utils/transform.py +1 -1
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/utils/utils.py +0 -6
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/utils/viz.py +83 -14
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection.egg-info/PKG-INFO +39 -61
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection.egg-info/SOURCES.txt +1 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection.egg-info/requires.txt +32 -60
- {deepdoctection-0.29 → deepdoctection-0.31}/setup.py +24 -22
- deepdoctection-0.31/tests/analyzer/test_dd.py +202 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/conftest.py +0 -1
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/data.py +88 -48
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/datapoint/test_image.py +50 -4
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/datasets/instances/conftest.py +1 -1
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/datasets/instances/test_funsd.py +2 -2
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/datasets/test_info.py +1 -1
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/extern/conftest.py +10 -1
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/extern/data.py +2 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/extern/test_deskew.py +13 -3
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/extern/test_doctrocr.py +46 -2
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/extern/test_hfdetr.py +2 -2
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/extern/test_tessocr.py +62 -3
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/mapper/data.py +6 -6
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/mapper/test_prodigystruct.py +1 -1
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/pipe/test_anngen.py +6 -6
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/pipe/test_cell.py +21 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/pipe/test_layout.py +1 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/pipe/test_segment.py +1 -1
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/pipe/test_text.py +4 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/pipe/test_transform.py +2 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests_d2/test_d2detect.py +1 -1
- deepdoctection-0.29/deepdoctection/extern/doctrocr.py +0 -293
- deepdoctection-0.29/tests/analyzer/test_dd.py +0 -200
- {deepdoctection-0.29 → deepdoctection-0.31}/LICENSE +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/analyzer/__init__.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/configs/__init__.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/configs/conf_dd_one.yaml +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/configs/conf_tesseract.yaml +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/dataflow/__init__.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/dataflow/common.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/datapoint/__init__.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/datasets/__init__.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/datasets/instances/__init__.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/datasets/instances/doclaynet.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/datasets/instances/funsd.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/datasets/instances/iiitar13k.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/datasets/instances/publaynet.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/datasets/instances/pubtables1m.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/datasets/instances/rvlcdip.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/datasets/instances/xfund.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/datasets/instances/xsl/__init__.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/datasets/instances/xsl/pascal_voc.xsl +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/datasets/registry.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/datasets/save.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/eval/__init__.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/eval/cocometric.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/eval/registry.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/__init__.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/pt/__init__.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/pt/nms.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/tp/__init__.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/tp/tfutils.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/__init__.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/common.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/config/__init__.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/config/config.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/modeling/__init__.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/modeling/generalized_rcnn.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/modeling/model_cascade.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/modeling/model_rpn.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/predict.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/utils/__init__.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/utils/box_ops.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/utils/np_box_ops.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/mapper/__init__.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/mapper/cats.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/mapper/cocostruct.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/mapper/match.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/mapper/misc.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/mapper/pascalstruct.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/mapper/xfundstruct.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/pipe/__init__.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/pipe/concurrency.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/pipe/registry.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/py.typed +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/train/__init__.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/train/tp_frcnn_train.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/utils/detection_types.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/utils/identifier.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/utils/metacfg.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection/utils/tqdm.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection.egg-info/dependency_links.txt +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/deepdoctection.egg-info/top_level.txt +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/setup.cfg +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/__init__.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/analyzer/__init__.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/dataflow/__init__.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/dataflow/conftest.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/dataflow/test_common.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/dataflow/test_custom.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/dataflow/test_custom_serialize.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/dataflow/test_parallel_map.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/dataflow/test_stats.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/datapoint/__init__.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/datapoint/conftest.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/datapoint/test_annotation.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/datapoint/test_box.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/datapoint/test_convert.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/datapoint/test_view.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/datasets/__init__.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/datasets/instances/__init__.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/datasets/instances/test_doclaynet.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/datasets/instances/test_fintabnet.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/datasets/instances/test_iiitar13k.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/datasets/instances/test_layouttest.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/datasets/instances/test_publaynet.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/datasets/instances/test_pubtables1m.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/datasets/instances/test_pubtabnet.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/datasets/instances/test_rvlcdip.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/datasets/test_adapter.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/datasets/test_registry.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/eval/__init__.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/eval/conftest.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/eval/test_accmetric.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/eval/test_cocometric.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/eval/test_eval.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/eval/test_registry.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/eval/test_tedsmetric.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/extern/__init__.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/extern/test_fastlang.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/extern/test_hflayoutlm.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/extern/test_pdftext.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/extern/test_texocr.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/extern/test_tpdetect.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/mapper/__init__.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/mapper/conftest.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/mapper/test_cats.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/mapper/test_cocostruct.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/mapper/test_d2struct.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/mapper/test_hfstruct.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/mapper/test_iiitar13k.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/mapper/test_laylmstruct.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/mapper/test_misc.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/mapper/test_pubstruct.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/mapper/test_tpstruct.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/mapper/test_utils.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/mapper/test_xfundstruct.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/pipe/__init__.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/pipe/test_common.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/pipe/test_language.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/pipe/test_lm.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/pipe/test_order.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/pipe/test_refine.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/pipe/test_registry.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/test_utils.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/train/__init__.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/train/conftest.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/train/test_d2_frcnn_train.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests/train/test_tp_frcnn_train.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests_d2/__init__.py +0 -0
- {deepdoctection-0.29 → deepdoctection-0.31}/tests_d2/conftest.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: deepdoctection
|
|
3
|
-
Version: 0.29
|
|
3
|
+
Version: 0.31
|
|
4
4
|
Summary: Repository for Document AI
|
|
5
5
|
Home-page: https://github.com/deepdoctection/deepdoctection
|
|
6
6
|
Author: Dr. Janis Meyer
|
|
@@ -10,42 +10,45 @@ Classifier: License :: OSI Approved :: Apache Software License
|
|
|
10
10
|
Classifier: Natural Language :: English
|
|
11
11
|
Classifier: Operating System :: POSIX :: Linux
|
|
12
12
|
Classifier: Programming Language :: Python :: 3.8
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
16
|
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
14
17
|
Requires-Python: >=3.8
|
|
15
18
|
Description-Content-Type: text/markdown
|
|
16
19
|
License-File: LICENSE
|
|
17
|
-
Requires-Dist: catalogue==2.0.7
|
|
20
|
+
Requires-Dist: catalogue==2.0.10
|
|
18
21
|
Requires-Dist: huggingface_hub>=0.12.0
|
|
19
|
-
Requires-Dist: importlib-metadata>=4.11.2
|
|
20
|
-
Requires-Dist: jsonlines==3.0.0
|
|
22
|
+
Requires-Dist: importlib-metadata>=5.0.0
|
|
23
|
+
Requires-Dist: jsonlines==3.1.0
|
|
21
24
|
Requires-Dist: mock==4.0.3
|
|
22
25
|
Requires-Dist: networkx>=2.7.1
|
|
23
26
|
Requires-Dist: numpy>=1.21
|
|
24
27
|
Requires-Dist: packaging>=20.0
|
|
25
28
|
Requires-Dist: Pillow>=10.0.0
|
|
26
29
|
Requires-Dist: pypdf>=3.16.0
|
|
27
|
-
Requires-Dist: pyyaml==6.0
|
|
30
|
+
Requires-Dist: pyyaml>=6.0.1
|
|
28
31
|
Requires-Dist: pyzmq>=16
|
|
29
32
|
Requires-Dist: termcolor>=1.1
|
|
30
33
|
Requires-Dist: tabulate>=0.7.7
|
|
31
34
|
Requires-Dist: tqdm==4.64.0
|
|
32
35
|
Provides-Extra: tf
|
|
33
|
-
Requires-Dist: catalogue==2.0.7; extra == "tf"
|
|
36
|
+
Requires-Dist: catalogue==2.0.10; extra == "tf"
|
|
34
37
|
Requires-Dist: huggingface_hub>=0.12.0; extra == "tf"
|
|
35
|
-
Requires-Dist: importlib-metadata>=4.11.2; extra == "tf"
|
|
36
|
-
Requires-Dist: jsonlines==3.0.0; extra == "tf"
|
|
38
|
+
Requires-Dist: importlib-metadata>=5.0.0; extra == "tf"
|
|
39
|
+
Requires-Dist: jsonlines==3.1.0; extra == "tf"
|
|
37
40
|
Requires-Dist: mock==4.0.3; extra == "tf"
|
|
38
41
|
Requires-Dist: networkx>=2.7.1; extra == "tf"
|
|
39
42
|
Requires-Dist: numpy>=1.21; extra == "tf"
|
|
40
43
|
Requires-Dist: packaging>=20.0; extra == "tf"
|
|
41
44
|
Requires-Dist: Pillow>=10.0.0; extra == "tf"
|
|
42
45
|
Requires-Dist: pypdf>=3.16.0; extra == "tf"
|
|
43
|
-
Requires-Dist: pyyaml==6.0; extra == "tf"
|
|
46
|
+
Requires-Dist: pyyaml>=6.0.1; extra == "tf"
|
|
44
47
|
Requires-Dist: pyzmq>=16; extra == "tf"
|
|
45
48
|
Requires-Dist: termcolor>=1.1; extra == "tf"
|
|
46
49
|
Requires-Dist: tabulate>=0.7.7; extra == "tf"
|
|
47
50
|
Requires-Dist: tqdm==4.64.0; extra == "tf"
|
|
48
|
-
Requires-Dist: tensorpack; extra == "tf"
|
|
51
|
+
Requires-Dist: tensorpack==0.11; extra == "tf"
|
|
49
52
|
Requires-Dist: protobuf==3.20.1; extra == "tf"
|
|
50
53
|
Requires-Dist: tensorflow-addons>=0.17.1; extra == "tf"
|
|
51
54
|
Requires-Dist: tf2onnx>=1.9.2; extra == "tf"
|
|
@@ -53,47 +56,47 @@ Requires-Dist: python-doctr==0.7.0; extra == "tf"
|
|
|
53
56
|
Requires-Dist: pycocotools>=2.0.2; extra == "tf"
|
|
54
57
|
Requires-Dist: boto3; extra == "tf"
|
|
55
58
|
Requires-Dist: pdfplumber>=0.7.1; extra == "tf"
|
|
56
|
-
Requires-Dist: fasttext; extra == "tf"
|
|
57
|
-
Requires-Dist: jdeskew; extra == "tf"
|
|
59
|
+
Requires-Dist: fasttext==0.9.2; extra == "tf"
|
|
60
|
+
Requires-Dist: jdeskew>=0.2.2; extra == "tf"
|
|
58
61
|
Requires-Dist: apted==1.0.3; extra == "tf"
|
|
59
62
|
Requires-Dist: distance==0.1.3; extra == "tf"
|
|
60
63
|
Requires-Dist: lxml>=4.9.1; extra == "tf"
|
|
61
64
|
Provides-Extra: pt
|
|
62
|
-
Requires-Dist: catalogue==2.0.7; extra == "pt"
|
|
65
|
+
Requires-Dist: catalogue==2.0.10; extra == "pt"
|
|
63
66
|
Requires-Dist: huggingface_hub>=0.12.0; extra == "pt"
|
|
64
|
-
Requires-Dist: importlib-metadata>=4.11.2; extra == "pt"
|
|
65
|
-
Requires-Dist: jsonlines==3.0.0; extra == "pt"
|
|
67
|
+
Requires-Dist: importlib-metadata>=5.0.0; extra == "pt"
|
|
68
|
+
Requires-Dist: jsonlines==3.1.0; extra == "pt"
|
|
66
69
|
Requires-Dist: mock==4.0.3; extra == "pt"
|
|
67
70
|
Requires-Dist: networkx>=2.7.1; extra == "pt"
|
|
68
71
|
Requires-Dist: numpy>=1.21; extra == "pt"
|
|
69
72
|
Requires-Dist: packaging>=20.0; extra == "pt"
|
|
70
73
|
Requires-Dist: Pillow>=10.0.0; extra == "pt"
|
|
71
74
|
Requires-Dist: pypdf>=3.16.0; extra == "pt"
|
|
72
|
-
Requires-Dist: pyyaml==6.0; extra == "pt"
|
|
75
|
+
Requires-Dist: pyyaml>=6.0.1; extra == "pt"
|
|
73
76
|
Requires-Dist: pyzmq>=16; extra == "pt"
|
|
74
77
|
Requires-Dist: termcolor>=1.1; extra == "pt"
|
|
75
78
|
Requires-Dist: tabulate>=0.7.7; extra == "pt"
|
|
76
79
|
Requires-Dist: tqdm==4.64.0; extra == "pt"
|
|
77
|
-
Requires-Dist: timm; extra == "pt"
|
|
80
|
+
Requires-Dist: timm>=0.9.16; extra == "pt"
|
|
78
81
|
Requires-Dist: transformers>=4.36.0; extra == "pt"
|
|
79
|
-
Requires-Dist: accelerate; extra == "pt"
|
|
82
|
+
Requires-Dist: accelerate>=0.29.1; extra == "pt"
|
|
80
83
|
Requires-Dist: python-doctr==0.7.0; extra == "pt"
|
|
81
84
|
Requires-Dist: boto3; extra == "pt"
|
|
82
85
|
Requires-Dist: pdfplumber>=0.7.1; extra == "pt"
|
|
83
|
-
Requires-Dist: fasttext; extra == "pt"
|
|
84
|
-
Requires-Dist: jdeskew; extra == "pt"
|
|
86
|
+
Requires-Dist: fasttext==0.9.2; extra == "pt"
|
|
87
|
+
Requires-Dist: jdeskew>=0.2.2; extra == "pt"
|
|
85
88
|
Requires-Dist: apted==1.0.3; extra == "pt"
|
|
86
89
|
Requires-Dist: distance==0.1.3; extra == "pt"
|
|
87
90
|
Requires-Dist: lxml>=4.9.1; extra == "pt"
|
|
88
91
|
Provides-Extra: docs
|
|
89
|
-
Requires-Dist: tensorpack; extra == "docs"
|
|
92
|
+
Requires-Dist: tensorpack==0.11; extra == "docs"
|
|
90
93
|
Requires-Dist: boto3; extra == "docs"
|
|
91
94
|
Requires-Dist: transformers>=4.36.0; extra == "docs"
|
|
92
|
-
Requires-Dist: accelerate; extra == "docs"
|
|
95
|
+
Requires-Dist: accelerate>=0.29.1; extra == "docs"
|
|
93
96
|
Requires-Dist: pdfplumber>=0.7.1; extra == "docs"
|
|
94
97
|
Requires-Dist: lxml>=4.9.1; extra == "docs"
|
|
95
|
-
Requires-Dist: lxml-stubs; extra == "docs"
|
|
96
|
-
Requires-Dist: jdeskew; extra == "docs"
|
|
98
|
+
Requires-Dist: lxml-stubs>=0.5.1; extra == "docs"
|
|
99
|
+
Requires-Dist: jdeskew>=0.2.2; extra == "docs"
|
|
97
100
|
Requires-Dist: jinja2==3.0.3; extra == "docs"
|
|
98
101
|
Requires-Dist: mkdocs-material; extra == "docs"
|
|
99
102
|
Requires-Dist: mkdocstrings-python; extra == "docs"
|
|
@@ -102,47 +105,20 @@ Provides-Extra: dev
|
|
|
102
105
|
Requires-Dist: python-dotenv==1.0.0; extra == "dev"
|
|
103
106
|
Requires-Dist: click; extra == "dev"
|
|
104
107
|
Requires-Dist: black==23.7.0; extra == "dev"
|
|
105
|
-
Requires-Dist: isort; extra == "dev"
|
|
108
|
+
Requires-Dist: isort==5.13.2; extra == "dev"
|
|
106
109
|
Requires-Dist: pylint==2.17.4; extra == "dev"
|
|
107
110
|
Requires-Dist: mypy==1.4.1; extra == "dev"
|
|
108
111
|
Requires-Dist: wandb; extra == "dev"
|
|
109
|
-
Requires-Dist: types-PyYAML; extra == "dev"
|
|
110
|
-
Requires-Dist: types-termcolor
|
|
111
|
-
Requires-Dist: types-tabulate; extra == "dev"
|
|
112
|
-
Requires-Dist: types-tqdm; extra == "dev"
|
|
113
|
-
Requires-Dist: lxml-stubs; extra == "dev"
|
|
114
|
-
Requires-Dist: types-Pillow; extra == "dev"
|
|
115
|
-
Requires-Dist: types-urllib3; extra == "dev"
|
|
112
|
+
Requires-Dist: types-PyYAML>=6.0.12.12; extra == "dev"
|
|
113
|
+
Requires-Dist: types-termcolor>=1.1.3; extra == "dev"
|
|
114
|
+
Requires-Dist: types-tabulate>=0.9.0.3; extra == "dev"
|
|
115
|
+
Requires-Dist: types-tqdm>=4.66.0.5; extra == "dev"
|
|
116
|
+
Requires-Dist: lxml-stubs>=0.5.1; extra == "dev"
|
|
117
|
+
Requires-Dist: types-Pillow>=10.2.0.20240406; extra == "dev"
|
|
118
|
+
Requires-Dist: types-urllib3>=1.26.25.14; extra == "dev"
|
|
116
119
|
Provides-Extra: test
|
|
117
|
-
Requires-Dist: pytest; extra == "test"
|
|
120
|
+
Requires-Dist: pytest==8.0.2; extra == "test"
|
|
118
121
|
Requires-Dist: pytest-cov; extra == "test"
|
|
119
|
-
Provides-Extra: hf
|
|
120
|
-
Requires-Dist: catalogue==2.0.7; extra == "hf"
|
|
121
|
-
Requires-Dist: huggingface_hub>=0.12.0; extra == "hf"
|
|
122
|
-
Requires-Dist: importlib-metadata>=4.11.2; extra == "hf"
|
|
123
|
-
Requires-Dist: jsonlines==3.0.0; extra == "hf"
|
|
124
|
-
Requires-Dist: mock==4.0.3; extra == "hf"
|
|
125
|
-
Requires-Dist: networkx>=2.7.1; extra == "hf"
|
|
126
|
-
Requires-Dist: numpy>=1.21; extra == "hf"
|
|
127
|
-
Requires-Dist: packaging>=20.0; extra == "hf"
|
|
128
|
-
Requires-Dist: Pillow>=10.0.0; extra == "hf"
|
|
129
|
-
Requires-Dist: pypdf>=3.16.0; extra == "hf"
|
|
130
|
-
Requires-Dist: pyyaml==6.0; extra == "hf"
|
|
131
|
-
Requires-Dist: pyzmq>=16; extra == "hf"
|
|
132
|
-
Requires-Dist: termcolor>=1.1; extra == "hf"
|
|
133
|
-
Requires-Dist: tabulate>=0.7.7; extra == "hf"
|
|
134
|
-
Requires-Dist: tqdm==4.64.0; extra == "hf"
|
|
135
|
-
Requires-Dist: timm; extra == "hf"
|
|
136
|
-
Requires-Dist: transformers>=4.36.0; extra == "hf"
|
|
137
|
-
Requires-Dist: accelerate; extra == "hf"
|
|
138
|
-
Requires-Dist: python-doctr==0.7.0; extra == "hf"
|
|
139
|
-
Requires-Dist: boto3; extra == "hf"
|
|
140
|
-
Requires-Dist: pdfplumber>=0.7.1; extra == "hf"
|
|
141
|
-
Requires-Dist: fasttext; extra == "hf"
|
|
142
|
-
Requires-Dist: jdeskew; extra == "hf"
|
|
143
|
-
Requires-Dist: apted==1.0.3; extra == "hf"
|
|
144
|
-
Requires-Dist: distance==0.1.3; extra == "hf"
|
|
145
|
-
Requires-Dist: lxml>=4.9.1; extra == "hf"
|
|
146
122
|
|
|
147
123
|
|
|
148
124
|
<p align="center">
|
|
@@ -188,7 +164,9 @@ pipelines. Its core function does not depend on any specific deep learning libra
|
|
|
188
164
|
Check this [notebook](https://github.com/deepdoctection/notebooks/blob/main/Analyzer_Configuration.ipynb) or the
|
|
189
165
|
[docs](https://deepdoctection.readthedocs.io/en/latest/tutorials/analyzer_configuration_notebook/) for more infos.
|
|
190
166
|
- Document layout analysis and table recognition now runs with Torchscript (CPU) as well and Detectron2 is
|
|
191
|
-
not required anymore for basic inference.
|
|
167
|
+
not required anymore for basic inference.
|
|
168
|
+
- [**new**] More angle predictors for determining the rotation of a document based on Tesseract and DocTr
|
|
169
|
+
(not contained in the built-in Analyzer).
|
|
192
170
|
|
|
193
171
|
**deep**doctection provides on top of that methods for pre-processing inputs to models like cropping or resizing and to
|
|
194
172
|
post-process results, like validating duplicate outputs, relating words to detected layout segments or ordering words
|
|
@@ -42,7 +42,9 @@ pipelines. Its core function does not depend on any specific deep learning libra
|
|
|
42
42
|
Check this [notebook](https://github.com/deepdoctection/notebooks/blob/main/Analyzer_Configuration.ipynb) or the
|
|
43
43
|
[docs](https://deepdoctection.readthedocs.io/en/latest/tutorials/analyzer_configuration_notebook/) for more infos.
|
|
44
44
|
- Document layout analysis and table recognition now runs with Torchscript (CPU) as well and Detectron2 is
|
|
45
|
-
not required anymore for basic inference.
|
|
45
|
+
not required anymore for basic inference.
|
|
46
|
+
- [**new**] More angle predictors for determining the rotation of a document based on Tesseract and DocTr
|
|
47
|
+
(not contained in the built-in Analyzer).
|
|
46
48
|
|
|
47
49
|
**deep**doctection provides on top of that methods for pre-processing inputs to models like cropping or resizing and to
|
|
48
50
|
post-process results, like validating duplicate outputs, relating words to detected layout segments or ordering words
|
|
@@ -27,7 +27,7 @@ from .utils.logger import logger
|
|
|
27
27
|
|
|
28
28
|
# pylint: enable=wrong-import-position
|
|
29
29
|
|
|
30
|
-
__version__ = 0.
|
|
30
|
+
__version__ = 0.31
|
|
31
31
|
|
|
32
32
|
_IMPORT_STRUCTURE = {
|
|
33
33
|
"analyzer": [
|
|
@@ -179,6 +179,7 @@ _IMPORT_STRUCTURE = {
|
|
|
179
179
|
"Jdeskewer",
|
|
180
180
|
"DoctrTextlineDetector",
|
|
181
181
|
"DoctrTextRecognizer",
|
|
182
|
+
"DocTrRotationTransformer",
|
|
182
183
|
"FasttextLangDetector",
|
|
183
184
|
"HFDetrDerivedDetector",
|
|
184
185
|
"HFLayoutLmTokenClassifierBase",
|
|
@@ -194,6 +195,7 @@ _IMPORT_STRUCTURE = {
|
|
|
194
195
|
"ModelDownloadManager",
|
|
195
196
|
"PdfPlumberTextDetector",
|
|
196
197
|
"TesseractOcrDetector",
|
|
198
|
+
"TesseractRotationTransformer",
|
|
197
199
|
"TextractOcrDetector",
|
|
198
200
|
"TPFrcnnDetector",
|
|
199
201
|
],
|
|
@@ -279,7 +281,7 @@ _IMPORT_STRUCTURE = {
|
|
|
279
281
|
"PubtablesSegmentationService",
|
|
280
282
|
"SegmentationResult",
|
|
281
283
|
"TextExtractionService",
|
|
282
|
-
"
|
|
284
|
+
"SimpleTransformService",
|
|
283
285
|
],
|
|
284
286
|
"train": [
|
|
285
287
|
"D2Trainer",
|
|
@@ -343,6 +345,8 @@ _IMPORT_STRUCTURE = {
|
|
|
343
345
|
"get_opencv_requirement",
|
|
344
346
|
"pillow_available",
|
|
345
347
|
"get_pillow_requirement",
|
|
348
|
+
"spacy_available",
|
|
349
|
+
"get_spacy_requirement",
|
|
346
350
|
"load_image_from_file",
|
|
347
351
|
"load_bytes_from_pdf_file",
|
|
348
352
|
"get_load_image_func",
|
|
@@ -54,7 +54,7 @@ from ..utils.file_utils import (
|
|
|
54
54
|
tf_available,
|
|
55
55
|
)
|
|
56
56
|
from ..utils.fs import get_configs_dir_path, get_package_path, mkdir_p
|
|
57
|
-
from ..utils.logger import logger
|
|
57
|
+
from ..utils.logger import LoggingRecord, logger
|
|
58
58
|
from ..utils.metacfg import AttrDict, set_config_by_yaml
|
|
59
59
|
from ..utils.settings import CellType, LayoutType
|
|
60
60
|
from ..utils.transform import PadTransform
|
|
@@ -113,11 +113,12 @@ def config_sanity_checks(cfg: AttrDict) -> None:
|
|
|
113
113
|
"""Some config sanity checks"""
|
|
114
114
|
if cfg.USE_PDF_MINER and cfg.USE_OCR and cfg.OCR.USE_DOCTR:
|
|
115
115
|
raise ValueError("Configuration USE_PDF_MINER= True and USE_OCR=True and USE_DOCTR=True is not allowed")
|
|
116
|
-
if cfg.
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
116
|
+
if cfg.USE_OCR:
|
|
117
|
+
if cfg.OCR.USE_TESSERACT + cfg.OCR.USE_DOCTR + cfg.OCR.USE_TEXTRACT != 1:
|
|
118
|
+
raise ValueError(
|
|
119
|
+
"Choose either OCR.USE_TESSERACT=True or OCR.USE_DOCTR=True or OCR.USE_TEXTRACT=True "
|
|
120
|
+
"and set the other two to False. Only one OCR system can be activated."
|
|
121
|
+
)
|
|
121
122
|
|
|
122
123
|
|
|
123
124
|
def build_detector(
|
|
@@ -231,9 +232,13 @@ def build_ocr(cfg: AttrDict) -> Union[TesseractOcrDetector, DoctrTextRecognizer,
|
|
|
231
232
|
weights = cfg.OCR.WEIGHTS.DOCTR_RECOGNITION.TF if cfg.LIB == "TF" else cfg.OCR.WEIGHTS.DOCTR_RECOGNITION.PT
|
|
232
233
|
weights_path = ModelDownloadManager.maybe_download_weights_and_configs(weights)
|
|
233
234
|
profile = ModelCatalog.get_profile(weights)
|
|
235
|
+
# get_full_path_configs will complete the path even if the model is not registered
|
|
236
|
+
config_path = ModelCatalog.get_full_path_configs(weights) if profile.config is not None else None
|
|
234
237
|
if profile.architecture is None:
|
|
235
238
|
raise ValueError("model profile.architecture must be specified")
|
|
236
|
-
return DoctrTextRecognizer(
|
|
239
|
+
return DoctrTextRecognizer(
|
|
240
|
+
profile.architecture, weights_path, cfg.DEVICE, lib=cfg.LIB, path_config_json=config_path
|
|
241
|
+
)
|
|
237
242
|
if cfg.OCR.USE_TEXTRACT:
|
|
238
243
|
credentials_kwargs = {
|
|
239
244
|
"aws_access_key_id": environ.get("ACCESS_KEY"),
|
|
@@ -445,7 +450,7 @@ def get_dd_analyzer(
|
|
|
445
450
|
cfg.update_args(config_overwrite)
|
|
446
451
|
|
|
447
452
|
config_sanity_checks(cfg)
|
|
448
|
-
logger.info("Config: \n
|
|
453
|
+
logger.info(LoggingRecord(f"Config: \n {str(cfg)}", cfg.to_dict())) # type: ignore
|
|
449
454
|
|
|
450
455
|
# will silent all TP logging while building the tower
|
|
451
456
|
if tensorpack_available():
|
|
@@ -17,25 +17,6 @@ from typing import Any, Iterator, no_type_check
|
|
|
17
17
|
from ..utils.utils import get_rng
|
|
18
18
|
|
|
19
19
|
|
|
20
|
-
class DataFlowTerminated(BaseException):
|
|
21
|
-
"""
|
|
22
|
-
An exception indicating that the DataFlow is unable to produce any more
|
|
23
|
-
data, i.e. something wrong happened so that calling `__iter__`
|
|
24
|
-
cannot give a valid iterator anymore.
|
|
25
|
-
In most DataFlow this will never be raised.
|
|
26
|
-
"""
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
class DataFlowResetStateNotCalled(BaseException):
|
|
30
|
-
"""
|
|
31
|
-
An exception indicating that `reset_state()` has not been called before starting
|
|
32
|
-
iteration.
|
|
33
|
-
"""
|
|
34
|
-
|
|
35
|
-
def __init__(self) -> None:
|
|
36
|
-
super().__init__("Iterating a dataflow requires .reset_state() to be called first")
|
|
37
|
-
|
|
38
|
-
|
|
39
20
|
class DataFlowReentrantGuard:
|
|
40
21
|
"""
|
|
41
22
|
A tool to enforce non-reentrancy.
|
|
@@ -25,10 +25,11 @@ from typing import Any, Callable, Iterable, Iterator, List, Optional
|
|
|
25
25
|
|
|
26
26
|
import numpy as np
|
|
27
27
|
|
|
28
|
-
from ..utils.
|
|
28
|
+
from ..utils.error import DataFlowResetStateNotCalledError
|
|
29
|
+
from ..utils.logger import LoggingRecord, logger
|
|
29
30
|
from ..utils.tqdm import get_tqdm
|
|
30
31
|
from ..utils.utils import get_rng
|
|
31
|
-
from .base import DataFlow, DataFlowReentrantGuard,
|
|
32
|
+
from .base import DataFlow, DataFlowReentrantGuard, ProxyDataFlow
|
|
32
33
|
from .serialize import DataFromIterable, DataFromList
|
|
33
34
|
|
|
34
35
|
__all__ = ["CacheData", "CustomDataFromList", "CustomDataFromIterable"]
|
|
@@ -65,7 +66,7 @@ class CacheData(ProxyDataFlow):
|
|
|
65
66
|
|
|
66
67
|
def __iter__(self) -> Iterator[Any]:
|
|
67
68
|
if self._guard is None:
|
|
68
|
-
raise
|
|
69
|
+
raise DataFlowResetStateNotCalledError()
|
|
69
70
|
|
|
70
71
|
with self._guard:
|
|
71
72
|
if self.buffer:
|
|
@@ -139,10 +140,10 @@ class CustomDataFromList(DataFromList):
|
|
|
139
140
|
|
|
140
141
|
def __iter__(self) -> Iterator[Any]:
|
|
141
142
|
if self.rng is None:
|
|
142
|
-
raise
|
|
143
|
+
raise DataFlowResetStateNotCalledError()
|
|
143
144
|
if self.rebalance_func is not None:
|
|
144
145
|
lst_tmp = self.rebalance_func(self.lst)
|
|
145
|
-
logger.info("subset size after re-balancing:
|
|
146
|
+
logger.info(LoggingRecord(f"CustomDataFromList: subset size after re-balancing: {len(lst_tmp)}"))
|
|
146
147
|
else:
|
|
147
148
|
lst_tmp = self.lst
|
|
148
149
|
|
|
@@ -23,16 +23,20 @@ import itertools
|
|
|
23
23
|
import json
|
|
24
24
|
import os
|
|
25
25
|
from collections import defaultdict
|
|
26
|
+
from pathlib import Path
|
|
26
27
|
from typing import DefaultDict, Dict, List, Optional, Sequence, Union
|
|
27
28
|
|
|
28
29
|
from jsonlines import Reader, Writer
|
|
30
|
+
from tabulate import tabulate
|
|
31
|
+
from termcolor import colored
|
|
29
32
|
|
|
30
33
|
from ..utils.context import timed_operation
|
|
31
34
|
from ..utils.detection_types import JsonDict, Pathlike
|
|
35
|
+
from ..utils.error import FileExtensionError
|
|
32
36
|
from ..utils.identifier import get_uuid_from_str
|
|
33
37
|
from ..utils.pdf_utils import PDFStreamer
|
|
34
38
|
from ..utils.tqdm import get_tqdm
|
|
35
|
-
from ..utils.utils import
|
|
39
|
+
from ..utils.utils import is_file_extension
|
|
36
40
|
from .base import DataFlow
|
|
37
41
|
from .common import FlattenData, JoinData, MapData
|
|
38
42
|
from .custom import CacheData, CustomDataFromIterable, CustomDataFromList
|
|
@@ -186,6 +190,11 @@ class SerializerFiles:
|
|
|
186
190
|
df2: DataFlow
|
|
187
191
|
df3: DataFlow
|
|
188
192
|
|
|
193
|
+
if isinstance(path, str):
|
|
194
|
+
path = Path(path)
|
|
195
|
+
if not path.exists():
|
|
196
|
+
raise NotADirectoryError(f"The path {path} to the directory or file does not exist")
|
|
197
|
+
|
|
189
198
|
if shuffle:
|
|
190
199
|
sort = False
|
|
191
200
|
it1 = os.walk(path, topdown=False)
|
|
@@ -217,7 +226,7 @@ class SerializerFiles:
|
|
|
217
226
|
"""
|
|
218
227
|
Not implemented
|
|
219
228
|
"""
|
|
220
|
-
raise NotImplementedError
|
|
229
|
+
raise NotImplementedError()
|
|
221
230
|
|
|
222
231
|
|
|
223
232
|
class CocoParser:
|
|
@@ -277,8 +286,14 @@ class CocoParser:
|
|
|
277
286
|
"""
|
|
278
287
|
Print information about the annotation file.
|
|
279
288
|
"""
|
|
289
|
+
rows = []
|
|
280
290
|
for key, value in self.dataset["info"].items():
|
|
281
|
-
|
|
291
|
+
row = [key, value]
|
|
292
|
+
rows.append(row)
|
|
293
|
+
|
|
294
|
+
header = ["key", "value"]
|
|
295
|
+
table = tabulate(rows, headers=header, tablefmt="fancy_grid", stralign="left", numalign="left")
|
|
296
|
+
print(colored(table, "cyan"))
|
|
282
297
|
|
|
283
298
|
def get_ann_ids(
|
|
284
299
|
self,
|
|
@@ -493,7 +508,7 @@ class SerializerCoco:
|
|
|
493
508
|
"""
|
|
494
509
|
Not implemented
|
|
495
510
|
"""
|
|
496
|
-
raise NotImplementedError
|
|
511
|
+
raise NotImplementedError()
|
|
497
512
|
|
|
498
513
|
|
|
499
514
|
class SerializerPdfDoc:
|
|
@@ -541,7 +556,7 @@ class SerializerPdfDoc:
|
|
|
541
556
|
"""
|
|
542
557
|
Not implemented
|
|
543
558
|
"""
|
|
544
|
-
raise NotImplementedError
|
|
559
|
+
raise NotImplementedError()
|
|
545
560
|
|
|
546
561
|
@staticmethod
|
|
547
562
|
def split(path: Pathlike, path_target: Optional[Pathlike] = None, max_datapoint: Optional[int] = None) -> None:
|
|
@@ -28,8 +28,9 @@ from typing import Any, Callable, Iterator, List, no_type_check
|
|
|
28
28
|
import zmq
|
|
29
29
|
|
|
30
30
|
from ..utils.concurrency import StoppableThread, enable_death_signal, start_proc_mask_signal
|
|
31
|
-
from ..utils.
|
|
32
|
-
from .
|
|
31
|
+
from ..utils.error import DataFlowTerminatedError
|
|
32
|
+
from ..utils.logger import LoggingRecord, logger
|
|
33
|
+
from .base import DataFlow, DataFlowReentrantGuard, ProxyDataFlow
|
|
33
34
|
from .common import RepeatedData
|
|
34
35
|
from .serialize import PickleSerializer
|
|
35
36
|
|
|
@@ -48,15 +49,15 @@ def _zmq_catch_error(name):
|
|
|
48
49
|
try:
|
|
49
50
|
yield
|
|
50
51
|
except zmq.ContextTerminated as exc:
|
|
51
|
-
logger.info("[
|
|
52
|
-
raise
|
|
52
|
+
logger.info(LoggingRecord(f"_zmq_catch_error: [{name}] Context terminated."))
|
|
53
|
+
raise DataFlowTerminatedError() from exc
|
|
53
54
|
except zmq.ZMQError as exc:
|
|
54
55
|
if exc.errno == errno.ENOTSOCK: # socket closed
|
|
55
|
-
logger.info("[
|
|
56
|
-
raise
|
|
57
|
-
raise ValueError from exc
|
|
56
|
+
logger.info(LoggingRecord(f"_zmq_catch_error: [{name}] Socket closed."))
|
|
57
|
+
raise DataFlowTerminatedError() from exc
|
|
58
|
+
raise ValueError() from exc
|
|
58
59
|
except Exception as exc:
|
|
59
|
-
raise ValueError from exc
|
|
60
|
+
raise ValueError() from exc
|
|
60
61
|
|
|
61
62
|
|
|
62
63
|
@no_type_check
|
|
@@ -78,8 +79,8 @@ def _get_pipe_name(name):
|
|
|
78
79
|
class _ParallelMapData(ProxyDataFlow, ABC):
|
|
79
80
|
def __init__(self, df: DataFlow, buffer_size: int, strict: bool = False) -> None:
|
|
80
81
|
super().__init__(df)
|
|
81
|
-
if
|
|
82
|
-
raise ValueError("buffer_size must be a positive number")
|
|
82
|
+
if buffer_size <= 0:
|
|
83
|
+
raise ValueError(f"buffer_size must be a positive number, got {buffer_size}")
|
|
83
84
|
self._buffer_size = buffer_size
|
|
84
85
|
self._buffer_occupancy = 0 # actual #elements in buffer, only useful in strict mode
|
|
85
86
|
self._strict = strict
|
|
@@ -95,12 +96,12 @@ class _ParallelMapData(ProxyDataFlow, ABC):
|
|
|
95
96
|
@no_type_check
|
|
96
97
|
@abstractmethod
|
|
97
98
|
def _recv(self):
|
|
98
|
-
raise NotImplementedError
|
|
99
|
+
raise NotImplementedError()
|
|
99
100
|
|
|
100
101
|
@no_type_check
|
|
101
102
|
@abstractmethod
|
|
102
103
|
def _send(self, dp: Any):
|
|
103
|
-
raise NotImplementedError
|
|
104
|
+
raise NotImplementedError()
|
|
104
105
|
|
|
105
106
|
@no_type_check
|
|
106
107
|
def _recv_filter_none(self):
|
|
@@ -312,7 +313,8 @@ class _MultiProcessZMQDataFlow(DataFlow, ABC):
|
|
|
312
313
|
for x in self._procs:
|
|
313
314
|
x.terminate()
|
|
314
315
|
x.join(5)
|
|
315
|
-
logger.info("
|
|
316
|
+
logger.info(LoggingRecord(f"_MultiProcessZMQDataFlow [{type(self).__name__}] successfully cleaned-up."))
|
|
317
|
+
|
|
316
318
|
except Exception: # pylint: disable=W0703
|
|
317
319
|
pass
|
|
318
320
|
|
|
@@ -323,9 +325,12 @@ def _bind_guard(sock, name):
|
|
|
323
325
|
sock.bind(name)
|
|
324
326
|
except zmq.ZMQError:
|
|
325
327
|
logger.error(
|
|
326
|
-
|
|
327
|
-
|
|
328
|
+
LoggingRecord(
|
|
329
|
+
f"ZMQError in socket.bind('{name}'). Perhaps you're using pipes on a non-local file system. "
|
|
330
|
+
"See documentation of MultiProcessRunnerZMQ for more information."
|
|
331
|
+
)
|
|
328
332
|
)
|
|
333
|
+
|
|
329
334
|
raise
|
|
330
335
|
|
|
331
336
|
|
|
@@ -394,8 +399,8 @@ class MultiProcessMapData(_ParallelMapData, _MultiProcessZMQDataFlow):
|
|
|
394
399
|
|
|
395
400
|
_ParallelMapData.__init__(self, df, buffer_size, strict)
|
|
396
401
|
_MultiProcessZMQDataFlow.__init__(self)
|
|
397
|
-
if
|
|
398
|
-
raise ValueError("num_proc must be a positive number")
|
|
402
|
+
if num_proc <= 0:
|
|
403
|
+
raise ValueError(f"num_proc must be a positive number, got {num_proc}")
|
|
399
404
|
self.num_proc = num_proc
|
|
400
405
|
self.map_func = map_func
|
|
401
406
|
self._strict = strict
|
|
@@ -16,7 +16,8 @@ from typing import Any, Iterable, Iterator, List, Optional, Tuple, Union
|
|
|
16
16
|
|
|
17
17
|
import numpy as np
|
|
18
18
|
|
|
19
|
-
from .
|
|
19
|
+
from ..utils.error import DataFlowResetStateNotCalledError
|
|
20
|
+
from .base import DataFlow, RNGDataFlow
|
|
20
21
|
|
|
21
22
|
|
|
22
23
|
class DataFromList(RNGDataFlow):
|
|
@@ -44,7 +45,7 @@ class DataFromList(RNGDataFlow):
|
|
|
44
45
|
for k in idxs:
|
|
45
46
|
yield self.lst[k]
|
|
46
47
|
else:
|
|
47
|
-
raise
|
|
48
|
+
raise DataFlowResetStateNotCalledError()
|
|
48
49
|
|
|
49
50
|
|
|
50
51
|
class DataFromIterable(DataFlow):
|
|
@@ -63,7 +64,7 @@ class DataFromIterable(DataFlow):
|
|
|
63
64
|
|
|
64
65
|
def __len__(self) -> int:
|
|
65
66
|
if self._len is None:
|
|
66
|
-
raise NotImplementedError
|
|
67
|
+
raise NotImplementedError()
|
|
67
68
|
return self._len
|
|
68
69
|
|
|
69
70
|
def __iter__(self) -> Iterator[Any]:
|
|
@@ -107,7 +108,7 @@ class FakeData(RNGDataFlow):
|
|
|
107
108
|
|
|
108
109
|
def __iter__(self) -> Iterator[Any]:
|
|
109
110
|
if self.rng is None:
|
|
110
|
-
raise
|
|
111
|
+
raise DataFlowResetStateNotCalledError()
|
|
111
112
|
if self.random:
|
|
112
113
|
for _ in range(self._size):
|
|
113
114
|
val = []
|
|
@@ -23,7 +23,7 @@ from typing import Any, Optional, Tuple, Union
|
|
|
23
23
|
import numpy as np
|
|
24
24
|
import numpy.typing as npt
|
|
25
25
|
|
|
26
|
-
from ..utils.logger import logger
|
|
26
|
+
from ..utils.logger import LoggingRecord, logger
|
|
27
27
|
from ..utils.tqdm import get_tqdm
|
|
28
28
|
from .base import DataFlow, ProxyDataFlow
|
|
29
29
|
|
|
@@ -95,7 +95,7 @@ class MeanFromDataFlow(ProxyDataFlow):
|
|
|
95
95
|
self.df.reset_state()
|
|
96
96
|
itr = iter(self.df)
|
|
97
97
|
|
|
98
|
-
logger.info("Calculating mean")
|
|
98
|
+
logger.info(LoggingRecord("Calculating mean"))
|
|
99
99
|
|
|
100
100
|
len_df: Optional[int]
|
|
101
101
|
try:
|
|
@@ -139,7 +139,7 @@ class MeanFromDataFlow(ProxyDataFlow):
|
|
|
139
139
|
if n == self.max_datapoints:
|
|
140
140
|
break
|
|
141
141
|
|
|
142
|
-
logger.info("Mean from
|
|
142
|
+
logger.info(LoggingRecord(f"Mean from {n} datapoints along axis {self.axis}: {self.mean}"))
|
|
143
143
|
|
|
144
144
|
return self.mean
|
|
145
145
|
|
|
@@ -216,7 +216,7 @@ class StdFromDataFlow(ProxyDataFlow):
|
|
|
216
216
|
self.df.reset_state()
|
|
217
217
|
itr = iter(self.df)
|
|
218
218
|
|
|
219
|
-
logger.info("Calculating standard deviation")
|
|
219
|
+
logger.info(LoggingRecord("Calculating standard deviation"))
|
|
220
220
|
try:
|
|
221
221
|
len_df = len(self.df)
|
|
222
222
|
except NotImplementedError:
|
|
@@ -266,6 +266,6 @@ class StdFromDataFlow(ProxyDataFlow):
|
|
|
266
266
|
var = (ex2 - (ex * ex) / n) / (n - 1)
|
|
267
267
|
self.std = np.sqrt(var)
|
|
268
268
|
|
|
269
|
-
logger.info("Standard deviation from
|
|
269
|
+
logger.info(LoggingRecord(f"Standard deviation from {n} datapoints along axis {self.axis}: {self.std}"))
|
|
270
270
|
|
|
271
271
|
return self.std
|