deepdoctection 0.30__tar.gz → 0.31__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of deepdoctection might be problematic. Click here for more details.
- {deepdoctection-0.30 → deepdoctection-0.31}/PKG-INFO +33 -58
- {deepdoctection-0.30 → deepdoctection-0.31}/README.md +3 -1
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/__init__.py +4 -2
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/analyzer/dd.py +6 -5
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/dataflow/base.py +0 -19
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/dataflow/custom.py +4 -3
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/dataflow/custom_serialize.py +14 -5
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/dataflow/parallel_map.py +12 -11
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/dataflow/serialize.py +5 -4
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/datapoint/annotation.py +33 -12
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/datapoint/box.py +1 -4
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/datapoint/convert.py +3 -1
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/datapoint/image.py +66 -29
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/datapoint/view.py +57 -25
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/datasets/adapter.py +1 -1
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/datasets/base.py +83 -10
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/datasets/dataflow_builder.py +1 -1
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/datasets/info.py +2 -2
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/datasets/instances/layouttest.py +2 -7
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/eval/accmetric.py +1 -1
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/eval/base.py +5 -4
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/eval/eval.py +2 -2
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/eval/tp_eval_callback.py +5 -4
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/base.py +39 -13
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/d2detect.py +164 -64
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/deskew.py +32 -7
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/doctrocr.py +227 -39
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/fastlang.py +45 -7
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/hfdetr.py +90 -33
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/hflayoutlm.py +109 -22
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/pdftext.py +2 -1
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/pt/ptutils.py +3 -2
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/tessocr.py +134 -22
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/texocr.py +2 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/tp/tpcompat.py +4 -4
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/preproc.py +2 -7
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/tpdetect.py +50 -23
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/mapper/d2struct.py +1 -1
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/mapper/hfstruct.py +1 -1
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/mapper/laylmstruct.py +1 -1
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/mapper/maputils.py +13 -2
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/mapper/prodigystruct.py +1 -1
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/mapper/pubstruct.py +10 -10
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/mapper/tpstruct.py +1 -1
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/pipe/anngen.py +35 -8
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/pipe/base.py +53 -19
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/pipe/cell.py +29 -8
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/pipe/common.py +12 -4
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/pipe/doctectionpipe.py +2 -2
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/pipe/language.py +3 -2
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/pipe/layout.py +3 -2
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/pipe/lm.py +2 -2
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/pipe/refine.py +18 -10
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/pipe/segment.py +21 -16
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/pipe/text.py +14 -8
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/pipe/transform.py +16 -9
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/train/d2_frcnn_train.py +15 -12
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/train/hf_detr_train.py +8 -6
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/train/hf_layoutlm_train.py +16 -11
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/utils/__init__.py +3 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/utils/concurrency.py +1 -1
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/utils/context.py +2 -2
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/utils/env_info.py +55 -22
- deepdoctection-0.31/deepdoctection/utils/error.py +84 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/utils/file_utils.py +4 -15
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/utils/fs.py +7 -7
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/utils/pdf_utils.py +5 -4
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/utils/settings.py +5 -1
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/utils/transform.py +1 -1
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/utils/utils.py +0 -6
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/utils/viz.py +44 -2
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection.egg-info/PKG-INFO +33 -58
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection.egg-info/SOURCES.txt +1 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection.egg-info/requires.txt +29 -57
- {deepdoctection-0.30 → deepdoctection-0.31}/setup.py +20 -21
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/conftest.py +0 -1
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/data.py +88 -48
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/datapoint/test_image.py +50 -4
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/datasets/instances/test_funsd.py +2 -2
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/datasets/test_info.py +1 -1
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/extern/conftest.py +10 -1
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/extern/data.py +2 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/extern/test_deskew.py +13 -3
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/extern/test_doctrocr.py +46 -2
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/extern/test_hfdetr.py +2 -2
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/extern/test_tessocr.py +62 -3
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/mapper/data.py +6 -6
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/pipe/test_anngen.py +6 -6
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/pipe/test_cell.py +21 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/pipe/test_layout.py +1 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/pipe/test_text.py +4 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/pipe/test_transform.py +2 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests_d2/test_d2detect.py +1 -1
- {deepdoctection-0.30 → deepdoctection-0.31}/LICENSE +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/analyzer/__init__.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/configs/__init__.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/configs/conf_dd_one.yaml +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/configs/conf_tesseract.yaml +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/dataflow/__init__.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/dataflow/common.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/dataflow/stats.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/datapoint/__init__.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/datasets/__init__.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/datasets/instances/__init__.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/datasets/instances/doclaynet.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/datasets/instances/fintabnet.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/datasets/instances/funsd.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/datasets/instances/iiitar13k.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/datasets/instances/publaynet.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/datasets/instances/pubtables1m.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/datasets/instances/pubtabnet.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/datasets/instances/rvlcdip.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/datasets/instances/xfund.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/datasets/instances/xsl/__init__.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/datasets/instances/xsl/pascal_voc.xsl +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/datasets/registry.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/datasets/save.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/eval/__init__.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/eval/cocometric.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/eval/registry.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/eval/tedsmetric.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/__init__.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/model.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/pt/__init__.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/pt/nms.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/tp/__init__.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/tp/tfutils.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/__init__.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/common.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/config/__init__.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/config/config.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/modeling/__init__.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/modeling/generalized_rcnn.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/modeling/model_cascade.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/modeling/model_rpn.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/predict.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/utils/__init__.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/utils/box_ops.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/extern/tp/tpfrcnn/utils/np_box_ops.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/mapper/__init__.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/mapper/cats.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/mapper/cocostruct.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/mapper/match.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/mapper/misc.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/mapper/pascalstruct.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/mapper/xfundstruct.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/pipe/__init__.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/pipe/concurrency.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/pipe/order.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/pipe/registry.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/py.typed +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/train/__init__.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/train/tp_frcnn_train.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/utils/detection_types.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/utils/develop.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/utils/identifier.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/utils/logger.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/utils/metacfg.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection/utils/tqdm.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection.egg-info/dependency_links.txt +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/deepdoctection.egg-info/top_level.txt +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/setup.cfg +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/__init__.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/analyzer/__init__.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/analyzer/test_dd.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/dataflow/__init__.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/dataflow/conftest.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/dataflow/test_common.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/dataflow/test_custom.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/dataflow/test_custom_serialize.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/dataflow/test_parallel_map.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/dataflow/test_stats.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/datapoint/__init__.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/datapoint/conftest.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/datapoint/test_annotation.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/datapoint/test_box.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/datapoint/test_convert.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/datapoint/test_view.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/datasets/__init__.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/datasets/instances/__init__.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/datasets/instances/conftest.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/datasets/instances/test_doclaynet.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/datasets/instances/test_fintabnet.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/datasets/instances/test_iiitar13k.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/datasets/instances/test_layouttest.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/datasets/instances/test_publaynet.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/datasets/instances/test_pubtables1m.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/datasets/instances/test_pubtabnet.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/datasets/instances/test_rvlcdip.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/datasets/test_adapter.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/datasets/test_registry.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/eval/__init__.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/eval/conftest.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/eval/test_accmetric.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/eval/test_cocometric.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/eval/test_eval.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/eval/test_registry.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/eval/test_tedsmetric.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/extern/__init__.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/extern/test_fastlang.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/extern/test_hflayoutlm.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/extern/test_pdftext.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/extern/test_texocr.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/extern/test_tpdetect.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/mapper/__init__.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/mapper/conftest.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/mapper/test_cats.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/mapper/test_cocostruct.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/mapper/test_d2struct.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/mapper/test_hfstruct.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/mapper/test_iiitar13k.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/mapper/test_laylmstruct.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/mapper/test_misc.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/mapper/test_prodigystruct.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/mapper/test_pubstruct.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/mapper/test_tpstruct.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/mapper/test_utils.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/mapper/test_xfundstruct.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/pipe/__init__.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/pipe/test_common.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/pipe/test_language.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/pipe/test_lm.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/pipe/test_order.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/pipe/test_refine.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/pipe/test_registry.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/pipe/test_segment.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/test_utils.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/train/__init__.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/train/conftest.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/train/test_d2_frcnn_train.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests/train/test_tp_frcnn_train.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests_d2/__init__.py +0 -0
- {deepdoctection-0.30 → deepdoctection-0.31}/tests_d2/conftest.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: deepdoctection
|
|
3
|
-
Version: 0.30
|
|
3
|
+
Version: 0.31
|
|
4
4
|
Summary: Repository for Document AI
|
|
5
5
|
Home-page: https://github.com/deepdoctection/deepdoctection
|
|
6
6
|
Author: Dr. Janis Meyer
|
|
@@ -17,9 +17,9 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
|
17
17
|
Requires-Python: >=3.8
|
|
18
18
|
Description-Content-Type: text/markdown
|
|
19
19
|
License-File: LICENSE
|
|
20
|
-
Requires-Dist: catalogue==2.0.7
|
|
20
|
+
Requires-Dist: catalogue==2.0.10
|
|
21
21
|
Requires-Dist: huggingface_hub>=0.12.0
|
|
22
|
-
Requires-Dist: importlib-metadata>=4.11.2
|
|
22
|
+
Requires-Dist: importlib-metadata>=5.0.0
|
|
23
23
|
Requires-Dist: jsonlines==3.1.0
|
|
24
24
|
Requires-Dist: mock==4.0.3
|
|
25
25
|
Requires-Dist: networkx>=2.7.1
|
|
@@ -27,15 +27,15 @@ Requires-Dist: numpy>=1.21
|
|
|
27
27
|
Requires-Dist: packaging>=20.0
|
|
28
28
|
Requires-Dist: Pillow>=10.0.0
|
|
29
29
|
Requires-Dist: pypdf>=3.16.0
|
|
30
|
-
Requires-Dist: pyyaml==6.0
|
|
30
|
+
Requires-Dist: pyyaml>=6.0.1
|
|
31
31
|
Requires-Dist: pyzmq>=16
|
|
32
32
|
Requires-Dist: termcolor>=1.1
|
|
33
33
|
Requires-Dist: tabulate>=0.7.7
|
|
34
34
|
Requires-Dist: tqdm==4.64.0
|
|
35
35
|
Provides-Extra: tf
|
|
36
|
-
Requires-Dist: catalogue==2.0.7; extra == "tf"
|
|
36
|
+
Requires-Dist: catalogue==2.0.10; extra == "tf"
|
|
37
37
|
Requires-Dist: huggingface_hub>=0.12.0; extra == "tf"
|
|
38
|
-
Requires-Dist: importlib-metadata>=4.11.2; extra == "tf"
|
|
38
|
+
Requires-Dist: importlib-metadata>=5.0.0; extra == "tf"
|
|
39
39
|
Requires-Dist: jsonlines==3.1.0; extra == "tf"
|
|
40
40
|
Requires-Dist: mock==4.0.3; extra == "tf"
|
|
41
41
|
Requires-Dist: networkx>=2.7.1; extra == "tf"
|
|
@@ -43,12 +43,12 @@ Requires-Dist: numpy>=1.21; extra == "tf"
|
|
|
43
43
|
Requires-Dist: packaging>=20.0; extra == "tf"
|
|
44
44
|
Requires-Dist: Pillow>=10.0.0; extra == "tf"
|
|
45
45
|
Requires-Dist: pypdf>=3.16.0; extra == "tf"
|
|
46
|
-
Requires-Dist: pyyaml==6.0; extra == "tf"
|
|
46
|
+
Requires-Dist: pyyaml>=6.0.1; extra == "tf"
|
|
47
47
|
Requires-Dist: pyzmq>=16; extra == "tf"
|
|
48
48
|
Requires-Dist: termcolor>=1.1; extra == "tf"
|
|
49
49
|
Requires-Dist: tabulate>=0.7.7; extra == "tf"
|
|
50
50
|
Requires-Dist: tqdm==4.64.0; extra == "tf"
|
|
51
|
-
Requires-Dist: tensorpack; extra == "tf"
|
|
51
|
+
Requires-Dist: tensorpack==0.11; extra == "tf"
|
|
52
52
|
Requires-Dist: protobuf==3.20.1; extra == "tf"
|
|
53
53
|
Requires-Dist: tensorflow-addons>=0.17.1; extra == "tf"
|
|
54
54
|
Requires-Dist: tf2onnx>=1.9.2; extra == "tf"
|
|
@@ -56,15 +56,15 @@ Requires-Dist: python-doctr==0.7.0; extra == "tf"
|
|
|
56
56
|
Requires-Dist: pycocotools>=2.0.2; extra == "tf"
|
|
57
57
|
Requires-Dist: boto3; extra == "tf"
|
|
58
58
|
Requires-Dist: pdfplumber>=0.7.1; extra == "tf"
|
|
59
|
-
Requires-Dist: fasttext; extra == "tf"
|
|
60
|
-
Requires-Dist: jdeskew; extra == "tf"
|
|
59
|
+
Requires-Dist: fasttext==0.9.2; extra == "tf"
|
|
60
|
+
Requires-Dist: jdeskew>=0.2.2; extra == "tf"
|
|
61
61
|
Requires-Dist: apted==1.0.3; extra == "tf"
|
|
62
62
|
Requires-Dist: distance==0.1.3; extra == "tf"
|
|
63
63
|
Requires-Dist: lxml>=4.9.1; extra == "tf"
|
|
64
64
|
Provides-Extra: pt
|
|
65
|
-
Requires-Dist: catalogue==2.0.7; extra == "pt"
|
|
65
|
+
Requires-Dist: catalogue==2.0.10; extra == "pt"
|
|
66
66
|
Requires-Dist: huggingface_hub>=0.12.0; extra == "pt"
|
|
67
|
-
Requires-Dist: importlib-metadata>=4.11.2; extra == "pt"
|
|
67
|
+
Requires-Dist: importlib-metadata>=5.0.0; extra == "pt"
|
|
68
68
|
Requires-Dist: jsonlines==3.1.0; extra == "pt"
|
|
69
69
|
Requires-Dist: mock==4.0.3; extra == "pt"
|
|
70
70
|
Requires-Dist: networkx>=2.7.1; extra == "pt"
|
|
@@ -72,31 +72,31 @@ Requires-Dist: numpy>=1.21; extra == "pt"
|
|
|
72
72
|
Requires-Dist: packaging>=20.0; extra == "pt"
|
|
73
73
|
Requires-Dist: Pillow>=10.0.0; extra == "pt"
|
|
74
74
|
Requires-Dist: pypdf>=3.16.0; extra == "pt"
|
|
75
|
-
Requires-Dist: pyyaml==6.0; extra == "pt"
|
|
75
|
+
Requires-Dist: pyyaml>=6.0.1; extra == "pt"
|
|
76
76
|
Requires-Dist: pyzmq>=16; extra == "pt"
|
|
77
77
|
Requires-Dist: termcolor>=1.1; extra == "pt"
|
|
78
78
|
Requires-Dist: tabulate>=0.7.7; extra == "pt"
|
|
79
79
|
Requires-Dist: tqdm==4.64.0; extra == "pt"
|
|
80
|
-
Requires-Dist: timm; extra == "pt"
|
|
80
|
+
Requires-Dist: timm>=0.9.16; extra == "pt"
|
|
81
81
|
Requires-Dist: transformers>=4.36.0; extra == "pt"
|
|
82
|
-
Requires-Dist: accelerate; extra == "pt"
|
|
82
|
+
Requires-Dist: accelerate>=0.29.1; extra == "pt"
|
|
83
83
|
Requires-Dist: python-doctr==0.7.0; extra == "pt"
|
|
84
84
|
Requires-Dist: boto3; extra == "pt"
|
|
85
85
|
Requires-Dist: pdfplumber>=0.7.1; extra == "pt"
|
|
86
|
-
Requires-Dist: fasttext; extra == "pt"
|
|
87
|
-
Requires-Dist: jdeskew; extra == "pt"
|
|
86
|
+
Requires-Dist: fasttext==0.9.2; extra == "pt"
|
|
87
|
+
Requires-Dist: jdeskew>=0.2.2; extra == "pt"
|
|
88
88
|
Requires-Dist: apted==1.0.3; extra == "pt"
|
|
89
89
|
Requires-Dist: distance==0.1.3; extra == "pt"
|
|
90
90
|
Requires-Dist: lxml>=4.9.1; extra == "pt"
|
|
91
91
|
Provides-Extra: docs
|
|
92
|
-
Requires-Dist: tensorpack; extra == "docs"
|
|
92
|
+
Requires-Dist: tensorpack==0.11; extra == "docs"
|
|
93
93
|
Requires-Dist: boto3; extra == "docs"
|
|
94
94
|
Requires-Dist: transformers>=4.36.0; extra == "docs"
|
|
95
|
-
Requires-Dist: accelerate; extra == "docs"
|
|
95
|
+
Requires-Dist: accelerate>=0.29.1; extra == "docs"
|
|
96
96
|
Requires-Dist: pdfplumber>=0.7.1; extra == "docs"
|
|
97
97
|
Requires-Dist: lxml>=4.9.1; extra == "docs"
|
|
98
|
-
Requires-Dist: lxml-stubs; extra == "docs"
|
|
99
|
-
Requires-Dist: jdeskew; extra == "docs"
|
|
98
|
+
Requires-Dist: lxml-stubs>=0.5.1; extra == "docs"
|
|
99
|
+
Requires-Dist: jdeskew>=0.2.2; extra == "docs"
|
|
100
100
|
Requires-Dist: jinja2==3.0.3; extra == "docs"
|
|
101
101
|
Requires-Dist: mkdocs-material; extra == "docs"
|
|
102
102
|
Requires-Dist: mkdocstrings-python; extra == "docs"
|
|
@@ -105,47 +105,20 @@ Provides-Extra: dev
|
|
|
105
105
|
Requires-Dist: python-dotenv==1.0.0; extra == "dev"
|
|
106
106
|
Requires-Dist: click; extra == "dev"
|
|
107
107
|
Requires-Dist: black==23.7.0; extra == "dev"
|
|
108
|
-
Requires-Dist: isort; extra == "dev"
|
|
108
|
+
Requires-Dist: isort==5.13.2; extra == "dev"
|
|
109
109
|
Requires-Dist: pylint==2.17.4; extra == "dev"
|
|
110
110
|
Requires-Dist: mypy==1.4.1; extra == "dev"
|
|
111
111
|
Requires-Dist: wandb; extra == "dev"
|
|
112
|
-
Requires-Dist: types-PyYAML; extra == "dev"
|
|
113
|
-
Requires-Dist: types-termcolor; extra == "dev"
|
|
114
|
-
Requires-Dist: types-tabulate; extra == "dev"
|
|
115
|
-
Requires-Dist: types-tqdm; extra == "dev"
|
|
116
|
-
Requires-Dist: lxml-stubs; extra == "dev"
|
|
117
|
-
Requires-Dist: types-Pillow; extra == "dev"
|
|
118
|
-
Requires-Dist: types-urllib3; extra == "dev"
|
|
112
|
+
Requires-Dist: types-PyYAML>=6.0.12.12; extra == "dev"
|
|
113
|
+
Requires-Dist: types-termcolor>=1.1.3; extra == "dev"
|
|
114
|
+
Requires-Dist: types-tabulate>=0.9.0.3; extra == "dev"
|
|
115
|
+
Requires-Dist: types-tqdm>=4.66.0.5; extra == "dev"
|
|
116
|
+
Requires-Dist: lxml-stubs>=0.5.1; extra == "dev"
|
|
117
|
+
Requires-Dist: types-Pillow>=10.2.0.20240406; extra == "dev"
|
|
118
|
+
Requires-Dist: types-urllib3>=1.26.25.14; extra == "dev"
|
|
119
119
|
Provides-Extra: test
|
|
120
|
-
Requires-Dist: pytest; extra == "test"
|
|
120
|
+
Requires-Dist: pytest==8.0.2; extra == "test"
|
|
121
121
|
Requires-Dist: pytest-cov; extra == "test"
|
|
122
|
-
Provides-Extra: hf
|
|
123
|
-
Requires-Dist: catalogue==2.0.7; extra == "hf"
|
|
124
|
-
Requires-Dist: huggingface_hub>=0.12.0; extra == "hf"
|
|
125
|
-
Requires-Dist: importlib-metadata>=4.11.2; extra == "hf"
|
|
126
|
-
Requires-Dist: jsonlines==3.1.0; extra == "hf"
|
|
127
|
-
Requires-Dist: mock==4.0.3; extra == "hf"
|
|
128
|
-
Requires-Dist: networkx>=2.7.1; extra == "hf"
|
|
129
|
-
Requires-Dist: numpy>=1.21; extra == "hf"
|
|
130
|
-
Requires-Dist: packaging>=20.0; extra == "hf"
|
|
131
|
-
Requires-Dist: Pillow>=10.0.0; extra == "hf"
|
|
132
|
-
Requires-Dist: pypdf>=3.16.0; extra == "hf"
|
|
133
|
-
Requires-Dist: pyyaml==6.0; extra == "hf"
|
|
134
|
-
Requires-Dist: pyzmq>=16; extra == "hf"
|
|
135
|
-
Requires-Dist: termcolor>=1.1; extra == "hf"
|
|
136
|
-
Requires-Dist: tabulate>=0.7.7; extra == "hf"
|
|
137
|
-
Requires-Dist: tqdm==4.64.0; extra == "hf"
|
|
138
|
-
Requires-Dist: timm; extra == "hf"
|
|
139
|
-
Requires-Dist: transformers>=4.36.0; extra == "hf"
|
|
140
|
-
Requires-Dist: accelerate; extra == "hf"
|
|
141
|
-
Requires-Dist: python-doctr==0.7.0; extra == "hf"
|
|
142
|
-
Requires-Dist: boto3; extra == "hf"
|
|
143
|
-
Requires-Dist: pdfplumber>=0.7.1; extra == "hf"
|
|
144
|
-
Requires-Dist: fasttext; extra == "hf"
|
|
145
|
-
Requires-Dist: jdeskew; extra == "hf"
|
|
146
|
-
Requires-Dist: apted==1.0.3; extra == "hf"
|
|
147
|
-
Requires-Dist: distance==0.1.3; extra == "hf"
|
|
148
|
-
Requires-Dist: lxml>=4.9.1; extra == "hf"
|
|
149
122
|
|
|
150
123
|
|
|
151
124
|
<p align="center">
|
|
@@ -191,7 +164,9 @@ pipelines. Its core function does not depend on any specific deep learning libra
|
|
|
191
164
|
Check this [notebook](https://github.com/deepdoctection/notebooks/blob/main/Analyzer_Configuration.ipynb) or the
|
|
192
165
|
[docs](https://deepdoctection.readthedocs.io/en/latest/tutorials/analyzer_configuration_notebook/) for more infos.
|
|
193
166
|
- Document layout analysis and table recognition now runs with Torchscript (CPU) as well and Detectron2 is
|
|
194
|
-
not required anymore for basic inference.
|
|
167
|
+
not required anymore for basic inference.
|
|
168
|
+
- [**new**] More angle predictors for determining the rotation of a document based on Tesseract and DocTr
|
|
169
|
+
(not contained in the built-in Analyzer).
|
|
195
170
|
|
|
196
171
|
**deep**doctection provides on top of that methods for pre-processing inputs to models like cropping or resizing and to
|
|
197
172
|
post-process results, like validating duplicate outputs, relating words to detected layout segments or ordering words
|
|
@@ -42,7 +42,9 @@ pipelines. Its core function does not depend on any specific deep learning libra
|
|
|
42
42
|
Check this [notebook](https://github.com/deepdoctection/notebooks/blob/main/Analyzer_Configuration.ipynb) or the
|
|
43
43
|
[docs](https://deepdoctection.readthedocs.io/en/latest/tutorials/analyzer_configuration_notebook/) for more infos.
|
|
44
44
|
- Document layout analysis and table recognition now runs with Torchscript (CPU) as well and Detectron2 is
|
|
45
|
-
not required anymore for basic inference.
|
|
45
|
+
not required anymore for basic inference.
|
|
46
|
+
- [**new**] More angle predictors for determining the rotation of a document based on Tesseract and DocTr
|
|
47
|
+
(not contained in the built-in Analyzer).
|
|
46
48
|
|
|
47
49
|
**deep**doctection provides on top of that methods for pre-processing inputs to models like cropping or resizing and to
|
|
48
50
|
post-process results, like validating duplicate outputs, relating words to detected layout segments or ordering words
|
|
@@ -27,7 +27,7 @@ from .utils.logger import logger
|
|
|
27
27
|
|
|
28
28
|
# pylint: enable=wrong-import-position
|
|
29
29
|
|
|
30
|
-
__version__ = 0.
|
|
30
|
+
__version__ = 0.31
|
|
31
31
|
|
|
32
32
|
_IMPORT_STRUCTURE = {
|
|
33
33
|
"analyzer": [
|
|
@@ -179,6 +179,7 @@ _IMPORT_STRUCTURE = {
|
|
|
179
179
|
"Jdeskewer",
|
|
180
180
|
"DoctrTextlineDetector",
|
|
181
181
|
"DoctrTextRecognizer",
|
|
182
|
+
"DocTrRotationTransformer",
|
|
182
183
|
"FasttextLangDetector",
|
|
183
184
|
"HFDetrDerivedDetector",
|
|
184
185
|
"HFLayoutLmTokenClassifierBase",
|
|
@@ -194,6 +195,7 @@ _IMPORT_STRUCTURE = {
|
|
|
194
195
|
"ModelDownloadManager",
|
|
195
196
|
"PdfPlumberTextDetector",
|
|
196
197
|
"TesseractOcrDetector",
|
|
198
|
+
"TesseractRotationTransformer",
|
|
197
199
|
"TextractOcrDetector",
|
|
198
200
|
"TPFrcnnDetector",
|
|
199
201
|
],
|
|
@@ -279,7 +281,7 @@ _IMPORT_STRUCTURE = {
|
|
|
279
281
|
"PubtablesSegmentationService",
|
|
280
282
|
"SegmentationResult",
|
|
281
283
|
"TextExtractionService",
|
|
282
|
-
"
|
|
284
|
+
"SimpleTransformService",
|
|
283
285
|
],
|
|
284
286
|
"train": [
|
|
285
287
|
"D2Trainer",
|
|
@@ -113,11 +113,12 @@ def config_sanity_checks(cfg: AttrDict) -> None:
|
|
|
113
113
|
"""Some config sanity checks"""
|
|
114
114
|
if cfg.USE_PDF_MINER and cfg.USE_OCR and cfg.OCR.USE_DOCTR:
|
|
115
115
|
raise ValueError("Configuration USE_PDF_MINER= True and USE_OCR=True and USE_DOCTR=True is not allowed")
|
|
116
|
-
if cfg.
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
116
|
+
if cfg.USE_OCR:
|
|
117
|
+
if cfg.OCR.USE_TESSERACT + cfg.OCR.USE_DOCTR + cfg.OCR.USE_TEXTRACT != 1:
|
|
118
|
+
raise ValueError(
|
|
119
|
+
"Choose either OCR.USE_TESSERACT=True or OCR.USE_DOCTR=True or OCR.USE_TEXTRACT=True "
|
|
120
|
+
"and set the other two to False. Only one OCR system can be activated."
|
|
121
|
+
)
|
|
121
122
|
|
|
122
123
|
|
|
123
124
|
def build_detector(
|
|
@@ -17,25 +17,6 @@ from typing import Any, Iterator, no_type_check
|
|
|
17
17
|
from ..utils.utils import get_rng
|
|
18
18
|
|
|
19
19
|
|
|
20
|
-
class DataFlowTerminated(BaseException):
|
|
21
|
-
"""
|
|
22
|
-
An exception indicating that the DataFlow is unable to produce any more
|
|
23
|
-
data, i.e. something wrong happened so that calling `__iter__`
|
|
24
|
-
cannot give a valid iterator anymore.
|
|
25
|
-
In most DataFlow this will never be raised.
|
|
26
|
-
"""
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
class DataFlowResetStateNotCalled(BaseException):
|
|
30
|
-
"""
|
|
31
|
-
An exception indicating that `reset_state()` has not been called before starting
|
|
32
|
-
iteration.
|
|
33
|
-
"""
|
|
34
|
-
|
|
35
|
-
def __init__(self) -> None:
|
|
36
|
-
super().__init__("Iterating a dataflow requires .reset_state() to be called first")
|
|
37
|
-
|
|
38
|
-
|
|
39
20
|
class DataFlowReentrantGuard:
|
|
40
21
|
"""
|
|
41
22
|
A tool to enforce non-reentrancy.
|
|
@@ -25,10 +25,11 @@ from typing import Any, Callable, Iterable, Iterator, List, Optional
|
|
|
25
25
|
|
|
26
26
|
import numpy as np
|
|
27
27
|
|
|
28
|
+
from ..utils.error import DataFlowResetStateNotCalledError
|
|
28
29
|
from ..utils.logger import LoggingRecord, logger
|
|
29
30
|
from ..utils.tqdm import get_tqdm
|
|
30
31
|
from ..utils.utils import get_rng
|
|
31
|
-
from .base import DataFlow, DataFlowReentrantGuard,
|
|
32
|
+
from .base import DataFlow, DataFlowReentrantGuard, ProxyDataFlow
|
|
32
33
|
from .serialize import DataFromIterable, DataFromList
|
|
33
34
|
|
|
34
35
|
__all__ = ["CacheData", "CustomDataFromList", "CustomDataFromIterable"]
|
|
@@ -65,7 +66,7 @@ class CacheData(ProxyDataFlow):
|
|
|
65
66
|
|
|
66
67
|
def __iter__(self) -> Iterator[Any]:
|
|
67
68
|
if self._guard is None:
|
|
68
|
-
raise
|
|
69
|
+
raise DataFlowResetStateNotCalledError()
|
|
69
70
|
|
|
70
71
|
with self._guard:
|
|
71
72
|
if self.buffer:
|
|
@@ -139,7 +140,7 @@ class CustomDataFromList(DataFromList):
|
|
|
139
140
|
|
|
140
141
|
def __iter__(self) -> Iterator[Any]:
|
|
141
142
|
if self.rng is None:
|
|
142
|
-
raise
|
|
143
|
+
raise DataFlowResetStateNotCalledError()
|
|
143
144
|
if self.rebalance_func is not None:
|
|
144
145
|
lst_tmp = self.rebalance_func(self.lst)
|
|
145
146
|
logger.info(LoggingRecord(f"CustomDataFromList: subset size after re-balancing: {len(lst_tmp)}"))
|
|
@@ -27,13 +27,16 @@ from pathlib import Path
|
|
|
27
27
|
from typing import DefaultDict, Dict, List, Optional, Sequence, Union
|
|
28
28
|
|
|
29
29
|
from jsonlines import Reader, Writer
|
|
30
|
+
from tabulate import tabulate
|
|
31
|
+
from termcolor import colored
|
|
30
32
|
|
|
31
33
|
from ..utils.context import timed_operation
|
|
32
34
|
from ..utils.detection_types import JsonDict, Pathlike
|
|
35
|
+
from ..utils.error import FileExtensionError
|
|
33
36
|
from ..utils.identifier import get_uuid_from_str
|
|
34
37
|
from ..utils.pdf_utils import PDFStreamer
|
|
35
38
|
from ..utils.tqdm import get_tqdm
|
|
36
|
-
from ..utils.utils import
|
|
39
|
+
from ..utils.utils import is_file_extension
|
|
37
40
|
from .base import DataFlow
|
|
38
41
|
from .common import FlattenData, JoinData, MapData
|
|
39
42
|
from .custom import CacheData, CustomDataFromIterable, CustomDataFromList
|
|
@@ -223,7 +226,7 @@ class SerializerFiles:
|
|
|
223
226
|
"""
|
|
224
227
|
Not implemented
|
|
225
228
|
"""
|
|
226
|
-
raise NotImplementedError
|
|
229
|
+
raise NotImplementedError()
|
|
227
230
|
|
|
228
231
|
|
|
229
232
|
class CocoParser:
|
|
@@ -283,8 +286,14 @@ class CocoParser:
|
|
|
283
286
|
"""
|
|
284
287
|
Print information about the annotation file.
|
|
285
288
|
"""
|
|
289
|
+
rows = []
|
|
286
290
|
for key, value in self.dataset["info"].items():
|
|
287
|
-
|
|
291
|
+
row = [key, value]
|
|
292
|
+
rows.append(row)
|
|
293
|
+
|
|
294
|
+
header = ["key", "value"]
|
|
295
|
+
table = tabulate(rows, headers=header, tablefmt="fancy_grid", stralign="left", numalign="left")
|
|
296
|
+
print(colored(table, "cyan"))
|
|
288
297
|
|
|
289
298
|
def get_ann_ids(
|
|
290
299
|
self,
|
|
@@ -499,7 +508,7 @@ class SerializerCoco:
|
|
|
499
508
|
"""
|
|
500
509
|
Not implemented
|
|
501
510
|
"""
|
|
502
|
-
raise NotImplementedError
|
|
511
|
+
raise NotImplementedError()
|
|
503
512
|
|
|
504
513
|
|
|
505
514
|
class SerializerPdfDoc:
|
|
@@ -547,7 +556,7 @@ class SerializerPdfDoc:
|
|
|
547
556
|
"""
|
|
548
557
|
Not implemented
|
|
549
558
|
"""
|
|
550
|
-
raise NotImplementedError
|
|
559
|
+
raise NotImplementedError()
|
|
551
560
|
|
|
552
561
|
@staticmethod
|
|
553
562
|
def split(path: Pathlike, path_target: Optional[Pathlike] = None, max_datapoint: Optional[int] = None) -> None:
|
|
@@ -28,8 +28,9 @@ from typing import Any, Callable, Iterator, List, no_type_check
|
|
|
28
28
|
import zmq
|
|
29
29
|
|
|
30
30
|
from ..utils.concurrency import StoppableThread, enable_death_signal, start_proc_mask_signal
|
|
31
|
+
from ..utils.error import DataFlowTerminatedError
|
|
31
32
|
from ..utils.logger import LoggingRecord, logger
|
|
32
|
-
from .base import DataFlow, DataFlowReentrantGuard,
|
|
33
|
+
from .base import DataFlow, DataFlowReentrantGuard, ProxyDataFlow
|
|
33
34
|
from .common import RepeatedData
|
|
34
35
|
from .serialize import PickleSerializer
|
|
35
36
|
|
|
@@ -49,14 +50,14 @@ def _zmq_catch_error(name):
|
|
|
49
50
|
yield
|
|
50
51
|
except zmq.ContextTerminated as exc:
|
|
51
52
|
logger.info(LoggingRecord(f"_zmq_catch_error: [{name}] Context terminated."))
|
|
52
|
-
raise
|
|
53
|
+
raise DataFlowTerminatedError() from exc
|
|
53
54
|
except zmq.ZMQError as exc:
|
|
54
55
|
if exc.errno == errno.ENOTSOCK: # socket closed
|
|
55
56
|
logger.info(LoggingRecord(f"_zmq_catch_error: [{name}] Socket closed."))
|
|
56
|
-
raise
|
|
57
|
-
raise ValueError from exc
|
|
57
|
+
raise DataFlowTerminatedError() from exc
|
|
58
|
+
raise ValueError() from exc
|
|
58
59
|
except Exception as exc:
|
|
59
|
-
raise ValueError from exc
|
|
60
|
+
raise ValueError() from exc
|
|
60
61
|
|
|
61
62
|
|
|
62
63
|
@no_type_check
|
|
@@ -78,8 +79,8 @@ def _get_pipe_name(name):
|
|
|
78
79
|
class _ParallelMapData(ProxyDataFlow, ABC):
|
|
79
80
|
def __init__(self, df: DataFlow, buffer_size: int, strict: bool = False) -> None:
|
|
80
81
|
super().__init__(df)
|
|
81
|
-
if
|
|
82
|
-
raise ValueError("buffer_size must be a positive number")
|
|
82
|
+
if buffer_size <= 0:
|
|
83
|
+
raise ValueError(f"buffer_size must be a positive number, got {buffer_size}")
|
|
83
84
|
self._buffer_size = buffer_size
|
|
84
85
|
self._buffer_occupancy = 0 # actual #elements in buffer, only useful in strict mode
|
|
85
86
|
self._strict = strict
|
|
@@ -95,12 +96,12 @@ class _ParallelMapData(ProxyDataFlow, ABC):
|
|
|
95
96
|
@no_type_check
|
|
96
97
|
@abstractmethod
|
|
97
98
|
def _recv(self):
|
|
98
|
-
raise NotImplementedError
|
|
99
|
+
raise NotImplementedError()
|
|
99
100
|
|
|
100
101
|
@no_type_check
|
|
101
102
|
@abstractmethod
|
|
102
103
|
def _send(self, dp: Any):
|
|
103
|
-
raise NotImplementedError
|
|
104
|
+
raise NotImplementedError()
|
|
104
105
|
|
|
105
106
|
@no_type_check
|
|
106
107
|
def _recv_filter_none(self):
|
|
@@ -398,8 +399,8 @@ class MultiProcessMapData(_ParallelMapData, _MultiProcessZMQDataFlow):
|
|
|
398
399
|
|
|
399
400
|
_ParallelMapData.__init__(self, df, buffer_size, strict)
|
|
400
401
|
_MultiProcessZMQDataFlow.__init__(self)
|
|
401
|
-
if
|
|
402
|
-
raise ValueError("num_proc must be a positive number")
|
|
402
|
+
if num_proc <= 0:
|
|
403
|
+
raise ValueError(f"num_proc must be a positive number, got {num_proc}")
|
|
403
404
|
self.num_proc = num_proc
|
|
404
405
|
self.map_func = map_func
|
|
405
406
|
self._strict = strict
|
|
@@ -16,7 +16,8 @@ from typing import Any, Iterable, Iterator, List, Optional, Tuple, Union
|
|
|
16
16
|
|
|
17
17
|
import numpy as np
|
|
18
18
|
|
|
19
|
-
from .
|
|
19
|
+
from ..utils.error import DataFlowResetStateNotCalledError
|
|
20
|
+
from .base import DataFlow, RNGDataFlow
|
|
20
21
|
|
|
21
22
|
|
|
22
23
|
class DataFromList(RNGDataFlow):
|
|
@@ -44,7 +45,7 @@ class DataFromList(RNGDataFlow):
|
|
|
44
45
|
for k in idxs:
|
|
45
46
|
yield self.lst[k]
|
|
46
47
|
else:
|
|
47
|
-
raise
|
|
48
|
+
raise DataFlowResetStateNotCalledError()
|
|
48
49
|
|
|
49
50
|
|
|
50
51
|
class DataFromIterable(DataFlow):
|
|
@@ -63,7 +64,7 @@ class DataFromIterable(DataFlow):
|
|
|
63
64
|
|
|
64
65
|
def __len__(self) -> int:
|
|
65
66
|
if self._len is None:
|
|
66
|
-
raise NotImplementedError
|
|
67
|
+
raise NotImplementedError()
|
|
67
68
|
return self._len
|
|
68
69
|
|
|
69
70
|
def __iter__(self) -> Iterator[Any]:
|
|
@@ -107,7 +108,7 @@ class FakeData(RNGDataFlow):
|
|
|
107
108
|
|
|
108
109
|
def __iter__(self) -> Iterator[Any]:
|
|
109
110
|
if self.rng is None:
|
|
110
|
-
raise
|
|
111
|
+
raise DataFlowResetStateNotCalledError()
|
|
111
112
|
if self.random:
|
|
112
113
|
for _ in range(self._size):
|
|
113
114
|
val = []
|
|
@@ -24,6 +24,7 @@ from dataclasses import dataclass, field
|
|
|
24
24
|
from typing import Any, Dict, List, Optional, Union, no_type_check
|
|
25
25
|
|
|
26
26
|
from ..utils.detection_types import JsonDict
|
|
27
|
+
from ..utils.error import AnnotationError, UUIDError
|
|
27
28
|
from ..utils.identifier import get_uuid, is_uuid_like
|
|
28
29
|
from ..utils.logger import LoggingRecord, logger
|
|
29
30
|
from ..utils.settings import DefaultType, ObjectTypes, SummaryType, TypeOrStr, get_type
|
|
@@ -36,7 +37,16 @@ def ann_from_dict(cls, **kwargs):
|
|
|
36
37
|
"""
|
|
37
38
|
A factory function to create subclasses of annotations from a given dict
|
|
38
39
|
"""
|
|
39
|
-
|
|
40
|
+
_init_kwargs = {
|
|
41
|
+
"external_id": kwargs.get("external_id"),
|
|
42
|
+
"category_name": kwargs.get("category_name"),
|
|
43
|
+
"category_id": kwargs.get("category_id"),
|
|
44
|
+
"score": kwargs.get("score"),
|
|
45
|
+
"service_id": kwargs.get("service_id"),
|
|
46
|
+
"model_id": kwargs.get("model_id"),
|
|
47
|
+
"session_id": kwargs.get("session_id"),
|
|
48
|
+
}
|
|
49
|
+
ann = cls(**_init_kwargs)
|
|
40
50
|
ann.active = kwargs.get("active")
|
|
41
51
|
ann._annotation_id = kwargs.get("_annotation_id") # pylint: disable=W0212
|
|
42
52
|
if isinstance(kwargs.get("sub_categories"), dict):
|
|
@@ -74,11 +84,17 @@ class Annotation(ABC):
|
|
|
74
84
|
id will not depend on the defining attributes.
|
|
75
85
|
|
|
76
86
|
`_annotation_id`: Unique id for annotations. Will always be given as string representation of a md5-hash.
|
|
87
|
+
`service_id`: Service that generated the annotation. This will be the name of a pipeline component
|
|
88
|
+
`model_id`: Model that generated the annotation. This will be the name of particular model
|
|
89
|
+
`session_id`: Session id for the annotation. This will be the id of the session in which the annotation was created.
|
|
77
90
|
"""
|
|
78
91
|
|
|
79
92
|
active: bool = field(default=True, init=False, repr=True)
|
|
80
93
|
external_id: Optional[Union[str, int]] = field(default=None, init=True, repr=False)
|
|
81
94
|
_annotation_id: Optional[str] = field(default=None, init=False, repr=True)
|
|
95
|
+
service_id: Optional[str] = field(default=None)
|
|
96
|
+
model_id: Optional[str] = field(default=None)
|
|
97
|
+
session_id: Optional[str] = field(default=None)
|
|
82
98
|
|
|
83
99
|
def __post_init__(self) -> None:
|
|
84
100
|
"""
|
|
@@ -101,7 +117,7 @@ class Annotation(ABC):
|
|
|
101
117
|
"""
|
|
102
118
|
if self._annotation_id:
|
|
103
119
|
return self._annotation_id
|
|
104
|
-
raise
|
|
120
|
+
raise AnnotationError("Dump annotation first or pass external_id to create an annotation id")
|
|
105
121
|
|
|
106
122
|
@annotation_id.setter
|
|
107
123
|
def annotation_id(self, input_id: str) -> None:
|
|
@@ -109,13 +125,13 @@ class Annotation(ABC):
|
|
|
109
125
|
annotation_id setter
|
|
110
126
|
"""
|
|
111
127
|
if self._annotation_id is not None:
|
|
112
|
-
raise
|
|
128
|
+
raise AnnotationError("Annotation_id already defined and cannot be reset")
|
|
113
129
|
if is_uuid_like(input_id):
|
|
114
130
|
self._annotation_id = input_id
|
|
115
131
|
elif isinstance(input_id, property):
|
|
116
132
|
pass
|
|
117
133
|
else:
|
|
118
|
-
raise
|
|
134
|
+
raise AnnotationError("Annotation_id must be uuid3 string")
|
|
119
135
|
|
|
120
136
|
@abstractmethod
|
|
121
137
|
def get_defining_attributes(self) -> List[str]:
|
|
@@ -126,13 +142,13 @@ class Annotation(ABC):
|
|
|
126
142
|
|
|
127
143
|
:return: A list of attributes.
|
|
128
144
|
"""
|
|
129
|
-
raise NotImplementedError
|
|
145
|
+
raise NotImplementedError()
|
|
130
146
|
|
|
131
147
|
def _assert_attributes_have_str(self, state_id: bool = False) -> None:
|
|
132
148
|
defining_attributes = self.get_state_attributes() if state_id else self.get_defining_attributes()
|
|
133
149
|
for attr in defining_attributes:
|
|
134
150
|
if not hasattr(eval("self." + attr), "__str__"): # pylint: disable=W0123
|
|
135
|
-
raise
|
|
151
|
+
raise AnnotationError(f"Attribute {attr} must have __str__ method")
|
|
136
152
|
|
|
137
153
|
@staticmethod
|
|
138
154
|
def set_annotation_id(annotation: "CategoryAnnotation", *container_id_context: Optional[str]) -> str:
|
|
@@ -179,7 +195,7 @@ class Annotation(ABC):
|
|
|
179
195
|
|
|
180
196
|
:return: Annotation instance
|
|
181
197
|
"""
|
|
182
|
-
raise NotImplementedError
|
|
198
|
+
raise NotImplementedError()
|
|
183
199
|
|
|
184
200
|
@staticmethod
|
|
185
201
|
@abstractmethod
|
|
@@ -189,7 +205,7 @@ class Annotation(ABC):
|
|
|
189
205
|
|
|
190
206
|
:return: A list of attributes.
|
|
191
207
|
"""
|
|
192
|
-
raise NotImplementedError
|
|
208
|
+
raise NotImplementedError()
|
|
193
209
|
|
|
194
210
|
@property
|
|
195
211
|
def state_id(self) -> str:
|
|
@@ -290,7 +306,12 @@ class CategoryAnnotation(Annotation):
|
|
|
290
306
|
"""
|
|
291
307
|
|
|
292
308
|
if sub_category_name in self.sub_categories:
|
|
293
|
-
raise
|
|
309
|
+
raise AnnotationError(
|
|
310
|
+
f"sub category {sub_category_name} already defined: "
|
|
311
|
+
f"annotation_id: {self.annotation_id}, "
|
|
312
|
+
f"category_name: {self.category_name}, "
|
|
313
|
+
f"category_id: {self.category_id}"
|
|
314
|
+
)
|
|
294
315
|
|
|
295
316
|
if self._annotation_id is not None:
|
|
296
317
|
if annotation._annotation_id is None: # pylint: disable=W0212
|
|
@@ -333,7 +354,7 @@ class CategoryAnnotation(Annotation):
|
|
|
333
354
|
:param annotation_id: An annotation id
|
|
334
355
|
"""
|
|
335
356
|
if not is_uuid_like(annotation_id):
|
|
336
|
-
raise
|
|
357
|
+
raise UUIDError("Annotation_id must be uuid")
|
|
337
358
|
|
|
338
359
|
key_type = get_type(key)
|
|
339
360
|
if key not in self.relationships:
|
|
@@ -436,14 +457,14 @@ class ImageAnnotation(CategoryAnnotation):
|
|
|
436
457
|
box = self.bounding_box
|
|
437
458
|
if box:
|
|
438
459
|
return box
|
|
439
|
-
raise
|
|
460
|
+
raise AnnotationError(f"bounding_box has not been initialized for {self.annotation_id}")
|
|
440
461
|
|
|
441
462
|
def get_summary(self, key: ObjectTypes) -> CategoryAnnotation:
|
|
442
463
|
"""Get summary sub categories from `image`. Raises `ValueError` if `key` is not available"""
|
|
443
464
|
if self.image:
|
|
444
465
|
if self.image.summary:
|
|
445
466
|
return self.image.summary.get_sub_category(key)
|
|
446
|
-
raise
|
|
467
|
+
raise AnnotationError(f"Summary does not exist for {self.annotation_id} and key: {key}")
|
|
447
468
|
|
|
448
469
|
|
|
449
470
|
@dataclass
|
|
@@ -28,6 +28,7 @@ import numpy.typing as npt
|
|
|
28
28
|
from numpy import float32
|
|
29
29
|
|
|
30
30
|
from ..utils.detection_types import ImageType
|
|
31
|
+
from ..utils.error import BoundingBoxError
|
|
31
32
|
from ..utils.file_utils import cocotools_available
|
|
32
33
|
from ..utils.logger import LoggingRecord, logger
|
|
33
34
|
|
|
@@ -140,10 +141,6 @@ def iou(boxes1: npt.NDArray[float32], boxes2: npt.NDArray[float32]) -> npt.NDArr
|
|
|
140
141
|
return np_iou(boxes1, boxes2)
|
|
141
142
|
|
|
142
143
|
|
|
143
|
-
class BoundingBoxError(BaseException):
|
|
144
|
-
"""Special exception only for `BoundingBox`"""
|
|
145
|
-
|
|
146
|
-
|
|
147
144
|
@dataclass
|
|
148
145
|
class BoundingBox:
|
|
149
146
|
"""
|
|
@@ -32,6 +32,7 @@ from pypdf import PdfReader
|
|
|
32
32
|
|
|
33
33
|
from ..utils.detection_types import ImageType
|
|
34
34
|
from ..utils.develop import deprecated
|
|
35
|
+
from ..utils.error import DependencyError
|
|
35
36
|
from ..utils.pdf_utils import pdf_to_np_array
|
|
36
37
|
from ..utils.viz import viz_handler
|
|
37
38
|
|
|
@@ -121,7 +122,8 @@ def convert_pdf_bytes_to_np_array(pdf_bytes: bytes, dpi: Optional[int] = None) -
|
|
|
121
122
|
"""
|
|
122
123
|
from pdf2image import convert_from_bytes # type: ignore # pylint: disable=C0415, E0401
|
|
123
124
|
|
|
124
|
-
|
|
125
|
+
if which("pdftoppm") is None:
|
|
126
|
+
raise DependencyError("convert_pdf_bytes_to_np_array requires poppler to be installed")
|
|
125
127
|
|
|
126
128
|
with BytesIO(pdf_bytes) as pdf_file:
|
|
127
129
|
pdf = PdfReader(pdf_file).pages[0]
|