deepdoctection 0.26__tar.gz → 0.27__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of deepdoctection might be problematic. Click here for more details.
- {deepdoctection-0.26 → deepdoctection-0.27}/PKG-INFO +128 -9
- {deepdoctection-0.26 → deepdoctection-0.27}/README.md +7 -7
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/__init__.py +7 -1
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/analyzer/dd.py +15 -3
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/configs/conf_dd_one.yaml +4 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/datapoint/convert.py +5 -10
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/datapoint/image.py +2 -2
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/datapoint/view.py +38 -18
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/datasets/save.py +3 -3
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/d2detect.py +1 -2
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/doctrocr.py +14 -9
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/tp/tpfrcnn/common.py +2 -3
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +6 -6
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/tp/tpfrcnn/modeling/generalized_rcnn.py +3 -3
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +6 -2
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +5 -3
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +3 -1
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/tp/tpfrcnn/predict.py +1 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/mapper/laylmstruct.py +2 -3
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/utils/context.py +2 -2
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/utils/file_utils.py +63 -26
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/utils/fs.py +6 -6
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/utils/pdf_utils.py +2 -2
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/utils/settings.py +8 -1
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/utils/transform.py +9 -9
- deepdoctection-0.27/deepdoctection/utils/viz.py +659 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection.egg-info/PKG-INFO +128 -9
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection.egg-info/requires.txt +9 -10
- {deepdoctection-0.26 → deepdoctection-0.27}/setup.cfg +1 -2
- {deepdoctection-0.26 → deepdoctection-0.27}/setup.py +11 -9
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/analyzer/test_dd.py +6 -57
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/conftest.py +2 -0
- deepdoctection-0.26/deepdoctection/utils/viz.py +0 -340
- {deepdoctection-0.26 → deepdoctection-0.27}/LICENSE +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/analyzer/__init__.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/configs/__init__.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/configs/conf_tesseract.yaml +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/dataflow/__init__.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/dataflow/base.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/dataflow/common.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/dataflow/custom.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/dataflow/custom_serialize.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/dataflow/parallel_map.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/dataflow/serialize.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/dataflow/stats.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/datapoint/__init__.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/datapoint/annotation.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/datapoint/box.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/datasets/__init__.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/datasets/adapter.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/datasets/base.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/datasets/dataflow_builder.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/datasets/info.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/datasets/instances/__init__.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/datasets/instances/doclaynet.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/datasets/instances/fintabnet.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/datasets/instances/funsd.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/datasets/instances/iiitar13k.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/datasets/instances/layouttest.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/datasets/instances/publaynet.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/datasets/instances/pubtables1m.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/datasets/instances/pubtabnet.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/datasets/instances/rvlcdip.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/datasets/instances/xfund.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/datasets/instances/xsl/__init__.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/datasets/instances/xsl/pascal_voc.xsl +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/datasets/registry.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/eval/__init__.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/eval/accmetric.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/eval/base.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/eval/cocometric.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/eval/eval.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/eval/registry.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/eval/tedsmetric.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/eval/tp_eval_callback.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/__init__.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/base.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/deskew.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/fastlang.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/hfdetr.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/hflayoutlm.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/model.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/pdftext.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/pt/__init__.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/pt/nms.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/pt/ptutils.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/tessocr.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/texocr.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/tp/__init__.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/tp/tfutils.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/tp/tpcompat.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/tp/tpfrcnn/__init__.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/tp/tpfrcnn/config/__init__.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/tp/tpfrcnn/config/config.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/tp/tpfrcnn/modeling/__init__.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/tp/tpfrcnn/modeling/model_cascade.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/tp/tpfrcnn/modeling/model_rpn.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/tp/tpfrcnn/preproc.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/tp/tpfrcnn/utils/__init__.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/tp/tpfrcnn/utils/box_ops.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/tp/tpfrcnn/utils/np_box_ops.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/extern/tpdetect.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/mapper/__init__.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/mapper/cats.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/mapper/cocostruct.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/mapper/d2struct.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/mapper/hfstruct.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/mapper/maputils.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/mapper/match.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/mapper/misc.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/mapper/pascalstruct.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/mapper/prodigystruct.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/mapper/pubstruct.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/mapper/tpstruct.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/mapper/xfundstruct.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/pipe/__init__.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/pipe/anngen.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/pipe/base.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/pipe/cell.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/pipe/common.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/pipe/concurrency.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/pipe/doctectionpipe.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/pipe/language.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/pipe/layout.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/pipe/lm.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/pipe/order.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/pipe/refine.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/pipe/registry.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/pipe/segment.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/pipe/text.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/pipe/transform.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/py.typed +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/train/__init__.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/train/d2_frcnn_train.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/train/hf_detr_train.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/train/hf_layoutlm_train.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/train/tp_frcnn_train.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/utils/__init__.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/utils/concurrency.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/utils/detection_types.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/utils/develop.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/utils/identifier.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/utils/logger.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/utils/metacfg.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/utils/systools.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/utils/tqdm.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection/utils/utils.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection.egg-info/SOURCES.txt +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection.egg-info/dependency_links.txt +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/deepdoctection.egg-info/top_level.txt +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/__init__.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/analyzer/__init__.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/data.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/dataflow/__init__.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/dataflow/conftest.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/dataflow/test_common.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/dataflow/test_custom.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/dataflow/test_custom_serialize.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/dataflow/test_parallel_map.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/dataflow/test_stats.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/datapoint/__init__.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/datapoint/conftest.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/datapoint/test_annotation.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/datapoint/test_box.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/datapoint/test_convert.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/datapoint/test_image.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/datapoint/test_view.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/datasets/__init__.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/datasets/instances/__init__.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/datasets/instances/conftest.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/datasets/instances/test_doclaynet.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/datasets/instances/test_fintabnet.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/datasets/instances/test_funsd.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/datasets/instances/test_iiitar13k.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/datasets/instances/test_layouttest.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/datasets/instances/test_publaynet.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/datasets/instances/test_pubtables1m.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/datasets/instances/test_pubtabnet.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/datasets/instances/test_rvlcdip.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/datasets/test_adapter.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/datasets/test_info.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/datasets/test_registry.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/eval/__init__.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/eval/conftest.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/eval/test_accmetric.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/eval/test_cocometric.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/eval/test_eval.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/eval/test_registry.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/eval/test_tedsmetric.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/extern/__init__.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/extern/conftest.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/extern/data.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/extern/test_deskew.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/extern/test_doctrocr.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/extern/test_fastlang.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/extern/test_hfdetr.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/extern/test_hflayoutlm.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/extern/test_pdftext.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/extern/test_tessocr.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/extern/test_texocr.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/extern/test_tpdetect.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/mapper/__init__.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/mapper/conftest.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/mapper/data.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/mapper/test_cats.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/mapper/test_cocostruct.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/mapper/test_d2struct.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/mapper/test_hfstruct.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/mapper/test_iiitar13k.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/mapper/test_laylmstruct.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/mapper/test_misc.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/mapper/test_prodigystruct.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/mapper/test_pubstruct.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/mapper/test_tpstruct.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/mapper/test_utils.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/mapper/test_xfundstruct.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/pipe/__init__.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/pipe/test_anngen.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/pipe/test_cell.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/pipe/test_common.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/pipe/test_language.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/pipe/test_layout.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/pipe/test_lm.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/pipe/test_order.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/pipe/test_refine.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/pipe/test_registry.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/pipe/test_segment.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/pipe/test_text.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/pipe/test_transform.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/test_utils.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/train/__init__.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/train/conftest.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/train/test_d2_frcnn_train.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests/train/test_tp_frcnn_train.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests_d2/__init__.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests_d2/conftest.py +0 -0
- {deepdoctection-0.26 → deepdoctection-0.27}/tests_d2/test_d2detect.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: deepdoctection
|
|
3
|
-
Version: 0.26
|
|
3
|
+
Version: 0.27
|
|
4
4
|
Summary: Repository for Document AI
|
|
5
5
|
Home-page: https://github.com/deepdoctection/deepdoctection
|
|
6
6
|
Author: Dr. Janis Meyer
|
|
@@ -13,17 +13,136 @@ Classifier: Programming Language :: Python :: 3.8
|
|
|
13
13
|
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
14
14
|
Requires-Python: >=3.8
|
|
15
15
|
Description-Content-Type: text/markdown
|
|
16
|
+
License-File: LICENSE
|
|
17
|
+
Requires-Dist: catalogue==2.0.7
|
|
18
|
+
Requires-Dist: huggingface_hub>=0.12.0
|
|
19
|
+
Requires-Dist: importlib-metadata>=4.11.2
|
|
20
|
+
Requires-Dist: jsonlines==3.0.0
|
|
21
|
+
Requires-Dist: mock==4.0.3
|
|
22
|
+
Requires-Dist: networkx>=2.7.1
|
|
23
|
+
Requires-Dist: numpy>=1.21
|
|
24
|
+
Requires-Dist: packaging>=20.0
|
|
25
|
+
Requires-Dist: Pillow>=10.0.0
|
|
26
|
+
Requires-Dist: pypdf2
|
|
27
|
+
Requires-Dist: pyyaml==6.0
|
|
28
|
+
Requires-Dist: pyzmq>=16
|
|
29
|
+
Requires-Dist: termcolor>=1.1
|
|
30
|
+
Requires-Dist: tabulate>=0.7.7
|
|
31
|
+
Requires-Dist: tqdm==4.64.0
|
|
16
32
|
Provides-Extra: tf
|
|
33
|
+
Requires-Dist: catalogue==2.0.7; extra == "tf"
|
|
34
|
+
Requires-Dist: huggingface_hub>=0.12.0; extra == "tf"
|
|
35
|
+
Requires-Dist: importlib-metadata>=4.11.2; extra == "tf"
|
|
36
|
+
Requires-Dist: jsonlines==3.0.0; extra == "tf"
|
|
37
|
+
Requires-Dist: mock==4.0.3; extra == "tf"
|
|
38
|
+
Requires-Dist: networkx>=2.7.1; extra == "tf"
|
|
39
|
+
Requires-Dist: numpy>=1.21; extra == "tf"
|
|
40
|
+
Requires-Dist: packaging>=20.0; extra == "tf"
|
|
41
|
+
Requires-Dist: Pillow>=10.0.0; extra == "tf"
|
|
42
|
+
Requires-Dist: pypdf2; extra == "tf"
|
|
43
|
+
Requires-Dist: pyyaml==6.0; extra == "tf"
|
|
44
|
+
Requires-Dist: pyzmq>=16; extra == "tf"
|
|
45
|
+
Requires-Dist: termcolor>=1.1; extra == "tf"
|
|
46
|
+
Requires-Dist: tabulate>=0.7.7; extra == "tf"
|
|
47
|
+
Requires-Dist: tqdm==4.64.0; extra == "tf"
|
|
48
|
+
Requires-Dist: tensorpack; extra == "tf"
|
|
49
|
+
Requires-Dist: protobuf==3.20.1; extra == "tf"
|
|
50
|
+
Requires-Dist: tensorflow-addons>=0.17.1; extra == "tf"
|
|
51
|
+
Requires-Dist: tf2onnx>=1.9.2; extra == "tf"
|
|
52
|
+
Requires-Dist: python-doctr==0.7.0; extra == "tf"
|
|
53
|
+
Requires-Dist: pycocotools>=2.0.2; extra == "tf"
|
|
54
|
+
Requires-Dist: boto3; extra == "tf"
|
|
55
|
+
Requires-Dist: pdfplumber>=0.7.1; extra == "tf"
|
|
56
|
+
Requires-Dist: fasttext; extra == "tf"
|
|
57
|
+
Requires-Dist: jdeskew; extra == "tf"
|
|
58
|
+
Requires-Dist: apted==1.0.3; extra == "tf"
|
|
59
|
+
Requires-Dist: distance==0.1.3; extra == "tf"
|
|
60
|
+
Requires-Dist: lxml>=4.9.1; extra == "tf"
|
|
17
61
|
Provides-Extra: pt
|
|
62
|
+
Requires-Dist: catalogue==2.0.7; extra == "pt"
|
|
63
|
+
Requires-Dist: huggingface_hub>=0.12.0; extra == "pt"
|
|
64
|
+
Requires-Dist: importlib-metadata>=4.11.2; extra == "pt"
|
|
65
|
+
Requires-Dist: jsonlines==3.0.0; extra == "pt"
|
|
66
|
+
Requires-Dist: mock==4.0.3; extra == "pt"
|
|
67
|
+
Requires-Dist: networkx>=2.7.1; extra == "pt"
|
|
68
|
+
Requires-Dist: numpy>=1.21; extra == "pt"
|
|
69
|
+
Requires-Dist: packaging>=20.0; extra == "pt"
|
|
70
|
+
Requires-Dist: Pillow>=10.0.0; extra == "pt"
|
|
71
|
+
Requires-Dist: pypdf2; extra == "pt"
|
|
72
|
+
Requires-Dist: pyyaml==6.0; extra == "pt"
|
|
73
|
+
Requires-Dist: pyzmq>=16; extra == "pt"
|
|
74
|
+
Requires-Dist: termcolor>=1.1; extra == "pt"
|
|
75
|
+
Requires-Dist: tabulate>=0.7.7; extra == "pt"
|
|
76
|
+
Requires-Dist: tqdm==4.64.0; extra == "pt"
|
|
77
|
+
Requires-Dist: timm; extra == "pt"
|
|
78
|
+
Requires-Dist: transformers; extra == "pt"
|
|
79
|
+
Requires-Dist: python-doctr==0.7.0; extra == "pt"
|
|
80
|
+
Requires-Dist: boto3; extra == "pt"
|
|
81
|
+
Requires-Dist: pdfplumber>=0.7.1; extra == "pt"
|
|
82
|
+
Requires-Dist: fasttext; extra == "pt"
|
|
83
|
+
Requires-Dist: jdeskew; extra == "pt"
|
|
84
|
+
Requires-Dist: apted==1.0.3; extra == "pt"
|
|
85
|
+
Requires-Dist: distance==0.1.3; extra == "pt"
|
|
86
|
+
Requires-Dist: lxml>=4.9.1; extra == "pt"
|
|
18
87
|
Provides-Extra: docs
|
|
88
|
+
Requires-Dist: tensorpack; extra == "docs"
|
|
89
|
+
Requires-Dist: boto3; extra == "docs"
|
|
90
|
+
Requires-Dist: transformers; extra == "docs"
|
|
91
|
+
Requires-Dist: pdfplumber>=0.7.1; extra == "docs"
|
|
92
|
+
Requires-Dist: lxml>=4.9.1; extra == "docs"
|
|
93
|
+
Requires-Dist: lxml-stubs; extra == "docs"
|
|
94
|
+
Requires-Dist: jdeskew; extra == "docs"
|
|
95
|
+
Requires-Dist: jinja2==3.0.3; extra == "docs"
|
|
96
|
+
Requires-Dist: mkdocs-material; extra == "docs"
|
|
97
|
+
Requires-Dist: mkdocstrings-python; extra == "docs"
|
|
98
|
+
Requires-Dist: griffe==0.25.0; extra == "docs"
|
|
19
99
|
Provides-Extra: dev
|
|
100
|
+
Requires-Dist: click; extra == "dev"
|
|
101
|
+
Requires-Dist: black==23.7.0; extra == "dev"
|
|
102
|
+
Requires-Dist: isort; extra == "dev"
|
|
103
|
+
Requires-Dist: pylint==2.17.4; extra == "dev"
|
|
104
|
+
Requires-Dist: mypy==1.4.1; extra == "dev"
|
|
105
|
+
Requires-Dist: wandb; extra == "dev"
|
|
106
|
+
Requires-Dist: types-PyYAML; extra == "dev"
|
|
107
|
+
Requires-Dist: types-termcolor==1.1.3; extra == "dev"
|
|
108
|
+
Requires-Dist: types-tabulate; extra == "dev"
|
|
109
|
+
Requires-Dist: types-tqdm; extra == "dev"
|
|
110
|
+
Requires-Dist: lxml-stubs; extra == "dev"
|
|
111
|
+
Requires-Dist: types-Pillow; extra == "dev"
|
|
112
|
+
Requires-Dist: types-urllib3; extra == "dev"
|
|
20
113
|
Provides-Extra: test
|
|
114
|
+
Requires-Dist: pytest; extra == "test"
|
|
115
|
+
Requires-Dist: pytest-cov; extra == "test"
|
|
21
116
|
Provides-Extra: hf
|
|
22
|
-
|
|
117
|
+
Requires-Dist: catalogue==2.0.7; extra == "hf"
|
|
118
|
+
Requires-Dist: huggingface_hub>=0.12.0; extra == "hf"
|
|
119
|
+
Requires-Dist: importlib-metadata>=4.11.2; extra == "hf"
|
|
120
|
+
Requires-Dist: jsonlines==3.0.0; extra == "hf"
|
|
121
|
+
Requires-Dist: mock==4.0.3; extra == "hf"
|
|
122
|
+
Requires-Dist: networkx>=2.7.1; extra == "hf"
|
|
123
|
+
Requires-Dist: numpy>=1.21; extra == "hf"
|
|
124
|
+
Requires-Dist: packaging>=20.0; extra == "hf"
|
|
125
|
+
Requires-Dist: Pillow>=10.0.0; extra == "hf"
|
|
126
|
+
Requires-Dist: pypdf2; extra == "hf"
|
|
127
|
+
Requires-Dist: pyyaml==6.0; extra == "hf"
|
|
128
|
+
Requires-Dist: pyzmq>=16; extra == "hf"
|
|
129
|
+
Requires-Dist: termcolor>=1.1; extra == "hf"
|
|
130
|
+
Requires-Dist: tabulate>=0.7.7; extra == "hf"
|
|
131
|
+
Requires-Dist: tqdm==4.64.0; extra == "hf"
|
|
132
|
+
Requires-Dist: timm; extra == "hf"
|
|
133
|
+
Requires-Dist: transformers; extra == "hf"
|
|
134
|
+
Requires-Dist: python-doctr==0.7.0; extra == "hf"
|
|
135
|
+
Requires-Dist: boto3; extra == "hf"
|
|
136
|
+
Requires-Dist: pdfplumber>=0.7.1; extra == "hf"
|
|
137
|
+
Requires-Dist: fasttext; extra == "hf"
|
|
138
|
+
Requires-Dist: jdeskew; extra == "hf"
|
|
139
|
+
Requires-Dist: apted==1.0.3; extra == "hf"
|
|
140
|
+
Requires-Dist: distance==0.1.3; extra == "hf"
|
|
141
|
+
Requires-Dist: lxml>=4.9.1; extra == "hf"
|
|
23
142
|
|
|
24
143
|
|
|
25
144
|
<p align="center">
|
|
26
|
-
<img src="https://github.com/deepdoctection/deepdoctection/
|
|
145
|
+
<img src="https://github.com/deepdoctection/deepdoctection/raw/master/docs/tutorials/_imgs/dd_logo.png" alt="Deep Doctection Logo" width="60%">
|
|
27
146
|
<h3 align="center">
|
|
28
147
|
A Document AI Package
|
|
29
148
|
</h3>
|
|
@@ -123,20 +242,20 @@ plt.axis('off')
|
|
|
123
242
|
plt.imshow(image)
|
|
124
243
|
```
|
|
125
244
|
|
|
126
|
-

|
|
127
246
|
|
|
128
247
|
```
|
|
129
248
|
HTML(page.tables[0].html)
|
|
130
249
|
```
|
|
131
250
|
|
|
132
|
-

|
|
133
252
|
|
|
134
253
|
|
|
135
254
|
```
|
|
136
255
|
print(page.text)
|
|
137
256
|
```
|
|
138
257
|
|
|
139
|
-

|
|
140
259
|
|
|
141
260
|
|
|
142
261
|
## Documentation
|
|
@@ -150,14 +269,14 @@ documentation.
|
|
|
150
269
|
|
|
151
270
|
## Requirements
|
|
152
271
|
|
|
153
|
-

|
|
154
273
|
|
|
155
274
|
Everything in the overview listed below the **deep**doctection layer are necessary requirements and have to be installed
|
|
156
275
|
separately.
|
|
157
276
|
|
|
158
277
|
- Linux or macOS. (Windows is not supported but there is a [Dockerfile](./docker/pytorch-cpu-jupyter/Dockerfile) available)
|
|
159
278
|
- Python >= 3.8
|
|
160
|
-
- 1.
|
|
279
|
+
- 1.12 <= PyTorch < 2.0 **or** Tensorflow >= 2.9 and CUDA. If you want to run the models provided by Tensorpack a GPU is
|
|
161
280
|
required. You can run on PyTorch with a CPU only.
|
|
162
281
|
- **deep**doctection uses Python wrappers for [Poppler](https://poppler.freedesktop.org/) to convert PDF documents into
|
|
163
282
|
images.
|
|
@@ -230,7 +349,7 @@ pip install deepdoctection
|
|
|
230
349
|
This will ignore all model libraries (layers above the **deep**doctection layer in the diagram) and you
|
|
231
350
|
will be responsible to install them by yourself. Note, that you will not be able to run any pipeline with this setup.
|
|
232
351
|
|
|
233
|
-
For further information, please consult the [**full installation instructions**](https://deepdoctection.readthedocs.io/en/latest/
|
|
352
|
+
For further information, please consult the [**full installation instructions**](https://deepdoctection.readthedocs.io/en/latest/install/).
|
|
234
353
|
|
|
235
354
|
|
|
236
355
|
### Installation from source
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
|
|
2
2
|
<p align="center">
|
|
3
|
-
<img src="https://github.com/deepdoctection/deepdoctection/
|
|
3
|
+
<img src="https://github.com/deepdoctection/deepdoctection/raw/master/docs/tutorials/_imgs/dd_logo.png" alt="Deep Doctection Logo" width="60%">
|
|
4
4
|
<h3 align="center">
|
|
5
5
|
A Document AI Package
|
|
6
6
|
</h3>
|
|
@@ -100,20 +100,20 @@ plt.axis('off')
|
|
|
100
100
|
plt.imshow(image)
|
|
101
101
|
```
|
|
102
102
|
|
|
103
|
-

|
|
104
104
|
|
|
105
105
|
```
|
|
106
106
|
HTML(page.tables[0].html)
|
|
107
107
|
```
|
|
108
108
|
|
|
109
|
-

|
|
110
110
|
|
|
111
111
|
|
|
112
112
|
```
|
|
113
113
|
print(page.text)
|
|
114
114
|
```
|
|
115
115
|
|
|
116
|
-

|
|
117
117
|
|
|
118
118
|
|
|
119
119
|
## Documentation
|
|
@@ -127,14 +127,14 @@ documentation.
|
|
|
127
127
|
|
|
128
128
|
## Requirements
|
|
129
129
|
|
|
130
|
-

|
|
131
131
|
|
|
132
132
|
Everything in the overview listed below the **deep**doctection layer are necessary requirements and have to be installed
|
|
133
133
|
separately.
|
|
134
134
|
|
|
135
135
|
- Linux or macOS. (Windows is not supported but there is a [Dockerfile](./docker/pytorch-cpu-jupyter/Dockerfile) available)
|
|
136
136
|
- Python >= 3.8
|
|
137
|
-
- 1.
|
|
137
|
+
- 1.12 <= PyTorch < 2.0 **or** Tensorflow >= 2.9 and CUDA. If you want to run the models provided by Tensorpack a GPU is
|
|
138
138
|
required. You can run on PyTorch with a CPU only.
|
|
139
139
|
- **deep**doctection uses Python wrappers for [Poppler](https://poppler.freedesktop.org/) to convert PDF documents into
|
|
140
140
|
images.
|
|
@@ -207,7 +207,7 @@ pip install deepdoctection
|
|
|
207
207
|
This will ignore all model libraries (layers above the **deep**doctection layer in the diagram) and you
|
|
208
208
|
will be responsible to install them by yourself. Note, that you will not be able to run any pipeline with this setup.
|
|
209
209
|
|
|
210
|
-
For further information, please consult the [**full installation instructions**](https://deepdoctection.readthedocs.io/en/latest/
|
|
210
|
+
For further information, please consult the [**full installation instructions**](https://deepdoctection.readthedocs.io/en/latest/install/).
|
|
211
211
|
|
|
212
212
|
|
|
213
213
|
### Installation from source
|
|
@@ -14,7 +14,7 @@ from packaging import version
|
|
|
14
14
|
from .utils.file_utils import _LazyModule, get_tf_version, pytorch_available, tf_available
|
|
15
15
|
from .utils.logger import logger
|
|
16
16
|
|
|
17
|
-
__version__ = 0.
|
|
17
|
+
__version__ = 0.27
|
|
18
18
|
|
|
19
19
|
_IMPORT_STRUCTURE = {
|
|
20
20
|
"analyzer": ["get_dd_analyzer", "build_analyzer"],
|
|
@@ -311,6 +311,10 @@ _IMPORT_STRUCTURE = {
|
|
|
311
311
|
"get_fasttext_requirement",
|
|
312
312
|
"wandb_available",
|
|
313
313
|
"get_wandb_requirement",
|
|
314
|
+
"opencv_available",
|
|
315
|
+
"get_opencv_requirement",
|
|
316
|
+
"pillow_available",
|
|
317
|
+
"get_pillow_requirement",
|
|
314
318
|
"load_image_from_file",
|
|
315
319
|
"load_bytes_from_pdf_file",
|
|
316
320
|
"get_load_image_func",
|
|
@@ -378,6 +382,7 @@ _IMPORT_STRUCTURE = {
|
|
|
378
382
|
"draw_text",
|
|
379
383
|
"draw_boxes",
|
|
380
384
|
"interactive_imshow",
|
|
385
|
+
"viz_handler",
|
|
381
386
|
],
|
|
382
387
|
}
|
|
383
388
|
|
|
@@ -403,6 +408,7 @@ if tf_available():
|
|
|
403
408
|
except Exception: # pylint: disable=W0703
|
|
404
409
|
pass
|
|
405
410
|
|
|
411
|
+
|
|
406
412
|
# Direct imports for type-checking
|
|
407
413
|
if TYPE_CHECKING:
|
|
408
414
|
from .analyzer import *
|
|
@@ -36,7 +36,7 @@ from ..extern.tessocr import TesseractOcrDetector
|
|
|
36
36
|
from ..extern.texocr import TextractOcrDetector
|
|
37
37
|
from ..pipe.base import PipelineComponent
|
|
38
38
|
from ..pipe.cell import DetectResultGenerator, SubImageLayoutService
|
|
39
|
-
from ..pipe.common import MatchingService, PageParsingService
|
|
39
|
+
from ..pipe.common import AnnotationNmsService, MatchingService, PageParsingService
|
|
40
40
|
from ..pipe.doctectionpipe import DoctectionPipe
|
|
41
41
|
from ..pipe.layout import ImageLayoutService
|
|
42
42
|
from ..pipe.order import TextOrderService
|
|
@@ -206,7 +206,7 @@ def _build_ocr(cfg: AttrDict) -> Union[TesseractOcrDetector, DoctrTextRecognizer
|
|
|
206
206
|
profile = ModelCatalog.get_profile(weights)
|
|
207
207
|
if profile.architecture is None:
|
|
208
208
|
raise ValueError("model profile.architecture must be specified")
|
|
209
|
-
return DoctrTextRecognizer(profile.architecture, weights_path, cfg.DEVICE)
|
|
209
|
+
return DoctrTextRecognizer(profile.architecture, weights_path, cfg.DEVICE, lib=cfg.LIB)
|
|
210
210
|
if cfg.OCR.USE_TEXTRACT:
|
|
211
211
|
credentials_kwargs = {
|
|
212
212
|
"aws_access_key_id": environ.get("ACCESS_KEY"),
|
|
@@ -225,7 +225,7 @@ def _build_doctr_word(cfg: AttrDict) -> DoctrTextlineDetector:
|
|
|
225
225
|
raise ValueError("model profile.architecture must be specified")
|
|
226
226
|
if profile.categories is None:
|
|
227
227
|
raise ValueError("model profile.categories must be specified")
|
|
228
|
-
return DoctrTextlineDetector(profile.architecture, weights_path, profile.categories, cfg.DEVICE)
|
|
228
|
+
return DoctrTextlineDetector(profile.architecture, weights_path, profile.categories, cfg.DEVICE, lib=cfg.LIB)
|
|
229
229
|
|
|
230
230
|
|
|
231
231
|
def build_analyzer(cfg: AttrDict) -> DoctectionPipe:
|
|
@@ -242,6 +242,17 @@ def build_analyzer(cfg: AttrDict) -> DoctectionPipe:
|
|
|
242
242
|
layout = _build_service(d_layout, cfg, "LAYOUT")
|
|
243
243
|
pipe_component_list.append(layout)
|
|
244
244
|
|
|
245
|
+
# setup layout nms service
|
|
246
|
+
if cfg.LAYOUT_NMS_PAIRS.COMBINATIONS and cfg.USE_LAYOUT:
|
|
247
|
+
if not isinstance(cfg.LAYOUT_NMS_PAIRS.COMBINATIONS, list) and not isinstance(
|
|
248
|
+
cfg.LAYOUT_NMS_PAIRS.COMBINATIONS[0], list
|
|
249
|
+
):
|
|
250
|
+
raise ValueError("LAYOUT_NMS_PAIRS mus be a list of lists")
|
|
251
|
+
layout_nms_serivce = AnnotationNmsService(
|
|
252
|
+
cfg.LAYOUT_NMS_PAIRS.COMBINATIONS, cfg.LAYOUT_NMS_PAIRS.THRESHOLDS, cfg.LAYOUT_NMS_PAIRS.PRIORITY
|
|
253
|
+
)
|
|
254
|
+
pipe_component_list.append(layout_nms_serivce)
|
|
255
|
+
|
|
245
256
|
# setup tables service
|
|
246
257
|
if cfg.USE_TABLE_SEGMENTATION:
|
|
247
258
|
d_item = _build_detector(cfg, "ITEM")
|
|
@@ -302,6 +313,7 @@ def build_analyzer(cfg: AttrDict) -> DoctectionPipe:
|
|
|
302
313
|
)
|
|
303
314
|
pipe_component_list.append(text)
|
|
304
315
|
|
|
316
|
+
if cfg.USE_PDF_MINER or cfg.USE_OCR:
|
|
305
317
|
match = MatchingService(
|
|
306
318
|
parent_categories=cfg.WORD_MATCHING.PARENTAL_CATEGORIES,
|
|
307
319
|
child_categories=LayoutType.word,
|
|
@@ -25,7 +25,6 @@ from io import BytesIO
|
|
|
25
25
|
from shutil import which
|
|
26
26
|
from typing import Any, Optional, Union, no_type_check
|
|
27
27
|
|
|
28
|
-
import cv2
|
|
29
28
|
import numpy as np
|
|
30
29
|
from numpy import uint8
|
|
31
30
|
from numpy.typing import NDArray
|
|
@@ -34,6 +33,7 @@ from PyPDF2 import PdfReader
|
|
|
34
33
|
from ..utils.detection_types import ImageType
|
|
35
34
|
from ..utils.develop import deprecated
|
|
36
35
|
from ..utils.pdf_utils import pdf_to_np_array
|
|
36
|
+
from ..utils.viz import viz_handler
|
|
37
37
|
|
|
38
38
|
__all__ = [
|
|
39
39
|
"convert_b64_to_np_array",
|
|
@@ -81,9 +81,8 @@ def convert_b64_to_np_array(image: str) -> ImageType:
|
|
|
81
81
|
:param image: An image as base64 string.
|
|
82
82
|
:return: numpy array.
|
|
83
83
|
"""
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
return np_array.astype(uint8)
|
|
84
|
+
|
|
85
|
+
return viz_handler.convert_b64_to_np(image).astype(uint8)
|
|
87
86
|
|
|
88
87
|
|
|
89
88
|
def convert_np_array_to_b64(np_image: ImageType) -> str:
|
|
@@ -93,9 +92,7 @@ def convert_np_array_to_b64(np_image: ImageType) -> str:
|
|
|
93
92
|
:param np_image: An image as numpy array.
|
|
94
93
|
:return: An image as base64 string.
|
|
95
94
|
"""
|
|
96
|
-
|
|
97
|
-
image = base64.b64encode(np_encode[1]).decode("utf-8") # type: ignore
|
|
98
|
-
return image
|
|
95
|
+
return viz_handler.convert_np_to_b64(np_image)
|
|
99
96
|
|
|
100
97
|
|
|
101
98
|
@no_type_check
|
|
@@ -106,9 +103,7 @@ def convert_np_array_to_b64_b(np_image: ImageType) -> bytes:
|
|
|
106
103
|
:param np_image: An image as numpy array.
|
|
107
104
|
:return: An image as base64 bytes.
|
|
108
105
|
"""
|
|
109
|
-
|
|
110
|
-
b_image = np_encode[1].tobytes()
|
|
111
|
-
return b_image
|
|
106
|
+
return viz_handler.encode(np_image)
|
|
112
107
|
|
|
113
108
|
|
|
114
109
|
@deprecated("Use convert_pdf_bytes_to_np_array_v2", "2022-02-23")
|
|
@@ -626,8 +626,8 @@ class Image:
|
|
|
626
626
|
self.remove_image_from_lower_hierachy()
|
|
627
627
|
export_dict = self.as_dict()
|
|
628
628
|
export_dict["location"] = str(export_dict["location"])
|
|
629
|
-
if
|
|
630
|
-
export_dict["_image"] =
|
|
629
|
+
if not image_to_json:
|
|
630
|
+
export_dict["_image"] = None
|
|
631
631
|
if dry:
|
|
632
632
|
return export_dict
|
|
633
633
|
with open(path_json, "w", encoding="UTF-8") as file:
|
|
@@ -23,7 +23,6 @@ simplify consumption
|
|
|
23
23
|
from copy import copy
|
|
24
24
|
from typing import Any, Dict, List, Mapping, Optional, Sequence, Set, Tuple, Type, Union, no_type_check
|
|
25
25
|
|
|
26
|
-
import cv2
|
|
27
26
|
import numpy as np
|
|
28
27
|
|
|
29
28
|
from ..utils.detection_types import ImageType, JsonDict, Pathlike
|
|
@@ -39,7 +38,7 @@ from ..utils.settings import (
|
|
|
39
38
|
WordType,
|
|
40
39
|
get_type,
|
|
41
40
|
)
|
|
42
|
-
from ..utils.viz import draw_boxes, interactive_imshow
|
|
41
|
+
from ..utils.viz import draw_boxes, interactive_imshow, viz_handler
|
|
43
42
|
from .annotation import ContainerAnnotation, ImageAnnotation, SummaryAnnotation, ann_from_dict
|
|
44
43
|
from .box import BoundingBox
|
|
45
44
|
from .image import Image
|
|
@@ -415,6 +414,17 @@ class Page(Image):
|
|
|
415
414
|
text_container: ObjectTypes
|
|
416
415
|
floating_text_block_categories: List[ObjectTypes]
|
|
417
416
|
image_orig: Image
|
|
417
|
+
_attribute_names: Set[str] = {
|
|
418
|
+
"text",
|
|
419
|
+
"chunks",
|
|
420
|
+
"tables",
|
|
421
|
+
"layouts",
|
|
422
|
+
"words",
|
|
423
|
+
"file_name",
|
|
424
|
+
"location",
|
|
425
|
+
"document_id",
|
|
426
|
+
"page_number",
|
|
427
|
+
}
|
|
418
428
|
|
|
419
429
|
@no_type_check
|
|
420
430
|
def get_annotation(
|
|
@@ -734,7 +744,9 @@ class Page(Image):
|
|
|
734
744
|
)
|
|
735
745
|
else:
|
|
736
746
|
img = draw_boxes(self.image, boxes, category_names_list)
|
|
737
|
-
|
|
747
|
+
scale_fx, scale_fy = 1.3, 1.3
|
|
748
|
+
scaled_width, scaled_height = int(self.width * scale_fx), int(self.height * scale_fy)
|
|
749
|
+
img = viz_handler.resize(img, scaled_width, scaled_height, "VIZ")
|
|
738
750
|
else:
|
|
739
751
|
img = self.image
|
|
740
752
|
|
|
@@ -744,24 +756,32 @@ class Page(Image):
|
|
|
744
756
|
return img
|
|
745
757
|
return None
|
|
746
758
|
|
|
747
|
-
@
|
|
748
|
-
def get_attribute_names() -> Set[str]:
|
|
759
|
+
@classmethod
|
|
760
|
+
def get_attribute_names(cls) -> Set[str]:
|
|
749
761
|
"""
|
|
750
762
|
:return: A set of registered attributes.
|
|
751
763
|
"""
|
|
752
|
-
return set(PageType).union(
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
"
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
764
|
+
return set(PageType).union(cls._attribute_names)
|
|
765
|
+
|
|
766
|
+
@classmethod
|
|
767
|
+
def add_attribute_name(cls, attribute_name: Union[str, ObjectTypes]) -> None:
|
|
768
|
+
"""
|
|
769
|
+
Adding a custom attribute name to a Page class.
|
|
770
|
+
|
|
771
|
+
**Example:**
|
|
772
|
+
|
|
773
|
+
Page.add_attribute_name("foo")
|
|
774
|
+
|
|
775
|
+
page = Page.from_image(...)
|
|
776
|
+
print(page.foo)
|
|
777
|
+
|
|
778
|
+
Note, that the attribute must be registered as a valid `ObjectTypes`
|
|
779
|
+
|
|
780
|
+
:param attribute_name: attribute name to add
|
|
781
|
+
"""
|
|
782
|
+
|
|
783
|
+
attribute_name = get_type(attribute_name)
|
|
784
|
+
cls._attribute_names.add(attribute_name.value)
|
|
765
785
|
|
|
766
786
|
def save(
|
|
767
787
|
self,
|
|
@@ -23,13 +23,12 @@ import json
|
|
|
23
23
|
from pathlib import Path
|
|
24
24
|
from typing import Optional
|
|
25
25
|
|
|
26
|
-
from cv2 import imwrite
|
|
27
|
-
|
|
28
26
|
from ..dataflow import DataFlow, MapData, SerializerJsonlines
|
|
29
27
|
from ..datapoint.convert import convert_b64_to_np_array
|
|
30
28
|
from ..datapoint.image import Image
|
|
31
29
|
from ..utils.detection_types import JsonDict, Pathlike
|
|
32
30
|
from ..utils.fs import mkdir_p
|
|
31
|
+
from ..utils.viz import viz_handler
|
|
33
32
|
|
|
34
33
|
|
|
35
34
|
def dataflow_to_json(
|
|
@@ -84,7 +83,8 @@ def dataflow_to_json(
|
|
|
84
83
|
target_file_png = path / "image" / (dp["file_name"].split(".")[0] + ".png")
|
|
85
84
|
image = dp.pop("_image")
|
|
86
85
|
image = convert_b64_to_np_array(image)
|
|
87
|
-
|
|
86
|
+
|
|
87
|
+
viz_handler.write_image(str(target_file_png), image)
|
|
88
88
|
|
|
89
89
|
with open(target_file, "w", encoding="UTF-8") as file:
|
|
90
90
|
json.dump(dp, file)
|
|
@@ -23,7 +23,6 @@ from copy import copy
|
|
|
23
23
|
from pathlib import Path
|
|
24
24
|
from typing import Any, Dict, List, Literal, Mapping, Optional, Sequence
|
|
25
25
|
|
|
26
|
-
import cv2
|
|
27
26
|
import numpy as np
|
|
28
27
|
|
|
29
28
|
from ..utils.detection_types import ImageType, Requirement
|
|
@@ -130,7 +129,7 @@ def d2_jit_predict_image(
|
|
|
130
129
|
keep = batched_nms(boxes, scores, class_masks, nms_thresh_class_agnostic).cpu()
|
|
131
130
|
|
|
132
131
|
# The exported model does not contain the final resize step, so we need to add it manually here
|
|
133
|
-
inverse_resizer = ResizeTransform(new_height, new_width, height, width,
|
|
132
|
+
inverse_resizer = ResizeTransform(new_height, new_width, height, width, "VIZ")
|
|
134
133
|
np_boxes = np.reshape(boxes.cpu().numpy(), (-1, 2))
|
|
135
134
|
np_boxes = inverse_resizer.apply_coords(np_boxes)
|
|
136
135
|
np_boxes = np.reshape(np_boxes, (-1, 4))
|
|
@@ -62,14 +62,14 @@ def _set_device_str(device: Optional[str] = None) -> str:
|
|
|
62
62
|
return device
|
|
63
63
|
|
|
64
64
|
|
|
65
|
-
def _load_model(path_weights: str, doctr_predictor: Any, device: str) -> None:
|
|
66
|
-
if pytorch_available():
|
|
65
|
+
def _load_model(path_weights: str, doctr_predictor: Any, device: str, lib: str) -> None:
|
|
66
|
+
if lib == "PT" and pytorch_available():
|
|
67
67
|
state_dict = torch.load(path_weights, map_location=device)
|
|
68
68
|
for key in list(state_dict.keys()):
|
|
69
69
|
state_dict["model." + key] = state_dict.pop(key)
|
|
70
70
|
doctr_predictor.load_state_dict(state_dict)
|
|
71
71
|
doctr_predictor.to(device)
|
|
72
|
-
elif tf_available():
|
|
72
|
+
elif lib == "TF" and tf_available():
|
|
73
73
|
# Unzip the archive
|
|
74
74
|
params_path = Path(path_weights).parent
|
|
75
75
|
is_zip_path = path_weights.endswith(".zip")
|
|
@@ -99,7 +99,7 @@ def doctr_predict_text_lines(np_img: ImageType, predictor: "DetectionPredictor",
|
|
|
99
99
|
DetectionResult(
|
|
100
100
|
box=box[:4].tolist(), class_id=1, score=box[4], absolute_coords=False, class_name=LayoutType.word
|
|
101
101
|
)
|
|
102
|
-
for box in raw_output[0]
|
|
102
|
+
for box in raw_output[0]["words"]
|
|
103
103
|
]
|
|
104
104
|
return detection_results
|
|
105
105
|
|
|
@@ -173,7 +173,9 @@ class DoctrTextlineDetector(ObjectDetector):
|
|
|
173
173
|
path_weights: str,
|
|
174
174
|
categories: Mapping[str, TypeOrStr],
|
|
175
175
|
device: Optional[Literal["cpu", "cuda"]] = None,
|
|
176
|
+
lib: str = "TF",
|
|
176
177
|
) -> None:
|
|
178
|
+
self.lib = lib
|
|
177
179
|
self.name = "doctr_text_detector"
|
|
178
180
|
self.architecture = architecture
|
|
179
181
|
self.path_weights = path_weights
|
|
@@ -205,14 +207,14 @@ class DoctrTextlineDetector(ObjectDetector):
|
|
|
205
207
|
raise ModuleNotFoundError("Neither Tensorflow nor PyTorch has been installed. Cannot use DoctrTextlineDetector")
|
|
206
208
|
|
|
207
209
|
def clone(self) -> PredictorBase:
|
|
208
|
-
return self.__class__(self.architecture, self.path_weights, self.categories, self.device_input)
|
|
210
|
+
return self.__class__(self.architecture, self.path_weights, self.categories, self.device_input, self.lib)
|
|
209
211
|
|
|
210
212
|
def possible_categories(self) -> List[ObjectTypes]:
|
|
211
213
|
return [LayoutType.word]
|
|
212
214
|
|
|
213
215
|
def load_model(self) -> None:
|
|
214
216
|
"""Loading model weights"""
|
|
215
|
-
_load_model(self.path_weights, self.doctr_predictor, self.device)
|
|
217
|
+
_load_model(self.path_weights, self.doctr_predictor, self.device, self.lib)
|
|
216
218
|
|
|
217
219
|
|
|
218
220
|
class DoctrTextRecognizer(TextRecognizer):
|
|
@@ -252,7 +254,10 @@ class DoctrTextRecognizer(TextRecognizer):
|
|
|
252
254
|
|
|
253
255
|
"""
|
|
254
256
|
|
|
255
|
-
def __init__(
|
|
257
|
+
def __init__(
|
|
258
|
+
self, architecture: str, path_weights: str, device: Optional[Literal["cpu", "cuda"]] = None, lib: str = "TF"
|
|
259
|
+
) -> None:
|
|
260
|
+
self.lib = lib
|
|
256
261
|
self.name = "doctr_text_recognizer"
|
|
257
262
|
self.architecture = architecture
|
|
258
263
|
self.path_weights = path_weights
|
|
@@ -281,8 +286,8 @@ class DoctrTextRecognizer(TextRecognizer):
|
|
|
281
286
|
raise ModuleNotFoundError("Neither Tensorflow nor PyTorch has been installed. Cannot use DoctrTextRecognizer")
|
|
282
287
|
|
|
283
288
|
def clone(self) -> PredictorBase:
|
|
284
|
-
return self.__class__(self.architecture, self.path_weights, self.device_input)
|
|
289
|
+
return self.__class__(self.architecture, self.path_weights, self.device_input, self.lib)
|
|
285
290
|
|
|
286
291
|
def load_model(self) -> None:
|
|
287
292
|
"""Loading model weights"""
|
|
288
|
-
_load_model(self.path_weights, self.doctr_predictor, self.device)
|
|
293
|
+
_load_model(self.path_weights, self.doctr_predictor, self.device, self.lib)
|
|
@@ -10,7 +10,6 @@ This file is modified from
|
|
|
10
10
|
"""
|
|
11
11
|
|
|
12
12
|
|
|
13
|
-
import cv2
|
|
14
13
|
import numpy as np
|
|
15
14
|
from tensorpack.dataflow.imgaug import ImageAugmentor, ResizeTransform # pylint: disable=E0401
|
|
16
15
|
|
|
@@ -25,11 +24,11 @@ class CustomResize(ImageAugmentor):
|
|
|
25
24
|
Try resizing the shortest edge to a certain number while avoiding the longest edge to exceed max_size.
|
|
26
25
|
"""
|
|
27
26
|
|
|
28
|
-
def __init__(self, short_edge_length, max_size, interp=
|
|
27
|
+
def __init__(self, short_edge_length, max_size, interp=1):
|
|
29
28
|
"""
|
|
30
29
|
:param short_edge_length: a [min, max] interval from which to sample the shortest edge length.
|
|
31
30
|
:param max_size: maximum allowed longest edge length.
|
|
32
|
-
:param interp:
|
|
31
|
+
:param interp: Interpolation mode. We use Tensorpack's internal `ResizeTransform`, that always requires OpenCV
|
|
33
32
|
"""
|
|
34
33
|
super().__init__()
|
|
35
34
|
if isinstance(short_edge_length, int):
|