deepdoctection 0.39.2__tar.gz → 0.39.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of deepdoctection might be problematic. Click here for more details.
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/PKG-INFO +3 -3
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/__init__.py +1 -1
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/datapoint/image.py +18 -6
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/datasets/registry.py +5 -1
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/extern/doctrocr.py +8 -7
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/mapper/cats.py +4 -3
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/pipe/segment.py +8 -6
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection.egg-info/PKG-INFO +3 -3
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection.egg-info/requires.txt +2 -2
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/setup.py +1 -1
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/LICENSE +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/README.md +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/analyzer/__init__.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/analyzer/_config.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/analyzer/dd.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/analyzer/factory.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/configs/__init__.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/configs/conf_dd_one.yaml +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/configs/conf_tesseract.yaml +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/dataflow/__init__.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/dataflow/base.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/dataflow/common.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/dataflow/custom.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/dataflow/custom_serialize.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/dataflow/parallel_map.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/dataflow/serialize.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/dataflow/stats.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/datapoint/__init__.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/datapoint/annotation.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/datapoint/box.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/datapoint/convert.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/datapoint/view.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/datasets/__init__.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/datasets/adapter.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/datasets/base.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/datasets/dataflow_builder.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/datasets/info.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/datasets/instances/__init__.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/datasets/instances/doclaynet.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/datasets/instances/fintabnet.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/datasets/instances/funsd.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/datasets/instances/iiitar13k.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/datasets/instances/layouttest.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/datasets/instances/publaynet.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/datasets/instances/pubtables1m.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/datasets/instances/pubtabnet.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/datasets/instances/rvlcdip.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/datasets/instances/xfund.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/datasets/instances/xsl/__init__.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/datasets/instances/xsl/pascal_voc.xsl +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/datasets/save.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/eval/__init__.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/eval/accmetric.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/eval/base.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/eval/cocometric.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/eval/eval.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/eval/registry.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/eval/tedsmetric.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/eval/tp_eval_callback.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/extern/__init__.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/extern/base.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/extern/d2detect.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/extern/deskew.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/extern/fastlang.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/extern/hfdetr.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/extern/hflayoutlm.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/extern/hflm.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/extern/model.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/extern/pdftext.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/extern/pt/__init__.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/extern/pt/nms.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/extern/pt/ptutils.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/extern/tessocr.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/extern/texocr.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/extern/tp/__init__.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/extern/tp/tfutils.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/extern/tp/tpcompat.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/extern/tp/tpfrcnn/__init__.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/extern/tp/tpfrcnn/common.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/extern/tp/tpfrcnn/config/__init__.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/extern/tp/tpfrcnn/config/config.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/extern/tp/tpfrcnn/modeling/__init__.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/extern/tp/tpfrcnn/modeling/generalized_rcnn.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/extern/tp/tpfrcnn/modeling/model_cascade.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/extern/tp/tpfrcnn/modeling/model_rpn.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/extern/tp/tpfrcnn/predict.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/extern/tp/tpfrcnn/preproc.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/extern/tp/tpfrcnn/utils/__init__.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/extern/tp/tpfrcnn/utils/box_ops.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/extern/tp/tpfrcnn/utils/np_box_ops.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/extern/tpdetect.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/mapper/__init__.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/mapper/cocostruct.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/mapper/d2struct.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/mapper/hfstruct.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/mapper/laylmstruct.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/mapper/maputils.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/mapper/match.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/mapper/misc.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/mapper/pascalstruct.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/mapper/prodigystruct.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/mapper/pubstruct.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/mapper/tpstruct.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/mapper/xfundstruct.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/pipe/__init__.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/pipe/anngen.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/pipe/base.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/pipe/common.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/pipe/concurrency.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/pipe/doctectionpipe.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/pipe/language.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/pipe/layout.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/pipe/lm.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/pipe/order.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/pipe/refine.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/pipe/registry.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/pipe/sub_layout.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/pipe/text.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/pipe/transform.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/py.typed +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/train/__init__.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/train/d2_frcnn_train.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/train/hf_detr_train.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/train/hf_layoutlm_train.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/train/tp_frcnn_train.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/utils/__init__.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/utils/concurrency.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/utils/context.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/utils/develop.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/utils/env_info.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/utils/error.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/utils/file_utils.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/utils/fs.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/utils/identifier.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/utils/logger.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/utils/metacfg.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/utils/mocks.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/utils/pdf_utils.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/utils/settings.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/utils/tqdm.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/utils/transform.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/utils/types.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/utils/utils.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/utils/viz.py +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection.egg-info/SOURCES.txt +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection.egg-info/dependency_links.txt +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection.egg-info/top_level.txt +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/setup.cfg +0 -0
- {deepdoctection-0.39.2 → deepdoctection-0.39.4}/tests/test_utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: deepdoctection
|
|
3
|
-
Version: 0.39.2
|
|
3
|
+
Version: 0.39.4
|
|
4
4
|
Summary: Repository for Document AI
|
|
5
5
|
Home-page: https://github.com/deepdoctection/deepdoctection
|
|
6
6
|
Author: Dr. Janis Meyer
|
|
@@ -57,7 +57,7 @@ Requires-Dist: tensorpack==0.11; extra == "tf"
|
|
|
57
57
|
Requires-Dist: protobuf==3.20.1; extra == "tf"
|
|
58
58
|
Requires-Dist: tensorflow-addons>=0.17.1; extra == "tf"
|
|
59
59
|
Requires-Dist: tf2onnx>=1.9.2; extra == "tf"
|
|
60
|
-
Requires-Dist: python-doctr==0.
|
|
60
|
+
Requires-Dist: python-doctr==0.9.0; extra == "tf"
|
|
61
61
|
Requires-Dist: pycocotools>=2.0.2; extra == "tf"
|
|
62
62
|
Requires-Dist: boto3==1.34.102; extra == "tf"
|
|
63
63
|
Requires-Dist: pdfplumber>=0.11.0; extra == "tf"
|
|
@@ -88,7 +88,7 @@ Requires-Dist: tqdm==4.64.0; extra == "pt"
|
|
|
88
88
|
Requires-Dist: timm>=0.9.16; extra == "pt"
|
|
89
89
|
Requires-Dist: transformers>=4.48.0; extra == "pt"
|
|
90
90
|
Requires-Dist: accelerate>=0.29.1; extra == "pt"
|
|
91
|
-
Requires-Dist: python-doctr==0.
|
|
91
|
+
Requires-Dist: python-doctr==0.9.0; extra == "pt"
|
|
92
92
|
Requires-Dist: boto3==1.34.102; extra == "pt"
|
|
93
93
|
Requires-Dist: pdfplumber>=0.11.0; extra == "pt"
|
|
94
94
|
Requires-Dist: fasttext-wheel; extra == "pt"
|
|
@@ -702,11 +702,11 @@ class Image:
|
|
|
702
702
|
return get_uuid(self.image_id, *container_ids)
|
|
703
703
|
|
|
704
704
|
def save(
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
705
|
+
self,
|
|
706
|
+
image_to_json: bool = True,
|
|
707
|
+
highest_hierarchy_only: bool = False,
|
|
708
|
+
path: Optional[PathLikeOrStr] = None,
|
|
709
|
+
dry: bool = False,
|
|
710
710
|
) -> Optional[Union[ImageDict, str]]:
|
|
711
711
|
"""
|
|
712
712
|
Export image as dictionary. As numpy array cannot be serialized `image` values will be converted into
|
|
@@ -719,6 +719,18 @@ class Image:
|
|
|
719
719
|
|
|
720
720
|
:return: optional dict
|
|
721
721
|
"""
|
|
722
|
+
|
|
723
|
+
def set_image_keys_to_none(d): # type: ignore
|
|
724
|
+
if isinstance(d, dict):
|
|
725
|
+
for key, value in d.items():
|
|
726
|
+
if key == '_image':
|
|
727
|
+
d[key] = None
|
|
728
|
+
else:
|
|
729
|
+
set_image_keys_to_none(value)
|
|
730
|
+
elif isinstance(d, list):
|
|
731
|
+
for item in d:
|
|
732
|
+
set_image_keys_to_none(item)
|
|
733
|
+
|
|
722
734
|
if path is None:
|
|
723
735
|
path = Path(self.location)
|
|
724
736
|
path = Path(path)
|
|
@@ -734,7 +746,7 @@ class Image:
|
|
|
734
746
|
export_dict = self.as_dict()
|
|
735
747
|
export_dict["location"] = fspath(export_dict["location"])
|
|
736
748
|
if not image_to_json:
|
|
737
|
-
export_dict
|
|
749
|
+
set_image_keys_to_none(export_dict)
|
|
738
750
|
if dry:
|
|
739
751
|
return export_dict
|
|
740
752
|
with open(path_json, "w", encoding="UTF-8") as file:
|
|
@@ -18,6 +18,7 @@
|
|
|
18
18
|
"""
|
|
19
19
|
Module for DatasetRegistry
|
|
20
20
|
"""
|
|
21
|
+
import inspect
|
|
21
22
|
|
|
22
23
|
import catalogue # type: ignore
|
|
23
24
|
from tabulate import tabulate
|
|
@@ -47,7 +48,10 @@ def get_dataset(name: str) -> DatasetBase:
|
|
|
47
48
|
:param name: A dataset name
|
|
48
49
|
:return: An instance of a dataset
|
|
49
50
|
"""
|
|
50
|
-
|
|
51
|
+
ds = dataset_registry.get(name)
|
|
52
|
+
if inspect.isclass(ds):
|
|
53
|
+
return ds()
|
|
54
|
+
return ds
|
|
51
55
|
|
|
52
56
|
|
|
53
57
|
def print_dataset_infos(add_license: bool = True, add_info: bool = True) -> None:
|
|
@@ -79,8 +79,8 @@ def _load_model(
|
|
|
79
79
|
state_dict = torch.load(os.fspath(path_weights), map_location=device)
|
|
80
80
|
for key in list(state_dict.keys()):
|
|
81
81
|
state_dict["model." + key] = state_dict.pop(key)
|
|
82
|
-
doctr_predictor.load_state_dict(state_dict)
|
|
83
|
-
doctr_predictor.to(device)
|
|
82
|
+
doctr_predictor.load_state_dict(state_dict) # type: ignore
|
|
83
|
+
doctr_predictor.to(device) # type: ignore
|
|
84
84
|
elif lib == "TF":
|
|
85
85
|
# Unzip the archive
|
|
86
86
|
params_path = Path(path_weights).parent
|
|
@@ -88,9 +88,9 @@ def _load_model(
|
|
|
88
88
|
if is_zip_path:
|
|
89
89
|
with ZipFile(path_weights, "r") as file:
|
|
90
90
|
file.extractall(path=params_path)
|
|
91
|
-
doctr_predictor.model.load_weights(params_path / "weights")
|
|
91
|
+
doctr_predictor.model.load_weights(params_path / "weights") # type: ignore
|
|
92
92
|
else:
|
|
93
|
-
doctr_predictor.model.load_weights(os.fspath(path_weights))
|
|
93
|
+
doctr_predictor.model.load_weights(os.fspath(path_weights)) # type: ignore
|
|
94
94
|
|
|
95
95
|
|
|
96
96
|
def auto_select_lib_for_doctr() -> Literal["PT", "TF"]:
|
|
@@ -125,7 +125,7 @@ def doctr_predict_text_lines(
|
|
|
125
125
|
DetectionResult(
|
|
126
126
|
box=box[:4].tolist(), class_id=1, score=box[4], absolute_coords=False, class_name=LayoutType.WORD
|
|
127
127
|
)
|
|
128
|
-
for box in raw_output[0]["words"]
|
|
128
|
+
for box in raw_output[0]["words"] # type: ignore
|
|
129
129
|
]
|
|
130
130
|
return detection_results
|
|
131
131
|
|
|
@@ -480,7 +480,7 @@ class DoctrTextRecognizer(TextRecognizer):
|
|
|
480
480
|
return auto_select_lib_for_doctr()
|
|
481
481
|
|
|
482
482
|
def clear_model(self) -> None:
|
|
483
|
-
self.doctr_predictor = None
|
|
483
|
+
self.doctr_predictor = None # type: ignore
|
|
484
484
|
|
|
485
485
|
|
|
486
486
|
class DocTrRotationTransformer(ImageTransformer):
|
|
@@ -527,7 +527,8 @@ class DocTrRotationTransformer(ImageTransformer):
|
|
|
527
527
|
return viz_handler.rotate_image(np_img, specification.angle) # type: ignore
|
|
528
528
|
|
|
529
529
|
def predict(self, np_img: PixelValues) -> DetectionResult:
|
|
530
|
-
angle = estimate_orientation(np_img, self.number_contours,
|
|
530
|
+
angle = estimate_orientation(np_img, n_ct=self.number_contours,
|
|
531
|
+
ratio_threshold_for_lines=self.ratio_threshold_for_lines)
|
|
531
532
|
if angle < 0:
|
|
532
533
|
angle += 360
|
|
533
534
|
return DetectionResult(angle=round(angle, 2))
|
|
@@ -32,7 +32,7 @@ from .maputils import LabelSummarizer, curry
|
|
|
32
32
|
@curry
|
|
33
33
|
def cat_to_sub_cat(
|
|
34
34
|
dp: Image,
|
|
35
|
-
categories_dict_names_as_key: dict[TypeOrStr, int],
|
|
35
|
+
categories_dict_names_as_key: Optional[dict[TypeOrStr, int]] = None,
|
|
36
36
|
cat_to_sub_cat_dict: Optional[dict[TypeOrStr, TypeOrStr]] = None,
|
|
37
37
|
) -> Image:
|
|
38
38
|
"""
|
|
@@ -45,7 +45,8 @@ def cat_to_sub_cat(
|
|
|
45
45
|
:param cat_to_sub_cat_dict: e.g. {'foo': 'sub_cat_1', 'bak': 'sub_cat_2'}
|
|
46
46
|
:return: Image with updated Annotations
|
|
47
47
|
"""
|
|
48
|
-
|
|
48
|
+
if categories_dict_names_as_key is None:
|
|
49
|
+
categories_dict_names_as_key = {}
|
|
49
50
|
if cat_to_sub_cat_dict is None:
|
|
50
51
|
return dp
|
|
51
52
|
cat_to_sub_cat_dict_obj_type = {get_type(key): get_type(value) for key, value in cat_to_sub_cat_dict.items()}
|
|
@@ -54,7 +55,7 @@ def cat_to_sub_cat(
|
|
|
54
55
|
sub_cat = ann.get_sub_category(sub_cat_type)
|
|
55
56
|
if sub_cat:
|
|
56
57
|
ann.category_name = sub_cat.category_name
|
|
57
|
-
ann.category_id = categories_dict_names_as_key
|
|
58
|
+
ann.category_id = categories_dict_names_as_key.get(ann.category_name,DEFAULT_CATEGORY_ID)
|
|
58
59
|
|
|
59
60
|
return dp
|
|
60
61
|
|
|
@@ -1190,14 +1190,16 @@ class PubtablesSegmentationService(PipelineComponent):
|
|
|
1190
1190
|
for key, value in cell_rn_cn_to_ann_id.items():
|
|
1191
1191
|
if key[idx] == item_number:
|
|
1192
1192
|
cell_ann = dp.get_annotation(annotation_ids=value)[0]
|
|
1193
|
-
|
|
1194
|
-
|
|
1195
|
-
|
|
1193
|
+
if item_header_cell_name not in cell_ann.sub_categories:
|
|
1194
|
+
self.dp_manager.set_category_annotation(
|
|
1195
|
+
item_header_cell_name, None, item_header_cell_name, cell_ann.annotation_id
|
|
1196
|
+
)
|
|
1196
1197
|
else:
|
|
1197
1198
|
cell_ann = dp.get_annotation(annotation_ids=value)[0]
|
|
1198
|
-
|
|
1199
|
-
|
|
1200
|
-
|
|
1199
|
+
if CellType.BODY not in cell_ann.sub_categories:
|
|
1200
|
+
self.dp_manager.set_category_annotation(
|
|
1201
|
+
item_header_cell_name, None, CellType.BODY, cell_ann.annotation_id
|
|
1202
|
+
)
|
|
1201
1203
|
|
|
1202
1204
|
# TODO: the summaries should be sub categories of the underlying ann
|
|
1203
1205
|
self.dp_manager.set_summary_annotation(
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: deepdoctection
|
|
3
|
-
Version: 0.39.2
|
|
3
|
+
Version: 0.39.4
|
|
4
4
|
Summary: Repository for Document AI
|
|
5
5
|
Home-page: https://github.com/deepdoctection/deepdoctection
|
|
6
6
|
Author: Dr. Janis Meyer
|
|
@@ -57,7 +57,7 @@ Requires-Dist: tensorpack==0.11; extra == "tf"
|
|
|
57
57
|
Requires-Dist: protobuf==3.20.1; extra == "tf"
|
|
58
58
|
Requires-Dist: tensorflow-addons>=0.17.1; extra == "tf"
|
|
59
59
|
Requires-Dist: tf2onnx>=1.9.2; extra == "tf"
|
|
60
|
-
Requires-Dist: python-doctr==0.
|
|
60
|
+
Requires-Dist: python-doctr==0.9.0; extra == "tf"
|
|
61
61
|
Requires-Dist: pycocotools>=2.0.2; extra == "tf"
|
|
62
62
|
Requires-Dist: boto3==1.34.102; extra == "tf"
|
|
63
63
|
Requires-Dist: pdfplumber>=0.11.0; extra == "tf"
|
|
@@ -88,7 +88,7 @@ Requires-Dist: tqdm==4.64.0; extra == "pt"
|
|
|
88
88
|
Requires-Dist: timm>=0.9.16; extra == "pt"
|
|
89
89
|
Requires-Dist: transformers>=4.48.0; extra == "pt"
|
|
90
90
|
Requires-Dist: accelerate>=0.29.1; extra == "pt"
|
|
91
|
-
Requires-Dist: python-doctr==0.
|
|
91
|
+
Requires-Dist: python-doctr==0.9.0; extra == "pt"
|
|
92
92
|
Requires-Dist: boto3==1.34.102; extra == "pt"
|
|
93
93
|
Requires-Dist: pdfplumber>=0.11.0; extra == "pt"
|
|
94
94
|
Requires-Dist: fasttext-wheel; extra == "pt"
|
|
@@ -69,7 +69,7 @@ tqdm==4.64.0
|
|
|
69
69
|
timm>=0.9.16
|
|
70
70
|
transformers>=4.48.0
|
|
71
71
|
accelerate>=0.29.1
|
|
72
|
-
python-doctr==0.
|
|
72
|
+
python-doctr==0.9.0
|
|
73
73
|
boto3==1.34.102
|
|
74
74
|
pdfplumber>=0.11.0
|
|
75
75
|
fasttext-wheel
|
|
@@ -105,7 +105,7 @@ tensorpack==0.11
|
|
|
105
105
|
protobuf==3.20.1
|
|
106
106
|
tensorflow-addons>=0.17.1
|
|
107
107
|
tf2onnx>=1.9.2
|
|
108
|
-
python-doctr==0.
|
|
108
|
+
python-doctr==0.9.0
|
|
109
109
|
pycocotools>=2.0.2
|
|
110
110
|
boto3==1.34.102
|
|
111
111
|
pdfplumber>=0.11.0
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/datasets/instances/__init__.py
RENAMED
|
File without changes
|
{deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/datasets/instances/doclaynet.py
RENAMED
|
File without changes
|
{deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/datasets/instances/fintabnet.py
RENAMED
|
File without changes
|
|
File without changes
|
{deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/datasets/instances/iiitar13k.py
RENAMED
|
File without changes
|
{deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/datasets/instances/layouttest.py
RENAMED
|
File without changes
|
{deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/datasets/instances/publaynet.py
RENAMED
|
File without changes
|
{deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/datasets/instances/pubtables1m.py
RENAMED
|
File without changes
|
{deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/datasets/instances/pubtabnet.py
RENAMED
|
File without changes
|
{deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/datasets/instances/rvlcdip.py
RENAMED
|
File without changes
|
|
File without changes
|
{deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/datasets/instances/xsl/__init__.py
RENAMED
|
File without changes
|
{deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/datasets/instances/xsl/pascal_voc.xsl
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/extern/tp/tpfrcnn/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/extern/tp/tpfrcnn/config/__init__.py
RENAMED
|
File without changes
|
{deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/extern/tp/tpfrcnn/config/config.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/extern/tp/tpfrcnn/utils/__init__.py
RENAMED
|
File without changes
|
{deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/extern/tp/tpfrcnn/utils/box_ops.py
RENAMED
|
File without changes
|
{deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection/extern/tp/tpfrcnn/utils/np_box_ops.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{deepdoctection-0.39.2 → deepdoctection-0.39.4}/deepdoctection.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|