deepdoctection 0.37.3__tar.gz → 0.39__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of deepdoctection might be problematic. Click here for more details.
- {deepdoctection-0.37.3 → deepdoctection-0.39}/PKG-INFO +15 -5
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/__init__.py +5 -1
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/analyzer/_config.py +2 -1
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/analyzer/dd.py +6 -5
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/analyzer/factory.py +16 -6
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/configs/conf_dd_one.yaml +126 -85
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/datapoint/box.py +2 -4
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/datapoint/convert.py +14 -8
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/datapoint/image.py +12 -5
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/datapoint/view.py +151 -53
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/extern/hfdetr.py +4 -3
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/extern/model.py +6 -97
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/mapper/cats.py +21 -10
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/mapper/match.py +0 -22
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/mapper/misc.py +12 -2
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/mapper/pubstruct.py +1 -1
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/pipe/doctectionpipe.py +20 -3
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/pipe/lm.py +20 -5
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/pipe/refine.py +6 -13
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/pipe/segment.py +225 -46
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/pipe/sub_layout.py +40 -22
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/train/hf_layoutlm_train.py +3 -1
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/utils/pdf_utils.py +17 -9
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection.egg-info/PKG-INFO +15 -5
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection.egg-info/requires.txt +3 -3
- {deepdoctection-0.37.3 → deepdoctection-0.39}/setup.cfg +1 -1
- {deepdoctection-0.37.3 → deepdoctection-0.39}/setup.py +3 -2
- {deepdoctection-0.37.3 → deepdoctection-0.39}/LICENSE +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/README.md +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/analyzer/__init__.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/configs/__init__.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/configs/conf_tesseract.yaml +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/dataflow/__init__.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/dataflow/base.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/dataflow/common.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/dataflow/custom.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/dataflow/custom_serialize.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/dataflow/parallel_map.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/dataflow/serialize.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/dataflow/stats.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/datapoint/__init__.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/datapoint/annotation.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/datasets/__init__.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/datasets/adapter.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/datasets/base.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/datasets/dataflow_builder.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/datasets/info.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/datasets/instances/__init__.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/datasets/instances/doclaynet.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/datasets/instances/fintabnet.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/datasets/instances/funsd.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/datasets/instances/iiitar13k.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/datasets/instances/layouttest.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/datasets/instances/publaynet.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/datasets/instances/pubtables1m.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/datasets/instances/pubtabnet.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/datasets/instances/rvlcdip.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/datasets/instances/xfund.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/datasets/instances/xsl/__init__.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/datasets/instances/xsl/pascal_voc.xsl +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/datasets/registry.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/datasets/save.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/eval/__init__.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/eval/accmetric.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/eval/base.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/eval/cocometric.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/eval/eval.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/eval/registry.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/eval/tedsmetric.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/eval/tp_eval_callback.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/extern/__init__.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/extern/base.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/extern/d2detect.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/extern/deskew.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/extern/doctrocr.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/extern/fastlang.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/extern/hflayoutlm.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/extern/hflm.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/extern/pdftext.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/extern/pt/__init__.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/extern/pt/nms.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/extern/pt/ptutils.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/extern/tessocr.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/extern/texocr.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/extern/tp/__init__.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/extern/tp/tfutils.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/extern/tp/tpcompat.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/extern/tp/tpfrcnn/__init__.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/extern/tp/tpfrcnn/common.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/extern/tp/tpfrcnn/config/__init__.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/extern/tp/tpfrcnn/config/config.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/extern/tp/tpfrcnn/modeling/__init__.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/extern/tp/tpfrcnn/modeling/generalized_rcnn.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/extern/tp/tpfrcnn/modeling/model_cascade.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/extern/tp/tpfrcnn/modeling/model_rpn.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/extern/tp/tpfrcnn/predict.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/extern/tp/tpfrcnn/preproc.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/extern/tp/tpfrcnn/utils/__init__.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/extern/tp/tpfrcnn/utils/box_ops.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/extern/tp/tpfrcnn/utils/np_box_ops.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/extern/tpdetect.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/mapper/__init__.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/mapper/cocostruct.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/mapper/d2struct.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/mapper/hfstruct.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/mapper/laylmstruct.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/mapper/maputils.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/mapper/pascalstruct.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/mapper/prodigystruct.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/mapper/tpstruct.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/mapper/xfundstruct.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/pipe/__init__.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/pipe/anngen.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/pipe/base.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/pipe/common.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/pipe/concurrency.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/pipe/language.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/pipe/layout.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/pipe/order.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/pipe/registry.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/pipe/text.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/pipe/transform.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/py.typed +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/train/__init__.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/train/d2_frcnn_train.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/train/hf_detr_train.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/train/tp_frcnn_train.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/utils/__init__.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/utils/concurrency.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/utils/context.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/utils/develop.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/utils/env_info.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/utils/error.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/utils/file_utils.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/utils/fs.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/utils/identifier.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/utils/logger.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/utils/metacfg.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/utils/mocks.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/utils/settings.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/utils/tqdm.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/utils/transform.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/utils/types.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/utils/utils.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection/utils/viz.py +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection.egg-info/SOURCES.txt +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection.egg-info/dependency_links.txt +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/deepdoctection.egg-info/top_level.txt +0 -0
- {deepdoctection-0.37.3 → deepdoctection-0.39}/tests/test_utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
2
|
Name: deepdoctection
|
|
3
|
-
Version: 0.37.3
|
|
3
|
+
Version: 0.39
|
|
4
4
|
Summary: Repository for Document AI
|
|
5
5
|
Home-page: https://github.com/deepdoctection/deepdoctection
|
|
6
6
|
Author: Dr. Janis Meyer
|
|
@@ -17,7 +17,7 @@ Requires-Python: >=3.9
|
|
|
17
17
|
Description-Content-Type: text/markdown
|
|
18
18
|
License-File: LICENSE
|
|
19
19
|
Requires-Dist: catalogue==2.0.10
|
|
20
|
-
Requires-Dist: huggingface_hub
|
|
20
|
+
Requires-Dist: huggingface_hub>=0.26.0
|
|
21
21
|
Requires-Dist: importlib-metadata>=5.0.0
|
|
22
22
|
Requires-Dist: jsonlines==3.1.0
|
|
23
23
|
Requires-Dist: lazy-imports==0.3.1
|
|
@@ -36,7 +36,7 @@ Requires-Dist: tabulate>=0.7.7
|
|
|
36
36
|
Requires-Dist: tqdm==4.64.0
|
|
37
37
|
Provides-Extra: tf
|
|
38
38
|
Requires-Dist: catalogue==2.0.10; extra == "tf"
|
|
39
|
-
Requires-Dist: huggingface_hub
|
|
39
|
+
Requires-Dist: huggingface_hub>=0.26.0; extra == "tf"
|
|
40
40
|
Requires-Dist: importlib-metadata>=5.0.0; extra == "tf"
|
|
41
41
|
Requires-Dist: jsonlines==3.1.0; extra == "tf"
|
|
42
42
|
Requires-Dist: lazy-imports==0.3.1; extra == "tf"
|
|
@@ -68,7 +68,7 @@ Requires-Dist: distance==0.1.3; extra == "tf"
|
|
|
68
68
|
Requires-Dist: lxml>=4.9.1; extra == "tf"
|
|
69
69
|
Provides-Extra: pt
|
|
70
70
|
Requires-Dist: catalogue==2.0.10; extra == "pt"
|
|
71
|
-
Requires-Dist: huggingface_hub
|
|
71
|
+
Requires-Dist: huggingface_hub>=0.26.0; extra == "pt"
|
|
72
72
|
Requires-Dist: importlib-metadata>=5.0.0; extra == "pt"
|
|
73
73
|
Requires-Dist: jsonlines==3.1.0; extra == "pt"
|
|
74
74
|
Requires-Dist: lazy-imports==0.3.1; extra == "pt"
|
|
@@ -127,6 +127,16 @@ Requires-Dist: types-urllib3>=1.26.25.14; extra == "dev"
|
|
|
127
127
|
Provides-Extra: test
|
|
128
128
|
Requires-Dist: pytest==8.0.2; extra == "test"
|
|
129
129
|
Requires-Dist: pytest-cov; extra == "test"
|
|
130
|
+
Dynamic: author
|
|
131
|
+
Dynamic: classifier
|
|
132
|
+
Dynamic: description
|
|
133
|
+
Dynamic: description-content-type
|
|
134
|
+
Dynamic: home-page
|
|
135
|
+
Dynamic: license
|
|
136
|
+
Dynamic: provides-extra
|
|
137
|
+
Dynamic: requires-dist
|
|
138
|
+
Dynamic: requires-python
|
|
139
|
+
Dynamic: summary
|
|
130
140
|
|
|
131
141
|
|
|
132
142
|
<p align="center">
|
|
@@ -6,6 +6,7 @@ Init file for deepdoctection package. This file is used to import all submodules
|
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
8
|
import importlib.util
|
|
9
|
+
import os
|
|
9
10
|
|
|
10
11
|
# Before doing anything else, check if the .env file exists and load it
|
|
11
12
|
if importlib.util.find_spec("dotenv") is not None:
|
|
@@ -24,7 +25,7 @@ from .utils.logger import LoggingRecord, logger
|
|
|
24
25
|
|
|
25
26
|
# pylint: enable=wrong-import-position
|
|
26
27
|
|
|
27
|
-
__version__ = "0.37.3"
|
|
28
|
+
__version__ = "0.39"
|
|
28
29
|
|
|
29
30
|
_IMPORT_STRUCTURE = {
|
|
30
31
|
"analyzer": ["config_sanity_checks", "get_dd_analyzer", "ServiceFactory"],
|
|
@@ -423,6 +424,9 @@ _IMPORT_STRUCTURE = {
|
|
|
423
424
|
env_info = collect_env_info()
|
|
424
425
|
logger.debug(LoggingRecord(msg=env_info))
|
|
425
426
|
auto_select_pdf_render_framework()
|
|
427
|
+
os.environ["DPI"] = "300"
|
|
428
|
+
os.environ["IMAGE_WIDTH"] = ""
|
|
429
|
+
os.environ["IMAGE_HEIGHT"] = ""
|
|
426
430
|
|
|
427
431
|
# Direct imports for type-checking
|
|
428
432
|
if TYPE_CHECKING:
|
|
@@ -91,7 +91,8 @@ cfg.SEGMENTATION.PUBTABLES_SUB_ITEM_NAMES = [CellType.ROW_NUMBER, CellType.COLUM
|
|
|
91
91
|
cfg.SEGMENTATION.CELL_NAMES = [CellType.HEADER, CellType.BODY, LayoutType.CELL]
|
|
92
92
|
cfg.SEGMENTATION.ITEM_NAMES = [LayoutType.ROW, LayoutType.COLUMN]
|
|
93
93
|
cfg.SEGMENTATION.SUB_ITEM_NAMES = [CellType.ROW_NUMBER, CellType.COLUMN_NUMBER]
|
|
94
|
-
|
|
94
|
+
cfg.SEGMENTATION.PUBTABLES_ITEM_HEADER_CELL_NAMES = [CellType.COLUMN_HEADER, CellType.ROW_HEADER]
|
|
95
|
+
cfg.SEGMENTATION.PUBTABLES_ITEM_HEADER_THRESHOLDS = [0.6, 0.0001]
|
|
95
96
|
cfg.SEGMENTATION.STRETCH_RULE = "equal"
|
|
96
97
|
|
|
97
98
|
cfg.USE_TABLE_REFINEMENT = True
|
|
@@ -32,7 +32,6 @@ from ..extern.pt.ptutils import get_torch_device
|
|
|
32
32
|
from ..extern.tp.tfutils import disable_tp_layer_logging, get_tf_device
|
|
33
33
|
from ..pipe.doctectionpipe import DoctectionPipe
|
|
34
34
|
from ..utils.env_info import ENV_VARS_TRUE
|
|
35
|
-
from ..utils.error import DependencyError
|
|
36
35
|
from ..utils.file_utils import tensorpack_available
|
|
37
36
|
from ..utils.fs import get_configs_dir_path, get_package_path, maybe_copy_config_to_cache
|
|
38
37
|
from ..utils.logger import LoggingRecord, logger
|
|
@@ -118,13 +117,15 @@ def get_dd_analyzer(
|
|
|
118
117
|
:return: A DoctectionPipe instance with given configs
|
|
119
118
|
"""
|
|
120
119
|
config_overwrite = [] if config_overwrite is None else config_overwrite
|
|
121
|
-
|
|
122
|
-
|
|
120
|
+
if os.environ.get("DD_USE_TF", "0") in ENV_VARS_TRUE:
|
|
121
|
+
lib = "TF"
|
|
123
122
|
device = get_tf_device()
|
|
124
|
-
elif
|
|
123
|
+
elif os.environ.get("DD_USE_TORCH", "0") in ENV_VARS_TRUE:
|
|
124
|
+
lib = "PT"
|
|
125
125
|
device = get_torch_device()
|
|
126
126
|
else:
|
|
127
|
-
|
|
127
|
+
lib = None
|
|
128
|
+
device = None
|
|
128
129
|
dd_one_config_path = maybe_copy_config_to_cache(
|
|
129
130
|
get_package_path(), get_configs_dir_path() / "dd", _DD_ONE, reset_config_file
|
|
130
131
|
)
|
|
@@ -48,10 +48,11 @@ from ..pipe.segment import PubtablesSegmentationService, TableSegmentationServic
|
|
|
48
48
|
from ..pipe.sub_layout import DetectResultGenerator, SubImageLayoutService
|
|
49
49
|
from ..pipe.text import TextExtractionService
|
|
50
50
|
from ..pipe.transform import SimpleTransformService
|
|
51
|
+
from ..utils.error import DependencyError
|
|
51
52
|
from ..utils.file_utils import detectron2_available
|
|
52
53
|
from ..utils.fs import get_configs_dir_path
|
|
53
54
|
from ..utils.metacfg import AttrDict
|
|
54
|
-
from ..utils.settings import LayoutType, Relationships
|
|
55
|
+
from ..utils.settings import CellType, LayoutType, Relationships
|
|
55
56
|
from ..utils.transform import PadTransform
|
|
56
57
|
|
|
57
58
|
with try_import() as image_guard:
|
|
@@ -62,8 +63,6 @@ __all__ = [
|
|
|
62
63
|
"ServiceFactory",
|
|
63
64
|
]
|
|
64
65
|
|
|
65
|
-
# from ._config import cfg
|
|
66
|
-
|
|
67
66
|
|
|
68
67
|
class ServiceFactory:
|
|
69
68
|
"""
|
|
@@ -94,6 +93,8 @@ class ServiceFactory:
|
|
|
94
93
|
:param config: configuration object
|
|
95
94
|
:param mode: either `LAYOUT`,`CELL` or `ITEM`
|
|
96
95
|
"""
|
|
96
|
+
if config.LIB is None:
|
|
97
|
+
raise DependencyError("At least one of the env variables DD_USE_TF or DD_USE_TORCH must be set.")
|
|
97
98
|
weights = (
|
|
98
99
|
getattr(config.TF, mode).WEIGHTS
|
|
99
100
|
if config.LIB == "TF"
|
|
@@ -264,14 +265,17 @@ class ServiceFactory:
|
|
|
264
265
|
:param mode: either `LAYOUT`,`CELL` or `ITEM`
|
|
265
266
|
:return: `SubImageLayoutService` instance
|
|
266
267
|
"""
|
|
267
|
-
|
|
268
|
+
exclude_category_names = []
|
|
268
269
|
padder = None
|
|
269
270
|
if mode == "ITEM":
|
|
270
271
|
if detector.__class__.__name__ in ("HFDetrDerivedDetector",):
|
|
271
|
-
|
|
272
|
+
exclude_category_names.extend(
|
|
273
|
+
[LayoutType.TABLE, CellType.COLUMN_HEADER, CellType.PROJECTED_ROW_HEADER, CellType.SPANNING]
|
|
274
|
+
)
|
|
272
275
|
padder = ServiceFactory.build_padder(config, mode)
|
|
273
276
|
detect_result_generator = DetectResultGenerator(
|
|
274
|
-
|
|
277
|
+
categories_name_as_key=detector.categories.get_categories(as_dict=True, name_as_key=True),
|
|
278
|
+
exclude_category_names=exclude_category_names,
|
|
275
279
|
)
|
|
276
280
|
return SubImageLayoutService(
|
|
277
281
|
sub_image_detector=detector,
|
|
@@ -307,6 +311,8 @@ class ServiceFactory:
|
|
|
307
311
|
config_overwrite=[f"LANGUAGES={config.LANGUAGE}"] if config.LANGUAGE is not None else None,
|
|
308
312
|
)
|
|
309
313
|
if config.OCR.USE_DOCTR:
|
|
314
|
+
if config.LIB is None:
|
|
315
|
+
raise DependencyError("At least one of the env variables DD_USE_TF or DD_USE_TORCH must be set.")
|
|
310
316
|
weights = (
|
|
311
317
|
config.OCR.WEIGHTS.DOCTR_RECOGNITION.TF
|
|
312
318
|
if config.LIB == "TF"
|
|
@@ -350,6 +356,8 @@ class ServiceFactory:
|
|
|
350
356
|
:param config: configuration object
|
|
351
357
|
:return: DoctrTextlineDetector
|
|
352
358
|
"""
|
|
359
|
+
if config.LIB is None:
|
|
360
|
+
raise DependencyError("At least one of the env variables DD_USE_TF or DD_USE_TORCH must be set.")
|
|
353
361
|
weights = config.OCR.WEIGHTS.DOCTR_WORD.TF if config.LIB == "TF" else config.OCR.WEIGHTS.DOCTR_WORD.PT
|
|
354
362
|
weights_path = ModelDownloadManager.maybe_download_weights_and_configs(weights)
|
|
355
363
|
profile = ModelCatalog.get_profile(weights)
|
|
@@ -399,6 +407,8 @@ class ServiceFactory:
|
|
|
399
407
|
spanning_cell_names=config.SEGMENTATION.PUBTABLES_SPANNING_CELL_NAMES,
|
|
400
408
|
item_names=config.SEGMENTATION.PUBTABLES_ITEM_NAMES,
|
|
401
409
|
sub_item_names=config.SEGMENTATION.PUBTABLES_SUB_ITEM_NAMES,
|
|
410
|
+
item_header_cell_names=config.SEGMENTATION.PUBTABLES_ITEM_HEADER_CELL_NAMES,
|
|
411
|
+
item_header_thresholds=config.SEGMENTATION.PUBTABLES_ITEM_HEADER_THRESHOLDS,
|
|
402
412
|
stretch_rule=config.SEGMENTATION.STRETCH_RULE,
|
|
403
413
|
)
|
|
404
414
|
|
|
@@ -1,104 +1,145 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
1
|
+
DEVICE: null
|
|
2
|
+
LANGUAGE: null
|
|
3
|
+
LAYOUT_LINK:
|
|
4
|
+
CHILD_CATEGORIES: []
|
|
5
|
+
PARENTAL_CATEGORIES: []
|
|
6
|
+
LAYOUT_NMS_PAIRS:
|
|
7
|
+
COMBINATIONS: null
|
|
8
|
+
PRIORITY: null
|
|
9
|
+
THRESHOLDS: null
|
|
10
|
+
LIB: null
|
|
11
|
+
OCR:
|
|
12
|
+
CONFIG:
|
|
13
|
+
TESSERACT: dd/conf_tesseract.yaml
|
|
14
|
+
USE_DOCTR: false
|
|
15
|
+
USE_TESSERACT: true
|
|
16
|
+
USE_TEXTRACT: false
|
|
17
|
+
WEIGHTS:
|
|
18
|
+
DOCTR_RECOGNITION:
|
|
19
|
+
PT: doctr/crnn_vgg16_bn/pt/crnn_vgg16_bn-9762b0b0.pt
|
|
20
|
+
TF: doctr/crnn_vgg16_bn/tf/crnn_vgg16_bn-76b7f2c6.zip
|
|
21
|
+
DOCTR_WORD:
|
|
22
|
+
PT: doctr/db_resnet50/pt/db_resnet50-ac60cadc.pt
|
|
23
|
+
TF: doctr/db_resnet50/tf/db_resnet50-adcafc63.zip
|
|
24
|
+
PDF_MINER:
|
|
25
|
+
X_TOLERANCE: 3
|
|
26
|
+
Y_TOLERANCE: 3
|
|
27
|
+
PT:
|
|
8
28
|
CELL:
|
|
9
|
-
|
|
10
|
-
|
|
29
|
+
FILTER: null
|
|
30
|
+
WEIGHTS: cell/d2_model_1849999_cell_inf_only.pt
|
|
31
|
+
WEIGHTS_TS: cell/d2_model_1849999_cell_inf_only.ts
|
|
11
32
|
ITEM:
|
|
12
|
-
|
|
13
|
-
FILTER:
|
|
14
|
-
PT:
|
|
15
|
-
LAYOUT:
|
|
16
|
-
WEIGHTS: layout/d2_model_0829999_layout_inf_only.pt
|
|
17
|
-
WEIGHTS_TS: layout/d2_model_0829999_layout_inf_only.ts
|
|
18
|
-
FILTER:
|
|
33
|
+
FILTER: null
|
|
19
34
|
PAD:
|
|
20
|
-
TOP: 60
|
|
21
|
-
RIGHT: 60
|
|
22
35
|
BOTTOM: 60
|
|
23
36
|
LEFT: 60
|
|
24
|
-
|
|
37
|
+
RIGHT: 60
|
|
38
|
+
TOP: 60
|
|
25
39
|
WEIGHTS: item/d2_model_1639999_item_inf_only.pt
|
|
26
40
|
WEIGHTS_TS: item/d2_model_1639999_item_inf_only.ts
|
|
27
|
-
|
|
41
|
+
LAYOUT:
|
|
42
|
+
FILTER: null
|
|
28
43
|
PAD:
|
|
29
|
-
TOP: 60
|
|
30
|
-
RIGHT: 60
|
|
31
44
|
BOTTOM: 60
|
|
32
45
|
LEFT: 60
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
LAYOUT_NMS_PAIRS:
|
|
38
|
-
COMBINATIONS:
|
|
39
|
-
THRESHOLDS:
|
|
40
|
-
PRIORITY:
|
|
46
|
+
RIGHT: 60
|
|
47
|
+
TOP: 60
|
|
48
|
+
WEIGHTS: layout/d2_model_0829999_layout_inf_only.pt
|
|
49
|
+
WEIGHTS_TS: layout/d2_model_0829999_layout_inf_only.ts
|
|
41
50
|
SEGMENTATION:
|
|
42
51
|
ASSIGNMENT_RULE: ioa
|
|
43
|
-
THRESHOLD_ROWS: 0.4
|
|
44
|
-
THRESHOLD_COLS: 0.4
|
|
45
|
-
FULL_TABLE_TILING: True
|
|
46
|
-
REMOVE_IOU_THRESHOLD_ROWS: 0.001
|
|
47
|
-
REMOVE_IOU_THRESHOLD_COLS: 0.001
|
|
48
52
|
CELL_CATEGORY_ID: 12
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
53
|
+
CELL_NAMES:
|
|
54
|
+
- header
|
|
55
|
+
- body
|
|
56
|
+
- cell
|
|
57
|
+
FULL_TABLE_TILING: true
|
|
58
|
+
ITEM_NAMES:
|
|
59
|
+
- row
|
|
60
|
+
- column
|
|
61
|
+
PUBTABLES_CELL_NAMES:
|
|
62
|
+
- spanning
|
|
63
|
+
- row_header
|
|
64
|
+
- column_header
|
|
65
|
+
- projected_row_header
|
|
66
|
+
- cell
|
|
67
|
+
PUBTABLES_ITEM_NAMES:
|
|
68
|
+
- row
|
|
69
|
+
- column
|
|
70
|
+
PUBTABLES_SPANNING_CELL_NAMES:
|
|
71
|
+
- spanning
|
|
72
|
+
- row_header
|
|
73
|
+
- column_header
|
|
74
|
+
- projected_row_header
|
|
75
|
+
PUBTABLES_SUB_ITEM_NAMES:
|
|
76
|
+
- row_number
|
|
77
|
+
- column_number
|
|
78
|
+
PUBTABLES_ITEM_HEADER_CELL_NAMES:
|
|
75
79
|
- column_header
|
|
76
|
-
- projected_row_header
|
|
77
|
-
- spanning
|
|
78
80
|
- row_header
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
81
|
+
PUBTABLES_ITEM_HEADER_THRESHOLDS:
|
|
82
|
+
- 0.6
|
|
83
|
+
- 0.0001
|
|
84
|
+
REMOVE_IOU_THRESHOLD_COLS: 0.001
|
|
85
|
+
REMOVE_IOU_THRESHOLD_ROWS: 0.001
|
|
86
|
+
STRETCH_RULE: equal
|
|
87
|
+
SUB_ITEM_NAMES:
|
|
88
|
+
- row_number
|
|
89
|
+
- column_number
|
|
90
|
+
TABLE_NAME: table
|
|
91
|
+
THRESHOLD_COLS: 0.4
|
|
92
|
+
THRESHOLD_ROWS: 0.4
|
|
93
|
+
TEXT_CONTAINER: word
|
|
82
94
|
TEXT_ORDERING:
|
|
83
|
-
TEXT_BLOCK_CATEGORIES:
|
|
84
|
-
- title
|
|
85
|
-
- text
|
|
86
|
-
- list
|
|
87
|
-
- cell
|
|
88
|
-
- column_header
|
|
89
|
-
- projected_row_header
|
|
90
|
-
- spanning
|
|
91
|
-
- row_header
|
|
92
|
-
FLOATING_TEXT_BLOCK_CATEGORIES:
|
|
93
|
-
- title
|
|
94
|
-
- text
|
|
95
|
-
- list
|
|
96
|
-
INCLUDE_RESIDUAL_TEXT_CONTAINER: False
|
|
97
|
-
STARTING_POINT_TOLERANCE: 0.005
|
|
98
95
|
BROKEN_LINE_TOLERANCE: 0.003
|
|
96
|
+
FLOATING_TEXT_BLOCK_CATEGORIES:
|
|
97
|
+
- text
|
|
98
|
+
- title
|
|
99
|
+
- figure
|
|
100
|
+
- list
|
|
99
101
|
HEIGHT_TOLERANCE: 2.0
|
|
102
|
+
INCLUDE_RESIDUAL_TEXT_CONTAINER: false
|
|
100
103
|
PARAGRAPH_BREAK: 0.035
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
104
|
+
STARTING_POINT_TOLERANCE: 0.005
|
|
105
|
+
TEXT_BLOCK_CATEGORIES:
|
|
106
|
+
- text
|
|
107
|
+
- title
|
|
108
|
+
- list
|
|
109
|
+
- cell
|
|
110
|
+
- figure
|
|
111
|
+
- column_header
|
|
112
|
+
- projected_row_header
|
|
113
|
+
- spanning
|
|
114
|
+
- row_header
|
|
115
|
+
TF:
|
|
116
|
+
CELL:
|
|
117
|
+
FILTER: null
|
|
118
|
+
WEIGHTS: cell/model-1800000_inf_only.data-00000-of-00001
|
|
119
|
+
ITEM:
|
|
120
|
+
FILTER: null
|
|
121
|
+
WEIGHTS: item/model-1620000_inf_only.data-00000-of-00001
|
|
122
|
+
LAYOUT:
|
|
123
|
+
FILTER: null
|
|
124
|
+
WEIGHTS: layout/model-800000_inf_only.data-00000-of-00001
|
|
125
|
+
USE_LAYOUT: true
|
|
126
|
+
USE_LAYOUT_LINK: false
|
|
127
|
+
USE_LAYOUT_NMS: false
|
|
128
|
+
USE_OCR: true
|
|
129
|
+
USE_PDF_MINER: false
|
|
130
|
+
USE_ROTATOR: false
|
|
131
|
+
USE_TABLE_REFINEMENT: true
|
|
132
|
+
USE_TABLE_SEGMENTATION: true
|
|
133
|
+
WORD_MATCHING:
|
|
134
|
+
MAX_PARENT_ONLY: true
|
|
135
|
+
PARENTAL_CATEGORIES:
|
|
136
|
+
- text
|
|
137
|
+
- title
|
|
138
|
+
- list
|
|
139
|
+
- cell
|
|
140
|
+
- column_header
|
|
141
|
+
- projected_row_header
|
|
142
|
+
- spanning
|
|
143
|
+
- row_header
|
|
144
|
+
RULE: ioa
|
|
145
|
+
THRESHOLD: 0.6
|
|
@@ -491,10 +491,8 @@ def global_to_local_coords(global_box: BoundingBox, embedding_box: BoundingBox)
|
|
|
491
491
|
|
|
492
492
|
def merge_boxes(*boxes: BoundingBox) -> BoundingBox:
|
|
493
493
|
"""
|
|
494
|
-
Generating the smallest box containing an arbitrary tuple/list of boxes.
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
:param boxes: An arbitrary tuple/list of bounding boxes `BoundingBox` all having absolute_coords="True".
|
|
494
|
+
Generating the smallest box containing an arbitrary tuple/list of boxes.
|
|
495
|
+
:param boxes: An arbitrary tuple/list of bounding boxes `BoundingBox`.
|
|
498
496
|
"""
|
|
499
497
|
absolute_coords = boxes[0].absolute_coords
|
|
500
498
|
assert all(box.absolute_coords == absolute_coords for box in boxes), "all boxes must have same absolute_coords"
|
|
@@ -154,7 +154,9 @@ def convert_pdf_bytes_to_np_array(pdf_bytes: bytes, dpi: Optional[int] = None) -
|
|
|
154
154
|
return np_array.astype(uint8)
|
|
155
155
|
|
|
156
156
|
|
|
157
|
-
def convert_pdf_bytes_to_np_array_v2(pdf_bytes: bytes, dpi: Optional[int] = 200) -> PixelValues:
|
|
157
|
+
def convert_pdf_bytes_to_np_array_v2(
|
|
158
|
+
pdf_bytes: bytes, dpi: Optional[int] = None, width: Optional[int] = None, height: Optional[int] = None
|
|
159
|
+
) -> PixelValues:
|
|
158
160
|
"""
|
|
159
161
|
Converts a pdf passed as bytes into a numpy array. We use poppler or pdfmium to convert the pdf to an image.
|
|
160
162
|
If both is available you can steer the selection of the render engine with environment variables:
|
|
@@ -165,17 +167,21 @@ def convert_pdf_bytes_to_np_array_v2(pdf_bytes: bytes, dpi: Optional[int] = 200)
|
|
|
165
167
|
:param pdf_bytes: A pdf as bytes object. A byte representation can from a pdf file can be generated e.g. with
|
|
166
168
|
`utils.fs.load_bytes_from_pdf_file`
|
|
167
169
|
:param dpi: The dpi value of the resulting output image. For high resolution set dpi=300.
|
|
170
|
+
:param width: The width of the resulting output image. This option does only work when using Poppler as
|
|
171
|
+
PDF renderer
|
|
172
|
+
:param height: The height of the resulting output image. This option does only work when using Poppler as
|
|
173
|
+
PDF renderer
|
|
168
174
|
:return: Image as numpy array.
|
|
169
175
|
"""
|
|
170
176
|
|
|
171
|
-
with BytesIO(pdf_bytes) as pdf_file:
|
|
172
|
-
pdf = PdfReader(pdf_file).pages[0]
|
|
173
|
-
shape = pdf.mediabox # pylint: disable=E1101
|
|
174
|
-
height = shape[3] - shape[1]
|
|
175
|
-
width = shape[2] - shape[0]
|
|
176
|
-
|
|
177
177
|
if dpi is None:
|
|
178
|
-
|
|
178
|
+
if width is None or height is None:
|
|
179
|
+
with BytesIO(pdf_bytes) as pdf_file:
|
|
180
|
+
pdf = PdfReader(pdf_file).pages[0]
|
|
181
|
+
shape = pdf.mediabox # pylint: disable=E1101
|
|
182
|
+
height = shape[3] - shape[1]
|
|
183
|
+
width = shape[2] - shape[0]
|
|
184
|
+
return pdf_to_np_array(pdf_bytes, size=(int(width), int(height))) # type: ignore
|
|
179
185
|
return pdf_to_np_array(pdf_bytes, dpi=dpi)
|
|
180
186
|
|
|
181
187
|
|
|
@@ -153,7 +153,7 @@ class Image:
|
|
|
153
153
|
self.set_width_height(self._image.shape[1], self._image.shape[0])
|
|
154
154
|
self._self_embedding()
|
|
155
155
|
elif isinstance(image, bytes):
|
|
156
|
-
self._image = convert_pdf_bytes_to_np_array_v2(image, dpi=environ
|
|
156
|
+
self._image = convert_pdf_bytes_to_np_array_v2(image, dpi=int(environ["DPI"]))
|
|
157
157
|
self.set_width_height(self._image.shape[1], self._image.shape[0])
|
|
158
158
|
self._self_embedding()
|
|
159
159
|
else:
|
|
@@ -428,7 +428,7 @@ class Image:
|
|
|
428
428
|
A list of attributes to suspend from as_dict creation.
|
|
429
429
|
"""
|
|
430
430
|
|
|
431
|
-
return ["
|
|
431
|
+
return ["_annotation_ids", "_category_name"]
|
|
432
432
|
|
|
433
433
|
def define_annotation_id(self, annotation: Annotation) -> str:
|
|
434
434
|
"""
|
|
@@ -572,24 +572,31 @@ class Image:
|
|
|
572
572
|
ann = self.get_annotation(annotation_ids=annotation_id)[0]
|
|
573
573
|
if ann.image is None:
|
|
574
574
|
raise ImageError("When adding sub images to ImageAnnotation then ImageAnnotation.image must not be None")
|
|
575
|
-
|
|
576
|
-
box = ann.bounding_box.to_list("xyxy")
|
|
575
|
+
box = ann.get_bounding_box(self.image_id).to_list("xyxy")
|
|
577
576
|
proposals = self.get_annotation(category_names)
|
|
578
577
|
points = np.array([prop.get_bounding_box(self.image_id).center for prop in proposals])
|
|
578
|
+
if not points.size:
|
|
579
|
+
return
|
|
579
580
|
ann_ids = np.array([prop.annotation_id for prop in proposals])
|
|
580
581
|
indices = np.where(
|
|
581
582
|
(box[0] < points[:, 0]) & (box[1] < points[:, 1]) & (box[2] > points[:, 0]) & (box[3] > points[:, 1])
|
|
582
583
|
)[0]
|
|
583
584
|
selected_ids = ann_ids[indices]
|
|
584
585
|
sub_images = self.get_annotation(annotation_ids=selected_ids.tolist())
|
|
586
|
+
ann_box = ann.get_bounding_box(self.image_id)
|
|
587
|
+
if not ann_box.absolute_coords:
|
|
588
|
+
ann_box = ann_box.transform(self.width, self.height, absolute_coords=True)
|
|
585
589
|
for sub_image in sub_images:
|
|
586
590
|
if sub_image.image is None:
|
|
587
591
|
raise ImageError(
|
|
588
592
|
"When setting an embedding to ImageAnnotation then ImageAnnotation.image must not be None"
|
|
589
593
|
)
|
|
594
|
+
sub_image_box = sub_image.get_bounding_box(self.image_id)
|
|
595
|
+
if not sub_image_box.absolute_coords:
|
|
596
|
+
sub_image_box = sub_image_box.transform(self.width, self.height, absolute_coords=True)
|
|
590
597
|
sub_image.image.set_embedding(
|
|
591
598
|
annotation_id,
|
|
592
|
-
global_to_local_coords(
|
|
599
|
+
global_to_local_coords(sub_image_box, ann_box),
|
|
593
600
|
)
|
|
594
601
|
ann.image.dump(sub_image)
|
|
595
602
|
|