deepdoctection 0.43__tar.gz → 0.43.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of deepdoctection might be problematic. Click here for more details.
- {deepdoctection-0.43 → deepdoctection-0.43.2}/PKG-INFO +5 -6
- {deepdoctection-0.43 → deepdoctection-0.43.2}/README.md +4 -5
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/__init__.py +3 -2
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/analyzer/config.py +15 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/datapoint/view.py +1 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/extern/doctrocr.py +5 -2
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/extern/texocr.py +1 -1
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/pipe/order.py +9 -6
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/utils/context.py +1 -1
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/utils/develop.py +1 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/utils/env_info.py +1 -1
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/utils/file_utils.py +0 -9
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/utils/settings.py +1 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection.egg-info/PKG-INFO +5 -6
- {deepdoctection-0.43 → deepdoctection-0.43.2}/LICENSE +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/analyzer/__init__.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/analyzer/dd.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/analyzer/factory.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/configs/__init__.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/configs/conf_dd_one.yaml +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/configs/conf_tesseract.yaml +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/configs/profiles.jsonl +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/dataflow/__init__.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/dataflow/base.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/dataflow/common.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/dataflow/custom.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/dataflow/custom_serialize.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/dataflow/parallel_map.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/dataflow/serialize.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/dataflow/stats.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/datapoint/__init__.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/datapoint/annotation.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/datapoint/box.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/datapoint/convert.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/datapoint/image.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/datasets/__init__.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/datasets/adapter.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/datasets/base.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/datasets/dataflow_builder.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/datasets/info.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/datasets/instances/__init__.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/datasets/instances/doclaynet.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/datasets/instances/fintabnet.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/datasets/instances/funsd.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/datasets/instances/iiitar13k.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/datasets/instances/layouttest.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/datasets/instances/publaynet.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/datasets/instances/pubtables1m.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/datasets/instances/pubtabnet.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/datasets/instances/rvlcdip.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/datasets/instances/xfund.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/datasets/instances/xsl/__init__.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/datasets/instances/xsl/pascal_voc.xsl +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/datasets/registry.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/datasets/save.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/eval/__init__.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/eval/accmetric.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/eval/base.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/eval/cocometric.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/eval/eval.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/eval/registry.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/eval/tedsmetric.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/eval/tp_eval_callback.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/extern/__init__.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/extern/base.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/extern/d2detect.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/extern/deskew.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/extern/fastlang.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/extern/hfdetr.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/extern/hflayoutlm.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/extern/hflm.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/extern/model.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/extern/pdftext.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/extern/pt/__init__.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/extern/pt/nms.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/extern/pt/ptutils.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/extern/tessocr.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/extern/tp/__init__.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/extern/tp/tfutils.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/extern/tp/tpcompat.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/extern/tp/tpfrcnn/__init__.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/extern/tp/tpfrcnn/common.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/extern/tp/tpfrcnn/config/__init__.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/extern/tp/tpfrcnn/config/config.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/extern/tp/tpfrcnn/modeling/__init__.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/extern/tp/tpfrcnn/modeling/generalized_rcnn.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/extern/tp/tpfrcnn/modeling/model_cascade.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/extern/tp/tpfrcnn/modeling/model_rpn.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/extern/tp/tpfrcnn/predict.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/extern/tp/tpfrcnn/preproc.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/extern/tp/tpfrcnn/utils/__init__.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/extern/tp/tpfrcnn/utils/box_ops.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/extern/tp/tpfrcnn/utils/np_box_ops.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/extern/tpdetect.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/mapper/__init__.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/mapper/cats.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/mapper/cocostruct.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/mapper/d2struct.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/mapper/hfstruct.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/mapper/laylmstruct.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/mapper/maputils.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/mapper/match.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/mapper/misc.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/mapper/pascalstruct.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/mapper/prodigystruct.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/mapper/pubstruct.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/mapper/tpstruct.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/mapper/xfundstruct.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/pipe/__init__.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/pipe/anngen.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/pipe/base.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/pipe/common.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/pipe/concurrency.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/pipe/doctectionpipe.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/pipe/language.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/pipe/layout.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/pipe/lm.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/pipe/refine.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/pipe/registry.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/pipe/segment.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/pipe/sub_layout.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/pipe/text.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/pipe/transform.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/py.typed +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/train/__init__.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/train/d2_frcnn_train.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/train/hf_detr_train.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/train/hf_layoutlm_train.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/train/tp_frcnn_train.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/utils/__init__.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/utils/concurrency.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/utils/error.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/utils/fs.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/utils/identifier.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/utils/logger.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/utils/metacfg.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/utils/mocks.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/utils/pdf_utils.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/utils/tqdm.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/utils/transform.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/utils/types.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/utils/utils.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/utils/viz.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection.egg-info/SOURCES.txt +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection.egg-info/dependency_links.txt +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection.egg-info/requires.txt +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection.egg-info/top_level.txt +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/setup.cfg +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/setup.py +0 -0
- {deepdoctection-0.43 → deepdoctection-0.43.2}/tests/test_utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: deepdoctection
|
|
3
|
-
Version: 0.43
|
|
3
|
+
Version: 0.43.2
|
|
4
4
|
Summary: Repository for Document AI
|
|
5
5
|
Home-page: https://github.com/deepdoctection/deepdoctection
|
|
6
6
|
Author: Dr. Janis Meyer
|
|
@@ -192,7 +192,7 @@ Check the demo of a document layout analysis pipeline with OCR on 🤗
|
|
|
192
192
|
- Fine-tuning and evaluation tools.
|
|
193
193
|
- Lot's of [tutorials](https://github.com/deepdoctection/notebooks)
|
|
194
194
|
|
|
195
|
-
Have a look at the [**introduction notebook**](https://github.com/deepdoctection/notebooks/blob/main/
|
|
195
|
+
Have a look at the [**introduction notebook**](https://github.com/deepdoctection/notebooks/blob/main/Analyzer_Get_Started.ipynb)
|
|
196
196
|
for an easy start.
|
|
197
197
|
|
|
198
198
|
Check the [**release notes**](https://github.com/deepdoctection/deepdoctection/releases) for recent updates.
|
|
@@ -245,11 +245,11 @@ alt="text" width="40%">
|
|
|
245
245
|
|
|
246
246
|
## Requirements
|
|
247
247
|
|
|
248
|
-

|
|
249
249
|
|
|
250
250
|
- Linux or macOS. Windows is not supported but there is a [Dockerfile](./docker/pytorch-cpu-jupyter/Dockerfile) available.
|
|
251
251
|
- Python >= 3.9
|
|
252
|
-
-
|
|
252
|
+
- 2.2 \<= PyTorch **or** 2.11 \<= Tensorflow < 2.16. (For lower Tensorflow versions the code will only run on a GPU).
|
|
253
253
|
Tensorflow support will be stopped from Python 3.11 onwards.
|
|
254
254
|
- To fine-tune models, a GPU is recommended.
|
|
255
255
|
|
|
@@ -344,8 +344,7 @@ pip install ".[tf]" # or "pip install -e .[tf]"
|
|
|
344
344
|
|
|
345
345
|
### Running a Docker container from Docker hub
|
|
346
346
|
|
|
347
|
-
Pre-existing Docker images can be downloaded from the [Docker hub](https://hub.docker.
|
|
348
|
-
com/r/deepdoctection/deepdoctection).
|
|
347
|
+
Pre-existing Docker images can be downloaded from the [Docker hub](https://hub.docker.com/r/deepdoctection/deepdoctection).
|
|
349
348
|
|
|
350
349
|
```
|
|
351
350
|
docker pull deepdoctection/deepdoctection:<release_tag>
|
|
@@ -49,7 +49,7 @@ Check the demo of a document layout analysis pipeline with OCR on 🤗
|
|
|
49
49
|
- Fine-tuning and evaluation tools.
|
|
50
50
|
- Lot's of [tutorials](https://github.com/deepdoctection/notebooks)
|
|
51
51
|
|
|
52
|
-
Have a look at the [**introduction notebook**](https://github.com/deepdoctection/notebooks/blob/main/
|
|
52
|
+
Have a look at the [**introduction notebook**](https://github.com/deepdoctection/notebooks/blob/main/Analyzer_Get_Started.ipynb)
|
|
53
53
|
for an easy start.
|
|
54
54
|
|
|
55
55
|
Check the [**release notes**](https://github.com/deepdoctection/deepdoctection/releases) for recent updates.
|
|
@@ -102,11 +102,11 @@ alt="text" width="40%">
|
|
|
102
102
|
|
|
103
103
|
## Requirements
|
|
104
104
|
|
|
105
|
-

|
|
106
106
|
|
|
107
107
|
- Linux or macOS. Windows is not supported but there is a [Dockerfile](./docker/pytorch-cpu-jupyter/Dockerfile) available.
|
|
108
108
|
- Python >= 3.9
|
|
109
|
-
-
|
|
109
|
+
- 2.2 \<= PyTorch **or** 2.11 \<= Tensorflow < 2.16. (For lower Tensorflow versions the code will only run on a GPU).
|
|
110
110
|
Tensorflow support will be stopped from Python 3.11 onwards.
|
|
111
111
|
- To fine-tune models, a GPU is recommended.
|
|
112
112
|
|
|
@@ -201,8 +201,7 @@ pip install ".[tf]" # or "pip install -e .[tf]"
|
|
|
201
201
|
|
|
202
202
|
### Running a Docker container from Docker hub
|
|
203
203
|
|
|
204
|
-
Pre-existing Docker images can be downloaded from the [Docker hub](https://hub.docker.
|
|
205
|
-
com/r/deepdoctection/deepdoctection).
|
|
204
|
+
Pre-existing Docker images can be downloaded from the [Docker hub](https://hub.docker.com/r/deepdoctection/deepdoctection).
|
|
206
205
|
|
|
207
206
|
```
|
|
208
207
|
docker pull deepdoctection/deepdoctection:<release_tag>
|
|
@@ -25,11 +25,11 @@ from .utils.logger import LoggingRecord, logger
|
|
|
25
25
|
|
|
26
26
|
# pylint: enable=wrong-import-position
|
|
27
27
|
|
|
28
|
-
__version__ = "0.43"
|
|
28
|
+
__version__ = "0.43.2"
|
|
29
29
|
|
|
30
30
|
_IMPORT_STRUCTURE = {
|
|
31
31
|
"analyzer": ["config_sanity_checks", "get_dd_analyzer", "ServiceFactory"],
|
|
32
|
-
"configs": [],
|
|
32
|
+
"configs": ["update_cfg_from_defaults"],
|
|
33
33
|
"dataflow": [
|
|
34
34
|
"DataFlowTerminated",
|
|
35
35
|
"DataFlowResetStateNotCalled",
|
|
@@ -98,6 +98,7 @@ _IMPORT_STRUCTURE = {
|
|
|
98
98
|
"List",
|
|
99
99
|
"Cell",
|
|
100
100
|
"Table",
|
|
101
|
+
"IMAGE_DEFAULTS",
|
|
101
102
|
"Page",
|
|
102
103
|
],
|
|
103
104
|
"datasets": [
|
|
@@ -902,3 +902,18 @@ cfg.LAYOUT_LINK.CHILD_CATEGORIES = [LayoutType.CAPTION]
|
|
|
902
902
|
# Freezes the configuration to make it immutable.
|
|
903
903
|
# This prevents accidental modification at runtime.
|
|
904
904
|
cfg.freeze()
|
|
905
|
+
|
|
906
|
+
def update_cfg_from_defaults() -> None:
|
|
907
|
+
"""
|
|
908
|
+
Update the configuration with current values from IMAGE_DEFAULTS.
|
|
909
|
+
"""
|
|
910
|
+
cfg.freeze(False)
|
|
911
|
+
|
|
912
|
+
# Update all dependent fields from IMAGE_DEFAULTS
|
|
913
|
+
cfg.TEXT_CONTAINER = IMAGE_DEFAULTS.TEXT_CONTAINER
|
|
914
|
+
cfg.WORD_MATCHING.PARENTAL_CATEGORIES = IMAGE_DEFAULTS.TEXT_BLOCK_CATEGORIES
|
|
915
|
+
cfg.TEXT_ORDERING.TEXT_BLOCK_CATEGORIES = IMAGE_DEFAULTS.TEXT_BLOCK_CATEGORIES
|
|
916
|
+
cfg.TEXT_ORDERING.FLOATING_TEXT_BLOCK_CATEGORIES = IMAGE_DEFAULTS.FLOATING_TEXT_BLOCK_CATEGORIES
|
|
917
|
+
|
|
918
|
+
# Re-freeze the configuration
|
|
919
|
+
cfg.freeze()
|
|
@@ -269,7 +269,10 @@ class DoctrTextlineDetector(DoctrTextlineDetectorMixin):
|
|
|
269
269
|
if self.lib == "PT":
|
|
270
270
|
self.device = get_torch_device(device)
|
|
271
271
|
|
|
272
|
-
self.doctr_predictor = self.get_wrapped_model(self.architecture,
|
|
272
|
+
self.doctr_predictor = self.get_wrapped_model(self.architecture,
|
|
273
|
+
self.path_weights,
|
|
274
|
+
self.device,
|
|
275
|
+
self.lib)
|
|
273
276
|
|
|
274
277
|
def predict(self, np_img: PixelValues) -> list[DetectionResult]:
|
|
275
278
|
"""
|
|
@@ -424,7 +427,7 @@ class DoctrTextRecognizer(TextRecognizer):
|
|
|
424
427
|
return _get_doctr_requirements()
|
|
425
428
|
|
|
426
429
|
def clone(self) -> DoctrTextRecognizer:
|
|
427
|
-
return self.__class__(self.architecture, self.path_weights, self.device, self.lib)
|
|
430
|
+
return self.__class__(self.architecture, self.path_weights, self.device, self.lib, self.path_config_json)
|
|
428
431
|
|
|
429
432
|
@staticmethod
|
|
430
433
|
def load_model(
|
|
@@ -60,7 +60,7 @@ def _textract_to_detectresult(response: JsonDict, width: int, height: int, text_
|
|
|
60
60
|
return all_results
|
|
61
61
|
|
|
62
62
|
|
|
63
|
-
def predict_text(np_img: PixelValues, client: boto3.client, text_lines: bool) -> list[DetectionResult]:
|
|
63
|
+
def predict_text(np_img: PixelValues, client: boto3.client, text_lines: bool) -> list[DetectionResult]:
|
|
64
64
|
"""
|
|
65
65
|
Calls AWS Textract client (`detect_document_text`) and returns plain OCR results.
|
|
66
66
|
AWS account required.
|
|
@@ -738,10 +738,8 @@ class TextOrderService(TextLineServiceMixin):
|
|
|
738
738
|
text_block_anns.extend(residual_text_container_anns)
|
|
739
739
|
for text_block_ann in text_block_anns:
|
|
740
740
|
self.order_text_in_text_block(text_block_ann)
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
]
|
|
744
|
-
self.order_blocks(floating_text_block_anns_to_order)
|
|
741
|
+
floating_text_block_anns = dp.get_annotation(category_names=self.floating_text_block_categories)
|
|
742
|
+
self.order_blocks(floating_text_block_anns)
|
|
745
743
|
self._create_columns()
|
|
746
744
|
|
|
747
745
|
def _create_columns(self) -> None:
|
|
@@ -803,9 +801,14 @@ class TextOrderService(TextLineServiceMixin):
|
|
|
803
801
|
if self.include_residual_text_container:
|
|
804
802
|
add_category.append(LayoutType.LINE)
|
|
805
803
|
|
|
806
|
-
|
|
804
|
+
if set(self.floating_text_block_categories) <= set(
|
|
807
805
|
self.text_block_categories + tuple(add_category)
|
|
808
|
-
)
|
|
806
|
+
):
|
|
807
|
+
logger.warning("In most cases floating_text_block_categories must be a subset of text_block_categories. "
|
|
808
|
+
"Adding categories to floating_text_block_categories, that do not belong to "
|
|
809
|
+
"text_block_categories makes only sense for categories set have CHILD relationships with"
|
|
810
|
+
" annotations that belong to text_block_categories.")
|
|
811
|
+
|
|
809
812
|
|
|
810
813
|
def get_meta_annotation(self) -> MetaAnnotation:
|
|
811
814
|
add_category = [self.text_container]
|
|
@@ -38,7 +38,7 @@ __all__ = ["timeout_manager", "save_tmp_file", "timed_operation"]
|
|
|
38
38
|
|
|
39
39
|
|
|
40
40
|
@contextmanager
|
|
41
|
-
def timeout_manager(proc: Any, seconds: Optional[int] = None) -> Iterator[str]:
|
|
41
|
+
def timeout_manager(proc: Any, seconds: Optional[int] = None) -> Iterator[str]:
|
|
42
42
|
"""
|
|
43
43
|
Manager for time handling while some process is being called.
|
|
44
44
|
|
|
@@ -71,6 +71,7 @@ def log_deprecated(name: str, text: str, eos: str = "", max_num_warnings: Option
|
|
|
71
71
|
logger.info(LoggingRecord(f"[Deprecated] {info_msg}"))
|
|
72
72
|
|
|
73
73
|
|
|
74
|
+
|
|
74
75
|
def deprecated(
|
|
75
76
|
text: str = "", eos: str = "", max_num_warnings: Optional[int] = None
|
|
76
77
|
) -> Callable[[Callable[..., T]], Callable[..., T]]:
|
|
@@ -462,7 +462,7 @@ def pt_info(data: KeyValEnvInfos) -> KeyValEnvInfos:
|
|
|
462
462
|
data.append(("torchvision arch flags", msg))
|
|
463
463
|
except (ImportError, AttributeError):
|
|
464
464
|
data.append(("torchvision._C", "Not found"))
|
|
465
|
-
except AttributeError:
|
|
465
|
+
except (AttributeError, ModuleNotFoundError):
|
|
466
466
|
data.append(("torchvision", "unknown"))
|
|
467
467
|
|
|
468
468
|
return data
|
|
@@ -12,7 +12,6 @@ import importlib.util
|
|
|
12
12
|
import multiprocessing as mp
|
|
13
13
|
import string
|
|
14
14
|
import subprocess
|
|
15
|
-
import sys
|
|
16
15
|
from os import environ, path
|
|
17
16
|
from shutil import which
|
|
18
17
|
from types import ModuleType
|
|
@@ -22,7 +21,6 @@ import importlib_metadata
|
|
|
22
21
|
from packaging import version
|
|
23
22
|
|
|
24
23
|
from .error import DependencyError
|
|
25
|
-
from .logger import LoggingRecord, logger
|
|
26
24
|
from .metacfg import AttrDict
|
|
27
25
|
from .types import PathLikeOrStr, Requirement
|
|
28
26
|
|
|
@@ -662,13 +660,6 @@ def get_doctr_requirement() -> Requirement:
|
|
|
662
660
|
On macOS, if `poppler` is not available, this function will recursively check the requirement.
|
|
663
661
|
It is not yet known how to check whether `pango`, `gdk-pixbuf`, and `libffi` are installed.
|
|
664
662
|
"""
|
|
665
|
-
if sys.platform == "darwin":
|
|
666
|
-
if not get_poppler_version():
|
|
667
|
-
return get_doctr_requirement()
|
|
668
|
-
# don't know yet how to check whether pango gdk-pixbuf libffi are installed
|
|
669
|
-
logger.info(
|
|
670
|
-
LoggingRecord("package requires weasyprint. Check that poppler pango gdk-pixbuf libffi are installed")
|
|
671
|
-
)
|
|
672
663
|
return "doctr", doctr_available(), _DOCTR_ERR_MSG
|
|
673
664
|
|
|
674
665
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: deepdoctection
|
|
3
|
-
Version: 0.43
|
|
3
|
+
Version: 0.43.2
|
|
4
4
|
Summary: Repository for Document AI
|
|
5
5
|
Home-page: https://github.com/deepdoctection/deepdoctection
|
|
6
6
|
Author: Dr. Janis Meyer
|
|
@@ -192,7 +192,7 @@ Check the demo of a document layout analysis pipeline with OCR on 🤗
|
|
|
192
192
|
- Fine-tuning and evaluation tools.
|
|
193
193
|
- Lot's of [tutorials](https://github.com/deepdoctection/notebooks)
|
|
194
194
|
|
|
195
|
-
Have a look at the [**introduction notebook**](https://github.com/deepdoctection/notebooks/blob/main/
|
|
195
|
+
Have a look at the [**introduction notebook**](https://github.com/deepdoctection/notebooks/blob/main/Analyzer_Get_Started.ipynb)
|
|
196
196
|
for an easy start.
|
|
197
197
|
|
|
198
198
|
Check the [**release notes**](https://github.com/deepdoctection/deepdoctection/releases) for recent updates.
|
|
@@ -245,11 +245,11 @@ alt="text" width="40%">
|
|
|
245
245
|
|
|
246
246
|
## Requirements
|
|
247
247
|
|
|
248
|
-

|
|
249
249
|
|
|
250
250
|
- Linux or macOS. Windows is not supported but there is a [Dockerfile](./docker/pytorch-cpu-jupyter/Dockerfile) available.
|
|
251
251
|
- Python >= 3.9
|
|
252
|
-
-
|
|
252
|
+
- 2.2 \<= PyTorch **or** 2.11 \<= Tensorflow < 2.16. (For lower Tensorflow versions the code will only run on a GPU).
|
|
253
253
|
Tensorflow support will be stopped from Python 3.11 onwards.
|
|
254
254
|
- To fine-tune models, a GPU is recommended.
|
|
255
255
|
|
|
@@ -344,8 +344,7 @@ pip install ".[tf]" # or "pip install -e .[tf]"
|
|
|
344
344
|
|
|
345
345
|
### Running a Docker container from Docker hub
|
|
346
346
|
|
|
347
|
-
Pre-existing Docker images can be downloaded from the [Docker hub](https://hub.docker.
|
|
348
|
-
com/r/deepdoctection/deepdoctection).
|
|
347
|
+
Pre-existing Docker images can be downloaded from the [Docker hub](https://hub.docker.com/r/deepdoctection/deepdoctection).
|
|
349
348
|
|
|
350
349
|
```
|
|
351
350
|
docker pull deepdoctection/deepdoctection:<release_tag>
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/datasets/instances/doclaynet.py
RENAMED
|
File without changes
|
{deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/datasets/instances/fintabnet.py
RENAMED
|
File without changes
|
|
File without changes
|
{deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/datasets/instances/iiitar13k.py
RENAMED
|
File without changes
|
{deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/datasets/instances/layouttest.py
RENAMED
|
File without changes
|
{deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/datasets/instances/publaynet.py
RENAMED
|
File without changes
|
{deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/datasets/instances/pubtables1m.py
RENAMED
|
File without changes
|
{deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/datasets/instances/pubtabnet.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/datasets/instances/xsl/__init__.py
RENAMED
|
File without changes
|
{deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/datasets/instances/xsl/pascal_voc.xsl
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/extern/tp/tpfrcnn/config/__init__.py
RENAMED
|
File without changes
|
{deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/extern/tp/tpfrcnn/config/config.py
RENAMED
|
File without changes
|
{deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/extern/tp/tpfrcnn/modeling/__init__.py
RENAMED
|
File without changes
|
{deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py
RENAMED
|
File without changes
|
|
File without changes
|
{deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py
RENAMED
|
File without changes
|
|
File without changes
|
{deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/extern/tp/tpfrcnn/modeling/model_rpn.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/extern/tp/tpfrcnn/utils/__init__.py
RENAMED
|
File without changes
|
{deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/extern/tp/tpfrcnn/utils/box_ops.py
RENAMED
|
File without changes
|
{deepdoctection-0.43 → deepdoctection-0.43.2}/deepdoctection/extern/tp/tpfrcnn/utils/np_box_ops.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|