deepdoctection 0.42.0__py3-none-any.whl → 0.43__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of deepdoctection might be problematic. Click here for more details.
- deepdoctection/__init__.py +2 -1
- deepdoctection/analyzer/__init__.py +2 -1
- deepdoctection/analyzer/config.py +904 -0
- deepdoctection/analyzer/dd.py +36 -62
- deepdoctection/analyzer/factory.py +311 -141
- deepdoctection/configs/conf_dd_one.yaml +100 -44
- deepdoctection/configs/profiles.jsonl +32 -0
- deepdoctection/dataflow/__init__.py +9 -6
- deepdoctection/dataflow/base.py +33 -15
- deepdoctection/dataflow/common.py +96 -75
- deepdoctection/dataflow/custom.py +36 -29
- deepdoctection/dataflow/custom_serialize.py +135 -91
- deepdoctection/dataflow/parallel_map.py +33 -31
- deepdoctection/dataflow/serialize.py +15 -10
- deepdoctection/dataflow/stats.py +41 -28
- deepdoctection/datapoint/__init__.py +4 -6
- deepdoctection/datapoint/annotation.py +104 -66
- deepdoctection/datapoint/box.py +190 -130
- deepdoctection/datapoint/convert.py +66 -39
- deepdoctection/datapoint/image.py +151 -95
- deepdoctection/datapoint/view.py +383 -236
- deepdoctection/datasets/__init__.py +2 -6
- deepdoctection/datasets/adapter.py +11 -11
- deepdoctection/datasets/base.py +118 -81
- deepdoctection/datasets/dataflow_builder.py +18 -12
- deepdoctection/datasets/info.py +76 -57
- deepdoctection/datasets/instances/__init__.py +6 -2
- deepdoctection/datasets/instances/doclaynet.py +17 -14
- deepdoctection/datasets/instances/fintabnet.py +16 -22
- deepdoctection/datasets/instances/funsd.py +11 -6
- deepdoctection/datasets/instances/iiitar13k.py +9 -9
- deepdoctection/datasets/instances/layouttest.py +9 -9
- deepdoctection/datasets/instances/publaynet.py +9 -9
- deepdoctection/datasets/instances/pubtables1m.py +13 -13
- deepdoctection/datasets/instances/pubtabnet.py +13 -15
- deepdoctection/datasets/instances/rvlcdip.py +8 -8
- deepdoctection/datasets/instances/xfund.py +11 -9
- deepdoctection/datasets/registry.py +18 -11
- deepdoctection/datasets/save.py +12 -11
- deepdoctection/eval/__init__.py +3 -2
- deepdoctection/eval/accmetric.py +72 -52
- deepdoctection/eval/base.py +29 -10
- deepdoctection/eval/cocometric.py +14 -12
- deepdoctection/eval/eval.py +56 -41
- deepdoctection/eval/registry.py +6 -3
- deepdoctection/eval/tedsmetric.py +24 -9
- deepdoctection/eval/tp_eval_callback.py +13 -12
- deepdoctection/extern/__init__.py +1 -1
- deepdoctection/extern/base.py +176 -97
- deepdoctection/extern/d2detect.py +127 -92
- deepdoctection/extern/deskew.py +19 -10
- deepdoctection/extern/doctrocr.py +157 -106
- deepdoctection/extern/fastlang.py +25 -17
- deepdoctection/extern/hfdetr.py +137 -60
- deepdoctection/extern/hflayoutlm.py +329 -248
- deepdoctection/extern/hflm.py +67 -33
- deepdoctection/extern/model.py +108 -762
- deepdoctection/extern/pdftext.py +37 -12
- deepdoctection/extern/pt/nms.py +15 -1
- deepdoctection/extern/pt/ptutils.py +13 -9
- deepdoctection/extern/tessocr.py +87 -54
- deepdoctection/extern/texocr.py +29 -14
- deepdoctection/extern/tp/tfutils.py +36 -8
- deepdoctection/extern/tp/tpcompat.py +54 -16
- deepdoctection/extern/tp/tpfrcnn/config/config.py +20 -4
- deepdoctection/extern/tpdetect.py +4 -2
- deepdoctection/mapper/__init__.py +1 -1
- deepdoctection/mapper/cats.py +117 -76
- deepdoctection/mapper/cocostruct.py +35 -17
- deepdoctection/mapper/d2struct.py +56 -29
- deepdoctection/mapper/hfstruct.py +32 -19
- deepdoctection/mapper/laylmstruct.py +221 -185
- deepdoctection/mapper/maputils.py +71 -35
- deepdoctection/mapper/match.py +76 -62
- deepdoctection/mapper/misc.py +68 -44
- deepdoctection/mapper/pascalstruct.py +13 -12
- deepdoctection/mapper/prodigystruct.py +33 -19
- deepdoctection/mapper/pubstruct.py +42 -32
- deepdoctection/mapper/tpstruct.py +39 -19
- deepdoctection/mapper/xfundstruct.py +20 -13
- deepdoctection/pipe/__init__.py +1 -2
- deepdoctection/pipe/anngen.py +104 -62
- deepdoctection/pipe/base.py +226 -107
- deepdoctection/pipe/common.py +206 -123
- deepdoctection/pipe/concurrency.py +74 -47
- deepdoctection/pipe/doctectionpipe.py +108 -47
- deepdoctection/pipe/language.py +41 -24
- deepdoctection/pipe/layout.py +45 -18
- deepdoctection/pipe/lm.py +146 -78
- deepdoctection/pipe/order.py +196 -113
- deepdoctection/pipe/refine.py +111 -63
- deepdoctection/pipe/registry.py +1 -1
- deepdoctection/pipe/segment.py +213 -142
- deepdoctection/pipe/sub_layout.py +76 -46
- deepdoctection/pipe/text.py +52 -33
- deepdoctection/pipe/transform.py +8 -6
- deepdoctection/train/d2_frcnn_train.py +87 -69
- deepdoctection/train/hf_detr_train.py +72 -40
- deepdoctection/train/hf_layoutlm_train.py +85 -46
- deepdoctection/train/tp_frcnn_train.py +56 -28
- deepdoctection/utils/concurrency.py +59 -16
- deepdoctection/utils/context.py +40 -19
- deepdoctection/utils/develop.py +25 -17
- deepdoctection/utils/env_info.py +85 -36
- deepdoctection/utils/error.py +16 -10
- deepdoctection/utils/file_utils.py +246 -62
- deepdoctection/utils/fs.py +162 -43
- deepdoctection/utils/identifier.py +29 -16
- deepdoctection/utils/logger.py +49 -32
- deepdoctection/utils/metacfg.py +83 -21
- deepdoctection/utils/pdf_utils.py +119 -62
- deepdoctection/utils/settings.py +24 -10
- deepdoctection/utils/tqdm.py +10 -5
- deepdoctection/utils/transform.py +182 -46
- deepdoctection/utils/utils.py +61 -28
- deepdoctection/utils/viz.py +150 -104
- deepdoctection-0.43.dist-info/METADATA +376 -0
- deepdoctection-0.43.dist-info/RECORD +149 -0
- {deepdoctection-0.42.0.dist-info → deepdoctection-0.43.dist-info}/WHEEL +1 -1
- deepdoctection/analyzer/_config.py +0 -146
- deepdoctection-0.42.0.dist-info/METADATA +0 -431
- deepdoctection-0.42.0.dist-info/RECORD +0 -148
- {deepdoctection-0.42.0.dist-info → deepdoctection-0.43.dist-info}/licenses/LICENSE +0 -0
- {deepdoctection-0.42.0.dist-info → deepdoctection-0.43.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,376 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: deepdoctection
|
|
3
|
+
Version: 0.43
|
|
4
|
+
Summary: Repository for Document AI
|
|
5
|
+
Home-page: https://github.com/deepdoctection/deepdoctection
|
|
6
|
+
Author: Dr. Janis Meyer
|
|
7
|
+
License: Apache License 2.0
|
|
8
|
+
Classifier: Development Status :: 4 - Beta
|
|
9
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
10
|
+
Classifier: Natural Language :: English
|
|
11
|
+
Classifier: Operating System :: POSIX :: Linux
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
17
|
+
Requires-Python: >=3.9
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
License-File: LICENSE
|
|
20
|
+
Requires-Dist: catalogue==2.0.10
|
|
21
|
+
Requires-Dist: huggingface_hub>=0.26.0
|
|
22
|
+
Requires-Dist: importlib-metadata>=5.0.0
|
|
23
|
+
Requires-Dist: jsonlines==3.1.0
|
|
24
|
+
Requires-Dist: lazy-imports==0.3.1
|
|
25
|
+
Requires-Dist: mock==4.0.3
|
|
26
|
+
Requires-Dist: networkx>=2.7.1
|
|
27
|
+
Requires-Dist: numpy<2.0,>=1.21
|
|
28
|
+
Requires-Dist: packaging>=20.0
|
|
29
|
+
Requires-Dist: Pillow>=10.0.0
|
|
30
|
+
Requires-Dist: pypdf>=3.16.0
|
|
31
|
+
Requires-Dist: pypdfium2>=4.30.0
|
|
32
|
+
Requires-Dist: pyyaml>=6.0.1
|
|
33
|
+
Requires-Dist: pyzmq>=16
|
|
34
|
+
Requires-Dist: scipy>=1.13.1
|
|
35
|
+
Requires-Dist: termcolor>=1.1
|
|
36
|
+
Requires-Dist: tabulate>=0.7.7
|
|
37
|
+
Requires-Dist: tqdm>=4.64.0
|
|
38
|
+
Provides-Extra: tf
|
|
39
|
+
Requires-Dist: catalogue==2.0.10; extra == "tf"
|
|
40
|
+
Requires-Dist: huggingface_hub>=0.26.0; extra == "tf"
|
|
41
|
+
Requires-Dist: importlib-metadata>=5.0.0; extra == "tf"
|
|
42
|
+
Requires-Dist: jsonlines==3.1.0; extra == "tf"
|
|
43
|
+
Requires-Dist: lazy-imports==0.3.1; extra == "tf"
|
|
44
|
+
Requires-Dist: mock==4.0.3; extra == "tf"
|
|
45
|
+
Requires-Dist: networkx>=2.7.1; extra == "tf"
|
|
46
|
+
Requires-Dist: numpy<2.0,>=1.21; extra == "tf"
|
|
47
|
+
Requires-Dist: packaging>=20.0; extra == "tf"
|
|
48
|
+
Requires-Dist: Pillow>=10.0.0; extra == "tf"
|
|
49
|
+
Requires-Dist: pypdf>=3.16.0; extra == "tf"
|
|
50
|
+
Requires-Dist: pypdfium2>=4.30.0; extra == "tf"
|
|
51
|
+
Requires-Dist: pyyaml>=6.0.1; extra == "tf"
|
|
52
|
+
Requires-Dist: pyzmq>=16; extra == "tf"
|
|
53
|
+
Requires-Dist: scipy>=1.13.1; extra == "tf"
|
|
54
|
+
Requires-Dist: termcolor>=1.1; extra == "tf"
|
|
55
|
+
Requires-Dist: tabulate>=0.7.7; extra == "tf"
|
|
56
|
+
Requires-Dist: tqdm>=4.64.0; extra == "tf"
|
|
57
|
+
Requires-Dist: tensorpack==0.11; extra == "tf"
|
|
58
|
+
Requires-Dist: protobuf==3.20.1; extra == "tf"
|
|
59
|
+
Requires-Dist: tensorflow-addons>=0.17.1; extra == "tf"
|
|
60
|
+
Requires-Dist: tf2onnx>=1.9.2; extra == "tf"
|
|
61
|
+
Requires-Dist: python-doctr==0.9.0; extra == "tf"
|
|
62
|
+
Requires-Dist: pycocotools>=2.0.2; extra == "tf"
|
|
63
|
+
Requires-Dist: boto3==1.34.102; extra == "tf"
|
|
64
|
+
Requires-Dist: pdfplumber>=0.11.0; extra == "tf"
|
|
65
|
+
Requires-Dist: fasttext-wheel; extra == "tf"
|
|
66
|
+
Requires-Dist: jdeskew>=0.2.2; extra == "tf"
|
|
67
|
+
Requires-Dist: apted==1.0.3; extra == "tf"
|
|
68
|
+
Requires-Dist: distance==0.1.3; extra == "tf"
|
|
69
|
+
Requires-Dist: lxml>=4.9.1; extra == "tf"
|
|
70
|
+
Provides-Extra: pt
|
|
71
|
+
Requires-Dist: catalogue==2.0.10; extra == "pt"
|
|
72
|
+
Requires-Dist: huggingface_hub>=0.26.0; extra == "pt"
|
|
73
|
+
Requires-Dist: importlib-metadata>=5.0.0; extra == "pt"
|
|
74
|
+
Requires-Dist: jsonlines==3.1.0; extra == "pt"
|
|
75
|
+
Requires-Dist: lazy-imports==0.3.1; extra == "pt"
|
|
76
|
+
Requires-Dist: mock==4.0.3; extra == "pt"
|
|
77
|
+
Requires-Dist: networkx>=2.7.1; extra == "pt"
|
|
78
|
+
Requires-Dist: numpy<2.0,>=1.21; extra == "pt"
|
|
79
|
+
Requires-Dist: packaging>=20.0; extra == "pt"
|
|
80
|
+
Requires-Dist: Pillow>=10.0.0; extra == "pt"
|
|
81
|
+
Requires-Dist: pypdf>=3.16.0; extra == "pt"
|
|
82
|
+
Requires-Dist: pypdfium2>=4.30.0; extra == "pt"
|
|
83
|
+
Requires-Dist: pyyaml>=6.0.1; extra == "pt"
|
|
84
|
+
Requires-Dist: pyzmq>=16; extra == "pt"
|
|
85
|
+
Requires-Dist: scipy>=1.13.1; extra == "pt"
|
|
86
|
+
Requires-Dist: termcolor>=1.1; extra == "pt"
|
|
87
|
+
Requires-Dist: tabulate>=0.7.7; extra == "pt"
|
|
88
|
+
Requires-Dist: tqdm>=4.64.0; extra == "pt"
|
|
89
|
+
Requires-Dist: timm>=0.9.16; extra == "pt"
|
|
90
|
+
Requires-Dist: transformers>=4.48.0; extra == "pt"
|
|
91
|
+
Requires-Dist: accelerate>=0.29.1; extra == "pt"
|
|
92
|
+
Requires-Dist: python-doctr==0.9.0; extra == "pt"
|
|
93
|
+
Requires-Dist: pycocotools>=2.0.2; extra == "pt"
|
|
94
|
+
Requires-Dist: boto3==1.34.102; extra == "pt"
|
|
95
|
+
Requires-Dist: pdfplumber>=0.11.0; extra == "pt"
|
|
96
|
+
Requires-Dist: fasttext-wheel; extra == "pt"
|
|
97
|
+
Requires-Dist: jdeskew>=0.2.2; extra == "pt"
|
|
98
|
+
Requires-Dist: apted==1.0.3; extra == "pt"
|
|
99
|
+
Requires-Dist: distance==0.1.3; extra == "pt"
|
|
100
|
+
Requires-Dist: lxml>=4.9.1; extra == "pt"
|
|
101
|
+
Provides-Extra: docs
|
|
102
|
+
Requires-Dist: tensorpack==0.11; extra == "docs"
|
|
103
|
+
Requires-Dist: boto3==1.34.102; extra == "docs"
|
|
104
|
+
Requires-Dist: transformers>=4.48.0; extra == "docs"
|
|
105
|
+
Requires-Dist: accelerate>=0.29.1; extra == "docs"
|
|
106
|
+
Requires-Dist: pdfplumber>=0.11.0; extra == "docs"
|
|
107
|
+
Requires-Dist: lxml>=4.9.1; extra == "docs"
|
|
108
|
+
Requires-Dist: lxml-stubs>=0.5.1; extra == "docs"
|
|
109
|
+
Requires-Dist: jdeskew>=0.2.2; extra == "docs"
|
|
110
|
+
Requires-Dist: jinja2; extra == "docs"
|
|
111
|
+
Requires-Dist: mkdocs-material; extra == "docs"
|
|
112
|
+
Requires-Dist: mkdocstrings-python; extra == "docs"
|
|
113
|
+
Requires-Dist: griffe==0.25.0; extra == "docs"
|
|
114
|
+
Provides-Extra: dev
|
|
115
|
+
Requires-Dist: python-dotenv==1.0.0; extra == "dev"
|
|
116
|
+
Requires-Dist: click; extra == "dev"
|
|
117
|
+
Requires-Dist: black==23.7.0; extra == "dev"
|
|
118
|
+
Requires-Dist: isort==5.13.2; extra == "dev"
|
|
119
|
+
Requires-Dist: pylint==2.17.4; extra == "dev"
|
|
120
|
+
Requires-Dist: mypy==1.4.1; extra == "dev"
|
|
121
|
+
Requires-Dist: wandb; extra == "dev"
|
|
122
|
+
Requires-Dist: types-PyYAML>=6.0.12.12; extra == "dev"
|
|
123
|
+
Requires-Dist: types-termcolor>=1.1.3; extra == "dev"
|
|
124
|
+
Requires-Dist: types-tabulate>=0.9.0.3; extra == "dev"
|
|
125
|
+
Requires-Dist: types-tqdm>=4.66.0.5; extra == "dev"
|
|
126
|
+
Requires-Dist: lxml-stubs>=0.5.1; extra == "dev"
|
|
127
|
+
Requires-Dist: types-Pillow>=10.2.0.20240406; extra == "dev"
|
|
128
|
+
Requires-Dist: types-urllib3>=1.26.25.14; extra == "dev"
|
|
129
|
+
Provides-Extra: test
|
|
130
|
+
Requires-Dist: pytest==8.0.2; extra == "test"
|
|
131
|
+
Requires-Dist: pytest-cov; extra == "test"
|
|
132
|
+
Dynamic: author
|
|
133
|
+
Dynamic: classifier
|
|
134
|
+
Dynamic: description
|
|
135
|
+
Dynamic: description-content-type
|
|
136
|
+
Dynamic: home-page
|
|
137
|
+
Dynamic: license
|
|
138
|
+
Dynamic: license-file
|
|
139
|
+
Dynamic: provides-extra
|
|
140
|
+
Dynamic: requires-dist
|
|
141
|
+
Dynamic: requires-python
|
|
142
|
+
Dynamic: summary
|
|
143
|
+
|
|
144
|
+
<p align="center">
|
|
145
|
+
<img src="https://github.com/deepdoctection/deepdoctection/raw/master/docs/tutorials/_imgs/dd_logo.png" alt="Deep Doctection Logo" width="60%">
|
|
146
|
+
</p>
|
|
147
|
+
|
|
148
|
+

|
|
149
|
+

|
|
150
|
+

|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
------------------------------------------------------------------------------------------------------------------------
|
|
154
|
+
# NEW
|
|
155
|
+
|
|
156
|
+
Version `v.0.43` includes a significant redesign of the Analyzer's default configuration. Key changes include:
|
|
157
|
+
|
|
158
|
+
* More powerful models for Document Layout Analysis and OCR.
|
|
159
|
+
* Expanded functionality.
|
|
160
|
+
* Fewer dependencies.
|
|
161
|
+
|
|
162
|
+
------------------------------------------------------------------------------------------------------------------------
|
|
163
|
+
|
|
164
|
+
<p align="center">
|
|
165
|
+
<h1 align="center">
|
|
166
|
+
A Package for Document Understanding
|
|
167
|
+
</h1>
|
|
168
|
+
</p>
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
**deep**doctection is a Python library that orchestrates Scan and PDF document layout analysis and extraction for RAG.
|
|
173
|
+
It also provides a framework for training, evaluating and inferencing Document AI models.
|
|
174
|
+
|
|
175
|
+
Check the demo of a document layout analysis pipeline with OCR on 🤗
|
|
176
|
+
[**Hugging Face spaces**](https://huggingface.co/spaces/deepdoctection/deepdoctection).
|
|
177
|
+
|
|
178
|
+
# Overview
|
|
179
|
+
|
|
180
|
+
- Document layout analysis and table recognition in PyTorch with
|
|
181
|
+
[**Detectron2**](https://github.com/facebookresearch/detectron2/tree/main/detectron2) and
|
|
182
|
+
[**Transformers**](https://github.com/huggingface/transformers)
|
|
183
|
+
or Tensorflow and [**Tensorpack**](https://github.com/tensorpack),
|
|
184
|
+
- OCR with support for [**Tesseract**](https://github.com/tesseract-ocr/tesseract), [**DocTr**](https://github.com/mindee/doctr) and
|
|
185
|
+
[**AWS Textract**](https://aws.amazon.com/textract/),
|
|
186
|
+
- Document and token classification with the [**LayoutLM**](https://github.com/microsoft/unilm) family,
|
|
187
|
+
[**LiLT**](https://github.com/jpWang/LiLT) and selected
|
|
188
|
+
[**Bert**](https://huggingface.co/docs/transformers/model_doc/xlm-roberta)-style models, including features like sliding windows.
|
|
189
|
+
- Text mining for native PDFs with [**pdfplumber**](https://github.com/jsvine/pdfplumber),
|
|
190
|
+
- Language detection with [**fastText**](https://github.com/facebookresearch/fastText),
|
|
191
|
+
- Deskewing and rotating images with [**jdeskew**](https://github.com/phamquiluan/jdeskew).
|
|
192
|
+
- Fine-tuning and evaluation tools.
|
|
193
|
+
- Lots of [tutorials](https://github.com/deepdoctection/notebooks)
|
|
194
|
+
|
|
195
|
+
Have a look at the [**introduction notebook**](https://github.com/deepdoctection/notebooks/blob/main/Get_Started.ipynb)
|
|
196
|
+
for an easy start.
|
|
197
|
+
|
|
198
|
+
Check the [**release notes**](https://github.com/deepdoctection/deepdoctection/releases) for recent updates.
|
|
199
|
+
|
|
200
|
+
# Example
|
|
201
|
+
|
|
202
|
+
```python
|
|
203
|
+
import deepdoctection as dd
|
|
204
|
+
from IPython.core.display import HTML
|
|
205
|
+
from matplotlib import pyplot as plt
|
|
206
|
+
|
|
207
|
+
analyzer = dd.get_dd_analyzer() # instantiate the built-in analyzer similar to the Hugging Face space demo
|
|
208
|
+
|
|
209
|
+
df = analyzer.analyze(path = "/path/to/your/doc.pdf") # setting up pipeline
|
|
210
|
+
df.reset_state() # Trigger some initialization
|
|
211
|
+
|
|
212
|
+
doc = iter(df)
|
|
213
|
+
page = next(doc)
|
|
214
|
+
|
|
215
|
+
image = page.viz(show_figures=True, show_residual_layouts=True)
|
|
216
|
+
plt.figure(figsize = (25,17))
|
|
217
|
+
plt.axis('off')
|
|
218
|
+
plt.imshow(image)
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
<p align="center">
|
|
222
|
+
<img src="https://github.com/deepdoctection/deepdoctection/raw/master/docs/tutorials/_imgs/dd_rm_sample.png"
|
|
223
|
+
alt="sample" width="40%">
|
|
224
|
+
</p>
|
|
225
|
+
|
|
226
|
+
```
|
|
227
|
+
HTML(page.tables[0].html)
|
|
228
|
+
```
|
|
229
|
+
|
|
230
|
+
<p align="center">
|
|
231
|
+
<img src="https://github.com/deepdoctection/deepdoctection/raw/master/docs/tutorials/_imgs/dd_rm_table.png"
|
|
232
|
+
alt="table" width="40%">
|
|
233
|
+
</p>
|
|
234
|
+
|
|
235
|
+
```
|
|
236
|
+
print(page.text)
|
|
237
|
+
```
|
|
238
|
+
|
|
239
|
+
<p align="center">
|
|
240
|
+
<img src="https://github.com/deepdoctection/deepdoctection/raw/master/docs/tutorials/_imgs/dd_rm_text.png"
|
|
241
|
+
alt="text" width="40%">
|
|
242
|
+
</p>
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
## Requirements
|
|
247
|
+
|
|
248
|
+

|
|
249
|
+
|
|
250
|
+
- Linux or macOS. Windows is not supported but there is a [Dockerfile](./docker/pytorch-cpu-jupyter/Dockerfile) available.
|
|
251
|
+
- Python >= 3.9
|
|
252
|
+
- 1.13 \<= PyTorch **or** 2.11 \<= Tensorflow < 2.16. (For lower Tensorflow versions the code will only run on a GPU).
|
|
253
|
+
Tensorflow support will be stopped from Python 3.11 onwards.
|
|
254
|
+
- To fine-tune models, a GPU is recommended.
|
|
255
|
+
|
|
256
|
+
| Task | PyTorch | Torchscript | Tensorflow |
|
|
257
|
+
|---------------------------------------------|:-------:|----------------|:------------:|
|
|
258
|
+
| Layout detection via Detectron2/Tensorpack | ✅ | ✅ (CPU only) | ✅ (GPU only) |
|
|
259
|
+
| Table recognition via Detectron2/Tensorpack | ✅ | ✅ (CPU only) | ✅ (GPU only) |
|
|
260
|
+
| Table transformer via Transformers | ✅ | ❌ | ❌ |
|
|
261
|
+
| Deformable-Detr | ✅ | ❌ | ❌ |
|
|
262
|
+
| DocTr | ✅ | ❌ | ✅ |
|
|
263
|
+
| LayoutLM (v1, v2, v3, XLM) via Transformers | ✅ | ❌ | ❌ |
|
|
264
|
+
|
|
265
|
+
## Installation
|
|
266
|
+
|
|
267
|
+
We recommend using a virtual environment.
|
|
268
|
+
|
|
269
|
+
#### Get started installation
|
|
270
|
+
|
|
271
|
+
For a simple setup which is enough to parse documents with the default setting, install the following:
|
|
272
|
+
|
|
273
|
+
**PyTorch**
|
|
274
|
+
|
|
275
|
+
```
|
|
276
|
+
pip install transformers
|
|
277
|
+
pip install python-doctr
|
|
278
|
+
pip install deepdoctection
|
|
279
|
+
```
|
|
280
|
+
|
|
281
|
+
**TensorFlow**
|
|
282
|
+
|
|
283
|
+
```
|
|
284
|
+
pip install tensorpack
|
|
285
|
+
pip install python-doctr
|
|
286
|
+
pip install deepdoctection
|
|
287
|
+
```
|
|
288
|
+
|
|
289
|
+
Both setups are sufficient to run the [**introduction notebook**](https://github.com/deepdoctection/notebooks/blob/main/Get_Started.ipynb).
|
|
290
|
+
|
|
291
|
+
#### Full installation
|
|
292
|
+
|
|
293
|
+
The following installation will give you ALL models available within the Deep Learning framework as well as all models
|
|
294
|
+
that are independent of Tensorflow/PyTorch.
|
|
295
|
+
|
|
296
|
+
**PyTorch**
|
|
297
|
+
|
|
298
|
+
First install **Detectron2** separately as it is not distributed via PyPI. Check the instructions
|
|
299
|
+
[here](https://detectron2.readthedocs.io/en/latest/tutorials/install.html) or try:
|
|
300
|
+
|
|
301
|
+
```
|
|
302
|
+
pip install detectron2@git+https://github.com/deepdoctection/detectron2.git
|
|
303
|
+
```
|
|
304
|
+
|
|
305
|
+
Then install **deep**doctection with all its dependencies:
|
|
306
|
+
|
|
307
|
+
```
|
|
308
|
+
pip install deepdoctection[pt]
|
|
309
|
+
```
|
|
310
|
+
|
|
311
|
+
**Tensorflow**
|
|
312
|
+
|
|
313
|
+
```
|
|
314
|
+
pip install deepdoctection[tf]
|
|
315
|
+
```
|
|
316
|
+
|
|
317
|
+
|
|
318
|
+
For further information, please consult the [**full installation instructions**](https://deepdoctection.readthedocs.io/en/latest/install/).
|
|
319
|
+
|
|
320
|
+
|
|
321
|
+
### Installation from source
|
|
322
|
+
|
|
323
|
+
Download the repository or clone via
|
|
324
|
+
|
|
325
|
+
```
|
|
326
|
+
git clone https://github.com/deepdoctection/deepdoctection.git
|
|
327
|
+
```
|
|
328
|
+
|
|
329
|
+
**PyTorch**
|
|
330
|
+
|
|
331
|
+
```
|
|
332
|
+
cd deepdoctection
|
|
333
|
+
pip install ".[pt]" # or "pip install -e .[pt]"
|
|
334
|
+
```
|
|
335
|
+
|
|
336
|
+
**Tensorflow**
|
|
337
|
+
|
|
338
|
+
```
|
|
339
|
+
cd deepdoctection
|
|
340
|
+
pip install ".[tf]" # or "pip install -e .[tf]"
|
|
341
|
+
```
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
|
|
345
|
+
### Running a Docker container from Docker hub
|
|
346
|
+
|
|
347
|
+
Pre-existing Docker images can be downloaded from the
|
|
348
|
+
[Docker hub](https://hub.docker.com/r/deepdoctection/deepdoctection).
|
|
349
|
+
|
|
350
|
+
```
|
|
351
|
+
docker pull deepdoctection/deepdoctection:<release_tag>
|
|
352
|
+
```
|
|
353
|
+
|
|
354
|
+
Use the Docker compose file `./docker/pytorch-gpu/docker-compose.yaml`.
|
|
355
|
+
In the `.env` file provided, specify the host directory where **deep**doctection's cache should be stored.
|
|
356
|
+
Additionally, specify a working directory to mount files to be processed into the container.
|
|
357
|
+
|
|
358
|
+
```
|
|
359
|
+
docker compose up -d
|
|
360
|
+
```
|
|
361
|
+
|
|
362
|
+
will start the container. There is no endpoint exposed, though.
|
|
363
|
+
|
|
364
|
+
## Credits
|
|
365
|
+
|
|
366
|
+
We thank all libraries that provide high-quality code and pre-trained models. Without them, it would have been impossible
|
|
367
|
+
to develop this framework.
|
|
368
|
+
|
|
369
|
+
|
|
370
|
+
## If you like **deep**doctection ...
|
|
371
|
+
|
|
372
|
+
...you can easily support the project by making it more visible. Leaving a star or a recommendation will help.
|
|
373
|
+
|
|
374
|
+
## License
|
|
375
|
+
|
|
376
|
+
Distributed under the Apache 2.0 License. Check [LICENSE](https://github.com/deepdoctection/deepdoctection/blob/master/LICENSE) for additional information.
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
deepdoctection/__init__.py,sha256=T4BXZotL855uGwIHhore8lZAfSinpIcrpeIvrVsSCyc,12910
|
|
2
|
+
deepdoctection/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
|
+
deepdoctection/analyzer/__init__.py,sha256=wg0BcFwdCeREwzZfa--Yx8HUJ9LPv5z5PmLwtkZdPH8,772
|
|
4
|
+
deepdoctection/analyzer/config.py,sha256=CgH4etvZL0JdeIHAXMFdDro2VvVsF2itgQar_Ml94pw,41185
|
|
5
|
+
deepdoctection/analyzer/dd.py,sha256=2BGvZpl9o9khcaOV52-DPHMrs0DsqUO8cpdqFVHHzDQ,5176
|
|
6
|
+
deepdoctection/analyzer/factory.py,sha256=DI0S38KAG2sIROrSximsWJsMbem91a9zXaeWsDNvkGg,37574
|
|
7
|
+
deepdoctection/configs/__init__.py,sha256=TX_P6tqDOF1LK1mi9ruAl7x0mtv1Asm8cYWCz3Pe2dk,646
|
|
8
|
+
deepdoctection/configs/conf_dd_one.yaml,sha256=DHqAIKH3jRam54QO7qib2zutmpyFA8TqdV5UvIV191A,3688
|
|
9
|
+
deepdoctection/configs/conf_tesseract.yaml,sha256=oF6szDyoi15FHvq7yFUNIEjfA_jNLhGxoowiRsz_zY4,35
|
|
10
|
+
deepdoctection/configs/profiles.jsonl,sha256=zhMpsJWdfeSj2oq2J0BbiKhHnE7PIq47PA8-I1Th0pA,30266
|
|
11
|
+
deepdoctection/dataflow/__init__.py,sha256=pY4lhjTes2BU-0AdIIRMnRqo9Sv6TopVE_SNfLmpgnc,828
|
|
12
|
+
deepdoctection/dataflow/base.py,sha256=ZLRijyHI1J7tBfnE-q7eqUieYMMERjtK-c1oK40dBkk,6556
|
|
13
|
+
deepdoctection/dataflow/common.py,sha256=DKD_pRZBCt2vO3oNZcOvdoC3jThabTNcNbTS16mpVR0,10351
|
|
14
|
+
deepdoctection/dataflow/custom.py,sha256=xlw1Op4J3a8PNIlzY5stAY6olpBTN8KMhj1KQ7gf8tA,6792
|
|
15
|
+
deepdoctection/dataflow/custom_serialize.py,sha256=zWDx1_mkPpeot9VN-4P1C2sFtK6vYUaFoSs6UiiLMZA,23234
|
|
16
|
+
deepdoctection/dataflow/parallel_map.py,sha256=Xhem9lvNDKVd_x02Ih9qB4J6bEnxNbc8uHIro8mX9UU,15783
|
|
17
|
+
deepdoctection/dataflow/serialize.py,sha256=G5kfkFHyhy3E3AusTvTizBi0EVDU2YZov9a-LmXPjy4,4592
|
|
18
|
+
deepdoctection/dataflow/stats.py,sha256=AN5cbagveaDGqCXaYj6iWITpn-a2eO_AumA-vQNQ_XE,9764
|
|
19
|
+
deepdoctection/datapoint/__init__.py,sha256=DOhcN81MhyPUuFw9F4pyEDyZseeD9qxb8NdB_Q_81GA,1599
|
|
20
|
+
deepdoctection/datapoint/annotation.py,sha256=f32BNmzUGJoNMeGst2RGC2jmjJpzzjxyBRKFG8FCubY,23092
|
|
21
|
+
deepdoctection/datapoint/box.py,sha256=QAS8sK2Ge4_ysW6zOYkLlzNwhSyw_mhYcYsxscClEno,31453
|
|
22
|
+
deepdoctection/datapoint/convert.py,sha256=6ENXX3tBdY8ogb2NBPxsOsQMGnQux8ol5nrUfWS5tYE,7352
|
|
23
|
+
deepdoctection/datapoint/image.py,sha256=nDaWUtdD5j6l_iXW9d2PoIyXBC8M3_idoEIXm7JWGyQ,35139
|
|
24
|
+
deepdoctection/datapoint/view.py,sha256=5TYmKpNNZwJb-NrUXv08H3_zSfHKDHhg6LnEZjBqVns,56622
|
|
25
|
+
deepdoctection/datasets/__init__.py,sha256=4ifjIwWCPYiS31GzUlVDScrkNOrb1eo5xHlRXNyg_58,994
|
|
26
|
+
deepdoctection/datasets/adapter.py,sha256=VSLM_980aHi4TpgOxfxiBHiF_fUXyh348PXet6zTo-4,7779
|
|
27
|
+
deepdoctection/datasets/base.py,sha256=HTIquJir2BZRTLl1HSQM0ICfvjIaWAjJeyz3BEHgdb0,23175
|
|
28
|
+
deepdoctection/datasets/dataflow_builder.py,sha256=0vwkItr0wVbKPtTXoS6uJLO9QQNWbS0Ri7CySuywWxU,4186
|
|
29
|
+
deepdoctection/datasets/info.py,sha256=DLRYq3cHp3L34CcSXPUJ8j8wguJp2aVdoH-AhODNLBA,20814
|
|
30
|
+
deepdoctection/datasets/registry.py,sha256=qYRVycNYFeAzWB7jENGYzokgyzIEvTRb49he2UmPUe8,3451
|
|
31
|
+
deepdoctection/datasets/save.py,sha256=uIRmp3c6o4XDubs7Ay0Sf6zh3gOMFArv3qEn-hq3sBQ,3364
|
|
32
|
+
deepdoctection/datasets/instances/__init__.py,sha256=HIEyl1gZ_IsXda2x3NP8uDROJT8FKwfhrO4xRc_olIk,1428
|
|
33
|
+
deepdoctection/datasets/instances/doclaynet.py,sha256=dc1O7zj4iKrZXbEEALdKKC-1_19Nz4Ln-QoYDeziT7M,12429
|
|
34
|
+
deepdoctection/datasets/instances/fintabnet.py,sha256=ejWH4GnQOdwRkbQoEfAX8IxXhXHQCDHFHMx9Lrl_KIQ,11970
|
|
35
|
+
deepdoctection/datasets/instances/funsd.py,sha256=cDd8ThEwTPy8CarLQMzJykGsfUyGNhSxWaMZ9QvpImc,7276
|
|
36
|
+
deepdoctection/datasets/instances/iiitar13k.py,sha256=iMvPkSX4gKElYb90oTZ36oFxutAth_Tezh6f-wui06s,7014
|
|
37
|
+
deepdoctection/datasets/instances/layouttest.py,sha256=T-ri1ylmhUaF3xW9DV7IRBynWa5kKBK_MQAx42ERf6M,4624
|
|
38
|
+
deepdoctection/datasets/instances/publaynet.py,sha256=SpOsJM3tCwXlKcY8RQ4eKSsGktF9i0B3w-zsuHhlGWk,5540
|
|
39
|
+
deepdoctection/datasets/instances/pubtables1m.py,sha256=-B5i1s_OyfpNuz5qf_CNP4hxEdDgv-vz-4cpMYAvBCc,12610
|
|
40
|
+
deepdoctection/datasets/instances/pubtabnet.py,sha256=ljllMQ-y_2Jvu4p29AsOPin9RHogVl84dNcsVqDtk50,8687
|
|
41
|
+
deepdoctection/datasets/instances/rvlcdip.py,sha256=DGiWjC1iDZPqMo8P6-GOxIRAdrQOmyUCTLpRNKIlsJM,6847
|
|
42
|
+
deepdoctection/datasets/instances/xfund.py,sha256=1QKsmyZJIbpZj6vdtHJrfRaA3NFKrP_yXbl8EJO-YNU,9143
|
|
43
|
+
deepdoctection/datasets/instances/xsl/__init__.py,sha256=TX_P6tqDOF1LK1mi9ruAl7x0mtv1Asm8cYWCz3Pe2dk,646
|
|
44
|
+
deepdoctection/datasets/instances/xsl/pascal_voc.xsl,sha256=DlzFV2P8NtQKXVe96i-mIcPWmL6tsW7NQjgCuz2pCL4,1952
|
|
45
|
+
deepdoctection/eval/__init__.py,sha256=deGj63ejU9f3nthBU6GI25QIQidKWJmIW4q8fpn12bU,920
|
|
46
|
+
deepdoctection/eval/accmetric.py,sha256=TlOFUU9y9BBjJKVsRMyoVKpLZl6AflNsZ4thqSEie4k,19957
|
|
47
|
+
deepdoctection/eval/base.py,sha256=mYVvzD_wVPwsrBqcl1O4Vqqhg1yGtlG6hkuMjVZvt-k,5290
|
|
48
|
+
deepdoctection/eval/cocometric.py,sha256=H-BsLeV9S93tG6jzUN-3FCPXYiUUoTAYuznE5SvS9Bc,11070
|
|
49
|
+
deepdoctection/eval/eval.py,sha256=UUL-wk39RONLMOOyH3WjjpHunZJiQluXZFqir8eaDtY,19808
|
|
50
|
+
deepdoctection/eval/registry.py,sha256=us6EGN_tAia1Mk1mwWQwDeE-xqxcuopztdi8n-ieGbg,1100
|
|
51
|
+
deepdoctection/eval/tedsmetric.py,sha256=EcNeJynsmxyl5bOH3bjy2wE647ONf0SF5OZyGbVu35Q,9963
|
|
52
|
+
deepdoctection/eval/tp_eval_callback.py,sha256=lqrOn2tdaRiF_Vr_9CwBr2ryatcWu3mQKya8YZ2pA9A,5261
|
|
53
|
+
deepdoctection/extern/__init__.py,sha256=jG2qe5_X7fJFnJlx04Lf1KUTXKKKYtCkKR7WQ7looUk,991
|
|
54
|
+
deepdoctection/extern/base.py,sha256=vs4EO9vkfyTW-nVM0dnlqsmnPPeHIXI2wW5b4Wpiz-Y,31547
|
|
55
|
+
deepdoctection/extern/d2detect.py,sha256=I0oEkprr5iVpKpM3a3nknAU-sXwNoDQdp_B1gzzODsk,22374
|
|
56
|
+
deepdoctection/extern/deskew.py,sha256=L_jU0rXh03qzwaT79EIqE_zYMUVeFwWDbsGbtahuL2k,3124
|
|
57
|
+
deepdoctection/extern/doctrocr.py,sha256=d468jZpR4WBvZPt5Vze70dh6tZacBM1HO0pcy8ynOFM,25358
|
|
58
|
+
deepdoctection/extern/fastlang.py,sha256=4D9A-_hTXUcvXG6IJJknX34LrD71v08XtNdWgvXD7fE,4736
|
|
59
|
+
deepdoctection/extern/hfdetr.py,sha256=N3eLNI5BsQS9_7YZyBeWndSgUydJij7ugZA9p4V1xaQ,14316
|
|
60
|
+
deepdoctection/extern/hflayoutlm.py,sha256=3mZZ3byn00jSrLWO2vZFas9j4VrhbYQNmF1mwPG2ElQ,59642
|
|
61
|
+
deepdoctection/extern/hflm.py,sha256=y-9brzmT2NYtFoNcWHABNg2ZZQXSOP9CyqtT1OoeV9U,9754
|
|
62
|
+
deepdoctection/extern/model.py,sha256=-GbnuhLFq7jpBOvtpJe6IhGXxQdqwiM8epEd7IRELoU,18234
|
|
63
|
+
deepdoctection/extern/pdftext.py,sha256=ljzPQn3yYAlS6MoZqzixD-fO2GlHwu1aMiOQ6qMIzbg,7513
|
|
64
|
+
deepdoctection/extern/tessocr.py,sha256=SuPmngsJg38riL4b09z6_FIzJH6H3RIwoighG2GPMYM,17457
|
|
65
|
+
deepdoctection/extern/texocr.py,sha256=93vGj0TX2gENMFV6_FDk3et1sDecrNeuozv5EfOR5nk,5931
|
|
66
|
+
deepdoctection/extern/tpdetect.py,sha256=Kr00n80V_OfE-EGfpjiVw1eAQ2n2tuT-hSco-dLSR9E,8516
|
|
67
|
+
deepdoctection/extern/pt/__init__.py,sha256=3Cu0ZHjbYsJomru7-RQXEHihEQLegZrmLetlHiqS58I,742
|
|
68
|
+
deepdoctection/extern/pt/nms.py,sha256=2lSpEH8cI_QXdz5xL_OaitqsGoHhp5xvDssK5Yo8q4Q,2218
|
|
69
|
+
deepdoctection/extern/pt/ptutils.py,sha256=AmovwBx6WGhSE45Sxt3WYQ3Nu1ZF44dJ5WcFiH7KVsE,2132
|
|
70
|
+
deepdoctection/extern/tp/__init__.py,sha256=8QMkcA7tChCr1QXiA0551lZS2jTsECBrrL2YUanpFAk,706
|
|
71
|
+
deepdoctection/extern/tp/tfutils.py,sha256=paX5nOO2L8G4ze1AmpdizCDezMxF3yqNMXFvwFWh42A,4056
|
|
72
|
+
deepdoctection/extern/tp/tpcompat.py,sha256=u6qV5bhr4UUPCP_Bz3I1Z0b5ZdFEShN84JR6fFq6tJI,6249
|
|
73
|
+
deepdoctection/extern/tp/tpfrcnn/__init__.py,sha256=OzDaR5A8HGz9a4VwjLiR9rN1Nf1cSebv8DVEMxStFOw,703
|
|
74
|
+
deepdoctection/extern/tp/tpfrcnn/common.py,sha256=fCxwi2u752ZlI_DtIkLC_x9j9tyo1nnirAi2PmnziD4,3830
|
|
75
|
+
deepdoctection/extern/tp/tpfrcnn/predict.py,sha256=957dnhCByS-FZH13efFWADhodaV4lKto-ikLPetfvEQ,4338
|
|
76
|
+
deepdoctection/extern/tp/tpfrcnn/preproc.py,sha256=oHN9keBurjdNQqXmsb5BgURB5nl-eEp0KHvO1DPRQL4,12009
|
|
77
|
+
deepdoctection/extern/tp/tpfrcnn/config/__init__.py,sha256=RhJiXId6vUSw_Pi49SPwj0jrf61VxxptXoGeBKtT42M,705
|
|
78
|
+
deepdoctection/extern/tp/tpfrcnn/config/config.py,sha256=Xh3TBYWvPhoOhPRjncvv9FJ75T_4IAuzBEuPv751DFg,11531
|
|
79
|
+
deepdoctection/extern/tp/tpfrcnn/modeling/__init__.py,sha256=RhJiXId6vUSw_Pi49SPwj0jrf61VxxptXoGeBKtT42M,705
|
|
80
|
+
deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py,sha256=H7xoWhRwCh-vlHAL5hCEolKBJ8Y2xe9duZuBuLs0ZwQ,9835
|
|
81
|
+
deepdoctection/extern/tp/tpfrcnn/modeling/generalized_rcnn.py,sha256=4jgWyja-_V44zJVfK4ySmknhnhqfb9f6ruVwbh387aE,13752
|
|
82
|
+
deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py,sha256=UvZ8_34dNjCvxsTxCJvrlpqUpQb9gWxgwRoIKgedIog,7361
|
|
83
|
+
deepdoctection/extern/tp/tpfrcnn/modeling/model_cascade.py,sha256=plovKReX6rFjnL_ravLUUCZ49ZFni87FlRJGK0fXqco,5777
|
|
84
|
+
deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py,sha256=Ejd0Z2uUrdAfRjXQoS-lBVPukLlw8geP0yXcF61-nk4,11486
|
|
85
|
+
deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py,sha256=KsL08NNy4PEvBu53HV6bMio58oqIfVrcoqpti27pZOI,18166
|
|
86
|
+
deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py,sha256=B-rImWGWLNNe4UPJfhTpi4f1LUMCW8YJAbwoJFiG__o,4966
|
|
87
|
+
deepdoctection/extern/tp/tpfrcnn/modeling/model_rpn.py,sha256=F7NGrvKyPZRxnl96zoFyezNzymFJvQghMjGslsc7iFg,9028
|
|
88
|
+
deepdoctection/extern/tp/tpfrcnn/utils/__init__.py,sha256=kiPlXxHlTGN9eI7YE9BgwteOQ_nCYCuqqSLO5JfkCTQ,695
|
|
89
|
+
deepdoctection/extern/tp/tpfrcnn/utils/box_ops.py,sha256=aBLqPg_ApaiimtBRaOsLKTZZFIBh87vVtqjLPMaX9fQ,2379
|
|
90
|
+
deepdoctection/extern/tp/tpfrcnn/utils/np_box_ops.py,sha256=O-q1GQiOEd1lN1MQDsJvHwD2OmBO-qHNeqJ1Qnec93g,3539
|
|
91
|
+
deepdoctection/mapper/__init__.py,sha256=H2fPGQ1Es0Osyvd9pSNjtyKDpPZuzsyqk2DPyAp1ckI,1099
|
|
92
|
+
deepdoctection/mapper/cats.py,sha256=YEnf5uOvyf_UFcEtN5ddJxF7LGwkwdPWjBE14QvSPV4,17320
|
|
93
|
+
deepdoctection/mapper/cocostruct.py,sha256=BbykSMXklsr6YJ4HRDYEABL1NUxndZvhKPr683aIG_A,6287
|
|
94
|
+
deepdoctection/mapper/d2struct.py,sha256=XiIuQAcC-ekn97RHz2hALcD02Mpdze7Lrfm4vPEB9Iw,11481
|
|
95
|
+
deepdoctection/mapper/hfstruct.py,sha256=15eOUwQ_f3rflZJdnQzIaN7tpj8dhDKDRlAykOtiDsk,5727
|
|
96
|
+
deepdoctection/mapper/laylmstruct.py,sha256=_10260AtRcF2xdkALz2JatiAKMcNIwNMbJgO__hOPN8,42094
|
|
97
|
+
deepdoctection/mapper/maputils.py,sha256=21Oyt4I8IV5jSgtplBP-opPTKk3idgJnA3s8ICPvMvc,8977
|
|
98
|
+
deepdoctection/mapper/match.py,sha256=Q_Dq95IpO9o0gRKk-Jg7ua0eiZ2rMHUhIhwXygT2aGU,10202
|
|
99
|
+
deepdoctection/mapper/misc.py,sha256=LYSORlUR7sn0Qf-wgpTyVNwGgnpuKN9ln7TAiFrbBrQ,7366
|
|
100
|
+
deepdoctection/mapper/pascalstruct.py,sha256=PviZjhTk4p5HDUTlF8qhWPyraKD0uh51f2hoNqA1Bbg,3838
|
|
101
|
+
deepdoctection/mapper/prodigystruct.py,sha256=OWzPUbNDrqwFipH8YWI5eSxwMdA7qYczaFdsHNrE_4c,7001
|
|
102
|
+
deepdoctection/mapper/pubstruct.py,sha256=UTyfUmzMSuf2BXtdYwHjK7ngsIwAxSZjwTxDtz6DySg,23416
|
|
103
|
+
deepdoctection/mapper/tpstruct.py,sha256=dxtEVHYVnkH-zjjbHzkFrPgS9eheys6E-CMlsjaOnxo,5468
|
|
104
|
+
deepdoctection/mapper/xfundstruct.py,sha256=XLUZ-yBMWtKFQ40vxHl6p8EZZvl68JdwJlV00A93Zy8,9108
|
|
105
|
+
deepdoctection/pipe/__init__.py,sha256=E3cYAVWOvMzIN7jbKFyqLjFXahcFGcAGkb1uChM_XCY,1034
|
|
106
|
+
deepdoctection/pipe/anngen.py,sha256=Hfi7C6-iOv7t8tjFoz4FuIhcz6yMZx52f5SG9bsVnLg,16365
|
|
107
|
+
deepdoctection/pipe/base.py,sha256=oszB_DepcFtORvDdGTZZPWMhk01C68RUWXHjeX7SF3M,18163
|
|
108
|
+
deepdoctection/pipe/common.py,sha256=OcsqHr_c66Yqt98hFeKwaa0mciWMCauw0HZ3YnHx8MU,24586
|
|
109
|
+
deepdoctection/pipe/concurrency.py,sha256=_EKZi4eCeF3mVHytZL_fMwyqa25C2aR9g8vrIFB8iR4,9780
|
|
110
|
+
deepdoctection/pipe/doctectionpipe.py,sha256=ik5F92F3klI5Nve_AnyIRj-ApMoKHSR2SjcWWnI0d2g,14063
|
|
111
|
+
deepdoctection/pipe/language.py,sha256=T5g5_2GIsbTltAmn_PFymMUMoik8_b0uJNx8f5dT9MM,5898
|
|
112
|
+
deepdoctection/pipe/layout.py,sha256=oAldMtwyZee1IqpuflKKvmeL2Z_nXFiqwFMS4VYv5eI,6391
|
|
113
|
+
deepdoctection/pipe/lm.py,sha256=nYI2bm0sc9d3JMlIPyNyd4XxXFRBIHRUYfMImuek6b4,19793
|
|
114
|
+
deepdoctection/pipe/order.py,sha256=9OarsHKwVqT1bTDIn7XGeGLgpetEJW3uLjuJQDdhjG4,40684
|
|
115
|
+
deepdoctection/pipe/refine.py,sha256=SrMcAWXRO5tJpqaZCEz9RzvjPyiQiE8fZ9TXBcaBKck,23310
|
|
116
|
+
deepdoctection/pipe/registry.py,sha256=uT5fnHjffoNGk2JPuD2-pMYtO3Iko7-wrwVZVCWLtok,906
|
|
117
|
+
deepdoctection/pipe/segment.py,sha256=rHhEWr5zZ1ppj-gMa-q-UCr1AYTWpUW7oA1umwebqBI,61302
|
|
118
|
+
deepdoctection/pipe/sub_layout.py,sha256=Wh4_uW-6CISe0xwD1AbJX1uk_4ygiUlQHV95gnl7280,14135
|
|
119
|
+
deepdoctection/pipe/text.py,sha256=4fYLCXoE-wFz0atAwbXiy-bjiJuKjNx3i3IHa54YW-0,11009
|
|
120
|
+
deepdoctection/pipe/transform.py,sha256=X1ZUvb6N9YBdJm4XOI7Fe4TZH1OJgJnmOi4DFK-B75U,4797
|
|
121
|
+
deepdoctection/train/__init__.py,sha256=YFTRAZF1F7cEAKTdAIi1BLyYb6rSRcwq09Ui5Lu8d6E,1071
|
|
122
|
+
deepdoctection/train/d2_frcnn_train.py,sha256=edmyNTBRMM_TuL_1D6G2TSY9CBqNndIuyKree_KAso0,15508
|
|
123
|
+
deepdoctection/train/hf_detr_train.py,sha256=El-VHggdBObttFQwFIfQs5xm7aaxpC5IzNUJ1gF4Z6E,13278
|
|
124
|
+
deepdoctection/train/hf_layoutlm_train.py,sha256=bNL5OCLKytshG6kaTJDLTQOcvWKwEsYVmnj8zPd7uio,23634
|
|
125
|
+
deepdoctection/train/tp_frcnn_train.py,sha256=Tltb-v2JD5oPuHCZGA9B5DM4ZaidoBITlH93QX-KPKI,13570
|
|
126
|
+
deepdoctection/utils/__init__.py,sha256=brBceRWeov9WXMiJTjyJOF2rHMP8trGGRRjhMdZ61nI,2371
|
|
127
|
+
deepdoctection/utils/concurrency.py,sha256=9ly81D5i2ZFzKfXMIUSmhT42eMs4QttsRhWXdkIk6Aw,5832
|
|
128
|
+
deepdoctection/utils/context.py,sha256=GXgIGQ10JwosE6FQSPdlJoCXyeM_cahEcbQ2mxGNofI,4538
|
|
129
|
+
deepdoctection/utils/develop.py,sha256=4myrqBDypM6tQ2a2Jo3Q20RuE_W2czykpXBwgXPrxNw,3568
|
|
130
|
+
deepdoctection/utils/env_info.py,sha256=yyRyjQT3xz73yEcqauPM6GdCpvWwYGAH-_KUHfXBrtM,19855
|
|
131
|
+
deepdoctection/utils/error.py,sha256=sIry8F5MZ0yLvKfAwVz90IorKWVvjoRqcC0L8qq8mLk,2480
|
|
132
|
+
deepdoctection/utils/file_utils.py,sha256=D4cua4i3Q-4ZeSRXXWEL4z7hp0M1qh9rrVSfS5t-Hzo,25643
|
|
133
|
+
deepdoctection/utils/fs.py,sha256=KTS9FJzZk9le_vmIPr9IisJw0AyTfjkyX1KoWQy4DNs,12729
|
|
134
|
+
deepdoctection/utils/identifier.py,sha256=Jt12MeZf7eC1qciY5Fp_AYUGxYVcjsy7xNBUvJil7dU,2270
|
|
135
|
+
deepdoctection/utils/logger.py,sha256=ddQ0xBStluf8OvoRlEB8YkqyRR-ZYgyJYLClTmJJMAU,10290
|
|
136
|
+
deepdoctection/utils/metacfg.py,sha256=5M390--ZMoyJEt5oZOwFMGt2i8OF_ayeb0NVmUO_3OQ,7235
|
|
137
|
+
deepdoctection/utils/mocks.py,sha256=IkN3-IzAl4eX0ibgKIHg8IY7ykVw6BnpF6XnxKnKaZI,2389
|
|
138
|
+
deepdoctection/utils/pdf_utils.py,sha256=BrxTuY9j0COyIRkJchJ0tt2h6ZsA2an6z-H8E8QwgUQ,13490
|
|
139
|
+
deepdoctection/utils/settings.py,sha256=OrFEe9Mll3UuDhjyS-cTCv_q1ZSr30Jpl9nQxk__t2I,12824
|
|
140
|
+
deepdoctection/utils/tqdm.py,sha256=kx3Ivf0x85S0ZmEaN5mImu0V6isOgygOU8iyr2U99XU,1850
|
|
141
|
+
deepdoctection/utils/transform.py,sha256=jgeCyQWLN9q79jCGW7jysyKUKcJ1AVMk8OslF-3fbag,16095
|
|
142
|
+
deepdoctection/utils/types.py,sha256=ti4WdtIJSg3TGK_YPkkoY9PYGMnR2tTX6Xfik8U1pNk,2986
|
|
143
|
+
deepdoctection/utils/utils.py,sha256=NBUb1qbx8Jm-AvYN1Sdbk0huXhbAKxZ-ZtOcMespsMM,7064
|
|
144
|
+
deepdoctection/utils/viz.py,sha256=bujRIujvX317rPz4jBrj0yd3WP8wPjDUiI5GUrw9MzQ,27339
|
|
145
|
+
deepdoctection-0.43.dist-info/licenses/LICENSE,sha256=GQ0rUvuGdrMNEI3iHK5UQx6dIMU1QwAuyXsxUHn5MEQ,11351
|
|
146
|
+
deepdoctection-0.43.dist-info/METADATA,sha256=F08hSp-kyzwW1tsTWyRMtWG3TaJsA8_LgueZ7irkBqA,13404
|
|
147
|
+
deepdoctection-0.43.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
148
|
+
deepdoctection-0.43.dist-info/top_level.txt,sha256=hs2DdoOL9h4mnHhmO82BT4pz4QATIoOZ20PZmlnxFI8,15
|
|
149
|
+
deepdoctection-0.43.dist-info/RECORD,,
|