deepdoctection 0.42.1__py3-none-any.whl → 0.43.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of deepdoctection might be problematic. Click here for more details.

Files changed (124) hide show
  1. deepdoctection/__init__.py +4 -2
  2. deepdoctection/analyzer/__init__.py +2 -1
  3. deepdoctection/analyzer/config.py +919 -0
  4. deepdoctection/analyzer/dd.py +36 -62
  5. deepdoctection/analyzer/factory.py +311 -141
  6. deepdoctection/configs/conf_dd_one.yaml +100 -44
  7. deepdoctection/configs/profiles.jsonl +32 -0
  8. deepdoctection/dataflow/__init__.py +9 -6
  9. deepdoctection/dataflow/base.py +33 -15
  10. deepdoctection/dataflow/common.py +96 -75
  11. deepdoctection/dataflow/custom.py +36 -29
  12. deepdoctection/dataflow/custom_serialize.py +135 -91
  13. deepdoctection/dataflow/parallel_map.py +33 -31
  14. deepdoctection/dataflow/serialize.py +15 -10
  15. deepdoctection/dataflow/stats.py +41 -28
  16. deepdoctection/datapoint/__init__.py +4 -6
  17. deepdoctection/datapoint/annotation.py +104 -66
  18. deepdoctection/datapoint/box.py +190 -130
  19. deepdoctection/datapoint/convert.py +66 -39
  20. deepdoctection/datapoint/image.py +151 -95
  21. deepdoctection/datapoint/view.py +383 -236
  22. deepdoctection/datasets/__init__.py +2 -6
  23. deepdoctection/datasets/adapter.py +11 -11
  24. deepdoctection/datasets/base.py +118 -81
  25. deepdoctection/datasets/dataflow_builder.py +18 -12
  26. deepdoctection/datasets/info.py +76 -57
  27. deepdoctection/datasets/instances/__init__.py +6 -2
  28. deepdoctection/datasets/instances/doclaynet.py +17 -14
  29. deepdoctection/datasets/instances/fintabnet.py +16 -22
  30. deepdoctection/datasets/instances/funsd.py +11 -6
  31. deepdoctection/datasets/instances/iiitar13k.py +9 -9
  32. deepdoctection/datasets/instances/layouttest.py +9 -9
  33. deepdoctection/datasets/instances/publaynet.py +9 -9
  34. deepdoctection/datasets/instances/pubtables1m.py +13 -13
  35. deepdoctection/datasets/instances/pubtabnet.py +13 -15
  36. deepdoctection/datasets/instances/rvlcdip.py +8 -8
  37. deepdoctection/datasets/instances/xfund.py +11 -9
  38. deepdoctection/datasets/registry.py +18 -11
  39. deepdoctection/datasets/save.py +12 -11
  40. deepdoctection/eval/__init__.py +3 -2
  41. deepdoctection/eval/accmetric.py +72 -52
  42. deepdoctection/eval/base.py +29 -10
  43. deepdoctection/eval/cocometric.py +14 -12
  44. deepdoctection/eval/eval.py +56 -41
  45. deepdoctection/eval/registry.py +6 -3
  46. deepdoctection/eval/tedsmetric.py +24 -9
  47. deepdoctection/eval/tp_eval_callback.py +13 -12
  48. deepdoctection/extern/__init__.py +1 -1
  49. deepdoctection/extern/base.py +176 -97
  50. deepdoctection/extern/d2detect.py +127 -92
  51. deepdoctection/extern/deskew.py +19 -10
  52. deepdoctection/extern/doctrocr.py +162 -108
  53. deepdoctection/extern/fastlang.py +25 -17
  54. deepdoctection/extern/hfdetr.py +137 -60
  55. deepdoctection/extern/hflayoutlm.py +329 -248
  56. deepdoctection/extern/hflm.py +67 -33
  57. deepdoctection/extern/model.py +108 -762
  58. deepdoctection/extern/pdftext.py +37 -12
  59. deepdoctection/extern/pt/nms.py +15 -1
  60. deepdoctection/extern/pt/ptutils.py +13 -9
  61. deepdoctection/extern/tessocr.py +87 -54
  62. deepdoctection/extern/texocr.py +29 -14
  63. deepdoctection/extern/tp/tfutils.py +36 -8
  64. deepdoctection/extern/tp/tpcompat.py +54 -16
  65. deepdoctection/extern/tp/tpfrcnn/config/config.py +20 -4
  66. deepdoctection/extern/tpdetect.py +4 -2
  67. deepdoctection/mapper/__init__.py +1 -1
  68. deepdoctection/mapper/cats.py +117 -76
  69. deepdoctection/mapper/cocostruct.py +35 -17
  70. deepdoctection/mapper/d2struct.py +56 -29
  71. deepdoctection/mapper/hfstruct.py +32 -19
  72. deepdoctection/mapper/laylmstruct.py +221 -185
  73. deepdoctection/mapper/maputils.py +71 -35
  74. deepdoctection/mapper/match.py +76 -62
  75. deepdoctection/mapper/misc.py +68 -44
  76. deepdoctection/mapper/pascalstruct.py +13 -12
  77. deepdoctection/mapper/prodigystruct.py +33 -19
  78. deepdoctection/mapper/pubstruct.py +42 -32
  79. deepdoctection/mapper/tpstruct.py +39 -19
  80. deepdoctection/mapper/xfundstruct.py +20 -13
  81. deepdoctection/pipe/__init__.py +1 -2
  82. deepdoctection/pipe/anngen.py +104 -62
  83. deepdoctection/pipe/base.py +226 -107
  84. deepdoctection/pipe/common.py +206 -123
  85. deepdoctection/pipe/concurrency.py +74 -47
  86. deepdoctection/pipe/doctectionpipe.py +108 -47
  87. deepdoctection/pipe/language.py +41 -24
  88. deepdoctection/pipe/layout.py +45 -18
  89. deepdoctection/pipe/lm.py +146 -78
  90. deepdoctection/pipe/order.py +205 -119
  91. deepdoctection/pipe/refine.py +111 -63
  92. deepdoctection/pipe/registry.py +1 -1
  93. deepdoctection/pipe/segment.py +213 -142
  94. deepdoctection/pipe/sub_layout.py +76 -46
  95. deepdoctection/pipe/text.py +52 -33
  96. deepdoctection/pipe/transform.py +8 -6
  97. deepdoctection/train/d2_frcnn_train.py +87 -69
  98. deepdoctection/train/hf_detr_train.py +72 -40
  99. deepdoctection/train/hf_layoutlm_train.py +85 -46
  100. deepdoctection/train/tp_frcnn_train.py +56 -28
  101. deepdoctection/utils/concurrency.py +59 -16
  102. deepdoctection/utils/context.py +40 -19
  103. deepdoctection/utils/develop.py +26 -17
  104. deepdoctection/utils/env_info.py +86 -37
  105. deepdoctection/utils/error.py +16 -10
  106. deepdoctection/utils/file_utils.py +246 -71
  107. deepdoctection/utils/fs.py +162 -43
  108. deepdoctection/utils/identifier.py +29 -16
  109. deepdoctection/utils/logger.py +49 -32
  110. deepdoctection/utils/metacfg.py +83 -21
  111. deepdoctection/utils/pdf_utils.py +119 -62
  112. deepdoctection/utils/settings.py +24 -10
  113. deepdoctection/utils/tqdm.py +10 -5
  114. deepdoctection/utils/transform.py +182 -46
  115. deepdoctection/utils/utils.py +61 -28
  116. deepdoctection/utils/viz.py +150 -104
  117. deepdoctection-0.43.1.dist-info/METADATA +376 -0
  118. deepdoctection-0.43.1.dist-info/RECORD +149 -0
  119. deepdoctection/analyzer/_config.py +0 -146
  120. deepdoctection-0.42.1.dist-info/METADATA +0 -431
  121. deepdoctection-0.42.1.dist-info/RECORD +0 -148
  122. {deepdoctection-0.42.1.dist-info → deepdoctection-0.43.1.dist-info}/WHEEL +0 -0
  123. {deepdoctection-0.42.1.dist-info → deepdoctection-0.43.1.dist-info}/licenses/LICENSE +0 -0
  124. {deepdoctection-0.42.1.dist-info → deepdoctection-0.43.1.dist-info}/top_level.txt +0 -0
@@ -1,431 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: deepdoctection
3
- Version: 0.42.1
4
- Summary: Repository for Document AI
5
- Home-page: https://github.com/deepdoctection/deepdoctection
6
- Author: Dr. Janis Meyer
7
- License: Apache License 2.0
8
- Classifier: Development Status :: 4 - Beta
9
- Classifier: License :: OSI Approved :: Apache Software License
10
- Classifier: Natural Language :: English
11
- Classifier: Operating System :: POSIX :: Linux
12
- Classifier: Programming Language :: Python :: 3.9
13
- Classifier: Programming Language :: Python :: 3.10
14
- Classifier: Programming Language :: Python :: 3.11
15
- Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
16
- Requires-Python: >=3.9
17
- Description-Content-Type: text/markdown
18
- License-File: LICENSE
19
- Requires-Dist: catalogue==2.0.10
20
- Requires-Dist: huggingface_hub>=0.26.0
21
- Requires-Dist: importlib-metadata>=5.0.0
22
- Requires-Dist: jsonlines==3.1.0
23
- Requires-Dist: lazy-imports==0.3.1
24
- Requires-Dist: mock==4.0.3
25
- Requires-Dist: networkx>=2.7.1
26
- Requires-Dist: numpy<2.0,>=1.21
27
- Requires-Dist: packaging>=20.0
28
- Requires-Dist: Pillow>=10.0.0
29
- Requires-Dist: pypdf>=3.16.0
30
- Requires-Dist: pypdfium2>=4.30.0
31
- Requires-Dist: pyyaml>=6.0.1
32
- Requires-Dist: pyzmq>=16
33
- Requires-Dist: scipy>=1.13.1
34
- Requires-Dist: termcolor>=1.1
35
- Requires-Dist: tabulate>=0.7.7
36
- Requires-Dist: tqdm>=4.64.0
37
- Provides-Extra: tf
38
- Requires-Dist: catalogue==2.0.10; extra == "tf"
39
- Requires-Dist: huggingface_hub>=0.26.0; extra == "tf"
40
- Requires-Dist: importlib-metadata>=5.0.0; extra == "tf"
41
- Requires-Dist: jsonlines==3.1.0; extra == "tf"
42
- Requires-Dist: lazy-imports==0.3.1; extra == "tf"
43
- Requires-Dist: mock==4.0.3; extra == "tf"
44
- Requires-Dist: networkx>=2.7.1; extra == "tf"
45
- Requires-Dist: numpy<2.0,>=1.21; extra == "tf"
46
- Requires-Dist: packaging>=20.0; extra == "tf"
47
- Requires-Dist: Pillow>=10.0.0; extra == "tf"
48
- Requires-Dist: pypdf>=3.16.0; extra == "tf"
49
- Requires-Dist: pypdfium2>=4.30.0; extra == "tf"
50
- Requires-Dist: pyyaml>=6.0.1; extra == "tf"
51
- Requires-Dist: pyzmq>=16; extra == "tf"
52
- Requires-Dist: scipy>=1.13.1; extra == "tf"
53
- Requires-Dist: termcolor>=1.1; extra == "tf"
54
- Requires-Dist: tabulate>=0.7.7; extra == "tf"
55
- Requires-Dist: tqdm>=4.64.0; extra == "tf"
56
- Requires-Dist: tensorpack==0.11; extra == "tf"
57
- Requires-Dist: protobuf==3.20.1; extra == "tf"
58
- Requires-Dist: tensorflow-addons>=0.17.1; extra == "tf"
59
- Requires-Dist: tf2onnx>=1.9.2; extra == "tf"
60
- Requires-Dist: python-doctr==0.9.0; extra == "tf"
61
- Requires-Dist: pycocotools>=2.0.2; extra == "tf"
62
- Requires-Dist: boto3==1.34.102; extra == "tf"
63
- Requires-Dist: pdfplumber>=0.11.0; extra == "tf"
64
- Requires-Dist: fasttext-wheel; extra == "tf"
65
- Requires-Dist: jdeskew>=0.2.2; extra == "tf"
66
- Requires-Dist: apted==1.0.3; extra == "tf"
67
- Requires-Dist: distance==0.1.3; extra == "tf"
68
- Requires-Dist: lxml>=4.9.1; extra == "tf"
69
- Provides-Extra: pt
70
- Requires-Dist: catalogue==2.0.10; extra == "pt"
71
- Requires-Dist: huggingface_hub>=0.26.0; extra == "pt"
72
- Requires-Dist: importlib-metadata>=5.0.0; extra == "pt"
73
- Requires-Dist: jsonlines==3.1.0; extra == "pt"
74
- Requires-Dist: lazy-imports==0.3.1; extra == "pt"
75
- Requires-Dist: mock==4.0.3; extra == "pt"
76
- Requires-Dist: networkx>=2.7.1; extra == "pt"
77
- Requires-Dist: numpy<2.0,>=1.21; extra == "pt"
78
- Requires-Dist: packaging>=20.0; extra == "pt"
79
- Requires-Dist: Pillow>=10.0.0; extra == "pt"
80
- Requires-Dist: pypdf>=3.16.0; extra == "pt"
81
- Requires-Dist: pypdfium2>=4.30.0; extra == "pt"
82
- Requires-Dist: pyyaml>=6.0.1; extra == "pt"
83
- Requires-Dist: pyzmq>=16; extra == "pt"
84
- Requires-Dist: scipy>=1.13.1; extra == "pt"
85
- Requires-Dist: termcolor>=1.1; extra == "pt"
86
- Requires-Dist: tabulate>=0.7.7; extra == "pt"
87
- Requires-Dist: tqdm>=4.64.0; extra == "pt"
88
- Requires-Dist: timm>=0.9.16; extra == "pt"
89
- Requires-Dist: transformers>=4.48.0; extra == "pt"
90
- Requires-Dist: accelerate>=0.29.1; extra == "pt"
91
- Requires-Dist: python-doctr==0.9.0; extra == "pt"
92
- Requires-Dist: boto3==1.34.102; extra == "pt"
93
- Requires-Dist: pdfplumber>=0.11.0; extra == "pt"
94
- Requires-Dist: fasttext-wheel; extra == "pt"
95
- Requires-Dist: jdeskew>=0.2.2; extra == "pt"
96
- Requires-Dist: apted==1.0.3; extra == "pt"
97
- Requires-Dist: distance==0.1.3; extra == "pt"
98
- Requires-Dist: lxml>=4.9.1; extra == "pt"
99
- Provides-Extra: docs
100
- Requires-Dist: tensorpack==0.11; extra == "docs"
101
- Requires-Dist: boto3==1.34.102; extra == "docs"
102
- Requires-Dist: transformers>=4.48.0; extra == "docs"
103
- Requires-Dist: accelerate>=0.29.1; extra == "docs"
104
- Requires-Dist: pdfplumber>=0.11.0; extra == "docs"
105
- Requires-Dist: lxml>=4.9.1; extra == "docs"
106
- Requires-Dist: lxml-stubs>=0.5.1; extra == "docs"
107
- Requires-Dist: jdeskew>=0.2.2; extra == "docs"
108
- Requires-Dist: jinja2==3.0.3; extra == "docs"
109
- Requires-Dist: mkdocs-material; extra == "docs"
110
- Requires-Dist: mkdocstrings-python; extra == "docs"
111
- Requires-Dist: griffe==0.25.0; extra == "docs"
112
- Provides-Extra: dev
113
- Requires-Dist: python-dotenv==1.0.0; extra == "dev"
114
- Requires-Dist: click; extra == "dev"
115
- Requires-Dist: black==23.7.0; extra == "dev"
116
- Requires-Dist: isort==5.13.2; extra == "dev"
117
- Requires-Dist: pylint==2.17.4; extra == "dev"
118
- Requires-Dist: mypy==1.4.1; extra == "dev"
119
- Requires-Dist: wandb; extra == "dev"
120
- Requires-Dist: types-PyYAML>=6.0.12.12; extra == "dev"
121
- Requires-Dist: types-termcolor>=1.1.3; extra == "dev"
122
- Requires-Dist: types-tabulate>=0.9.0.3; extra == "dev"
123
- Requires-Dist: types-tqdm>=4.66.0.5; extra == "dev"
124
- Requires-Dist: lxml-stubs>=0.5.1; extra == "dev"
125
- Requires-Dist: types-Pillow>=10.2.0.20240406; extra == "dev"
126
- Requires-Dist: types-urllib3>=1.26.25.14; extra == "dev"
127
- Provides-Extra: test
128
- Requires-Dist: pytest==8.0.2; extra == "test"
129
- Requires-Dist: pytest-cov; extra == "test"
130
- Dynamic: author
131
- Dynamic: classifier
132
- Dynamic: description
133
- Dynamic: description-content-type
134
- Dynamic: home-page
135
- Dynamic: license
136
- Dynamic: license-file
137
- Dynamic: provides-extra
138
- Dynamic: requires-dist
139
- Dynamic: requires-python
140
- Dynamic: summary
141
-
142
-
143
- <p align="center">
144
- <img src="https://github.com/deepdoctection/deepdoctection/raw/master/docs/tutorials/_imgs/dd_logo.png" alt="Deep Doctection Logo" width="60%">
145
- <h3 align="center">
146
- A Document AI Package
147
- </h3>
148
- </p>
149
-
150
-
151
- **deep**doctection is a Python library that orchestrates document extraction and document layout analysis tasks using deep learning models. It does
152
- not implement models but enables you to build pipelines using highly acknowledged libraries for object detection, OCR
153
- and selected NLP tasks and provides an integrated framework for fine-tuning, evaluating and running models. For more
154
- specific text processing tasks use one of the many other great NLP libraries.
155
-
156
- **deep**doctection focuses on applications and is made for those who want to solve real world problems related to
157
- document extraction from PDFs or scans in various image formats.
158
-
159
- Check the demo of a document layout analysis pipeline with OCR on
160
- :hugs: [**Hugging Face spaces**](https://huggingface.co/spaces/deepdoctection/deepdoctection).
161
-
162
- # Overview
163
-
164
- **deep**doctection provides model wrappers of supported libraries for various tasks to be integrated into
165
- pipelines. Its core function does not depend on any specific deep learning library. Selected models for the following
166
- tasks are currently supported:
167
-
168
- - Document layout analysis including table recognition in Tensorflow with [**Tensorpack**](https://github.com/tensorpack),
169
- or PyTorch with [**Detectron2**](https://github.com/facebookresearch/detectron2/tree/main/detectron2),
170
- - OCR with support of [**Tesseract**](https://github.com/tesseract-ocr/tesseract), [**DocTr**](https://github.com/mindee/doctr)
171
- (Tensorflow and PyTorch implementations available) and a wrapper to an API for a commercial solution,
172
- - Text mining for native PDFs with [**pdfplumber**](https://github.com/jsvine/pdfplumber),
173
- - Language detection with [**fastText**](https://github.com/facebookresearch/fastText),
174
- - Deskewing and rotating images with [**jdeskew**](https://github.com/phamquiluan/jdeskew).
175
- - Document and token classification with all LayoutLM models provided by the
176
- [**Transformer library**](https://github.com/huggingface/transformers).
177
- (Yes, you can use any LayoutLM model with any of the provided OCR or pdfplumber tools straight away!).
178
- - Table detection and table structure recognition with
179
- [**table-transformer**](https://github.com/microsoft/table-transformer).
180
- - There is a small dataset for token classification [available](https://huggingface.co/datasets/deepdoctection/FRFPE)
181
- and a lot of new [tutorials](https://github.com/deepdoctection/notebooks/blob/main/Layoutlm_v2_on_custom_token_classification.ipynb)
182
- to show how to train and evaluate this dataset using LayoutLMv1, LayoutLMv2, LayoutXLM and LayoutLMv3.
183
- - Comprehensive configuration of **analyzer** like choosing different models, output parsing, OCR selection.
184
- Check this [notebook](https://github.com/deepdoctection/notebooks/blob/main/Analyzer_Configuration.ipynb) or the
185
- [docs](https://deepdoctection.readthedocs.io/en/latest/tutorials/analyzer_configuration_notebook/) for more information.
186
- - Document layout analysis and table recognition now run with
187
- [**Torchscript**](https://pytorch.org/docs/stable/jit.html) (CPU) as well and [**Detectron2**](https://github.com/facebookresearch/detectron2/tree/main/detectron2) is not required
188
- anymore for basic inference.
189
- - More angle predictors for determining the rotation of a document based on [**Tesseract**](https://github.com/tesseract-ocr/tesseract) and [**DocTr**](https://github.com/mindee/doctr)
190
- - Token classification with [**LiLT**](https://github.com/jpWang/LiLT) via
191
- [**transformers**](https://github.com/huggingface/transformers).
192
- We have added a model wrapper for token classification with LiLT and added some LiLT models to the model catalog
193
- that seem to look promising, especially if you want to train a model on non-english data. The training script for
194
- LayoutLM can be used for LiLT as well.
195
- - [**new**] There are two notebooks available that show, how to write a
196
- [custom predictor](https://github.com/deepdoctection/notebooks/blob/main/Doclaynet_Analyzer_Config.ipynb) based on
197
- a third party library that has not been supported yet and how to use
198
- [advanced configuration](https://github.com/deepdoctection/notebooks/blob/main/Doclaynet_Analyzer_Config.ipynb) to
199
- get links between layout segments e.g. captions and tables or figures.
200
-
201
- **deep**doctection provides on top of that methods for pre-processing inputs to models like cropping or resizing and to
202
- post-process results, like validating duplicate outputs, relating words to detected layout segments or ordering words
203
- into contiguous text. You will get an output in JSON format that you can customize even further by yourself.
204
-
205
- Have a look at the [**introduction notebook**](https://github.com/deepdoctection/notebooks/blob/main/Get_Started.ipynb) in the
206
- [notebook repo](https://github.com/deepdoctection/notebooks) for an easy start.
207
-
208
- Check the [**release notes**](https://github.com/deepdoctection/deepdoctection/releases) for recent updates.
209
-
210
- ## Models
211
-
212
- **deep**doctection or its support libraries provide pre-trained models that are in most of the cases available at the
213
- [**Hugging Face Model Hub**](https://huggingface.co/deepdoctection) or that will be automatically downloaded once
214
- requested. For instance, you can find pre-trained object detection models from the Tensorpack or Detectron2 framework
215
- for coarse layout analysis, table cell detection and table recognition.
216
-
217
- ## Datasets and training scripts
218
-
219
- Training is a substantial part of getting pipelines ready for a specific domain, be it document layout analysis,
220
- document classification or NER. **deep**doctection provides training scripts for models that are based on trainers
221
- developed from the library that hosts the model code. Moreover, **deep**doctection hosts code to some well established
222
- datasets like **Publaynet** that makes it easy to experiment. It also contains mappings from widely used data
223
- formats like COCO and it has a dataset framework (akin to [**datasets**](https://github.com/huggingface/datasets)) so that
224
- setting up training on a custom dataset becomes very easy. [**This notebook**](https://github.com/deepdoctection/notebooks/blob/main/Datasets_and_Eval.ipynb)
225
- shows you how to do this.
226
-
227
- ## Evaluation
228
-
229
- **deep**doctection comes equipped with a framework that allows you to evaluate predictions of a single or multiple
230
- models in a pipeline against some ground truth. Check again [**here**](https://github.com/deepdoctection/notebooks/blob/main/Datasets_and_Eval.ipynb) how it is
231
- done.
232
-
233
- ## Inference
234
-
235
- Having set up a pipeline it takes you a few lines of code to instantiate the pipeline and after a for loop all pages will
236
- be processed through the pipeline.
237
-
238
- ```python
239
- import deepdoctection as dd
240
- from IPython.core.display import HTML
241
- from matplotlib import pyplot as plt
242
-
243
- analyzer = dd.get_dd_analyzer() # instantiate the built-in analyzer similar to the Hugging Face space demo
244
-
245
- df = analyzer.analyze(path = "/path/to/your/doc.pdf") # setting up pipeline
246
- df.reset_state() # Trigger some initialization
247
-
248
- doc = iter(df)
249
- page = next(doc)
250
-
251
- image = page.viz()
252
- plt.figure(figsize = (25,17))
253
- plt.axis('off')
254
- plt.imshow(image)
255
- ```
256
-
257
- ![text](https://github.com/deepdoctection/deepdoctection/raw/master/docs/tutorials/_imgs/dd_rm_sample.png)
258
-
259
- ```
260
- HTML(page.tables[0].html)
261
- ```
262
-
263
- ![table](https://github.com/deepdoctection/deepdoctection/raw/master/docs/tutorials/_imgs/dd_rm_table.png)
264
-
265
-
266
- ```
267
- print(page.text)
268
- ```
269
-
270
- ![table](https://github.com/deepdoctection/deepdoctection/raw/master/docs/tutorials/_imgs/dd_rm_text.png)
271
-
272
-
273
- ## Documentation
274
-
275
- There is an extensive [**documentation**](https://deepdoctection.readthedocs.io/en/latest/index.html#) available
276
- containing tutorials, design concepts and the API. We want to present things as comprehensively and understandably
277
- as possible. However, we are aware that there are still many areas where significant improvements can be made in terms
278
- of clarity, grammar and correctness. We look forward to every hint and comment that increases the quality of the
279
- documentation.
280
-
281
-
282
- ## Requirements
283
-
284
- ![requirements](https://github.com/deepdoctection/deepdoctection/raw/master/docs/tutorials/_imgs/requirements_deepdoctection_081124.png)
285
-
286
- Everything in the overview listed below the **deep**doctection layer is a necessary requirement and has to be installed
287
- separately.
288
-
289
- - Linux or macOS. (Windows is not supported but there is a [Dockerfile](./docker/pytorch-cpu-jupyter/Dockerfile) available)
290
- - Python >= 3.9
291
- - 1.13 <= PyTorch **or** 2.11 <= Tensorflow < 2.16. (For lower Tensorflow versions the code will only run on a GPU).
292
- In general, if you want to train or fine-tune models, a GPU is required.
293
-
294
- - With respect to the Deep Learning framework, you must decide between [Tensorflow](https://www.tensorflow.org/install?hl=en)
295
- and [PyTorch](https://pytorch.org/get-started/locally/).
296
- - [Tesseract](https://github.com/tesseract-ocr/tesseract) OCR engine will be used through a Python wrapper. The core
297
- engine has to be installed separately.
298
-
299
-
300
- - For release `v.0.34.0` and below **deep**doctection uses Python wrappers for [Poppler](https://poppler.freedesktop.org/) to convert PDF
301
- documents into images. For release `v.0.35.0` this dependency will be optional.
302
-
303
- The following overview shows the availability of the models in conjunction with the DL framework.
304
-
305
- | Task | PyTorch | Torchscript | Tensorflow |
306
- |-----------------------------------------------|:-------:|----------------|:------------:|
307
- | Layout detection via Detectron2/Tensorpack | ✅ | ✅ (CPU only) | ✅ (GPU only) |
308
- | Table recognition via Detectron2/Tensorpack | ✅ | ✅ (CPU only) | ✅ (GPU only) |
309
- | Table transformer via Transformers | ✅ | ❌ | ❌ |
310
- | DocTr | ✅ | ❌ | ✅ |
311
- | LayoutLM (v1, v2, v3, XLM) via Transformers | ✅ | ❌ | ❌ |
312
-
313
-
314
-
315
- ## Installation
316
-
317
- We recommend using a virtual environment. You can install the package via pip or from source.
318
-
319
- ### Install with pip from PyPi
320
-
321
- #### Minimal installation
322
-
323
- If you want to get started with a minimal setting (e.g. running the **deep**doctection analyzer with
324
- default configuration or trying the 'Get started notebook'), install **deep**doctection with
325
-
326
- ```
327
- pip install deepdoctection
328
- ```
329
-
330
- If you want to use the Tensorflow framework, please install Tensorpack separately. Detectron2 will not be installed
331
- and layout models/ table recognition models will run with Torchscript on a CPU.
332
-
333
- #### Full installation
334
-
335
- The following installation will give you ALL models available within the Deep Learning framework as well as all models
336
- that are independent of Tensorflow/PyTorch. Please note, that the dependencies are very complex. We try hard to keep
337
- the requirements up to date though.
338
-
339
- For **Tensorflow**, run
340
-
341
- ```
342
- pip install deepdoctection[tf]
343
- ```
344
-
345
- For **PyTorch**,
346
-
347
- first install **Detectron2** separately as it is not distributed via PyPi. Check the instruction
348
- [here](https://detectron2.readthedocs.io/en/latest/tutorials/install.html). Then run
349
-
350
- ```
351
- pip install deepdoctection[pt]
352
- ```
353
-
354
- This will install **deep**doctection with all dependencies listed above the **deep**doctection layer. Use this setting,
355
- if you want to get started or want to explore all features.
356
-
357
- If you want to have more control with your installation and are looking for fewer dependencies then
358
- install **deep**doctection with the basic setup only.
359
-
360
- ```
361
- pip install deepdoctection
362
- ```
363
-
364
- This will ignore all model libraries (layers above the **deep**doctection layer in the diagram) and you
365
- will be responsible for installing them yourself. Note that you will not be able to run any pipeline with this setup.
366
-
367
- For further information, please consult the [**full installation instructions**](https://deepdoctection.readthedocs.io/en/latest/install/).
368
-
369
-
370
- ### Installation from source
371
-
372
- Download the repository or clone via
373
-
374
- ```
375
- git clone https://github.com/deepdoctection/deepdoctection.git
376
- ```
377
-
378
- To get started with **Tensorflow**, run:
379
-
380
- ```
381
- cd deepdoctection
382
- pip install ".[tf]"
383
- ```
384
-
385
- Installing the full **PyTorch** setup from source will also install **Detectron2** for you:
386
-
387
- ```
388
- cd deepdoctection
389
- pip install ".[source-pt]"
390
- ```
391
-
392
- ### Running a Docker container from Docker hub
393
-
394
- Starting from release `v.0.27.0`, pre-existing Docker images can be downloaded from the
395
- [Docker hub](https://hub.docker.com/r/deepdoctection/deepdoctection).
396
-
397
- ```
398
- docker pull deepdoctection/deepdoctection:<release_tag>
399
- ```
400
-
401
- To start the container, you can use the Docker compose file `./docker/pytorch-gpu/docker-compose.yaml`.
402
- In the `.env` file provided, specify the host directory where **deep**doctection's cache should be stored.
403
- This directory will be mounted. Additionally, specify a working directory to mount files to be processed into the
404
- container.
405
-
406
- ```
407
- docker compose up -d
408
- ```
409
-
410
- will start the container.
411
-
412
- ## Credits
413
-
414
- We thank all libraries that provide high quality code and pre-trained models. Without them, it would have been impossible
415
- to develop this framework.
416
-
417
- ## Problems
418
-
419
- We try hard to eliminate bugs. We also know that the code is not free of issues. We welcome all issues relevant to this
420
- repo and try to address them as quickly as possible. Bug fixes or enhancements will be deployed in a new release every 10
421
- to 12 weeks.
422
-
423
- ## If you like **deep**doctection ...
424
-
425
- ...you can easily support the project by making it more visible. Leaving a star or a recommendation will help.
426
-
427
-
428
- ## License
429
-
430
- Distributed under the Apache 2.0 License. Check [LICENSE](https://github.com/deepdoctection/deepdoctection/blob/master/LICENSE)
431
- for additional information.
@@ -1,148 +0,0 @@
1
- deepdoctection/__init__.py,sha256=rNPP6I2lwOt7lHvcnYd1UJq9oNzYMcH9Zsd8sbs9iaQ,12882
2
- deepdoctection/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
- deepdoctection/analyzer/__init__.py,sha256=icClxrd20XutD6LxLgEPIWceSs4j_QfI3szCE-9BL2w,729
4
- deepdoctection/analyzer/_config.py,sha256=nJoTP4OZEKFxdNcMqJZFE3xCbtTGZ0w8V5nW1BCMBVQ,5124
5
- deepdoctection/analyzer/dd.py,sha256=Mnt3cwRWNSlQ8Vf2_fgRC30aU2J39QjYJWQSU22JiRY,6255
6
- deepdoctection/analyzer/factory.py,sha256=py6e2NlKG8-LYzw8k8dgvfp9o1VOuy-OMRDQelKWI9o,34520
7
- deepdoctection/configs/__init__.py,sha256=TX_P6tqDOF1LK1mi9ruAl7x0mtv1Asm8cYWCz3Pe2dk,646
8
- deepdoctection/configs/conf_dd_one.yaml,sha256=qnrDAST1PHBtdIKE_hdkZexW22FqVvNTI-PEo9wvinM,3025
9
- deepdoctection/configs/conf_tesseract.yaml,sha256=oF6szDyoi15FHvq7yFUNIEjfA_jNLhGxoowiRsz_zY4,35
10
- deepdoctection/dataflow/__init__.py,sha256=CWRHMpmJaPk4xY_oIIFubCt-z11SguWrMWxHZ7rdrvY,845
11
- deepdoctection/dataflow/base.py,sha256=z4DCComSj5wStEPjtk0093cNNGfUMiDqx8dqz36nS_o,6221
12
- deepdoctection/dataflow/common.py,sha256=MyGA2VWlNMjQdIN_Jd-o0Ec3bDJmjQit4Nv0v43OCSQ,10119
13
- deepdoctection/dataflow/custom.py,sha256=3CK_1oL9p6nbOq8WtH5_vQUo70_8Z8pXY7kG0OFqzug,6803
14
- deepdoctection/dataflow/custom_serialize.py,sha256=WocuiYo2gkih5Z9lWAoIIfUewwYSDOhHzG7ZZjKlUic,22827
15
- deepdoctection/dataflow/parallel_map.py,sha256=8FhxJBWV-kjJrJ27jQtP3yYF6Ev6rz98worO60oi96c,15837
16
- deepdoctection/dataflow/serialize.py,sha256=4pYC7m9h53JCu99waVeKpHDpsCDDdYCrSZpP2QYSsgs,4555
17
- deepdoctection/dataflow/stats.py,sha256=Bsr6v7lcesKXUYtO9wjqlzx_Yq_uyIF3Lel-tQ0i4wI,9619
18
- deepdoctection/datapoint/__init__.py,sha256=3K406GbOPhoEp8koVaSbMocmSsmWifnSZ1SPb7C1lOY,1643
19
- deepdoctection/datapoint/annotation.py,sha256=FEgz4COxVDfjic0gG7kS6iHnWLBIgFnquQ63Cbj2a4Y,22531
20
- deepdoctection/datapoint/box.py,sha256=XPhC_xHqLZJjzafg1pIS_CxnVB5-0_yk-twsZZ3ncUU,30093
21
- deepdoctection/datapoint/convert.py,sha256=h3ky-Qn6YA8Qoyy5SMUkjJq___cK0hbcwFygDyqqm-4,7123
22
- deepdoctection/datapoint/image.py,sha256=_jN46UJUsOi6GC6VEUcp3L_vLL-iYRW05RKcFLWb6Dc,34048
23
- deepdoctection/datapoint/view.py,sha256=Gj47_nlIVoaDvTtypQHzxHLJLXN2uxOhhVRHERDQJlE,52135
24
- deepdoctection/datasets/__init__.py,sha256=-A3aR90aDsHPmVM35JavfnQ2itYSCn3ujl4krRni1QU,1076
25
- deepdoctection/datasets/adapter.py,sha256=Ly_vbOAgVI73V41FUccnSX1ECTOyesW_qsuvQuvOZbw,7796
26
- deepdoctection/datasets/base.py,sha256=X5Sr0yyal9x8rqWaWYr5mA_bE11UzN1iFXmWu605q2Y,22713
27
- deepdoctection/datasets/dataflow_builder.py,sha256=cYU2zV3gZW2bFvMHimlO9VIl3BAUaCwML08cCIQ8Em4,4107
28
- deepdoctection/datasets/info.py,sha256=sC1QCOdLWFMooVmiShZ43sLUpAi3FK4d0fsLyl_9-gA,20548
29
- deepdoctection/datasets/registry.py,sha256=utiB-PnE6vc5HvjcudO0O4Urp2BC3snqswY6d8uPQAo,3388
30
- deepdoctection/datasets/save.py,sha256=Y9508Qqp8gIGN7pbGgVBBnkiC6NdCb9L2YR4wVvEUxM,3350
31
- deepdoctection/datasets/instances/__init__.py,sha256=XEc_4vT5lDn6bbZID9ujDEumWu8Ec2W-QS4pI_bfWWE,1388
32
- deepdoctection/datasets/instances/doclaynet.py,sha256=Az7USCqF0lMk1n1Dk59uUrBgBNAbKEjtUvZnCgdUH70,12286
33
- deepdoctection/datasets/instances/fintabnet.py,sha256=rOaKD5bruokkbZwjRhEz-5uAKZiMAXzmre_Ecc0YMTk,12250
34
- deepdoctection/datasets/instances/funsd.py,sha256=K6WQQOaEJrw1TcevScHET-AY9l9aj64dhLRyZ1J86p0,7183
35
- deepdoctection/datasets/instances/iiitar13k.py,sha256=HGBqR_PeTqpzLV9npWh5VdlHUg0t8vUTcrm3Z2kL6Qk,6923
36
- deepdoctection/datasets/instances/layouttest.py,sha256=nNYDUNcgWA0PilN0w0vAourwXvpcRupVStvlESTc1e0,4515
37
- deepdoctection/datasets/instances/publaynet.py,sha256=jUKq9iYW0qD5qXMe2xFz8xNxsL0KBTufuwSMkfzwaxA,5447
38
- deepdoctection/datasets/instances/pubtables1m.py,sha256=KtEHh8G98e-RjIqRbe5fbLNrD52oWA9kKDb3sUPqAk0,12524
39
- deepdoctection/datasets/instances/pubtabnet.py,sha256=TidllHJdxM8Ii0_gjliLWOWTtTaKjl89Khe1o0jLW4M,8806
40
- deepdoctection/datasets/instances/rvlcdip.py,sha256=8Vb_iCuR85N8Mx7d5KJ-lXe6PiZPjG0Jfq6EwY1qtkw,6831
41
- deepdoctection/datasets/instances/xfund.py,sha256=effTZfWI8MvGXCyRhdtwphHCr7WNF8sQOOjUCcKaTVE,9002
42
- deepdoctection/datasets/instances/xsl/__init__.py,sha256=TX_P6tqDOF1LK1mi9ruAl7x0mtv1Asm8cYWCz3Pe2dk,646
43
- deepdoctection/datasets/instances/xsl/pascal_voc.xsl,sha256=DlzFV2P8NtQKXVe96i-mIcPWmL6tsW7NQjgCuz2pCL4,1952
44
- deepdoctection/eval/__init__.py,sha256=rbns4tSEQ30QLj8h0mm3A0dCaKuN9LDxxpVypKKSXSE,932
45
- deepdoctection/eval/accmetric.py,sha256=4bND-xz9AZu9ACYRkEzn9V6Jn8MEiqnF7kxSp4k_baE,19655
46
- deepdoctection/eval/base.py,sha256=gCvhTdwEaCKplYTWPMjGvtB_0Vbq2KBJWFHq8mMlLPA,4814
47
- deepdoctection/eval/cocometric.py,sha256=4cpNmF3xZjInCOWOoVU_7itQxLI-zr0O6suNjPU2xWc,11020
48
- deepdoctection/eval/eval.py,sha256=B9PUZBjj6KzXHLOxUVn3QHiOcBQogfJmp9mjopbMo9k,19721
49
- deepdoctection/eval/registry.py,sha256=v4mp-s67vBVRu1nQzuGlYPViQnMSeIXEcF_WmvfUCoU,1051
50
- deepdoctection/eval/tedsmetric.py,sha256=rKw-734Y9CpBtIfkBSPQF2vAZxnIdWrI9Zc723P7RxI,9529
51
- deepdoctection/eval/tp_eval_callback.py,sha256=SXsXumoyxq-MIH9Cep5eUOwnNshMbKmC6mYOGwCg0pM,5283
52
- deepdoctection/extern/__init__.py,sha256=9Iks9b4Q_LynjcV167TVCoK8YsQRUcA2jjmAmDNA_X8,1056
53
- deepdoctection/extern/base.py,sha256=oRuoAduVchsR3H7Ddm-KAe_smt0N6PlQftPqJ75FWfA,28944
54
- deepdoctection/extern/d2detect.py,sha256=zrKv1yurApnjD7QZIZk_8LYCahjmN82MQUjHjv8zvkQ,22127
55
- deepdoctection/extern/deskew.py,sha256=iax1ztkguGDfD4KQMDyuvanR4J2VgpCRuVWWDTwViu4,3083
56
- deepdoctection/extern/doctrocr.py,sha256=htlFswrdRncEqC3PKjzXIifEu4ua3i09CNtrfEnlgBg,24723
57
- deepdoctection/extern/fastlang.py,sha256=F4gK-SEwcCujjxH327ZDzMGWToJ49xS_dCKcePQ9IlY,4780
58
- deepdoctection/extern/hfdetr.py,sha256=JzHrrTyzS9qh6T2TsvKboAGZkIhno2txmSoLQ5Vd-lo,12077
59
- deepdoctection/extern/hflayoutlm.py,sha256=tFaf90FRbZzhSycdp8rGkeiPywQa6UcTEEwbayIXkr0,57023
60
- deepdoctection/extern/hflm.py,sha256=kwS6kcSlY_2m9u0RzBLTRq-UMM7c1PhyUaDTvSdejus,9217
61
- deepdoctection/extern/model.py,sha256=lbVwDa3vD6VwCD_dsozcI8b4xDZs4KJ1628SxaDdtHQ,55378
62
- deepdoctection/extern/pdftext.py,sha256=KS_t27SUiYn_IOS_J2lF9lSSo22vLagxmxvYCY3CqXA,7228
63
- deepdoctection/extern/tessocr.py,sha256=gRYJsk0jBRMG_ZLXbuJeRYPSPuVjXNwThs4op1hHpoA,17450
64
- deepdoctection/extern/texocr.py,sha256=yMt5ZzKtsjd7ogrcNXba7zccGGGF9LXK194EtER6YNQ,5804
65
- deepdoctection/extern/tpdetect.py,sha256=yAk1duQdoX-_pHLHgvhU7OOSiDy863q6XUMpjpYR734,8477
66
- deepdoctection/extern/pt/__init__.py,sha256=3Cu0ZHjbYsJomru7-RQXEHihEQLegZrmLetlHiqS58I,742
67
- deepdoctection/extern/pt/nms.py,sha256=Zr1rwWSnMKg2Ukq1N4i9C-ey-9bS5TlfJ9YTL8d1RwA,1573
68
- deepdoctection/extern/pt/ptutils.py,sha256=AerrFR8nCN6-RrTbSqZhH3Q2Cn-WcC5RnE5XO9Pl1IM,1994
69
- deepdoctection/extern/tp/__init__.py,sha256=8QMkcA7tChCr1QXiA0551lZS2jTsECBrrL2YUanpFAk,706
70
- deepdoctection/extern/tp/tfutils.py,sha256=U586EuPGDLSuO6hbDrCHKshYoGmuV9WPP1jZjmQW9uw,3553
71
- deepdoctection/extern/tp/tpcompat.py,sha256=rPW_JrYtz9PbV20dZiMKm6DTrjS1A3rAdhrh8PG9BuM,5461
72
- deepdoctection/extern/tp/tpfrcnn/__init__.py,sha256=OzDaR5A8HGz9a4VwjLiR9rN1Nf1cSebv8DVEMxStFOw,703
73
- deepdoctection/extern/tp/tpfrcnn/common.py,sha256=fCxwi2u752ZlI_DtIkLC_x9j9tyo1nnirAi2PmnziD4,3830
74
- deepdoctection/extern/tp/tpfrcnn/predict.py,sha256=957dnhCByS-FZH13efFWADhodaV4lKto-ikLPetfvEQ,4338
75
- deepdoctection/extern/tp/tpfrcnn/preproc.py,sha256=oHN9keBurjdNQqXmsb5BgURB5nl-eEp0KHvO1DPRQL4,12009
76
- deepdoctection/extern/tp/tpfrcnn/config/__init__.py,sha256=RhJiXId6vUSw_Pi49SPwj0jrf61VxxptXoGeBKtT42M,705
77
- deepdoctection/extern/tp/tpfrcnn/config/config.py,sha256=-T8AwNAIPR-_5OL1oEqm-Qe9GbN6JjAPVUuUw_XfMVc,11405
78
- deepdoctection/extern/tp/tpfrcnn/modeling/__init__.py,sha256=RhJiXId6vUSw_Pi49SPwj0jrf61VxxptXoGeBKtT42M,705
79
- deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py,sha256=H7xoWhRwCh-vlHAL5hCEolKBJ8Y2xe9duZuBuLs0ZwQ,9835
80
- deepdoctection/extern/tp/tpfrcnn/modeling/generalized_rcnn.py,sha256=4jgWyja-_V44zJVfK4ySmknhnhqfb9f6ruVwbh387aE,13752
81
- deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py,sha256=UvZ8_34dNjCvxsTxCJvrlpqUpQb9gWxgwRoIKgedIog,7361
82
- deepdoctection/extern/tp/tpfrcnn/modeling/model_cascade.py,sha256=plovKReX6rFjnL_ravLUUCZ49ZFni87FlRJGK0fXqco,5777
83
- deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py,sha256=Ejd0Z2uUrdAfRjXQoS-lBVPukLlw8geP0yXcF61-nk4,11486
84
- deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py,sha256=KsL08NNy4PEvBu53HV6bMio58oqIfVrcoqpti27pZOI,18166
85
- deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py,sha256=B-rImWGWLNNe4UPJfhTpi4f1LUMCW8YJAbwoJFiG__o,4966
86
- deepdoctection/extern/tp/tpfrcnn/modeling/model_rpn.py,sha256=F7NGrvKyPZRxnl96zoFyezNzymFJvQghMjGslsc7iFg,9028
87
- deepdoctection/extern/tp/tpfrcnn/utils/__init__.py,sha256=kiPlXxHlTGN9eI7YE9BgwteOQ_nCYCuqqSLO5JfkCTQ,695
88
- deepdoctection/extern/tp/tpfrcnn/utils/box_ops.py,sha256=aBLqPg_ApaiimtBRaOsLKTZZFIBh87vVtqjLPMaX9fQ,2379
89
- deepdoctection/extern/tp/tpfrcnn/utils/np_box_ops.py,sha256=O-q1GQiOEd1lN1MQDsJvHwD2OmBO-qHNeqJ1Qnec93g,3539
90
- deepdoctection/mapper/__init__.py,sha256=Xqb34aCjslZDQnqQgCSvnloL5DbdT9eHhn-StpVPbzE,1130
91
- deepdoctection/mapper/cats.py,sha256=O06WGkpOIlSNMCy5VESl2HYOFDTuT9ls4aZIaWUv9VU,16602
92
- deepdoctection/mapper/cocostruct.py,sha256=GcbUpPFUg67pcOHQluWBFOFcGaYnlZcTmwBDERBVgCA,5978
93
- deepdoctection/mapper/d2struct.py,sha256=CPmjBIqGpBUEh-4hcLbHXQQPHIMTyNvcp9B5miCqZd0,11019
94
- deepdoctection/mapper/hfstruct.py,sha256=2PjGKsYturVJBimLT1CahYh09KSRAFEHz_QNtC162kQ,5551
95
- deepdoctection/mapper/laylmstruct.py,sha256=Es_aQOsfCkereJLOd1yaXhNAEEFJkODRuThUJ-d6hHU,42904
96
- deepdoctection/mapper/maputils.py,sha256=eI6ZcDg9W5uB6xQNBZpMIdEd86HlCxTtkJuyROdTqiw,8146
97
- deepdoctection/mapper/match.py,sha256=TBc2yAWdQbM3sS64TerOJZwhPPMxk9cmfSXctKdwIU8,10269
98
- deepdoctection/mapper/misc.py,sha256=vX-fV420Te00eD-cqTiWBV2twHqdBcBV2_7rAFRgPRg,7164
99
- deepdoctection/mapper/pascalstruct.py,sha256=TzVU1p0oiw0nOuxTFFbEB9vXJxH1v6VUvTJ7MD0manU,3828
100
- deepdoctection/mapper/prodigystruct.py,sha256=Re4Sd_zAp6qOvbXZLmMJeG0IGEfMQxebuyDeZgMcTa8,6827
101
- deepdoctection/mapper/pubstruct.py,sha256=PAJ2N1HSPNS6F2ZrIwlD7PiBhIM-rJscK_Ti8OR_IGs,23370
102
- deepdoctection/mapper/tpstruct.py,sha256=W8-K3DKM9U0LwsmAMqtEqpfol1F1t5rNXh2595GCPTk,5251
103
- deepdoctection/mapper/xfundstruct.py,sha256=_3r3c0K82fnF2h1HxA85h-9ETYrHwcERa6MNc6Ko6Z8,8807
104
- deepdoctection/pipe/__init__.py,sha256=ywTVoetftdL6plXg2YlBzMfmqBZupq7yXblSVyvvkcQ,1127
105
- deepdoctection/pipe/anngen.py,sha256=7wvp7eghDwrgcIyu1vjRxmVy4SADPbn-k4ud8y2bgjU,15338
106
- deepdoctection/pipe/base.py,sha256=wlza9aDOKnHKrXmaz8MLyLz0nMqqcIWQ-6Lu944aicE,15390
107
- deepdoctection/pipe/common.py,sha256=qnNa9ZzDJgMx62rlQ0bx_rtfhrHeLCK29swtSZjei3o,23058
108
- deepdoctection/pipe/concurrency.py,sha256=AAKRsVgaBEYNluntbDa46SBF1JZ_XqnWLDSWrNvAzEo,9657
109
- deepdoctection/pipe/doctectionpipe.py,sha256=bGW3ugky-fb-nEe-3bvO6Oc_4_6w82cQboGM_6p2eIo,12530
110
- deepdoctection/pipe/language.py,sha256=5zI0UQC6Fh12_r2pfVL42HoCGz2hpHrOhpXAn5m-rYw,5451
111
- deepdoctection/pipe/layout.py,sha256=ThULc0b1f9KyaXYk9z0qbuJ0nhIodah9PcrEq2xKpAY,5670
112
- deepdoctection/pipe/lm.py,sha256=x9NoYpivdjQF1r76a7PPrUuBEmuHP7ZukuXFDkXhXBc,17572
113
- deepdoctection/pipe/order.py,sha256=0KNiMinedjfuDVVHxJSaDL1yl4Sub-miMPcEC4gGwPA,39423
114
- deepdoctection/pipe/refine.py,sha256=dTfI396xydPdbzpfo4yqFcuxl3UAB1y-WbSQn1o76ec,22367
115
- deepdoctection/pipe/registry.py,sha256=aFx-Tn0xhVA5l5H18duNW5QoTNKQltybsEUEzsMgUfg,902
116
- deepdoctection/pipe/segment.py,sha256=lR_aCY7yJm0GCygLPRVQBv_WgWvCNgG9F_B-37qrbMU,59846
117
- deepdoctection/pipe/sub_layout.py,sha256=D73H5b2Zl35fN58TaY0_nGhwI9Nwj3wqDdDPz8ce9Fg,13538
118
- deepdoctection/pipe/text.py,sha256=tLlJtneM__WsrAvp4pQFqwNlmq2RLqKqiPXlJ2lkniU,10483
119
- deepdoctection/pipe/transform.py,sha256=eCSRbyxHLz11owOHFA9UDX7tOJPZG2eiPWIGJv2odbk,4890
120
- deepdoctection/train/__init__.py,sha256=YFTRAZF1F7cEAKTdAIi1BLyYb6rSRcwq09Ui5Lu8d6E,1071
121
- deepdoctection/train/d2_frcnn_train.py,sha256=sFc_G-mEpaM8d1CCE0_6Gl4nBh11X2RYRBA3p_ylFJQ,16000
122
- deepdoctection/train/hf_detr_train.py,sha256=cFZdV0XEYsReMBCrIYZsNFT6E2OBpp6vb1FJAE-qGRs,12073
123
- deepdoctection/train/hf_layoutlm_train.py,sha256=8kiGp_8GEyqCkLgeMgCJOLJWSVoKWkUBHsZtDjZOcRk,22556
124
- deepdoctection/train/tp_frcnn_train.py,sha256=pEpXokSVGveqo82pRnhnAmHPmjQ_8wQWpqM4ZyNHJgs,13049
125
- deepdoctection/utils/__init__.py,sha256=brBceRWeov9WXMiJTjyJOF2rHMP8trGGRRjhMdZ61nI,2371
126
- deepdoctection/utils/concurrency.py,sha256=nIhpkSncmv0LBB8PtcOLY-BsRGlfcDpz7foVdgzZd20,4598
127
- deepdoctection/utils/context.py,sha256=VSnJnTtRGuq3w-0-syTf9DXOhR7WsPvWLLWTxKIBYec,4186
128
- deepdoctection/utils/develop.py,sha256=4HyTarkFbJwctL-Hgu1TU_LSJppHvaroDbcyHsxhIA8,3444
129
- deepdoctection/utils/env_info.py,sha256=TnCA-LOTj4WIHd9yvn1AaoPWsLmPgc42l-BJmGV6zmM,19147
130
- deepdoctection/utils/error.py,sha256=_3q9VepKfEhsM3H033_Fu0hwBzMSjsWALsjyJbGAZr8,2367
131
- deepdoctection/utils/file_utils.py,sha256=IRElrcND0YEiU1QELw5hfXeNA39uE2_nyzh9-X7YcxI,19477
132
- deepdoctection/utils/fs.py,sha256=x842BxUP5bbjJ2cofw-g4dKJv4QAaGzda4qnAazabO4,10281
133
- deepdoctection/utils/identifier.py,sha256=QkNaGGqPynHwDPnd3_m8iur4Cv64rcQa7qolCE7Qphk,2159
134
- deepdoctection/utils/logger.py,sha256=J0OVKiXP_2A82MWbbJoOeMEJ-75aZu5npgaS_yI6mVA,10003
135
- deepdoctection/utils/metacfg.py,sha256=hD76KQ_RnD_5B02qLI2Zxf3WfnsnXhEI_KUTKpw91RI,5711
136
- deepdoctection/utils/mocks.py,sha256=IkN3-IzAl4eX0ibgKIHg8IY7ykVw6BnpF6XnxKnKaZI,2389
137
- deepdoctection/utils/pdf_utils.py,sha256=Fi0eZ2GbnO7N61Rd8b8YRKRff4dalHAzkcn3zpGPoic,13119
138
- deepdoctection/utils/settings.py,sha256=nmA7aqzHTMrAhodNbPjr-afLRt2EWjpx8ipQeE-bqqs,12590
139
- deepdoctection/utils/tqdm.py,sha256=cBUtR0L1x0KMeYrLP2rrzyzCamCjpQAKroHXLv81_pk,1820
140
- deepdoctection/utils/transform.py,sha256=5mY5D6hhk6cKFp0T1LJ2_jMjjBxJopcFZffAN5PKvFU,13699
141
- deepdoctection/utils/types.py,sha256=ti4WdtIJSg3TGK_YPkkoY9PYGMnR2tTX6Xfik8U1pNk,2986
142
- deepdoctection/utils/utils.py,sha256=csVs_VvCq4QBETPoE2JdTTL4MFYnD4xh-Js5vRb612g,6492
143
- deepdoctection/utils/viz.py,sha256=Jf8ePNYWlpuyaS6SeTYQ4OyA3eNhtgjvAQZnGNdgHC0,27051
144
- deepdoctection-0.42.1.dist-info/licenses/LICENSE,sha256=GQ0rUvuGdrMNEI3iHK5UQx6dIMU1QwAuyXsxUHn5MEQ,11351
145
- deepdoctection-0.42.1.dist-info/METADATA,sha256=ckwg7bNCfNi-IPFAnYURuZULlHgrf2SfikingNB8ztM,19763
146
- deepdoctection-0.42.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
147
- deepdoctection-0.42.1.dist-info/top_level.txt,sha256=hs2DdoOL9h4mnHhmO82BT4pz4QATIoOZ20PZmlnxFI8,15
148
- deepdoctection-0.42.1.dist-info/RECORD,,