deepdoctection 0.43.4__tar.gz → 0.43.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of deepdoctection might be problematic. Click here for more details.
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/PKG-INFO +63 -15
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/README.md +62 -14
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/__init__.py +1 -1
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/datasets/base.py +1 -1
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection.egg-info/PKG-INFO +63 -15
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/LICENSE +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/analyzer/__init__.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/analyzer/config.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/analyzer/dd.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/analyzer/factory.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/configs/__init__.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/configs/conf_dd_one.yaml +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/configs/conf_tesseract.yaml +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/configs/profiles.jsonl +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/dataflow/__init__.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/dataflow/base.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/dataflow/common.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/dataflow/custom.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/dataflow/custom_serialize.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/dataflow/parallel_map.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/dataflow/serialize.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/dataflow/stats.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/datapoint/__init__.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/datapoint/annotation.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/datapoint/box.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/datapoint/convert.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/datapoint/image.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/datapoint/view.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/datasets/__init__.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/datasets/adapter.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/datasets/dataflow_builder.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/datasets/info.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/datasets/instances/__init__.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/datasets/instances/doclaynet.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/datasets/instances/fintabnet.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/datasets/instances/funsd.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/datasets/instances/iiitar13k.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/datasets/instances/layouttest.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/datasets/instances/publaynet.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/datasets/instances/pubtables1m.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/datasets/instances/pubtabnet.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/datasets/instances/rvlcdip.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/datasets/instances/xfund.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/datasets/instances/xsl/__init__.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/datasets/instances/xsl/pascal_voc.xsl +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/datasets/registry.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/datasets/save.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/eval/__init__.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/eval/accmetric.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/eval/base.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/eval/cocometric.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/eval/eval.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/eval/registry.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/eval/tedsmetric.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/eval/tp_eval_callback.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/extern/__init__.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/extern/base.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/extern/d2detect.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/extern/deskew.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/extern/doctrocr.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/extern/fastlang.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/extern/hfdetr.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/extern/hflayoutlm.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/extern/hflm.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/extern/model.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/extern/pdftext.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/extern/pt/__init__.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/extern/pt/nms.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/extern/pt/ptutils.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/extern/tessocr.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/extern/texocr.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/extern/tp/__init__.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/extern/tp/tfutils.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/extern/tp/tpcompat.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/extern/tp/tpfrcnn/__init__.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/extern/tp/tpfrcnn/common.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/extern/tp/tpfrcnn/config/__init__.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/extern/tp/tpfrcnn/config/config.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/extern/tp/tpfrcnn/modeling/__init__.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/extern/tp/tpfrcnn/modeling/generalized_rcnn.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/extern/tp/tpfrcnn/modeling/model_cascade.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/extern/tp/tpfrcnn/modeling/model_rpn.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/extern/tp/tpfrcnn/predict.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/extern/tp/tpfrcnn/preproc.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/extern/tp/tpfrcnn/utils/__init__.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/extern/tp/tpfrcnn/utils/box_ops.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/extern/tp/tpfrcnn/utils/np_box_ops.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/extern/tpdetect.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/mapper/__init__.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/mapper/cats.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/mapper/cocostruct.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/mapper/d2struct.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/mapper/hfstruct.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/mapper/laylmstruct.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/mapper/maputils.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/mapper/match.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/mapper/misc.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/mapper/pascalstruct.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/mapper/prodigystruct.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/mapper/pubstruct.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/mapper/tpstruct.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/mapper/xfundstruct.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/pipe/__init__.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/pipe/anngen.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/pipe/base.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/pipe/common.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/pipe/concurrency.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/pipe/doctectionpipe.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/pipe/language.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/pipe/layout.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/pipe/lm.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/pipe/order.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/pipe/refine.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/pipe/registry.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/pipe/segment.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/pipe/sub_layout.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/pipe/text.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/pipe/transform.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/py.typed +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/train/__init__.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/train/d2_frcnn_train.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/train/hf_detr_train.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/train/hf_layoutlm_train.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/train/tp_frcnn_train.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/utils/__init__.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/utils/concurrency.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/utils/context.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/utils/develop.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/utils/env_info.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/utils/error.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/utils/file_utils.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/utils/fs.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/utils/identifier.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/utils/logger.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/utils/metacfg.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/utils/mocks.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/utils/pdf_utils.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/utils/settings.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/utils/tqdm.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/utils/transform.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/utils/types.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/utils/utils.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/utils/viz.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection.egg-info/SOURCES.txt +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection.egg-info/dependency_links.txt +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection.egg-info/requires.txt +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection.egg-info/top_level.txt +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/setup.cfg +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/setup.py +0 -0
- {deepdoctection-0.43.4 → deepdoctection-0.43.5}/tests/test_utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: deepdoctection
|
|
3
|
-
Version: 0.43.4
|
|
3
|
+
Version: 0.43.5
|
|
4
4
|
Summary: Repository for Document AI
|
|
5
5
|
Home-page: https://github.com/deepdoctection/deepdoctection
|
|
6
6
|
Author: Dr. Janis Meyer
|
|
@@ -168,13 +168,9 @@ Version `v.0.43` includes a significant redesign of the Analyzer's default confi
|
|
|
168
168
|
</p>
|
|
169
169
|
|
|
170
170
|
|
|
171
|
-
|
|
172
171
|
**deep**doctection is a Python library that orchestrates Scan and PDF document layout analysis and extraction for RAG.
|
|
173
172
|
It also provides a framework for training, evaluating and inferencing Document AI models.
|
|
174
173
|
|
|
175
|
-
Check the demo of a document layout analysis pipeline with OCR on 🤗
|
|
176
|
-
[**Hugging Face spaces**](https://huggingface.co/spaces/deepdoctection/deepdoctection).
|
|
177
|
-
|
|
178
174
|
# Overview
|
|
179
175
|
|
|
180
176
|
- Document layout analysis and table recognition in PyTorch with
|
|
@@ -197,6 +193,54 @@ for an easy start.
|
|
|
197
193
|
|
|
198
194
|
Check the [**release notes**](https://github.com/deepdoctection/deepdoctection/releases) for recent updates.
|
|
199
195
|
|
|
196
|
+
|
|
197
|
+
----------------------------------------------------------------------------------------
|
|
198
|
+
|
|
199
|
+
# Hugging Face Space Demo
|
|
200
|
+
|
|
201
|
+
Check the demo of a document layout analysis pipeline with OCR on 🤗
|
|
202
|
+
[**Hugging Face spaces**](https://huggingface.co/spaces/deepdoctection/deepdoctection) or use the gradio client.
|
|
203
|
+
|
|
204
|
+
```
|
|
205
|
+
pip install gradio_client # requires Python >= 3.10
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
To process a single image:
|
|
209
|
+
|
|
210
|
+
```python
|
|
211
|
+
from gradio_client import Client, handle_file
|
|
212
|
+
|
|
213
|
+
if __name__ == "__main__":
|
|
214
|
+
|
|
215
|
+
client = Client("deepdoctection/deepdoctection")
|
|
216
|
+
result = client.predict(
|
|
217
|
+
img=handle_file('/local_path/to/dir/file_name.jpeg'), # accepts image files, e.g. JPEG, PNG
|
|
218
|
+
pdf=None,
|
|
219
|
+
max_datapoints = 2,
|
|
220
|
+
api_name = "/analyze_image"
|
|
221
|
+
)
|
|
222
|
+
print(result)
|
|
223
|
+
```
|
|
224
|
+
|
|
225
|
+
To process a PDF document:
|
|
226
|
+
|
|
227
|
+
```python
|
|
228
|
+
from gradio_client import Client, handle_file
|
|
229
|
+
|
|
230
|
+
if __name__ == "__main__":
|
|
231
|
+
|
|
232
|
+
client = Client("deepdoctection/deepdoctection")
|
|
233
|
+
result = client.predict(
|
|
234
|
+
img=None,
|
|
235
|
+
pdf=handle_file("/local_path/to/dir/your_doc.pdf"),
|
|
236
|
+
max_datapoints = 2, # increase to process up to 9 pages
|
|
237
|
+
api_name = "/analyze_image"
|
|
238
|
+
)
|
|
239
|
+
print(result)
|
|
240
|
+
```
|
|
241
|
+
|
|
242
|
+
--------------------------------------------------------------------------------------------------------
|
|
243
|
+
|
|
200
244
|
# Example
|
|
201
245
|
|
|
202
246
|
```python
|
|
@@ -242,8 +286,9 @@ alt="text" width="40%">
|
|
|
242
286
|
</p>
|
|
243
287
|
|
|
244
288
|
|
|
289
|
+
-----------------------------------------------------------------------------------------
|
|
245
290
|
|
|
246
|
-
|
|
291
|
+
# Requirements
|
|
247
292
|
|
|
248
293
|

|
|
249
294
|
|
|
@@ -262,11 +307,13 @@ alt="text" width="40%">
|
|
|
262
307
|
| DocTr | ✅ | ❌ | ✅ |
|
|
263
308
|
| LayoutLM (v1, v2, v3, XLM) via Transformers | ✅ | ❌ | ❌ |
|
|
264
309
|
|
|
265
|
-
|
|
310
|
+
------------------------------------------------------------------------------------------
|
|
311
|
+
|
|
312
|
+
# Installation
|
|
266
313
|
|
|
267
314
|
We recommend using a virtual environment.
|
|
268
315
|
|
|
269
|
-
|
|
316
|
+
## Get started installation
|
|
270
317
|
|
|
271
318
|
For a simple setup which is enough to parse documents with the default setting, install the following:
|
|
272
319
|
|
|
@@ -288,7 +335,7 @@ pip install deepdoctection
|
|
|
288
335
|
|
|
289
336
|
Both setups are sufficient to run the [**introduction notebook**](https://github.com/deepdoctection/notebooks/blob/main/Get_Started.ipynb).
|
|
290
337
|
|
|
291
|
-
|
|
338
|
+
### Full installation
|
|
292
339
|
|
|
293
340
|
The following installation will give you ALL models available within the Deep Learning framework as well as all models
|
|
294
341
|
that are independent of Tensorflow/PyTorch.
|
|
@@ -318,7 +365,7 @@ pip install deepdoctection[tf]
|
|
|
318
365
|
For further information, please consult the [**full installation instructions**](https://deepdoctection.readthedocs.io/en/latest/install/).
|
|
319
366
|
|
|
320
367
|
|
|
321
|
-
|
|
368
|
+
## Installation from source
|
|
322
369
|
|
|
323
370
|
Download the repository or clone via
|
|
324
371
|
|
|
@@ -341,8 +388,7 @@ pip install ".[tf]" # or "pip install -e .[tf]"
|
|
|
341
388
|
```
|
|
342
389
|
|
|
343
390
|
|
|
344
|
-
|
|
345
|
-
### Running a Docker container from Docker hub
|
|
391
|
+
## Running a Docker container from Docker hub
|
|
346
392
|
|
|
347
393
|
Pre-existing Docker images can be downloaded from the [Docker hub](https://hub.docker.com/r/deepdoctection/deepdoctection).
|
|
348
394
|
|
|
@@ -360,16 +406,18 @@ docker compose up -d
|
|
|
360
406
|
|
|
361
407
|
will start the container. There is no endpoint exposed, though.
|
|
362
408
|
|
|
363
|
-
|
|
409
|
+
-----------------------------------------------------------------------------------------------
|
|
410
|
+
|
|
411
|
+
# Credits
|
|
364
412
|
|
|
365
413
|
We thank all libraries that provide high quality code and pre-trained models. Without, it would have been impossible
|
|
366
414
|
to develop this framework.
|
|
367
415
|
|
|
368
416
|
|
|
369
|
-
|
|
417
|
+
# If you like **deep**doctection ...
|
|
370
418
|
|
|
371
419
|
...you can easily support the project by making it more visible. Leaving a star or a recommendation will help.
|
|
372
420
|
|
|
373
|
-
|
|
421
|
+
# License
|
|
374
422
|
|
|
375
423
|
Distributed under the Apache 2.0 License. Check [LICENSE](https://github.com/deepdoctection/deepdoctection/blob/master/LICENSE) for additional information.
|
|
@@ -25,13 +25,9 @@ Version `v.0.43` includes a significant redesign of the Analyzer's default confi
|
|
|
25
25
|
</p>
|
|
26
26
|
|
|
27
27
|
|
|
28
|
-
|
|
29
28
|
**deep**doctection is a Python library that orchestrates Scan and PDF document layout analysis and extraction for RAG.
|
|
30
29
|
It also provides a framework for training, evaluating and inferencing Document AI models.
|
|
31
30
|
|
|
32
|
-
Check the demo of a document layout analysis pipeline with OCR on 🤗
|
|
33
|
-
[**Hugging Face spaces**](https://huggingface.co/spaces/deepdoctection/deepdoctection).
|
|
34
|
-
|
|
35
31
|
# Overview
|
|
36
32
|
|
|
37
33
|
- Document layout analysis and table recognition in PyTorch with
|
|
@@ -54,6 +50,54 @@ for an easy start.
|
|
|
54
50
|
|
|
55
51
|
Check the [**release notes**](https://github.com/deepdoctection/deepdoctection/releases) for recent updates.
|
|
56
52
|
|
|
53
|
+
|
|
54
|
+
----------------------------------------------------------------------------------------
|
|
55
|
+
|
|
56
|
+
# Hugging Face Space Demo
|
|
57
|
+
|
|
58
|
+
Check the demo of a document layout analysis pipeline with OCR on 🤗
|
|
59
|
+
[**Hugging Face spaces**](https://huggingface.co/spaces/deepdoctection/deepdoctection) or use the gradio client.
|
|
60
|
+
|
|
61
|
+
```
|
|
62
|
+
pip install gradio_client # requires Python >= 3.10
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
To process a single image:
|
|
66
|
+
|
|
67
|
+
```python
|
|
68
|
+
from gradio_client import Client, handle_file
|
|
69
|
+
|
|
70
|
+
if __name__ == "__main__":
|
|
71
|
+
|
|
72
|
+
client = Client("deepdoctection/deepdoctection")
|
|
73
|
+
result = client.predict(
|
|
74
|
+
img=handle_file('/local_path/to/dir/file_name.jpeg'), # accepts image files, e.g. JPEG, PNG
|
|
75
|
+
pdf=None,
|
|
76
|
+
max_datapoints = 2,
|
|
77
|
+
api_name = "/analyze_image"
|
|
78
|
+
)
|
|
79
|
+
print(result)
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
To process a PDF document:
|
|
83
|
+
|
|
84
|
+
```python
|
|
85
|
+
from gradio_client import Client, handle_file
|
|
86
|
+
|
|
87
|
+
if __name__ == "__main__":
|
|
88
|
+
|
|
89
|
+
client = Client("deepdoctection/deepdoctection")
|
|
90
|
+
result = client.predict(
|
|
91
|
+
img=None,
|
|
92
|
+
pdf=handle_file("/local_path/to/dir/your_doc.pdf"),
|
|
93
|
+
max_datapoints = 2, # increase to process up to 9 pages
|
|
94
|
+
api_name = "/analyze_image"
|
|
95
|
+
)
|
|
96
|
+
print(result)
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
--------------------------------------------------------------------------------------------------------
|
|
100
|
+
|
|
57
101
|
# Example
|
|
58
102
|
|
|
59
103
|
```python
|
|
@@ -99,8 +143,9 @@ alt="text" width="40%">
|
|
|
99
143
|
</p>
|
|
100
144
|
|
|
101
145
|
|
|
146
|
+
-----------------------------------------------------------------------------------------
|
|
102
147
|
|
|
103
|
-
|
|
148
|
+
# Requirements
|
|
104
149
|
|
|
105
150
|

|
|
106
151
|
|
|
@@ -119,11 +164,13 @@ alt="text" width="40%">
|
|
|
119
164
|
| DocTr | ✅ | ❌ | ✅ |
|
|
120
165
|
| LayoutLM (v1, v2, v3, XLM) via Transformers | ✅ | ❌ | ❌ |
|
|
121
166
|
|
|
122
|
-
|
|
167
|
+
------------------------------------------------------------------------------------------
|
|
168
|
+
|
|
169
|
+
# Installation
|
|
123
170
|
|
|
124
171
|
We recommend using a virtual environment.
|
|
125
172
|
|
|
126
|
-
|
|
173
|
+
## Get started installation
|
|
127
174
|
|
|
128
175
|
For a simple setup which is enough to parse documents with the default setting, install the following:
|
|
129
176
|
|
|
@@ -145,7 +192,7 @@ pip install deepdoctection
|
|
|
145
192
|
|
|
146
193
|
Both setups are sufficient to run the [**introduction notebook**](https://github.com/deepdoctection/notebooks/blob/main/Get_Started.ipynb).
|
|
147
194
|
|
|
148
|
-
|
|
195
|
+
### Full installation
|
|
149
196
|
|
|
150
197
|
The following installation will give you ALL models available within the Deep Learning framework as well as all models
|
|
151
198
|
that are independent of Tensorflow/PyTorch.
|
|
@@ -175,7 +222,7 @@ pip install deepdoctection[tf]
|
|
|
175
222
|
For further information, please consult the [**full installation instructions**](https://deepdoctection.readthedocs.io/en/latest/install/).
|
|
176
223
|
|
|
177
224
|
|
|
178
|
-
|
|
225
|
+
## Installation from source
|
|
179
226
|
|
|
180
227
|
Download the repository or clone via
|
|
181
228
|
|
|
@@ -198,8 +245,7 @@ pip install ".[tf]" # or "pip install -e .[tf]"
|
|
|
198
245
|
```
|
|
199
246
|
|
|
200
247
|
|
|
201
|
-
|
|
202
|
-
### Running a Docker container from Docker hub
|
|
248
|
+
## Running a Docker container from Docker hub
|
|
203
249
|
|
|
204
250
|
Pre-existing Docker images can be downloaded from the [Docker hub](https://hub.docker.com/r/deepdoctection/deepdoctection).
|
|
205
251
|
|
|
@@ -217,16 +263,18 @@ docker compose up -d
|
|
|
217
263
|
|
|
218
264
|
will start the container. There is no endpoint exposed, though.
|
|
219
265
|
|
|
220
|
-
|
|
266
|
+
-----------------------------------------------------------------------------------------------
|
|
267
|
+
|
|
268
|
+
# Credits
|
|
221
269
|
|
|
222
270
|
We thank all libraries that provide high quality code and pre-trained models. Without, it would have been impossible
|
|
223
271
|
to develop this framework.
|
|
224
272
|
|
|
225
273
|
|
|
226
|
-
|
|
274
|
+
# If you like **deep**doctection ...
|
|
227
275
|
|
|
228
276
|
...you can easily support the project by making it more visible. Leaving a star or a recommendation will help.
|
|
229
277
|
|
|
230
|
-
|
|
278
|
+
# License
|
|
231
279
|
|
|
232
280
|
Distributed under the Apache 2.0 License. Check [LICENSE](https://github.com/deepdoctection/deepdoctection/blob/master/LICENSE) for additional information.
|
|
@@ -484,7 +484,7 @@ class CustomDataset(DatasetBase):
|
|
|
484
484
|
return DatasetInfo(
|
|
485
485
|
name=self.name,
|
|
486
486
|
type=self.type,
|
|
487
|
-
|
|
487
|
+
short_description=self.description if self.description is not None else "",
|
|
488
488
|
license="",
|
|
489
489
|
url="",
|
|
490
490
|
splits={},
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: deepdoctection
|
|
3
|
-
Version: 0.43.4
|
|
3
|
+
Version: 0.43.5
|
|
4
4
|
Summary: Repository for Document AI
|
|
5
5
|
Home-page: https://github.com/deepdoctection/deepdoctection
|
|
6
6
|
Author: Dr. Janis Meyer
|
|
@@ -168,13 +168,9 @@ Version `v.0.43` includes a significant redesign of the Analyzer's default confi
|
|
|
168
168
|
</p>
|
|
169
169
|
|
|
170
170
|
|
|
171
|
-
|
|
172
171
|
**deep**doctection is a Python library that orchestrates Scan and PDF document layout analysis and extraction for RAG.
|
|
173
172
|
It also provides a framework for training, evaluating and inferencing Document AI models.
|
|
174
173
|
|
|
175
|
-
Check the demo of a document layout analysis pipeline with OCR on 🤗
|
|
176
|
-
[**Hugging Face spaces**](https://huggingface.co/spaces/deepdoctection/deepdoctection).
|
|
177
|
-
|
|
178
174
|
# Overview
|
|
179
175
|
|
|
180
176
|
- Document layout analysis and table recognition in PyTorch with
|
|
@@ -197,6 +193,54 @@ for an easy start.
|
|
|
197
193
|
|
|
198
194
|
Check the [**release notes**](https://github.com/deepdoctection/deepdoctection/releases) for recent updates.
|
|
199
195
|
|
|
196
|
+
|
|
197
|
+
----------------------------------------------------------------------------------------
|
|
198
|
+
|
|
199
|
+
# Hugging Face Space Demo
|
|
200
|
+
|
|
201
|
+
Check the demo of a document layout analysis pipeline with OCR on 🤗
|
|
202
|
+
[**Hugging Face spaces**](https://huggingface.co/spaces/deepdoctection/deepdoctection) or use the gradio client.
|
|
203
|
+
|
|
204
|
+
```
|
|
205
|
+
pip install gradio_client # requires Python >= 3.10
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
To process a single image:
|
|
209
|
+
|
|
210
|
+
```python
|
|
211
|
+
from gradio_client import Client, handle_file
|
|
212
|
+
|
|
213
|
+
if __name__ == "__main__":
|
|
214
|
+
|
|
215
|
+
client = Client("deepdoctection/deepdoctection")
|
|
216
|
+
result = client.predict(
|
|
217
|
+
img=handle_file('/local_path/to/dir/file_name.jpeg'), # accepts image files, e.g. JPEG, PNG
|
|
218
|
+
pdf=None,
|
|
219
|
+
max_datapoints = 2,
|
|
220
|
+
api_name = "/analyze_image"
|
|
221
|
+
)
|
|
222
|
+
print(result)
|
|
223
|
+
```
|
|
224
|
+
|
|
225
|
+
To process a PDF document:
|
|
226
|
+
|
|
227
|
+
```python
|
|
228
|
+
from gradio_client import Client, handle_file
|
|
229
|
+
|
|
230
|
+
if __name__ == "__main__":
|
|
231
|
+
|
|
232
|
+
client = Client("deepdoctection/deepdoctection")
|
|
233
|
+
result = client.predict(
|
|
234
|
+
img=None,
|
|
235
|
+
pdf=handle_file("/local_path/to/dir/your_doc.pdf"),
|
|
236
|
+
max_datapoints = 2, # increase to process up to 9 pages
|
|
237
|
+
api_name = "/analyze_image"
|
|
238
|
+
)
|
|
239
|
+
print(result)
|
|
240
|
+
```
|
|
241
|
+
|
|
242
|
+
--------------------------------------------------------------------------------------------------------
|
|
243
|
+
|
|
200
244
|
# Example
|
|
201
245
|
|
|
202
246
|
```python
|
|
@@ -242,8 +286,9 @@ alt="text" width="40%">
|
|
|
242
286
|
</p>
|
|
243
287
|
|
|
244
288
|
|
|
289
|
+
-----------------------------------------------------------------------------------------
|
|
245
290
|
|
|
246
|
-
|
|
291
|
+
# Requirements
|
|
247
292
|
|
|
248
293
|

|
|
249
294
|
|
|
@@ -262,11 +307,13 @@ alt="text" width="40%">
|
|
|
262
307
|
| DocTr | ✅ | ❌ | ✅ |
|
|
263
308
|
| LayoutLM (v1, v2, v3, XLM) via Transformers | ✅ | ❌ | ❌ |
|
|
264
309
|
|
|
265
|
-
|
|
310
|
+
------------------------------------------------------------------------------------------
|
|
311
|
+
|
|
312
|
+
# Installation
|
|
266
313
|
|
|
267
314
|
We recommend using a virtual environment.
|
|
268
315
|
|
|
269
|
-
|
|
316
|
+
## Get started installation
|
|
270
317
|
|
|
271
318
|
For a simple setup which is enough to parse documents with the default setting, install the following:
|
|
272
319
|
|
|
@@ -288,7 +335,7 @@ pip install deepdoctection
|
|
|
288
335
|
|
|
289
336
|
Both setups are sufficient to run the [**introduction notebook**](https://github.com/deepdoctection/notebooks/blob/main/Get_Started.ipynb).
|
|
290
337
|
|
|
291
|
-
|
|
338
|
+
### Full installation
|
|
292
339
|
|
|
293
340
|
The following installation will give you ALL models available within the Deep Learning framework as well as all models
|
|
294
341
|
that are independent of Tensorflow/PyTorch.
|
|
@@ -318,7 +365,7 @@ pip install deepdoctection[tf]
|
|
|
318
365
|
For further information, please consult the [**full installation instructions**](https://deepdoctection.readthedocs.io/en/latest/install/).
|
|
319
366
|
|
|
320
367
|
|
|
321
|
-
|
|
368
|
+
## Installation from source
|
|
322
369
|
|
|
323
370
|
Download the repository or clone via
|
|
324
371
|
|
|
@@ -341,8 +388,7 @@ pip install ".[tf]" # or "pip install -e .[tf]"
|
|
|
341
388
|
```
|
|
342
389
|
|
|
343
390
|
|
|
344
|
-
|
|
345
|
-
### Running a Docker container from Docker hub
|
|
391
|
+
## Running a Docker container from Docker hub
|
|
346
392
|
|
|
347
393
|
Pre-existing Docker images can be downloaded from the [Docker hub](https://hub.docker.com/r/deepdoctection/deepdoctection).
|
|
348
394
|
|
|
@@ -360,16 +406,18 @@ docker compose up -d
|
|
|
360
406
|
|
|
361
407
|
will start the container. There is no endpoint exposed, though.
|
|
362
408
|
|
|
363
|
-
|
|
409
|
+
-----------------------------------------------------------------------------------------------
|
|
410
|
+
|
|
411
|
+
# Credits
|
|
364
412
|
|
|
365
413
|
We thank all libraries that provide high-quality code and pre-trained models. Without them, it would have been impossible
|
|
366
414
|
to develop this framework.
|
|
367
415
|
|
|
368
416
|
|
|
369
|
-
|
|
417
|
+
# If you like **deep**doctection ...
|
|
370
418
|
|
|
371
419
|
...you can easily support the project by making it more visible. Leaving a star or a recommendation will help.
|
|
372
420
|
|
|
373
|
-
|
|
421
|
+
# License
|
|
374
422
|
|
|
375
423
|
Distributed under the Apache 2.0 License. Check [LICENSE](https://github.com/deepdoctection/deepdoctection/blob/master/LICENSE) for additional information.
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/datasets/instances/__init__.py
RENAMED
|
File without changes
|
{deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/datasets/instances/doclaynet.py
RENAMED
|
File without changes
|
{deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/datasets/instances/fintabnet.py
RENAMED
|
File without changes
|
|
File without changes
|
{deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/datasets/instances/iiitar13k.py
RENAMED
|
File without changes
|
{deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/datasets/instances/layouttest.py
RENAMED
|
File without changes
|
{deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/datasets/instances/publaynet.py
RENAMED
|
File without changes
|
{deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/datasets/instances/pubtables1m.py
RENAMED
|
File without changes
|
{deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/datasets/instances/pubtabnet.py
RENAMED
|
File without changes
|
{deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/datasets/instances/rvlcdip.py
RENAMED
|
File without changes
|
|
File without changes
|
{deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/datasets/instances/xsl/__init__.py
RENAMED
|
File without changes
|
{deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/datasets/instances/xsl/pascal_voc.xsl
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/extern/tp/tpfrcnn/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/extern/tp/tpfrcnn/config/__init__.py
RENAMED
|
File without changes
|
{deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/extern/tp/tpfrcnn/config/config.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/extern/tp/tpfrcnn/utils/__init__.py
RENAMED
|
File without changes
|
{deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/extern/tp/tpfrcnn/utils/box_ops.py
RENAMED
|
File without changes
|
{deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection/extern/tp/tpfrcnn/utils/np_box_ops.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{deepdoctection-0.43.4 → deepdoctection-0.43.5}/deepdoctection.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|