deepdoctection 0.43.4__tar.gz → 0.43.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of deepdoctection might be problematic. Click here for more details.

Files changed (155) hide show
  1. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/PKG-INFO +65 -17
  2. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/README.md +64 -16
  3. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/__init__.py +1 -1
  4. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/analyzer/config.py +1 -1
  5. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/configs/profiles.jsonl +1 -0
  6. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/datapoint/view.py +25 -13
  7. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/datasets/base.py +1 -1
  8. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/extern/model.py +1 -1
  9. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection.egg-info/PKG-INFO +65 -17
  10. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/LICENSE +0 -0
  11. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/analyzer/__init__.py +0 -0
  12. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/analyzer/dd.py +0 -0
  13. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/analyzer/factory.py +0 -0
  14. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/configs/__init__.py +0 -0
  15. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/configs/conf_dd_one.yaml +0 -0
  16. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/configs/conf_tesseract.yaml +0 -0
  17. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/dataflow/__init__.py +0 -0
  18. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/dataflow/base.py +0 -0
  19. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/dataflow/common.py +0 -0
  20. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/dataflow/custom.py +0 -0
  21. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/dataflow/custom_serialize.py +0 -0
  22. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/dataflow/parallel_map.py +0 -0
  23. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/dataflow/serialize.py +0 -0
  24. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/dataflow/stats.py +0 -0
  25. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/datapoint/__init__.py +0 -0
  26. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/datapoint/annotation.py +0 -0
  27. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/datapoint/box.py +0 -0
  28. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/datapoint/convert.py +0 -0
  29. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/datapoint/image.py +0 -0
  30. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/datasets/__init__.py +0 -0
  31. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/datasets/adapter.py +0 -0
  32. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/datasets/dataflow_builder.py +0 -0
  33. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/datasets/info.py +0 -0
  34. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/datasets/instances/__init__.py +0 -0
  35. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/datasets/instances/doclaynet.py +0 -0
  36. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/datasets/instances/fintabnet.py +0 -0
  37. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/datasets/instances/funsd.py +0 -0
  38. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/datasets/instances/iiitar13k.py +0 -0
  39. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/datasets/instances/layouttest.py +0 -0
  40. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/datasets/instances/publaynet.py +0 -0
  41. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/datasets/instances/pubtables1m.py +0 -0
  42. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/datasets/instances/pubtabnet.py +0 -0
  43. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/datasets/instances/rvlcdip.py +0 -0
  44. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/datasets/instances/xfund.py +0 -0
  45. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/datasets/instances/xsl/__init__.py +0 -0
  46. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/datasets/instances/xsl/pascal_voc.xsl +0 -0
  47. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/datasets/registry.py +0 -0
  48. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/datasets/save.py +0 -0
  49. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/eval/__init__.py +0 -0
  50. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/eval/accmetric.py +0 -0
  51. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/eval/base.py +0 -0
  52. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/eval/cocometric.py +0 -0
  53. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/eval/eval.py +0 -0
  54. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/eval/registry.py +0 -0
  55. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/eval/tedsmetric.py +0 -0
  56. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/eval/tp_eval_callback.py +0 -0
  57. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/extern/__init__.py +0 -0
  58. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/extern/base.py +0 -0
  59. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/extern/d2detect.py +0 -0
  60. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/extern/deskew.py +0 -0
  61. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/extern/doctrocr.py +0 -0
  62. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/extern/fastlang.py +0 -0
  63. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/extern/hfdetr.py +0 -0
  64. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/extern/hflayoutlm.py +0 -0
  65. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/extern/hflm.py +0 -0
  66. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/extern/pdftext.py +0 -0
  67. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/extern/pt/__init__.py +0 -0
  68. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/extern/pt/nms.py +0 -0
  69. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/extern/pt/ptutils.py +0 -0
  70. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/extern/tessocr.py +0 -0
  71. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/extern/texocr.py +0 -0
  72. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/extern/tp/__init__.py +0 -0
  73. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/extern/tp/tfutils.py +0 -0
  74. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/extern/tp/tpcompat.py +0 -0
  75. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/extern/tp/tpfrcnn/__init__.py +0 -0
  76. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/extern/tp/tpfrcnn/common.py +0 -0
  77. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/extern/tp/tpfrcnn/config/__init__.py +0 -0
  78. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/extern/tp/tpfrcnn/config/config.py +0 -0
  79. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/extern/tp/tpfrcnn/modeling/__init__.py +0 -0
  80. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +0 -0
  81. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/extern/tp/tpfrcnn/modeling/generalized_rcnn.py +0 -0
  82. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py +0 -0
  83. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/extern/tp/tpfrcnn/modeling/model_cascade.py +0 -0
  84. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +0 -0
  85. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +0 -0
  86. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +0 -0
  87. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/extern/tp/tpfrcnn/modeling/model_rpn.py +0 -0
  88. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/extern/tp/tpfrcnn/predict.py +0 -0
  89. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/extern/tp/tpfrcnn/preproc.py +0 -0
  90. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/extern/tp/tpfrcnn/utils/__init__.py +0 -0
  91. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/extern/tp/tpfrcnn/utils/box_ops.py +0 -0
  92. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/extern/tp/tpfrcnn/utils/np_box_ops.py +0 -0
  93. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/extern/tpdetect.py +0 -0
  94. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/mapper/__init__.py +0 -0
  95. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/mapper/cats.py +0 -0
  96. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/mapper/cocostruct.py +0 -0
  97. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/mapper/d2struct.py +0 -0
  98. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/mapper/hfstruct.py +0 -0
  99. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/mapper/laylmstruct.py +0 -0
  100. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/mapper/maputils.py +0 -0
  101. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/mapper/match.py +0 -0
  102. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/mapper/misc.py +0 -0
  103. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/mapper/pascalstruct.py +0 -0
  104. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/mapper/prodigystruct.py +0 -0
  105. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/mapper/pubstruct.py +0 -0
  106. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/mapper/tpstruct.py +0 -0
  107. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/mapper/xfundstruct.py +0 -0
  108. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/pipe/__init__.py +0 -0
  109. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/pipe/anngen.py +0 -0
  110. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/pipe/base.py +0 -0
  111. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/pipe/common.py +0 -0
  112. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/pipe/concurrency.py +0 -0
  113. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/pipe/doctectionpipe.py +0 -0
  114. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/pipe/language.py +0 -0
  115. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/pipe/layout.py +0 -0
  116. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/pipe/lm.py +0 -0
  117. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/pipe/order.py +0 -0
  118. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/pipe/refine.py +0 -0
  119. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/pipe/registry.py +0 -0
  120. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/pipe/segment.py +0 -0
  121. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/pipe/sub_layout.py +0 -0
  122. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/pipe/text.py +0 -0
  123. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/pipe/transform.py +0 -0
  124. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/py.typed +0 -0
  125. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/train/__init__.py +0 -0
  126. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/train/d2_frcnn_train.py +0 -0
  127. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/train/hf_detr_train.py +0 -0
  128. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/train/hf_layoutlm_train.py +0 -0
  129. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/train/tp_frcnn_train.py +0 -0
  130. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/utils/__init__.py +0 -0
  131. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/utils/concurrency.py +0 -0
  132. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/utils/context.py +0 -0
  133. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/utils/develop.py +0 -0
  134. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/utils/env_info.py +0 -0
  135. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/utils/error.py +0 -0
  136. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/utils/file_utils.py +0 -0
  137. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/utils/fs.py +0 -0
  138. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/utils/identifier.py +0 -0
  139. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/utils/logger.py +0 -0
  140. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/utils/metacfg.py +0 -0
  141. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/utils/mocks.py +0 -0
  142. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/utils/pdf_utils.py +0 -0
  143. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/utils/settings.py +0 -0
  144. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/utils/tqdm.py +0 -0
  145. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/utils/transform.py +0 -0
  146. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/utils/types.py +0 -0
  147. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/utils/utils.py +0 -0
  148. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection/utils/viz.py +0 -0
  149. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection.egg-info/SOURCES.txt +0 -0
  150. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection.egg-info/dependency_links.txt +0 -0
  151. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection.egg-info/requires.txt +0 -0
  152. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/deepdoctection.egg-info/top_level.txt +0 -0
  153. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/setup.cfg +0 -0
  154. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/setup.py +0 -0
  155. {deepdoctection-0.43.4 → deepdoctection-0.43.6}/tests/test_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deepdoctection
3
- Version: 0.43.4
3
+ Version: 0.43.6
4
4
  Summary: Repository for Document AI
5
5
  Home-page: https://github.com/deepdoctection/deepdoctection
6
6
  Author: Dr. Janis Meyer
@@ -168,13 +168,9 @@ Version `v.0.43` includes a significant redesign of the Analyzer's default confi
168
168
  </p>
169
169
 
170
170
 
171
-
172
171
  **deep**doctection is a Python library that orchestrates Scan and PDF document layout analysis and extraction for RAG.
173
172
  It also provides a framework for training, evaluating and inferencing Document AI models.
174
173
 
175
- Check the demo of a document layout analysis pipeline with OCR on 🤗
176
- [**Hugging Face spaces**](https://huggingface.co/spaces/deepdoctection/deepdoctection).
177
-
178
174
  # Overview
179
175
 
180
176
  - Document layout analysis and table recognition in PyTorch with
@@ -197,6 +193,54 @@ for an easy start.
197
193
 
198
194
  Check the [**release notes**](https://github.com/deepdoctection/deepdoctection/releases) for recent updates.
199
195
 
196
+
197
+ ----------------------------------------------------------------------------------------
198
+
199
+ # Hugging Face Space Demo
200
+
201
+ Check the demo of a document layout analysis pipeline with OCR on 🤗
202
+ [**Hugging Face spaces**](https://huggingface.co/spaces/deepdoctection/deepdoctection) or use the gradio client.
203
+
204
+ ```
205
+ pip install gradio_client # requires Python >= 3.10
206
+ ```
207
+
208
+ To process a single image:
209
+
210
+ ```python
211
+ from gradio_client import Client, handle_file
212
+
213
+ if __name__ == "__main__":
214
+
215
+ client = Client("deepdoctection/deepdoctection")
216
+ result = client.predict(
217
+ img=handle_file('/local_path/to/dir/file_name.jpeg'), # accepts image files, e.g. JPEG, PNG
218
+ pdf=None,
219
+ max_datapoints = 2,
220
+ api_name = "/analyze_image"
221
+ )
222
+ print(result)
223
+ ```
224
+
225
+ To process a PDF document:
226
+
227
+ ```python
228
+ from gradio_client import Client, handle_file
229
+
230
+ if __name__ == "__main__":
231
+
232
+ client = Client("deepdoctection/deepdoctection")
233
+ result = client.predict(
234
+ img=None,
235
+ pdf=handle_file("/local_path/to/dir/your_doc.pdf"),
236
+ max_datapoints = 2, # increase to process up to 9 pages
237
+ api_name = "/analyze_image"
238
+ )
239
+ print(result)
240
+ ```
241
+
242
+ --------------------------------------------------------------------------------------------------------
243
+
200
244
  # Example
201
245
 
202
246
  ```python
@@ -242,8 +286,9 @@ alt="text" width="40%">
242
286
  </p>
243
287
 
244
288
 
289
+ -----------------------------------------------------------------------------------------
245
290
 
246
- ## Requirements
291
+ # Requirements
247
292
 
248
293
  ![requirements](https://github.com/deepdoctection/deepdoctection/raw/master/docs/tutorials/_imgs/install_01.png)
249
294
 
@@ -262,11 +307,13 @@ alt="text" width="40%">
262
307
  | DocTr | ✅ | ❌ | ✅ |
263
308
  | LayoutLM (v1, v2, v3, XLM) via Transformers | ✅ | ❌ | ❌ |
264
309
 
265
- ## Installation
310
+ ------------------------------------------------------------------------------------------
311
+
312
+ # Installation
266
313
 
267
314
  We recommend using a virtual environment.
268
315
 
269
- #### Get started installation
316
+ ## Get started installation
270
317
 
271
318
  For a simple setup which is enough to parse documents with the default setting, install the following:
272
319
 
@@ -274,7 +321,7 @@ For a simple setup which is enough to parse documents with the default setting,
274
321
 
275
322
  ```
276
323
  pip install transformers
277
- pip install python-doctr
324
+ pip install python-doctr==0.9.0
278
325
  pip install deepdoctection
279
326
  ```
280
327
 
@@ -282,13 +329,13 @@ pip install deepdoctection
282
329
 
283
330
  ```
284
331
  pip install tensorpack
285
- pip install python-doctr
332
+ pip install python-doctr==0.9.0
286
333
  pip install deepdoctection
287
334
  ```
288
335
 
289
336
  Both setups are sufficient to run the [**introduction notebook**](https://github.com/deepdoctection/notebooks/blob/main/Get_Started.ipynb).
290
337
 
291
- #### Full installation
338
+ ### Full installation
292
339
 
293
340
  The following installation will give you ALL models available within the Deep Learning framework as well as all models
294
341
  that are independent of Tensorflow/PyTorch.
@@ -318,7 +365,7 @@ pip install deepdoctection[tf]
318
365
  For further information, please consult the [**full installation instructions**](https://deepdoctection.readthedocs.io/en/latest/install/).
319
366
 
320
367
 
321
- ### Installation from source
368
+ ## Installation from source
322
369
 
323
370
  Download the repository or clone via
324
371
 
@@ -341,8 +388,7 @@ pip install ".[tf]" # or "pip install -e .[tf]"
341
388
  ```
342
389
 
343
390
 
344
-
345
- ### Running a Docker container from Docker hub
391
+ ## Running a Docker container from Docker hub
346
392
 
347
393
  Pre-existing Docker images can be downloaded from the [Docker hub](https://hub.docker.com/r/deepdoctection/deepdoctection).
348
394
 
@@ -360,16 +406,18 @@ docker compose up -d
360
406
 
361
407
  will start the container. There is no endpoint exposed, though.
362
408
 
363
- ## Credits
409
+ -----------------------------------------------------------------------------------------------
410
+
411
+ # Credits
364
412
 
365
413
  We thank all libraries that provide high quality code and pre-trained models. Without them, it would have been impossible
366
414
  to develop this framework.
367
415
 
368
416
 
369
- ## If you like **deep**doctection ...
417
+ # If you like **deep**doctection ...
370
418
 
371
419
  ...you can easily support the project by making it more visible. Leaving a star or a recommendation will help.
372
420
 
373
- ## License
421
+ # License
374
422
 
375
423
  Distributed under the Apache 2.0 License. Check [LICENSE](https://github.com/deepdoctection/deepdoctection/blob/master/LICENSE) for additional information.
@@ -25,13 +25,9 @@ Version `v.0.43` includes a significant redesign of the Analyzer's default confi
25
25
  </p>
26
26
 
27
27
 
28
-
29
28
  **deep**doctection is a Python library that orchestrates Scan and PDF document layout analysis and extraction for RAG.
30
29
  It also provides a framework for training, evaluating and inferencing Document AI models.
31
30
 
32
- Check the demo of a document layout analysis pipeline with OCR on 🤗
33
- [**Hugging Face spaces**](https://huggingface.co/spaces/deepdoctection/deepdoctection).
34
-
35
31
  # Overview
36
32
 
37
33
  - Document layout analysis and table recognition in PyTorch with
@@ -54,6 +50,54 @@ for an easy start.
54
50
 
55
51
  Check the [**release notes**](https://github.com/deepdoctection/deepdoctection/releases) for recent updates.
56
52
 
53
+
54
+ ----------------------------------------------------------------------------------------
55
+
56
+ # Hugging Face Space Demo
57
+
58
+ Check the demo of a document layout analysis pipeline with OCR on 🤗
59
+ [**Hugging Face spaces**](https://huggingface.co/spaces/deepdoctection/deepdoctection) or use the gradio client.
60
+
61
+ ```
62
+ pip install gradio_client # requires Python >= 3.10
63
+ ```
64
+
65
+ To process a single image:
66
+
67
+ ```python
68
+ from gradio_client import Client, handle_file
69
+
70
+ if __name__ == "__main__":
71
+
72
+ client = Client("deepdoctection/deepdoctection")
73
+ result = client.predict(
74
+ img=handle_file('/local_path/to/dir/file_name.jpeg'), # accepts image files, e.g. JPEG, PNG
75
+ pdf=None,
76
+ max_datapoints = 2,
77
+ api_name = "/analyze_image"
78
+ )
79
+ print(result)
80
+ ```
81
+
82
+ To process a PDF document:
83
+
84
+ ```python
85
+ from gradio_client import Client, handle_file
86
+
87
+ if __name__ == "__main__":
88
+
89
+ client = Client("deepdoctection/deepdoctection")
90
+ result = client.predict(
91
+ img=None,
92
+ pdf=handle_file("/local_path/to/dir/your_doc.pdf"),
93
+ max_datapoints = 2, # increase to process up to 9 pages
94
+ api_name = "/analyze_image"
95
+ )
96
+ print(result)
97
+ ```
98
+
99
+ --------------------------------------------------------------------------------------------------------
100
+
57
101
  # Example
58
102
 
59
103
  ```python
@@ -99,8 +143,9 @@ alt="text" width="40%">
99
143
  </p>
100
144
 
101
145
 
146
+ -----------------------------------------------------------------------------------------
102
147
 
103
- ## Requirements
148
+ # Requirements
104
149
 
105
150
  ![requirements](https://github.com/deepdoctection/deepdoctection/raw/master/docs/tutorials/_imgs/install_01.png)
106
151
 
@@ -119,11 +164,13 @@ alt="text" width="40%">
119
164
  | DocTr | ✅ | ❌ | ✅ |
120
165
  | LayoutLM (v1, v2, v3, XLM) via Transformers | ✅ | ❌ | ❌ |
121
166
 
122
- ## Installation
167
+ ------------------------------------------------------------------------------------------
168
+
169
+ # Installation
123
170
 
124
171
  We recommend using a virtual environment.
125
172
 
126
- #### Get started installation
173
+ ## Get started installation
127
174
 
128
175
  For a simple setup which is enough to parse documents with the default setting, install the following:
129
176
 
@@ -131,7 +178,7 @@ For a simple setup which is enough to parse documents with the default setting,
131
178
 
132
179
  ```
133
180
  pip install transformers
134
- pip install python-doctr
181
+ pip install python-doctr==0.9.0
135
182
  pip install deepdoctection
136
183
  ```
137
184
 
@@ -139,13 +186,13 @@ pip install deepdoctection
139
186
 
140
187
  ```
141
188
  pip install tensorpack
142
- pip install python-doctr
189
+ pip install python-doctr==0.9.0
143
190
  pip install deepdoctection
144
191
  ```
145
192
 
146
193
  Both setups are sufficient to run the [**introduction notebook**](https://github.com/deepdoctection/notebooks/blob/main/Get_Started.ipynb).
147
194
 
148
- #### Full installation
195
+ ### Full installation
149
196
 
150
197
  The following installation will give you ALL models available within the Deep Learning framework as well as all models
151
198
  that are independent of Tensorflow/PyTorch.
@@ -175,7 +222,7 @@ pip install deepdoctection[tf]
175
222
  For further information, please consult the [**full installation instructions**](https://deepdoctection.readthedocs.io/en/latest/install/).
176
223
 
177
224
 
178
- ### Installation from source
225
+ ## Installation from source
179
226
 
180
227
  Download the repository or clone via
181
228
 
@@ -198,8 +245,7 @@ pip install ".[tf]" # or "pip install -e .[tf]"
198
245
  ```
199
246
 
200
247
 
201
-
202
- ### Running a Docker container from Docker hub
248
+ ## Running a Docker container from Docker hub
203
249
 
204
250
  Pre-existing Docker images can be downloaded from the [Docker hub](https://hub.docker.com/r/deepdoctection/deepdoctection).
205
251
 
@@ -217,16 +263,18 @@ docker compose up -d
217
263
 
218
264
  will start the container. There is no endpoint exposed, though.
219
265
 
220
- ## Credits
266
+ -----------------------------------------------------------------------------------------------
267
+
268
+ # Credits
221
269
 
222
270
  We thank all libraries that provide high quality code and pre-trained models. Without them, it would have been impossible
223
271
  to develop this framework.
224
272
 
225
273
 
226
- ## If you like **deep**doctection ...
274
+ # If you like **deep**doctection ...
227
275
 
228
276
  ...you can easily support the project by making it more visible. Leaving a star or a recommendation will help.
229
277
 
230
- ## License
278
+ # License
231
279
 
232
280
  Distributed under the Apache 2.0 License. Check [LICENSE](https://github.com/deepdoctection/deepdoctection/blob/master/LICENSE) for additional information.
@@ -25,7 +25,7 @@ from .utils.logger import LoggingRecord, logger
25
25
 
26
26
  # pylint: enable=wrong-import-position
27
27
 
28
- __version__ = "0.43.4"
28
+ __version__ = "0.43.6"
29
29
 
30
30
  _IMPORT_STRUCTURE = {
31
31
  "analyzer": ["config_sanity_checks", "get_dd_analyzer", "ServiceFactory"],
@@ -629,7 +629,7 @@ cfg.PT.ENFORCE_WEIGHTS.ITEM = True
629
629
 
630
630
  # Specifies the PyTorch model weights for item detection.
631
631
  # Use either .pt or .safetensors files.
632
- cfg.PT.ITEM.WEIGHTS = "deepdoctection/tatr_tab_struct_v2/pytorch_model.bin"
632
+ cfg.PT.ITEM.WEIGHTS = "deepdoctection/tatr_tab_struct_v2/model.safetensors"
633
633
 
634
634
  # Specifies the TorchScript model for item detection.
635
635
  # Use .ts files for deployment without model implementation dependencies.
@@ -30,3 +30,4 @@
30
30
  {"name": "Felix92/doctr-torch-parseq-multilingual-v1/pytorch_model.bin", "description": "", "size": [63286381], "tp_model": false, "config": "Felix92/doctr-torch-parseq-multilingual-v1/config.json", "preprocessor_config": null, "hf_repo_id": "Felix92/doctr-torch-parseq-multilingual-v1", "hf_model_name": "pytorch_model.bin", "hf_config_file": ["config.json"], "urls": null, "categories": {}, "categories_orig": null, "dl_library": "PT", "model_wrapper": "DoctrTextRecognizer", "architecture": "parseq", "padding": null}
31
31
  {"name": "doctr/crnn_vgg16_bn/pt/master-fde31e4a.pt", "description": "MASTER", "size": [63286381], "tp_model": false, "config": null, "preprocessor_config": null, "hf_repo_id": null, "hf_model_name": null, "hf_config_file": null, "urls": ["https://doctr-static.mindee.com/models?id=v0.7.0/master-fde31e4a.pt&src=0"], "categories": {}, "categories_orig": null, "dl_library": "PT", "model_wrapper": "DoctrTextRecognizer", "architecture": "master", "padding": null}
32
32
  {"name": "Aryn/deformable-detr-DocLayNet/model.safetensors", "description": "Deformable DEtection TRansformer (DETR), trained on DocLayNet (including 80k annotated pages in 11 classes).", "size": [115511753], "tp_model": false, "config": "Aryn/deformable-detr-DocLayNet/config.json", "preprocessor_config": "Aryn/deformable-detr-DocLayNet/preprocessor_config.json", "hf_repo_id": "Aryn/deformable-detr-DocLayNet", "hf_model_name": "model.safetensors", "hf_config_file": ["config.json", "preprocessor_config.json"], "urls": null, "categories": {"1": "default_type", "2": "caption", "11": "text", "12": "title", "3": "footnote", "4": "formula", "5": "list_item", "6": "page_footer", "7": "page_header", "8": "figure", "9": "section_header", "10": "table"}, "categories_orig": null, "dl_library": "PT", "model_wrapper": "HFDetrDerivedDetector", "architecture": null, "padding": null}
33
+ {"name": "deepdoctection/tatr_tab_struct_v2/model.safetensors", "description": "Table Transformer (DETR) model trained on PubTables1M. It was introduced in the paper Aligning benchmark datasets for table structure recognition by Smock et al. This model is devoted to table structure recognition and assumes to receive a slightly croppedtable as input. It will predict rows, column and spanning cells. Use a padding of around 5 pixels. This artefact has been converted from deepdoctection/tatr_tab_struct_v2/pytorch_model.bin and should be used to reduce security issues", "size": [115511753], "tp_model": false, "config": "deepdoctection/tatr_tab_struct_v2/config.json", "preprocessor_config": "deepdoctection/tatr_tab_struct_v2/preprocessor_config.json", "hf_repo_id": "deepdoctection/tatr_tab_struct_v2", "hf_model_name": "model.safetensors", "hf_config_file": ["config.json", "preprocessor_config.json"], "urls": null, "categories": {"1": "table", "2": "column", "3": "row", "4": "column_header", "5": "projected_row_header", "6": "spanning"}, "categories_orig": null, "dl_library": "PT", "model_wrapper": "HFDetrDerivedDetector", "architecture": null, "padding": null}
@@ -195,7 +195,9 @@ class Word(ImageAnnotationBaseView):
195
195
  attr_names = (
196
196
  set(WordType)
197
197
  .union(super().get_attribute_names())
198
- .union({Relationships.READING_ORDER, Relationships.LAYOUT_LINK, Relationships.LINK})
198
+ .union(
199
+ {Relationships.READING_ORDER, Relationships.LAYOUT_LINK, Relationships.LINK, Relationships.SUCCESSOR}
200
+ )
199
201
  )
200
202
  return {attr_name.value if isinstance(attr_name, ObjectTypes) else attr_name for attr_name in attr_names}
201
203
 
@@ -384,16 +386,10 @@ class Table(Layout):
384
386
  Returns:
385
387
  A list of table cells.
386
388
  """
387
- all_relation_ids = self.get_relationship(Relationships.CHILD)
388
- cell_anns: list[Cell] = self.base_page.get_annotation( # type: ignore
389
- annotation_ids=all_relation_ids,
390
- category_names=[
391
- LayoutType.CELL,
392
- CellType.HEADER,
393
- CellType.BODY,
394
- CellType.SPANNING,
395
- ],
396
- )
389
+ cell_anns: list[Cell] = []
390
+ for row_number in range(1, self.number_of_rows + 1): # type: ignore
391
+ cell_anns.extend(self.row(row_number)) # type: ignore
392
+
397
393
  return cell_anns
398
394
 
399
395
  @property
@@ -592,6 +588,16 @@ class Table(Layout):
592
588
  )
593
589
  return table_list
594
590
 
591
+ @property
592
+ def csv_(self) -> list[list[list[Text_]]]:
593
+ cells = self.cells
594
+ table_list = [[[] for _ in range(self.number_of_columns)] for _ in range(self.number_of_rows)] # type: ignore
595
+ for cell in cells:
596
+ table_list[cell.row_number - 1][cell.column_number - 1].append(cell.text_) # type: ignore
597
+ return table_list
598
+
599
+
600
+
595
601
  def __str__(self) -> str:
596
602
  out = " ".join([" ".join(row + ["\n"]) for row in self.csv])
597
603
  return out
@@ -599,7 +605,13 @@ class Table(Layout):
599
605
  @property
600
606
  def text(self) -> str:
601
607
  try:
602
- return str(self)
608
+ cells = self.cells
609
+ if not cells:
610
+ return super().text
611
+ text_list: list[str] = []
612
+ for cell in cells:
613
+ text_list.append(cell.text)
614
+ return " ".join(text_list)
603
615
  except (TypeError, AnnotationError):
604
616
  return super().text
605
617
 
@@ -616,7 +628,7 @@ class Table(Layout):
616
628
  token_class_ids: list[str] = []
617
629
  token_tag_ids: list[str] = []
618
630
  for cell in cells:
619
- text.extend(cell.text_["text"])
631
+ text.append(cell.text_["text"])
620
632
  words.extend(cell.text_["words"])
621
633
  ann_ids.extend(cell.text_["ann_ids"])
622
634
  token_classes.extend(cell.text_["token_classes"])
@@ -484,7 +484,7 @@ class CustomDataset(DatasetBase):
484
484
  return DatasetInfo(
485
485
  name=self.name,
486
486
  type=self.type,
487
- description=self.description if self.description is not None else "",
487
+ short_description=self.description if self.description is not None else "",
488
488
  license="",
489
489
  url="",
490
490
  splits={},
@@ -306,7 +306,7 @@ class ModelCatalog:
306
306
 
307
307
  # Loading default profiles
308
308
  dd_profile_path = maybe_copy_config_to_cache(
309
- get_package_path(), get_cache_dir_path(), "deepdoctection/configs/profiles.jsonl", False
309
+ get_package_path(), get_cache_dir_path(), "deepdoctection/configs/profiles.jsonl", True
310
310
  )
311
311
  ModelCatalog.load_profiles_from_file(dd_profile_path)
312
312
  # Additional profiles can be added
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deepdoctection
3
- Version: 0.43.4
3
+ Version: 0.43.6
4
4
  Summary: Repository for Document AI
5
5
  Home-page: https://github.com/deepdoctection/deepdoctection
6
6
  Author: Dr. Janis Meyer
@@ -168,13 +168,9 @@ Version `v.0.43` includes a significant redesign of the Analyzer's default confi
168
168
  </p>
169
169
 
170
170
 
171
-
172
171
  **deep**doctection is a Python library that orchestrates Scan and PDF document layout analysis and extraction for RAG.
173
172
  It also provides a framework for training, evaluating and inferencing Document AI models.
174
173
 
175
- Check the demo of a document layout analysis pipeline with OCR on 🤗
176
- [**Hugging Face spaces**](https://huggingface.co/spaces/deepdoctection/deepdoctection).
177
-
178
174
  # Overview
179
175
 
180
176
  - Document layout analysis and table recognition in PyTorch with
@@ -197,6 +193,54 @@ for an easy start.
197
193
 
198
194
  Check the [**release notes**](https://github.com/deepdoctection/deepdoctection/releases) for recent updates.
199
195
 
196
+
197
+ ----------------------------------------------------------------------------------------
198
+
199
+ # Hugging Face Space Demo
200
+
201
+ Check the demo of a document layout analysis pipeline with OCR on 🤗
202
+ [**Hugging Face spaces**](https://huggingface.co/spaces/deepdoctection/deepdoctection) or use the gradio client.
203
+
204
+ ```
205
+ pip install gradio_client # requires Python >= 3.10
206
+ ```
207
+
208
+ To process a single image:
209
+
210
+ ```python
211
+ from gradio_client import Client, handle_file
212
+
213
+ if __name__ == "__main__":
214
+
215
+ client = Client("deepdoctection/deepdoctection")
216
+ result = client.predict(
217
+ img=handle_file('/local_path/to/dir/file_name.jpeg'), # accepts image files, e.g. JPEG, PNG
218
+ pdf=None,
219
+ max_datapoints = 2,
220
+ api_name = "/analyze_image"
221
+ )
222
+ print(result)
223
+ ```
224
+
225
+ To process a PDF document:
226
+
227
+ ```python
228
+ from gradio_client import Client, handle_file
229
+
230
+ if __name__ == "__main__":
231
+
232
+ client = Client("deepdoctection/deepdoctection")
233
+ result = client.predict(
234
+ img=None,
235
+ pdf=handle_file("/local_path/to/dir/your_doc.pdf"),
236
+ max_datapoints = 2, # increase to process up to 9 pages
237
+ api_name = "/analyze_image"
238
+ )
239
+ print(result)
240
+ ```
241
+
242
+ --------------------------------------------------------------------------------------------------------
243
+
200
244
  # Example
201
245
 
202
246
  ```python
@@ -242,8 +286,9 @@ alt="text" width="40%">
242
286
  </p>
243
287
 
244
288
 
289
+ -----------------------------------------------------------------------------------------
245
290
 
246
- ## Requirements
291
+ # Requirements
247
292
 
248
293
  ![requirements](https://github.com/deepdoctection/deepdoctection/raw/master/docs/tutorials/_imgs/install_01.png)
249
294
 
@@ -262,11 +307,13 @@ alt="text" width="40%">
262
307
  | DocTr | ✅ | ❌ | ✅ |
263
308
  | LayoutLM (v1, v2, v3, XLM) via Transformers | ✅ | ❌ | ❌ |
264
309
 
265
- ## Installation
310
+ ------------------------------------------------------------------------------------------
311
+
312
+ # Installation
266
313
 
267
314
  We recommend using a virtual environment.
268
315
 
269
- #### Get started installation
316
+ ## Get started installation
270
317
 
271
318
  For a simple setup which is enough to parse documents with the default setting, install the following:
272
319
 
@@ -274,7 +321,7 @@ For a simple setup which is enough to parse documents with the default setting,
274
321
 
275
322
  ```
276
323
  pip install transformers
277
- pip install python-doctr
324
+ pip install python-doctr==0.9.0
278
325
  pip install deepdoctection
279
326
  ```
280
327
 
@@ -282,13 +329,13 @@ pip install deepdoctection
282
329
 
283
330
  ```
284
331
  pip install tensorpack
285
- pip install python-doctr
332
+ pip install python-doctr==0.9.0
286
333
  pip install deepdoctection
287
334
  ```
288
335
 
289
336
  Both setups are sufficient to run the [**introduction notebook**](https://github.com/deepdoctection/notebooks/blob/main/Get_Started.ipynb).
290
337
 
291
- #### Full installation
338
+ ## Full installation
292
339
 
293
340
  The following installation will give you ALL models available within the Deep Learning framework as well as all models
294
341
  that are independent of Tensorflow/PyTorch.
@@ -318,7 +365,7 @@ pip install deepdoctection[tf]
318
365
  For further information, please consult the [**full installation instructions**](https://deepdoctection.readthedocs.io/en/latest/install/).
319
366
 
320
367
 
321
- ### Installation from source
368
+ ## Installation from source
322
369
 
323
370
  Download the repository or clone via
324
371
 
@@ -341,8 +388,7 @@ pip install ".[tf]" # or "pip install -e .[tf]"
341
388
  ```
342
389
 
343
390
 
344
-
345
- ### Running a Docker container from Docker hub
391
+ ## Running a Docker container from Docker hub
346
392
 
347
393
  Pre-existing Docker images can be downloaded from the [Docker hub](https://hub.docker.com/r/deepdoctection/deepdoctection).
348
394
 
@@ -360,16 +406,18 @@ docker compose up -d
360
406
 
361
407
  will start the container. There is no endpoint exposed, though.
362
408
 
363
- ## Credits
409
+ -----------------------------------------------------------------------------------------------
410
+
411
+ # Credits
364
412
 
365
413
  We thank all libraries that provide high quality code and pre-trained models. Without them, it would have been impossible
366
414
  to develop this framework.
367
415
 
368
416
 
369
- ## If you like **deep**doctection ...
417
+ # If you like **deep**doctection ...
370
418
 
371
419
  ...you can easily support the project by making it more visible. Leaving a star or a recommendation will help.
372
420
 
373
- ## License
421
+ # License
374
422
 
375
423
  Distributed under the Apache 2.0 License. Check [LICENSE](https://github.com/deepdoctection/deepdoctection/blob/master/LICENSE) for additional information.
File without changes