deepdoctection 0.43.3__tar.gz → 0.43.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of deepdoctection might be problematic. Click here for more details.

Files changed (155)
  1. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/PKG-INFO +63 -15
  2. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/README.md +62 -14
  3. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/__init__.py +1 -1
  4. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/analyzer/config.py +1 -0
  5. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/datapoint/view.py +31 -5
  6. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/datasets/base.py +1 -1
  7. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/extern/doctrocr.py +1 -4
  8. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/pipe/anngen.py +1 -1
  9. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/pipe/order.py +7 -8
  10. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/utils/develop.py +0 -1
  11. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection.egg-info/PKG-INFO +63 -15
  12. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/LICENSE +0 -0
  13. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/analyzer/__init__.py +0 -0
  14. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/analyzer/dd.py +0 -0
  15. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/analyzer/factory.py +0 -0
  16. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/configs/__init__.py +0 -0
  17. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/configs/conf_dd_one.yaml +0 -0
  18. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/configs/conf_tesseract.yaml +0 -0
  19. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/configs/profiles.jsonl +0 -0
  20. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/dataflow/__init__.py +0 -0
  21. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/dataflow/base.py +0 -0
  22. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/dataflow/common.py +0 -0
  23. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/dataflow/custom.py +0 -0
  24. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/dataflow/custom_serialize.py +0 -0
  25. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/dataflow/parallel_map.py +0 -0
  26. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/dataflow/serialize.py +0 -0
  27. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/dataflow/stats.py +0 -0
  28. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/datapoint/__init__.py +0 -0
  29. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/datapoint/annotation.py +0 -0
  30. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/datapoint/box.py +0 -0
  31. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/datapoint/convert.py +0 -0
  32. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/datapoint/image.py +0 -0
  33. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/datasets/__init__.py +0 -0
  34. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/datasets/adapter.py +0 -0
  35. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/datasets/dataflow_builder.py +0 -0
  36. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/datasets/info.py +0 -0
  37. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/datasets/instances/__init__.py +0 -0
  38. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/datasets/instances/doclaynet.py +0 -0
  39. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/datasets/instances/fintabnet.py +0 -0
  40. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/datasets/instances/funsd.py +0 -0
  41. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/datasets/instances/iiitar13k.py +0 -0
  42. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/datasets/instances/layouttest.py +0 -0
  43. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/datasets/instances/publaynet.py +0 -0
  44. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/datasets/instances/pubtables1m.py +0 -0
  45. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/datasets/instances/pubtabnet.py +0 -0
  46. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/datasets/instances/rvlcdip.py +0 -0
  47. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/datasets/instances/xfund.py +0 -0
  48. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/datasets/instances/xsl/__init__.py +0 -0
  49. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/datasets/instances/xsl/pascal_voc.xsl +0 -0
  50. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/datasets/registry.py +0 -0
  51. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/datasets/save.py +0 -0
  52. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/eval/__init__.py +0 -0
  53. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/eval/accmetric.py +0 -0
  54. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/eval/base.py +0 -0
  55. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/eval/cocometric.py +0 -0
  56. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/eval/eval.py +0 -0
  57. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/eval/registry.py +0 -0
  58. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/eval/tedsmetric.py +0 -0
  59. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/eval/tp_eval_callback.py +0 -0
  60. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/extern/__init__.py +0 -0
  61. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/extern/base.py +0 -0
  62. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/extern/d2detect.py +0 -0
  63. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/extern/deskew.py +0 -0
  64. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/extern/fastlang.py +0 -0
  65. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/extern/hfdetr.py +0 -0
  66. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/extern/hflayoutlm.py +0 -0
  67. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/extern/hflm.py +0 -0
  68. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/extern/model.py +0 -0
  69. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/extern/pdftext.py +0 -0
  70. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/extern/pt/__init__.py +0 -0
  71. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/extern/pt/nms.py +0 -0
  72. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/extern/pt/ptutils.py +0 -0
  73. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/extern/tessocr.py +0 -0
  74. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/extern/texocr.py +0 -0
  75. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/extern/tp/__init__.py +0 -0
  76. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/extern/tp/tfutils.py +0 -0
  77. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/extern/tp/tpcompat.py +0 -0
  78. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/extern/tp/tpfrcnn/__init__.py +0 -0
  79. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/extern/tp/tpfrcnn/common.py +0 -0
  80. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/extern/tp/tpfrcnn/config/__init__.py +0 -0
  81. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/extern/tp/tpfrcnn/config/config.py +0 -0
  82. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/extern/tp/tpfrcnn/modeling/__init__.py +0 -0
  83. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +0 -0
  84. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/extern/tp/tpfrcnn/modeling/generalized_rcnn.py +0 -0
  85. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py +0 -0
  86. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/extern/tp/tpfrcnn/modeling/model_cascade.py +0 -0
  87. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +0 -0
  88. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +0 -0
  89. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +0 -0
  90. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/extern/tp/tpfrcnn/modeling/model_rpn.py +0 -0
  91. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/extern/tp/tpfrcnn/predict.py +0 -0
  92. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/extern/tp/tpfrcnn/preproc.py +0 -0
  93. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/extern/tp/tpfrcnn/utils/__init__.py +0 -0
  94. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/extern/tp/tpfrcnn/utils/box_ops.py +0 -0
  95. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/extern/tp/tpfrcnn/utils/np_box_ops.py +0 -0
  96. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/extern/tpdetect.py +0 -0
  97. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/mapper/__init__.py +0 -0
  98. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/mapper/cats.py +0 -0
  99. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/mapper/cocostruct.py +0 -0
  100. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/mapper/d2struct.py +0 -0
  101. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/mapper/hfstruct.py +0 -0
  102. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/mapper/laylmstruct.py +0 -0
  103. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/mapper/maputils.py +0 -0
  104. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/mapper/match.py +0 -0
  105. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/mapper/misc.py +0 -0
  106. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/mapper/pascalstruct.py +0 -0
  107. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/mapper/prodigystruct.py +0 -0
  108. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/mapper/pubstruct.py +0 -0
  109. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/mapper/tpstruct.py +0 -0
  110. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/mapper/xfundstruct.py +0 -0
  111. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/pipe/__init__.py +0 -0
  112. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/pipe/base.py +0 -0
  113. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/pipe/common.py +0 -0
  114. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/pipe/concurrency.py +0 -0
  115. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/pipe/doctectionpipe.py +0 -0
  116. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/pipe/language.py +0 -0
  117. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/pipe/layout.py +0 -0
  118. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/pipe/lm.py +0 -0
  119. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/pipe/refine.py +0 -0
  120. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/pipe/registry.py +0 -0
  121. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/pipe/segment.py +0 -0
  122. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/pipe/sub_layout.py +0 -0
  123. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/pipe/text.py +0 -0
  124. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/pipe/transform.py +0 -0
  125. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/py.typed +0 -0
  126. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/train/__init__.py +0 -0
  127. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/train/d2_frcnn_train.py +0 -0
  128. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/train/hf_detr_train.py +0 -0
  129. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/train/hf_layoutlm_train.py +0 -0
  130. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/train/tp_frcnn_train.py +0 -0
  131. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/utils/__init__.py +0 -0
  132. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/utils/concurrency.py +0 -0
  133. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/utils/context.py +0 -0
  134. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/utils/env_info.py +0 -0
  135. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/utils/error.py +0 -0
  136. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/utils/file_utils.py +0 -0
  137. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/utils/fs.py +0 -0
  138. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/utils/identifier.py +0 -0
  139. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/utils/logger.py +0 -0
  140. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/utils/metacfg.py +0 -0
  141. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/utils/mocks.py +0 -0
  142. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/utils/pdf_utils.py +0 -0
  143. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/utils/settings.py +0 -0
  144. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/utils/tqdm.py +0 -0
  145. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/utils/transform.py +0 -0
  146. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/utils/types.py +0 -0
  147. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/utils/utils.py +0 -0
  148. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection/utils/viz.py +0 -0
  149. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection.egg-info/SOURCES.txt +0 -0
  150. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection.egg-info/dependency_links.txt +0 -0
  151. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection.egg-info/requires.txt +0 -0
  152. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/deepdoctection.egg-info/top_level.txt +0 -0
  153. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/setup.cfg +0 -0
  154. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/setup.py +0 -0
  155. {deepdoctection-0.43.3 → deepdoctection-0.43.5}/tests/test_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deepdoctection
3
- Version: 0.43.3
3
+ Version: 0.43.5
4
4
  Summary: Repository for Document AI
5
5
  Home-page: https://github.com/deepdoctection/deepdoctection
6
6
  Author: Dr. Janis Meyer
@@ -168,13 +168,9 @@ Version `v.0.43` includes a significant redesign of the Analyzer's default confi
168
168
  </p>
169
169
 
170
170
 
171
-
172
171
  **deep**doctection is a Python library that orchestrates Scan and PDF document layout analysis and extraction for RAG.
173
172
  It also provides a framework for training, evaluating and inferencing Document AI models.
174
173
 
175
- Check the demo of a document layout analysis pipeline with OCR on 🤗
176
- [**Hugging Face spaces**](https://huggingface.co/spaces/deepdoctection/deepdoctection).
177
-
178
174
  # Overview
179
175
 
180
176
  - Document layout analysis and table recognition in PyTorch with
@@ -197,6 +193,54 @@ for an easy start.
197
193
 
198
194
  Check the [**release notes**](https://github.com/deepdoctection/deepdoctection/releases) for recent updates.
199
195
 
196
+
197
+ ----------------------------------------------------------------------------------------
198
+
199
+ # Hugging Face Space Demo
200
+
201
+ Check the demo of a document layout analysis pipeline with OCR on 🤗
202
+ [**Hugging Face spaces**](https://huggingface.co/spaces/deepdoctection/deepdoctection) or use the gradio client.
203
+
204
+ ```
205
+ pip install gradio_client # requires Python >= 3.10
206
+ ```
207
+
208
+ To process a single image:
209
+
210
+ ```python
211
+ from gradio_client import Client, handle_file
212
+
213
+ if __name__ == "__main__":
214
+
215
+ client = Client("deepdoctection/deepdoctection")
216
+ result = client.predict(
217
+ img=handle_file('/local_path/to/dir/file_name.jpeg'), # accepts image files, e.g. JPEG, PNG
218
+ pdf=None,
219
+ max_datapoints = 2,
220
+ api_name = "/analyze_image"
221
+ )
222
+ print(result)
223
+ ```
224
+
225
+ To process a PDF document:
226
+
227
+ ```python
228
+ from gradio_client import Client, handle_file
229
+
230
+ if __name__ == "__main__":
231
+
232
+ client = Client("deepdoctection/deepdoctection")
233
+ result = client.predict(
234
+ img=None,
235
+ pdf=handle_file("/local_path/to/dir/your_doc.pdf"),
236
+ max_datapoints = 2, # increase to process up to 9 pages
237
+ api_name = "/analyze_image"
238
+ )
239
+ print(result)
240
+ ```
241
+
242
+ --------------------------------------------------------------------------------------------------------
243
+
200
244
  # Example
201
245
 
202
246
  ```python
@@ -242,8 +286,9 @@ alt="text" width="40%">
242
286
  </p>
243
287
 
244
288
 
289
+ -----------------------------------------------------------------------------------------
245
290
 
246
- ## Requirements
291
+ # Requirements
247
292
 
248
293
  ![requirements](https://github.com/deepdoctection/deepdoctection/raw/master/docs/tutorials/_imgs/install_01.png)
249
294
 
@@ -262,11 +307,13 @@ alt="text" width="40%">
262
307
  | DocTr | ✅ | ❌ | ✅ |
263
308
  | LayoutLM (v1, v2, v3, XLM) via Transformers | ✅ | ❌ | ❌ |
264
309
 
265
- ## Installation
310
+ ------------------------------------------------------------------------------------------
311
+
312
+ # Installation
266
313
 
267
314
  We recommend using a virtual environment.
268
315
 
269
- #### Get started installation
316
+ ## Get started installation
270
317
 
271
318
  For a simple setup which is enough to parse documents with the default setting, install the following:
272
319
 
@@ -288,7 +335,7 @@ pip install deepdoctection
288
335
 
289
336
  Both setups are sufficient to run the [**introduction notebook**](https://github.com/deepdoctection/notebooks/blob/main/Get_Started.ipynb).
290
337
 
291
- #### Full installation
338
+ ### Full installation
292
339
 
293
340
  The following installation will give you ALL models available within the Deep Learning framework as well as all models
294
341
  that are independent of Tensorflow/PyTorch.
@@ -318,7 +365,7 @@ pip install deepdoctection[tf]
318
365
  For further information, please consult the [**full installation instructions**](https://deepdoctection.readthedocs.io/en/latest/install/).
319
366
 
320
367
 
321
- ### Installation from source
368
+ ## Installation from source
322
369
 
323
370
  Download the repository or clone via
324
371
 
@@ -341,8 +388,7 @@ pip install ".[tf]" # or "pip install -e .[tf]"
341
388
  ```
342
389
 
343
390
 
344
-
345
- ### Running a Docker container from Docker hub
391
+ ## Running a Docker container from Docker hub
346
392
 
347
393
  Pre-existing Docker images can be downloaded from the [Docker hub](https://hub.docker.com/r/deepdoctection/deepdoctection).
348
394
 
@@ -360,16 +406,18 @@ docker compose up -d
360
406
 
361
407
  will start the container. There is no endpoint exposed, though.
362
408
 
363
- ## Credits
409
+ -----------------------------------------------------------------------------------------------
410
+
411
+ # Credits
364
412
 
365
413
  We thank all libraries that provide high quality code and pre-trained models. Without, it would have been impossible
366
414
  to develop this framework.
367
415
 
368
416
 
369
- ## If you like **deep**doctection ...
417
+ # If you like **deep**doctection ...
370
418
 
371
419
  ...you can easily support the project by making it more visible. Leaving a star or a recommendation will help.
372
420
 
373
- ## License
421
+ # License
374
422
 
375
423
  Distributed under the Apache 2.0 License. Check [LICENSE](https://github.com/deepdoctection/deepdoctection/blob/master/LICENSE) for additional information.
@@ -25,13 +25,9 @@ Version `v.0.43` includes a significant redesign of the Analyzer's default confi
25
25
  </p>
26
26
 
27
27
 
28
-
29
28
  **deep**doctection is a Python library that orchestrates Scan and PDF document layout analysis and extraction for RAG.
30
29
  It also provides a framework for training, evaluating and inferencing Document AI models.
31
30
 
32
- Check the demo of a document layout analysis pipeline with OCR on 🤗
33
- [**Hugging Face spaces**](https://huggingface.co/spaces/deepdoctection/deepdoctection).
34
-
35
31
  # Overview
36
32
 
37
33
  - Document layout analysis and table recognition in PyTorch with
@@ -54,6 +50,54 @@ for an easy start.
54
50
 
55
51
  Check the [**release notes**](https://github.com/deepdoctection/deepdoctection/releases) for recent updates.
56
52
 
53
+
54
+ ----------------------------------------------------------------------------------------
55
+
56
+ # Hugging Face Space Demo
57
+
58
+ Check the demo of a document layout analysis pipeline with OCR on 🤗
59
+ [**Hugging Face spaces**](https://huggingface.co/spaces/deepdoctection/deepdoctection) or use the gradio client.
60
+
61
+ ```
62
+ pip install gradio_client # requires Python >= 3.10
63
+ ```
64
+
65
+ To process a single image:
66
+
67
+ ```python
68
+ from gradio_client import Client, handle_file
69
+
70
+ if __name__ == "__main__":
71
+
72
+ client = Client("deepdoctection/deepdoctection")
73
+ result = client.predict(
74
+ img=handle_file('/local_path/to/dir/file_name.jpeg'), # accepts image files, e.g. JPEG, PNG
75
+ pdf=None,
76
+ max_datapoints = 2,
77
+ api_name = "/analyze_image"
78
+ )
79
+ print(result)
80
+ ```
81
+
82
+ To process a PDF document:
83
+
84
+ ```python
85
+ from gradio_client import Client, handle_file
86
+
87
+ if __name__ == "__main__":
88
+
89
+ client = Client("deepdoctection/deepdoctection")
90
+ result = client.predict(
91
+ img=None,
92
+ pdf=handle_file("/local_path/to/dir/your_doc.pdf"),
93
+ max_datapoints = 2, # increase to process up to 9 pages
94
+ api_name = "/analyze_image"
95
+ )
96
+ print(result)
97
+ ```
98
+
99
+ --------------------------------------------------------------------------------------------------------
100
+
57
101
  # Example
58
102
 
59
103
  ```python
@@ -99,8 +143,9 @@ alt="text" width="40%">
99
143
  </p>
100
144
 
101
145
 
146
+ -----------------------------------------------------------------------------------------
102
147
 
103
- ## Requirements
148
+ # Requirements
104
149
 
105
150
  ![requirements](https://github.com/deepdoctection/deepdoctection/raw/master/docs/tutorials/_imgs/install_01.png)
106
151
 
@@ -119,11 +164,13 @@ alt="text" width="40%">
119
164
  | DocTr | ✅ | ❌ | ✅ |
120
165
  | LayoutLM (v1, v2, v3, XLM) via Transformers | ✅ | ❌ | ❌ |
121
166
 
122
- ## Installation
167
+ ------------------------------------------------------------------------------------------
168
+
169
+ # Installation
123
170
 
124
171
  We recommend using a virtual environment.
125
172
 
126
- #### Get started installation
173
+ ## Get started installation
127
174
 
128
175
  For a simple setup which is enough to parse documents with the default setting, install the following:
129
176
 
@@ -145,7 +192,7 @@ pip install deepdoctection
145
192
 
146
193
  Both setups are sufficient to run the [**introduction notebook**](https://github.com/deepdoctection/notebooks/blob/main/Get_Started.ipynb).
147
194
 
148
- #### Full installation
195
+ ### Full installation
149
196
 
150
197
  The following installation will give you ALL models available within the Deep Learning framework as well as all models
151
198
  that are independent of Tensorflow/PyTorch.
@@ -175,7 +222,7 @@ pip install deepdoctection[tf]
175
222
  For further information, please consult the [**full installation instructions**](https://deepdoctection.readthedocs.io/en/latest/install/).
176
223
 
177
224
 
178
- ### Installation from source
225
+ ## Installation from source
179
226
 
180
227
  Download the repository or clone via
181
228
 
@@ -198,8 +245,7 @@ pip install ".[tf]" # or "pip install -e .[tf]"
198
245
  ```
199
246
 
200
247
 
201
-
202
- ### Running a Docker container from Docker hub
248
+ ## Running a Docker container from Docker hub
203
249
 
204
250
  Pre-existing Docker images can be downloaded from the [Docker hub](https://hub.docker.com/r/deepdoctection/deepdoctection).
205
251
 
@@ -217,16 +263,18 @@ docker compose up -d
217
263
 
218
264
  will start the container. There is no endpoint exposed, though.
219
265
 
220
- ## Credits
266
+ -----------------------------------------------------------------------------------------------
267
+
268
+ # Credits
221
269
 
222
270
  We thank all libraries that provide high quality code and pre-trained models. Without, it would have been impossible
223
271
  to develop this framework.
224
272
 
225
273
 
226
- ## If you like **deep**doctection ...
274
+ # If you like **deep**doctection ...
227
275
 
228
276
  ...you can easily support the project by making it more visible. Leaving a star or a recommendation will help.
229
277
 
230
- ## License
278
+ # License
231
279
 
232
280
  Distributed under the Apache 2.0 License. Check [LICENSE](https://github.com/deepdoctection/deepdoctection/blob/master/LICENSE) for additional information.
@@ -25,7 +25,7 @@ from .utils.logger import LoggingRecord, logger
25
25
 
26
26
  # pylint: enable=wrong-import-position
27
27
 
28
- __version__ = "0.43.3"
28
+ __version__ = "0.43.5"
29
29
 
30
30
  _IMPORT_STRUCTURE = {
31
31
  "analyzer": ["config_sanity_checks", "get_dd_analyzer", "ServiceFactory"],
@@ -903,6 +903,7 @@ cfg.LAYOUT_LINK.CHILD_CATEGORIES = [LayoutType.CAPTION]
903
903
  # This prevents accidental modification at runtime.
904
904
  cfg.freeze()
905
905
 
906
+
906
907
  def update_cfg_from_defaults() -> None:
907
908
  """
908
909
  Update the configuration with current values from IMAGE_DEFAULTS.
@@ -72,6 +72,18 @@ class ImageAnnotationBaseView(ImageAnnotation):
72
72
 
73
73
  base_page: Page
74
74
 
75
+ @property
76
+ def b64_image(self) -> Optional[str]:
77
+ """
78
+ Returns:
79
+ The base64 encoded image of the page if available, otherwise None.
80
+ """
81
+
82
+ if self.image is not None:
83
+ if self.image.image is not None:
84
+ return viz_handler.convert_np_to_b64(self.image.image)
85
+ return None
86
+
75
87
  @property
76
88
  def bbox(self) -> list[float]:
77
89
  """
@@ -157,7 +169,7 @@ class ImageAnnotationBaseView(ImageAnnotation):
157
169
  """
158
170
 
159
171
  # sub categories and summary sub categories are valid attribute names
160
- attr_names = {"bbox", "np_image"}.union({cat.value for cat in self.sub_categories})
172
+ attr_names = {"bbox", "np_image", "b64_image"}.union({cat.value for cat in self.sub_categories})
161
173
  if self.image:
162
174
  attr_names = attr_names.union({cat.value for cat in self.image.summary.sub_categories.keys()})
163
175
  return {attr_name.value if isinstance(attr_name, ObjectTypes) else attr_name for attr_name in attr_names}
@@ -774,6 +786,7 @@ class Page(Image):
774
786
  "residual_layouts",
775
787
  "document_summary",
776
788
  "document_mapping",
789
+ "b64_image",
777
790
  }
778
791
  include_residual_text_container: bool = True
779
792
 
@@ -902,6 +915,17 @@ class Page(Image):
902
915
  """
903
916
  return self.get_annotation(category_names=self.residual_text_block_categories)
904
917
 
918
+ @property
919
+ def b64_image(self) -> Optional[str]:
920
+ """
921
+ Returns:
922
+ The base64 encoded image of the page if available, otherwise None.
923
+ """
924
+
925
+ if self.image_orig.image is not None:
926
+ return viz_handler.convert_np_to_b64(self.image_orig.image)
927
+ return None
928
+
905
929
  @classmethod
906
930
  def from_image(
907
931
  cls,
@@ -1410,10 +1434,12 @@ class Page(Image):
1410
1434
  for block in block_with_order:
1411
1435
  all_words.extend(block.get_ordered_words()) # type: ignore
1412
1436
  return [
1413
- {"word": word.characters,
1414
- "entity": word.token_tag.value,
1415
- "annotation_id": word.annotation_id,
1416
- "successor_annotation_id": word.successor[0].annotation_id if word.successor else None}
1437
+ {
1438
+ "word": word.characters,
1439
+ "entity": word.token_tag.value,
1440
+ "annotation_id": word.annotation_id,
1441
+ "successor_annotation_id": word.successor[0].annotation_id if word.successor else None,
1442
+ }
1417
1443
  for word in all_words
1418
1444
  if word.token_tag not in (TokenClasses.OTHER, None)
1419
1445
  ]
@@ -484,7 +484,7 @@ class CustomDataset(DatasetBase):
484
484
  return DatasetInfo(
485
485
  name=self.name,
486
486
  type=self.type,
487
- description=self.description if self.description is not None else "",
487
+ short_description=self.description if self.description is not None else "",
488
488
  license="",
489
489
  url="",
490
490
  splits={},
@@ -269,10 +269,7 @@ class DoctrTextlineDetector(DoctrTextlineDetectorMixin):
269
269
  if self.lib == "PT":
270
270
  self.device = get_torch_device(device)
271
271
 
272
- self.doctr_predictor = self.get_wrapped_model(self.architecture,
273
- self.path_weights,
274
- self.device,
275
- self.lib)
272
+ self.doctr_predictor = self.get_wrapped_model(self.architecture, self.path_weights, self.device, self.lib)
276
273
 
277
274
  def predict(self, np_img: PixelValues) -> list[DetectionResult]:
278
275
  """
@@ -19,7 +19,7 @@
19
19
  Datapoint manager
20
20
  """
21
21
  from dataclasses import asdict
22
- from typing import Optional, Union, Sequence
22
+ from typing import Optional, Sequence, Union
23
23
 
24
24
  import numpy as np
25
25
 
@@ -801,14 +801,13 @@ class TextOrderService(TextLineServiceMixin):
801
801
  if self.include_residual_text_container:
802
802
  add_category.append(LayoutType.LINE)
803
803
 
804
- if set(self.floating_text_block_categories) <= set(
805
- self.text_block_categories + tuple(add_category)
806
- ):
807
- logger.warning("In most cases floating_text_block_categories must be a subset of text_block_categories. "
808
- "Adding categories to floating_text_block_categories, that do not belong to "
809
- "text_block_categories makes only sense for categories set have CHILD relationships with"
810
- " annotations that belong to text_block_categories.")
811
-
804
+ if set(self.floating_text_block_categories) <= set(self.text_block_categories + tuple(add_category)):
805
+ logger.warning(
806
+ "In most cases floating_text_block_categories must be a subset of text_block_categories. "
807
+ "Adding categories to floating_text_block_categories, that do not belong to "
808
+ "text_block_categories makes only sense for categories set have CHILD relationships with"
809
+ " annotations that belong to text_block_categories."
810
+ )
812
811
 
813
812
  def get_meta_annotation(self) -> MetaAnnotation:
814
813
  add_category = [self.text_container]
@@ -71,7 +71,6 @@ def log_deprecated(name: str, text: str, eos: str = "", max_num_warnings: Option
71
71
  logger.info(LoggingRecord(f"[Deprecated] {info_msg}"))
72
72
 
73
73
 
74
-
75
74
  def deprecated(
76
75
  text: str = "", eos: str = "", max_num_warnings: Optional[int] = None
77
76
  ) -> Callable[[Callable[..., T]], Callable[..., T]]:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deepdoctection
3
- Version: 0.43.3
3
+ Version: 0.43.5
4
4
  Summary: Repository for Document AI
5
5
  Home-page: https://github.com/deepdoctection/deepdoctection
6
6
  Author: Dr. Janis Meyer
@@ -168,13 +168,9 @@ Version `v.0.43` includes a significant redesign of the Analyzer's default confi
168
168
  </p>
169
169
 
170
170
 
171
-
172
171
  **deep**doctection is a Python library that orchestrates Scan and PDF document layout analysis and extraction for RAG.
173
172
  It also provides a framework for training, evaluating and inferencing Document AI models.
174
173
 
175
- Check the demo of a document layout analysis pipeline with OCR on 🤗
176
- [**Hugging Face spaces**](https://huggingface.co/spaces/deepdoctection/deepdoctection).
177
-
178
174
  # Overview
179
175
 
180
176
  - Document layout analysis and table recognition in PyTorch with
@@ -197,6 +193,54 @@ for an easy start.
197
193
 
198
194
  Check the [**release notes**](https://github.com/deepdoctection/deepdoctection/releases) for recent updates.
199
195
 
196
+
197
+ ----------------------------------------------------------------------------------------
198
+
199
+ # Hugging Face Space Demo
200
+
201
+ Check the demo of a document layout analysis pipeline with OCR on 🤗
202
+ [**Hugging Face Spaces**](https://huggingface.co/spaces/deepdoctection/deepdoctection) or use the Gradio client.
203
+
204
+ ```
205
+ pip install gradio_client # requires Python >= 3.10
206
+ ```
207
+
208
+ To process a single image:
209
+
210
+ ```python
211
+ from gradio_client import Client, handle_file
212
+
213
+ if __name__ == "__main__":
214
+
215
+ client = Client("deepdoctection/deepdoctection")
216
+ result = client.predict(
217
+ img=handle_file('/local_path/to/dir/file_name.jpeg'), # accepts image files, e.g. JPEG, PNG
218
+ pdf=None,
219
+ max_datapoints = 2,
220
+ api_name = "/analyze_image"
221
+ )
222
+ print(result)
223
+ ```
224
+
225
+ To process a PDF document:
226
+
227
+ ```python
228
+ from gradio_client import Client, handle_file
229
+
230
+ if __name__ == "__main__":
231
+
232
+ client = Client("deepdoctection/deepdoctection")
233
+ result = client.predict(
234
+ img=None,
235
+ pdf=handle_file("/local_path/to/dir/your_doc.pdf"),
236
+ max_datapoints = 2, # increase to process up to 9 pages
237
+ api_name = "/analyze_image"
238
+ )
239
+ print(result)
240
+ ```
241
+
242
+ --------------------------------------------------------------------------------------------------------
243
+
200
244
  # Example
201
245
 
202
246
  ```python
@@ -242,8 +286,9 @@ alt="text" width="40%">
242
286
  </p>
243
287
 
244
288
 
289
+ -----------------------------------------------------------------------------------------
245
290
 
246
- ## Requirements
291
+ # Requirements
247
292
 
248
293
  ![requirements](https://github.com/deepdoctection/deepdoctection/raw/master/docs/tutorials/_imgs/install_01.png)
249
294
 
@@ -262,11 +307,13 @@ alt="text" width="40%">
262
307
  | DocTr | ✅ | ❌ | ✅ |
263
308
  | LayoutLM (v1, v2, v3, XLM) via Transformers | ✅ | ❌ | ❌ |
264
309
 
265
- ## Installation
310
+ ------------------------------------------------------------------------------------------
311
+
312
+ # Installation
266
313
 
267
314
  We recommend using a virtual environment.
268
315
 
269
- #### Get started installation
316
+ ## Get started installation
270
317
 
271
318
  For a simple setup which is enough to parse documents with the default setting, install the following:
272
319
 
@@ -288,7 +335,7 @@ pip install deepdoctection
288
335
 
289
336
  Both setups are sufficient to run the [**introduction notebook**](https://github.com/deepdoctection/notebooks/blob/main/Get_Started.ipynb).
290
337
 
291
- #### Full installation
338
+ ### Full installation
292
339
 
293
340
  The following installation will give you ALL models available within the Deep Learning framework as well as all models
294
341
  that are independent of Tensorflow/PyTorch.
@@ -318,7 +365,7 @@ pip install deepdoctection[tf]
318
365
  For further information, please consult the [**full installation instructions**](https://deepdoctection.readthedocs.io/en/latest/install/).
319
366
 
320
367
 
321
- ### Installation from source
368
+ ## Installation from source
322
369
 
323
370
  Download the repository or clone via
324
371
 
@@ -341,8 +388,7 @@ pip install ".[tf]" # or "pip install -e .[tf]"
341
388
  ```
342
389
 
343
390
 
344
-
345
- ### Running a Docker container from Docker hub
391
+ ## Running a Docker container from Docker hub
346
392
 
347
393
  Pre-existing Docker images can be downloaded from the [Docker hub](https://hub.docker.com/r/deepdoctection/deepdoctection).
348
394
 
@@ -360,16 +406,18 @@ docker compose up -d
360
406
 
361
407
  will start the container. There is no endpoint exposed, though.
362
408
 
363
- ## Credits
409
+ -----------------------------------------------------------------------------------------------
410
+
411
+ # Credits
364
412
 
365
413
  We thank all libraries that provide high quality code and pre-trained models. Without them, it would have been impossible
366
414
  to develop this framework.
367
415
 
368
416
 
369
- ## If you like **deep**doctection ...
417
+ # If you like **deep**doctection ...
370
418
 
371
419
  ...you can easily support the project by making it more visible. Leaving a star or a recommendation will help.
372
420
 
373
- ## License
421
+ # License
374
422
 
375
423
  Distributed under the Apache 2.0 License. Check [LICENSE](https://github.com/deepdoctection/deepdoctection/blob/master/LICENSE) for additional information.
File without changes