python-doctr 0.9.0__tar.gz → 0.10.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (181)
  1. {python_doctr-0.9.0 → python_doctr-0.10.0}/PKG-INFO +11 -11
  2. {python_doctr-0.9.0 → python_doctr-0.10.0}/README.md +1 -1
  3. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/datasets/cord.py +10 -1
  4. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/datasets/funsd.py +11 -1
  5. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/datasets/ic03.py +11 -1
  6. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/datasets/ic13.py +10 -1
  7. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/datasets/iiit5k.py +26 -16
  8. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/datasets/imgur5k.py +10 -1
  9. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/datasets/sroie.py +11 -1
  10. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/datasets/svhn.py +11 -1
  11. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/datasets/svt.py +11 -1
  12. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/datasets/synthtext.py +11 -1
  13. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/datasets/utils.py +7 -2
  14. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/datasets/vocabs.py +6 -2
  15. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/datasets/wildreceipt.py +12 -1
  16. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/file_utils.py +19 -0
  17. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/io/elements.py +12 -4
  18. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/builder.py +2 -2
  19. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/classification/magc_resnet/tensorflow.py +13 -6
  20. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/classification/mobilenet/pytorch.py +2 -0
  21. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/classification/mobilenet/tensorflow.py +14 -8
  22. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/classification/predictor/pytorch.py +11 -7
  23. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/classification/predictor/tensorflow.py +10 -6
  24. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/classification/resnet/tensorflow.py +21 -8
  25. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/classification/textnet/tensorflow.py +11 -5
  26. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/classification/vgg/tensorflow.py +9 -3
  27. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/classification/vit/tensorflow.py +10 -4
  28. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/classification/zoo.py +22 -10
  29. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/detection/differentiable_binarization/tensorflow.py +34 -12
  30. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/detection/fast/tensorflow.py +14 -11
  31. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/detection/linknet/tensorflow.py +23 -11
  32. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/detection/predictor/tensorflow.py +2 -2
  33. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/factory/hub.py +5 -6
  34. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/kie_predictor/base.py +4 -0
  35. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/kie_predictor/pytorch.py +4 -0
  36. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/kie_predictor/tensorflow.py +8 -1
  37. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/modules/transformer/tensorflow.py +0 -2
  38. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/modules/vision_transformer/pytorch.py +1 -1
  39. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/modules/vision_transformer/tensorflow.py +1 -1
  40. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/predictor/base.py +24 -12
  41. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/predictor/pytorch.py +4 -0
  42. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/predictor/tensorflow.py +8 -1
  43. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/preprocessor/tensorflow.py +1 -1
  44. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/recognition/crnn/tensorflow.py +8 -6
  45. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/recognition/master/tensorflow.py +9 -4
  46. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/recognition/parseq/tensorflow.py +10 -8
  47. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/recognition/sar/tensorflow.py +7 -3
  48. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/recognition/vitstr/tensorflow.py +9 -4
  49. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/utils/pytorch.py +1 -1
  50. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/utils/tensorflow.py +15 -15
  51. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/transforms/functional/pytorch.py +1 -1
  52. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/transforms/modules/pytorch.py +7 -6
  53. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/transforms/modules/tensorflow.py +15 -12
  54. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/utils/geometry.py +106 -19
  55. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/utils/metrics.py +1 -1
  56. python_doctr-0.10.0/doctr/utils/reconstitution.py +212 -0
  57. python_doctr-0.10.0/doctr/version.py +1 -0
  58. {python_doctr-0.9.0 → python_doctr-0.10.0}/pyproject.toml +11 -12
  59. {python_doctr-0.9.0 → python_doctr-0.10.0}/python_doctr.egg-info/PKG-INFO +11 -11
  60. {python_doctr-0.9.0 → python_doctr-0.10.0}/python_doctr.egg-info/requires.txt +9 -9
  61. {python_doctr-0.9.0 → python_doctr-0.10.0}/setup.py +1 -1
  62. python_doctr-0.9.0/doctr/utils/reconstitution.py +0 -126
  63. python_doctr-0.9.0/doctr/version.py +0 -1
  64. {python_doctr-0.9.0 → python_doctr-0.10.0}/LICENSE +0 -0
  65. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/__init__.py +0 -0
  66. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/contrib/__init__.py +0 -0
  67. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/contrib/artefacts.py +0 -0
  68. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/contrib/base.py +0 -0
  69. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/datasets/__init__.py +0 -0
  70. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/datasets/datasets/__init__.py +0 -0
  71. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/datasets/datasets/base.py +0 -0
  72. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/datasets/datasets/pytorch.py +0 -0
  73. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/datasets/datasets/tensorflow.py +0 -0
  74. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/datasets/detection.py +0 -0
  75. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/datasets/doc_artefacts.py +0 -0
  76. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/datasets/generator/__init__.py +0 -0
  77. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/datasets/generator/base.py +0 -0
  78. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/datasets/generator/pytorch.py +0 -0
  79. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/datasets/generator/tensorflow.py +0 -0
  80. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/datasets/iiithws.py +0 -0
  81. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/datasets/loader.py +0 -0
  82. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/datasets/mjsynth.py +0 -0
  83. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/datasets/ocr.py +0 -0
  84. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/datasets/orientation.py +0 -0
  85. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/datasets/recognition.py +0 -0
  86. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/io/__init__.py +0 -0
  87. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/io/html.py +0 -0
  88. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/io/image/__init__.py +0 -0
  89. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/io/image/base.py +0 -0
  90. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/io/image/pytorch.py +0 -0
  91. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/io/image/tensorflow.py +0 -0
  92. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/io/pdf.py +0 -0
  93. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/io/reader.py +0 -0
  94. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/__init__.py +0 -0
  95. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/_utils.py +0 -0
  96. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/classification/__init__.py +0 -0
  97. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/classification/magc_resnet/__init__.py +0 -0
  98. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/classification/magc_resnet/pytorch.py +0 -0
  99. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/classification/mobilenet/__init__.py +0 -0
  100. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/classification/predictor/__init__.py +0 -0
  101. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/classification/resnet/__init__.py +0 -0
  102. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/classification/resnet/pytorch.py +0 -0
  103. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/classification/textnet/__init__.py +0 -0
  104. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/classification/textnet/pytorch.py +0 -0
  105. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/classification/vgg/__init__.py +0 -0
  106. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/classification/vgg/pytorch.py +0 -0
  107. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/classification/vit/__init__.py +0 -0
  108. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/classification/vit/pytorch.py +0 -0
  109. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/core.py +0 -0
  110. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/detection/__init__.py +0 -0
  111. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/detection/_utils/__init__.py +0 -0
  112. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/detection/_utils/base.py +0 -0
  113. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/detection/_utils/pytorch.py +0 -0
  114. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/detection/_utils/tensorflow.py +0 -0
  115. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/detection/core.py +0 -0
  116. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/detection/differentiable_binarization/__init__.py +0 -0
  117. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/detection/differentiable_binarization/base.py +0 -0
  118. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/detection/differentiable_binarization/pytorch.py +0 -0
  119. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/detection/fast/__init__.py +0 -0
  120. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/detection/fast/base.py +0 -0
  121. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/detection/fast/pytorch.py +0 -0
  122. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/detection/linknet/__init__.py +0 -0
  123. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/detection/linknet/base.py +0 -0
  124. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/detection/linknet/pytorch.py +0 -0
  125. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/detection/predictor/__init__.py +0 -0
  126. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/detection/predictor/pytorch.py +0 -0
  127. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/detection/zoo.py +0 -0
  128. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/factory/__init__.py +0 -0
  129. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/kie_predictor/__init__.py +0 -0
  130. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/modules/__init__.py +0 -0
  131. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/modules/layers/__init__.py +0 -0
  132. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/modules/layers/pytorch.py +0 -0
  133. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/modules/layers/tensorflow.py +0 -0
  134. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/modules/transformer/__init__.py +0 -0
  135. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/modules/transformer/pytorch.py +0 -0
  136. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/modules/vision_transformer/__init__.py +0 -0
  137. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/predictor/__init__.py +0 -0
  138. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/preprocessor/__init__.py +0 -0
  139. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/preprocessor/pytorch.py +0 -0
  140. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/recognition/__init__.py +0 -0
  141. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/recognition/core.py +0 -0
  142. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/recognition/crnn/__init__.py +0 -0
  143. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/recognition/crnn/pytorch.py +0 -0
  144. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/recognition/master/__init__.py +0 -0
  145. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/recognition/master/base.py +0 -0
  146. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/recognition/master/pytorch.py +0 -0
  147. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/recognition/parseq/__init__.py +0 -0
  148. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/recognition/parseq/base.py +0 -0
  149. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/recognition/parseq/pytorch.py +0 -0
  150. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/recognition/predictor/__init__.py +0 -0
  151. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/recognition/predictor/_utils.py +0 -0
  152. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/recognition/predictor/pytorch.py +0 -0
  153. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/recognition/predictor/tensorflow.py +0 -0
  154. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/recognition/sar/__init__.py +0 -0
  155. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/recognition/sar/pytorch.py +0 -0
  156. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/recognition/utils.py +0 -0
  157. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/recognition/vitstr/__init__.py +0 -0
  158. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/recognition/vitstr/base.py +0 -0
  159. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/recognition/vitstr/pytorch.py +0 -0
  160. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/recognition/zoo.py +0 -0
  161. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/utils/__init__.py +0 -0
  162. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/models/zoo.py +0 -0
  163. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/py.typed +0 -0
  164. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/transforms/__init__.py +0 -0
  165. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/transforms/functional/__init__.py +0 -0
  166. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/transforms/functional/base.py +0 -0
  167. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/transforms/functional/tensorflow.py +0 -0
  168. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/transforms/modules/__init__.py +0 -0
  169. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/transforms/modules/base.py +0 -0
  170. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/utils/__init__.py +0 -0
  171. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/utils/common_types.py +0 -0
  172. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/utils/data.py +0 -0
  173. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/utils/fonts.py +0 -0
  174. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/utils/multithreading.py +0 -0
  175. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/utils/repr.py +0 -0
  176. {python_doctr-0.9.0 → python_doctr-0.10.0}/doctr/utils/visualization.py +0 -0
  177. {python_doctr-0.9.0 → python_doctr-0.10.0}/python_doctr.egg-info/SOURCES.txt +0 -0
  178. {python_doctr-0.9.0 → python_doctr-0.10.0}/python_doctr.egg-info/dependency_links.txt +0 -0
  179. {python_doctr-0.9.0 → python_doctr-0.10.0}/python_doctr.egg-info/top_level.txt +0 -0
  180. {python_doctr-0.9.0 → python_doctr-0.10.0}/python_doctr.egg-info/zip-safe +0 -0
  181. {python_doctr-0.9.0 → python_doctr-0.10.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: python-doctr
- Version: 0.9.0
+ Version: 0.10.0
  Summary: Document Text Recognition (docTR): deep Learning for high-performance OCR on documents.
  Author-email: Mindee <contact@mindee.com>
  Maintainer: François-Guillaume Fernandez, Charles Gaillard, Olivier Dulcy, Felix Dittrich
@@ -226,7 +226,7 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
  Requires-Python: <4,>=3.9.0
  Description-Content-Type: text/markdown
  License-File: LICENSE
- Requires-Dist: numpy<2.0.0,>=1.16.0
+ Requires-Dist: numpy<3.0.0,>=1.16.0
  Requires-Dist: scipy<2.0.0,>=1.4.0
  Requires-Dist: h5py<4.0.0,>=3.1.0
  Requires-Dist: opencv-python<5.0.0,>=4.5.0
@@ -241,11 +241,12 @@ Requires-Dist: defusedxml>=0.7.0
  Requires-Dist: anyascii>=0.3.2
  Requires-Dist: tqdm>=4.30.0
  Provides-Extra: tf
- Requires-Dist: tensorflow<2.16.0,>=2.11.0; extra == "tf"
+ Requires-Dist: tensorflow<3.0.0,>=2.15.0; extra == "tf"
+ Requires-Dist: tf-keras<3.0.0,>=2.15.0; extra == "tf"
  Requires-Dist: tf2onnx<2.0.0,>=1.16.0; extra == "tf"
  Provides-Extra: torch
- Requires-Dist: torch<3.0.0,>=1.12.0; extra == "torch"
- Requires-Dist: torchvision>=0.13.0; extra == "torch"
+ Requires-Dist: torch<3.0.0,>=2.0.0; extra == "torch"
+ Requires-Dist: torchvision>=0.15.0; extra == "torch"
  Requires-Dist: onnx<3.0.0,>=1.12.0; extra == "torch"
  Provides-Extra: html
  Requires-Dist: weasyprint>=55.0; extra == "html"
@@ -257,7 +258,6 @@ Requires-Dist: onnxruntime>=1.11.0; extra == "contrib"
  Provides-Extra: testing
  Requires-Dist: pytest>=5.3.2; extra == "testing"
  Requires-Dist: coverage[toml]>=4.5.4; extra == "testing"
- Requires-Dist: hdf5storage>=0.1.18; extra == "testing"
  Requires-Dist: onnxruntime>=1.11.0; extra == "testing"
  Requires-Dist: requests>=2.20.0; extra == "testing"
  Requires-Dist: psutil>=5.9.5; extra == "testing"
@@ -275,17 +275,17 @@ Requires-Dist: sphinx-markdown-tables>=0.0.15; extra == "docs"
  Requires-Dist: sphinx-tabs>=3.3.0; extra == "docs"
  Requires-Dist: furo>=2022.3.4; extra == "docs"
  Provides-Extra: dev
- Requires-Dist: tensorflow<2.16.0,>=2.11.0; extra == "dev"
+ Requires-Dist: tensorflow<3.0.0,>=2.15.0; extra == "dev"
+ Requires-Dist: tf-keras<3.0.0,>=2.15.0; extra == "dev"
  Requires-Dist: tf2onnx<2.0.0,>=1.16.0; extra == "dev"
- Requires-Dist: torch<3.0.0,>=1.12.0; extra == "dev"
- Requires-Dist: torchvision>=0.13.0; extra == "dev"
+ Requires-Dist: torch<3.0.0,>=2.0.0; extra == "dev"
+ Requires-Dist: torchvision>=0.15.0; extra == "dev"
  Requires-Dist: onnx<3.0.0,>=1.12.0; extra == "dev"
  Requires-Dist: weasyprint>=55.0; extra == "dev"
  Requires-Dist: matplotlib>=3.1.0; extra == "dev"
  Requires-Dist: mplcursors>=0.3; extra == "dev"
  Requires-Dist: pytest>=5.3.2; extra == "dev"
  Requires-Dist: coverage[toml]>=4.5.4; extra == "dev"
- Requires-Dist: hdf5storage>=0.1.18; extra == "dev"
  Requires-Dist: onnxruntime>=1.11.0; extra == "dev"
  Requires-Dist: requests>=2.20.0; extra == "dev"
  Requires-Dist: psutil>=5.9.5; extra == "dev"
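In practice, the 0.10.0 metadata relaxes the numpy pin to <3.0.0, raises the framework floors, drops hdf5storage from the test extras, and adds tf-keras so a Keras 2 shim is always installed next to TensorFlow. A minimal install sketch, using the extras as declared above:

    pip install "python-doctr[tf]"     # pulls tensorflow>=2.15,<3.0 plus tf-keras>=2.15,<3.0
    pip install "python-doctr[torch]"  # pulls torch>=2.0,<3.0 plus torchvision>=0.15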
@@ -464,7 +464,7 @@ pip install "python-doctr[torch,viz,html,contib]"
  For MacBooks with M1 chip, you will need some additional packages or specific versions:
 
  - TensorFlow 2: [metal plugin](https://developer.apple.com/metal/tensorflow-plugin/)
- - PyTorch: [version >= 1.12.0](https://pytorch.org/get-started/locally/#start-locally)
+ - PyTorch: [version >= 2.0.0](https://pytorch.org/get-started/locally/#start-locally)
 
  ### Developer mode
 
@@ -161,7 +161,7 @@ pip install "python-doctr[torch,viz,html,contib]"
  For MacBooks with M1 chip, you will need some additional packages or specific versions:
 
  - TensorFlow 2: [metal plugin](https://developer.apple.com/metal/tensorflow-plugin/)
- - PyTorch: [version >= 1.12.0](https://pytorch.org/get-started/locally/#start-locally)
+ - PyTorch: [version >= 2.0.0](https://pytorch.org/get-started/locally/#start-locally)
 
  ### Developer mode
 
@@ -33,6 +33,7 @@ class CORD(VisionDataset):
          train: whether the subset should be the training one
          use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
          recognition_task: whether the dataset should be used for recognition task
+         detection_task: whether the dataset should be used for detection task
          **kwargs: keyword arguments from `VisionDataset`.
      """
 
@@ -53,6 +54,7 @@ class CORD(VisionDataset):
          train: bool = True,
          use_polygons: bool = False,
          recognition_task: bool = False,
+         detection_task: bool = False,
          **kwargs: Any,
      ) -> None:
          url, sha256, name = self.TRAIN if train else self.TEST
@@ -64,10 +66,15 @@ class CORD(VisionDataset):
              pre_transforms=convert_target_to_relative if not recognition_task else None,
              **kwargs,
          )
+         if recognition_task and detection_task:
+             raise ValueError(
+                 "`recognition_task` and `detection_task` cannot be set to True simultaneously. "
+                 + "To get the whole dataset with boxes and labels leave both parameters to False."
+             )
 
          # List images
          tmp_root = os.path.join(self.root, "image")
-         self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any]]]] = []
+         self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any], np.ndarray]]] = []
          self.train = train
          np_dtype = np.float32
          for img_path in tqdm(iterable=os.listdir(tmp_root), desc="Unpacking CORD", total=len(os.listdir(tmp_root))):
@@ -109,6 +116,8 @@ class CORD(VisionDataset):
                  )
                  for crop, label in zip(crops, list(text_targets)):
                      self.data.append((crop, label))
+             elif detection_task:
+                 self.data.append((img_path, np.asarray(box_targets, dtype=int).clip(min=0)))
              else:
                  self.data.append((
                      img_path,
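The same `detection_task` flag, with the identical mutual-exclusion guard, is added to FUNSD, IC03, IC13, IIIT5K, IMGUR5K, SROIE, SVHN, SVT, SynthText and WILDRECEIPT below. A minimal usage sketch for the new mode, assuming the usual `download=True` flow inherited from `VisionDataset`:

    from doctr.datasets import CORD

    # Default: each sample is (image, dict(boxes=..., labels=...))
    ds = CORD(train=True, download=True)

    # New in 0.10.0: detection-only targets, each sample is (image, boxes array)
    ds_det = CORD(train=True, download=True, detection_task=True)
    img, boxes = ds_det[0]

    # The two task flags are mutually exclusive:
    try:
        CORD(train=True, download=True, recognition_task=True, detection_task=True)
    except ValueError as err:
        print(err)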
@@ -33,6 +33,7 @@ class FUNSD(VisionDataset):
          train: whether the subset should be the training one
          use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
          recognition_task: whether the dataset should be used for recognition task
+         detection_task: whether the dataset should be used for detection task
          **kwargs: keyword arguments from `VisionDataset`.
      """
 
@@ -45,6 +46,7 @@ class FUNSD(VisionDataset):
          train: bool = True,
          use_polygons: bool = False,
          recognition_task: bool = False,
+         detection_task: bool = False,
          **kwargs: Any,
      ) -> None:
          super().__init__(
@@ -55,6 +57,12 @@ class FUNSD(VisionDataset):
              pre_transforms=convert_target_to_relative if not recognition_task else None,
              **kwargs,
          )
+         if recognition_task and detection_task:
+             raise ValueError(
+                 "`recognition_task` and `detection_task` cannot be set to True simultaneously. "
+                 + "To get the whole dataset with boxes and labels leave both parameters to False."
+             )
+
          self.train = train
          np_dtype = np.float32
 
@@ -63,7 +71,7 @@ class FUNSD(VisionDataset):
 
          # # List images
          tmp_root = os.path.join(self.root, subfolder, "images")
-         self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any]]]] = []
+         self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any], np.ndarray]]] = []
          for img_path in tqdm(iterable=os.listdir(tmp_root), desc="Unpacking FUNSD", total=len(os.listdir(tmp_root))):
              # File existence check
              if not os.path.exists(os.path.join(tmp_root, img_path)):
@@ -100,6 +108,8 @@ class FUNSD(VisionDataset):
                      # filter labels with unknown characters
                      if not any(char in label for char in ["☑", "☐", "\uf703", "\uf702"]):
                          self.data.append((crop, label))
+             elif detection_task:
+                 self.data.append((img_path, np.asarray(box_targets, dtype=np_dtype)))
              else:
                  self.data.append((
                      img_path,
@@ -32,6 +32,7 @@ class IC03(VisionDataset):
          train: whether the subset should be the training one
          use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
          recognition_task: whether the dataset should be used for recognition task
+         detection_task: whether the dataset should be used for detection task
          **kwargs: keyword arguments from `VisionDataset`.
      """
@@ -51,6 +52,7 @@ class IC03(VisionDataset):
          train: bool = True,
          use_polygons: bool = False,
          recognition_task: bool = False,
+         detection_task: bool = False,
          **kwargs: Any,
      ) -> None:
          url, sha256, file_name = self.TRAIN if train else self.TEST
@@ -62,8 +64,14 @@ class IC03(VisionDataset):
              pre_transforms=convert_target_to_relative if not recognition_task else None,
              **kwargs,
          )
+         if recognition_task and detection_task:
+             raise ValueError(
+                 "`recognition_task` and `detection_task` cannot be set to True simultaneously. "
+                 + "To get the whole dataset with boxes and labels leave both parameters to False."
+             )
+
          self.train = train
-         self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any]]]] = []
+         self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any], np.ndarray]]] = []
          np_dtype = np.float32
 
          # Load xml data
@@ -117,6 +125,8 @@ class IC03(VisionDataset):
                  for crop, label in zip(crops, labels):
                      if crop.shape[0] > 0 and crop.shape[1] > 0 and len(label) > 0:
                          self.data.append((crop, label))
+             elif detection_task:
+                 self.data.append((name.text, boxes))
              else:
                  self.data.append((name.text, dict(boxes=boxes, labels=labels)))
 
@@ -38,6 +38,7 @@ class IC13(AbstractDataset):
          label_folder: folder with all annotation files for the images
          use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
          recognition_task: whether the dataset should be used for recognition task
+         detection_task: whether the dataset should be used for detection task
          **kwargs: keyword arguments from `AbstractDataset`.
      """
 
@@ -47,11 +48,17 @@ class IC13(AbstractDataset):
          label_folder: str,
          use_polygons: bool = False,
          recognition_task: bool = False,
+         detection_task: bool = False,
          **kwargs: Any,
      ) -> None:
          super().__init__(
              img_folder, pre_transforms=convert_target_to_relative if not recognition_task else None, **kwargs
          )
+         if recognition_task and detection_task:
+             raise ValueError(
+                 "`recognition_task` and `detection_task` cannot be set to True simultaneously. "
+                 + "To get the whole dataset with boxes and labels leave both parameters to False."
+             )
 
          # File existence check
          if not os.path.exists(label_folder) or not os.path.exists(img_folder):
@@ -59,7 +66,7 @@ class IC13(AbstractDataset):
                  f"unable to locate {label_folder if not os.path.exists(label_folder) else img_folder}"
              )
 
-         self.data: List[Tuple[Union[Path, np.ndarray], Union[str, Dict[str, Any]]]] = []
+         self.data: List[Tuple[Union[Path, np.ndarray], Union[str, Dict[str, Any], np.ndarray]]] = []
          np_dtype = np.float32
 
          img_names = os.listdir(img_folder)
@@ -95,5 +102,7 @@ class IC13(AbstractDataset):
              crops = crop_bboxes_from_image(img_path=img_path, geoms=box_targets)
              for crop, label in zip(crops, labels):
                  self.data.append((crop, label))
+         elif detection_task:
+             self.data.append((img_path, box_targets))
          else:
              self.data.append((img_path, dict(boxes=box_targets, labels=labels)))
@@ -34,6 +34,7 @@ class IIIT5K(VisionDataset):
          train: whether the subset should be the training one
          use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
          recognition_task: whether the dataset should be used for recognition task
+         detection_task: whether the dataset should be used for detection task
          **kwargs: keyword arguments from `VisionDataset`.
      """
 
@@ -45,6 +46,7 @@ class IIIT5K(VisionDataset):
          train: bool = True,
          use_polygons: bool = False,
          recognition_task: bool = False,
+         detection_task: bool = False,
          **kwargs: Any,
      ) -> None:
          super().__init__(
@@ -55,6 +57,12 @@ class IIIT5K(VisionDataset):
              pre_transforms=convert_target_to_relative if not recognition_task else None,
              **kwargs,
          )
+         if recognition_task and detection_task:
+             raise ValueError(
+                 "`recognition_task` and `detection_task` cannot be set to True simultaneously. "
+                 + "To get the whole dataset with boxes and labels leave both parameters to False."
+             )
+
          self.train = train
 
          # Load mat data
@@ -62,7 +70,7 @@ class IIIT5K(VisionDataset):
          mat_file = "trainCharBound" if self.train else "testCharBound"
          mat_data = sio.loadmat(os.path.join(tmp_root, f"{mat_file}.mat"))[mat_file][0]
 
-         self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any]]]] = []
+         self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any], np.ndarray]]] = []
          np_dtype = np.float32
 
          for img_path, label, box_targets in tqdm(iterable=mat_data, desc="Unpacking IIIT5K", total=len(mat_data)):
@@ -73,24 +81,26 @@ class IIIT5K(VisionDataset):
              if not os.path.exists(os.path.join(tmp_root, _raw_path)):
                  raise FileNotFoundError(f"unable to locate {os.path.join(tmp_root, _raw_path)}")
 
+             if use_polygons:
+                 # (x, y) coordinates of top left, top right, bottom right, bottom left corners
+                 box_targets = [
+                     [
+                         [box[0], box[1]],
+                         [box[0] + box[2], box[1]],
+                         [box[0] + box[2], box[1] + box[3]],
+                         [box[0], box[1] + box[3]],
+                     ]
+                     for box in box_targets
+                 ]
+             else:
+                 # xmin, ymin, xmax, ymax
+                 box_targets = [[box[0], box[1], box[0] + box[2], box[1] + box[3]] for box in box_targets]
+
              if recognition_task:
                  self.data.append((_raw_path, _raw_label))
+             elif detection_task:
+                 self.data.append((_raw_path, np.asarray(box_targets, dtype=np_dtype)))
              else:
-                 if use_polygons:
-                     # (x, y) coordinates of top left, top right, bottom right, bottom left corners
-                     box_targets = [
-                         [
-                             [box[0], box[1]],
-                             [box[0] + box[2], box[1]],
-                             [box[0] + box[2], box[1] + box[3]],
-                             [box[0], box[1] + box[3]],
-                         ]
-                         for box in box_targets
-                     ]
-                 else:
-                     # xmin, ymin, xmax, ymax
-                     box_targets = [[box[0], box[1], box[0] + box[2], box[1] + box[3]] for box in box_targets]
-
                  # label are casted to list where each char corresponds to the character's bounding box
                  self.data.append((
                      _raw_path,
@@ -46,6 +46,7 @@ class IMGUR5K(AbstractDataset):
          train: whether the subset should be the training one
          use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
          recognition_task: whether the dataset should be used for recognition task
+         detection_task: whether the dataset should be used for detection task
          **kwargs: keyword arguments from `AbstractDataset`.
      """
 
@@ -56,17 +57,23 @@ class IMGUR5K(AbstractDataset):
          train: bool = True,
          use_polygons: bool = False,
          recognition_task: bool = False,
+         detection_task: bool = False,
          **kwargs: Any,
      ) -> None:
          super().__init__(
              img_folder, pre_transforms=convert_target_to_relative if not recognition_task else None, **kwargs
          )
+         if recognition_task and detection_task:
+             raise ValueError(
+                 "`recognition_task` and `detection_task` cannot be set to True simultaneously. "
+                 + "To get the whole dataset with boxes and labels leave both parameters to False."
+             )
 
          # File existence check
          if not os.path.exists(label_path) or not os.path.exists(img_folder):
              raise FileNotFoundError(f"unable to locate {label_path if not os.path.exists(label_path) else img_folder}")
 
-         self.data: List[Tuple[Union[str, Path, np.ndarray], Union[str, Dict[str, Any]]]] = []
+         self.data: List[Tuple[Union[str, Path, np.ndarray], Union[str, Dict[str, Any], np.ndarray]]] = []
          self.train = train
          np_dtype = np.float32
 
@@ -132,6 +139,8 @@ class IMGUR5K(AbstractDataset):
                      tmp_img = Image.fromarray(crop)
                      tmp_img.save(os.path.join(reco_folder_path, f"{reco_images_counter}.png"))
                      reco_images_counter += 1
+             elif detection_task:
+                 self.data.append((img_path, np.asarray(box_targets, dtype=np_dtype)))
              else:
                  self.data.append((img_path, dict(boxes=np.asarray(box_targets, dtype=np_dtype), labels=labels)))
 
@@ -33,6 +33,7 @@ class SROIE(VisionDataset):
          train: whether the subset should be the training one
          use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
          recognition_task: whether the dataset should be used for recognition task
+         detection_task: whether the dataset should be used for detection task
          **kwargs: keyword arguments from `VisionDataset`.
      """
 
@@ -52,6 +53,7 @@ class SROIE(VisionDataset):
          train: bool = True,
          use_polygons: bool = False,
          recognition_task: bool = False,
+         detection_task: bool = False,
          **kwargs: Any,
      ) -> None:
          url, sha256, name = self.TRAIN if train else self.TEST
@@ -63,10 +65,16 @@ class SROIE(VisionDataset):
              pre_transforms=convert_target_to_relative if not recognition_task else None,
              **kwargs,
          )
+         if recognition_task and detection_task:
+             raise ValueError(
+                 "`recognition_task` and `detection_task` cannot be set to True simultaneously. "
+                 + "To get the whole dataset with boxes and labels leave both parameters to False."
+             )
+
          self.train = train
 
          tmp_root = os.path.join(self.root, "images")
-         self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any]]]] = []
+         self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any], np.ndarray]]] = []
          np_dtype = np.float32
 
          for img_path in tqdm(iterable=os.listdir(tmp_root), desc="Unpacking SROIE", total=len(os.listdir(tmp_root))):
@@ -94,6 +102,8 @@ class SROIE(VisionDataset):
                  for crop, label in zip(crops, labels):
                      if crop.shape[0] > 0 and crop.shape[1] > 0 and len(label) > 0:
                          self.data.append((crop, label))
+             elif detection_task:
+                 self.data.append((img_path, coords))
              else:
                  self.data.append((img_path, dict(boxes=coords, labels=labels)))
 
@@ -32,6 +32,7 @@ class SVHN(VisionDataset):
          train: whether the subset should be the training one
          use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
          recognition_task: whether the dataset should be used for recognition task
+         detection_task: whether the dataset should be used for detection task
          **kwargs: keyword arguments from `VisionDataset`.
      """
 
@@ -52,6 +53,7 @@ class SVHN(VisionDataset):
          train: bool = True,
          use_polygons: bool = False,
          recognition_task: bool = False,
+         detection_task: bool = False,
          **kwargs: Any,
      ) -> None:
          url, sha256, name = self.TRAIN if train else self.TEST
@@ -63,8 +65,14 @@ class SVHN(VisionDataset):
              pre_transforms=convert_target_to_relative if not recognition_task else None,
              **kwargs,
          )
+         if recognition_task and detection_task:
+             raise ValueError(
+                 "`recognition_task` and `detection_task` cannot be set to True simultaneously. "
+                 + "To get the whole dataset with boxes and labels leave both parameters to False."
+             )
+
          self.train = train
-         self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any]]]] = []
+         self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any], np.ndarray]]] = []
          np_dtype = np.float32
 
          tmp_root = os.path.join(self.root, "train" if train else "test")
@@ -122,6 +130,8 @@ class SVHN(VisionDataset):
                  for crop, label in zip(crops, label_targets):
                      if crop.shape[0] > 0 and crop.shape[1] > 0 and len(label) > 0:
                          self.data.append((crop, label))
+             elif detection_task:
+                 self.data.append((img_name, box_targets))
              else:
                  self.data.append((img_name, dict(boxes=box_targets, labels=label_targets)))
 
@@ -32,6 +32,7 @@ class SVT(VisionDataset):
          train: whether the subset should be the training one
          use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
          recognition_task: whether the dataset should be used for recognition task
+         detection_task: whether the dataset should be used for detection task
          **kwargs: keyword arguments from `VisionDataset`.
      """
 
@@ -43,6 +44,7 @@ class SVT(VisionDataset):
          train: bool = True,
          use_polygons: bool = False,
          recognition_task: bool = False,
+         detection_task: bool = False,
          **kwargs: Any,
      ) -> None:
          super().__init__(
@@ -53,8 +55,14 @@ class SVT(VisionDataset):
              pre_transforms=convert_target_to_relative if not recognition_task else None,
              **kwargs,
          )
+         if recognition_task and detection_task:
+             raise ValueError(
+                 "`recognition_task` and `detection_task` cannot be set to True simultaneously. "
+                 + "To get the whole dataset with boxes and labels leave both parameters to False."
+             )
+
          self.train = train
-         self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any]]]] = []
+         self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any], np.ndarray]]] = []
          np_dtype = np.float32
 
          # Load xml data
@@ -108,6 +116,8 @@ class SVT(VisionDataset):
                  for crop, label in zip(crops, labels):
                      if crop.shape[0] > 0 and crop.shape[1] > 0 and len(label) > 0:
                          self.data.append((crop, label))
+             elif detection_task:
+                 self.data.append((name.text, boxes))
              else:
                  self.data.append((name.text, dict(boxes=boxes, labels=labels)))
 
@@ -35,6 +35,7 @@ class SynthText(VisionDataset):
          train: whether the subset should be the training one
          use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
          recognition_task: whether the dataset should be used for recognition task
+         detection_task: whether the dataset should be used for detection task
          **kwargs: keyword arguments from `VisionDataset`.
      """
 
@@ -46,6 +47,7 @@ class SynthText(VisionDataset):
          train: bool = True,
          use_polygons: bool = False,
          recognition_task: bool = False,
+         detection_task: bool = False,
          **kwargs: Any,
      ) -> None:
          super().__init__(
@@ -56,8 +58,14 @@ class SynthText(VisionDataset):
              pre_transforms=convert_target_to_relative if not recognition_task else None,
              **kwargs,
          )
+         if recognition_task and detection_task:
+             raise ValueError(
+                 "`recognition_task` and `detection_task` cannot be set to True simultaneously. "
+                 + "To get the whole dataset with boxes and labels leave both parameters to False."
+             )
+
          self.train = train
-         self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any]]]] = []
+         self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any], np.ndarray]]] = []
          np_dtype = np.float32
 
          # Load mat data
@@ -111,6 +119,8 @@ class SynthText(VisionDataset):
                      tmp_img = Image.fromarray(crop)
                      tmp_img.save(os.path.join(reco_folder_path, f"{reco_images_counter}.png"))
                      reco_images_counter += 1
+             elif detection_task:
+                 self.data.append((img_path[0], np.asarray(word_boxes, dtype=np_dtype)))
              else:
                  self.data.append((img_path[0], dict(boxes=np.asarray(word_boxes, dtype=np_dtype), labels=labels)))
 
@@ -169,8 +169,13 @@ def encode_sequences(
      return encoded_data
 
 
- def convert_target_to_relative(img: ImageTensor, target: Dict[str, Any]) -> Tuple[ImageTensor, Dict[str, Any]]:
-     target["boxes"] = convert_to_relative_coords(target["boxes"], get_img_shape(img))
+ def convert_target_to_relative(
+     img: ImageTensor, target: Union[np.ndarray, Dict[str, Any]]
+ ) -> Tuple[ImageTensor, Union[Dict[str, Any], np.ndarray]]:
+     if isinstance(target, np.ndarray):
+         target = convert_to_relative_coords(target, get_img_shape(img))
+     else:
+         target["boxes"] = convert_to_relative_coords(target["boxes"], get_img_shape(img))
      return img, target
 
 
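The widened `convert_target_to_relative` signature lets the same pre-transform serve both target layouts: the usual `dict(boxes=..., labels=...)` and the bare box array produced by `detection_task=True`. A minimal numpy sketch of the scaling it delegates to, assuming absolute (xmin, ymin, xmax, ymax) boxes and mirroring what `convert_to_relative_coords` does for straight boxes (`to_relative` is a hypothetical helper for illustration):

    import numpy as np

    def to_relative(boxes: np.ndarray, img_shape: tuple) -> np.ndarray:
        # Scale x coordinates by the image width and y coordinates by the height
        h, w = img_shape
        rel = boxes.astype(np.float32).copy()
        rel[..., [0, 2]] /= w
        rel[..., [1, 3]] /= h
        return rel

    print(to_relative(np.array([[20, 10, 180, 90]]), (100, 200)))
    # -> [[0.1 0.1 0.9 0.9]]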
@@ -25,6 +25,7 @@ VOCABS: Dict[str, str] = {
      "hindi_punctuation": "।,?!:्ॐ॰॥॰",
      "bangla_letters": "অআইঈউঊঋএঐওঔকখগঘঙচছজঝঞটঠডঢণতথদধনপফবভমযরলশষসহ়ঽািীুূৃেৈোৌ্ৎংঃঁ",
      "bangla_digits": "০১২৩৪৫৬৭৮৯",
+     "generic_cyrillic_letters": "абвгдежзийклмнопрстуфхцчшщьюяАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЬЮЯ",
  }
 
  VOCABS["latin"] = VOCABS["digits"] + VOCABS["ascii_letters"] + VOCABS["punctuation"]
@@ -53,12 +54,15 @@ VOCABS["finnish"] = VOCABS["english"] + "äöÄÖ"
  VOCABS["swedish"] = VOCABS["english"] + "åäöÅÄÖ"
  VOCABS["vietnamese"] = (
      VOCABS["english"]
-     + "áàảạãăắằẳẵặâấầẩẫậéèẻẽẹêếềểễệóòỏõọôốồổộỗơớờởợỡúùủũụưứừửữựiíìỉĩịýỳỷỹỵ"
-     + "ÁÀẢẠÃĂẮẰẲẴẶÂẤẦẨẪẬÉÈẺẼẸÊẾỀỂỄỆÓÒỎÕỌÔỐỒỔỘỖƠỚỜỞỢỠÚÙỦŨỤƯỨỪỬỮỰIÍÌỈĨỊÝỲỶỸỴ"
+     + "áàảạãăắằẳẵặâấầẩẫậđéèẻẽẹêếềểễệóòỏõọôốồổộỗơớờởợỡúùủũụưứừửữựiíìỉĩịýỳỷỹỵ"
+     + "ÁÀẢẠÃĂẮẰẲẴẶÂẤẦẨẪẬĐÉÈẺẼẸÊẾỀỂỄỆÓÒỎÕỌÔỐỒỔỘỖƠỚỜỞỢỠÚÙỦŨỤƯỨỪỬỮỰIÍÌỈĨỊÝỲỶỸỴ"
  )
  VOCABS["hebrew"] = VOCABS["english"] + "אבגדהוזחטיכלמנסעפצקרשת" + "₪"
  VOCABS["hindi"] = VOCABS["hindi_letters"] + VOCABS["hindi_digits"] + VOCABS["hindi_punctuation"]
  VOCABS["bangla"] = VOCABS["bangla_letters"] + VOCABS["bangla_digits"]
+ VOCABS["ukrainian"] = (
+     VOCABS["generic_cyrillic_letters"] + VOCABS["digits"] + VOCABS["punctuation"] + VOCABS["currency"] + "ґіїєҐІЇЄ₴"
+ )
  VOCABS["multilingual"] = "".join(
      dict.fromkeys(
          VOCABS["french"]
@@ -40,6 +40,7 @@ class WILDRECEIPT(AbstractDataset):
          train: whether the subset should be the training one
          use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
          recognition_task: whether the dataset should be used for recognition task
+         detection_task: whether the dataset should be used for detection task
          **kwargs: keyword arguments from `AbstractDataset`.
      """
 
@@ -50,11 +51,19 @@ class WILDRECEIPT(AbstractDataset):
          train: bool = True,
          use_polygons: bool = False,
          recognition_task: bool = False,
+         detection_task: bool = False,
          **kwargs: Any,
      ) -> None:
          super().__init__(
              img_folder, pre_transforms=convert_target_to_relative if not recognition_task else None, **kwargs
          )
+         # Task check
+         if recognition_task and detection_task:
+             raise ValueError(
+                 "`recognition_task` and `detection_task` cannot be set to True simultaneously. "
+                 + "To get the whole dataset with boxes and labels leave both parameters to False."
+             )
+
          # File existence check
          if not os.path.exists(label_path) or not os.path.exists(img_folder):
              raise FileNotFoundError(f"unable to locate {label_path if not os.path.exists(label_path) else img_folder}")
@@ -62,7 +71,7 @@ class WILDRECEIPT(AbstractDataset):
          tmp_root = img_folder
          self.train = train
          np_dtype = np.float32
-         self.data: List[Tuple[Union[str, Path, np.ndarray], Union[str, Dict[str, Any]]]] = []
+         self.data: List[Tuple[Union[str, Path, np.ndarray], Union[str, Dict[str, Any], np.ndarray]]] = []
 
          with open(label_path, "r") as file:
              data = file.read()
@@ -100,6 +109,8 @@ class WILDRECEIPT(AbstractDataset):
                  for crop, label in zip(crops, list(text_targets)):
                      if label and " " not in label:
                          self.data.append((crop, label))
+             elif detection_task:
+                 self.data.append((img_path, np.asarray(box_targets, dtype=int).clip(min=0)))
              else:
                  self.data.append((
                      img_path,
@@ -35,6 +35,20 @@ else: # pragma: no cover
      logging.info("Disabling PyTorch because USE_TF is set")
      _torch_available = False
 
+ # Compatibility fix to make sure tensorflow.keras stays at Keras 2
+ if "TF_USE_LEGACY_KERAS" not in os.environ:
+     os.environ["TF_USE_LEGACY_KERAS"] = "1"
+
+ elif os.environ["TF_USE_LEGACY_KERAS"] != "1":
+     raise ValueError(
+         "docTR is only compatible with Keras 2, but you have explicitly set `TF_USE_LEGACY_KERAS` to `0`. "
+     )
+
+
+ def ensure_keras_v2() -> None:  # pragma: no cover
+     if not os.environ.get("TF_USE_LEGACY_KERAS") == "1":
+         os.environ["TF_USE_LEGACY_KERAS"] = "1"
+
 
  if USE_TF in ENV_VARS_TRUE_AND_AUTO_VALUES and USE_TORCH not in ENV_VARS_TRUE_VALUES:
      _tf_available = importlib.util.find_spec("tensorflow") is not None
@@ -65,6 +79,11 @@ if USE_TF in ENV_VARS_TRUE_AND_AUTO_VALUES and USE_TORCH not in ENV_VARS_TRUE_VA
              _tf_available = False
          else:
              logging.info(f"TensorFlow version {_tf_version} available.")
+             ensure_keras_v2()
+             import tensorflow as tf
+
+             # Enable eager execution - this is required for some models to work properly
+             tf.config.run_functions_eagerly(True)
  else: # pragma: no cover
      logging.info("Disabling Tensorflow because USE_TORCH is set")
      _tf_available = False
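Taken together, the TensorFlow path now pins `tensorflow.keras` to Keras 2 (via `TF_USE_LEGACY_KERAS=1`) and switches on eager execution at import time; the only rejected configuration is an explicit opt-out. A hedged sketch of the resulting import-time contract, assuming the environment variables are set before doctr is imported:

    import os

    os.environ["USE_TF"] = "1"
    # os.environ["TF_USE_LEGACY_KERAS"] = "0"  # explicit opt-out -> ValueError at import

    import doctr  # sets TF_USE_LEGACY_KERAS=1 if unset, then enables eager execution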