python-doctr 0.10.0.tar.gz → 0.12.0.tar.gz

This diff compares the contents of two publicly released versions of the package, exactly as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (218)
  1. {python_doctr-0.10.0 → python_doctr-0.12.0}/PKG-INFO +38 -20
  2. {python_doctr-0.10.0 → python_doctr-0.12.0}/README.md +28 -14
  3. python_doctr-0.12.0/doctr/contrib/__init__.py +1 -0
  4. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/contrib/artefacts.py +7 -9
  5. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/contrib/base.py +8 -17
  6. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/__init__.py +1 -0
  7. python_doctr-0.12.0/doctr/datasets/coco_text.py +139 -0
  8. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/cord.py +10 -8
  9. python_doctr-0.12.0/doctr/datasets/datasets/__init__.py +6 -0
  10. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/datasets/base.py +16 -16
  11. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/datasets/pytorch.py +12 -12
  12. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/datasets/tensorflow.py +10 -10
  13. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/detection.py +6 -9
  14. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/doc_artefacts.py +3 -4
  15. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/funsd.py +9 -8
  16. python_doctr-0.12.0/doctr/datasets/generator/__init__.py +6 -0
  17. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/generator/base.py +16 -17
  18. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/generator/pytorch.py +1 -3
  19. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/generator/tensorflow.py +1 -3
  20. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/ic03.py +5 -6
  21. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/ic13.py +6 -6
  22. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/iiit5k.py +10 -6
  23. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/iiithws.py +4 -5
  24. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/imgur5k.py +15 -7
  25. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/loader.py +4 -7
  26. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/mjsynth.py +6 -5
  27. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/ocr.py +3 -4
  28. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/orientation.py +3 -4
  29. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/recognition.py +4 -5
  30. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/sroie.py +6 -5
  31. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/svhn.py +7 -6
  32. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/svt.py +6 -7
  33. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/synthtext.py +19 -7
  34. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/utils.py +41 -35
  35. python_doctr-0.12.0/doctr/datasets/vocabs.py +1140 -0
  36. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/datasets/wildreceipt.py +14 -10
  37. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/file_utils.py +11 -7
  38. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/io/elements.py +96 -82
  39. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/io/html.py +1 -3
  40. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/io/image/__init__.py +3 -3
  41. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/io/image/base.py +2 -5
  42. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/io/image/pytorch.py +3 -12
  43. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/io/image/tensorflow.py +2 -11
  44. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/io/pdf.py +5 -7
  45. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/io/reader.py +5 -11
  46. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/_utils.py +15 -23
  47. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/builder.py +30 -48
  48. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/classification/__init__.py +1 -0
  49. {python_doctr-0.10.0/doctr/models/classification/mobilenet → python_doctr-0.12.0/doctr/models/classification/magc_resnet}/__init__.py +3 -3
  50. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/classification/magc_resnet/pytorch.py +11 -15
  51. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/classification/magc_resnet/tensorflow.py +11 -14
  52. {python_doctr-0.10.0/doctr/models/classification/vgg → python_doctr-0.12.0/doctr/models/classification/mobilenet}/__init__.py +3 -3
  53. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/classification/mobilenet/pytorch.py +20 -18
  54. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/classification/mobilenet/tensorflow.py +19 -23
  55. python_doctr-0.12.0/doctr/models/classification/predictor/__init__.py +6 -0
  56. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/classification/predictor/pytorch.py +7 -9
  57. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/classification/predictor/tensorflow.py +6 -8
  58. python_doctr-0.12.0/doctr/models/classification/resnet/__init__.py +6 -0
  59. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/classification/resnet/pytorch.py +47 -34
  60. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/classification/resnet/tensorflow.py +45 -35
  61. {python_doctr-0.10.0/doctr/transforms/functional → python_doctr-0.12.0/doctr/models/classification/textnet}/__init__.py +3 -3
  62. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/classification/textnet/pytorch.py +20 -18
  63. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/classification/textnet/tensorflow.py +19 -17
  64. python_doctr-0.12.0/doctr/models/classification/vgg/__init__.py +6 -0
  65. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/classification/vgg/pytorch.py +21 -8
  66. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/classification/vgg/tensorflow.py +20 -14
  67. python_doctr-0.12.0/doctr/models/classification/vip/__init__.py +4 -0
  68. python_doctr-0.12.0/doctr/models/classification/vip/layers/__init__.py +4 -0
  69. python_doctr-0.12.0/doctr/models/classification/vip/layers/pytorch.py +615 -0
  70. python_doctr-0.12.0/doctr/models/classification/vip/pytorch.py +505 -0
  71. python_doctr-0.12.0/doctr/models/classification/vit/__init__.py +6 -0
  72. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/classification/vit/pytorch.py +18 -15
  73. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/classification/vit/tensorflow.py +15 -12
  74. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/classification/zoo.py +23 -14
  75. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/core.py +3 -3
  76. python_doctr-0.12.0/doctr/models/detection/_utils/__init__.py +7 -0
  77. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/detection/_utils/base.py +4 -7
  78. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/detection/_utils/pytorch.py +1 -5
  79. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/detection/_utils/tensorflow.py +1 -5
  80. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/detection/core.py +2 -8
  81. python_doctr-0.12.0/doctr/models/detection/differentiable_binarization/__init__.py +6 -0
  82. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/detection/differentiable_binarization/base.py +10 -21
  83. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/detection/differentiable_binarization/pytorch.py +37 -31
  84. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/detection/differentiable_binarization/tensorflow.py +26 -29
  85. python_doctr-0.12.0/doctr/models/detection/fast/__init__.py +6 -0
  86. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/detection/fast/base.py +8 -17
  87. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/detection/fast/pytorch.py +37 -35
  88. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/detection/fast/tensorflow.py +24 -28
  89. python_doctr-0.12.0/doctr/models/detection/linknet/__init__.py +6 -0
  90. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/detection/linknet/base.py +8 -18
  91. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/detection/linknet/pytorch.py +34 -28
  92. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/detection/linknet/tensorflow.py +24 -25
  93. python_doctr-0.12.0/doctr/models/detection/predictor/__init__.py +6 -0
  94. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/detection/predictor/pytorch.py +6 -7
  95. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/detection/predictor/tensorflow.py +5 -6
  96. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/detection/zoo.py +27 -7
  97. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/factory/hub.py +6 -10
  98. python_doctr-0.12.0/doctr/models/kie_predictor/__init__.py +6 -0
  99. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/kie_predictor/base.py +4 -5
  100. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/kie_predictor/pytorch.py +19 -20
  101. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/kie_predictor/tensorflow.py +14 -15
  102. python_doctr-0.12.0/doctr/models/modules/layers/__init__.py +6 -0
  103. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/modules/layers/pytorch.py +55 -10
  104. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/modules/layers/tensorflow.py +5 -7
  105. python_doctr-0.12.0/doctr/models/modules/transformer/__init__.py +6 -0
  106. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/modules/transformer/pytorch.py +12 -13
  107. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/modules/transformer/tensorflow.py +9 -10
  108. python_doctr-0.12.0/doctr/models/modules/vision_transformer/__init__.py +6 -0
  109. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/modules/vision_transformer/pytorch.py +2 -3
  110. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/modules/vision_transformer/tensorflow.py +3 -3
  111. python_doctr-0.12.0/doctr/models/predictor/__init__.py +6 -0
  112. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/predictor/base.py +28 -29
  113. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/predictor/pytorch.py +13 -14
  114. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/predictor/tensorflow.py +9 -10
  115. python_doctr-0.12.0/doctr/models/preprocessor/__init__.py +6 -0
  116. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/preprocessor/pytorch.py +13 -17
  117. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/preprocessor/tensorflow.py +10 -14
  118. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/recognition/__init__.py +1 -0
  119. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/recognition/core.py +3 -7
  120. python_doctr-0.12.0/doctr/models/recognition/crnn/__init__.py +6 -0
  121. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/recognition/crnn/pytorch.py +30 -29
  122. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/recognition/crnn/tensorflow.py +21 -24
  123. python_doctr-0.12.0/doctr/models/recognition/master/__init__.py +6 -0
  124. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/recognition/master/base.py +3 -7
  125. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/recognition/master/pytorch.py +32 -25
  126. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/recognition/master/tensorflow.py +22 -25
  127. python_doctr-0.12.0/doctr/models/recognition/parseq/__init__.py +6 -0
  128. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/recognition/parseq/base.py +3 -7
  129. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/recognition/parseq/pytorch.py +47 -29
  130. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/recognition/parseq/tensorflow.py +29 -27
  131. python_doctr-0.12.0/doctr/models/recognition/predictor/__init__.py +6 -0
  132. python_doctr-0.12.0/doctr/models/recognition/predictor/_utils.py +145 -0
  133. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/recognition/predictor/pytorch.py +9 -9
  134. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/recognition/predictor/tensorflow.py +8 -9
  135. python_doctr-0.12.0/doctr/models/recognition/sar/__init__.py +6 -0
  136. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/recognition/sar/pytorch.py +30 -22
  137. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/recognition/sar/tensorflow.py +22 -24
  138. python_doctr-0.12.0/doctr/models/recognition/utils.py +93 -0
  139. python_doctr-0.12.0/doctr/models/recognition/viptr/__init__.py +4 -0
  140. python_doctr-0.12.0/doctr/models/recognition/viptr/pytorch.py +277 -0
  141. python_doctr-0.12.0/doctr/models/recognition/vitstr/__init__.py +6 -0
  142. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/recognition/vitstr/base.py +3 -7
  143. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/recognition/vitstr/pytorch.py +28 -21
  144. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/recognition/vitstr/tensorflow.py +22 -23
  145. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/recognition/zoo.py +27 -11
  146. python_doctr-0.12.0/doctr/models/utils/__init__.py +6 -0
  147. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/utils/pytorch.py +41 -34
  148. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/utils/tensorflow.py +31 -23
  149. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/zoo.py +1 -5
  150. python_doctr-0.12.0/doctr/transforms/functional/__init__.py +6 -0
  151. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/transforms/functional/base.py +4 -11
  152. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/transforms/functional/pytorch.py +20 -28
  153. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/transforms/functional/tensorflow.py +10 -22
  154. python_doctr-0.12.0/doctr/transforms/modules/__init__.py +8 -0
  155. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/transforms/modules/base.py +48 -55
  156. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/transforms/modules/pytorch.py +58 -22
  157. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/transforms/modules/tensorflow.py +18 -32
  158. python_doctr-0.12.0/doctr/utils/common_types.py +17 -0
  159. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/utils/data.py +9 -13
  160. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/utils/fonts.py +2 -7
  161. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/utils/geometry.py +17 -48
  162. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/utils/metrics.py +17 -37
  163. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/utils/multithreading.py +4 -6
  164. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/utils/reconstitution.py +9 -13
  165. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/utils/repr.py +2 -3
  166. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/utils/visualization.py +16 -29
  167. python_doctr-0.12.0/doctr/version.py +1 -0
  168. {python_doctr-0.10.0 → python_doctr-0.12.0}/pyproject.toml +9 -6
  169. {python_doctr-0.10.0 → python_doctr-0.12.0}/python_doctr.egg-info/PKG-INFO +38 -20
  170. {python_doctr-0.10.0 → python_doctr-0.12.0}/python_doctr.egg-info/SOURCES.txt +7 -0
  171. {python_doctr-0.10.0 → python_doctr-0.12.0}/python_doctr.egg-info/requires.txt +13 -2
  172. {python_doctr-0.10.0 → python_doctr-0.12.0}/setup.py +2 -2
  173. python_doctr-0.10.0/doctr/contrib/__init__.py +0 -0
  174. python_doctr-0.10.0/doctr/datasets/datasets/__init__.py +0 -6
  175. python_doctr-0.10.0/doctr/datasets/generator/__init__.py +0 -6
  176. python_doctr-0.10.0/doctr/datasets/vocabs.py +0 -82
  177. python_doctr-0.10.0/doctr/models/classification/magc_resnet/__init__.py +0 -6
  178. python_doctr-0.10.0/doctr/models/classification/predictor/__init__.py +0 -6
  179. python_doctr-0.10.0/doctr/models/classification/resnet/__init__.py +0 -6
  180. python_doctr-0.10.0/doctr/models/classification/textnet/__init__.py +0 -6
  181. python_doctr-0.10.0/doctr/models/classification/vit/__init__.py +0 -6
  182. python_doctr-0.10.0/doctr/models/detection/_utils/__init__.py +0 -7
  183. python_doctr-0.10.0/doctr/models/detection/differentiable_binarization/__init__.py +0 -6
  184. python_doctr-0.10.0/doctr/models/detection/fast/__init__.py +0 -6
  185. python_doctr-0.10.0/doctr/models/detection/linknet/__init__.py +0 -6
  186. python_doctr-0.10.0/doctr/models/detection/predictor/__init__.py +0 -6
  187. python_doctr-0.10.0/doctr/models/kie_predictor/__init__.py +0 -6
  188. python_doctr-0.10.0/doctr/models/modules/layers/__init__.py +0 -6
  189. python_doctr-0.10.0/doctr/models/modules/transformer/__init__.py +0 -6
  190. python_doctr-0.10.0/doctr/models/modules/vision_transformer/__init__.py +0 -6
  191. python_doctr-0.10.0/doctr/models/predictor/__init__.py +0 -6
  192. python_doctr-0.10.0/doctr/models/preprocessor/__init__.py +0 -6
  193. python_doctr-0.10.0/doctr/models/recognition/crnn/__init__.py +0 -6
  194. python_doctr-0.10.0/doctr/models/recognition/master/__init__.py +0 -6
  195. python_doctr-0.10.0/doctr/models/recognition/parseq/__init__.py +0 -6
  196. python_doctr-0.10.0/doctr/models/recognition/predictor/__init__.py +0 -6
  197. python_doctr-0.10.0/doctr/models/recognition/predictor/_utils.py +0 -86
  198. python_doctr-0.10.0/doctr/models/recognition/sar/__init__.py +0 -6
  199. python_doctr-0.10.0/doctr/models/recognition/utils.py +0 -89
  200. python_doctr-0.10.0/doctr/models/recognition/vitstr/__init__.py +0 -6
  201. python_doctr-0.10.0/doctr/models/utils/__init__.py +0 -6
  202. python_doctr-0.10.0/doctr/transforms/modules/__init__.py +0 -8
  203. python_doctr-0.10.0/doctr/utils/common_types.py +0 -18
  204. python_doctr-0.10.0/doctr/version.py +0 -1
  205. {python_doctr-0.10.0 → python_doctr-0.12.0}/LICENSE +0 -0
  206. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/__init__.py +0 -0
  207. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/io/__init__.py +0 -0
  208. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/__init__.py +0 -0
  209. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/detection/__init__.py +0 -0
  210. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/factory/__init__.py +0 -0
  211. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/models/modules/__init__.py +0 -0
  212. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/py.typed +0 -0
  213. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/transforms/__init__.py +0 -0
  214. {python_doctr-0.10.0 → python_doctr-0.12.0}/doctr/utils/__init__.py +0 -0
  215. {python_doctr-0.10.0 → python_doctr-0.12.0}/python_doctr.egg-info/dependency_links.txt +0 -0
  216. {python_doctr-0.10.0 → python_doctr-0.12.0}/python_doctr.egg-info/top_level.txt +0 -0
  217. {python_doctr-0.10.0 → python_doctr-0.12.0}/python_doctr.egg-info/zip-safe +0 -0
  218. {python_doctr-0.10.0 → python_doctr-0.12.0}/setup.cfg +0 -0
--- python_doctr-0.10.0/PKG-INFO
+++ python_doctr-0.12.0/PKG-INFO
@@ -1,6 +1,6 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.4
 Name: python-doctr
-Version: 0.10.0
+Version: 0.12.0
 Summary: Document Text Recognition (docTR): deep Learning for high-performance OCR on documents.
 Author-email: Mindee <contact@mindee.com>
 Maintainer: François-Guillaume Fernandez, Charles Gaillard, Olivier Dulcy, Felix Dittrich
@@ -219,11 +219,11 @@ Classifier: License :: OSI Approved :: Apache Software License
 Classifier: Natural Language :: English
 Classifier: Operating System :: OS Independent
 Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
-Requires-Python: <4,>=3.9.0
+Requires-Python: <4,>=3.10.0
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: numpy<3.0.0,>=1.16.0
@@ -239,9 +239,11 @@ Requires-Dist: huggingface-hub<1.0.0,>=0.20.0
 Requires-Dist: Pillow>=9.2.0
 Requires-Dist: defusedxml>=0.7.0
 Requires-Dist: anyascii>=0.3.2
+Requires-Dist: validators>=0.18.0
 Requires-Dist: tqdm>=4.30.0
 Provides-Extra: tf
-Requires-Dist: tensorflow<3.0.0,>=2.15.0; extra == "tf"
+Requires-Dist: tensorflow[and-cuda]<3.0.0,>=2.15.0; sys_platform == "linux" and extra == "tf"
+Requires-Dist: tensorflow<3.0.0,>=2.15.0; sys_platform != "linux" and extra == "tf"
 Requires-Dist: tf-keras<3.0.0,>=2.15.0; extra == "tf"
 Requires-Dist: tf2onnx<2.0.0,>=1.16.0; extra == "tf"
 Provides-Extra: torch
@@ -275,7 +277,8 @@ Requires-Dist: sphinx-markdown-tables>=0.0.15; extra == "docs"
 Requires-Dist: sphinx-tabs>=3.3.0; extra == "docs"
 Requires-Dist: furo>=2022.3.4; extra == "docs"
 Provides-Extra: dev
-Requires-Dist: tensorflow<3.0.0,>=2.15.0; extra == "dev"
+Requires-Dist: tensorflow[and-cuda]<3.0.0,>=2.15.0; sys_platform == "linux" and extra == "dev"
+Requires-Dist: tensorflow<3.0.0,>=2.15.0; sys_platform != "linux" and extra == "dev"
 Requires-Dist: tf-keras<3.0.0,>=2.15.0; extra == "dev"
 Requires-Dist: tf2onnx<2.0.0,>=1.16.0; extra == "dev"
 Requires-Dist: torch<3.0.0,>=2.0.0; extra == "dev"
@@ -300,12 +303,13 @@ Requires-Dist: recommonmark>=0.7.1; extra == "dev"
 Requires-Dist: sphinx-markdown-tables>=0.0.15; extra == "dev"
 Requires-Dist: sphinx-tabs>=3.3.0; extra == "dev"
 Requires-Dist: furo>=2022.3.4; extra == "dev"
+Dynamic: license-file
 
 <p align="center">
   <img src="https://github.com/mindee/doctr/raw/main/docs/images/Logo_doctr.gif" width="40%">
 </p>
 
-[![Slack Icon](https://img.shields.io/badge/Slack-Community-4A154B?style=flat-square&logo=slack&logoColor=white)](https://slack.mindee.com) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE) ![Build Status](https://github.com/mindee/doctr/workflows/builds/badge.svg) [![Docker Images](https://img.shields.io/badge/Docker-4287f5?style=flat&logo=docker&logoColor=white)](https://github.com/mindee/doctr/pkgs/container/doctr) [![codecov](https://codecov.io/gh/mindee/doctr/branch/main/graph/badge.svg?token=577MO567NM)](https://codecov.io/gh/mindee/doctr) [![CodeFactor](https://www.codefactor.io/repository/github/mindee/doctr/badge?s=bae07db86bb079ce9d6542315b8c6e70fa708a7e)](https://www.codefactor.io/repository/github/mindee/doctr) [![Codacy Badge](https://api.codacy.com/project/badge/Grade/340a76749b634586a498e1c0ab998f08)](https://app.codacy.com/gh/mindee/doctr?utm_source=github.com&utm_medium=referral&utm_content=mindee/doctr&utm_campaign=Badge_Grade) [![Doc Status](https://github.com/mindee/doctr/workflows/doc-status/badge.svg)](https://mindee.github.io/doctr) [![Pypi](https://img.shields.io/badge/pypi-v0.9.0-blue.svg)](https://pypi.org/project/python-doctr/) [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/mindee/doctr) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/mindee/notebooks/blob/main/doctr/quicktour.ipynb)
+[![Slack Icon](https://img.shields.io/badge/Slack-Community-4A154B?style=flat-square&logo=slack&logoColor=white)](https://slack.mindee.com) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE) ![Build Status](https://github.com/mindee/doctr/workflows/builds/badge.svg) [![Docker Images](https://img.shields.io/badge/Docker-4287f5?style=flat&logo=docker&logoColor=white)](https://github.com/mindee/doctr/pkgs/container/doctr) [![codecov](https://codecov.io/gh/mindee/doctr/branch/main/graph/badge.svg?token=577MO567NM)](https://codecov.io/gh/mindee/doctr) [![CodeFactor](https://www.codefactor.io/repository/github/mindee/doctr/badge?s=bae07db86bb079ce9d6542315b8c6e70fa708a7e)](https://www.codefactor.io/repository/github/mindee/doctr) [![Codacy Badge](https://api.codacy.com/project/badge/Grade/340a76749b634586a498e1c0ab998f08)](https://app.codacy.com/gh/mindee/doctr?utm_source=github.com&utm_medium=referral&utm_content=mindee/doctr&utm_campaign=Badge_Grade) [![Doc Status](https://github.com/mindee/doctr/workflows/doc-status/badge.svg)](https://mindee.github.io/doctr) [![Pypi](https://img.shields.io/badge/pypi-v0.12.0-blue.svg)](https://pypi.org/project/python-doctr/) [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/mindee/doctr) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/mindee/notebooks/blob/main/doctr/quicktour.ipynb) [![Gurubase](https://img.shields.io/badge/Gurubase-Ask%20docTR%20Guru-006BFF)](https://gurubase.io/g/doctr)
 
 
 **Optical Character Recognition made seamless & accessible to anyone, powered by TensorFlow 2 & PyTorch**
@@ -436,9 +440,22 @@ The KIE predictor results per page are in a dictionary format with each key repr
 
 ## Installation
 
+> [!WARNING]
+> **TensorFlow Backend Deprecation Notice**
+>
+> Using docTR with TensorFlow as a backend is deprecated and will be removed in the next major release (v1.0.0).
+> We **recommend switching to the PyTorch backend**, which is more actively maintained and supports the latest features and models.
+> Alternatively, you can use [OnnxTR](https://github.com/felixdittrich92/OnnxTR), which does **not** require TensorFlow or PyTorch.
+>
+> This decision was made based on several considerations:
+>
+> - Allows better focus on improving the core library
+> - Frees up resources to develop new features faster
+> - Enables more targeted optimizations with PyTorch
+
 ### Prerequisites
 
-Python 3.9 (or higher) and [pip](https://pip.pypa.io/en/stable/) are required to install docTR.
+Python 3.10 (or higher) and [pip](https://pip.pypa.io/en/stable/) are required to install docTR.
 
 ### Latest release
 
@@ -502,6 +519,7 @@ Credits where it's due: this repository is implementing, among others, architect
 - MASTER: [MASTER: Multi-Aspect Non-local Network for Scene Text Recognition](https://arxiv.org/pdf/1910.02562.pdf).
 - ViTSTR: [Vision Transformer for Fast and Efficient Scene Text Recognition](https://arxiv.org/pdf/2105.08582.pdf).
 - PARSeq: [Scene Text Recognition with Permuted Autoregressive Sequence Models](https://arxiv.org/pdf/2207.06966).
+- VIPTR: [A Vision Permutable Extractor for Fast and Efficient Scene Text Recognition](https://arxiv.org/abs/2401.10110).
 
 ## More goodies
 
@@ -557,37 +575,37 @@ Check out our [TensorFlow.js demo](https://github.com/mindee/doctr-tfjs-demo) to
 
 ### Docker container
 
-[We offer Docker container support for easy testing and deployment](https://github.com/mindee/doctr/pkgs/container/doctr).
+We offer Docker container support for easy testing and deployment. [Here are the available docker tags.](https://github.com/mindee/doctr/pkgs/container/doctr).
 
 #### Using GPU with docTR Docker Images
 
-The docTR Docker images are GPU-ready and based on CUDA `11.8`.
-However, to use GPU support with these Docker images, please ensure that Docker is configured to use your GPU.
+The docTR Docker images are GPU-ready and based on CUDA `12.2`. Make sure your host is **at least `12.2`**, otherwise Torch or TensorFlow won't be able to initialize the GPU.
+Please ensure that Docker is configured to use your GPU.
 
 To verify and configure GPU support for Docker, please follow the instructions provided in the [NVIDIA Container Toolkit Installation Guide](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html).
 
 Once Docker is configured to use GPUs, you can run docTR Docker containers with GPU support:
 
 ```shell
-docker run -it --gpus all ghcr.io/mindee/doctr:tf-py3.8.18-gpu-2023-09 bash
+docker run -it --gpus all ghcr.io/mindee/doctr:torch-py3.9.18-2024-10 bash
 ```
 
 #### Available Tags
 
-The Docker images for docTR follow a specific tag nomenclature: `<framework>-py<python_version>-<system>-<doctr_version|YYYY-MM>`. Here's a breakdown of the tag structure:
+The Docker images for docTR follow a specific tag nomenclature: `<deps>-py<python_version>-<doctr_version|YYYY-MM>`. Here's a breakdown of the tag structure:
 
-- `<framework>`: `tf` (TensorFlow) or `torch` (PyTorch).
-- `<python_version>`: `3.8.18`, `3.9.18`, or `3.10.13`.
-- `<system>`: `cpu` or `gpu`
-- `<doctr_version>`: a tag >= `v0.7.1`
-- `<YYYY-MM>`: e.g. `2023-09`
+- `<deps>`: `tf`, `torch`, `tf-viz-html-contrib` or `torch-viz-html-contrib`.
+- `<python_version>`: `3.9.18`, `3.10.13` or `3.11.8`.
+- `<doctr_version>`: a tag >= `v0.11.0`
+- `<YYYY-MM>`: e.g. `2014-10`
 
 Here are examples of different image tags:
 
 | Tag | Description |
 |----------------------------|---------------------------------------------------|
-| `tf-py3.8.18-cpu-v0.7.1` | TensorFlow version `3.8.18` with docTR `v0.7.1`. |
-| `torch-py3.9.18-gpu-2023-09`| PyTorch version `3.9.18` with GPU support and a monthly build from `2023-09`. |
+| `tf-py3.10.13-v0.11.0` | TensorFlow version `3.10.13` with docTR `v0.11.0`. |
+| `torch-viz-html-contrib-py3.11.8-2024-10` | Torch with extra dependencies version `3.11.8` from latest commit on `main` in `2024-10`. |
+| `torch-py3.11.8-2024-10`| PyTorch version `3.11.8` from latest commit on `main` in `2024-10`. |
 
 #### Building Docker Images Locally
 
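The platform-conditional TensorFlow requirements above rely on PEP 508 environment markers: on Linux, the `tf` extra now pulls `tensorflow[and-cuda]`, everywhere else plain `tensorflow`. A minimal sketch of how a resolver evaluates these markers, assuming the third-party `packaging` library (pip vendors its own copy):

```python
# Sketch: evaluating the markers from the "tf" extra above with `packaging`.
from packaging.markers import Marker

cuda = Marker('sys_platform == "linux" and extra == "tf"')
plain = Marker('sys_platform != "linux" and extra == "tf"')

# `pip install "python-doctr[tf]"` on Linux matches the tensorflow[and-cuda] line...
print(cuda.evaluate({"sys_platform": "linux", "extra": "tf"}))    # True
# ...while on macOS/Windows only the plain tensorflow line applies.
print(cuda.evaluate({"sys_platform": "darwin", "extra": "tf"}))   # False
print(plain.evaluate({"sys_platform": "darwin", "extra": "tf"}))  # True
```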
--- python_doctr-0.10.0/README.md
+++ python_doctr-0.12.0/README.md
@@ -2,7 +2,7 @@
   <img src="https://github.com/mindee/doctr/raw/main/docs/images/Logo_doctr.gif" width="40%">
 </p>
 
-[![Slack Icon](https://img.shields.io/badge/Slack-Community-4A154B?style=flat-square&logo=slack&logoColor=white)](https://slack.mindee.com) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE) ![Build Status](https://github.com/mindee/doctr/workflows/builds/badge.svg) [![Docker Images](https://img.shields.io/badge/Docker-4287f5?style=flat&logo=docker&logoColor=white)](https://github.com/mindee/doctr/pkgs/container/doctr) [![codecov](https://codecov.io/gh/mindee/doctr/branch/main/graph/badge.svg?token=577MO567NM)](https://codecov.io/gh/mindee/doctr) [![CodeFactor](https://www.codefactor.io/repository/github/mindee/doctr/badge?s=bae07db86bb079ce9d6542315b8c6e70fa708a7e)](https://www.codefactor.io/repository/github/mindee/doctr) [![Codacy Badge](https://api.codacy.com/project/badge/Grade/340a76749b634586a498e1c0ab998f08)](https://app.codacy.com/gh/mindee/doctr?utm_source=github.com&utm_medium=referral&utm_content=mindee/doctr&utm_campaign=Badge_Grade) [![Doc Status](https://github.com/mindee/doctr/workflows/doc-status/badge.svg)](https://mindee.github.io/doctr) [![Pypi](https://img.shields.io/badge/pypi-v0.9.0-blue.svg)](https://pypi.org/project/python-doctr/) [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/mindee/doctr) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/mindee/notebooks/blob/main/doctr/quicktour.ipynb)
+[![Slack Icon](https://img.shields.io/badge/Slack-Community-4A154B?style=flat-square&logo=slack&logoColor=white)](https://slack.mindee.com) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE) ![Build Status](https://github.com/mindee/doctr/workflows/builds/badge.svg) [![Docker Images](https://img.shields.io/badge/Docker-4287f5?style=flat&logo=docker&logoColor=white)](https://github.com/mindee/doctr/pkgs/container/doctr) [![codecov](https://codecov.io/gh/mindee/doctr/branch/main/graph/badge.svg?token=577MO567NM)](https://codecov.io/gh/mindee/doctr) [![CodeFactor](https://www.codefactor.io/repository/github/mindee/doctr/badge?s=bae07db86bb079ce9d6542315b8c6e70fa708a7e)](https://www.codefactor.io/repository/github/mindee/doctr) [![Codacy Badge](https://api.codacy.com/project/badge/Grade/340a76749b634586a498e1c0ab998f08)](https://app.codacy.com/gh/mindee/doctr?utm_source=github.com&utm_medium=referral&utm_content=mindee/doctr&utm_campaign=Badge_Grade) [![Doc Status](https://github.com/mindee/doctr/workflows/doc-status/badge.svg)](https://mindee.github.io/doctr) [![Pypi](https://img.shields.io/badge/pypi-v0.12.0-blue.svg)](https://pypi.org/project/python-doctr/) [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/mindee/doctr) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/mindee/notebooks/blob/main/doctr/quicktour.ipynb) [![Gurubase](https://img.shields.io/badge/Gurubase-Ask%20docTR%20Guru-006BFF)](https://gurubase.io/g/doctr)
 
 
 **Optical Character Recognition made seamless & accessible to anyone, powered by TensorFlow 2 & PyTorch**
@@ -133,9 +133,22 @@ The KIE predictor results per page are in a dictionary format with each key repr
 
 ## Installation
 
+> [!WARNING]
+> **TensorFlow Backend Deprecation Notice**
+>
+> Using docTR with TensorFlow as a backend is deprecated and will be removed in the next major release (v1.0.0).
+> We **recommend switching to the PyTorch backend**, which is more actively maintained and supports the latest features and models.
+> Alternatively, you can use [OnnxTR](https://github.com/felixdittrich92/OnnxTR), which does **not** require TensorFlow or PyTorch.
+>
+> This decision was made based on several considerations:
+>
+> - Allows better focus on improving the core library
+> - Frees up resources to develop new features faster
+> - Enables more targeted optimizations with PyTorch
+
 ### Prerequisites
 
-Python 3.9 (or higher) and [pip](https://pip.pypa.io/en/stable/) are required to install docTR.
+Python 3.10 (or higher) and [pip](https://pip.pypa.io/en/stable/) are required to install docTR.
 
 ### Latest release
 
@@ -199,6 +212,7 @@ Credits where it's due: this repository is implementing, among others, architect
 - MASTER: [MASTER: Multi-Aspect Non-local Network for Scene Text Recognition](https://arxiv.org/pdf/1910.02562.pdf).
 - ViTSTR: [Vision Transformer for Fast and Efficient Scene Text Recognition](https://arxiv.org/pdf/2105.08582.pdf).
 - PARSeq: [Scene Text Recognition with Permuted Autoregressive Sequence Models](https://arxiv.org/pdf/2207.06966).
+- VIPTR: [A Vision Permutable Extractor for Fast and Efficient Scene Text Recognition](https://arxiv.org/abs/2401.10110).
 
 ## More goodies
 
@@ -254,37 +268,37 @@ Check out our [TensorFlow.js demo](https://github.com/mindee/doctr-tfjs-demo) to
 
 ### Docker container
 
-[We offer Docker container support for easy testing and deployment](https://github.com/mindee/doctr/pkgs/container/doctr).
+We offer Docker container support for easy testing and deployment. [Here are the available docker tags.](https://github.com/mindee/doctr/pkgs/container/doctr).
 
 #### Using GPU with docTR Docker Images
 
-The docTR Docker images are GPU-ready and based on CUDA `11.8`.
-However, to use GPU support with these Docker images, please ensure that Docker is configured to use your GPU.
+The docTR Docker images are GPU-ready and based on CUDA `12.2`. Make sure your host is **at least `12.2`**, otherwise Torch or TensorFlow won't be able to initialize the GPU.
+Please ensure that Docker is configured to use your GPU.
 
 To verify and configure GPU support for Docker, please follow the instructions provided in the [NVIDIA Container Toolkit Installation Guide](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html).
 
 Once Docker is configured to use GPUs, you can run docTR Docker containers with GPU support:
 
 ```shell
-docker run -it --gpus all ghcr.io/mindee/doctr:tf-py3.8.18-gpu-2023-09 bash
+docker run -it --gpus all ghcr.io/mindee/doctr:torch-py3.9.18-2024-10 bash
 ```
 
 #### Available Tags
 
-The Docker images for docTR follow a specific tag nomenclature: `<framework>-py<python_version>-<system>-<doctr_version|YYYY-MM>`. Here's a breakdown of the tag structure:
+The Docker images for docTR follow a specific tag nomenclature: `<deps>-py<python_version>-<doctr_version|YYYY-MM>`. Here's a breakdown of the tag structure:
 
-- `<framework>`: `tf` (TensorFlow) or `torch` (PyTorch).
-- `<python_version>`: `3.8.18`, `3.9.18`, or `3.10.13`.
-- `<system>`: `cpu` or `gpu`
-- `<doctr_version>`: a tag >= `v0.7.1`
-- `<YYYY-MM>`: e.g. `2023-09`
+- `<deps>`: `tf`, `torch`, `tf-viz-html-contrib` or `torch-viz-html-contrib`.
+- `<python_version>`: `3.9.18`, `3.10.13` or `3.11.8`.
+- `<doctr_version>`: a tag >= `v0.11.0`
+- `<YYYY-MM>`: e.g. `2014-10`
 
 Here are examples of different image tags:
 
 | Tag | Description |
 |----------------------------|---------------------------------------------------|
-| `tf-py3.8.18-cpu-v0.7.1` | TensorFlow version `3.8.18` with docTR `v0.7.1`. |
-| `torch-py3.9.18-gpu-2023-09`| PyTorch version `3.9.18` with GPU support and a monthly build from `2023-09`. |
+| `tf-py3.10.13-v0.11.0` | TensorFlow version `3.10.13` with docTR `v0.11.0`. |
+| `torch-viz-html-contrib-py3.11.8-2024-10` | Torch with extra dependencies version `3.11.8` from latest commit on `main` in `2024-10`. |
+| `torch-py3.11.8-2024-10`| PyTorch version `3.11.8` from latest commit on `main` in `2024-10`. |
 
 #### Building Docker Images Locally
 
--- /dev/null
+++ python_doctr-0.12.0/doctr/contrib/__init__.py
@@ -0,0 +1 @@
+from .artefacts import ArtefactDetector
--- python_doctr-0.10.0/doctr/contrib/artefacts.py
+++ python_doctr-0.12.0/doctr/contrib/artefacts.py
@@ -1,9 +1,9 @@
-# Copyright (C) 2021-2024, Mindee.
+# Copyright (C) 2021-2025, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Any
 
 import cv2
 import numpy as np
@@ -14,7 +14,7 @@ from .base import _BasePredictor
 
 __all__ = ["ArtefactDetector"]
 
-default_cfgs: Dict[str, Dict[str, Any]] = {
+default_cfgs: dict[str, dict[str, Any]] = {
     "yolov8_artefact": {
         "input_shape": (3, 1024, 1024),
         "labels": ["bar_code", "qr_code", "logo", "photo"],
@@ -34,7 +34,6 @@ class ArtefactDetector(_BasePredictor):
     >>> results = detector(doc)
 
     Args:
-    ----
         arch: the architecture to use
         batch_size: the batch size to use
         model_path: the path to the model to use
@@ -50,9 +49,9 @@ class ArtefactDetector(_BasePredictor):
         self,
         arch: str = "yolov8_artefact",
         batch_size: int = 2,
-        model_path: Optional[str] = None,
-        labels: Optional[List[str]] = None,
-        input_shape: Optional[Tuple[int, int, int]] = None,
+        model_path: str | None = None,
+        labels: list[str] | None = None,
+        input_shape: tuple[int, int, int] | None = None,
         conf_threshold: float = 0.5,
         iou_threshold: float = 0.5,
         **kwargs: Any,
@@ -66,7 +65,7 @@ class ArtefactDetector(_BasePredictor):
     def preprocess(self, img: np.ndarray) -> np.ndarray:
         return np.transpose(cv2.resize(img, (self.input_shape[2], self.input_shape[1])), (2, 0, 1)) / np.array(255.0)
 
-    def postprocess(self, output: List[np.ndarray], input_images: List[List[np.ndarray]]) -> List[List[Dict[str, Any]]]:
+    def postprocess(self, output: list[np.ndarray], input_images: list[list[np.ndarray]]) -> list[list[dict[str, Any]]]:
         results = []
 
         for batch in zip(output, input_images):
@@ -109,7 +108,6 @@ class ArtefactDetector(_BasePredictor):
         Display the results
 
         Args:
-        ----
             **kwargs: additional keyword arguments to be passed to `plt.show`
         """
         requires_package("matplotlib", "`.show()` requires matplotlib installed")
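For reference, a hedged usage sketch of the predictor whose annotations change above, following the class docstring quoted in the hunk (`results = detector(doc)`); it assumes `onnxruntime` is installed, network access for the default model download, and a placeholder image path:

```python
# Usage sketch for ArtefactDetector, based on its docstring; not a verbatim
# excerpt from the package.
from doctr.io import DocumentFile
from doctr.contrib.artefacts import ArtefactDetector

doc = DocumentFile.from_images(["path/to/image.jpg"])  # pages as numpy arrays
detector = ArtefactDetector(batch_size=2)  # fetches the yolov8_artefact ONNX model
results = detector(doc)  # one list of detection dicts per page
print(results)
```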
--- python_doctr-0.10.0/doctr/contrib/base.py
+++ python_doctr-0.12.0/doctr/contrib/base.py
@@ -1,9 +1,9 @@
-# Copyright (C) 2021-2024, Mindee.
+# Copyright (C) 2021-2025, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 
-from typing import Any, List, Optional
+from typing import Any
 
 import numpy as np
 
@@ -16,32 +16,29 @@ class _BasePredictor:
     Base class for all predictors
 
     Args:
-    ----
         batch_size: the batch size to use
         url: the url to use to download a model if needed
         model_path: the path to the model to use
         **kwargs: additional arguments to be passed to `download_from_url`
     """
 
-    def __init__(self, batch_size: int, url: Optional[str] = None, model_path: Optional[str] = None, **kwargs) -> None:
+    def __init__(self, batch_size: int, url: str | None = None, model_path: str | None = None, **kwargs) -> None:
         self.batch_size = batch_size
         self.session = self._init_model(url, model_path, **kwargs)
 
-        self._inputs: List[np.ndarray] = []
-        self._results: List[Any] = []
+        self._inputs: list[np.ndarray] = []
+        self._results: list[Any] = []
 
-    def _init_model(self, url: Optional[str] = None, model_path: Optional[str] = None, **kwargs: Any) -> Any:
+    def _init_model(self, url: str | None = None, model_path: str | None = None, **kwargs: Any) -> Any:
         """
         Download the model from the given url if needed
 
         Args:
-        ----
             url: the url to use
             model_path: the path to the model to use
             **kwargs: additional arguments to be passed to `download_from_url`
 
         Returns:
-        -------
             Any: the ONNX loaded model
         """
         requires_package("onnxruntime", "`.contrib` module requires `onnxruntime` to be installed.")
@@ -57,40 +54,34 @@ class _BasePredictor:
         Preprocess the input image
 
         Args:
-        ----
             img: the input image to preprocess
 
         Returns:
-        -------
             np.ndarray: the preprocessed image
         """
         raise NotImplementedError
 
-    def postprocess(self, output: List[np.ndarray], input_images: List[List[np.ndarray]]) -> Any:
+    def postprocess(self, output: list[np.ndarray], input_images: list[list[np.ndarray]]) -> Any:
         """
         Postprocess the model output
 
         Args:
-        ----
             output: the model output to postprocess
             input_images: the input images used to generate the output
 
         Returns:
-        -------
             Any: the postprocessed output
         """
         raise NotImplementedError
 
-    def __call__(self, inputs: List[np.ndarray]) -> Any:
+    def __call__(self, inputs: list[np.ndarray]) -> Any:
         """
         Call the model on the given inputs
 
         Args:
-        ----
             inputs: the inputs to use
 
         Returns:
-        -------
             Any: the postprocessed output
         """
         self._inputs = inputs
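Both contrib files show the same mechanical migration repeated across the release: the copyright header moves to 2021-2025, the numpydoc `----`/`-------` underlines are dropped from docstrings, and `typing.List/Dict/Tuple/Optional` gives way to builtin generics (PEP 585) and `X | None` unions (PEP 604), which is what lets most `typing` imports shrink to `Any` now that Python >= 3.10 is required. A before/after sketch with a hypothetical function:

```python
# Hypothetical illustration of the annotation style adopted in 0.12.0.
from typing import Dict, List, Optional  # 0.10.0 style


def load_old(paths: Optional[List[str]] = None) -> Dict[str, int]:
    return {p: len(p) for p in (paths or [])}


# 0.12.0 style: builtin generics and `|` unions, no typing imports needed
# (PEP 604 syntax in annotations requires Python >= 3.10 at runtime).
def load_new(paths: list[str] | None = None) -> dict[str, int]:
    return {p: len(p) for p in (paths or [])}
```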
--- python_doctr-0.10.0/doctr/datasets/__init__.py
+++ python_doctr-0.12.0/doctr/datasets/__init__.py
@@ -1,6 +1,7 @@
 from doctr.file_utils import is_tf_available
 
 from .generator import *
+from .coco_text import *
 from .cord import *
 from .detection import *
 from .doc_artefacts import *
--- /dev/null
+++ python_doctr-0.12.0/doctr/datasets/coco_text.py
@@ -0,0 +1,139 @@
+# Copyright (C) 2021-2025, Mindee.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+import json
+import os
+from pathlib import Path
+from typing import Any
+
+import numpy as np
+from tqdm import tqdm
+
+from .datasets import AbstractDataset
+from .utils import convert_target_to_relative, crop_bboxes_from_image
+
+__all__ = ["COCOTEXT"]
+
+
+class COCOTEXT(AbstractDataset):
+    """
+    COCO-Text dataset from `"COCO-Text: Dataset and Benchmark for Text Detection and Recognition in Natural Images"
+    <https://arxiv.org/pdf/1601.07140v2>`_ |
+    `"homepage" <https://bgshih.github.io/cocotext/>`_.
+
+    >>> # NOTE: You need to download the dataset first.
+    >>> from doctr.datasets import COCOTEXT
+    >>> train_set = COCOTEXT(train=True, img_folder="/path/to/coco_text/train2014/",
+    >>>                      label_path="/path/to/coco_text/cocotext.v2.json")
+    >>> img, target = train_set[0]
+    >>> test_set = COCOTEXT(train=False, img_folder="/path/to/coco_text/train2014/",
+    >>>                     label_path = "/path/to/coco_text/cocotext.v2.json")
+    >>> img, target = test_set[0]
+
+    Args:
+        img_folder: folder with all the images of the dataset
+        label_path: path to the annotations file of the dataset
+        train: whether the subset should be the training one
+        use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
+        recognition_task: whether the dataset should be used for recognition task
+        detection_task: whether the dataset should be used for detection task
+        **kwargs: keyword arguments from `AbstractDataset`.
+    """
+
+    def __init__(
+        self,
+        img_folder: str,
+        label_path: str,
+        train: bool = True,
+        use_polygons: bool = False,
+        recognition_task: bool = False,
+        detection_task: bool = False,
+        **kwargs: Any,
+    ) -> None:
+        super().__init__(
+            img_folder, pre_transforms=convert_target_to_relative if not recognition_task else None, **kwargs
+        )
+        # Task check
+        if recognition_task and detection_task:
+            raise ValueError(
+                " 'recognition' and 'detection task' cannot be set to True simultaneously. "
+                + " To get the whole dataset with boxes and labels leave both parameters to False "
+            )
+
+        # File existence check
+        if not os.path.exists(label_path) or not os.path.exists(img_folder):
+            raise FileNotFoundError(f"unable to find {label_path if not os.path.exists(label_path) else img_folder}")
+
+        tmp_root = img_folder
+        self.train = train
+        np_dtype = np.float32
+        self.data: list[tuple[str | Path | np.ndarray, str | dict[str, Any] | np.ndarray]] = []
+
+        with open(label_path, "r") as file:
+            data = json.load(file)
+
+        # Filter images based on the set
+        img_items = [img for img in data["imgs"].items() if (img[1]["set"] == "train") == train]
+        box: list[float] | np.ndarray
+
+        for img_id, img_info in tqdm(img_items, desc="Preparing and Loading COCOTEXT", total=len(img_items)):
+            img_path = os.path.join(img_folder, img_info["file_name"])
+
+            # File existence check
+            if not os.path.exists(img_path):  # pragma: no cover
+                raise FileNotFoundError(f"Unable to locate {img_path}")
+
+            # Get annotations for the current image (only legible text)
+            annotations = [
+                ann
+                for ann in data["anns"].values()
+                if ann["image_id"] == int(img_id) and ann["legibility"] == "legible"
+            ]
+
+            # Some images have no annotations with readable text
+            if not annotations:  # pragma: no cover
+                continue
+
+            _targets = []
+
+            for annotation in annotations:
+                x, y, w, h = annotation["bbox"]
+                if use_polygons:
+                    # (x, y) coordinates of top left, top right, bottom right, bottom left corners
+                    box = np.array(
+                        [
+                            [x, y],
+                            [x + w, y],
+                            [x + w, y + h],
+                            [x, y + h],
+                        ],
+                        dtype=np_dtype,
+                    )
+                else:
+                    # (xmin, ymin, xmax, ymax) coordinates
+                    box = [x, y, x + w, y + h]
+                _targets.append((annotation["utf8_string"], box))
+            text_targets, box_targets = zip(*_targets)
+
+            if recognition_task:
+                crops = crop_bboxes_from_image(
+                    img_path=os.path.join(tmp_root, img_path), geoms=np.asarray(box_targets, dtype=int).clip(min=0)
+                )
+                for crop, label in zip(crops, list(text_targets)):
+                    if label and " " not in label:
+                        self.data.append((crop, label))
+
+            elif detection_task:
+                self.data.append((img_path, np.asarray(box_targets, dtype=int).clip(min=0)))
+            else:
+                self.data.append((
+                    img_path,
+                    dict(boxes=np.asarray(box_targets, dtype=int).clip(min=0), labels=list(text_targets)),
+                ))
+
+        self.root = tmp_root
+
+    def extra_repr(self) -> str:
+        return f"train={self.train}"
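A standalone sketch of the box handling inside `COCOTEXT.__init__` above: COCO annotations store `(x, y, w, h)`, which the loader converts into either a 4-point polygon (`use_polygons=True`) or a straight `(xmin, ymin, xmax, ymax)` box (toy numbers below):

```python
# Toy illustration of the two target formats produced by the loader above.
import numpy as np

x, y, w, h = 10.0, 20.0, 30.0, 40.0  # hypothetical COCO-style bbox

# use_polygons=True: corners ordered top-left, top-right, bottom-right, bottom-left
polygon = np.array([[x, y], [x + w, y], [x + w, y + h], [x, y + h]], dtype=np.float32)

# use_polygons=False: straight box
box = [x, y, x + w, y + h]

print(polygon.shape)  # (4, 2)
print(box)            # [10.0, 20.0, 40.0, 60.0]
```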
--- python_doctr-0.10.0/doctr/datasets/cord.py
+++ python_doctr-0.12.0/doctr/datasets/cord.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2021-2024, Mindee.
+# Copyright (C) 2021-2025, Mindee.
 
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
@@ -6,7 +6,7 @@
 import json
 import os
 from pathlib import Path
-from typing import Any, Dict, List, Tuple, Union
+from typing import Any
 
 import numpy as np
 from tqdm import tqdm
@@ -29,7 +29,6 @@ class CORD(VisionDataset):
     >>> img, target = train_set[0]
 
     Args:
-    ----
         train: whether the subset should be the training one
         use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
         recognition_task: whether the dataset should be used for recognition task
@@ -72,12 +71,14 @@ class CORD(VisionDataset):
                 + "To get the whole dataset with boxes and labels leave both parameters to False."
             )
 
-        # List images
+        # list images
        tmp_root = os.path.join(self.root, "image")
-        self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any], np.ndarray]]] = []
+        self.data: list[tuple[str | np.ndarray, str | dict[str, Any] | np.ndarray]] = []
         self.train = train
         np_dtype = np.float32
-        for img_path in tqdm(iterable=os.listdir(tmp_root), desc="Unpacking CORD", total=len(os.listdir(tmp_root))):
+        for img_path in tqdm(
+            iterable=os.listdir(tmp_root), desc="Preparing and Loading CORD", total=len(os.listdir(tmp_root))
+        ):
             # File existence check
             if not os.path.exists(os.path.join(tmp_root, img_path)):
                 raise FileNotFoundError(f"unable to locate {os.path.join(tmp_root, img_path)}")
@@ -91,7 +92,7 @@ class CORD(VisionDataset):
                     if len(word["text"]) > 0:
                         x = word["quad"]["x1"], word["quad"]["x2"], word["quad"]["x3"], word["quad"]["x4"]
                         y = word["quad"]["y1"], word["quad"]["y2"], word["quad"]["y3"], word["quad"]["y4"]
-                        box: Union[List[float], np.ndarray]
+                        box: list[float] | np.ndarray
                         if use_polygons:
                             # (x, y) coordinates of top left, top right, bottom right, bottom left corners
                             box = np.array(
@@ -115,7 +116,8 @@ class CORD(VisionDataset):
                 img_path=os.path.join(tmp_root, img_path), geoms=np.asarray(box_targets, dtype=int).clip(min=0)
             )
             for crop, label in zip(crops, list(text_targets)):
-                self.data.append((crop, label))
+                if " " not in label:
+                    self.data.append((crop, label))
         elif detection_task:
             self.data.append((img_path, np.asarray(box_targets, dtype=int).clip(min=0)))
         else:
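The behavioral change in the last hunk matches the filter in the new `coco_text.py`: in recognition mode, word crops whose label contains a space are now skipped instead of being added to `self.data`. A toy sketch of the effect, with invented labels:

```python
# Invented example: multi-word labels are dropped from recognition data in 0.12.0.
crops = ["crop_0", "crop_1", "crop_2"]  # stand-ins for cropped word images
labels = ["TOTAL", "12 000", "CASH"]

data = [(crop, label) for crop, label in zip(crops, labels) if " " not in label]
print(data)  # [('crop_0', 'TOTAL'), ('crop_2', 'CASH')]
```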
--- /dev/null
+++ python_doctr-0.12.0/doctr/datasets/datasets/__init__.py
@@ -0,0 +1,6 @@
+from doctr.file_utils import is_tf_available, is_torch_available
+
+if is_torch_available():
+    from .pytorch import *
+elif is_tf_available():
+    from .tensorflow import *  # type: ignore[assignment]
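This restored `__init__.py` is the backend-dispatch shim used throughout docTR: the `if` branch is evaluated first, so PyTorch wins when both frameworks are installed. A hedged sketch of the same resolution order from the caller's side:

```python
# Sketch: the import-time resolution order implied by the shim above
# (the availability checks are real, the `backend` variable is illustrative).
from doctr.file_utils import is_tf_available, is_torch_available

if is_torch_available():
    backend = "pytorch"  # .pytorch implementations are re-exported
elif is_tf_available():
    backend = "tensorflow"  # .tensorflow implementations are re-exported
else:
    backend = None  # neither framework found
print(backend)
```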