python-doctr 0.11.0.tar.gz → 0.12.0.tar.gz

This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two package versions as they appear in the public registry.
Files changed (190)
  1. {python_doctr-0.11.0 → python_doctr-0.12.0}/PKG-INFO +19 -3
  2. {python_doctr-0.11.0 → python_doctr-0.12.0}/README.md +15 -1
  3. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/__init__.py +1 -0
  4. python_doctr-0.12.0/doctr/datasets/coco_text.py +139 -0
  5. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/cord.py +2 -1
  6. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/funsd.py +2 -2
  7. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/ic03.py +1 -1
  8. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/ic13.py +2 -1
  9. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/iiit5k.py +4 -1
  10. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/imgur5k.py +9 -2
  11. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/loader.py +1 -1
  12. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/ocr.py +1 -1
  13. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/recognition.py +1 -1
  14. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/svhn.py +1 -1
  15. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/svt.py +2 -2
  16. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/synthtext.py +15 -2
  17. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/utils.py +7 -6
  18. python_doctr-0.12.0/doctr/datasets/vocabs.py +1140 -0
  19. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/file_utils.py +9 -0
  20. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/io/elements.py +37 -3
  21. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/_utils.py +1 -1
  22. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/classification/__init__.py +1 -0
  23. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/classification/magc_resnet/pytorch.py +1 -2
  24. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/classification/magc_resnet/tensorflow.py +3 -3
  25. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/classification/mobilenet/pytorch.py +15 -1
  26. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/classification/mobilenet/tensorflow.py +11 -2
  27. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/classification/predictor/pytorch.py +1 -1
  28. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/classification/resnet/pytorch.py +26 -3
  29. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/classification/resnet/tensorflow.py +25 -4
  30. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/classification/textnet/pytorch.py +10 -1
  31. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/classification/textnet/tensorflow.py +11 -2
  32. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/classification/vgg/pytorch.py +16 -1
  33. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/classification/vgg/tensorflow.py +11 -2
  34. python_doctr-0.12.0/doctr/models/classification/vip/__init__.py +4 -0
  35. python_doctr-0.12.0/doctr/models/classification/vip/layers/__init__.py +4 -0
  36. python_doctr-0.12.0/doctr/models/classification/vip/layers/pytorch.py +615 -0
  37. python_doctr-0.12.0/doctr/models/classification/vip/pytorch.py +505 -0
  38. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/classification/vit/pytorch.py +10 -1
  39. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/classification/vit/tensorflow.py +9 -0
  40. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/classification/zoo.py +4 -0
  41. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/detection/differentiable_binarization/base.py +3 -4
  42. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/detection/differentiable_binarization/pytorch.py +10 -1
  43. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/detection/differentiable_binarization/tensorflow.py +11 -4
  44. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/detection/fast/base.py +2 -3
  45. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/detection/fast/pytorch.py +13 -4
  46. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/detection/fast/tensorflow.py +10 -2
  47. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/detection/linknet/base.py +2 -3
  48. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/detection/linknet/pytorch.py +10 -1
  49. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/detection/linknet/tensorflow.py +10 -2
  50. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/factory/hub.py +3 -3
  51. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/kie_predictor/pytorch.py +1 -1
  52. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/kie_predictor/tensorflow.py +1 -1
  53. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/modules/layers/pytorch.py +49 -1
  54. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/predictor/pytorch.py +1 -1
  55. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/predictor/tensorflow.py +1 -1
  56. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/recognition/__init__.py +1 -0
  57. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/recognition/crnn/pytorch.py +10 -1
  58. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/recognition/crnn/tensorflow.py +10 -1
  59. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/recognition/master/pytorch.py +10 -1
  60. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/recognition/master/tensorflow.py +10 -3
  61. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/recognition/parseq/pytorch.py +23 -5
  62. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/recognition/parseq/tensorflow.py +13 -5
  63. python_doctr-0.12.0/doctr/models/recognition/predictor/_utils.py +145 -0
  64. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/recognition/predictor/pytorch.py +3 -3
  65. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/recognition/predictor/tensorflow.py +3 -3
  66. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/recognition/sar/pytorch.py +10 -1
  67. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/recognition/sar/tensorflow.py +10 -3
  68. python_doctr-0.12.0/doctr/models/recognition/utils.py +93 -0
  69. python_doctr-0.12.0/doctr/models/recognition/viptr/__init__.py +4 -0
  70. python_doctr-0.12.0/doctr/models/recognition/viptr/pytorch.py +277 -0
  71. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/recognition/vitstr/pytorch.py +10 -1
  72. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/recognition/vitstr/tensorflow.py +10 -3
  73. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/recognition/zoo.py +5 -0
  74. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/utils/pytorch.py +28 -18
  75. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/utils/tensorflow.py +15 -8
  76. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/utils/data.py +1 -1
  77. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/utils/geometry.py +1 -1
  78. python_doctr-0.12.0/doctr/version.py +1 -0
  79. {python_doctr-0.11.0 → python_doctr-0.12.0}/pyproject.toml +1 -0
  80. {python_doctr-0.11.0 → python_doctr-0.12.0}/python_doctr.egg-info/PKG-INFO +19 -3
  81. {python_doctr-0.11.0 → python_doctr-0.12.0}/python_doctr.egg-info/SOURCES.txt +7 -0
  82. {python_doctr-0.11.0 → python_doctr-0.12.0}/python_doctr.egg-info/requires.txt +1 -0
  83. {python_doctr-0.11.0 → python_doctr-0.12.0}/setup.py +1 -1
  84. python_doctr-0.11.0/doctr/datasets/vocabs.py +0 -92
  85. python_doctr-0.11.0/doctr/models/recognition/predictor/_utils.py +0 -83
  86. python_doctr-0.11.0/doctr/models/recognition/utils.py +0 -84
  87. python_doctr-0.11.0/doctr/version.py +0 -1
  88. {python_doctr-0.11.0 → python_doctr-0.12.0}/LICENSE +0 -0
  89. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/__init__.py +0 -0
  90. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/contrib/__init__.py +0 -0
  91. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/contrib/artefacts.py +0 -0
  92. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/contrib/base.py +0 -0
  93. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/datasets/__init__.py +0 -0
  94. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/datasets/base.py +0 -0
  95. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/datasets/pytorch.py +0 -0
  96. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/datasets/tensorflow.py +0 -0
  97. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/detection.py +0 -0
  98. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/doc_artefacts.py +0 -0
  99. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/generator/__init__.py +0 -0
  100. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/generator/base.py +0 -0
  101. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/generator/pytorch.py +0 -0
  102. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/generator/tensorflow.py +0 -0
  103. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/iiithws.py +0 -0
  104. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/mjsynth.py +0 -0
  105. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/orientation.py +0 -0
  106. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/sroie.py +0 -0
  107. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/wildreceipt.py +0 -0
  108. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/io/__init__.py +0 -0
  109. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/io/html.py +0 -0
  110. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/io/image/__init__.py +0 -0
  111. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/io/image/base.py +0 -0
  112. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/io/image/pytorch.py +0 -0
  113. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/io/image/tensorflow.py +0 -0
  114. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/io/pdf.py +0 -0
  115. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/io/reader.py +0 -0
  116. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/__init__.py +0 -0
  117. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/builder.py +0 -0
  118. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/classification/magc_resnet/__init__.py +0 -0
  119. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/classification/mobilenet/__init__.py +0 -0
  120. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/classification/predictor/__init__.py +0 -0
  121. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/classification/predictor/tensorflow.py +0 -0
  122. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/classification/resnet/__init__.py +0 -0
  123. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/classification/textnet/__init__.py +0 -0
  124. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/classification/vgg/__init__.py +0 -0
  125. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/classification/vit/__init__.py +0 -0
  126. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/core.py +0 -0
  127. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/detection/__init__.py +0 -0
  128. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/detection/_utils/__init__.py +0 -0
  129. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/detection/_utils/base.py +0 -0
  130. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/detection/_utils/pytorch.py +0 -0
  131. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/detection/_utils/tensorflow.py +0 -0
  132. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/detection/core.py +0 -0
  133. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/detection/differentiable_binarization/__init__.py +0 -0
  134. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/detection/fast/__init__.py +0 -0
  135. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/detection/linknet/__init__.py +0 -0
  136. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/detection/predictor/__init__.py +0 -0
  137. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/detection/predictor/pytorch.py +0 -0
  138. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/detection/predictor/tensorflow.py +0 -0
  139. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/detection/zoo.py +0 -0
  140. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/factory/__init__.py +0 -0
  141. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/kie_predictor/__init__.py +0 -0
  142. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/kie_predictor/base.py +0 -0
  143. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/modules/__init__.py +0 -0
  144. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/modules/layers/__init__.py +0 -0
  145. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/modules/layers/tensorflow.py +0 -0
  146. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/modules/transformer/__init__.py +0 -0
  147. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/modules/transformer/pytorch.py +0 -0
  148. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/modules/transformer/tensorflow.py +0 -0
  149. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/modules/vision_transformer/__init__.py +0 -0
  150. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/modules/vision_transformer/pytorch.py +0 -0
  151. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/modules/vision_transformer/tensorflow.py +0 -0
  152. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/predictor/__init__.py +0 -0
  153. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/predictor/base.py +0 -0
  154. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/preprocessor/__init__.py +0 -0
  155. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/preprocessor/pytorch.py +0 -0
  156. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/preprocessor/tensorflow.py +0 -0
  157. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/recognition/core.py +0 -0
  158. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/recognition/crnn/__init__.py +0 -0
  159. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/recognition/master/__init__.py +0 -0
  160. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/recognition/master/base.py +0 -0
  161. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/recognition/parseq/__init__.py +0 -0
  162. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/recognition/parseq/base.py +0 -0
  163. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/recognition/predictor/__init__.py +0 -0
  164. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/recognition/sar/__init__.py +0 -0
  165. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/recognition/vitstr/__init__.py +0 -0
  166. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/recognition/vitstr/base.py +0 -0
  167. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/utils/__init__.py +0 -0
  168. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/models/zoo.py +0 -0
  169. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/py.typed +0 -0
  170. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/transforms/__init__.py +0 -0
  171. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/transforms/functional/__init__.py +0 -0
  172. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/transforms/functional/base.py +0 -0
  173. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/transforms/functional/pytorch.py +0 -0
  174. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/transforms/functional/tensorflow.py +0 -0
  175. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/transforms/modules/__init__.py +0 -0
  176. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/transforms/modules/base.py +0 -0
  177. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/transforms/modules/pytorch.py +0 -0
  178. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/transforms/modules/tensorflow.py +0 -0
  179. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/utils/__init__.py +0 -0
  180. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/utils/common_types.py +0 -0
  181. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/utils/fonts.py +0 -0
  182. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/utils/metrics.py +0 -0
  183. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/utils/multithreading.py +0 -0
  184. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/utils/reconstitution.py +0 -0
  185. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/utils/repr.py +0 -0
  186. {python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/utils/visualization.py +0 -0
  187. {python_doctr-0.11.0 → python_doctr-0.12.0}/python_doctr.egg-info/dependency_links.txt +0 -0
  188. {python_doctr-0.11.0 → python_doctr-0.12.0}/python_doctr.egg-info/top_level.txt +0 -0
  189. {python_doctr-0.11.0 → python_doctr-0.12.0}/python_doctr.egg-info/zip-safe +0 -0
  190. {python_doctr-0.11.0 → python_doctr-0.12.0}/setup.cfg +0 -0

{python_doctr-0.11.0 → python_doctr-0.12.0}/PKG-INFO
@@ -1,6 +1,6 @@
- Metadata-Version: 2.2
+ Metadata-Version: 2.4
  Name: python-doctr
- Version: 0.11.0
+ Version: 0.12.0
  Summary: Document Text Recognition (docTR): deep Learning for high-performance OCR on documents.
  Author-email: Mindee <contact@mindee.com>
  Maintainer: François-Guillaume Fernandez, Charles Gaillard, Olivier Dulcy, Felix Dittrich
@@ -239,6 +239,7 @@ Requires-Dist: huggingface-hub<1.0.0,>=0.20.0
  Requires-Dist: Pillow>=9.2.0
  Requires-Dist: defusedxml>=0.7.0
  Requires-Dist: anyascii>=0.3.2
+ Requires-Dist: validators>=0.18.0
  Requires-Dist: tqdm>=4.30.0
  Provides-Extra: tf
  Requires-Dist: tensorflow[and-cuda]<3.0.0,>=2.15.0; sys_platform == "linux" and extra == "tf"
@@ -302,12 +303,13 @@ Requires-Dist: recommonmark>=0.7.1; extra == "dev"
  Requires-Dist: sphinx-markdown-tables>=0.0.15; extra == "dev"
  Requires-Dist: sphinx-tabs>=3.3.0; extra == "dev"
  Requires-Dist: furo>=2022.3.4; extra == "dev"
+ Dynamic: license-file

  <p align="center">
  <img src="https://github.com/mindee/doctr/raw/main/docs/images/Logo_doctr.gif" width="40%">
  </p>

- [![Slack Icon](https://img.shields.io/badge/Slack-Community-4A154B?style=flat-square&logo=slack&logoColor=white)](https://slack.mindee.com) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE) ![Build Status](https://github.com/mindee/doctr/workflows/builds/badge.svg) [![Docker Images](https://img.shields.io/badge/Docker-4287f5?style=flat&logo=docker&logoColor=white)](https://github.com/mindee/doctr/pkgs/container/doctr) [![codecov](https://codecov.io/gh/mindee/doctr/branch/main/graph/badge.svg?token=577MO567NM)](https://codecov.io/gh/mindee/doctr) [![CodeFactor](https://www.codefactor.io/repository/github/mindee/doctr/badge?s=bae07db86bb079ce9d6542315b8c6e70fa708a7e)](https://www.codefactor.io/repository/github/mindee/doctr) [![Codacy Badge](https://api.codacy.com/project/badge/Grade/340a76749b634586a498e1c0ab998f08)](https://app.codacy.com/gh/mindee/doctr?utm_source=github.com&utm_medium=referral&utm_content=mindee/doctr&utm_campaign=Badge_Grade) [![Doc Status](https://github.com/mindee/doctr/workflows/doc-status/badge.svg)](https://mindee.github.io/doctr) [![Pypi](https://img.shields.io/badge/pypi-v0.11.0-blue.svg)](https://pypi.org/project/python-doctr/) [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/mindee/doctr) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/mindee/notebooks/blob/main/doctr/quicktour.ipynb) [![Gurubase](https://img.shields.io/badge/Gurubase-Ask%20docTR%20Guru-006BFF)](https://gurubase.io/g/doctr)
+ [![Slack Icon](https://img.shields.io/badge/Slack-Community-4A154B?style=flat-square&logo=slack&logoColor=white)](https://slack.mindee.com) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE) ![Build Status](https://github.com/mindee/doctr/workflows/builds/badge.svg) [![Docker Images](https://img.shields.io/badge/Docker-4287f5?style=flat&logo=docker&logoColor=white)](https://github.com/mindee/doctr/pkgs/container/doctr) [![codecov](https://codecov.io/gh/mindee/doctr/branch/main/graph/badge.svg?token=577MO567NM)](https://codecov.io/gh/mindee/doctr) [![CodeFactor](https://www.codefactor.io/repository/github/mindee/doctr/badge?s=bae07db86bb079ce9d6542315b8c6e70fa708a7e)](https://www.codefactor.io/repository/github/mindee/doctr) [![Codacy Badge](https://api.codacy.com/project/badge/Grade/340a76749b634586a498e1c0ab998f08)](https://app.codacy.com/gh/mindee/doctr?utm_source=github.com&utm_medium=referral&utm_content=mindee/doctr&utm_campaign=Badge_Grade) [![Doc Status](https://github.com/mindee/doctr/workflows/doc-status/badge.svg)](https://mindee.github.io/doctr) [![Pypi](https://img.shields.io/badge/pypi-v0.12.0-blue.svg)](https://pypi.org/project/python-doctr/) [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/mindee/doctr) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/mindee/notebooks/blob/main/doctr/quicktour.ipynb) [![Gurubase](https://img.shields.io/badge/Gurubase-Ask%20docTR%20Guru-006BFF)](https://gurubase.io/g/doctr)


  **Optical Character Recognition made seamless & accessible to anyone, powered by TensorFlow 2 & PyTorch**
@@ -438,6 +440,19 @@ The KIE predictor results per page are in a dictionary format with each key repr

  ## Installation

+ > [!WARNING]
+ > **TensorFlow Backend Deprecation Notice**
+ >
+ > Using docTR with TensorFlow as a backend is deprecated and will be removed in the next major release (v1.0.0).
+ > We **recommend switching to the PyTorch backend**, which is more actively maintained and supports the latest features and models.
+ > Alternatively, you can use [OnnxTR](https://github.com/felixdittrich92/OnnxTR), which does **not** require TensorFlow or PyTorch.
+ >
+ > This decision was made based on several considerations:
+ >
+ > - Allows better focus on improving the core library
+ > - Frees up resources to develop new features faster
+ > - Enables more targeted optimizations with PyTorch
+
  ### Prerequisites

  Python 3.10 (or higher) and [pip](https://pip.pypa.io/en/stable/) are required to install docTR.
@@ -504,6 +519,7 @@ Credits where it's due: this repository is implementing, among others, architect
  - MASTER: [MASTER: Multi-Aspect Non-local Network for Scene Text Recognition](https://arxiv.org/pdf/1910.02562.pdf).
  - ViTSTR: [Vision Transformer for Fast and Efficient Scene Text Recognition](https://arxiv.org/pdf/2105.08582.pdf).
  - PARSeq: [Scene Text Recognition with Permuted Autoregressive Sequence Models](https://arxiv.org/pdf/2207.06966).
+ - VIPTR: [A Vision Permutable Extractor for Fast and Efficient Scene Text Recognition](https://arxiv.org/abs/2401.10110).

  ## More goodies

{python_doctr-0.11.0 → python_doctr-0.12.0}/README.md
@@ -2,7 +2,7 @@
  <img src="https://github.com/mindee/doctr/raw/main/docs/images/Logo_doctr.gif" width="40%">
  </p>

- [![Slack Icon](https://img.shields.io/badge/Slack-Community-4A154B?style=flat-square&logo=slack&logoColor=white)](https://slack.mindee.com) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE) ![Build Status](https://github.com/mindee/doctr/workflows/builds/badge.svg) [![Docker Images](https://img.shields.io/badge/Docker-4287f5?style=flat&logo=docker&logoColor=white)](https://github.com/mindee/doctr/pkgs/container/doctr) [![codecov](https://codecov.io/gh/mindee/doctr/branch/main/graph/badge.svg?token=577MO567NM)](https://codecov.io/gh/mindee/doctr) [![CodeFactor](https://www.codefactor.io/repository/github/mindee/doctr/badge?s=bae07db86bb079ce9d6542315b8c6e70fa708a7e)](https://www.codefactor.io/repository/github/mindee/doctr) [![Codacy Badge](https://api.codacy.com/project/badge/Grade/340a76749b634586a498e1c0ab998f08)](https://app.codacy.com/gh/mindee/doctr?utm_source=github.com&utm_medium=referral&utm_content=mindee/doctr&utm_campaign=Badge_Grade) [![Doc Status](https://github.com/mindee/doctr/workflows/doc-status/badge.svg)](https://mindee.github.io/doctr) [![Pypi](https://img.shields.io/badge/pypi-v0.11.0-blue.svg)](https://pypi.org/project/python-doctr/) [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/mindee/doctr) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/mindee/notebooks/blob/main/doctr/quicktour.ipynb) [![Gurubase](https://img.shields.io/badge/Gurubase-Ask%20docTR%20Guru-006BFF)](https://gurubase.io/g/doctr)
+ [![Slack Icon](https://img.shields.io/badge/Slack-Community-4A154B?style=flat-square&logo=slack&logoColor=white)](https://slack.mindee.com) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE) ![Build Status](https://github.com/mindee/doctr/workflows/builds/badge.svg) [![Docker Images](https://img.shields.io/badge/Docker-4287f5?style=flat&logo=docker&logoColor=white)](https://github.com/mindee/doctr/pkgs/container/doctr) [![codecov](https://codecov.io/gh/mindee/doctr/branch/main/graph/badge.svg?token=577MO567NM)](https://codecov.io/gh/mindee/doctr) [![CodeFactor](https://www.codefactor.io/repository/github/mindee/doctr/badge?s=bae07db86bb079ce9d6542315b8c6e70fa708a7e)](https://www.codefactor.io/repository/github/mindee/doctr) [![Codacy Badge](https://api.codacy.com/project/badge/Grade/340a76749b634586a498e1c0ab998f08)](https://app.codacy.com/gh/mindee/doctr?utm_source=github.com&utm_medium=referral&utm_content=mindee/doctr&utm_campaign=Badge_Grade) [![Doc Status](https://github.com/mindee/doctr/workflows/doc-status/badge.svg)](https://mindee.github.io/doctr) [![Pypi](https://img.shields.io/badge/pypi-v0.12.0-blue.svg)](https://pypi.org/project/python-doctr/) [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/mindee/doctr) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/mindee/notebooks/blob/main/doctr/quicktour.ipynb) [![Gurubase](https://img.shields.io/badge/Gurubase-Ask%20docTR%20Guru-006BFF)](https://gurubase.io/g/doctr)


  **Optical Character Recognition made seamless & accessible to anyone, powered by TensorFlow 2 & PyTorch**
@@ -133,6 +133,19 @@ The KIE predictor results per page are in a dictionary format with each key repr

  ## Installation

+ > [!WARNING]
+ > **TensorFlow Backend Deprecation Notice**
+ >
+ > Using docTR with TensorFlow as a backend is deprecated and will be removed in the next major release (v1.0.0).
+ > We **recommend switching to the PyTorch backend**, which is more actively maintained and supports the latest features and models.
+ > Alternatively, you can use [OnnxTR](https://github.com/felixdittrich92/OnnxTR), which does **not** require TensorFlow or PyTorch.
+ >
+ > This decision was made based on several considerations:
+ >
+ > - Allows better focus on improving the core library
+ > - Frees up resources to develop new features faster
+ > - Enables more targeted optimizations with PyTorch
+
  ### Prerequisites

  Python 3.10 (or higher) and [pip](https://pip.pypa.io/en/stable/) are required to install docTR.
@@ -199,6 +212,7 @@ Credits where it's due: this repository is implementing, among others, architect
  - MASTER: [MASTER: Multi-Aspect Non-local Network for Scene Text Recognition](https://arxiv.org/pdf/1910.02562.pdf).
  - ViTSTR: [Vision Transformer for Fast and Efficient Scene Text Recognition](https://arxiv.org/pdf/2105.08582.pdf).
  - PARSeq: [Scene Text Recognition with Permuted Autoregressive Sequence Models](https://arxiv.org/pdf/2207.06966).
+ - VIPTR: [A Vision Permutable Extractor for Fast and Efficient Scene Text Recognition](https://arxiv.org/abs/2401.10110).

  ## More goodies
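A practical note on the migration path recommended in the warning above: docTR selects its backend at import time, and its documented `USE_TORCH`/`USE_TF` environment switches let you pin one explicitly. A minimal sketch, assuming both frameworks happen to be installed in the same environment (the PDF path is a placeholder):

```python
import os

# Force the PyTorch backend before importing doctr; only needed when both
# TensorFlow and PyTorch are present in the same environment.
os.environ["USE_TORCH"] = "1"

from doctr.io import DocumentFile
from doctr.models import ocr_predictor

predictor = ocr_predictor(pretrained=True)
doc = DocumentFile.from_pdf("path/to/your/doc.pdf")  # placeholder path
result = predictor(doc)
```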

{python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/__init__.py
@@ -1,6 +1,7 @@
  from doctr.file_utils import is_tf_available

  from .generator import *
+ from .coco_text import *
  from .cord import *
  from .detection import *
  from .doc_artefacts import *

python_doctr-0.12.0/doctr/datasets/coco_text.py (new file)
@@ -0,0 +1,139 @@
+ # Copyright (C) 2021-2025, Mindee.
+
+ # This program is licensed under the Apache License 2.0.
+ # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+ import json
+ import os
+ from pathlib import Path
+ from typing import Any
+
+ import numpy as np
+ from tqdm import tqdm
+
+ from .datasets import AbstractDataset
+ from .utils import convert_target_to_relative, crop_bboxes_from_image
+
+ __all__ = ["COCOTEXT"]
+
+
+ class COCOTEXT(AbstractDataset):
+     """
+     COCO-Text dataset from `"COCO-Text: Dataset and Benchmark for Text Detection and Recognition in Natural Images"
+     <https://arxiv.org/pdf/1601.07140v2>`_ |
+     `"homepage" <https://bgshih.github.io/cocotext/>`_.
+
+     >>> # NOTE: You need to download the dataset first.
+     >>> from doctr.datasets import COCOTEXT
+     >>> train_set = COCOTEXT(train=True, img_folder="/path/to/coco_text/train2014/",
+     >>>                      label_path="/path/to/coco_text/cocotext.v2.json")
+     >>> img, target = train_set[0]
+     >>> test_set = COCOTEXT(train=False, img_folder="/path/to/coco_text/train2014/",
+     >>>                     label_path = "/path/to/coco_text/cocotext.v2.json")
+     >>> img, target = test_set[0]
+
+     Args:
+         img_folder: folder with all the images of the dataset
+         label_path: path to the annotations file of the dataset
+         train: whether the subset should be the training one
+         use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
+         recognition_task: whether the dataset should be used for recognition task
+         detection_task: whether the dataset should be used for detection task
+         **kwargs: keyword arguments from `AbstractDataset`.
+     """
+
+     def __init__(
+         self,
+         img_folder: str,
+         label_path: str,
+         train: bool = True,
+         use_polygons: bool = False,
+         recognition_task: bool = False,
+         detection_task: bool = False,
+         **kwargs: Any,
+     ) -> None:
+         super().__init__(
+             img_folder, pre_transforms=convert_target_to_relative if not recognition_task else None, **kwargs
+         )
+         # Task check
+         if recognition_task and detection_task:
+             raise ValueError(
+                 " 'recognition' and 'detection task' cannot be set to True simultaneously. "
+                 + " To get the whole dataset with boxes and labels leave both parameters to False "
+             )
+
+         # File existence check
+         if not os.path.exists(label_path) or not os.path.exists(img_folder):
+             raise FileNotFoundError(f"unable to find {label_path if not os.path.exists(label_path) else img_folder}")
+
+         tmp_root = img_folder
+         self.train = train
+         np_dtype = np.float32
+         self.data: list[tuple[str | Path | np.ndarray, str | dict[str, Any] | np.ndarray]] = []
+
+         with open(label_path, "r") as file:
+             data = json.load(file)
+
+         # Filter images based on the set
+         img_items = [img for img in data["imgs"].items() if (img[1]["set"] == "train") == train]
+         box: list[float] | np.ndarray
+
+         for img_id, img_info in tqdm(img_items, desc="Preparing and Loading COCOTEXT", total=len(img_items)):
+             img_path = os.path.join(img_folder, img_info["file_name"])
+
+             # File existence check
+             if not os.path.exists(img_path):  # pragma: no cover
+                 raise FileNotFoundError(f"Unable to locate {img_path}")
+
+             # Get annotations for the current image (only legible text)
+             annotations = [
+                 ann
+                 for ann in data["anns"].values()
+                 if ann["image_id"] == int(img_id) and ann["legibility"] == "legible"
+             ]
+
+             # Some images have no annotations with readable text
+             if not annotations:  # pragma: no cover
+                 continue
+
+             _targets = []
+
+             for annotation in annotations:
+                 x, y, w, h = annotation["bbox"]
+                 if use_polygons:
+                     # (x, y) coordinates of top left, top right, bottom right, bottom left corners
+                     box = np.array(
+                         [
+                             [x, y],
+                             [x + w, y],
+                             [x + w, y + h],
+                             [x, y + h],
+                         ],
+                         dtype=np_dtype,
+                     )
+                 else:
+                     # (xmin, ymin, xmax, ymax) coordinates
+                     box = [x, y, x + w, y + h]
+                 _targets.append((annotation["utf8_string"], box))
+             text_targets, box_targets = zip(*_targets)
+
+             if recognition_task:
+                 crops = crop_bboxes_from_image(
+                     img_path=os.path.join(tmp_root, img_path), geoms=np.asarray(box_targets, dtype=int).clip(min=0)
+                 )
+                 for crop, label in zip(crops, list(text_targets)):
+                     if label and " " not in label:
+                         self.data.append((crop, label))
+
+             elif detection_task:
+                 self.data.append((img_path, np.asarray(box_targets, dtype=int).clip(min=0)))
+             else:
+                 self.data.append((
+                     img_path,
+                     dict(boxes=np.asarray(box_targets, dtype=int).clip(min=0), labels=list(text_targets)),
+                 ))
+
+         self.root = tmp_root
+
+     def extra_repr(self) -> str:
+         return f"train={self.train}"
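
To make the new dataset's modes concrete, a short usage sketch (paths are placeholders; the behavior follows the loading code above, e.g. recognition mode yields `(crop, label)` pairs with space-containing labels filtered out):

```python
from doctr.datasets import COCOTEXT

# Default mode: each sample is a full image with boxes and labels
train_set = COCOTEXT(
    img_folder="/path/to/coco_text/train2014/",
    label_path="/path/to/coco_text/cocotext.v2.json",
    train=True,
)
img, target = train_set[0]  # target: {"boxes": np.ndarray, "labels": [...]}

# Recognition mode: each sample is a cropped word image and its transcription
reco_set = COCOTEXT(
    img_folder="/path/to/coco_text/train2014/",
    label_path="/path/to/coco_text/cocotext.v2.json",
    recognition_task=True,
)
crop, word = reco_set[0]
```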

{python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/cord.py
@@ -116,7 +116,8 @@ class CORD(VisionDataset):
                  img_path=os.path.join(tmp_root, img_path), geoms=np.asarray(box_targets, dtype=int).clip(min=0)
              )
              for crop, label in zip(crops, list(text_targets)):
-                 self.data.append((crop, label))
+                 if " " not in label:
+                     self.data.append((crop, label))
          elif detection_task:
              self.data.append((img_path, np.asarray(box_targets, dtype=int).clip(min=0)))
          else:
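
The `" " not in label` guard added to CORD here recurs in the FUNSD, IC03, IC13, IIIT5K, IMGUR5K, SVHN, SVT, and SynthText hunks below. A hedged sketch of the shared intent, using a hypothetical helper that is not part of the package:

```python
def keep_for_recognition(label: str, max_len: int = 30) -> bool:
    """Hypothetical mirror of the recurring filter: recognition crops must be
    non-empty, single-word (no spaces), and, for IMGUR5K/SynthText, shorter
    than 30 characters."""
    return 0 < len(label) < max_len and " " not in label
```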

{python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/funsd.py
@@ -107,8 +107,8 @@ class FUNSD(VisionDataset):
              )
              for crop, label in zip(crops, list(text_targets)):
                  # filter labels with unknown characters
-                 if not any(char in label for char in ["☑", "☐", "\uf703", "\uf702"]):
-                     self.data.append((crop, label))
+                 if not any(char in label for char in ["☑", "☐", "\u03bf", "\uf703", "\uf702", " "]):
+                     self.data.append((crop, label.replace("–", "-")))
          elif detection_task:
              self.data.append((img_path, np.asarray(box_targets, dtype=np_dtype)))
          else:

{python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/ic03.py
@@ -122,7 +122,7 @@ class IC03(VisionDataset):
          if recognition_task:
              crops = crop_bboxes_from_image(img_path=os.path.join(tmp_root, name.text), geoms=boxes)
              for crop, label in zip(crops, labels):
-                 if crop.shape[0] > 0 and crop.shape[1] > 0 and len(label) > 0:
+                 if crop.shape[0] > 0 and crop.shape[1] > 0 and len(label) > 0 and " " not in label:
                      self.data.append((crop, label))
          elif detection_task:
              self.data.append((name.text, boxes))

{python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/ic13.py
@@ -100,7 +100,8 @@ class IC13(AbstractDataset):
          if recognition_task:
              crops = crop_bboxes_from_image(img_path=img_path, geoms=box_targets)
              for crop, label in zip(crops, labels):
-                 self.data.append((crop, label))
+                 if " " not in label:
+                     self.data.append((crop, label))
          elif detection_task:
              self.data.append((img_path, box_targets))
          else:

{python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/iiit5k.py
@@ -8,6 +8,7 @@ from typing import Any

  import numpy as np
  import scipy.io as sio
+ from PIL import Image
  from tqdm import tqdm

  from .datasets import VisionDataset
@@ -98,7 +99,9 @@ class IIIT5K(VisionDataset):
              box_targets = [[box[0], box[1], box[0] + box[2], box[1] + box[3]] for box in box_targets]

              if recognition_task:
-                 self.data.append((_raw_path, _raw_label))
+                 if " " not in _raw_label:
+                     with Image.open(os.path.join(tmp_root, _raw_path)) as pil_img:
+                         self.data.append((np.array(pil_img.convert("RGB")), _raw_label))
              elif detection_task:
                  self.data.append((_raw_path, np.asarray(box_targets, dtype=np_dtype)))
              else:

{python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/imgur5k.py
@@ -133,7 +133,13 @@ class IMGUR5K(AbstractDataset):
                  img_path=os.path.join(self.root, img_name), geoms=np.asarray(box_targets, dtype=np_dtype)
              )
              for crop, label in zip(crops, labels):
-                 if crop.shape[0] > 0 and crop.shape[1] > 0 and len(label) > 0:
+                 if (
+                     crop.shape[0] > 0
+                     and crop.shape[1] > 0
+                     and len(label) > 0
+                     and len(label) < 30
+                     and " " not in label
+                 ):
                      # write data to disk
                      with open(os.path.join(reco_folder_path, f"{reco_images_counter}.txt"), "w") as f:
                          f.write(label)
@@ -152,6 +158,7 @@
          return f"train={self.train}"

      def _read_from_folder(self, path: str) -> None:
-         for img_path in glob.glob(os.path.join(path, "*.png")):
+         img_paths = glob.glob(os.path.join(path, "*.png"))
+         for img_path in tqdm(iterable=img_paths, desc="Preparing and Loading IMGUR5K", total=len(img_paths)):
              with open(os.path.join(path, f"{os.path.basename(img_path)[:-4]}.txt"), "r") as f:
                  self.data.append((img_path, f.read()))

{python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/loader.py
@@ -19,7 +19,7 @@ def default_collate(samples):
          samples: list of N tuples containing M elements

      Returns:
-         tuple of M sequences contianing N elements each
+         tuple of M sequences containing N elements each
      """
      batch_data = zip(*samples)

{python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/ocr.py
@@ -40,7 +40,7 @@ class OCRDataset(AbstractDataset):
          super().__init__(img_folder, **kwargs)

          # List images
-         self.data: list[tuple[str, dict[str, Any]]] = []
+         self.data: list[tuple[Path, dict[str, Any]]] = []
          np_dtype = np.float32
          with open(label_file, "rb") as f:
              data = json.load(f)

{python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/recognition.py
@@ -23,7 +23,7 @@ class RecognitionDataset(AbstractDataset):

      Args:
          img_folder: path to the images folder
-         labels_path: pathe to the json file containing all labels (character sequences)
+         labels_path: path to the json file containing all labels (character sequences)
          **kwargs: keyword arguments from `AbstractDataset`.
      """

{python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/svhn.py
@@ -129,7 +129,7 @@ class SVHN(VisionDataset):
          if recognition_task:
              crops = crop_bboxes_from_image(img_path=os.path.join(tmp_root, img_name), geoms=box_targets)
              for crop, label in zip(crops, label_targets):
-                 if crop.shape[0] > 0 and crop.shape[1] > 0 and len(label) > 0:
+                 if crop.shape[0] > 0 and crop.shape[1] > 0 and len(label) > 0 and " " not in label:
                      self.data.append((crop, label))
          elif detection_task:
              self.data.append((img_name, box_targets))

{python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/svt.py
@@ -35,7 +35,7 @@ class SVT(VisionDataset):
          **kwargs: keyword arguments from `VisionDataset`.
      """

-     URL = "http://vision.ucsd.edu/~kai/svt/svt.zip"
+     URL = "http://www.iapr-tc11.org/dataset/SVT/svt.zip"
      SHA256 = "63b3d55e6b6d1e036e2a844a20c034fe3af3c32e4d914d6e0c4a3cd43df3bebf"

      def __init__(
@@ -113,7 +113,7 @@
          if recognition_task:
              crops = crop_bboxes_from_image(img_path=os.path.join(tmp_root, name.text), geoms=boxes)
              for crop, label in zip(crops, labels):
-                 if crop.shape[0] > 0 and crop.shape[1] > 0 and len(label) > 0:
+                 if crop.shape[0] > 0 and crop.shape[1] > 0 and len(label) > 0 and " " not in label:
                      self.data.append((crop, label))
          elif detection_task:
              self.data.append((name.text, boxes))

{python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/synthtext.py
@@ -41,6 +41,12 @@ class SynthText(VisionDataset):
      URL = "https://thor.robots.ox.ac.uk/~vgg/data/scenetext/SynthText.zip"
      SHA256 = "28ab030485ec8df3ed612c568dd71fb2793b9afbfa3a9d9c6e792aef33265bf1"

+     # filter corrupted or missing images
+     BLACKLIST = (
+         "67/fruits_129_",
+         "194/window_19_",
+     )
+
      def __init__(
          self,
          train: bool = True,
@@ -111,7 +117,13 @@
          if recognition_task:
              crops = crop_bboxes_from_image(img_path=os.path.join(tmp_root, img_path[0]), geoms=word_boxes)
              for crop, label in zip(crops, labels):
-                 if crop.shape[0] > 0 and crop.shape[1] > 0 and len(label) > 0:
+                 if (
+                     crop.shape[0] > 0
+                     and crop.shape[1] > 0
+                     and len(label) > 0
+                     and len(label) < 30
+                     and " " not in label
+                 ):
                      # write data to disk
                      with open(os.path.join(reco_folder_path, f"{reco_images_counter}.txt"), "w") as f:
                          f.write(label)
@@ -132,6 +144,7 @@
          return f"train={self.train}"

      def _read_from_folder(self, path: str) -> None:
-         for img_path in glob.glob(os.path.join(path, "*.png")):
+         img_paths = glob.glob(os.path.join(path, "*.png"))
+         for img_path in tqdm(iterable=img_paths, desc="Preparing and Loading SynthText", total=len(img_paths)):
              with open(os.path.join(path, f"{os.path.basename(img_path)[:-4]}.txt"), "r") as f:
                  self.data.append((img_path, f.read()))

{python_doctr-0.11.0 → python_doctr-0.12.0}/doctr/datasets/utils.py
@@ -48,7 +48,7 @@ def translate(
          A string translated in a given vocab
      """
      if VOCABS.get(vocab_name) is None:
-         raise KeyError("output vocabulary must be in vocabs dictionnary")
+         raise KeyError("output vocabulary must be in vocabs dictionary")

      translated = ""
      for char in input_string:
@@ -81,11 +81,12 @@ def encode_string(
      """
      try:
          return list(map(vocab.index, input_string))
-     except ValueError:
+     except ValueError as e:
+         missing_chars = [char for char in input_string if char not in vocab]
          raise ValueError(
-             f"some characters cannot be found in 'vocab'. \
-             Please check the input string {input_string} and the vocabulary {vocab}"
-         )
+             f"Some characters cannot be found in 'vocab': {set(missing_chars)}.\n"
+             f"Please check the input string `{input_string}` and the vocabulary `{vocab}`"
+         ) from e


  def decode_sequence(
@@ -199,7 +200,7 @@ def crop_bboxes_from_image(img_path: str | Path, geoms: np.ndarray) -> list[np.n
          a list of cropped images
      """
      with Image.open(img_path) as pil_img:
-         img: np.ndarray = np.array(pil_img.convert("RGB"))
+         img: np.ndarray = np.asarray(pil_img.convert("RGB"))
      # Polygon
      if geoms.ndim == 3 and geoms.shape[1:] == (4, 2):
          return extract_rcrops(img, geoms.astype(dtype=int))
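
Finally, a small sketch of the improved `encode_string` error reporting above (the vocab string here is illustrative):

```python
from doctr.datasets.utils import encode_string

# Maps each character to its index in the vocab string
print(encode_string("cab", "abc"))  # [2, 0, 1]

try:
    encode_string("ab9", "abc")
except ValueError as err:
    # The message now names the offending characters, e.g. {'9'},
    # instead of only echoing the full input string and vocabulary.
    print(err)
```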