python-doctr 0.8.1.tar.gz → 0.10.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (190)
  1. {python-doctr-0.8.1/python_doctr.egg-info → python_doctr-0.10.0}/PKG-INFO +43 -39
  2. {python-doctr-0.8.1 → python_doctr-0.10.0}/README.md +14 -16
  3. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/__init__.py +1 -1
  4. python_doctr-0.10.0/doctr/contrib/__init__.py +0 -0
  5. python_doctr-0.10.0/doctr/contrib/artefacts.py +131 -0
  6. python_doctr-0.10.0/doctr/contrib/base.py +105 -0
  7. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/cord.py +10 -1
  8. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/datasets/pytorch.py +2 -2
  9. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/funsd.py +11 -1
  10. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/generator/base.py +6 -5
  11. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/ic03.py +11 -1
  12. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/ic13.py +10 -1
  13. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/iiit5k.py +26 -16
  14. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/imgur5k.py +11 -2
  15. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/loader.py +1 -6
  16. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/sroie.py +11 -1
  17. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/svhn.py +11 -1
  18. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/svt.py +11 -1
  19. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/synthtext.py +11 -1
  20. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/utils.py +9 -3
  21. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/vocabs.py +15 -4
  22. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/wildreceipt.py +12 -1
  23. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/file_utils.py +45 -12
  24. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/io/elements.py +52 -10
  25. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/io/html.py +2 -2
  26. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/io/image/pytorch.py +6 -8
  27. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/io/image/tensorflow.py +1 -1
  28. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/io/pdf.py +5 -2
  29. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/io/reader.py +6 -0
  30. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/__init__.py +0 -1
  31. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/_utils.py +57 -20
  32. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/builder.py +73 -15
  33. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/classification/magc_resnet/tensorflow.py +13 -6
  34. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/classification/mobilenet/pytorch.py +47 -9
  35. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/classification/mobilenet/tensorflow.py +51 -14
  36. python_doctr-0.10.0/doctr/models/classification/predictor/pytorch.py +67 -0
  37. python_doctr-0.10.0/doctr/models/classification/predictor/tensorflow.py +62 -0
  38. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/classification/resnet/tensorflow.py +21 -8
  39. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/classification/textnet/pytorch.py +3 -3
  40. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/classification/textnet/tensorflow.py +11 -5
  41. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/classification/vgg/tensorflow.py +9 -3
  42. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/classification/vit/tensorflow.py +10 -4
  43. python_doctr-0.10.0/doctr/models/classification/zoo.py +110 -0
  44. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/detection/_utils/__init__.py +1 -0
  45. python_doctr-0.10.0/doctr/models/detection/_utils/base.py +66 -0
  46. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/detection/differentiable_binarization/base.py +4 -3
  47. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/detection/differentiable_binarization/pytorch.py +2 -2
  48. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/detection/differentiable_binarization/tensorflow.py +34 -12
  49. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/detection/fast/base.py +6 -5
  50. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/detection/fast/pytorch.py +4 -4
  51. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/detection/fast/tensorflow.py +15 -12
  52. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/detection/linknet/base.py +4 -3
  53. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/detection/linknet/tensorflow.py +23 -11
  54. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/detection/predictor/pytorch.py +15 -1
  55. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/detection/predictor/tensorflow.py +17 -3
  56. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/detection/zoo.py +7 -2
  57. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/factory/hub.py +8 -18
  58. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/kie_predictor/base.py +13 -3
  59. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/kie_predictor/pytorch.py +45 -20
  60. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/kie_predictor/tensorflow.py +44 -17
  61. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/modules/layers/pytorch.py +2 -3
  62. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/modules/layers/tensorflow.py +6 -8
  63. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/modules/transformer/pytorch.py +2 -2
  64. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/modules/transformer/tensorflow.py +0 -2
  65. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/modules/vision_transformer/pytorch.py +1 -1
  66. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/modules/vision_transformer/tensorflow.py +1 -1
  67. python_doctr-0.10.0/doctr/models/predictor/base.py +200 -0
  68. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/predictor/pytorch.py +35 -20
  69. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/predictor/tensorflow.py +35 -18
  70. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/preprocessor/pytorch.py +4 -4
  71. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/preprocessor/tensorflow.py +3 -2
  72. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/recognition/crnn/tensorflow.py +8 -6
  73. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/recognition/master/pytorch.py +2 -2
  74. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/recognition/master/tensorflow.py +9 -4
  75. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/recognition/parseq/pytorch.py +4 -3
  76. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/recognition/parseq/tensorflow.py +14 -11
  77. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/recognition/sar/pytorch.py +7 -6
  78. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/recognition/sar/tensorflow.py +10 -12
  79. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/recognition/vitstr/pytorch.py +1 -1
  80. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/recognition/vitstr/tensorflow.py +9 -4
  81. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/recognition/zoo.py +1 -1
  82. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/utils/pytorch.py +1 -1
  83. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/utils/tensorflow.py +15 -15
  84. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/zoo.py +2 -2
  85. python_doctr-0.10.0/doctr/py.typed +0 -0
  86. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/transforms/functional/base.py +1 -1
  87. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/transforms/functional/pytorch.py +5 -5
  88. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/transforms/modules/base.py +37 -15
  89. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/transforms/modules/pytorch.py +73 -14
  90. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/transforms/modules/tensorflow.py +78 -19
  91. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/utils/fonts.py +7 -5
  92. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/utils/geometry.py +141 -31
  93. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/utils/metrics.py +34 -175
  94. python_doctr-0.10.0/doctr/utils/reconstitution.py +212 -0
  95. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/utils/visualization.py +5 -118
  96. python_doctr-0.10.0/doctr/version.py +1 -0
  97. {python-doctr-0.8.1 → python_doctr-0.10.0}/pyproject.toml +48 -32
  98. {python-doctr-0.8.1 → python_doctr-0.10.0/python_doctr.egg-info}/PKG-INFO +43 -39
  99. {python-doctr-0.8.1 → python_doctr-0.10.0}/python_doctr.egg-info/SOURCES.txt +6 -6
  100. {python-doctr-0.8.1 → python_doctr-0.10.0}/python_doctr.egg-info/requires.txt +29 -20
  101. {python-doctr-0.8.1 → python_doctr-0.10.0}/setup.py +1 -1
  102. python-doctr-0.8.1/doctr/models/artefacts/__init__.py +0 -2
  103. python-doctr-0.8.1/doctr/models/artefacts/barcode.py +0 -74
  104. python-doctr-0.8.1/doctr/models/artefacts/face.py +0 -63
  105. python-doctr-0.8.1/doctr/models/classification/predictor/pytorch.py +0 -56
  106. python-doctr-0.8.1/doctr/models/classification/predictor/tensorflow.py +0 -52
  107. python-doctr-0.8.1/doctr/models/classification/zoo.py +0 -74
  108. python-doctr-0.8.1/doctr/models/obj_detection/__init__.py +0 -1
  109. python-doctr-0.8.1/doctr/models/obj_detection/faster_rcnn/__init__.py +0 -4
  110. python-doctr-0.8.1/doctr/models/obj_detection/faster_rcnn/pytorch.py +0 -81
  111. python-doctr-0.8.1/doctr/models/predictor/base.py +0 -161
  112. python-doctr-0.8.1/doctr/version.py +0 -1
  113. {python-doctr-0.8.1 → python_doctr-0.10.0}/LICENSE +0 -0
  114. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/__init__.py +0 -0
  115. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/datasets/__init__.py +0 -0
  116. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/datasets/base.py +0 -0
  117. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/datasets/tensorflow.py +0 -0
  118. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/detection.py +0 -0
  119. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/doc_artefacts.py +0 -0
  120. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/generator/__init__.py +0 -0
  121. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/generator/pytorch.py +0 -0
  122. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/generator/tensorflow.py +0 -0
  123. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/iiithws.py +0 -0
  124. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/mjsynth.py +0 -0
  125. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/ocr.py +0 -0
  126. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/orientation.py +0 -0
  127. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/recognition.py +0 -0
  128. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/io/__init__.py +0 -0
  129. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/io/image/__init__.py +0 -0
  130. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/io/image/base.py +0 -0
  131. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/classification/__init__.py +0 -0
  132. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/classification/magc_resnet/__init__.py +0 -0
  133. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/classification/magc_resnet/pytorch.py +0 -0
  134. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/classification/mobilenet/__init__.py +0 -0
  135. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/classification/predictor/__init__.py +0 -0
  136. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/classification/resnet/__init__.py +0 -0
  137. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/classification/resnet/pytorch.py +0 -0
  138. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/classification/textnet/__init__.py +0 -0
  139. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/classification/vgg/__init__.py +0 -0
  140. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/classification/vgg/pytorch.py +0 -0
  141. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/classification/vit/__init__.py +0 -0
  142. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/classification/vit/pytorch.py +0 -0
  143. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/core.py +0 -0
  144. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/detection/__init__.py +0 -0
  145. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/detection/_utils/pytorch.py +0 -0
  146. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/detection/_utils/tensorflow.py +0 -0
  147. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/detection/core.py +0 -0
  148. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/detection/differentiable_binarization/__init__.py +0 -0
  149. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/detection/fast/__init__.py +0 -0
  150. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/detection/linknet/__init__.py +0 -0
  151. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/detection/linknet/pytorch.py +0 -0
  152. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/detection/predictor/__init__.py +0 -0
  153. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/factory/__init__.py +0 -0
  154. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/kie_predictor/__init__.py +0 -0
  155. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/modules/__init__.py +0 -0
  156. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/modules/layers/__init__.py +0 -0
  157. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/modules/transformer/__init__.py +0 -0
  158. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/modules/vision_transformer/__init__.py +0 -0
  159. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/predictor/__init__.py +0 -0
  160. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/preprocessor/__init__.py +0 -0
  161. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/recognition/__init__.py +0 -0
  162. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/recognition/core.py +0 -0
  163. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/recognition/crnn/__init__.py +0 -0
  164. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/recognition/crnn/pytorch.py +0 -0
  165. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/recognition/master/__init__.py +0 -0
  166. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/recognition/master/base.py +0 -0
  167. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/recognition/parseq/__init__.py +0 -0
  168. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/recognition/parseq/base.py +0 -0
  169. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/recognition/predictor/__init__.py +0 -0
  170. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/recognition/predictor/_utils.py +0 -0
  171. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/recognition/predictor/pytorch.py +0 -0
  172. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/recognition/predictor/tensorflow.py +0 -0
  173. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/recognition/sar/__init__.py +0 -0
  174. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/recognition/utils.py +0 -0
  175. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/recognition/vitstr/__init__.py +0 -0
  176. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/recognition/vitstr/base.py +0 -0
  177. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/models/utils/__init__.py +0 -0
  178. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/transforms/__init__.py +0 -0
  179. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/transforms/functional/__init__.py +0 -0
  180. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/transforms/functional/tensorflow.py +0 -0
  181. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/transforms/modules/__init__.py +0 -0
  182. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/utils/__init__.py +0 -0
  183. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/utils/common_types.py +0 -0
  184. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/utils/data.py +0 -0
  185. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/utils/multithreading.py +0 -0
  186. {python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/utils/repr.py +0 -0
  187. {python-doctr-0.8.1 → python_doctr-0.10.0}/python_doctr.egg-info/dependency_links.txt +0 -0
  188. {python-doctr-0.8.1 → python_doctr-0.10.0}/python_doctr.egg-info/top_level.txt +0 -0
  189. {python-doctr-0.8.1 → python_doctr-0.10.0}/python_doctr.egg-info/zip-safe +0 -0
  190. {python-doctr-0.8.1 → python_doctr-0.10.0}/setup.cfg +0 -0
{python-doctr-0.8.1/python_doctr.egg-info → python_doctr-0.10.0}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: python-doctr
- Version: 0.8.1
+ Version: 0.10.0
  Summary: Document Text Recognition (docTR): deep Learning for high-performance OCR on documents.
  Author-email: Mindee <contact@mindee.com>
  Maintainer: François-Guillaume Fernandez, Charles Gaillard, Olivier Dulcy, Felix Dittrich
@@ -219,42 +219,45 @@ Classifier: License :: OSI Approved :: Apache Software License
  Classifier: Natural Language :: English
  Classifier: Operating System :: OS Independent
  Classifier: Programming Language :: Python :: 3
- Classifier: Programming Language :: Python :: 3.8
  Classifier: Programming Language :: Python :: 3.9
  Classifier: Programming Language :: Python :: 3.10
+ Classifier: Programming Language :: Python :: 3.11
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
- Requires-Python: <4,>=3.8.0
+ Requires-Python: <4,>=3.9.0
  Description-Content-Type: text/markdown
  License-File: LICENSE
- Requires-Dist: importlib_metadata
- Requires-Dist: numpy<2.0.0,>=1.16.0
+ Requires-Dist: numpy<3.0.0,>=1.16.0
  Requires-Dist: scipy<2.0.0,>=1.4.0
  Requires-Dist: h5py<4.0.0,>=3.1.0
  Requires-Dist: opencv-python<5.0.0,>=4.5.0
- Requires-Dist: pypdfium2<5.0.0,>=4.0.0
+ Requires-Dist: pypdfium2<5.0.0,>=4.11.0
  Requires-Dist: pyclipper<2.0.0,>=1.2.0
  Requires-Dist: shapely<3.0.0,>=1.6.0
  Requires-Dist: langdetect<2.0.0,>=1.0.9
  Requires-Dist: rapidfuzz<4.0.0,>=3.0.0
  Requires-Dist: huggingface-hub<1.0.0,>=0.20.0
- Requires-Dist: matplotlib>=3.1.0
- Requires-Dist: weasyprint>=55.0
  Requires-Dist: Pillow>=9.2.0
  Requires-Dist: defusedxml>=0.7.0
- Requires-Dist: mplcursors>=0.3
- Requires-Dist: unidecode>=1.0.0
+ Requires-Dist: anyascii>=0.3.2
  Requires-Dist: tqdm>=4.30.0
  Provides-Extra: tf
- Requires-Dist: tensorflow<2.16.0,>=2.11.0; extra == "tf"
+ Requires-Dist: tensorflow<3.0.0,>=2.15.0; extra == "tf"
+ Requires-Dist: tf-keras<3.0.0,>=2.15.0; extra == "tf"
  Requires-Dist: tf2onnx<2.0.0,>=1.16.0; extra == "tf"
  Provides-Extra: torch
- Requires-Dist: torch<3.0.0,>=1.12.0; extra == "torch"
- Requires-Dist: torchvision>=0.13.0; extra == "torch"
+ Requires-Dist: torch<3.0.0,>=2.0.0; extra == "torch"
+ Requires-Dist: torchvision>=0.15.0; extra == "torch"
  Requires-Dist: onnx<3.0.0,>=1.12.0; extra == "torch"
+ Provides-Extra: html
+ Requires-Dist: weasyprint>=55.0; extra == "html"
+ Provides-Extra: viz
+ Requires-Dist: matplotlib>=3.1.0; extra == "viz"
+ Requires-Dist: mplcursors>=0.3; extra == "viz"
+ Provides-Extra: contrib
+ Requires-Dist: onnxruntime>=1.11.0; extra == "contrib"
  Provides-Extra: testing
  Requires-Dist: pytest>=5.3.2; extra == "testing"
  Requires-Dist: coverage[toml]>=4.5.4; extra == "testing"
- Requires-Dist: hdf5storage>=0.1.18; extra == "testing"
  Requires-Dist: onnxruntime>=1.11.0; extra == "testing"
  Requires-Dist: requests>=2.20.0; extra == "testing"
  Requires-Dist: psutil>=5.9.5; extra == "testing"
@@ -266,30 +269,33 @@ Provides-Extra: docs
  Requires-Dist: sphinx!=3.5.0,>=3.0.0; extra == "docs"
  Requires-Dist: sphinxemoji>=0.1.8; extra == "docs"
  Requires-Dist: sphinx-copybutton>=0.3.1; extra == "docs"
- Requires-Dist: docutils<0.21; extra == "docs"
+ Requires-Dist: docutils<0.22; extra == "docs"
  Requires-Dist: recommonmark>=0.7.1; extra == "docs"
  Requires-Dist: sphinx-markdown-tables>=0.0.15; extra == "docs"
  Requires-Dist: sphinx-tabs>=3.3.0; extra == "docs"
  Requires-Dist: furo>=2022.3.4; extra == "docs"
  Provides-Extra: dev
- Requires-Dist: tensorflow<2.16.0,>=2.11.0; extra == "dev"
+ Requires-Dist: tensorflow<3.0.0,>=2.15.0; extra == "dev"
+ Requires-Dist: tf-keras<3.0.0,>=2.15.0; extra == "dev"
  Requires-Dist: tf2onnx<2.0.0,>=1.16.0; extra == "dev"
- Requires-Dist: torch<3.0.0,>=1.12.0; extra == "dev"
- Requires-Dist: torchvision>=0.13.0; extra == "dev"
+ Requires-Dist: torch<3.0.0,>=2.0.0; extra == "dev"
+ Requires-Dist: torchvision>=0.15.0; extra == "dev"
  Requires-Dist: onnx<3.0.0,>=1.12.0; extra == "dev"
+ Requires-Dist: weasyprint>=55.0; extra == "dev"
+ Requires-Dist: matplotlib>=3.1.0; extra == "dev"
+ Requires-Dist: mplcursors>=0.3; extra == "dev"
  Requires-Dist: pytest>=5.3.2; extra == "dev"
  Requires-Dist: coverage[toml]>=4.5.4; extra == "dev"
- Requires-Dist: hdf5storage>=0.1.18; extra == "dev"
  Requires-Dist: onnxruntime>=1.11.0; extra == "dev"
  Requires-Dist: requests>=2.20.0; extra == "dev"
  Requires-Dist: psutil>=5.9.5; extra == "dev"
- Requires-Dist: ruff>=0.1.5; extra == "dev"
- Requires-Dist: mypy>=0.812; extra == "dev"
- Requires-Dist: pre-commit>=2.17.0; extra == "dev"
+ Requires-Dist: ruff>=0.3.0; extra == "dev"
+ Requires-Dist: mypy>=1.0; extra == "dev"
+ Requires-Dist: pre-commit>=3.0.0; extra == "dev"
  Requires-Dist: sphinx!=3.5.0,>=3.0.0; extra == "dev"
  Requires-Dist: sphinxemoji>=0.1.8; extra == "dev"
  Requires-Dist: sphinx-copybutton>=0.3.1; extra == "dev"
- Requires-Dist: docutils<0.21; extra == "dev"
+ Requires-Dist: docutils<0.22; extra == "dev"
  Requires-Dist: recommonmark>=0.7.1; extra == "dev"
  Requires-Dist: sphinx-markdown-tables>=0.0.15; extra == "dev"
  Requires-Dist: sphinx-tabs>=3.3.0; extra == "dev"
@@ -299,7 +305,7 @@ Requires-Dist: furo>=2022.3.4; extra == "dev"
  <img src="https://github.com/mindee/doctr/raw/main/docs/images/Logo_doctr.gif" width="40%">
  </p>

- [![Slack Icon](https://img.shields.io/badge/Slack-Community-4A154B?style=flat-square&logo=slack&logoColor=white)](https://slack.mindee.com) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE) ![Build Status](https://github.com/mindee/doctr/workflows/builds/badge.svg) [![Docker Images](https://img.shields.io/badge/Docker-4287f5?style=flat&logo=docker&logoColor=white)](https://github.com/mindee/doctr/pkgs/container/doctr) [![codecov](https://codecov.io/gh/mindee/doctr/branch/main/graph/badge.svg?token=577MO567NM)](https://codecov.io/gh/mindee/doctr) [![CodeFactor](https://www.codefactor.io/repository/github/mindee/doctr/badge?s=bae07db86bb079ce9d6542315b8c6e70fa708a7e)](https://www.codefactor.io/repository/github/mindee/doctr) [![Codacy Badge](https://api.codacy.com/project/badge/Grade/340a76749b634586a498e1c0ab998f08)](https://app.codacy.com/gh/mindee/doctr?utm_source=github.com&utm_medium=referral&utm_content=mindee/doctr&utm_campaign=Badge_Grade) [![Doc Status](https://github.com/mindee/doctr/workflows/doc-status/badge.svg)](https://mindee.github.io/doctr) [![Pypi](https://img.shields.io/badge/pypi-v0.8.0-blue.svg)](https://pypi.org/project/python-doctr/) [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/mindee/doctr) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/mindee/notebooks/blob/main/doctr/quicktour.ipynb)
+ [![Slack Icon](https://img.shields.io/badge/Slack-Community-4A154B?style=flat-square&logo=slack&logoColor=white)](https://slack.mindee.com) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE) ![Build Status](https://github.com/mindee/doctr/workflows/builds/badge.svg) [![Docker Images](https://img.shields.io/badge/Docker-4287f5?style=flat&logo=docker&logoColor=white)](https://github.com/mindee/doctr/pkgs/container/doctr) [![codecov](https://codecov.io/gh/mindee/doctr/branch/main/graph/badge.svg?token=577MO567NM)](https://codecov.io/gh/mindee/doctr) [![CodeFactor](https://www.codefactor.io/repository/github/mindee/doctr/badge?s=bae07db86bb079ce9d6542315b8c6e70fa708a7e)](https://www.codefactor.io/repository/github/mindee/doctr) [![Codacy Badge](https://api.codacy.com/project/badge/Grade/340a76749b634586a498e1c0ab998f08)](https://app.codacy.com/gh/mindee/doctr?utm_source=github.com&utm_medium=referral&utm_content=mindee/doctr&utm_campaign=Badge_Grade) [![Doc Status](https://github.com/mindee/doctr/workflows/doc-status/badge.svg)](https://mindee.github.io/doctr) [![Pypi](https://img.shields.io/badge/pypi-v0.9.0-blue.svg)](https://pypi.org/project/python-doctr/) [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/mindee/doctr) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/mindee/notebooks/blob/main/doctr/quicktour.ipynb)


  **Optical Character Recognition made seamless & accessible to anyone, powered by TensorFlow 2 & PyTorch**
@@ -334,7 +340,7 @@ from doctr.io import DocumentFile
  pdf_doc = DocumentFile.from_pdf("path/to/your/doc.pdf")
  # Image
  single_img_doc = DocumentFile.from_images("path/to/your/img.jpg")
- # Webpage
+ # Webpage (requires `weasyprint` to be installed)
  webpage_doc = DocumentFile.from_url("https://www.yoursite.com")
  # Multiple page images
  multi_img_doc = DocumentFile.from_images(["path/to/page1.jpg", "path/to/page2.jpg"])
@@ -372,6 +378,7 @@ If both options are set to False, the predictor will always fit and return rotat
  To interpret your model's predictions, you can visualize them interactively as follows:

  ```python
+ # Display the result (requires matplotlib & mplcursors to be installed)
  result.show()
  ```

@@ -431,17 +438,7 @@ The KIE predictor results per page are in a dictionary format with each key repr

  ### Prerequisites

- Python 3.8 (or higher) and [pip](https://pip.pypa.io/en/stable/) are required to install docTR.
-
- Since we use [weasyprint](https://weasyprint.org/), you will need extra dependencies if you are not running Linux.
-
- For MacOS users, you can install them as follows:
-
- ```shell
- brew install cairo pango gdk-pixbuf libffi
- ```
-
- For Windows users, those dependencies are included in GTK. You can find the latest installer over [here](https://github.com/tschoonj/GTK-for-Windows-Runtime-Environment-Installer/releases).
+ Python 3.9 (or higher) and [pip](https://pip.pypa.io/en/stable/) are required to install docTR.

  ### Latest release

@@ -460,12 +457,14 @@ We try to keep framework-specific dependencies to a minimum. You can install fra
  pip install "python-doctr[tf]"
  # for PyTorch
  pip install "python-doctr[torch]"
+ # optional dependencies for visualization, html, and contrib modules can be installed as follows:
+ pip install "python-doctr[torch,viz,html,contib]"
  ```

  For MacBooks with M1 chip, you will need some additional packages or specific versions:

  - TensorFlow 2: [metal plugin](https://developer.apple.com/metal/tensorflow-plugin/)
- - PyTorch: [version >= 1.12.0](https://pytorch.org/get-started/locally/#start-locally)
+ - PyTorch: [version >= 2.0.0](https://pytorch.org/get-started/locally/#start-locally)

  ### Developer mode

@@ -647,9 +646,14 @@ Your API should now be running locally on your port 8002. Access your automatica

  ```python
  import requests
+
+ params = {"det_arch": "db_resnet50", "reco_arch": "crnn_vgg16_bn"}
+
  with open('/path/to/your/doc.jpg', 'rb') as f:
-     data = f.read()
- response = requests.post("http://localhost:8002/ocr", files={'file': data}).json()
+     files = [  # application/pdf, image/jpeg, image/png supported
+         ("files", ("doc.jpg", f.read(), "image/jpeg")),
+     ]
+ print(requests.post("http://localhost:8080/ocr", params=params, files=files).json())
  ```

  ### Example notebooks
{python-doctr-0.8.1 → python_doctr-0.10.0}/README.md
@@ -2,7 +2,7 @@
  <img src="https://github.com/mindee/doctr/raw/main/docs/images/Logo_doctr.gif" width="40%">
  </p>

- [![Slack Icon](https://img.shields.io/badge/Slack-Community-4A154B?style=flat-square&logo=slack&logoColor=white)](https://slack.mindee.com) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE) ![Build Status](https://github.com/mindee/doctr/workflows/builds/badge.svg) [![Docker Images](https://img.shields.io/badge/Docker-4287f5?style=flat&logo=docker&logoColor=white)](https://github.com/mindee/doctr/pkgs/container/doctr) [![codecov](https://codecov.io/gh/mindee/doctr/branch/main/graph/badge.svg?token=577MO567NM)](https://codecov.io/gh/mindee/doctr) [![CodeFactor](https://www.codefactor.io/repository/github/mindee/doctr/badge?s=bae07db86bb079ce9d6542315b8c6e70fa708a7e)](https://www.codefactor.io/repository/github/mindee/doctr) [![Codacy Badge](https://api.codacy.com/project/badge/Grade/340a76749b634586a498e1c0ab998f08)](https://app.codacy.com/gh/mindee/doctr?utm_source=github.com&utm_medium=referral&utm_content=mindee/doctr&utm_campaign=Badge_Grade) [![Doc Status](https://github.com/mindee/doctr/workflows/doc-status/badge.svg)](https://mindee.github.io/doctr) [![Pypi](https://img.shields.io/badge/pypi-v0.8.0-blue.svg)](https://pypi.org/project/python-doctr/) [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/mindee/doctr) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/mindee/notebooks/blob/main/doctr/quicktour.ipynb)
+ [![Slack Icon](https://img.shields.io/badge/Slack-Community-4A154B?style=flat-square&logo=slack&logoColor=white)](https://slack.mindee.com) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE) ![Build Status](https://github.com/mindee/doctr/workflows/builds/badge.svg) [![Docker Images](https://img.shields.io/badge/Docker-4287f5?style=flat&logo=docker&logoColor=white)](https://github.com/mindee/doctr/pkgs/container/doctr) [![codecov](https://codecov.io/gh/mindee/doctr/branch/main/graph/badge.svg?token=577MO567NM)](https://codecov.io/gh/mindee/doctr) [![CodeFactor](https://www.codefactor.io/repository/github/mindee/doctr/badge?s=bae07db86bb079ce9d6542315b8c6e70fa708a7e)](https://www.codefactor.io/repository/github/mindee/doctr) [![Codacy Badge](https://api.codacy.com/project/badge/Grade/340a76749b634586a498e1c0ab998f08)](https://app.codacy.com/gh/mindee/doctr?utm_source=github.com&utm_medium=referral&utm_content=mindee/doctr&utm_campaign=Badge_Grade) [![Doc Status](https://github.com/mindee/doctr/workflows/doc-status/badge.svg)](https://mindee.github.io/doctr) [![Pypi](https://img.shields.io/badge/pypi-v0.9.0-blue.svg)](https://pypi.org/project/python-doctr/) [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/mindee/doctr) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/mindee/notebooks/blob/main/doctr/quicktour.ipynb)


  **Optical Character Recognition made seamless & accessible to anyone, powered by TensorFlow 2 & PyTorch**
@@ -37,7 +37,7 @@ from doctr.io import DocumentFile
  pdf_doc = DocumentFile.from_pdf("path/to/your/doc.pdf")
  # Image
  single_img_doc = DocumentFile.from_images("path/to/your/img.jpg")
- # Webpage
+ # Webpage (requires `weasyprint` to be installed)
  webpage_doc = DocumentFile.from_url("https://www.yoursite.com")
  # Multiple page images
  multi_img_doc = DocumentFile.from_images(["path/to/page1.jpg", "path/to/page2.jpg"])
@@ -75,6 +75,7 @@ If both options are set to False, the predictor will always fit and return rotat
  To interpret your model's predictions, you can visualize them interactively as follows:

  ```python
+ # Display the result (requires matplotlib & mplcursors to be installed)
  result.show()
  ```

@@ -134,17 +135,7 @@ The KIE predictor results per page are in a dictionary format with each key repr

  ### Prerequisites

- Python 3.8 (or higher) and [pip](https://pip.pypa.io/en/stable/) are required to install docTR.
-
- Since we use [weasyprint](https://weasyprint.org/), you will need extra dependencies if you are not running Linux.
-
- For MacOS users, you can install them as follows:
-
- ```shell
- brew install cairo pango gdk-pixbuf libffi
- ```
-
- For Windows users, those dependencies are included in GTK. You can find the latest installer over [here](https://github.com/tschoonj/GTK-for-Windows-Runtime-Environment-Installer/releases).
+ Python 3.9 (or higher) and [pip](https://pip.pypa.io/en/stable/) are required to install docTR.

  ### Latest release

@@ -163,12 +154,14 @@ We try to keep framework-specific dependencies to a minimum. You can install fra
  pip install "python-doctr[tf]"
  # for PyTorch
  pip install "python-doctr[torch]"
+ # optional dependencies for visualization, html, and contrib modules can be installed as follows:
+ pip install "python-doctr[torch,viz,html,contib]"
  ```

  For MacBooks with M1 chip, you will need some additional packages or specific versions:

  - TensorFlow 2: [metal plugin](https://developer.apple.com/metal/tensorflow-plugin/)
- - PyTorch: [version >= 1.12.0](https://pytorch.org/get-started/locally/#start-locally)
+ - PyTorch: [version >= 2.0.0](https://pytorch.org/get-started/locally/#start-locally)

  ### Developer mode

@@ -350,9 +343,14 @@ Your API should now be running locally on your port 8002. Access your automatica

  ```python
  import requests
+
+ params = {"det_arch": "db_resnet50", "reco_arch": "crnn_vgg16_bn"}
+
  with open('/path/to/your/doc.jpg', 'rb') as f:
-     data = f.read()
- response = requests.post("http://localhost:8002/ocr", files={'file': data}).json()
+     files = [  # application/pdf, image/jpeg, image/png supported
+         ("files", ("doc.jpg", f.read(), "image/jpeg")),
+     ]
+ print(requests.post("http://localhost:8080/ocr", params=params, files=files).json())
  ```

  ### Example notebooks
{python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/__init__.py
@@ -1,3 +1,3 @@
- from . import io, models, datasets, transforms, utils
+ from . import io, models, datasets, contrib, transforms, utils
  from .file_utils import is_tf_available, is_torch_available
  from .version import __version__  # noqa: F401
python_doctr-0.10.0/doctr/contrib/__init__.py (new file, empty)
python_doctr-0.10.0/doctr/contrib/artefacts.py (new file)
@@ -0,0 +1,131 @@
+ # Copyright (C) 2021-2024, Mindee.
+
+ # This program is licensed under the Apache License 2.0.
+ # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+ from typing import Any, Dict, List, Optional, Tuple
+
+ import cv2
+ import numpy as np
+
+ from doctr.file_utils import requires_package
+
+ from .base import _BasePredictor
+
+ __all__ = ["ArtefactDetector"]
+
+ default_cfgs: Dict[str, Dict[str, Any]] = {
+     "yolov8_artefact": {
+         "input_shape": (3, 1024, 1024),
+         "labels": ["bar_code", "qr_code", "logo", "photo"],
+         "url": "https://doctr-static.mindee.com/models?id=v0.8.1/yolo_artefact-f9d66f14.onnx&src=0",
+     },
+ }
+
+
+ class ArtefactDetector(_BasePredictor):
+     """
+     A class to detect artefacts in images
+
+     >>> from doctr.io import DocumentFile
+     >>> from doctr.contrib.artefacts import ArtefactDetector
+     >>> doc = DocumentFile.from_images(["path/to/image.jpg"])
+     >>> detector = ArtefactDetector()
+     >>> results = detector(doc)
+
+     Args:
+     ----
+         arch: the architecture to use
+         batch_size: the batch size to use
+         model_path: the path to the model to use
+         labels: the labels to use
+         input_shape: the input shape to use
+         mask_labels: the mask labels to use
+         conf_threshold: the confidence threshold to use
+         iou_threshold: the intersection over union threshold to use
+         **kwargs: additional arguments to be passed to `download_from_url`
+     """
+
+     def __init__(
+         self,
+         arch: str = "yolov8_artefact",
+         batch_size: int = 2,
+         model_path: Optional[str] = None,
+         labels: Optional[List[str]] = None,
+         input_shape: Optional[Tuple[int, int, int]] = None,
+         conf_threshold: float = 0.5,
+         iou_threshold: float = 0.5,
+         **kwargs: Any,
+     ) -> None:
+         super().__init__(batch_size=batch_size, url=default_cfgs[arch]["url"], model_path=model_path, **kwargs)
+         self.labels = labels or default_cfgs[arch]["labels"]
+         self.input_shape = input_shape or default_cfgs[arch]["input_shape"]
+         self.conf_threshold = conf_threshold
+         self.iou_threshold = iou_threshold
+
+     def preprocess(self, img: np.ndarray) -> np.ndarray:
+         return np.transpose(cv2.resize(img, (self.input_shape[2], self.input_shape[1])), (2, 0, 1)) / np.array(255.0)
+
+     def postprocess(self, output: List[np.ndarray], input_images: List[List[np.ndarray]]) -> List[List[Dict[str, Any]]]:
+         results = []
+
+         for batch in zip(output, input_images):
+             for out, img in zip(batch[0], batch[1]):
+                 org_height, org_width = img.shape[:2]
+                 width_scale, height_scale = org_width / self.input_shape[2], org_height / self.input_shape[1]
+                 for res in out:
+                     sample_results = []
+                     for row in np.transpose(np.squeeze(res)):
+                         classes_scores = row[4:]
+                         max_score = np.amax(classes_scores)
+                         if max_score >= self.conf_threshold:
+                             class_id = np.argmax(classes_scores)
+                             x, y, w, h = row[0], row[1], row[2], row[3]
+                             # to rescaled xmin, ymin, xmax, ymax
+                             xmin = int((x - w / 2) * width_scale)
+                             ymin = int((y - h / 2) * height_scale)
+                             xmax = int((x + w / 2) * width_scale)
+                             ymax = int((y + h / 2) * height_scale)
+
+                             sample_results.append({
+                                 "label": self.labels[class_id],
+                                 "confidence": float(max_score),
+                                 "box": [xmin, ymin, xmax, ymax],
+                             })
+
+                     # Filter out overlapping boxes
+                     boxes = [res["box"] for res in sample_results]
+                     scores = [res["confidence"] for res in sample_results]
+                     keep_indices = cv2.dnn.NMSBoxes(boxes, scores, self.conf_threshold, self.iou_threshold)  # type: ignore[arg-type]
+                     sample_results = [sample_results[i] for i in keep_indices]
+
+                     results.append(sample_results)
+
+         self._results = results
+         return results
+
+     def show(self, **kwargs: Any) -> None:
+         """
+         Display the results
+
+         Args:
+         ----
+             **kwargs: additional keyword arguments to be passed to `plt.show`
+         """
+         requires_package("matplotlib", "`.show()` requires matplotlib installed")
+         import matplotlib.pyplot as plt
+         from matplotlib.patches import Rectangle
+
+         # visualize the results with matplotlib
+         if self._results and self._inputs:
+             for img, res in zip(self._inputs, self._results):
+                 plt.figure(figsize=(10, 10))
+                 plt.imshow(img)
+                 for obj in res:
+                     xmin, ymin, xmax, ymax = obj["box"]
+                     label = obj["label"]
+                     plt.text(xmin, ymin, f"{label} {obj['confidence']:.2f}", color="red")
+                     plt.gca().add_patch(
+                         Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, fill=False, edgecolor="red", linewidth=2)
+                     )
+                 plt.show(**kwargs)
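This new `contrib` module lands in the same release that drops the old `doctr.models.artefacts` and `doctr.models.obj_detection` packages (items 102-104 and 108-110 in the file list). A minimal usage sketch, following the class docstring above; it assumes the `contrib` extra (`onnxruntime`) is installed and uses a placeholder image path:

```python
from doctr.io import DocumentFile
from doctr.contrib.artefacts import ArtefactDetector

# Pages are loaded as numpy arrays
doc = DocumentFile.from_images(["path/to/image.jpg"])

# Downloads the default "yolov8_artefact" ONNX model on first use
detector = ArtefactDetector()
results = detector(doc)  # per page: [{"label", "confidence", "box"}, ...]

detector.show()  # optional box overlay; requires matplotlib
```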
python_doctr-0.10.0/doctr/contrib/base.py (new file)
@@ -0,0 +1,105 @@
+ # Copyright (C) 2021-2024, Mindee.
+
+ # This program is licensed under the Apache License 2.0.
+ # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+ from typing import Any, List, Optional
+
+ import numpy as np
+
+ from doctr.file_utils import requires_package
+ from doctr.utils.data import download_from_url
+
+
+ class _BasePredictor:
+     """
+     Base class for all predictors
+
+     Args:
+     ----
+         batch_size: the batch size to use
+         url: the url to use to download a model if needed
+         model_path: the path to the model to use
+         **kwargs: additional arguments to be passed to `download_from_url`
+     """
+
+     def __init__(self, batch_size: int, url: Optional[str] = None, model_path: Optional[str] = None, **kwargs) -> None:
+         self.batch_size = batch_size
+         self.session = self._init_model(url, model_path, **kwargs)
+
+         self._inputs: List[np.ndarray] = []
+         self._results: List[Any] = []
+
+     def _init_model(self, url: Optional[str] = None, model_path: Optional[str] = None, **kwargs: Any) -> Any:
+         """
+         Download the model from the given url if needed
+
+         Args:
+         ----
+             url: the url to use
+             model_path: the path to the model to use
+             **kwargs: additional arguments to be passed to `download_from_url`
+
+         Returns:
+         -------
+             Any: the ONNX loaded model
+         """
+         requires_package("onnxruntime", "`.contrib` module requires `onnxruntime` to be installed.")
+         import onnxruntime as ort
+
+         if not url and not model_path:
+             raise ValueError("You must provide either a url or a model_path")
+         onnx_model_path = model_path if model_path else str(download_from_url(url, cache_subdir="models", **kwargs))  # type: ignore[arg-type]
+         return ort.InferenceSession(onnx_model_path, providers=["CUDAExecutionProvider", "CPUExecutionProvider"])
+
+     def preprocess(self, img: np.ndarray) -> np.ndarray:
+         """
+         Preprocess the input image
+
+         Args:
+         ----
+             img: the input image to preprocess
+
+         Returns:
+         -------
+             np.ndarray: the preprocessed image
+         """
+         raise NotImplementedError
+
+     def postprocess(self, output: List[np.ndarray], input_images: List[List[np.ndarray]]) -> Any:
+         """
+         Postprocess the model output
+
+         Args:
+         ----
+             output: the model output to postprocess
+             input_images: the input images used to generate the output
+
+         Returns:
+         -------
+             Any: the postprocessed output
+         """
+         raise NotImplementedError
+
+     def __call__(self, inputs: List[np.ndarray]) -> Any:
+         """
+         Call the model on the given inputs
+
+         Args:
+         ----
+             inputs: the inputs to use
+
+         Returns:
+         -------
+             Any: the postprocessed output
+         """
+         self._inputs = inputs
+         model_inputs = self.session.get_inputs()
+
+         batched_inputs = [inputs[i : i + self.batch_size] for i in range(0, len(inputs), self.batch_size)]
+         processed_batches = [
+             np.array([self.preprocess(img) for img in batch], dtype=np.float32) for batch in batched_inputs
+         ]
+
+         outputs = [self.session.run(None, {model_inputs[0].name: batch}) for batch in processed_batches]
+         return self.postprocess(outputs, batched_inputs)
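`_BasePredictor` factors out the ONNX plumbing: `_init_model` resolves a URL or local path into an `onnxruntime` session, and `__call__` batches the inputs, runs the session, and hands the raw outputs to `postprocess`. A minimal sketch of a custom predictor built on it; the class name, model path, and pass-through post-processing are hypothetical:

```python
import numpy as np

from doctr.contrib.base import _BasePredictor


class TinyOnnxPredictor(_BasePredictor):
    def preprocess(self, img: np.ndarray) -> np.ndarray:
        # HWC uint8 image -> CHW float in [0, 1]; must match the model's input shape
        return np.transpose(img, (2, 0, 1)) / np.array(255.0)

    def postprocess(self, output, input_images):
        # `output` holds one session.run() result per batch; returned unchanged here
        return output


predictor = TinyOnnxPredictor(batch_size=2, model_path="path/to/model.onnx")  # hypothetical path
```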
{python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/cord.py
@@ -33,6 +33,7 @@ class CORD(VisionDataset):
          train: whether the subset should be the training one
          use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
          recognition_task: whether the dataset should be used for recognition task
+         detection_task: whether the dataset should be used for detection task
          **kwargs: keyword arguments from `VisionDataset`.
      """

@@ -53,6 +54,7 @@ class CORD(VisionDataset):
          train: bool = True,
          use_polygons: bool = False,
          recognition_task: bool = False,
+         detection_task: bool = False,
          **kwargs: Any,
      ) -> None:
          url, sha256, name = self.TRAIN if train else self.TEST
@@ -64,10 +66,15 @@ class CORD(VisionDataset):
              pre_transforms=convert_target_to_relative if not recognition_task else None,
              **kwargs,
          )
+         if recognition_task and detection_task:
+             raise ValueError(
+                 "`recognition_task` and `detection_task` cannot be set to True simultaneously. "
+                 + "To get the whole dataset with boxes and labels leave both parameters to False."
+             )

          # List images
          tmp_root = os.path.join(self.root, "image")
-         self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any]]]] = []
+         self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any], np.ndarray]]] = []
          self.train = train
          np_dtype = np.float32
          for img_path in tqdm(iterable=os.listdir(tmp_root), desc="Unpacking CORD", total=len(os.listdir(tmp_root))):
@@ -109,6 +116,8 @@ class CORD(VisionDataset):
                  )
                  for crop, label in zip(crops, list(text_targets)):
                      self.data.append((crop, label))
+             elif detection_task:
+                 self.data.append((img_path, np.asarray(box_targets, dtype=int).clip(min=0)))
              else:
                  self.data.append((
                      img_path,
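The `detection_task` flag added here (and to FUNSD below, plus the other dataset loaders in the file list) turns the loader into a pure text-detection dataset. A short sketch of the intended use, assuming the usual CORD download; the exact target layout depends on the configured transforms:

```python
from doctr.datasets import CORD

# detection_task=True yields (image, boxes) samples; setting it together with
# recognition_task=True raises the ValueError shown in the diff
train_set = CORD(train=True, download=True, detection_task=True)
img, boxes = train_set[0]  # boxes: numpy array of bounding boxes for the page
```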
{python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/datasets/pytorch.py
@@ -50,9 +50,9 @@ class AbstractDataset(_AbstractDataset):
      @staticmethod
      def collate_fn(samples: List[Tuple[torch.Tensor, Any]]) -> Tuple[torch.Tensor, List[Any]]:
          images, targets = zip(*samples)
-         images = torch.stack(images, dim=0)
+         images = torch.stack(images, dim=0)  # type: ignore[assignment]

-         return images, list(targets)
+         return images, list(targets)  # type: ignore[return-value]


  class VisionDataset(AbstractDataset, _VisionDataset):  # noqa: D101
{python-doctr-0.8.1 → python_doctr-0.10.0}/doctr/datasets/funsd.py
@@ -33,6 +33,7 @@ class FUNSD(VisionDataset):
          train: whether the subset should be the training one
          use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
          recognition_task: whether the dataset should be used for recognition task
+         detection_task: whether the dataset should be used for detection task
          **kwargs: keyword arguments from `VisionDataset`.
      """

@@ -45,6 +46,7 @@ class FUNSD(VisionDataset):
          train: bool = True,
          use_polygons: bool = False,
          recognition_task: bool = False,
+         detection_task: bool = False,
          **kwargs: Any,
      ) -> None:
          super().__init__(
@@ -55,6 +57,12 @@ class FUNSD(VisionDataset):
              pre_transforms=convert_target_to_relative if not recognition_task else None,
              **kwargs,
          )
+         if recognition_task and detection_task:
+             raise ValueError(
+                 "`recognition_task` and `detection_task` cannot be set to True simultaneously. "
+                 + "To get the whole dataset with boxes and labels leave both parameters to False."
+             )
+
          self.train = train
          np_dtype = np.float32

@@ -63,7 +71,7 @@ class FUNSD(VisionDataset):

          # # List images
          tmp_root = os.path.join(self.root, subfolder, "images")
-         self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any]]]] = []
+         self.data: List[Tuple[Union[str, np.ndarray], Union[str, Dict[str, Any], np.ndarray]]] = []
          for img_path in tqdm(iterable=os.listdir(tmp_root), desc="Unpacking FUNSD", total=len(os.listdir(tmp_root))):
              # File existence check
              if not os.path.exists(os.path.join(tmp_root, img_path)):
@@ -100,6 +108,8 @@ class FUNSD(VisionDataset):
                  # filter labels with unknown characters
                  if not any(char in label for char in ["☑", "☐", "\uf703", "\uf702"]):
                      self.data.append((crop, label))
+             elif detection_task:
+                 self.data.append((img_path, np.asarray(box_targets, dtype=np_dtype)))
              else:
                  self.data.append((
                      img_path,