python-doctr 0.8.0__tar.gz → 0.9.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189)
  1. {python-doctr-0.8.0/python_doctr.egg-info → python_doctr-0.9.0}/PKG-INFO +45 -40
  2. {python-doctr-0.8.0 → python_doctr-0.9.0}/README.md +24 -25
  3. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/__init__.py +1 -1
  4. python_doctr-0.9.0/doctr/contrib/__init__.py +0 -0
  5. python_doctr-0.9.0/doctr/contrib/artefacts.py +131 -0
  6. python_doctr-0.9.0/doctr/contrib/base.py +105 -0
  7. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/datasets/datasets/pytorch.py +2 -2
  8. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/datasets/generator/base.py +6 -5
  9. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/datasets/imgur5k.py +1 -1
  10. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/datasets/loader.py +1 -6
  11. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/datasets/utils.py +2 -1
  12. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/datasets/vocabs.py +9 -2
  13. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/file_utils.py +26 -12
  14. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/io/elements.py +40 -6
  15. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/io/html.py +2 -2
  16. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/io/image/pytorch.py +6 -8
  17. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/io/image/tensorflow.py +1 -1
  18. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/io/pdf.py +5 -2
  19. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/io/reader.py +6 -0
  20. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/__init__.py +0 -1
  21. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/_utils.py +57 -20
  22. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/builder.py +71 -13
  23. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/classification/mobilenet/pytorch.py +45 -9
  24. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/classification/mobilenet/tensorflow.py +38 -7
  25. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/classification/predictor/pytorch.py +18 -11
  26. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/classification/predictor/tensorflow.py +16 -10
  27. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/classification/textnet/pytorch.py +3 -3
  28. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/classification/textnet/tensorflow.py +3 -3
  29. python_doctr-0.9.0/doctr/models/classification/zoo.py +98 -0
  30. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/detection/__init__.py +1 -0
  31. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/detection/_utils/__init__.py +1 -0
  32. python_doctr-0.9.0/doctr/models/detection/_utils/base.py +66 -0
  33. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/detection/differentiable_binarization/base.py +4 -3
  34. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/detection/differentiable_binarization/pytorch.py +2 -2
  35. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/detection/differentiable_binarization/tensorflow.py +14 -18
  36. python_doctr-0.9.0/doctr/models/detection/fast/base.py +257 -0
  37. python_doctr-0.9.0/doctr/models/detection/fast/pytorch.py +442 -0
  38. python_doctr-0.9.0/doctr/models/detection/fast/tensorflow.py +428 -0
  39. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/detection/linknet/base.py +4 -3
  40. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/detection/predictor/pytorch.py +15 -1
  41. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/detection/predictor/tensorflow.py +15 -1
  42. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/detection/zoo.py +21 -4
  43. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/factory/hub.py +3 -12
  44. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/kie_predictor/base.py +9 -3
  45. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/kie_predictor/pytorch.py +41 -20
  46. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/kie_predictor/tensorflow.py +36 -16
  47. python_doctr-0.9.0/doctr/models/modules/layers/pytorch.py +165 -0
  48. python_doctr-0.9.0/doctr/models/modules/layers/tensorflow.py +173 -0
  49. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/modules/transformer/pytorch.py +2 -2
  50. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/predictor/base.py +77 -50
  51. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/predictor/pytorch.py +31 -20
  52. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/predictor/tensorflow.py +27 -17
  53. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/preprocessor/pytorch.py +4 -4
  54. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/preprocessor/tensorflow.py +3 -2
  55. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/recognition/master/pytorch.py +2 -2
  56. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/recognition/parseq/pytorch.py +4 -3
  57. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/recognition/parseq/tensorflow.py +4 -3
  58. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/recognition/sar/pytorch.py +7 -6
  59. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/recognition/sar/tensorflow.py +3 -9
  60. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/recognition/vitstr/pytorch.py +1 -1
  61. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/recognition/zoo.py +1 -1
  62. python_doctr-0.9.0/doctr/models/utils/__init__.py +6 -0
  63. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/zoo.py +2 -2
  64. python_doctr-0.9.0/doctr/py.typed +0 -0
  65. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/transforms/functional/base.py +1 -1
  66. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/transforms/functional/pytorch.py +4 -4
  67. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/transforms/modules/base.py +37 -15
  68. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/transforms/modules/pytorch.py +66 -8
  69. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/transforms/modules/tensorflow.py +63 -7
  70. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/utils/fonts.py +7 -5
  71. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/utils/geometry.py +35 -12
  72. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/utils/metrics.py +33 -174
  73. python_doctr-0.9.0/doctr/utils/reconstitution.py +126 -0
  74. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/utils/visualization.py +5 -118
  75. python_doctr-0.9.0/doctr/version.py +1 -0
  76. {python-doctr-0.8.0 → python_doctr-0.9.0}/pyproject.toml +39 -22
  77. {python-doctr-0.8.0 → python_doctr-0.9.0/python_doctr.egg-info}/PKG-INFO +45 -40
  78. {python-doctr-0.8.0 → python_doctr-0.9.0}/python_doctr.egg-info/SOURCES.txt +10 -6
  79. {python-doctr-0.8.0 → python_doctr-0.9.0}/python_doctr.egg-info/requires.txt +20 -11
  80. {python-doctr-0.8.0 → python_doctr-0.9.0}/setup.py +1 -1
  81. python-doctr-0.8.0/doctr/models/artefacts/__init__.py +0 -2
  82. python-doctr-0.8.0/doctr/models/artefacts/barcode.py +0 -74
  83. python-doctr-0.8.0/doctr/models/artefacts/face.py +0 -63
  84. python-doctr-0.8.0/doctr/models/classification/zoo.py +0 -74
  85. python-doctr-0.8.0/doctr/models/modules/layers/pytorch.py +0 -86
  86. python-doctr-0.8.0/doctr/models/modules/layers/tensorflow.py +0 -95
  87. python-doctr-0.8.0/doctr/models/obj_detection/__init__.py +0 -1
  88. python-doctr-0.8.0/doctr/models/obj_detection/faster_rcnn/__init__.py +0 -4
  89. python-doctr-0.8.0/doctr/models/obj_detection/faster_rcnn/pytorch.py +0 -81
  90. python-doctr-0.8.0/doctr/version.py +0 -1
  91. {python-doctr-0.8.0 → python_doctr-0.9.0}/LICENSE +0 -0
  92. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/datasets/__init__.py +0 -0
  93. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/datasets/cord.py +0 -0
  94. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/datasets/datasets/__init__.py +0 -0
  95. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/datasets/datasets/base.py +0 -0
  96. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/datasets/datasets/tensorflow.py +0 -0
  97. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/datasets/detection.py +0 -0
  98. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/datasets/doc_artefacts.py +0 -0
  99. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/datasets/funsd.py +0 -0
  100. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/datasets/generator/__init__.py +0 -0
  101. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/datasets/generator/pytorch.py +0 -0
  102. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/datasets/generator/tensorflow.py +0 -0
  103. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/datasets/ic03.py +0 -0
  104. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/datasets/ic13.py +0 -0
  105. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/datasets/iiit5k.py +0 -0
  106. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/datasets/iiithws.py +0 -0
  107. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/datasets/mjsynth.py +0 -0
  108. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/datasets/ocr.py +0 -0
  109. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/datasets/orientation.py +0 -0
  110. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/datasets/recognition.py +0 -0
  111. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/datasets/sroie.py +0 -0
  112. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/datasets/svhn.py +0 -0
  113. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/datasets/svt.py +0 -0
  114. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/datasets/synthtext.py +0 -0
  115. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/datasets/wildreceipt.py +0 -0
  116. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/io/__init__.py +0 -0
  117. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/io/image/__init__.py +0 -0
  118. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/io/image/base.py +0 -0
  119. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/classification/__init__.py +0 -0
  120. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/classification/magc_resnet/__init__.py +0 -0
  121. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/classification/magc_resnet/pytorch.py +0 -0
  122. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/classification/magc_resnet/tensorflow.py +0 -0
  123. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/classification/mobilenet/__init__.py +0 -0
  124. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/classification/predictor/__init__.py +0 -0
  125. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/classification/resnet/__init__.py +0 -0
  126. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/classification/resnet/pytorch.py +0 -0
  127. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/classification/resnet/tensorflow.py +0 -0
  128. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/classification/textnet/__init__.py +0 -0
  129. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/classification/vgg/__init__.py +0 -0
  130. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/classification/vgg/pytorch.py +0 -0
  131. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/classification/vgg/tensorflow.py +0 -0
  132. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/classification/vit/__init__.py +0 -0
  133. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/classification/vit/pytorch.py +0 -0
  134. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/classification/vit/tensorflow.py +0 -0
  135. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/core.py +0 -0
  136. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/detection/_utils/pytorch.py +0 -0
  137. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/detection/_utils/tensorflow.py +0 -0
  138. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/detection/core.py +0 -0
  139. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/detection/differentiable_binarization/__init__.py +0 -0
  140. {python-doctr-0.8.0/doctr/models/detection/linknet → python_doctr-0.9.0/doctr/models/detection/fast}/__init__.py +0 -0
  141. {python-doctr-0.8.0/doctr/models/modules/layers → python_doctr-0.9.0/doctr/models/detection/linknet}/__init__.py +0 -0
  142. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/detection/linknet/pytorch.py +0 -0
  143. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/detection/linknet/tensorflow.py +0 -0
  144. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/detection/predictor/__init__.py +0 -0
  145. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/factory/__init__.py +0 -0
  146. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/kie_predictor/__init__.py +0 -0
  147. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/modules/__init__.py +0 -0
  148. {python-doctr-0.8.0/doctr/models/modules/transformer → python_doctr-0.9.0/doctr/models/modules/layers}/__init__.py +0 -0
  149. {python-doctr-0.8.0/doctr/models/modules/vision_transformer → python_doctr-0.9.0/doctr/models/modules/transformer}/__init__.py +0 -0
  150. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/modules/transformer/tensorflow.py +0 -0
  151. {python-doctr-0.8.0/doctr/models/preprocessor → python_doctr-0.9.0/doctr/models/modules/vision_transformer}/__init__.py +0 -0
  152. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/modules/vision_transformer/pytorch.py +0 -0
  153. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/modules/vision_transformer/tensorflow.py +0 -0
  154. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/predictor/__init__.py +0 -0
  155. {python-doctr-0.8.0/doctr/models/recognition/crnn → python_doctr-0.9.0/doctr/models/preprocessor}/__init__.py +0 -0
  156. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/recognition/__init__.py +0 -0
  157. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/recognition/core.py +0 -0
  158. {python-doctr-0.8.0/doctr/models/recognition/master → python_doctr-0.9.0/doctr/models/recognition/crnn}/__init__.py +0 -0
  159. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/recognition/crnn/pytorch.py +0 -0
  160. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/recognition/crnn/tensorflow.py +0 -0
  161. {python-doctr-0.8.0/doctr/models/recognition/parseq → python_doctr-0.9.0/doctr/models/recognition/master}/__init__.py +0 -0
  162. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/recognition/master/base.py +0 -0
  163. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/recognition/master/tensorflow.py +0 -0
  164. {python-doctr-0.8.0/doctr/models/recognition/sar → python_doctr-0.9.0/doctr/models/recognition/parseq}/__init__.py +0 -0
  165. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/recognition/parseq/base.py +0 -0
  166. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/recognition/predictor/__init__.py +0 -0
  167. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/recognition/predictor/_utils.py +0 -0
  168. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/recognition/predictor/pytorch.py +0 -0
  169. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/recognition/predictor/tensorflow.py +0 -0
  170. {python-doctr-0.8.0/doctr/models/recognition/vitstr → python_doctr-0.9.0/doctr/models/recognition/sar}/__init__.py +0 -0
  171. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/recognition/utils.py +0 -0
  172. {python-doctr-0.8.0/doctr/models/utils → python_doctr-0.9.0/doctr/models/recognition/vitstr}/__init__.py +0 -0
  173. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/recognition/vitstr/base.py +0 -0
  174. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/recognition/vitstr/tensorflow.py +0 -0
  175. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/utils/pytorch.py +0 -0
  176. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/models/utils/tensorflow.py +0 -0
  177. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/transforms/__init__.py +0 -0
  178. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/transforms/functional/__init__.py +0 -0
  179. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/transforms/functional/tensorflow.py +0 -0
  180. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/transforms/modules/__init__.py +0 -0
  181. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/utils/__init__.py +0 -0
  182. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/utils/common_types.py +0 -0
  183. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/utils/data.py +0 -0
  184. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/utils/multithreading.py +0 -0
  185. {python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/utils/repr.py +0 -0
  186. {python-doctr-0.8.0 → python_doctr-0.9.0}/python_doctr.egg-info/dependency_links.txt +0 -0
  187. {python-doctr-0.8.0 → python_doctr-0.9.0}/python_doctr.egg-info/top_level.txt +0 -0
  188. {python-doctr-0.8.0 → python_doctr-0.9.0}/python_doctr.egg-info/zip-safe +0 -0
  189. {python-doctr-0.8.0 → python_doctr-0.9.0}/setup.cfg +0 -0
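The headline changes visible in the list above: a new FAST text-detection architecture (`doctr/models/detection/fast/`), a new `doctr.contrib` package for ONNX-based extras, and the removal of the old `doctr/models/artefacts` and `doctr/models/obj_detection` modules. As a rough sketch of what the FAST addition enables (the `fast_base` architecture name is inferred from the new files and the `detection/zoo.py` changes; the registry itself is not shown in this diff):

```python
from doctr.models import ocr_predictor

# "fast_base" is an assumed zoo name for the new FAST detector;
# the hunks below add fast/{base,pytorch,tensorflow}.py but not the registry entry.
predictor = ocr_predictor(det_arch="fast_base", reco_arch="crnn_vgg16_bn", pretrained=True)
```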
{python-doctr-0.8.0/python_doctr.egg-info → python_doctr-0.9.0}/PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: python-doctr
- Version: 0.8.0
+ Version: 0.9.0
  Summary: Document Text Recognition (docTR): deep Learning for high-performance OCR on documents.
  Author-email: Mindee <contact@mindee.com>
  Maintainer: François-Guillaume Fernandez, Charles Gaillard, Olivier Dulcy, Felix Dittrich
@@ -209,7 +209,7 @@ License: Apache License
  Project-URL: documentation, https://mindee.github.io/doctr
  Project-URL: repository, https://github.com/mindee/doctr
  Project-URL: tracker, https://github.com/mindee/doctr/issues
- Project-URL: changelog, https://github.com/mindee/doctr/latest/changelog.html
+ Project-URL: changelog, https://mindee.github.io/doctr/changelog.html
  Keywords: OCR,deep learning,computer vision,tensorflow,pytorch,text detection,text recognition
  Classifier: Development Status :: 4 - Beta
  Classifier: Intended Audience :: Developers
@@ -219,30 +219,26 @@ Classifier: License :: OSI Approved :: Apache Software License
  Classifier: Natural Language :: English
  Classifier: Operating System :: OS Independent
  Classifier: Programming Language :: Python :: 3
- Classifier: Programming Language :: Python :: 3.8
  Classifier: Programming Language :: Python :: 3.9
  Classifier: Programming Language :: Python :: 3.10
+ Classifier: Programming Language :: Python :: 3.11
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
- Requires-Python: <4,>=3.8.0
+ Requires-Python: <4,>=3.9.0
  Description-Content-Type: text/markdown
  License-File: LICENSE
- Requires-Dist: importlib_metadata
  Requires-Dist: numpy<2.0.0,>=1.16.0
  Requires-Dist: scipy<2.0.0,>=1.4.0
  Requires-Dist: h5py<4.0.0,>=3.1.0
  Requires-Dist: opencv-python<5.0.0,>=4.5.0
- Requires-Dist: pypdfium2<5.0.0,>=4.0.0
+ Requires-Dist: pypdfium2<5.0.0,>=4.11.0
  Requires-Dist: pyclipper<2.0.0,>=1.2.0
  Requires-Dist: shapely<3.0.0,>=1.6.0
  Requires-Dist: langdetect<2.0.0,>=1.0.9
  Requires-Dist: rapidfuzz<4.0.0,>=3.0.0
  Requires-Dist: huggingface-hub<1.0.0,>=0.20.0
- Requires-Dist: matplotlib>=3.1.0
- Requires-Dist: weasyprint>=55.0
  Requires-Dist: Pillow>=9.2.0
  Requires-Dist: defusedxml>=0.7.0
- Requires-Dist: mplcursors>=0.3
- Requires-Dist: unidecode>=1.0.0
+ Requires-Dist: anyascii>=0.3.2
  Requires-Dist: tqdm>=4.30.0
  Provides-Extra: tf
  Requires-Dist: tensorflow<2.16.0,>=2.11.0; extra == "tf"
@@ -251,6 +247,13 @@ Provides-Extra: torch
  Requires-Dist: torch<3.0.0,>=1.12.0; extra == "torch"
  Requires-Dist: torchvision>=0.13.0; extra == "torch"
  Requires-Dist: onnx<3.0.0,>=1.12.0; extra == "torch"
+ Provides-Extra: html
+ Requires-Dist: weasyprint>=55.0; extra == "html"
+ Provides-Extra: viz
+ Requires-Dist: matplotlib>=3.1.0; extra == "viz"
+ Requires-Dist: mplcursors>=0.3; extra == "viz"
+ Provides-Extra: contrib
+ Requires-Dist: onnxruntime>=1.11.0; extra == "contrib"
  Provides-Extra: testing
  Requires-Dist: pytest>=5.3.2; extra == "testing"
  Requires-Dist: coverage[toml]>=4.5.4; extra == "testing"
@@ -266,7 +269,7 @@ Provides-Extra: docs
  Requires-Dist: sphinx!=3.5.0,>=3.0.0; extra == "docs"
  Requires-Dist: sphinxemoji>=0.1.8; extra == "docs"
  Requires-Dist: sphinx-copybutton>=0.3.1; extra == "docs"
- Requires-Dist: docutils<0.21; extra == "docs"
+ Requires-Dist: docutils<0.22; extra == "docs"
  Requires-Dist: recommonmark>=0.7.1; extra == "docs"
  Requires-Dist: sphinx-markdown-tables>=0.0.15; extra == "docs"
  Requires-Dist: sphinx-tabs>=3.3.0; extra == "docs"
@@ -277,29 +280,32 @@ Requires-Dist: tf2onnx<2.0.0,>=1.16.0; extra == "dev"
  Requires-Dist: torch<3.0.0,>=1.12.0; extra == "dev"
  Requires-Dist: torchvision>=0.13.0; extra == "dev"
  Requires-Dist: onnx<3.0.0,>=1.12.0; extra == "dev"
+ Requires-Dist: weasyprint>=55.0; extra == "dev"
+ Requires-Dist: matplotlib>=3.1.0; extra == "dev"
+ Requires-Dist: mplcursors>=0.3; extra == "dev"
  Requires-Dist: pytest>=5.3.2; extra == "dev"
  Requires-Dist: coverage[toml]>=4.5.4; extra == "dev"
  Requires-Dist: hdf5storage>=0.1.18; extra == "dev"
  Requires-Dist: onnxruntime>=1.11.0; extra == "dev"
  Requires-Dist: requests>=2.20.0; extra == "dev"
  Requires-Dist: psutil>=5.9.5; extra == "dev"
- Requires-Dist: ruff>=0.1.5; extra == "dev"
- Requires-Dist: mypy>=0.812; extra == "dev"
- Requires-Dist: pre-commit>=2.17.0; extra == "dev"
+ Requires-Dist: ruff>=0.3.0; extra == "dev"
+ Requires-Dist: mypy>=1.0; extra == "dev"
+ Requires-Dist: pre-commit>=3.0.0; extra == "dev"
  Requires-Dist: sphinx!=3.5.0,>=3.0.0; extra == "dev"
  Requires-Dist: sphinxemoji>=0.1.8; extra == "dev"
  Requires-Dist: sphinx-copybutton>=0.3.1; extra == "dev"
- Requires-Dist: docutils<0.21; extra == "dev"
+ Requires-Dist: docutils<0.22; extra == "dev"
  Requires-Dist: recommonmark>=0.7.1; extra == "dev"
  Requires-Dist: sphinx-markdown-tables>=0.0.15; extra == "dev"
  Requires-Dist: sphinx-tabs>=3.3.0; extra == "dev"
  Requires-Dist: furo>=2022.3.4; extra == "dev"

  <p align="center">
-   <img src="docs/images/Logo_doctr.gif" width="40%">
+   <img src="https://github.com/mindee/doctr/raw/main/docs/images/Logo_doctr.gif" width="40%">
  </p>

- [![Slack Icon](https://img.shields.io/badge/Slack-Community-4A154B?style=flat-square&logo=slack&logoColor=white)](https://slack.mindee.com) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE) ![Build Status](https://github.com/mindee/doctr/workflows/builds/badge.svg) [![Docker Images](https://img.shields.io/badge/Docker-4287f5?style=flat&logo=docker&logoColor=white)](https://github.com/mindee/doctr/pkgs/container/doctr) [![codecov](https://codecov.io/gh/mindee/doctr/branch/main/graph/badge.svg?token=577MO567NM)](https://codecov.io/gh/mindee/doctr) [![CodeFactor](https://www.codefactor.io/repository/github/mindee/doctr/badge?s=bae07db86bb079ce9d6542315b8c6e70fa708a7e)](https://www.codefactor.io/repository/github/mindee/doctr) [![Codacy Badge](https://api.codacy.com/project/badge/Grade/340a76749b634586a498e1c0ab998f08)](https://app.codacy.com/gh/mindee/doctr?utm_source=github.com&utm_medium=referral&utm_content=mindee/doctr&utm_campaign=Badge_Grade) [![Doc Status](https://github.com/mindee/doctr/workflows/doc-status/badge.svg)](https://mindee.github.io/doctr) [![Pypi](https://img.shields.io/badge/pypi-v0.7.0-blue.svg)](https://pypi.org/project/python-doctr/) [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/mindee/doctr) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/mindee/notebooks/blob/main/doctr/quicktour.ipynb)
+ [![Slack Icon](https://img.shields.io/badge/Slack-Community-4A154B?style=flat-square&logo=slack&logoColor=white)](https://slack.mindee.com) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE) ![Build Status](https://github.com/mindee/doctr/workflows/builds/badge.svg) [![Docker Images](https://img.shields.io/badge/Docker-4287f5?style=flat&logo=docker&logoColor=white)](https://github.com/mindee/doctr/pkgs/container/doctr) [![codecov](https://codecov.io/gh/mindee/doctr/branch/main/graph/badge.svg?token=577MO567NM)](https://codecov.io/gh/mindee/doctr) [![CodeFactor](https://www.codefactor.io/repository/github/mindee/doctr/badge?s=bae07db86bb079ce9d6542315b8c6e70fa708a7e)](https://www.codefactor.io/repository/github/mindee/doctr) [![Codacy Badge](https://api.codacy.com/project/badge/Grade/340a76749b634586a498e1c0ab998f08)](https://app.codacy.com/gh/mindee/doctr?utm_source=github.com&utm_medium=referral&utm_content=mindee/doctr&utm_campaign=Badge_Grade) [![Doc Status](https://github.com/mindee/doctr/workflows/doc-status/badge.svg)](https://mindee.github.io/doctr) [![Pypi](https://img.shields.io/badge/pypi-v0.9.0-blue.svg)](https://pypi.org/project/python-doctr/) [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/mindee/doctr) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/mindee/notebooks/blob/main/doctr/quicktour.ipynb)


  **Optical Character Recognition made seamless & accessible to anyone, powered by TensorFlow 2 & PyTorch**
@@ -309,7 +315,7 @@ What you can expect from this repository:
  - efficient ways to parse textual information (localize and identify each word) from your documents
  - guidance on how to integrate this in your current architecture

- ![OCR_example](docs/images/ocr.png)
+ ![OCR_example](https://github.com/mindee/doctr/raw/main/docs/images/ocr.png)

  ## Quick Tour

@@ -334,7 +340,7 @@ from doctr.io import DocumentFile
  pdf_doc = DocumentFile.from_pdf("path/to/your/doc.pdf")
  # Image
  single_img_doc = DocumentFile.from_images("path/to/your/img.jpg")
- # Webpage
+ # Webpage (requires `weasyprint` to be installed)
  webpage_doc = DocumentFile.from_url("https://www.yoursite.com")
  # Multiple page images
  multi_img_doc = DocumentFile.from_images(["path/to/page1.jpg", "path/to/page2.jpg"])
@@ -372,10 +378,11 @@ If both options are set to False, the predictor will always fit and return rotat
  To interpret your model's predictions, you can visualize them interactively as follows:

  ```python
+ # Display the result (requires matplotlib & mplcursors to be installed)
  result.show()
  ```

- ![Visualization sample](docs/images/doctr_example_script.gif)
+ ![Visualization sample](https://github.com/mindee/doctr/raw/main/docs/images/doctr_example_script.gif)

  Or even rebuild the original document from its predictions:

@@ -386,7 +393,7 @@ synthetic_pages = result.synthesize()
  plt.imshow(synthetic_pages[0]); plt.axis('off'); plt.show()
  ```

- ![Synthesis sample](docs/images/synthesized_sample.png)
+ ![Synthesis sample](https://github.com/mindee/doctr/raw/main/docs/images/synthesized_sample.png)

  The `ocr_predictor` returns a `Document` object with a nested structure (with `Page`, `Block`, `Line`, `Word`, `Artefact`).
  To get a better understanding of our document model, check our [documentation](https://mindee.github.io/doctr/modules/io.html#document-structure):
@@ -425,23 +432,13 @@ The KIE predictor results per page are in a dictionary format with each key repr

  ### If you are looking for support from the Mindee team

- [![Bad OCR test detection image asking the developer if they need help](docs/images/doctr-need-help.png)](https://mindee.com/product/doctr)
+ [![Bad OCR test detection image asking the developer if they need help](https://github.com/mindee/doctr/raw/main/docs/images/doctr-need-help.png)](https://mindee.com/product/doctr)

  ## Installation

  ### Prerequisites

- Python 3.8 (or higher) and [pip](https://pip.pypa.io/en/stable/) are required to install docTR.
-
- Since we use [weasyprint](https://weasyprint.org/), you will need extra dependencies if you are not running Linux.
-
- For MacOS users, you can install them as follows:
-
- ```shell
- brew install cairo pango gdk-pixbuf libffi
- ```
-
- For Windows users, those dependencies are included in GTK. You can find the latest installer over [here](https://github.com/tschoonj/GTK-for-Windows-Runtime-Environment-Installer/releases).
+ Python 3.9 (or higher) and [pip](https://pip.pypa.io/en/stable/) are required to install docTR.

  ### Latest release

@@ -460,6 +457,8 @@ We try to keep framework-specific dependencies to a minimum. You can install fra
  pip install "python-doctr[tf]"
  # for PyTorch
  pip install "python-doctr[torch]"
+ # optional dependencies for visualization, html, and contrib modules can be installed as follows:
+ pip install "python-doctr[torch,viz,html,contib]"
  ```

  For MacBooks with M1 chip, you will need some additional packages or specific versions:
@@ -494,6 +493,7 @@ Credits where it's due: this repository is implementing, among others, architect

  - DBNet: [Real-time Scene Text Detection with Differentiable Binarization](https://arxiv.org/pdf/1911.08947.pdf).
  - LinkNet: [LinkNet: Exploiting Encoder Representations for Efficient Semantic Segmentation](https://arxiv.org/pdf/1707.03718.pdf)
+ - FAST: [FAST: Faster Arbitrarily-Shaped Text Detector with Minimalist Kernel Representation](https://arxiv.org/pdf/2111.02394.pdf)

  ### Text Recognition

@@ -513,7 +513,7 @@ The full package documentation is available [here](https://mindee.github.io/doct

  A minimal demo app is provided for you to play with our end-to-end OCR models!

- ![Demo app](docs/images/demo_update.png)
+ ![Demo app](https://github.com/mindee/doctr/raw/main/docs/images/demo_update.png)

  #### Live demo

@@ -553,11 +553,11 @@ USE_TORCH=1 streamlit run demo/app.py
  Instead of having your demo actually running Python, you would prefer to run everything in your web browser?
  Check out our [TensorFlow.js demo](https://github.com/mindee/doctr-tfjs-demo) to get started!

- ![TFJS demo](docs/images/demo_illustration_mini.png)
+ ![TFJS demo](https://github.com/mindee/doctr/raw/main/docs/images/demo_illustration_mini.png)

  ### Docker container

- [We offers Docker container support for easy testing and deployment](https://github.com/mindee/doctr/packages).
+ [We offer Docker container support for easy testing and deployment](https://github.com/mindee/doctr/pkgs/container/doctr).

  #### Using GPU with docTR Docker Images

@@ -646,9 +646,14 @@ Your API should now be running locally on your port 8002. Access your automatica

  ```python
  import requests
+
+ params = {"det_arch": "db_resnet50", "reco_arch": "crnn_vgg16_bn"}
+
  with open('/path/to/your/doc.jpg', 'rb') as f:
-     data = f.read()
- response = requests.post("http://localhost:8002/ocr", files={'file': data}).json()
+     files = [  # application/pdf, image/jpeg, image/png supported
+         ("files", ("doc.jpg", f.read(), "image/jpeg")),
+     ]
+ print(requests.post("http://localhost:8080/ocr", params=params, files=files).json())
  ```

  ### Example notebooks
@@ -673,8 +678,8 @@ If you wish to cite this project, feel free to use this [BibTeX](http://www.bibt

  If you scrolled down to this section, you most likely appreciate open source. Do you feel like extending the range of our supported characters? Or perhaps submitting a paper implementation? Or contributing in any other way?

- You're in luck, we compiled a short guide (cf. [`CONTRIBUTING`](CONTRIBUTING.md)) for you to easily do so!
+ You're in luck, we compiled a short guide (cf. [`CONTRIBUTING`](https://mindee.github.io/doctr/contributing/contributing.html)) for you to easily do so!

  ## License

- Distributed under the Apache 2.0 License. See [`LICENSE`](LICENSE) for more information.
+ Distributed under the Apache 2.0 License. See [`LICENSE`](https://github.com/mindee/doctr?tab=Apache-2.0-1-ov-file#readme) for more information.
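A practical consequence of the metadata changes above: `weasyprint`, `matplotlib`, and `mplcursors` are no longer installed by default and move behind the new `html` and `viz` extras, with `onnxruntime` behind `contrib`. The release guards these optional imports with `doctr.file_utils.requires_package`, which the contrib diffs below rely on; a minimal sketch of that pattern (the `show_page` function is illustrative, not part of the package):

```python
from doctr.file_utils import requires_package  # helper extended in file_utils.py (+26 -12)


def show_page(page) -> None:
    # Fail with a clear message when the "viz" extra is missing, then import lazily
    requires_package("matplotlib", "`.show()` requires matplotlib installed")
    import matplotlib.pyplot as plt

    plt.imshow(page)
    plt.show()
```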
{python-doctr-0.8.0 → python_doctr-0.9.0}/README.md

@@ -1,8 +1,8 @@
  <p align="center">
-   <img src="docs/images/Logo_doctr.gif" width="40%">
+   <img src="https://github.com/mindee/doctr/raw/main/docs/images/Logo_doctr.gif" width="40%">
  </p>

- [![Slack Icon](https://img.shields.io/badge/Slack-Community-4A154B?style=flat-square&logo=slack&logoColor=white)](https://slack.mindee.com) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE) ![Build Status](https://github.com/mindee/doctr/workflows/builds/badge.svg) [![Docker Images](https://img.shields.io/badge/Docker-4287f5?style=flat&logo=docker&logoColor=white)](https://github.com/mindee/doctr/pkgs/container/doctr) [![codecov](https://codecov.io/gh/mindee/doctr/branch/main/graph/badge.svg?token=577MO567NM)](https://codecov.io/gh/mindee/doctr) [![CodeFactor](https://www.codefactor.io/repository/github/mindee/doctr/badge?s=bae07db86bb079ce9d6542315b8c6e70fa708a7e)](https://www.codefactor.io/repository/github/mindee/doctr) [![Codacy Badge](https://api.codacy.com/project/badge/Grade/340a76749b634586a498e1c0ab998f08)](https://app.codacy.com/gh/mindee/doctr?utm_source=github.com&utm_medium=referral&utm_content=mindee/doctr&utm_campaign=Badge_Grade) [![Doc Status](https://github.com/mindee/doctr/workflows/doc-status/badge.svg)](https://mindee.github.io/doctr) [![Pypi](https://img.shields.io/badge/pypi-v0.7.0-blue.svg)](https://pypi.org/project/python-doctr/) [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/mindee/doctr) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/mindee/notebooks/blob/main/doctr/quicktour.ipynb)
+ [![Slack Icon](https://img.shields.io/badge/Slack-Community-4A154B?style=flat-square&logo=slack&logoColor=white)](https://slack.mindee.com) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE) ![Build Status](https://github.com/mindee/doctr/workflows/builds/badge.svg) [![Docker Images](https://img.shields.io/badge/Docker-4287f5?style=flat&logo=docker&logoColor=white)](https://github.com/mindee/doctr/pkgs/container/doctr) [![codecov](https://codecov.io/gh/mindee/doctr/branch/main/graph/badge.svg?token=577MO567NM)](https://codecov.io/gh/mindee/doctr) [![CodeFactor](https://www.codefactor.io/repository/github/mindee/doctr/badge?s=bae07db86bb079ce9d6542315b8c6e70fa708a7e)](https://www.codefactor.io/repository/github/mindee/doctr) [![Codacy Badge](https://api.codacy.com/project/badge/Grade/340a76749b634586a498e1c0ab998f08)](https://app.codacy.com/gh/mindee/doctr?utm_source=github.com&utm_medium=referral&utm_content=mindee/doctr&utm_campaign=Badge_Grade) [![Doc Status](https://github.com/mindee/doctr/workflows/doc-status/badge.svg)](https://mindee.github.io/doctr) [![Pypi](https://img.shields.io/badge/pypi-v0.9.0-blue.svg)](https://pypi.org/project/python-doctr/) [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/mindee/doctr) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/mindee/notebooks/blob/main/doctr/quicktour.ipynb)


  **Optical Character Recognition made seamless & accessible to anyone, powered by TensorFlow 2 & PyTorch**
@@ -12,7 +12,7 @@ What you can expect from this repository:
  - efficient ways to parse textual information (localize and identify each word) from your documents
  - guidance on how to integrate this in your current architecture

- ![OCR_example](docs/images/ocr.png)
+ ![OCR_example](https://github.com/mindee/doctr/raw/main/docs/images/ocr.png)

  ## Quick Tour

@@ -37,7 +37,7 @@ from doctr.io import DocumentFile
  pdf_doc = DocumentFile.from_pdf("path/to/your/doc.pdf")
  # Image
  single_img_doc = DocumentFile.from_images("path/to/your/img.jpg")
- # Webpage
+ # Webpage (requires `weasyprint` to be installed)
  webpage_doc = DocumentFile.from_url("https://www.yoursite.com")
  # Multiple page images
  multi_img_doc = DocumentFile.from_images(["path/to/page1.jpg", "path/to/page2.jpg"])
@@ -75,10 +75,11 @@ If both options are set to False, the predictor will always fit and return rotat
  To interpret your model's predictions, you can visualize them interactively as follows:

  ```python
+ # Display the result (requires matplotlib & mplcursors to be installed)
  result.show()
  ```

- ![Visualization sample](docs/images/doctr_example_script.gif)
+ ![Visualization sample](https://github.com/mindee/doctr/raw/main/docs/images/doctr_example_script.gif)

  Or even rebuild the original document from its predictions:

@@ -89,7 +90,7 @@ synthetic_pages = result.synthesize()
  plt.imshow(synthetic_pages[0]); plt.axis('off'); plt.show()
  ```

- ![Synthesis sample](docs/images/synthesized_sample.png)
+ ![Synthesis sample](https://github.com/mindee/doctr/raw/main/docs/images/synthesized_sample.png)

  The `ocr_predictor` returns a `Document` object with a nested structure (with `Page`, `Block`, `Line`, `Word`, `Artefact`).
  To get a better understanding of our document model, check our [documentation](https://mindee.github.io/doctr/modules/io.html#document-structure):
@@ -128,23 +129,13 @@ The KIE predictor results per page are in a dictionary format with each key repr

  ### If you are looking for support from the Mindee team

- [![Bad OCR test detection image asking the developer if they need help](docs/images/doctr-need-help.png)](https://mindee.com/product/doctr)
+ [![Bad OCR test detection image asking the developer if they need help](https://github.com/mindee/doctr/raw/main/docs/images/doctr-need-help.png)](https://mindee.com/product/doctr)

  ## Installation

  ### Prerequisites

- Python 3.8 (or higher) and [pip](https://pip.pypa.io/en/stable/) are required to install docTR.
-
- Since we use [weasyprint](https://weasyprint.org/), you will need extra dependencies if you are not running Linux.
-
- For MacOS users, you can install them as follows:
-
- ```shell
- brew install cairo pango gdk-pixbuf libffi
- ```
-
- For Windows users, those dependencies are included in GTK. You can find the latest installer over [here](https://github.com/tschoonj/GTK-for-Windows-Runtime-Environment-Installer/releases).
+ Python 3.9 (or higher) and [pip](https://pip.pypa.io/en/stable/) are required to install docTR.

  ### Latest release

@@ -163,6 +154,8 @@ We try to keep framework-specific dependencies to a minimum. You can install fra
  pip install "python-doctr[tf]"
  # for PyTorch
  pip install "python-doctr[torch]"
+ # optional dependencies for visualization, html, and contrib modules can be installed as follows:
+ pip install "python-doctr[torch,viz,html,contib]"
  ```

  For MacBooks with M1 chip, you will need some additional packages or specific versions:
@@ -197,6 +190,7 @@ Credits where it's due: this repository is implementing, among others, architect

  - DBNet: [Real-time Scene Text Detection with Differentiable Binarization](https://arxiv.org/pdf/1911.08947.pdf).
  - LinkNet: [LinkNet: Exploiting Encoder Representations for Efficient Semantic Segmentation](https://arxiv.org/pdf/1707.03718.pdf)
+ - FAST: [FAST: Faster Arbitrarily-Shaped Text Detector with Minimalist Kernel Representation](https://arxiv.org/pdf/2111.02394.pdf)

  ### Text Recognition

@@ -216,7 +210,7 @@ The full package documentation is available [here](https://mindee.github.io/doct

  A minimal demo app is provided for you to play with our end-to-end OCR models!

- ![Demo app](docs/images/demo_update.png)
+ ![Demo app](https://github.com/mindee/doctr/raw/main/docs/images/demo_update.png)

  #### Live demo

@@ -256,11 +250,11 @@ USE_TORCH=1 streamlit run demo/app.py
  Instead of having your demo actually running Python, you would prefer to run everything in your web browser?
  Check out our [TensorFlow.js demo](https://github.com/mindee/doctr-tfjs-demo) to get started!

- ![TFJS demo](docs/images/demo_illustration_mini.png)
+ ![TFJS demo](https://github.com/mindee/doctr/raw/main/docs/images/demo_illustration_mini.png)

  ### Docker container

- [We offers Docker container support for easy testing and deployment](https://github.com/mindee/doctr/packages).
+ [We offer Docker container support for easy testing and deployment](https://github.com/mindee/doctr/pkgs/container/doctr).

  #### Using GPU with docTR Docker Images

@@ -349,9 +343,14 @@ Your API should now be running locally on your port 8002. Access your automatica

  ```python
  import requests
+
+ params = {"det_arch": "db_resnet50", "reco_arch": "crnn_vgg16_bn"}
+
  with open('/path/to/your/doc.jpg', 'rb') as f:
-     data = f.read()
- response = requests.post("http://localhost:8002/ocr", files={'file': data}).json()
+     files = [  # application/pdf, image/jpeg, image/png supported
+         ("files", ("doc.jpg", f.read(), "image/jpeg")),
+     ]
+ print(requests.post("http://localhost:8080/ocr", params=params, files=files).json())
  ```

  ### Example notebooks
@@ -376,8 +375,8 @@ If you wish to cite this project, feel free to use this [BibT

  If you scrolled down to this section, you most likely appreciate open source. Do you feel like extending the range of our supported characters? Or perhaps submitting a paper implementation? Or contributing in any other way?

- You're in luck, we compiled a short guide (cf. [`CONTRIBUTING`](CONTRIBUTING.md)) for you to easily do so!
+ You're in luck, we compiled a short guide (cf. [`CONTRIBUTING`](https://mindee.github.io/doctr/contributing/contributing.html)) for you to easily do so!

  ## License

- Distributed under the Apache 2.0 License. See [`LICENSE`](LICENSE) for more information.
+ Distributed under the Apache 2.0 License. See [`LICENSE`](https://github.com/mindee/doctr?tab=Apache-2.0-1-ov-file#readme) for more information.
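Putting the updated Quick Tour pieces together, a minimal end-to-end run under the new packaging looks roughly as follows (paths are placeholders; `.show()` needs the `viz` extra, `from_url` the `html` extra):

```python
from doctr.io import DocumentFile
from doctr.models import ocr_predictor

predictor = ocr_predictor(pretrained=True)
doc = DocumentFile.from_images("path/to/your/img.jpg")
result = predictor(doc)

print(result.render())  # plain-text reconstruction of the page
result.show()           # interactive overlay; requires matplotlib & mplcursors
```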
{python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/__init__.py

@@ -1,3 +1,3 @@
- from . import io, models, datasets, transforms, utils
+ from . import io, models, datasets, contrib, transforms, utils
  from .file_utils import is_tf_available, is_torch_available
  from .version import __version__  # noqa: F401
python_doctr-0.9.0/doctr/contrib/__init__.py — file without changes (new empty file)
python_doctr-0.9.0/doctr/contrib/artefacts.py (new file)

@@ -0,0 +1,131 @@
+ # Copyright (C) 2021-2024, Mindee.
+
+ # This program is licensed under the Apache License 2.0.
+ # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+ from typing import Any, Dict, List, Optional, Tuple
+
+ import cv2
+ import numpy as np
+
+ from doctr.file_utils import requires_package
+
+ from .base import _BasePredictor
+
+ __all__ = ["ArtefactDetector"]
+
+ default_cfgs: Dict[str, Dict[str, Any]] = {
+     "yolov8_artefact": {
+         "input_shape": (3, 1024, 1024),
+         "labels": ["bar_code", "qr_code", "logo", "photo"],
+         "url": "https://doctr-static.mindee.com/models?id=v0.8.1/yolo_artefact-f9d66f14.onnx&src=0",
+     },
+ }
+
+
+ class ArtefactDetector(_BasePredictor):
+     """
+     A class to detect artefacts in images
+
+     >>> from doctr.io import DocumentFile
+     >>> from doctr.contrib.artefacts import ArtefactDetector
+     >>> doc = DocumentFile.from_images(["path/to/image.jpg"])
+     >>> detector = ArtefactDetector()
+     >>> results = detector(doc)
+
+     Args:
+     ----
+         arch: the architecture to use
+         batch_size: the batch size to use
+         model_path: the path to the model to use
+         labels: the labels to use
+         input_shape: the input shape to use
+         mask_labels: the mask labels to use
+         conf_threshold: the confidence threshold to use
+         iou_threshold: the intersection over union threshold to use
+         **kwargs: additional arguments to be passed to `download_from_url`
+     """
+
+     def __init__(
+         self,
+         arch: str = "yolov8_artefact",
+         batch_size: int = 2,
+         model_path: Optional[str] = None,
+         labels: Optional[List[str]] = None,
+         input_shape: Optional[Tuple[int, int, int]] = None,
+         conf_threshold: float = 0.5,
+         iou_threshold: float = 0.5,
+         **kwargs: Any,
+     ) -> None:
+         super().__init__(batch_size=batch_size, url=default_cfgs[arch]["url"], model_path=model_path, **kwargs)
+         self.labels = labels or default_cfgs[arch]["labels"]
+         self.input_shape = input_shape or default_cfgs[arch]["input_shape"]
+         self.conf_threshold = conf_threshold
+         self.iou_threshold = iou_threshold
+
+     def preprocess(self, img: np.ndarray) -> np.ndarray:
+         return np.transpose(cv2.resize(img, (self.input_shape[2], self.input_shape[1])), (2, 0, 1)) / np.array(255.0)
+
+     def postprocess(self, output: List[np.ndarray], input_images: List[List[np.ndarray]]) -> List[List[Dict[str, Any]]]:
+         results = []
+
+         for batch in zip(output, input_images):
+             for out, img in zip(batch[0], batch[1]):
+                 org_height, org_width = img.shape[:2]
+                 width_scale, height_scale = org_width / self.input_shape[2], org_height / self.input_shape[1]
+                 for res in out:
+                     sample_results = []
+                     for row in np.transpose(np.squeeze(res)):
+                         classes_scores = row[4:]
+                         max_score = np.amax(classes_scores)
+                         if max_score >= self.conf_threshold:
+                             class_id = np.argmax(classes_scores)
+                             x, y, w, h = row[0], row[1], row[2], row[3]
+                             # to rescaled xmin, ymin, xmax, ymax
+                             xmin = int((x - w / 2) * width_scale)
+                             ymin = int((y - h / 2) * height_scale)
+                             xmax = int((x + w / 2) * width_scale)
+                             ymax = int((y + h / 2) * height_scale)
+
+                             sample_results.append({
+                                 "label": self.labels[class_id],
+                                 "confidence": float(max_score),
+                                 "box": [xmin, ymin, xmax, ymax],
+                             })
+
+                     # Filter out overlapping boxes
+                     boxes = [res["box"] for res in sample_results]
+                     scores = [res["confidence"] for res in sample_results]
+                     keep_indices = cv2.dnn.NMSBoxes(boxes, scores, self.conf_threshold, self.iou_threshold)  # type: ignore[arg-type]
+                     sample_results = [sample_results[i] for i in keep_indices]
+
+                     results.append(sample_results)
+
+         self._results = results
+         return results
+
+     def show(self, **kwargs: Any) -> None:
+         """
+         Display the results
+
+         Args:
+         ----
+             **kwargs: additional keyword arguments to be passed to `plt.show`
+         """
+         requires_package("matplotlib", "`.show()` requires matplotlib installed")
+         import matplotlib.pyplot as plt
+         from matplotlib.patches import Rectangle
+
+         # visualize the results with matplotlib
+         if self._results and self._inputs:
+             for img, res in zip(self._inputs, self._results):
+                 plt.figure(figsize=(10, 10))
+                 plt.imshow(img)
+                 for obj in res:
+                     xmin, ymin, xmax, ymax = obj["box"]
+                     label = obj["label"]
+                     plt.text(xmin, ymin, f"{label} {obj['confidence']:.2f}", color="red")
+                     plt.gca().add_patch(
+                         Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, fill=False, edgecolor="red", linewidth=2)
+                     )
+                 plt.show(**kwargs)
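Taken together with the docstring above, typical usage of the new detector is as follows (the default model is downloaded on first instantiation; `.show()` needs matplotlib):

```python
from doctr.io import DocumentFile
from doctr.contrib.artefacts import ArtefactDetector

doc = DocumentFile.from_images(["path/to/image.jpg"])
detector = ArtefactDetector()  # fetches the default "yolov8_artefact" ONNX model
results = detector(doc)        # per-image lists of {"label", "confidence", "box"} dicts
detector.show()                # optional matplotlib visualization of the boxes
```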
python_doctr-0.9.0/doctr/contrib/base.py (new file)

@@ -0,0 +1,105 @@
+ # Copyright (C) 2021-2024, Mindee.
+
+ # This program is licensed under the Apache License 2.0.
+ # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+ from typing import Any, List, Optional
+
+ import numpy as np
+
+ from doctr.file_utils import requires_package
+ from doctr.utils.data import download_from_url
+
+
+ class _BasePredictor:
+     """
+     Base class for all predictors
+
+     Args:
+     ----
+         batch_size: the batch size to use
+         url: the url to use to download a model if needed
+         model_path: the path to the model to use
+         **kwargs: additional arguments to be passed to `download_from_url`
+     """
+
+     def __init__(self, batch_size: int, url: Optional[str] = None, model_path: Optional[str] = None, **kwargs) -> None:
+         self.batch_size = batch_size
+         self.session = self._init_model(url, model_path, **kwargs)
+
+         self._inputs: List[np.ndarray] = []
+         self._results: List[Any] = []
+
+     def _init_model(self, url: Optional[str] = None, model_path: Optional[str] = None, **kwargs: Any) -> Any:
+         """
+         Download the model from the given url if needed
+
+         Args:
+         ----
+             url: the url to use
+             model_path: the path to the model to use
+             **kwargs: additional arguments to be passed to `download_from_url`
+
+         Returns:
+         -------
+             Any: the ONNX loaded model
+         """
+         requires_package("onnxruntime", "`.contrib` module requires `onnxruntime` to be installed.")
+         import onnxruntime as ort
+
+         if not url and not model_path:
+             raise ValueError("You must provide either a url or a model_path")
+         onnx_model_path = model_path if model_path else str(download_from_url(url, cache_subdir="models", **kwargs))  # type: ignore[arg-type]
+         return ort.InferenceSession(onnx_model_path, providers=["CUDAExecutionProvider", "CPUExecutionProvider"])
+
+     def preprocess(self, img: np.ndarray) -> np.ndarray:
+         """
+         Preprocess the input image
+
+         Args:
+         ----
+             img: the input image to preprocess
+
+         Returns:
+         -------
+             np.ndarray: the preprocessed image
+         """
+         raise NotImplementedError
+
+     def postprocess(self, output: List[np.ndarray], input_images: List[List[np.ndarray]]) -> Any:
+         """
+         Postprocess the model output
+
+         Args:
+         ----
+             output: the model output to postprocess
+             input_images: the input images used to generate the output
+
+         Returns:
+         -------
+             Any: the postprocessed output
+         """
+         raise NotImplementedError
+
+     def __call__(self, inputs: List[np.ndarray]) -> Any:
+         """
+         Call the model on the given inputs
+
+         Args:
+         ----
+             inputs: the inputs to use
+
+         Returns:
+         -------
+             Any: the postprocessed output
+         """
+         self._inputs = inputs
+         model_inputs = self.session.get_inputs()
+
+         batched_inputs = [inputs[i : i + self.batch_size] for i in range(0, len(inputs), self.batch_size)]
+         processed_batches = [
+             np.array([self.preprocess(img) for img in batch], dtype=np.float32) for batch in batched_inputs
+         ]
+
+         outputs = [self.session.run(None, {model_inputs[0].name: batch}) for batch in processed_batches]
+         return self.postprocess(outputs, batched_inputs)
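`_BasePredictor` handles model download, batching, and the ONNX session, and leaves `preprocess`/`postprocess` abstract; `ArtefactDetector` above is its only subclass in this release. A hypothetical minimal subclass (the class name, normalization, and argmax decoding are illustrative assumptions, not part of the package):

```python
from typing import Any, List

import numpy as np

from doctr.contrib.base import _BasePredictor


class OnnxClassifier(_BasePredictor):
    """Illustrative subclass: wraps a single-output ONNX image classifier."""

    def preprocess(self, img: np.ndarray) -> np.ndarray:
        # HWC uint8 -> CHW float32 in [0, 1]; resizing omitted for brevity
        return np.transpose(img, (2, 0, 1)).astype(np.float32) / 255.0

    def postprocess(self, output: List[np.ndarray], input_images: List[List[np.ndarray]]) -> Any:
        # `output` holds one session.run() result per batch; take the argmax per sample
        return [int(np.argmax(logits)) for batch_out in output for logits in batch_out[0]]


# clf = OnnxClassifier(batch_size=4, model_path="path/to/model.onnx")
# labels = clf(list_of_images)
```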
{python-doctr-0.8.0 → python_doctr-0.9.0}/doctr/datasets/datasets/pytorch.py

@@ -50,9 +50,9 @@ class AbstractDataset(_AbstractDataset):
      @staticmethod
      def collate_fn(samples: List[Tuple[torch.Tensor, Any]]) -> Tuple[torch.Tensor, List[Any]]:
          images, targets = zip(*samples)
-         images = torch.stack(images, dim=0)
+         images = torch.stack(images, dim=0)  # type: ignore[assignment]

-         return images, list(targets)
+         return images, list(targets)  # type: ignore[return-value]


  class VisionDataset(AbstractDataset, _VisionDataset):  # noqa: D101
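For context, `collate_fn` is what lets docTR datasets feed a PyTorch `DataLoader` with a stacked image tensor plus a list of raw targets; a typical hookup (the dataset choice here is just an example) looks like:

```python
from torch.utils.data import DataLoader

from doctr.datasets import FUNSD

train_set = FUNSD(train=True, download=True)
loader = DataLoader(train_set, batch_size=8, collate_fn=train_set.collate_fn)
images, targets = next(iter(loader))  # images: stacked (8, C, H, W) tensor; targets: list of per-sample targets
```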