onnxtr 0.6.2__tar.gz → 0.6.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. {onnxtr-0.6.2 → onnxtr-0.6.3}/PKG-INFO +7 -4
  2. {onnxtr-0.6.2 → onnxtr-0.6.3}/README.md +2 -1
  3. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/io/elements.py +2 -2
  4. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/builder.py +1 -1
  5. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/detection/postprocessor/base.py +2 -4
  6. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/engine.py +4 -0
  7. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/recognition/models/master.py +1 -1
  8. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/transforms/base.py +34 -18
  9. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/utils/visualization.py +3 -3
  10. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/utils/vocabs.py +102 -28
  11. onnxtr-0.6.3/onnxtr/version.py +1 -0
  12. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr.egg-info/PKG-INFO +7 -4
  13. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr.egg-info/requires.txt +2 -1
  14. {onnxtr-0.6.2 → onnxtr-0.6.3}/pyproject.toml +2 -2
  15. {onnxtr-0.6.2 → onnxtr-0.6.3}/setup.py +1 -1
  16. onnxtr-0.6.2/onnxtr/version.py +0 -1
  17. {onnxtr-0.6.2 → onnxtr-0.6.3}/LICENSE +0 -0
  18. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/__init__.py +0 -0
  19. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/contrib/__init__.py +0 -0
  20. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/contrib/artefacts.py +0 -0
  21. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/contrib/base.py +0 -0
  22. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/file_utils.py +0 -0
  23. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/io/__init__.py +0 -0
  24. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/io/html.py +0 -0
  25. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/io/image.py +0 -0
  26. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/io/pdf.py +0 -0
  27. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/io/reader.py +0 -0
  28. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/__init__.py +0 -0
  29. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/_utils.py +0 -0
  30. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/classification/__init__.py +0 -0
  31. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/classification/models/__init__.py +0 -0
  32. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/classification/models/mobilenet.py +0 -0
  33. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/classification/predictor/__init__.py +0 -0
  34. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/classification/predictor/base.py +0 -0
  35. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/classification/zoo.py +0 -0
  36. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/detection/__init__.py +0 -0
  37. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/detection/_utils/__init__.py +0 -0
  38. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/detection/_utils/base.py +0 -0
  39. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/detection/core.py +0 -0
  40. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/detection/models/__init__.py +0 -0
  41. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/detection/models/differentiable_binarization.py +0 -0
  42. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/detection/models/fast.py +0 -0
  43. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/detection/models/linknet.py +0 -0
  44. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/detection/postprocessor/__init__.py +0 -0
  45. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/detection/predictor/__init__.py +0 -0
  46. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/detection/predictor/base.py +0 -0
  47. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/detection/zoo.py +0 -0
  48. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/factory/__init__.py +0 -0
  49. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/factory/hub.py +0 -0
  50. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/predictor/__init__.py +0 -0
  51. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/predictor/base.py +0 -0
  52. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/predictor/predictor.py +0 -0
  53. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/preprocessor/__init__.py +0 -0
  54. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/preprocessor/base.py +0 -0
  55. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/recognition/__init__.py +0 -0
  56. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/recognition/core.py +0 -0
  57. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/recognition/models/__init__.py +0 -0
  58. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/recognition/models/crnn.py +0 -0
  59. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/recognition/models/parseq.py +0 -0
  60. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/recognition/models/sar.py +0 -0
  61. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/recognition/models/vitstr.py +0 -0
  62. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/recognition/predictor/__init__.py +0 -0
  63. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/recognition/predictor/_utils.py +0 -0
  64. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/recognition/predictor/base.py +0 -0
  65. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/recognition/utils.py +0 -0
  66. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/recognition/zoo.py +0 -0
  67. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/zoo.py +0 -0
  68. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/py.typed +0 -0
  69. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/transforms/__init__.py +0 -0
  70. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/utils/__init__.py +0 -0
  71. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/utils/common_types.py +0 -0
  72. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/utils/data.py +0 -0
  73. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/utils/fonts.py +0 -0
  74. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/utils/geometry.py +0 -0
  75. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/utils/multithreading.py +0 -0
  76. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/utils/reconstitution.py +0 -0
  77. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/utils/repr.py +0 -0
  78. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr.egg-info/SOURCES.txt +0 -0
  79. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr.egg-info/dependency_links.txt +0 -0
  80. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr.egg-info/top_level.txt +0 -0
  81. {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr.egg-info/zip-safe +0 -0
  82. {onnxtr-0.6.2 → onnxtr-0.6.3}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: onnxtr
3
- Version: 0.6.2
3
+ Version: 0.6.3
4
4
  Summary: Onnx Text Recognition (OnnxTR): docTR Onnx-Wrapper for high-performance OCR on documents.
5
5
  Author-email: Felix Dittrich <felixdittrich92@gmail.com>
6
6
  Maintainer: Felix Dittrich
@@ -229,7 +229,6 @@ Requires-Dist: numpy<3.0.0,>=1.16.0
229
229
  Requires-Dist: scipy<2.0.0,>=1.4.0
230
230
  Requires-Dist: pypdfium2<5.0.0,>=4.11.0
231
231
  Requires-Dist: pyclipper<2.0.0,>=1.2.0
232
- Requires-Dist: shapely<3.0.0,>=1.6.0
233
232
  Requires-Dist: rapidfuzz<4.0.0,>=3.0.0
234
233
  Requires-Dist: langdetect<2.0.0,>=1.0.9
235
234
  Requires-Dist: huggingface-hub<1.0.0,>=0.23.0
@@ -264,6 +263,7 @@ Provides-Extra: testing
264
263
  Requires-Dist: pytest>=5.3.2; extra == "testing"
265
264
  Requires-Dist: coverage[toml]>=4.5.4; extra == "testing"
266
265
  Requires-Dist: requests>=2.20.0; extra == "testing"
266
+ Requires-Dist: pytest-memray>=1.7.0; extra == "testing"
267
267
  Provides-Extra: quality
268
268
  Requires-Dist: ruff>=0.1.5; extra == "quality"
269
269
  Requires-Dist: mypy>=0.812; extra == "quality"
@@ -277,9 +277,11 @@ Requires-Dist: mplcursors>=0.3; extra == "dev"
277
277
  Requires-Dist: pytest>=5.3.2; extra == "dev"
278
278
  Requires-Dist: coverage[toml]>=4.5.4; extra == "dev"
279
279
  Requires-Dist: requests>=2.20.0; extra == "dev"
280
+ Requires-Dist: pytest-memray>=1.7.0; extra == "dev"
280
281
  Requires-Dist: ruff>=0.1.5; extra == "dev"
281
282
  Requires-Dist: mypy>=0.812; extra == "dev"
282
283
  Requires-Dist: pre-commit>=2.17.0; extra == "dev"
284
+ Dynamic: license-file
283
285
 
284
286
  <p align="center">
285
287
  <img src="https://github.com/felixdittrich92/OnnxTR/raw/main/docs/images/logo.jpg" width="40%">
@@ -290,7 +292,8 @@ Requires-Dist: pre-commit>=2.17.0; extra == "dev"
290
292
  [![codecov](https://codecov.io/gh/felixdittrich92/OnnxTR/graph/badge.svg?token=WVFRCQBOLI)](https://codecov.io/gh/felixdittrich92/OnnxTR)
291
293
  [![Codacy Badge](https://app.codacy.com/project/badge/Grade/4fff4d764bb14fb8b4f4afeb9587231b)](https://app.codacy.com/gh/felixdittrich92/OnnxTR/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade)
292
294
  [![CodeFactor](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr/badge)](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr)
293
- [![Pypi](https://img.shields.io/badge/pypi-v0.6.2-blue.svg)](https://pypi.org/project/OnnxTR/)
295
+ [![Socket Badge](https://socket.dev/api/badge/pypi/package/onnxtr/0.6.2?artifact_id=tar-gz)](https://socket.dev/pypi/package/onnxtr/overview/0.6.2/tar-gz)
296
+ [![Pypi](https://img.shields.io/badge/pypi-v0.6.3-blue.svg)](https://pypi.org/project/OnnxTR/)
294
297
  [![Docker Images](https://img.shields.io/badge/Docker-4287f5?style=flat&logo=docker&logoColor=white)](https://github.com/felixdittrich92/OnnxTR/pkgs/container/onnxtr)
295
298
  [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/Felix92/OnnxTR-OCR)
296
299
  ![PyPI - Downloads](https://img.shields.io/pypi/dm/onnxtr)
@@ -7,7 +7,8 @@
7
7
  [![codecov](https://codecov.io/gh/felixdittrich92/OnnxTR/graph/badge.svg?token=WVFRCQBOLI)](https://codecov.io/gh/felixdittrich92/OnnxTR)
8
8
  [![Codacy Badge](https://app.codacy.com/project/badge/Grade/4fff4d764bb14fb8b4f4afeb9587231b)](https://app.codacy.com/gh/felixdittrich92/OnnxTR/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade)
9
9
  [![CodeFactor](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr/badge)](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr)
10
- [![Pypi](https://img.shields.io/badge/pypi-v0.6.2-blue.svg)](https://pypi.org/project/OnnxTR/)
10
+ [![Socket Badge](https://socket.dev/api/badge/pypi/package/onnxtr/0.6.2?artifact_id=tar-gz)](https://socket.dev/pypi/package/onnxtr/overview/0.6.2/tar-gz)
11
+ [![Pypi](https://img.shields.io/badge/pypi-v0.6.3-blue.svg)](https://pypi.org/project/OnnxTR/)
11
12
  [![Docker Images](https://img.shields.io/badge/Docker-4287f5?style=flat&logo=docker&logoColor=white)](https://github.com/felixdittrich92/OnnxTR/pkgs/container/onnxtr)
12
13
  [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/Felix92/OnnxTR-OCR)
13
14
  ![PyPI - Downloads](https://img.shields.io/pypi/dm/onnxtr)
@@ -331,7 +331,7 @@ class Page(Element):
331
331
  )
332
332
  # Create the body
333
333
  body = SubElement(page_hocr, "body")
334
- SubElement(
334
+ page_div = SubElement(
335
335
  body,
336
336
  "div",
337
337
  attrib={
@@ -346,7 +346,7 @@ class Page(Element):
346
346
  raise TypeError("XML export is only available for straight bounding boxes for now.")
347
347
  (xmin, ymin), (xmax, ymax) = block.geometry
348
348
  block_div = SubElement(
349
- body,
349
+ page_div,
350
350
  "div",
351
351
  attrib={
352
352
  "class": "ocr_carea",
@@ -74,7 +74,7 @@ class DocumentBuilder(NestedObject):
74
74
  """
75
75
  lines = []
76
76
  # Sort words horizontally
77
- word_idcs = [word_idcs[idx] for idx in boxes[word_idcs, 0].argsort().tolist()] # type: ignore[call-overload]
77
+ word_idcs = [word_idcs[idx] for idx in boxes[word_idcs, 0].argsort().tolist()]
78
78
 
79
79
  # Eventually split line horizontally
80
80
  if len(word_idcs) < 2:
@@ -9,7 +9,6 @@
9
9
  import cv2
10
10
  import numpy as np
11
11
  import pyclipper
12
- from shapely.geometry import Polygon
13
12
 
14
13
  from ..core import DetectionPostProcessor
15
14
 
@@ -54,9 +53,8 @@ class GeneralDetectionPostProcessor(DetectionPostProcessor):
54
53
  area = (rect[1][0] + 1) * (1 + rect[1][1])
55
54
  length = 2 * (rect[1][0] + rect[1][1]) + 2
56
55
  else:
57
- poly = Polygon(points)
58
- area = poly.area
59
- length = poly.length
56
+ area = cv2.contourArea(points)
57
+ length = cv2.arcLength(points, closed=True)
60
58
  distance = area * self.unclip_ratio / length # compute distance to expand polygon
61
59
  offset = pyclipper.PyclipperOffset()
62
60
  offset.AddPath(points, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
@@ -4,6 +4,7 @@
4
4
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
5
5
 
6
6
  import logging
7
+ import os
7
8
  from typing import Any
8
9
 
9
10
  import numpy as np
@@ -15,6 +16,9 @@ from onnxruntime import (
15
16
  get_available_providers,
16
17
  get_device,
17
18
  )
19
+ from onnxruntime.capi._pybind_state import set_default_logger_severity
20
+
21
+ set_default_logger_severity(int(os.getenv("ORT_LOG_SEVERITY_LEVEL", 4)))
18
22
 
19
23
  from onnxtr.utils.data import download_from_url
20
24
  from onnxtr.utils.geometry import shape_translate
@@ -106,7 +106,7 @@ class MASTERPostProcessor(RecognitionPostProcessor):
106
106
  "".join(self._embedding[idx] for idx in encoded_seq).split("<eos>")[0] for encoded_seq in out_idxs
107
107
  ]
108
108
 
109
- return list(zip(word_values, np.clip(probs, 0, 1).astype(float).tolist())) # type: ignore[arg-type]
109
+ return list(zip(word_values, np.clip(probs, 0, 1).astype(float).tolist()))
110
110
 
111
111
 
112
112
  def _master(
@@ -4,6 +4,8 @@
4
4
  # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
5
5
 
6
6
 
7
+ import math
8
+
7
9
  import numpy as np
8
10
  from PIL import Image, ImageOps
9
11
 
@@ -37,37 +39,51 @@ class Resize:
37
39
  raise AssertionError("size should be either a tuple or an int")
38
40
 
39
41
  def __call__(self, img: np.ndarray) -> np.ndarray:
40
- img = (img * 255).astype(np.uint8) if img.dtype != np.uint8 else img
41
- h, w = img.shape[:2] if img.ndim == 3 else img.shape[1:3]
42
+ if img.dtype != np.uint8:
43
+ img_pil = Image.fromarray((img * 255).clip(0, 255).astype(np.uint8))
44
+ else:
45
+ img_pil = Image.fromarray(img)
46
+
42
47
  sh, sw = self.size
48
+ w, h = img_pil.size
43
49
 
44
50
  if not self.preserve_aspect_ratio:
45
- return np.array(Image.fromarray(img).resize((sw, sh), resample=self.interpolation))
51
+ img_resized_pil = img_pil.resize((sw, sh), resample=self.interpolation)
52
+ return np.array(img_resized_pil)
46
53
 
47
54
  actual_ratio = h / w
48
55
  target_ratio = sh / sw
49
56
 
50
- if target_ratio == actual_ratio:
51
- return np.array(Image.fromarray(img).resize((sw, sh), resample=self.interpolation))
52
-
53
57
  if actual_ratio > target_ratio:
54
- tmp_size = (int(sh / actual_ratio), sh)
58
+ new_h = sh
59
+ new_w = max(int(sh / actual_ratio), 1)
55
60
  else:
56
- tmp_size = (sw, int(sw * actual_ratio))
61
+ new_w = sw
62
+ new_h = max(int(sw * actual_ratio), 1)
57
63
 
58
- img_resized = Image.fromarray(img).resize(tmp_size, resample=self.interpolation)
59
- pad_left = pad_top = 0
60
- pad_right = sw - img_resized.width
61
- pad_bottom = sh - img_resized.height
64
+ img_resized_pil = img_pil.resize((new_w, new_h), resample=self.interpolation)
65
+
66
+ delta_w = sw - new_w
67
+ delta_h = sh - new_h
62
68
 
63
69
  if self.symmetric_pad:
64
- pad_left = pad_right // 2
65
- pad_right -= pad_left
66
- pad_top = pad_bottom // 2
67
- pad_bottom -= pad_top
70
+ # Symmetric padding
71
+ pad_left = math.ceil(delta_w / 2)
72
+ pad_right = math.floor(delta_w / 2)
73
+ pad_top = math.ceil(delta_h / 2)
74
+ pad_bottom = math.floor(delta_h / 2)
75
+ else:
76
+ # Asymmetric padding
77
+ pad_left, pad_top = 0, 0
78
+ pad_right, pad_bottom = delta_w, delta_h
79
+
80
+ img_padded_pil = ImageOps.expand(
81
+ img_resized_pil,
82
+ border=(pad_left, pad_top, pad_right, pad_bottom),
83
+ fill=0,
84
+ )
68
85
 
69
- img_resized = ImageOps.expand(img_resized, (pad_left, pad_top, pad_right, pad_bottom))
70
- return np.array(img_resized)
86
+ return np.array(img_padded_pil)
71
87
 
72
88
  def __repr__(self) -> str:
73
89
  interpolate_str = self.interpolation
@@ -274,11 +274,11 @@ def draw_boxes(boxes: np.ndarray, image: np.ndarray, color: tuple[int, int, int]
274
274
  _boxes[:, [1, 3]] *= h
275
275
  _boxes = _boxes.astype(np.int32)
276
276
  for box in _boxes.tolist():
277
- xmin, ymin, xmax, ymax = box # type: ignore[misc]
277
+ xmin, ymin, xmax, ymax = box
278
278
  image = cv2.rectangle(
279
279
  image,
280
- (xmin, ymin), # type: ignore[arg-type]
281
- (xmax, ymax), # type: ignore[arg-type]
280
+ (xmin, ymin),
281
+ (xmax, ymax),
282
282
  color=color if isinstance(color, tuple) else (0, 0, 255),
283
283
  thickness=2,
284
284
  )
@@ -9,32 +9,92 @@ __all__ = ["VOCABS"]
9
9
 
10
10
 
11
11
  VOCABS: dict[str, str] = {
12
- "digits": string.digits,
13
- "ascii_letters": string.ascii_letters,
14
- "punctuation": string.punctuation,
15
- "currency": "£€¥¢฿",
16
- "ancient_greek": "αβγδεζηθικλμνξοπρστυφχψωΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ",
17
- "arabic_letters": "ءآأؤإئابةتثجحخدذرزسشصضطظعغـفقكلمنهوىي",
18
- "persian_letters": "پچڢڤگ",
19
- "arabic_digits": "٠١٢٣٤٥٦٧٨٩",
12
+ # Arabic & Persian
20
13
  "arabic_diacritics": "ًٌٍَُِّْ",
14
+ "arabic_digits": "٠١٢٣٤٥٦٧٨٩",
15
+ "arabic_letters": "ءآأؤإئابةتثجحخدذرزسشصضطظعغـفقكلمنهوىي",
21
16
  "arabic_punctuation": "؟؛«»—",
22
- "hindi_letters": "अआइईउऊऋॠऌॡएऐओऔंःकखगघङचछजझञटठडढणतथदधनपफबभमयरलवशषसह",
23
- "hindi_digits": "०१२३४५६७८९",
24
- "hindi_punctuation": "।,?!:्ॐ॰॥",
25
- "bangla_letters": "অআইঈউঊঋএঐওঔকখগঘঙচছজঝঞটঠডঢণতথদধনপফবভমযরলশষসহ়ঽািীুূৃেৈোৌ্ৎংঃঁ",
17
+ "persian_letters": "پچڢڤگ",
18
+ # Bangla
26
19
  "bangla_digits": "০১২৩৪৫৬৭৮৯",
20
+ "bangla_letters": "অআইঈউঊঋএঐওঔকখগঘঙচছজঝঞটঠডঢণতথদধনপফবভমযরলশষসহ়ঽািীুূৃেৈোৌ্ৎংঃঁ",
21
+ # Cyrillic
27
22
  "generic_cyrillic_letters": "абвгдежзийклмнопрстуфхцчшщьюяАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЬЮЯ",
23
+ "russian_cyrillic_letters": "ёыэЁЫЭ",
24
+ "russian_signs": "ъЪ",
25
+ # Greek
26
+ "ancient_greek": "αβγδεζηθικλμνξοπρστυφχψωΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ",
27
+ # Gujarati
28
+ "gujarati_consonants": "ખગઘચછજઝઞટઠડઢણતથદધનપફબભમયરલવશસહળક્ષ",
29
+ "gujarati_digits": "૦૧૨૩૪૫૬૭૮૯",
30
+ "gujarati_punctuation": "૰ઽ◌ંઃ॥ૐ઼ઁ" + "૱",
31
+ "gujarati_vowels": "અઆઇઈઉઊઋએઐઓ",
32
+ # Hindi
33
+ "hindi_digits": "०१२३४५६७८९",
34
+ "hindi_letters": "अआइईउऊऋॠऌॡएऐओऔंःकखगघङचछजझञटठडढणतथदधनपफबभमयरलवशषसह",
35
+ "hindi_punctuation": "।,?!:्ॐ॰॥",
36
+ # Hebrew
37
+ "hebrew_cantillations": "֑֖֛֢֣֤֥֦֧֪֚֭֮֒֓֔֕֗֘֙֜֝֞֟֠֡֨֩֫֬֯",
38
+ "hebrew_letters": "אבגדהוזחטיךכלםמןנסעףפץצקרשת",
39
+ "hebrew_specials": "ׯװױײיִﬞײַﬠﬡﬢﬣﬤﬥﬦﬧﬨ﬩שׁשׂשּׁשּׂאַאָאּבּגּדּהּוּזּטּיּךּכּלּמּנּסּףּפּצּקּרּשּתּוֹבֿכֿפֿﭏ",
40
+ "hebrew_punctuation": "ֽ־ֿ׀ׁׂ׃ׅׄ׆׳״",
41
+ "hebrew_vowels": "ְֱֲֳִֵֶַָׇֹֺֻ",
42
+ # Latin
43
+ "digits": string.digits,
44
+ "ascii_letters": string.ascii_letters,
45
+ "punctuation": string.punctuation,
46
+ "currency": "£€¥¢฿",
28
47
  }
29
48
 
49
+ # Latin & latin-dependent alphabets
30
50
  VOCABS["latin"] = VOCABS["digits"] + VOCABS["ascii_letters"] + VOCABS["punctuation"]
31
51
  VOCABS["english"] = VOCABS["latin"] + "°" + VOCABS["currency"]
32
- VOCABS["legacy_french"] = VOCABS["latin"] + "°" + "àâéèêëîïôùûçÀÂÉÈËÎÏÔÙÛÇ" + VOCABS["currency"]
52
+
53
+ VOCABS["czech"] = VOCABS["english"] + "áčďéěíňóřšťúůýžÁČĎÉĚÍŇÓŘŠŤÚŮÝŽ"
54
+
55
+ VOCABS["danish"] = VOCABS["english"] + "æøåÆØÅ"
56
+
57
+ VOCABS["dutch"] = VOCABS["english"] + "áéíóúüñÁÉÍÓÚÜÑ"
58
+
33
59
  VOCABS["french"] = VOCABS["english"] + "àâéèêëîïôùûüçÀÂÉÈÊËÎÏÔÙÛÜÇ"
60
+ VOCABS["legacy_french"] = VOCABS["latin"] + "°" + "àâéèêëîïôùûçÀÂÉÈËÎÏÔÙÛÇ" + VOCABS["currency"]
61
+
62
+ VOCABS["finnish"] = VOCABS["english"] + "äöÄÖ"
63
+
64
+ VOCABS["german"] = VOCABS["english"] + "äöüßÄÖÜẞ"
65
+
66
+ VOCABS["croatian"] = VOCABS["english"] + "ČčĆćĐđŠšŽž"
67
+
68
+ VOCABS["hebrew"] = (
69
+ VOCABS["english"]
70
+ + VOCABS["hebrew_letters"]
71
+ + VOCABS["hebrew_vowels"]
72
+ + VOCABS["hebrew_punctuation"]
73
+ + VOCABS["hebrew_cantillations"]
74
+ + VOCABS["hebrew_specials"]
75
+ + "₪"
76
+ )
77
+
78
+ VOCABS["italian"] = VOCABS["english"] + "àèéìíîòóùúÀÈÉÌÍÎÒÓÙÚ"
79
+
80
+ VOCABS["norwegian"] = VOCABS["english"] + "æøåÆØÅ"
81
+
82
+ VOCABS["polish"] = VOCABS["english"] + "ąćęłńóśźżĄĆĘŁŃÓŚŹŻ"
83
+
34
84
  VOCABS["portuguese"] = VOCABS["english"] + "áàâãéêíïóôõúüçÁÀÂÃÉÊÍÏÓÔÕÚÜÇ"
85
+
35
86
  VOCABS["spanish"] = VOCABS["english"] + "áéíóúüñÁÉÍÓÚÜÑ" + "¡¿"
36
- VOCABS["italian"] = VOCABS["english"] + "àèéìíîòóùúÀÈÉÌÍÎÒÓÙÚ"
37
- VOCABS["german"] = VOCABS["english"] + "äöüßÄÖÜẞ"
87
+
88
+ VOCABS["swedish"] = VOCABS["english"] + "åäöÅÄÖ"
89
+
90
+ VOCABS["vietnamese"] = (
91
+ VOCABS["english"]
92
+ + "áàảạãăắằẳẵặâấầẩẫậđéèẻẽẹêếềểễệóòỏõọôốồổộỗơớờởợỡúùủũụưứừửữựíìỉĩịýỳỷỹỵ"
93
+ + "ÁÀẢẠÃĂẮẰẲẴẶÂẤẦẨẪẬĐÉÈẺẼẸÊẾỀỂỄỆÓÒỎÕỌÔỐỒỔỘỖƠỚỜỞỢỠÚÙỦŨỤƯỨỪỬỮỰÍÌỈĨỊÝỲỶỸỴ"
94
+ )
95
+
96
+ # Non-latin alphabets.
97
+ # Arabic
38
98
  VOCABS["arabic"] = (
39
99
  VOCABS["digits"]
40
100
  + VOCABS["arabic_digits"]
@@ -44,24 +104,37 @@ VOCABS["arabic"] = (
44
104
  + VOCABS["arabic_punctuation"]
45
105
  + VOCABS["punctuation"]
46
106
  )
47
- VOCABS["czech"] = VOCABS["english"] + "áčďéěíňóřšťúůýžÁČĎÉĚÍŇÓŘŠŤÚŮÝŽ"
48
- VOCABS["polish"] = VOCABS["english"] + "ąćęłńóśźżĄĆĘŁŃÓŚŹŻ"
49
- VOCABS["dutch"] = VOCABS["english"] + "áéíóúüñÁÉÍÓÚÜÑ"
50
- VOCABS["norwegian"] = VOCABS["english"] + "æøåÆØÅ"
51
- VOCABS["danish"] = VOCABS["english"] + "æøåÆØÅ"
52
- VOCABS["finnish"] = VOCABS["english"] + "äöÄÖ"
53
- VOCABS["swedish"] = VOCABS["english"] + "åäöÅÄÖ"
54
- VOCABS["vietnamese"] = (
55
- VOCABS["english"]
56
- + "áàảạãăắằẳẵặâấầẩẫậđéèẻẽẹêếềểễệóòỏõọôốồổộỗơớờởợỡúùủũụưứừửữựíìỉĩịýỳỷỹỵ"
57
- + "ÁÀẢẠÃĂẮẰẲẴẶÂẤẦẨẪẬĐÉÈẺẼẸÊẾỀỂỄỆÓÒỎÕỌÔỐỒỔỘỖƠỚỜỞỢỠÚÙỦŨỤƯỨỪỬỮỰÍÌỈĨỊÝỲỶỸỴ"
107
+
108
+ # Bangla
109
+ VOCABS["bangla"] = VOCABS["bangla_letters"] + VOCABS["bangla_digits"]
110
+
111
+ # Gujarati
112
+ VOCABS["gujarati"] = (
113
+ VOCABS["gujarati_vowels"]
114
+ + VOCABS["gujarati_consonants"]
115
+ + VOCABS["gujarati_digits"]
116
+ + VOCABS["gujarati_punctuation"]
117
+ + VOCABS["punctuation"]
58
118
  )
59
- VOCABS["hebrew"] = VOCABS["english"] + "אבגדהוזחטיכלמנסעפצקרשת" + "₪"
119
+
120
+ # Hindi
60
121
  VOCABS["hindi"] = VOCABS["hindi_letters"] + VOCABS["hindi_digits"] + VOCABS["hindi_punctuation"]
61
- VOCABS["bangla"] = VOCABS["bangla_letters"] + VOCABS["bangla_digits"]
122
+
123
+ # Cyrillic
124
+ VOCABS["russian"] = (
125
+ VOCABS["generic_cyrillic_letters"]
126
+ + VOCABS["russian_cyrillic_letters"]
127
+ + VOCABS["russian_signs"]
128
+ + VOCABS["digits"]
129
+ + VOCABS["punctuation"]
130
+ + "₽"
131
+ )
132
+
62
133
  VOCABS["ukrainian"] = (
63
134
  VOCABS["generic_cyrillic_letters"] + VOCABS["digits"] + VOCABS["punctuation"] + VOCABS["currency"] + "ґіїєҐІЇЄ₴"
64
135
  )
136
+
137
+ # Multi-lingual
65
138
  VOCABS["multilingual"] = "".join(
66
139
  dict.fromkeys(
67
140
  VOCABS["french"]
@@ -69,6 +142,7 @@ VOCABS["multilingual"] = "".join(
69
142
  + VOCABS["spanish"]
70
143
  + VOCABS["german"]
71
144
  + VOCABS["czech"]
145
+ + VOCABS["croatian"]
72
146
  + VOCABS["polish"]
73
147
  + VOCABS["dutch"]
74
148
  + VOCABS["italian"]
@@ -0,0 +1 @@
1
+ __version__ = 'v0.6.3'
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: onnxtr
3
- Version: 0.6.2
3
+ Version: 0.6.3
4
4
  Summary: Onnx Text Recognition (OnnxTR): docTR Onnx-Wrapper for high-performance OCR on documents.
5
5
  Author-email: Felix Dittrich <felixdittrich92@gmail.com>
6
6
  Maintainer: Felix Dittrich
@@ -229,7 +229,6 @@ Requires-Dist: numpy<3.0.0,>=1.16.0
229
229
  Requires-Dist: scipy<2.0.0,>=1.4.0
230
230
  Requires-Dist: pypdfium2<5.0.0,>=4.11.0
231
231
  Requires-Dist: pyclipper<2.0.0,>=1.2.0
232
- Requires-Dist: shapely<3.0.0,>=1.6.0
233
232
  Requires-Dist: rapidfuzz<4.0.0,>=3.0.0
234
233
  Requires-Dist: langdetect<2.0.0,>=1.0.9
235
234
  Requires-Dist: huggingface-hub<1.0.0,>=0.23.0
@@ -264,6 +263,7 @@ Provides-Extra: testing
264
263
  Requires-Dist: pytest>=5.3.2; extra == "testing"
265
264
  Requires-Dist: coverage[toml]>=4.5.4; extra == "testing"
266
265
  Requires-Dist: requests>=2.20.0; extra == "testing"
266
+ Requires-Dist: pytest-memray>=1.7.0; extra == "testing"
267
267
  Provides-Extra: quality
268
268
  Requires-Dist: ruff>=0.1.5; extra == "quality"
269
269
  Requires-Dist: mypy>=0.812; extra == "quality"
@@ -277,9 +277,11 @@ Requires-Dist: mplcursors>=0.3; extra == "dev"
277
277
  Requires-Dist: pytest>=5.3.2; extra == "dev"
278
278
  Requires-Dist: coverage[toml]>=4.5.4; extra == "dev"
279
279
  Requires-Dist: requests>=2.20.0; extra == "dev"
280
+ Requires-Dist: pytest-memray>=1.7.0; extra == "dev"
280
281
  Requires-Dist: ruff>=0.1.5; extra == "dev"
281
282
  Requires-Dist: mypy>=0.812; extra == "dev"
282
283
  Requires-Dist: pre-commit>=2.17.0; extra == "dev"
284
+ Dynamic: license-file
283
285
 
284
286
  <p align="center">
285
287
  <img src="https://github.com/felixdittrich92/OnnxTR/raw/main/docs/images/logo.jpg" width="40%">
@@ -290,7 +292,8 @@ Requires-Dist: pre-commit>=2.17.0; extra == "dev"
290
292
  [![codecov](https://codecov.io/gh/felixdittrich92/OnnxTR/graph/badge.svg?token=WVFRCQBOLI)](https://codecov.io/gh/felixdittrich92/OnnxTR)
291
293
  [![Codacy Badge](https://app.codacy.com/project/badge/Grade/4fff4d764bb14fb8b4f4afeb9587231b)](https://app.codacy.com/gh/felixdittrich92/OnnxTR/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade)
292
294
  [![CodeFactor](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr/badge)](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr)
293
- [![Pypi](https://img.shields.io/badge/pypi-v0.6.2-blue.svg)](https://pypi.org/project/OnnxTR/)
295
+ [![Socket Badge](https://socket.dev/api/badge/pypi/package/onnxtr/0.6.2?artifact_id=tar-gz)](https://socket.dev/pypi/package/onnxtr/overview/0.6.2/tar-gz)
296
+ [![Pypi](https://img.shields.io/badge/pypi-v0.6.3-blue.svg)](https://pypi.org/project/OnnxTR/)
294
297
  [![Docker Images](https://img.shields.io/badge/Docker-4287f5?style=flat&logo=docker&logoColor=white)](https://github.com/felixdittrich92/OnnxTR/pkgs/container/onnxtr)
295
298
  [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/Felix92/OnnxTR-OCR)
296
299
  ![PyPI - Downloads](https://img.shields.io/pypi/dm/onnxtr)
@@ -2,7 +2,6 @@ numpy<3.0.0,>=1.16.0
2
2
  scipy<2.0.0,>=1.4.0
3
3
  pypdfium2<5.0.0,>=4.11.0
4
4
  pyclipper<2.0.0,>=1.2.0
5
- shapely<3.0.0,>=1.6.0
6
5
  rapidfuzz<4.0.0,>=3.0.0
7
6
  langdetect<2.0.0,>=1.0.9
8
7
  huggingface-hub<1.0.0,>=0.23.0
@@ -28,6 +27,7 @@ mplcursors>=0.3
28
27
  pytest>=5.3.2
29
28
  coverage[toml]>=4.5.4
30
29
  requests>=2.20.0
30
+ pytest-memray>=1.7.0
31
31
  ruff>=0.1.5
32
32
  mypy>=0.812
33
33
  pre-commit>=2.17.0
@@ -60,6 +60,7 @@ pre-commit>=2.17.0
60
60
  pytest>=5.3.2
61
61
  coverage[toml]>=4.5.4
62
62
  requests>=2.20.0
63
+ pytest-memray>=1.7.0
63
64
 
64
65
  [viz]
65
66
  matplotlib>=3.1.0
@@ -35,7 +35,6 @@ dependencies = [
35
35
  "scipy>=1.4.0,<2.0.0",
36
36
  "pypdfium2>=4.11.0,<5.0.0",
37
37
  "pyclipper>=1.2.0,<2.0.0",
38
- "shapely>=1.6.0,<3.0.0",
39
38
  "rapidfuzz>=3.0.0,<4.0.0",
40
39
  "langdetect>=1.0.9,<2.0.0",
41
40
  "huggingface-hub>=0.23.0,<1.0.0",
@@ -81,6 +80,7 @@ testing = [
81
80
  "pytest>=5.3.2",
82
81
  "coverage[toml]>=4.5.4",
83
82
  "requests>=2.20.0",
83
+ "pytest-memray>=1.7.0",
84
84
  ]
85
85
  quality = [
86
86
  "ruff>=0.1.5",
@@ -100,6 +100,7 @@ dev = [
100
100
  "pytest>=5.3.2",
101
101
  "coverage[toml]>=4.5.4",
102
102
  "requests>=2.20.0",
103
+ "pytest-memray>=1.7.0",
103
104
  # Quality
104
105
  "ruff>=0.1.5",
105
106
  "mypy>=0.812",
@@ -139,7 +140,6 @@ module = [
139
140
  "matplotlib.*",
140
141
  "numpy.*",
141
142
  "pyclipper.*",
142
- "shapely.*",
143
143
  "mplcursors.*",
144
144
  "defusedxml.*",
145
145
  "weasyprint.*",
@@ -9,7 +9,7 @@ from pathlib import Path
9
9
  from setuptools import setup
10
10
 
11
11
  PKG_NAME = "onnxtr"
12
- VERSION = os.getenv("BUILD_VERSION", "0.6.2a0")
12
+ VERSION = os.getenv("BUILD_VERSION", "0.6.3a0")
13
13
 
14
14
 
15
15
  if __name__ == "__main__":
@@ -1 +0,0 @@
1
- __version__ = 'v0.6.2'
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes