onnxtr 0.6.2__tar.gz → 0.6.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {onnxtr-0.6.2 → onnxtr-0.6.3}/PKG-INFO +7 -4
- {onnxtr-0.6.2 → onnxtr-0.6.3}/README.md +2 -1
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/io/elements.py +2 -2
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/builder.py +1 -1
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/detection/postprocessor/base.py +2 -4
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/engine.py +4 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/recognition/models/master.py +1 -1
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/transforms/base.py +34 -18
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/utils/visualization.py +3 -3
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/utils/vocabs.py +102 -28
- onnxtr-0.6.3/onnxtr/version.py +1 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr.egg-info/PKG-INFO +7 -4
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr.egg-info/requires.txt +2 -1
- {onnxtr-0.6.2 → onnxtr-0.6.3}/pyproject.toml +2 -2
- {onnxtr-0.6.2 → onnxtr-0.6.3}/setup.py +1 -1
- onnxtr-0.6.2/onnxtr/version.py +0 -1
- {onnxtr-0.6.2 → onnxtr-0.6.3}/LICENSE +0 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/__init__.py +0 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/contrib/__init__.py +0 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/contrib/artefacts.py +0 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/contrib/base.py +0 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/file_utils.py +0 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/io/__init__.py +0 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/io/html.py +0 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/io/image.py +0 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/io/pdf.py +0 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/io/reader.py +0 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/__init__.py +0 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/_utils.py +0 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/classification/__init__.py +0 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/classification/models/__init__.py +0 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/classification/models/mobilenet.py +0 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/classification/predictor/__init__.py +0 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/classification/predictor/base.py +0 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/classification/zoo.py +0 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/detection/__init__.py +0 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/detection/_utils/__init__.py +0 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/detection/_utils/base.py +0 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/detection/core.py +0 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/detection/models/__init__.py +0 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/detection/models/differentiable_binarization.py +0 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/detection/models/fast.py +0 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/detection/models/linknet.py +0 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/detection/postprocessor/__init__.py +0 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/detection/predictor/__init__.py +0 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/detection/predictor/base.py +0 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/detection/zoo.py +0 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/factory/__init__.py +0 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/factory/hub.py +0 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/predictor/__init__.py +0 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/predictor/base.py +0 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/predictor/predictor.py +0 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/preprocessor/__init__.py +0 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/preprocessor/base.py +0 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/recognition/__init__.py +0 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/recognition/core.py +0 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/recognition/models/__init__.py +0 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/recognition/models/crnn.py +0 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/recognition/models/parseq.py +0 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/recognition/models/sar.py +0 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/recognition/models/vitstr.py +0 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/recognition/predictor/__init__.py +0 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/recognition/predictor/_utils.py +0 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/recognition/predictor/base.py +0 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/recognition/utils.py +0 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/recognition/zoo.py +0 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/models/zoo.py +0 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/py.typed +0 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/transforms/__init__.py +0 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/utils/__init__.py +0 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/utils/common_types.py +0 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/utils/data.py +0 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/utils/fonts.py +0 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/utils/geometry.py +0 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/utils/multithreading.py +0 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/utils/reconstitution.py +0 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr/utils/repr.py +0 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr.egg-info/SOURCES.txt +0 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr.egg-info/dependency_links.txt +0 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr.egg-info/top_level.txt +0 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/onnxtr.egg-info/zip-safe +0 -0
- {onnxtr-0.6.2 → onnxtr-0.6.3}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: onnxtr
|
|
3
|
-
Version: 0.6.2
|
|
3
|
+
Version: 0.6.3
|
|
4
4
|
Summary: Onnx Text Recognition (OnnxTR): docTR Onnx-Wrapper for high-performance OCR on documents.
|
|
5
5
|
Author-email: Felix Dittrich <felixdittrich92@gmail.com>
|
|
6
6
|
Maintainer: Felix Dittrich
|
|
@@ -229,7 +229,6 @@ Requires-Dist: numpy<3.0.0,>=1.16.0
|
|
|
229
229
|
Requires-Dist: scipy<2.0.0,>=1.4.0
|
|
230
230
|
Requires-Dist: pypdfium2<5.0.0,>=4.11.0
|
|
231
231
|
Requires-Dist: pyclipper<2.0.0,>=1.2.0
|
|
232
|
-
Requires-Dist: shapely<3.0.0,>=1.6.0
|
|
233
232
|
Requires-Dist: rapidfuzz<4.0.0,>=3.0.0
|
|
234
233
|
Requires-Dist: langdetect<2.0.0,>=1.0.9
|
|
235
234
|
Requires-Dist: huggingface-hub<1.0.0,>=0.23.0
|
|
@@ -264,6 +263,7 @@ Provides-Extra: testing
|
|
|
264
263
|
Requires-Dist: pytest>=5.3.2; extra == "testing"
|
|
265
264
|
Requires-Dist: coverage[toml]>=4.5.4; extra == "testing"
|
|
266
265
|
Requires-Dist: requests>=2.20.0; extra == "testing"
|
|
266
|
+
Requires-Dist: pytest-memray>=1.7.0; extra == "testing"
|
|
267
267
|
Provides-Extra: quality
|
|
268
268
|
Requires-Dist: ruff>=0.1.5; extra == "quality"
|
|
269
269
|
Requires-Dist: mypy>=0.812; extra == "quality"
|
|
@@ -277,9 +277,11 @@ Requires-Dist: mplcursors>=0.3; extra == "dev"
|
|
|
277
277
|
Requires-Dist: pytest>=5.3.2; extra == "dev"
|
|
278
278
|
Requires-Dist: coverage[toml]>=4.5.4; extra == "dev"
|
|
279
279
|
Requires-Dist: requests>=2.20.0; extra == "dev"
|
|
280
|
+
Requires-Dist: pytest-memray>=1.7.0; extra == "dev"
|
|
280
281
|
Requires-Dist: ruff>=0.1.5; extra == "dev"
|
|
281
282
|
Requires-Dist: mypy>=0.812; extra == "dev"
|
|
282
283
|
Requires-Dist: pre-commit>=2.17.0; extra == "dev"
|
|
284
|
+
Dynamic: license-file
|
|
283
285
|
|
|
284
286
|
<p align="center">
|
|
285
287
|
<img src="https://github.com/felixdittrich92/OnnxTR/raw/main/docs/images/logo.jpg" width="40%">
|
|
@@ -290,7 +292,8 @@ Requires-Dist: pre-commit>=2.17.0; extra == "dev"
|
|
|
290
292
|
[](https://codecov.io/gh/felixdittrich92/OnnxTR)
|
|
291
293
|
[](https://app.codacy.com/gh/felixdittrich92/OnnxTR/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade)
|
|
292
294
|
[](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr)
|
|
293
|
-
[](https://socket.dev/pypi/package/onnxtr/overview/0.6.2/tar-gz)
|
|
296
|
+
[](https://pypi.org/project/OnnxTR/)
|
|
294
297
|
[](https://github.com/felixdittrich92/OnnxTR/pkgs/container/onnxtr)
|
|
295
298
|
[](https://huggingface.co/spaces/Felix92/OnnxTR-OCR)
|
|
296
299
|

|
|
@@ -7,7 +7,8 @@
|
|
|
7
7
|
[](https://codecov.io/gh/felixdittrich92/OnnxTR)
|
|
8
8
|
[](https://app.codacy.com/gh/felixdittrich92/OnnxTR/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade)
|
|
9
9
|
[](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr)
|
|
10
|
-
[](https://socket.dev/pypi/package/onnxtr/overview/0.6.2/tar-gz)
|
|
11
|
+
[](https://pypi.org/project/OnnxTR/)
|
|
11
12
|
[](https://github.com/felixdittrich92/OnnxTR/pkgs/container/onnxtr)
|
|
12
13
|
[](https://huggingface.co/spaces/Felix92/OnnxTR-OCR)
|
|
13
14
|

|
|
@@ -331,7 +331,7 @@ class Page(Element):
|
|
|
331
331
|
)
|
|
332
332
|
# Create the body
|
|
333
333
|
body = SubElement(page_hocr, "body")
|
|
334
|
-
SubElement(
|
|
334
|
+
page_div = SubElement(
|
|
335
335
|
body,
|
|
336
336
|
"div",
|
|
337
337
|
attrib={
|
|
@@ -346,7 +346,7 @@ class Page(Element):
|
|
|
346
346
|
raise TypeError("XML export is only available for straight bounding boxes for now.")
|
|
347
347
|
(xmin, ymin), (xmax, ymax) = block.geometry
|
|
348
348
|
block_div = SubElement(
|
|
349
|
-
|
|
349
|
+
page_div,
|
|
350
350
|
"div",
|
|
351
351
|
attrib={
|
|
352
352
|
"class": "ocr_carea",
|
|
@@ -74,7 +74,7 @@ class DocumentBuilder(NestedObject):
|
|
|
74
74
|
"""
|
|
75
75
|
lines = []
|
|
76
76
|
# Sort words horizontally
|
|
77
|
-
word_idcs = [word_idcs[idx] for idx in boxes[word_idcs, 0].argsort().tolist()]
|
|
77
|
+
word_idcs = [word_idcs[idx] for idx in boxes[word_idcs, 0].argsort().tolist()]
|
|
78
78
|
|
|
79
79
|
# Eventually split line horizontally
|
|
80
80
|
if len(word_idcs) < 2:
|
|
@@ -9,7 +9,6 @@
|
|
|
9
9
|
import cv2
|
|
10
10
|
import numpy as np
|
|
11
11
|
import pyclipper
|
|
12
|
-
from shapely.geometry import Polygon
|
|
13
12
|
|
|
14
13
|
from ..core import DetectionPostProcessor
|
|
15
14
|
|
|
@@ -54,9 +53,8 @@ class GeneralDetectionPostProcessor(DetectionPostProcessor):
|
|
|
54
53
|
area = (rect[1][0] + 1) * (1 + rect[1][1])
|
|
55
54
|
length = 2 * (rect[1][0] + rect[1][1]) + 2
|
|
56
55
|
else:
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
length = poly.length
|
|
56
|
+
area = cv2.contourArea(points)
|
|
57
|
+
length = cv2.arcLength(points, closed=True)
|
|
60
58
|
distance = area * self.unclip_ratio / length # compute distance to expand polygon
|
|
61
59
|
offset = pyclipper.PyclipperOffset()
|
|
62
60
|
offset.AddPath(points, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
|
|
@@ -4,6 +4,7 @@
|
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
5
5
|
|
|
6
6
|
import logging
|
|
7
|
+
import os
|
|
7
8
|
from typing import Any
|
|
8
9
|
|
|
9
10
|
import numpy as np
|
|
@@ -15,6 +16,9 @@ from onnxruntime import (
|
|
|
15
16
|
get_available_providers,
|
|
16
17
|
get_device,
|
|
17
18
|
)
|
|
19
|
+
from onnxruntime.capi._pybind_state import set_default_logger_severity
|
|
20
|
+
|
|
21
|
+
set_default_logger_severity(int(os.getenv("ORT_LOG_SEVERITY_LEVEL", 4)))
|
|
18
22
|
|
|
19
23
|
from onnxtr.utils.data import download_from_url
|
|
20
24
|
from onnxtr.utils.geometry import shape_translate
|
|
@@ -106,7 +106,7 @@ class MASTERPostProcessor(RecognitionPostProcessor):
|
|
|
106
106
|
"".join(self._embedding[idx] for idx in encoded_seq).split("<eos>")[0] for encoded_seq in out_idxs
|
|
107
107
|
]
|
|
108
108
|
|
|
109
|
-
return list(zip(word_values, np.clip(probs, 0, 1).astype(float).tolist()))
|
|
109
|
+
return list(zip(word_values, np.clip(probs, 0, 1).astype(float).tolist()))
|
|
110
110
|
|
|
111
111
|
|
|
112
112
|
def _master(
|
|
@@ -4,6 +4,8 @@
|
|
|
4
4
|
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
|
|
5
5
|
|
|
6
6
|
|
|
7
|
+
import math
|
|
8
|
+
|
|
7
9
|
import numpy as np
|
|
8
10
|
from PIL import Image, ImageOps
|
|
9
11
|
|
|
@@ -37,37 +39,51 @@ class Resize:
|
|
|
37
39
|
raise AssertionError("size should be either a tuple or an int")
|
|
38
40
|
|
|
39
41
|
def __call__(self, img: np.ndarray) -> np.ndarray:
|
|
40
|
-
|
|
41
|
-
|
|
42
|
+
if img.dtype != np.uint8:
|
|
43
|
+
img_pil = Image.fromarray((img * 255).clip(0, 255).astype(np.uint8))
|
|
44
|
+
else:
|
|
45
|
+
img_pil = Image.fromarray(img)
|
|
46
|
+
|
|
42
47
|
sh, sw = self.size
|
|
48
|
+
w, h = img_pil.size
|
|
43
49
|
|
|
44
50
|
if not self.preserve_aspect_ratio:
|
|
45
|
-
|
|
51
|
+
img_resized_pil = img_pil.resize((sw, sh), resample=self.interpolation)
|
|
52
|
+
return np.array(img_resized_pil)
|
|
46
53
|
|
|
47
54
|
actual_ratio = h / w
|
|
48
55
|
target_ratio = sh / sw
|
|
49
56
|
|
|
50
|
-
if target_ratio == actual_ratio:
|
|
51
|
-
return np.array(Image.fromarray(img).resize((sw, sh), resample=self.interpolation))
|
|
52
|
-
|
|
53
57
|
if actual_ratio > target_ratio:
|
|
54
|
-
|
|
58
|
+
new_h = sh
|
|
59
|
+
new_w = max(int(sh / actual_ratio), 1)
|
|
55
60
|
else:
|
|
56
|
-
|
|
61
|
+
new_w = sw
|
|
62
|
+
new_h = max(int(sw * actual_ratio), 1)
|
|
57
63
|
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
64
|
+
img_resized_pil = img_pil.resize((new_w, new_h), resample=self.interpolation)
|
|
65
|
+
|
|
66
|
+
delta_w = sw - new_w
|
|
67
|
+
delta_h = sh - new_h
|
|
62
68
|
|
|
63
69
|
if self.symmetric_pad:
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
70
|
+
# Symmetric padding
|
|
71
|
+
pad_left = math.ceil(delta_w / 2)
|
|
72
|
+
pad_right = math.floor(delta_w / 2)
|
|
73
|
+
pad_top = math.ceil(delta_h / 2)
|
|
74
|
+
pad_bottom = math.floor(delta_h / 2)
|
|
75
|
+
else:
|
|
76
|
+
# Asymmetric padding
|
|
77
|
+
pad_left, pad_top = 0, 0
|
|
78
|
+
pad_right, pad_bottom = delta_w, delta_h
|
|
79
|
+
|
|
80
|
+
img_padded_pil = ImageOps.expand(
|
|
81
|
+
img_resized_pil,
|
|
82
|
+
border=(pad_left, pad_top, pad_right, pad_bottom),
|
|
83
|
+
fill=0,
|
|
84
|
+
)
|
|
68
85
|
|
|
69
|
-
|
|
70
|
-
return np.array(img_resized)
|
|
86
|
+
return np.array(img_padded_pil)
|
|
71
87
|
|
|
72
88
|
def __repr__(self) -> str:
|
|
73
89
|
interpolate_str = self.interpolation
|
|
@@ -274,11 +274,11 @@ def draw_boxes(boxes: np.ndarray, image: np.ndarray, color: tuple[int, int, int]
|
|
|
274
274
|
_boxes[:, [1, 3]] *= h
|
|
275
275
|
_boxes = _boxes.astype(np.int32)
|
|
276
276
|
for box in _boxes.tolist():
|
|
277
|
-
xmin, ymin, xmax, ymax = box
|
|
277
|
+
xmin, ymin, xmax, ymax = box
|
|
278
278
|
image = cv2.rectangle(
|
|
279
279
|
image,
|
|
280
|
-
(xmin, ymin),
|
|
281
|
-
(xmax, ymax),
|
|
280
|
+
(xmin, ymin),
|
|
281
|
+
(xmax, ymax),
|
|
282
282
|
color=color if isinstance(color, tuple) else (0, 0, 255),
|
|
283
283
|
thickness=2,
|
|
284
284
|
)
|
|
@@ -9,32 +9,92 @@ __all__ = ["VOCABS"]
|
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
VOCABS: dict[str, str] = {
|
|
12
|
-
|
|
13
|
-
"ascii_letters": string.ascii_letters,
|
|
14
|
-
"punctuation": string.punctuation,
|
|
15
|
-
"currency": "£€¥¢฿",
|
|
16
|
-
"ancient_greek": "αβγδεζηθικλμνξοπρστυφχψωΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ",
|
|
17
|
-
"arabic_letters": "ءآأؤإئابةتثجحخدذرزسشصضطظعغـفقكلمنهوىي",
|
|
18
|
-
"persian_letters": "پچڢڤگ",
|
|
19
|
-
"arabic_digits": "٠١٢٣٤٥٦٧٨٩",
|
|
12
|
+
# Arabic & Persian
|
|
20
13
|
"arabic_diacritics": "ًٌٍَُِّْ",
|
|
14
|
+
"arabic_digits": "٠١٢٣٤٥٦٧٨٩",
|
|
15
|
+
"arabic_letters": "ءآأؤإئابةتثجحخدذرزسشصضطظعغـفقكلمنهوىي",
|
|
21
16
|
"arabic_punctuation": "؟؛«»—",
|
|
22
|
-
"
|
|
23
|
-
|
|
24
|
-
"hindi_punctuation": "।,?!:्ॐ॰॥",
|
|
25
|
-
"bangla_letters": "অআইঈউঊঋএঐওঔকখগঘঙচছজঝঞটঠডঢণতথদধনপফবভমযরলশষসহ়ঽািীুূৃেৈোৌ্ৎংঃঁ",
|
|
17
|
+
"persian_letters": "پچڢڤگ",
|
|
18
|
+
# Bangla
|
|
26
19
|
"bangla_digits": "০১২৩৪৫৬৭৮৯",
|
|
20
|
+
"bangla_letters": "অআইঈউঊঋএঐওঔকখগঘঙচছজঝঞটঠডঢণতথদধনপফবভমযরলশষসহ়ঽািীুূৃেৈোৌ্ৎংঃঁ",
|
|
21
|
+
# Cyrillic
|
|
27
22
|
"generic_cyrillic_letters": "абвгдежзийклмнопрстуфхцчшщьюяАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЬЮЯ",
|
|
23
|
+
"russian_cyrillic_letters": "ёыэЁЫЭ",
|
|
24
|
+
"russian_signs": "ъЪ",
|
|
25
|
+
# Greek
|
|
26
|
+
"ancient_greek": "αβγδεζηθικλμνξοπρστυφχψωΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ",
|
|
27
|
+
# Gujarati
|
|
28
|
+
"gujarati_consonants": "ખગઘચછજઝઞટઠડઢણતથદધનપફબભમયરલવશસહળક્ષ",
|
|
29
|
+
"gujarati_digits": "૦૧૨૩૪૫૬૭૮૯",
|
|
30
|
+
"gujarati_punctuation": "૰ઽ◌ંઃ॥ૐ઼ઁ" + "૱",
|
|
31
|
+
"gujarati_vowels": "અઆઇઈઉઊઋએઐઓ",
|
|
32
|
+
# Hindi
|
|
33
|
+
"hindi_digits": "०१२३४५६७८९",
|
|
34
|
+
"hindi_letters": "अआइईउऊऋॠऌॡएऐओऔंःकखगघङचछजझञटठडढणतथदधनपफबभमयरलवशषसह",
|
|
35
|
+
"hindi_punctuation": "।,?!:्ॐ॰॥",
|
|
36
|
+
# Hebrew
|
|
37
|
+
"hebrew_cantillations": "֑֖֛֢֣֤֥֦֧֪֚֭֮֒֓֔֕֗֘֙֜֝֞֟֠֡֨֩֫֬֯",
|
|
38
|
+
"hebrew_letters": "אבגדהוזחטיךכלםמןנסעףפץצקרשת",
|
|
39
|
+
"hebrew_specials": "ׯװױײיִﬞײַﬠﬡﬢﬣﬤﬥﬦﬧﬨ﬩שׁשׂשּׁשּׂאַאָאּבּגּדּהּוּזּטּיּךּכּלּמּנּסּףּפּצּקּרּשּתּוֹבֿכֿפֿﭏ",
|
|
40
|
+
"hebrew_punctuation": "ֽ־ֿ׀ׁׂ׃ׅׄ׆׳״",
|
|
41
|
+
"hebrew_vowels": "ְֱֲֳִֵֶַָׇֹֺֻ",
|
|
42
|
+
# Latin
|
|
43
|
+
"digits": string.digits,
|
|
44
|
+
"ascii_letters": string.ascii_letters,
|
|
45
|
+
"punctuation": string.punctuation,
|
|
46
|
+
"currency": "£€¥¢฿",
|
|
28
47
|
}
|
|
29
48
|
|
|
49
|
+
# Latin & latin-dependent alphabets
|
|
30
50
|
VOCABS["latin"] = VOCABS["digits"] + VOCABS["ascii_letters"] + VOCABS["punctuation"]
|
|
31
51
|
VOCABS["english"] = VOCABS["latin"] + "°" + VOCABS["currency"]
|
|
32
|
-
|
|
52
|
+
|
|
53
|
+
VOCABS["czech"] = VOCABS["english"] + "áčďéěíňóřšťúůýžÁČĎÉĚÍŇÓŘŠŤÚŮÝŽ"
|
|
54
|
+
|
|
55
|
+
VOCABS["danish"] = VOCABS["english"] + "æøåÆØÅ"
|
|
56
|
+
|
|
57
|
+
VOCABS["dutch"] = VOCABS["english"] + "áéíóúüñÁÉÍÓÚÜÑ"
|
|
58
|
+
|
|
33
59
|
VOCABS["french"] = VOCABS["english"] + "àâéèêëîïôùûüçÀÂÉÈÊËÎÏÔÙÛÜÇ"
|
|
60
|
+
VOCABS["legacy_french"] = VOCABS["latin"] + "°" + "àâéèêëîïôùûçÀÂÉÈËÎÏÔÙÛÇ" + VOCABS["currency"]
|
|
61
|
+
|
|
62
|
+
VOCABS["finnish"] = VOCABS["english"] + "äöÄÖ"
|
|
63
|
+
|
|
64
|
+
VOCABS["german"] = VOCABS["english"] + "äöüßÄÖÜẞ"
|
|
65
|
+
|
|
66
|
+
# Croatian: English vocab plus č, ć, đ, š, ž in both cases.
# The previous literal contained the stray character U+29E6E, which is what the
# ISO-8859-2 bytes for "đŠšŽ" (F0 A9 B9 AE) decode to when read as UTF-8.
VOCABS["croatian"] = VOCABS["english"] + "ČčĆćĐđŠšŽž"
|
|
67
|
+
|
|
68
|
+
VOCABS["hebrew"] = (
|
|
69
|
+
VOCABS["english"]
|
|
70
|
+
+ VOCABS["hebrew_letters"]
|
|
71
|
+
+ VOCABS["hebrew_vowels"]
|
|
72
|
+
+ VOCABS["hebrew_punctuation"]
|
|
73
|
+
+ VOCABS["hebrew_cantillations"]
|
|
74
|
+
+ VOCABS["hebrew_specials"]
|
|
75
|
+
+ "₪"
|
|
76
|
+
)
|
|
77
|
+
|
|
78
|
+
VOCABS["italian"] = VOCABS["english"] + "àèéìíîòóùúÀÈÉÌÍÎÒÓÙÚ"
|
|
79
|
+
|
|
80
|
+
VOCABS["norwegian"] = VOCABS["english"] + "æøåÆØÅ"
|
|
81
|
+
|
|
82
|
+
VOCABS["polish"] = VOCABS["english"] + "ąćęłńóśźżĄĆĘŁŃÓŚŹŻ"
|
|
83
|
+
|
|
34
84
|
VOCABS["portuguese"] = VOCABS["english"] + "áàâãéêíïóôõúüçÁÀÂÃÉÊÍÏÓÔÕÚÜÇ"
|
|
85
|
+
|
|
35
86
|
VOCABS["spanish"] = VOCABS["english"] + "áéíóúüñÁÉÍÓÚÜÑ" + "¡¿"
|
|
36
|
-
|
|
37
|
-
VOCABS["
|
|
87
|
+
|
|
88
|
+
VOCABS["swedish"] = VOCABS["english"] + "åäöÅÄÖ"
|
|
89
|
+
|
|
90
|
+
VOCABS["vietnamese"] = (
|
|
91
|
+
VOCABS["english"]
|
|
92
|
+
+ "áàảạãăắằẳẵặâấầẩẫậđéèẻẽẹêếềểễệóòỏõọôốồổộỗơớờởợỡúùủũụưứừửữựíìỉĩịýỳỷỹỵ"
|
|
93
|
+
+ "ÁÀẢẠÃĂẮẰẲẴẶÂẤẦẨẪẬĐÉÈẺẼẸÊẾỀỂỄỆÓÒỎÕỌÔỐỒỔỘỖƠỚỜỞỢỠÚÙỦŨỤƯỨỪỬỮỰÍÌỈĨỊÝỲỶỸỴ"
|
|
94
|
+
)
|
|
95
|
+
|
|
96
|
+
# Non-latin alphabets.
|
|
97
|
+
# Arabic
|
|
38
98
|
VOCABS["arabic"] = (
|
|
39
99
|
VOCABS["digits"]
|
|
40
100
|
+ VOCABS["arabic_digits"]
|
|
@@ -44,24 +104,37 @@ VOCABS["arabic"] = (
|
|
|
44
104
|
+ VOCABS["arabic_punctuation"]
|
|
45
105
|
+ VOCABS["punctuation"]
|
|
46
106
|
)
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
VOCABS["
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
VOCABS["
|
|
53
|
-
VOCABS["
|
|
54
|
-
VOCABS["
|
|
55
|
-
VOCABS["
|
|
56
|
-
+ "
|
|
57
|
-
+ "
|
|
107
|
+
|
|
108
|
+
# Bangla
|
|
109
|
+
VOCABS["bangla"] = VOCABS["bangla_letters"] + VOCABS["bangla_digits"]
|
|
110
|
+
|
|
111
|
+
# Gujarati
|
|
112
|
+
VOCABS["gujarati"] = (
|
|
113
|
+
VOCABS["gujarati_vowels"]
|
|
114
|
+
+ VOCABS["gujarati_consonants"]
|
|
115
|
+
+ VOCABS["gujarati_digits"]
|
|
116
|
+
+ VOCABS["gujarati_punctuation"]
|
|
117
|
+
+ VOCABS["punctuation"]
|
|
58
118
|
)
|
|
59
|
-
|
|
119
|
+
|
|
120
|
+
# Hindi
|
|
60
121
|
VOCABS["hindi"] = VOCABS["hindi_letters"] + VOCABS["hindi_digits"] + VOCABS["hindi_punctuation"]
|
|
61
|
-
|
|
122
|
+
|
|
123
|
+
# Cyrillic
|
|
124
|
+
VOCABS["russian"] = (
|
|
125
|
+
VOCABS["generic_cyrillic_letters"]
|
|
126
|
+
+ VOCABS["russian_cyrillic_letters"]
|
|
127
|
+
+ VOCABS["russian_signs"]
|
|
128
|
+
+ VOCABS["digits"]
|
|
129
|
+
+ VOCABS["punctuation"]
|
|
130
|
+
+ "₽"
|
|
131
|
+
)
|
|
132
|
+
|
|
62
133
|
VOCABS["ukrainian"] = (
|
|
63
134
|
VOCABS["generic_cyrillic_letters"] + VOCABS["digits"] + VOCABS["punctuation"] + VOCABS["currency"] + "ґіїєҐІЇЄ₴"
|
|
64
135
|
)
|
|
136
|
+
|
|
137
|
+
# Multi-lingual
|
|
65
138
|
VOCABS["multilingual"] = "".join(
|
|
66
139
|
dict.fromkeys(
|
|
67
140
|
VOCABS["french"]
|
|
@@ -69,6 +142,7 @@ VOCABS["multilingual"] = "".join(
|
|
|
69
142
|
+ VOCABS["spanish"]
|
|
70
143
|
+ VOCABS["german"]
|
|
71
144
|
+ VOCABS["czech"]
|
|
145
|
+
+ VOCABS["croatian"]
|
|
72
146
|
+ VOCABS["polish"]
|
|
73
147
|
+ VOCABS["dutch"]
|
|
74
148
|
+ VOCABS["italian"]
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = 'v0.6.3'
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: onnxtr
|
|
3
|
-
Version: 0.6.2
|
|
3
|
+
Version: 0.6.3
|
|
4
4
|
Summary: Onnx Text Recognition (OnnxTR): docTR Onnx-Wrapper for high-performance OCR on documents.
|
|
5
5
|
Author-email: Felix Dittrich <felixdittrich92@gmail.com>
|
|
6
6
|
Maintainer: Felix Dittrich
|
|
@@ -229,7 +229,6 @@ Requires-Dist: numpy<3.0.0,>=1.16.0
|
|
|
229
229
|
Requires-Dist: scipy<2.0.0,>=1.4.0
|
|
230
230
|
Requires-Dist: pypdfium2<5.0.0,>=4.11.0
|
|
231
231
|
Requires-Dist: pyclipper<2.0.0,>=1.2.0
|
|
232
|
-
Requires-Dist: shapely<3.0.0,>=1.6.0
|
|
233
232
|
Requires-Dist: rapidfuzz<4.0.0,>=3.0.0
|
|
234
233
|
Requires-Dist: langdetect<2.0.0,>=1.0.9
|
|
235
234
|
Requires-Dist: huggingface-hub<1.0.0,>=0.23.0
|
|
@@ -264,6 +263,7 @@ Provides-Extra: testing
|
|
|
264
263
|
Requires-Dist: pytest>=5.3.2; extra == "testing"
|
|
265
264
|
Requires-Dist: coverage[toml]>=4.5.4; extra == "testing"
|
|
266
265
|
Requires-Dist: requests>=2.20.0; extra == "testing"
|
|
266
|
+
Requires-Dist: pytest-memray>=1.7.0; extra == "testing"
|
|
267
267
|
Provides-Extra: quality
|
|
268
268
|
Requires-Dist: ruff>=0.1.5; extra == "quality"
|
|
269
269
|
Requires-Dist: mypy>=0.812; extra == "quality"
|
|
@@ -277,9 +277,11 @@ Requires-Dist: mplcursors>=0.3; extra == "dev"
|
|
|
277
277
|
Requires-Dist: pytest>=5.3.2; extra == "dev"
|
|
278
278
|
Requires-Dist: coverage[toml]>=4.5.4; extra == "dev"
|
|
279
279
|
Requires-Dist: requests>=2.20.0; extra == "dev"
|
|
280
|
+
Requires-Dist: pytest-memray>=1.7.0; extra == "dev"
|
|
280
281
|
Requires-Dist: ruff>=0.1.5; extra == "dev"
|
|
281
282
|
Requires-Dist: mypy>=0.812; extra == "dev"
|
|
282
283
|
Requires-Dist: pre-commit>=2.17.0; extra == "dev"
|
|
284
|
+
Dynamic: license-file
|
|
283
285
|
|
|
284
286
|
<p align="center">
|
|
285
287
|
<img src="https://github.com/felixdittrich92/OnnxTR/raw/main/docs/images/logo.jpg" width="40%">
|
|
@@ -290,7 +292,8 @@ Requires-Dist: pre-commit>=2.17.0; extra == "dev"
|
|
|
290
292
|
[](https://codecov.io/gh/felixdittrich92/OnnxTR)
|
|
291
293
|
[](https://app.codacy.com/gh/felixdittrich92/OnnxTR/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade)
|
|
292
294
|
[](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr)
|
|
293
|
-
[](https://socket.dev/pypi/package/onnxtr/overview/0.6.2/tar-gz)
|
|
296
|
+
[](https://pypi.org/project/OnnxTR/)
|
|
294
297
|
[](https://github.com/felixdittrich92/OnnxTR/pkgs/container/onnxtr)
|
|
295
298
|
[](https://huggingface.co/spaces/Felix92/OnnxTR-OCR)
|
|
296
299
|

|
|
@@ -2,7 +2,6 @@ numpy<3.0.0,>=1.16.0
|
|
|
2
2
|
scipy<2.0.0,>=1.4.0
|
|
3
3
|
pypdfium2<5.0.0,>=4.11.0
|
|
4
4
|
pyclipper<2.0.0,>=1.2.0
|
|
5
|
-
shapely<3.0.0,>=1.6.0
|
|
6
5
|
rapidfuzz<4.0.0,>=3.0.0
|
|
7
6
|
langdetect<2.0.0,>=1.0.9
|
|
8
7
|
huggingface-hub<1.0.0,>=0.23.0
|
|
@@ -28,6 +27,7 @@ mplcursors>=0.3
|
|
|
28
27
|
pytest>=5.3.2
|
|
29
28
|
coverage[toml]>=4.5.4
|
|
30
29
|
requests>=2.20.0
|
|
30
|
+
pytest-memray>=1.7.0
|
|
31
31
|
ruff>=0.1.5
|
|
32
32
|
mypy>=0.812
|
|
33
33
|
pre-commit>=2.17.0
|
|
@@ -60,6 +60,7 @@ pre-commit>=2.17.0
|
|
|
60
60
|
pytest>=5.3.2
|
|
61
61
|
coverage[toml]>=4.5.4
|
|
62
62
|
requests>=2.20.0
|
|
63
|
+
pytest-memray>=1.7.0
|
|
63
64
|
|
|
64
65
|
[viz]
|
|
65
66
|
matplotlib>=3.1.0
|
|
@@ -35,7 +35,6 @@ dependencies = [
|
|
|
35
35
|
"scipy>=1.4.0,<2.0.0",
|
|
36
36
|
"pypdfium2>=4.11.0,<5.0.0",
|
|
37
37
|
"pyclipper>=1.2.0,<2.0.0",
|
|
38
|
-
"shapely>=1.6.0,<3.0.0",
|
|
39
38
|
"rapidfuzz>=3.0.0,<4.0.0",
|
|
40
39
|
"langdetect>=1.0.9,<2.0.0",
|
|
41
40
|
"huggingface-hub>=0.23.0,<1.0.0",
|
|
@@ -81,6 +80,7 @@ testing = [
|
|
|
81
80
|
"pytest>=5.3.2",
|
|
82
81
|
"coverage[toml]>=4.5.4",
|
|
83
82
|
"requests>=2.20.0",
|
|
83
|
+
"pytest-memray>=1.7.0",
|
|
84
84
|
]
|
|
85
85
|
quality = [
|
|
86
86
|
"ruff>=0.1.5",
|
|
@@ -100,6 +100,7 @@ dev = [
|
|
|
100
100
|
"pytest>=5.3.2",
|
|
101
101
|
"coverage[toml]>=4.5.4",
|
|
102
102
|
"requests>=2.20.0",
|
|
103
|
+
"pytest-memray>=1.7.0",
|
|
103
104
|
# Quality
|
|
104
105
|
"ruff>=0.1.5",
|
|
105
106
|
"mypy>=0.812",
|
|
@@ -139,7 +140,6 @@ module = [
|
|
|
139
140
|
"matplotlib.*",
|
|
140
141
|
"numpy.*",
|
|
141
142
|
"pyclipper.*",
|
|
142
|
-
"shapely.*",
|
|
143
143
|
"mplcursors.*",
|
|
144
144
|
"defusedxml.*",
|
|
145
145
|
"weasyprint.*",
|
onnxtr-0.6.2/onnxtr/version.py
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = 'v0.6.2'
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|