ocr-stringdist 1.0.0__cp310-cp310-win32.whl → 1.0.1__cp310-cp310-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ocr_stringdist/__init__.py +2 -0
- ocr_stringdist/_rust_stringdist.cp310-win32.pyd +0 -0
- ocr_stringdist/edit_operation.py +5 -2
- ocr_stringdist/levenshtein.py +2 -1
- {ocr_stringdist-1.0.0.dist-info → ocr_stringdist-1.0.1.dist-info}/METADATA +1 -1
- ocr_stringdist-1.0.1.dist-info/RECORD +13 -0
- ocr_stringdist-1.0.0.dist-info/RECORD +0 -13
- {ocr_stringdist-1.0.0.dist-info → ocr_stringdist-1.0.1.dist-info}/WHEEL +0 -0
- {ocr_stringdist-1.0.0.dist-info → ocr_stringdist-1.0.1.dist-info}/licenses/LICENSE +0 -0
ocr_stringdist/__init__.py
CHANGED
|
@@ -1,10 +1,12 @@
|
|
|
1
1
|
from .default_ocr_distances import ocr_distance_map
|
|
2
|
+
from .edit_operation import EditOperation
|
|
2
3
|
from .learner import CostLearner
|
|
3
4
|
from .levenshtein import WeightedLevenshtein
|
|
4
5
|
from .matching import find_best_candidate
|
|
5
6
|
|
|
6
7
|
__all__ = [
|
|
7
8
|
"ocr_distance_map",
|
|
9
|
+
"EditOperation",
|
|
8
10
|
"CostLearner",
|
|
9
11
|
"WeightedLevenshtein",
|
|
10
12
|
"find_best_candidate",
|
|
Binary file
|
ocr_stringdist/edit_operation.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
from dataclasses import dataclass
|
|
2
|
-
from typing import Literal, Optional
|
|
1
|
+
from dataclasses import asdict, dataclass
|
|
2
|
+
from typing import Any, Literal, Optional
|
|
3
3
|
|
|
4
4
|
OperationType = Literal["substitute", "insert", "delete", "match"]
|
|
5
5
|
|
|
@@ -14,3 +14,6 @@ class EditOperation:
|
|
|
14
14
|
source_token: Optional[str]
|
|
15
15
|
target_token: Optional[str]
|
|
16
16
|
cost: float
|
|
17
|
+
|
|
18
|
+
def to_dict(self) -> dict[str, Any]:
|
|
19
|
+
return asdict(self)
|
ocr_stringdist/levenshtein.py
CHANGED
|
@@ -191,6 +191,7 @@ class WeightedLevenshtein:
|
|
|
191
191
|
For the counterpart, see :meth:`WeightedLevenshtein.to_dict`.
|
|
192
192
|
|
|
193
193
|
:param data: A dictionary with (not necessarily all of) the following keys:
|
|
194
|
+
|
|
194
195
|
- "substitution_costs": {"from": str, "to": str, "cost": float}
|
|
195
196
|
- "substitution_costs": dict[str, float]
|
|
196
197
|
- "deletion_costs": dict[str, float]
|
|
@@ -206,7 +207,7 @@ class WeightedLevenshtein:
|
|
|
206
207
|
|
|
207
208
|
return cls(
|
|
208
209
|
substitution_costs=sub_costs,
|
|
209
|
-
insertion_costs=data.get("
|
|
210
|
+
insertion_costs=data.get("insertion_costs"),
|
|
210
211
|
deletion_costs=data.get("deletion_costs"),
|
|
211
212
|
symmetric_substitution=data.get("symmetric_substitution", True),
|
|
212
213
|
default_substitution_cost=data.get("default_substitution_cost", 1.0),
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
ocr_stringdist-1.0.1.dist-info/METADATA,sha256=19qpQxHGCgNwcYJ0jg-Fv-_DWBcIJIub2cuXdSMSrNU,4043
|
|
2
|
+
ocr_stringdist-1.0.1.dist-info/WHEEL,sha256=5LC1_L8Ayj7J7rr28y5cm5MOS1WwrgwbELnkPUf-gvQ,92
|
|
3
|
+
ocr_stringdist-1.0.1.dist-info/licenses/LICENSE,sha256=3cNRiJag5vI0KMMDNf0oiaY4vg43rLxRszbMJs1GBoU,1092
|
|
4
|
+
ocr_stringdist/__init__.py,sha256=LIyod7R1MxJgqNsFSJp-b5r1oqZtQBFIXLnxRHOpUnY,360
|
|
5
|
+
ocr_stringdist/_rust_stringdist.cp310-win32.pyd,sha256=X6XqzhIo7y8T5GfY8L_cs6J8Z25ZZq5peDgXAmHQDD0,349184
|
|
6
|
+
ocr_stringdist/default_ocr_distances.py,sha256=vlhzQCCcE-D1xor5RvMW0oaMuL_HP_5Y7SO4ESkdb4w,1075
|
|
7
|
+
ocr_stringdist/edit_operation.py,sha256=9JwWfsurfeZ24rY7eri3WRpqckYauaXCs6C-ewMamDg,497
|
|
8
|
+
ocr_stringdist/learner.py,sha256=blzjifV0S-fjwzYP7iPQZm2iUuZKYqx9o787eB32tAk,10745
|
|
9
|
+
ocr_stringdist/levenshtein.py,sha256=hBu5XTl-ocmecqEpSmVt038B2cnd6pgr9eUZf3DK7Fc,10068
|
|
10
|
+
ocr_stringdist/matching.py,sha256=hM-_M0jpzaC84ekjkWw8qSZUljIfEY86kT8tWL7bq0s,3353
|
|
11
|
+
ocr_stringdist/protocols.py,sha256=Cat28DHGa53-b81J34RSiH6O7Hob8w-y8FcYlpeGLTM,274
|
|
12
|
+
ocr_stringdist/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
13
|
+
ocr_stringdist-1.0.1.dist-info/RECORD,,
|
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
ocr_stringdist-1.0.0.dist-info/METADATA,sha256=UQpqp6A67F89fMleGdVdNkIePwjHqPVze1QIOdu3etA,4043
|
|
2
|
-
ocr_stringdist-1.0.0.dist-info/WHEEL,sha256=5LC1_L8Ayj7J7rr28y5cm5MOS1WwrgwbELnkPUf-gvQ,92
|
|
3
|
-
ocr_stringdist-1.0.0.dist-info/licenses/LICENSE,sha256=3cNRiJag5vI0KMMDNf0oiaY4vg43rLxRszbMJs1GBoU,1092
|
|
4
|
-
ocr_stringdist/__init__.py,sha256=HsCF7QPJmFfJiB5kSnIBDSGsc1lbCddS6u3Ea0fCF0M,295
|
|
5
|
-
ocr_stringdist/_rust_stringdist.cp310-win32.pyd,sha256=BlUuV2vmwokWcighD_PuMZoOOLbMwMQEnrxLkT3cUJs,349184
|
|
6
|
-
ocr_stringdist/default_ocr_distances.py,sha256=vlhzQCCcE-D1xor5RvMW0oaMuL_HP_5Y7SO4ESkdb4w,1075
|
|
7
|
-
ocr_stringdist/edit_operation.py,sha256=8yzz4BUBhqowMwUVWYpkXGc_0GwwGahCu2e55qeLAv4,411
|
|
8
|
-
ocr_stringdist/learner.py,sha256=blzjifV0S-fjwzYP7iPQZm2iUuZKYqx9o787eB32tAk,10745
|
|
9
|
-
ocr_stringdist/levenshtein.py,sha256=e1RQ4bFW-8yV24ajAh7tfqj0sK2W_p0j-vLH15FgPPU,10069
|
|
10
|
-
ocr_stringdist/matching.py,sha256=hM-_M0jpzaC84ekjkWw8qSZUljIfEY86kT8tWL7bq0s,3353
|
|
11
|
-
ocr_stringdist/protocols.py,sha256=Cat28DHGa53-b81J34RSiH6O7Hob8w-y8FcYlpeGLTM,274
|
|
12
|
-
ocr_stringdist/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
13
|
-
ocr_stringdist-1.0.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|