ocr-stringdist 0.0.3__tar.gz → 0.0.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. {ocr_stringdist-0.0.3 → ocr_stringdist-0.0.5}/.github/workflows/CI.yml +0 -1
  2. ocr_stringdist-0.0.5/.github/workflows/docs.yml +70 -0
  3. {ocr_stringdist-0.0.3 → ocr_stringdist-0.0.5}/.gitignore +0 -1
  4. {ocr_stringdist-0.0.3 → ocr_stringdist-0.0.5}/Cargo.lock +2 -66
  5. {ocr_stringdist-0.0.3 → ocr_stringdist-0.0.5}/Cargo.toml +1 -3
  6. ocr_stringdist-0.0.5/Justfile +14 -0
  7. {ocr_stringdist-0.0.3 → ocr_stringdist-0.0.5}/PKG-INFO +28 -5
  8. {ocr_stringdist-0.0.3 → ocr_stringdist-0.0.5}/README.md +26 -3
  9. ocr_stringdist-0.0.5/docs/Makefile +20 -0
  10. ocr_stringdist-0.0.5/docs/make.bat +35 -0
  11. ocr_stringdist-0.0.5/docs/source/api/index.rst +18 -0
  12. ocr_stringdist-0.0.5/docs/source/conf.py +40 -0
  13. ocr_stringdist-0.0.5/docs/source/index.rst +10 -0
  14. {ocr_stringdist-0.0.3 → ocr_stringdist-0.0.5}/example.py +17 -11
  15. ocr_stringdist-0.0.5/mypy.ini +137 -0
  16. {ocr_stringdist-0.0.3 → ocr_stringdist-0.0.5}/pyproject.toml +16 -1
  17. {ocr_stringdist-0.0.3 → ocr_stringdist-0.0.5}/python/ocr_stringdist/__init__.py +4 -3
  18. ocr_stringdist-0.0.5/python/ocr_stringdist/matching.py +83 -0
  19. ocr_stringdist-0.0.5/ruff.toml +88 -0
  20. {ocr_stringdist-0.0.3 → ocr_stringdist-0.0.5}/src/rust_stringdist.rs +3 -8
  21. ocr_stringdist-0.0.5/src/weighted_levenshtein.rs +322 -0
  22. ocr_stringdist-0.0.5/tests/test_matching.py +39 -0
  23. ocr_stringdist-0.0.5/tests/test_ocr_stringdist.py +106 -0
  24. ocr_stringdist-0.0.5/uv.lock +801 -0
  25. ocr_stringdist-0.0.3/Justfile +0 -12
  26. ocr_stringdist-0.0.3/src/weighted_levenshtein.rs +0 -140
  27. ocr_stringdist-0.0.3/tests/test_ocr_stringdist.py +0 -5
  28. {ocr_stringdist-0.0.3 → ocr_stringdist-0.0.5}/LICENSE +0 -0
  29. {ocr_stringdist-0.0.3 → ocr_stringdist-0.0.5}/python/ocr_stringdist/default_ocr_distances.py +0 -0
  30. {ocr_stringdist-0.0.3 → ocr_stringdist-0.0.5}/python/ocr_stringdist/py.typed +0 -0
  31. {ocr_stringdist-0.0.3 → ocr_stringdist-0.0.5}/src/lib.rs +0 -0
@@ -1,4 +1,3 @@
1
- # This file was edited manually to add
2
1
  # The original was autogenerated by maturin v0.14.15
3
2
  on:
4
3
  push:
@@ -0,0 +1,70 @@
1
+ name: Deploy Documentation to Pages
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+ workflow_dispatch: # Allows manual triggering from the Actions tab
8
+
9
+ # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
10
+ permissions:
11
+ contents: read
12
+ pages: write
13
+ id-token: write
14
+
15
+ # Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued.
16
+ # However, do NOT cancel in-progress runs as we want to allow these production deployments to complete.
17
+ concurrency:
18
+ group: "pages"
19
+ cancel-in-progress: false
20
+
21
+ jobs:
22
+ build:
23
+ runs-on: ubuntu-latest
24
+ steps:
25
+ - name: Checkout repository
26
+ uses: actions/checkout@v4
27
+
28
+ - name: Set up Python
29
+ uses: actions/setup-python@v5
30
+ with:
31
+ python-version: '3.12'
32
+
33
+ - name: Install uv
34
+ run: curl -LsSf https://astral.sh/uv/install.sh | sh
35
+
36
+ - name: Create virtual environment
37
+ run: uv venv
38
+
39
+ - name: Install dependencies
40
+ run: uv sync --group docs
41
+
42
+ - name: Build Sphinx documentation
43
+ run: |
44
+ uv run make -C docs html
45
+ # Add a .nojekyll file to the build output directory to prevent
46
+ # GitHub Pages from ignoring files that start with an underscore
47
+ # (like Sphinx's _static and _images directories).
48
+ touch docs/build/html/.nojekyll
49
+
50
+ - name: Setup Pages
51
+ uses: actions/configure-pages@v4
52
+
53
+ - name: Upload artifact
54
+ uses: actions/upload-pages-artifact@v3
55
+ with:
56
+ # Upload entire directory. GitHub Pages expects index.html at the root.
57
+ path: './docs/build/html'
58
+
59
+ deploy:
60
+ environment:
61
+ name: github-pages
62
+ url: ${{ steps.deployment.outputs.page_url }}
63
+ runs-on: ubuntu-latest
64
+ needs: build
65
+ steps:
66
+ - name: Deploy to GitHub Pages
67
+ id: deployment
68
+ uses: actions/deploy-pages@v4
69
+ # This action automatically downloads the artifact uploaded by
70
+ # upload-pages-artifact and deploys it to GitHub Pages.
@@ -15,7 +15,6 @@ _build/
15
15
  wheelhouse/
16
16
  site/
17
17
  target/
18
- Cargo.lock
19
18
  .venv
20
19
  .vscode
21
20
 
@@ -1,19 +1,6 @@
1
1
  # This file is automatically @generated by Cargo.
2
2
  # It is not intended for manual editing.
3
- version = 4
4
-
5
- [[package]]
6
- name = "ahash"
7
- version = "0.8.11"
8
- source = "registry+https://github.com/rust-lang/crates.io-index"
9
- checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011"
10
- dependencies = [
11
- "cfg-if",
12
- "getrandom",
13
- "once_cell",
14
- "version_check",
15
- "zerocopy",
16
- ]
3
+ version = 3
17
4
 
18
5
  [[package]]
19
6
  name = "autocfg"
@@ -27,17 +14,6 @@ version = "1.0.0"
27
14
  source = "registry+https://github.com/rust-lang/crates.io-index"
28
15
  checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
29
16
 
30
- [[package]]
31
- name = "getrandom"
32
- version = "0.2.15"
33
- source = "registry+https://github.com/rust-lang/crates.io-index"
34
- checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7"
35
- dependencies = [
36
- "cfg-if",
37
- "libc",
38
- "wasi",
39
- ]
40
-
41
17
  [[package]]
42
18
  name = "heck"
43
19
  version = "0.5.0"
@@ -67,11 +43,9 @@ dependencies = [
67
43
 
68
44
  [[package]]
69
45
  name = "ocr_stringdist"
70
- version = "0.0.3"
46
+ version = "0.0.5"
71
47
  dependencies = [
72
- "ahash",
73
48
  "pyo3",
74
- "smallvec",
75
49
  ]
76
50
 
77
51
  [[package]]
@@ -167,12 +141,6 @@ dependencies = [
167
141
  "proc-macro2",
168
142
  ]
169
143
 
170
- [[package]]
171
- name = "smallvec"
172
- version = "1.15.0"
173
- source = "registry+https://github.com/rust-lang/crates.io-index"
174
- checksum = "8917285742e9f3e1683f0a9c4e6b57960b7314d0b08d30d1ecd426713ee2eee9"
175
-
176
144
  [[package]]
177
145
  name = "syn"
178
146
  version = "2.0.100"
@@ -201,35 +169,3 @@ name = "unindent"
201
169
  version = "0.2.4"
202
170
  source = "registry+https://github.com/rust-lang/crates.io-index"
203
171
  checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3"
204
-
205
- [[package]]
206
- name = "version_check"
207
- version = "0.9.5"
208
- source = "registry+https://github.com/rust-lang/crates.io-index"
209
- checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
210
-
211
- [[package]]
212
- name = "wasi"
213
- version = "0.11.0+wasi-snapshot-preview1"
214
- source = "registry+https://github.com/rust-lang/crates.io-index"
215
- checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
216
-
217
- [[package]]
218
- name = "zerocopy"
219
- version = "0.7.35"
220
- source = "registry+https://github.com/rust-lang/crates.io-index"
221
- checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0"
222
- dependencies = [
223
- "zerocopy-derive",
224
- ]
225
-
226
- [[package]]
227
- name = "zerocopy-derive"
228
- version = "0.7.35"
229
- source = "registry+https://github.com/rust-lang/crates.io-index"
230
- checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e"
231
- dependencies = [
232
- "proc-macro2",
233
- "quote",
234
- "syn",
235
- ]
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "ocr_stringdist"
3
- version = "0.0.3"
3
+ version = "0.0.5"
4
4
  edition = "2021"
5
5
  description = "String distances considering OCR errors."
6
6
  authors = ["Niklas von Moers <niklasvmoers@protonmail.com>"]
@@ -15,8 +15,6 @@ crate-type = ["cdylib"]
15
15
 
16
16
  [dependencies]
17
17
  pyo3 = { version = "0.24.0", features = [] }
18
- ahash = "^0.8"
19
- smallvec = "1.15.0"
20
18
 
21
19
  [features]
22
20
  python = []
@@ -0,0 +1,14 @@
1
+ venv:
2
+ rm -rf .venv
3
+ uv venv
4
+ uv sync
5
+
6
+ pytest:
7
+ uv run maturin develop
8
+ uv run pytest
9
+
10
+ test:
11
+ cargo test
12
+
13
+ mypy:
14
+ uv run mypy .
@@ -1,8 +1,8 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ocr_stringdist
3
- Version: 0.0.3
3
+ Version: 0.0.5
4
4
  Classifier: Programming Language :: Rust
5
- Classifier: Programming Language :: Python :: Implementation :: PyPy
5
+ Classifier: Programming Language :: Python
6
6
  Classifier: Operating System :: OS Independent
7
7
  License-File: LICENSE
8
8
  Summary: String distances considering OCR errors.
@@ -17,6 +17,8 @@ Project-URL: repository, https://github.com/NiklasvonM/ocr-stringdist
17
17
 
18
18
  A Python library for string distance calculations that account for common OCR (optical character recognition) errors.
19
19
 
20
+ Documentation: https://niklasvonm.github.io/ocr-stringdist/
21
+
20
22
  [![PyPI](https://img.shields.io/badge/PyPI-Package-blue)](https://pypi.org/project/ocr-stringdist/)
21
23
  [![License](https://img.shields.io/badge/License-MIT-green)](LICENSE)
22
24
 
@@ -35,11 +37,16 @@ pip install ocr-stringdist
35
37
  ## Features
36
38
 
37
39
  - **Weighted Levenshtein Distance**: An adaptation of the classic Levenshtein algorithm with custom substitution costs for character pairs that are commonly confused in OCR models.
40
+ - **Unicode Support**: Arbitrary unicode strings can be compared.
41
+ - **Substitution of Multiple Characters**: Not just character pairs, but string pairs may be substituted, for example the Korean syllable "이" for the two letters "OI".
38
42
  - **Pre-defined OCR Distance Map**: A built-in distance map for common OCR confusions (e.g., "0" vs "O", "1" vs "l", "5" vs "S").
39
43
  - **Customizable Cost Maps**: Create your own substitution cost maps for specific OCR systems or domains.
44
+ - **Best Match Finder**: Utility function `find_best_candidate` to efficiently find the best matching string from a collection of candidates using any specified distance function (including the library's OCR-aware ones).
40
45
 
41
46
  ## Usage
42
47
 
48
+ ### Weighted Levenshtein Distance
49
+
43
50
  ```python
44
51
  import ocr_stringdist as osd
45
52
 
@@ -48,16 +55,32 @@ distance = osd.weighted_levenshtein_distance("OCR5", "OCRS")
48
55
  print(f"Distance between 'OCR5' and 'OCRS': {distance}") # Will be less than 1.0
49
56
 
50
57
  # Custom cost map
51
- custom_map = {("f", "t"): 0.2, ("m", "n"): 0.1}
58
+ custom_map = {("In", "h"): 0.5}
52
59
  distance = osd.weighted_levenshtein_distance(
53
- "first", "tirst",
60
+ "hi", "Ini",
54
61
  cost_map=custom_map,
55
62
  symmetric=True,
56
- default_cost=1.0
63
+ default_cost=1.0,
57
64
  )
58
65
  print(f"Distance with custom map: {distance}")
59
66
  ```
60
67
 
68
+ ### Finding the Best Candidate
69
+
70
+ ```python
71
+ import ocr_stringdist as osd
72
+
73
+ s = "apple"
74
+ candidates = ["apply", "apples", "orange", "appIe"] # 'appIe' has an OCR-like error
75
+
76
+ def ocr_aware_distance(s1: str, s2: str) -> float:
77
+ return osd.weighted_levenshtein_distance(s1, s2, cost_map={("l", "I"): 0.1})
78
+
79
+ best_candidate, best_dist = osd.find_best_candidate(s, candidates, ocr_aware_distance)
80
+ print(f"Best candidate for '{s}' is '{best_candidate}' with distance {best_dist}")
81
+ # Output: Best candidate for 'apple' is 'appIe' with distance 0.1
82
+ ```
83
+
61
84
  ## Acknowledgements
62
85
 
63
86
  This project is inspired by [jellyfish](https://github.com/jamesturk/jellyfish), which provides the base implementations of the algorithms used here.
@@ -2,6 +2,8 @@
2
2
 
3
3
  A Python library for string distance calculations that account for common OCR (optical character recognition) errors.
4
4
 
5
+ Documentation: https://niklasvonm.github.io/ocr-stringdist/
6
+
5
7
  [![PyPI](https://img.shields.io/badge/PyPI-Package-blue)](https://pypi.org/project/ocr-stringdist/)
6
8
  [![License](https://img.shields.io/badge/License-MIT-green)](LICENSE)
7
9
 
@@ -20,11 +22,16 @@ pip install ocr-stringdist
20
22
  ## Features
21
23
 
22
24
  - **Weighted Levenshtein Distance**: An adaptation of the classic Levenshtein algorithm with custom substitution costs for character pairs that are commonly confused in OCR models.
25
+ - **Unicode Support**: Arbitrary unicode strings can be compared.
26
+ - **Substitution of Multiple Characters**: Not just character pairs, but string pairs may be substituted, for example the Korean syllable "이" for the two letters "OI".
23
27
  - **Pre-defined OCR Distance Map**: A built-in distance map for common OCR confusions (e.g., "0" vs "O", "1" vs "l", "5" vs "S").
24
28
  - **Customizable Cost Maps**: Create your own substitution cost maps for specific OCR systems or domains.
29
+ - **Best Match Finder**: Utility function `find_best_candidate` to efficiently find the best matching string from a collection of candidates using any specified distance function (including the library's OCR-aware ones).
25
30
 
26
31
  ## Usage
27
32
 
33
+ ### Weighted Levenshtein Distance
34
+
28
35
  ```python
29
36
  import ocr_stringdist as osd
30
37
 
@@ -33,16 +40,32 @@ distance = osd.weighted_levenshtein_distance("OCR5", "OCRS")
33
40
  print(f"Distance between 'OCR5' and 'OCRS': {distance}") # Will be less than 1.0
34
41
 
35
42
  # Custom cost map
36
- custom_map = {("f", "t"): 0.2, ("m", "n"): 0.1}
43
+ custom_map = {("In", "h"): 0.5}
37
44
  distance = osd.weighted_levenshtein_distance(
38
- "first", "tirst",
45
+ "hi", "Ini",
39
46
  cost_map=custom_map,
40
47
  symmetric=True,
41
- default_cost=1.0
48
+ default_cost=1.0,
42
49
  )
43
50
  print(f"Distance with custom map: {distance}")
44
51
  ```
45
52
 
53
+ ### Finding the Best Candidate
54
+
55
+ ```python
56
+ import ocr_stringdist as osd
57
+
58
+ s = "apple"
59
+ candidates = ["apply", "apples", "orange", "appIe"] # 'appIe' has an OCR-like error
60
+
61
+ def ocr_aware_distance(s1: str, s2: str) -> float:
62
+ return osd.weighted_levenshtein_distance(s1, s2, cost_map={("l", "I"): 0.1})
63
+
64
+ best_candidate, best_dist = osd.find_best_candidate(s, candidates, ocr_aware_distance)
65
+ print(f"Best candidate for '{s}' is '{best_candidate}' with distance {best_dist}")
66
+ # Output: Best candidate for 'apple' is 'appIe' with distance 0.1
67
+ ```
68
+
46
69
  ## Acknowledgements
47
70
 
48
71
  This project is inspired by [jellyfish](https://github.com/jamesturk/jellyfish), which provides the base implementations of the algorithms used here.
@@ -0,0 +1,20 @@
1
+ # Minimal makefile for Sphinx documentation
2
+ #
3
+
4
+ # You can set these variables from the command line, and also
5
+ # from the environment for the first two.
6
+ SPHINXOPTS ?=
7
+ SPHINXBUILD ?= sphinx-build
8
+ SOURCEDIR = source
9
+ BUILDDIR = build
10
+
11
+ # Put it first so that "make" without argument is like "make help".
12
+ help:
13
+ @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14
+
15
+ .PHONY: help Makefile
16
+
17
+ # Catch-all target: route all unknown targets to Sphinx using the new
18
+ # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19
+ %: Makefile
20
+ @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
@@ -0,0 +1,35 @@
1
+ @ECHO OFF
2
+
3
+ pushd %~dp0
4
+
5
+ REM Command file for Sphinx documentation
6
+
7
+ if "%SPHINXBUILD%" == "" (
8
+ set SPHINXBUILD=sphinx-build
9
+ )
10
+ set SOURCEDIR=source
11
+ set BUILDDIR=build
12
+
13
+ %SPHINXBUILD% >NUL 2>NUL
14
+ if errorlevel 9009 (
15
+ echo.
16
+ echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
17
+ echo.installed, then set the SPHINXBUILD environment variable to point
18
+ echo.to the full path of the 'sphinx-build' executable. Alternatively you
19
+ echo.may add the Sphinx directory to PATH.
20
+ echo.
21
+ echo.If you don't have Sphinx installed, grab it from
22
+ echo.https://www.sphinx-doc.org/
23
+ exit /b 1
24
+ )
25
+
26
+ if "%1" == "" goto help
27
+
28
+ %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29
+ goto end
30
+
31
+ :help
32
+ %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33
+
34
+ :end
35
+ popd
@@ -0,0 +1,18 @@
1
+ .. _api_reference:
2
+
3
+ API Reference
4
+ =============
5
+
6
+ This page contains the auto-generated API reference documentation.
7
+
8
+ .. autofunction:: ocr_stringdist.__init__.weighted_levenshtein_distance
9
+
10
+ .. automodule:: ocr_stringdist.matching
11
+ :members:
12
+ :undoc-members:
13
+ :show-inheritance:
14
+
15
+ .. automodule:: ocr_stringdist.default_ocr_distances
16
+ :members:
17
+ :undoc-members:
18
+ :show-inheritance:
@@ -0,0 +1,40 @@
1
+ # Configuration file for the Sphinx documentation builder.
2
+ #
3
+ # For the full list of built-in configuration values, see the documentation:
4
+ # https://www.sphinx-doc.org/en/master/usage/configuration.html
5
+
6
+
7
+ import os
8
+ import sys
9
+
10
+ # source code is in project_root/python/ocr_stringdist
11
+ sys.path.insert(0, os.path.abspath("../../python"))
12
+
13
+
14
+ # -- Project information -----------------------------------------------------
15
+ # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
16
+
17
+ project = "OCR-StringDist"
18
+ copyright = "2025, Niklas von Moers"
19
+ author = "Niklas von Moers"
20
+
21
+ # -- General configuration ---------------------------------------------------
22
+ # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
23
+
24
+ extensions: list[str] = [
25
+ "sphinx.ext.autodoc", # Core library to pull documentation from docstrings
26
+ "sphinx.ext.napoleon", # Support for Google and NumPy style docstrings
27
+ "sphinx.ext.intersphinx", # Link to other projects' documentation
28
+ "sphinx.ext.viewcode", # Add links to source code
29
+ "sphinx_mdinclude", # Include Markdown
30
+ ]
31
+
32
+ templates_path = ["_templates"]
33
+ exclude_patterns: list[str] = []
34
+
35
+
36
+ # -- Options for HTML output -------------------------------------------------
37
+ # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
38
+
39
+ html_theme = "sphinx_rtd_theme"
40
+ html_static_path: list[str] = ["_static"]
@@ -0,0 +1,10 @@
1
+ .. OCR-StringDist documentation master file, created by
2
+ sphinx-quickstart on Sun Apr 20 10:40:20 2025.
3
+
4
+ .. mdinclude:: ../../README.md
5
+
6
+ .. toctree::
7
+ :maxdepth: 2
8
+ :caption: Contents:
9
+
10
+ api/index
@@ -1,5 +1,5 @@
1
- from ocr_stringdist import weighted_levenshtein_distance
2
1
  from icecream import ic
2
+ from ocr_stringdist import find_best_candidate, weighted_levenshtein_distance
3
3
 
4
4
  ic(
5
5
  weighted_levenshtein_distance(
@@ -17,6 +17,15 @@ ic(
17
17
  )
18
18
  )
19
19
 
20
+ # Substitution of multiple characters at once is supported.
21
+ ic(
22
+ weighted_levenshtein_distance(
23
+ "이탈리",
24
+ "OI탈리", # Korean syllables may be confused with multiple Latin letters at once
25
+ {("이", "OI"): 0.5},
26
+ ),
27
+ )
28
+
20
29
  ic(
21
30
  weighted_levenshtein_distance(
22
31
  "ABCDE",
@@ -26,16 +35,13 @@ ic(
26
35
  )
27
36
  )
28
37
 
38
+ ic(weighted_levenshtein_distance("A", "B", {("A", "B"): 0.0}, symmetric=False))
39
+ ic(weighted_levenshtein_distance("A", "B", {("B", "A"): 0.0}, symmetric=False))
40
+
29
41
  ic(
30
- weighted_levenshtein_distance(
31
- "RO8ERT",
32
- "R0BERT",
33
- {("O", "0"): 0.1, ("B", "8"): 0.2},
42
+ find_best_candidate(
43
+ "apple",
44
+ ["apply", "apples", "orange", "appIe"],
45
+ lambda s1, s2: weighted_levenshtein_distance(s1, s2, {("l", "I"): 0.1}),
34
46
  )
35
47
  )
36
-
37
-
38
- ic(weighted_levenshtein_distance("A", "B", {("A", "B"): 0.0}, symmetric=False))
39
- ic(weighted_levenshtein_distance("A", "B", {("B", "A"): 0.0}, symmetric=False))
40
- ic(weighted_levenshtein_distance("B", "A", {("B", "A"): 0.0}, symmetric=False))
41
- ic(weighted_levenshtein_distance("B", "A", {("A", "B"): 0.0}, symmetric=False))
@@ -0,0 +1,137 @@
1
+ ; Based on https://gist.github.com/CodeByAidan/adb2b9e188256def1fe35b932cba7eb8
2
+ [mypy]
3
+ check_untyped_defs = True
4
+ disallow_any_generics = True
5
+ disallow_any_unimported = True
6
+ disallow_subclassing_any = True
7
+ disallow_untyped_calls = True
8
+ disallow_untyped_decorators = True
9
+ disallow_untyped_defs = True
10
+ ignore_missing_imports = True
11
+ no_implicit_optional = True
12
+ pretty = True
13
+ show_column_numbers = True
14
+ show_error_codes = True
15
+ show_error_context = True
16
+ strict_equality = True
17
+ warn_return_any = True
18
+ warn_unused_ignores = True
19
+
20
+ ; Everything below this line is just the defaults:
21
+ ; -----------------------------------
22
+ ; (if a flag is commented out with a `=` and nothing after it,
23
+ ; it has no default value and no custom value is set)
24
+ ; ex. ; mypy_path =
25
+ ; -----------------------------------
26
+ ; (if a flag is commented out with a `=` and a value after it,
27
+ ; that value is the default, but it has been overridden by
28
+ ; my personal preference in the [mypy] settings above)
29
+ ; ex. ; ignore_missing_imports = False
30
+
31
+ ; == Import discovery ==
32
+ ; mypy_path =
33
+ ; files =
34
+ ; modules =
35
+ ; packages =
36
+ ; exclude =
37
+ namespace_packages = True
38
+ explicit_package_bases = False
39
+ ; ignore_missing_imports = False
40
+ follow_imports = normal
41
+ follow_imports_for_stubs = False
42
+ ; python_executable =
43
+ no_site_packages = False
44
+ no_silence_site_packages = False
45
+
46
+ ; == Platform configuration ==
47
+ ; python_version =
48
+ ; platform =
49
+ ; always_true =
50
+ ; always_false =
51
+
52
+ ; == Disallow dynamic typing ==
53
+ ; disallow_any_unimported = False
54
+ disallow_any_expr = False
55
+ disallow_any_decorated = False
56
+ disallow_any_explicit = False
57
+ ; disallow_any_generics = False
58
+ ; disallow_subclassing_any = False
59
+
60
+ ; == Untyped definitions and calls ==
61
+ ; disallow_untyped_calls = False
62
+ ; untyped_calls_exclude =
63
+ ; disallow_untyped_defs = False
64
+ disallow_incomplete_defs = False
65
+ ; check_untyped_defs = False
66
+ ; disallow_untyped_decorators = False
67
+
68
+ ; == None and Optional handling ==
69
+ implicit_optional = False
70
+ strict_optional = True
71
+
72
+ ; == Configuring warnings ==
73
+ warn_redundant_casts = False
74
+ ; warn_unused_ignores = False
75
+ warn_no_return = True
76
+ ; warn_return_any = False
77
+ warn_unreachable = False
78
+
79
+ ; == Suppressing errors ==
80
+ ignore_errors = False
81
+
82
+ ; == Miscellaneous strictness flags ==
83
+ allow_untyped_globals = False
84
+ allow_redefinition = False
85
+ local_partial_types = False
86
+ ; disable_error_code =
87
+ ; enable_error_code =
88
+ implicit_reexport = True
89
+ strict_concatenate = False
90
+ ; strict_equality = False
91
+ strict = False
92
+
93
+ ; == Configuring error messages ==
94
+ ; show_error_context = False
95
+ ; show_column_numbers = False
96
+ hide_error_codes = False
97
+ ; pretty = False
98
+ color_output = True
99
+ error_summary = True
100
+ show_absolute_path = False
101
+ force_uppercase_builtins = False
102
+ force_union_syntax = False
103
+
104
+ ; == Incremental mode ==
105
+ incremental = True
106
+ cache_dir = .mypy_cache
107
+ sqlite_cache = False
108
+ cache_fine_grained = False
109
+ skip_version_check = False
110
+ skip_cache_mtime_checks = False
111
+
112
+ ; == Advanced options ==
113
+ ; plugins =
114
+ pdb = False
115
+ show_traceback = False
116
+ raise_exceptions = False
117
+ ; custom_typing_module =
118
+ ; custom_typeshed_dir =
119
+ warn_incomplete_stub = False
120
+
121
+ ; == Report generation ==
122
+ ; any_exprs_report =
123
+ ; cobertura_xml_report = ; pip install mypy[reports]
124
+ ; html_report = ; pip install mypy[reports]
125
+ ; xslt_html_report = ; pip install mypy[reports]
126
+ ; linecount_report =
127
+ ; linecoverage_report =
128
+ ; lineprecision_report =
129
+ ; txt_report = ; pip install mypy[reports]
130
+ ; xslt_txt_report = ; pip install mypy[reports]
131
+ ; xml_report = ; pip install mypy[reports]
132
+
133
+ ; == Miscellaneous ==
134
+ ; junit_xml =
135
+ scripts_are_modules = False
136
+ warn_unused_configs = False
137
+ verbosity = 0