multiregex 2.0.0__tar.gz → 2.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. {multiregex-2.0.0 → multiregex-2.0.2}/.gitattributes +2 -0
  2. multiregex-2.0.2/.github/CODEOWNERS +2 -0
  3. multiregex-2.0.2/.github/ISSUE_TEMPLATE/config.yml +1 -0
  4. multiregex-2.0.2/.github/ISSUE_TEMPLATE/issue-template.md +6 -0
  5. multiregex-2.0.0/.github/PULL_REQUEST_TEMPLATE.md → multiregex-2.0.2/.github/pull_request_template.md +3 -1
  6. multiregex-2.0.2/.github/workflows/build.yml +44 -0
  7. multiregex-2.0.2/.github/workflows/ci.yml +60 -0
  8. multiregex-2.0.2/.gitignore +127 -0
  9. multiregex-2.0.2/.pre-commit-config.yaml +65 -0
  10. {multiregex-2.0.0 → multiregex-2.0.2}/CHANGELOG.rst +9 -0
  11. {multiregex-2.0.0 → multiregex-2.0.2}/PKG-INFO +23 -21
  12. {multiregex-2.0.0 → multiregex-2.0.2}/README.md +12 -10
  13. {multiregex-2.0.0 → multiregex-2.0.2}/docs/make.bat +1 -1
  14. {multiregex-2.0.0/src → multiregex-2.0.2}/multiregex/__init__.py +32 -23
  15. multiregex-2.0.2/multiregex/py.typed +0 -0
  16. multiregex-2.0.2/multiregex.egg-info/PKG-INFO +125 -0
  17. multiregex-2.0.2/multiregex.egg-info/SOURCES.txt +35 -0
  18. multiregex-2.0.2/multiregex.egg-info/dependency_links.txt +1 -0
  19. multiregex-2.0.2/multiregex.egg-info/requires.txt +1 -0
  20. multiregex-2.0.2/multiregex.egg-info/top_level.txt +1 -0
  21. multiregex-2.0.2/pixi.lock +4161 -0
  22. multiregex-2.0.2/pixi.toml +58 -0
  23. multiregex-2.0.2/pyproject.toml +82 -0
  24. multiregex-2.0.2/setup.cfg +4 -0
  25. {multiregex-2.0.0/src → multiregex-2.0.2/stubs}/ahocorasick.pyi +2 -3
  26. {multiregex-2.0.0 → multiregex-2.0.2}/tests/test_bench.py +1 -1
  27. {multiregex-2.0.0 → multiregex-2.0.2}/tests/test_cpython_tests.py +1 -1
  28. {multiregex-2.0.0 → multiregex-2.0.2}/tests/test_multiregex.py +5 -3
  29. multiregex-2.0.0/.flake8 +0 -11
  30. multiregex-2.0.0/.github/CODEOWNERS +0 -1
  31. multiregex-2.0.0/.github/workflows/ci.yml +0 -68
  32. multiregex-2.0.0/.pre-commit-config.yaml +0 -28
  33. multiregex-2.0.0/environment.yml +0 -22
  34. multiregex-2.0.0/pyproject.toml +0 -60
  35. multiregex-2.0.0/setup.cfg +0 -31
  36. multiregex-2.0.0/setup.py +0 -3
  37. {multiregex-2.0.0 → multiregex-2.0.2}/.github/dependabot.yml +0 -0
  38. {multiregex-2.0.0 → multiregex-2.0.2}/LICENSE +0 -0
  39. {multiregex-2.0.0 → multiregex-2.0.2}/docs/Makefile +0 -0
  40. {multiregex-2.0.0 → multiregex-2.0.2}/docs/changelog.rst +0 -0
  41. {multiregex-2.0.0 → multiregex-2.0.2}/docs/conf.py +0 -0
  42. {multiregex-2.0.0 → multiregex-2.0.2}/docs/index.rst +0 -0
  43. {multiregex-2.0.0 → multiregex-2.0.2}/test_utils/__init__.py +0 -0
  44. {multiregex-2.0.0 → multiregex-2.0.2}/test_utils/cpython_test_re.py +0 -0
  45. {multiregex-2.0.0 → multiregex-2.0.2}/tests/conftest.py +0 -0
@@ -4,3 +4,5 @@
4
4
 
5
5
  *.{py,yaml,yml,sh} text eol=lf
6
6
  *.bat text eol=crlf
7
+ # GitHub syntax highlighting
8
+ pixi.lock linguist-language=YAML linguist-generated=true
@@ -0,0 +1,2 @@
1
+ * @0xbe7a
2
+ * @pavelzw
@@ -0,0 +1 @@
1
+ blank_issues_enabled: false
@@ -0,0 +1,6 @@
1
+ ---
2
+ name: New issue
3
+ about: Create a new issue
4
+ ---
5
+
6
+ <!-- ⚠️ This is an open-source repository. Do not share sensitive information. -->
@@ -1,3 +1,5 @@
1
+ <!-- ⚠️ This is an open-source repository. Do not share sensitive information. -->
2
+
1
3
  <!--
2
4
  Thank you for your pull request.
3
5
  Below are a few things we ask you kindly to self-check before getting a review. Remove checks that are not relevant.
@@ -5,4 +7,4 @@ Below are a few things we ask you kindly to self-check before getting a review.
5
7
 
6
8
  # Checklist
7
9
 
8
- * [ ] Added a `CHANGELOG.rst` entry
10
+ - [ ] Added a `CHANGELOG.rst` entry
@@ -0,0 +1,44 @@
1
+ name: Build
2
+ on:
3
+ pull_request:
4
+ push:
5
+ branches:
6
+ - "main"
7
+ tags:
8
+ - "*"
9
+
10
+ jobs:
11
+ build:
12
+ runs-on: ubuntu-latest
13
+ steps:
14
+ - uses: actions/checkout@v4
15
+ with:
16
+ ref: ${{ github.ref }}
17
+ fetch-depth: 0
18
+ - name: Set up pixi
19
+ uses: prefix-dev/setup-pixi@v0.8.0
20
+ with:
21
+ environments: build
22
+ - name: Build project
23
+ run: pixi run -e build build-wheel
24
+ - name: Upload package
25
+ uses: actions/upload-artifact@v4
26
+ with:
27
+ name: artifact
28
+ path: dist/*
29
+
30
+ release:
31
+ name: Publish package
32
+ if: startsWith(github.ref, 'refs/tags/')
33
+ needs: [build]
34
+ runs-on: ubuntu-latest
35
+ permissions:
36
+ id-token: write
37
+ environment: pypi
38
+ steps:
39
+ - uses: actions/download-artifact@v4
40
+ with:
41
+ name: artifact
42
+ path: dist
43
+ - name: Publish package on PyPi
44
+ uses: pypa/gh-action-pypi-publish@81e9d935c883d0b210363ab89cf05f3894778450
@@ -0,0 +1,60 @@
1
+ name: CI
2
+ on:
3
+ push:
4
+ branches:
5
+ - main
6
+ pull_request:
7
+
8
+ defaults:
9
+ run:
10
+ shell: bash -el {0}
11
+
12
+ jobs:
13
+ unit-tests:
14
+ name: pytest
15
+ timeout-minutes: 10
16
+ runs-on: ${{ matrix.os }}
17
+ strategy:
18
+ fail-fast: false
19
+ matrix:
20
+ include:
21
+ - { os: ubuntu-latest, environment: py38 }
22
+ - { os: ubuntu-latest, environment: py312 }
23
+ - { os: windows-latest, environment: py38 }
24
+ - { os: windows-latest, environment: py312 }
25
+ - { os: macos-latest, environment: py38 }
26
+ - { os: macos-latest, environment: py312 }
27
+ steps:
28
+ - name: Checkout branch
29
+ uses: actions/checkout@v4
30
+ with:
31
+ ref: ${{ github.ref }}
32
+ fetch-depth: 0
33
+ - name: Set up pixi
34
+ uses: prefix-dev/setup-pixi@v0.8.0
35
+ with:
36
+ environments: ${{ matrix.environment }}
37
+ - name: Install repository
38
+ run: pixi run -e ${{ matrix.environment }} postinstall
39
+ - name: Run unittests
40
+ uses: quantco/pytest-action@v2
41
+ with:
42
+ report-title: Unit tests ${{ matrix.environment }}
43
+ custom-pytest: pixi run -e ${{ matrix.environment }} pytest
44
+
45
+ pre-commit-checks:
46
+ name: Pre-commit Checks
47
+ timeout-minutes: 30
48
+ runs-on: ubuntu-latest
49
+ steps:
50
+ - name: Checkout branch
51
+ uses: actions/checkout@v4
52
+ with:
53
+ ref: ${{ github.ref }}
54
+ fetch-depth: 0
55
+ - name: Set up pixi
56
+ uses: prefix-dev/setup-pixi@v0.8.0
57
+ with:
58
+ environments: lint default
59
+ - name: pre-commit
60
+ run: pixi run pre-commit-run --color=always --show-diff-on-failure
@@ -0,0 +1,127 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ env/
12
+ .envrc
13
+ build/
14
+ develop-eggs/
15
+ dist/
16
+ downloads/
17
+ eggs/
18
+ .eggs/
19
+ lib/
20
+ lib64/
21
+ parts/
22
+ sdist/
23
+ var/
24
+ wheels/
25
+ *.egg-info/
26
+ .installed.cfg
27
+ *.egg
28
+ .asv
29
+ pip-wheel-metadata
30
+
31
+ # PyInstaller
32
+ # Usually these files are written by a python script from a template
33
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
34
+ *.manifest
35
+ *.spec
36
+
37
+ # Installer logs
38
+ pip-log.txt
39
+ pip-delete-this-directory.txt
40
+
41
+ # Unit test / coverage reports
42
+ htmlcov/
43
+ .tox/
44
+ .coverage
45
+ .coverage.*
46
+ .cache
47
+ nosetests.xml
48
+ coverage.xml
49
+ *.cover
50
+ .hypothesis/
51
+ /.pytest_cache/
52
+
53
+ # Translations
54
+ *.mo
55
+ *.pot
56
+
57
+ # Django stuff:
58
+ *.log
59
+ local_settings.py
60
+
61
+ # Flask stuff:
62
+ instance/
63
+ .webassets-cache
64
+
65
+ # Scrapy stuff:
66
+ .scrapy
67
+
68
+ # Sphinx documentation
69
+ docs/_build/
70
+ docs/api/
71
+
72
+ # PyBuilder
73
+ target/
74
+
75
+ # Jupyter Notebook
76
+ .ipynb_checkpoints
77
+
78
+ # pyenv
79
+ .python-version
80
+
81
+ # celery beat schedule file
82
+ celerybeat-schedule
83
+
84
+ # SageMath parsed files
85
+ *.sage.py
86
+
87
+ # dotenv
88
+ .env
89
+
90
+ # virtualenv
91
+ .venv
92
+ venv/
93
+ ENV/
94
+
95
+ # Spyder project settings
96
+ .spyderproject
97
+ .spyproject
98
+
99
+ # Rope project settings
100
+ .ropeproject
101
+
102
+ # mkdocs documentation
103
+ /site
104
+
105
+ # mypy
106
+ .mypy_cache/
107
+
108
+ # pycharm
109
+ /.idea/
110
+
111
+
112
+ # experiments
113
+ private_*
114
+
115
+ # mlflow
116
+ mlruns
117
+
118
+ # vscode
119
+ .vscode
120
+
121
+ # direnv
122
+ .envrc
123
+ # pixi environments
124
+ .pixi
125
+ *.egg-info
126
+
127
+ .ruff_cache
@@ -0,0 +1,65 @@
1
+ repos:
2
+ - repo: local
3
+ hooks:
4
+ # docformatter
5
+ - id: docformatter
6
+ name: docformatter
7
+ entry: pixi run -e lint docformatter -i
8
+ language: system
9
+ types: [python]
10
+ # ruff
11
+ - id: ruff
12
+ name: ruff
13
+ entry: pixi run -e lint ruff check --fix --exit-non-zero-on-fix --force-exclude
14
+ language: system
15
+ types_or: [python, pyi]
16
+ require_serial: true
17
+ - id: ruff-format
18
+ name: ruff-format
19
+ entry: pixi run -e lint ruff format --force-exclude
20
+ language: system
21
+ types_or: [python, pyi]
22
+ require_serial: true
23
+ # prettier
24
+ - id: prettier
25
+ name: prettier
26
+ entry: pixi run -e lint prettier --write --list-different --ignore-unknown
27
+ language: system
28
+ types: [text]
29
+ files: \.(md|yml|yaml)$
30
+ # pre-commit-hooks
31
+ - id: trailing-whitespace-fixer
32
+ name: trailing-whitespace-fixer
33
+ entry: pixi run -e lint trailing-whitespace-fixer
34
+ language: system
35
+ types: [text]
36
+ - id: end-of-file-fixer
37
+ name: end-of-file-fixer
38
+ entry: pixi run -e lint end-of-file-fixer
39
+ language: system
40
+ types: [text]
41
+ - id: check-merge-conflict
42
+ name: check-merge-conflict
43
+ entry: pixi run -e lint check-merge-conflict --assume-in-merge
44
+ language: system
45
+ types: [text]
46
+ # typos
47
+ - id: typos
48
+ name: typos
49
+ entry: pixi run -e lint typos --force-exclude
50
+ language: system
51
+ types: [text]
52
+ require_serial: true
53
+ # mypy
54
+ - id: mypy
55
+ name: mypy
56
+ entry: pixi run -e default mypy
57
+ language: system
58
+ types: [python]
59
+ require_serial: true
60
+ # taplo
61
+ - id: taplo
62
+ name: taplo
63
+ entry: pixi run -e lint taplo format
64
+ language: system
65
+ types: [toml]
@@ -7,6 +7,15 @@
7
7
  Changelog
8
8
  =========
9
9
 
10
+ 2.0.2 (2024-05-23)
11
+ ------------------
12
+ - Included a py.typed file to indicate that the package is fully typed.
13
+
14
+ 2.0.1 (2023-06-11)
15
+ ------------------
16
+
17
+ - Fix exception when mixing patterns with prematchers and without prematchers.
18
+
10
19
  2.0.0 (2023-03-08)
11
20
  ------------------
12
21
 
@@ -1,45 +1,48 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: multiregex
3
- Version: 2.0.0
4
- Summary: Speed up regex matching with non-regex substring "prematchers", similar to Bloom filters.
5
- Author-email: "QuantCo, Inc." <noreply@quantco.com>
6
- Maintainer-email: Jonas Haag <jonas.haag@quantco.com>
7
- Description-Content-Type: text/markdown
3
+ Version: 2.0.2
4
+ Summary: Quickly match many regexes against a string. Provides 2-10x speedups over naïve regex matching.
5
+ Author-email: "QuantCo, Inc." <noreply@quantco.com>, Jonas Haag <jonas@lophus.org>
6
+ Maintainer-email: Bela Stoyan <bela.stoyan@quantco.com>
7
+ Project-URL: Home, https://github.com/quantco/multiregex
8
8
  Classifier: Programming Language :: Python :: 3
9
- Classifier: Programming Language :: Python :: 3.4
10
- Classifier: Programming Language :: Python :: 3.5
11
- Classifier: Programming Language :: Python :: 3.6
12
- Classifier: Programming Language :: Python :: 3.7
13
9
  Classifier: Programming Language :: Python :: 3.8
14
10
  Classifier: Programming Language :: Python :: 3.9
15
11
  Classifier: Programming Language :: Python :: 3.10
16
- Project-URL: Home, https://github.com/quantco/multiregex
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Requires-Python: >=3.8
15
+ Description-Content-Type: text/markdown
16
+ License-File: LICENSE
17
+ Requires-Dist: pyahocorasick
17
18
 
18
19
  # multiregex
19
20
 
20
21
  [![CI](https://github.com/Quantco/multiregex/actions/workflows/ci.yml/badge.svg)](https://github.com/Quantco/multiregex/actions/workflows/ci.yml)
21
22
  [![Documentation](https://img.shields.io/badge/docs-latest-success?style=plastic)](https://docs.dev.quantco.cloud/qc-github-artifacts/Quantco/multiregex/latest/index.html)
23
+ [![conda-forge](https://img.shields.io/conda/vn/conda-forge/multiregex?logoColor=white&logo=conda-forge)](https://anaconda.org/conda-forge/multiregex)
24
+ [![pypi-version](https://img.shields.io/pypi/v/multiregex.svg?logo=pypi&logoColor=white)](https://pypi.org/project/multiregex)
25
+ [![python-version](https://img.shields.io/pypi/pyversions/multiregex?logoColor=white&logo=python)](https://pypi.org/project/multiregex)
22
26
 
23
27
  Quickly match many regexes against a string. Provides 2-10x speedups over naïve regex matching.
24
28
 
29
+ ## Introduction
30
+
31
+ See [this introductory blog post](https://tech.quantco.com/2022/07/31/multiregex.html).
32
+
25
33
  ## Installation
26
34
 
27
35
  You can install the package in development mode using:
28
36
 
29
37
  ```bash
30
- git clone git@github.com:quantco/multiregex.git
38
+ git clone https://github.com/quantco/multiregex
31
39
  cd multiregex
32
40
 
33
- # create and activate a fresh environment named multiregex
34
- # see environment.yml for details
35
- mamba env create
36
- conda activate multiregex
37
-
38
- pre-commit install
39
- pip install --no-build-isolation -e .
41
+ pixi run pre-commit-install
42
+ pixi run postinstall
43
+ pixi run test
40
44
  ```
41
45
 
42
-
43
46
  ## Usage
44
47
 
45
48
  ```py
@@ -68,7 +71,7 @@ To be able to quickly match many regexes against a string, `multiregex` uses
68
71
  at least one can be assumed to be present in the haystack if the corresponding regex matches.
69
72
  As an example, a valid prematcher of `r"\w+\.com"` could be `[".com"]` and a valid
70
73
  prematcher of `r"(B|b)aNäNa"` could be `["b"]` or `["anäna"]`.
71
- Note that prematchers must be all-lowercase (in order for ``multiregex`` to be able to support ``re.IGNORECASE``).
74
+ Note that prematchers must be all-lowercase (in order for `multiregex` to be able to support `re.IGNORECASE`).
72
75
 
73
76
  You will likely have to provide your own prematchers for all but the simplest
74
77
  regex patterns:
@@ -120,4 +123,3 @@ print(matcher.format_prematcher_false_positives())
120
123
 
121
124
  In this example, there were 137 input strings that were matched positive by the prematcher but negative by the regex.
122
125
  In other words, the prematcher failed to prevent slow regex evaluation in 72% of the cases.
123
-
@@ -2,27 +2,29 @@
2
2
 
3
3
  [![CI](https://github.com/Quantco/multiregex/actions/workflows/ci.yml/badge.svg)](https://github.com/Quantco/multiregex/actions/workflows/ci.yml)
4
4
  [![Documentation](https://img.shields.io/badge/docs-latest-success?style=plastic)](https://docs.dev.quantco.cloud/qc-github-artifacts/Quantco/multiregex/latest/index.html)
5
+ [![conda-forge](https://img.shields.io/conda/vn/conda-forge/multiregex?logoColor=white&logo=conda-forge)](https://anaconda.org/conda-forge/multiregex)
6
+ [![pypi-version](https://img.shields.io/pypi/v/multiregex.svg?logo=pypi&logoColor=white)](https://pypi.org/project/multiregex)
7
+ [![python-version](https://img.shields.io/pypi/pyversions/multiregex?logoColor=white&logo=python)](https://pypi.org/project/multiregex)
5
8
 
6
9
  Quickly match many regexes against a string. Provides 2-10x speedups over naïve regex matching.
7
10
 
11
+ ## Introduction
12
+
13
+ See [this introductory blog post](https://tech.quantco.com/2022/07/31/multiregex.html).
14
+
8
15
  ## Installation
9
16
 
10
17
  You can install the package in development mode using:
11
18
 
12
19
  ```bash
13
- git clone git@github.com:quantco/multiregex.git
20
+ git clone https://github.com/quantco/multiregex
14
21
  cd multiregex
15
22
 
16
- # create and activate a fresh environment named multiregex
17
- # see environment.yml for details
18
- mamba env create
19
- conda activate multiregex
20
-
21
- pre-commit install
22
- pip install --no-build-isolation -e .
23
+ pixi run pre-commit-install
24
+ pixi run postinstall
25
+ pixi run test
23
26
  ```
24
27
 
25
-
26
28
  ## Usage
27
29
 
28
30
  ```py
@@ -51,7 +53,7 @@ To be able to quickly match many regexes against a string, `multiregex` uses
51
53
  at least one can be assumed to be present in the haystack if the corresponding regex matches.
52
54
  As an example, a valid prematcher of `r"\w+\.com"` could be `[".com"]` and a valid
53
55
  prematcher of `r"(B|b)aNäNa"` could be `["b"]` or `["anäna"]`.
54
- Note that prematchers must be all-lowercase (in order for ``multiregex`` to be able to support ``re.IGNORECASE``).
56
+ Note that prematchers must be all-lowercase (in order for `multiregex` to be able to support `re.IGNORECASE`).
55
57
 
56
58
  You will likely have to provide your own prematchers for all but the simplest
57
59
  regex patterns:
@@ -32,4 +32,4 @@ goto end
32
32
  %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33
33
 
34
34
  :end
35
- popd
35
+ popd
@@ -1,4 +1,5 @@
1
- r"""Speed up regex matching with non-regex substring "prematchers", similar to Bloom filters.
1
+ r"""Speed up regex matching with non-regex substring "prematchers", similar to
2
+ Bloom filters.
2
3
 
3
4
  For each regex pattern we use a list of simple (non-regex) substring prematchers.
4
5
  When evaluating regex patterns on a string, we use the prematchers to restrict
@@ -19,7 +20,9 @@ automatically generated prematchers.
19
20
 
20
21
  import collections
21
22
  import functools
23
+ import importlib
22
24
  import re
25
+ import warnings
23
26
 
24
27
  try:
25
28
  sre_constants = re._constants # type: ignore
@@ -42,7 +45,12 @@ from typing import (
42
45
 
43
46
  import ahocorasick
44
47
 
45
- __version__ = "2.0.0"
48
+ try:
49
+ __version__ = importlib.metadata.version(__name__)
50
+ except importlib.metadata.PackageNotFoundError as e:
51
+ warnings.warn(f"Could not determine version of {__name__}", stacklevel=1)
52
+ warnings.warn(str(e), stacklevel=1)
53
+ __version__ = "unknown"
46
54
 
47
55
 
48
56
  V = TypeVar("V")
@@ -83,10 +91,13 @@ class RegexMatcher:
83
91
  patterns = self._generate_missing_prematchers(patterns)
84
92
  self.patterns = [pattern for pattern, _ in patterns]
85
93
  self.prematchers = dict(patterns)
94
+ enumerated_patterns = list(enumerate(patterns))
86
95
  self.patterns_without_prematchers = {
87
- pattern for pattern, prematchers in patterns if not prematchers
96
+ (idx, pattern)
97
+ for idx, (pattern, prematchers) in enumerated_patterns
98
+ if not prematchers
88
99
  }
89
- self.automaton = self._make_automaton(patterns)
100
+ self.automaton = self._make_automaton(enumerated_patterns)
90
101
 
91
102
  self.count_prematcher_false_positives = count_prematcher_false_positives
92
103
  if count_prematcher_false_positives:
@@ -107,9 +118,7 @@ class RegexMatcher:
107
118
  def safe_set(iterable):
108
119
  if isinstance(iterable, str):
109
120
  raise TypeError(
110
- "Refusing to interpret {!r} as a list of patterns, pass a list of strings instead".format(
111
- iterable
112
- )
121
+ f"Refusing to interpret {iterable!r} as a list of patterns, pass a list of strings instead"
113
122
  )
114
123
  else:
115
124
  return set(iterable)
@@ -130,9 +139,11 @@ class RegexMatcher:
130
139
  patterns = [
131
140
  (
132
141
  pattern,
133
- self.generate_prematchers(pattern)
134
- if prematchers is None
135
- else prematchers,
142
+ (
143
+ self.generate_prematchers(pattern)
144
+ if prematchers is None
145
+ else prematchers
146
+ ),
136
147
  )
137
148
  for pattern, prematchers in patterns
138
149
  ]
@@ -142,10 +153,10 @@ class RegexMatcher:
142
153
  return patterns
143
154
 
144
155
  @staticmethod
145
- def _make_automaton(patterns):
156
+ def _make_automaton(enumerated_patterns):
146
157
  """Create the pyahocorasick automaton."""
147
158
  pattern_candidates_by_prematchers = collections.defaultdict(set)
148
- for pattern_idx, (pattern, prematchers) in enumerate(patterns):
159
+ for pattern_idx, (pattern, prematchers) in enumerated_patterns:
149
160
  for prematcher in prematchers:
150
161
  # `pattern_idx` is used for keeping patterns in order, see `get_pattern_candidates`.
151
162
  pattern_candidates_by_prematchers[prematcher].add(
@@ -190,10 +201,8 @@ class RegexMatcher:
190
201
 
191
202
  """Alias for ``run(re.search, ...)``."""
192
203
  search = functools.partialmethod(run, re.search)
193
-
194
204
  """Alias for ``run(re.match, ...)``."""
195
205
  match = functools.partialmethod(run, re.match)
196
-
197
206
  """Alias for ``run(re.fullmatch, ...)``."""
198
207
  fullmatch = functools.partialmethod(run, re.fullmatch)
199
208
 
@@ -248,18 +257,17 @@ class RegexMatcher:
248
257
  def validate_prematcher(prematcher: str) -> None:
249
258
  if not prematcher or any(map(str.isupper, prematcher)):
250
259
  raise ValueError(
251
- "Prematcher {!r} must be non-empty, all-lowercase, all-ASCII".format(
252
- prematcher
253
- )
260
+ f"Prematcher {prematcher!r} must be non-empty, all-lowercase, all-ASCII"
254
261
  )
255
262
 
256
263
 
257
264
  def generate_prematchers(pattern: Pattern) -> Prematchers:
258
265
  """Generate fallback/default prematchers for the given regex `pattern`.
259
266
 
260
- Currently the fallback prematcher is just the set of longest terminal texts
261
- in the pattern, eg. "Fast(er)? regex(es| matching)" -> {" regex"}. One level of
262
- branches with the "|" character is supported, ie. "(a|bb|ccc)" -> {"ccc", "a", "bb"}.
267
+ Currently the fallback prematcher is just the set of longest
268
+ terminal texts in the pattern, e.g. "Fast(er)? regex(es| matching)"
269
+ -> {" regex"}. One level of branches with the "|" character is
270
+ supported, i.e. "(a|bb|ccc)" -> {"ccc", "a", "bb"}.
263
271
  """
264
272
 
265
273
  def _get_top_level_prematcher(sre_ast):
@@ -283,7 +291,7 @@ def generate_prematchers(pattern: Pattern) -> Prematchers:
283
291
  if all(child_prematchers):
284
292
  return child_prematchers
285
293
 
286
- raise ValueError("Could not generate prematchers for {!r}".format(pattern.pattern))
294
+ raise ValueError(f"Could not generate prematchers for {pattern.pattern!r}")
287
295
 
288
296
 
289
297
  def _simplify_sre_ast(sre_ast):
@@ -315,7 +323,8 @@ def _sre_find_terminals(sre_ast):
315
323
 
316
324
 
317
325
  def _ahocorasick_make_automaton(words: Dict[str, V]) -> "ahocorasick.Automaton[V]":
318
- """Make an ahocorasick automaton from a dictionary of `needle -> value` items."""
326
+ """Make an ahocorasick automaton from a dictionary of `needle -> value`
327
+ items."""
319
328
  automaton = ahocorasick.Automaton() # type: ahocorasick.Automaton[V]
320
329
  for word, value in words.items():
321
330
  _ahocorasick_ensure_successful(automaton.add_word(word, value))
@@ -324,6 +333,6 @@ def _ahocorasick_make_automaton(words: Dict[str, V]) -> "ahocorasick.Automaton[V
324
333
 
325
334
 
326
335
  def _ahocorasick_ensure_successful(res):
327
- """pyahocorasick returns errors as bools."""
336
+ """Pyahocorasick returns errors as bools."""
328
337
  if res is False:
329
338
  raise AhocorasickError("Error performing ahocorasick call")
File without changes