pyannotators-patterns 0.5.123__tar.gz → 0.6.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pyannotators_patterns-0.5.123 → pyannotators_patterns-0.6.3}/.gitignore +1 -0
- {pyannotators_patterns-0.5.123 → pyannotators_patterns-0.6.3}/.pre-commit-config.yaml +6 -9
- {pyannotators_patterns-0.5.123 → pyannotators_patterns-0.6.3}/Dockerfile +4 -4
- {pyannotators_patterns-0.5.123 → pyannotators_patterns-0.6.3}/Jenkinsfile +15 -19
- pyannotators_patterns-0.6.3/MIGRATION.md +166 -0
- pyannotators_patterns-0.6.3/PKG-INFO +129 -0
- {pyannotators_patterns-0.5.123 → pyannotators_patterns-0.6.3}/README.md +27 -13
- {pyannotators_patterns-0.5.123 → pyannotators_patterns-0.6.3}/docs/conf.py +0 -1
- pyannotators_patterns-0.6.3/pyproject.toml +87 -0
- {pyannotators_patterns-0.5.123 → pyannotators_patterns-0.6.3}/src/pyannotators_patterns/__init__.py +2 -1
- {pyannotators_patterns-0.5.123 → pyannotators_patterns-0.6.3}/src/pyannotators_patterns/named_pattern_recognizer.py +30 -24
- pyannotators_patterns-0.6.3/src/pyannotators_patterns/patterns.py +260 -0
- {pyannotators_patterns-0.5.123 → pyannotators_patterns-0.6.3}/tests/assertions.py +4 -12
- pyannotators_patterns-0.6.3/tests/test_annotator.py +172 -0
- pyannotators_patterns-0.6.3/tests/test_coords.py +42 -0
- pyannotators_patterns-0.6.3/tests/test_credit_cards.py +111 -0
- pyannotators_patterns-0.6.3/tests/test_emails.py +75 -0
- {pyannotators_patterns-0.5.123 → pyannotators_patterns-0.6.3}/tests/test_mgrs.py +7 -10
- {pyannotators_patterns-0.5.123 → pyannotators_patterns-0.6.3}/tests/test_tel.py +12 -11
- pyannotators_patterns-0.6.3/tests/test_zip.py +60 -0
- pyannotators_patterns-0.5.123/PKG-INFO +0 -97
- pyannotators_patterns-0.5.123/pyproject.toml +0 -90
- pyannotators_patterns-0.5.123/setup.py +0 -54
- pyannotators_patterns-0.5.123/src/pyannotators_patterns/patterns.py +0 -219
- pyannotators_patterns-0.5.123/tests/test_coords.py +0 -40
- pyannotators_patterns-0.5.123/tests/test_credit_cards.py +0 -58
- pyannotators_patterns-0.5.123/tests/test_emails.py +0 -58
- pyannotators_patterns-0.5.123/tests/test_zip.py +0 -52
- pyannotators_patterns-0.5.123/tox.ini +0 -51
- {pyannotators_patterns-0.5.123 → pyannotators_patterns-0.6.3}/.bumpversion.cfg +0 -0
- {pyannotators_patterns-0.5.123 → pyannotators_patterns-0.6.3}/.github/workflows/main.yml +0 -0
- {pyannotators_patterns-0.5.123 → pyannotators_patterns-0.6.3}/.readthedocs.yml +0 -0
- {pyannotators_patterns-0.5.123 → pyannotators_patterns-0.6.3}/AUTHORS.md +0 -0
- {pyannotators_patterns-0.5.123 → pyannotators_patterns-0.6.3}/CHANGELOG.md +0 -0
- {pyannotators_patterns-0.5.123 → pyannotators_patterns-0.6.3}/LICENSE +0 -0
- {pyannotators_patterns-0.5.123 → pyannotators_patterns-0.6.3}/RELEASE.md +0 -0
- {pyannotators_patterns-0.5.123 → pyannotators_patterns-0.6.3}/bumpversion.py +0 -0
- {pyannotators_patterns-0.5.123 → pyannotators_patterns-0.6.3}/docs/.gitignore +0 -0
- {pyannotators_patterns-0.5.123 → pyannotators_patterns-0.6.3}/docs/CHANGELOG.md +0 -0
- {pyannotators_patterns-0.5.123 → pyannotators_patterns-0.6.3}/docs/LICENSE +0 -0
- {pyannotators_patterns-0.5.123 → pyannotators_patterns-0.6.3}/docs/_static/.gitkeep +0 -0
- {pyannotators_patterns-0.5.123 → pyannotators_patterns-0.6.3}/docs/_templates/.gitkeep +0 -0
- {pyannotators_patterns-0.5.123 → pyannotators_patterns-0.6.3}/docs/index.rst +0 -0
- {pyannotators_patterns-0.5.123 → pyannotators_patterns-0.6.3}/mypy.ini +0 -0
- {pyannotators_patterns-0.5.123 → pyannotators_patterns-0.6.3}/tests/data/coords-document.json +0 -0
- {pyannotators_patterns-0.5.123 → pyannotators_patterns-0.6.3}/tests/data/coords.json +0 -0
- {pyannotators_patterns-0.5.123 → pyannotators_patterns-0.6.3}/tests/data/mgrs-document.json +0 -0
- {pyannotators_patterns-0.5.123 → pyannotators_patterns-0.6.3}/tests/data/mgrs.json +0 -0
- {pyannotators_patterns-0.5.123 → pyannotators_patterns-0.6.3}/tests/data/tel-document.json +0 -0
- {pyannotators_patterns-0.5.123 → pyannotators_patterns-0.6.3}/tests/data/tel.json +0 -0
|
@@ -1,17 +1,14 @@
|
|
|
1
1
|
repos:
|
|
2
|
-
- repo: https://github.com/
|
|
3
|
-
rev:
|
|
2
|
+
- repo: https://github.com/astral-sh/ruff-pre-commit
|
|
3
|
+
rev: v0.3.0
|
|
4
4
|
hooks:
|
|
5
|
-
- id:
|
|
6
|
-
|
|
5
|
+
- id: ruff
|
|
6
|
+
args: [--fix]
|
|
7
|
+
- id: ruff-format
|
|
7
8
|
- repo: https://github.com/pre-commit/pre-commit-hooks
|
|
8
|
-
rev:
|
|
9
|
+
rev: v4.5.0
|
|
9
10
|
hooks:
|
|
10
11
|
- id: end-of-file-fixer
|
|
11
12
|
exclude: '.bumpversion.cfg'
|
|
12
13
|
- id: trailing-whitespace
|
|
13
14
|
exclude: '.bumpversion.cfg'
|
|
14
|
-
- repo: https://gitlab.com/pycqa/flake8
|
|
15
|
-
rev: 3.7.9
|
|
16
|
-
hooks:
|
|
17
|
-
- id: flake8
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
FROM python:3.
|
|
1
|
+
FROM python:3.12-slim-bookworm
|
|
2
2
|
# Install prerequisites
|
|
3
3
|
RUN apt-get update -y && \
|
|
4
4
|
apt-get install -y \
|
|
@@ -11,9 +11,9 @@ RUN apt-get update -y && \
|
|
|
11
11
|
apt-get update -y && \
|
|
12
12
|
apt-get clean all -y
|
|
13
13
|
|
|
14
|
-
#
|
|
15
|
-
|
|
14
|
+
# Install uv
|
|
15
|
+
RUN pip install uv
|
|
16
16
|
|
|
17
|
-
# Add pyproject.toml + README.md for
|
|
17
|
+
# Add pyproject.toml + README.md for uv install
|
|
18
18
|
ADD pyproject.toml pyproject.toml
|
|
19
19
|
ADD README.md README.md
|
|
@@ -20,7 +20,7 @@ pipeline {
|
|
|
20
20
|
JENKINS_UIDGID = '1004:1004'
|
|
21
21
|
|
|
22
22
|
MAJOR_VERSION = '0'
|
|
23
|
-
MINOR_VERSION = '
|
|
23
|
+
MINOR_VERSION = '6'
|
|
24
24
|
}
|
|
25
25
|
|
|
26
26
|
stages {
|
|
@@ -58,7 +58,7 @@ pipeline {
|
|
|
58
58
|
stage('Add credentials') {
|
|
59
59
|
steps {
|
|
60
60
|
script {
|
|
61
|
-
// Add password file for
|
|
61
|
+
// Add password file for uv publishing
|
|
62
62
|
sh "cp ${PATH_HOME}/.passwd-pypi .env"
|
|
63
63
|
}
|
|
64
64
|
}
|
|
@@ -73,6 +73,7 @@ pipeline {
|
|
|
73
73
|
withCredentials([gitUsernamePassword(credentialsId: 'bitbucket-user', gitToolName: 'git-tool')]) {
|
|
74
74
|
sh 'git pull'
|
|
75
75
|
sh "echo '\"\"\"Annotator based on Presidio pattern recognizer\"\"\"' > src/pyannotators_patterns/__init__.py"
|
|
76
|
+
sh "echo '' >> src/pyannotators_patterns/__init__.py"
|
|
76
77
|
sh "echo '__version__ = \"${MAJOR_VERSION}.${MINOR_VERSION}.${BUILD_ID}\"' >> src/pyannotators_patterns/__init__.py"
|
|
77
78
|
sh 'git commit src/pyannotators_patterns/__init__.py -m "[Jenkins CI] Commit on version files" || echo "No changes to commit"'
|
|
78
79
|
sh 'git push'
|
|
@@ -103,13 +104,10 @@ pipeline {
|
|
|
103
104
|
}
|
|
104
105
|
|
|
105
106
|
stages {
|
|
106
|
-
stage('Install
|
|
107
|
+
stage('Install dependencies') {
|
|
107
108
|
steps {
|
|
108
|
-
|
|
109
|
-
sh '
|
|
110
|
-
sh 'python -m pip install pip==22.0.3'
|
|
111
|
-
sh 'pip install --no-cache-dir flit==3.2.0 flake8==3.9.2 flakehell tox'
|
|
112
|
-
sh 'flit install'
|
|
109
|
+
sh 'pip install uv'
|
|
110
|
+
sh 'uv sync --extra test'
|
|
113
111
|
}
|
|
114
112
|
}
|
|
115
113
|
|
|
@@ -117,20 +115,20 @@ pipeline {
|
|
|
117
115
|
steps {
|
|
118
116
|
// remove any previous results.xml file
|
|
119
117
|
sh "rm -f ${TEST_REPORT_DIR}/results.xml"
|
|
120
|
-
sh '
|
|
118
|
+
sh 'uv run ruff check .'
|
|
119
|
+
sh 'uv run ruff format --check .'
|
|
120
|
+
sh "uv run pytest --junit-xml=${TEST_REPORT_DIR}/results.xml"
|
|
121
121
|
}
|
|
122
122
|
}
|
|
123
123
|
|
|
124
124
|
stage('Publish on PyPI') {
|
|
125
125
|
environment {
|
|
126
|
-
|
|
127
|
-
|
|
126
|
+
UV_PUBLISH_USERNAME = getUserName '.env'
|
|
127
|
+
UV_PUBLISH_PASSWORD = getUserPass '.env'
|
|
128
128
|
}
|
|
129
129
|
steps {
|
|
130
130
|
// remove any previous folder dist
|
|
131
131
|
sh 'rm -rf dist'
|
|
132
|
-
// create (as root) folder dist
|
|
133
|
-
sh 'mkdir dist'
|
|
134
132
|
// pull recent updates of file __init__.py
|
|
135
133
|
withCredentials([gitUsernamePassword(credentialsId: 'bitbucket-user', gitToolName: 'git-tool')]) {
|
|
136
134
|
sh 'git config --global pull.rebase false'
|
|
@@ -143,18 +141,16 @@ pipeline {
|
|
|
143
141
|
sh "chown ${JENKINS_UIDGID} src/pyannotators_patterns/__init__.py"
|
|
144
142
|
// get git status
|
|
145
143
|
sh 'git status'
|
|
146
|
-
// publish on PyPI
|
|
144
|
+
// build and publish on PyPI
|
|
147
145
|
sh '''
|
|
148
146
|
export COMMIT_VERSION=$( cat src/pyannotators_patterns/__init__.py|grep version|cut -d '"' -f2|tr -s '[:blank:]' )
|
|
149
147
|
export BUILD_VERSION="${MAJOR_VERSION}"."${MINOR_VERSION}"."${BUILD_ID}"
|
|
150
|
-
if [ "${COMMIT_VERSION}" = "${BUILD_VERSION}" ] ; then
|
|
148
|
+
if [ "${COMMIT_VERSION}" = "${BUILD_VERSION}" ] ; then uv build && uv publish ; fi
|
|
151
149
|
'''
|
|
152
150
|
// remove current folder dist
|
|
153
151
|
sh 'rm -rf dist'
|
|
154
152
|
// remove current folder .hypothesis
|
|
155
153
|
sh 'rm -rf .hypothesis'
|
|
156
|
-
// remove current folder .tox
|
|
157
|
-
sh 'rm -rf .tox'
|
|
158
154
|
}
|
|
159
155
|
}
|
|
160
156
|
}
|
|
@@ -239,7 +235,7 @@ pipeline {
|
|
|
239
235
|
}
|
|
240
236
|
}
|
|
241
237
|
|
|
242
|
-
// return
|
|
238
|
+
// return UV_PUBLISH_USERNAME from given file (reads FLIT_USERNAME key)
|
|
243
239
|
def getUserName(path) {
|
|
244
240
|
def USERNAME = sh(
|
|
245
241
|
script: "grep FLIT_USERNAME ${path}|cut -d '=' -f2",
|
|
@@ -248,7 +244,7 @@ def getUserName(path) {
|
|
|
248
244
|
return USERNAME
|
|
249
245
|
}
|
|
250
246
|
|
|
251
|
-
// return
|
|
247
|
+
// return UV_PUBLISH_PASSWORD from given file (reads FLIT_PASSWORD key)
|
|
252
248
|
def getUserPass(path) {
|
|
253
249
|
def USERPASS = sh(
|
|
254
250
|
script: "grep FLIT_PASSWORD ${path}|cut -d '=' -f2",
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
# Migration to Python 3.12, Pydantic v2, and uv tooling
|
|
2
|
+
|
|
3
|
+
This document describes the migration steps to perform on this project according to past experience. Please apply all these changes if they make sense.
|
|
4
|
+
|
|
5
|
+
## 0. Initial setup
|
|
6
|
+
- create a py312 git branch and switch to it
|
|
7
|
+
- change pyenv local to pymultirole312
|
|
8
|
+
- reset version in __init__.py to 0.6.0
|
|
9
|
+
|
|
10
|
+
## 1. Build system: flit → hatchling/uv
|
|
11
|
+
|
|
12
|
+
The build system was migrated from **flit** to **hatchling** with **uv** as the package manager.
|
|
13
|
+
|
|
14
|
+
- `pyproject.toml`: rewritten from flit legacy format (`[tool.flit.metadata]`) to PEP 621
|
|
15
|
+
with `hatchling` backend and `[project]` table
|
|
16
|
+
- Version read dynamically from `src/pyannotators_patterns/__init__.py` via `[tool.hatch.version]`
|
|
17
|
+
- Entry point `pyannotators.plugins` moved from `[tool.flit.entrypoints]` to `[project.entry-points]`
|
|
18
|
+
- Replaced `flit` commands with `uv build` and `uv publish`
|
|
19
|
+
- Jenkinsfile updated to use `uv sync` and `uv build`/`uv publish`
|
|
20
|
+
- Dockerfile updated to install `uv` via `RUN pip install uv`
|
|
21
|
+
|
|
22
|
+
## 2. Python version: 3.8+ → 3.12
|
|
23
|
+
|
|
24
|
+
- `requires-python` set to `>=3.12` in `pyproject.toml`
|
|
25
|
+
- `Dockerfile` base image changed from `python:3.8-slim-bookworm` to `python:3.12-slim-bookworm`
|
|
26
|
+
- `FLIT_ROOT_INSTALL=1` env var removed from Dockerfile (no longer needed)
|
|
27
|
+
- Classifiers updated from `Python :: 3.8` to `Python :: 3.12`
|
|
28
|
+
|
|
29
|
+
## 3. Pydantic v1 → v2
|
|
30
|
+
|
|
31
|
+
Replaced deprecated Pydantic v1 APIs in `pyword.py` and `tests/test_pyword.py`:
|
|
32
|
+
|
|
33
|
+
- `.json(exclude_none=True, exclude_unset=True, indent=2)` → `.model_dump_json(...)` in tests
|
|
34
|
+
- `DocumentList(__root__=docs)` → `DocumentList(root=docs)` (Pydantic v2 `RootModel` API)
|
|
35
|
+
- `Field(extra="internal"/"advanced")` → `Field(json_schema_extra={"extra": "..."})` in `pyword.py`
|
|
36
|
+
- `params.dict()` → `params.model_dump()` in `pyword.py`
|
|
37
|
+
|
|
38
|
+
## 4. Python 3.12 type modernization
|
|
39
|
+
|
|
40
|
+
Leveraged Python 3.12 builtins and modern syntax (enforced by ruff `UP` rules):
|
|
41
|
+
|
|
42
|
+
- `typing.List[Document]` → `list[Document]` (method signatures in `pyword.py` and tests)
|
|
43
|
+
- `typing.Type[BaseModel]` → `type[BaseModel]` (`get_model` return type in `pyword.py`)
|
|
44
|
+
- Removed unused `typing` imports: `List`, `Type`
|
|
45
|
+
|
|
46
|
+
## 5. Linter: black + flake8 → ruff
|
|
47
|
+
|
|
48
|
+
- Removed `black`, `flake8`, `pytest-flake8`, `pytest-black`, `flakehell` dependencies
|
|
49
|
+
- Added `ruff` to `[project.optional-dependencies].test`
|
|
50
|
+
- Removed `[tool.flakehell]` configuration block from `pyproject.toml`
|
|
51
|
+
- Configured `[tool.ruff]` in `pyproject.toml`:
|
|
52
|
+
- `line-length = 120`
|
|
53
|
+
- `target-version = "py312"`
|
|
54
|
+
- Lint rules: `E`, `W`, `F`, `I`, `B`, `C4`, `UP`, `ARG`, `SIM`
|
|
55
|
+
- Format: double quotes, space indent
|
|
56
|
+
- Updated `.pre-commit-config.yaml`: replaced `black` and `flake8` hooks with `ruff` and `ruff-format`
|
|
57
|
+
|
|
58
|
+
### Running the linter
|
|
59
|
+
|
|
60
|
+
```
|
|
61
|
+
uv run ruff check .
|
|
62
|
+
uv run ruff format --check .
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
To auto-fix formatting:
|
|
66
|
+
|
|
67
|
+
```
|
|
68
|
+
uv run ruff format .
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
## 6. Test runner: tox → uv
|
|
72
|
+
|
|
73
|
+
**tox** was removed entirely. The project now uses `uv run` to execute tests and linting directly.
|
|
74
|
+
|
|
75
|
+
### What changed
|
|
76
|
+
|
|
77
|
+
- Deleted `tox.ini`
|
|
78
|
+
- Removed `tox`, `flake8`, `pytest-flake8`, `pytest-black` from test dependencies
|
|
79
|
+
- Moved pytest configuration from `tox.ini` to `pyproject.toml`:
|
|
80
|
+
|
|
81
|
+
```toml
|
|
82
|
+
[tool.pytest.ini_options]
|
|
83
|
+
addopts = "--durations=5"
|
|
84
|
+
norecursedirs = ["docs"]
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
### Running tests
|
|
88
|
+
|
|
89
|
+
```
|
|
90
|
+
uv run pytest
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
### Why tox was unnecessary
|
|
94
|
+
|
|
95
|
+
- The project targets a single Python version (3.12)
|
|
96
|
+
- `uv run` provides virtual environment isolation without the extra layer
|
|
97
|
+
|
|
98
|
+
## 7. Documentation generation
|
|
99
|
+
|
|
100
|
+
Sphinx is used for documentation with the `docs` optional dependency group.
|
|
101
|
+
|
|
102
|
+
### Fix: added `lxml_html_clean` dependency
|
|
103
|
+
|
|
104
|
+
The `jupyter_sphinx` extension requires `lxml_html_clean` (split from `lxml` in recent versions).
|
|
105
|
+
This was added to `[project.optional-dependencies].docs`.
|
|
106
|
+
|
|
107
|
+
### Generating docs
|
|
108
|
+
|
|
109
|
+
```
|
|
110
|
+
uv run --extra docs sphinx-build docs docs/_build
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
## 8. Jenkinsfile updates
|
|
114
|
+
|
|
115
|
+
- `__init__.py` generation now includes a blank line between the docstring and `__version__`
|
|
116
|
+
to comply with `ruff format`
|
|
117
|
+
- Set `__version__ = "0.6.0"`
|
|
118
|
+
- In Jenkinsfile, set `MINOR_VERSION = "6"`
|
|
119
|
+
- Install step changed from `flit install` to `uv sync --extra test`
|
|
120
|
+
- Lint step added: `uv run ruff check .` + `uv run ruff format --check .`
|
|
121
|
+
- Test step changed from `tox` to `uv run pytest --junit-xml=...`
|
|
122
|
+
- Build/publish pipeline uses `uv build && uv publish` instead of `flit publish`
|
|
123
|
+
- Credentials: Jenkinsfile helper functions still read `FLIT_USERNAME` and `FLIT_PASSWORD`
|
|
124
|
+
from `.passwd-pypi`, but expose them as `UV_PUBLISH_USERNAME` and `UV_PUBLISH_PASSWORD`
|
|
125
|
+
for `uv publish`
|
|
126
|
+
- Removed `.tox` cleanup steps
|
|
127
|
+
|
|
128
|
+
## 9. .gitignore updates
|
|
129
|
+
|
|
130
|
+
Added the following entry:
|
|
131
|
+
- `uv.lock` — generated lock file, not committed
|
|
132
|
+
|
|
133
|
+
(`docs/_build/` was already present.)
|
|
134
|
+
|
|
135
|
+
## 10. Dependency cleanup and upgrades
|
|
136
|
+
|
|
137
|
+
### Removed unused dependencies
|
|
138
|
+
|
|
139
|
+
- None
|
|
140
|
+
|
|
141
|
+
### Upgraded pinned dependencies
|
|
142
|
+
|
|
143
|
+
- `pymultirole-plugins>=0.5.0,<0.6.0` → `pymultirole-plugins>=0.6.0,<0.7.0`
|
|
144
|
+
- update dependencies to latest versions if possible
|
|
145
|
+
|
|
146
|
+
## 11. Starlette `UploadFile` API change
|
|
147
|
+
|
|
148
|
+
The `UploadFile` constructor signature changed in recent Starlette versions:
|
|
149
|
+
|
|
150
|
+
**Before (old API):**
|
|
151
|
+
```python
|
|
152
|
+
UploadFile(filename, file, content_type) # positional args
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
**After (new API):**
|
|
156
|
+
```python
|
|
157
|
+
UploadFile(file=..., filename=..., headers=Headers({"content-type": "..."}))
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
Updated in `tests/test_pyword.py` and added `Headers` import from `starlette.datastructures`.
|
|
161
|
+
|
|
162
|
+
## 12. Additional unit tests
|
|
163
|
+
|
|
164
|
+
- Analyzed the `src` directory and extended `tests/` with tests covering previously untested behaviour
|
|
165
|
+
|
|
166
|
+
## 13. Update README.md
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pyannotators-patterns
|
|
3
|
+
Version: 0.6.3
|
|
4
|
+
Summary: Annotator based on Presidio pattern recognizer
|
|
5
|
+
Project-URL: Homepage, https://github.com/oterrier/pyannotators_patterns/
|
|
6
|
+
Author-email: Olivier Terrier <olivier.terrier@kairntech.com>
|
|
7
|
+
License: The MIT License (MIT)
|
|
8
|
+
|
|
9
|
+
Copyright (c) 2021 Olivier Terrier
|
|
10
|
+
|
|
11
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
12
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
13
|
+
in the Software without restriction, including without limitation the rights
|
|
14
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
15
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
16
|
+
furnished to do so, subject to the following conditions:
|
|
17
|
+
|
|
18
|
+
The above copyright notice and this permission notice shall be included in
|
|
19
|
+
all copies or substantial portions of the Software.
|
|
20
|
+
|
|
21
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
22
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
23
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
24
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
25
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
26
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
27
|
+
THE SOFTWARE.
|
|
28
|
+
License-File: AUTHORS.md
|
|
29
|
+
License-File: LICENSE
|
|
30
|
+
Classifier: Development Status :: 4 - Beta
|
|
31
|
+
Classifier: Intended Audience :: Developers
|
|
32
|
+
Classifier: Intended Audience :: Information Technology
|
|
33
|
+
Classifier: Intended Audience :: System Administrators
|
|
34
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
35
|
+
Classifier: Operating System :: OS Independent
|
|
36
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
37
|
+
Classifier: Topic :: Software Development
|
|
38
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
39
|
+
Classifier: Topic :: Software Development :: Libraries :: Application Frameworks
|
|
40
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
41
|
+
Requires-Python: >=3.12
|
|
42
|
+
Requires-Dist: collections-extended
|
|
43
|
+
Requires-Dist: log-with-context
|
|
44
|
+
Requires-Dist: presidio-analyzer>=2.2.354
|
|
45
|
+
Requires-Dist: pymultirole-plugins<0.7.0,>=0.6.0
|
|
46
|
+
Requires-Dist: spacy[lookups]>=3.7.0
|
|
47
|
+
Requires-Dist: tldextract>=5.1.2
|
|
48
|
+
Requires-Dist: unidecode
|
|
49
|
+
Provides-Extra: dev
|
|
50
|
+
Requires-Dist: bump2version; extra == 'dev'
|
|
51
|
+
Requires-Dist: pre-commit; extra == 'dev'
|
|
52
|
+
Provides-Extra: docs
|
|
53
|
+
Requires-Dist: jupyter-sphinx; extra == 'docs'
|
|
54
|
+
Requires-Dist: lxml-html-clean; extra == 'docs'
|
|
55
|
+
Requires-Dist: m2r2; extra == 'docs'
|
|
56
|
+
Requires-Dist: sphinx; extra == 'docs'
|
|
57
|
+
Requires-Dist: sphinx-rtd-theme; extra == 'docs'
|
|
58
|
+
Requires-Dist: sphinxcontrib-apidoc; extra == 'docs'
|
|
59
|
+
Provides-Extra: test
|
|
60
|
+
Requires-Dist: dirty-equals; extra == 'test'
|
|
61
|
+
Requires-Dist: pip; extra == 'test'
|
|
62
|
+
Requires-Dist: pytest; extra == 'test'
|
|
63
|
+
Requires-Dist: pytest-check; extra == 'test'
|
|
64
|
+
Requires-Dist: pytest-cov; extra == 'test'
|
|
65
|
+
Requires-Dist: ruff; extra == 'test'
|
|
66
|
+
Description-Content-Type: text/markdown
|
|
67
|
+
|
|
68
|
+
# pyannotators_patterns
|
|
69
|
+
|
|
70
|
+
[](https://github.com/oterrier/pyannotators_patterns/blob/master/LICENSE)
|
|
71
|
+
[](https://github.com/oterrier/pyannotators_patterns/actions?query=workflow%3Atests)
|
|
72
|
+
[](https://codecov.io/gh/oterrier/pyannotators_patterns)
|
|
73
|
+
[](https://pyannotators_patterns.readthedocs.io)
|
|
74
|
+
[](https://pypi.org/project/pyannotators_patterns/)
|
|
75
|
+
[](https://pypi.org/project/pyannotators_patterns/)
|
|
76
|
+
|
|
77
|
+
Annotator based on Presidio regex pattern recognizers.
|
|
78
|
+
|
|
79
|
+
## Installation
|
|
80
|
+
|
|
81
|
+
```
|
|
82
|
+
pip install pyannotators-patterns
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
## Developing
|
|
86
|
+
|
|
87
|
+
### Prerequisites
|
|
88
|
+
|
|
89
|
+
You will need [uv](https://github.com/astral-sh/uv) (package manager) and Python 3.12+.
|
|
90
|
+
|
|
91
|
+
Clone the repository:
|
|
92
|
+
|
|
93
|
+
```
|
|
94
|
+
git clone https://github.com/oterrier/pyannotators_patterns
|
|
95
|
+
cd pyannotators_patterns
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
Install dependencies (including test extras):
|
|
99
|
+
|
|
100
|
+
```
|
|
101
|
+
uv sync --extra test
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
### Running the test suite
|
|
105
|
+
|
|
106
|
+
```
|
|
107
|
+
uv run pytest
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
### Linting and formatting
|
|
111
|
+
|
|
112
|
+
```
|
|
113
|
+
uv run ruff check .
|
|
114
|
+
uv run ruff format --check .
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
To auto-fix formatting:
|
|
118
|
+
|
|
119
|
+
```
|
|
120
|
+
uv run ruff format .
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
### Building the documentation
|
|
124
|
+
|
|
125
|
+
```
|
|
126
|
+
uv run --extra docs sphinx-build docs docs/_build
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
The built documentation is available at `docs/_build/index.html`.
|
|
@@ -7,42 +7,56 @@
|
|
|
7
7
|
[](https://pypi.org/project/pyannotators_patterns/)
|
|
8
8
|
[](https://pypi.org/project/pyannotators_patterns/)
|
|
9
9
|
|
|
10
|
-
Annotator based on
|
|
10
|
+
Annotator based on Presidio regex pattern recognizers.
|
|
11
11
|
|
|
12
12
|
## Installation
|
|
13
13
|
|
|
14
|
-
|
|
14
|
+
```
|
|
15
|
+
pip install pyannotators-patterns
|
|
16
|
+
```
|
|
15
17
|
|
|
16
18
|
## Developing
|
|
17
19
|
|
|
18
|
-
###
|
|
20
|
+
### Prerequisites
|
|
19
21
|
|
|
20
|
-
You will need
|
|
22
|
+
You will need [uv](https://github.com/astral-sh/uv) (package manager) and Python 3.12+.
|
|
23
|
+
|
|
24
|
+
Clone the repository:
|
|
21
25
|
|
|
22
26
|
```
|
|
23
|
-
|
|
27
|
+
git clone https://github.com/oterrier/pyannotators_patterns
|
|
28
|
+
cd pyannotators_patterns
|
|
24
29
|
```
|
|
25
30
|
|
|
26
|
-
|
|
31
|
+
Install dependencies (including test extras):
|
|
27
32
|
|
|
28
33
|
```
|
|
29
|
-
|
|
34
|
+
uv sync --extra test
|
|
30
35
|
```
|
|
31
36
|
|
|
32
37
|
### Running the test suite
|
|
33
38
|
|
|
34
|
-
|
|
39
|
+
```
|
|
40
|
+
uv run pytest
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
### Linting and formatting
|
|
35
44
|
|
|
36
45
|
```
|
|
37
|
-
|
|
46
|
+
uv run ruff check .
|
|
47
|
+
uv run ruff format --check .
|
|
38
48
|
```
|
|
39
49
|
|
|
40
|
-
|
|
50
|
+
To auto-fix formatting:
|
|
41
51
|
|
|
42
|
-
|
|
52
|
+
```
|
|
53
|
+
uv run ruff format .
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
### Building the documentation
|
|
43
57
|
|
|
44
58
|
```
|
|
45
|
-
|
|
59
|
+
uv run --extra docs sphinx-build docs docs/_build
|
|
46
60
|
```
|
|
47
61
|
|
|
48
|
-
The built documentation is available at `docs/_build/index.html
|
|
62
|
+
The built documentation is available at `docs/_build/index.html`.
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "pyannotators-patterns"
|
|
7
|
+
dynamic = ["version"]
|
|
8
|
+
description = "Annotator based on Presidio pattern recognizer"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = {file = "LICENSE"}
|
|
11
|
+
authors = [
|
|
12
|
+
{name = "Olivier Terrier", email = "olivier.terrier@kairntech.com"},
|
|
13
|
+
]
|
|
14
|
+
keywords = []
|
|
15
|
+
classifiers = [
|
|
16
|
+
"Intended Audience :: Information Technology",
|
|
17
|
+
"Intended Audience :: Developers",
|
|
18
|
+
"Intended Audience :: System Administrators",
|
|
19
|
+
"Operating System :: OS Independent",
|
|
20
|
+
"Topic :: Software Development :: Libraries :: Application Frameworks",
|
|
21
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
22
|
+
"Topic :: Software Development :: Libraries",
|
|
23
|
+
"Topic :: Software Development",
|
|
24
|
+
"License :: OSI Approved :: MIT License",
|
|
25
|
+
"Development Status :: 4 - Beta",
|
|
26
|
+
"Programming Language :: Python :: 3.12",
|
|
27
|
+
]
|
|
28
|
+
requires-python = ">=3.12"
|
|
29
|
+
dependencies = [
|
|
30
|
+
"pymultirole-plugins>=0.6.0,<0.7.0",
|
|
31
|
+
"tldextract>=5.1.2",
|
|
32
|
+
"spacy[lookups]>=3.7.0",
|
|
33
|
+
"log-with-context",
|
|
34
|
+
"collections_extended",
|
|
35
|
+
"unidecode",
|
|
36
|
+
"presidio-analyzer>=2.2.354",
|
|
37
|
+
]
|
|
38
|
+
|
|
39
|
+
[project.urls]
|
|
40
|
+
Homepage = "https://github.com/oterrier/pyannotators_patterns/"
|
|
41
|
+
|
|
42
|
+
[project.entry-points."pyannotators.plugins"]
|
|
43
|
+
patterns = "pyannotators_patterns.patterns:PatternsAnnotator"
|
|
44
|
+
|
|
45
|
+
[project.optional-dependencies]
|
|
46
|
+
test = [
|
|
47
|
+
"pytest",
|
|
48
|
+
"pytest-cov",
|
|
49
|
+
"ruff",
|
|
50
|
+
"pip",
|
|
51
|
+
"dirty-equals",
|
|
52
|
+
"pytest-check",
|
|
53
|
+
]
|
|
54
|
+
docs = [
|
|
55
|
+
"sphinx",
|
|
56
|
+
"sphinx-rtd-theme",
|
|
57
|
+
"m2r2",
|
|
58
|
+
"sphinxcontrib.apidoc",
|
|
59
|
+
"jupyter_sphinx",
|
|
60
|
+
"lxml_html_clean",
|
|
61
|
+
]
|
|
62
|
+
dev = [
|
|
63
|
+
"pre-commit",
|
|
64
|
+
"bump2version",
|
|
65
|
+
]
|
|
66
|
+
|
|
67
|
+
[tool.hatch.version]
|
|
68
|
+
path = "src/pyannotators_patterns/__init__.py"
|
|
69
|
+
|
|
70
|
+
[tool.hatch.build.targets.wheel]
|
|
71
|
+
packages = ["src/pyannotators_patterns"]
|
|
72
|
+
|
|
73
|
+
[tool.pytest.ini_options]
|
|
74
|
+
addopts = "--durations=5"
|
|
75
|
+
norecursedirs = ["docs"]
|
|
76
|
+
|
|
77
|
+
[tool.ruff]
|
|
78
|
+
line-length = 120
|
|
79
|
+
target-version = "py312"
|
|
80
|
+
|
|
81
|
+
[tool.ruff.lint]
|
|
82
|
+
select = ["E", "W", "F", "I", "B", "C4", "UP", "ARG", "SIM"]
|
|
83
|
+
ignore = ["E501"]
|
|
84
|
+
|
|
85
|
+
[tool.ruff.format]
|
|
86
|
+
quote-style = "double"
|
|
87
|
+
indent-style = "space"
|