pyannotators-patterns 0.6.21__tar.gz → 0.7.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pyannotators_patterns-0.6.21 → pyannotators_patterns-0.7.1}/.gitignore +0 -6
- {pyannotators_patterns-0.6.21 → pyannotators_patterns-0.7.1}/.pre-commit-config.yaml +9 -6
- {pyannotators_patterns-0.6.21 → pyannotators_patterns-0.7.1}/Dockerfile +4 -4
- {pyannotators_patterns-0.6.21 → pyannotators_patterns-0.7.1}/Jenkinsfile +20 -86
- pyannotators_patterns-0.7.1/PKG-INFO +97 -0
- {pyannotators_patterns-0.6.21 → pyannotators_patterns-0.7.1}/README.md +12 -52
- {pyannotators_patterns-0.6.21 → pyannotators_patterns-0.7.1}/docs/conf.py +1 -0
- pyannotators_patterns-0.7.1/pyproject.toml +90 -0
- pyannotators_patterns-0.7.1/setup.py +54 -0
- {pyannotators_patterns-0.6.21 → pyannotators_patterns-0.7.1}/src/pyannotators_patterns/__init__.py +1 -2
- {pyannotators_patterns-0.6.21 → pyannotators_patterns-0.7.1}/src/pyannotators_patterns/named_pattern_recognizer.py +24 -30
- pyannotators_patterns-0.7.1/src/pyannotators_patterns/patterns.py +219 -0
- {pyannotators_patterns-0.6.21 → pyannotators_patterns-0.7.1}/tests/assertions.py +12 -4
- pyannotators_patterns-0.7.1/tests/test_coords.py +40 -0
- pyannotators_patterns-0.7.1/tests/test_credit_cards.py +58 -0
- pyannotators_patterns-0.7.1/tests/test_emails.py +58 -0
- {pyannotators_patterns-0.6.21 → pyannotators_patterns-0.7.1}/tests/test_mgrs.py +10 -7
- {pyannotators_patterns-0.6.21 → pyannotators_patterns-0.7.1}/tests/test_tel.py +11 -12
- pyannotators_patterns-0.7.1/tests/test_zip.py +52 -0
- pyannotators_patterns-0.7.1/tox.ini +51 -0
- pyannotators_patterns-0.6.21/MIGRATION.md +0 -166
- pyannotators_patterns-0.6.21/PKG-INFO +0 -157
- pyannotators_patterns-0.6.21/pyproject.toml +0 -88
- pyannotators_patterns-0.6.21/src/pyannotators_patterns/patterns.py +0 -260
- pyannotators_patterns-0.6.21/tests/test_annotator.py +0 -172
- pyannotators_patterns-0.6.21/tests/test_coords.py +0 -42
- pyannotators_patterns-0.6.21/tests/test_credit_cards.py +0 -111
- pyannotators_patterns-0.6.21/tests/test_emails.py +0 -75
- pyannotators_patterns-0.6.21/tests/test_zip.py +0 -60
- pyannotators_patterns-0.6.21/trivy +0 -0
- pyannotators_patterns-0.6.21/trivy-html-template.tpl +0 -148
- {pyannotators_patterns-0.6.21 → pyannotators_patterns-0.7.1}/.bumpversion.cfg +0 -0
- {pyannotators_patterns-0.6.21 → pyannotators_patterns-0.7.1}/.github/workflows/main.yml +0 -0
- {pyannotators_patterns-0.6.21 → pyannotators_patterns-0.7.1}/.readthedocs.yml +0 -0
- {pyannotators_patterns-0.6.21 → pyannotators_patterns-0.7.1}/AUTHORS.md +0 -0
- {pyannotators_patterns-0.6.21 → pyannotators_patterns-0.7.1}/CHANGELOG.md +0 -0
- {pyannotators_patterns-0.6.21 → pyannotators_patterns-0.7.1}/LICENSE +0 -0
- {pyannotators_patterns-0.6.21 → pyannotators_patterns-0.7.1}/RELEASE.md +0 -0
- {pyannotators_patterns-0.6.21 → pyannotators_patterns-0.7.1}/bumpversion.py +0 -0
- {pyannotators_patterns-0.6.21 → pyannotators_patterns-0.7.1}/docs/.gitignore +0 -0
- {pyannotators_patterns-0.6.21 → pyannotators_patterns-0.7.1}/docs/CHANGELOG.md +0 -0
- {pyannotators_patterns-0.6.21 → pyannotators_patterns-0.7.1}/docs/LICENSE +0 -0
- {pyannotators_patterns-0.6.21 → pyannotators_patterns-0.7.1}/docs/_static/.gitkeep +0 -0
- {pyannotators_patterns-0.6.21 → pyannotators_patterns-0.7.1}/docs/_templates/.gitkeep +0 -0
- {pyannotators_patterns-0.6.21 → pyannotators_patterns-0.7.1}/docs/index.rst +0 -0
- {pyannotators_patterns-0.6.21 → pyannotators_patterns-0.7.1}/mypy.ini +0 -0
- {pyannotators_patterns-0.6.21 → pyannotators_patterns-0.7.1}/tests/data/coords-document.json +0 -0
- {pyannotators_patterns-0.6.21 → pyannotators_patterns-0.7.1}/tests/data/coords.json +0 -0
- {pyannotators_patterns-0.6.21 → pyannotators_patterns-0.7.1}/tests/data/mgrs-document.json +0 -0
- {pyannotators_patterns-0.6.21 → pyannotators_patterns-0.7.1}/tests/data/mgrs.json +0 -0
- {pyannotators_patterns-0.6.21 → pyannotators_patterns-0.7.1}/tests/data/tel-document.json +0 -0
- {pyannotators_patterns-0.6.21 → pyannotators_patterns-0.7.1}/tests/data/tel.json +0 -0
|
@@ -1,14 +1,17 @@
|
|
|
1
1
|
repos:
|
|
2
|
-
- repo: https://github.com/
|
|
3
|
-
rev:
|
|
2
|
+
- repo: https://github.com/ambv/black
|
|
3
|
+
rev: 19.3b0
|
|
4
4
|
hooks:
|
|
5
|
-
- id:
|
|
6
|
-
|
|
7
|
-
- id: ruff-format
|
|
5
|
+
- id: black
|
|
6
|
+
language_version: python3.8
|
|
8
7
|
- repo: https://github.com/pre-commit/pre-commit-hooks
|
|
9
|
-
rev:
|
|
8
|
+
rev: v2.1.0
|
|
10
9
|
hooks:
|
|
11
10
|
- id: end-of-file-fixer
|
|
12
11
|
exclude: '.bumpversion.cfg'
|
|
13
12
|
- id: trailing-whitespace
|
|
14
13
|
exclude: '.bumpversion.cfg'
|
|
14
|
+
- repo: https://gitlab.com/pycqa/flake8
|
|
15
|
+
rev: 3.7.9
|
|
16
|
+
hooks:
|
|
17
|
+
- id: flake8
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
FROM python:3.
|
|
1
|
+
FROM python:3.8-slim-bookworm
|
|
2
2
|
# Install prerequisites
|
|
3
3
|
RUN apt-get update -y && \
|
|
4
4
|
apt-get install -y \
|
|
@@ -11,9 +11,9 @@ RUN apt-get update -y && \
|
|
|
11
11
|
apt-get update -y && \
|
|
12
12
|
apt-get clean all -y
|
|
13
13
|
|
|
14
|
-
#
|
|
15
|
-
|
|
14
|
+
# Enable Installing packages as root
|
|
15
|
+
ENV FLIT_ROOT_INSTALL=1
|
|
16
16
|
|
|
17
|
-
# Add pyproject.toml + README.md for
|
|
17
|
+
# Add pyproject.toml + README.md for flit install
|
|
18
18
|
ADD pyproject.toml pyproject.toml
|
|
19
19
|
ADD README.md README.md
|
|
@@ -20,7 +20,7 @@ pipeline {
|
|
|
20
20
|
JENKINS_UIDGID = '1004:1004'
|
|
21
21
|
|
|
22
22
|
MAJOR_VERSION = '0'
|
|
23
|
-
MINOR_VERSION = '
|
|
23
|
+
MINOR_VERSION = '7'
|
|
24
24
|
}
|
|
25
25
|
|
|
26
26
|
stages {
|
|
@@ -58,7 +58,7 @@ pipeline {
|
|
|
58
58
|
stage('Add credentials') {
|
|
59
59
|
steps {
|
|
60
60
|
script {
|
|
61
|
-
// Add password file for
|
|
61
|
+
// Add password file for flit publishing
|
|
62
62
|
sh "cp ${PATH_HOME}/.passwd-pypi .env"
|
|
63
63
|
}
|
|
64
64
|
}
|
|
@@ -73,7 +73,6 @@ pipeline {
|
|
|
73
73
|
withCredentials([gitUsernamePassword(credentialsId: 'bitbucket-user', gitToolName: 'git-tool')]) {
|
|
74
74
|
sh 'git pull'
|
|
75
75
|
sh "echo '\"\"\"Annotator based on Presidio pattern recognizer\"\"\"' > src/pyannotators_patterns/__init__.py"
|
|
76
|
-
sh "echo '' >> src/pyannotators_patterns/__init__.py"
|
|
77
76
|
sh "echo '__version__ = \"${MAJOR_VERSION}.${MINOR_VERSION}.${BUILD_ID}\"' >> src/pyannotators_patterns/__init__.py"
|
|
78
77
|
sh 'git commit src/pyannotators_patterns/__init__.py -m "[Jenkins CI] Commit on version files" || echo "No changes to commit"'
|
|
79
78
|
sh 'git push'
|
|
@@ -104,11 +103,13 @@ pipeline {
|
|
|
104
103
|
}
|
|
105
104
|
|
|
106
105
|
stages {
|
|
107
|
-
stage('Install
|
|
106
|
+
stage('Install flit & flake8') {
|
|
108
107
|
steps {
|
|
109
|
-
|
|
110
|
-
sh '
|
|
111
|
-
sh '
|
|
108
|
+
// remove any previous tox env
|
|
109
|
+
sh 'rm -rf .tox'
|
|
110
|
+
sh 'python -m pip install pip==22.0.3'
|
|
111
|
+
sh 'pip install --no-cache-dir flit==3.2.0 flake8==3.9.2 flakehell tox'
|
|
112
|
+
sh 'flit install'
|
|
112
113
|
}
|
|
113
114
|
}
|
|
114
115
|
|
|
@@ -116,20 +117,20 @@ pipeline {
|
|
|
116
117
|
steps {
|
|
117
118
|
// remove any previous results.xml file
|
|
118
119
|
sh "rm -f ${TEST_REPORT_DIR}/results.xml"
|
|
119
|
-
sh '
|
|
120
|
-
sh 'uv run ruff format --check .'
|
|
121
|
-
sh "uv run pytest --junit-xml=${TEST_REPORT_DIR}/results.xml"
|
|
120
|
+
sh 'tox'
|
|
122
121
|
}
|
|
123
122
|
}
|
|
124
123
|
|
|
125
124
|
stage('Publish on PyPI') {
|
|
126
125
|
environment {
|
|
127
|
-
|
|
128
|
-
|
|
126
|
+
FLIT_USERNAME = getUserName '.env'
|
|
127
|
+
FLIT_PASSWORD = getUserPass '.env'
|
|
129
128
|
}
|
|
130
129
|
steps {
|
|
131
130
|
// remove any previous folder dist
|
|
132
131
|
sh 'rm -rf dist'
|
|
132
|
+
// create (as root) folder dist
|
|
133
|
+
sh 'mkdir dist'
|
|
133
134
|
// pull recent updates of file __init__.py
|
|
134
135
|
withCredentials([gitUsernamePassword(credentialsId: 'bitbucket-user', gitToolName: 'git-tool')]) {
|
|
135
136
|
sh 'git config --global pull.rebase false'
|
|
@@ -142,85 +143,18 @@ pipeline {
|
|
|
142
143
|
sh "chown ${JENKINS_UIDGID} src/pyannotators_patterns/__init__.py"
|
|
143
144
|
// get git status
|
|
144
145
|
sh 'git status'
|
|
145
|
-
//
|
|
146
|
+
// publish on PyPI
|
|
146
147
|
sh '''
|
|
147
148
|
export COMMIT_VERSION=$( cat src/pyannotators_patterns/__init__.py|grep version|cut -d '"' -f2|tr -s '[:blank:]' )
|
|
148
149
|
export BUILD_VERSION="${MAJOR_VERSION}"."${MINOR_VERSION}"."${BUILD_ID}"
|
|
149
|
-
if [ "${COMMIT_VERSION}" = "${BUILD_VERSION}" ] ; then
|
|
150
|
+
if [ "${COMMIT_VERSION}" = "${BUILD_VERSION}" ] ; then flit publish ; fi
|
|
150
151
|
'''
|
|
151
152
|
// remove current folder dist
|
|
152
153
|
sh 'rm -rf dist'
|
|
153
154
|
// remove current folder .hypothesis
|
|
154
155
|
sh 'rm -rf .hypothesis'
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
}
|
|
158
|
-
}
|
|
159
|
-
|
|
160
|
-
stage('Prepare SBOM') {
|
|
161
|
-
when {
|
|
162
|
-
beforeAgent true
|
|
163
|
-
environment name: 'SKIP_JOB', value: '0'
|
|
164
|
-
}
|
|
165
|
-
options {
|
|
166
|
-
timeout(time: 120, unit: 'SECONDS')
|
|
167
|
-
}
|
|
168
|
-
|
|
169
|
-
agent {
|
|
170
|
-
docker {
|
|
171
|
-
image 'alpine:3.19'
|
|
172
|
-
label 'built-in'
|
|
173
|
-
customWorkspace "${PATH_HOME}/${JOB_NAME}"
|
|
174
|
-
args "-u 0"
|
|
175
|
-
}
|
|
176
|
-
}
|
|
177
|
-
|
|
178
|
-
stages {
|
|
179
|
-
stage('Generate SBOM') {
|
|
180
|
-
steps {
|
|
181
|
-
sh '''
|
|
182
|
-
apk add --no-cache curl
|
|
183
|
-
|
|
184
|
-
curl -sSfL https://raw.githubusercontent.com/anchore/syft/main/install.sh | sh -s -- -b . v${SYFT_VERSION}
|
|
185
|
-
|
|
186
|
-
./syft . \
|
|
187
|
-
--exclude '**/syft' \
|
|
188
|
-
--exclude '**/.pytest_cache' \
|
|
189
|
-
--exclude '**/.ruff_cache' \
|
|
190
|
-
--exclude '**/sbom.cdx.json' \
|
|
191
|
-
--exclude '**/sbom.spdx.json' \
|
|
192
|
-
--source-name "${JOB_NAME}" \
|
|
193
|
-
--source-version "${BUILD_NUMBER}" \
|
|
194
|
-
-o cyclonedx-json=sbom.cdx.json \
|
|
195
|
-
-o spdx-json=sbom.spdx.json
|
|
196
|
-
|
|
197
|
-
rm -f ./syft
|
|
198
|
-
'''
|
|
199
|
-
}
|
|
200
|
-
}
|
|
201
|
-
|
|
202
|
-
stage('Analyse SBOM') {
|
|
203
|
-
steps {
|
|
204
|
-
sh '''
|
|
205
|
-
curl -sfL https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh | sh -s -- -b . v${TRIVY_VERSION}
|
|
206
|
-
curl -L https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/html.tpl -o trivy-html-template.tpl
|
|
207
|
-
|
|
208
|
-
./trivy sbom --download-db-only
|
|
209
|
-
|
|
210
|
-
./trivy sbom sbom.cdx.json \
|
|
211
|
-
--skip-db-update \
|
|
212
|
-
--format template \
|
|
213
|
-
--template "@trivy-html-template.tpl" \
|
|
214
|
-
-o trivy-report.html
|
|
215
|
-
|
|
216
|
-
rm -f ./trivy
|
|
217
|
-
'''
|
|
218
|
-
}
|
|
219
|
-
}
|
|
220
|
-
|
|
221
|
-
stage('Archive SBOM & analysis') {
|
|
222
|
-
steps {
|
|
223
|
-
archiveArtifacts artifacts: 'sbom*.json, trivy*.html', fingerprint: true
|
|
156
|
+
// remove current folder .tox
|
|
157
|
+
sh 'rm -rf .tox'
|
|
224
158
|
}
|
|
225
159
|
}
|
|
226
160
|
}
|
|
@@ -305,7 +239,7 @@ pipeline {
|
|
|
305
239
|
}
|
|
306
240
|
}
|
|
307
241
|
|
|
308
|
-
// return
|
|
242
|
+
// return FLIT_USERNAME from given file
|
|
309
243
|
def getUserName(path) {
|
|
310
244
|
def USERNAME = sh(
|
|
311
245
|
script: "grep FLIT_USERNAME ${path}|cut -d '=' -f2",
|
|
@@ -314,7 +248,7 @@ def getUserName(path) {
|
|
|
314
248
|
return USERNAME
|
|
315
249
|
}
|
|
316
250
|
|
|
317
|
-
// return
|
|
251
|
+
// return FLIT_PASSWORD from given file
|
|
318
252
|
def getUserPass(path) {
|
|
319
253
|
def USERPASS = sh(
|
|
320
254
|
script: "grep FLIT_PASSWORD ${path}|cut -d '=' -f2",
|
|
@@ -475,6 +409,6 @@ def analyseBuildCause() {
|
|
|
475
409
|
println 'Skipping build because last commit has been done by CI'
|
|
476
410
|
env.SKIP_JOB = '1'
|
|
477
411
|
switchEmailNotif(false, 0)
|
|
478
|
-
currentBuild.result = 'NOT_BUILT'
|
|
412
|
+
//currentBuild.result = 'NOT_BUILT'
|
|
479
413
|
}
|
|
480
414
|
}
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pyannotators-patterns
|
|
3
|
+
Version: 0.7.1
|
|
4
|
+
Summary: Annotator based on Presidio pattern recognizer
|
|
5
|
+
Home-page: https://github.com/oterrier/pyannotators_patterns/
|
|
6
|
+
Keywords:
|
|
7
|
+
Author: Olivier Terrier
|
|
8
|
+
Author-email: olivier.terrier@kairntech.com
|
|
9
|
+
Requires-Python: >=3.8
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
Classifier: Intended Audience :: Information Technology
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Intended Audience :: System Administrators
|
|
14
|
+
Classifier: Operating System :: OS Independent
|
|
15
|
+
Classifier: Topic :: Software Development :: Libraries :: Application Frameworks
|
|
16
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
17
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
18
|
+
Classifier: Topic :: Software Development
|
|
19
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
20
|
+
Classifier: Development Status :: 4 - Beta
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
22
|
+
License-File: LICENSE
|
|
23
|
+
Requires-Dist: pymultirole-plugins>=0.7.0,<0.8.0
|
|
24
|
+
Requires-Dist: tldextract==5.1.2
|
|
25
|
+
Requires-Dist: spacy==3.4.4
|
|
26
|
+
Requires-Dist: spacy[lookups]==3.4.4
|
|
27
|
+
Requires-Dist: log-with-context
|
|
28
|
+
Requires-Dist: collections_extended
|
|
29
|
+
Requires-Dist: unidecode
|
|
30
|
+
Requires-Dist: presidio-analyzer==2.2.354
|
|
31
|
+
Requires-Dist: flit ; extra == "dev"
|
|
32
|
+
Requires-Dist: pre-commit ; extra == "dev"
|
|
33
|
+
Requires-Dist: bump2version ; extra == "dev"
|
|
34
|
+
Requires-Dist: sphinx ; extra == "docs"
|
|
35
|
+
Requires-Dist: sphinx-rtd-theme ; extra == "docs"
|
|
36
|
+
Requires-Dist: m2r2 ; extra == "docs"
|
|
37
|
+
Requires-Dist: pytest ; extra == "test"
|
|
38
|
+
Requires-Dist: pytest-cov ; extra == "test"
|
|
39
|
+
Requires-Dist: pytest-flake8 ; extra == "test"
|
|
40
|
+
Requires-Dist: pytest-black ; extra == "test"
|
|
41
|
+
Requires-Dist: pytest_check ; extra == "test"
|
|
42
|
+
Requires-Dist: flake8==3.9.2 ; extra == "test"
|
|
43
|
+
Requires-Dist: tox ; extra == "test"
|
|
44
|
+
Requires-Dist: dirty-equals ; extra == "test"
|
|
45
|
+
Provides-Extra: dev
|
|
46
|
+
Provides-Extra: docs
|
|
47
|
+
Provides-Extra: test
|
|
48
|
+
|
|
49
|
+
# pyannotators_patterns
|
|
50
|
+
|
|
51
|
+
[](https://github.com/oterrier/pyannotators_patterns/blob/master/LICENSE)
|
|
52
|
+
[](https://github.com/oterrier/pyannotators_patterns/actions?query=workflow%3Atests)
|
|
53
|
+
[](https://codecov.io/gh/oterrier/pyannotators_patterns)
|
|
54
|
+
[](https://pyannotators_patterns.readthedocs.io)
|
|
55
|
+
[](https://pypi.org/project/pyannotators_patterns/)
|
|
56
|
+
[](https://pypi.org/project/pyannotators_patterns/)
|
|
57
|
+
|
|
58
|
+
Annotator based on Presidio pattern recognizer
|
|
59
|
+
|
|
60
|
+
## Installation
|
|
61
|
+
|
|
62
|
+
You can simply `pip install pyannotators_patterns`.
|
|
63
|
+
|
|
64
|
+
## Developing
|
|
65
|
+
|
|
66
|
+
### Prerequisites
|
|
67
|
+
|
|
68
|
+
You will need to install `flit` (for building the package) and `tox` (for orchestrating testing and documentation building):
|
|
69
|
+
|
|
70
|
+
```
|
|
71
|
+
python3 -m pip install flit tox
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
Clone the repository:
|
|
75
|
+
|
|
76
|
+
```
|
|
77
|
+
git clone https://github.com/oterrier/pyannotators_patterns
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
### Running the test suite
|
|
81
|
+
|
|
82
|
+
You can run the full test suite against all supported versions of Python (3.8) with:
|
|
83
|
+
|
|
84
|
+
```
|
|
85
|
+
tox
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
### Building the documentation
|
|
89
|
+
|
|
90
|
+
You can build the HTML documentation with:
|
|
91
|
+
|
|
92
|
+
```
|
|
93
|
+
tox -e docs
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
The built documentation is available at `docs/_build/index.html`.
|
|
97
|
+
|
|
@@ -7,82 +7,42 @@
|
|
|
7
7
|
[](https://pypi.org/project/pyannotators_patterns/)
|
|
8
8
|
[](https://pypi.org/project/pyannotators_patterns/)
|
|
9
9
|
|
|
10
|
-
Annotator based on
|
|
10
|
+
Annotator based on Presidio pattern recognizer
|
|
11
11
|
|
|
12
12
|
## Installation
|
|
13
13
|
|
|
14
|
-
|
|
15
|
-
pip install pyannotators-patterns
|
|
16
|
-
```
|
|
14
|
+
You can simply `pip install pyannotators_patterns`.
|
|
17
15
|
|
|
18
16
|
## Developing
|
|
19
17
|
|
|
20
|
-
###
|
|
18
|
+
### Prerequisites
|
|
21
19
|
|
|
22
|
-
You will need
|
|
23
|
-
|
|
24
|
-
Clone the repository:
|
|
20
|
+
You will need to install `flit` (for building the package) and `tox` (for orchestrating testing and documentation building):
|
|
25
21
|
|
|
26
22
|
```
|
|
27
|
-
|
|
28
|
-
cd pyannotators_patterns
|
|
23
|
+
python3 -m pip install flit tox
|
|
29
24
|
```
|
|
30
25
|
|
|
31
|
-
|
|
26
|
+
Clone the repository:
|
|
32
27
|
|
|
33
28
|
```
|
|
34
|
-
|
|
29
|
+
git clone https://github.com/oterrier/pyannotators_patterns
|
|
35
30
|
```
|
|
36
31
|
|
|
37
32
|
### Running the test suite
|
|
38
33
|
|
|
39
|
-
|
|
40
|
-
uv run pytest
|
|
41
|
-
```
|
|
42
|
-
|
|
43
|
-
### Linting and formatting
|
|
34
|
+
You can run the full test suite against all supported versions of Python (3.8) with:
|
|
44
35
|
|
|
45
36
|
```
|
|
46
|
-
|
|
47
|
-
uv run ruff format --check .
|
|
48
|
-
```
|
|
49
|
-
|
|
50
|
-
To auto-fix formatting:
|
|
51
|
-
|
|
52
|
-
```
|
|
53
|
-
uv run ruff format .
|
|
37
|
+
tox
|
|
54
38
|
```
|
|
55
39
|
|
|
56
40
|
### Building the documentation
|
|
57
41
|
|
|
58
|
-
|
|
59
|
-
uv run --extra docs sphinx-build docs docs/_build
|
|
60
|
-
```
|
|
61
|
-
|
|
62
|
-
The built documentation is available at `docs/_build/index.html`.
|
|
63
|
-
|
|
64
|
-
## SBOM & vulnerability check
|
|
65
|
-
|
|
66
|
-
Install the SBOM dependencies:
|
|
67
|
-
|
|
68
|
-
```
|
|
69
|
-
uv sync --extra sbom
|
|
70
|
-
```
|
|
71
|
-
|
|
72
|
-
Generate a CycloneDX SBOM from the current environment:
|
|
42
|
+
You can build the HTML documentation with:
|
|
73
43
|
|
|
74
44
|
```
|
|
75
|
-
|
|
45
|
+
tox -e docs
|
|
76
46
|
```
|
|
77
47
|
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
```
|
|
81
|
-
uv run pip-audit --format json --output audit-report.json
|
|
82
|
-
```
|
|
83
|
-
|
|
84
|
-
To fail on any known vulnerability (useful in CI):
|
|
85
|
-
|
|
86
|
-
```
|
|
87
|
-
uv run pip-audit --strict
|
|
88
|
-
```
|
|
48
|
+
The built documentation is available at `docs/_build/index.html`.
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["flit_core >=3.2,<4"]
|
|
3
|
+
#requires = ["flit_core >=2,<3"]
|
|
4
|
+
build-backend = "flit_core.buildapi"
|
|
5
|
+
|
|
6
|
+
[tool.flit.metadata]
|
|
7
|
+
module = "pyannotators_patterns"
|
|
8
|
+
author = "Olivier Terrier"
|
|
9
|
+
author-email = "olivier.terrier@kairntech.com"
|
|
10
|
+
home-page = "https://github.com/oterrier/pyannotators_patterns/"
|
|
11
|
+
requires-python=">=3.8"
|
|
12
|
+
description-file="README.md"
|
|
13
|
+
keywords = ""
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Intended Audience :: Information Technology",
|
|
16
|
+
"Intended Audience :: Developers",
|
|
17
|
+
"Intended Audience :: System Administrators",
|
|
18
|
+
"Operating System :: OS Independent",
|
|
19
|
+
"Topic :: Software Development :: Libraries :: Application Frameworks",
|
|
20
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
21
|
+
"Topic :: Software Development :: Libraries",
|
|
22
|
+
"Topic :: Software Development",
|
|
23
|
+
"License :: OSI Approved :: MIT License",
|
|
24
|
+
"Development Status :: 4 - Beta",
|
|
25
|
+
"Programming Language :: Python :: 3.8",
|
|
26
|
+
]
|
|
27
|
+
requires = [
|
|
28
|
+
"pymultirole-plugins>=0.7.0,<0.8.0",
|
|
29
|
+
"tldextract==5.1.2",
|
|
30
|
+
"spacy==3.4.4",
|
|
31
|
+
"spacy[lookups]==3.4.4",
|
|
32
|
+
"log-with-context",
|
|
33
|
+
"collections_extended",
|
|
34
|
+
"unidecode",
|
|
35
|
+
"presidio-analyzer==2.2.354"
|
|
36
|
+
]
|
|
37
|
+
dist-name = "pyannotators-patterns"
|
|
38
|
+
|
|
39
|
+
[tool.flit.entrypoints."pyannotators.plugins"]
|
|
40
|
+
patterns = "pyannotators_patterns.patterns:PatternsAnnotator"
|
|
41
|
+
|
|
42
|
+
[tool.flit.metadata.requires-extra]
|
|
43
|
+
test = [
|
|
44
|
+
"pytest",
|
|
45
|
+
"pytest-cov",
|
|
46
|
+
"pytest-flake8",
|
|
47
|
+
"pytest-black",
|
|
48
|
+
"pytest_check",
|
|
49
|
+
"flake8==3.9.2",
|
|
50
|
+
"tox",
|
|
51
|
+
"dirty-equals"
|
|
52
|
+
]
|
|
53
|
+
docs = [
|
|
54
|
+
"sphinx",
|
|
55
|
+
"sphinx-rtd-theme",
|
|
56
|
+
"m2r2", # markdown support
|
|
57
|
+
# "sphinxcontrib.apidoc", # run sphinx-apidoc when building docs
|
|
58
|
+
# "jupyter_sphinx", # for execution of code snippets in the documentation
|
|
59
|
+
]
|
|
60
|
+
dev = [
|
|
61
|
+
"flit",
|
|
62
|
+
"pre-commit",
|
|
63
|
+
"bump2version",
|
|
64
|
+
]
|
|
65
|
+
|
|
66
|
+
[tool.flakehell]
|
|
67
|
+
exclude = ["README.md"]
|
|
68
|
+
format = "colored"
|
|
69
|
+
#format = "junit-xml"
|
|
70
|
+
max_line_length = 120
|
|
71
|
+
show_source = true
|
|
72
|
+
#whitelist = "../../allowlist.txt"
|
|
73
|
+
|
|
74
|
+
[tool.flakehell.plugins]
|
|
75
|
+
flake8-bandit = ["+*", "-S322"]
|
|
76
|
+
flake8-bugbear = ["+*"]
|
|
77
|
+
flake8-builtins = ["+*"]
|
|
78
|
+
flake8-comprehensions = ["+*"]
|
|
79
|
+
#flake8-darglint = ["+*"]
|
|
80
|
+
flake8-docstrings = ["+*"]
|
|
81
|
+
flake8-eradicate = ["+*"]
|
|
82
|
+
flake8-isort = ["+*"]
|
|
83
|
+
flake8-mutable = ["+*"]
|
|
84
|
+
flake8-pytest-style = ["+*"]
|
|
85
|
+
flake8-spellcheck = ["+*"]
|
|
86
|
+
mccabe = ["+*"]
|
|
87
|
+
pep8-naming = ["+*"]
|
|
88
|
+
pycodestyle = ["+*"]
|
|
89
|
+
pyflakes = ["+*"]
|
|
90
|
+
pylint = ["+*"]
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
# setup.py generated by flit for tools that don't yet use PEP 517
|
|
3
|
+
|
|
4
|
+
from distutils.core import setup
|
|
5
|
+
|
|
6
|
+
packages = \
|
|
7
|
+
['pyannotators_patterns']
|
|
8
|
+
|
|
9
|
+
package_data = \
|
|
10
|
+
{'': ['*']}
|
|
11
|
+
|
|
12
|
+
package_dir = \
|
|
13
|
+
{'': 'src'}
|
|
14
|
+
|
|
15
|
+
install_requires = \
|
|
16
|
+
['pymultirole-plugins>=0.7.0,<0.8.0',
|
|
17
|
+
'tldextract==5.1.2',
|
|
18
|
+
'spacy==3.4.4',
|
|
19
|
+
'spacy[lookups]==3.4.4',
|
|
20
|
+
'log-with-context',
|
|
21
|
+
'collections_extended',
|
|
22
|
+
'unidecode',
|
|
23
|
+
'presidio-analyzer==2.2.354']
|
|
24
|
+
|
|
25
|
+
extras_require = \
|
|
26
|
+
{'dev': ['flit', 'pre-commit', 'bump2version'],
|
|
27
|
+
'docs': ['sphinx', 'sphinx-rtd-theme', 'm2r2'],
|
|
28
|
+
'test': ['pytest',
|
|
29
|
+
'pytest-cov',
|
|
30
|
+
'pytest-flake8',
|
|
31
|
+
'pytest-black',
|
|
32
|
+
'pytest_check',
|
|
33
|
+
'flake8==3.9.2',
|
|
34
|
+
'tox',
|
|
35
|
+
'dirty-equals']}
|
|
36
|
+
|
|
37
|
+
entry_points = \
|
|
38
|
+
{'pyannotators.plugins': ['patterns = '
|
|
39
|
+
'pyannotators_patterns.patterns:PatternsAnnotator']}
|
|
40
|
+
|
|
41
|
+
setup(name='pyannotators-patterns',
|
|
42
|
+
version='0.7.1',
|
|
43
|
+
description='Annotator based on Presidio pattern recognizer',
|
|
44
|
+
author='Olivier Terrier',
|
|
45
|
+
author_email='olivier.terrier@kairntech.com',
|
|
46
|
+
url='https://github.com/oterrier/pyannotators_patterns/',
|
|
47
|
+
packages=packages,
|
|
48
|
+
package_data=package_data,
|
|
49
|
+
package_dir=package_dir,
|
|
50
|
+
install_requires=install_requires,
|
|
51
|
+
extras_require=extras_require,
|
|
52
|
+
entry_points=entry_points,
|
|
53
|
+
python_requires='>=3.8',
|
|
54
|
+
)
|
|
@@ -1,41 +1,33 @@
|
|
|
1
1
|
import re
|
|
2
|
+
from typing import List, Optional
|
|
2
3
|
|
|
3
|
-
from presidio_analyzer import
|
|
4
|
+
from presidio_analyzer import Pattern, PatternRecognizer, RecognizerResult, EntityRecognizer
|
|
4
5
|
from presidio_analyzer.nlp_engine import NlpArtifacts
|
|
5
6
|
|
|
6
7
|
|
|
7
8
|
class NamedPatternRecognizer(PatternRecognizer):
|
|
8
9
|
def __init__(
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
10
|
+
self,
|
|
11
|
+
supported_entity: str,
|
|
12
|
+
name: str = None,
|
|
13
|
+
supported_language: str = "en",
|
|
14
|
+
patterns: List[Pattern] = None,
|
|
15
|
+
deny_list: List[str] = None,
|
|
16
|
+
context: List[str] = None,
|
|
17
|
+
deny_list_score: float = 1.0,
|
|
18
|
+
global_regex_flags: Optional[int] = re.DOTALL | re.MULTILINE,
|
|
19
|
+
version: str = "0.0.1",
|
|
19
20
|
):
|
|
20
|
-
super().__init__(
|
|
21
|
-
|
|
22
|
-
name,
|
|
23
|
-
supported_language,
|
|
24
|
-
patterns,
|
|
25
|
-
deny_list,
|
|
26
|
-
context,
|
|
27
|
-
deny_list_score,
|
|
28
|
-
global_regex_flags,
|
|
29
|
-
version,
|
|
30
|
-
)
|
|
21
|
+
super().__init__(supported_entity, name, supported_language, patterns, deny_list, context, deny_list_score,
|
|
22
|
+
global_regex_flags, version)
|
|
31
23
|
|
|
32
24
|
def analyze(
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
) ->
|
|
25
|
+
self,
|
|
26
|
+
text: str,
|
|
27
|
+
entities: List[str],
|
|
28
|
+
nlp_artifacts: Optional[NlpArtifacts] = None,
|
|
29
|
+
regex_flags: Optional[int] = None,
|
|
30
|
+
) -> List[RecognizerResult]:
|
|
39
31
|
"""
|
|
40
32
|
Analyzes text to detect PII using regular expressions or deny-lists.
|
|
41
33
|
|
|
@@ -53,7 +45,9 @@ class NamedPatternRecognizer(PatternRecognizer):
|
|
|
53
45
|
|
|
54
46
|
return results
|
|
55
47
|
|
|
56
|
-
def __analyze_patterns(
|
|
48
|
+
def __analyze_patterns(
|
|
49
|
+
self, text: str, flags: int = None
|
|
50
|
+
) -> List[RecognizerResult]:
|
|
57
51
|
"""
|
|
58
52
|
Evaluate all patterns in the provided text.
|
|
59
53
|
|
|
@@ -99,7 +93,7 @@ class NamedPatternRecognizer(PatternRecognizer):
|
|
|
99
93
|
end=end,
|
|
100
94
|
score=score,
|
|
101
95
|
analysis_explanation=description,
|
|
102
|
-
recognition_metadata=recognition_metadata
|
|
96
|
+
recognition_metadata=recognition_metadata
|
|
103
97
|
)
|
|
104
98
|
|
|
105
99
|
if validation_result is not None:
|