pyannotators-patterns 0.5.86__tar.gz → 0.5.88__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. {pyannotators_patterns-0.5.86 → pyannotators_patterns-0.5.88}/PKG-INFO +1 -1
  2. {pyannotators_patterns-0.5.86 → pyannotators_patterns-0.5.88}/setup.py +1 -1
  3. {pyannotators_patterns-0.5.86 → pyannotators_patterns-0.5.88}/src/pyannotators_patterns/__init__.py +1 -1
  4. {pyannotators_patterns-0.5.86 → pyannotators_patterns-0.5.88}/src/pyannotators_patterns/patterns.py +1 -1
  5. pyannotators_patterns-0.5.88/tests/data/coords-document.json +8 -0
  6. pyannotators_patterns-0.5.88/tests/data/tel-document.json +8 -0
  7. pyannotators_patterns-0.5.88/tests/data/tel.json +21 -0
  8. pyannotators_patterns-0.5.88/tests/test_tel.py +32 -0
  9. pyannotators_patterns-0.5.86/tests/data/coords-document.json +0 -86
  10. {pyannotators_patterns-0.5.86 → pyannotators_patterns-0.5.88}/.bumpversion.cfg +0 -0
  11. {pyannotators_patterns-0.5.86 → pyannotators_patterns-0.5.88}/.github/workflows/main.yml +0 -0
  12. {pyannotators_patterns-0.5.86 → pyannotators_patterns-0.5.88}/.gitignore +0 -0
  13. {pyannotators_patterns-0.5.86 → pyannotators_patterns-0.5.88}/.pre-commit-config.yaml +0 -0
  14. {pyannotators_patterns-0.5.86 → pyannotators_patterns-0.5.88}/.readthedocs.yml +0 -0
  15. {pyannotators_patterns-0.5.86 → pyannotators_patterns-0.5.88}/AUTHORS.md +0 -0
  16. {pyannotators_patterns-0.5.86 → pyannotators_patterns-0.5.88}/CHANGELOG.md +0 -0
  17. {pyannotators_patterns-0.5.86 → pyannotators_patterns-0.5.88}/Dockerfile +0 -0
  18. {pyannotators_patterns-0.5.86 → pyannotators_patterns-0.5.88}/Jenkinsfile +0 -0
  19. {pyannotators_patterns-0.5.86 → pyannotators_patterns-0.5.88}/LICENSE +0 -0
  20. {pyannotators_patterns-0.5.86 → pyannotators_patterns-0.5.88}/README.md +0 -0
  21. {pyannotators_patterns-0.5.86 → pyannotators_patterns-0.5.88}/RELEASE.md +0 -0
  22. {pyannotators_patterns-0.5.86 → pyannotators_patterns-0.5.88}/bumpversion.py +0 -0
  23. {pyannotators_patterns-0.5.86 → pyannotators_patterns-0.5.88}/docs/.gitignore +0 -0
  24. {pyannotators_patterns-0.5.86 → pyannotators_patterns-0.5.88}/docs/CHANGELOG.md +0 -0
  25. {pyannotators_patterns-0.5.86 → pyannotators_patterns-0.5.88}/docs/LICENSE +0 -0
  26. {pyannotators_patterns-0.5.86 → pyannotators_patterns-0.5.88}/docs/_static/.gitkeep +0 -0
  27. {pyannotators_patterns-0.5.86 → pyannotators_patterns-0.5.88}/docs/_templates/.gitkeep +0 -0
  28. {pyannotators_patterns-0.5.86 → pyannotators_patterns-0.5.88}/docs/conf.py +0 -0
  29. {pyannotators_patterns-0.5.86 → pyannotators_patterns-0.5.88}/docs/index.rst +0 -0
  30. {pyannotators_patterns-0.5.86 → pyannotators_patterns-0.5.88}/mypy.ini +0 -0
  31. {pyannotators_patterns-0.5.86 → pyannotators_patterns-0.5.88}/pyproject.toml +0 -0
  32. {pyannotators_patterns-0.5.86 → pyannotators_patterns-0.5.88}/src/pyannotators_patterns/named_pattern_recognizer.py +0 -0
  33. {pyannotators_patterns-0.5.86 → pyannotators_patterns-0.5.88}/tests/assertions.py +0 -0
  34. {pyannotators_patterns-0.5.86 → pyannotators_patterns-0.5.88}/tests/data/coords.json +0 -0
  35. {pyannotators_patterns-0.5.86 → pyannotators_patterns-0.5.88}/tests/test_coords.py +0 -0
  36. {pyannotators_patterns-0.5.86 → pyannotators_patterns-0.5.88}/tests/test_credit_cards.py +0 -0
  37. {pyannotators_patterns-0.5.86 → pyannotators_patterns-0.5.88}/tests/test_emails.py +0 -0
  38. {pyannotators_patterns-0.5.86 → pyannotators_patterns-0.5.88}/tests/test_zip.py +0 -0
  39. {pyannotators_patterns-0.5.86 → pyannotators_patterns-0.5.88}/tox.ini +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pyannotators-patterns
3
- Version: 0.5.86
3
+ Version: 0.5.88
4
4
  Summary: Annotator based on Presidio pattern recognizer
5
5
  Home-page: https://github.com/oterrier/pyannotators_patterns/
6
6
  Keywords:
@@ -41,7 +41,7 @@ entry_points = \
41
41
  'pyannotators_patterns.patterns:PatternsAnnotator']}
42
42
 
43
43
  setup(name='pyannotators-patterns',
44
- version='0.5.86',
44
+ version='0.5.88',
45
45
  description='Annotator based on Presidio pattern recognizer',
46
46
  author='Olivier Terrier',
47
47
  author_email='olivier.terrier@kairntech.com',
@@ -1,2 +1,2 @@
1
1
  """Annotator based on Presidio pattern recognizer"""
2
- __version__ = "0.5.86"
2
+ __version__ = "0.5.88"
@@ -52,7 +52,7 @@ class PatternsParameters(AnnotatorParameters):
52
52
  ```""" + PATTERNS_EXAMPLE_STR + "```", extra="key:label,val:json")
53
53
 
54
54
  score_threshold: float = Field(0.0, description="Minimum confidence value for detected entities to be returned")
55
- context_similarity_factor: float = Field(0.0,
55
+ context_similarity_factor: float = Field(0.35,
56
56
  description="How much to enhance confidence of match entity, as explained [here](https://microsoft.github.io/presidio/tutorial/06_context/)",
57
57
  extra="advanced")
58
58
  min_score_with_context_similarity: float = Field(0.4,
@@ -0,0 +1,8 @@
1
+ {
2
+ "text": " Coordonnées degrés décimaux, référence devant :\n Exemple : N85,8598654 W150,589654\n\n\n Coordonnées degrés décimaux, référence derrière : \n Exemple 85,8598654N 150,589654W",
3
+ "metadata": {
4
+ "language": "fr"
5
+ },
6
+ "identifier": "c8ecc5153c542c4f6a07fbe813384842",
7
+ "title": "unknown test document"
8
+ }
@@ -0,0 +1,8 @@
1
+ {
2
+ "text": "Si vous êtes intéressé, veuillez nous appeler au +33.089-658-6494.",
3
+ "metadata": {
4
+ "language": "fr"
5
+ },
6
+ "identifier": "c9321541549ae539665b8bbf440cb1a0",
7
+ "title": "unknown test document"
8
+ }
@@ -0,0 +1,21 @@
1
+ {
2
+ "patterns": [
3
+ {
4
+ "name": "avec préfixe",
5
+ "regex": "(00|\\+)( |\\-|\\.)?(?P<prefix>9[976]\\d|8[987530]\\d|6[987]\\d|5[90]\\d|42\\d|3[875]\\d|2[98654321]\\d|9[8543210]|8[6421]|6[6543210]|5[87654321]|4[987654310]|3[9643210]|2[70]|7|1)( |\\-|\\.)?(?P<number>(?:\\d{1,3}( |\\-|\\.)?){5,14})\\b",
6
+ "score": 0.5
7
+ },
8
+ {
9
+ "name": "sans préfixe",
10
+ "regex": "\\b(?P<number>(?:\\d{1,3}( |\\-|\\.)?){5,14})\\b",
11
+ "score": 0.5
12
+ }
13
+ ],
14
+ "context": [
15
+ "appeler",
16
+ "appel",
17
+ "numéro",
18
+ "téléphone",
19
+ "téléphoner"
20
+ ]
21
+ }
@@ -0,0 +1,32 @@
1
+ import json
2
+ from pathlib import Path
3
+ from typing import List
4
+
5
+ from dirty_equals import IsPartialDict
6
+ from pymultirole_plugins.v1.schema import Document
7
+ from pytest_check import check
8
+
9
+ from pyannotators_patterns.patterns import PatternsAnnotator, PatternsParameters
10
+
11
+
12
+ def test_tel(
13
+ ):
14
+ testdir = Path(__file__).parent
15
+ source = Path(testdir, "data/tel.json")
16
+ with source.open("r") as fin:
17
+ pat = json.load(fin)
18
+ parameters = PatternsParameters(mapping={
19
+ "telephone": json.dumps(pat, indent=2)
20
+ })
21
+ source = Path(testdir, "data/tel-document.json")
22
+ with source.open("r") as fin:
23
+ jdoc = json.load(fin)
24
+ doc = Document(**jdoc)
25
+ annotator = PatternsAnnotator()
26
+ docs: List[Document] = annotator.annotate([doc], parameters)
27
+ doc0 = docs[0]
28
+ tel = next(a.dict(exclude_none=True, exclude_unset=True) for a in doc0.annotations if
29
+ a.text == '+33.089-658-6494')
30
+ with check:
31
+ assert tel == IsPartialDict(labelName='telephone', text='+33.089-658-6494', score=0.85,
32
+ properties=IsPartialDict(prefix='33', number='089-658-6494'))
@@ -1,86 +0,0 @@
1
- {
2
- "text": " Coordonnées degrés décimaux, référence devant :\n Exemple : N85,8598654 W150,589654\n\n\n Coordonnées degrés décimaux, référence derrière : \n Exemple 85,8598654N 150,589654W",
3
- "metadata": {
4
- "language": "fr"
5
- },
6
- "annotations": [
7
- {
8
- "start": 66,
9
- "end": 77,
10
- "labelName": "coordonnees_geographiques",
11
- "text": "N85,8598654",
12
- "score": 0.9,
13
- "properties": {
14
- "analysis_explanation": {
15
- "recognizer": "PatternRecognizer",
16
- "pattern_name": "ref_lat",
17
- "pattern": "\\b(?P<ref_lat>N|S|n|s)[0-9,]+\\b",
18
- "original_score": 0.9,
19
- "score": 0.9,
20
- "score_context_improvement": 0,
21
- "supportive_context_word": "",
22
- "regex_flags": 26
23
- }
24
- }
25
- },
26
- {
27
- "start": 78,
28
- "end": 89,
29
- "labelName": "coordonnees_geographiques",
30
- "text": "W150,589654",
31
- "score": 0.9,
32
- "properties": {
33
- "analysis_explanation": {
34
- "recognizer": "PatternRecognizer",
35
- "pattern_name": "ref_long",
36
- "pattern": "\\b(?P<ref_long>E|O|w|e|o)[0-9,]+\\b",
37
- "original_score": 0.9,
38
- "score": 0.9,
39
- "score_context_improvement": 0,
40
- "supportive_context_word": "",
41
- "regex_flags": 26
42
- }
43
- }
44
- },
45
- {
46
- "start": 160,
47
- "end": 171,
48
- "labelName": "coordonnees_geographiques",
49
- "text": "85,8598654N",
50
- "score": 0.9,
51
- "properties": {
52
- "analysis_explanation": {
53
- "recognizer": "PatternRecognizer",
54
- "pattern_name": "ref_lat_DEVANT",
55
- "pattern": "\\b[0-9,]+(?P<ref_lat>N|S|n|s)\\b",
56
- "original_score": 0.9,
57
- "score": 0.9,
58
- "score_context_improvement": 0,
59
- "supportive_context_word": "",
60
- "regex_flags": 26
61
- }
62
- }
63
- },
64
- {
65
- "start": 172,
66
- "end": 183,
67
- "labelName": "coordonnees_geographiques",
68
- "text": "150,589654W",
69
- "score": 0.9,
70
- "properties": {
71
- "analysis_explanation": {
72
- "recognizer": "PatternRecognizer",
73
- "pattern_name": "ref_long_DEVANT",
74
- "pattern": "\\b[0-9,]+(?P<ref_long>E|O|w|e|o)\\b",
75
- "original_score": 0.9,
76
- "score": 0.9,
77
- "score_context_improvement": 0,
78
- "supportive_context_word": "",
79
- "regex_flags": 26
80
- }
81
- }
82
- }
83
- ],
84
- "identifier": "c8ecc5153c542c4f6a07fbe813384842",
85
- "title": "unknown test document"
86
- }