pyannotators-patterns 0.5.80__tar.gz → 0.5.84__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. {pyannotators_patterns-0.5.80 → pyannotators_patterns-0.5.84}/PKG-INFO +3 -2
  2. {pyannotators_patterns-0.5.80 → pyannotators_patterns-0.5.84}/pyproject.toml +2 -1
  3. {pyannotators_patterns-0.5.80 → pyannotators_patterns-0.5.84}/setup.py +3 -2
  4. {pyannotators_patterns-0.5.80 → pyannotators_patterns-0.5.84}/src/pyannotators_patterns/__init__.py +1 -1
  5. {pyannotators_patterns-0.5.80 → pyannotators_patterns-0.5.84}/src/pyannotators_patterns/patterns.py +43 -38
  6. {pyannotators_patterns-0.5.80 → pyannotators_patterns-0.5.84}/.bumpversion.cfg +0 -0
  7. {pyannotators_patterns-0.5.80 → pyannotators_patterns-0.5.84}/.github/workflows/main.yml +0 -0
  8. {pyannotators_patterns-0.5.80 → pyannotators_patterns-0.5.84}/.gitignore +0 -0
  9. {pyannotators_patterns-0.5.80 → pyannotators_patterns-0.5.84}/.pre-commit-config.yaml +0 -0
  10. {pyannotators_patterns-0.5.80 → pyannotators_patterns-0.5.84}/.readthedocs.yml +0 -0
  11. {pyannotators_patterns-0.5.80 → pyannotators_patterns-0.5.84}/AUTHORS.md +0 -0
  12. {pyannotators_patterns-0.5.80 → pyannotators_patterns-0.5.84}/CHANGELOG.md +0 -0
  13. {pyannotators_patterns-0.5.80 → pyannotators_patterns-0.5.84}/Dockerfile +0 -0
  14. {pyannotators_patterns-0.5.80 → pyannotators_patterns-0.5.84}/Jenkinsfile +0 -0
  15. {pyannotators_patterns-0.5.80 → pyannotators_patterns-0.5.84}/LICENSE +0 -0
  16. {pyannotators_patterns-0.5.80 → pyannotators_patterns-0.5.84}/README.md +0 -0
  17. {pyannotators_patterns-0.5.80 → pyannotators_patterns-0.5.84}/RELEASE.md +0 -0
  18. {pyannotators_patterns-0.5.80 → pyannotators_patterns-0.5.84}/bumpversion.py +0 -0
  19. {pyannotators_patterns-0.5.80 → pyannotators_patterns-0.5.84}/docs/.gitignore +0 -0
  20. {pyannotators_patterns-0.5.80 → pyannotators_patterns-0.5.84}/docs/CHANGELOG.md +0 -0
  21. {pyannotators_patterns-0.5.80 → pyannotators_patterns-0.5.84}/docs/LICENSE +0 -0
  22. {pyannotators_patterns-0.5.80 → pyannotators_patterns-0.5.84}/docs/_static/.gitkeep +0 -0
  23. {pyannotators_patterns-0.5.80 → pyannotators_patterns-0.5.84}/docs/_templates/.gitkeep +0 -0
  24. {pyannotators_patterns-0.5.80 → pyannotators_patterns-0.5.84}/docs/conf.py +0 -0
  25. {pyannotators_patterns-0.5.80 → pyannotators_patterns-0.5.84}/docs/index.rst +0 -0
  26. {pyannotators_patterns-0.5.80 → pyannotators_patterns-0.5.84}/hgnc_cache.sqlite +0 -0
  27. {pyannotators_patterns-0.5.80 → pyannotators_patterns-0.5.84}/mypy.ini +0 -0
  28. {pyannotators_patterns-0.5.80 → pyannotators_patterns-0.5.84}/tests/assertions.py +0 -0
  29. {pyannotators_patterns-0.5.80 → pyannotators_patterns-0.5.84}/tests/test_credit_cards.py +0 -0
  30. {pyannotators_patterns-0.5.80 → pyannotators_patterns-0.5.84}/tests/test_emails.py +0 -0
  31. {pyannotators_patterns-0.5.80 → pyannotators_patterns-0.5.84}/tests/test_zip.py +0 -0
  32. {pyannotators_patterns-0.5.80 → pyannotators_patterns-0.5.84}/tox.ini +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pyannotators-patterns
3
- Version: 0.5.80
3
+ Version: 0.5.84
4
4
  Summary: Annotator based on Presidio pattern recognizer
5
5
  Home-page: https://github.com/oterrier/pyannotators_patterns/
6
6
  Keywords:
@@ -21,9 +21,10 @@ Classifier: Development Status :: 4 - Beta
21
21
  Classifier: Programming Language :: Python :: 3.8
22
22
  Requires-Dist: pymultirole-plugins>=0.5.0,<0.6.0
23
23
  Requires-Dist: spacy[lookups]==3.4.4
24
+ Requires-Dist: log-with-context
24
25
  Requires-Dist: collections_extended
25
26
  Requires-Dist: unidecode
26
- Requires-Dist: presidio-analyzer>=2.2.352
27
+ Requires-Dist: presidio-analyzer==2.2.354
27
28
  Requires-Dist: flit ; extra == "dev"
28
29
  Requires-Dist: pre-commit ; extra == "dev"
29
30
  Requires-Dist: bump2version ; extra == "dev"
@@ -27,9 +27,10 @@ classifiers = [
27
27
  requires = [
28
28
  "pymultirole-plugins>=0.5.0,<0.6.0",
29
29
  "spacy[lookups]==3.4.4",
30
+ "log-with-context",
30
31
  "collections_extended",
31
32
  "unidecode",
32
- "presidio-analyzer>=2.2.352"
33
+ "presidio-analyzer==2.2.354"
33
34
  ]
34
35
  dist-name = "pyannotators-patterns"
35
36
 
@@ -15,9 +15,10 @@ package_dir = \
15
15
  install_requires = \
16
16
  ['pymultirole-plugins>=0.5.0,<0.6.0',
17
17
  'spacy[lookups]==3.4.4',
18
+ 'log-with-context',
18
19
  'collections_extended',
19
20
  'unidecode',
20
- 'presidio-analyzer>=2.2.352']
21
+ 'presidio-analyzer==2.2.354']
21
22
 
22
23
  extras_require = \
23
24
  {'dev': ['flit', 'pre-commit', 'bump2version'],
@@ -39,7 +40,7 @@ entry_points = \
39
40
  'pyannotators_patterns.patterns:PatternsAnnotator']}
40
41
 
41
42
  setup(name='pyannotators-patterns',
42
- version='0.5.80',
43
+ version='0.5.84',
43
44
  description='Annotator based on Presidio pattern recognizer',
44
45
  author='Olivier Terrier',
45
46
  author_email='olivier.terrier@kairntech.com',
@@ -1,2 +1,2 @@
1
1
  """Annotator based on Presidio pattern recognizer"""
2
- __version__ = "0.5.80"
2
+ __version__ = "0.5.84"
@@ -4,6 +4,7 @@ from functools import lru_cache
4
4
  from typing import Type, List, cast, Dict
5
5
 
6
6
  import spacy
7
+ from log_with_context import add_logging_context
7
8
  from presidio_analyzer import Pattern, PatternRecognizer, RecognizerRegistry, AnalyzerEngine, LemmaContextAwareEnhancer
8
9
  from presidio_analyzer.nlp_engine import SpacyNlpEngine, NerModelConfiguration
9
10
  from pydantic import BaseModel, Field
@@ -81,44 +82,45 @@ class PatternsAnnotator(AnnotatorBase):
81
82
  mapping = frozenset(params.mapping.items())
82
83
  labels = list(params.mapping.keys())
83
84
  for document in documents:
84
- # Retrieve nlp pipe
85
- lang = document_language(document, None)
86
- if lang is None or lang not in supported_languages:
87
- raise AttributeError(f"Metadata language {lang} is required and must be in {SUPPORTED_LANGUAGES}")
88
- nlp = get_nlp(lang)
89
- registry = get_registry(mapping, lang)
90
- analyzer = AnalyzerEngine(registry=registry,
91
- nlp_engine=LoadedSpacyNlpEngine(lang, nlp),
92
- default_score_threshold=0,
93
- supported_languages=supported_languages,
94
- context_aware_enhancer=LemmaContextAwareEnhancer(params.context_similarity_factor,
95
- params.min_score_with_context_similarity,
96
- params.context_prefix_count,
97
- params.context_suffix_count))
98
-
99
- document.annotations = []
100
- if not document.sentences:
101
- document.sentences = [Span(start=0, end=len(document.text))]
102
-
103
- for s in document.sentences:
104
- if s.end > s.start:
105
- stext = document.text[s.start: s.end]
106
- results = analyzer.analyze(text=stext, entities=labels,
107
- language=lang, return_decision_process=True,
108
- score_threshold=params.score_threshold) # noqa D501
109
- for result in results:
110
- start = s.start + result.start
111
- end = s.start + + result.end
112
- document.annotations.append(
113
- Annotation(
114
- start=start,
115
- end=end,
116
- text=document.text[start: end],
117
- labelName=result.entity_type,
118
- score=result.score,
119
- properties={"analysis_explanation": result.analysis_explanation}
85
+ with add_logging_context(docid=document.identifier):
86
+ # Retrieve nlp pipe
87
+ lang = document_language(document, None)
88
+ if lang is None or lang not in supported_languages:
89
+ raise AttributeError(f"Metadata language {lang} is required and must be in {SUPPORTED_LANGUAGES}")
90
+ nlp = get_nlp(lang)
91
+ registry = get_registry(mapping, lang)
92
+ analyzer = AnalyzerEngine(registry=registry,
93
+ nlp_engine=LoadedSpacyNlpEngine(lang, nlp),
94
+ default_score_threshold=0,
95
+ supported_languages=supported_languages,
96
+ context_aware_enhancer=LemmaContextAwareEnhancer(params.context_similarity_factor,
97
+ params.min_score_with_context_similarity,
98
+ params.context_prefix_count,
99
+ params.context_suffix_count))
100
+
101
+ document.annotations = []
102
+ if not document.sentences:
103
+ document.sentences = [Span(start=0, end=len(document.text))]
104
+
105
+ for s in document.sentences:
106
+ if s.end > s.start:
107
+ stext = document.text[s.start: s.end]
108
+ results = analyzer.analyze(text=stext, entities=labels,
109
+ language=lang, return_decision_process=True,
110
+ score_threshold=params.score_threshold) # noqa D501
111
+ for result in results:
112
+ start = s.start + result.start
113
+ end = s.start + + result.end
114
+ document.annotations.append(
115
+ Annotation(
116
+ start=start,
117
+ end=end,
118
+ text=document.text[start: end],
119
+ labelName=result.entity_type,
120
+ score=result.score,
121
+ properties={"analysis_explanation": result.analysis_explanation}
122
+ )
120
123
  )
121
- )
122
124
 
123
125
  return documents
124
126
 
@@ -164,7 +166,10 @@ class LoadedSpacyNlpEngine(SpacyNlpEngine):
164
166
  def get_registry(mapping_items, lang):
165
167
  recognizers = []
166
168
  for pname, pvalue in mapping_items:
167
- pattern_definition = json.loads(pvalue)
169
+ try:
170
+ pattern_definition = json.loads(pvalue)
171
+ except BaseException:
172
+ logger.warning(f"Invalid json: {pvalue}", exc_info=True)
168
173
  patterns = [Pattern.from_dict(pat) for pat in pattern_definition['patterns']]
169
174
  recognizer = PatternRecognizer(
170
175
  supported_entity=pname, supported_language=lang, patterns=patterns, context=pattern_definition.get('context', None)