pyannotators-patterns 0.5.78__tar.gz → 0.5.82__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. {pyannotators_patterns-0.5.78 → pyannotators_patterns-0.5.82}/PKG-INFO +2 -1
  2. {pyannotators_patterns-0.5.78 → pyannotators_patterns-0.5.82}/pyproject.toml +1 -0
  3. {pyannotators_patterns-0.5.78 → pyannotators_patterns-0.5.82}/setup.py +2 -1
  4. {pyannotators_patterns-0.5.78 → pyannotators_patterns-0.5.82}/src/pyannotators_patterns/__init__.py +1 -1
  5. {pyannotators_patterns-0.5.78 → pyannotators_patterns-0.5.82}/src/pyannotators_patterns/patterns.py +45 -41
  6. {pyannotators_patterns-0.5.78 → pyannotators_patterns-0.5.82}/.bumpversion.cfg +0 -0
  7. {pyannotators_patterns-0.5.78 → pyannotators_patterns-0.5.82}/.github/workflows/main.yml +0 -0
  8. {pyannotators_patterns-0.5.78 → pyannotators_patterns-0.5.82}/.gitignore +0 -0
  9. {pyannotators_patterns-0.5.78 → pyannotators_patterns-0.5.82}/.pre-commit-config.yaml +0 -0
  10. {pyannotators_patterns-0.5.78 → pyannotators_patterns-0.5.82}/.readthedocs.yml +0 -0
  11. {pyannotators_patterns-0.5.78 → pyannotators_patterns-0.5.82}/AUTHORS.md +0 -0
  12. {pyannotators_patterns-0.5.78 → pyannotators_patterns-0.5.82}/CHANGELOG.md +0 -0
  13. {pyannotators_patterns-0.5.78 → pyannotators_patterns-0.5.82}/Dockerfile +0 -0
  14. {pyannotators_patterns-0.5.78 → pyannotators_patterns-0.5.82}/Jenkinsfile +0 -0
  15. {pyannotators_patterns-0.5.78 → pyannotators_patterns-0.5.82}/LICENSE +0 -0
  16. {pyannotators_patterns-0.5.78 → pyannotators_patterns-0.5.82}/README.md +0 -0
  17. {pyannotators_patterns-0.5.78 → pyannotators_patterns-0.5.82}/RELEASE.md +0 -0
  18. {pyannotators_patterns-0.5.78 → pyannotators_patterns-0.5.82}/bumpversion.py +0 -0
  19. {pyannotators_patterns-0.5.78 → pyannotators_patterns-0.5.82}/docs/.gitignore +0 -0
  20. {pyannotators_patterns-0.5.78 → pyannotators_patterns-0.5.82}/docs/CHANGELOG.md +0 -0
  21. {pyannotators_patterns-0.5.78 → pyannotators_patterns-0.5.82}/docs/LICENSE +0 -0
  22. {pyannotators_patterns-0.5.78 → pyannotators_patterns-0.5.82}/docs/_static/.gitkeep +0 -0
  23. {pyannotators_patterns-0.5.78 → pyannotators_patterns-0.5.82}/docs/_templates/.gitkeep +0 -0
  24. {pyannotators_patterns-0.5.78 → pyannotators_patterns-0.5.82}/docs/conf.py +0 -0
  25. {pyannotators_patterns-0.5.78 → pyannotators_patterns-0.5.82}/docs/index.rst +0 -0
  26. {pyannotators_patterns-0.5.78 → pyannotators_patterns-0.5.82}/hgnc_cache.sqlite +0 -0
  27. {pyannotators_patterns-0.5.78 → pyannotators_patterns-0.5.82}/mypy.ini +0 -0
  28. {pyannotators_patterns-0.5.78 → pyannotators_patterns-0.5.82}/tests/assertions.py +0 -0
  29. {pyannotators_patterns-0.5.78 → pyannotators_patterns-0.5.82}/tests/test_credit_cards.py +0 -0
  30. {pyannotators_patterns-0.5.78 → pyannotators_patterns-0.5.82}/tests/test_emails.py +0 -0
  31. {pyannotators_patterns-0.5.78 → pyannotators_patterns-0.5.82}/tests/test_zip.py +0 -0
  32. {pyannotators_patterns-0.5.78 → pyannotators_patterns-0.5.82}/tox.ini +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pyannotators-patterns
3
- Version: 0.5.78
3
+ Version: 0.5.82
4
4
  Summary: Annotator based on Presidio pattern recognizer
5
5
  Home-page: https://github.com/oterrier/pyannotators_patterns/
6
6
  Keywords:
@@ -21,6 +21,7 @@ Classifier: Development Status :: 4 - Beta
21
21
  Classifier: Programming Language :: Python :: 3.8
22
22
  Requires-Dist: pymultirole-plugins>=0.5.0,<0.6.0
23
23
  Requires-Dist: spacy[lookups]==3.4.4
24
+ Requires-Dist: log-with-context
24
25
  Requires-Dist: collections_extended
25
26
  Requires-Dist: unidecode
26
27
  Requires-Dist: presidio-analyzer>=2.2.352
@@ -27,6 +27,7 @@ classifiers = [
27
27
  requires = [
28
28
  "pymultirole-plugins>=0.5.0,<0.6.0",
29
29
  "spacy[lookups]==3.4.4",
30
+ "log-with-context",
30
31
  "collections_extended",
31
32
  "unidecode",
32
33
  "presidio-analyzer>=2.2.352"
@@ -15,6 +15,7 @@ package_dir = \
15
15
  install_requires = \
16
16
  ['pymultirole-plugins>=0.5.0,<0.6.0',
17
17
  'spacy[lookups]==3.4.4',
18
+ 'log-with-context',
18
19
  'collections_extended',
19
20
  'unidecode',
20
21
  'presidio-analyzer>=2.2.352']
@@ -39,7 +40,7 @@ entry_points = \
39
40
  'pyannotators_patterns.patterns:PatternsAnnotator']}
40
41
 
41
42
  setup(name='pyannotators-patterns',
42
- version='0.5.78',
43
+ version='0.5.82',
43
44
  description='Annotator based on Presidio pattern recognizer',
44
45
  author='Olivier Terrier',
45
46
  author_email='olivier.terrier@kairntech.com',
@@ -1,2 +1,2 @@
1
1
  """Annotator based on Presidio pattern recognizer"""
2
- __version__ = "0.5.78"
2
+ __version__ = "0.5.82"
@@ -4,6 +4,7 @@ from functools import lru_cache
4
4
  from typing import Type, List, cast, Dict
5
5
 
6
6
  import spacy
7
+ from log_with_context import add_logging_context
7
8
  from presidio_analyzer import Pattern, PatternRecognizer, RecognizerRegistry, AnalyzerEngine, LemmaContextAwareEnhancer
8
9
  from presidio_analyzer.nlp_engine import SpacyNlpEngine, NerModelConfiguration
9
10
  from pydantic import BaseModel, Field
@@ -80,46 +81,46 @@ class PatternsAnnotator(AnnotatorBase):
80
81
 
81
82
  mapping = frozenset(params.mapping.items())
82
83
  labels = list(params.mapping.keys())
83
- registry = get_registry(mapping)
84
-
85
84
  for document in documents:
86
- # Retrieve nlp pipe
87
- lang = document_language(document, None)
88
- if lang is None or lang not in supported_languages:
89
- raise AttributeError(f"Metadata language {lang} is required and must be in {SUPPORTED_LANGUAGES}")
90
- nlp = get_nlp(lang)
91
- analyzer = AnalyzerEngine(registry=registry,
92
- nlp_engine=LoadedSpacyNlpEngine(lang, nlp),
93
- default_score_threshold=0,
94
- supported_languages=supported_languages,
95
- context_aware_enhancer=LemmaContextAwareEnhancer(params.context_similarity_factor,
96
- params.min_score_with_context_similarity,
97
- params.context_prefix_count,
98
- params.context_suffix_count))
99
-
100
- document.annotations = []
101
- if not document.sentences:
102
- document.sentences = [Span(start=0, end=len(document.text))]
103
-
104
- for s in document.sentences:
105
- if s.end > s.start:
106
- stext = document.text[s.start: s.end]
107
- results = analyzer.analyze(text=stext, entities=labels,
108
- language=lang, return_decision_process=True,
109
- score_threshold=params.score_threshold) # noqa D501
110
- for result in results:
111
- start = s.start + result.start
112
- end = s.start + + result.end
113
- document.annotations.append(
114
- Annotation(
115
- start=start,
116
- end=end,
117
- text=document.text[start: end],
118
- labelName=result.entity_type,
119
- score=result.score,
120
- properties={"analysis_explanation": result.analysis_explanation}
85
+ with add_logging_context(docid=document.identifier):
86
+ # Retrieve nlp pipe
87
+ lang = document_language(document, None)
88
+ if lang is None or lang not in supported_languages:
89
+ raise AttributeError(f"Metadata language {lang} is required and must be in {SUPPORTED_LANGUAGES}")
90
+ nlp = get_nlp(lang)
91
+ registry = get_registry(mapping, lang)
92
+ analyzer = AnalyzerEngine(registry=registry,
93
+ nlp_engine=LoadedSpacyNlpEngine(lang, nlp),
94
+ default_score_threshold=0,
95
+ supported_languages=supported_languages,
96
+ context_aware_enhancer=LemmaContextAwareEnhancer(params.context_similarity_factor,
97
+ params.min_score_with_context_similarity,
98
+ params.context_prefix_count,
99
+ params.context_suffix_count))
100
+
101
+ document.annotations = []
102
+ if not document.sentences:
103
+ document.sentences = [Span(start=0, end=len(document.text))]
104
+
105
+ for s in document.sentences:
106
+ if s.end > s.start:
107
+ stext = document.text[s.start: s.end]
108
+ results = analyzer.analyze(text=stext, entities=labels,
109
+ language=lang, return_decision_process=True,
110
+ score_threshold=params.score_threshold) # noqa D501
111
+ for result in results:
112
+ start = s.start + result.start
113
+ end = s.start + + result.end
114
+ document.annotations.append(
115
+ Annotation(
116
+ start=start,
117
+ end=end,
118
+ text=document.text[start: end],
119
+ labelName=result.entity_type,
120
+ score=result.score,
121
+ properties={"analysis_explanation": result.analysis_explanation}
122
+ )
121
123
  )
122
- )
123
124
 
124
125
  return documents
125
126
 
@@ -162,13 +163,16 @@ class LoadedSpacyNlpEngine(SpacyNlpEngine):
162
163
 
163
164
 
164
165
  @lru_cache(maxsize=None)
165
- def get_registry(mapping_items):
166
+ def get_registry(mapping_items, lang):
166
167
  recognizers = []
167
168
  for pname, pvalue in mapping_items:
168
- pattern_definition = json.loads(pvalue)
169
+ try:
170
+ pattern_definition = json.loads(pvalue)
171
+ except BaseException:
172
+ logger.warning(f"Invalid json: {pvalue}", exc_info=True)
169
173
  patterns = [Pattern.from_dict(pat) for pat in pattern_definition['patterns']]
170
174
  recognizer = PatternRecognizer(
171
- supported_entity=pname, patterns=patterns, context=pattern_definition.get('context', None)
175
+ supported_entity=pname, supported_language=lang, patterns=patterns, context=pattern_definition.get('context', None)
172
176
  )
173
177
  recognizers.append(recognizer)
174
178
  registry = RecognizerRegistry(recognizers)