personal_knowledge_library 3.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of personal_knowledge_library might be problematic. Click here for more details.
- knowledge/__init__.py +91 -0
- knowledge/base/__init__.py +22 -0
- knowledge/base/access.py +167 -0
- knowledge/base/entity.py +267 -0
- knowledge/base/language.py +27 -0
- knowledge/base/ontology.py +2734 -0
- knowledge/base/search.py +473 -0
- knowledge/base/tenant.py +192 -0
- knowledge/nel/__init__.py +11 -0
- knowledge/nel/base.py +495 -0
- knowledge/nel/engine.py +123 -0
- knowledge/ontomapping/__init__.py +667 -0
- knowledge/ontomapping/manager.py +320 -0
- knowledge/public/__init__.py +27 -0
- knowledge/public/cache.py +115 -0
- knowledge/public/helper.py +373 -0
- knowledge/public/relations.py +128 -0
- knowledge/public/wikidata.py +1324 -0
- knowledge/services/__init__.py +128 -0
- knowledge/services/asyncio/__init__.py +7 -0
- knowledge/services/asyncio/base.py +458 -0
- knowledge/services/asyncio/graph.py +1420 -0
- knowledge/services/asyncio/group.py +450 -0
- knowledge/services/asyncio/search.py +439 -0
- knowledge/services/asyncio/users.py +270 -0
- knowledge/services/base.py +533 -0
- knowledge/services/graph.py +1897 -0
- knowledge/services/group.py +819 -0
- knowledge/services/helper.py +142 -0
- knowledge/services/ontology.py +1234 -0
- knowledge/services/search.py +488 -0
- knowledge/services/session.py +444 -0
- knowledge/services/tenant.py +281 -0
- knowledge/services/users.py +445 -0
- knowledge/utils/__init__.py +10 -0
- knowledge/utils/graph.py +417 -0
- knowledge/utils/wikidata.py +197 -0
- knowledge/utils/wikipedia.py +175 -0
- personal_knowledge_library-3.0.0.dist-info/LICENSE +201 -0
- personal_knowledge_library-3.0.0.dist-info/METADATA +1163 -0
- personal_knowledge_library-3.0.0.dist-info/RECORD +42 -0
- personal_knowledge_library-3.0.0.dist-info/WHEEL +4 -0
knowledge/nel/base.py
ADDED
|
@@ -0,0 +1,495 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
# Copyright © 2021-present Wacom. All rights reserved.
|
|
3
|
+
import abc
|
|
4
|
+
from enum import Enum
|
|
5
|
+
from typing import Optional, List
|
|
6
|
+
|
|
7
|
+
from knowledge.base.language import LocaleCode, EN_US
|
|
8
|
+
from knowledge.base.ontology import THING_CLASS, OntologyClassReference
|
|
9
|
+
from knowledge.services.base import WacomServiceAPIClient, RESTAPIClient
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class EntityType(Enum):
    """
    Entity types
    ------------
    Categories of entities, told apart by the kind of knowledge graph (if any) they are linked to.
    """

    PUBLIC_ENTITY = 0
    """Public entity - the entity comes from a public knowledge graph"""
    PERSONAL_ENTITY = 1
    """Personal entity - the entity comes from a personal / organisational knowledge graph"""
    NAMED_ENTITY = 2
    """Simple entity - the entity is not linked to any knowledge graph"""
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class KnowledgeSource(Enum):
    """
    Knowledge source
    ----------------
    Enumeration of the knowledge sources that are used within Semantic Ink.
    """

    WIKIDATA = "wikidata"
    """Wikidata"""
    DBPEDIA = "dbpedia"
    """DBpedia"""
    WACOM_KNOWLEDGE = "wacom"
    """Wacom Personal Knowledge"""
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class BasicType(Enum):
    """
    Basic type
    ----------
    Coarse entity categories, used for instance by named entity recognition.
    """

    UNKNOWN = "Unknown"
    MONEY = "Money"
    PERSON = "Person"
    DATE = "Date"
    PLACE = "Place"
    TIME = "Time"
    NUMBER = "Number"
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class EntitySource:
    """
    EntitySource
    ------------
    Pairs the URI of an entity with the knowledge source it originates from.

    Parameters
    ----------
    uri: str
        URI of the entity
    source: KnowledgeSource
        Knowledge source the entity originates from.
    """

    def __init__(self, uri: str, source: KnowledgeSource):
        self.__uri = uri
        self.__source: KnowledgeSource = source

    @property
    def uri(self) -> str:
        """URI identifying the entity within its knowledge graph."""
        return self.__uri

    @property
    def source(self) -> KnowledgeSource:
        """Knowledge source of the entity."""
        return self.__source

    def __repr__(self):
        uri, source = self.uri, self.source
        return f"{uri} ({source})"
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
class NamedEntity(abc.ABC):
    """
    NamedEntity
    -----------
    A named entity recognized by a recognition engine.
    The class stores the matched text, its position within the analysed text, and the type of the entity.

    Parameters
    ----------
    ref_text: str
        Reference text. Entity found for this specific text
    start_idx: int
        Start index within the full reference text
    end_idx: int
        End index within the full reference text
    entity_type: EntityType
        Type of the entity.
    """

    def __init__(self, ref_text: str, start_idx: int, end_idx: int, entity_type: EntityType):
        self.__ref_text: str = ref_text
        self.__start_idx: int = start_idx
        self.__end_idx: int = end_idx
        self.__type: EntityType = entity_type

    @property
    def ref_text(self) -> str:
        """Reference text for which the entity has been found"""
        return self.__ref_text

    @property
    def start_idx(self) -> int:
        """Start index within the text handed to the named entity recognition."""
        return self.__start_idx

    @property
    def end_idx(self) -> int:
        """End index within the text handed to the named entity recognition."""
        return self.__end_idx

    @property
    def entity_type(self) -> EntityType:
        """Type of the entity."""
        return self.__type

    def __repr__(self):
        # The closing bracket was missing; the subclasses in this module render the span as "[start-end]".
        return f"{self.ref_text} [{self.start_idx}-{self.end_idx}]"
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
class KnowledgeGraphEntity(NamedEntity):
    """
    Knowledge graph entity
    ----------------------
    Named entity that is linked to an entry of a knowledge graph.

    Parameters
    ----------
    ref_text: str
        Reference text. Entity found for this specific text
    start_idx: int
        Start index within the full reference text
    end_idx: int
        End index within the full reference text
    label: str
        Main label of the entity.
    confidence: float
        Confidence value if available
    source: EntitySource
        Source of the entity
    content_link: str
        Link to a site with content
    ontology_types: List[str]
        List of ontology types (class names)
    entity_type: EntityType
        Type of the entity.
    """

    def __init__(
        self,
        ref_text: str,
        start_idx: int,
        end_idx: int,
        label: str,
        confidence: float,
        source: EntitySource,
        content_link: str,
        ontology_types: List[str],
        entity_type: EntityType = EntityType.PUBLIC_ENTITY,
    ):
        super().__init__(ref_text, start_idx, end_idx, entity_type)
        # Values supplied by the caller
        self.__label: str = label
        self.__confidence: float = confidence
        self.__source: EntitySource = source
        self.__content_link: str = content_link
        self.__ontology_types: List[str] = ontology_types
        # Optional details; unset until assigned through the corresponding setters
        self.__description: Optional[str] = None
        self.__thumbnail: Optional[str] = None
        # Starts out as THING_CLASS until a more specific type is assigned
        self.__relevant_type: OntologyClassReference = THING_CLASS

    @property
    def entity_source(self) -> EntitySource:
        """Source of the entity."""
        return self.__source

    @property
    def label(self) -> str:
        """Label of the entity from the knowledge graph."""
        return self.__label

    @property
    def confidence(self) -> float:
        """Confidence level of the system that links the entities."""
        return self.__confidence

    @confidence.setter
    def confidence(self, value: float):
        self.__confidence = value

    @property
    def description(self) -> Optional[str]:
        """Description of the entity if available."""
        return self.__description

    @description.setter
    def description(self, value: str):
        self.__description = value

    @property
    def thumbnail(self) -> Optional[str]:
        """Thumbnail that describes the entity, if available."""
        return self.__thumbnail

    @thumbnail.setter
    def thumbnail(self, value: str):
        self.__thumbnail = value

    @property
    def content_link(self) -> str:
        """Link to content page."""
        return self.__content_link

    @property
    def ontology_types(self) -> List[str]:
        """List of ontology types."""
        return self.__ontology_types

    @property
    def relevant_type(self) -> OntologyClassReference:
        """Most relevant ontology type, i.e. the one that links to Wacom's personal knowledge base ontology."""
        return self.__relevant_type

    @relevant_type.setter
    def relevant_type(self, value: OntologyClassReference):
        self.__relevant_type = value

    def __repr__(self):
        span: str = f"[{self.start_idx}-{self.end_idx}]"
        return f"{self.ref_text} {span} -> {self.entity_source} [{self.entity_type}]"
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
class BasicNamedEntity(NamedEntity):
    """
    Basic named entity
    ------------------
    Entity produced by named entity recognition; it carries a basic type and is always registered
    with the entity type `EntityType.NAMED_ENTITY`.

    Parameters
    ----------
    ref_text: str
        Reference text. Entity found for this specific text
    start_idx: int
        Start index within the full reference text
    end_idx: int
        End index within the full reference text
    basic_type: BasicType
        Basic type of the entity.
    """

    def __init__(self, ref_text: str, start_idx: int, end_idx: int, basic_type: BasicType):
        super().__init__(ref_text, start_idx, end_idx, EntityType.NAMED_ENTITY)
        self.__basic_type: BasicType = basic_type

    @property
    def basic_type(self) -> BasicType:
        """Basic type that is recognized."""
        return self.__basic_type

    def __repr__(self):
        span: str = f"[{self.start_idx}-{self.end_idx}]"
        return f"{self.ref_text} {span} -> {self.basic_type}"
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
class PersonalEntityLinkingProcessor(WacomServiceAPIClient):
    """
    PersonalEntityLinkingProcessor
    ------------------------------
    Service that links entities to entities in a personal knowledge graph.

    Parameters
    ----------
    service_url: str
        URL where the service has been deployed. Must be provided by the caller.
    supported_languages: Optional[List[str]] (default:=None)
        List of supported languages; an empty list is used if nothing is passed.
    verify_calls: bool (default:=True)
        Verifies all HTTPS calls and the associated certificate.

    Raises
    ------
    ValueError
        If `service_url` is omitted.
    """

    def __init__(
        self, service_url: str = str, supported_languages: Optional[List[str]] = None, verify_calls: bool = True
    ):
        # The declared default of `service_url` is the `str` type object itself, which can never be a usable URL.
        # The signature is kept as-is for compatibility; an omitted URL is rejected here with a clear message
        # instead of being forwarded to the base client.
        if service_url is str:
            raise ValueError("service_url must be provided as a string.")
        super().__init__(
            application_name="Personal entity linking",
            service_url=service_url,
            service_endpoint="graph/v1",
            verify_calls=verify_calls,
        )
        self.__supported_languages: List[str] = supported_languages if supported_languages else []

    @abc.abstractmethod
    def link_personal_entities(
        self, text: str, language_code: LocaleCode = EN_US, auth_key: Optional[str] = None, max_retries: int = 5
    ) -> List[KnowledgeGraphEntity]:
        """
        Performs Named Entity Linking on a text. It only finds entities which are accessible by the user identified by
        the auth key.

        Parameters
        ----------
        text: str
            Text where the entities shall be tagged in.
        language_code: LocaleCode (default:= 'en_US')
            ISO-3166 Country Codes and ISO-639 Language Codes in the format '<language_code>_<country>', e.g., en_US.
        auth_key: Optional[str] (default:=None)
            Auth key identifying a user within the Wacom personal knowledge service.
        max_retries: int (default:=5)
            Maximum number of retries, if the service is not available.

        Returns
        -------
        entities: List[KnowledgeGraphEntity]
            List of knowledge graph entities.
        """
        raise NotImplementedError

    @property
    def supported_language(self) -> List[str]:
        """List of supported languages."""
        return self.__supported_languages

    def is_language_supported(self, language_code: LocaleCode) -> bool:
        """Is the language_code code supported by the engine.

        Parameters
        ----------
        language_code: LocaleCode
            ISO-3166 Country Codes and ISO-639 Language Codes in the format '<language_code>_<country>', e.g., en_US.

        Returns
        -------
        flag: bool
            Flag if this language_code code is supported.
        """
        return language_code in self.supported_language

    def __repr__(self):
        return f"Personal Entity Linking:= {self.service_url}"
|
|
352
|
+
|
|
353
|
+
|
|
354
|
+
class NamedEntityRecognitionProcessor(WacomServiceAPIClient):
    """
    NamedEntityRecognitionProcessor
    -------------------------------
    Service that recognizes entities.

    Parameters
    ----------
    service_url: str
        URL where the service has been deployed
    supported_languages: Optional[List[LocaleCode]] (default:=None)
        List of supported languages; an empty list is used if nothing is passed.
    verify_calls: bool (default:=False)
        Verifies all HTTPS calls and the associated certificate.
    """

    def __init__(
        self, service_url: str, supported_languages: Optional[List[LocaleCode]] = None, verify_calls: bool = False
    ):
        super().__init__(
            application_name="Named Entity Linking",
            service_url=service_url,
            service_endpoint="graph",
            verify_calls=verify_calls,
        )
        self.__supported_languages: List[LocaleCode] = supported_languages if supported_languages else []

    @abc.abstractmethod
    def named_entities(self, text: str, language_code: LocaleCode = EN_US) -> List[NamedEntity]:
        """
        Performs Named Entity Recognition on a text.

        Parameters
        ----------
        text: str
            Text where the entities shall be tagged in.
        language_code: LocaleCode (default:= 'en_US')
            ISO-3166 Country Codes and ISO-639 Language Codes in the format '<language_code>_<country>', e.g., en_US.

        Returns
        -------
        entities: List[NamedEntity]
            List of knowledge named entities.
        """
        raise NotImplementedError

    @property
    def supported_language(self) -> List[LocaleCode]:
        """List of supported languages."""
        return self.__supported_languages

    def is_language_supported(self, language_code: LocaleCode) -> bool:
        """Is the language_code code supported by the engine.

        Parameters
        ----------
        language_code: LocaleCode
            ISO-3166 Country Codes and ISO-639 Language Codes in the format '<language_code>_<country>', e.g., en_US.

        Returns
        -------
        flag: bool
            Flag if this language_code code is supported
        """
        return language_code in self.supported_language

    def __repr__(self):
        # `self.__service_url` would be name-mangled to `_NamedEntityRecognitionProcessor__service_url`, an
        # attribute this class never sets; the URL is read through the `service_url` property instead, as the
        # sibling processors in this module do.
        # NOTE(review): the label says "Public entity linking" although this is the NER processor; the text is
        # kept unchanged here - confirm whether it should be renamed.
        return f"Public entity linking:= {self.service_url}"
|
|
420
|
+
|
|
421
|
+
|
|
422
|
+
class PublicEntityLinkingProcessor(RESTAPIClient):
    """
    Public Entity Linking
    ---------------------
    Service that links entities to public entities in a knowledge graph.

    Parameters
    ----------
    service_url: str
        URL where the service has been deployed
    provider: str (default:='external')
        Name of the provider of the service.
    supported_languages: List[str] = None
        List of supported languages; an empty list is used if nothing is passed.
    verify_calls: bool (default:=False)
        Verifies all HTTPS calls and the associated certificate.
    """

    def __init__(
        self,
        service_url: str,
        provider: str = "external",
        supported_languages: List[str] = None,
        verify_calls: bool = False,
    ):
        super().__init__(service_url=service_url, verify_calls=verify_calls)
        self.__provider: str = provider
        # A missing (or empty) language list is stored as a fresh empty list
        self.__supported_languages: List[str] = supported_languages or []

    @abc.abstractmethod
    def link_public_entities(self, text: str, language_code: LocaleCode = EN_US) -> List[KnowledgeGraphEntity]:
        """
        Performs Named Entity Linking on a text. It only finds entities within a large public knowledge graph.

        Parameters
        ----------
        text: str
            Text where the entities shall be tagged in.
        language_code: LocaleCode (default:= 'en_US')
            ISO-3166 Country Codes and ISO-639 Language Codes in the format '<language_code>_<country>', e.g., en_US.

        Returns
        -------
        entities: List[KnowledgeGraphEntity]
            List of public knowledge graph entities.
        """
        raise NotImplementedError

    @property
    def supported_language(self) -> List[str]:
        """List of supported languages."""
        return self.__supported_languages

    def is_language_supported(self, language_code: LocaleCode) -> bool:
        """
        Checks whether the given language_code is supported by the engine.

        Parameters
        ----------
        language_code: LocaleCode
            ISO-3166 Country Codes and ISO-639 Language Codes in the format '<language_code>_<country>', e.g., en_US.

        Returns
        -------
        flag: bool
            Flag if this language_code code is supported
        """
        languages: List[str] = self.supported_language
        return language_code in languages

    @property
    def provider(self) -> str:
        """Provider of the service."""
        return self.__provider

    def __repr__(self):
        url = self.service_url
        return f"Public Entity Linking:= {url}"
|
knowledge/nel/engine.py
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
# Copyright © 2021-present Wacom. All rights reserved.
|
|
3
|
+
from typing import Optional, List, Dict
|
|
4
|
+
|
|
5
|
+
import requests
|
|
6
|
+
from requests import Response
|
|
7
|
+
from requests.adapters import HTTPAdapter, Retry
|
|
8
|
+
|
|
9
|
+
from knowledge.base.entity import LOCALE_TAG, TEXT_TAG
|
|
10
|
+
from knowledge.base.language import LocaleCode, DE_DE, EN_US, JA_JP
|
|
11
|
+
from knowledge.base.ontology import OntologyClassReference
|
|
12
|
+
from knowledge.nel.base import (
|
|
13
|
+
PersonalEntityLinkingProcessor,
|
|
14
|
+
EntitySource,
|
|
15
|
+
KnowledgeSource,
|
|
16
|
+
KnowledgeGraphEntity,
|
|
17
|
+
EntityType,
|
|
18
|
+
)
|
|
19
|
+
from knowledge.services.base import handle_error
|
|
20
|
+
from knowledge.services.graph import AUTHORIZATION_HEADER_FLAG, CONTENT_TYPE_HEADER_FLAG
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class WacomEntityLinkingEngine(PersonalEntityLinkingProcessor):
    """
    Wacom Engine
    ------------
    Performing Wacom's Named entity linking.

    Parameters
    ----------
    service_url: str
        URL of the service
    service_endpoint: str
        Endpoint of the service
    """

    SERVICE_ENDPOINT: str = "graph/v1/nel/text"
    SERVICE_URL: str = "https://private-knowledge.wacom.com"
    LANGUAGES: List[LocaleCode] = [DE_DE, EN_US, JA_JP]

    def __init__(self, service_url: str = SERVICE_URL, service_endpoint: str = SERVICE_ENDPOINT):
        self.__service_endpoint: str = service_endpoint
        super().__init__(supported_languages=WacomEntityLinkingEngine.LANGUAGES, service_url=service_url)

    def link_personal_entities(
        self, text: str, language_code: LocaleCode = EN_US, auth_key: Optional[str] = None, max_retries: int = 5
    ) -> List[KnowledgeGraphEntity]:
        """
        Performs Named Entity Linking on a text. It only finds entities which are accessible by the user identified by
        the auth key.

        Parameters
        ----------
        text: str
            Text where the entities shall be tagged in.
        language_code: LocaleCode
            ISO-3166 Country Codes and ISO-639 Language Codes in the format '<language_code>_<country>', e.g., 'en_US'.
        auth_key: Optional[str]
            If the auth key is set the logged-in user (if any) will be ignored and the auth key will be used.
            If it is None, the token is taken from `handle_token()`.
        max_retries: int
            Maximum number of retries, if the service is not available.

        Returns
        -------
        entities: List[KnowledgeGraphEntity]
            List of knowledge graph entities.

        Raises
        ------
        WacomServiceException
            If the Named Entity Linking service returns an error code.
        """
        if auth_key is None:
            auth_key, _ = self.handle_token()
        named_entities: List[KnowledgeGraphEntity] = []
        url: str = f"{self.service_url}/{self.__service_endpoint}"
        headers: Dict[str, str] = {
            AUTHORIZATION_HEADER_FLAG: f"Bearer {auth_key}",
            CONTENT_TYPE_HEADER_FLAG: "application/json",
        }
        payload: Dict[str, str] = {LOCALE_TAG: language_code, TEXT_TAG: text}
        # Define the retry policy
        retry_policy: Retry = Retry(
            total=max_retries,  # maximum number of retries
            backoff_factor=0.5,  # factor by which to multiply the delay between retries
            status_forcelist=[429, 500, 502, 503, 504],  # HTTP status codes to retry on
            respect_retry_after_header=True,  # respect the Retry-After header
        )

        # Create a session and mount the retry adapter
        with requests.Session() as session:
            retry_adapter = HTTPAdapter(max_retries=retry_policy)
            session.mount("https://", retry_adapter)
            response: Response = session.post(url, headers=headers, json=payload, verify=self.verify_calls)
            if response.ok:
                # The response body is iterated as a sequence of entity dictionaries
                results: List[dict] = response.json()
                for e in results:
                    entity_types: List[str] = []
                    # --------------------------- Entity content -----------------------------------------------------
                    source: Optional[EntitySource] = None
                    if "uri" in e:
                        source = EntitySource(e["uri"], KnowledgeSource.WACOM_KNOWLEDGE)
                    # --------------------------- Ontology types -----------------------------------------------------
                    if "type" in e:
                        entity_types.append(e["type"])
                    # ------------------------------------------------------------------------------------------------
                    start: int = e["startPosition"]
                    end: int = e["endPosition"]
                    ne: KnowledgeGraphEntity = KnowledgeGraphEntity(
                        # the end position is treated as inclusive when cutting out the reference text
                        ref_text=text[start : end + 1],
                        start_idx=start,
                        end_idx=end,
                        label=e["value"],
                        confidence=0.0,
                        source=source,
                        content_link="",
                        ontology_types=entity_types,
                        entity_type=EntityType.PERSONAL_ENTITY,
                    )
                    # "type" is treated as optional above, so it must not be read unconditionally here; without
                    # it the entity keeps the relevant type assigned by KnowledgeGraphEntity on construction.
                    if "type" in e:
                        ne.relevant_type = OntologyClassReference.parse(e["type"])
                    named_entities.append(ne)
                return named_entities
            raise handle_error(f"Named entity linking for text:={text}@{language_code}. ", response)
|