personal_knowledge_library 3.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of personal_knowledge_library might be problematic. Click here for more details.

Files changed (42) hide show
  1. knowledge/__init__.py +91 -0
  2. knowledge/base/__init__.py +22 -0
  3. knowledge/base/access.py +167 -0
  4. knowledge/base/entity.py +267 -0
  5. knowledge/base/language.py +27 -0
  6. knowledge/base/ontology.py +2734 -0
  7. knowledge/base/search.py +473 -0
  8. knowledge/base/tenant.py +192 -0
  9. knowledge/nel/__init__.py +11 -0
  10. knowledge/nel/base.py +495 -0
  11. knowledge/nel/engine.py +123 -0
  12. knowledge/ontomapping/__init__.py +667 -0
  13. knowledge/ontomapping/manager.py +320 -0
  14. knowledge/public/__init__.py +27 -0
  15. knowledge/public/cache.py +115 -0
  16. knowledge/public/helper.py +373 -0
  17. knowledge/public/relations.py +128 -0
  18. knowledge/public/wikidata.py +1324 -0
  19. knowledge/services/__init__.py +128 -0
  20. knowledge/services/asyncio/__init__.py +7 -0
  21. knowledge/services/asyncio/base.py +458 -0
  22. knowledge/services/asyncio/graph.py +1420 -0
  23. knowledge/services/asyncio/group.py +450 -0
  24. knowledge/services/asyncio/search.py +439 -0
  25. knowledge/services/asyncio/users.py +270 -0
  26. knowledge/services/base.py +533 -0
  27. knowledge/services/graph.py +1897 -0
  28. knowledge/services/group.py +819 -0
  29. knowledge/services/helper.py +142 -0
  30. knowledge/services/ontology.py +1234 -0
  31. knowledge/services/search.py +488 -0
  32. knowledge/services/session.py +444 -0
  33. knowledge/services/tenant.py +281 -0
  34. knowledge/services/users.py +445 -0
  35. knowledge/utils/__init__.py +10 -0
  36. knowledge/utils/graph.py +417 -0
  37. knowledge/utils/wikidata.py +197 -0
  38. knowledge/utils/wikipedia.py +175 -0
  39. personal_knowledge_library-3.0.0.dist-info/LICENSE +201 -0
  40. personal_knowledge_library-3.0.0.dist-info/METADATA +1163 -0
  41. personal_knowledge_library-3.0.0.dist-info/RECORD +42 -0
  42. personal_knowledge_library-3.0.0.dist-info/WHEEL +4 -0
@@ -0,0 +1,320 @@
1
+ # -*- coding: utf-8 -*-
2
+ # Copyright © 2023-present Wacom. All rights reserved.
3
+ import logging
4
+ import time
5
+ from datetime import datetime
6
+ from typing import Optional, Any, List, Dict, Tuple, Set
7
+
8
+ from knowledge.base.entity import Label, LanguageCode, Description
9
+ from knowledge.base.ontology import (
10
+ ThingObject,
11
+ DataProperty,
12
+ SYSTEM_SOURCE_SYSTEM,
13
+ SYSTEM_SOURCE_REFERENCE_ID,
14
+ OntologyClassReference,
15
+ OntologyPropertyReference,
16
+ ObjectProperty,
17
+ )
18
+ from knowledge.ontomapping import (
19
+ ClassConfiguration,
20
+ TOPIC_CLASS,
21
+ superclasses_cache,
22
+ PropertyConfiguration,
23
+ PropertyType,
24
+ get_mapping_configuration,
25
+ save_superclasses_cache,
26
+ superclasses_path,
27
+ )
28
+ from knowledge.public.cache import pull_wikidata_object
29
+ from knowledge.public.wikidata import WikidataThing, WikiDataAPIClient, WikidataClass, WikidataProperty
30
+ from knowledge.base.language import LOCALE_LANGUAGE_MAPPING, LocaleCode, LANGUAGE_LOCALE_MAPPING, EN_US
31
+ from knowledge.utils.wikipedia import get_wikipedia_summary
32
+
33
+
34
+ def flatten(hierarchy: WikidataClass, use_names: bool = False) -> List[str]:
35
+ """
36
+ Flattens the hierarchy.
37
+
38
+ Parameters
39
+ ----------
40
+ hierarchy: WikidataClass
41
+ Hierarchy
42
+ use_names: bool
43
+ Use names instead of QIDs.
44
+
45
+ Returns
46
+ -------
47
+ hierarchy: List[str]
48
+ Hierarchy
49
+
50
+ """
51
+ hierarchy_list: List[str] = [hierarchy.qid]
52
+ jobs: List[WikidataClass] = [hierarchy]
53
+ while len(jobs) > 0:
54
+ job: WikidataClass = jobs.pop()
55
+ if use_names:
56
+ hierarchy_list.append(f"{job.qid} ({job.label})")
57
+ else:
58
+ hierarchy_list.append(job.qid)
59
+ for c in job.superclasses:
60
+ if use_names:
61
+ if f"{job.qid} ({job.label})" not in hierarchy_list:
62
+ jobs.append(c)
63
+ elif c.qid not in hierarchy_list:
64
+ jobs.append(c)
65
+ return hierarchy_list
66
+
67
+
68
def wikidata_taxonomy(qid: str) -> Optional[WikidataClass]:
    """
    Returns the taxonomy (superclass hierarchy) of a Wikidata thing.

    Results are served from the shared ``superclasses_cache`` when available;
    otherwise the hierarchy is fetched from the Wikidata API, merged into the
    cache, and the cache is persisted to disk.

    Parameters
    ----------
    qid: str
        Wikidata QID.

    Returns
    -------
    hierarchy: Optional[WikidataClass]
        Hierarchy for the QID, or None if the taxonomy could not be resolved.
    """
    # Serve from the shared cache first to avoid an API round-trip.
    if superclasses_cache and qid in superclasses_cache:
        return superclasses_cache[qid]
    hierarchy: Dict[str, WikidataClass] = WikiDataAPIClient.superclasses(qid)
    if qid not in hierarchy:
        # Fix: lazy %-style args — the message is only formatted when the
        # record is actually emitted (and works under any log level config).
        logging.warning("Taxonomy for %s not found.", qid)
        return None
    if hierarchy:
        # Merge everything we learned (the whole subgraph), then persist.
        superclasses_cache.update(hierarchy)
        save_superclasses_cache(superclasses_path)
    return hierarchy.get(qid)
92
+
93
+
94
def convert_dict(structure: Dict[str, Any], locale: str) -> Optional[str]:
    """
    Converts a Wikidata claim value structure to a display string.

    Parameters
    ----------
    structure: Dict[str, Any]
        Claim structure with "type" and "value" entries.
    locale: str
        Target locale (e.g. "en_US") used for language-dependent values.

    Returns
    -------
    string: Optional[str]
        String representation of the structure, or None when no value matches
        the requested locale.

    Raises
    ------
    NotImplementedError
        If the structure type is unknown or the structure is malformed.
    """
    if "type" in structure and "value" in structure:
        structure_type: str = structure["type"]
        value: Any = structure["value"]
        # Prefer the normalized ISO timestamp when present and non-empty.
        if structure_type == "time" and isinstance(value, dict) and "iso" in value and value["iso"]:
            return value["iso"]
        if structure_type == "time" and isinstance(value, dict):
            return value["time"]
        if structure_type == "quantity" and isinstance(value, dict):
            return value["amount"]
        if structure_type == "wikibase-item" and isinstance(value, dict):
            # Resolve the referenced item and use its label in the requested locale.
            wikidata_data: WikidataThing = pull_wikidata_object(value["id"])
            if locale in wikidata_data.label:
                return wikidata_data.label[locale].content
            return None
        if structure_type == "external-id":
            return value
        if structure_type == "string":
            return value
        if structure_type == "monolingualtext" and isinstance(value, dict):
            # Only return the text if its language matches the requested locale.
            if LOCALE_LANGUAGE_MAPPING.get(LocaleCode(locale)) == LanguageCode(value["language"]):
                return value["text"]
            return None
        if structure_type == "globe-coordinate" and isinstance(value, dict):
            return f'{value["latitude"]},{value["longitude"]}'
        if structure_type == "url" and isinstance(value, str):
            return value
    # Fix: include the offending type in the message — the caller records
    # str(e) in its import warnings, and an empty message was useless there.
    raise NotImplementedError(f"Unsupported claim structure: type={structure.get('type')}")
136
+
137
+
138
def wikidata_to_thing(
    wikidata_thing: WikidataThing,
    all_relations: Dict[str, Any],
    supported_locales: List[str],
    all_wikidata_objects: Dict[str, WikidataThing],
    pull_wikipedia: bool = False,
    guess_concept_type: bool = True,
) -> Tuple[ThingObject, List[Dict[str, Any]]]:
    """
    Converts a Wikidata thing to a ThingObject.

    The conversion runs in phases (each timed with time.perf_counter and
    reported in a final debug log line): labels/aliases, descriptions,
    source/system data properties, concept type, data properties from claims,
    and object properties from relations.

    Parameters
    ----------
    wikidata_thing: WikidataThing
        Wikidata thing to convert.

    all_relations: Dict[str, Any]
        All relations, keyed by source QID; each relation carries a
        "predicate" (with "pid") and a "target" (with "qid").

    supported_locales: List[str]
        Supported locales; labels/aliases/claims outside these are dropped.

    all_wikidata_objects: Dict[str, WikidataThing]
        All Wikidata objects referenced as relation targets.

    pull_wikipedia: bool
        Pull Wikipedia summaries as descriptions (falls back to the Wikidata
        descriptions when nothing could be pulled).

    guess_concept_type: bool
        Guess the concept type (adds the instance-of QIDs to the class types
        considered by the mapping configuration).

    Returns
    -------
    thing: ThingObject
        Thing object.
    import_warnings: List[Dict[str, Any]]
        Warnings collected for claims/relations that could not be mapped.
    """
    import_warnings: List[Dict[str, Any]] = []
    qid: str = wikidata_thing.qid
    labels_entity: List[Label] = []
    aliases_entity: List[Label] = []
    # Languages corresponding to the supported locales (used for sitelinks).
    supported_languages: List[str] = [
        LOCALE_LANGUAGE_MAPPING[locale] for locale in supported_locales if locale in LOCALE_LANGUAGE_MAPPING
    ]
    # Make sure that the main label are added to labels and aliases to aliases:
    # the first label seen per supported locale becomes the main label; any
    # further labels for that locale are demoted to aliases.
    # NOTE(review): labels in unsupported locales appear to be dropped entirely
    # — confirm that is intended.
    main_languages: Set[str] = set()
    t1: float = time.perf_counter()
    for la in wikidata_thing.label.values():
        if str(la.language_code) in supported_locales:
            if str(la.language_code) not in main_languages:
                main_languages.add(str(la.language_code))
                labels_entity.append(Label(content=la.content, language_code=la.language_code, main=True))
            else:
                aliases_entity.append(Label(content=la.content, language_code=la.language_code, main=False))
    # Aliases: if a supported locale still has no main label, promote the
    # first alias to main; everything else stays an alias.
    for lang, aliases in wikidata_thing.aliases.items():
        if str(lang) in supported_locales:
            if str(lang) not in main_languages:
                main_languages.add(str(lang))
                labels_entity.append(Label(content=aliases[0].content, language_code=LocaleCode(lang), main=True))
                for alias in aliases[1:]:
                    aliases_entity.append(Label(content=alias.content, language_code=LocaleCode(lang), main=False))
            else:
                for alias in aliases:
                    aliases_entity.append(Label(content=alias.content, language_code=LocaleCode(lang), main=False))
    t2: float = time.perf_counter()
    # Descriptions: optionally pulled from Wikipedia summaries; best-effort,
    # failures are logged and skipped.
    descriptions: List[Description] = []
    if "wiki" in wikidata_thing.sitelinks and pull_wikipedia:
        for lang, title in wikidata_thing.sitelinks["wiki"].titles.items():
            if str(lang) in supported_languages:
                locale: LocaleCode = LANGUAGE_LOCALE_MAPPING.get(LanguageCode(lang), EN_US)
                if locale in supported_locales:
                    try:
                        descriptions.append(
                            Description(
                                description=get_wikipedia_summary(title, lang), language_code=LocaleCode(locale)
                            )
                        )
                    except Exception as e:
                        logging.error(f"Failed to get Wikipedia summary for {title} ({lang}): {e}")
    # Fallback: use the plain Wikidata descriptions when nothing was pulled.
    if len(descriptions) == 0:
        descriptions = list(wikidata_thing.description.values())
    t3: float = time.perf_counter()
    # Create the thing and attach provenance (source system + reference id)
    # and a last-update timestamp.
    thing: ThingObject = ThingObject(label=labels_entity, description=descriptions, icon=wikidata_thing.image(dpi=500))
    thing.alias = aliases_entity
    thing.add_source_system(DataProperty(content="wikidata", property_ref=SYSTEM_SOURCE_SYSTEM, language_code=EN_US))
    thing.add_source_reference_id(
        DataProperty(content=qid, property_ref=SYSTEM_SOURCE_REFERENCE_ID, language_code=EN_US)
    )
    thing.add_data_property(
        DataProperty(
            content=datetime.utcnow().isoformat(), property_ref=OntologyPropertyReference.parse("wacom:core#lastUpdate")
        )
    )
    t4: float = time.perf_counter()
    # Concept type: map the entity's ontology types (plus, optionally, its
    # instance-of classes) to a configured class; fall back to the topic class.
    class_types: List[str] = wikidata_thing.ontology_types
    if guess_concept_type:
        for cls in wikidata_thing.instance_of:
            class_types.append(cls.qid)
    class_configuration: Optional[ClassConfiguration] = get_mapping_configuration().guess_classed(class_types)
    if class_configuration:
        thing.concept_type = class_configuration.concept_type
    else:
        thing.concept_type = OntologyClassReference.parse(TOPIC_CLASS)
    t5: float = time.perf_counter()
    # Data properties: convert each mapped claim literal for every supported
    # locale; unconvertible structures are recorded as import warnings.
    relation_props: Dict[OntologyPropertyReference, List[str]] = {}
    for pid, cl in wikidata_thing.claims.items():
        prop: Optional[PropertyConfiguration] = get_mapping_configuration().guess_property(pid, thing.concept_type)
        if prop and prop.type == PropertyType.DATA_PROPERTY:
            property_type: OntologyPropertyReference = OntologyPropertyReference.parse(prop.iri)
            for locale in supported_locales:
                for c in cl.literals:
                    try:
                        if isinstance(c, dict):
                            content: Optional[str] = convert_dict(c, locale)
                            # Only add values that pass the configured range check.
                            if get_mapping_configuration().check_data_property_range(property_type, content):
                                thing.add_data_property(
                                    DataProperty(
                                        content=content, property_ref=property_type, language_code=LocaleCode(locale)
                                    )
                                )
                        elif isinstance(c, (str, float, int)):
                            thing.add_data_property(
                                DataProperty(content=c, property_ref=property_type, language_code=LocaleCode(locale))
                            )
                    except NotImplementedError as e:
                        import_warnings.append({"qid": qid, "pid": pid, "error": str(e)})
    t6: float = time.perf_counter()
    # Object properties: for each relation whose target is known, map the
    # predicate to an object property and validate the target's class against
    # the property range; anything unmappable is recorded as a warning.
    for relation in all_relations.get(qid, []):
        prop: Optional[PropertyConfiguration] = get_mapping_configuration().guess_property(
            relation["predicate"]["pid"], thing.concept_type
        )
        target_thing: Optional[WikidataThing] = all_wikidata_objects.get(relation["target"]["qid"])
        if target_thing:
            if prop and prop.type == PropertyType.OBJECT_PROPERTY:
                class_types: List[str] = [c.qid for c in target_thing.instance_of]
                class_types.extend(target_thing.ontology_types)
                target_config: Optional[ClassConfiguration] = get_mapping_configuration().guess_classed(class_types)
                if target_config:
                    if get_mapping_configuration().check_object_property_range(
                        prop, thing.concept_type, target_config.concept_type
                    ):
                        property_type: OntologyPropertyReference = OntologyPropertyReference.parse(prop.iri)
                        if property_type not in relation_props:
                            relation_props[property_type] = []
                        relation_props[property_type].append(relation["target"]["qid"])
                else:
                    # Target class could not be mapped to a configured class.
                    prop_missing: WikidataProperty = WikidataProperty(pid=relation["predicate"]["pid"])
                    import_warnings.append(
                        {
                            "source_qid": qid,
                            "source_concept": thing.concept_type,
                            "source_classes": class_types,
                            "property": prop_missing.pid,
                            "property_label": prop_missing.label,
                            "target_qid": target_thing.qid,
                            "target_classes": target_thing.ontology_types,
                        }
                    )
            else:
                # Predicate is not configured as an object property.
                prop_missing: WikidataProperty = WikidataProperty(pid=relation["predicate"]["pid"])
                import_warnings.append(
                    {
                        "source_qid": qid,
                        "source_concept": thing.concept_type,
                        "source_classes": class_types,
                        "property": prop_missing.pid,
                        "property_label": prop_missing.label,
                        "target_qid": target_thing.qid,
                        "target_classes": target_thing.ontology_types,
                    }
                )
    # Attach all collected outgoing relations, grouped by property.
    for p, lst in relation_props.items():
        thing.add_relation(ObjectProperty(p, outgoing=lst))
    t7: float = time.perf_counter()
    logging.debug(
        f"Wikidata to Thing: {t2 - t1} seconds for labels, {t3 - t2} seconds for descriptions, "
        f"{t4 - t3} seconds for sources, {t5 - t4} seconds for class types, {t6 - t5} seconds for data "
        f"properties, {t7 - t6} seconds for object properties"
    )
    return thing, import_warnings
@@ -0,0 +1,27 @@
1
# -*- coding: utf-8 -*-
# Copyright © 2021-present Wacom. All rights reserved.
"""Mapping of Wikidata property ids to its string."""
import json
from pathlib import Path
from typing import Dict

# OntologyPropertyReference constants
INSTANCE_OF_PROPERTY: str = "P31"
IMAGE_PROPERTY: str = "P18"

# Mapping for property names (PID -> name), loaded from the optional
# on-disk cache below; empty when no cache file is present.
PROPERTY_MAPPING: Dict[str, str] = {}

CWD: Path = Path(__file__).parent
CONFIGURATION_FILE: Path = CWD / "../../pkl-cache/property_cache.json"
if CONFIGURATION_FILE.exists():
    # Fix: explicit encoding so the JSON cache parses identically regardless
    # of the platform's default locale encoding.
    with CONFIGURATION_FILE.open("r", encoding="utf-8") as f:
        PROPERTY_MAPPING = json.load(f)

# NOTE: these imports are deliberately placed after the constants above so the
# submodules can reference them at import time without a circular-import
# failure — do not move them to the top of the file.
from knowledge.public import wikidata
from knowledge.public import helper
from knowledge.public import relations
from knowledge.public import cache


__all__ = ["wikidata", "helper", "relations", "cache", "PROPERTY_MAPPING", "INSTANCE_OF_PROPERTY", "IMAGE_PROPERTY"]
@@ -0,0 +1,115 @@
1
+ # -*- coding: utf-8 -*-
2
+ # Copyright © 2023-present Wacom. All rights reserved.
3
+ from pathlib import Path
4
+ from typing import Optional, Dict
5
+
6
+ import ndjson
7
+
8
+ from knowledge.public.wikidata import WikidataThing, WikiDataAPIClient
9
+
10
+
11
def cache_wikidata_object(wikidata_object: WikidataThing):
    """
    Stores a Wikidata object in the in-memory cache, keyed by its QID.

    Parameters
    ----------
    wikidata_object: WikidataObject
        The Wikidata object
    """
    key: str = wikidata_object.qid
    wikidata_cache[key] = wikidata_object
20
+
21
+
22
def get_wikidata_object(qid_object: str) -> WikidataThing:
    """
    Returns a Wikidata object from the cache.

    Parameters
    ----------
    qid_object: str
        The QID of the Wikidata object.

    Returns
    -------
    wikidata_object: WikidataThing
        The Wikidata object.

    Raises
    ------
    ValueError
        If the QID has not been cached.
    """
    # EAFP: attempt the lookup and translate a miss into the documented error.
    try:
        return wikidata_cache[qid_object]
    except KeyError:
        raise ValueError(f"Wikidata object {qid_object} not in cache.")
38
+
39
+
40
def pull_wikidata_object(qid_object: str) -> Optional[WikidataThing]:
    """
    Pulls a Wikidata object from the cache or from the Wikidata API.

    Parameters
    ----------
    qid_object: str
        The QID of the Wikidata object.

    Returns
    -------
    wikidata_object: Optional[WikidataThing]
        The Wikidata object, if it exists, otherwise None.
    """
    if qid_object in wikidata_cache:
        return wikidata_cache[qid_object]
    wikidata_object: Optional[WikidataThing] = WikiDataAPIClient.retrieve_entity(qid_object)
    # Fix: only cache successful lookups — cache_wikidata_object(None) would
    # raise an AttributeError (None has no .qid) when the API returns nothing.
    if wikidata_object is not None:
        cache_wikidata_object(wikidata_object)
    return wikidata_object
57
+
58
+
59
def cache_wikidata_objects() -> Dict[str, WikidataThing]:
    """
    Provides direct access to the in-memory Wikidata cache.

    Returns
    -------
    wikidata_cache: Dict[str, WikidataThing]
        Mapping from QID to cached Wikidata object (the live cache dict,
        not a copy).
    """
    return wikidata_cache
68
+
69
+
70
def number_of_cached_objects() -> int:
    """
    Counts the Wikidata objects currently held in the cache.

    Returns
    -------
    number_of_cached_objects: int
        Number of cached objects.
    """
    return len(wikidata_cache)
79
+
80
+
81
def load_cache(cache: Path):
    """
    Load the cache from a newline-delimited JSON (ndjson) file.

    A missing file is silently ignored, so a cold start without a cache
    is not an error.

    Parameters
    ----------
    cache: Path
        The path to the cache file.
    """
    if cache.exists():
        # Fix: explicit encoding so the cache file is read identically on
        # platforms whose default locale encoding is not UTF-8.
        with cache.open("r", encoding="utf-8") as r:
            reader = ndjson.reader(r)
            for line in reader:
                wiki_data_thing: WikidataThing = WikidataThing.create_from_dict(line)
                # Cache the object
                cache_wikidata_object(wiki_data_thing)
96
+
97
+
98
def qid_in_cache(ref_qid: str) -> bool:
    """
    Tells whether a QID has already been cached.

    Parameters
    ----------
    ref_qid: str
        The QID to check.

    Returns
    -------
    in_cache: bool
        True if the QID is in the cache, otherwise False.
    """
    cached: bool = ref_qid in wikidata_cache
    return cached
112
+
113
+
114
# Module-level in-memory Wikidata cache: QID -> WikidataThing. Defined at the
# bottom of the module; the functions above only reference it at call time,
# after the module has finished loading.
wikidata_cache: Dict[str, WikidataThing] = {}