personal_knowledge_library-3.0.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- knowledge/__init__.py +91 -0
- knowledge/base/__init__.py +22 -0
- knowledge/base/access.py +167 -0
- knowledge/base/entity.py +267 -0
- knowledge/base/language.py +27 -0
- knowledge/base/ontology.py +2734 -0
- knowledge/base/search.py +473 -0
- knowledge/base/tenant.py +192 -0
- knowledge/nel/__init__.py +11 -0
- knowledge/nel/base.py +495 -0
- knowledge/nel/engine.py +123 -0
- knowledge/ontomapping/__init__.py +667 -0
- knowledge/ontomapping/manager.py +320 -0
- knowledge/public/__init__.py +27 -0
- knowledge/public/cache.py +115 -0
- knowledge/public/helper.py +373 -0
- knowledge/public/relations.py +128 -0
- knowledge/public/wikidata.py +1324 -0
- knowledge/services/__init__.py +128 -0
- knowledge/services/asyncio/__init__.py +7 -0
- knowledge/services/asyncio/base.py +458 -0
- knowledge/services/asyncio/graph.py +1420 -0
- knowledge/services/asyncio/group.py +450 -0
- knowledge/services/asyncio/search.py +439 -0
- knowledge/services/asyncio/users.py +270 -0
- knowledge/services/base.py +533 -0
- knowledge/services/graph.py +1897 -0
- knowledge/services/group.py +819 -0
- knowledge/services/helper.py +142 -0
- knowledge/services/ontology.py +1234 -0
- knowledge/services/search.py +488 -0
- knowledge/services/session.py +444 -0
- knowledge/services/tenant.py +281 -0
- knowledge/services/users.py +445 -0
- knowledge/utils/__init__.py +10 -0
- knowledge/utils/graph.py +417 -0
- knowledge/utils/wikidata.py +197 -0
- knowledge/utils/wikipedia.py +175 -0
- personal_knowledge_library-3.0.0.dist-info/LICENSE +201 -0
- personal_knowledge_library-3.0.0.dist-info/METADATA +1163 -0
- personal_knowledge_library-3.0.0.dist-info/RECORD +42 -0
- personal_knowledge_library-3.0.0.dist-info/WHEEL +4 -0
knowledge/ontomapping/manager.py
@@ -0,0 +1,320 @@
+# -*- coding: utf-8 -*-
+# Copyright © 2023-present Wacom. All rights reserved.
+import logging
+import time
+from datetime import datetime
+from typing import Optional, Any, List, Dict, Tuple, Set
+
+from knowledge.base.entity import Label, LanguageCode, Description
+from knowledge.base.ontology import (
+    ThingObject,
+    DataProperty,
+    SYSTEM_SOURCE_SYSTEM,
+    SYSTEM_SOURCE_REFERENCE_ID,
+    OntologyClassReference,
+    OntologyPropertyReference,
+    ObjectProperty,
+)
+from knowledge.ontomapping import (
+    ClassConfiguration,
+    TOPIC_CLASS,
+    superclasses_cache,
+    PropertyConfiguration,
+    PropertyType,
+    get_mapping_configuration,
+    save_superclasses_cache,
+    superclasses_path,
+)
+from knowledge.public.cache import pull_wikidata_object
+from knowledge.public.wikidata import WikidataThing, WikiDataAPIClient, WikidataClass, WikidataProperty
+from knowledge.base.language import LOCALE_LANGUAGE_MAPPING, LocaleCode, LANGUAGE_LOCALE_MAPPING, EN_US
+from knowledge.utils.wikipedia import get_wikipedia_summary
+
+
+def flatten(hierarchy: WikidataClass, use_names: bool = False) -> List[str]:
+    """
+    Flattens the hierarchy.
+
+    Parameters
+    ----------
+    hierarchy: WikidataClass
+        Hierarchy
+    use_names: bool
+        Use names instead of QIDs.
+
+    Returns
+    -------
+    hierarchy: List[str]
+        Hierarchy
+
+    """
+    hierarchy_list: List[str] = [hierarchy.qid]
+    jobs: List[WikidataClass] = [hierarchy]
+    while len(jobs) > 0:
+        job: WikidataClass = jobs.pop()
+        if use_names:
+            hierarchy_list.append(f"{job.qid} ({job.label})")
+        else:
+            hierarchy_list.append(job.qid)
+        for c in job.superclasses:
+            if use_names:
+                if f"{job.qid} ({job.label})" not in hierarchy_list:
+                    jobs.append(c)
+            elif c.qid not in hierarchy_list:
+                jobs.append(c)
+    return hierarchy_list
+
+
+def wikidata_taxonomy(qid: str) -> Optional[WikidataClass]:
+    """
+    Returns the taxonomy of a Wikidata thing.
+    Parameters
+    ----------
+    qid: str
+        Wikidata QID.
+
+    Returns
+    -------
+    hierarchy: WikidataClass
+        Hierarchy.
+    """
+    if superclasses_cache and qid in superclasses_cache:
+        taxonomy: WikidataClass = superclasses_cache[qid]
+        return taxonomy
+    hierarchy: Dict[str, WikidataClass] = WikiDataAPIClient.superclasses(qid)
+    if qid not in hierarchy:
+        logging.warning(f"Taxonomy for {qid} not found.")
+        return None
+    if hierarchy:
+        superclasses_cache.update(hierarchy)
+        save_superclasses_cache(superclasses_path)
+    return hierarchy.get(qid)
+
+
+def convert_dict(structure: Dict[str, Any], locale: str) -> Optional[str]:
+    """
+    Converts a dictionary to a string.
+    Parameters
+    ----------
+    structure: Dict[str, Any]
+        Dictionary to convert.
+    locale: str
+        Locale.
+
+    Returns
+    -------
+    string: str
+        String representation of the dictionary.
+    """
+    if "type" in structure and "value" in structure:
+        structure_type: str = structure["type"]
+        value: Any = structure["value"]
+        if structure_type == "time" and isinstance(value, dict) and "iso" in value and value["iso"]:
+            return value["iso"]
+        if structure_type == "time" and isinstance(value, dict):
+            return value["time"]
+        if structure_type == "quantity" and isinstance(value, dict):
+            return value["amount"]
+        if structure_type == "wikibase-item" and isinstance(value, dict):
+            wikidata_data: WikidataThing = pull_wikidata_object(value["id"])
+            if locale in wikidata_data.label:
+                return wikidata_data.label[locale].content
+            return None
+        if structure_type == "external-id":
+            return value
+        if structure_type == "string":
+            return value
+        if structure_type == "monolingualtext" and isinstance(value, dict):
+            if LOCALE_LANGUAGE_MAPPING.get(LocaleCode(locale)) == LanguageCode(value["language"]):
+                return value["text"]
+            return None
+        if structure_type == "globe-coordinate" and isinstance(value, dict):
+            return f'{value["latitude"]},{value["longitude"]}'
+        if structure_type == "url" and isinstance(value, str):
+            return value
+    raise NotImplementedError()
+
+
+def wikidata_to_thing(
+    wikidata_thing: WikidataThing,
+    all_relations: Dict[str, Any],
+    supported_locales: List[str],
+    all_wikidata_objects: Dict[str, WikidataThing],
+    pull_wikipedia: bool = False,
+    guess_concept_type: bool = True,
+) -> Tuple[ThingObject, List[Dict[str, Any]]]:
+    """
+    Converts a Wikidata thing to a ThingObject.
+
+    Parameters
+    ----------
+    wikidata_thing: WikidataThing
+        Wikidata thing
+
+    all_relations: Dict[str, Any]
+        All relations.
+
+    supported_locales: List[str]
+        Supported locales.
+
+    all_wikidata_objects: Dict[str, WikidataThing]
+        All Wikidata objects.
+
+    pull_wikipedia: bool
+        Pull Wikipedia summary.
+
+    guess_concept_type: bool
+        Guess the concept type (queries all super types from Wikidata).
+
+    Returns
+    -------
+    thing: ThingObject
+        Thing object
+    import_warnings: List[Dict[str, Any]]
+        Errors
+
+    """
+    import_warnings: List[Dict[str, Any]] = []
+    qid: str = wikidata_thing.qid
+    labels_entity: List[Label] = []
+    aliases_entity: List[Label] = []
+    supported_languages: List[str] = [
+        LOCALE_LANGUAGE_MAPPING[locale] for locale in supported_locales if locale in LOCALE_LANGUAGE_MAPPING
+    ]
+    # Make sure that the main label are added to labels and aliases to aliases.
+    main_languages: Set[str] = set()
+    t1: float = time.perf_counter()
+    for la in wikidata_thing.label.values():
+        if str(la.language_code) in supported_locales:
+            if str(la.language_code) not in main_languages:
+                main_languages.add(str(la.language_code))
+                labels_entity.append(Label(content=la.content, language_code=la.language_code, main=True))
+            else:
+                aliases_entity.append(Label(content=la.content, language_code=la.language_code, main=False))
+    for lang, aliases in wikidata_thing.aliases.items():
+        if str(lang) in supported_locales:
+            if str(lang) not in main_languages:
+                main_languages.add(str(lang))
+                labels_entity.append(Label(content=aliases[0].content, language_code=LocaleCode(lang), main=True))
+                for alias in aliases[1:]:
+                    aliases_entity.append(Label(content=alias.content, language_code=LocaleCode(lang), main=False))
+            else:
+                for alias in aliases:
+                    aliases_entity.append(Label(content=alias.content, language_code=LocaleCode(lang), main=False))
+    t2: float = time.perf_counter()
+    descriptions: List[Description] = []
+    if "wiki" in wikidata_thing.sitelinks and pull_wikipedia:
+        for lang, title in wikidata_thing.sitelinks["wiki"].titles.items():
+            if str(lang) in supported_languages:
+                locale: LocaleCode = LANGUAGE_LOCALE_MAPPING.get(LanguageCode(lang), EN_US)
+                if locale in supported_locales:
+                    try:
+                        descriptions.append(
+                            Description(
+                                description=get_wikipedia_summary(title, lang), language_code=LocaleCode(locale)
+                            )
+                        )
+                    except Exception as e:
+                        logging.error(f"Failed to get Wikipedia summary for {title} ({lang}): {e}")
+    if len(descriptions) == 0:
+        descriptions = list(wikidata_thing.description.values())
+    t3: float = time.perf_counter()
+    # Create the thing
+    thing: ThingObject = ThingObject(label=labels_entity, description=descriptions, icon=wikidata_thing.image(dpi=500))
+    thing.alias = aliases_entity
+    thing.add_source_system(DataProperty(content="wikidata", property_ref=SYSTEM_SOURCE_SYSTEM, language_code=EN_US))
+    thing.add_source_reference_id(
+        DataProperty(content=qid, property_ref=SYSTEM_SOURCE_REFERENCE_ID, language_code=EN_US)
+    )
+    thing.add_data_property(
+        DataProperty(
+            content=datetime.utcnow().isoformat(), property_ref=OntologyPropertyReference.parse("wacom:core#lastUpdate")
+        )
+    )
+    t4: float = time.perf_counter()
+    class_types: List[str] = wikidata_thing.ontology_types
+    if guess_concept_type:
+        for cls in wikidata_thing.instance_of:
+            class_types.append(cls.qid)
+        class_configuration: Optional[ClassConfiguration] = get_mapping_configuration().guess_classed(class_types)
+        if class_configuration:
+            thing.concept_type = class_configuration.concept_type
+        else:
+            thing.concept_type = OntologyClassReference.parse(TOPIC_CLASS)
+    t5: float = time.perf_counter()
+    relation_props: Dict[OntologyPropertyReference, List[str]] = {}
+    for pid, cl in wikidata_thing.claims.items():
+        prop: Optional[PropertyConfiguration] = get_mapping_configuration().guess_property(pid, thing.concept_type)
+        if prop and prop.type == PropertyType.DATA_PROPERTY:
+            property_type: OntologyPropertyReference = OntologyPropertyReference.parse(prop.iri)
+            for locale in supported_locales:
+                for c in cl.literals:
+                    try:
+                        if isinstance(c, dict):
+                            content: Optional[str] = convert_dict(c, locale)
+                            if get_mapping_configuration().check_data_property_range(property_type, content):
+                                thing.add_data_property(
+                                    DataProperty(
+                                        content=content, property_ref=property_type, language_code=LocaleCode(locale)
+                                    )
+                                )
+                        elif isinstance(c, (str, float, int)):
+                            thing.add_data_property(
+                                DataProperty(content=c, property_ref=property_type, language_code=LocaleCode(locale))
+                            )
+                    except NotImplementedError as e:
+                        import_warnings.append({"qid": qid, "pid": pid, "error": str(e)})
+    t6: float = time.perf_counter()
+    for relation in all_relations.get(qid, []):
+        prop: Optional[PropertyConfiguration] = get_mapping_configuration().guess_property(
+            relation["predicate"]["pid"], thing.concept_type
+        )
+        target_thing: Optional[WikidataThing] = all_wikidata_objects.get(relation["target"]["qid"])
+        if target_thing:
+            if prop and prop.type == PropertyType.OBJECT_PROPERTY:
+                class_types: List[str] = [c.qid for c in target_thing.instance_of]
+                class_types.extend(target_thing.ontology_types)
+                target_config: Optional[ClassConfiguration] = get_mapping_configuration().guess_classed(class_types)
+                if target_config:
+                    if get_mapping_configuration().check_object_property_range(
+                        prop, thing.concept_type, target_config.concept_type
+                    ):
+                        property_type: OntologyPropertyReference = OntologyPropertyReference.parse(prop.iri)
+                        if property_type not in relation_props:
+                            relation_props[property_type] = []
+                        relation_props[property_type].append(relation["target"]["qid"])
+                else:
+                    prop_missing: WikidataProperty = WikidataProperty(pid=relation["predicate"]["pid"])
+                    import_warnings.append(
+                        {
+                            "source_qid": qid,
+                            "source_concept": thing.concept_type,
+                            "source_classes": class_types,
+                            "property": prop_missing.pid,
+                            "property_label": prop_missing.label,
+                            "target_qid": target_thing.qid,
+                            "target_classes": target_thing.ontology_types,
+                        }
+                    )
+            else:
+                prop_missing: WikidataProperty = WikidataProperty(pid=relation["predicate"]["pid"])
+                import_warnings.append(
+                    {
+                        "source_qid": qid,
+                        "source_concept": thing.concept_type,
+                        "source_classes": class_types,
+                        "property": prop_missing.pid,
+                        "property_label": prop_missing.label,
+                        "target_qid": target_thing.qid,
+                        "target_classes": target_thing.ontology_types,
+                    }
+                )
+    for p, lst in relation_props.items():
+        thing.add_relation(ObjectProperty(p, outgoing=lst))
+    t7: float = time.perf_counter()
+    logging.debug(
+        f"Wikidata to Thing: {t2 - t1} seconds for labels, {t3 - t2} seconds for descriptions, "
+        f"{t4 - t3} seconds for sources, {t5 - t4} seconds for class types, {t6 - t5} seconds for data "
+        f"properties, {t7 - t6} seconds for object properties"
+    )
+    return thing, import_warnings
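The hunk above is attributed to knowledge/ontomapping/manager.py (the only file in the listing with +320 lines). A minimal usage sketch, not part of the release: it assumes network access to the Wikidata API, and Q937 (Albert Einstein) and Q5 (human) are illustrative QIDs. With no pre-fetched relations or related objects, only labels, descriptions, and data properties can be mapped.

from knowledge.ontomapping.manager import flatten, wikidata_taxonomy, wikidata_to_thing
from knowledge.public.wikidata import WikiDataAPIClient

# Fetch one entity and map it onto the ontology.
entity = WikiDataAPIClient.retrieve_entity("Q937")
thing, warnings = wikidata_to_thing(
    entity,
    all_relations={},
    supported_locales=["en_US"],
    all_wikidata_objects={"Q937": entity},
)
print(thing.concept_type, len(warnings))

# Resolve and flatten the superclass taxonomy of Q5.
taxonomy = wikidata_taxonomy("Q5")
if taxonomy is not None:
    print(flatten(taxonomy, use_names=True))

Two observations on flatten as shipped: hierarchy_list is seeded with the root QID and the root is appended again on the first loop iteration, so it appears twice; and in the use_names branch the membership test checks the current job (just appended) rather than the superclass c, so no superclasses are ever enqueued when use_names=True. Both look unintended.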
knowledge/public/__init__.py
@@ -0,0 +1,27 @@
+# -*- coding: utf-8 -*-
+# Copyright © 2021-present Wacom. All rights reserved.
+"""Mapping of Wikidata property ids to its string."""
+import json
+from pathlib import Path
+from typing import Dict
+
+# OntologyPropertyReference constants
+INSTANCE_OF_PROPERTY: str = "P31"
+IMAGE_PROPERTY: str = "P18"
+
+# Mapping for property names
+PROPERTY_MAPPING: Dict[str, str] = {}
+
+CWD: Path = Path(__file__).parent
+CONFIGURATION_FILE: Path = CWD / "../../pkl-cache/property_cache.json"
+if CONFIGURATION_FILE.exists():
+    with CONFIGURATION_FILE.open("r") as f:
+        PROPERTY_MAPPING = json.load(f)
+
+from knowledge.public import wikidata
+from knowledge.public import helper
+from knowledge.public import relations
+from knowledge.public import cache
+
+
+__all__ = ["wikidata", "helper", "relations", "cache", "PROPERTY_MAPPING", "INSTANCE_OF_PROPERTY", "IMAGE_PROPERTY"]
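This +27-line hunk matches knowledge/public/__init__.py. At import time it populates a PID-to-label mapping from a bundled JSON cache; the submodule imports sit after that setup, presumably to avoid circular imports with knowledge.public.wikidata. A lookup sketch, assuming the wheel actually ships the pkl-cache/property_cache.json file (otherwise the mapping stays empty):

from knowledge.public import INSTANCE_OF_PROPERTY, PROPERTY_MAPPING

# PROPERTY_MAPPING is {} if the cache file is missing from the install.
label = PROPERTY_MAPPING.get(INSTANCE_OF_PROPERTY, "unknown")
print(INSTANCE_OF_PROPERTY, "->", label)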
knowledge/public/cache.py
@@ -0,0 +1,115 @@
+# -*- coding: utf-8 -*-
+# Copyright © 2023-present Wacom. All rights reserved.
+from pathlib import Path
+from typing import Optional, Dict
+
+import ndjson
+
+from knowledge.public.wikidata import WikidataThing, WikiDataAPIClient
+
+
+def cache_wikidata_object(wikidata_object: WikidataThing):
+    """
+    Caches a Wikidata object.
+    Parameters
+    ----------
+    wikidata_object: WikidataObject
+        The Wikidata object
+    """
+    wikidata_cache[wikidata_object.qid] = wikidata_object
+
+
+def get_wikidata_object(qid_object: str) -> WikidataThing:
+    """
+    Returns a Wikidata object from the cache.
+
+    Parameters
+    ----------
+    qid_object: str
+        The QID of the Wikidata object.
+    Returns
+    -------
+    wikidata_object: WikidataThing
+        The Wikidata object.
+    """
+    if qid_object not in wikidata_cache:
+        raise ValueError(f"Wikidata object {qid_object} not in cache.")
+    return wikidata_cache[qid_object]
+
+
+def pull_wikidata_object(qid_object: str) -> Optional[WikidataThing]:
+    """
+    Pulls a Wikidata object from the cache or from the Wikidata API.
+    Parameters
+    ----------
+    qid_object: str
+        The QID of the Wikidata object.
+    Returns
+    -------
+    wikidata_object: Optional[WikidataThing]
+        The Wikidata object, if it exists, otherwise None.
+    """
+    if qid_object in wikidata_cache:
+        return wikidata_cache[qid_object]
+    wikidata_object: Optional[WikidataThing] = WikiDataAPIClient.retrieve_entity(qid_object)
+    cache_wikidata_object(wikidata_object)
+    return wikidata_object
+
+
+def cache_wikidata_objects() -> Dict[str, WikidataThing]:
+    """
+    Returns the Wikidata cache.
+    Returns
+    -------
+    wikidata_cache: Dict[str, WikidataThing]
+        Wikidata cache.
+    """
+    return wikidata_cache
+
+
+def number_of_cached_objects() -> int:
+    """
+    Returns the number of cached objects.
+    Returns
+    -------
+    number_of_cached_objects: int
+        Number of cached objects.
+    """
+    return len(wikidata_cache)
+
+
+def load_cache(cache: Path):
+    """
+    Load the cache from the file.
+    Parameters
+    ----------
+    cache: Path
+        The path to the cache file.
+    """
+    if cache.exists():
+        with cache.open("r") as r:
+            reader = ndjson.reader(r)
+            for line in reader:
+                wiki_data_thing: WikidataThing = WikidataThing.create_from_dict(line)
+                # Cache the object
+                cache_wikidata_object(wiki_data_thing)
+
+
+def qid_in_cache(ref_qid: str) -> bool:
+    """
+    Checks if a QID is in the cache.
+    Parameters
+    ----------
+    ref_qid: str
+        The QID to check.
+
+    Returns
+    -------
+    in_cache: bool
+        True if the QID is in the cache, otherwise False.
+    """
+    return ref_qid in wikidata_cache
+
+
+wikidata_cache: Dict[str, WikidataThing] = {}
+# Wikidata cache
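This +115-line hunk matches knowledge/public/cache.py: a process-wide, in-memory dict keyed by QID. Because wikidata_cache is a module-level global resolved at call time, defining it at the bottom of the file still works for the functions above it. A round-trip sketch under stated assumptions: the NDJSON dump path is hypothetical, and pull_wikidata_object needs network access on a cache miss.

from pathlib import Path

from knowledge.public.cache import (
    load_cache,
    number_of_cached_objects,
    pull_wikidata_object,
    qid_in_cache,
)

# Hypothetical NDJSON dump of serialized WikidataThing rows.
dump = Path("wikidata_dump.ndjson")
if dump.exists():
    load_cache(dump)

# Served from the cache if loaded, fetched from the Wikidata API otherwise.
einstein = pull_wikidata_object("Q937")
print(qid_in_cache("Q937"), number_of_cached_objects())

One caveat: pull_wikidata_object passes the result of WikiDataAPIClient.retrieve_entity straight to cache_wikidata_object, so a lookup that yields None would raise an AttributeError on wikidata_object.qid rather than returning None as the Optional annotation suggests.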