personal_knowledge_library 3.0.0__py3-none-any.whl → 3.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of personal_knowledge_library might be problematic. Click here for more details.
- knowledge/__init__.py +1 -1
- knowledge/ontomapping/__init__.py +33 -115
- knowledge/ontomapping/manager.py +24 -25
- knowledge/public/__init__.py +8 -7
- knowledge/public/cache.py +413 -86
- knowledge/public/client.py +428 -0
- knowledge/public/helper.py +39 -11
- knowledge/public/relations.py +2 -1
- knowledge/public/wikidata.py +47 -381
- knowledge/utils/graph.py +6 -6
- {personal_knowledge_library-3.0.0.dist-info → personal_knowledge_library-3.1.1.dist-info}/METADATA +7 -1
- {personal_knowledge_library-3.0.0.dist-info → personal_knowledge_library-3.1.1.dist-info}/RECORD +14 -13
- {personal_knowledge_library-3.0.0.dist-info → personal_knowledge_library-3.1.1.dist-info}/LICENSE +0 -0
- {personal_knowledge_library-3.0.0.dist-info → personal_knowledge_library-3.1.1.dist-info}/WHEEL +0 -0
knowledge/public/wikidata.py
CHANGED
|
@@ -1,18 +1,11 @@
|
|
|
1
1
|
# -*- coding: utf-8 -*-
|
|
2
2
|
# Copyright © 2023-present Wacom. All rights reserved.
|
|
3
3
|
import hashlib
|
|
4
|
-
import multiprocessing
|
|
5
4
|
import urllib
|
|
6
|
-
from abc import ABC
|
|
7
|
-
from collections import deque
|
|
8
5
|
from datetime import datetime
|
|
9
|
-
from
|
|
10
|
-
from typing import Optional, Union, Any, Dict, List, Tuple, Set
|
|
6
|
+
from typing import Optional, Union, Any, Dict, List
|
|
11
7
|
|
|
12
8
|
import requests
|
|
13
|
-
from requests import Response
|
|
14
|
-
from requests.adapters import HTTPAdapter
|
|
15
|
-
from urllib3 import Retry
|
|
16
9
|
|
|
17
10
|
from knowledge import logger
|
|
18
11
|
from knowledge.base.entity import (
|
|
@@ -25,11 +18,10 @@ from knowledge.base.entity import (
|
|
|
25
18
|
DISPLAY_TAG,
|
|
26
19
|
DESCRIPTION_TAG,
|
|
27
20
|
)
|
|
28
|
-
from knowledge.base.language import LANGUAGE_LOCALE_MAPPING, EN_US, LocaleCode
|
|
29
|
-
from knowledge.public import
|
|
21
|
+
from knowledge.base.language import LANGUAGE_LOCALE_MAPPING, EN_US, LocaleCode, EN
|
|
22
|
+
from knowledge.public import INSTANCE_OF_PROPERTY, IMAGE_PROPERTY
|
|
30
23
|
from knowledge.public.helper import (
|
|
31
24
|
__waiting_request__,
|
|
32
|
-
__waiting_multi_request__,
|
|
33
25
|
QID_TAG,
|
|
34
26
|
REVISION_TAG,
|
|
35
27
|
PID_TAG,
|
|
@@ -46,14 +38,13 @@ from knowledge.public.helper import (
|
|
|
46
38
|
LAST_REVID_TAG,
|
|
47
39
|
wikidate,
|
|
48
40
|
WikiDataAPIException,
|
|
49
|
-
WIKIDATA_SPARQL_URL,
|
|
50
41
|
SOURCE_TAG,
|
|
51
42
|
URLS_TAG,
|
|
52
43
|
TITLES_TAG,
|
|
53
44
|
image_url,
|
|
54
|
-
WIKIDATA_SEARCH_URL,
|
|
55
45
|
SUPERCLASSES_TAG,
|
|
56
|
-
|
|
46
|
+
SYNC_TIME_TAG,
|
|
47
|
+
SUBCLASSES_TAG,
|
|
57
48
|
)
|
|
58
49
|
|
|
59
50
|
# Constants
|
|
@@ -61,21 +52,6 @@ QUALIFIERS_TAG: str = "QUALIFIERS"
|
|
|
61
52
|
LITERALS_TAG: str = "LITERALS"
|
|
62
53
|
|
|
63
54
|
|
|
64
|
-
def chunks(lst: List[str], chunk_size: int):
|
|
65
|
-
"""
|
|
66
|
-
Yield successive n-sized chunks from lst.Yield successive n-sized chunks from lst.
|
|
67
|
-
Parameters
|
|
68
|
-
----------
|
|
69
|
-
lst: List[str]
|
|
70
|
-
Full length.
|
|
71
|
-
chunk_size: int
|
|
72
|
-
Chunk size.
|
|
73
|
-
|
|
74
|
-
"""
|
|
75
|
-
for i in range(0, len(lst), chunk_size):
|
|
76
|
-
yield lst[i : i + chunk_size]
|
|
77
|
-
|
|
78
|
-
|
|
79
55
|
class WikidataProperty:
|
|
80
56
|
"""
|
|
81
57
|
WikidataProperty
|
|
@@ -109,36 +85,10 @@ class WikidataProperty:
|
|
|
109
85
|
label: str
|
|
110
86
|
Label of the property.
|
|
111
87
|
"""
|
|
112
|
-
if self.__label:
|
|
113
|
-
return self.__label
|
|
114
|
-
if self.pid in PROPERTY_MAPPING: # only English mappings
|
|
115
|
-
self.__label = PROPERTY_MAPPING[self.pid]
|
|
116
|
-
else:
|
|
117
|
-
prop_dict = __waiting_request__(self.pid)
|
|
118
|
-
if "labels" in prop_dict:
|
|
119
|
-
labels: Dict[str, Any] = prop_dict.get("labels")
|
|
120
|
-
if "en" in labels:
|
|
121
|
-
en_label: Dict[str, Any] = labels.get("en")
|
|
122
|
-
self.__label = en_label.get("value", self.__pid)
|
|
123
|
-
PROPERTY_MAPPING[self.pid] = self.__label
|
|
124
|
-
else:
|
|
125
|
-
self.__label = self.pid
|
|
126
|
-
|
|
127
|
-
else:
|
|
128
|
-
self.__label = self.__pid
|
|
129
|
-
return self.__label
|
|
130
|
-
|
|
131
|
-
@property
|
|
132
|
-
def label_cached(self) -> str:
|
|
133
|
-
"""Label with cached value."""
|
|
134
|
-
if self.__label:
|
|
135
|
-
return self.__label
|
|
136
|
-
if self.pid in PROPERTY_MAPPING: # only English mappings
|
|
137
|
-
self.__label = PROPERTY_MAPPING[self.pid]
|
|
138
88
|
return self.__label
|
|
139
89
|
|
|
140
90
|
def __dict__(self):
|
|
141
|
-
return {PID_TAG: self.pid, LABEL_TAG: self.
|
|
91
|
+
return {PID_TAG: self.pid, LABEL_TAG: self.label}
|
|
142
92
|
|
|
143
93
|
@classmethod
|
|
144
94
|
def create_from_dict(cls, prop_dict: Dict[str, Any]) -> "WikidataProperty":
|
|
@@ -155,6 +105,27 @@ class WikidataProperty:
|
|
|
155
105
|
"""
|
|
156
106
|
return WikidataProperty(prop_dict[PID_TAG], prop_dict.get(LABEL_TAG))
|
|
157
107
|
|
|
108
|
+
@staticmethod
|
|
109
|
+
def from_wikidata(entity_dict: Dict[str, Any]) -> "WikidataProperty":
|
|
110
|
+
"""
|
|
111
|
+
Create a property from a dictionary.
|
|
112
|
+
Parameters
|
|
113
|
+
----------
|
|
114
|
+
entity_dict: Dict[str, Any]
|
|
115
|
+
Property dictionary.
|
|
116
|
+
|
|
117
|
+
Returns
|
|
118
|
+
-------
|
|
119
|
+
instance: WikidataProperty
|
|
120
|
+
Instance of WikidataProperty.
|
|
121
|
+
"""
|
|
122
|
+
pid: str = entity_dict[ID_TAG]
|
|
123
|
+
label: Optional[str] = None
|
|
124
|
+
if LABELS_TAG in entity_dict:
|
|
125
|
+
if EN in entity_dict[LABELS_TAG]:
|
|
126
|
+
label = entity_dict[LABELS_TAG][EN].get(LABEL_VALUE_TAG)
|
|
127
|
+
return WikidataProperty(pid, label)
|
|
128
|
+
|
|
158
129
|
def __repr__(self):
|
|
159
130
|
return f"<Property:={self.pid}]>"
|
|
160
131
|
|
|
@@ -324,7 +295,7 @@ class WikidataClass:
|
|
|
324
295
|
wiki_cls.__superclasses.append(WikidataClass.create_from_dict(superclass))
|
|
325
296
|
return wiki_cls
|
|
326
297
|
|
|
327
|
-
def
|
|
298
|
+
def __superclasses_hierarchy__(self, visited: Optional[set] = None):
|
|
328
299
|
if visited is None:
|
|
329
300
|
visited = set()
|
|
330
301
|
if self.qid in visited:
|
|
@@ -332,16 +303,18 @@ class WikidataClass:
|
|
|
332
303
|
QID_TAG: self.qid,
|
|
333
304
|
LABEL_TAG: self.label,
|
|
334
305
|
SUPERCLASSES_TAG: [],
|
|
306
|
+
SUBCLASSES_TAG: [],
|
|
335
307
|
}
|
|
336
308
|
visited.add(self.qid)
|
|
337
309
|
return {
|
|
338
310
|
QID_TAG: self.qid,
|
|
339
311
|
LABEL_TAG: self.label,
|
|
340
|
-
SUPERCLASSES_TAG: [superclass.
|
|
312
|
+
SUPERCLASSES_TAG: [superclass.__superclasses_hierarchy__(visited) for superclass in self.superclasses],
|
|
313
|
+
SUBCLASSES_TAG: [subclass.__superclasses_hierarchy__(visited) for subclass in self.subclasses],
|
|
341
314
|
}
|
|
342
315
|
|
|
343
316
|
def __dict__(self):
|
|
344
|
-
return self.
|
|
317
|
+
return self.__superclasses_hierarchy__()
|
|
345
318
|
|
|
346
319
|
def __repr__(self):
|
|
347
320
|
return f"<WikidataClass:={self.qid}]>"
|
|
@@ -521,10 +494,12 @@ class WikidataThing:
|
|
|
521
494
|
label: Optional[Dict[str, Label]] = None,
|
|
522
495
|
aliases: Optional[Dict[str, List[Label]]] = None,
|
|
523
496
|
description: Optional[Dict[str, Description]] = None,
|
|
497
|
+
sync_time: datetime = datetime.now(),
|
|
524
498
|
):
|
|
525
499
|
self.__qid: str = qid
|
|
526
500
|
self.__revision: str = revision
|
|
527
501
|
self.__modified: datetime = modified
|
|
502
|
+
self.__sync_time: datetime = sync_time
|
|
528
503
|
self.__label: Dict[str, Label] = label if label else {}
|
|
529
504
|
self.__description: Dict[str, Description] = description if description else {}
|
|
530
505
|
self.__aliases: Dict[str, List[Label]] = aliases if aliases else {}
|
|
@@ -547,6 +522,11 @@ class WikidataThing:
|
|
|
547
522
|
"""Modification date of entity."""
|
|
548
523
|
return self.__modified
|
|
549
524
|
|
|
525
|
+
@property
|
|
526
|
+
def sync_time(self) -> datetime:
|
|
527
|
+
"""Sync time of entity."""
|
|
528
|
+
return self.__sync_time
|
|
529
|
+
|
|
550
530
|
@property
|
|
551
531
|
def label(self) -> Dict[str, Label]:
|
|
552
532
|
"""Labels of the entity."""
|
|
@@ -730,6 +710,7 @@ class WikidataThing:
|
|
|
730
710
|
QID_TAG: self.qid,
|
|
731
711
|
REVISION_TAG: self.revision,
|
|
732
712
|
MODIFIED_TAG: self.modified.isoformat(),
|
|
713
|
+
SYNC_TIME_TAG: self.sync_time.isoformat(),
|
|
733
714
|
LABELS_TAG: {lang: la.__dict__() for lang, la in self.label.items()},
|
|
734
715
|
DESCRIPTIONS_TAG: {lang: la.__dict__() for lang, la in self.description.items()},
|
|
735
716
|
ALIASES_TAG: {lang: [a.__dict__() for a in al] for lang, al in self.aliases.items()},
|
|
@@ -764,6 +745,8 @@ class WikidataThing:
|
|
|
764
745
|
aliases[language] = []
|
|
765
746
|
for a in al:
|
|
766
747
|
aliases[language].append(Label.create_from_dict(a))
|
|
748
|
+
|
|
749
|
+
sync_time: datetime = parse_date(entity_dict[SYNC_TIME_TAG]) if SYNC_TIME_TAG in entity_dict else datetime.now()
|
|
767
750
|
# Initiate the wikidata thing
|
|
768
751
|
thing: WikidataThing = WikidataThing(
|
|
769
752
|
qid=entity_dict[QID_TAG],
|
|
@@ -772,6 +755,7 @@ class WikidataThing:
|
|
|
772
755
|
label=labels,
|
|
773
756
|
aliases=aliases,
|
|
774
757
|
description=descriptions,
|
|
758
|
+
sync_time=sync_time,
|
|
775
759
|
)
|
|
776
760
|
# Load the ontology types
|
|
777
761
|
thing.ontology_types = entity_dict.get(ONTOLOGY_TYPES_TAG, [])
|
|
@@ -802,6 +786,7 @@ class WikidataThing:
|
|
|
802
786
|
labels: Dict[str, Label] = {}
|
|
803
787
|
aliases: Dict[str, List[Label]] = {}
|
|
804
788
|
descriptions: Dict[str, Description] = {}
|
|
789
|
+
sync_time: datetime = datetime.now()
|
|
805
790
|
if LABELS_TAG in entity_dict:
|
|
806
791
|
# Extract the labels
|
|
807
792
|
for label in entity_dict[LABELS_TAG].values():
|
|
@@ -846,6 +831,7 @@ class WikidataThing:
|
|
|
846
831
|
label=labels,
|
|
847
832
|
aliases=aliases,
|
|
848
833
|
description=descriptions,
|
|
834
|
+
sync_time=sync_time,
|
|
849
835
|
)
|
|
850
836
|
|
|
851
837
|
# Iterate over the claims
|
|
@@ -891,7 +877,7 @@ class WikidataThing:
|
|
|
891
877
|
val = {"tabular": data_value["value"]}
|
|
892
878
|
elif data_type == "entity-schema":
|
|
893
879
|
val = {"id": data_value["value"]["id"]}
|
|
894
|
-
elif data_type
|
|
880
|
+
elif data_type in ["wikibase-form", "musical-notation"]:
|
|
895
881
|
continue
|
|
896
882
|
else:
|
|
897
883
|
raise WikiDataAPIException(f"Data type: {data_type} not supported.")
|
|
@@ -988,6 +974,7 @@ class WikidataThing:
|
|
|
988
974
|
self.__qid = state[QID_TAG]
|
|
989
975
|
self.__revision = state.get(REVISION_TAG)
|
|
990
976
|
self.__modified = parse_date(state[MODIFIED_TAG]) if MODIFIED_TAG in state else None
|
|
977
|
+
self.__sync_time = parse_date(state[SYNC_TIME_TAG]) if SYNC_TIME_TAG in state else datetime.now()
|
|
991
978
|
self.__label = labels
|
|
992
979
|
self.__aliases = aliases
|
|
993
980
|
self.__description = descriptions
|
|
@@ -1001,324 +988,3 @@ class WikidataThing:
|
|
|
1001
988
|
self.__sitelinks = {}
|
|
1002
989
|
for wiki_source, site_link in state[SITELINKS_TAG].items():
|
|
1003
990
|
self.__sitelinks[wiki_source] = SiteLinks.create_from_dict(site_link)
|
|
1004
|
-
|
|
1005
|
-
|
|
1006
|
-
class WikiDataAPIClient(ABC):
|
|
1007
|
-
"""
|
|
1008
|
-
WikiDataAPIClient
|
|
1009
|
-
-----------------
|
|
1010
|
-
Utility class for the WikiData.
|
|
1011
|
-
|
|
1012
|
-
"""
|
|
1013
|
-
|
|
1014
|
-
def __init__(self):
|
|
1015
|
-
pass
|
|
1016
|
-
|
|
1017
|
-
@staticmethod
|
|
1018
|
-
def sparql_query(query_string: str, wikidata_sparql_url: str = WIKIDATA_SPARQL_URL, max_retries: int = 3) -> dict:
|
|
1019
|
-
"""Send a SPARQL query and return the JSON formatted result.
|
|
1020
|
-
|
|
1021
|
-
Parameters
|
|
1022
|
-
-----------
|
|
1023
|
-
query_string: str
|
|
1024
|
-
SPARQL query string
|
|
1025
|
-
wikidata_sparql_url: str
|
|
1026
|
-
Wikidata SPARQL endpoint to use
|
|
1027
|
-
max_retries: int
|
|
1028
|
-
Maximum number of retries
|
|
1029
|
-
"""
|
|
1030
|
-
# Define the retry policy
|
|
1031
|
-
retry_policy: Retry = Retry(
|
|
1032
|
-
total=max_retries, # maximum number of retries
|
|
1033
|
-
backoff_factor=1, # factor by which to multiply the delay between retries
|
|
1034
|
-
status_forcelist=[429, 500, 502, 503, 504], # HTTP status codes to retry on
|
|
1035
|
-
respect_retry_after_header=True, # respect the Retry-After header
|
|
1036
|
-
)
|
|
1037
|
-
|
|
1038
|
-
# Create a session and mount the retry adapter
|
|
1039
|
-
with requests.Session() as session:
|
|
1040
|
-
retry_adapter = HTTPAdapter(max_retries=retry_policy)
|
|
1041
|
-
session.mount("https://", retry_adapter)
|
|
1042
|
-
|
|
1043
|
-
# Make a request using the session
|
|
1044
|
-
response: Response = session.get(
|
|
1045
|
-
wikidata_sparql_url, params={"query": query_string, "format": "json"}, timeout=10000
|
|
1046
|
-
)
|
|
1047
|
-
if response.ok:
|
|
1048
|
-
return response.json()
|
|
1049
|
-
|
|
1050
|
-
raise WikiDataAPIException(
|
|
1051
|
-
f"Failed to query entities. " f"Response code:={response.status_code}, Exception:= {response.content}."
|
|
1052
|
-
)
|
|
1053
|
-
|
|
1054
|
-
@staticmethod
|
|
1055
|
-
def superclasses(qid: str) -> Dict[str, WikidataClass]:
|
|
1056
|
-
"""
|
|
1057
|
-
Returns the Wikidata class with all its superclasses for the given QID.
|
|
1058
|
-
|
|
1059
|
-
Parameters
|
|
1060
|
-
----------
|
|
1061
|
-
qid: str
|
|
1062
|
-
Wikidata QID (e.g., 'Q146' for house cat).
|
|
1063
|
-
|
|
1064
|
-
Returns
|
|
1065
|
-
-------
|
|
1066
|
-
classes: Dict[str, WikidataClass]
|
|
1067
|
-
A dictionary of WikidataClass objects, where the keys are QIDs and the values are the corresponding
|
|
1068
|
-
"""
|
|
1069
|
-
# Fetch superclasses
|
|
1070
|
-
query = f"""
|
|
1071
|
-
SELECT DISTINCT ?class ?classLabel ?superclass ?superclassLabel
|
|
1072
|
-
WHERE
|
|
1073
|
-
{{
|
|
1074
|
-
wd:{qid} wdt:P279* ?class.
|
|
1075
|
-
?class wdt:P279 ?superclass.
|
|
1076
|
-
SERVICE wikibase:label {{bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }}
|
|
1077
|
-
}}
|
|
1078
|
-
"""
|
|
1079
|
-
try:
|
|
1080
|
-
reply: Dict[str, Any] = WikiDataAPIClient.sparql_query(query)
|
|
1081
|
-
wikidata_classes: Dict[str, WikidataClass] = {}
|
|
1082
|
-
cycle_detector: Set[Tuple[str, str]] = set()
|
|
1083
|
-
adjacency_list: Dict[str, Set[str]] = {}
|
|
1084
|
-
|
|
1085
|
-
if "results" in reply:
|
|
1086
|
-
for b in reply["results"]["bindings"]:
|
|
1087
|
-
superclass_qid = b["superclass"]["value"].rsplit("/", 1)[-1]
|
|
1088
|
-
class_qid = b["class"]["value"].rsplit("/", 1)[-1]
|
|
1089
|
-
superclass_label = b["superclassLabel"]["value"]
|
|
1090
|
-
class_label = b["classLabel"]["value"]
|
|
1091
|
-
|
|
1092
|
-
wikidata_classes.setdefault(class_qid, WikidataClass(class_qid, class_label))
|
|
1093
|
-
wikidata_classes.setdefault(superclass_qid, WikidataClass(superclass_qid, superclass_label))
|
|
1094
|
-
|
|
1095
|
-
adjacency_list.setdefault(class_qid, set()).add(superclass_qid)
|
|
1096
|
-
except Exception as e:
|
|
1097
|
-
logger.exception(e)
|
|
1098
|
-
return {qid: WikidataClass(qid, f"Class {qid}")}
|
|
1099
|
-
queue = deque([qid])
|
|
1100
|
-
visited = set()
|
|
1101
|
-
|
|
1102
|
-
while queue:
|
|
1103
|
-
current_qid = queue.popleft()
|
|
1104
|
-
if current_qid in visited:
|
|
1105
|
-
continue
|
|
1106
|
-
visited.add(current_qid)
|
|
1107
|
-
|
|
1108
|
-
if current_qid in adjacency_list:
|
|
1109
|
-
for superclass_qid in adjacency_list[current_qid]:
|
|
1110
|
-
if (current_qid, superclass_qid) not in cycle_detector:
|
|
1111
|
-
wikidata_classes[current_qid].superclasses.append(wikidata_classes[superclass_qid])
|
|
1112
|
-
queue.append(superclass_qid)
|
|
1113
|
-
cycle_detector.add((current_qid, superclass_qid))
|
|
1114
|
-
|
|
1115
|
-
return wikidata_classes
|
|
1116
|
-
|
|
1117
|
-
@staticmethod
|
|
1118
|
-
def subclasses(qid: str) -> Dict[str, WikidataClass]:
|
|
1119
|
-
"""
|
|
1120
|
-
Returns the Wikidata class with all its subclasses for the given QID.
|
|
1121
|
-
|
|
1122
|
-
Parameters
|
|
1123
|
-
----------
|
|
1124
|
-
qid: str
|
|
1125
|
-
Wikidata QID (e.g., 'Q146' for house cat).
|
|
1126
|
-
|
|
1127
|
-
Returns
|
|
1128
|
-
-------
|
|
1129
|
-
classes: Dict[str, WikidataClass]
|
|
1130
|
-
A dictionary of WikidataClass objects, where the keys are QIDs and the values are the corresponding
|
|
1131
|
-
classes with their subclasses populated.
|
|
1132
|
-
"""
|
|
1133
|
-
# Fetch subclasses
|
|
1134
|
-
query = f"""
|
|
1135
|
-
SELECT DISTINCT ?class ?classLabel ?subclass ?subclassLabel
|
|
1136
|
-
WHERE
|
|
1137
|
-
{{
|
|
1138
|
-
?subclass wdt:P279 wd:{qid}.
|
|
1139
|
-
?subclass wdt:P279 ?class.
|
|
1140
|
-
SERVICE wikibase:label {{ bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }}
|
|
1141
|
-
}}
|
|
1142
|
-
LIMIT 1000
|
|
1143
|
-
"""
|
|
1144
|
-
try:
|
|
1145
|
-
reply: Dict[str, Any] = WikiDataAPIClient.sparql_query(query)
|
|
1146
|
-
wikidata_classes: Dict[str, WikidataClass] = {}
|
|
1147
|
-
cycle_detector: Set[Tuple[str, str]] = set()
|
|
1148
|
-
adjacency_list: Dict[str, Set[str]] = {}
|
|
1149
|
-
|
|
1150
|
-
if "results" in reply:
|
|
1151
|
-
for b in reply["results"]["bindings"]:
|
|
1152
|
-
subclass_qid = b["subclass"]["value"].rsplit("/", 1)[-1]
|
|
1153
|
-
class_qid = b["class"]["value"].rsplit("/", 1)[-1]
|
|
1154
|
-
subclass_label = b["subclassLabel"]["value"]
|
|
1155
|
-
class_label = b["classLabel"]["value"]
|
|
1156
|
-
|
|
1157
|
-
wikidata_classes.setdefault(class_qid, WikidataClass(class_qid, class_label))
|
|
1158
|
-
wikidata_classes.setdefault(subclass_qid, WikidataClass(subclass_qid, subclass_label))
|
|
1159
|
-
|
|
1160
|
-
# subclass -> class relationship (reverse of superclass logic)
|
|
1161
|
-
adjacency_list.setdefault(class_qid, set()).add(subclass_qid)
|
|
1162
|
-
except Exception as e:
|
|
1163
|
-
logger.exception(e)
|
|
1164
|
-
return {qid: WikidataClass(qid, f"Class {qid}")}
|
|
1165
|
-
|
|
1166
|
-
queue = deque([qid])
|
|
1167
|
-
visited = set()
|
|
1168
|
-
|
|
1169
|
-
while queue:
|
|
1170
|
-
current_qid = queue.popleft()
|
|
1171
|
-
if current_qid in visited:
|
|
1172
|
-
continue
|
|
1173
|
-
visited.add(current_qid)
|
|
1174
|
-
|
|
1175
|
-
# Ensure the starting QID is in the dictionary
|
|
1176
|
-
if current_qid not in wikidata_classes:
|
|
1177
|
-
# If not present, we might need to fetch its label separately
|
|
1178
|
-
wikidata_classes[current_qid] = WikidataClass(current_qid, f"Class {current_qid}")
|
|
1179
|
-
|
|
1180
|
-
if current_qid in adjacency_list:
|
|
1181
|
-
for subclass_qid in adjacency_list[current_qid]:
|
|
1182
|
-
if (current_qid, subclass_qid) not in cycle_detector:
|
|
1183
|
-
wikidata_classes[current_qid].subclasses.append(wikidata_classes[subclass_qid])
|
|
1184
|
-
queue.append(subclass_qid)
|
|
1185
|
-
cycle_detector.add((current_qid, subclass_qid))
|
|
1186
|
-
|
|
1187
|
-
return wikidata_classes
|
|
1188
|
-
|
|
1189
|
-
@staticmethod
|
|
1190
|
-
def search_term(
|
|
1191
|
-
search_term: str, language: LanguageCode, url: str = WIKIDATA_SEARCH_URL
|
|
1192
|
-
) -> List[WikidataSearchResult]:
|
|
1193
|
-
"""
|
|
1194
|
-
Search for a term in the WikiData.
|
|
1195
|
-
Parameters
|
|
1196
|
-
----------
|
|
1197
|
-
search_term: str
|
|
1198
|
-
The term to search for.
|
|
1199
|
-
language: str
|
|
1200
|
-
The language to search in.
|
|
1201
|
-
url: str
|
|
1202
|
-
The URL of the WikiData search API.
|
|
1203
|
-
|
|
1204
|
-
Returns
|
|
1205
|
-
-------
|
|
1206
|
-
search_results_dict: List[WikidataSearchResult]
|
|
1207
|
-
The search results.
|
|
1208
|
-
"""
|
|
1209
|
-
search_results_dict: List[WikidataSearchResult] = []
|
|
1210
|
-
# Define the retry policy
|
|
1211
|
-
retry_policy: Retry = Retry(
|
|
1212
|
-
total=3, # maximum number of retries
|
|
1213
|
-
backoff_factor=1, # factor by which to multiply the delay between retries
|
|
1214
|
-
status_forcelist=[429, 500, 502, 503, 504], # HTTP status codes to retry on
|
|
1215
|
-
respect_retry_after_header=True, # respect the Retry-After header
|
|
1216
|
-
)
|
|
1217
|
-
|
|
1218
|
-
# Create a session and mount the retry adapter
|
|
1219
|
-
with requests.Session() as session:
|
|
1220
|
-
retry_adapter = HTTPAdapter(max_retries=retry_policy)
|
|
1221
|
-
session.mount("https://", retry_adapter)
|
|
1222
|
-
params: Dict[str, str] = {
|
|
1223
|
-
"action": "wbsearchentities",
|
|
1224
|
-
"format": "json",
|
|
1225
|
-
"language": language,
|
|
1226
|
-
"search": search_term,
|
|
1227
|
-
}
|
|
1228
|
-
# Make a request using the session
|
|
1229
|
-
response: Response = session.get(url, params=params, timeout=200000)
|
|
1230
|
-
|
|
1231
|
-
# Check the response status code
|
|
1232
|
-
if not response.ok:
|
|
1233
|
-
raise WikiDataAPIException(
|
|
1234
|
-
f"Search request failed with status code : {response.status_code}. " f"URL:= {url}"
|
|
1235
|
-
)
|
|
1236
|
-
search_result_dict_full: Dict[str, Any] = response.json()
|
|
1237
|
-
for search_result_dict in search_result_dict_full["search"]:
|
|
1238
|
-
search_results_dict.append(WikidataSearchResult.from_dict(search_result_dict))
|
|
1239
|
-
return search_results_dict
|
|
1240
|
-
|
|
1241
|
-
@staticmethod
|
|
1242
|
-
def __wikidata_task__(qid: str) -> WikidataThing:
|
|
1243
|
-
"""Retrieve a single Wikidata thing.
|
|
1244
|
-
|
|
1245
|
-
Parameters
|
|
1246
|
-
----------
|
|
1247
|
-
qid: str
|
|
1248
|
-
QID of the entity.
|
|
1249
|
-
|
|
1250
|
-
Returns
|
|
1251
|
-
-------
|
|
1252
|
-
instance: WikidataThing
|
|
1253
|
-
Single wikidata thing
|
|
1254
|
-
"""
|
|
1255
|
-
try:
|
|
1256
|
-
return WikidataThing.from_wikidata(__waiting_request__(qid))
|
|
1257
|
-
except Exception as e:
|
|
1258
|
-
logger.exception(e)
|
|
1259
|
-
raise WikiDataAPIException(e) from e
|
|
1260
|
-
|
|
1261
|
-
@staticmethod
|
|
1262
|
-
def __wikidata_multiple_task__(qids: List[str]) -> List[WikidataThing]:
|
|
1263
|
-
"""Retrieve multiple Wikidata things.
|
|
1264
|
-
|
|
1265
|
-
Parameters
|
|
1266
|
-
----------
|
|
1267
|
-
qids: List[str]
|
|
1268
|
-
QIDs of the entities.
|
|
1269
|
-
|
|
1270
|
-
Returns
|
|
1271
|
-
-------
|
|
1272
|
-
instances: List[WikidataThing]
|
|
1273
|
-
List of wikidata things
|
|
1274
|
-
"""
|
|
1275
|
-
try:
|
|
1276
|
-
return [WikidataThing.from_wikidata(e) for e in __waiting_multi_request__(qids)]
|
|
1277
|
-
except Exception as e:
|
|
1278
|
-
logger.exception(e)
|
|
1279
|
-
raise WikiDataAPIException(e) from e
|
|
1280
|
-
|
|
1281
|
-
@staticmethod
|
|
1282
|
-
def retrieve_entity(qid: str) -> WikidataThing:
|
|
1283
|
-
"""
|
|
1284
|
-
Retrieve a single Wikidata thing.
|
|
1285
|
-
|
|
1286
|
-
Parameters
|
|
1287
|
-
----------
|
|
1288
|
-
qid: str
|
|
1289
|
-
QID of the entity.
|
|
1290
|
-
|
|
1291
|
-
Returns
|
|
1292
|
-
-------
|
|
1293
|
-
instance: WikidataThing
|
|
1294
|
-
Single wikidata thing
|
|
1295
|
-
"""
|
|
1296
|
-
return WikiDataAPIClient.__wikidata_task__(qid)
|
|
1297
|
-
|
|
1298
|
-
@staticmethod
|
|
1299
|
-
def retrieve_entities(qids: Union[List[str], Set[str]]) -> List[WikidataThing]:
|
|
1300
|
-
"""
|
|
1301
|
-
Retrieve multiple Wikidata things.
|
|
1302
|
-
Parameters
|
|
1303
|
-
----------
|
|
1304
|
-
qids: List[str]
|
|
1305
|
-
QIDs of the entities.
|
|
1306
|
-
|
|
1307
|
-
Returns
|
|
1308
|
-
-------
|
|
1309
|
-
instances: List[WikidataThing]
|
|
1310
|
-
List of wikidata things.
|
|
1311
|
-
"""
|
|
1312
|
-
pulled: List[WikidataThing] = []
|
|
1313
|
-
if len(qids) == 0:
|
|
1314
|
-
return []
|
|
1315
|
-
jobs: List[List[str]] = list(chunks(list(qids), API_LIMIT))
|
|
1316
|
-
num_processes: int = min(len(jobs), multiprocessing.cpu_count())
|
|
1317
|
-
if num_processes > 1:
|
|
1318
|
-
with Pool(processes=num_processes) as pool:
|
|
1319
|
-
# Wikidata thing is not support in multiprocessing
|
|
1320
|
-
for lst in pool.imap_unordered(__waiting_multi_request__, jobs):
|
|
1321
|
-
pulled.extend([WikidataThing.from_wikidata(e) for e in lst])
|
|
1322
|
-
else:
|
|
1323
|
-
pulled = WikiDataAPIClient.__wikidata_multiple_task__(jobs[0])
|
|
1324
|
-
return pulled
|
knowledge/utils/graph.py
CHANGED
|
@@ -112,10 +112,10 @@ def things_session_iter(
|
|
|
112
112
|
force_refresh_timeout: int [default:= 360]
|
|
113
113
|
Force refresh timeout
|
|
114
114
|
|
|
115
|
-
|
|
115
|
+
Yields
|
|
116
116
|
-------
|
|
117
|
-
|
|
118
|
-
|
|
117
|
+
ThingObject
|
|
118
|
+
Next thing object
|
|
119
119
|
|
|
120
120
|
Raises
|
|
121
121
|
------
|
|
@@ -391,10 +391,10 @@ async def async_things_session_iter(
|
|
|
391
391
|
force_refresh_timeout: int [default:= 360]
|
|
392
392
|
Force refresh timeout
|
|
393
393
|
|
|
394
|
-
|
|
394
|
+
Yields
|
|
395
395
|
-------
|
|
396
|
-
|
|
397
|
-
|
|
396
|
+
ThingObject
|
|
397
|
+
Next thing object
|
|
398
398
|
"""
|
|
399
399
|
next_page_id: Optional[str] = None
|
|
400
400
|
if async_client.current_session is None:
|
{personal_knowledge_library-3.0.0.dist-info → personal_knowledge_library-3.1.1.dist-info}/METADATA
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: personal_knowledge_library
|
|
3
|
-
Version: 3.
|
|
3
|
+
Version: 3.1.1
|
|
4
4
|
Summary: Library to access Wacom's Personal Knowledge graph.
|
|
5
5
|
License: Apache-2.0
|
|
6
6
|
Keywords: semantic-knowledge,knowledge-graph
|
|
@@ -8,6 +8,12 @@ Author: Markus Weber
|
|
|
8
8
|
Author-email: markus.weber@wacom.com
|
|
9
9
|
Requires-Python: >=3.9,<4.0
|
|
10
10
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
11
17
|
Provides-Extra: dev
|
|
12
18
|
Requires-Dist: Faker (==18.9.0) ; extra == "dev"
|
|
13
19
|
Requires-Dist: OntoSpy (==2.1.1) ; extra == "dev"
|
{personal_knowledge_library-3.0.0.dist-info → personal_knowledge_library-3.1.1.dist-info}/RECORD
RENAMED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
knowledge/__init__.py,sha256=
|
|
1
|
+
knowledge/__init__.py,sha256=UL4La2Ep9cXbMd4imaLqt6UCqBhjLjusWHDZXU8Pc8s,2680
|
|
2
2
|
knowledge/base/__init__.py,sha256=q0NJRQLhfZ8khE-uZCK0SVA38dzbaMomcgL5Olnjtio,895
|
|
3
3
|
knowledge/base/access.py,sha256=BSh-6QbeHOCu55XTxA-p3DwEyRzgtN8TSxtcn6UvmZo,4411
|
|
4
4
|
knowledge/base/entity.py,sha256=b-Ana_H_WI2-AT_n-V_HzUL6W9Ri16DcZFS3q4ziI94,8445
|
|
@@ -9,13 +9,14 @@ knowledge/base/tenant.py,sha256=f2Z_LjUcjIt5J2_Rse9aQyTzJ0sSyVvCzlm8PP3PqY8,6084
|
|
|
9
9
|
knowledge/nel/__init__.py,sha256=eTT88LV_KQ-Ku-a4RnTv_TUCNogl357ui4KT1pEKMuQ,330
|
|
10
10
|
knowledge/nel/base.py,sha256=bsUj9PwoZoJWW33RfoYmQHbFhArKH1kMOsDgU3yj0T8,15032
|
|
11
11
|
knowledge/nel/engine.py,sha256=qvQkfsdeYPWJ_5m8mmGFw1tvd699vOQ3IKoBIwALRWk,5347
|
|
12
|
-
knowledge/ontomapping/__init__.py,sha256=
|
|
13
|
-
knowledge/ontomapping/manager.py,sha256=
|
|
14
|
-
knowledge/public/__init__.py,sha256=
|
|
15
|
-
knowledge/public/cache.py,sha256=
|
|
16
|
-
knowledge/public/
|
|
17
|
-
knowledge/public/
|
|
18
|
-
knowledge/public/
|
|
12
|
+
knowledge/ontomapping/__init__.py,sha256=9E-rZpxQPfukjDz_0ymrRieYDpv_e3tvpqDfkWzPJy0,18846
|
|
13
|
+
knowledge/ontomapping/manager.py,sha256=pXBwRGSTcS731df5vewNv6oMgP18HzvnjZqiUgyFIhI,13361
|
|
14
|
+
knowledge/public/__init__.py,sha256=FrW5sqJGVcIfg5IVpt_g7qlWiIYNGA370jkE5mJDhoo,812
|
|
15
|
+
knowledge/public/cache.py,sha256=uKEuW9WN9xPmY-fwWPLSxmdEP6xg-XG3g8fKAmGZgBQ,14510
|
|
16
|
+
knowledge/public/client.py,sha256=KA8EwLHAjn1-ggT8keUMaKIJnbJDlFEp0j8V0RT_32I,15437
|
|
17
|
+
knowledge/public/helper.py,sha256=PDsInMHMHgP8rMvcFk5E47afdE3rC7V3BzPS-DdOsww,13238
|
|
18
|
+
knowledge/public/relations.py,sha256=DrL-rYuwLzaE2H2TOM2gG1TGqHbkcPL4EsRD54LQqLs,4182
|
|
19
|
+
knowledge/public/wikidata.py,sha256=LwMP2kq2mH5Oi9JhPvG8lN9pZMDlQvxgaZgQKQO3kaM,36683
|
|
19
20
|
knowledge/services/__init__.py,sha256=FQMTLg7RYglLwCtEO0sOrgj55YrzSNqJV-yj3aqttb8,3490
|
|
20
21
|
knowledge/services/asyncio/__init__.py,sha256=dq9yGTxg_hoQb8E1kEtY4AeB8zrdJfw3-XlPoYn2j5U,225
|
|
21
22
|
knowledge/services/asyncio/base.py,sha256=im6J1CWOp1N1tt6WdJ1uU0R-VFpvdYc6lZu8TyPrU5A,16222
|
|
@@ -33,10 +34,10 @@ knowledge/services/session.py,sha256=y8uTETRMDOBbC30UAlrrtWTN7Zgs2klqHsoMjLO2tq0
|
|
|
33
34
|
knowledge/services/tenant.py,sha256=4pyQCYlwvREfmRcXRn47irIsFMCia_c-LVvFysDypkI,11691
|
|
34
35
|
knowledge/services/users.py,sha256=uus66ijd1OAN-gJqP95zmhDL31xje3gHBfF4rMnMjM0,16436
|
|
35
36
|
knowledge/utils/__init__.py,sha256=rgLqdgZwjuCnqA4NEY9echbGFaJ5YUhqBEkyVv9R9CM,255
|
|
36
|
-
knowledge/utils/graph.py,sha256=
|
|
37
|
+
knowledge/utils/graph.py,sha256=kuPZEGhexVN9McoT8JK2ViIrfXTh-nFPv_8XPfG0wlA,12318
|
|
37
38
|
knowledge/utils/wikidata.py,sha256=vRH-4AMR-3xywyvmDqbjI4KSw4tF4TEYqUGCJmUMqK8,5512
|
|
38
39
|
knowledge/utils/wikipedia.py,sha256=rBuFqYVM58JCj5ISLxuhYVVl2gOIlPLWx7_CghyQgvE,5052
|
|
39
|
-
personal_knowledge_library-3.
|
|
40
|
-
personal_knowledge_library-3.
|
|
41
|
-
personal_knowledge_library-3.
|
|
42
|
-
personal_knowledge_library-3.
|
|
40
|
+
personal_knowledge_library-3.1.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
41
|
+
personal_knowledge_library-3.1.1.dist-info/METADATA,sha256=kJZ15MOIszkwgs8hv6YMnKXLc0RKNyFfYz9X4r1l6f0,57087
|
|
42
|
+
personal_knowledge_library-3.1.1.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
|
|
43
|
+
personal_knowledge_library-3.1.1.dist-info/RECORD,,
|
{personal_knowledge_library-3.0.0.dist-info → personal_knowledge_library-3.1.1.dist-info}/LICENSE
RENAMED
|
File without changes
|
{personal_knowledge_library-3.0.0.dist-info → personal_knowledge_library-3.1.1.dist-info}/WHEEL
RENAMED
|
File without changes
|