personal_knowledge_library-3.0.0-py3-none-any.whl → personal_knowledge_library-3.1.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of personal_knowledge_library might be problematic. Click here for more details.
- knowledge/__init__.py +1 -1
- knowledge/ontomapping/__init__.py +33 -115
- knowledge/ontomapping/manager.py +24 -25
- knowledge/public/__init__.py +8 -7
- knowledge/public/cache.py +413 -86
- knowledge/public/client.py +428 -0
- knowledge/public/helper.py +39 -11
- knowledge/public/relations.py +2 -1
- knowledge/public/wikidata.py +47 -381
- knowledge/utils/graph.py +6 -6
- {personal_knowledge_library-3.0.0.dist-info → personal_knowledge_library-3.1.1.dist-info}/METADATA +7 -1
- {personal_knowledge_library-3.0.0.dist-info → personal_knowledge_library-3.1.1.dist-info}/RECORD +14 -13
- {personal_knowledge_library-3.0.0.dist-info → personal_knowledge_library-3.1.1.dist-info}/LICENSE +0 -0
- {personal_knowledge_library-3.0.0.dist-info → personal_knowledge_library-3.1.1.dist-info}/WHEEL +0 -0
knowledge/__init__.py
CHANGED
|
@@ -10,7 +10,9 @@ import loguru
|
|
|
10
10
|
from rdflib import Graph, RDFS, URIRef
|
|
11
11
|
|
|
12
12
|
from knowledge.base.ontology import OntologyClassReference, OntologyPropertyReference, DataPropertyType
|
|
13
|
-
from knowledge.public.
|
|
13
|
+
from knowledge.public.cache import WikidataCache
|
|
14
|
+
from knowledge.public.wikidata import WikidataClass
|
|
15
|
+
from knowledge.public.client import WikiDataAPIClient
|
|
14
16
|
|
|
15
17
|
# Classes
|
|
16
18
|
TOPIC_CLASS: str = "wacom:core#Topic"
|
|
@@ -25,11 +27,8 @@ CONTEXT_NAME: str = "core"
|
|
|
25
27
|
CWD: Path = Path(__file__).parent
|
|
26
28
|
ontology_graph: Graph = Graph()
|
|
27
29
|
logger = loguru.logger
|
|
28
|
-
#
|
|
29
|
-
|
|
30
|
-
subclasses_cache: Optional[Dict[str, WikidataClass]] = None
|
|
31
|
-
superclasses_path: Optional[Path] = None
|
|
32
|
-
subclasses_path: Optional[Path] = None
|
|
30
|
+
# Cache
|
|
31
|
+
wikidata_cache: WikidataCache() = WikidataCache()
|
|
33
32
|
|
|
34
33
|
|
|
35
34
|
def flatten(hierarchy: WikidataClass) -> Set[str]:
|
|
@@ -45,15 +44,24 @@ def flatten(hierarchy: WikidataClass) -> Set[str]:
|
|
|
45
44
|
-------
|
|
46
45
|
hierarchy: Set[str]
|
|
47
46
|
Hierarchy
|
|
48
|
-
|
|
49
47
|
"""
|
|
50
|
-
hierarchy_set: Set[str] =
|
|
48
|
+
hierarchy_set: Set[str] = set()
|
|
51
49
|
jobs: List[WikidataClass] = [hierarchy]
|
|
52
|
-
|
|
53
|
-
|
|
50
|
+
|
|
51
|
+
while jobs:
|
|
52
|
+
job = jobs.pop()
|
|
53
|
+
|
|
54
|
+
# Skip if already visited
|
|
55
|
+
if job.qid in hierarchy_set:
|
|
56
|
+
continue
|
|
57
|
+
|
|
54
58
|
hierarchy_set.add(job.qid)
|
|
59
|
+
|
|
60
|
+
# Add only unvisited subclasses
|
|
55
61
|
for c in job.subclasses:
|
|
56
|
-
|
|
62
|
+
if c.qid not in hierarchy_set:
|
|
63
|
+
jobs.append(c)
|
|
64
|
+
|
|
57
65
|
return hierarchy_set
|
|
58
66
|
|
|
59
67
|
|
|
@@ -334,7 +342,7 @@ class MappingConfiguration:
|
|
|
334
342
|
properties.append(prop_conf)
|
|
335
343
|
return properties
|
|
336
344
|
|
|
337
|
-
def add_class(self, class_configuration: ClassConfiguration
|
|
345
|
+
def add_class(self, class_configuration: ClassConfiguration):
|
|
338
346
|
"""
|
|
339
347
|
Adds a class configuration.
|
|
340
348
|
|
|
@@ -342,30 +350,30 @@ class MappingConfiguration:
|
|
|
342
350
|
----------
|
|
343
351
|
class_configuration: ClassConfiguration
|
|
344
352
|
The class configuration
|
|
345
|
-
subclasses: Dict[str, List[str]]
|
|
346
|
-
The subclasses
|
|
347
353
|
"""
|
|
348
354
|
self.__classes.append(class_configuration)
|
|
349
355
|
class_idx: int = len(self.__classes) - 1
|
|
350
356
|
number_of_classes: int = len(class_configuration.wikidata_classes)
|
|
351
357
|
if number_of_classes > 0:
|
|
352
|
-
logger.
|
|
353
|
-
for
|
|
354
|
-
if c
|
|
355
|
-
for subclass in subclasses
|
|
358
|
+
logger.debug(f"Adding {number_of_classes} classes for {class_configuration.concept_type.iri}")
|
|
359
|
+
for _, c in enumerate(class_configuration.wikidata_classes):
|
|
360
|
+
if wikidata_cache.subclass_in_cache(c):
|
|
361
|
+
for subclass in wikidata_cache.get_subclass(c).subclasses:
|
|
356
362
|
if subclass in self.__index:
|
|
357
363
|
logger.warning(f"Class {subclass} already exists in the index.")
|
|
358
364
|
class_config: ClassConfiguration = self.__classes[self.__index[subclass]]
|
|
359
|
-
logger.warning(
|
|
360
|
-
|
|
365
|
+
logger.warning(
|
|
366
|
+
f"Class {class_config.concept_type} "
|
|
367
|
+
f"is conflicting with {class_configuration.concept_type}."
|
|
368
|
+
)
|
|
361
369
|
self.__index[subclass] = class_idx
|
|
362
370
|
self.__direct_index[c] = class_idx
|
|
363
371
|
else:
|
|
364
372
|
w_classes: Dict[str, WikidataClass] = WikiDataAPIClient.subclasses(c)
|
|
365
373
|
for subclass in w_classes.values():
|
|
374
|
+
wikidata_cache.cache_subclass(subclass)
|
|
366
375
|
for cls in flatten(subclass):
|
|
367
376
|
self.__index[cls] = class_idx
|
|
368
|
-
|
|
369
377
|
for c in class_configuration.dbpedia_classes:
|
|
370
378
|
self.__index[c] = len(self.__classes) - 1
|
|
371
379
|
|
|
@@ -480,15 +488,13 @@ class MappingConfiguration:
|
|
|
480
488
|
mapping_configuration: Optional[MappingConfiguration] = None
|
|
481
489
|
|
|
482
490
|
|
|
483
|
-
def build_configuration(mapping: Dict[str, Any]
|
|
491
|
+
def build_configuration(mapping: Dict[str, Any]) -> MappingConfiguration:
|
|
484
492
|
"""
|
|
485
493
|
Builds the configuration from the mapping file.
|
|
486
494
|
Parameters
|
|
487
495
|
----------
|
|
488
496
|
mapping: Dict[str, Any]
|
|
489
497
|
The mapping file
|
|
490
|
-
subclasses: Dict[str, List[str]]
|
|
491
|
-
The subclasses
|
|
492
498
|
|
|
493
499
|
Returns
|
|
494
500
|
-------
|
|
@@ -502,7 +508,7 @@ def build_configuration(mapping: Dict[str, Any], subclasses: Dict[str, List[str]
|
|
|
502
508
|
class_config: ClassConfiguration = ClassConfiguration(c)
|
|
503
509
|
class_config.dbpedia_classes = c_conf[DBPEDIA_TYPES]
|
|
504
510
|
class_config.wikidata_classes = c_conf[WIKIDATA_TYPES]
|
|
505
|
-
conf.add_class(class_config
|
|
511
|
+
conf.add_class(class_config)
|
|
506
512
|
dataproperty_count: int = len(mapping["data_properties"])
|
|
507
513
|
logger.debug(f"Adding {dataproperty_count} data properties to the mapping configuration")
|
|
508
514
|
for p, p_conf in mapping["data_properties"].items():
|
|
@@ -517,7 +523,6 @@ def build_configuration(mapping: Dict[str, Any], subclasses: Dict[str, List[str]
|
|
|
517
523
|
for do in p_conf["domains"]:
|
|
518
524
|
property_config.domains.append(do)
|
|
519
525
|
property_config.domains.extend(subclasses_of(do))
|
|
520
|
-
|
|
521
526
|
conf.add_property(property_config)
|
|
522
527
|
object_property_count: int = len(mapping["object_properties"])
|
|
523
528
|
logger.debug(f"Adding {object_property_count} object properties to the mapping configuration")
|
|
@@ -538,20 +543,6 @@ def build_configuration(mapping: Dict[str, Any], subclasses: Dict[str, List[str]
|
|
|
538
543
|
conf.add_property(property_config)
|
|
539
544
|
return conf
|
|
540
545
|
|
|
541
|
-
|
|
542
|
-
def update_superclass_cache(path: Path):
|
|
543
|
-
"""
|
|
544
|
-
Updates the taxonomy cache.
|
|
545
|
-
|
|
546
|
-
Parameters
|
|
547
|
-
----------
|
|
548
|
-
path: Path
|
|
549
|
-
The path to the cache file.
|
|
550
|
-
"""
|
|
551
|
-
with open(path, "w", encoding="uft-8") as fp_taxonomy_write:
|
|
552
|
-
fp_taxonomy_write.write(json.dumps(superclasses_cache, indent=2, cls=WikidataClassEncoder))
|
|
553
|
-
|
|
554
|
-
|
|
555
546
|
def register_ontology(rdf_str: str):
|
|
556
547
|
"""
|
|
557
548
|
Registers the ontology.
|
|
@@ -562,41 +553,6 @@ def register_ontology(rdf_str: str):
|
|
|
562
553
|
"""
|
|
563
554
|
ontology_graph.parse(data=rdf_str, format="xml")
|
|
564
555
|
|
|
565
|
-
|
|
566
|
-
def superclass_path_from(configuration_path: Path) -> Path:
|
|
567
|
-
"""
|
|
568
|
-
Returns the path to the superclass cache file.
|
|
569
|
-
|
|
570
|
-
Parameters
|
|
571
|
-
----------
|
|
572
|
-
configuration_path: Path
|
|
573
|
-
The path to the configuration file.
|
|
574
|
-
|
|
575
|
-
Returns
|
|
576
|
-
-------
|
|
577
|
-
path: Path
|
|
578
|
-
The path to the superclass cache file.
|
|
579
|
-
"""
|
|
580
|
-
return configuration_path.parent / "superclasses.json"
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
def subclass_path_from(path: Path):
|
|
584
|
-
"""
|
|
585
|
-
Returns the path to the subclass cache file.
|
|
586
|
-
|
|
587
|
-
Parameters
|
|
588
|
-
----------
|
|
589
|
-
path: Path
|
|
590
|
-
The path to the configuration file.
|
|
591
|
-
|
|
592
|
-
Returns
|
|
593
|
-
-------
|
|
594
|
-
subclass_path: Path
|
|
595
|
-
The path to the subclass cache file.
|
|
596
|
-
"""
|
|
597
|
-
return path.parent / "subclasses.json"
|
|
598
|
-
|
|
599
|
-
|
|
600
556
|
def load_configuration(configuration: Path):
|
|
601
557
|
"""
|
|
602
558
|
Loads the configuration.
|
|
@@ -606,17 +562,11 @@ def load_configuration(configuration: Path):
|
|
|
606
562
|
ValueError
|
|
607
563
|
If the configuration file is not found.
|
|
608
564
|
"""
|
|
609
|
-
global mapping_configuration
|
|
610
|
-
subclasses_path = subclass_path_from(configuration)
|
|
611
|
-
superclasses_path = superclass_path_from(configuration)
|
|
565
|
+
global mapping_configuration
|
|
612
566
|
if configuration.exists():
|
|
613
567
|
with configuration.open("r", encoding="utf-8") as fp_configuration:
|
|
614
568
|
configuration = json.loads(fp_configuration.read())
|
|
615
|
-
|
|
616
|
-
if subclasses_path.exists():
|
|
617
|
-
with subclasses_path.open("r", encoding="utf-8") as fp_sub:
|
|
618
|
-
subclasses = json.loads(fp_sub.read())
|
|
619
|
-
mapping_configuration = build_configuration(configuration, subclasses)
|
|
569
|
+
mapping_configuration = build_configuration(configuration)
|
|
620
570
|
else:
|
|
621
571
|
raise ValueError(f"Configuration file {configuration} not found.")
|
|
622
572
|
|
|
@@ -633,35 +583,3 @@ def get_mapping_configuration() -> MappingConfiguration:
|
|
|
633
583
|
if mapping_configuration is None:
|
|
634
584
|
raise ValueError("Please load configuration")
|
|
635
585
|
return mapping_configuration
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
def save_superclasses_cache(path: Path):
|
|
639
|
-
"""
|
|
640
|
-
Saves the taxonomy cache.
|
|
641
|
-
|
|
642
|
-
Parameters
|
|
643
|
-
----------
|
|
644
|
-
path: Path
|
|
645
|
-
The path to the cache file.
|
|
646
|
-
"""
|
|
647
|
-
global superclasses_cache
|
|
648
|
-
if superclasses_cache is None:
|
|
649
|
-
return
|
|
650
|
-
with open(path, "w", encoding="utf-8") as fp_taxonomy_write:
|
|
651
|
-
fp_taxonomy_write.write(json.dumps(superclasses_cache, indent=2, cls=WikidataClassEncoder))
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
def save_subclasses_cache(path: Path):
|
|
655
|
-
"""
|
|
656
|
-
Saves the taxonomy cache.
|
|
657
|
-
|
|
658
|
-
Parameters
|
|
659
|
-
----------
|
|
660
|
-
path: Path
|
|
661
|
-
The path to the cache file.
|
|
662
|
-
"""
|
|
663
|
-
global subclasses_cache
|
|
664
|
-
if subclasses_cache is None:
|
|
665
|
-
return
|
|
666
|
-
with open(path, "w", encoding="utf-8") as fp_taxonomy_write:
|
|
667
|
-
fp_taxonomy_write.write(json.dumps(subclasses_cache, indent=2, cls=WikidataClassEncoder))
|
knowledge/ontomapping/manager.py
CHANGED
|
@@ -1,11 +1,13 @@
|
|
|
1
1
|
# -*- coding: utf-8 -*-
|
|
2
2
|
# Copyright © 2023-present Wacom. All rights reserved.
|
|
3
|
-
import logging
|
|
4
3
|
import time
|
|
5
|
-
from datetime import datetime
|
|
4
|
+
from datetime import datetime, timezone
|
|
6
5
|
from typing import Optional, Any, List, Dict, Tuple, Set
|
|
7
6
|
|
|
7
|
+
import loguru
|
|
8
|
+
|
|
8
9
|
from knowledge.base.entity import Label, LanguageCode, Description
|
|
10
|
+
from knowledge.base.language import LOCALE_LANGUAGE_MAPPING, LocaleCode, LANGUAGE_LOCALE_MAPPING, EN_US
|
|
9
11
|
from knowledge.base.ontology import (
|
|
10
12
|
ThingObject,
|
|
11
13
|
DataProperty,
|
|
@@ -18,18 +20,19 @@ from knowledge.base.ontology import (
|
|
|
18
20
|
from knowledge.ontomapping import (
|
|
19
21
|
ClassConfiguration,
|
|
20
22
|
TOPIC_CLASS,
|
|
21
|
-
superclasses_cache,
|
|
22
23
|
PropertyConfiguration,
|
|
23
24
|
PropertyType,
|
|
24
25
|
get_mapping_configuration,
|
|
25
|
-
save_superclasses_cache,
|
|
26
|
-
superclasses_path,
|
|
27
26
|
)
|
|
28
|
-
from knowledge.public.cache import
|
|
29
|
-
from knowledge.public.
|
|
30
|
-
from knowledge.
|
|
27
|
+
from knowledge.public.cache import WikidataCache
|
|
28
|
+
from knowledge.public.client import WikiDataAPIClient
|
|
29
|
+
from knowledge.public.wikidata import WikidataThing, WikidataClass, WikidataProperty
|
|
31
30
|
from knowledge.utils.wikipedia import get_wikipedia_summary
|
|
32
31
|
|
|
32
|
+
logger = loguru.logger
|
|
33
|
+
# Initialize the cache
|
|
34
|
+
cache: WikidataCache = WikidataCache()
|
|
35
|
+
|
|
33
36
|
|
|
34
37
|
def flatten(hierarchy: WikidataClass, use_names: bool = False) -> List[str]:
|
|
35
38
|
"""
|
|
@@ -78,16 +81,10 @@ def wikidata_taxonomy(qid: str) -> Optional[WikidataClass]:
|
|
|
78
81
|
hierarchy: WikidataClass
|
|
79
82
|
Hierarchy.
|
|
80
83
|
"""
|
|
81
|
-
if superclasses_cache and qid in superclasses_cache:
|
|
82
|
-
taxonomy: WikidataClass = superclasses_cache[qid]
|
|
83
|
-
return taxonomy
|
|
84
84
|
hierarchy: Dict[str, WikidataClass] = WikiDataAPIClient.superclasses(qid)
|
|
85
85
|
if qid not in hierarchy:
|
|
86
|
-
|
|
86
|
+
logger.warning(f"Taxonomy for {qid} not found.")
|
|
87
87
|
return None
|
|
88
|
-
if hierarchy:
|
|
89
|
-
superclasses_cache.update(hierarchy)
|
|
90
|
-
save_superclasses_cache(superclasses_path)
|
|
91
88
|
return hierarchy.get(qid)
|
|
92
89
|
|
|
93
90
|
|
|
@@ -116,10 +113,13 @@ def convert_dict(structure: Dict[str, Any], locale: str) -> Optional[str]:
|
|
|
116
113
|
if structure_type == "quantity" and isinstance(value, dict):
|
|
117
114
|
return value["amount"]
|
|
118
115
|
if structure_type == "wikibase-item" and isinstance(value, dict):
|
|
119
|
-
|
|
116
|
+
qid: str = value["id"]
|
|
117
|
+
if cache.qid_in_cache(qid):
|
|
118
|
+
wikidata_data: WikidataThing = cache.get_wikidata_object(qid)
|
|
119
|
+
else:
|
|
120
|
+
wikidata_data: WikidataThing = WikiDataAPIClient.retrieve_entity(qid)
|
|
120
121
|
if locale in wikidata_data.label:
|
|
121
122
|
return wikidata_data.label[locale].content
|
|
122
|
-
return None
|
|
123
123
|
if structure_type == "external-id":
|
|
124
124
|
return value
|
|
125
125
|
if structure_type == "string":
|
|
@@ -215,7 +215,7 @@ def wikidata_to_thing(
|
|
|
215
215
|
)
|
|
216
216
|
)
|
|
217
217
|
except Exception as e:
|
|
218
|
-
|
|
218
|
+
logger.error(f"Failed to get Wikipedia summary for {title} ({lang}): {e}")
|
|
219
219
|
if len(descriptions) == 0:
|
|
220
220
|
descriptions = list(wikidata_thing.description.values())
|
|
221
221
|
t3: float = time.perf_counter()
|
|
@@ -228,7 +228,8 @@ def wikidata_to_thing(
|
|
|
228
228
|
)
|
|
229
229
|
thing.add_data_property(
|
|
230
230
|
DataProperty(
|
|
231
|
-
content=datetime.
|
|
231
|
+
content=datetime.now(timezone.utc).isoformat(),
|
|
232
|
+
property_ref=OntologyPropertyReference.parse("wacom:core#lastUpdate"),
|
|
232
233
|
)
|
|
233
234
|
)
|
|
234
235
|
t4: float = time.perf_counter()
|
|
@@ -291,7 +292,6 @@ def wikidata_to_thing(
|
|
|
291
292
|
"source_concept": thing.concept_type,
|
|
292
293
|
"source_classes": class_types,
|
|
293
294
|
"property": prop_missing.pid,
|
|
294
|
-
"property_label": prop_missing.label,
|
|
295
295
|
"target_qid": target_thing.qid,
|
|
296
296
|
"target_classes": target_thing.ontology_types,
|
|
297
297
|
}
|
|
@@ -304,7 +304,6 @@ def wikidata_to_thing(
|
|
|
304
304
|
"source_concept": thing.concept_type,
|
|
305
305
|
"source_classes": class_types,
|
|
306
306
|
"property": prop_missing.pid,
|
|
307
|
-
"property_label": prop_missing.label,
|
|
308
307
|
"target_qid": target_thing.qid,
|
|
309
308
|
"target_classes": target_thing.ontology_types,
|
|
310
309
|
}
|
|
@@ -312,9 +311,9 @@ def wikidata_to_thing(
|
|
|
312
311
|
for p, lst in relation_props.items():
|
|
313
312
|
thing.add_relation(ObjectProperty(p, outgoing=lst))
|
|
314
313
|
t7: float = time.perf_counter()
|
|
315
|
-
|
|
316
|
-
f"Wikidata to Thing: {t2 - t1}
|
|
317
|
-
f"{t4 - t3}
|
|
318
|
-
f"properties, {t7 - t6}
|
|
314
|
+
logger.debug(
|
|
315
|
+
f"Wikidata to Thing: {(t2 - t1) * 1000.:.2f} ms for labels, {(t3 - t2) * 1000.:.2f} ms for descriptions, "
|
|
316
|
+
f"{(t4 - t3) * 1000.:.2f} ms for sources, {(t5 - t4) * 1000.:.2f} ms for class types, "
|
|
317
|
+
f"{(t6 - t5) * 1000.:.2f} ms for data properties, {(t7 - t6) * 1000.:.2f} ms for object properties"
|
|
319
318
|
)
|
|
320
319
|
return thing, import_warnings
|
knowledge/public/__init__.py
CHANGED
|
@@ -3,20 +3,21 @@
|
|
|
3
3
|
"""Mapping of Wikidata property ids to its string."""
|
|
4
4
|
import json
|
|
5
5
|
from pathlib import Path
|
|
6
|
-
from typing import Dict
|
|
6
|
+
from typing import Dict, List
|
|
7
7
|
|
|
8
8
|
# OntologyPropertyReference constants
|
|
9
9
|
INSTANCE_OF_PROPERTY: str = "P31"
|
|
10
10
|
IMAGE_PROPERTY: str = "P18"
|
|
11
11
|
|
|
12
12
|
# Mapping for property names
|
|
13
|
-
|
|
13
|
+
DEFAULT_TIMEOUT: int = 60
|
|
14
|
+
DEFAULT_TOKEN_REFRESH_TIME: int = 360
|
|
15
|
+
STATUS_FORCE_LIST: List[int] = [429, 500, 502, 503, 504]
|
|
16
|
+
DEFAULT_BACKOFF_FACTOR: float = 0.1
|
|
17
|
+
DEFAULT_MAX_RETRIES: int = 3
|
|
14
18
|
|
|
15
19
|
CWD: Path = Path(__file__).parent
|
|
16
|
-
|
|
17
|
-
if CONFIGURATION_FILE.exists():
|
|
18
|
-
with CONFIGURATION_FILE.open("r") as f:
|
|
19
|
-
PROPERTY_MAPPING = json.load(f)
|
|
20
|
+
|
|
20
21
|
|
|
21
22
|
from knowledge.public import wikidata
|
|
22
23
|
from knowledge.public import helper
|
|
@@ -24,4 +25,4 @@ from knowledge.public import relations
|
|
|
24
25
|
from knowledge.public import cache
|
|
25
26
|
|
|
26
27
|
|
|
27
|
-
__all__ = ["wikidata", "helper", "relations", "cache", "
|
|
28
|
+
__all__ = ["wikidata", "helper", "relations", "cache", "client", "INSTANCE_OF_PROPERTY", "IMAGE_PROPERTY"]
|