personal_knowledge_library 3.1.1__py3-none-any.whl → 3.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of personal_knowledge_library might be problematic. Click here for more details.
- knowledge/__init__.py +1 -1
- knowledge/base/ontology.py +3 -1
- knowledge/ontomapping/__init__.py +2 -0
- knowledge/public/client.py +3 -2
- knowledge/utils/__init__.py +3 -3
- knowledge/utils/import_format.py +163 -0
- {personal_knowledge_library-3.1.1.dist-info → personal_knowledge_library-3.1.2.dist-info}/METADATA +1 -1
- {personal_knowledge_library-3.1.1.dist-info → personal_knowledge_library-3.1.2.dist-info}/RECORD +10 -9
- {personal_knowledge_library-3.1.1.dist-info → personal_knowledge_library-3.1.2.dist-info}/WHEEL +1 -1
- {personal_knowledge_library-3.1.1.dist-info → personal_knowledge_library-3.1.2.dist-info}/LICENSE +0 -0
knowledge/__init__.py
CHANGED
knowledge/base/ontology.py
CHANGED
|
@@ -2179,7 +2179,9 @@ class ThingObject(abc.ABC):
|
|
|
2179
2179
|
labels.extend([la.__dict__() for la in self.label])
|
|
2180
2180
|
labels.extend([la.__dict__() for la in self.alias])
|
|
2181
2181
|
dict_object: Dict[str, Any] = {
|
|
2182
|
-
SOURCE_REFERENCE_ID_TAG:
|
|
2182
|
+
SOURCE_REFERENCE_ID_TAG: (
|
|
2183
|
+
self.default_source_reference_id() if self.source_reference_id is not None else reference_id
|
|
2184
|
+
),
|
|
2183
2185
|
SOURCE_SYSTEM_TAG: self.source_system,
|
|
2184
2186
|
IMAGE_TAG: self.image,
|
|
2185
2187
|
LABELS_TAG: labels,
|
|
@@ -543,6 +543,7 @@ def build_configuration(mapping: Dict[str, Any]) -> MappingConfiguration:
|
|
|
543
543
|
conf.add_property(property_config)
|
|
544
544
|
return conf
|
|
545
545
|
|
|
546
|
+
|
|
546
547
|
def register_ontology(rdf_str: str):
|
|
547
548
|
"""
|
|
548
549
|
Registers the ontology.
|
|
@@ -553,6 +554,7 @@ def register_ontology(rdf_str: str):
|
|
|
553
554
|
"""
|
|
554
555
|
ontology_graph.parse(data=rdf_str, format="xml")
|
|
555
556
|
|
|
557
|
+
|
|
556
558
|
def load_configuration(configuration: Path):
|
|
557
559
|
"""
|
|
558
560
|
Loads the configuration.
|
knowledge/public/client.py
CHANGED
|
@@ -348,8 +348,9 @@ class WikiDataAPIClient:
|
|
|
348
348
|
return WikiDataAPIClient.__wikidata_task__(qid)
|
|
349
349
|
|
|
350
350
|
@staticmethod
|
|
351
|
-
def retrieve_entities(
|
|
352
|
-
|
|
351
|
+
def retrieve_entities(
|
|
352
|
+
qids: Union[List[str], Set[str]], progress: Optional[Callable[[int, int], None]] = None
|
|
353
|
+
) -> List[WikidataThing]:
|
|
353
354
|
"""
|
|
354
355
|
Retrieve multiple Wikidata things.
|
|
355
356
|
Parameters
|
knowledge/utils/__init__.py
CHANGED
|
@@ -2,9 +2,9 @@
|
|
|
2
2
|
# Copyright © 2024-present Wacom. All rights reserved.
|
|
3
3
|
""" "Utilities"""
|
|
4
4
|
|
|
5
|
-
__all__ = ["
|
|
5
|
+
__all__ = ["import_format", "graph", "wikidata", "wikipedia"]
|
|
6
6
|
|
|
7
|
-
|
|
8
|
-
from knowledge.utils import wikipedia
|
|
7
|
+
from knowledge.utils import import_format
|
|
9
8
|
from knowledge.utils import graph
|
|
10
9
|
from knowledge.utils import wikidata
|
|
10
|
+
from knowledge.utils import wikipedia
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
# Copyright © 2024-present Wacom. All rights reserved.
|
|
3
|
+
import gzip
|
|
4
|
+
import json
|
|
5
|
+
import logging
|
|
6
|
+
import re
|
|
7
|
+
from json import JSONDecodeError
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import List, Dict, Any
|
|
10
|
+
|
|
11
|
+
import loguru
|
|
12
|
+
|
|
13
|
+
from knowledge.base.ontology import ThingObject, OntologyPropertyReference
|
|
14
|
+
|
|
15
|
+
logger = loguru.logger
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def is_http_url(url: str) -> bool:
    """Tell whether a string starts with an HTTP or HTTPS scheme.

    Parameters
    ----------
    url: str
        The string to inspect.

    Returns
    -------
    bool
        True if the string begins with ``http://`` or ``https://`` (matched case-insensitively), False otherwise.
    """
    # re.match already anchors at the start of the string; a match object is returned only on success.
    scheme_match = re.match(r"^(https?://)", url, re.IGNORECASE)
    return scheme_match is not None
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def is_local_url(url: str) -> bool:
    """Tell whether a string looks like a local file reference.

    Parameters
    ----------
    url: str
        The string to inspect.

    Returns
    -------
    bool
        True if the string begins with ``file://``, ``/``, ``./`` or ``../`` (matched case-insensitively),
        False otherwise.
    """
    # Accepted prefixes: file scheme, absolute POSIX path, or an explicitly relative path.
    prefix_match = re.match(r"^(file://|/|\.{1,2}/)", url, re.IGNORECASE)
    return prefix_match is not None
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def __import_format_to_thing__(line: str) -> ThingObject:
    """
    Convert a line of JSON to a ThingObject.

    The line is parsed as JSON and handed to ``ThingObject.from_import_dict``. Afterwards the entity is
    normalised in two ways:

    * An image reference that is neither an HTTP(S) URL nor recognised by ``is_local_url`` is treated as a
      file system path. If that path exists it is replaced by its absolute ``file://`` URI; if it does not
      exist a warning is logged and the image is set to None.
    * Object properties that have neither incoming nor outgoing relations are removed.

    Parameters
    ----------
    line: str
        The line of JSON to convert.

    Returns
    -------
    entity: ThingObject
        The ThingObject created from the JSON line.

    Raises
    ------
    JSONDecodeError
        If the line is not valid JSON.
    """
    thing_dict: Dict[str, Any] = json.loads(line)
    entity: ThingObject = ThingObject.from_import_dict(thing_dict)
    image = entity.image
    if image and not is_local_url(image) and not is_http_url(image):
        path: Path = Path(image)
        # Only an existing file can be turned into a usable URI; the previous check was inverted and
        # discarded images that were present while keeping references to files that were missing.
        if path.exists():
            entity.image = path.absolute().as_uri()
        else:
            logger.warning(f"Image path {path} does not exist. Setting to None.")
            entity.image = None
    # Collect the keys first: deleting from the dict while iterating over it would raise a RuntimeError.
    empty_props: List[OntologyPropertyReference] = [
        obj_prop
        for obj_prop, value in entity.object_properties.items()
        if len(value.incoming_relations) == 0 and len(value.outgoing_relations) == 0
    ]
    for prop in empty_props:
        del entity.object_properties[prop]
    return entity
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def load_import_format(file_path: Path) -> List[ThingObject]:
    """
    Load the import format file.

    The file is read line by line, each non-blank line being one JSON document that is converted with
    ``__import_format_to_thing__``. Files with the suffix ``.gz`` are read through gzip; every other file
    is read as plain UTF-8 text. Lines that are not valid JSON are logged and skipped, so one malformed
    line does not abort the whole import.

    Parameters
    ----------
    file_path: Path
        The path to the file.

    Returns
    -------
    entity_list: List[ThingObject]
        The list of entities.

    Raises
    ------
    FileNotFoundError
        If the file does not exist or is not a file.
    """
    if not file_path.exists():
        logger.error(f"File {file_path} does not exist.")
        raise FileNotFoundError(f"File {file_path} does not exist.")
    if not file_path.is_file():
        logger.error(f"Path {file_path} is not a file.")
        raise FileNotFoundError(f"Path {file_path} is not a file.")
    cached_entities: List[ThingObject] = []
    # Open both variants in text mode so that a single loop can handle them.
    if file_path.suffix == ".gz":
        stream = gzip.open(file_path, "rt", encoding="utf-8")
    else:
        stream = file_path.open(encoding="utf8")
    with stream as fp_thing:
        # Iterate lazily instead of readlines() to avoid holding the whole file in memory.
        for line in fp_thing:
            if not line.strip():
                # Blank lines (e.g. a trailing newline) carry no entity and are not an error.
                continue
            try:
                cached_entities.append(__import_format_to_thing__(line))
            except JSONDecodeError as e:
                # Report through the module logger, consistent with the other messages in this module.
                logger.error(f"Error decoding JSON: {e}. Line: {line}")
    return cached_entities
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def save_import_format(file_path: Path, entities: List[ThingObject]) -> None:
    """
    Save the import format file.

    Each entity is serialised with ``__import_format_dict__`` and written as one JSON document per line.
    The parent directory is created if it is missing and an existing file is overwritten. A path with
    the suffix ``.gz`` is written gzip-compressed; every other path is written as plain UTF-8 text, which
    mirrors how ``load_import_format`` decides how to read a file. Previously a suffix other than ``.gz``
    or ``.ndjson`` caused nothing to be written at all.

    Parameters
    ----------
    file_path: Path
        The path to the file.
    entities: List[ThingObject]
        The list of entities.
    """
    # Create the directory if it does not exist
    file_path.parent.mkdir(parents=True, exist_ok=True)
    if file_path.suffix == ".gz":
        stream = gzip.open(file_path, "wt", encoding="utf-8")
    else:
        stream = file_path.open("w", encoding="utf-8")
    with stream as fp_thing:
        for entity in entities:
            # ensure_ascii=False keeps non-ASCII characters readable in the output file.
            fp_thing.write(f"{json.dumps(entity.__import_format_dict__(), ensure_ascii=False)}\n")
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def append_import_format(file_path: Path, entity: ThingObject) -> None:
    """
    Append to the import format file.

    The entity is serialised with ``__import_format_dict__`` and appended as a single JSON line. A path
    with the suffix ``.gz`` is appended to through gzip, so the file stays readable by
    ``load_import_format``; appending plain text to a gzip file would corrupt it. Every other path is
    appended to as plain UTF-8 text.

    Parameters
    ----------
    file_path: Path
        The path to the file.
    entity: ThingObject
        The entity to append.
    """
    if file_path.suffix == ".gz":
        # Mode "at" adds a new gzip member at the end of the file.
        stream = gzip.open(file_path, "at", encoding="utf-8")
    else:
        stream = file_path.open("a", encoding="utf-8")
    with stream as fp_thing:
        fp_thing.write(f"{json.dumps(entity.__import_format_dict__(), ensure_ascii=False)}\n")
|
{personal_knowledge_library-3.1.1.dist-info → personal_knowledge_library-3.1.2.dist-info}/RECORD
RENAMED
|
@@ -1,19 +1,19 @@
|
|
|
1
|
-
knowledge/__init__.py,sha256=
|
|
1
|
+
knowledge/__init__.py,sha256=K8mEnEYsqCb86zEKbKdwpctRUteEBwRxVRYOpeJuB8U,2680
|
|
2
2
|
knowledge/base/__init__.py,sha256=q0NJRQLhfZ8khE-uZCK0SVA38dzbaMomcgL5Olnjtio,895
|
|
3
3
|
knowledge/base/access.py,sha256=BSh-6QbeHOCu55XTxA-p3DwEyRzgtN8TSxtcn6UvmZo,4411
|
|
4
4
|
knowledge/base/entity.py,sha256=b-Ana_H_WI2-AT_n-V_HzUL6W9Ri16DcZFS3q4ziI94,8445
|
|
5
5
|
knowledge/base/language.py,sha256=QHNafhiwg9UPaQS1svzsSz7T2L-VAKlQwd1n5w0HaI8,1500
|
|
6
|
-
knowledge/base/ontology.py,sha256
|
|
6
|
+
knowledge/base/ontology.py,sha256=LKOMcnUzROK4BD7avMAbvTnjp3N8nzBwt6lLr2qLqkE,95441
|
|
7
7
|
knowledge/base/search.py,sha256=J1PSVpTU2JKF9xSZLZZvAbJfFy1HiMPJRzPjHhR7IQM,14722
|
|
8
8
|
knowledge/base/tenant.py,sha256=f2Z_LjUcjIt5J2_Rse9aQyTzJ0sSyVvCzlm8PP3PqY8,6084
|
|
9
9
|
knowledge/nel/__init__.py,sha256=eTT88LV_KQ-Ku-a4RnTv_TUCNogl357ui4KT1pEKMuQ,330
|
|
10
10
|
knowledge/nel/base.py,sha256=bsUj9PwoZoJWW33RfoYmQHbFhArKH1kMOsDgU3yj0T8,15032
|
|
11
11
|
knowledge/nel/engine.py,sha256=qvQkfsdeYPWJ_5m8mmGFw1tvd699vOQ3IKoBIwALRWk,5347
|
|
12
|
-
knowledge/ontomapping/__init__.py,sha256=
|
|
12
|
+
knowledge/ontomapping/__init__.py,sha256=u_t6i5a6pYYnf43Q_S6sG7NRLeGuKmahIHW6TFGHqOk,18848
|
|
13
13
|
knowledge/ontomapping/manager.py,sha256=pXBwRGSTcS731df5vewNv6oMgP18HzvnjZqiUgyFIhI,13361
|
|
14
14
|
knowledge/public/__init__.py,sha256=FrW5sqJGVcIfg5IVpt_g7qlWiIYNGA370jkE5mJDhoo,812
|
|
15
15
|
knowledge/public/cache.py,sha256=uKEuW9WN9xPmY-fwWPLSxmdEP6xg-XG3g8fKAmGZgBQ,14510
|
|
16
|
-
knowledge/public/client.py,sha256=
|
|
16
|
+
knowledge/public/client.py,sha256=7Z-nYhYIni_jKDWJw18CvuUxuOCns74l8XjeEEJXl_Y,15437
|
|
17
17
|
knowledge/public/helper.py,sha256=PDsInMHMHgP8rMvcFk5E47afdE3rC7V3BzPS-DdOsww,13238
|
|
18
18
|
knowledge/public/relations.py,sha256=DrL-rYuwLzaE2H2TOM2gG1TGqHbkcPL4EsRD54LQqLs,4182
|
|
19
19
|
knowledge/public/wikidata.py,sha256=LwMP2kq2mH5Oi9JhPvG8lN9pZMDlQvxgaZgQKQO3kaM,36683
|
|
@@ -33,11 +33,12 @@ knowledge/services/search.py,sha256=atwvcqagA6yZkzKiad6Yjn3cKu7L5LpKiVJsxKKOY5g,
|
|
|
33
33
|
knowledge/services/session.py,sha256=y8uTETRMDOBbC30UAlrrtWTN7Zgs2klqHsoMjLO2tq0,14145
|
|
34
34
|
knowledge/services/tenant.py,sha256=4pyQCYlwvREfmRcXRn47irIsFMCia_c-LVvFysDypkI,11691
|
|
35
35
|
knowledge/services/users.py,sha256=uus66ijd1OAN-gJqP95zmhDL31xje3gHBfF4rMnMjM0,16436
|
|
36
|
-
knowledge/utils/__init__.py,sha256=
|
|
36
|
+
knowledge/utils/__init__.py,sha256=7unsBV5ge6eIyicKCIc_C4OroD-zkKRMVE0jXcH6EOw,313
|
|
37
37
|
knowledge/utils/graph.py,sha256=kuPZEGhexVN9McoT8JK2ViIrfXTh-nFPv_8XPfG0wlA,12318
|
|
38
|
+
knowledge/utils/import_format.py,sha256=Ho50tGlkNXoWDRFBke6fB_09PFeupTakrjdoHpNqDX4,5037
|
|
38
39
|
knowledge/utils/wikidata.py,sha256=vRH-4AMR-3xywyvmDqbjI4KSw4tF4TEYqUGCJmUMqK8,5512
|
|
39
40
|
knowledge/utils/wikipedia.py,sha256=rBuFqYVM58JCj5ISLxuhYVVl2gOIlPLWx7_CghyQgvE,5052
|
|
40
|
-
personal_knowledge_library-3.1.
|
|
41
|
-
personal_knowledge_library-3.1.
|
|
42
|
-
personal_knowledge_library-3.1.
|
|
43
|
-
personal_knowledge_library-3.1.
|
|
41
|
+
personal_knowledge_library-3.1.2.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
42
|
+
personal_knowledge_library-3.1.2.dist-info/METADATA,sha256=c3tQS96PrPj-ulfkQrgJdV49Zdj012MWYLqmTGHu_2g,57087
|
|
43
|
+
personal_knowledge_library-3.1.2.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
|
|
44
|
+
personal_knowledge_library-3.1.2.dist-info/RECORD,,
|
{personal_knowledge_library-3.1.1.dist-info → personal_knowledge_library-3.1.2.dist-info}/LICENSE
RENAMED
|
File without changes
|