personal_knowledge_library 3.1.0__py3-none-any.whl → 3.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of personal_knowledge_library might be problematic. Click here for more details.

knowledge/__init__.py CHANGED
@@ -17,7 +17,7 @@ __license__ = "Wacom"
17
17
  __maintainer__ = ["Markus Weber"]
18
18
  __email__ = "markus.weber@wacom.com"
19
19
  __status__ = "beta"
20
- __version__ = "3.1.0"
20
+ __version__ = "3.1.2"
21
21
 
22
22
  import loguru
23
23
 
@@ -2179,7 +2179,9 @@ class ThingObject(abc.ABC):
2179
2179
  labels.extend([la.__dict__() for la in self.label])
2180
2180
  labels.extend([la.__dict__() for la in self.alias])
2181
2181
  dict_object: Dict[str, Any] = {
2182
- SOURCE_REFERENCE_ID_TAG: self.source_reference_id if self.source_reference_id is None else reference_id,
2182
+ SOURCE_REFERENCE_ID_TAG: (
2183
+ self.default_source_reference_id() if self.source_reference_id is not None else reference_id
2184
+ ),
2183
2185
  SOURCE_SYSTEM_TAG: self.source_system,
2184
2186
  IMAGE_TAG: self.image,
2185
2187
  LABELS_TAG: labels,
@@ -543,6 +543,7 @@ def build_configuration(mapping: Dict[str, Any]) -> MappingConfiguration:
543
543
  conf.add_property(property_config)
544
544
  return conf
545
545
 
546
+
546
547
  def register_ontology(rdf_str: str):
547
548
  """
548
549
  Registers the ontology.
@@ -553,6 +554,7 @@ def register_ontology(rdf_str: str):
553
554
  """
554
555
  ontology_graph.parse(data=rdf_str, format="xml")
555
556
 
557
+
556
558
  def load_configuration(configuration: Path):
557
559
  """
558
560
  Loads the configuration.
knowledge/public/cache.py CHANGED
@@ -53,7 +53,7 @@ class WikidataCache:
53
53
 
54
54
  _instance = None # Singleton instance
55
55
 
56
- def __init__(self, max_size: int = 10000):
56
+ def __init__(self, max_size: int = 100000):
57
57
  self.max_size = max_size
58
58
  self.cache: OrderedDict = OrderedDict() # Maintain insertion order for LRU eviction
59
59
  self.property_cache: OrderedDict = OrderedDict() # Cache for properties
@@ -3,7 +3,7 @@
3
3
  import multiprocessing
4
4
  from collections import deque
5
5
  from multiprocessing import Pool
6
- from typing import Union, Any, Dict, List, Tuple, Set
6
+ from typing import Union, Any, Dict, List, Tuple, Set, Optional, Callable
7
7
 
8
8
  import requests
9
9
  from requests import Response
@@ -321,18 +321,10 @@ class WikiDataAPIClient:
321
321
  """
322
322
  try:
323
323
  results: List[WikidataThing] = []
324
- request_qids: List[str] = []
325
- for qid in qids:
326
- if wikidata_cache.qid_in_cache(qid):
327
- results.append(wikidata_cache.get_wikidata_object(qid))
328
- else:
329
- request_qids.append(qid)
330
- if len(request_qids) > 0:
331
- for e in __waiting_multi_request__(request_qids):
324
+ if len(qids) > 0:
325
+ for e in __waiting_multi_request__(qids):
332
326
  w_thing = WikidataThing.from_wikidata(e)
333
327
  results.append(w_thing)
334
- # Add the thing to the cache
335
- wikidata_cache.cache_wikidata_object(w_thing)
336
328
  return results
337
329
  except Exception as e:
338
330
  logger.exception(e)
@@ -356,13 +348,17 @@ class WikiDataAPIClient:
356
348
  return WikiDataAPIClient.__wikidata_task__(qid)
357
349
 
358
350
  @staticmethod
359
- def retrieve_entities(qids: Union[List[str], Set[str]]) -> List[WikidataThing]:
351
+ def retrieve_entities(
352
+ qids: Union[List[str], Set[str]], progress: Optional[Callable[[int, int], None]] = None
353
+ ) -> List[WikidataThing]:
360
354
  """
361
355
  Retrieve multiple Wikidata things.
362
356
  Parameters
363
357
  ----------
364
358
  qids: List[str]
365
359
  QIDs of the entities.
360
+ progress: Optional[Callable[[int, int], None]]
361
+ Optional callback function to report progress.
366
362
 
367
363
  Returns
368
364
  -------
@@ -370,17 +366,40 @@ class WikiDataAPIClient:
370
366
  List of wikidata things.
371
367
  """
372
368
  pulled: List[WikidataThing] = []
369
+ task_size: int = len(qids)
373
370
  if len(qids) == 0:
374
371
  return []
375
- jobs: List[List[str]] = list(chunks(list(qids), API_LIMIT))
372
+ missing_qids: List[str] = []
373
+ for qid in qids:
374
+ if not wikidata_cache.qid_in_cache(qid):
375
+ if qid and qid.startswith("Q") and len(qid) > 1:
376
+ missing_qids.append(qid)
377
+ else:
378
+ pulled.append(wikidata_cache.get_wikidata_object(qid))
379
+ ctr: int = len(pulled)
380
+ if progress:
381
+ progress(len(pulled), task_size)
382
+ jobs: List[List[str]] = list(chunks(list(missing_qids), API_LIMIT))
376
383
  num_processes: int = min(len(jobs), multiprocessing.cpu_count())
377
384
  if num_processes > 1:
378
385
  with Pool(processes=num_processes) as pool:
379
386
  # Wikidata thing is not support in multiprocessing
380
387
  for lst in pool.imap_unordered(__waiting_multi_request__, jobs):
381
- pulled.extend([WikidataThing.from_wikidata(e) for e in lst])
388
+ for w_dict in lst:
389
+ w_thing = WikidataThing.from_wikidata(w_dict)
390
+ wikidata_cache.cache_wikidata_object(w_thing)
391
+ pulled.append(w_thing)
392
+ ctr += 1
393
+ if progress:
394
+ progress(ctr, task_size)
382
395
  else:
383
- pulled = WikiDataAPIClient.__wikidata_multiple_task__(jobs[0])
396
+ results = WikiDataAPIClient.__wikidata_multiple_task__(jobs[0])
397
+ for w_thing in results:
398
+ wikidata_cache.cache_wikidata_object(w_thing)
399
+ ctr += 1
400
+ if progress:
401
+ progress(ctr, task_size)
402
+ pulled.extend(results)
384
403
  return pulled
385
404
 
386
405
  @staticmethod
@@ -877,7 +877,7 @@ class WikidataThing:
877
877
  val = {"tabular": data_value["value"]}
878
878
  elif data_type == "entity-schema":
879
879
  val = {"id": data_value["value"]["id"]}
880
- elif data_type == "wikibase-form":
880
+ elif data_type in ["wikibase-form", "musical-notation"]:
881
881
  continue
882
882
  else:
883
883
  raise WikiDataAPIException(f"Data type: {data_type} not supported.")
@@ -2,9 +2,9 @@
2
2
  # Copyright © 2024-present Wacom. All rights reserved.
3
3
  """ "Utilities"""
4
4
 
5
- __all__ = ["wikipedia", "graph", "wikidata"]
5
+ __all__ = ["import_format", "graph", "wikidata", "wikipedia"]
6
6
 
7
-
8
- from knowledge.utils import wikipedia
7
+ from knowledge.utils import import_format
9
8
  from knowledge.utils import graph
10
9
  from knowledge.utils import wikidata
10
+ from knowledge.utils import wikipedia
@@ -0,0 +1,163 @@
1
+ # -*- coding: utf-8 -*-
2
+ # Copyright © 2024-present Wacom. All rights reserved.
3
+ import gzip
4
+ import json
5
+ import logging
6
+ import re
7
+ from json import JSONDecodeError
8
+ from pathlib import Path
9
+ from typing import List, Dict, Any
10
+
11
+ import loguru
12
+
13
+ from knowledge.base.ontology import ThingObject, OntologyPropertyReference
14
+
15
+ logger = loguru.logger
16
+
17
+
18
def is_http_url(url: str) -> bool:
    """
    Tell whether a string starts with an HTTP or HTTPS scheme.

    Parameters
    ----------
    url: str
        The string to inspect.

    Returns
    -------
    bool
        True if the string begins with ``http://`` or ``https://`` (compared case-insensitively),
        False otherwise.
    """
    scheme_match = re.match(r"^(https?://)", url, re.IGNORECASE)
    return scheme_match is not None
31
+
32
+
33
def is_local_url(url: str) -> bool:
    """
    Tell whether a string looks like a local file reference.

    Parameters
    ----------
    url: str
        The string to inspect.

    Returns
    -------
    bool
        True if the string begins with ``file://`` (compared case-insensitively), with ``/``,
        or with ``./`` or ``../``; False otherwise.
    """
    prefix_match = re.match(r"^(file://|/|\.{1,2}/)", url, re.IGNORECASE)
    return prefix_match is not None
46
+
47
+
48
def __import_format_to_thing__(line: str) -> ThingObject:
    """
    Convert a line of JSON to a ThingObject.

    Parameters
    ----------
    line: str
        The line of JSON to convert.

    Returns
    -------
    entity: ThingObject
        The entity built by ``ThingObject.from_import_dict``. An image reference that is neither an
        HTTP(S) URL nor a local URL is turned into a file URI when the path exists and reset to
        None otherwise. Object properties without incoming and outgoing relations are removed.

    Raises
    ------
    JSONDecodeError
        If the line is not valid JSON.
    """
    thing_dict: Dict[str, Any] = json.loads(line)
    entity: ThingObject = ThingObject.from_import_dict(thing_dict)
    if entity.image and not is_local_url(entity.image) and not is_http_url(entity.image):
        # Not recognised as a URL: handle the value as a plain file-system path.
        path: Path = Path(entity.image)
        if path.exists():
            entity.image = path.absolute().as_uri()
        else:
            logger.warning(f"Image path {path} does not exist. Setting to None.")
            entity.image = None
    # Collect the keys first and delete afterwards, so the mapping is not modified while iterating it.
    remove_props: List[OntologyPropertyReference] = [
        obj_prop
        for obj_prop, value in entity.object_properties.items()
        if len(value.incoming_relations) == 0 and len(value.outgoing_relations) == 0
    ]
    for prop in remove_props:
        del entity.object_properties[prop]
    return entity
84
+
85
+
86
def load_import_format(file_path: Path) -> List[ThingObject]:
    """
    Load the import format file.

    The file is read line by line, one JSON document per line. Files with the suffix ``.gz`` are
    decompressed with gzip, every other file is read as UTF-8 text. Lines that are not valid JSON
    are logged and skipped; lines containing only whitespace are skipped without logging.

    Parameters
    ----------
    file_path: Path
        The path to the file.

    Returns
    -------
    entity_list: List[ThingObject]
        The list of entities, in file order.

    Raises
    ------
    FileNotFoundError
        If the file does not exist or is not a file.
    """
    if not file_path.exists():
        logger.error(f"File {file_path} does not exist.")
        raise FileNotFoundError(f"File {file_path} does not exist.")
    if not file_path.is_file():
        logger.error(f"Path {file_path} is not a file.")
        raise FileNotFoundError(f"Path {file_path} is not a file.")
    # gzip.open in text mode takes the same arguments as open(), so both formats share one loop.
    opener = gzip.open if file_path.suffix == ".gz" else open
    cached_entities: List[ThingObject] = []
    with opener(file_path, "rt", encoding="utf-8") as fp_thing:
        # Iterate lazily instead of materialising every line with readlines().
        for line in fp_thing:
            if not line.strip():
                continue
            try:
                cached_entities.append(__import_format_to_thing__(line))
            except JSONDecodeError as e:
                # Values are passed as arguments so that braces inside the JSON line are never
                # interpreted as format placeholders.
                logger.error("Error decoding JSON: {}. Line: {}", e, line)
    return cached_entities
128
+
129
+
130
def save_import_format(file_path: Path, entities: List[ThingObject]) -> None:
    """
    Save the import format file.

    Each entity is written as one JSON document per line (non-ASCII characters are kept as they
    are). A path with the suffix ``.gz`` is written gzip-compressed; every other path is written as
    plain UTF-8 text, which matches what ``load_import_format`` reads back. An existing file is
    overwritten.

    Parameters
    ----------
    file_path: Path
        The path to the file. Missing parent directories are created.
    entities: List[ThingObject]
        The list of entities.
    """
    # Create the directory if it does not exist
    file_path.parent.mkdir(parents=True, exist_ok=True)
    # gzip.open in text mode takes the same arguments as open(), so both formats share one loop.
    opener = gzip.open if file_path.suffix == ".gz" else open
    with opener(file_path, "wt", encoding="utf-8") as fp_thing:
        for entity in entities:
            fp_thing.write(f"{json.dumps(entity.__import_format_dict__(), ensure_ascii=False)}\n")
150
+
151
+
152
def append_import_format(file_path: Path, entity: ThingObject) -> None:
    """
    Append to the import format file.

    The entity is written as a single JSON line at the end of the file; the file is created if it
    does not exist. A path with the suffix ``.gz`` is appended to as gzip-compressed text, so the
    file stays readable by ``load_import_format``; every other path is appended to as plain UTF-8
    text.

    Parameters
    ----------
    file_path: Path
        The path to the file.
    entity: ThingObject
        The entity to append.
    """
    # Appending in gzip mode adds a new compressed member instead of raw text to the archive.
    opener = gzip.open if file_path.suffix == ".gz" else open
    with opener(file_path, "at", encoding="utf-8") as fp_thing:
        fp_thing.write(f"{json.dumps(entity.__import_format_dict__(), ensure_ascii=False)}\n")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: personal_knowledge_library
3
- Version: 3.1.0
3
+ Version: 3.1.2
4
4
  Summary: Library to access Wacom's Personal Knowledge graph.
5
5
  License: Apache-2.0
6
6
  Keywords: semantic-knowledge,knowledge-graph
@@ -1,22 +1,22 @@
1
- knowledge/__init__.py,sha256=jQahttsGtb_GDFJCWfJEJ3J_wJF4v-msV1VN4RUG5Fk,2680
1
+ knowledge/__init__.py,sha256=K8mEnEYsqCb86zEKbKdwpctRUteEBwRxVRYOpeJuB8U,2680
2
2
  knowledge/base/__init__.py,sha256=q0NJRQLhfZ8khE-uZCK0SVA38dzbaMomcgL5Olnjtio,895
3
3
  knowledge/base/access.py,sha256=BSh-6QbeHOCu55XTxA-p3DwEyRzgtN8TSxtcn6UvmZo,4411
4
4
  knowledge/base/entity.py,sha256=b-Ana_H_WI2-AT_n-V_HzUL6W9Ri16DcZFS3q4ziI94,8445
5
5
  knowledge/base/language.py,sha256=QHNafhiwg9UPaQS1svzsSz7T2L-VAKlQwd1n5w0HaI8,1500
6
- knowledge/base/ontology.py,sha256=-v5g_0H-naeGBbYDDRenm0rTvkGFnW9ZWPdYK7xqwZs,95395
6
+ knowledge/base/ontology.py,sha256=LKOMcnUzROK4BD7avMAbvTnjp3N8nzBwt6lLr2qLqkE,95441
7
7
  knowledge/base/search.py,sha256=J1PSVpTU2JKF9xSZLZZvAbJfFy1HiMPJRzPjHhR7IQM,14722
8
8
  knowledge/base/tenant.py,sha256=f2Z_LjUcjIt5J2_Rse9aQyTzJ0sSyVvCzlm8PP3PqY8,6084
9
9
  knowledge/nel/__init__.py,sha256=eTT88LV_KQ-Ku-a4RnTv_TUCNogl357ui4KT1pEKMuQ,330
10
10
  knowledge/nel/base.py,sha256=bsUj9PwoZoJWW33RfoYmQHbFhArKH1kMOsDgU3yj0T8,15032
11
11
  knowledge/nel/engine.py,sha256=qvQkfsdeYPWJ_5m8mmGFw1tvd699vOQ3IKoBIwALRWk,5347
12
- knowledge/ontomapping/__init__.py,sha256=9E-rZpxQPfukjDz_0ymrRieYDpv_e3tvpqDfkWzPJy0,18846
12
+ knowledge/ontomapping/__init__.py,sha256=u_t6i5a6pYYnf43Q_S6sG7NRLeGuKmahIHW6TFGHqOk,18848
13
13
  knowledge/ontomapping/manager.py,sha256=pXBwRGSTcS731df5vewNv6oMgP18HzvnjZqiUgyFIhI,13361
14
14
  knowledge/public/__init__.py,sha256=FrW5sqJGVcIfg5IVpt_g7qlWiIYNGA370jkE5mJDhoo,812
15
- knowledge/public/cache.py,sha256=nqKIVfY45bHsuf4dOe4aPK-JfRL1pT7NQT_KF1puzLY,14509
16
- knowledge/public/client.py,sha256=kW4bRXgBYxHMNsyyBtMSNTYKNqReaPih_Que2lgNrG0,14676
15
+ knowledge/public/cache.py,sha256=uKEuW9WN9xPmY-fwWPLSxmdEP6xg-XG3g8fKAmGZgBQ,14510
16
+ knowledge/public/client.py,sha256=7Z-nYhYIni_jKDWJw18CvuUxuOCns74l8XjeEEJXl_Y,15437
17
17
  knowledge/public/helper.py,sha256=PDsInMHMHgP8rMvcFk5E47afdE3rC7V3BzPS-DdOsww,13238
18
18
  knowledge/public/relations.py,sha256=DrL-rYuwLzaE2H2TOM2gG1TGqHbkcPL4EsRD54LQqLs,4182
19
- knowledge/public/wikidata.py,sha256=wGM4XH5jOF1R7woML90lq28piu9AW7ADgpUcK2qIJV4,36661
19
+ knowledge/public/wikidata.py,sha256=LwMP2kq2mH5Oi9JhPvG8lN9pZMDlQvxgaZgQKQO3kaM,36683
20
20
  knowledge/services/__init__.py,sha256=FQMTLg7RYglLwCtEO0sOrgj55YrzSNqJV-yj3aqttb8,3490
21
21
  knowledge/services/asyncio/__init__.py,sha256=dq9yGTxg_hoQb8E1kEtY4AeB8zrdJfw3-XlPoYn2j5U,225
22
22
  knowledge/services/asyncio/base.py,sha256=im6J1CWOp1N1tt6WdJ1uU0R-VFpvdYc6lZu8TyPrU5A,16222
@@ -33,11 +33,12 @@ knowledge/services/search.py,sha256=atwvcqagA6yZkzKiad6Yjn3cKu7L5LpKiVJsxKKOY5g,
33
33
  knowledge/services/session.py,sha256=y8uTETRMDOBbC30UAlrrtWTN7Zgs2klqHsoMjLO2tq0,14145
34
34
  knowledge/services/tenant.py,sha256=4pyQCYlwvREfmRcXRn47irIsFMCia_c-LVvFysDypkI,11691
35
35
  knowledge/services/users.py,sha256=uus66ijd1OAN-gJqP95zmhDL31xje3gHBfF4rMnMjM0,16436
36
- knowledge/utils/__init__.py,sha256=rgLqdgZwjuCnqA4NEY9echbGFaJ5YUhqBEkyVv9R9CM,255
36
+ knowledge/utils/__init__.py,sha256=7unsBV5ge6eIyicKCIc_C4OroD-zkKRMVE0jXcH6EOw,313
37
37
  knowledge/utils/graph.py,sha256=kuPZEGhexVN9McoT8JK2ViIrfXTh-nFPv_8XPfG0wlA,12318
38
+ knowledge/utils/import_format.py,sha256=Ho50tGlkNXoWDRFBke6fB_09PFeupTakrjdoHpNqDX4,5037
38
39
  knowledge/utils/wikidata.py,sha256=vRH-4AMR-3xywyvmDqbjI4KSw4tF4TEYqUGCJmUMqK8,5512
39
40
  knowledge/utils/wikipedia.py,sha256=rBuFqYVM58JCj5ISLxuhYVVl2gOIlPLWx7_CghyQgvE,5052
40
- personal_knowledge_library-3.1.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
41
- personal_knowledge_library-3.1.0.dist-info/METADATA,sha256=3tiRSrNWLBnrHbaKzj7WC_y6VMgXKBfwLLTOnoHJqsY,57087
42
- personal_knowledge_library-3.1.0.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
43
- personal_knowledge_library-3.1.0.dist-info/RECORD,,
41
+ personal_knowledge_library-3.1.2.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
42
+ personal_knowledge_library-3.1.2.dist-info/METADATA,sha256=c3tQS96PrPj-ulfkQrgJdV49Zdj012MWYLqmTGHu_2g,57087
43
+ personal_knowledge_library-3.1.2.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
44
+ personal_knowledge_library-3.1.2.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: poetry-core 2.1.1
2
+ Generator: poetry-core 2.1.2
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any