personal_knowledge_library 3.0.0__py3-none-any.whl → 3.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of personal_knowledge_library might be problematic. Click here for more details.

knowledge/public/cache.py CHANGED
@@ -1,115 +1,442 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  # Copyright © 2023-present Wacom. All rights reserved.
3
+ import threading
4
+ from collections import OrderedDict
5
+ from functools import wraps
3
6
  from pathlib import Path
4
- from typing import Optional, Dict
7
+ from typing import Dict, Any
5
8
 
6
- import ndjson
9
+ import loguru
10
+ import orjson
7
11
 
8
- from knowledge.public.wikidata import WikidataThing, WikiDataAPIClient
12
+ from knowledge.public.wikidata import WikidataThing, WikidataProperty, WikidataClass
9
13
 
14
+ # Configure logging
15
+ logger = loguru.logger
10
16
 
11
- def cache_wikidata_object(wikidata_object: WikidataThing):
17
+
18
+ def singleton(cls):
12
19
  """
13
- Caches a Wikidata object.
14
- Parameters
15
- ----------
16
- wikidata_object: WikidataObject
17
- The Wikidata object
20
+ Singleton decorator to ensure that a class has only one instance and provide a global point of access to it.
18
21
  """
19
- wikidata_cache[wikidata_object.qid] = wikidata_object
22
+ instances: Dict[str, Any] = {}
23
+ lock: threading.Lock = threading.Lock()
24
+
25
+ @wraps(cls)
26
+ def get_instance(*args, **kwargs):
27
+ if cls not in instances:
28
+ with lock:
29
+ if cls not in instances: # Double-checked locking
30
+ instances[cls] = cls(*args, **kwargs)
31
+ return instances[cls]
32
+
33
+ return get_instance
20
34
 
21
35
 
22
- def get_wikidata_object(qid_object: str) -> WikidataThing:
36
+ @singleton
37
+ class WikidataCache:
23
38
  """
24
- Returns a Wikidata object from the cache.
39
+ WikidataCache
40
+ --------------
41
+ A singleton class that manages a cache of Wikidata objects using an LRU (Least Recently Used) strategy.
25
42
 
26
43
  Parameters
27
44
  ----------
28
- qid_object: str
29
- The QID of the Wikidata object.
30
- Returns
31
- -------
32
- wikidata_object: WikidataThing
33
- The Wikidata object.
34
- """
35
- if qid_object not in wikidata_cache:
36
- raise ValueError(f"Wikidata object {qid_object} not in cache.")
37
- return wikidata_cache[qid_object]
38
-
45
+ max_size: int
46
+ The maximum size of the cache. When the cache exceeds this size, the least recently used item will be removed.
39
47
 
40
- def pull_wikidata_object(qid_object: str) -> Optional[WikidataThing]:
41
- """
42
- Pulls a Wikidata object from the cache or from the Wikidata API.
43
- Parameters
48
+ Attributes
44
49
  ----------
45
- qid_object: str
46
- The QID of the Wikidata object.
47
- Returns
48
- -------
49
- wikidata_object: Optional[WikidataThing]
50
- The Wikidata object, if it exists, otherwise None.
50
+ cache: OrderedDict
51
+ The cache that stores Wikidata objects.
51
52
  """
52
- if qid_object in wikidata_cache:
53
- return wikidata_cache[qid_object]
54
- wikidata_object: Optional[WikidataThing] = WikiDataAPIClient.retrieve_entity(qid_object)
55
- cache_wikidata_object(wikidata_object)
56
- return wikidata_object
57
53
 
54
+ _instance = None # Singleton instance
58
55
 
59
- def cache_wikidata_objects() -> Dict[str, WikidataThing]:
60
- """
61
- Returns the Wikidata cache.
62
- Returns
63
- -------
64
- wikidata_cache: Dict[str, WikidataThing]
65
- Wikidata cache.
66
- """
67
- return wikidata_cache
56
+ def __init__(self, max_size: int = 10000):
57
+ self.max_size = max_size
58
+ self.cache: OrderedDict = OrderedDict() # Maintain insertion order for LRU eviction
59
+ self.property_cache: OrderedDict = OrderedDict() # Cache for properties
60
+ self.subclass_cache: OrderedDict = OrderedDict() # Cache for subclasses
61
+ self.superclass_cache: OrderedDict = OrderedDict() # Cache for superclasses
68
62
 
63
+ def cache_property(self, prop: WikidataProperty):
64
+ """Adds a property to the property cache with LRU eviction.
69
65
 
70
- def number_of_cached_objects() -> int:
71
- """
72
- Returns the number of cached objects.
73
- Returns
74
- -------
75
- number_of_cached_objects: int
76
- Number of cached objects.
77
- """
78
- return len(wikidata_cache)
66
+ Parameters
67
+ ----------
68
+ prop: WikidataProperty
69
+ The property to cache.
70
+ """
71
+ if prop.pid in self.property_cache:
72
+ self.property_cache.move_to_end(prop.pid)
73
+ elif len(self.property_cache) >= self.max_size:
74
+ self.property_cache.popitem(last=False) # Remove the least recently used item
75
+ self.property_cache[prop.pid] = prop
79
76
 
77
+ def get_property(self, pid: str) -> WikidataProperty:
78
+ """Retrieves a property from the property cache.
80
79
 
81
- def load_cache(cache: Path):
82
- """
83
- Load the cache from the file.
84
- Parameters
85
- ----------
86
- cache: Path
87
- The path to the cache file.
88
- """
89
- if cache.exists():
90
- with cache.open("r") as r:
91
- reader = ndjson.reader(r)
92
- for line in reader:
93
- wiki_data_thing: WikidataThing = WikidataThing.create_from_dict(line)
94
- # Cache the object
95
- cache_wikidata_object(wiki_data_thing)
80
+ Parameters
81
+ ----------
82
+ pid: str
83
+ The PID of the property to retrieve.
96
84
 
85
+ Returns
86
+ -------
87
+ WikidataProperty
88
+ The property associated with the given PID.
89
+ """
90
+ if pid in self.property_cache:
91
+ self.property_cache.move_to_end(pid)
92
+ return self.property_cache[pid]
93
+ raise KeyError(f"Property {pid} not found in cache.")
97
94
 
98
- def qid_in_cache(ref_qid: str) -> bool:
99
- """
100
- Checks if a QID is in the cache.
101
- Parameters
102
- ----------
103
- ref_qid: str
104
- The QID to check.
95
+ def cache_wikidata_object(self, wikidata_object: WikidataThing):
96
+ """Adds a Wikidata object to the cache with LRU eviction.
105
97
 
106
- Returns
107
- -------
108
- in_cache: bool
109
- True if the QID is in the cache, otherwise False.
110
- """
111
- return ref_qid in wikidata_cache
98
+ Parameters
99
+ ----------
100
+ wikidata_object: WikidataThing
101
+ The Wikidata object to cache.
102
+ """
103
+ if wikidata_object.qid in self.cache:
104
+ self.cache.move_to_end(wikidata_object.qid) # Mark as most recently used
105
+ elif len(self.cache) >= self.max_size:
106
+ self.cache.popitem(last=False) # Remove the least recently used item
107
+
108
+ self.cache[wikidata_object.qid] = wikidata_object
109
+
110
+ def get_wikidata_object(self, qid: str) -> WikidataThing:
111
+ """Retrieves a Wikidata object from the cache.
112
+
113
+ Parameters
114
+ ----------
115
+ qid: str
116
+ The QID of the Wikidata object to retrieve.
117
+
118
+ Returns
119
+ -------
120
+ WikidataThing
121
+ The Wikidata object associated with the given QID.
122
+ """
123
+ if qid in self.cache:
124
+ self.cache.move_to_end(qid) # Mark as most recently used
125
+ return self.cache[qid]
126
+ raise KeyError(f"Wikidata object {qid} not found in cache.")
127
+
128
+ def cache_subclass(self, subclass: WikidataClass):
129
+ """Adds a subclass to the subclass cache with LRU eviction.
130
+
131
+ Parameters
132
+ ----------
133
+ subclass: WikidataClass
134
+ The subclass to cache.
135
+ """
136
+ if subclass.qid in self.subclass_cache:
137
+ self.subclass_cache.move_to_end(subclass.qid)
138
+ elif len(self.subclass_cache) >= self.max_size:
139
+ self.subclass_cache.popitem(last=False)
140
+
141
+ self.subclass_cache[subclass.qid] = subclass
142
+
143
+ def get_subclass(self, qid: str) -> WikidataClass:
144
+ """Retrieves a subclass from the subclass cache.
145
+
146
+ Parameters
147
+ ----------
148
+ qid: str
149
+ The QID of the subclass to retrieve.
150
+
151
+ Returns
152
+ -------
153
+ WikidataClass
154
+ The subclass associated with the given QID.
155
+ """
156
+ if qid in self.subclass_cache:
157
+ self.subclass_cache.move_to_end(qid)
158
+ return self.subclass_cache[qid]
159
+ raise KeyError(f"Subclass {qid} not found in cache.")
160
+
161
+ def cache_superclass(self, superclass: WikidataClass):
162
+ """Adds a superclass to the superclass cache with LRU eviction.
163
+
164
+ Parameters
165
+ ----------
166
+ superclass: WikidataClass
167
+ The superclass to cache.
168
+ """
169
+ if superclass.qid in self.superclass_cache:
170
+ self.superclass_cache.move_to_end(superclass.qid)
171
+ elif len(self.superclass_cache) >= self.max_size:
172
+ self.superclass_cache.popitem(last=False)
173
+
174
+ self.superclass_cache[superclass.qid] = superclass
175
+
176
+ def get_superclass(self, qid: str) -> WikidataClass:
177
+ """Retrieves a superclass from the superclass cache.
178
+
179
+ Parameters
180
+ ----------
181
+ qid: str
182
+ The QID of the superclass to retrieve.
183
+
184
+ Returns
185
+ -------
186
+ WikidataClass
187
+ The superclass associated with the given QID.
188
+ """
189
+ if qid in self.superclass_cache:
190
+ self.superclass_cache.move_to_end(qid)
191
+ return self.superclass_cache[qid]
192
+ raise KeyError(f"Superclass {qid} not found in cache.")
193
+
194
+ @staticmethod
195
+ def __path__objects__(path: Path) -> Path:
196
+ """Returns the path of the objects cache file within a cache directory.
197
+
198
+ Parameters
199
+ ----------
200
+ path: Path
201
+ The path to the directory containing the cache files.
202
+
203
+ Returns
204
+ -------
205
+ Path
206
+ The path to the file containing the objects.
207
+ """
208
+ return path / "wikidata_cache.ndjson"
209
+
210
+ @staticmethod
211
+ def __path__properties__(path: Path) -> Path:
212
+ """Returns the path of the properties cache file within a cache directory.
213
+
214
+ Parameters
215
+ ----------
216
+ path: Path
217
+ The path to the directory containing the cache files.
218
+ """
219
+ return path / "property_cache.ndjson"
220
+
221
+ @staticmethod
222
+ def __path__subclasses__(path: Path) -> Path:
223
+ """Returns the path of the subclasses cache file within a cache directory.
224
+
225
+ Parameters
226
+ ----------
227
+ path: Path
228
+ The path to the directory containing the cache files.
229
+
230
+ Returns
231
+ -------
232
+ Path
233
+ The path to the file containing the subclasses.
234
+ """
235
+ return path / "subclass_cache.ndjson"
236
+
237
+ @staticmethod
238
+ def __path__superclasses__(path: Path) -> Path:
239
+ """Returns the path of the superclasses cache file within a cache directory.
240
+
241
+ Parameters
242
+ ----------
243
+ path: Path
244
+ The path to the directory containing the cache files.
245
+
246
+ Returns
247
+ -------
248
+ Path
249
+ The path to the file containing the superclasses.
250
+ """
251
+ return path / "superclass_cache.ndjson"
252
+
253
+ def save_cache(self, cache_path: Path):
254
+ """Saves all caches as NDJSON files in a directory.
255
+
256
+ Parameters
257
+ ----------
258
+ cache_path: Path
259
+ The path to the directory where the cache files will be saved.
260
+ """
261
+
262
+ if not cache_path.exists():
263
+ cache_path.mkdir(parents=True, exist_ok=True)
264
+ elif cache_path.is_file():
265
+ logger.warning(f"Cache path {cache_path} is a file. Please provide a directory.")
266
+ return
267
+ # Save the cache to a file
268
+ with WikidataCache.__path__objects__(cache_path).open("w") as file:
269
+ for thing in self.cache.values():
270
+ thing: WikidataThing
271
+ file.write(orjson.dumps(thing.__dict__()).decode("utf-8") + "\n")
272
+ # Save the property cache to a file
273
+ with WikidataCache.__path__properties__(cache_path).open("w") as file:
274
+ for prop in self.property_cache.values():
275
+ prop: WikidataProperty
276
+ file.write(orjson.dumps(prop.__dict__()).decode("utf-8") + "\n")
277
+ # Save the subclass cache to a file
278
+ with WikidataCache.__path__subclasses__(cache_path).open("w") as file:
279
+ for subclass in self.subclass_cache.values():
280
+ subclass: WikidataClass
281
+ file.write(orjson.dumps(subclass.__dict__()).decode("utf-8") + "\n")
282
+ # Save the superclass cache to a file
283
+ with WikidataCache.__path__superclasses__(cache_path).open("w") as file:
284
+ for superclass in self.superclass_cache.values():
285
+ superclass: WikidataClass
286
+ file.write(orjson.dumps(superclass.__dict__()).decode("utf-8") + "\n")
287
+
288
+ def load_cache(self, cache_path: Path) -> None:
289
+ """Loads the cache from a path.
290
+
291
+ Parameters
292
+ ----------
293
+ cache_path: Path
294
+ The path to the directory from which the cache files will be loaded.
295
+ """
296
+ if not cache_path.exists():
297
+ logger.warning(f"Cache file {cache_path} not found. Skipping load.")
298
+ return
299
+ if cache_path.is_file():
300
+ logger.warning(f"Cache path {cache_path} is a file. Please provide a directory.")
301
+ return
302
+
303
+ wikidata_object_path: Path = WikidataCache.__path__objects__(cache_path)
304
+ if wikidata_object_path.exists():
305
+ with wikidata_object_path.open("r") as file:
306
+ for line in file:
307
+ try:
308
+ thing_data = orjson.loads(line)
309
+ thing = WikidataThing.create_from_dict(thing_data)
310
+ self.cache_wikidata_object(thing)
311
+ except Exception as e:
312
+ logger.error(f"Error loading cache: {e}. Line: {line}")
313
+ path_property: Path = WikidataCache.__path__properties__(cache_path)
314
+ if path_property.exists():
315
+ with path_property.open("r") as file:
316
+ for line in file:
317
+ try:
318
+ prop_data = orjson.loads(line)
319
+ prop = WikidataProperty.create_from_dict(prop_data)
320
+ self.cache_property(prop)
321
+ except Exception as e:
322
+ logger.error(f"Error loading property cache: {e}. Line: {line}")
323
+ subclass_path: Path = WikidataCache.__path__subclasses__(cache_path)
324
+ if subclass_path.exists():
325
+ with WikidataCache.__path__subclasses__(cache_path).open("r") as file:
326
+ for line in file:
327
+ try:
328
+ subclass_data = orjson.loads(line)
329
+ subclass = WikidataClass.create_from_dict(subclass_data)
330
+ self.subclass_cache[subclass.qid] = subclass
331
+ except Exception as e:
332
+ logger.error(f"Error loading subclass cache: {e}. Line: {line}")
333
+ superclass_path: Path = WikidataCache.__path__superclasses__(cache_path)
334
+ if superclass_path.exists():
335
+ with superclass_path.open("r") as file:
336
+ for line in file:
337
+ try:
338
+ superclass_data = orjson.loads(line)
339
+ superclass = WikidataClass.create_from_dict(superclass_data)
340
+ self.superclass_cache[superclass.qid] = superclass
341
+ except Exception as e:
342
+ logger.error(f"Error loading superclass cache: {e}. Line: {line}")
343
+
344
+ def qid_in_cache(self, qid: str) -> bool:
345
+ """Checks if a QID is in the cache.
346
+
347
+ Parameters
348
+ ----------
349
+ qid: str
350
+ The QID to check.
351
+
352
+ Returns
353
+ -------
354
+ bool
355
+ True if the QID is in the cache, False otherwise.
356
+ """
357
+ return qid in self.cache
358
+
359
+ def property_in_cache(self, pid: str) -> bool:
360
+ """Checks if a property is in the cache.
361
+
362
+ Parameters
363
+ ----------
364
+ pid: str
365
+ The PID to check.
366
+
367
+ Returns
368
+ -------
369
+ bool
370
+ True if the PID is in the cache, False otherwise.
371
+ """
372
+ return pid in self.property_cache
373
+
374
+ def subclass_in_cache(self, qid: str) -> bool:
375
+ """Checks if a subclass is in the cache.
376
+
377
+ Parameters
378
+ ----------
379
+ qid: str
380
+ The QID to check.
381
+
382
+ Returns
383
+ -------
384
+ bool
385
+ True if the QID is in the subclass cache, False otherwise.
386
+ """
387
+ return qid in self.subclass_cache
388
+
389
+ def superclass_in_cache(self, qid: str) -> bool:
390
+ """Checks if a superclass is in the cache.
391
+
392
+ Parameters
393
+ ----------
394
+ qid: str
395
+ The QID to check.
396
+
397
+ Returns
398
+ -------
399
+ bool
400
+ True if the QID is in the superclass cache, False otherwise.
401
+ """
402
+ return qid in self.superclass_cache
403
+
404
+ def number_of_cached_subclasses(self) -> int:
405
+ """Returns the number of cached subclasses.
406
+
407
+ Returns
408
+ -------
409
+ int
410
+ The number of subclasses in the cache.
411
+ """
412
+ return len(self.subclass_cache)
413
+
414
+ def number_of_cached_superclasses(self) -> int:
415
+ """Returns the number of cached superclasses.
416
+
417
+ Returns
418
+ -------
419
+ int
420
+ The number of superclasses in the cache.
421
+ """
422
+ return len(self.superclass_cache)
423
+
424
+ def number_of_cached_objects(self) -> int:
425
+ """Returns the number of cached objects.
426
+
427
+ Returns
428
+ -------
429
+ int
430
+ The number of objects in the cache.
431
+ """
432
+ return len(self.cache)
112
433
 
434
+ def number_of_cached_properties(self) -> int:
435
+ """Returns the number of cached properties.
113
436
 
114
- wikidata_cache: Dict[str, WikidataThing] = {}
115
- # Wikidata cache
437
+ Returns
438
+ -------
439
+ int
440
+ The number of properties in the cache.
441
+ """
442
+ return len(self.property_cache)