personal_knowledge_library-3.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of personal_knowledge_library might be problematic.

Files changed (42)
  1. knowledge/__init__.py +91 -0
  2. knowledge/base/__init__.py +22 -0
  3. knowledge/base/access.py +167 -0
  4. knowledge/base/entity.py +267 -0
  5. knowledge/base/language.py +27 -0
  6. knowledge/base/ontology.py +2734 -0
  7. knowledge/base/search.py +473 -0
  8. knowledge/base/tenant.py +192 -0
  9. knowledge/nel/__init__.py +11 -0
  10. knowledge/nel/base.py +495 -0
  11. knowledge/nel/engine.py +123 -0
  12. knowledge/ontomapping/__init__.py +667 -0
  13. knowledge/ontomapping/manager.py +320 -0
  14. knowledge/public/__init__.py +27 -0
  15. knowledge/public/cache.py +115 -0
  16. knowledge/public/helper.py +373 -0
  17. knowledge/public/relations.py +128 -0
  18. knowledge/public/wikidata.py +1324 -0
  19. knowledge/services/__init__.py +128 -0
  20. knowledge/services/asyncio/__init__.py +7 -0
  21. knowledge/services/asyncio/base.py +458 -0
  22. knowledge/services/asyncio/graph.py +1420 -0
  23. knowledge/services/asyncio/group.py +450 -0
  24. knowledge/services/asyncio/search.py +439 -0
  25. knowledge/services/asyncio/users.py +270 -0
  26. knowledge/services/base.py +533 -0
  27. knowledge/services/graph.py +1897 -0
  28. knowledge/services/group.py +819 -0
  29. knowledge/services/helper.py +142 -0
  30. knowledge/services/ontology.py +1234 -0
  31. knowledge/services/search.py +488 -0
  32. knowledge/services/session.py +444 -0
  33. knowledge/services/tenant.py +281 -0
  34. knowledge/services/users.py +445 -0
  35. knowledge/utils/__init__.py +10 -0
  36. knowledge/utils/graph.py +417 -0
  37. knowledge/utils/wikidata.py +197 -0
  38. knowledge/utils/wikipedia.py +175 -0
  39. personal_knowledge_library-3.0.0.dist-info/LICENSE +201 -0
  40. personal_knowledge_library-3.0.0.dist-info/METADATA +1163 -0
  41. personal_knowledge_library-3.0.0.dist-info/RECORD +42 -0
  42. personal_knowledge_library-3.0.0.dist-info/WHEEL +4 -0
knowledge/public/wikidata.py
@@ -0,0 +1,1324 @@
1
+ # -*- coding: utf-8 -*-
2
+ # Copyright © 2023-present Wacom. All rights reserved.
3
+ import hashlib
4
+ import multiprocessing
5
+ import urllib.parse
6
+ from abc import ABC
7
+ from collections import deque
8
+ from datetime import datetime
9
+ from multiprocessing import Pool
10
+ from typing import Optional, Union, Any, Dict, List, Tuple, Set
11
+
12
+ import requests
13
+ from requests import Response
14
+ from requests.adapters import HTTPAdapter
15
+ from urllib3 import Retry
16
+
17
+ from knowledge import logger
18
+ from knowledge.base.entity import (
19
+ Description,
20
+ DESCRIPTIONS_TAG,
21
+ Label,
22
+ LanguageCode,
23
+ LABELS_TAG,
24
+ REPOSITORY_TAG,
25
+ DISPLAY_TAG,
26
+ DESCRIPTION_TAG,
27
+ )
28
+ from knowledge.base.language import LANGUAGE_LOCALE_MAPPING, EN_US, LocaleCode
29
+ from knowledge.public import PROPERTY_MAPPING, INSTANCE_OF_PROPERTY, IMAGE_PROPERTY
30
+ from knowledge.public.helper import (
31
+ __waiting_request__,
32
+ __waiting_multi_request__,
33
+ QID_TAG,
34
+ REVISION_TAG,
35
+ PID_TAG,
36
+ LABEL_TAG,
37
+ CLAIMS_TAG,
38
+ LABEL_VALUE_TAG,
39
+ WIKIDATA_LANGUAGE_TAG,
40
+ ALIASES_TAG,
41
+ MODIFIED_TAG,
42
+ ONTOLOGY_TYPES_TAG,
43
+ SITELINKS_TAG,
44
+ parse_date,
45
+ ID_TAG,
46
+ LAST_REVID_TAG,
47
+ wikidate,
48
+ WikiDataAPIException,
49
+ WIKIDATA_SPARQL_URL,
50
+ SOURCE_TAG,
51
+ URLS_TAG,
52
+ TITLES_TAG,
53
+ image_url,
54
+ WIKIDATA_SEARCH_URL,
55
+ SUPERCLASSES_TAG,
56
+ API_LIMIT,
57
+ )
58
+
59
+ # Constants
60
+ QUALIFIERS_TAG: str = "QUALIFIERS"
61
+ LITERALS_TAG: str = "LITERALS"
62
+
63
+
64
+ def chunks(lst: List[str], chunk_size: int):
65
+ """
66
+ Yield successive chunk_size-sized chunks from lst.
67
+ Parameters
68
+ ----------
69
+ lst: List[str]
70
+ List to split into chunks.
71
+ chunk_size: int
72
+ Chunk size.
73
+
74
+ """
75
+ for i in range(0, len(lst), chunk_size):
76
+ yield lst[i : i + chunk_size]
77
+
78
+
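# Illustrative sketch (not part of the packaged file): batching QIDs with
# chunks() before sending them to the Wikidata API. The QIDs and the batch
# size are assumptions for demonstration; the import path is taken from the
# file list above.
from knowledge.public.wikidata import chunks

for batch in chunks(["Q76", "Q5", "Q146", "Q515", "Q82955"], 2):
    print(batch)  # -> ['Q76', 'Q5'], ['Q146', 'Q515'], ['Q82955']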
79
+ class WikidataProperty:
80
+ """
81
+ WikidataProperty
82
+ ----------------
83
+ Property id and its label from wikidata.
84
+
85
+ Parameters
86
+ ----------
87
+ pid: str
88
+ Property ID.
89
+ label: Optional[str] (default: None)
90
+ Label of the property.
91
+ """
92
+
93
+ def __init__(self, pid: str, label: Optional[str] = None):
94
+ super().__init__()
95
+ self.__pid: str = pid
96
+ self.__label: Optional[str] = label
97
+
98
+ @property
99
+ def pid(self):
100
+ """Property id."""
101
+ return self.__pid
102
+
103
+ @property
104
+ def label(self) -> str:
105
+ """Label with lazy loading mechanism.
106
+
107
+ Returns
108
+ -------
109
+ label: str
110
+ Label of the property.
111
+ """
112
+ if self.__label:
113
+ return self.__label
114
+ if self.pid in PROPERTY_MAPPING: # only English mappings
115
+ self.__label = PROPERTY_MAPPING[self.pid]
116
+ else:
117
+ prop_dict = __waiting_request__(self.pid)
118
+ if "labels" in prop_dict:
119
+ labels: Dict[str, Any] = prop_dict.get("labels")
120
+ if "en" in labels:
121
+ en_label: Dict[str, Any] = labels.get("en")
122
+ self.__label = en_label.get("value", self.__pid)
123
+ PROPERTY_MAPPING[self.pid] = self.__label
124
+ else:
125
+ self.__label = self.pid
126
+
127
+ else:
128
+ self.__label = self.__pid
129
+ return self.__label
130
+
131
+ @property
132
+ def label_cached(self) -> str:
133
+ """Label with cached value."""
134
+ if self.__label:
135
+ return self.__label
136
+ if self.pid in PROPERTY_MAPPING: # only English mappings
137
+ self.__label = PROPERTY_MAPPING[self.pid]
138
+ return self.__label
139
+
140
+ def __dict__(self):
141
+ return {PID_TAG: self.pid, LABEL_TAG: self.label_cached}
142
+
143
+ @classmethod
144
+ def create_from_dict(cls, prop_dict: Dict[str, Any]) -> "WikidataProperty":
145
+ """Create a property from a dictionary.
146
+ Parameters
147
+ ----------
148
+ prop_dict: Dict[str, Any]
149
+ Property dictionary.
150
+
151
+ Returns
152
+ -------
153
+ instance: WikidataProperty
154
+ Instance of WikidataProperty.
155
+ """
156
+ return WikidataProperty(prop_dict[PID_TAG], prop_dict.get(LABEL_TAG))
157
+
158
+ def __repr__(self):
159
+ return f"<Property:={self.pid}]>"
160
+
161
+
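# Illustrative sketch (not part of the packaged file): resolving a property
# label lazily. P31 is Wikidata's "instance of" property; the import path is
# assumed from the file list above.
from knowledge.public.wikidata import WikidataProperty

prop = WikidataProperty("P31")
print(prop.pid)           # 'P31'
print(prop.label)         # resolved from PROPERTY_MAPPING or via a Wikidata request
print(prop.label_cached)  # returns the now-cached label without any lookup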
162
+ class WikidataSearchResult:
163
+ """
164
+ WikidataSearchResult
165
+ --------------------
166
+ Search result from wikidata.
167
+ """
168
+
169
+ def __init__(self, qid: str, label: Label, description: Optional[Description], repository: str, aliases: List[str]):
170
+ self.__qid: str = qid
171
+ self.__label: Label = label
172
+ self.__description: Optional[Description] = description
173
+ self.__repository: str = repository
174
+ self.__aliases: List[str] = aliases
175
+
176
+ @property
177
+ def qid(self) -> str:
178
+ """QID of the search result."""
179
+ return self.__qid
180
+
181
+ @property
182
+ def label(self) -> Label:
183
+ """Label of the search result."""
184
+ return self.__label
185
+
186
+ @property
187
+ def description(self) -> Optional[Description]:
188
+ """Description of the search result."""
189
+ return self.__description
190
+
191
+ @property
192
+ def repository(self) -> str:
193
+ """Repository of the search result."""
194
+ return self.__repository
195
+
196
+ @property
197
+ def aliases(self) -> List[str]:
198
+ """Aliases of the search result."""
199
+ return self.__aliases
200
+
201
+ @classmethod
202
+ def from_dict(cls, search_result: Dict[str, Any]) -> "WikidataSearchResult":
203
+ """
204
+ Create a search result from a dictionary.
205
+ Parameters
206
+ ----------
207
+ search_result: Dict[str, Any]
208
+ Search result dictionary.
209
+
210
+ Returns
211
+ -------
212
+ WikidataSearchResult
213
+ Instance of WikidataSearchResult.
214
+ """
215
+ qid: str = search_result[ID_TAG]
216
+ display: Dict[str, Any] = search_result[DISPLAY_TAG]
217
+ label: Label = Label(
218
+ content=display[LABEL_TAG]["value"],
219
+ language_code=LANGUAGE_LOCALE_MAPPING.get(LanguageCode(display[LABEL_TAG]["language"]), EN_US),
220
+ )
221
+ description: Optional[Description] = None
222
+ if DESCRIPTION_TAG in display:
223
+ description: Description = Description(
224
+ description=display[DESCRIPTION_TAG]["value"], language_code=display[DESCRIPTION_TAG]["language"]
225
+ )
226
+ aliases: List[str] = [alias["value"] for alias in display.get(ALIASES_TAG, [])]
227
+ repository: str = search_result[REPOSITORY_TAG]
228
+ return WikidataSearchResult(
229
+ qid=qid, label=label, description=description, repository=repository, aliases=aliases
230
+ )
231
+
232
+ def __repr__(self):
233
+ desc_str: str = ""
234
+ if self.description:
235
+ desc_str: str = f" - description:= {self.description}"
236
+ return f"<Search Result:={self.qid} - label:= {self.label}{desc_str}>"
237
+
238
+
239
+ class WikidataClass:
240
+ """
241
+ WikidataClass
242
+ ----------------
243
+ In Wikidata, classes are used to group items together based on their common characteristics.
244
+ Classes in Wikidata are represented as items themselves, and they are typically identified by the prefix "Q"
245
+ followed by a unique number.
246
+
247
+ There are several types of classes in Wikidata, including:
248
+
249
+ - **Main Classes**: These are the most general classes in Wikidata, and they represent broad categories of items.
250
+ Examples of main classes include "person" (Q215627), "physical location" (Q17334923), and "event" (occurrence).
251
+ - **Subclasses**: These are more specific classes that are grouped under a main class.
252
+ For example, "politician" (Q82955) is a subclass of "person" (Q215627), and "city" (Q515) is a subclass
253
+ of "location" (Q17334923).
254
+ - **Properties**: These are classes that represent specific attributes or characteristics of items. For example,
255
+ "gender" (Q48277) is a property that can be used to describe the gender of a person.
256
+ - **Instances**: These are individual items that belong to a class. For example, Barack Obama (Q76) is an instance
257
+ of the "person" (Q215627) class.
258
+ - **Meta-classes**: These are classes that are used to group together other classes based on their properties or
259
+ characteristics. For example, the "monotypic taxon" (Q310890) class groups together classes that represent
260
+ individual species of organisms.
261
+
262
+ Overall, classes in Wikidata are a tool for organizing and categorizing information in a structured and consistent
263
+ way, which makes it easier to search and analyze data across a wide range of topics and domains.
264
+
265
+ Parameters
266
+ ----------
267
+ qid: str
268
+ Class QID.
269
+
270
+ """
271
+
272
+ def __init__(self, qid: str, label: Optional[str] = None):
273
+ super().__init__()
274
+ self.__qid: str = qid
275
+ self.__label: Optional[str] = label
276
+ self.__superclasses: List[WikidataClass] = []
277
+ self.__subclasses: List[WikidataClass] = []
278
+
279
+ @property
280
+ def qid(self):
281
+ """Property id."""
282
+ return self.__qid
283
+
284
+ @property
285
+ def label(self) -> str:
286
+ """Label with lazy loading mechanism."""
287
+ if self.__label:
288
+ return self.__label
289
+
290
+ class_dict = __waiting_request__(self.qid)
291
+ self.__label = (
292
+ class_dict["labels"].get("en").get("value", self.__qid)
293
+ if class_dict.get("labels") and class_dict["labels"].get("en")
294
+ else self.__qid
295
+ )
296
+ return self.__label
297
+
298
+ @property
299
+ def superclasses(self) -> List["WikidataClass"]:
300
+ """Superclasses."""
301
+ return self.__superclasses
302
+
303
+ @property
304
+ def subclasses(self) -> List["WikidataClass"]:
305
+ """Subclasses."""
306
+ return self.__subclasses
307
+
308
+ @classmethod
309
+ def create_from_dict(cls, class_dict: Dict[str, Any]) -> "WikidataClass":
310
+ """
311
+ Create a class from a dictionary.
312
+ Parameters
313
+ ----------
314
+ class_dict: Dict[str, Any]
315
+ Class dictionary.
316
+
317
+ Returns
318
+ -------
319
+ instance: WikidataClass
320
+ Instance of WikidataClass.
321
+ """
322
+ wiki_cls: WikidataClass = cls(class_dict[QID_TAG], class_dict.get(LABEL_TAG))
323
+ for superclass in class_dict.get(SUPERCLASSES_TAG, []):
324
+ wiki_cls.__superclasses.append(WikidataClass.create_from_dict(superclass))
325
+ return wiki_cls
326
+
327
+ def __hierarchy__(self, visited: Optional[set] = None):
328
+ if visited is None:
329
+ visited = set()
330
+ if self.qid in visited:
331
+ return {
332
+ QID_TAG: self.qid,
333
+ LABEL_TAG: self.label,
334
+ SUPERCLASSES_TAG: [],
335
+ }
336
+ visited.add(self.qid)
337
+ return {
338
+ QID_TAG: self.qid,
339
+ LABEL_TAG: self.label,
340
+ SUPERCLASSES_TAG: [superclass.__hierarchy__(visited) for superclass in self.superclasses],
341
+ }
342
+
343
+ def __dict__(self):
344
+ return self.__hierarchy__()
345
+
346
+ def __repr__(self):
347
+ return f"<WikidataClass:={self.qid}]>"
348
+
349
+
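# Illustrative sketch (not part of the packaged file): building a small class
# hierarchy by hand, using the QIDs mentioned in the docstring above
# ("politician" Q82955 is a subclass of "person" Q215627).
from knowledge.public.wikidata import WikidataClass

person = WikidataClass("Q215627", "person")
politician = WikidataClass("Q82955", "politician")
politician.superclasses.append(person)  # superclasses is a mutable list
print(politician.qid, "->", [c.qid for c in politician.superclasses])  # Q82955 -> ['Q215627']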
350
+ class Claim:
351
+ """
352
+ Claim
353
+ ------
354
+ A Wikidata claim is a statement that describes a particular property-value relationship about an item in the
355
+ Wikidata knowledge base. In Wikidata, an item represents a specific concept, such as a person, place, or
356
+ organization, and a property describes a particular aspect of that concept, such as its name, date of birth,
357
+ or location.
358
+
359
+ A claim consists of three elements:
360
+
361
+ - Subject: The item to which the statement applies
362
+ - Predicate: The property that describes the statement
363
+ - Object: The value of the property for the given item
364
+
365
+ For example, a claim could be "Barack Obama (subject) has a birthdate (predicate) of August 4, 1961 (object)."
366
+ Claims in Wikidata help to organize information and provide a structured way to represent knowledge that can
367
+ be easily queried, analyzed, and visualized.
368
+ """
369
+
370
+ def __init__(self, pid: WikidataProperty, literal: List[Dict[str, Any]], qualifiers: List[Dict[str, Any]]):
371
+ super().__init__()
372
+ self.__pid: WikidataProperty = pid
373
+ self.__literals: List[Dict[str, Any]] = literal
374
+ self.__qualifiers: List[Dict[str, Any]] = qualifiers
375
+
376
+ @property
377
+ def pid(self) -> WikidataProperty:
378
+ """Property name. Predicate of the claim."""
379
+ return self.__pid
380
+
381
+ @property
382
+ def literals(self) -> List[Dict[str, Any]]:
383
+ """Literals. Objects of the statement."""
384
+ return self.__literals
385
+
386
+ @property
387
+ def qualifiers(self) -> List[Dict[str, Any]]:
388
+ """Qualifiers."""
389
+ return self.__qualifiers
390
+
391
+ def __dict__(self):
392
+ return {PID_TAG: self.pid.__dict__(), LITERALS_TAG: self.literals, QUALIFIERS_TAG: self.qualifiers}
393
+
394
+ def __eq__(self, other):
395
+ if not isinstance(other, Claim):
396
+ return False
397
+ return self.pid == other.pid
398
+
399
+ def __hash__(self):
400
+ return hash(self.pid)
401
+
402
+ def __repr__(self):
403
+ return f"<Claim:={self.pid}, {self.literals}]>"
404
+
405
+ @classmethod
406
+ def create_from_dict(cls, claim) -> "Claim":
407
+ """Create a claim from a dictionary."""
408
+ pid: WikidataProperty = WikidataProperty.create_from_dict(claim["pid"])
409
+ literals = claim[LITERALS_TAG]
410
+ qualifiers = claim[QUALIFIERS_TAG]
411
+ return cls(pid, literals, qualifiers)
412
+
413
+
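# Illustrative sketch (not part of the packaged file): a claim stating that
# Barack Obama (Q76) is an instance of (P31) human (Q5), mirroring the
# subject/predicate/object description in the docstring above. The literal
# dictionary follows the {"type": ..., "value": ...} shape produced by
# WikidataThing.from_wikidata() further below.
from knowledge.public.wikidata import Claim, WikidataProperty

instance_of = Claim(
    pid=WikidataProperty("P31", "instance of"),
    literal=[{"type": "wikibase-item", "value": {"id": "Q5"}}],  # object: Q5 (human)
    qualifiers=[],
)
print(instance_of.pid.pid, instance_of.literals)  # P31 [{'type': 'wikibase-item', 'value': {'id': 'Q5'}}]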
414
+ class SiteLinks:
415
+ """
416
+ SiteLinks
417
+ ---------
418
+ Sitelinks in Wikidata are links between items in Wikidata and pages on external websites, such as Wikipedia,
419
+ Wikimedia Commons, and other Wikimedia projects. A site-link connects a Wikidata item to a specific page on an
420
+ external website that provides more information about the topic represented by the item.
421
+
422
+ For example, a Wikidata item about a particular city might have sitelinks to the corresponding page on the English,
423
+ French, and German Wikipedia sites. Each site-link connects the Wikidata item to a specific page on the external
424
+ website that provides more detailed information about the city.
425
+
426
+ Sitelinks in Wikidata help to connect and integrate information across different languages and projects,
427
+ making it easier to access and share knowledge on a wide range of topics. They also help to provide context and
428
+ additional information about Wikidata items, improving the overall quality and usefulness of the knowledge base.
429
+
430
+ Parameters
431
+ ----------
432
+ source: str
433
+ Source of sitelinks.
434
+ """
435
+
436
+ def __init__(
437
+ self, source: str, urls: Union[Dict[str, str], None] = None, titles: Union[Dict[str, str], None] = None
438
+ ):
439
+ self.__source: str = source
440
+ self.__urls: Dict[str, str] = {} if urls is None else urls
441
+ self.__title: Dict[str, str] = {} if titles is None else titles
442
+
443
+ @property
444
+ def urls(self) -> Dict[str, str]:
445
+ """URLs for the source."""
446
+ return self.__urls
447
+
448
+ @property
449
+ def titles(self) -> Dict[str, str]:
450
+ """Titles for the source."""
451
+ return self.__title
452
+
453
+ @property
454
+ def urls_languages(self) -> List[str]:
455
+ """List of all supported languages."""
456
+ return list(self.__urls.keys())
457
+
458
+ @property
459
+ def source(self) -> str:
460
+ """Sitelinks source."""
461
+ return self.__source
462
+
463
+ @classmethod
464
+ def create_from_dict(cls, entity_dict: Dict[str, Any]) -> "SiteLinks":
465
+ """
466
+ Create a SiteLinks object from a dictionary.
467
+
468
+ Parameters
469
+ ----------
470
+ entity_dict: Dict[str, Any]
471
+ dictionary containing the entity information.
472
+
473
+ Returns
474
+ -------
475
+ instance: SiteLinks
476
+ The SiteLinks instance.
477
+ """
478
+ return SiteLinks(
479
+ source=entity_dict[SOURCE_TAG], urls=entity_dict.get(URLS_TAG), titles=entity_dict.get(TITLES_TAG)
480
+ )
481
+
482
+ def __dict__(self):
483
+ return {SOURCE_TAG: self.__source, URLS_TAG: self.__urls, TITLES_TAG: self.__title}
484
+
485
+ def __repr__(self):
486
+ return f'<SiteLinks:={self.source}, supported languages:=[{"|".join(self.urls_languages)}]>'
487
+
488
+
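# Illustrative sketch (not part of the packaged file): sitelinks for a single
# Wikipedia article. The source name, URL and title are assumptions for
# demonstration only.
from knowledge.public.wikidata import SiteLinks

links = SiteLinks(
    source="wiki",
    urls={"en": "https://en.wikipedia.org/wiki/Graz"},
    titles={"en": "Graz"},
)
print(links.source, links.urls_languages)  # wiki ['en']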
489
+ class WikidataThing:
490
+ """
491
+ WikidataThing
492
+ -------------
493
+ Generic entity within wikidata.
494
+
495
+ Each entity is derived from this object; thus all entities share:
496
+ - **qid**: A unique resource identity to identify the entity and reference it in relations
497
+ - **label**: Human understandable label
498
+ - **description**: Description of entity
499
+
500
+ Parameters
501
+ ----------
502
+ revision: str
503
+ Revision of the entity
504
+ qid: str
505
+ QID for entity. For new entities the URI is None, as the knowledge graph backend assigns this.
506
+ modified: datetime
507
+ Last modified date
508
+ label: Optional[Dict[str, Label]]
509
+ Labels mapped by locale.
510
+ aliases: Optional[Dict[str, List[Label]]]
511
+ Alias labels mapped by locale.
512
+ description: Optional[Dict[str, Description]]
513
+ Descriptions mapped by locale.
514
+ """
515
+
516
+ def __init__(
517
+ self,
518
+ revision: str,
519
+ qid: str,
520
+ modified: datetime,
521
+ label: Optional[Dict[str, Label]] = None,
522
+ aliases: Optional[Dict[str, List[Label]]] = None,
523
+ description: Optional[Dict[str, Description]] = None,
524
+ ):
525
+ self.__qid: str = qid
526
+ self.__revision: str = revision
527
+ self.__modified: datetime = modified
528
+ self.__label: Dict[str, Label] = label if label else {}
529
+ self.__description: Dict[str, Description] = description if description else {}
530
+ self.__aliases: Dict[str, List[Label]] = aliases if aliases else {}
531
+ self.__claims: Dict[str, Claim] = {}
532
+ self.__sitelinks: Dict[str, SiteLinks] = {}
533
+ self.__ontology_types: List[str] = []
534
+
535
+ @property
536
+ def qid(self) -> str:
537
+ """QID for entity."""
538
+ return self.__qid
539
+
540
+ @property
541
+ def revision(self) -> str:
542
+ """Revision version of entity."""
543
+ return self.__revision
544
+
545
+ @property
546
+ def modified(self) -> datetime:
547
+ """Modification date of entity."""
548
+ return self.__modified
549
+
550
+ @property
551
+ def label(self) -> Dict[str, Label]:
552
+ """Labels of the entity."""
553
+ return self.__label
554
+
555
+ @property
556
+ def ontology_types(self) -> List[str]:
557
+ """Ontology types of the entity."""
558
+ return self.__ontology_types
559
+
560
+ @ontology_types.setter
561
+ def ontology_types(self, ontology_types: List[str]):
562
+ self.__ontology_types = ontology_types
563
+
564
+ @property
565
+ def label_languages(self) -> List[str]:
566
+ """All available languages for a labels."""
567
+ return list(self.__label.keys())
568
+
569
+ @property
570
+ def aliases(self) -> Dict[str, List[Label]]:
571
+ """Alternative labels of the concept."""
572
+ return self.__aliases
573
+
574
+ @property
575
+ def alias_languages(self) -> List[str]:
576
+ """All available languages for a aliases."""
577
+ return list(self.__aliases.keys())
578
+
579
+ @property
580
+ def description(self) -> Dict[str, Description]:
581
+ """Description of the thing (optional)."""
582
+ return self.__description
583
+
584
+ @description.setter
585
+ def description(self, description: Dict[str, Description]):
586
+ self.__description = description
587
+
588
+ @property
589
+ def description_languages(self) -> List[str]:
590
+ """All available languages for a description."""
591
+ return list(self.__description.keys())
592
+
593
+ def add_label(self, label: str, language_code: str):
594
+ """Adding a label for entity.
595
+
596
+ Parameters
597
+ ----------
598
+ label: str
599
+ Label
600
+ language_code: str
601
+ ISO-3166 Country Codes and ISO-639 Language Codes in the format '<language_code>_<country>', e.g., 'en_US'.
602
+ """
603
+ self.__label[language_code] = Label(label, LocaleCode(language_code), True)
604
+
605
+ def label_lang(self, language_code: str) -> Optional[Label]:
606
+ """
607
+ Get the label for a given language code.
608
+
609
+ Parameters
610
+ ----------
611
+ language_code: LanguageCode
612
+ Requested language code
613
+ Returns
614
+ -------
615
+ label: Optional[Label]
616
+ Returns the label for a specific language code
617
+ """
618
+ return self.label.get(language_code)
619
+
620
+ def add_description(self, description: str, language_code: str):
621
+ """Adding a description for entity.
622
+
623
+ Parameters
624
+ ----------
625
+ description: str
626
+ Description
627
+ language_code: str
628
+ ISO-3166 Country Codes and ISO-639 Language Codes in the format '<language_code>_<country>', e.g., 'en_US'.
629
+ """
630
+ self.__description[language_code] = Description(
631
+ description=description, language_code=LocaleCode(language_code)
632
+ )
633
+
634
+ def description_lang(self, language_code: str) -> Optional[Description]:
635
+ """
636
+ Get description for entity.
637
+
638
+ Parameters
639
+ ----------
640
+ language_code: LanguageCode
641
+ ISO-3166 Country Codes and ISO-639 Language Codes in the format '<language_code>_<country>', e.g., 'en_US'.
642
+ Returns
643
+ -------
644
+ description: Optional[Description]
645
+ Returns the description for a specific language code
646
+ """
647
+ return self.description.get(language_code)
648
+
649
+ def alias_lang(self, language_code: str) -> List[Label]:
650
+ """
651
+ Get aliases for a given language code.
652
+
653
+ Parameters
654
+ ----------
655
+ language_code: str
656
+ Requested language code
657
+ Returns
658
+ -------
659
+ aliases: List[Label]
660
+ Returns a list of aliases for a specific language code
661
+ """
662
+ return self.aliases.get(language_code)
663
+
664
+ def add_alias(self, alias: str, language_code: str):
665
+ """Adding an alias for entity.
666
+
667
+ Parameters
668
+ ----------
669
+ alias: str
670
+ Alias
671
+ language_code: str
672
+ ISO-3166 Country Codes and ISO-639 Language Codes in the format '<language_code>_<country>', e.g., 'en_US'.
673
+ """
674
+ if language_code not in self.__aliases:
675
+ self.aliases[language_code] = []
676
+ self.__aliases[language_code].append(Label(alias, LocaleCode(language_code), False))
677
+
678
+ def image(self, dpi: int = 500) -> Optional[str]:
679
+ """
680
+ Generate URL for image from Wikimedia.
681
+
682
+ Parameters
683
+ ----------
684
+ dpi: int
685
+ DPI value. Range: [50-1000]
686
+
687
+ Returns
688
+ -------
689
+ wikimedia_url: str
690
+ URL for Wikimedia
691
+ """
692
+ if not (50 <= dpi <= 1000):
693
+ raise ValueError(f"DPI should bei with range of [50-1000]. Value:={dpi}")
694
+ claim: Optional[Claim] = self.claims.get(IMAGE_PROPERTY)
695
+ if claim and len(claim.literals) > 0:
696
+ img = claim.literals[0]["value"]
697
+ if isinstance(img, dict) and "image_url" in img:
698
+ return img["image_url"]
699
+ extension: str = ""
700
+ conversion: str = ""
701
+ fixed_img: str = img.replace(" ", "_")
702
+ if fixed_img.lower().endswith("svg"):
703
+ extension: str = ".png"
704
+ if fixed_img.lower().endswith("tif") or fixed_img.lower().endswith("tiff"):
705
+ extension: str = ".jpg"
706
+ conversion: str = "lossy-page1-"
707
+ hash_img: str = hashlib.md5(fixed_img.encode("utf-8")).hexdigest()
708
+ url_img_part: str = urllib.parse.quote_plus(fixed_img)
709
+ return (
710
+ f"https://upload.wikimedia.org/wikipedia/commons/thumb/"
711
+ f"{hash_img[0]}/{hash_img[:2]}/{url_img_part}/{dpi}px-{conversion + url_img_part + extension}"
712
+ )
713
+ return None
714
+
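# Worked example of the thumbnail URL scheme built by image() above
# (illustrative; the file name is an assumption): an image claim value of
# "Example image.jpg" becomes "Example_image.jpg", is MD5-hashed, and with
# dpi=500 yields
#   https://upload.wikimedia.org/wikipedia/commons/thumb/<h>/<hh>/Example_image.jpg/500px-Example_image.jpg
# where <h> and <hh> are the first one and two hex digits of the hash.
# SVG sources get a ".png" suffix; TIFF sources get "lossy-page1-" and ".jpg".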
715
+ @property
716
+ def instance_of(self) -> List[WikidataClass]:
717
+ """Instance of."""
718
+ claim: Optional[Claim] = self.claims.get(INSTANCE_OF_PROPERTY)
719
+ if claim:
720
+ return [WikidataClass(li["value"].get("id")) for li in claim.literals if "value" in li]
721
+ return []
722
+
723
+ @property
724
+ def sitelinks(self) -> Dict[str, SiteLinks]:
725
+ """Different sitelinks assigned to entity."""
726
+ return self.__sitelinks
727
+
728
+ def __dict__(self):
729
+ return {
730
+ QID_TAG: self.qid,
731
+ REVISION_TAG: self.revision,
732
+ MODIFIED_TAG: self.modified.isoformat(),
733
+ LABELS_TAG: {lang: la.__dict__() for lang, la in self.label.items()},
734
+ DESCRIPTIONS_TAG: {lang: la.__dict__() for lang, la in self.description.items()},
735
+ ALIASES_TAG: {lang: [a.__dict__() for a in al] for lang, al in self.aliases.items()},
736
+ CLAIMS_TAG: {pid: cl.__dict__() for pid, cl in self.claims.items()},
737
+ ONTOLOGY_TYPES_TAG: self.ontology_types,
738
+ SITELINKS_TAG: {source: site.__dict__() for source, site in self.sitelinks.items()},
739
+ }
740
+
741
+ @classmethod
742
+ def create_from_dict(cls, entity_dict: Dict[str, Any]) -> "WikidataThing":
743
+ """
744
+ Create WikidataThing from dict.
745
+
746
+ Parameters
747
+ ----------
748
+ entity_dict: Dict[str, Any]
749
+ dictionary with WikidataThing information.
750
+
751
+ Returns
752
+ -------
753
+ thing: WikidataThing
754
+ Instance of WikidataThing
755
+ """
756
+ labels: Dict[str, Label] = {}
757
+ aliases: Dict[str, List[Label]] = {}
758
+ descriptions: Dict[str, Description] = {}
759
+ for language, la in entity_dict[LABELS_TAG].items():
760
+ labels[language] = Label.create_from_dict(la)
761
+ for language, de in entity_dict[DESCRIPTIONS_TAG].items():
762
+ descriptions[language] = Description.create_from_dict(de)
763
+ for language, al in entity_dict[ALIASES_TAG].items():
764
+ aliases[language] = []
765
+ for a in al:
766
+ aliases[language].append(Label.create_from_dict(a))
767
+ # Initiate the wikidata thing
768
+ thing: WikidataThing = WikidataThing(
769
+ qid=entity_dict[QID_TAG],
770
+ revision=entity_dict[REVISION_TAG],
771
+ modified=parse_date(entity_dict[MODIFIED_TAG]),
772
+ label=labels,
773
+ aliases=aliases,
774
+ description=descriptions,
775
+ )
776
+ # Load the ontology types
777
+ thing.ontology_types = entity_dict.get(ONTOLOGY_TYPES_TAG, [])
778
+ # Load the claims
779
+ for pid, claim in entity_dict[CLAIMS_TAG].items():
780
+ thing.claims[pid] = Claim.create_from_dict(claim)
781
+ # Load the sitelinks
782
+ for wiki_source, site_link in entity_dict[SITELINKS_TAG].items():
783
+ thing.sitelinks[wiki_source] = SiteLinks.create_from_dict(site_link)
784
+ return thing
785
+
786
+ @staticmethod
787
+ def from_wikidata(entity_dict: Dict[str, Any], supported_languages: Optional[List[str]] = None) -> "WikidataThing":
788
+ """
789
+ Create WikidataThing from Wikidata JSON response.
790
+ Parameters
791
+ ----------
792
+ entity_dict: Dict[str, Any]
793
+ dictionary with WikidataThing information.
794
+ supported_languages: Optional[List[str]]
795
+ List of supported languages. If None, all languages are supported.
796
+
797
+ Returns
798
+ -------
799
+ thing: WikidataThing
800
+ Instance of WikidataThing.
801
+ """
802
+ labels: Dict[str, Label] = {}
803
+ aliases: Dict[str, List[Label]] = {}
804
+ descriptions: Dict[str, Description] = {}
805
+ if LABELS_TAG in entity_dict:
806
+ # Extract the labels
807
+ for label in entity_dict[LABELS_TAG].values():
808
+ if supported_languages is None or label[WIKIDATA_LANGUAGE_TAG] in supported_languages:
809
+ la_content: str = label[LABEL_VALUE_TAG]
810
+ la_lang: LanguageCode = LanguageCode(label[WIKIDATA_LANGUAGE_TAG])
811
+ if la_lang in LANGUAGE_LOCALE_MAPPING:
812
+ la: Label = Label(content=la_content, language_code=LANGUAGE_LOCALE_MAPPING[la_lang], main=True)
813
+ labels[la.language_code] = la
814
+ else:
815
+ labels["en_US"] = Label("No Label", EN_US)
816
+ if ALIASES_TAG in entity_dict:
817
+ # Extract the aliases
818
+ for alias in entity_dict[ALIASES_TAG].values():
819
+ if supported_languages is None or alias[WIKIDATA_LANGUAGE_TAG] in supported_languages:
820
+ for a in alias:
821
+ la_content: str = a[LABEL_VALUE_TAG]
822
+ la_lang: LanguageCode = LanguageCode(a[WIKIDATA_LANGUAGE_TAG])
823
+ if la_lang in LANGUAGE_LOCALE_MAPPING:
824
+ la: Label = Label(
825
+ content=la_content, language_code=LANGUAGE_LOCALE_MAPPING[la_lang], main=False
826
+ )
827
+ if la.language_code not in aliases:
828
+ aliases[la.language_code] = []
829
+ aliases[la.language_code].append(la)
830
+ if DESCRIPTIONS_TAG in entity_dict:
831
+ # Extracting the descriptions
832
+ for desc in entity_dict[DESCRIPTIONS_TAG].values():
833
+ if supported_languages is None or desc[WIKIDATA_LANGUAGE_TAG] in supported_languages:
834
+ desc_content: str = desc[LABEL_VALUE_TAG]
835
+ desc_lang: LanguageCode = LanguageCode(desc[WIKIDATA_LANGUAGE_TAG])
836
+ if desc_lang in LANGUAGE_LOCALE_MAPPING:
837
+ de: Description = Description(
838
+ description=desc_content, language_code=LANGUAGE_LOCALE_MAPPING[desc_lang]
839
+ )
840
+ descriptions[de.language_code] = de
841
+ # Initiate the wikidata thing
842
+ thing: WikidataThing = WikidataThing(
843
+ qid=entity_dict[ID_TAG],
844
+ revision=entity_dict[LAST_REVID_TAG],
845
+ modified=parse_date(entity_dict[MODIFIED_TAG]),
846
+ label=labels,
847
+ aliases=aliases,
848
+ description=descriptions,
849
+ )
850
+
851
+ # Iterate over the claims
852
+ for pid, claim_group in entity_dict[CLAIMS_TAG].items():
853
+ literal: List[Dict[str, Any]] = []
854
+ qualifiers: List[Dict[str, Any]] = []
855
+ for claim in claim_group:
856
+ try:
857
+ snak_type: str = claim["mainsnak"]["snaktype"]
858
+ if snak_type == "value":
859
+ data_value: Dict[str, Any] = claim["mainsnak"]["datavalue"]
860
+ data_type: str = claim["mainsnak"]["datatype"]
861
+ val: Dict[str, Any] = {}
862
+ if data_type == "monolingualtext":
863
+ val = data_value["value"]
864
+ elif data_type in {"string", "external-id", "url"}:
865
+ val = data_value["value"]
866
+ elif data_type == "commonsMedia":
867
+ val = {"image_url": image_url(data_value["value"])}
868
+ elif data_type == "time":
869
+ val = wikidate(data_value["value"])
870
+ elif data_type == "quantity":
871
+ if "amount" in data_value["value"]:
872
+ val = {"amount": data_value["value"]["amount"], "unit": data_value["value"]["unit"]}
873
+ elif data_type == "wikibase-lexeme":
874
+ val = {"id": data_value["value"]["id"]}
875
+ elif data_type in {"geo-shape", "wikibase-property"}:
876
+ # Not supported
877
+ val = data_value["value"]
878
+ elif data_type in {"globe-coordinate", "globecoordinate"}:
879
+ val = {
880
+ "longitude": data_value["value"].get("longitude"),
881
+ "latitude": data_value["value"].get("latitude"),
882
+ "altitude": data_value["value"].get("altitude"),
883
+ "globe": data_value["value"].get("globe"),
884
+ "precision": data_value["value"].get("precision"),
885
+ }
886
+ elif data_type in {"wikibase-entityid", "wikibase-item"}:
887
+ val = {"id": data_value["value"]["id"]}
888
+ elif data_type == "math":
889
+ val = {"math": data_value["value"]}
890
+ elif data_type == "tabular-data":
891
+ val = {"tabular": data_value["value"]}
892
+ elif data_type == "entity-schema":
893
+ val = {"id": data_value["value"]["id"]}
894
+ elif data_type == "wikibase-form":
895
+ continue
896
+ else:
897
+ raise WikiDataAPIException(f"Data type: {data_type} not supported.")
898
+ literal.append({"type": data_type, "value": val})
899
+
900
+ if "qualifiers" in claim:
901
+ for p, qual in claim["qualifiers"].items():
902
+ for elem in qual:
903
+ if "datavalue" in elem:
904
+ qualifiers.append(
905
+ {
906
+ "property": p,
907
+ "datatype": elem["datavalue"]["type"],
908
+ "value": elem["datavalue"]["value"],
909
+ }
910
+ )
911
+ except Exception as e:
912
+ logger.exception(e)
913
+ thing.add_claim(pid, Claim(WikidataProperty(pid), literal, qualifiers))
914
+ # Extract sitelinks
915
+ if SITELINKS_TAG in entity_dict:
916
+ for source, sitelink in entity_dict[SITELINKS_TAG].items():
917
+ try:
918
+ start_idx = source.find("wiki")
919
+ language_code: str = source[:start_idx]
920
+ wiki_source: str = source[start_idx:]
921
+ url: Optional[str] = sitelink.get("url")
922
+ title: Optional[str] = sitelink.get("title")
923
+ if wiki_source not in thing.sitelinks:
924
+ thing.sitelinks[wiki_source] = SiteLinks(source=wiki_source)
925
+ if url and language_code not in thing.sitelinks[wiki_source].urls:
926
+ thing.sitelinks[wiki_source].urls[language_code] = requests.utils.unquote(url)
927
+ if title and language_code not in thing.sitelinks[wiki_source].titles:
928
+ thing.sitelinks[wiki_source].titles[language_code] = title
929
+ except Exception as e:
930
+ logger.warning(f"Unexpected source: {source}. Exception: {e}")
931
+ return thing
932
+
933
+ @property
934
+ def claims(self) -> Dict[str, Claim]:
935
+ """Returns the claims."""
936
+ return self.__claims
937
+
938
+ @property
939
+ def claims_dict(self) -> Dict[str, Claim]:
940
+ """Returns the claims as a dictionary."""
941
+ return dict(list(self.__claims.items()))
942
+
943
+ @property
944
+ def claim_properties(self) -> List[WikidataProperty]:
945
+ """Returns the list of properties of the claims."""
946
+ return [p.pid for p in self.__claims.values()]
947
+
948
+ def add_claim(self, pid: str, claim: Claim):
949
+ """
950
+ Adding a claim.
951
+
952
+ Parameters
953
+ ----------
954
+ pid: str
955
+ Property ID.
956
+ claim: Claim
957
+ Wikidata claim
958
+ """
959
+ self.__claims[pid] = claim
960
+
961
+ def __hash__(self):
962
+ return 0
963
+
964
+ def __eq__(self, other):
965
+ # another object is equal to self, iff
966
+ # it is an instance of MyClass
967
+ return isinstance(other, WikidataThing) and other.qid == self.qid
968
+
969
+ def __repr__(self):
970
+ return f"<WikidataThing [QID:={self.qid}]>"
971
+
972
+ def __getstate__(self) -> Dict[str, Any]:
973
+ return self.__dict__().copy()
974
+
975
+ def __setstate__(self, state: Dict[str, Any]):
976
+ labels: Dict[str, Label] = {}
977
+ aliases: Dict[str, List[Label]] = {}
978
+ descriptions: Dict[str, Description] = {}
979
+ for language, la in state[LABELS_TAG].items():
980
+ labels[language] = Label.create_from_dict(la)
981
+ for language, de in state[DESCRIPTIONS_TAG].items():
982
+ descriptions[language] = Description.create_from_dict(de)
983
+ for language, al in state[ALIASES_TAG].items():
984
+ aliases[language] = []
985
+ for a in al:
986
+ aliases[language].append(Label.create_from_dict(a))
987
+ # Initiate the wikidata thing
988
+ self.__qid = state[QID_TAG]
989
+ self.__revision = state.get(REVISION_TAG)
990
+ self.__modified = parse_date(state[MODIFIED_TAG]) if MODIFIED_TAG in state else None
991
+ self.__label = labels
992
+ self.__aliases = aliases
993
+ self.__description = descriptions
994
+ # Load the ontology types
995
+ self.__ontology_types = state.get(ONTOLOGY_TYPES_TAG, [])
996
+ # Load the claims
997
+ self.__claims = {}
998
+ for pid, claim in state[CLAIMS_TAG].items():
999
+ self.__claims[pid] = Claim.create_from_dict(claim)
1000
+ # Load the sitelinks
1001
+ self.__sitelinks = {}
1002
+ for wiki_source, site_link in state[SITELINKS_TAG].items():
1003
+ self.__sitelinks[wiki_source] = SiteLinks.create_from_dict(site_link)
1004
+
1005
+
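# Illustrative sketch (not part of the packaged file): assembling a
# WikidataThing by hand. QID, revision, labels and descriptions are
# assumptions for demonstration only.
from datetime import datetime, timezone
from knowledge.public.wikidata import WikidataThing

thing = WikidataThing(revision="1", qid="Q64", modified=datetime.now(timezone.utc))
thing.add_label("Berlin", "en_US")
thing.add_description("Capital and largest city of Germany", "en_US")
thing.add_alias("Berlin, Germany", "en_US")
print(thing.label_lang("en_US"), thing.description_languages)  # label + ['en_US']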
1006
+ class WikiDataAPIClient(ABC):
1007
+ """
1008
+ WikiDataAPIClient
1009
+ -----------------
1010
+ Utility class for the WikiData.
1011
+
1012
+ """
1013
+
1014
+ def __init__(self):
1015
+ pass
1016
+
1017
+ @staticmethod
1018
+ def sparql_query(query_string: str, wikidata_sparql_url: str = WIKIDATA_SPARQL_URL, max_retries: int = 3) -> dict:
1019
+ """Send a SPARQL query and return the JSON formatted result.
1020
+
1021
+ Parameters
1022
+ -----------
1023
+ query_string: str
1024
+ SPARQL query string
1025
+ wikidata_sparql_url: str
1026
+ Wikidata SPARQL endpoint to use
1027
+ max_retries: int
1028
+ Maximum number of retries
1029
+ """
1030
+ # Define the retry policy
1031
+ retry_policy: Retry = Retry(
1032
+ total=max_retries, # maximum number of retries
1033
+ backoff_factor=1, # factor by which to multiply the delay between retries
1034
+ status_forcelist=[429, 500, 502, 503, 504], # HTTP status codes to retry on
1035
+ respect_retry_after_header=True, # respect the Retry-After header
1036
+ )
1037
+
1038
+ # Create a session and mount the retry adapter
1039
+ with requests.Session() as session:
1040
+ retry_adapter = HTTPAdapter(max_retries=retry_policy)
1041
+ session.mount("https://", retry_adapter)
1042
+
1043
+ # Make a request using the session
1044
+ response: Response = session.get(
1045
+ wikidata_sparql_url, params={"query": query_string, "format": "json"}, timeout=10000
1046
+ )
1047
+ if response.ok:
1048
+ return response.json()
1049
+
1050
+ raise WikiDataAPIException(
1051
+ f"Failed to query entities. " f"Response code:={response.status_code}, Exception:= {response.content}."
1052
+ )
1053
+
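# Example call (illustrative): the superclasses()/subclasses() helpers below
# use this method with a P279 ("subclass of") query, e.g.
#   WikiDataAPIClient.sparql_query(
#       "SELECT ?class WHERE { wd:Q146 wdt:P279 ?class. } LIMIT 10"
#   )
# which returns the endpoint's JSON with its "results"/"bindings" structure.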
1054
+ @staticmethod
1055
+ def superclasses(qid: str) -> Dict[str, WikidataClass]:
1056
+ """
1057
+ Returns the Wikidata class with all its superclasses for the given QID.
1058
+
1059
+ Parameters
1060
+ ----------
1061
+ qid: str
1062
+ Wikidata QID (e.g., 'Q146' for house cat).
1063
+
1064
+ Returns
1065
+ -------
1066
+ classes: Dict[str, WikidataClass]
1067
+ A dictionary of WikidataClass objects, where the keys are QIDs and the values are the corresponding
1068
+ classes with their superclasses populated.
1068
+ """
1069
+ # Fetch superclasses
1070
+ query = f"""
1071
+ SELECT DISTINCT ?class ?classLabel ?superclass ?superclassLabel
1072
+ WHERE
1073
+ {{
1074
+ wd:{qid} wdt:P279* ?class.
1075
+ ?class wdt:P279 ?superclass.
1076
+ SERVICE wikibase:label {{bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }}
1077
+ }}
1078
+ """
1079
+ try:
1080
+ reply: Dict[str, Any] = WikiDataAPIClient.sparql_query(query)
1081
+ wikidata_classes: Dict[str, WikidataClass] = {}
1082
+ cycle_detector: Set[Tuple[str, str]] = set()
1083
+ adjacency_list: Dict[str, Set[str]] = {}
1084
+
1085
+ if "results" in reply:
1086
+ for b in reply["results"]["bindings"]:
1087
+ superclass_qid = b["superclass"]["value"].rsplit("/", 1)[-1]
1088
+ class_qid = b["class"]["value"].rsplit("/", 1)[-1]
1089
+ superclass_label = b["superclassLabel"]["value"]
1090
+ class_label = b["classLabel"]["value"]
1091
+
1092
+ wikidata_classes.setdefault(class_qid, WikidataClass(class_qid, class_label))
1093
+ wikidata_classes.setdefault(superclass_qid, WikidataClass(superclass_qid, superclass_label))
1094
+
1095
+ adjacency_list.setdefault(class_qid, set()).add(superclass_qid)
1096
+ except Exception as e:
1097
+ logger.exception(e)
1098
+ return {qid: WikidataClass(qid, f"Class {qid}")}
1099
+ queue = deque([qid])
1100
+ visited = set()
1101
+
1102
+ while queue:
1103
+ current_qid = queue.popleft()
1104
+ if current_qid in visited:
1105
+ continue
1106
+ visited.add(current_qid)
1107
+
1108
+ if current_qid in adjacency_list:
1109
+ for superclass_qid in adjacency_list[current_qid]:
1110
+ if (current_qid, superclass_qid) not in cycle_detector:
1111
+ wikidata_classes[current_qid].superclasses.append(wikidata_classes[superclass_qid])
1112
+ queue.append(superclass_qid)
1113
+ cycle_detector.add((current_qid, superclass_qid))
1114
+
1115
+ return wikidata_classes
1116
+
1117
+ @staticmethod
1118
+ def subclasses(qid: str) -> Dict[str, WikidataClass]:
1119
+ """
1120
+ Returns the Wikidata class with all its subclasses for the given QID.
1121
+
1122
+ Parameters
1123
+ ----------
1124
+ qid: str
1125
+ Wikidata QID (e.g., 'Q146' for house cat).
1126
+
1127
+ Returns
1128
+ -------
1129
+ classes: Dict[str, WikidataClass]
1130
+ A dictionary of WikidataClass objects, where the keys are QIDs and the values are the corresponding
1131
+ classes with their subclasses populated.
1132
+ """
1133
+ # Fetch subclasses
1134
+ query = f"""
1135
+ SELECT DISTINCT ?class ?classLabel ?subclass ?subclassLabel
1136
+ WHERE
1137
+ {{
1138
+ ?subclass wdt:P279 wd:{qid}.
1139
+ ?subclass wdt:P279 ?class.
1140
+ SERVICE wikibase:label {{ bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }}
1141
+ }}
1142
+ LIMIT 1000
1143
+ """
1144
+ try:
1145
+ reply: Dict[str, Any] = WikiDataAPIClient.sparql_query(query)
1146
+ wikidata_classes: Dict[str, WikidataClass] = {}
1147
+ cycle_detector: Set[Tuple[str, str]] = set()
1148
+ adjacency_list: Dict[str, Set[str]] = {}
1149
+
1150
+ if "results" in reply:
1151
+ for b in reply["results"]["bindings"]:
1152
+ subclass_qid = b["subclass"]["value"].rsplit("/", 1)[-1]
1153
+ class_qid = b["class"]["value"].rsplit("/", 1)[-1]
1154
+ subclass_label = b["subclassLabel"]["value"]
1155
+ class_label = b["classLabel"]["value"]
1156
+
1157
+ wikidata_classes.setdefault(class_qid, WikidataClass(class_qid, class_label))
1158
+ wikidata_classes.setdefault(subclass_qid, WikidataClass(subclass_qid, subclass_label))
1159
+
1160
+ # subclass -> class relationship (reverse of superclass logic)
1161
+ adjacency_list.setdefault(class_qid, set()).add(subclass_qid)
1162
+ except Exception as e:
1163
+ logger.exception(e)
1164
+ return {qid: WikidataClass(qid, f"Class {qid}")}
1165
+
1166
+ queue = deque([qid])
1167
+ visited = set()
1168
+
1169
+ while queue:
1170
+ current_qid = queue.popleft()
1171
+ if current_qid in visited:
1172
+ continue
1173
+ visited.add(current_qid)
1174
+
1175
+ # Ensure the starting QID is in the dictionary
1176
+ if current_qid not in wikidata_classes:
1177
+ # If not present, we might need to fetch its label separately
1178
+ wikidata_classes[current_qid] = WikidataClass(current_qid, f"Class {current_qid}")
1179
+
1180
+ if current_qid in adjacency_list:
1181
+ for subclass_qid in adjacency_list[current_qid]:
1182
+ if (current_qid, subclass_qid) not in cycle_detector:
1183
+ wikidata_classes[current_qid].subclasses.append(wikidata_classes[subclass_qid])
1184
+ queue.append(subclass_qid)
1185
+ cycle_detector.add((current_qid, subclass_qid))
1186
+
1187
+ return wikidata_classes
1188
+
1189
+ @staticmethod
1190
+ def search_term(
1191
+ search_term: str, language: LanguageCode, url: str = WIKIDATA_SEARCH_URL
1192
+ ) -> List[WikidataSearchResult]:
1193
+ """
1194
+ Search for a term in the WikiData.
1195
+ Parameters
1196
+ ----------
1197
+ search_term: str
1198
+ The term to search for.
1199
+ language: str
1200
+ The language to search in.
1201
+ url: str
1202
+ The URL of the WikiData search API.
1203
+
1204
+ Returns
1205
+ -------
1206
+ search_results_dict: List[WikidataSearchResult]
1207
+ The search results.
1208
+ """
1209
+ search_results_dict: List[WikidataSearchResult] = []
1210
+ # Define the retry policy
1211
+ retry_policy: Retry = Retry(
1212
+ total=3, # maximum number of retries
1213
+ backoff_factor=1, # factor by which to multiply the delay between retries
1214
+ status_forcelist=[429, 500, 502, 503, 504], # HTTP status codes to retry on
1215
+ respect_retry_after_header=True, # respect the Retry-After header
1216
+ )
1217
+
1218
+ # Create a session and mount the retry adapter
1219
+ with requests.Session() as session:
1220
+ retry_adapter = HTTPAdapter(max_retries=retry_policy)
1221
+ session.mount("https://", retry_adapter)
1222
+ params: Dict[str, str] = {
1223
+ "action": "wbsearchentities",
1224
+ "format": "json",
1225
+ "language": language,
1226
+ "search": search_term,
1227
+ }
1228
+ # Make a request using the session
1229
+ response: Response = session.get(url, params=params, timeout=200000)
1230
+
1231
+ # Check the response status code
1232
+ if not response.ok:
1233
+ raise WikiDataAPIException(
1234
+ f"Search request failed with status code : {response.status_code}. " f"URL:= {url}"
1235
+ )
1236
+ search_result_dict_full: Dict[str, Any] = response.json()
1237
+ for search_result_dict in search_result_dict_full["search"]:
1238
+ search_results_dict.append(WikidataSearchResult.from_dict(search_result_dict))
1239
+ return search_results_dict
1240
+
1241
+ @staticmethod
1242
+ def __wikidata_task__(qid: str) -> WikidataThing:
1243
+ """Retrieve a single Wikidata thing.
1244
+
1245
+ Parameters
1246
+ ----------
1247
+ qid: str
1248
+ QID of the entity.
1249
+
1250
+ Returns
1251
+ -------
1252
+ instance: WikidataThing
1253
+ Single wikidata thing
1254
+ """
1255
+ try:
1256
+ return WikidataThing.from_wikidata(__waiting_request__(qid))
1257
+ except Exception as e:
1258
+ logger.exception(e)
1259
+ raise WikiDataAPIException(e) from e
1260
+
1261
+ @staticmethod
1262
+ def __wikidata_multiple_task__(qids: List[str]) -> List[WikidataThing]:
1263
+ """Retrieve multiple Wikidata things.
1264
+
1265
+ Parameters
1266
+ ----------
1267
+ qids: List[str]
1268
+ QIDs of the entities.
1269
+
1270
+ Returns
1271
+ -------
1272
+ instances: List[WikidataThing]
1273
+ List of wikidata things
1274
+ """
1275
+ try:
1276
+ return [WikidataThing.from_wikidata(e) for e in __waiting_multi_request__(qids)]
1277
+ except Exception as e:
1278
+ logger.exception(e)
1279
+ raise WikiDataAPIException(e) from e
1280
+
1281
+ @staticmethod
1282
+ def retrieve_entity(qid: str) -> WikidataThing:
1283
+ """
1284
+ Retrieve a single Wikidata thing.
1285
+
1286
+ Parameters
1287
+ ----------
1288
+ qid: str
1289
+ QID of the entity.
1290
+
1291
+ Returns
1292
+ -------
1293
+ instance: WikidataThing
1294
+ Single wikidata thing
1295
+ """
1296
+ return WikiDataAPIClient.__wikidata_task__(qid)
1297
+
1298
+ @staticmethod
1299
+ def retrieve_entities(qids: Union[List[str], Set[str]]) -> List[WikidataThing]:
1300
+ """
1301
+ Retrieve multiple Wikidata things.
1302
+ Parameters
1303
+ ----------
1304
+ qids: List[str]
1305
+ QIDs of the entities.
1306
+
1307
+ Returns
1308
+ -------
1309
+ instances: List[WikidataThing]
1310
+ List of wikidata things.
1311
+ """
1312
+ pulled: List[WikidataThing] = []
1313
+ if len(qids) == 0:
1314
+ return []
1315
+ jobs: List[List[str]] = list(chunks(list(qids), API_LIMIT))
1316
+ num_processes: int = min(len(jobs), multiprocessing.cpu_count())
1317
+ if num_processes > 1:
1318
+ with Pool(processes=num_processes) as pool:
1319
+ # WikidataThing is not supported in multiprocessing; fetch raw dicts and convert afterwards
1320
+ for lst in pool.imap_unordered(__waiting_multi_request__, jobs):
1321
+ pulled.extend([WikidataThing.from_wikidata(e) for e in lst])
1322
+ else:
1323
+ pulled = WikiDataAPIClient.__wikidata_multiple_task__(jobs[0])
1324
+ return pulled
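# ---------------------------------------------------------------------------
# Illustrative end-to-end sketch (not part of the packaged file). It assumes
# network access to the public Wikidata endpoints configured above and that
# this module is importable as knowledge.public.wikidata.
from knowledge.base.entity import LanguageCode
from knowledge.public.wikidata import WikiDataAPIClient

if __name__ == "__main__":
    # Full-text search, then pull the complete entities for the hits.
    hits = WikiDataAPIClient.search_term("Graz", LanguageCode("en"))
    things = WikiDataAPIClient.retrieve_entities({hit.qid for hit in hits})
    for thing in things:
        print(thing.qid, thing.label_lang("en_US"))
    # Superclass hierarchy for "house cat" (Q146), as in the docstring example.
    hierarchy = WikiDataAPIClient.superclasses("Q146")
    print(len(hierarchy), "classes in the superclass closure of Q146")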