personal_knowledge_library 3.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of personal_knowledge_library might be problematic. Click here for more details.

Files changed (42) hide show
  1. knowledge/__init__.py +91 -0
  2. knowledge/base/__init__.py +22 -0
  3. knowledge/base/access.py +167 -0
  4. knowledge/base/entity.py +267 -0
  5. knowledge/base/language.py +27 -0
  6. knowledge/base/ontology.py +2734 -0
  7. knowledge/base/search.py +473 -0
  8. knowledge/base/tenant.py +192 -0
  9. knowledge/nel/__init__.py +11 -0
  10. knowledge/nel/base.py +495 -0
  11. knowledge/nel/engine.py +123 -0
  12. knowledge/ontomapping/__init__.py +667 -0
  13. knowledge/ontomapping/manager.py +320 -0
  14. knowledge/public/__init__.py +27 -0
  15. knowledge/public/cache.py +115 -0
  16. knowledge/public/helper.py +373 -0
  17. knowledge/public/relations.py +128 -0
  18. knowledge/public/wikidata.py +1324 -0
  19. knowledge/services/__init__.py +128 -0
  20. knowledge/services/asyncio/__init__.py +7 -0
  21. knowledge/services/asyncio/base.py +458 -0
  22. knowledge/services/asyncio/graph.py +1420 -0
  23. knowledge/services/asyncio/group.py +450 -0
  24. knowledge/services/asyncio/search.py +439 -0
  25. knowledge/services/asyncio/users.py +270 -0
  26. knowledge/services/base.py +533 -0
  27. knowledge/services/graph.py +1897 -0
  28. knowledge/services/group.py +819 -0
  29. knowledge/services/helper.py +142 -0
  30. knowledge/services/ontology.py +1234 -0
  31. knowledge/services/search.py +488 -0
  32. knowledge/services/session.py +444 -0
  33. knowledge/services/tenant.py +281 -0
  34. knowledge/services/users.py +445 -0
  35. knowledge/utils/__init__.py +10 -0
  36. knowledge/utils/graph.py +417 -0
  37. knowledge/utils/wikidata.py +197 -0
  38. knowledge/utils/wikipedia.py +175 -0
  39. personal_knowledge_library-3.0.0.dist-info/LICENSE +201 -0
  40. personal_knowledge_library-3.0.0.dist-info/METADATA +1163 -0
  41. personal_knowledge_library-3.0.0.dist-info/RECORD +42 -0
  42. personal_knowledge_library-3.0.0.dist-info/WHEEL +4 -0
@@ -0,0 +1,488 @@
1
+ # -*- coding: utf-8 -*-
2
+ # Copyright © 2024-present Wacom. All rights reserved.
3
+ from typing import Dict, Any, Optional, List
4
+
5
+ import requests
6
+ from requests.adapters import HTTPAdapter
7
+ from urllib3 import Retry
8
+
9
+ from knowledge.base.language import LocaleCode
10
+ from knowledge.base.search import DocumentSearchResponse, LabelMatchingResponse, VectorDBDocument
11
+ from knowledge.services import (
12
+ DEFAULT_TIMEOUT,
13
+ AUTHORIZATION_HEADER_FLAG,
14
+ APPLICATION_JSON_HEADER,
15
+ CONTENT_TYPE_HEADER_FLAG,
16
+ USER_AGENT_HEADER_FLAG,
17
+ STATUS_FORCE_LIST,
18
+ )
19
+ from knowledge.services.base import WacomServiceAPIClient, handle_error
20
+
21
+
22
class SemanticSearchClient(WacomServiceAPIClient):
    """
    Semantic Search Client
    ======================
    Client for searching semantically similar documents and labels.

    Parameters
    ----------
    service_url: str
        Service URL for the client.
    service_endpoint: str (Default:= 'vector/api/v1')
        Service endpoint for the client.
    """

    def __init__(self, service_url: str, service_endpoint: str = "vector/api/v1"):
        # NOTE(review): the application name reads "Async ..." although this is the blocking client. It is kept
        # byte-for-byte because its use inside the base class is not visible here - confirm before renaming.
        super().__init__("Async Semantic Search ", service_url, service_endpoint)

    def _search_request_headers(self, auth_key: Optional[str]) -> Dict[str, str]:
        """
        Build the request headers shared by all calls of this client.

        Parameters
        ----------
        auth_key: Optional[str]
            Token to send as bearer token. If None, the first element returned by `handle_token()` is used.

        Returns
        -------
        headers: Dict[str, str]
            User agent, JSON content type and bearer authorization headers.
        """
        if auth_key is None:
            auth_key, _ = self.handle_token()
        return {
            USER_AGENT_HEADER_FLAG: self.user_agent,
            CONTENT_TYPE_HEADER_FLAG: APPLICATION_JSON_HEADER,
            AUTHORIZATION_HEADER_FLAG: f"Bearer {auth_key}",
        }

    def _search_retry_session(self, max_retries: int, backoff_factor: float) -> requests.Session:
        """
        Create a session whose adapter retries failed requests.

        Parameters
        ----------
        max_retries: int
            Maximum number of retries
        backoff_factor: float
            A backoff factor to apply between attempts after the second try.

        Returns
        -------
        session: requests.Session
            Session with a retrying adapter mounted; the caller is responsible for closing it
            (e.g., by using it in a `with` statement).
        """
        # Only the scheme of the configured service URL gets the retrying adapter.
        mount_point: str = "https://" if self.service_url.startswith("https") else "http://"
        retries: Retry = Retry(total=max_retries, backoff_factor=backoff_factor, status_forcelist=STATUS_FORCE_LIST)
        session: requests.Session = requests.Session()
        session.mount(mount_point, HTTPAdapter(max_retries=retries))
        return session

    def retrieve_documents_chunks(
        self,
        locale: LocaleCode,
        uri: str,
        max_retries: int = 3,
        backoff_factor: float = 0.1,
        auth_key: Optional[str] = None,
    ) -> List[VectorDBDocument]:
        """
        Retrieve document chunks from vector database. The service is automatically chunking the document into
        smaller parts. The chunks are returned as a list of dictionaries, with metadata and content.

        Parameters
        ----------
        locale: LocaleCode
            ISO-3166 Country Codes and ISO-639 Language Codes in the format '<language_code>_<country>', e.g., en_US.
        uri: str
            URI of the document
        max_retries: int
            Maximum number of retries
        backoff_factor: float
            A backoff factor to apply between attempts after the second try.
        auth_key: Optional[str] (Default:= None)
            If the auth key is set the logged-in user (if any) will be ignored and the auth key will be used.

        Returns
        -------
        document: List[VectorDBDocument]:
            List of document chunks with metadata and content related to the document.

        Raises
        ------
        WacomServiceException
            If the request fails.
        """
        headers: Dict[str, str] = self._search_request_headers(auth_key)
        url: str = f"{self.service_base_url}documents/"
        params: Dict[str, Any] = {"locale": locale, "uri": uri}
        with self._search_retry_session(max_retries, backoff_factor) as session:
            response = session.get(url, params=params, headers=headers, timeout=DEFAULT_TIMEOUT)
            if response.ok:
                return [VectorDBDocument(elem) for elem in response.json()]
            raise handle_error("Failed to retrieve the document.", response, headers=headers, parameters=params)

    def retrieve_labels(
        self,
        locale: LocaleCode,
        uri: str,
        max_retries: int = 3,
        backoff_factor: float = 0.1,
        auth_key: Optional[str] = None,
    ) -> List[VectorDBDocument]:
        """
        Retrieve labels from vector database.

        Parameters
        ----------
        locale: LocaleCode
            ISO-3166 Country Codes and ISO-639 Language Codes in the format '<language_code>_<country>', e.g., en_US.
        uri: str
            URI of the entity
        max_retries: int
            Maximum number of retries
        backoff_factor: float
            A backoff factor to apply between attempts after the second try.
        auth_key: Optional[str] (Default:= None)
            If the auth key is set the logged-in user (if any) will be ignored and the auth key will be used.

        Returns
        -------
        document: List[VectorDBDocument]
            List of labels with metadata and content related to the entity with uri.

        Raises
        ------
        WacomServiceException
            If the request fails.
        """
        headers: Dict[str, str] = self._search_request_headers(auth_key)
        url: str = f"{self.service_base_url}labels/"
        params: Dict[str, Any] = {"uri": uri, "locale": locale}
        with self._search_retry_session(max_retries, backoff_factor) as session:
            response = session.get(url, params=params, headers=headers, timeout=DEFAULT_TIMEOUT)
            if response.ok:
                return [VectorDBDocument(elem) for elem in response.json()]
            raise handle_error("Failed to retrieve the labels.", response, headers=headers, parameters=params)

    def count_documents(
        self,
        locale: LocaleCode,
        concept_type: Optional[str] = None,
        auth_key: Optional[str] = None,
        max_retries: int = 3,
        backoff_factor: float = 0.1,
    ) -> int:
        """
        Count all documents for a tenant.

        Parameters
        ----------
        locale: LocaleCode
            ISO-3166 Country Codes and ISO-639 Language Codes in the format '<language_code>_<country>', e.g., en_US.
        concept_type: Optional[str] (Default:= None)
            Concept type; only sent to the service if it is set.
        auth_key: Optional[str] (Default:= None)
            If the auth key is set the logged-in user (if any) will be ignored and the auth key will be used.
        max_retries: int
            Maximum number of retries
        backoff_factor: float
            A backoff factor to apply between attempts after the second try.

        Returns
        -------
        number_of_docs: int
            Number of documents (0 if the response does not contain a 'count' entry).

        Raises
        ------
        WacomServiceException
            If the request fails.
        """
        headers: Dict[str, str] = self._search_request_headers(auth_key)
        url: str = f"{self.service_base_url}documents/count/"
        params: Dict[str, Any] = {"locale": locale}
        if concept_type:
            params["concept_type"] = concept_type
        with self._search_retry_session(max_retries, backoff_factor) as session:
            # Explicit timeout: without it, requests would wait indefinitely on a stalled server.
            response = session.get(url, params=params, headers=headers, timeout=DEFAULT_TIMEOUT)
            if response.ok:
                return response.json().get("count", 0)
            # Report the parameters that were actually sent, including the optional concept type.
            raise handle_error("Counting documents failed.", response, headers=headers, parameters=params)

    def count_documents_filter(
        self,
        locale: LocaleCode,
        filters: Dict[str, Any],
        auth_key: Optional[str] = None,
        max_retries: int = 3,
        backoff_factor: float = 0.1,
    ) -> int:
        """
        Count all documents for a tenant with filters.

        Parameters
        ----------
        locale: LocaleCode
            ISO-3166 Country Codes and ISO-639 Language Codes in the format '<language_code>_<country>', e.g., en_US.
        filters: Dict[str, Any]
            Filters for the search
        auth_key: Optional[str] (Default:= None)
            If the auth key is set the logged-in user (if any) will be ignored and the auth key will be used.
        max_retries: int
            Maximum number of retries
        backoff_factor: float
            A backoff factor to apply between attempts after the second try.

        Returns
        -------
        number_of_docs: int
            Number of documents (0 if the response does not contain a 'count' entry).

        Raises
        ------
        WacomServiceException
            If the request fails.
        """
        headers: Dict[str, str] = self._search_request_headers(auth_key)
        url: str = f"{self.service_base_url}documents/count/filter"
        payload: Dict[str, Any] = {"locale": locale, "filter": filters}
        with self._search_retry_session(max_retries, backoff_factor) as session:
            # Explicit timeout: without it, requests would wait indefinitely on a stalled server.
            response = session.post(url, json=payload, headers=headers, timeout=DEFAULT_TIMEOUT)
            if response.ok:
                return response.json().get("count", 0)
            raise handle_error("Counting documents failed.", response, headers=headers, parameters=payload)

    def count_labels(
        self,
        locale: LocaleCode,
        concept_type: Optional[str] = None,
        auth_key: Optional[str] = None,
        max_retries: int = 3,
        backoff_factor: float = 0.1,
    ) -> int:
        """
        Count all labels entries for a tenant.

        Parameters
        ----------
        locale: LocaleCode
            ISO-3166 Country Codes and ISO-639 Language Codes in the format '<language_code>_<country>', e.g., en_US.
        concept_type: Optional[str] (Default:= None)
            Concept type; only sent to the service if it is set.
        auth_key: Optional[str] (Default:= None)
            If the auth key is set the logged-in user (if any) will be ignored and the auth key will be used.
        max_retries: int
            Maximum number of retries
        backoff_factor: float
            A backoff factor to apply between attempts after the second try.

        Returns
        -------
        count: int
            Number of labels (0 if the response does not contain a 'count' entry).

        Raises
        ------
        WacomServiceException
            If the request fails.
        """
        headers: Dict[str, str] = self._search_request_headers(auth_key)
        url: str = f"{self.service_base_url}labels/count/"
        params: Dict[str, Any] = {"locale": locale}
        if concept_type:
            params["concept_type"] = concept_type
        with self._search_retry_session(max_retries, backoff_factor) as session:
            # Explicit timeout: without it, requests would wait indefinitely on a stalled server.
            response = session.get(url, params=params, headers=headers, timeout=DEFAULT_TIMEOUT)
            if response.ok:
                return response.json().get("count", 0)
            # Report the parameters that were actually sent, including the optional concept type.
            raise handle_error("Counting labels failed.", response, headers=headers, parameters=params)

    def count_labels_filter(
        self,
        locale: LocaleCode,
        filters: Dict[str, Any],
        auth_key: Optional[str] = None,
        max_retries: int = 3,
        backoff_factor: float = 0.1,
    ) -> int:
        """
        Count all labels for a tenant with filters.

        Parameters
        ----------
        locale: LocaleCode
            ISO-3166 Country Codes and ISO-639 Language Codes in the format '<language_code>_<country>', e.g., en_US.
        filters: Dict[str, Any]
            Filters for the search
        auth_key: Optional[str] (Default:= None)
            If the auth key is set the logged-in user (if any) will be ignored and the auth key will be used.
        max_retries: int
            Maximum number of retries
        backoff_factor: float
            A backoff factor to apply between attempts after the second try.

        Returns
        -------
        number_of_labels: int
            Number of labels (0 if the response does not contain a 'count' entry).

        Raises
        ------
        WacomServiceException
            If the request fails.
        """
        headers: Dict[str, str] = self._search_request_headers(auth_key)
        url: str = f"{self.service_base_url}labels/count/filter"
        payload: Dict[str, Any] = {"locale": locale, "filter": filters}
        with self._search_retry_session(max_retries, backoff_factor) as session:
            # Explicit timeout: without it, requests would wait indefinitely on a stalled server.
            response = session.post(url, json=payload, headers=headers, timeout=DEFAULT_TIMEOUT)
            if response.ok:
                return response.json().get("count", 0)
            raise handle_error("Counting labels failed.", response, headers=headers, parameters=payload)

    def document_search(
        self,
        query: str,
        locale: LocaleCode,
        filters: Optional[Dict[str, Any]] = None,
        max_results: int = 10,
        max_retries: int = 3,
        backoff_factor: float = 0.1,
        auth_key: Optional[str] = None,
    ) -> DocumentSearchResponse:
        """
        Semantic search for documents.

        Parameters
        ----------
        query: str
            Query text for the search
        locale: LocaleCode
            ISO-3166 Country Codes and ISO-639 Language Codes in the format '<language_code>_<country>', e.g., en_US.
        filters: Optional[Dict[str, Any]] = None
            Filters for the search; sent as 'metadata' (an empty dictionary if not set).
        max_results: int
            Maximum number of results
        max_retries: int
            Maximum number of retries
        backoff_factor: float
            A backoff factor to apply between attempts after the second try.
        auth_key: Optional[str] (Default:= None)
            If the auth key is set the logged-in user (if any) will be ignored and the auth key will be used.

        Returns
        -------
        search_results: DocumentSearchResponse
            Search results response.

        Raises
        ------
        WacomServiceException
            If the request fails.
        """
        headers: Dict[str, str] = self._search_request_headers(auth_key)
        url: str = f"{self.service_base_url}documents/search/"
        params: Dict[str, Any] = {
            "query": query,
            "metadata": filters if filters else {},
            "locale": locale,
            "max_results": max_results,
        }
        with self._search_retry_session(max_retries, backoff_factor) as session:
            # Explicit timeout: without it, requests would wait indefinitely on a stalled server.
            response = session.post(url, headers=headers, json=params, timeout=DEFAULT_TIMEOUT)
            if response.ok:
                response_dict: Dict[str, Any] = response.json()
                return DocumentSearchResponse.from_dict(response_dict)
            raise handle_error("Semantic Search failed.", response, headers=headers, parameters=params)

    def labels_search(
        self,
        query: str,
        locale: LocaleCode,
        filters: Optional[Dict[str, Any]] = None,
        max_results: int = 10,
        max_retries: int = 3,
        backoff_factor: float = 0.1,
        auth_key: Optional[str] = None,
    ) -> LabelMatchingResponse:
        """
        Search for semantically similar labels.

        Parameters
        ----------
        query: str
            Query text for the search
        locale: LocaleCode
            ISO-3166 Country Codes and ISO-639 Language Codes in the format '<language_code>_<country>', e.g., en_US.
        filters: Optional[Dict[str, Any]] = None
            Filters for the search; sent as 'metadata' (an empty dictionary if not set).
        max_results: int
            Maximum number of results
        max_retries: int
            Maximum number of retries
        backoff_factor: float
            A backoff factor to apply between attempts after the second try.
        auth_key: Optional[str] (Default:= None)
            If the auth key is set the logged-in user (if any) will be ignored and the auth key will be used.

        Returns
        -------
        list_entities: LabelMatchingResponse
            Search results response.

        Raises
        ------
        WacomServiceException
            If the request fails.
        """
        headers: Dict[str, str] = self._search_request_headers(auth_key)
        url: str = f"{self.service_base_url}labels/match/"
        params: Dict[str, Any] = {
            "query": query,
            "metadata": filters if filters else {},
            "locale": locale,
            "max_results": max_results,
        }
        with self._search_retry_session(max_retries, backoff_factor) as session:
            # Explicit timeout: without it, requests would wait indefinitely on a stalled server.
            response = session.post(url, headers=headers, json=params, timeout=DEFAULT_TIMEOUT)
            if response.ok:
                response_dict: Dict[str, Any] = response.json()
                return LabelMatchingResponse.from_dict(response_dict)
            raise handle_error("Label fuzzy matching failed.", response, headers=headers, parameters=params)