personal_knowledge_library 3.1.1__py3-none-any.whl → 3.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of personal_knowledge_library might be problematic. Click here for more details.

@@ -36,6 +36,7 @@ from knowledge.base.ontology import (
36
36
  ObjectProperty,
37
37
  EN_US,
38
38
  )
39
+ from knowledge.base.response import JobStatus, ErrorLogResponse, NewEntityUrisResponse
39
40
  from knowledge.services import (
40
41
  AUTHORIZATION_HEADER_FLAG,
41
42
  APPLICATION_JSON_HEADER,
@@ -69,6 +70,7 @@ from knowledge.services import (
69
70
  DEFAULT_BACKOFF_FACTOR,
70
71
  ENTITIES_TAG,
71
72
  NEL_PARAM,
73
+ IndexType,
72
74
  )
73
75
  from knowledge.services.base import (
74
76
  WacomServiceAPIClient,
@@ -161,6 +163,7 @@ class WacomKnowledgeService(WacomServiceAPIClient):
161
163
  SEARCH_RELATION_ENDPOINT: str = "semantic-search/relation"
162
164
  ONTOLOGY_UPDATE_ENDPOINT: str = "ontology-update"
163
165
  IMPORT_ENTITIES_ENDPOINT: str = "import"
166
+ IMPORT_ERROR_LOG_ENDPOINT: str = "import/errorlog"
164
167
  REBUILD_VECTOR_SEARCH_INDEX: str = "vector-search/rebuild"
165
168
  REBUILD_NEL_INDEX: str = "nel/rebuild"
166
169
 
@@ -577,6 +580,127 @@ class WacomKnowledgeService(WacomServiceAPIClient):
577
580
  if not response.ok:
578
581
  raise handle_error("Updating entity failed.", response)
579
582
 
583
+ def add_entity_indexes(
584
+ self,
585
+ entity_uri: str,
586
+ targets: List[IndexType],
587
+ auth_key: Optional[str] = None,
588
+ timeout: int = DEFAULT_TIMEOUT,
589
+ max_retries: int = DEFAULT_MAX_RETRIES,
590
+ backoff_factor: float = DEFAULT_BACKOFF_FACTOR,
591
+ ) -> Dict[IndexType, Any]:
592
+ """
593
+ Updates index targets of an entity. The index targets can be set to "NEL", "ElasticSearch", "VectorSearchWord",
594
+ or "VectorSearchDocument".
595
+ If the target is already set for the entity, there will be no changes.
596
+
597
+ Parameters
598
+ ----------
599
+ entity_uri: str
600
+ URI of entity
601
+ targets: List[Literal["NEL", "ElasticSearch", "VectorSearchWord", "VectorSearchDocument"]]
602
+ List of indexing targets
603
+ auth_key: Optional[str]
604
+ If the auth key is set the logged-in user (if any) will be ignored and the auth key will be used.
605
+ timeout: int
606
+ Timeout for the request (default: 60 seconds)
607
+ max_retries: int
608
+ Maximum number of retries
609
+ backoff_factor: float
610
+ A backoff factor to apply between attempts after the second try (most errors are resolved immediately by a
611
+ second try without a delay)
612
+
613
+ Returns
614
+ -------
615
+ update_status: Dict[str, Any]
616
+ Status per target (depending on the targets of entity and the ones set in the request). If the entity
617
+ already has the target set, the status will be "Target already exists" for that target,
618
+ otherwise it will be "UPSERT".
619
+
620
+ Raises
621
+ ------
622
+ WacomServiceException
623
+ If the graph service returns an error code
624
+ """
625
+ if auth_key is None:
626
+ auth_key, _ = self.handle_token()
627
+ url: str = f"{self.service_base_url}{WacomKnowledgeService.ENTITY_ENDPOINT}/{entity_uri}/indexes"
628
+ # Header info
629
+ headers: dict = {
630
+ USER_AGENT_HEADER_FLAG: self.user_agent,
631
+ CONTENT_TYPE_HEADER_FLAG: APPLICATION_JSON_HEADER,
632
+ AUTHORIZATION_HEADER_FLAG: f"Bearer {auth_key}",
633
+ }
634
+ mount_point: str = "https://" if self.service_url.startswith("https") else "http://"
635
+ with requests.Session() as session:
636
+ retries: Retry = Retry(total=max_retries, backoff_factor=backoff_factor, status_forcelist=STATUS_FORCE_LIST)
637
+ session.mount(mount_point, HTTPAdapter(max_retries=retries))
638
+ response: Response = session.patch(
639
+ url, json=targets, headers=headers, timeout=timeout, verify=self.verify_calls
640
+ )
641
+ if response.ok:
642
+ return response.json()
643
+ raise handle_error("Updating entity indexes failed.", response)
644
+
645
+ def remove_entity_indexes(
646
+ self,
647
+ entity_uri: str,
648
+ targets: List[IndexType],
649
+ auth_key: Optional[str] = None,
650
+ timeout: int = DEFAULT_TIMEOUT,
651
+ max_retries: int = DEFAULT_MAX_RETRIES,
652
+ backoff_factor: float = DEFAULT_BACKOFF_FACTOR,
653
+ ) -> Dict[IndexType, Any]:
654
+ """
655
+ Removes the given index targets from an entity.
656
+
657
+ Parameters
658
+ ----------
659
+ entity_uri: str
660
+ URI of entity
661
+ targets: List[Literal["NEL", "ElasticSearch", "VectorSearchWord", "VectorSearchDocument"]]
662
+ List of indexing targets
663
+ auth_key: Optional[str]
664
+ If the auth key is set the logged-in user (if any) will be ignored and the auth key will be used.
665
+ timeout: int
666
+ Timeout for the request (default: 60 seconds)
667
+ max_retries: int
668
+ Maximum number of retries
669
+ backoff_factor: float
670
+ A backoff factor to apply between attempts after the second try (most errors are resolved immediately by a
671
+ second try without a delay)
672
+
673
+ Returns
674
+ -------
675
+ update_status: Dict[str, Any]
676
+ Status per target (depending on the targets of entity and the ones set in the request), e.g.,
677
+ the response will only contain {"NEL": "DELETE"} if NEL is the only target in the request.
678
+
679
+ Raises
680
+ ------
681
+ WacomServiceException
682
+ If the graph service returns an error code
683
+ """
684
+ if auth_key is None:
685
+ auth_key, _ = self.handle_token()
686
+ url: str = f"{self.service_base_url}{WacomKnowledgeService.ENTITY_ENDPOINT}/{entity_uri}/indexes"
687
+ # Header info
688
+ headers: dict = {
689
+ USER_AGENT_HEADER_FLAG: self.user_agent,
690
+ CONTENT_TYPE_HEADER_FLAG: APPLICATION_JSON_HEADER,
691
+ AUTHORIZATION_HEADER_FLAG: f"Bearer {auth_key}",
692
+ }
693
+ mount_point: str = "https://" if self.service_url.startswith("https") else "http://"
694
+ with requests.Session() as session:
695
+ retries: Retry = Retry(total=max_retries, backoff_factor=backoff_factor, status_forcelist=STATUS_FORCE_LIST)
696
+ session.mount(mount_point, HTTPAdapter(max_retries=retries))
697
+ response: Response = session.delete(
698
+ url, json=targets, headers=headers, timeout=timeout, verify=self.verify_calls
699
+ )
700
+ if response.ok:
701
+ return response.json()
702
+ raise handle_error("Deleting entity indexes failed.", response)
703
+
580
704
  def relations(
581
705
  self,
582
706
  uri: str,
@@ -1685,10 +1809,8 @@ class WacomKnowledgeService(WacomServiceAPIClient):
1685
1809
 
1686
1810
  # Compress the NDJSON string to a gzip byte array
1687
1811
  compressed_data: bytes = gzip.compress(ndjson_content.encode("utf-8"))
1688
- with open("import.gzip", "wb") as f:
1689
- f.write(compressed_data)
1690
1812
  files: List[Tuple[str, Tuple[str, bytes, str]]] = [
1691
- ("file", ("import.njson", compressed_data, "application/x-gzip"))
1813
+ ("file", ("import.njson.gz", compressed_data, "application/x-gzip"))
1692
1814
  ]
1693
1815
  url: str = f"{self.service_base_url}{self.IMPORT_ENTITIES_ENDPOINT}"
1694
1816
  mount_point: str = "https://" if self.service_url.startswith("https") else "http://"
@@ -1709,7 +1831,7 @@ class WacomKnowledgeService(WacomServiceAPIClient):
1709
1831
  timeout: int = DEFAULT_TIMEOUT,
1710
1832
  max_retries: int = DEFAULT_MAX_RETRIES,
1711
1833
  backoff_factor: float = DEFAULT_BACKOFF_FACTOR,
1712
- ) -> Dict[str, Any]:
1834
+ ) -> JobStatus:
1713
1835
  """
1714
1836
  Retrieve the status of the job.
1715
1837
 
@@ -1729,7 +1851,7 @@ class WacomKnowledgeService(WacomServiceAPIClient):
1729
1851
 
1730
1852
  Returns
1731
1853
  -------
1732
- job_status: Dict[str, Any]
1854
+ job_status: JobStatus
1733
1855
  Status of the job
1734
1856
  """
1735
1857
  if auth_key is None:
@@ -1742,7 +1864,105 @@ class WacomKnowledgeService(WacomServiceAPIClient):
1742
1864
  session.mount(mount_point, HTTPAdapter(max_retries=retries))
1743
1865
  response: Response = session.get(url, headers=headers, timeout=timeout, verify=self.verify_calls)
1744
1866
  if response.ok:
1745
- return response.json()
1867
+ return JobStatus.from_dict(response.json())
1868
+ raise handle_error(f"Retrieving job status for {job_id} failed.", response)
1869
+
1870
+ def import_error_log(
1871
+ self,
1872
+ job_id: str,
1873
+ auth_key: Optional[str] = None,
1874
+ next_page_id: Optional[str] = None,
1875
+ timeout: int = DEFAULT_TIMEOUT,
1876
+ max_retries: int = DEFAULT_MAX_RETRIES,
1877
+ backoff_factor: float = DEFAULT_BACKOFF_FACTOR,
1878
+ ) -> ErrorLogResponse:
1879
+ """
1880
+ Retrieve the error log of the job.
1881
+
1882
+ Parameters
1883
+ ----------
1884
+ job_id: str
1885
+ ID of the job
1886
+ next_page_id: Optional[str] = None
1887
+ ID of the next page within pagination.
1888
+ auth_key: Optional[str] = None
1889
+ If the auth key is set the logged-in user (if any) will be ignored and the auth key will be used.
1890
+ timeout: int
1891
+ Timeout for the request (default: 60 seconds)
1892
+ max_retries: int
1893
+ Maximum number of retries
1894
+ backoff_factor: float
1895
+ A backoff factor to apply between attempts after the second try (most errors are resolved immediately by a
1896
+ second try without a delay)
1897
+
1898
+ Returns
1899
+ -------
1900
+ error: ErrorLogResponse
1901
+ Error log of the job
1902
+ """
1903
+ if auth_key is None:
1904
+ auth_key, _ = self.handle_token()
1905
+ headers: dict = {USER_AGENT_HEADER_FLAG: self.user_agent, AUTHORIZATION_HEADER_FLAG: f"Bearer {auth_key}"}
1906
+ url: str = f"{self.service_base_url}{self.IMPORT_ERROR_LOG_ENDPOINT}/{job_id}"
1907
+ params: Dict[str, str] = {NEXT_PAGE_ID_TAG: next_page_id} if next_page_id else {}
1908
+ mount_point: str = "https://" if self.service_url.startswith("https") else "http://"
1909
+ with requests.Session() as session:
1910
+ retries: Retry = Retry(total=max_retries, backoff_factor=backoff_factor, status_forcelist=STATUS_FORCE_LIST)
1911
+ session.mount(mount_point, HTTPAdapter(max_retries=retries))
1912
+ response: Response = session.get(
1913
+ url, headers=headers, timeout=timeout, params=params, verify=self.verify_calls
1914
+ )
1915
+ if response.ok:
1916
+ return ErrorLogResponse.from_dict(response.json())
1917
+ raise handle_error(f"Retrieving job status for {job_id} failed.", response)
1918
+
1919
+ def import_new_uris(
1920
+ self,
1921
+ job_id: str,
1922
+ auth_key: Optional[str] = None,
1923
+ next_page_id: Optional[str] = None,
1924
+ timeout: int = DEFAULT_TIMEOUT,
1925
+ max_retries: int = DEFAULT_MAX_RETRIES,
1926
+ backoff_factor: float = DEFAULT_BACKOFF_FACTOR,
1927
+ ) -> NewEntityUrisResponse:
1928
+ """
1929
+ Retrieve the new entity uris from the job.
1930
+
1931
+ Parameters
1932
+ ----------
1933
+ job_id: str
1934
+ ID of the job
1935
+ next_page_id: Optional[str] = None
1936
+ ID of the next page within pagination.
1937
+ auth_key: Optional[str] = None
1938
+ If the auth key is set the logged-in user (if any) will be ignored and the auth key will be used.
1939
+ timeout: int
1940
+ Timeout for the request (default: 60 seconds)
1941
+ max_retries: int
1942
+ Maximum number of retries
1943
+ backoff_factor: float
1944
+ A backoff factor to apply between attempts after the second try (most errors are resolved immediately by a
1945
+ second try without a delay)
1946
+
1947
+ Returns
1948
+ -------
1949
+ response: NewEntityUrisResponse
1950
+ New entity uris of the job.
1951
+ """
1952
+ if auth_key is None:
1953
+ auth_key, _ = self.handle_token()
1954
+ headers: dict = {USER_AGENT_HEADER_FLAG: self.user_agent, AUTHORIZATION_HEADER_FLAG: f"Bearer {auth_key}"}
1955
+ url: str = f"{self.service_base_url}{self.IMPORT_ENTITIES_ENDPOINT}/{job_id}/new-entities"
1956
+ params: Dict[str, str] = {NEXT_PAGE_ID_TAG: next_page_id} if next_page_id else {}
1957
+ mount_point: str = "https://" if self.service_url.startswith("https") else "http://"
1958
+ with requests.Session() as session:
1959
+ retries: Retry = Retry(total=max_retries, backoff_factor=backoff_factor, status_forcelist=STATUS_FORCE_LIST)
1960
+ session.mount(mount_point, HTTPAdapter(max_retries=retries))
1961
+ response: Response = session.get(
1962
+ url, headers=headers, timeout=timeout, params=params, verify=self.verify_calls
1963
+ )
1964
+ if response.ok:
1965
+ return NewEntityUrisResponse.from_dict(response.json())
1746
1966
  raise handle_error(f"Retrieving job status for {job_id} failed.", response)
1747
1967
 
1748
1968
  # ------------------------------------ Admin endpoints -------------------------------------------------------------
@@ -309,7 +309,7 @@ class SemanticSearchClient(WacomServiceAPIClient):
309
309
  response = session.get(url, params=params, headers=headers)
310
310
  if response.ok:
311
311
  return response.json().get("count", 0)
312
- raise handle_error("Counting labels failed.", response, headers=headers, parameters={"locale": locale})
312
+ raise handle_error("Counting labels failed.", response, headers=headers, parameters={"locale": locale})
313
313
 
314
314
  def count_labels_filter(
315
315
  self,
@@ -360,9 +360,9 @@ class SemanticSearchClient(WacomServiceAPIClient):
360
360
  response = session.post(url, json={"locale": locale, "filter": filters}, headers=headers)
361
361
  if response.ok:
362
362
  return response.json().get("count", 0)
363
- raise handle_error(
364
- "Counting labels failed.", response, headers=headers, parameters={"locale": locale, "filter": filters}
365
- )
363
+ raise handle_error(
364
+ "Counting labels failed.", response, headers=headers, parameters={"locale": locale, "filter": filters}
365
+ )
366
366
 
367
367
  def document_search(
368
368
  self,
@@ -224,11 +224,12 @@ class TenantManagementServiceAPI(WacomServiceAPIClient):
224
224
  CONTENT_TYPE_HEADER_FLAG: "application/json",
225
225
  }
226
226
  payload: dict = {
227
- "rights": rights,
228
227
  "vectorSearchDataProperties": vector_search_data_properties,
229
228
  "vectorSearchObjectProperties": vector_search_object_properties,
230
229
  "contentDataPropertyName": content_data_property_name,
231
230
  }
231
+ if len(rights) > 0:
232
+ payload["rights"] = rights
232
233
  mount_point: str = "https://" if self.service_url.startswith("https") else "http://"
233
234
  with requests.Session() as session:
234
235
  retries: Retry = Retry(total=max_retries, backoff_factor=backoff_factor, status_forcelist=STATUS_FORCE_LIST)
@@ -40,6 +40,8 @@ class UserRole(enum.Enum):
40
40
  """User only has control over his personal entities."""
41
41
  ADMIN = "TenantAdmin"
42
42
  """TenantAdmin has access to all entities independent of the access rights."""
43
+ CONTENT_MANAGER = "ContentManager"
44
+ """ContentManager is a special user for content accounts. The same visibility rules as for USER accounts apply."""
43
45
 
44
46
 
45
47
  USER_ROLE_MAPPING: Dict[str, UserRole] = {str(r.value): r for r in UserRole}
@@ -2,9 +2,9 @@
2
2
  # Copyright © 2024-present Wacom. All rights reserved.
3
3
  """ "Utilities"""
4
4
 
5
- __all__ = ["wikipedia", "graph", "wikidata"]
5
+ __all__ = ["import_format", "graph", "wikidata", "wikipedia"]
6
6
 
7
-
8
- from knowledge.utils import wikipedia
7
+ from knowledge.utils import import_format
9
8
  from knowledge.utils import graph
10
9
  from knowledge.utils import wikidata
10
+ from knowledge.utils import wikipedia