PyPI - personal_knowledge_library - Versions diffs - 3.0.0__py3-none-any.whl - Mend

personal_knowledge_library 3.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of personal_knowledge_library might be problematic. Click here for more details.

Files changed (42) hide show

knowledge/__init__.py +91 -0
knowledge/base/__init__.py +22 -0
knowledge/base/access.py +167 -0
knowledge/base/entity.py +267 -0
knowledge/base/language.py +27 -0
knowledge/base/ontology.py +2734 -0
knowledge/base/search.py +473 -0
knowledge/base/tenant.py +192 -0
knowledge/nel/__init__.py +11 -0
knowledge/nel/base.py +495 -0
knowledge/nel/engine.py +123 -0
knowledge/ontomapping/__init__.py +667 -0
knowledge/ontomapping/manager.py +320 -0
knowledge/public/__init__.py +27 -0
knowledge/public/cache.py +115 -0
knowledge/public/helper.py +373 -0
knowledge/public/relations.py +128 -0
knowledge/public/wikidata.py +1324 -0
knowledge/services/__init__.py +128 -0
knowledge/services/asyncio/__init__.py +7 -0
knowledge/services/asyncio/base.py +458 -0
knowledge/services/asyncio/graph.py +1420 -0
knowledge/services/asyncio/group.py +450 -0
knowledge/services/asyncio/search.py +439 -0
knowledge/services/asyncio/users.py +270 -0
knowledge/services/base.py +533 -0
knowledge/services/graph.py +1897 -0
knowledge/services/group.py +819 -0
knowledge/services/helper.py +142 -0
knowledge/services/ontology.py +1234 -0
knowledge/services/search.py +488 -0
knowledge/services/session.py +444 -0
knowledge/services/tenant.py +281 -0
knowledge/services/users.py +445 -0
knowledge/utils/__init__.py +10 -0
knowledge/utils/graph.py +417 -0
knowledge/utils/wikidata.py +197 -0
knowledge/utils/wikipedia.py +175 -0
personal_knowledge_library-3.0.0.dist-info/LICENSE +201 -0
personal_knowledge_library-3.0.0.dist-info/METADATA +1163 -0
personal_knowledge_library-3.0.0.dist-info/RECORD +42 -0
personal_knowledge_library-3.0.0.dist-info/WHEEL +4 -0

knowledge/base/search.py ADDED Viewed

@@ -0,0 +1,473 @@
+# -*- coding: utf-8 -*-
+# Copyright © 2024-present Wacom. All rights reserved.
+from typing import List, Any, Dict, Optional
+from knowledge.base.entity import Label
+from knowledge.base.language import LocaleCode
+from knowledge.base.ontology import OntologyClassReference
+class LabelSearchResult:
+    """
+    LabelSearchResult
+    =================
+    This is a search result model.
+    Properties
+    ----------
+    score: float
+        Score of the search result.
+    entity_uri: str
+        Unique identifier of the entity.
+    label: str
+        Label of the search result.
+    locale: LocaleCode
+        Locale of the search result.
+    concept_type: OntologyClassReference
+        Concept type of the search result.
+    metadata: Dict[str, Any]
+        Metadata of the search result.
+    """
+    def __init__(self, score: float, content_uri: str, metadata: Dict[str, Any], content: str):
+        self.__score: float = score
+        self.__content_uri: str = content_uri
+        self.__metadata: Dict[str, Any] = metadata
+        self.__concept_type: OntologyClassReference = OntologyClassReference.parse(
+            metadata.get("concept_type", "wacom:core#Topic")
+        )
+        self.__locale: LocaleCode = LocaleCode(metadata.get("locale", "en_US"))
+        if "concept_type" in self.__metadata:
+            del self.__metadata["concept_type"]
+        if "locale" in self.__metadata:
+            del self.__metadata["locale"]
+        self.__label: Label = Label(content=content, language_code=self.__locale)
+    @property
+    def score(self) -> float:
+        """Score of the search result."""
+        return self.__score
+    @property
+    def entity_uri(self) -> str:
+        """Unique identifier of the entity."""
+        return self.__content_uri
+    @property
+    def locale(self) -> LocaleCode:
+        """Locale of the search result."""
+        return self.__locale
+    @property
+    def metadata(self) -> Dict[str, Any]:
+        """Metadata of the search result."""
+        return self.__metadata
+    @property
+    def label(self) -> Label:
+        """Label of the search result."""
+        return self.__label
+    @property
+    def concept_type(self) -> OntologyClassReference:
+        """Concept type of the search result."""
+        return self.__concept_type
+    def __repr__(self):
+        return (
+            f"LabelSearchResult(score={self.score}, entity_uri={self.entity_uri}, label={self.label}, "
+            f"locale={self.locale}, concept_type={self.concept_type}, metadata={self.metadata})"
+        )
+class DocumentSearchResult:
+    """
+    DocumentSearchResult
+    ====================
+    This is a search result model.
+    Properties
+    ----------
+    score: float
+        Score of the search result.
+    content_uri: str
+        Unique identifier of the entity.
+    metadata: Dict[str, Any]
+        Metadata of the search result.
+    content_chunk: str
+        Content chunk of the search result.
+    concept_type: OntologyClassReference
+        Concept type of the search result.
+    locale: LocaleCode
+        Locale of the search result.
+    """
+    def __init__(self, score: float, content_uri: str, metadata: Dict[str, Any], content: str):
+        self.__score: float = score
+        self.__content_uri: str = content_uri
+        self.__content: str = content
+        self.__metadata: Dict[str, Any] = metadata
+        self.__concept_type: OntologyClassReference = OntologyClassReference.parse(
+            metadata.get("concept_type", "wacom:core#Thing")
+        )
+        self.__locale: LocaleCode = LocaleCode(metadata.get("locale", "en_US"))
+        if "concept_type" in self.__metadata:
+            del self.__metadata["concept_type"]
+        if "locale" in self.__metadata:
+            del self.__metadata["locale"]
+    @property
+    def score(self) -> float:
+        """Score of the search result."""
+        return self.__score
+    @property
+    def content_uri(self) -> str:
+        """Unique identifier of the content."""
+        return self.__content_uri
+    @property
+    def content_chunk(self) -> str:
+        """Chunk of the document."""
+        return self.__content
+    @property
+    def metadata(self) -> Dict[str, Any]:
+        """Metadata of the search result."""
+        return self.__metadata
+    @property
+    def concept_type(self) -> OntologyClassReference:
+        """Concept type of the search result."""
+        return self.__concept_type
+    @property
+    def locale(self) -> LocaleCode:
+        """Locale of the search result."""
+        return self.__locale
+class PerformanceStats:
+    """
+    PerformanceStats
+    ================
+    This is a performance stats model.
+    Properties
+    ----------
+    locale_code: LocaleCode
+        Performance for the model with the given locale.
+    model_name: str
+        Name of the model used for the search.
+    top_k: int
+        Top-k results requested.
+    model_loading_time: float
+        Loading time in milliseconds for the embedding model.
+    embedding_time: float
+        Embedding time in milliseconds for the search query.
+    vector_db_response_time: float
+        Response time in milliseconds for the vector database.
+    """
+    def __init__(self, stats: Dict[str, Any]):
+        self.__locale: LocaleCode = stats.get("locale")
+        self.__model_name: str = stats.get("model-name", "unknown")
+        self.__top_k: int = stats.get("top-k", 10)
+        self.__loading_time: float = stats.get("loading", 0.0) * 1000
+        self.__embedding_time: float = stats.get("embedding", 0.0) * 1000
+        self.__vector_db_response_time: float = stats.get("request", 0.0) * 1000
+        self.__overall_time: float = stats.get("overall", 0.0) * 1000
+    @property
+    def locale_code(self) -> LocaleCode:
+        """Performance for the model with the given locale."""
+        return self.__locale
+    @property
+    def model_name(self) -> str:
+        """Name of the model used for the search."""
+        return self.__model_name
+    @property
+    def top_k(self) -> int:
+        """Top-k results requested."""
+        return self.__top_k
+    @property
+    def model_loading_time(self) -> float:
+        """Loading time in milliseconds for the embedding model."""
+        return self.__loading_time
+    @property
+    def embedding_time(self) -> float:
+        """Embedding time in milliseconds for the search query."""
+        return self.__embedding_time
+    @property
+    def vector_db_response_time(self) -> float:
+        """Response time in milliseconds for the vector database."""
+        return self.__vector_db_response_time
+    @property
+    def overall_time(self) -> float:
+        """Overall time in milliseconds for the search query."""
+        return self.__overall_time
+    def __repr__(self):
+        return (
+            f"PerformanceStats(locale_code={self.locale_code}, model_name={self.model_name}, top_k={self.top_k}, "
+            f"model_loading_time={self.model_loading_time}, embedding_time={self.embedding_time}, "
+            f"vector_db_response_time={self.vector_db_response_time})"
+        )
+class DocumentSearchStats(PerformanceStats):
+    """
+    DocumentSearchStats
+    ====================
+    This is a performance stats model for document search.
+    Properties
+    ----------
+    locale_code: LocaleCode
+        Performance for the model with the given locale.
+    model_name: str
+        Name of the model used for the search.
+    top_k: int
+        Top-k results requested.
+    model_loading_time: float
+        Loading time in milliseconds for the embedding model.
+    embedding_time: float
+        Embedding time in milliseconds for the search query.
+    vector_db_response_time: float
+        Response time in milliseconds for the vector database.
+    preprocessing_time: float
+        Preprocessing time in milliseconds for search query.
+    """
+    def __init__(self, stats: Dict[str, Any]):
+        super().__init__(stats)
+        self.__preprocessing: float = stats.get("preprocessing", 0.0) * 1000.0
+    @property
+    def preprocessing_time(self) -> float:
+        """Preprocessing time in milliseconds for search query."""
+        return self.__preprocessing
+    def __repr__(self):
+        return (
+            f"DocumentSearchStats(locale_code={self.locale_code}, model_name={self.model_name}, "
+            f"top_k={self.top_k}, "
+            f"model_loading_time={self.model_loading_time}, embedding_time={self.embedding_time}, "
+            f"vector_db_response_time={self.vector_db_response_time}, preprocessing_time={self.preprocessing_time})"
+        )
+class LabelSearchStats(PerformanceStats):
+    """
+    LabelSearchStats
+    ================
+    This is a performance stats model for label search.
+    Properties
+    ----------
+    locale_code: LocaleCode
+        Performance for the model with the given locale.
+    model_name: str
+        Name of the model used for the search.
+    top_k: int
+        Top-k results requested.
+    model_loading_time: float
+        Loading time in milliseconds for the embedding model.
+    embedding_time: float
+        Embedding time in milliseconds for the search query.
+    vector_db_response_time: float
+        Response time in milliseconds for the vector database.
+    tokenizer_time: float
+        Tokenizer time in milliseconds for search query.
+    number_of_tokens: int
+        Number of tokens in the search query.
+    """
+    def __init__(self, stats: Dict[str, Any]):
+        super().__init__(stats)
+        self.__tokenizer: float = stats.get("tokenizer")
+        self.__number_of_tokens: int = stats.get("number-of-tokens")
+    @property
+    def tokenizer_time(self) -> float:
+        """Tokenizer time in milliseconds for search query."""
+        return self.__tokenizer
+    @property
+    def number_of_tokens(self) -> int:
+        """Number of tokens in the search query."""
+        return self.__number_of_tokens
+    def __repr__(self):
+        return (
+            f"LabelSearchStats(locale_code={self.locale_code}, model_name={self.model_name}, "
+            f"top_k={self.top_k}, "
+            f"model_loading_time={self.model_loading_time}, embedding_time={self.embedding_time}, "
+            f"vector_db_response_time={self.vector_db_response_time}, tokenizer_time={self.tokenizer_time}, "
+            f"number_of_tokens={self.number_of_tokens})"
+        )
+class DocumentSearchResponse:
+    """
+    DocumentSearchResponse
+    ======================
+    Response model for semantic search service.
+    Properties
+    ----------
+    results: List[DocumentSearchResult]
+        Search results
+    max_results: int
+        Maximum number of results
+    stats: Optional[PerformanceStats]
+        Performance stats
+    """
+    def __init__(
+        self, results: List[DocumentSearchResult], max_results: int = 10, stats: Optional[DocumentSearchStats] = None
+    ):
+        self.__results: List[DocumentSearchResult] = results
+        self.__max_results = max_results
+        self.__stats: Optional[DocumentSearchStats] = stats
+    @property
+    def results(self) -> List[DocumentSearchResult]:
+        """List of search results."""
+        return self.__results
+    @property
+    def max_results(self) -> int:
+        """Maximum number of results."""
+        return self.__max_results
+    @property
+    def stats(self) -> Optional[DocumentSearchStats]:
+        """Performance stats."""
+        return self.__stats
+    @staticmethod
+    def from_dict(data: Dict[str, Any]) -> "DocumentSearchResponse":
+        """
+        Create a DocumentSearchResponse from a dictionary.
+        Parameters
+        ----------
+        data: Dict[str, Any]
+            Dictionary with the response data.
+        Returns
+        -------
+        DocumentSearchResponse
+            SegmentedContent search response.
+        """
+        return DocumentSearchResponse(
+            results=[DocumentSearchResult(**result) for result in data["results"]],
+            max_results=data["max_results"],
+            stats=DocumentSearchStats(data["stats"]) if "stats" in data and data["stats"] else None,
+        )
+class LabelMatchingResponse:
+    """
+    SemanticSearchResponse
+    ======================
+    Response model for semantic search service.
+    Properties
+    ----------
+    results: List[LabelSearchResult]
+        Search results
+    max_results: int
+        Maximum number of results
+    stats: Optional[LabelSearchStats]
+        Performance stats
+    """
+    def __init__(
+        self, results: List[LabelSearchResult], max_results: int = 10, stats: Optional[LabelSearchStats] = None
+    ):
+        self.__results = results
+        self.__max_results = max_results
+        self.__stats: Optional[LabelSearchStats] = stats
+    @property
+    def results(self) -> List[LabelSearchResult]:
+        """List of label search results."""
+        return self.__results
+    @property
+    def max_results(self) -> int:
+        """Maximum number of results."""
+        return self.__max_results
+    @property
+    def stats(self) -> Optional[LabelSearchStats]:
+        """Performance stats."""
+        return self.__stats
+    @staticmethod
+    def from_dict(data: Dict[str, Any]):
+        """
+        Create a LabelMatchingResponse from a dictionary.
+        Parameters
+        ----------
+        data: Dict[str, Any]
+            Dictionary with the response data.
+        Returns
+        -------
+        LabelMatchingResponse
+            Label matching response.
+        """
+        return LabelMatchingResponse(
+            results=[LabelSearchResult(**result) for result in data["results"]],
+            max_results=data["max_results"],
+            stats=LabelSearchStats(data["stats"]) if "stats" in data and data["stats"] else None,
+        )
+class VectorDBDocument:
+    """
+    VectorDBDocument
+    ================
+    SegmentedContent model for the vector database.
+    Properties
+    ----------
+    """
+    def __init__(self, data: Dict[str, Any]):
+        self.__id: str = data.get("id", "")
+        self.__content: str = data.get("content", "")
+        self.__uri: str = data.get("uri", "")
+        self.__metadata: Dict[str, Any] = data.get("meta", {})
+    @property
+    def id(self) -> str:
+        """ID of the document."""
+        return self.__id
+    @property
+    def content(self) -> str:
+        """Content of the document."""
+        return self.__content
+    @property
+    def uri(self) -> str:
+        """URI of the document."""
+        return self.__uri
+    @property
+    def metadata(self) -> Dict[str, Any]:
+        """Metadata of the document."""
+        return self.__metadata
+    def __repr__(self):
+        return f"VectorDatabaseDocument(content={self.content}, uri={self.uri}, metadata={self.metadata})"

knowledge/base/tenant.py ADDED Viewed

@@ -0,0 +1,192 @@
+# -*- coding: utf-8 -*-
+# Copyright © 2024-present Wacom. All rights reserved.
+from typing import List, Dict, Any
+class TenantConfiguration:
+    """
+    Tenant configuration
+    ====================
+    This class represents the configuration of a tenant.
+    The configuration includes the following properties:
+        - identifier: str
+        - ontology_name: str
+        - ontology_version: int
+        - is_locked: bool
+        - name: str
+        - rights: List[str]
+    Parameters
+    ----------
+    identifier: str
+        Identifier of the tenant
+    ontology_name: str
+        Name of the ontology
+    ontology_version: int
+        Version of the ontology
+    is_locked: bool
+        Flag to indicate if the tenant is locked
+    name: str
+        Name of the tenant
+    rights: List[str]
+        List of rights
+    vector_search_data_properties: List[str]
+        List of vector search data properties which are used for vector search in the metadata
+    vector_search_object_properties: List[str]
+        List of vector search object properties which are used for vector search in the metadata
+    content_data_property_name: str
+        Name of the content data property which is used for vector search to index documents
+    """
+    def __init__(
+        self,
+        identifier: str,
+        ontology_name: str,
+        ontology_version: int,
+        is_locked: bool,
+        name: str,
+        rights: List[str],
+        vector_search_data_properties: List[str],
+        vector_search_object_properties: List[str],
+        content_data_property_name: str,
+    ):
+        # Constructor to initialize the properties
+        self.__identifier: str = identifier
+        self.__ontology_name: str = ontology_name
+        self.__ontology_version: int = ontology_version
+        self.__is_locked: bool = is_locked
+        self.__name: str = name
+        self.__rights: List[str] = rights
+        self.__vector_search_data_properties: List[str] = vector_search_data_properties
+        self.__vector_search_object_properties: List[str] = vector_search_object_properties
+        self.__content_data_property_name: str = content_data_property_name
+    @property
+    def identifier(self) -> str:
+        """
+        Identifier of the tenant
+        Returns
+        -------
+        str
+            Identifier of the tenant
+        """
+        return self.__identifier
+    @property
+    def ontology_name(self) -> str:
+        """
+        Name of the ontology.
+        Returns
+        -------
+        str
+            Name of the ontology.
+        """
+        return self.__ontology_name
+    @property
+    def ontology_version(self) -> int:
+        """
+        Version of the ontology.
+        """
+        return self.__ontology_version
+    @property
+    def is_locked(self) -> bool:
+        """
+        Flag to indicate if the tenant is locked.
+        """
+        return self.__is_locked
+    @property
+    def name(self) -> str:
+        """
+        Name of the tenant.
+        """
+        return self.__name
+    @name.setter
+    def name(self, value):
+        self.__name = value
+    @property
+    def rights(self):
+        """
+        List of rights being assigned to the tenant, and will be added to the user's rights in the token.
+        """
+        return self.__rights
+    @rights.setter
+    def rights(self, value):
+        self.__rights = value
+    @property
+    def vector_search_data_properties(self) -> List[str]:
+        """
+        List of vector search data properties which are used for vector search in the metadata.
+        """
+        return self.__vector_search_data_properties
+    @vector_search_data_properties.setter
+    def vector_search_data_properties(self, value):
+        self.__vector_search_data_properties = value
+    @property
+    def vector_search_object_properties(self):
+        """
+        List of vector search object properties which are used for vector search in the metadata.
+        """
+        return self.__vector_search_object_properties
+    @vector_search_object_properties.setter
+    def vector_search_object_properties(self, value):
+        self.__vector_search_object_properties = value
+    @property
+    def content_data_property_name(self):
+        """
+        Name of the content data property which is used for vector search to index documents.
+        """
+        return self.__content_data_property_name
+    @content_data_property_name.setter
+    def content_data_property_name(self, value):
+        self.__content_data_property_name = value
+    @classmethod
+    def from_dict(cls, data_dict: Dict[str, Any]) -> "TenantConfiguration":
+        """
+        Create a TenantConfiguration object from a dictionary.
+        Parameters
+        ----------
+        data_dict: Dict[str, Any]
+            Dictionary containing the tenant configuration data.
+        Returns
+        -------
+        TenantConfiguration
+            The tenant configuration object.
+        """
+        return cls(
+            identifier=data_dict.get("id"),
+            ontology_name=data_dict.get("ontologyName"),
+            ontology_version=data_dict.get("ontologyVersion"),
+            is_locked=data_dict.get("isLocked"),
+            name=data_dict.get("name"),
+            rights=data_dict.get("rights"),
+            vector_search_data_properties=data_dict.get("vectorSearchDataProperties"),
+            vector_search_object_properties=data_dict.get("vectorSearchObjectProperties"),
+            content_data_property_name=data_dict.get("contentDataPropertyName"),
+        )
+    def __repr__(self):
+        return (
+            f"TenantConfiguration(identifier='{self.identifier}', ontology_name='{self.ontology_name}', "
+            f"ontology_version={self.ontology_version}, is_locked={self.is_locked}, "
+            f"name='{self.name}', rights={self.rights}, "
+            f"vector_search_data_properties={self.vector_search_data_properties}, "
+            f"vector_search_object_properties={self.vector_search_object_properties}, "
+            f"content_data_property_name='{self.content_data_property_name}')"
+        )

knowledge/nel/__init__.py ADDED Viewed

@@ -0,0 +1,11 @@
+# -*- coding: utf-8 -*-
+# Copyright © 2021-present Wacom. All rights reserved.
+"""
+Named Entity Linking
+--------------------
+This module provides the Named Entity Linking (NEL) functionality on top of the knowledge graph entities.
+"""
+from knowledge.nel import base
+from knowledge.nel import engine
+__all__ = ["base", "engine"]