PyPI - cognee - Versions diffs - 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl - Mend

cognee 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (227)

cognee/modules/retrieval/base_graph_retriever.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from typing import List, Optional
+from typing import Any, List, Optional, Type
 from abc import ABC, abstractmethod
 from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge
@@ -14,7 +14,11 @@ class BaseGraphRetriever(ABC):
     @abstractmethod
     async def get_completion(
-        self, query: str, context: Optional[List[Edge]] = None, session_id: Optional[str] = None
-    ) -> str:
+        self,
+        query: str,
+        context: Optional[List[Edge]] = None,
+        session_id: Optional[str] = None,
+        response_model: Type = str,
+    ) -> List[Any]:
         """Generates a response using the query and optional context (triplets)."""
         pass

cognee/modules/retrieval/base_retriever.py CHANGED Viewed

@@ -1,5 +1,5 @@
 from abc import ABC, abstractmethod
-from typing import Any, Optional
+from typing import Any, Optional, Type, List
 class BaseRetriever(ABC):
@@ -12,7 +12,11 @@ class BaseRetriever(ABC):
     @abstractmethod
     async def get_completion(
-        self, query: str, context: Optional[Any] = None, session_id: Optional[str] = None
-    ) -> Any:
+        self,
+        query: str,
+        context: Optional[Any] = None,
+        session_id: Optional[str] = None,
+        response_model: Type = str,
+    ) -> List[Any]:
         """Generates a response using the query and optional context."""
         pass

cognee/modules/retrieval/completion_retriever.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import asyncio
-from typing import Any, Optional
+from typing import Any, Optional, Type, List
 from cognee.shared.logging_utils import get_logger
 from cognee.infrastructure.databases.vector import get_vector_engine
@@ -75,8 +75,12 @@ class CompletionRetriever(BaseRetriever):
             raise NoDataError("No data found in the system, please add data first.") from error
     async def get_completion(
-        self, query: str, context: Optional[Any] = None, session_id: Optional[str] = None
-    ) -> str:
+        self,
+        query: str,
+        context: Optional[Any] = None,
+        session_id: Optional[str] = None,
+        response_model: Type = str,
+    ) -> List[Any]:
         """
         Generates an LLM completion using the context.
@@ -91,6 +95,7 @@ class CompletionRetriever(BaseRetriever):
               completion; if None, it retrieves the context for the query. (default None)
             - session_id (Optional[str]): Optional session identifier for caching. If None,
               defaults to 'default_session'. (default None)
+            - response_model (Type): The Pydantic model type for structured output. (default str)
         Returns:
         --------
@@ -118,6 +123,7 @@ class CompletionRetriever(BaseRetriever):
                     system_prompt_path=self.system_prompt_path,
                     system_prompt=self.system_prompt,
                     conversation_history=conversation_history,
+                    response_model=response_model,
                 ),
             )
         else:
@@ -127,6 +133,7 @@ class CompletionRetriever(BaseRetriever):
                 user_prompt_path=self.user_prompt_path,
                 system_prompt_path=self.system_prompt_path,
                 system_prompt=self.system_prompt,
+                response_model=response_model,
             )
         if session_save:
@@ -137,4 +144,4 @@ class CompletionRetriever(BaseRetriever):
                 session_id=session_id,
             )
-        return completion
+        return [completion]

cognee/modules/retrieval/graph_completion_context_extension_retriever.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import asyncio
-from typing import Optional, List, Type
+from typing import Optional, List, Type, Any
 from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge
 from cognee.shared.logging_utils import get_logger
 from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever
@@ -39,6 +39,8 @@ class GraphCompletionContextExtensionRetriever(GraphCompletionRetriever):
         node_type: Optional[Type] = None,
         node_name: Optional[List[str]] = None,
         save_interaction: bool = False,
+        wide_search_top_k: Optional[int] = 100,
+        triplet_distance_penalty: Optional[float] = 3.5,
     ):
         super().__init__(
             user_prompt_path=user_prompt_path,
@@ -48,6 +50,8 @@ class GraphCompletionContextExtensionRetriever(GraphCompletionRetriever):
             node_name=node_name,
             save_interaction=save_interaction,
             system_prompt=system_prompt,
+            wide_search_top_k=wide_search_top_k,
+            triplet_distance_penalty=triplet_distance_penalty,
         )
     async def get_completion(
@@ -56,7 +60,8 @@ class GraphCompletionContextExtensionRetriever(GraphCompletionRetriever):
         context: Optional[List[Edge]] = None,
         session_id: Optional[str] = None,
         context_extension_rounds=4,
-    ) -> List[str]:
+        response_model: Type = str,
+    ) -> List[Any]:
         """
         Extends the context for a given query by retrieving related triplets and generating new
         completions based on them.
@@ -76,6 +81,7 @@ class GraphCompletionContextExtensionRetriever(GraphCompletionRetriever):
               defaults to 'default_session'. (default None)
             - context_extension_rounds: The maximum number of rounds to extend the context with
               new triplets before halting. (default 4)
+            - response_model (Type): The Pydantic model type for structured output. (default str)
         Returns:
         --------
@@ -143,6 +149,7 @@ class GraphCompletionContextExtensionRetriever(GraphCompletionRetriever):
                     system_prompt_path=self.system_prompt_path,
                     system_prompt=self.system_prompt,
                     conversation_history=conversation_history,
+                    response_model=response_model,
                 ),
             )
         else:
@@ -152,6 +159,7 @@ class GraphCompletionContextExtensionRetriever(GraphCompletionRetriever):
                 user_prompt_path=self.user_prompt_path,
                 system_prompt_path=self.system_prompt_path,
                 system_prompt=self.system_prompt,
+                response_model=response_model,
             )
         if self.save_interaction and context_text and triplets and completion:

cognee/modules/retrieval/graph_completion_cot_retriever.py CHANGED Viewed

@@ -7,7 +7,7 @@ from cognee.shared.logging_utils import get_logger
 from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever
 from cognee.modules.retrieval.utils.completion import (
-    generate_structured_completion,
+    generate_completion,
     summarize_text,
 )
 from cognee.modules.retrieval.utils.session_cache import (
@@ -44,7 +44,6 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever):
     questions based on reasoning. The public methods are:
     - get_completion
-    - get_structured_completion
     Instance variables include:
     - validation_system_prompt_path
@@ -66,6 +65,8 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever):
         node_type: Optional[Type] = None,
         node_name: Optional[List[str]] = None,
         save_interaction: bool = False,
+        wide_search_top_k: Optional[int] = 100,
+        triplet_distance_penalty: Optional[float] = 3.5,
     ):
         super().__init__(
             user_prompt_path=user_prompt_path,
@@ -75,6 +76,8 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever):
             node_type=node_type,
             node_name=node_name,
             save_interaction=save_interaction,
+            wide_search_top_k=wide_search_top_k,
+            triplet_distance_penalty=triplet_distance_penalty,
         )
         self.validation_system_prompt_path = validation_system_prompt_path
         self.validation_user_prompt_path = validation_user_prompt_path
@@ -121,7 +124,7 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever):
                 triplets += await self.get_context(followup_question)
                 context_text = await self.resolve_edges_to_text(list(set(triplets)))
-            completion = await generate_structured_completion(
+            completion = await generate_completion(
                 query=query,
                 context=context_text,
                 user_prompt_path=self.user_prompt_path,
@@ -165,24 +168,28 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever):
         return completion, context_text, triplets
-    async def get_structured_completion(
+    async def get_completion(
         self,
         query: str,
         context: Optional[List[Edge]] = None,
         session_id: Optional[str] = None,
-        max_iter: int = 4,
+        max_iter=4,
         response_model: Type = str,
-    ) -> Any:
+    ) -> List[Any]:
         """
-        Generate structured completion responses based on a user query and contextual information.
+        Generate completion responses based on a user query and contextual information.
-        This method applies the same chain-of-thought logic as get_completion but returns
+        This method interacts with a language model client to retrieve a structured response,
+        using a series of iterations to refine the answers and generate follow-up questions
+        based on reasoning derived from previous outputs. It raises exceptions if the context
+        retrieval fails or if the model encounters issues in generating outputs. It returns
         structured output using the provided response model.
         Parameters:
         -----------
             - query (str): The user's query to be processed and answered.
-            - context (Optional[List[Edge]]): Optional context that may assist in answering the query.
+            - context (Optional[Any]): Optional context that may assist in answering the query.
               If not provided, it will be fetched based on the query. (default None)
             - session_id (Optional[str]): Optional session identifier for caching. If None,
               defaults to 'default_session'. (default None)
@@ -192,7 +199,8 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever):
         Returns:
         --------
-            - Any: The generated structured completion based on the response model.
+            - List[str]: A list containing the generated answer to the user's query.
         """
         # Check if session saving is enabled
         cache_config = CacheConfig()
@@ -228,45 +236,4 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever):
                 session_id=session_id,
             )
-        return completion
-    async def get_completion(
-        self,
-        query: str,
-        context: Optional[List[Edge]] = None,
-        session_id: Optional[str] = None,
-        max_iter=4,
-    ) -> List[str]:
-        """
-        Generate completion responses based on a user query and contextual information.
-        This method interacts with a language model client to retrieve a structured response,
-        using a series of iterations to refine the answers and generate follow-up questions
-        based on reasoning derived from previous outputs. It raises exceptions if the context
-        retrieval fails or if the model encounters issues in generating outputs.
-        Parameters:
-        -----------
-            - query (str): The user's query to be processed and answered.
-            - context (Optional[Any]): Optional context that may assist in answering the query.
-              If not provided, it will be fetched based on the query. (default None)
-            - session_id (Optional[str]): Optional session identifier for caching. If None,
-              defaults to 'default_session'. (default None)
-            - max_iter: The maximum number of iterations to refine the answer and generate
-              follow-up questions. (default 4)
-        Returns:
-        --------
-            - List[str]: A list containing the generated answer to the user's query.
-        """
-        completion = await self.get_structured_completion(
-            query=query,
-            context=context,
-            session_id=session_id,
-            max_iter=max_iter,
-            response_model=str,
-        )
         return [completion]

cognee/modules/retrieval/graph_completion_retriever.py CHANGED Viewed

@@ -47,6 +47,8 @@ class GraphCompletionRetriever(BaseGraphRetriever):
         node_type: Optional[Type] = None,
         node_name: Optional[List[str]] = None,
         save_interaction: bool = False,
+        wide_search_top_k: Optional[int] = 100,
+        triplet_distance_penalty: Optional[float] = 3.5,
     ):
         """Initialize retriever with prompt paths and search parameters."""
         self.save_interaction = save_interaction
@@ -54,8 +56,10 @@ class GraphCompletionRetriever(BaseGraphRetriever):
         self.system_prompt_path = system_prompt_path
         self.system_prompt = system_prompt
         self.top_k = top_k if top_k is not None else 5
+        self.wide_search_top_k = wide_search_top_k
         self.node_type = node_type
         self.node_name = node_name
+        self.triplet_distance_penalty = triplet_distance_penalty
     async def resolve_edges_to_text(self, retrieved_edges: list) -> str:
         """
@@ -105,6 +109,8 @@ class GraphCompletionRetriever(BaseGraphRetriever):
             collections=vector_index_collections or None,
             node_type=self.node_type,
             node_name=self.node_name,
+            wide_search_top_k=self.wide_search_top_k,
+            triplet_distance_penalty=self.triplet_distance_penalty,
         )
         return found_triplets
@@ -141,12 +147,17 @@ class GraphCompletionRetriever(BaseGraphRetriever):
         return triplets
+    async def convert_retrieved_objects_to_context(self, triplets: List[Edge]):
+        context = await self.resolve_edges_to_text(triplets)
+        return context
     async def get_completion(
         self,
         query: str,
         context: Optional[List[Edge]] = None,
         session_id: Optional[str] = None,
-    ) -> List[str]:
+        response_model: Type = str,
+    ) -> List[Any]:
         """
         Generates a completion using graph connections context based on a query.
@@ -188,6 +199,7 @@ class GraphCompletionRetriever(BaseGraphRetriever):
                     system_prompt_path=self.system_prompt_path,
                     system_prompt=self.system_prompt,
                     conversation_history=conversation_history,
+                    response_model=response_model,
                 ),
             )
         else:
@@ -197,6 +209,7 @@ class GraphCompletionRetriever(BaseGraphRetriever):
                 user_prompt_path=self.user_prompt_path,
                 system_prompt_path=self.system_prompt_path,
                 system_prompt=self.system_prompt,
+                response_model=response_model,
             )
         if self.save_interaction and context and triplets and completion:

cognee/modules/retrieval/graph_summary_completion_retriever.py CHANGED Viewed

@@ -26,6 +26,8 @@ class GraphSummaryCompletionRetriever(GraphCompletionRetriever):
         node_type: Optional[Type] = None,
         node_name: Optional[List[str]] = None,
         save_interaction: bool = False,
+        wide_search_top_k: Optional[int] = 100,
+        triplet_distance_penalty: Optional[float] = 3.5,
     ):
         """Initialize retriever with default prompt paths and search parameters."""
         super().__init__(
@@ -36,6 +38,8 @@ class GraphSummaryCompletionRetriever(GraphCompletionRetriever):
             node_name=node_name,
             save_interaction=save_interaction,
             system_prompt=system_prompt,
+            wide_search_top_k=wide_search_top_k,
+            triplet_distance_penalty=triplet_distance_penalty,
         )
         self.summarize_prompt_path = summarize_prompt_path

cognee/modules/retrieval/register_retriever.py ADDED Viewed

@@ -0,0 +1,10 @@
+from typing import Type
+from .base_retriever import BaseRetriever
+from .registered_community_retrievers import registered_community_retrievers
+from ..search.types import SearchType
+def use_retriever(search_type: SearchType, retriever: Type[BaseRetriever]):
+    """Register a retriever class for a given search type."""
+    registered_community_retrievers[search_type] = retriever

cognee/modules/retrieval/registered_community_retrievers.py ADDED Viewed

@@ -0,0 +1 @@
+registered_community_retrievers = {}

cognee/modules/retrieval/temporal_retriever.py CHANGED Viewed

@@ -47,6 +47,8 @@ class TemporalRetriever(GraphCompletionRetriever):
         top_k: Optional[int] = 5,
         node_type: Optional[Type] = None,
         node_name: Optional[List[str]] = None,
+        wide_search_top_k: Optional[int] = 100,
+        triplet_distance_penalty: Optional[float] = 3.5,
     ):
         super().__init__(
             user_prompt_path=user_prompt_path,
@@ -54,6 +56,8 @@ class TemporalRetriever(GraphCompletionRetriever):
             top_k=top_k,
             node_type=node_type,
             node_name=node_name,
+            wide_search_top_k=wide_search_top_k,
+            triplet_distance_penalty=triplet_distance_penalty,
         )
         self.user_prompt_path = user_prompt_path
         self.system_prompt_path = system_prompt_path
@@ -146,8 +150,12 @@ class TemporalRetriever(GraphCompletionRetriever):
         return self.descriptions_to_string(top_k_events)
     async def get_completion(
-        self, query: str, context: Optional[str] = None, session_id: Optional[str] = None
-    ) -> List[str]:
+        self,
+        query: str,
+        context: Optional[str] = None,
+        session_id: Optional[str] = None,
+        response_model: Type = str,
+    ) -> List[Any]:
         """
         Generates a response using the query and optional context.
@@ -159,6 +167,7 @@ class TemporalRetriever(GraphCompletionRetriever):
               retrieved based on the query. (default None)
             - session_id (Optional[str]): Optional session identifier for caching. If None,
               defaults to 'default_session'. (default None)
+            - response_model (Type): The Pydantic model type for structured output. (default str)
         Returns:
         --------
@@ -186,6 +195,7 @@ class TemporalRetriever(GraphCompletionRetriever):
                         user_prompt_path=self.user_prompt_path,
                         system_prompt_path=self.system_prompt_path,
                         conversation_history=conversation_history,
+                        response_model=response_model,
                     ),
                 )
             else:
@@ -194,6 +204,7 @@ class TemporalRetriever(GraphCompletionRetriever):
                     context=context,
                     user_prompt_path=self.user_prompt_path,
                     system_prompt_path=self.system_prompt_path,
+                    response_model=response_model,
                 )
             if session_save:

cognee/modules/retrieval/triplet_retriever.py ADDED Viewed

@@ -0,0 +1,182 @@
+import asyncio
+from typing import Any, Optional, Type, List
+from cognee.shared.logging_utils import get_logger
+from cognee.infrastructure.databases.vector import get_vector_engine
+from cognee.modules.retrieval.utils.completion import generate_completion, summarize_text
+from cognee.modules.retrieval.utils.session_cache import (
+    save_conversation_history,
+    get_conversation_history,
+)
+from cognee.modules.retrieval.base_retriever import BaseRetriever
+from cognee.modules.retrieval.exceptions.exceptions import NoDataError
+from cognee.infrastructure.databases.vector.exceptions import CollectionNotFoundError
+from cognee.context_global_variables import session_user
+from cognee.infrastructure.databases.cache.config import CacheConfig
+logger = get_logger("TripletRetriever")
+class TripletRetriever(BaseRetriever):
+    """
+    Retriever for handling LLM-based completion searches using triplets.
+    Public methods:
+    - get_context(query: str) -> str
+    - get_completion(query: str, context: Optional[Any] = None) -> Any
+    """
+    def __init__(
+        self,
+        user_prompt_path: str = "context_for_question.txt",
+        system_prompt_path: str = "answer_simple_question.txt",
+        system_prompt: Optional[str] = None,
+        top_k: Optional[int] = 5,
+    ):
+        """Initialize retriever with optional custom prompt paths."""
+        self.user_prompt_path = user_prompt_path
+        self.system_prompt_path = system_prompt_path
+        self.top_k = top_k if top_k is not None else 1
+        self.system_prompt = system_prompt
+    async def get_context(self, query: str) -> str:
+        """
+        Retrieves relevant triplets as context.
+        Fetches triplets based on a query from a vector engine and combines their text.
+        Returns empty string if no triplets are found. Raises NoDataError if the collection is not
+        found.
+        Parameters:
+        -----------
+            - query (str): The query string used to search for relevant triplets.
+        Returns:
+        --------
+            - str: A string containing the combined text of the retrieved triplets, or an
+              empty string if none are found.
+        """
+        vector_engine = get_vector_engine()
+        try:
+            if not await vector_engine.has_collection(collection_name="Triplet_text"):
+                logger.error("Triplet_text collection not found")
+                raise NoDataError(
+                    "In order to use TRIPLET_COMPLETION first use the create_triplet_embeddings memify pipeline. "
+                )
+            found_triplets = await vector_engine.search("Triplet_text", query, limit=self.top_k)
+            if len(found_triplets) == 0:
+                return ""
+            triplets_payload = [found_triplet.payload["text"] for found_triplet in found_triplets]
+            combined_context = "\n".join(triplets_payload)
+            return combined_context
+        except CollectionNotFoundError as error:
+            logger.error("Triplet_text collection not found")
+            raise NoDataError("No data found in the system, please add data first.") from error
+    async def get_completion(
+        self,
+        query: str,
+        context: Optional[Any] = None,
+        session_id: Optional[str] = None,
+        response_model: Type = str,
+    ) -> List[Any]:
+        """
+        Generates an LLM completion using the context.
+        Retrieves context if not provided and generates a completion based on the query and
+        context using an external completion generator.
+        Parameters:
+        -----------
+            - query (str): The query string to be used for generating a completion.
+            - context (Optional[Any]): Optional pre-fetched context to use for generating the
+              completion; if None, it retrieves the context for the query. (default None)
+            - session_id (Optional[str]): Optional session identifier for caching. If None,
+              defaults to 'default_session'. (default None)
+            - response_model (Type): The Pydantic model type for structured output. (default str)
+        Returns:
+        --------
+            - Any: The generated completion based on the provided query and context.
+        """
+        if context is None:
+            context = await self.get_context(query)
+        cache_config = CacheConfig()
+        user = session_user.get()
+        user_id = getattr(user, "id", None)
+        session_save = user_id and cache_config.caching
+        if session_save:
+            completion = await self._get_completion_with_session(
+                query=query,
+                context=context,
+                session_id=session_id,
+                response_model=response_model,
+            )
+        else:
+            completion = await self._get_completion_without_session(
+                query=query,
+                context=context,
+                response_model=response_model,
+            )
+        return [completion]
+    async def _get_completion_with_session(
+        self,
+        query: str,
+        context: str,
+        session_id: Optional[str],
+        response_model: Type,
+    ) -> Any:
+        """Generate completion with session history and caching."""
+        conversation_history = await get_conversation_history(session_id=session_id)
+        context_summary, completion = await asyncio.gather(
+            summarize_text(context),
+            generate_completion(
+                query=query,
+                context=context,
+                user_prompt_path=self.user_prompt_path,
+                system_prompt_path=self.system_prompt_path,
+                system_prompt=self.system_prompt,
+                conversation_history=conversation_history,
+                response_model=response_model,
+            ),
+        )
+        await save_conversation_history(
+            query=query,
+            context_summary=context_summary,
+            answer=completion,
+            session_id=session_id,
+        )
+        return completion
+    async def _get_completion_without_session(
+        self,
+        query: str,
+        context: str,
+        response_model: Type,
+    ) -> Any:
+        """Generate completion without session history."""
+        completion = await generate_completion(
+            query=query,
+            context=context,
+            user_prompt_path=self.user_prompt_path,
+            system_prompt_path=self.system_prompt_path,
+            system_prompt=self.system_prompt,
+            response_model=response_model,
+        )
+        return completion

cognee 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

cognee 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl