PyPI - cognee - Versions diffs - 0.5.1.dev0__py3-none-any.whl → 0.5.2.dev0__py3-none-any.whl - Mend

cognee 0.5.1.dev0__py3-none-any.whl → 0.5.2.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (238) hide show

cognee/modules/retrieval/completion_retriever.py CHANGED Viewed

@@ -8,6 +8,7 @@ from cognee.modules.retrieval.utils.session_cache import (
     save_conversation_history,
     get_conversation_history,
 )
+from cognee.modules.retrieval.utils.access_tracking import update_node_access_timestamps
 from cognee.modules.retrieval.base_retriever import BaseRetriever
 from cognee.modules.retrieval.exceptions.exceptions import NoDataError
 from cognee.infrastructure.databases.vector.exceptions import CollectionNotFoundError
@@ -20,10 +21,6 @@ logger = get_logger("CompletionRetriever")
 class CompletionRetriever(BaseRetriever):
     """
     Retriever for handling LLM-based completion searches.
-    Public methods:
-    - get_context(query: str) -> str
-    - get_completion(query: str, context: Optional[Any] = None) -> Any
     """
     def __init__(
@@ -32,14 +29,31 @@ class CompletionRetriever(BaseRetriever):
         system_prompt_path: str = "answer_simple_question.txt",
         system_prompt: Optional[str] = None,
         top_k: Optional[int] = 1,
+        session_id: Optional[str] = None,
+        response_model: Type = str,
     ):
         """Initialize retriever with optional custom prompt paths."""
         self.user_prompt_path = user_prompt_path
         self.system_prompt_path = system_prompt_path
         self.top_k = top_k if top_k is not None else 1
         self.system_prompt = system_prompt
+        self.session_id = session_id
+        self.response_model = response_model
+    async def get_retrieved_objects(self, query: str) -> Any:
+        vector_engine = get_vector_engine()
+        try:
+            found_chunks = await vector_engine.search(
+                "DocumentChunk_text", query, limit=self.top_k, include_payload=True
+            )
+            return found_chunks
+        except CollectionNotFoundError as error:
+            logger.error("DocumentChunk_text collection not found")
+            raise NoDataError("No data found in the system, please add data first.") from error
-    async def get_context(self, query: str) -> str:
+    async def get_context_from_objects(self, query: str, retrieved_objects: Any) -> str:
         """
         Retrieves relevant document chunks as context.
@@ -58,28 +72,18 @@ class CompletionRetriever(BaseRetriever):
             - str: A string containing the combined text of the retrieved document chunks, or an
               empty string if none are found.
         """
-        vector_engine = get_vector_engine()
-        try:
-            found_chunks = await vector_engine.search("DocumentChunk_text", query, limit=self.top_k)
-            if len(found_chunks) == 0:
-                return ""
-            # Combine all chunks text returned from vector search (number of chunks is determined by top_k
-            chunks_payload = [found_chunk.payload["text"] for found_chunk in found_chunks]
+        if retrieved_objects:
+            # Combine all chunks text returned from vector search (number of chunks is determined by top_k)
+            chunks_payload = [found_chunk.payload["text"] for found_chunk in retrieved_objects]
             combined_context = "\n".join(chunks_payload)
             return combined_context
-        except CollectionNotFoundError as error:
-            logger.error("DocumentChunk_text collection not found")
-            raise NoDataError("No data found in the system, please add data first.") from error
+        return ""
-    async def get_completion(
+    async def get_completion_from_context(
         self,
         query: str,
+        retrieved_objects: Any,
         context: Optional[Any] = None,
-        session_id: Optional[str] = None,
-        response_model: Type = str,
     ) -> List[Any]:
         """
         Generates an LLM completion using the context.
@@ -102,9 +106,6 @@ class CompletionRetriever(BaseRetriever):
             - Any: The generated completion based on the provided query and context.
         """
-        if context is None:
-            context = await self.get_context(query)
         # Check if we need to generate context summary for caching
         cache_config = CacheConfig()
         user = session_user.get()
@@ -112,7 +113,7 @@ class CompletionRetriever(BaseRetriever):
         session_save = user_id and cache_config.caching
         if session_save:
-            conversation_history = await get_conversation_history(session_id=session_id)
+            conversation_history = await get_conversation_history(session_id=self.session_id)
             context_summary, completion = await asyncio.gather(
                 summarize_text(context),
@@ -123,7 +124,7 @@ class CompletionRetriever(BaseRetriever):
                     system_prompt_path=self.system_prompt_path,
                     system_prompt=self.system_prompt,
                     conversation_history=conversation_history,
-                    response_model=response_model,
+                    response_model=self.response_model,
                 ),
             )
         else:
@@ -133,7 +134,7 @@ class CompletionRetriever(BaseRetriever):
                 user_prompt_path=self.user_prompt_path,
                 system_prompt_path=self.system_prompt_path,
                 system_prompt=self.system_prompt,
-                response_model=response_model,
+                response_model=self.response_model,
             )
         if session_save:
@@ -141,7 +142,7 @@ class CompletionRetriever(BaseRetriever):
                 query=query,
                 context_summary=context_summary,
                 answer=completion,
-                session_id=session_id,
+                session_id=self.session_id,
             )
         return [completion]

cognee/modules/retrieval/cypher_search_retriever.py CHANGED Viewed

@@ -23,12 +23,29 @@ class CypherSearchRetriever(BaseRetriever):
         self,
         user_prompt_path: str = "context_for_question.txt",
         system_prompt_path: str = "answer_simple_question.txt",
+        session_id: Optional[str] = None,
     ):
         """Initialize retriever with optional custom prompt paths."""
         self.user_prompt_path = user_prompt_path
         self.system_prompt_path = system_prompt_path
+        self.session_id = session_id
-    async def get_context(self, query: str) -> Any:
+    async def get_retrieved_objects(self, query: str) -> Any:
+        try:
+            graph_engine = await get_graph_engine()
+            is_empty = await graph_engine.is_empty()
+            if is_empty:
+                logger.warning("Search attempt on an empty knowledge graph")
+                return []
+            result = await graph_engine.query(query)
+        except Exception as e:
+            logger.error("Failed to execture cypher search retrieval: %s", str(e))
+            raise CypherSearchError() from e
+        return result
+    async def get_context_from_objects(self, query: str, retrieved_objects: Any) -> Any:
         """
         Retrieves relevant context using a cypher query.
@@ -44,22 +61,12 @@ class CypherSearchRetriever(BaseRetriever):
             - Any: The result of the cypher query execution.
         """
-        try:
-            graph_engine = await get_graph_engine()
-            is_empty = await graph_engine.is_empty()
-            if is_empty:
-                logger.warning("Search attempt on an empty knowledge graph")
-                return []
-            result = jsonable_encoder(await graph_engine.query(query))
-        except Exception as e:
-            logger.error("Failed to execture cypher search retrieval: %s", str(e))
-            raise CypherSearchError() from e
-        return result
+        # TODO: Do we want to return a string response here?
+        # return jsonable_encoder(retrieved_objects)
+        return None
-    async def get_completion(
-        self, query: str, context: Optional[Any] = None, session_id: Optional[str] = None
+    async def get_completion_from_context(
+        self, query: str, retrieved_objects: Any, context: Optional[Any] = None
     ) -> Any:
         """
         Returns the graph connections context.
@@ -72,7 +79,6 @@ class CypherSearchRetriever(BaseRetriever):
             - query (str): The query to retrieve context.
             - context (Optional[Any]): Optional context to use, otherwise fetched using the
               query. (default None)
-            - session_id (Optional[str]): Optional session identifier for caching. If None,
               defaults to 'default_session'. (default None)
         Returns:
@@ -80,6 +86,5 @@ class CypherSearchRetriever(BaseRetriever):
             - Any: The context, either provided or retrieved.
         """
-        if context is None:
-            context = await self.get_context(query)
-        return context
+        # TODO: Do we want to generate a completion using LLM here?
+        return None

cognee/modules/retrieval/graph_completion_context_extension_retriever.py CHANGED Viewed

@@ -18,16 +18,6 @@ class GraphCompletionContextExtensionRetriever(GraphCompletionRetriever):
     """
     Handles graph context completion for question answering tasks, extending context based
     on retrieved triplets.
-    Public methods:
-    - get_completion
-    Instance variables:
-    - user_prompt_path
-    - system_prompt_path
-    - top_k
-    - node_type
-    - node_name
     """
     def __init__(
@@ -41,6 +31,9 @@ class GraphCompletionContextExtensionRetriever(GraphCompletionRetriever):
         save_interaction: bool = False,
         wide_search_top_k: Optional[int] = 100,
         triplet_distance_penalty: Optional[float] = 3.5,
+        context_extension_rounds: int = 4,
+        session_id: Optional[str] = None,
+        response_model: Type = str,
     ):
         super().__init__(
             user_prompt_path=user_prompt_path,
@@ -52,53 +45,38 @@ class GraphCompletionContextExtensionRetriever(GraphCompletionRetriever):
             system_prompt=system_prompt,
             wide_search_top_k=wide_search_top_k,
             triplet_distance_penalty=triplet_distance_penalty,
+            session_id=session_id,
+            response_model=response_model,
         )
-    async def get_completion(
-        self,
-        query: str,
-        context: Optional[List[Edge]] = None,
-        session_id: Optional[str] = None,
-        context_extension_rounds=4,
-        response_model: Type = str,
-    ) -> List[Any]:
+        # context_extension_rounds: The maximum number of rounds to extend the context with
+        # new triplets before halting. (default 4)
+        self.context_extension_rounds = context_extension_rounds
+    async def get_retrieved_objects(self, query: str) -> List[Edge]:
         """
         Extends the context for a given query by retrieving related triplets and generating new
         completions based on them.
-        The method runs for a specified number of rounds to enhance context until no new
+        The method runs for a specified number of rounds to enhance results until no new
         triplets are found or the maximum rounds are reached. It retrieves triplet suggestions
         based on a generated completion from previous iterations, logging the process of context
         extension.
         Parameters:
         -----------
             - query (str): The input query for which the completion is generated.
-            - context (Optional[Any]): The existing context to use for enhancing the query; if
-              None, it will be initialized from triplets generated for the query. (default None)
-            - session_id (Optional[str]): Optional session identifier for caching. If None,
-              defaults to 'default_session'. (default None)
-            - context_extension_rounds: The maximum number of rounds to extend the context with
-              new triplets before halting. (default 4)
-            - response_model (Type): The Pydantic model type for structured output. (default str)
         Returns:
         --------
-            - List[str]: A list containing the generated answer based on the query and the
-              extended context.
+            - List[Edge]: A list of retrieved triplet edges relevant to the query.
         """
-        triplets = context
-        if triplets is None:
-            triplets = await self.get_context(query)
+        triplets = await self.get_triplets(query)
         context_text = await self.resolve_edges_to_text(triplets)
         round_idx = 1
-        while round_idx <= context_extension_rounds:
+        while round_idx <= self.context_extension_rounds:
             prev_size = len(triplets)
             logger.info(
@@ -112,7 +90,7 @@ class GraphCompletionContextExtensionRetriever(GraphCompletionRetriever):
                 system_prompt=self.system_prompt,
             )
-            triplets += await self.get_context(completion)
+            triplets += await self.get_triplets(completion)
             triplets = list(set(triplets))
             context_text = await self.resolve_edges_to_text(triplets)
@@ -131,6 +109,24 @@ class GraphCompletionContextExtensionRetriever(GraphCompletionRetriever):
             round_idx += 1
+        return triplets
+    async def get_completion_from_context(
+        self,
+        query: str,
+        retrieved_objects: List[Edge],
+        context: str,
+    ) -> List[Any]:
+        """
+        Returns a human readable answer based on the provided query and extended context derived from the retrieved objects.
+        Returns:
+        --------
+            - List[str]: A list containing the generated answer based on the query and the
+              extended context.
+        """
         # Check if we need to generate context summary for caching
         cache_config = CacheConfig()
         user = session_user.get()
@@ -138,33 +134,33 @@ class GraphCompletionContextExtensionRetriever(GraphCompletionRetriever):
         session_save = user_id and cache_config.caching
         if session_save:
-            conversation_history = await get_conversation_history(session_id=session_id)
+            conversation_history = await get_conversation_history(session_id=self.session_id)
             context_summary, completion = await asyncio.gather(
-                summarize_text(context_text),
+                summarize_text(context),
                 generate_completion(
                     query=query,
-                    context=context_text,
+                    context=context,
                     user_prompt_path=self.user_prompt_path,
                     system_prompt_path=self.system_prompt_path,
                     system_prompt=self.system_prompt,
                     conversation_history=conversation_history,
-                    response_model=response_model,
+                    response_model=self.response_model,
                 ),
             )
         else:
             completion = await generate_completion(
                 query=query,
-                context=context_text,
+                context=context,
                 user_prompt_path=self.user_prompt_path,
                 system_prompt_path=self.system_prompt_path,
                 system_prompt=self.system_prompt,
-                response_model=response_model,
+                response_model=self.response_model,
             )
-        if self.save_interaction and context_text and triplets and completion:
+        if self.save_interaction and context and retrieved_objects and completion:
             await self.save_qa(
-                question=query, answer=completion, context=context_text, triplets=triplets
+                question=query, answer=completion, context=context, triplets=retrieved_objects
             )
         if session_save:
@@ -172,7 +168,7 @@ class GraphCompletionContextExtensionRetriever(GraphCompletionRetriever):
                 query=query,
                 context_summary=context_summary,
                 answer=completion,
-                session_id=session_id,
+                session_id=self.session_id,
             )
         return [completion]

cognee/modules/retrieval/graph_completion_cot_retriever.py CHANGED Viewed

@@ -18,6 +18,7 @@ from cognee.infrastructure.llm.LLMGateway import LLMGateway
 from cognee.infrastructure.llm.prompts import render_prompt, read_query_prompt
 from cognee.context_global_variables import session_user
 from cognee.infrastructure.databases.cache.config import CacheConfig
+from cognee.exceptions.exceptions import CogneeValidationError
 logger = get_logger()
@@ -67,6 +68,9 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever):
         save_interaction: bool = False,
         wide_search_top_k: Optional[int] = 100,
         triplet_distance_penalty: Optional[float] = 3.5,
+        max_iter: int = 4,
+        session_id: Optional[str] = None,
+        response_model: Type = str,
     ):
         super().__init__(
             user_prompt_path=user_prompt_path,
@@ -78,19 +82,68 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever):
             save_interaction=save_interaction,
             wide_search_top_k=wide_search_top_k,
             triplet_distance_penalty=triplet_distance_penalty,
+            session_id=session_id,
+            response_model=response_model,
         )
         self.validation_system_prompt_path = validation_system_prompt_path
         self.validation_user_prompt_path = validation_user_prompt_path
         self.followup_system_prompt_path = followup_system_prompt_path
         self.followup_user_prompt_path = followup_user_prompt_path
+        self.completion = []
+        self.max_iter = max_iter
+    async def get_retrieved_objects(self, query: str) -> List[Edge]:
+        """
+        Run chain-of-thought completion with optional structured output.
+        Parameters:
+        -----------
+            - query: User query
+        Returns:
+        --------
+            - List of retrieved edges
+        """
+        # Check if session saving is enabled
+        cache_config = CacheConfig()
+        user = session_user.get()
+        user_id = getattr(user, "id", None)
+        session_save = user_id and cache_config.caching
+        # Load conversation history if enabled
+        conversation_history = ""
+        if session_save:
+            conversation_history = await get_conversation_history(session_id=self.session_id)
+        completion, context_text, triplets = await self._run_cot_completion(
+            query=query,
+            conversation_history=conversation_history,
+        )
+        # Note: completion info is stored to reduce the need to call LLM again in get_completion_from_context
+        self.completion = completion
+        if self.save_interaction and context_text and triplets and completion:
+            await self.save_qa(
+                question=query, answer=str(completion), context=context_text, triplets=triplets
+            )
+        # Save to session cache if enabled
+        if session_save:
+            context_summary = await summarize_text(context_text)
+            await save_conversation_history(
+                query=query,
+                context_summary=context_summary,
+                answer=str(completion),
+                session_id=self.session_id,
+            )
+        return triplets
     async def _run_cot_completion(
         self,
         query: str,
-        context: Optional[List[Edge]] = None,
         conversation_history: str = "",
-        max_iter: int = 4,
-        response_model: Type = str,
     ) -> tuple[Any, str, List[Edge]]:
         """
         Run chain-of-thought completion with optional structured output.
@@ -113,15 +166,12 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever):
         triplets = []
         completion = ""
-        for round_idx in range(max_iter + 1):
+        for round_idx in range(self.max_iter + 1):
             if round_idx == 0:
-                if context is None:
-                    triplets = await self.get_context(query)
-                    context_text = await self.resolve_edges_to_text(triplets)
-                else:
-                    context_text = await self.resolve_edges_to_text(context)
+                triplets = await self.get_triplets(query)
+                context_text = await self.resolve_edges_to_text(triplets)
             else:
-                triplets += await self.get_context(followup_question)
+                triplets += await self.get_triplets(followup_question)
                 context_text = await self.resolve_edges_to_text(list(set(triplets)))
             completion = await generate_completion(
@@ -131,12 +181,12 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever):
                 system_prompt_path=self.system_prompt_path,
                 system_prompt=self.system_prompt,
                 conversation_history=conversation_history if conversation_history else None,
-                response_model=response_model,
+                response_model=self.response_model,
             )
             logger.info(f"Chain-of-thought: round {round_idx} - answer: {completion}")
-            if round_idx < max_iter:
+            if round_idx < self.max_iter:
                 answer_text = _as_answer_text(completion)
                 valid_args = {"query": query, "answer": answer_text, "context": context_text}
                 valid_user_prompt = render_prompt(
@@ -168,13 +218,11 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever):
         return completion, context_text, triplets
-    async def get_completion(
+    async def get_completion_from_context(
         self,
         query: str,
-        context: Optional[List[Edge]] = None,
-        session_id: Optional[str] = None,
-        max_iter=4,
-        response_model: Type = str,
+        retrieved_objects: List[Edge],
+        context: str,
     ) -> List[Any]:
         """
         Generate completion responses based on a user query and contextual information.
@@ -202,38 +250,7 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever):
             - List[str]: A list containing the generated answer to the user's query.
         """
-        # Check if session saving is enabled
-        cache_config = CacheConfig()
-        user = session_user.get()
-        user_id = getattr(user, "id", None)
-        session_save = user_id and cache_config.caching
-        # Load conversation history if enabled
-        conversation_history = ""
-        if session_save:
-            conversation_history = await get_conversation_history(session_id=session_id)
-        completion, context_text, triplets = await self._run_cot_completion(
-            query=query,
-            context=context,
-            conversation_history=conversation_history,
-            max_iter=max_iter,
-            response_model=response_model,
-        )
-        if self.save_interaction and context and triplets and completion:
-            await self.save_qa(
-                question=query, answer=str(completion), context=context_text, triplets=triplets
-            )
-        # Save to session cache if enabled
-        if session_save:
-            context_summary = await summarize_text(context_text)
-            await save_conversation_history(
-                query=query,
-                context_summary=context_summary,
-                answer=str(completion),
-                session_id=session_id,
-            )
+        if not retrieved_objects:
+            raise CogneeValidationError("No context retrieved to generate completion.")
+        completion = self.completion
         return [completion]

cognee 0.5.1.dev0__py3-none-any.whl → 0.5.2.dev0__py3-none-any.whl

cognee 0.5.1.dev0__py3-none-any.whl → 0.5.2.dev0__py3-none-any.whl