PyPI - cognee - Versions diffs - 0.2.4__py3-none-any.whl → 0.3.0__py3-none-any.whl - Mend

cognee 0.2.4__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (166) hide show

cognee/modules/retrieval/graph_completion_cot_retriever.py CHANGED Viewed

@@ -1,4 +1,5 @@
-from typing import Any, Optional, List, Type
+from typing import Optional, List, Type
+from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge
 from cognee.shared.logging_utils import get_logger
 from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever
@@ -32,6 +33,7 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever):
         validation_system_prompt_path: str = "cot_validation_system_prompt.txt",
         followup_system_prompt_path: str = "cot_followup_system_prompt.txt",
         followup_user_prompt_path: str = "cot_followup_user_prompt.txt",
+        system_prompt: Optional[str] = None,
         top_k: Optional[int] = 5,
         node_type: Optional[Type] = None,
         node_name: Optional[List[str]] = None,
@@ -40,6 +42,7 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever):
         super().__init__(
             user_prompt_path=user_prompt_path,
             system_prompt_path=system_prompt_path,
+            system_prompt=system_prompt,
             top_k=top_k,
             node_type=node_type,
             node_name=node_name,
@@ -51,8 +54,11 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever):
         self.followup_user_prompt_path = followup_user_prompt_path
     async def get_completion(
-        self, query: str, context: Optional[Any] = None, max_iter=4
-    ) -> List[str]:
+        self,
+        query: str,
+        context: Optional[List[Edge]] = None,
+        max_iter=4,
+    ) -> str:
         """
         Generate completion responses based on a user query and contextual information.
@@ -77,25 +83,29 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever):
         """
         followup_question = ""
         triplets = []
-        completion = [""]
+        completion = ""
         for round_idx in range(max_iter + 1):
             if round_idx == 0:
                 if context is None:
-                    context = await self.get_context(query)
+                    triplets = await self.get_context(query)
+                    context_text = await self.resolve_edges_to_text(triplets)
+                else:
+                    context_text = await self.resolve_edges_to_text(context)
             else:
-                triplets += await self.get_triplets(followup_question)
-                context = await self.resolve_edges_to_text(list(set(triplets)))
+                triplets += await self.get_context(followup_question)
+                context_text = await self.resolve_edges_to_text(list(set(triplets)))
             completion = await generate_completion(
                 query=query,
-                context=context,
+                context=context_text,
                 user_prompt_path=self.user_prompt_path,
                 system_prompt_path=self.system_prompt_path,
+                system_prompt=self.system_prompt,
             )
             logger.info(f"Chain-of-thought: round {round_idx} - answer: {completion}")
             if round_idx < max_iter:
-                valid_args = {"query": query, "answer": completion, "context": context}
+                valid_args = {"query": query, "answer": completion, "context": context_text}
                 valid_user_prompt = LLMGateway.render_prompt(
                     filename=self.validation_user_prompt_path, context=valid_args
                 )
@@ -125,7 +135,7 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever):
         if self.save_interaction and context and triplets and completion:
             await self.save_qa(
-                question=query, answer=completion, context=context, triplets=triplets
+                question=query, answer=completion, context=context_text, triplets=triplets
             )
-        return [completion]
+        return completion

cognee/modules/retrieval/graph_completion_retriever.py CHANGED Viewed

@@ -1,15 +1,15 @@
-from typing import Any, Optional, Type, List, Coroutine
-from collections import Counter
+from typing import Any, Optional, Type, List
 from uuid import NAMESPACE_OID, uuid5
-import string
 from cognee.infrastructure.engine import DataPoint
+from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge
+from cognee.modules.users.methods import get_default_user
 from cognee.tasks.storage import add_data_points
+from cognee.modules.graph.utils import resolve_edges_to_text
 from cognee.modules.graph.utils.convert_node_to_data_point import get_all_subclasses
-from cognee.modules.retrieval.base_retriever import BaseRetriever
+from cognee.modules.retrieval.base_graph_retriever import BaseGraphRetriever
 from cognee.modules.retrieval.utils.brute_force_triplet_search import brute_force_triplet_search
 from cognee.modules.retrieval.utils.completion import generate_completion
-from cognee.modules.retrieval.utils.stop_words import DEFAULT_STOP_WORDS
 from cognee.shared.logging_utils import get_logger
 from cognee.modules.retrieval.utils.extract_uuid_from_node import extract_uuid_from_node
 from cognee.modules.retrieval.utils.models import CogneeUserInteraction
@@ -19,7 +19,7 @@ from cognee.infrastructure.databases.graph import get_graph_engine
 logger = get_logger("GraphCompletionRetriever")
-class GraphCompletionRetriever(BaseRetriever):
+class GraphCompletionRetriever(BaseGraphRetriever):
     """
     Retriever for handling graph-based completion searches.
@@ -36,6 +36,7 @@ class GraphCompletionRetriever(BaseRetriever):
         self,
         user_prompt_path: str = "graph_context_for_question.txt",
         system_prompt_path: str = "answer_simple_question.txt",
+        system_prompt: Optional[str] = None,
         top_k: Optional[int] = 5,
         node_type: Optional[Type] = None,
         node_name: Optional[List[str]] = None,
@@ -45,26 +46,11 @@ class GraphCompletionRetriever(BaseRetriever):
         self.save_interaction = save_interaction
         self.user_prompt_path = user_prompt_path
         self.system_prompt_path = system_prompt_path
+        self.system_prompt = system_prompt
         self.top_k = top_k if top_k is not None else 5
         self.node_type = node_type
         self.node_name = node_name
-    def _get_nodes(self, retrieved_edges: list) -> dict:
-        """Creates a dictionary of nodes with their names and content."""
-        nodes = {}
-        for edge in retrieved_edges:
-            for node in (edge.node1, edge.node2):
-                if node.id not in nodes:
-                    text = node.attributes.get("text")
-                    if text:
-                        name = self._get_title(text)
-                        content = text
-                    else:
-                        name = node.attributes.get("name", "Unnamed Node")
-                        content = node.attributes.get("description", name)
-                    nodes[node.id] = {"node": node, "name": name, "content": content}
-        return nodes
     async def resolve_edges_to_text(self, retrieved_edges: list) -> str:
         """
         Converts retrieved graph edges into a human-readable string format.
@@ -79,18 +65,9 @@ class GraphCompletionRetriever(BaseRetriever):
             - str: A formatted string representation of the nodes and their connections.
         """
-        nodes = self._get_nodes(retrieved_edges)
-        node_section = "\n".join(
-            f"Node: {info['name']}\n__node_content_start__\n{info['content']}\n__node_content_end__\n"
-            for info in nodes.values()
-        )
-        connection_section = "\n".join(
-            f"{nodes[edge.node1.id]['name']} --[{edge.attributes['relationship_type']}]--> {nodes[edge.node2.id]['name']}"
-            for edge in retrieved_edges
-        )
-        return f"Nodes:\n{node_section}\n\nConnections:\n{connection_section}"
+        return await resolve_edges_to_text(retrieved_edges)
-    async def get_triplets(self, query: str) -> list:
+    async def get_triplets(self, query: str) -> List[Edge]:
         """
         Retrieves relevant graph triplets based on a query string.
@@ -105,7 +82,7 @@ class GraphCompletionRetriever(BaseRetriever):
             - list: A list of found triplets that match the query.
         """
         subclasses = get_all_subclasses(DataPoint)
-        vector_index_collections = []
+        vector_index_collections: List[str] = []
         for subclass in subclasses:
             if "metadata" in subclass.model_fields:
@@ -116,8 +93,11 @@ class GraphCompletionRetriever(BaseRetriever):
                         for field_name in index_fields:
                             vector_index_collections.append(f"{subclass.__name__}_{field_name}")
+        user = await get_default_user()
         found_triplets = await brute_force_triplet_search(
             query,
+            user=user,
             top_k=self.top_k,
             collections=vector_index_collections or None,
             node_type=self.node_type,
@@ -126,7 +106,7 @@ class GraphCompletionRetriever(BaseRetriever):
         return found_triplets
-    async def get_context(self, query: str) -> str | tuple[str, list]:
+    async def get_context(self, query: str) -> List[Edge]:
         """
         Retrieves and resolves graph triplets into context based on a query.
@@ -145,13 +125,17 @@ class GraphCompletionRetriever(BaseRetriever):
         if len(triplets) == 0:
             logger.warning("Empty context was provided to the completion")
-            return "", triplets
+            return []
-        context = await self.resolve_edges_to_text(triplets)
+        # context = await self.resolve_edges_to_text(triplets)
-        return context, triplets
+        return triplets
-    async def get_completion(self, query: str, context: Optional[Any] = None) -> Any:
+    async def get_completion(
+        self,
+        query: str,
+        context: Optional[List[Edge]] = None,
+    ) -> Any:
         """
         Generates a completion using graph connections context based on a query.
@@ -167,44 +151,27 @@ class GraphCompletionRetriever(BaseRetriever):
             - Any: A generated completion based on the query and context provided.
         """
-        triplets = None
+        triplets = context
+        if triplets is None:
+            triplets = await self.get_context(query)
-        if context is None:
-            context, triplets = await self.get_context(query)
+        context_text = await resolve_edges_to_text(triplets)
         completion = await generate_completion(
             query=query,
-            context=context,
+            context=context_text,
             user_prompt_path=self.user_prompt_path,
             system_prompt_path=self.system_prompt_path,
+            system_prompt=self.system_prompt,
         )
         if self.save_interaction and context and triplets and completion:
             await self.save_qa(
-                question=query, answer=completion, context=context, triplets=triplets
+                question=query, answer=completion, context=context_text, triplets=triplets
             )
-        return [completion]
-    def _top_n_words(self, text, stop_words=None, top_n=3, separator=", "):
-        """Concatenates the top N frequent words in text."""
-        if stop_words is None:
-            stop_words = DEFAULT_STOP_WORDS
-        words = [word.lower().strip(string.punctuation) for word in text.split()]
-        if stop_words:
-            words = [word for word in words if word and word not in stop_words]
-        top_words = [word for word, freq in Counter(words).most_common(top_n)]
-        return separator.join(top_words)
-    def _get_title(self, text: str, first_n_words: int = 7, top_n_words: int = 3) -> str:
-        """Creates a title, by combining first words with most frequent words from the text."""
-        first_n_words = text.split()[:first_n_words]
-        top_n_words = self._top_n_words(text, top_n=top_n_words)
-        return f"{' '.join(first_n_words)}... [{top_n_words}]"
+        return completion
     async def save_qa(self, question: str, answer: str, context: str, triplets: List) -> None:
         """

cognee/modules/retrieval/graph_summary_completion_retriever.py CHANGED Viewed

@@ -21,6 +21,7 @@ class GraphSummaryCompletionRetriever(GraphCompletionRetriever):
         user_prompt_path: str = "graph_context_for_question.txt",
         system_prompt_path: str = "answer_simple_question.txt",
         summarize_prompt_path: str = "summarize_search_results.txt",
+        system_prompt: Optional[str] = None,
         top_k: Optional[int] = 5,
         node_type: Optional[Type] = None,
         node_name: Optional[List[str]] = None,
@@ -34,6 +35,7 @@ class GraphSummaryCompletionRetriever(GraphCompletionRetriever):
             node_type=node_type,
             node_name=node_name,
             save_interaction=save_interaction,
+            system_prompt=system_prompt,
         )
         self.summarize_prompt_path = summarize_prompt_path
@@ -57,4 +59,4 @@ class GraphSummaryCompletionRetriever(GraphCompletionRetriever):
             - str: A summary string representing the content of the retrieved edges.
         """
         direct_text = await super().resolve_edges_to_text(retrieved_edges)
-        return await summarize_text(direct_text, self.summarize_prompt_path)
+        return await summarize_text(direct_text, self.summarize_prompt_path, self.system_prompt)

cognee/modules/retrieval/insights_retriever.py CHANGED Viewed

@@ -1,17 +1,18 @@
 import asyncio
 from typing import Any, Optional
+from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge, Node
+from cognee.modules.retrieval.base_graph_retriever import BaseGraphRetriever
 from cognee.shared.logging_utils import get_logger
 from cognee.infrastructure.databases.graph import get_graph_engine
 from cognee.infrastructure.databases.vector import get_vector_engine
-from cognee.modules.retrieval.base_retriever import BaseRetriever
 from cognee.modules.retrieval.exceptions.exceptions import NoDataError
 from cognee.infrastructure.databases.vector.exceptions.exceptions import CollectionNotFoundError
 logger = get_logger("InsightsRetriever")
-class InsightsRetriever(BaseRetriever):
+class InsightsRetriever(BaseGraphRetriever):
     """
     Retriever for handling graph connection-based insights.
@@ -95,7 +96,17 @@ class InsightsRetriever(BaseRetriever):
                 unique_node_connections_map[unique_id] = True
                 unique_node_connections.append(node_connection)
-        return unique_node_connections
+        return [
+            Edge(
+                node1=Node(node_id=connection[0]["id"], attributes=connection[0]),
+                node2=Node(node_id=connection[2]["id"], attributes=connection[2]),
+                attributes={
+                    **connection[1],
+                    "relationship_type": connection[1]["relationship_name"],
+                },
+            )
+            for connection in unique_node_connections
+        ]
     async def get_completion(self, query: str, context: Optional[Any] = None) -> Any:
         """

cognee/modules/retrieval/summaries_retriever.py CHANGED Viewed

@@ -62,7 +62,7 @@ class SummariesRetriever(BaseRetriever):
         logger.info(f"Returning {len(summary_payloads)} summary payloads")
         return summary_payloads
-    async def get_completion(self, query: str, context: Optional[Any] = None) -> Any:
+    async def get_completion(self, query: str, context: Optional[Any] = None, **kwargs) -> Any:
         """
         Generates a completion using summaries context.

cognee/modules/retrieval/temporal_retriever.py ADDED Viewed

@@ -0,0 +1,152 @@
+import os
+from typing import Any, Optional, List, Type
+from operator import itemgetter
+from cognee.infrastructure.databases.vector import get_vector_engine
+from cognee.modules.retrieval.utils.completion import generate_completion
+from cognee.infrastructure.databases.graph import get_graph_engine
+from cognee.infrastructure.llm import LLMGateway
+from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever
+from cognee.shared.logging_utils import get_logger
+from cognee.tasks.temporal_graph.models import QueryInterval
+logger = get_logger()
+class TemporalRetriever(GraphCompletionRetriever):
+    """
+    Graph retriever for questions that refer to a point or span in time.
+    The query is sent to the LLM to extract a time interval. Time ids for that interval
+    are collected from the graph engine and resolved to events, the events are ranked
+    against the query using the "Event_name" vector collection, and their descriptions
+    become the context. If no interval is extracted, or no ids match it, retrieval falls
+    back to the triplet search inherited from GraphCompletionRetriever. The public
+    methods are:
+    - get_context
+    - get_completion
+    Instance variables include:
+    - time_extraction_prompt_path
+    """
+    def __init__(
+        self,
+        user_prompt_path: str = "graph_context_for_question.txt",
+        system_prompt_path: str = "answer_simple_question.txt",
+        time_extraction_prompt_path: str = "extract_query_time.txt",
+        top_k: Optional[int] = 5,
+        node_type: Optional[Type] = None,
+        node_name: Optional[List[str]] = None,
+    ):
+        """
+        Initializes the retriever.
+        Parameters:
+        -----------
+            - user_prompt_path (str): Prompt template rendered with the question and context.
+            - system_prompt_path (str): System prompt used when generating the answer.
+            - time_extraction_prompt_path (str): Prompt used to extract the time interval
+              from the query; may be a bare file name or an absolute path.
+            - top_k (Optional[int]): Maximum number of events kept for the context.
+            - node_type (Optional[Type]): Forwarded to the parent retriever.
+            - node_name (Optional[List[str]]): Forwarded to the parent retriever.
+        """
+        # The parent constructor already stores the prompt paths, top_k (None becomes 5),
+        # node_type and node_name, so only the temporal-specific attribute is set here.
+        super().__init__(
+            user_prompt_path=user_prompt_path,
+            system_prompt_path=system_prompt_path,
+            top_k=top_k,
+            node_type=node_type,
+            node_name=node_name,
+        )
+        self.time_extraction_prompt_path = time_extraction_prompt_path
+    def descriptions_to_string(self, results):
+        """
+        Joins the "description" values of the given entries into a single string.
+        Parameters:
+        -----------
+            - results: Iterable of dict-like entries; entries whose "description" is
+              missing or empty are skipped.
+        Returns:
+        --------
+            - str: The stripped descriptions separated by a line of '#' characters.
+        """
+        descriptions = (entry.get("description") for entry in results)
+        return "\n#####################\n".join(d.strip() for d in descriptions if d)
+    async def extract_time_from_query(self, query: str):
+        """
+        Asks the LLM to extract a time interval from the query.
+        Parameters:
+        -----------
+            - query (str): The user question.
+        Returns:
+        --------
+            - tuple: (starts_at, ends_at) taken from the QueryInterval produced by the LLM.
+        """
+        prompt_path = self.time_extraction_prompt_path
+        # An absolute path is split into directory + file name so the prompt renderer
+        # can be pointed at a custom base directory.
+        if os.path.isabs(prompt_path):
+            base_directory = os.path.dirname(prompt_path)
+            prompt_path = os.path.basename(prompt_path)
+        else:
+            base_directory = None
+        system_prompt = LLMGateway.render_prompt(prompt_path, {}, base_directory=base_directory)
+        interval = await LLMGateway.acreate_structured_output(query, system_prompt, QueryInterval)
+        return interval.starts_at, interval.ends_at
+    async def filter_top_k_events(self, relevant_events, scored_results):
+        """
+        Attaches vector-search scores to the collected events and keeps the first top_k.
+        Parameters:
+        -----------
+            - relevant_events: Result of graph_engine.collect_events; the events are read
+              from relevant_events[0]["events"].
+            - scored_results: Vector search results exposing payload["id"] and score.
+        Returns:
+        --------
+            - list: At most top_k event dicts, each extended with a "score" key, sorted by
+              ascending score. Empty when no events were collected.
+        """
+        if not relevant_events:
+            return []
+        # Build a score lookup from vector search results
+        score_lookup = {res.payload["id"]: res.score for res in scored_results}
+        # Events without a vector hit get +inf so they sort last.
+        # NOTE(review): ascending sort assumes a lower score means a closer match - confirm.
+        events_with_scores = [
+            {**event, "score": score_lookup.get(event["id"], float("inf"))}
+            for event in relevant_events[0]["events"]
+        ]
+        events_with_scores.sort(key=itemgetter("score"))
+        return events_with_scores[: self.top_k]
+    async def _triplet_search_context(self, query: str) -> str:
+        """Fallback context: inherited triplet search resolved to text."""
+        triplets = await self.get_triplets(query)
+        return await self.resolve_edges_to_text(triplets)
+    async def get_context(self, query: str) -> Any:
+        """
+        Retrieves context based on the query.
+        Parameters:
+        -----------
+            - query (str): The user question.
+        Returns:
+        --------
+            - str: Joined descriptions of the top_k events in the extracted interval, or
+              the text form of the triplet search results when no timestamps or no
+              matching time ids are found.
+        """
+        time_from, time_to = await self.extract_time_from_query(query)
+        graph_engine = await get_graph_engine()
+        if not (time_from or time_to):
+            logger.info(
+                "No timestamps identified based on the query, performing retrieval using triplet search on events and entities."
+            )
+            # Must not call self.get_context here: that would re-enter this method and
+            # recurse without bound.
+            return await self._triplet_search_context(query)
+        # Only pass the bounds that were actually extracted.
+        bounds = {}
+        if time_from:
+            bounds["time_from"] = time_from
+        if time_to:
+            bounds["time_to"] = time_to
+        ids = await graph_engine.collect_time_ids(**bounds)
+        if not ids:
+            logger.info(
+                "No events identified based on timestamp filtering, performing retrieval using triplet search on events and entities."
+            )
+            return await self._triplet_search_context(query)
+        relevant_events = await graph_engine.collect_events(ids=ids)
+        vector_engine = get_vector_engine()
+        query_vector = (await vector_engine.embedding_engine.embed_text([query]))[0]
+        # NOTE(review): limit=0 is kept as released; presumably the vector engine treats
+        # it as "no limit" so every event can be scored - confirm against the adapter.
+        vector_search_results = await vector_engine.search(
+            collection_name="Event_name", query_vector=query_vector, limit=0
+        )
+        top_k_events = await self.filter_top_k_events(relevant_events, vector_search_results)
+        return self.descriptions_to_string(top_k_events)
+    async def get_completion(self, query: str, context: Optional[str] = None) -> str:
+        """
+        Generates a response using the query and optional context.
+        Parameters:
+        -----------
+            - query (str): The user question.
+            - context (Optional[str]): Pre-computed context; retrieved via get_context
+              when missing or empty.
+        Returns:
+        --------
+            - str: The generated completion, or an empty string when no context is
+              available.
+        """
+        if not context:
+            context = await self.get_context(query=query)
+        # Bound up front so an empty context returns "" instead of raising
+        # UnboundLocalError at the return statement.
+        completion = ""
+        if context:
+            completion = await generate_completion(
+                query=query,
+                context=context,
+                user_prompt_path=self.user_prompt_path,
+                system_prompt_path=self.system_prompt_path,
+            )
+        return completion

cognee/modules/retrieval/utils/brute_force_triplet_search.py CHANGED Viewed

@@ -8,7 +8,7 @@ from cognee.infrastructure.databases.vector.exceptions import CollectionNotFound
 from cognee.infrastructure.databases.graph import get_graph_engine
 from cognee.infrastructure.databases.vector import get_vector_engine
 from cognee.modules.graph.cognee_graph.CogneeGraph import CogneeGraph
-from cognee.modules.users.methods import get_default_user
+from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge
 from cognee.modules.users.models import User
 from cognee.shared.utils import send_telemetry
@@ -63,9 +63,10 @@ async def get_memory_fragment(
     if properties_to_project is None:
         properties_to_project = ["id", "description", "name", "type", "text"]
+    memory_fragment = CogneeGraph()
     try:
         graph_engine = await get_graph_engine()
-        memory_fragment = CogneeGraph()
         await memory_fragment.project_graph_from_db(
             graph_engine,
@@ -87,41 +88,15 @@ async def get_memory_fragment(
 async def brute_force_triplet_search(
-    query: str,
-    user: User = None,
-    top_k: int = 5,
-    collections: List[str] = None,
-    properties_to_project: List[str] = None,
-    memory_fragment: Optional[CogneeGraph] = None,
-    node_type: Optional[Type] = None,
-    node_name: Optional[List[str]] = None,
-) -> list:
-    if user is None:
-        user = await get_default_user()
-    retrieved_results = await brute_force_search(
-        query,
-        user,
-        top_k,
-        collections=collections,
-        properties_to_project=properties_to_project,
-        memory_fragment=memory_fragment,
-        node_type=node_type,
-        node_name=node_name,
-    )
-    return retrieved_results
-async def brute_force_search(
     query: str,
     user: User,
-    top_k: int,
-    collections: List[str] = None,
-    properties_to_project: List[str] = None,
+    top_k: int = 5,
+    collections: Optional[List[str]] = None,
+    properties_to_project: Optional[List[str]] = None,
     memory_fragment: Optional[CogneeGraph] = None,
     node_type: Optional[Type] = None,
     node_name: Optional[List[str]] = None,
-) -> list:
+) -> List[Edge]:
     """
     Performs a brute force search to retrieve the top triplets from the graph.

cognee/modules/retrieval/utils/completion.py CHANGED Viewed

@@ -1,3 +1,4 @@
+from typing import Optional
 from cognee.infrastructure.llm.LLMGateway import LLMGateway
@@ -6,11 +7,14 @@ async def generate_completion(
     context: str,
     user_prompt_path: str,
     system_prompt_path: str,
+    system_prompt: Optional[str] = None,
 ) -> str:
     """Generates a completion using LLM with given context and prompts."""
     args = {"question": query, "context": context}
     user_prompt = LLMGateway.render_prompt(user_prompt_path, args)
-    system_prompt = LLMGateway.read_query_prompt(system_prompt_path)
+    system_prompt = (
+        system_prompt if system_prompt else LLMGateway.read_query_prompt(system_prompt_path)
+    )
     return await LLMGateway.acreate_structured_output(
         text_input=user_prompt,
@@ -21,10 +25,13 @@ async def generate_completion(
 async def summarize_text(
     text: str,
-    prompt_path: str = "summarize_search_results.txt",
+    system_prompt_path: str = "summarize_search_results.txt",
+    system_prompt: str = None,
 ) -> str:
     """Summarizes text using LLM with the specified prompt."""
-    system_prompt = LLMGateway.read_query_prompt(prompt_path)
+    system_prompt = (
+        system_prompt if system_prompt else LLMGateway.read_query_prompt(system_prompt_path)
+    )
     return await LLMGateway.acreate_structured_output(
         text_input=text,

cognee 0.2.4__py3-none-any.whl → 0.3.0__py3-none-any.whl

cognee 0.2.4__py3-none-any.whl → 0.3.0__py3-none-any.whl