PyPI - cognee - Versions diffs - 0.2.3.dev1__py3-none-any.whl → 0.3.0__py3-none-any.whl - Mend

cognee 0.2.3.dev1__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (252)

cognee/modules/retrieval/graph_completion_retriever.py CHANGED Viewed

@@ -1,19 +1,25 @@
 from typing import Any, Optional, Type, List
-from collections import Counter
-import string
+from uuid import NAMESPACE_OID, uuid5
 from cognee.infrastructure.engine import DataPoint
+from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge
+from cognee.modules.users.methods import get_default_user
+from cognee.tasks.storage import add_data_points
+from cognee.modules.graph.utils import resolve_edges_to_text
 from cognee.modules.graph.utils.convert_node_to_data_point import get_all_subclasses
-from cognee.modules.retrieval.base_retriever import BaseRetriever
+from cognee.modules.retrieval.base_graph_retriever import BaseGraphRetriever
 from cognee.modules.retrieval.utils.brute_force_triplet_search import brute_force_triplet_search
 from cognee.modules.retrieval.utils.completion import generate_completion
-from cognee.modules.retrieval.utils.stop_words import DEFAULT_STOP_WORDS
 from cognee.shared.logging_utils import get_logger
+from cognee.modules.retrieval.utils.extract_uuid_from_node import extract_uuid_from_node
+from cognee.modules.retrieval.utils.models import CogneeUserInteraction
+from cognee.modules.engine.models.node_set import NodeSet
+from cognee.infrastructure.databases.graph import get_graph_engine
 logger = get_logger("GraphCompletionRetriever")
-class GraphCompletionRetriever(BaseRetriever):
+class GraphCompletionRetriever(BaseGraphRetriever):
     """
     Retriever for handling graph-based completion searches.
@@ -30,33 +36,21 @@ class GraphCompletionRetriever(BaseRetriever):
         self,
         user_prompt_path: str = "graph_context_for_question.txt",
         system_prompt_path: str = "answer_simple_question.txt",
+        system_prompt: Optional[str] = None,
         top_k: Optional[int] = 5,
         node_type: Optional[Type] = None,
         node_name: Optional[List[str]] = None,
+        save_interaction: bool = False,
     ):
         """Initialize retriever with prompt paths and search parameters."""
+        self.save_interaction = save_interaction
         self.user_prompt_path = user_prompt_path
         self.system_prompt_path = system_prompt_path
+        self.system_prompt = system_prompt
         self.top_k = top_k if top_k is not None else 5
         self.node_type = node_type
         self.node_name = node_name
-    def _get_nodes(self, retrieved_edges: list) -> dict:
-        """Creates a dictionary of nodes with their names and content."""
-        nodes = {}
-        for edge in retrieved_edges:
-            for node in (edge.node1, edge.node2):
-                if node.id not in nodes:
-                    text = node.attributes.get("text")
-                    if text:
-                        name = self._get_title(text)
-                        content = text
-                    else:
-                        name = node.attributes.get("name", "Unnamed Node")
-                        content = node.attributes.get("description", name)
-                    nodes[node.id] = {"node": node, "name": name, "content": content}
-        return nodes
     async def resolve_edges_to_text(self, retrieved_edges: list) -> str:
         """
         Converts retrieved graph edges into a human-readable string format.
@@ -71,18 +65,9 @@ class GraphCompletionRetriever(BaseRetriever):
             - str: A formatted string representation of the nodes and their connections.
         """
-        nodes = self._get_nodes(retrieved_edges)
-        node_section = "\n".join(
-            f"Node: {info['name']}\n__node_content_start__\n{info['content']}\n__node_content_end__\n"
-            for info in nodes.values()
-        )
-        connection_section = "\n".join(
-            f"{nodes[edge.node1.id]['name']} --[{edge.attributes['relationship_type']}]--> {nodes[edge.node2.id]['name']}"
-            for edge in retrieved_edges
-        )
-        return f"Nodes:\n{node_section}\n\nConnections:\n{connection_section}"
+        return await resolve_edges_to_text(retrieved_edges)
-    async def get_triplets(self, query: str) -> list:
+    async def get_triplets(self, query: str) -> List[Edge]:
         """
         Retrieves relevant graph triplets based on a query string.
@@ -97,7 +82,7 @@ class GraphCompletionRetriever(BaseRetriever):
             - list: A list of found triplets that match the query.
         """
         subclasses = get_all_subclasses(DataPoint)
-        vector_index_collections = []
+        vector_index_collections: List[str] = []
         for subclass in subclasses:
             if "metadata" in subclass.model_fields:
@@ -108,8 +93,11 @@ class GraphCompletionRetriever(BaseRetriever):
                         for field_name in index_fields:
                             vector_index_collections.append(f"{subclass.__name__}_{field_name}")
+        user = await get_default_user()
         found_triplets = await brute_force_triplet_search(
             query,
+            user=user,
             top_k=self.top_k,
             collections=vector_index_collections or None,
             node_type=self.node_type,
@@ -118,7 +106,7 @@ class GraphCompletionRetriever(BaseRetriever):
         return found_triplets
-    async def get_context(self, query: str) -> str:
+    async def get_context(self, query: str) -> List[Edge]:
         """
         Retrieves and resolves graph triplets into context based on a query.
@@ -137,11 +125,17 @@ class GraphCompletionRetriever(BaseRetriever):
         if len(triplets) == 0:
             logger.warning("Empty context was provided to the completion")
-            return ""
+            return []
+        # context = await self.resolve_edges_to_text(triplets)
-        return await self.resolve_edges_to_text(triplets)
+        return triplets
-    async def get_completion(self, query: str, context: Optional[Any] = None) -> Any:
+    async def get_completion(
+        self,
+        query: str,
+        context: Optional[List[Edge]] = None,
+    ) -> Any:
         """
         Generates a completion using graph connections context based on a query.
@@ -157,33 +151,90 @@ class GraphCompletionRetriever(BaseRetriever):
             - Any: A generated completion based on the query and context provided.
         """
-        if context is None:
-            context = await self.get_context(query)
+        triplets = context
+        if triplets is None:
+            triplets = await self.get_context(query)
+        context_text = await resolve_edges_to_text(triplets)
         completion = await generate_completion(
             query=query,
-            context=context,
+            context=context_text,
             user_prompt_path=self.user_prompt_path,
             system_prompt_path=self.system_prompt_path,
+            system_prompt=self.system_prompt,
         )
-        return [completion]
-    def _top_n_words(self, text, stop_words=None, top_n=3, separator=", "):
-        """Concatenates the top N frequent words in text."""
-        if stop_words is None:
-            stop_words = DEFAULT_STOP_WORDS
+        if self.save_interaction and context and triplets and completion:
+            await self.save_qa(
+                question=query, answer=completion, context=context_text, triplets=triplets
+            )
-        words = [word.lower().strip(string.punctuation) for word in text.split()]
+        return completion
-        if stop_words:
-            words = [word for word in words if word and word not in stop_words]
-        top_words = [word for word, freq in Counter(words).most_common(top_n)]
+    async def save_qa(self, question: str, answer: str, context: str, triplets: List) -> None:
+        """
+        Saves a question and answer pair for later analysis or storage.
+        Parameters:
+        -----------
+            - question (str): The question text.
+            - answer (str): The answer text.
+            - context (str): The context text.
+            - triplets (List): A list of triples retrieved from the graph.
+        """
+        nodeset_name = "Interactions"
+        interactions_node_set = NodeSet(
+            id=uuid5(NAMESPACE_OID, name=nodeset_name), name=nodeset_name
+        )
+        source_id = uuid5(NAMESPACE_OID, name=(question + answer + context))
-        return separator.join(top_words)
+        cognee_user_interaction = CogneeUserInteraction(
+            id=source_id,
+            question=question,
+            answer=answer,
+            context=context,
+            belongs_to_set=interactions_node_set,
+        )
-    def _get_title(self, text: str, first_n_words: int = 7, top_n_words: int = 3) -> str:
-        """Creates a title, by combining first words with most frequent words from the text."""
-        first_n_words = text.split()[:first_n_words]
-        top_n_words = self._top_n_words(text, top_n=top_n_words)
-        return f"{' '.join(first_n_words)}... [{top_n_words}]"
+        await add_data_points(data_points=[cognee_user_interaction], update_edge_collection=False)
+        relationships = []
+        relationship_name = "used_graph_element_to_answer"
+        for triplet in triplets:
+            target_id_1 = extract_uuid_from_node(triplet.node1)
+            target_id_2 = extract_uuid_from_node(triplet.node2)
+            if target_id_1 and target_id_2:
+                relationships.append(
+                    (
+                        source_id,
+                        target_id_1,
+                        relationship_name,
+                        {
+                            "relationship_name": relationship_name,
+                            "source_node_id": source_id,
+                            "target_node_id": target_id_1,
+                            "ontology_valid": False,
+                            "feedback_weight": 0,
+                        },
+                    )
+                )
+                relationships.append(
+                    (
+                        source_id,
+                        target_id_2,
+                        relationship_name,
+                        {
+                            "relationship_name": relationship_name,
+                            "source_node_id": source_id,
+                            "target_node_id": target_id_2,
+                            "ontology_valid": False,
+                            "feedback_weight": 0,
+                        },
+                    )
+                )
+            if len(relationships) > 0:
+                graph_engine = await get_graph_engine()
+                await graph_engine.add_edges(relationships)

cognee/modules/retrieval/graph_summary_completion_retriever.py CHANGED Viewed

@@ -21,9 +21,11 @@ class GraphSummaryCompletionRetriever(GraphCompletionRetriever):
         user_prompt_path: str = "graph_context_for_question.txt",
         system_prompt_path: str = "answer_simple_question.txt",
         summarize_prompt_path: str = "summarize_search_results.txt",
+        system_prompt: Optional[str] = None,
         top_k: Optional[int] = 5,
         node_type: Optional[Type] = None,
         node_name: Optional[List[str]] = None,
+        save_interaction: bool = False,
     ):
         """Initialize retriever with default prompt paths and search parameters."""
         super().__init__(
@@ -32,6 +34,8 @@ class GraphSummaryCompletionRetriever(GraphCompletionRetriever):
             top_k=top_k,
             node_type=node_type,
             node_name=node_name,
+            save_interaction=save_interaction,
+            system_prompt=system_prompt,
         )
         self.summarize_prompt_path = summarize_prompt_path
@@ -55,4 +59,4 @@ class GraphSummaryCompletionRetriever(GraphCompletionRetriever):
             - str: A summary string representing the content of the retrieved edges.
         """
         direct_text = await super().resolve_edges_to_text(retrieved_edges)
-        return await summarize_text(direct_text, self.summarize_prompt_path)
+        return await summarize_text(direct_text, self.summarize_prompt_path, self.system_prompt)

cognee/modules/retrieval/insights_retriever.py CHANGED Viewed

@@ -1,17 +1,18 @@
 import asyncio
 from typing import Any, Optional
+from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge, Node
+from cognee.modules.retrieval.base_graph_retriever import BaseGraphRetriever
 from cognee.shared.logging_utils import get_logger
 from cognee.infrastructure.databases.graph import get_graph_engine
 from cognee.infrastructure.databases.vector import get_vector_engine
-from cognee.modules.retrieval.base_retriever import BaseRetriever
 from cognee.modules.retrieval.exceptions.exceptions import NoDataError
 from cognee.infrastructure.databases.vector.exceptions.exceptions import CollectionNotFoundError
 logger = get_logger("InsightsRetriever")
-class InsightsRetriever(BaseRetriever):
+class InsightsRetriever(BaseGraphRetriever):
     """
     Retriever for handling graph connection-based insights.
@@ -95,7 +96,17 @@ class InsightsRetriever(BaseRetriever):
                 unique_node_connections_map[unique_id] = True
                 unique_node_connections.append(node_connection)
-        return unique_node_connections
+        return [
+            Edge(
+                node1=Node(node_id=connection[0]["id"], attributes=connection[0]),
+                node2=Node(node_id=connection[2]["id"], attributes=connection[2]),
+                attributes={
+                    **connection[1],
+                    "relationship_type": connection[1]["relationship_name"],
+                },
+            )
+            for connection in unique_node_connections
+        ]
     async def get_completion(self, query: str, context: Optional[Any] = None) -> Any:
         """

cognee/modules/retrieval/natural_language_retriever.py CHANGED Viewed

@@ -1,7 +1,6 @@
 from typing import Any, Optional
 from cognee.shared.logging_utils import get_logger
 from cognee.infrastructure.databases.graph import get_graph_engine
-from cognee.infrastructure.databases.graph.networkx.adapter import NetworkXAdapter
 from cognee.infrastructure.llm.LLMGateway import LLMGateway
 from cognee.modules.retrieval.base_retriever import BaseRetriever
 from cognee.modules.retrieval.exceptions import SearchTypeNotSupported
@@ -123,9 +122,6 @@ class NaturalLanguageRetriever(BaseRetriever):
         """
         graph_engine = await get_graph_engine()
-        if isinstance(graph_engine, (NetworkXAdapter)):
-            raise SearchTypeNotSupported("Natural language search type not supported.")
         return await self._execute_cypher_query(query, graph_engine)
     async def get_completion(self, query: str, context: Optional[Any] = None) -> Any:

cognee/modules/retrieval/summaries_retriever.py CHANGED Viewed

@@ -62,7 +62,7 @@ class SummariesRetriever(BaseRetriever):
         logger.info(f"Returning {len(summary_payloads)} summary payloads")
         return summary_payloads
-    async def get_completion(self, query: str, context: Optional[Any] = None) -> Any:
+    async def get_completion(self, query: str, context: Optional[Any] = None, **kwargs) -> Any:
         """
         Generates a completion using summaries context.

cognee/modules/retrieval/temporal_retriever.py ADDED Viewed

@@ -0,0 +1,152 @@
+import os
+from typing import Any, Optional, List, Type
+from operator import itemgetter
+from cognee.infrastructure.databases.vector import get_vector_engine
+from cognee.modules.retrieval.utils.completion import generate_completion
+from cognee.infrastructure.databases.graph import get_graph_engine
+from cognee.infrastructure.llm import LLMGateway
+from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever
+from cognee.shared.logging_utils import get_logger
+from cognee.tasks.temporal_graph.models import QueryInterval
+logger = get_logger()
+class TemporalRetriever(GraphCompletionRetriever):
+    """
+    Handles graph completion by generating responses based on a series of interactions with
+    a language model. This class extends from GraphCompletionRetriever and is designed to
+    manage the retrieval and validation process for user queries, integrating follow-up
+    questions based on reasoning. The public methods are:
+    - get_completion
+    Instance variables include:
+    - validation_system_prompt_path
+    - validation_user_prompt_path
+    - followup_system_prompt_path
+    - followup_user_prompt_path
+    """
+    def __init__(
+        self,
+        user_prompt_path: str = "graph_context_for_question.txt",
+        system_prompt_path: str = "answer_simple_question.txt",
+        time_extraction_prompt_path: str = "extract_query_time.txt",
+        top_k: Optional[int] = 5,
+        node_type: Optional[Type] = None,
+        node_name: Optional[List[str]] = None,
+    ):
+        super().__init__(
+            user_prompt_path=user_prompt_path,
+            system_prompt_path=system_prompt_path,
+            top_k=top_k,
+            node_type=node_type,
+            node_name=node_name,
+        )
+        self.user_prompt_path = user_prompt_path
+        self.system_prompt_path = system_prompt_path
+        self.time_extraction_prompt_path = time_extraction_prompt_path
+        self.top_k = top_k if top_k is not None else 5
+        self.node_type = node_type
+        self.node_name = node_name
+    def descriptions_to_string(self, results):
+        descs = []
+        for entry in results:
+            d = entry.get("description")
+            if d:
+                descs.append(d.strip())
+        return "\n#####################\n".join(descs)
+    async def extract_time_from_query(self, query: str):
+        prompt_path = self.time_extraction_prompt_path
+        if os.path.isabs(prompt_path):
+            base_directory = os.path.dirname(prompt_path)
+            prompt_path = os.path.basename(prompt_path)
+        else:
+            base_directory = None
+        system_prompt = LLMGateway.render_prompt(prompt_path, {}, base_directory=base_directory)
+        interval = await LLMGateway.acreate_structured_output(query, system_prompt, QueryInterval)
+        time_from = interval.starts_at
+        time_to = interval.ends_at
+        return time_from, time_to
+    async def filter_top_k_events(self, relevant_events, scored_results):
+        # Build a score lookup from vector search results
+        score_lookup = {res.payload["id"]: res.score for res in scored_results}
+        events_with_scores = []
+        for event in relevant_events[0]["events"]:
+            score = score_lookup.get(event["id"], float("inf"))
+            events_with_scores.append({**event, "score": score})
+        events_with_scores.sort(key=itemgetter("score"))
+        return events_with_scores[: self.top_k]
+    async def get_context(self, query: str) -> Any:
+        """Retrieves context based on the query."""
+        time_from, time_to = await self.extract_time_from_query(query)
+        graph_engine = await get_graph_engine()
+        triplets = []
+        if time_from and time_to:
+            ids = await graph_engine.collect_time_ids(time_from=time_from, time_to=time_to)
+        elif time_from:
+            ids = await graph_engine.collect_time_ids(time_from=time_from)
+        elif time_to:
+            ids = await graph_engine.collect_time_ids(time_to=time_to)
+        else:
+            logger.info(
+                "No timestamps identified based on the query, performing retrieval using triplet search on events and entities."
+            )
+            triplets = await self.get_context(query)
+            return await self.resolve_edges_to_text(triplets)
+        if ids:
+            relevant_events = await graph_engine.collect_events(ids=ids)
+        else:
+            logger.info(
+                "No events identified based on timestamp filtering, performing retrieval using triplet search on events and entities."
+            )
+            triplets = await self.get_context(query)
+            return await self.resolve_edges_to_text(triplets)
+        vector_engine = get_vector_engine()
+        query_vector = (await vector_engine.embedding_engine.embed_text([query]))[0]
+        vector_search_results = await vector_engine.search(
+            collection_name="Event_name", query_vector=query_vector, limit=0
+        )
+        top_k_events = await self.filter_top_k_events(relevant_events, vector_search_results)
+        return self.descriptions_to_string(top_k_events)
+    async def get_completion(self, query: str, context: Optional[str] = None) -> str:
+        """Generates a response using the query and optional context."""
+        if not context:
+            context = await self.get_context(query=query)
+        if context:
+            completion = await generate_completion(
+                query=query,
+                context=context,
+                user_prompt_path=self.user_prompt_path,
+                system_prompt_path=self.system_prompt_path,
+            )
+        return completion

cognee/modules/retrieval/user_qa_feedback.py ADDED Viewed

@@ -0,0 +1,83 @@
+from typing import Any, Optional, List
+from uuid import NAMESPACE_OID, uuid5, UUID
+from cognee.infrastructure.databases.graph import get_graph_engine
+from cognee.infrastructure.llm import LLMGateway
+from cognee.modules.engine.models import NodeSet
+from cognee.shared.logging_utils import get_logger
+from cognee.modules.retrieval.base_feedback import BaseFeedback
+from cognee.modules.retrieval.utils.models import CogneeUserFeedback
+from cognee.modules.retrieval.utils.models import UserFeedbackEvaluation
+from cognee.tasks.storage import add_data_points
+logger = get_logger("CompletionRetriever")
+class UserQAFeedback(BaseFeedback):
+    """
+    Interface for handling user feedback queries.
+    Public methods:
+    - get_context(query: str) -> str
+    - get_completion(query: str, context: Optional[Any] = None) -> Any
+    """
+    def __init__(self, last_k: Optional[int] = 1) -> None:
+        """Initialize retriever with optional custom prompt paths."""
+        self.last_k = last_k
+    async def add_feedback(self, feedback_text: str) -> List[str]:
+        feedback_sentiment = await LLMGateway.acreate_structured_output(
+            text_input=feedback_text,
+            system_prompt="You are a sentiment analysis assistant. For each piece of user feedback you receive, return exactly one of: Positive, Negative, or Neutral classification and a corresponding score from -5 (worst negative) to 5 (best positive)",
+            response_model=UserFeedbackEvaluation,
+        )
+        graph_engine = await get_graph_engine()
+        last_interaction_ids = await graph_engine.get_last_user_interaction_ids(limit=self.last_k)
+        nodeset_name = "UserQAFeedbacks"
+        feedbacks_node_set = NodeSet(id=uuid5(NAMESPACE_OID, name=nodeset_name), name=nodeset_name)
+        feedback_id = uuid5(NAMESPACE_OID, name=feedback_text)
+        cognee_user_feedback = CogneeUserFeedback(
+            id=feedback_id,
+            feedback=feedback_text,
+            sentiment=feedback_sentiment.evaluation.value,
+            score=feedback_sentiment.score,
+            belongs_to_set=feedbacks_node_set,
+        )
+        await add_data_points(data_points=[cognee_user_feedback], update_edge_collection=False)
+        relationships = []
+        relationship_name = "gives_feedback_to"
+        to_node_ids = []
+        for interaction_id in last_interaction_ids:
+            target_id_1 = feedback_id
+            target_id_2 = UUID(interaction_id)
+            if target_id_1 and target_id_2:
+                relationships.append(
+                    (
+                        target_id_1,
+                        target_id_2,
+                        relationship_name,
+                        {
+                            "relationship_name": relationship_name,
+                            "source_node_id": target_id_1,
+                            "target_node_id": target_id_2,
+                            "ontology_valid": False,
+                        },
+                    )
+                )
+                to_node_ids.append(str(target_id_2))
+        if len(relationships) > 0:
+            graph_engine = await get_graph_engine()
+            await graph_engine.add_edges(relationships)
+            await graph_engine.apply_feedback_weight(
+                node_ids=to_node_ids, weight=feedback_sentiment.score
+            )
+        return [feedback_text]

cognee/modules/retrieval/utils/brute_force_triplet_search.py CHANGED Viewed

@@ -8,7 +8,7 @@ from cognee.infrastructure.databases.vector.exceptions import CollectionNotFound
 from cognee.infrastructure.databases.graph import get_graph_engine
 from cognee.infrastructure.databases.vector import get_vector_engine
 from cognee.modules.graph.cognee_graph.CogneeGraph import CogneeGraph
-from cognee.modules.users.methods import get_default_user
+from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge
 from cognee.modules.users.models import User
 from cognee.shared.utils import send_telemetry
@@ -63,9 +63,10 @@ async def get_memory_fragment(
     if properties_to_project is None:
         properties_to_project = ["id", "description", "name", "type", "text"]
+    memory_fragment = CogneeGraph()
     try:
         graph_engine = await get_graph_engine()
-        memory_fragment = CogneeGraph()
         await memory_fragment.project_graph_from_db(
             graph_engine,
@@ -87,41 +88,15 @@ async def get_memory_fragment(
 async def brute_force_triplet_search(
-    query: str,
-    user: User = None,
-    top_k: int = 5,
-    collections: List[str] = None,
-    properties_to_project: List[str] = None,
-    memory_fragment: Optional[CogneeGraph] = None,
-    node_type: Optional[Type] = None,
-    node_name: Optional[List[str]] = None,
-) -> list:
-    if user is None:
-        user = await get_default_user()
-    retrieved_results = await brute_force_search(
-        query,
-        user,
-        top_k,
-        collections=collections,
-        properties_to_project=properties_to_project,
-        memory_fragment=memory_fragment,
-        node_type=node_type,
-        node_name=node_name,
-    )
-    return retrieved_results
-async def brute_force_search(
     query: str,
     user: User,
-    top_k: int,
-    collections: List[str] = None,
-    properties_to_project: List[str] = None,
+    top_k: int = 5,
+    collections: Optional[List[str]] = None,
+    properties_to_project: Optional[List[str]] = None,
     memory_fragment: Optional[CogneeGraph] = None,
     node_type: Optional[Type] = None,
     node_name: Optional[List[str]] = None,
-) -> list:
+) -> List[Edge]:
     """
     Performs a brute force search to retrieve the top triplets from the graph.

cognee 0.2.3.dev1__py3-none-any.whl → 0.3.0__py3-none-any.whl

cognee 0.2.3.dev1__py3-none-any.whl → 0.3.0__py3-none-any.whl