cognee 0.3.7__py3-none-any.whl → 0.3.7.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/api/v1/add/routers/get_add_router.py +6 -1
- cognee/api/v1/cognify/routers/get_cognify_router.py +2 -1
- cognee/api/v1/datasets/routers/get_datasets_router.py +8 -0
- cognee/api/v1/delete/routers/get_delete_router.py +2 -0
- cognee/api/v1/memify/routers/get_memify_router.py +2 -1
- cognee/api/v1/permissions/routers/get_permissions_router.py +6 -0
- cognee/api/v1/search/routers/get_search_router.py +3 -3
- cognee/api/v1/sync/routers/get_sync_router.py +3 -0
- cognee/api/v1/ui/ui.py +2 -4
- cognee/api/v1/update/routers/get_update_router.py +2 -0
- cognee/api/v1/users/routers/get_visualize_router.py +2 -0
- cognee/infrastructure/databases/graph/kuzu/adapter.py +9 -3
- cognee/infrastructure/llm/prompts/feedback_reaction_prompt.txt +14 -0
- cognee/infrastructure/llm/prompts/feedback_report_prompt.txt +13 -0
- cognee/infrastructure/llm/prompts/feedback_user_context_prompt.txt +5 -0
- cognee/modules/pipelines/operations/run_tasks_base.py +7 -0
- cognee/modules/pipelines/operations/run_tasks_with_telemetry.py +9 -1
- cognee/modules/retrieval/graph_completion_cot_retriever.py +137 -38
- cognee/modules/retrieval/utils/completion.py +25 -4
- cognee/modules/search/methods/search.py +17 -3
- cognee/shared/logging_utils.py +18 -11
- cognee/shared/utils.py +24 -2
- cognee/tasks/feedback/__init__.py +13 -0
- cognee/tasks/feedback/create_enrichments.py +84 -0
- cognee/tasks/feedback/extract_feedback_interactions.py +230 -0
- cognee/tasks/feedback/generate_improved_answers.py +130 -0
- cognee/tasks/feedback/link_enrichments_to_feedback.py +67 -0
- cognee/tasks/feedback/models.py +26 -0
- cognee/tests/test_feedback_enrichment.py +174 -0
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +51 -0
- {cognee-0.3.7.dist-info → cognee-0.3.7.dev1.dist-info}/METADATA +1 -1
- {cognee-0.3.7.dist-info → cognee-0.3.7.dev1.dist-info}/RECORD +36 -26
- {cognee-0.3.7.dist-info → cognee-0.3.7.dev1.dist-info}/WHEEL +0 -0
- {cognee-0.3.7.dist-info → cognee-0.3.7.dev1.dist-info}/entry_points.txt +0 -0
- {cognee-0.3.7.dist-info → cognee-0.3.7.dev1.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.3.7.dist-info → cognee-0.3.7.dev1.dist-info}/licenses/NOTICE.md +0 -0

cognee/modules/retrieval/graph_completion_cot_retriever.py
CHANGED

@@ -1,10 +1,15 @@
 import asyncio
+import json
 from typing import Optional, List, Type, Any
+from pydantic import BaseModel
 from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge
 from cognee.shared.logging_utils import get_logger
 
 from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever
-from cognee.modules.retrieval.utils.completion import generate_completion, summarize_text
+from cognee.modules.retrieval.utils.completion import (
+    generate_structured_completion,
+    summarize_text,
+)
 from cognee.modules.retrieval.utils.session_cache import (
     save_conversation_history,
     get_conversation_history,
@@ -17,6 +22,20 @@ from cognee.infrastructure.databases.cache.config import CacheConfig
 logger = get_logger()
 
 
+def _as_answer_text(completion: Any) -> str:
+    """Convert completion to human-readable text for validation and follow-up prompts."""
+    if isinstance(completion, str):
+        return completion
+    if isinstance(completion, BaseModel):
+        # Add notice that this is a structured response
+        json_str = completion.model_dump_json(indent=2)
+        return f"[Structured Response]\n{json_str}"
+    try:
+        return json.dumps(completion, indent=2)
+    except TypeError:
+        return str(completion)
+
+
 class GraphCompletionCotRetriever(GraphCompletionRetriever):
     """
     Handles graph completion by generating responses based on a series of interactions with
@@ -25,6 +44,7 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever):
     questions based on reasoning. The public methods are:
 
     - get_completion
+    - get_structured_completion
 
     Instance variables include:
     - validation_system_prompt_path
@@ -61,51 +81,35 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever):
         self.followup_system_prompt_path = followup_system_prompt_path
         self.followup_user_prompt_path = followup_user_prompt_path
 
-    async def get_completion(
+    async def _run_cot_completion(
         self,
         query: str,
         context: Optional[List[Edge]] = None,
-        session_id: Optional[str] = None,
-        max_iter=4,
-    ) -> List[str]:
+        conversation_history: str = "",
+        max_iter: int = 4,
+        response_model: Type = str,
+    ) -> tuple[Any, str, List[Edge]]:
         """
-        Generate completion responses based on a user query and contextual information.
-
-        This method interacts with a language model client to retrieve a structured response,
-        using a series of iterations to refine the answers and generate follow-up questions
-        based on reasoning derived from previous outputs. It raises exceptions if the context
-        retrieval fails or if the model encounters issues in generating outputs.
+        Run chain-of-thought completion with optional structured output.
 
         Parameters:
         -----------
-
-        - query (str): The user's query to be processed and answered.
-        - context (Optional[Any]): Optional context that may assist in answering the query.
-          If not provided, it will be fetched based on the query. (default None)
-        - session_id (Optional[str]): Optional session identifier for caching. If None,
-          defaults to 'default_session'. (default None)
-        - max_iter: The maximum number of iterations to refine the answer and generate
-          follow-up questions. (default 4)
+        - query: User query
+        - context: Optional pre-fetched context edges
+        - conversation_history: Optional conversation history string
+        - max_iter: Maximum CoT iterations
+        - response_model: Type for structured output (str for plain text)
 
         Returns:
         --------
-
-        - List[str]: A list containing the generated answer to the user's query.
+        - completion_result: The generated completion (string or structured model)
+        - context_text: The resolved context text
+        - triplets: The list of triplets used
         """
         followup_question = ""
         triplets = []
         completion = ""
 
-        # Retrieve conversation history if session saving is enabled
-        cache_config = CacheConfig()
-        user = session_user.get()
-        user_id = getattr(user, "id", None)
-        session_save = user_id and cache_config.caching
-
-        conversation_history = ""
-        if session_save:
-            conversation_history = await get_conversation_history(session_id=session_id)
-
         for round_idx in range(max_iter + 1):
             if round_idx == 0:
                 if context is None:
@@ -117,17 +121,21 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever):
                 triplets += await self.get_context(followup_question)
                 context_text = await self.resolve_edges_to_text(list(set(triplets)))
 
-            completion = await generate_completion(
+            completion = await generate_structured_completion(
                 query=query,
                 context=context_text,
                 user_prompt_path=self.user_prompt_path,
                 system_prompt_path=self.system_prompt_path,
                 system_prompt=self.system_prompt,
-                conversation_history=conversation_history if
+                conversation_history=conversation_history if conversation_history else None,
+                response_model=response_model,
             )
+
             logger.info(f"Chain-of-thought: round {round_idx} - answer: {completion}")
+
             if round_idx < max_iter:
-
+                answer_text = _as_answer_text(completion)
+                valid_args = {"query": query, "answer": answer_text, "context": context_text}
                 valid_user_prompt = render_prompt(
                     filename=self.validation_user_prompt_path, context=valid_args
                 )
@@ -140,7 +148,7 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever):
                 system_prompt=valid_system_prompt,
                 response_model=str,
             )
-            followup_args = {"query": query, "answer": completion, "reasoning": reasoning}
+            followup_args = {"query": query, "answer": answer_text, "reasoning": reasoning}
             followup_prompt = render_prompt(
                 filename=self.followup_user_prompt_path, context=followup_args
             )
@@ -155,19 +163,110 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever):
                 f"Chain-of-thought: round {round_idx} - follow-up question: {followup_question}"
             )
 
+        return completion, context_text, triplets
+
+    async def get_structured_completion(
+        self,
+        query: str,
+        context: Optional[List[Edge]] = None,
+        session_id: Optional[str] = None,
+        max_iter: int = 4,
+        response_model: Type = str,
+    ) -> Any:
+        """
+        Generate structured completion responses based on a user query and contextual information.
+
+        This method applies the same chain-of-thought logic as get_completion but returns
+        structured output using the provided response model.
+
+        Parameters:
+        -----------
+        - query (str): The user's query to be processed and answered.
+        - context (Optional[List[Edge]]): Optional context that may assist in answering the query.
+          If not provided, it will be fetched based on the query. (default None)
+        - session_id (Optional[str]): Optional session identifier for caching. If None,
+          defaults to 'default_session'. (default None)
+        - max_iter: The maximum number of iterations to refine the answer and generate
+          follow-up questions. (default 4)
+        - response_model (Type): The Pydantic model type for structured output. (default str)
+
+        Returns:
+        --------
+        - Any: The generated structured completion based on the response model.
+        """
+        # Check if session saving is enabled
+        cache_config = CacheConfig()
+        user = session_user.get()
+        user_id = getattr(user, "id", None)
+        session_save = user_id and cache_config.caching
+
+        # Load conversation history if enabled
+        conversation_history = ""
+        if session_save:
+            conversation_history = await get_conversation_history(session_id=session_id)
+
+        completion, context_text, triplets = await self._run_cot_completion(
+            query=query,
+            context=context,
+            conversation_history=conversation_history,
+            max_iter=max_iter,
+            response_model=response_model,
+        )
+
         if self.save_interaction and context and triplets and completion:
             await self.save_qa(
-                question=query, answer=completion, context=context_text, triplets=triplets
+                question=query, answer=str(completion), context=context_text, triplets=triplets
             )
 
-        # Save to session cache
+        # Save to session cache if enabled
         if session_save:
             context_summary = await summarize_text(context_text)
             await save_conversation_history(
                 query=query,
                 context_summary=context_summary,
-                answer=completion,
+                answer=str(completion),
                 session_id=session_id,
             )
 
+        return completion
+
+    async def get_completion(
+        self,
+        query: str,
+        context: Optional[List[Edge]] = None,
+        session_id: Optional[str] = None,
+        max_iter=4,
+    ) -> List[str]:
+        """
+        Generate completion responses based on a user query and contextual information.
+
+        This method interacts with a language model client to retrieve a structured response,
+        using a series of iterations to refine the answers and generate follow-up questions
+        based on reasoning derived from previous outputs. It raises exceptions if the context
+        retrieval fails or if the model encounters issues in generating outputs.
+
+        Parameters:
+        -----------
+
+        - query (str): The user's query to be processed and answered.
+        - context (Optional[Any]): Optional context that may assist in answering the query.
+          If not provided, it will be fetched based on the query. (default None)
+        - session_id (Optional[str]): Optional session identifier for caching. If None,
+          defaults to 'default_session'. (default None)
+        - max_iter: The maximum number of iterations to refine the answer and generate
+          follow-up questions. (default 4)
+
+        Returns:
+        --------
+
+        - List[str]: A list containing the generated answer to the user's query.
+        """
+        completion = await self.get_structured_completion(
+            query=query,
+            context=context,
+            session_id=session_id,
+            max_iter=max_iter,
+            response_model=str,
+        )
+
         return [completion]
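
The change above splits the old get_completion loop into a shared _run_cot_completion core with two public entry points. A minimal usage sketch of the new structured entry point, assuming a default-constructed retriever and a hypothetical QAResult model (neither appears in this diff):

import asyncio
from pydantic import BaseModel
from cognee.modules.retrieval.graph_completion_cot_retriever import GraphCompletionCotRetriever

class QAResult(BaseModel):
    # Hypothetical response model for illustration; any Pydantic model can serve as response_model.
    answer: str
    confidence: float

async def main():
    retriever = GraphCompletionCotRetriever()  # assumes the defaults fit your setup
    # The chain-of-thought loop runs exactly as for plain text, but the final
    # completion is parsed into QAResult instead of a str.
    result = await retriever.get_structured_completion(
        query="Who maintains cognee?",
        max_iter=2,
        response_model=QAResult,
    )
    print(result)

asyncio.run(main())

Because get_completion now delegates with response_model=str, existing callers keep receiving a one-element list of plain text.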
cognee/modules/retrieval/utils/completion.py
CHANGED

@@ -1,17 +1,18 @@
-from typing import Optional
+from typing import Optional, Type, Any
 from cognee.infrastructure.llm.LLMGateway import LLMGateway
 from cognee.infrastructure.llm.prompts import render_prompt, read_query_prompt
 
 
-async def generate_completion(
+async def generate_structured_completion(
     query: str,
     context: str,
     user_prompt_path: str,
     system_prompt_path: str,
     system_prompt: Optional[str] = None,
     conversation_history: Optional[str] = None,
-) -> str:
-    """Generates a completion using LLM with given context and prompts."""
+    response_model: Type = str,
+) -> Any:
+    """Generates a structured completion using LLM with given context and prompts."""
     args = {"question": query, "context": context}
     user_prompt = render_prompt(user_prompt_path, args)
     system_prompt = system_prompt if system_prompt else read_query_prompt(system_prompt_path)
@@ -23,6 +24,26 @@ async def generate_completion(
     return await LLMGateway.acreate_structured_output(
         text_input=user_prompt,
         system_prompt=system_prompt,
+        response_model=response_model,
+    )
+
+
+async def generate_completion(
+    query: str,
+    context: str,
+    user_prompt_path: str,
+    system_prompt_path: str,
+    system_prompt: Optional[str] = None,
+    conversation_history: Optional[str] = None,
+) -> str:
+    """Generates a completion using LLM with given context and prompts."""
+    return await generate_structured_completion(
+        query=query,
+        context=context,
+        user_prompt_path=user_prompt_path,
+        system_prompt_path=system_prompt_path,
+        system_prompt=system_prompt,
+        conversation_history=conversation_history,
         response_model=str,
     )
 
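
With this refactor, generate_completion becomes a thin wrapper, so plain-text and structured calls share one code path. A sketch of the two call styles, with placeholder prompt paths and a hypothetical Summary model:

from pydantic import BaseModel
from cognee.modules.retrieval.utils.completion import (
    generate_completion,
    generate_structured_completion,
)

class Summary(BaseModel):
    # Hypothetical model, for illustration only.
    summary: str

async def demo(context: str) -> None:
    # Plain text: forwarded internally with response_model=str.
    text = await generate_completion(
        query="Summarize the context.",
        context=context,
        user_prompt_path="user_prompt.txt",      # placeholder path
        system_prompt_path="system_prompt.txt",  # placeholder path
    )
    # Structured: same prompts, but the LLM output is parsed into Summary.
    structured = await generate_structured_completion(
        query="Summarize the context.",
        context=context,
        user_prompt_path="user_prompt.txt",
        system_prompt_path="system_prompt.txt",
        response_model=Summary,
    )
    print(text, structured.summary)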
cognee/modules/search/methods/search.py
CHANGED

@@ -24,7 +24,7 @@ from cognee.modules.data.models import Dataset
 from cognee.modules.data.methods.get_authorized_existing_datasets import (
     get_authorized_existing_datasets,
 )
-
+from cognee import __version__ as cognee_version
 from .get_search_type_tools import get_search_type_tools
 from .no_access_control_search import no_access_control_search
 from ..utils.prepare_search_result import prepare_search_result
@@ -64,7 +64,14 @@ async def search(
     Searching by dataset is only available in ENABLE_BACKEND_ACCESS_CONTROL mode
     """
     query = await log_query(query_text, query_type.value, user.id)
-    send_telemetry("cognee.search EXECUTION STARTED", user.id)
+    send_telemetry(
+        "cognee.search EXECUTION STARTED",
+        user.id,
+        additional_properties={
+            "cognee_version": cognee_version,
+            "tenant_id": str(user.tenant_id) if user.tenant_id else "Single User Tenant",
+        },
+    )
 
     # Use search function filtered by permissions if access control is enabled
     if os.getenv("ENABLE_BACKEND_ACCESS_CONTROL", "false").lower() == "true":
@@ -101,7 +108,14 @@ async def search(
        )
    ]
 
-    send_telemetry("cognee.search EXECUTION COMPLETED", user.id)
+    send_telemetry(
+        "cognee.search EXECUTION COMPLETED",
+        user.id,
+        additional_properties={
+            "cognee_version": cognee_version,
+            "tenant_id": str(user.tenant_id) if user.tenant_id else "Single User Tenant",
+        },
+    )
 
     await log_result(
         query.id,
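
Both search telemetry events now attach the package version and a tenant identifier, with a literal fallback string for single-user deployments. A small self-contained sketch of the property construction (the helper and the SimpleNamespace user are illustrative, not package API):

from types import SimpleNamespace

def telemetry_properties(user, cognee_version: str) -> dict:
    # Mirrors the additional_properties dict built inline in search().
    return {
        "cognee_version": cognee_version,
        "tenant_id": str(user.tenant_id) if user.tenant_id else "Single User Tenant",
    }

user = SimpleNamespace(id="u1", tenant_id=None)
assert telemetry_properties(user, "0.3.7.dev1")["tenant_id"] == "Single User Tenant"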
cognee/shared/logging_utils.py
CHANGED

@@ -430,6 +430,15 @@ def setup_logging(log_level=None, name=None):
     stream_handler.setFormatter(console_formatter)
     stream_handler.setLevel(log_level)
 
+    root_logger = logging.getLogger()
+    if root_logger.hasHandlers():
+        root_logger.handlers.clear()
+    root_logger.addHandler(stream_handler)
+
+    # Note: the root logger needs to be set to NOTSET to allow all messages through, so that
+    # specific stream and file handlers can define their own levels.
+    root_logger.setLevel(logging.NOTSET)
+
     # Check if we already have a log file path from the environment
     # NOTE: environment variable must be used here as it allows us to
     # log to a single file with a name based on a timestamp in a multiprocess setting.
@@ -441,17 +450,15 @@ def setup_logging(log_level=None, name=None):
     log_file_path = os.path.join(LOGS_DIR, f"{start_time}.log")
     os.environ["LOG_FILE_NAME"] = log_file_path
 
-
-
-
-
-
-
-
-
-
-    root_logger.addHandler(file_handler)
-    root_logger.setLevel(log_level)
+    try:
+        # Create a file handler that uses our custom PlainFileHandler
+        file_handler = PlainFileHandler(log_file_path, encoding="utf-8")
+        file_handler.setLevel(DEBUG)
+        root_logger.addHandler(file_handler)
+    except Exception as e:
+        # Note: exceptions happen in case of read-only file systems or a log file path pointing to a location
+        # without write permission. Logging to file is not mandatory, so we just log a warning to the console.
+        root_logger.warning(f"Warning: Could not create log file handler at {log_file_path}: {e}")
 
     if log_level > logging.DEBUG:
         import warnings
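
The logging change moves level filtering from the root logger to the individual handlers: the root logger is opened up to NOTSET, the console handler filters at the configured level, and the file handler captures everything from DEBUG up, with file logging treated as optional. A standalone stdlib sketch of the same pattern:

import logging

root = logging.getLogger()
root.handlers.clear()
root.setLevel(logging.NOTSET)  # pass every record through to the handlers

console = logging.StreamHandler()
console.setLevel(logging.INFO)  # console stays quiet below INFO
root.addHandler(console)

try:
    file_handler = logging.FileHandler("example.log", encoding="utf-8")
    file_handler.setLevel(logging.DEBUG)  # file keeps full detail
    root.addHandler(file_handler)
except OSError as error:
    # e.g. a read-only file system; file logging is optional here, as in the patch
    root.warning(f"Could not create log file handler: {error}")

logging.getLogger("demo").debug("file only")
logging.getLogger("demo").info("console and file")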
cognee/shared/utils.py
CHANGED

@@ -8,7 +8,7 @@ import http.server
 import socketserver
 from threading import Thread
 import pathlib
-from uuid import uuid4
+from uuid import uuid4, uuid5, NAMESPACE_OID
 
 from cognee.base_config import get_base_config
 from cognee.infrastructure.databases.graph import get_graph_engine
@@ -51,6 +51,26 @@ def get_anonymous_id():
     return anonymous_id
 
 
+def _sanitize_nested_properties(obj, property_names: list[str]):
+    """
+    Recursively replaces any property whose key matches one of `property_names`
+    (e.g., ['url', 'path']) in a nested dict or list with a uuid5 hash
+    of its string value. Returns a new sanitized copy.
+    """
+    if isinstance(obj, dict):
+        new_obj = {}
+        for k, v in obj.items():
+            if k in property_names and isinstance(v, str):
+                new_obj[k] = str(uuid5(NAMESPACE_OID, v))
+            else:
+                new_obj[k] = _sanitize_nested_properties(v, property_names)
+        return new_obj
+    elif isinstance(obj, list):
+        return [_sanitize_nested_properties(item, property_names) for item in obj]
+    else:
+        return obj
+
+
 def send_telemetry(event_name: str, user_id, additional_properties: dict = {}):
     if os.getenv("TELEMETRY_DISABLED"):
         return
@@ -58,7 +78,9 @@ def send_telemetry(event_name: str, user_id, additional_properties: dict = {}):
     env = os.getenv("ENV")
     if env in ["test", "dev"]:
         return
-
+    additional_properties = _sanitize_nested_properties(
+        obj=additional_properties, property_names=["url"]
+    )
     current_time = datetime.now(timezone.utc)
     payload = {
         "anonymous_id": str(get_anonymous_id()),
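
Since uuid5 is deterministic, a given URL always sanitizes to the same UUID string, so telemetry events stay correlatable without exposing the raw value. A quick standalone illustration of the recursion (reimplemented here for clarity rather than importing the private helper):

from uuid import NAMESPACE_OID, uuid5

def sanitize(obj, property_names):
    # Same shape as _sanitize_nested_properties: hash matching string values,
    # recurse into dicts and lists, leave everything else unchanged.
    if isinstance(obj, dict):
        return {
            k: str(uuid5(NAMESPACE_OID, v))
            if k in property_names and isinstance(v, str)
            else sanitize(v, property_names)
            for k, v in obj.items()
        }
    if isinstance(obj, list):
        return [sanitize(item, property_names) for item in obj]
    return obj

event = {"url": "https://example.com/doc", "nested": [{"url": "https://example.com/doc"}]}
out = sanitize(event, ["url"])
assert out["url"] == out["nested"][0]["url"]  # deterministic: same input, same hash
assert out["url"] != event["url"]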
cognee/tasks/feedback/__init__.py
ADDED

@@ -0,0 +1,13 @@
+from .extract_feedback_interactions import extract_feedback_interactions
+from .generate_improved_answers import generate_improved_answers
+from .create_enrichments import create_enrichments
+from .link_enrichments_to_feedback import link_enrichments_to_feedback
+from .models import FeedbackEnrichment
+
+__all__ = [
+    "extract_feedback_interactions",
+    "generate_improved_answers",
+    "create_enrichments",
+    "link_enrichments_to_feedback",
+    "FeedbackEnrichment",
+]
cognee/tasks/feedback/create_enrichments.py
ADDED

@@ -0,0 +1,84 @@
+from __future__ import annotations
+
+from typing import List
+from uuid import NAMESPACE_OID, uuid5
+
+from cognee.infrastructure.llm import LLMGateway
+from cognee.infrastructure.llm.prompts.read_query_prompt import read_query_prompt
+from cognee.shared.logging_utils import get_logger
+from cognee.modules.engine.models import NodeSet
+
+from .models import FeedbackEnrichment
+
+
+logger = get_logger("create_enrichments")
+
+
+def _validate_enrichments(enrichments: List[FeedbackEnrichment]) -> bool:
+    """Validate that all enrichments contain required fields for completion."""
+    return all(
+        enrichment.question is not None
+        and enrichment.original_answer is not None
+        and enrichment.improved_answer is not None
+        and enrichment.new_context is not None
+        and enrichment.feedback_id is not None
+        and enrichment.interaction_id is not None
+        for enrichment in enrichments
+    )
+
+
+async def _generate_enrichment_report(
+    question: str, improved_answer: str, new_context: str, report_prompt_location: str
+) -> str:
+    """Generate educational report using feedback report prompt."""
+    try:
+        prompt_template = read_query_prompt(report_prompt_location)
+        rendered_prompt = prompt_template.format(
+            question=question,
+            improved_answer=improved_answer,
+            new_context=new_context,
+        )
+        return await LLMGateway.acreate_structured_output(
+            text_input=rendered_prompt,
+            system_prompt="You are a helpful assistant that creates educational content.",
+            response_model=str,
+        )
+    except Exception as exc:
+        logger.warning("Failed to generate enrichment report", error=str(exc), question=question)
+        return f"Educational content for: {question} - {improved_answer}"
+
+
+async def create_enrichments(
+    enrichments: List[FeedbackEnrichment],
+    report_prompt_location: str = "feedback_report_prompt.txt",
+) -> List[FeedbackEnrichment]:
+    """Fill text and belongs_to_set fields of existing FeedbackEnrichment DataPoints."""
+    if not enrichments:
+        logger.info("No enrichments provided; returning empty list")
+        return []
+
+    if not _validate_enrichments(enrichments):
+        logger.error("Input validation failed; missing required fields")
+        return []
+
+    logger.info("Completing enrichments", count=len(enrichments))
+
+    nodeset = NodeSet(id=uuid5(NAMESPACE_OID, name="FeedbackEnrichment"), name="FeedbackEnrichment")
+
+    completed_enrichments: List[FeedbackEnrichment] = []
+
+    for enrichment in enrichments:
+        report_text = await _generate_enrichment_report(
+            enrichment.question,
+            enrichment.improved_answer,
+            enrichment.new_context,
+            report_prompt_location,
+        )
+
+        enrichment.text = report_text
+        enrichment.belongs_to_set = [nodeset]
+
+        completed_enrichments.append(enrichment)
+
+    logger.info("Completed enrichments", successful=len(completed_enrichments))
+    return completed_enrichments
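
create_enrichments mutates the DataPoints it is given: each enrichment's text is filled with an LLM-generated report (with a plain-text fallback on failure) and belongs_to_set is pointed at a NodeSet whose id is a uuid5 of the fixed name, so repeated runs attach to the same set. A hedged usage sketch; the FeedbackEnrichment constructor fields are inferred from _validate_enrichments, since models.py is not shown in this diff:

import asyncio
from cognee.tasks.feedback import FeedbackEnrichment, create_enrichments

async def main():
    enrichment = FeedbackEnrichment(
        # Field names taken from _validate_enrichments; exact types live in
        # cognee/tasks/feedback/models.py, which this diff does not display.
        question="What is cognee?",
        original_answer="A memory layer.",
        improved_answer="A memory layer for AI agents, built on knowledge graphs.",
        new_context="...",
        feedback_id="...",
        interaction_id="...",
    )
    completed = await create_enrichments([enrichment])
    if completed:
        print(completed[0].text)  # the generated educational report

asyncio.run(main())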