cognee 0.3.7.dev2__py3-none-any.whl → 0.3.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cognee/base_config.py CHANGED
@@ -1,4 +1,5 @@
1
1
  import os
2
+ from pathlib import Path
2
3
  from typing import Optional
3
4
  from functools import lru_cache
4
5
  from cognee.root_dir import get_absolute_path, ensure_absolute_path
@@ -11,6 +12,9 @@ class BaseConfig(BaseSettings):
11
12
  data_root_directory: str = get_absolute_path(".data_storage")
12
13
  system_root_directory: str = get_absolute_path(".cognee_system")
13
14
  cache_root_directory: str = get_absolute_path(".cognee_cache")
15
+ logs_root_directory: str = os.getenv(
16
+ "COGNEE_LOGS_DIR", str(os.path.join(os.path.dirname(os.path.dirname(__file__)), "logs"))
17
+ )
14
18
  monitoring_tool: object = Observer.NONE
15
19
 
16
20
  @pydantic.model_validator(mode="after")
@@ -30,6 +34,8 @@ class BaseConfig(BaseSettings):
30
34
  # Require absolute paths for root directories
31
35
  self.data_root_directory = ensure_absolute_path(self.data_root_directory)
32
36
  self.system_root_directory = ensure_absolute_path(self.system_root_directory)
37
+ self.logs_root_directory = ensure_absolute_path(self.logs_root_directory)
38
+
33
39
  # Set monitoring tool based on available keys
34
40
  if self.langfuse_public_key and self.langfuse_secret_key:
35
41
  self.monitoring_tool = Observer.LANGFUSE
@@ -49,6 +55,7 @@ class BaseConfig(BaseSettings):
49
55
  "system_root_directory": self.system_root_directory,
50
56
  "monitoring_tool": self.monitoring_tool,
51
57
  "cache_root_directory": self.cache_root_directory,
58
+ "logs_root_directory": self.logs_root_directory,
52
59
  }
53
60
 
54
61
 
@@ -47,7 +47,7 @@ def create_vector_engine(
47
47
  embedding_engine=embedding_engine,
48
48
  )
49
49
 
50
- if vector_db_provider == "pgvector":
50
+ if vector_db_provider.lower() == "pgvector":
51
51
  from cognee.infrastructure.databases.relational import get_relational_config
52
52
 
53
53
  # Get configuration for postgres database
@@ -78,7 +78,7 @@ def create_vector_engine(
78
78
  embedding_engine,
79
79
  )
80
80
 
81
- elif vector_db_provider == "chromadb":
81
+ elif vector_db_provider.lower() == "chromadb":
82
82
  try:
83
83
  import chromadb
84
84
  except ImportError:
@@ -94,7 +94,7 @@ def create_vector_engine(
94
94
  embedding_engine=embedding_engine,
95
95
  )
96
96
 
97
- elif vector_db_provider == "neptune_analytics":
97
+ elif vector_db_provider.lower() == "neptune_analytics":
98
98
  try:
99
99
  from langchain_aws import NeptuneAnalyticsGraph
100
100
  except ImportError:
@@ -122,7 +122,7 @@ def create_vector_engine(
122
122
  embedding_engine=embedding_engine,
123
123
  )
124
124
 
125
- else:
125
+ elif vector_db_provider.lower() == "lancedb":
126
126
  from .lancedb.LanceDBAdapter import LanceDBAdapter
127
127
 
128
128
  return LanceDBAdapter(
@@ -130,3 +130,9 @@ def create_vector_engine(
130
130
  api_key=vector_db_key,
131
131
  embedding_engine=embedding_engine,
132
132
  )
133
+
134
+ else:
135
+ raise EnvironmentError(
136
+ f"Unsupported vector database provider: {vector_db_provider}. "
137
+ f"Supported providers are: {', '.join(list(supported_databases.keys()) + ['LanceDB', 'PGVector', 'neptune_analytics', 'ChromaDB'])}"
138
+ )
@@ -22,89 +22,6 @@ class FileTypeException(Exception):
22
22
  self.message = message
23
23
 
24
24
 
25
- class TxtFileType(filetype.Type):
26
- """
27
- Represents a text file type with specific MIME and extension properties.
28
-
29
- Public methods:
30
- - match: Determines whether a given buffer matches the text file type.
31
- """
32
-
33
- MIME = "text/plain"
34
- EXTENSION = "txt"
35
-
36
- def __init__(self):
37
- super(TxtFileType, self).__init__(mime=TxtFileType.MIME, extension=TxtFileType.EXTENSION)
38
-
39
- def match(self, buf):
40
- """
41
- Determine if the given buffer contains text content.
42
-
43
- Parameters:
44
- -----------
45
-
46
- - buf: The buffer to check for text content.
47
-
48
- Returns:
49
- --------
50
-
51
- Returns True if the buffer is identified as text content, otherwise False.
52
- """
53
- return is_text_content(buf)
54
-
55
-
56
- txt_file_type = TxtFileType()
57
-
58
- filetype.add_type(txt_file_type)
59
-
60
-
61
- class CustomPdfMatcher(filetype.Type):
62
- """
63
- Match PDF file types based on MIME type and extension.
64
-
65
- Public methods:
66
- - match
67
-
68
- Instance variables:
69
- - MIME: The MIME type of the PDF.
70
- - EXTENSION: The file extension of the PDF.
71
- """
72
-
73
- MIME = "application/pdf"
74
- EXTENSION = "pdf"
75
-
76
- def __init__(self):
77
- super(CustomPdfMatcher, self).__init__(
78
- mime=CustomPdfMatcher.MIME, extension=CustomPdfMatcher.EXTENSION
79
- )
80
-
81
- def match(self, buf):
82
- """
83
- Determine if the provided buffer is a PDF file.
84
-
85
- This method checks for the presence of the PDF signature in the buffer.
86
-
87
- Raises:
88
- - TypeError: If the buffer is not of bytes type.
89
-
90
- Parameters:
91
- -----------
92
-
93
- - buf: The buffer containing the data to be checked.
94
-
95
- Returns:
96
- --------
97
-
98
- Returns True if the buffer contains a PDF signature, otherwise returns False.
99
- """
100
- return b"PDF-" in buf
101
-
102
-
103
- custom_pdf_matcher = CustomPdfMatcher()
104
-
105
- filetype.add_type(custom_pdf_matcher)
106
-
107
-
108
25
  def guess_file_type(file: BinaryIO) -> filetype.Type:
109
26
  """
110
27
  Guess the file type from the given binary file stream.
@@ -1,15 +1,13 @@
1
- For the purposes of identifying timestamps in a query, you are tasked with extracting relevant timestamps from the query.
2
- ## Timestamp requirements
3
- - If the query contains interval extrack both starts_at and ends_at properties
4
- - If the query contains an instantaneous timestamp, starts_at and ends_at should be the same
5
- - If the query its open-ended (before 2009 or after 2009), the corresponding non defined end of the time should be none
6
- -For example: "before 2009" -- starts_at: None, ends_at: 2009 or "after 2009" -- starts_at: 2009, ends_at: None
7
- - Put always the data that comes first in time as starts_at and the timestamps that comes second in time as ends_at
8
- - If starts_at or ends_at cannot be extracted both of them has to be None
9
- ## Output Format
10
- Your reply should be a JSON: list of dictionaries with the following structure:
11
- ```python
12
- class QueryInterval(BaseModel):
13
- starts_at: Optional[Timestamp] = None
14
- ends_at: Optional[Timestamp] = None
15
- ```
1
+ You are tasked with identifying relevant time periods where the answer to a given query should be searched.
2
+ Current date is: `{{ time_now }}`. Determine relevant period(s) and return structured intervals.
3
+
4
+ Extraction rules:
5
+
6
+ 1. Query without specific timestamp: use the time period with starts_at set to None and ends_at set to now.
7
+ 2. Explicit time intervals: If the query specifies a range (e.g., from 2010 to 2020, between January and March 2023), extract both start and end dates. Always assign the earlier date to starts_at and the later date to ends_at.
8
+ 3. Single timestamp: If the query refers to one specific moment (e.g., in 2015, on March 5, 2022), set starts_at and ends_at to that same timestamp.
9
+ 4. Open-ended time references: For phrases such as "before X" or "after X", represent the unspecified side as None. For example: before 2009 → starts_at: None, ends_at: 2009; after 2009 → starts_at: 2009, ends_at: None.
10
+ 5. Current-time references ("now", "current", "today"): If the query explicitly refers to the present, set both starts_at and ends_at to now (the ingestion timestamp).
11
+ 6. "Who is" and "Who was" questions: These imply a general identity or biographical inquiry without a specific temporal scope. Set both starts_at and ends_at to None.
12
+ 7. Ordering rule: Always ensure the earlier date is assigned to starts_at and the later date to ends_at.
13
+ 8. No temporal information: If no valid or inferable time reference is found, set both starts_at and ends_at to None.
@@ -1,6 +1,7 @@
1
1
  import filetype
2
2
  from typing import Dict, List, Optional, Any
3
3
  from .LoaderInterface import LoaderInterface
4
+ from cognee.infrastructure.files.utils.guess_file_type import guess_file_type
4
5
  from cognee.shared.logging_utils import get_logger
5
6
 
6
7
  logger = get_logger(__name__)
@@ -80,7 +81,7 @@ class LoaderEngine:
80
81
  """
81
82
  from pathlib import Path
82
83
 
83
- file_info = filetype.guess(file_path)
84
+ file_info = guess_file_type(file_path)
84
85
 
85
86
  path_extension = Path(file_path).suffix.lstrip(".")
86
87
 
@@ -21,7 +21,8 @@ def get_ontology_resolver_from_env(
21
21
  Supported value: "rdflib".
22
22
  matching_strategy (str): The matching strategy to apply.
23
23
  Supported value: "fuzzy".
24
- ontology_file_path (str): Path to the ontology file required for the resolver.
24
+ ontology_file_path (str): Path to the ontology file(s) required for the resolver.
25
+ Can be a single path or comma-separated paths for multiple files.
25
26
 
26
27
  Returns:
27
28
  BaseOntologyResolver: An instance of the requested ontology resolver.
@@ -31,8 +32,13 @@ def get_ontology_resolver_from_env(
31
32
  or if required parameters are missing.
32
33
  """
33
34
  if ontology_resolver == "rdflib" and matching_strategy == "fuzzy" and ontology_file_path:
35
+ if "," in ontology_file_path:
36
+ file_paths = [path.strip() for path in ontology_file_path.split(",")]
37
+ else:
38
+ file_paths = ontology_file_path
39
+
34
40
  return RDFLibOntologyResolver(
35
- matching_strategy=FuzzyMatchingStrategy(), ontology_file=ontology_file_path
41
+ matching_strategy=FuzzyMatchingStrategy(), ontology_file=file_paths
36
42
  )
37
43
  else:
38
44
  raise EnvironmentError(
@@ -2,7 +2,7 @@ import os
2
2
  import difflib
3
3
  from cognee.shared.logging_utils import get_logger
4
4
  from collections import deque
5
- from typing import List, Tuple, Dict, Optional, Any
5
+ from typing import List, Tuple, Dict, Optional, Any, Union
6
6
  from rdflib import Graph, URIRef, RDF, RDFS, OWL
7
7
 
8
8
  from cognee.modules.ontology.exceptions import (
@@ -26,22 +26,50 @@ class RDFLibOntologyResolver(BaseOntologyResolver):
26
26
 
27
27
  def __init__(
28
28
  self,
29
- ontology_file: Optional[str] = None,
29
+ ontology_file: Optional[Union[str, List[str]]] = None,
30
30
  matching_strategy: Optional[MatchingStrategy] = None,
31
31
  ) -> None:
32
32
  super().__init__(matching_strategy)
33
33
  self.ontology_file = ontology_file
34
34
  try:
35
- if ontology_file and os.path.exists(ontology_file):
35
+ files_to_load = []
36
+ if ontology_file is not None:
37
+ if isinstance(ontology_file, str):
38
+ files_to_load = [ontology_file]
39
+ elif isinstance(ontology_file, list):
40
+ files_to_load = ontology_file
41
+ else:
42
+ raise ValueError(
43
+ f"ontology_file must be a string, list of strings, or None. Got: {type(ontology_file)}"
44
+ )
45
+
46
+ if files_to_load:
36
47
  self.graph = Graph()
37
- self.graph.parse(ontology_file)
38
- logger.info("Ontology loaded successfully from file: %s", ontology_file)
48
+ loaded_files = []
49
+ for file_path in files_to_load:
50
+ if os.path.exists(file_path):
51
+ self.graph.parse(file_path)
52
+ loaded_files.append(file_path)
53
+ logger.info("Ontology loaded successfully from file: %s", file_path)
54
+ else:
55
+ logger.warning(
56
+ "Ontology file '%s' not found. Skipping this file.",
57
+ file_path,
58
+ )
59
+
60
+ if not loaded_files:
61
+ logger.info(
62
+ "No valid ontology files found. No owl ontology will be attached to the graph."
63
+ )
64
+ self.graph = None
65
+ else:
66
+ logger.info("Total ontology files loaded: %d", len(loaded_files))
39
67
  else:
40
68
  logger.info(
41
- "Ontology file '%s' not found. No owl ontology will be attached to the graph.",
42
- ontology_file,
69
+ "No ontology file provided. No owl ontology will be attached to the graph."
43
70
  )
44
71
  self.graph = None
72
+
45
73
  self.build_lookup()
46
74
  except Exception as e:
47
75
  logger.error("Failed to load ontology", exc_info=e)
@@ -1,7 +1,7 @@
1
1
  import os
2
2
  import asyncio
3
3
  from typing import Any, Optional, List, Type
4
-
4
+ from datetime import datetime
5
5
 
6
6
  from operator import itemgetter
7
7
  from cognee.infrastructure.databases.vector import get_vector_engine
@@ -79,7 +79,11 @@ class TemporalRetriever(GraphCompletionRetriever):
79
79
  else:
80
80
  base_directory = None
81
81
 
82
- system_prompt = render_prompt(prompt_path, {}, base_directory=base_directory)
82
+ time_now = datetime.now().strftime("%d-%m-%Y")
83
+
84
+ system_prompt = render_prompt(
85
+ prompt_path, {"time_now": time_now}, base_directory=base_directory
86
+ )
83
87
 
84
88
  interval = await LLMGateway.acreate_structured_output(query, system_prompt, QueryInterval)
85
89
 
@@ -108,8 +112,6 @@ class TemporalRetriever(GraphCompletionRetriever):
108
112
 
109
113
  graph_engine = await get_graph_engine()
110
114
 
111
- triplets = []
112
-
113
115
  if time_from and time_to:
114
116
  ids = await graph_engine.collect_time_ids(time_from=time_from, time_to=time_to)
115
117
  elif time_from: