PyPI - cognee - Versions diffs - 0.2.3.dev1__py3-none-any.whl → 0.3.0__py3-none-any.whl - Mend

cognee 0.2.3.dev1__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (252) hide show

cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py CHANGED Viewed

@@ -57,7 +57,7 @@ class LiteLLMEmbeddingEngine(EmbeddingEngine):
         api_key: str = None,
         endpoint: str = None,
         api_version: str = None,
-        max_tokens: int = 512,
+        max_completion_tokens: int = 512,
     ):
         self.api_key = api_key
         self.endpoint = endpoint
@@ -65,7 +65,7 @@ class LiteLLMEmbeddingEngine(EmbeddingEngine):
         self.provider = provider
         self.model = model
         self.dimensions = dimensions
-        self.max_tokens = max_tokens
+        self.max_completion_tokens = max_completion_tokens
         self.tokenizer = self.get_tokenizer()
         self.retry_count = 0
@@ -179,20 +179,29 @@ class LiteLLMEmbeddingEngine(EmbeddingEngine):
         model = self.model.split("/")[-1]
         if "openai" in self.provider.lower():
-            tokenizer = TikTokenTokenizer(model=model, max_tokens=self.max_tokens)
+            tokenizer = TikTokenTokenizer(
+                model=model, max_completion_tokens=self.max_completion_tokens
+            )
         elif "gemini" in self.provider.lower():
-            tokenizer = GeminiTokenizer(model=model, max_tokens=self.max_tokens)
+            tokenizer = GeminiTokenizer(
+                model=model, max_completion_tokens=self.max_completion_tokens
+            )
         elif "mistral" in self.provider.lower():
-            tokenizer = MistralTokenizer(model=model, max_tokens=self.max_tokens)
+            tokenizer = MistralTokenizer(
+                model=model, max_completion_tokens=self.max_completion_tokens
+            )
         else:
             try:
                 tokenizer = HuggingFaceTokenizer(
-                    model=self.model.replace("hosted_vllm/", ""), max_tokens=self.max_tokens
+                    model=self.model.replace("hosted_vllm/", ""),
+                    max_completion_tokens=self.max_completion_tokens,
                 )
             except Exception as e:
                 logger.warning(f"Could not get tokenizer from HuggingFace due to: {e}")
                 logger.info("Switching to TikToken default tokenizer.")
-                tokenizer = TikTokenTokenizer(model=None, max_tokens=self.max_tokens)
+                tokenizer = TikTokenTokenizer(
+                    model=None, max_completion_tokens=self.max_completion_tokens
+                )
         logger.debug(f"Tokenizer loaded for model: {self.model}")
         return tokenizer

cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py CHANGED Viewed

@@ -30,7 +30,7 @@ class OllamaEmbeddingEngine(EmbeddingEngine):
     Instance variables:
     - model
     - dimensions
-    - max_tokens
+    - max_completion_tokens
     - endpoint
     - mock
     - huggingface_tokenizer_name
@@ -39,7 +39,7 @@ class OllamaEmbeddingEngine(EmbeddingEngine):
     model: str
     dimensions: int
-    max_tokens: int
+    max_completion_tokens: int
     endpoint: str
     mock: bool
     huggingface_tokenizer_name: str
@@ -50,13 +50,13 @@ class OllamaEmbeddingEngine(EmbeddingEngine):
         self,
         model: Optional[str] = "avr/sfr-embedding-mistral:latest",
         dimensions: Optional[int] = 1024,
-        max_tokens: int = 512,
+        max_completion_tokens: int = 512,
         endpoint: Optional[str] = "http://localhost:11434/api/embeddings",
         huggingface_tokenizer: str = "Salesforce/SFR-Embedding-Mistral",
     ):
         self.model = model
         self.dimensions = dimensions
-        self.max_tokens = max_tokens
+        self.max_completion_tokens = max_completion_tokens
         self.endpoint = endpoint
         self.huggingface_tokenizer_name = huggingface_tokenizer
         self.tokenizer = self.get_tokenizer()
@@ -132,7 +132,7 @@ class OllamaEmbeddingEngine(EmbeddingEngine):
         """
         logger.debug("Loading HuggingfaceTokenizer for OllamaEmbeddingEngine...")
         tokenizer = HuggingFaceTokenizer(
-            model=self.huggingface_tokenizer_name, max_tokens=self.max_tokens
+            model=self.huggingface_tokenizer_name, max_completion_tokens=self.max_completion_tokens
         )
         logger.debug("Tokenizer loaded for OllamaEmbeddingEngine")
         return tokenizer

cognee/infrastructure/databases/vector/embeddings/config.py CHANGED Viewed

@@ -18,7 +18,7 @@ class EmbeddingConfig(BaseSettings):
     embedding_endpoint: Optional[str] = None
     embedding_api_key: Optional[str] = None
     embedding_api_version: Optional[str] = None
-    embedding_max_tokens: Optional[int] = 8191
+    embedding_max_completion_tokens: Optional[int] = 8191
     huggingface_tokenizer: Optional[str] = None
     model_config = SettingsConfigDict(env_file=".env", extra="allow")
@@ -38,7 +38,7 @@ class EmbeddingConfig(BaseSettings):
             "embedding_endpoint": self.embedding_endpoint,
             "embedding_api_key": self.embedding_api_key,
             "embedding_api_version": self.embedding_api_version,
-            "embedding_max_tokens": self.embedding_max_tokens,
+            "embedding_max_completion_tokens": self.embedding_max_completion_tokens,
             "huggingface_tokenizer": self.huggingface_tokenizer,
         }

cognee/infrastructure/databases/vector/embeddings/embedding_rate_limiter.py CHANGED Viewed

@@ -250,9 +250,7 @@ def embedding_rate_limit_sync(func):
             logger.warning(error_msg)
             # Create a custom embedding rate limit exception
-            from cognee.infrastructure.databases.exceptions.EmbeddingException import (
-                EmbeddingException,
-            )
+            from cognee.infrastructure.databases.exceptions import EmbeddingException
             raise EmbeddingException(error_msg)
@@ -307,9 +305,7 @@ def embedding_rate_limit_async(func):
             logger.warning(error_msg)
             # Create a custom embedding rate limit exception
-            from cognee.infrastructure.databases.exceptions.EmbeddingException import (
-                EmbeddingException,
-            )
+            from cognee.infrastructure.databases.exceptions import EmbeddingException
             raise EmbeddingException(error_msg)

cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py CHANGED Viewed

@@ -27,12 +27,13 @@ def get_embedding_engine() -> EmbeddingEngine:
         config.embedding_provider,
         config.embedding_model,
         config.embedding_dimensions,
-        config.embedding_max_tokens,
+        config.embedding_max_completion_tokens,
         config.embedding_endpoint,
         config.embedding_api_key,
         config.embedding_api_version,
         config.huggingface_tokenizer,
         llm_config.llm_api_key,
+        llm_config.llm_provider,
     )
@@ -41,12 +42,13 @@ def create_embedding_engine(
     embedding_provider,
     embedding_model,
     embedding_dimensions,
-    embedding_max_tokens,
+    embedding_max_completion_tokens,
     embedding_endpoint,
     embedding_api_key,
     embedding_api_version,
     huggingface_tokenizer,
     llm_api_key,
+    llm_provider,
 ):
     """
     Create and return an embedding engine based on the specified provider.
@@ -58,7 +60,7 @@ def create_embedding_engine(
           'ollama', or another supported provider.
         - embedding_model: The model to be used for the embedding engine.
         - embedding_dimensions: The number of dimensions for the embeddings.
-        - embedding_max_tokens: The maximum number of tokens for the embeddings.
+        - embedding_max_completion_tokens: The maximum number of tokens for the embeddings.
         - embedding_endpoint: The endpoint for the embedding service, relevant for certain
           providers.
         - embedding_api_key: API key to authenticate with the embedding service, if
@@ -81,7 +83,7 @@ def create_embedding_engine(
         return FastembedEmbeddingEngine(
             model=embedding_model,
             dimensions=embedding_dimensions,
-            max_tokens=embedding_max_tokens,
+            max_completion_tokens=embedding_max_completion_tokens,
         )
     if embedding_provider == "ollama":
@@ -90,7 +92,7 @@ def create_embedding_engine(
         return OllamaEmbeddingEngine(
             model=embedding_model,
             dimensions=embedding_dimensions,
-            max_tokens=embedding_max_tokens,
+            max_completion_tokens=embedding_max_completion_tokens,
             endpoint=embedding_endpoint,
             huggingface_tokenizer=huggingface_tokenizer,
         )
@@ -99,10 +101,11 @@ def create_embedding_engine(
     return LiteLLMEmbeddingEngine(
         provider=embedding_provider,
-        api_key=embedding_api_key or llm_api_key,
+        api_key=embedding_api_key
+        or (embedding_api_key if llm_provider == "custom" else llm_api_key),
         endpoint=embedding_endpoint,
         api_version=embedding_api_version,
         model=embedding_model,
         dimensions=embedding_dimensions,
-        max_tokens=embedding_max_tokens,
+        max_completion_tokens=embedding_max_completion_tokens,
     )

cognee/infrastructure/files/storage/LocalFileStorage.py CHANGED Viewed

@@ -189,6 +189,15 @@ class LocalFileStorage(Storage):
         return os.path.isfile(os.path.join(parsed_storage_path, file_path))
+    def get_size(self, file_path: str) -> int:
+        parsed_storage_path = get_parsed_path(self.storage_path)
+        return (
+            os.path.getsize(os.path.join(parsed_storage_path, file_path))
+            if self.file_exists(file_path)
+            else 0
+        )
     def ensure_directory_exists(self, directory_path: str = ""):
         """
         Ensure that the specified directory exists, creating it if necessary.

cognee/infrastructure/files/storage/S3FileStorage.py CHANGED Viewed

@@ -146,6 +146,11 @@ class S3FileStorage(Storage):
             self.s3.isfile, os.path.join(self.storage_path.replace("s3://", ""), file_path)
         )
+    async def get_size(self, file_path: str) -> int:
+        return await run_async(
+            self.s3.size, os.path.join(self.storage_path.replace("s3://", ""), file_path)
+        )
     async def ensure_directory_exists(self, directory_path: str = ""):
         """
         Ensure that the specified directory exists, creating it if necessary.

cognee/infrastructure/files/storage/StorageManager.py CHANGED Viewed

@@ -46,6 +46,12 @@ class StorageManager:
         else:
             return self.storage.is_file(file_path)
+    async def get_size(self, file_path: str) -> int:
+        if inspect.iscoroutinefunction(self.storage.get_size):
+            return await self.storage.get_size(file_path)
+        else:
+            return self.storage.get_size(file_path)
     async def store(self, file_path: str, data: BinaryIO, overwrite: bool = False) -> str:
         """
         Store data at the specified file path.
@@ -84,7 +90,7 @@ class StorageManager:
         """
         # Check the actual storage type by class name to determine if open() is async or sync
-        if self.storage.__class__.__name__ == "S3FileStorage" and file_path.startswith("s3://"):
+        if self.storage.__class__.__name__ == "S3FileStorage":
             # S3FileStorage.open() is async
             async with self.storage.open(file_path, *args, **kwargs) as file:
                 yield file

cognee/infrastructure/files/storage/storage.py CHANGED Viewed

@@ -40,6 +40,22 @@ class Storage(Protocol):
         """
         pass
+    def get_size(self, file_path: str) -> int:
+        """
+        Get the size of a specified file in bytes.
+        Parameters:
+        -----------
+            - file_path (str): The path of the file to get the size of.
+        Returns:
+        --------
+            - int: The size of the file in bytes.
+        """
+        pass
     def store(self, file_path: str, data: Union[BinaryIO, str], overwrite: bool):
         """
         Store data at the specified file path.

cognee/infrastructure/files/utils/get_data_file_path.py CHANGED Viewed

@@ -5,19 +5,24 @@ from urllib.parse import urlparse
 def get_data_file_path(file_path: str):
     # Check if this is a file URI BEFORE normalizing (which corrupts URIs)
     if file_path.startswith("file://"):
+        # Remove first occurrence of file:// prefix
+        pure_file_path = file_path.replace("file://", "", 1)
         # Normalize the file URI for Windows - replace backslashes with forward slashes
-        normalized_file_uri = os.path.normpath(file_path)
+        normalized_file_uri = os.path.normpath(pure_file_path)
-        parsed_url = urlparse(normalized_file_uri)
-        # Convert URI path to file system path
+        # Convert path to proper file system path
         if os.name == "nt":  # Windows
             # Handle Windows drive letters correctly
-            fs_path = parsed_url.path
-            if fs_path.startswith("/") and len(fs_path) > 1 and fs_path[2] == ":":
-                fs_path = fs_path[1:]  # Remove leading slash for Windows drive paths
-        else:  # Unix-like systems
-            fs_path = parsed_url.path
+            fs_path = normalized_file_uri
+            if (
+                (fs_path.startswith("/") or fs_path.startswith("\\"))
+                and len(fs_path) > 1
+                and fs_path[2] == ":"
+            ):
+                fs_path = fs_path[1:]
+        else:
+            # Unix-like systems
+            fs_path = normalized_file_uri
         # Now split the actual filesystem path
         actual_fs_path = os.path.normpath(fs_path)

cognee/infrastructure/files/utils/get_file_metadata.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import io
 import os.path
 from typing import BinaryIO, TypedDict
+from pathlib import Path
 from cognee.shared.logging_utils import get_logger
 from cognee.infrastructure.files.utils.get_file_content_hash import get_file_content_hash
@@ -55,7 +56,7 @@ async def get_file_metadata(file: BinaryIO) -> FileMetadata:
     file_type = guess_file_type(file)
     file_path = getattr(file, "name", None) or getattr(file, "full_name", None)
-    file_name = str(file_path).split("/")[-1].split(".")[0] if file_path else None
+    file_name = Path(file_path).stem if file_path else None
     # Get file size
     pos = file.tell()  # remember current pointer

cognee/infrastructure/llm/LLMGateway.py CHANGED Viewed

@@ -1,6 +1,5 @@
-from typing import Type
+from typing import Type, Optional, Coroutine
 from pydantic import BaseModel
-from typing import Coroutine
 from cognee.infrastructure.llm import get_llm_config
@@ -79,7 +78,10 @@ class LLMGateway:
     @staticmethod
     def extract_content_graph(
-        content: str, response_model: Type[BaseModel], mode: str = "simple"
+        content: str,
+        response_model: Type[BaseModel],
+        mode: str = "simple",
+        custom_prompt: Optional[str] = None,
     ) -> Coroutine:
         llm_config = get_llm_config()
         if llm_config.structured_output_framework.upper() == "BAML":
@@ -87,13 +89,20 @@ class LLMGateway:
                 extract_content_graph,
             )
-            return extract_content_graph(content=content, response_model=response_model, mode=mode)
+            return extract_content_graph(
+                content=content,
+                response_model=response_model,
+                mode=mode,
+                custom_prompt=custom_prompt,
+            )
         else:
             from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.extraction import (
                 extract_content_graph,
             )
-            return extract_content_graph(content=content, response_model=response_model)
+            return extract_content_graph(
+                content=content, response_model=response_model, custom_prompt=custom_prompt
+            )
     @staticmethod
     def extract_categories(content: str, response_model: Type[BaseModel]) -> Coroutine:
@@ -135,3 +144,21 @@ class LLMGateway:
             )
             return extract_summary(content=content, response_model=response_model)
+    @staticmethod
+    def extract_event_graph(content: str, response_model: Type[BaseModel]) -> Coroutine:
+        # TODO: Add BAML version of category and extraction and update function (consulted with Igor)
+        from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.extraction import (
+            extract_event_graph,
+        )
+        return extract_event_graph(content=content, response_model=response_model)
+    @staticmethod
+    def extract_event_entities(content: str, response_model: Type[BaseModel]) -> Coroutine:
+        # TODO: Add BAML version of category and extraction and update function (consulted with Igor)
+        from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.extraction import (
+            extract_event_entities,
+        )
+        return extract_event_entities(content=content, response_model=response_model)

cognee/infrastructure/llm/config.py CHANGED Viewed

@@ -18,7 +18,7 @@ class LLMConfig(BaseSettings):
     - llm_api_version
     - llm_temperature
     - llm_streaming
-    - llm_max_tokens
+    - llm_max_completion_tokens
     - transcription_model
     - graph_prompt_path
     - llm_rate_limit_enabled
@@ -35,13 +35,13 @@ class LLMConfig(BaseSettings):
     structured_output_framework: str = "instructor"
     llm_provider: str = "openai"
-    llm_model: str = "gpt-4o-mini"
+    llm_model: str = "openai/gpt-4o-mini"
     llm_endpoint: str = ""
     llm_api_key: Optional[str] = None
     llm_api_version: Optional[str] = None
     llm_temperature: float = 0.0
     llm_streaming: bool = False
-    llm_max_tokens: int = 16384
+    llm_max_completion_tokens: int = 16384
     baml_llm_provider: str = "openai"
     baml_llm_model: str = "gpt-4o-mini"
@@ -52,6 +52,8 @@ class LLMConfig(BaseSettings):
     transcription_model: str = "whisper-1"
     graph_prompt_path: str = "generate_graph_prompt.txt"
+    temporal_graph_prompt_path: str = "generate_event_graph_prompt.txt"
+    event_entity_prompt_path: str = "generate_event_entity_prompt.txt"
     llm_rate_limit_enabled: bool = False
     llm_rate_limit_requests: int = 60
     llm_rate_limit_interval: int = 60  # in seconds (default is 60 requests per minute)
@@ -171,7 +173,7 @@ class LLMConfig(BaseSettings):
             "api_version": self.llm_api_version,
             "temperature": self.llm_temperature,
             "streaming": self.llm_streaming,
-            "max_tokens": self.llm_max_tokens,
+            "max_completion_tokens": self.llm_max_completion_tokens,
             "transcription_model": self.transcription_model,
             "graph_prompt_path": self.graph_prompt_path,
             "rate_limit_enabled": self.llm_rate_limit_enabled,

cognee/infrastructure/llm/prompts/extract_query_time.txt ADDED Viewed

@@ -0,0 +1,15 @@
+For the purposes of identifying timestamps in a query, you are tasked with extracting relevant timestamps from the query.
+## Timestamp requirements
+- If the query contains an interval, extract both the starts_at and ends_at properties
+- If the query contains an instantaneous timestamp, starts_at and ends_at should be the same
+- If the query is open-ended (before 2009 or after 2009), the undefined end of the interval should be None
+    - For example: "before 2009" -- starts_at: None, ends_at: 2009 or "after 2009" -- starts_at: 2009, ends_at: None
+- Always put the date that comes first in time as starts_at and the timestamp that comes second in time as ends_at
+- If starts_at or ends_at cannot be extracted, both of them have to be None
+## Output Format
+Your reply should be a JSON: list of dictionaries with the following structure:
+```python
+class QueryInterval(BaseModel):
+    starts_at: Optional[Timestamp] = None
+    ends_at: Optional[Timestamp] = None
+```

cognee/infrastructure/llm/prompts/generate_event_entity_prompt.txt ADDED Viewed

@@ -0,0 +1,25 @@
+For the purposes of building event-based knowledge graphs, you are tasked with extracting highly granular entities from events text. An entity is any distinct, identifiable thing, person, place, object, organization, concept, or phenomenon that can be named, referenced, or described in the event context. This includes but is not limited to: people, places, objects, organizations, concepts, events, processes, states, conditions, properties, attributes, roles, functions, and any other meaningful referents that contribute to understanding the event.
+**Temporal Entity Exclusion**: Do not extract timestamp-like entities (dates, times, durations) as these are handled separately. However, extract named temporal periods, eras, historical epochs, and culturally significant time references
+## Input Format
+The input will be a list of dictionaries, each containing:
+- `event_name`: The name of the event
+- `description`: The description of the event
+## Task
+For each event, extract all entities mentioned in the event description and determine their relationship to the event.
+## Output Format
+Return the same enriched JSON with an additional key in each dictionary: `attributes`.
+The `attributes` should be a list of dictionaries, each containing:
+- `entity`: The name of the entity
+- `entity_type`: The type/category of the entity (person, place, organization, object, concept, etc.)
+- `relationship`: A concise description of how the entity relates to the event
+## Requirements
+- **Be extremely thorough** - extract EVERY non-temporal entity mentioned, no matter how small, obvious, or seemingly insignificant
+- **After you are done with obvious entities, every noun, pronoun, proper noun, and named reference =  one entity**
+- We expect rich entity networks from any event, easily reaching dozens of entities per event
+- Granularity and richness of the entity extraction is key to our success and is of utmost importance
+- **Do not skip any entities** - if you're unsure whether something is an entity, extract it anyway
+- Use the event name for context when determining relationships
+- Relationships should be technical with one or at most two words. If two words, use snake_case style (lowercase words joined by an underscore)
+- Relationships could imply general meaning like: subject, object, participant, recipient, agent, instrument, tool, source, cause, effect, purpose, manner, resource, etc.
+- You can combine two words to form a relationship name: subject_role, previous_owner, etc.
+- Focus on how the entity specifically relates to the event

cognee/infrastructure/llm/prompts/generate_event_graph_prompt.txt ADDED Viewed

@@ -0,0 +1,30 @@
+For the purposes of building event-based knowledge graphs, you are tasked with extracting highly granular stream events from a text. The events are defined as follows:
+## Event Definition
+- Anything with a date or a timestamp is an event
+- Anything that took place in time (even if the time is unknown) is an event
+- Anything that lasted over a period of time, or happened in an instant is an event: from historical milestones (wars, presidencies, olympiads) to personal milestones (birth, death, employment, etc.), to mundane actions (a walk, a conversation, etc.)
+- **ANY action or verb represents an event** - this is the most important rule
+- Every single verb in the text corresponds to an event that must be extracted
+- This includes: thinking, feeling, seeing, hearing, moving, speaking, writing, reading, eating, sleeping, working, playing, studying, traveling, meeting, calling, texting, buying, selling, creating, destroying, building, breaking, starting, stopping, beginning, ending, etc.
+- Even the most mundane or obvious actions are events: "he walked", "she sat", "they talked", "I thought", "we waited"
+## Requirements
+- **Be extremely thorough** - extract EVERY event mentioned, no matter how small or obvious
+- **Timestamped first** - every timestamp or date should have at least one event
+- **Verbs/actions = one event** - After you are done with timestamped events, every verb that is an action should have a corresponding event.
+- We expect long streams of events from any piece of text, easily reaching a hundred events
+- Granularity and richness of the stream is key to our success and is of utmost importance
+- Not all events will have timestamps, add timestamps only to known events
+- For events that were instantaneous, attach only the time_from or the time_to property; don't create both
+- **Do not skip any events** - if you're unsure whether something is an event, extract it anyway
+- **Quantity over filtering** - it's better to extract too many events than to miss any
+- **Descriptions** - Always include the event description together with entities (Who did what? What happened? What is the event?). If you can, include the corresponding part from the text.
+## Output Format
+Your reply should be a JSON: list of dictionaries with the following structure:
+```python
+class Event(BaseModel):
+    name: str [concise]
+    description: Optional[str] = None
+    time_from: Optional[Timestamp] = None
+    time_to: Optional[Timestamp] = None
+    location: Optional[str] = None
+```

cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from typing import Type
+from typing import Type, Optional
 from pydantic import BaseModel
 from cognee.infrastructure.llm.config import get_llm_config
 from cognee.shared.logging_utils import get_logger, setup_logging
@@ -6,7 +6,10 @@ from cognee.infrastructure.llm.structured_output_framework.baml.baml_client.asyn
 async def extract_content_graph(
-    content: str, response_model: Type[BaseModel], mode: str = "simple"
+    content: str,
+    response_model: Type[BaseModel],
+    mode: str = "simple",
+    custom_prompt: Optional[str] = None,
 ):
     config = get_llm_config()
     setup_logging()
@@ -26,8 +29,16 @@ async def extract_content_graph(
     #     return graph
     # else:
-    graph = await b.ExtractContentGraphGeneric(
-        content, mode=mode, baml_options={"client_registry": config.baml_registry}
-    )
+    if custom_prompt:
+        graph = await b.ExtractContentGraphGeneric(
+            content,
+            mode="custom",
+            custom_prompt_content=custom_prompt,
+            baml_options={"client_registry": config.baml_registry},
+        )
+    else:
+        graph = await b.ExtractContentGraphGeneric(
+            content, mode=mode, baml_options={"client_registry": config.baml_registry}
+        )
     return graph

cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/__init__.py CHANGED Viewed

@@ -1,3 +1,5 @@
 from .knowledge_graph.extract_content_graph import extract_content_graph
+from .knowledge_graph.extract_event_graph import extract_event_graph
 from .extract_categories import extract_categories
 from .extract_summary import extract_summary, extract_code_summary
+from .extract_event_entities import extract_event_entities

cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/extract_event_entities.py ADDED Viewed

@@ -0,0 +1,44 @@
+import os
+from typing import List, Type
+from pydantic import BaseModel
+from cognee.infrastructure.llm.LLMGateway import LLMGateway
+from cognee.infrastructure.llm.config import (
+    get_llm_config,
+)
+async def extract_event_entities(content: str, response_model: Type[BaseModel]):
+    """
+    Extracts event-related entities from the given content using an LLM with structured output.
+    This function loads an event entity extraction prompt from the LLM configuration,
+    renders it into a system prompt, and queries the LLM to produce structured entities
+    that conform to the specified response model.
+    Args:
+        content (str): The input text from which to extract event entities.
+        response_model (Type[BaseModel]): A Pydantic model defining the structure of the expected output.
+    Returns:
+        BaseModel: An instance of the response_model populated with extracted event entities.
+    """
+    llm_config = get_llm_config()
+    prompt_path = llm_config.event_entity_prompt_path
+    # Check if the prompt path is an absolute path or just a filename
+    if os.path.isabs(prompt_path):
+        # directory containing the file
+        base_directory = os.path.dirname(prompt_path)
+        # just the filename itself
+        prompt_path = os.path.basename(prompt_path)
+    else:
+        base_directory = None
+    system_prompt = LLMGateway.render_prompt(prompt_path, {}, base_directory=base_directory)
+    content_graph = await LLMGateway.acreate_structured_output(
+        content, system_prompt, response_model
+    )
+    return content_graph

cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/__init__.py CHANGED Viewed

@@ -1 +1,2 @@
 from .extract_content_graph import extract_content_graph
+from .extract_event_graph import extract_event_graph

cognee 0.2.3.dev1__py3-none-any.whl → 0.3.0__py3-none-any.whl

cognee 0.2.3.dev1__py3-none-any.whl → 0.3.0__py3-none-any.whl