PyPI - cognee - Versions diffs - 0.2.1.dev7__py3-none-any.whl → 0.2.2.dev1__py3-none-any.whl - Mend

cognee 0.2.1.dev7__py3-none-any.whl → 0.2.2.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (223)

cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/extract_summary.py ADDED Viewed

@@ -0,0 +1,89 @@
+import os
+from typing import Type
+from pydantic import BaseModel
+from baml_py import ClientRegistry
+from cognee.shared.logging_utils import get_logger
+from cognee.shared.data_models import SummarizedCode
+from cognee.infrastructure.llm.structured_output_framework.baml.baml_client.async_client import b
+from cognee.infrastructure.llm.config import get_llm_config
+logger = get_logger("extract_summary_baml")
+def get_mock_summarized_code():
+    """Local mock function to avoid circular imports."""
+    return SummarizedCode(
+        high_level_summary="Mock code summary",
+        key_features=["Mock feature 1", "Mock feature 2"],
+        imports=["mock_import"],
+        constants=["MOCK_CONSTANT"],
+        classes=[],
+        functions=[],
+        workflow_description="Mock workflow description",
+    )
+async def extract_summary(content: str, response_model: Type[BaseModel]):
+    """
+    Extract summary using BAML framework.
+    Args:
+        content: The content to summarize
+        response_model: The Pydantic model type for the response
+    Returns:
+        BaseModel: The summarized content in the specified format
+    """
+    config = get_llm_config()
+    # Use BAML's SummarizeContent function
+    summary_result = await b.SummarizeContent(
+        content, baml_options={"client_registry": config.baml_registry}
+    )
+    # Convert BAML result to the expected response model
+    if response_model is SummarizedCode:
+        # If it's asking for SummarizedCode but we got SummarizedContent,
+        # we need to use SummarizeCode instead
+        code_result = await b.SummarizeCode(
+            content, baml_options={"client_registry": config.baml_registry}
+        )
+        return code_result
+    else:
+        # For other models, return the summary result
+        return summary_result
+async def extract_code_summary(content: str):
+    """
+    Extract code summary using BAML framework with mocking support.
+    Args:
+        content: The code content to summarize
+    Returns:
+        SummarizedCode: The summarized code information
+    """
+    enable_mocking = os.getenv("MOCK_CODE_SUMMARY", "false")
+    if isinstance(enable_mocking, bool):
+        enable_mocking = str(enable_mocking).lower()
+    enable_mocking = enable_mocking in ("true", "1", "yes")
+    if enable_mocking:
+        result = get_mock_summarized_code()
+        return result
+    else:
+        try:
+            config = get_llm_config()
+            result = await b.SummarizeCode(
+                content, baml_options={"client_registry": config.baml_registry}
+            )
+        except Exception as e:
+            logger.error(
+                "Failed to extract code summary with BAML, falling back to mock summary", exc_info=e
+            )
+            result = get_mock_summarized_code()
+        return result

cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py ADDED Viewed

@@ -0,0 +1,33 @@
+from typing import Type
+from pydantic import BaseModel
+from cognee.infrastructure.llm.config import get_llm_config
+from cognee.shared.logging_utils import get_logger, setup_logging
+from cognee.infrastructure.llm.structured_output_framework.baml.baml_client.async_client import b
+async def extract_content_graph(
+    content: str, response_model: Type[BaseModel], mode: str = "simple"
+):
+    config = get_llm_config()
+    setup_logging()
+    get_logger(level="INFO")
+    # if response_model:
+    #     # tb = TypeBuilder()
+    #     # country = tb.union \
+    #     #     ([tb.literal_string("USA"), tb.literal_string("UK"), tb.literal_string("Germany"), tb.literal_string("other")])
+    #     # tb.Node.add_property("country", country)
+    #
+    #     graph = await b.ExtractDynamicContentGraph(
+    #         content, mode=mode, baml_options={"client_registry": baml_registry}
+    #     )
+    #
+    #     return graph
+    # else:
+    graph = await b.ExtractContentGraphGeneric(
+        content, mode=mode, baml_options={"client_registry": config.baml_registry}
+    )
+    return graph

cognee/infrastructure/llm/structured_output_framework/baml/baml_src/generators.baml ADDED Viewed

@@ -0,0 +1,18 @@
+// This helps use auto generate libraries you can use in the language of
+// your choice. You can have multiple generators if you use multiple languages.
+// Just ensure that the output_dir is different for each generator.
+generator target {
+    // Valid values: "python/pydantic", "typescript", "ruby/sorbet", "rest/openapi"
+    output_type "python/pydantic"
+    // Where the generated code will be saved (relative to baml_src/)
+    output_dir "../baml/"
+    // The version of the BAML package you have installed (e.g. same version as your baml-py or @boundaryml/baml).
+    // The BAML VSCode extension version should also match this version.
+    version "0.201.0"
+    // Valid values: "sync", "async"
+    // This controls what `b.FunctionName()` will be (sync or async).
+    default_client_mode sync
+}

cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+from .knowledge_graph.extract_content_graph import extract_content_graph
+from .extract_categories import extract_categories
+from .extract_summary import extract_summary, extract_code_summary

cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/extract_categories.py ADDED Viewed

@@ -0,0 +1,12 @@
+from typing import Type
+from pydantic import BaseModel
+from cognee.infrastructure.llm.LLMGateway import LLMGateway
+async def extract_categories(content: str, response_model: Type[BaseModel]):
+    system_prompt = LLMGateway.read_query_prompt("classify_content.txt")
+    llm_output = await LLMGateway.acreate_structured_output(content, system_prompt, response_model)
+    return llm_output

cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/extract_summary.py RENAMED Viewed

@@ -5,20 +5,29 @@ from typing import Type
 from instructor.exceptions import InstructorRetryException
 from pydantic import BaseModel
-from cognee.infrastructure.llm.get_llm_client import get_llm_client
-from cognee.infrastructure.llm.prompts import read_query_prompt
+from cognee.infrastructure.llm.LLMGateway import LLMGateway
 from cognee.shared.data_models import SummarizedCode
-from cognee.tasks.summarization.mock_summary import get_mock_summarized_code
 logger = get_logger("extract_summary")
-async def extract_summary(content: str, response_model: Type[BaseModel]):
-    llm_client = get_llm_client()
+def get_mock_summarized_code():
+    """Local mock function to avoid circular imports."""
+    return SummarizedCode(
+        high_level_summary="Mock code summary",
+        key_features=["Mock feature 1", "Mock feature 2"],
+        imports=["mock_import"],
+        constants=["MOCK_CONSTANT"],
+        classes=[],
+        functions=[],
+        workflow_description="Mock workflow description",
+    )
-    system_prompt = read_query_prompt("summarize_content.txt")
+async def extract_summary(content: str, response_model: Type[BaseModel]):
+    system_prompt = LLMGateway.read_query_prompt("summarize_content.txt")
-    llm_output = await llm_client.acreate_structured_output(content, system_prompt, response_model)
+    llm_output = await LLMGateway.acreate_structured_output(content, system_prompt, response_model)
     return llm_output

cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/knowledge_graph/extract_content_graph.py RENAMED Viewed

@@ -1,13 +1,14 @@
 import os
 from typing import Type
 from pydantic import BaseModel
-from cognee.infrastructure.llm.get_llm_client import get_llm_client
-from cognee.infrastructure.llm.prompts import render_prompt
-from cognee.infrastructure.llm.config import get_llm_config
+from cognee.infrastructure.llm.LLMGateway import LLMGateway
+from cognee.infrastructure.llm.config import (
+    get_llm_config,
+)
 async def extract_content_graph(content: str, response_model: Type[BaseModel]):
-    llm_client = get_llm_client()
     llm_config = get_llm_config()
     prompt_path = llm_config.graph_prompt_path
@@ -21,9 +22,9 @@ async def extract_content_graph(content: str, response_model: Type[BaseModel]):
     else:
         base_directory = None
-    system_prompt = render_prompt(prompt_path, {}, base_directory=base_directory)
+    system_prompt = LLMGateway.render_prompt(prompt_path, {}, base_directory=base_directory)
-    content_graph = await llm_client.acreate_structured_output(
+    content_graph = await LLMGateway.acreate_structured_output(
         content, system_prompt, response_model
     )

cognee/infrastructure/llm/{anthropic → structured_output_framework/litellm_instructor/llm/anthropic}/adapter.py RENAMED Viewed

@@ -3,9 +3,15 @@ from pydantic import BaseModel
 import instructor
 from cognee.exceptions import InvalidValueError
-from cognee.infrastructure.llm.llm_interface import LLMInterface
-from cognee.infrastructure.llm.prompts import read_query_prompt
-from cognee.infrastructure.llm.rate_limiter import rate_limit_async, sleep_and_retry_async
+from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import (
+    LLMInterface,
+)
+from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.rate_limiter import (
+    rate_limit_async,
+    sleep_and_retry_async,
+)
+from cognee.infrastructure.llm.LLMGateway import LLMGateway
 class AnthropicAdapter(LLMInterface):
@@ -85,7 +91,7 @@ class AnthropicAdapter(LLMInterface):
         if not system_prompt:
             raise InvalidValueError(message="No system prompt path provided.")
-        system_prompt = read_query_prompt(system_prompt)
+        system_prompt = LLMGateway.read_query_prompt(system_prompt)
         formatted_prompt = (
             f"""System Prompt:\n{system_prompt}\n\nUser Input:\n{text_input}\n"""

cognee/infrastructure/llm/{gemini → structured_output_framework/litellm_instructor/llm/gemini}/adapter.py RENAMED Viewed

@@ -1,5 +1,4 @@
 import litellm
-import logging
 from pydantic import BaseModel
 from typing import Type, Optional
 from litellm import acompletion, JSONSchemaValidationError
@@ -7,9 +6,11 @@ from litellm import acompletion, JSONSchemaValidationError
 from cognee.shared.logging_utils import get_logger
 from cognee.modules.observability.get_observe import get_observe
 from cognee.exceptions import InvalidValueError
-from cognee.infrastructure.llm.llm_interface import LLMInterface
-from cognee.infrastructure.llm.prompts import read_query_prompt
-from cognee.infrastructure.llm.rate_limiter import (
+from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import (
+    LLMInterface,
+)
+from cognee.infrastructure.llm.LLMGateway import LLMGateway
+from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.rate_limiter import (
     rate_limit_async,
     sleep_and_retry_async,
 )
@@ -135,7 +136,7 @@ class GeminiAdapter(LLMInterface):
             text_input = "No user input provided."
         if not system_prompt:
             raise InvalidValueError(message="No system prompt path provided.")
-        system_prompt = read_query_prompt(system_prompt)
+        system_prompt = LLMGateway.read_query_prompt(system_prompt)
         formatted_prompt = (
             f"""System Prompt:\n{system_prompt}\n\nUser Input:\n{text_input}\n"""

cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/__init__.py ADDED Viewed

File without changes

cognee/infrastructure/llm/{generic_llm_api → structured_output_framework/litellm_instructor/llm/generic_llm_api}/adapter.py RENAMED Viewed

@@ -1,6 +1,5 @@
 """Adapter for Generic API LLM provider API"""
-import logging
 import litellm
 import instructor
 from typing import Type
@@ -10,8 +9,13 @@ from litellm.exceptions import ContentPolicyViolationError
 from instructor.exceptions import InstructorRetryException
 from cognee.infrastructure.llm.exceptions import ContentPolicyFilterError
-from cognee.infrastructure.llm.llm_interface import LLMInterface
-from cognee.infrastructure.llm.rate_limiter import rate_limit_async, sleep_and_retry_async
+from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import (
+    LLMInterface,
+)
+from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.rate_limiter import (
+    rate_limit_async,
+    sleep_and_retry_async,
+)
 class GenericAPIAdapter(LLMInterface):

cognee/infrastructure/llm/{get_llm_client.py → structured_output_framework/litellm_instructor/llm/get_llm_client.py} RENAMED Viewed

@@ -4,7 +4,9 @@ from enum import Enum
 from cognee.exceptions import InvalidValueError
 from cognee.infrastructure.llm import get_llm_config
-from cognee.infrastructure.llm.ollama.adapter import OllamaAPIAdapter
+from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.ollama.adapter import (
+    OllamaAPIAdapter,
+)
 # Define an Enum for LLM Providers
@@ -59,7 +61,9 @@ def get_llm_client():
         if llm_config.llm_api_key is None:
             raise InvalidValueError(message="LLM API key is not set.")
-        from .openai.adapter import OpenAIAdapter
+        from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.openai.adapter import (
+            OpenAIAdapter,
+        )
         return OpenAIAdapter(
             api_key=llm_config.llm_api_key,
@@ -78,7 +82,9 @@ def get_llm_client():
         if llm_config.llm_api_key is None:
             raise InvalidValueError(message="LLM API key is not set.")
-        from .generic_llm_api.adapter import GenericAPIAdapter
+        from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.generic_llm_api.adapter import (
+            GenericAPIAdapter,
+        )
         return OllamaAPIAdapter(
             llm_config.llm_endpoint,
@@ -89,7 +95,9 @@ def get_llm_client():
         )
     elif provider == LLMProvider.ANTHROPIC:
-        from .anthropic.adapter import AnthropicAdapter
+        from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.anthropic.adapter import (
+            AnthropicAdapter,
+        )
         return AnthropicAdapter(max_tokens=max_tokens, model=llm_config.llm_model)
@@ -97,7 +105,9 @@ def get_llm_client():
         if llm_config.llm_api_key is None:
             raise InvalidValueError(message="LLM API key is not set.")
-        from .generic_llm_api.adapter import GenericAPIAdapter
+        from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.generic_llm_api.adapter import (
+            GenericAPIAdapter,
+        )
         return GenericAPIAdapter(
             llm_config.llm_endpoint,
@@ -114,7 +124,9 @@ def get_llm_client():
         if llm_config.llm_api_key is None:
             raise InvalidValueError(message="LLM API key is not set.")
-        from .gemini.adapter import GeminiAdapter
+        from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.gemini.adapter import (
+            GeminiAdapter,
+        )
         return GeminiAdapter(
             api_key=llm_config.llm_api_key,

cognee/infrastructure/llm/{llm_interface.py → structured_output_framework/litellm_instructor/llm/llm_interface.py} RENAMED Viewed

@@ -3,7 +3,7 @@
 from typing import Type, Protocol
 from abc import abstractmethod
 from pydantic import BaseModel
-from cognee.infrastructure.llm.prompts import read_query_prompt
+from cognee.infrastructure.llm.LLMGateway import LLMGateway
 class LLMInterface(Protocol):
@@ -57,7 +57,7 @@ class LLMInterface(Protocol):
             text_input = "No user input provided."
         if not system_prompt:
             raise ValueError("No system prompt path provided.")
-        system_prompt = read_query_prompt(system_prompt)
+        system_prompt = LLMGateway.read_query_prompt(system_prompt)
         formatted_prompt = f"""System Prompt:\n{system_prompt}\n\nUser Input:\n{text_input}\n"""

cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/__init__.py ADDED Viewed

File without changes

cognee/infrastructure/llm/{ollama → structured_output_framework/litellm_instructor/llm/ollama}/adapter.py RENAMED Viewed

@@ -4,8 +4,10 @@ from typing import Type
 from openai import OpenAI
 from pydantic import BaseModel
-from cognee.infrastructure.llm.llm_interface import LLMInterface
-from cognee.infrastructure.llm.rate_limiter import (
+from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import (
+    LLMInterface,
+)
+from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.rate_limiter import (
     rate_limit_async,
     sleep_and_retry_async,
 )

cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/__init__.py ADDED Viewed

File without changes

cognee/infrastructure/llm/{openai → structured_output_framework/litellm_instructor/llm/openai}/adapter.py RENAMED Viewed

@@ -8,11 +8,13 @@ from litellm.exceptions import ContentPolicyViolationError
 from instructor.exceptions import InstructorRetryException
 from cognee.exceptions import InvalidValueError
-from cognee.infrastructure.llm.prompts import read_query_prompt
-from cognee.infrastructure.llm.llm_interface import LLMInterface
+from cognee.infrastructure.llm.LLMGateway import LLMGateway
+from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import (
+    LLMInterface,
+)
 from cognee.infrastructure.llm.exceptions import ContentPolicyFilterError
 from cognee.infrastructure.files.utils.open_data_file import open_data_file
-from cognee.infrastructure.llm.rate_limiter import (
+from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.rate_limiter import (
     rate_limit_async,
     rate_limit_sync,
     sleep_and_retry_async,
@@ -324,7 +326,7 @@ class OpenAIAdapter(LLMInterface):
             text_input = "No user input provided."
         if not system_prompt:
             raise InvalidValueError(message="No system prompt path provided.")
-        system_prompt = read_query_prompt(system_prompt)
+        system_prompt = LLMGateway.read_query_prompt(system_prompt)
         formatted_prompt = (
             f"""System Prompt:\n{system_prompt}\n\nUser Input:\n{text_input}\n"""

cognee/infrastructure/llm/{rate_limiter.py → structured_output_framework/litellm_instructor/llm/rate_limiter.py} RENAMED Viewed

@@ -50,11 +50,6 @@ from limits import RateLimitItemPerMinute, storage
 from limits.strategies import MovingWindowRateLimiter
 from cognee.shared.logging_utils import get_logger
 from cognee.infrastructure.llm.config import get_llm_config
-import threading
-import logging
-import functools
-import openai
-import os
 logger = get_logger()

cognee/infrastructure/llm/tokenizer/Gemini/adapter.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from typing import List, Any, Union
+from typing import List, Any
 from ..tokenizer_interface import TokenizerInterface
@@ -24,7 +24,9 @@ class GeminiTokenizer(TokenizerInterface):
         # Get LLM API key from config
         from cognee.infrastructure.databases.vector.embeddings.config import get_embedding_config
-        from cognee.infrastructure.llm.config import get_llm_config
+        from cognee.infrastructure.llm.config import (
+            get_llm_config,
+        )
         config = get_embedding_config()
         llm_config = get_llm_config()

cognee/infrastructure/llm/tokenizer/TikToken/adapter.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from typing import List, Any
+from typing import List, Any, Optional
 import tiktoken
 from ..tokenizer_interface import TokenizerInterface
@@ -12,13 +12,17 @@ class TikTokenTokenizer(TokenizerInterface):
     def __init__(
         self,
-        model: str,
+        model: Optional[str] = None,
         max_tokens: int = 8191,
     ):
         self.model = model
         self.max_tokens = max_tokens
         # Initialize TikToken for GPT based on model
-        self.tokenizer = tiktoken.encoding_for_model(self.model)
+        if model:
+            self.tokenizer = tiktoken.encoding_for_model(self.model)
+        else:
+            # Use default if model not provided
+            self.tokenizer = tiktoken.get_encoding("cl100k_base")
     def extract_tokens(self, text: str) -> List[Any]:
         """

cognee/infrastructure/llm/tokenizer/__init__.py CHANGED Viewed

@@ -1 +1,5 @@
 from .tokenizer_interface import TokenizerInterface
+from .Mistral import MistralTokenizer
+from .Gemini import GeminiTokenizer
+from .HuggingFace import HuggingFaceTokenizer
+from .TikToken import TikTokenTokenizer

cognee/infrastructure/llm/utils.py CHANGED Viewed

@@ -1,6 +1,8 @@
 import litellm
-from cognee.infrastructure.llm.get_llm_client import get_llm_client
+from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.get_llm_client import (
+    get_llm_client,
+)
 from cognee.shared.logging_utils import get_logger
 logger = get_logger()

cognee 0.2.1.dev7__py3-none-any.whl → 0.2.2.dev1__py3-none-any.whl

cognee 0.2.1.dev7__py3-none-any.whl → 0.2.2.dev1__py3-none-any.whl