PyPI - cognee - Versions diffs - 0.4.1__py3-none-any.whl → 0.5.0__py3-none-any.whl - Mend

cognee 0.4.1__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (224) hide show

cognee/infrastructure/llm/config.py CHANGED Viewed

@@ -38,6 +38,7 @@ class LLMConfig(BaseSettings):
     """
     structured_output_framework: str = "instructor"
+    llm_instructor_mode: str = ""
     llm_provider: str = "openai"
     llm_model: str = "openai/gpt-5-mini"
     llm_endpoint: str = ""
@@ -73,6 +74,41 @@ class LLMConfig(BaseSettings):
     model_config = SettingsConfigDict(env_file=".env", extra="allow")
+    @model_validator(mode="after")
+    def strip_quotes_from_strings(self) -> "LLMConfig":
+        """
+        Strip surrounding quotes from specific string fields that often come from
+        environment variables with extra quotes (e.g., via Docker's --env-file).
+        Only applies to known config keys where quotes are invalid or cause issues.
+        """
+        string_fields_to_strip = [
+            "llm_api_key",
+            "llm_endpoint",
+            "llm_api_version",
+            "baml_llm_api_key",
+            "baml_llm_endpoint",
+            "baml_llm_api_version",
+            "fallback_api_key",
+            "fallback_endpoint",
+            "fallback_model",
+            "llm_provider",
+            "llm_model",
+            "baml_llm_provider",
+            "baml_llm_model",
+        ]
+        cls = self.__class__
+        for field_name in string_fields_to_strip:
+            if field_name not in cls.model_fields:
+                continue
+            value = getattr(self, field_name, None)
+            if isinstance(value, str) and len(value) >= 2:
+                if value[0] == value[-1] and value[0] in ("'", '"'):
+                    setattr(self, field_name, value[1:-1])
+        return self
     def model_post_init(self, __context) -> None:
         """Initialize the BAML registry after the model is created."""
         # Check if BAML is selected as structured output framework but not available
@@ -181,6 +217,7 @@ class LLMConfig(BaseSettings):
               instance.
         """
         return {
+            "llm_instructor_mode": self.llm_instructor_mode.lower(),
             "provider": self.llm_provider,
             "model": self.llm_model,
             "endpoint": self.llm_endpoint,

cognee/infrastructure/llm/extraction/knowledge_graph/extract_content_graph.py CHANGED Viewed

@@ -10,7 +10,7 @@ from cognee.infrastructure.llm.config import (
 async def extract_content_graph(
-    content: str, response_model: Type[BaseModel], custom_prompt: Optional[str] = None
+    content: str, response_model: Type[BaseModel], custom_prompt: Optional[str] = None, **kwargs
 ):
     if custom_prompt:
         system_prompt = custom_prompt
@@ -30,7 +30,7 @@ async def extract_content_graph(
         system_prompt = render_prompt(prompt_path, {}, base_directory=base_directory)
     content_graph = await LLMGateway.acreate_structured_output(
-        content, system_prompt, response_model
+        content, system_prompt, response_model, **kwargs
     )
     return content_graph

cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py CHANGED Viewed

@@ -1,7 +1,15 @@
 import asyncio
 from typing import Type
-from cognee.shared.logging_utils import get_logger
+from pydantic import BaseModel
+from tenacity import (
+    retry,
+    stop_after_delay,
+    wait_exponential_jitter,
+    retry_if_not_exception_type,
+    before_sleep_log,
+)
+from cognee.shared.logging_utils import get_logger
 from cognee.infrastructure.llm.config import get_llm_config
 from cognee.infrastructure.llm.structured_output_framework.baml.baml_src.extraction.create_dynamic_baml_type import (
     create_dynamic_baml_type,
@@ -10,12 +18,18 @@ from cognee.infrastructure.llm.structured_output_framework.baml.baml_client.type
     TypeBuilder,
 )
 from cognee.infrastructure.llm.structured_output_framework.baml.baml_client import b
-from pydantic import BaseModel
+from cognee.shared.rate_limiting import llm_rate_limiter_context_manager
+import logging
 logger = get_logger()
+@retry(
+    stop=stop_after_delay(128),
+    wait=wait_exponential_jitter(8, 128),
+    before_sleep=before_sleep_log(logger, logging.DEBUG),
+    reraise=True,
+)
 async def acreate_structured_output(
     text_input: str, system_prompt: str, response_model: Type[BaseModel]
 ):
@@ -45,11 +59,12 @@ async def acreate_structured_output(
     tb = TypeBuilder()
     type_builder = create_dynamic_baml_type(tb, tb.ResponseModel, response_model)
-    result = await b.AcreateStructuredOutput(
-        text_input=text_input,
-        system_prompt=system_prompt,
-        baml_options={"client_registry": config.baml_registry, "tb": type_builder},
-    )
+    async with llm_rate_limiter_context_manager():
+        result = await b.AcreateStructuredOutput(
+            text_input=text_input,
+            system_prompt=system_prompt,
+            baml_options={"client_registry": config.baml_registry, "tb": type_builder},
+        )
     # Transform BAML response to proper pydantic reponse model
     if response_model is str:

cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py CHANGED Viewed

@@ -15,6 +15,7 @@ from tenacity import (
 from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import (
     LLMInterface,
 )
+from cognee.shared.rate_limiting import llm_rate_limiter_context_manager
 from cognee.infrastructure.llm.config import get_llm_config
 logger = get_logger()
@@ -28,13 +29,16 @@ class AnthropicAdapter(LLMInterface):
     name = "Anthropic"
     model: str
+    default_instructor_mode = "anthropic_tools"
-    def __init__(self, max_completion_tokens: int, model: str = None):
+    def __init__(self, max_completion_tokens: int, model: str = None, instructor_mode: str = None):
         import anthropic
+        self.instructor_mode = instructor_mode if instructor_mode else self.default_instructor_mode
         self.aclient = instructor.patch(
             create=anthropic.AsyncAnthropic(api_key=get_llm_config().llm_api_key).messages.create,
-            mode=instructor.Mode.ANTHROPIC_TOOLS,
+            mode=instructor.Mode(self.instructor_mode),
         )
         self.model = model
@@ -42,13 +46,13 @@ class AnthropicAdapter(LLMInterface):
     @retry(
         stop=stop_after_delay(128),
-        wait=wait_exponential_jitter(2, 128),
+        wait=wait_exponential_jitter(8, 128),
         retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError),
         before_sleep=before_sleep_log(logger, logging.DEBUG),
         reraise=True,
     )
     async def acreate_structured_output(
-        self, text_input: str, system_prompt: str, response_model: Type[BaseModel]
+        self, text_input: str, system_prompt: str, response_model: Type[BaseModel], **kwargs
     ) -> BaseModel:
         """
         Generate a response from a user query.
@@ -66,17 +70,17 @@ class AnthropicAdapter(LLMInterface):
             - BaseModel: An instance of BaseModel containing the structured response.
         """
-        return await self.aclient(
-            model=self.model,
-            max_tokens=4096,
-            max_retries=5,
-            messages=[
-                {
-                    "role": "user",
-                    "content": f"""Use the given format to extract information
-                from the following input: {text_input}. {system_prompt}""",
-                }
-            ],
-            response_model=response_model,
-        )
+        async with llm_rate_limiter_context_manager():
+            return await self.aclient(
+                model=self.model,
+                max_tokens=4096,
+                max_retries=2,
+                messages=[
+                    {
+                        "role": "user",
+                        "content": f"""Use the given format to extract information
+                    from the following input: {text_input}. {system_prompt}""",
+                    }
+                ],
+                response_model=response_model,
+            )

cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+"""Bedrock LLM adapter module."""
+from .adapter import BedrockAdapter
+__all__ = ["BedrockAdapter"]

cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/adapter.py ADDED Viewed

@@ -0,0 +1,153 @@
+import litellm
+import instructor
+from typing import Type
+from pydantic import BaseModel
+from litellm.exceptions import ContentPolicyViolationError
+from instructor.exceptions import InstructorRetryException
+from cognee.infrastructure.llm.LLMGateway import LLMGateway
+from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import (
+    LLMInterface,
+)
+from cognee.infrastructure.llm.exceptions import (
+    ContentPolicyFilterError,
+    MissingSystemPromptPathError,
+)
+from cognee.infrastructure.files.storage.s3_config import get_s3_config
+from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.rate_limiter import (
+    rate_limit_async,
+    rate_limit_sync,
+    sleep_and_retry_async,
+    sleep_and_retry_sync,
+)
+from cognee.modules.observability.get_observe import get_observe
+observe = get_observe()
+class BedrockAdapter(LLMInterface):
+    """
+    Adapter for AWS Bedrock API with support for three authentication methods:
+    1. API Key (Bearer Token)
+    2. AWS Credentials (access key + secret key)
+    3. AWS Profile (boto3 credential chain)
+    """
+    name = "Bedrock"
+    model: str
+    api_key: str
+    default_instructor_mode = "json_schema_mode"
+    MAX_RETRIES = 5
+    def __init__(
+        self,
+        model: str,
+        api_key: str = None,
+        max_completion_tokens: int = 16384,
+        streaming: bool = False,
+        instructor_mode: str = None,
+    ):
+        self.instructor_mode = instructor_mode if instructor_mode else self.default_instructor_mode
+        self.aclient = instructor.from_litellm(
+            litellm.acompletion, mode=instructor.Mode(self.instructor_mode)
+        )
+        self.client = instructor.from_litellm(litellm.completion)
+        self.model = model
+        self.api_key = api_key
+        self.max_completion_tokens = max_completion_tokens
+        self.streaming = streaming
+    def _create_bedrock_request(
+        self, text_input: str, system_prompt: str, response_model: Type[BaseModel]
+    ) -> dict:
+        """Create Bedrock request with authentication."""
+        request_params = {
+            "model": self.model,
+            "custom_llm_provider": "bedrock",
+            "drop_params": True,
+            "messages": [
+                {"role": "user", "content": text_input},
+                {"role": "system", "content": system_prompt},
+            ],
+            "response_model": response_model,
+            "max_retries": self.MAX_RETRIES,
+            "max_completion_tokens": self.max_completion_tokens,
+            "stream": self.streaming,
+        }
+        s3_config = get_s3_config()
+        # Add authentication parameters
+        if self.api_key:
+            request_params["api_key"] = self.api_key
+        elif s3_config.aws_access_key_id and s3_config.aws_secret_access_key:
+            request_params["aws_access_key_id"] = s3_config.aws_access_key_id
+            request_params["aws_secret_access_key"] = s3_config.aws_secret_access_key
+            if s3_config.aws_session_token:
+                request_params["aws_session_token"] = s3_config.aws_session_token
+        elif s3_config.aws_profile_name:
+            request_params["aws_profile_name"] = s3_config.aws_profile_name
+        if s3_config.aws_region:
+            request_params["aws_region_name"] = s3_config.aws_region
+        # Add optional parameters
+        if s3_config.aws_bedrock_runtime_endpoint:
+            request_params["aws_bedrock_runtime_endpoint"] = s3_config.aws_bedrock_runtime_endpoint
+        return request_params
+    @observe(as_type="generation")
+    @sleep_and_retry_async()
+    @rate_limit_async
+    async def acreate_structured_output(
+        self, text_input: str, system_prompt: str, response_model: Type[BaseModel]
+    ) -> BaseModel:
+        """Generate structured output from AWS Bedrock API."""
+        try:
+            request_params = self._create_bedrock_request(text_input, system_prompt, response_model)
+            return await self.aclient.chat.completions.create(**request_params)
+        except (
+            ContentPolicyViolationError,
+            InstructorRetryException,
+        ) as error:
+            if (
+                isinstance(error, InstructorRetryException)
+                and "content management policy" not in str(error).lower()
+            ):
+                raise error
+            raise ContentPolicyFilterError(
+                f"The provided input contains content that is not aligned with our content policy: {text_input}"
+            )
+    @observe
+    @sleep_and_retry_sync()
+    @rate_limit_sync
+    def create_structured_output(
+        self, text_input: str, system_prompt: str, response_model: Type[BaseModel]
+    ) -> BaseModel:
+        """Generate structured output from AWS Bedrock API (synchronous)."""
+        request_params = self._create_bedrock_request(text_input, system_prompt, response_model)
+        return self.client.chat.completions.create(**request_params)
+    def show_prompt(self, text_input: str, system_prompt: str) -> str:
+        """Format and display the prompt for a user query."""
+        if not text_input:
+            text_input = "No user input provided."
+        if not system_prompt:
+            raise MissingSystemPromptPathError()
+        system_prompt = LLMGateway.read_query_prompt(system_prompt)
+        formatted_prompt = (
+            f"""System Prompt:\n{system_prompt}\n\nUser Input:\n{text_input}\n"""
+            if system_prompt
+            else None
+        )
+        return formatted_prompt

cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py CHANGED Viewed

@@ -13,6 +13,7 @@ from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.ll
     LLMInterface,
 )
 import logging
+from cognee.shared.rate_limiting import llm_rate_limiter_context_manager
 from cognee.shared.logging_utils import get_logger
 from tenacity import (
     retry,
@@ -41,6 +42,7 @@ class GeminiAdapter(LLMInterface):
     name: str
     model: str
     api_key: str
+    default_instructor_mode = "json_mode"
     def __init__(
         self,
@@ -49,6 +51,7 @@ class GeminiAdapter(LLMInterface):
         model: str,
         api_version: str,
         max_completion_tokens: int,
+        instructor_mode: str = None,
         fallback_model: str = None,
         fallback_api_key: str = None,
         fallback_endpoint: str = None,
@@ -63,17 +66,21 @@ class GeminiAdapter(LLMInterface):
         self.fallback_api_key = fallback_api_key
         self.fallback_endpoint = fallback_endpoint
-        self.aclient = instructor.from_litellm(litellm.acompletion, mode=instructor.Mode.JSON)
+        self.instructor_mode = instructor_mode if instructor_mode else self.default_instructor_mode
+        self.aclient = instructor.from_litellm(
+            litellm.acompletion, mode=instructor.Mode(self.instructor_mode)
+        )
     @retry(
         stop=stop_after_delay(128),
-        wait=wait_exponential_jitter(2, 128),
+        wait=wait_exponential_jitter(8, 128),
         retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError),
         before_sleep=before_sleep_log(logger, logging.DEBUG),
         reraise=True,
     )
     async def acreate_structured_output(
-        self, text_input: str, system_prompt: str, response_model: Type[BaseModel]
+        self, text_input: str, system_prompt: str, response_model: Type[BaseModel], **kwargs
     ) -> BaseModel:
         """
         Generate a response from a user query.
@@ -99,24 +106,25 @@ class GeminiAdapter(LLMInterface):
         """
         try:
-            return await self.aclient.chat.completions.create(
-                model=self.model,
-                messages=[
-                    {
-                        "role": "user",
-                        "content": f"""{text_input}""",
-                    },
-                    {
-                        "role": "system",
-                        "content": system_prompt,
-                    },
-                ],
-                api_key=self.api_key,
-                max_retries=5,
-                api_base=self.endpoint,
-                api_version=self.api_version,
-                response_model=response_model,
-            )
+            async with llm_rate_limiter_context_manager():
+                return await self.aclient.chat.completions.create(
+                    model=self.model,
+                    messages=[
+                        {
+                            "role": "user",
+                            "content": f"""{text_input}""",
+                        },
+                        {
+                            "role": "system",
+                            "content": system_prompt,
+                        },
+                    ],
+                    api_key=self.api_key,
+                    max_retries=2,
+                    api_base=self.endpoint,
+                    api_version=self.api_version,
+                    response_model=response_model,
+                )
         except (
             ContentFilterFinishReasonError,
             ContentPolicyViolationError,
@@ -134,23 +142,24 @@ class GeminiAdapter(LLMInterface):
                 )
             try:
-                return await self.aclient.chat.completions.create(
-                    model=self.fallback_model,
-                    messages=[
-                        {
-                            "role": "user",
-                            "content": f"""{text_input}""",
-                        },
-                        {
-                            "role": "system",
-                            "content": system_prompt,
-                        },
-                    ],
-                    max_retries=5,
-                    api_key=self.fallback_api_key,
-                    api_base=self.fallback_endpoint,
-                    response_model=response_model,
-                )
+                async with llm_rate_limiter_context_manager():
+                    return await self.aclient.chat.completions.create(
+                        model=self.fallback_model,
+                        messages=[
+                            {
+                                "role": "user",
+                                "content": f"""{text_input}""",
+                            },
+                            {
+                                "role": "system",
+                                "content": system_prompt,
+                            },
+                        ],
+                        max_retries=2,
+                        api_key=self.fallback_api_key,
+                        api_base=self.fallback_endpoint,
+                        response_model=response_model,
+                    )
             except (
                 ContentFilterFinishReasonError,
                 ContentPolicyViolationError,

cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py CHANGED Viewed

@@ -13,6 +13,7 @@ from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.ll
     LLMInterface,
 )
 import logging
+from cognee.shared.rate_limiting import llm_rate_limiter_context_manager
 from cognee.shared.logging_utils import get_logger
 from tenacity import (
     retry,
@@ -41,6 +42,7 @@ class GenericAPIAdapter(LLMInterface):
     name: str
     model: str
     api_key: str
+    default_instructor_mode = "json_mode"
     def __init__(
         self,
@@ -49,6 +51,7 @@ class GenericAPIAdapter(LLMInterface):
         model: str,
         name: str,
         max_completion_tokens: int,
+        instructor_mode: str = None,
         fallback_model: str = None,
         fallback_api_key: str = None,
         fallback_endpoint: str = None,
@@ -63,17 +66,21 @@ class GenericAPIAdapter(LLMInterface):
         self.fallback_api_key = fallback_api_key
         self.fallback_endpoint = fallback_endpoint
-        self.aclient = instructor.from_litellm(litellm.acompletion, mode=instructor.Mode.JSON)
+        self.instructor_mode = instructor_mode if instructor_mode else self.default_instructor_mode
+        self.aclient = instructor.from_litellm(
+            litellm.acompletion, mode=instructor.Mode(self.instructor_mode)
+        )
     @retry(
         stop=stop_after_delay(128),
-        wait=wait_exponential_jitter(2, 128),
+        wait=wait_exponential_jitter(8, 128),
         retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError),
         before_sleep=before_sleep_log(logger, logging.DEBUG),
         reraise=True,
     )
     async def acreate_structured_output(
-        self, text_input: str, system_prompt: str, response_model: Type[BaseModel]
+        self, text_input: str, system_prompt: str, response_model: Type[BaseModel], **kwargs
     ) -> BaseModel:
         """
         Generate a response from a user query.
@@ -99,23 +106,24 @@ class GenericAPIAdapter(LLMInterface):
         """
         try:
-            return await self.aclient.chat.completions.create(
-                model=self.model,
-                messages=[
-                    {
-                        "role": "user",
-                        "content": f"""{text_input}""",
-                    },
-                    {
-                        "role": "system",
-                        "content": system_prompt,
-                    },
-                ],
-                max_retries=5,
-                api_key=self.api_key,
-                api_base=self.endpoint,
-                response_model=response_model,
-            )
+            async with llm_rate_limiter_context_manager():
+                return await self.aclient.chat.completions.create(
+                    model=self.model,
+                    messages=[
+                        {
+                            "role": "user",
+                            "content": f"""{text_input}""",
+                        },
+                        {
+                            "role": "system",
+                            "content": system_prompt,
+                        },
+                    ],
+                    max_retries=2,
+                    api_key=self.api_key,
+                    api_base=self.endpoint,
+                    response_model=response_model,
+                )
         except (
             ContentFilterFinishReasonError,
             ContentPolicyViolationError,
@@ -133,23 +141,24 @@ class GenericAPIAdapter(LLMInterface):
                 ) from error
             try:
-                return await self.aclient.chat.completions.create(
-                    model=self.fallback_model,
-                    messages=[
-                        {
-                            "role": "user",
-                            "content": f"""{text_input}""",
-                        },
-                        {
-                            "role": "system",
-                            "content": system_prompt,
-                        },
-                    ],
-                    max_retries=5,
-                    api_key=self.fallback_api_key,
-                    api_base=self.fallback_endpoint,
-                    response_model=response_model,
-                )
+                async with llm_rate_limiter_context_manager():
+                    return await self.aclient.chat.completions.create(
+                        model=self.fallback_model,
+                        messages=[
+                            {
+                                "role": "user",
+                                "content": f"""{text_input}""",
+                            },
+                            {
+                                "role": "system",
+                                "content": system_prompt,
+                            },
+                        ],
+                        max_retries=2,
+                        api_key=self.fallback_api_key,
+                        api_base=self.fallback_endpoint,
+                        response_model=response_model,
+                    )
             except (
                 ContentFilterFinishReasonError,
                 ContentPolicyViolationError,

cognee 0.4.1__py3-none-any.whl → 0.5.0__py3-none-any.whl

cognee 0.4.1__py3-none-any.whl → 0.5.0__py3-none-any.whl