cognee 0.5.0__py3-none-any.whl → 0.5.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131)
  1. cognee/api/client.py +5 -1
  2. cognee/api/v1/add/add.py +1 -2
  3. cognee/api/v1/cognify/code_graph_pipeline.py +119 -0
  4. cognee/api/v1/cognify/cognify.py +16 -24
  5. cognee/api/v1/cognify/routers/__init__.py +1 -0
  6. cognee/api/v1/cognify/routers/get_code_pipeline_router.py +90 -0
  7. cognee/api/v1/cognify/routers/get_cognify_router.py +1 -3
  8. cognee/api/v1/datasets/routers/get_datasets_router.py +3 -3
  9. cognee/api/v1/ontologies/ontologies.py +37 -12
  10. cognee/api/v1/ontologies/routers/get_ontology_router.py +25 -27
  11. cognee/api/v1/search/search.py +0 -4
  12. cognee/api/v1/ui/ui.py +68 -38
  13. cognee/context_global_variables.py +16 -61
  14. cognee/eval_framework/answer_generation/answer_generation_executor.py +0 -10
  15. cognee/eval_framework/answer_generation/run_question_answering_module.py +1 -1
  16. cognee/eval_framework/corpus_builder/task_getters/get_cascade_graph_tasks.py +2 -0
  17. cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +4 -4
  18. cognee/eval_framework/eval_config.py +2 -2
  19. cognee/eval_framework/modal_run_eval.py +28 -16
  20. cognee/infrastructure/databases/graph/config.py +0 -3
  21. cognee/infrastructure/databases/graph/get_graph_engine.py +0 -1
  22. cognee/infrastructure/databases/graph/graph_db_interface.py +0 -15
  23. cognee/infrastructure/databases/graph/kuzu/adapter.py +0 -228
  24. cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +1 -80
  25. cognee/infrastructure/databases/utils/__init__.py +0 -3
  26. cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +48 -62
  27. cognee/infrastructure/databases/vector/config.py +0 -2
  28. cognee/infrastructure/databases/vector/create_vector_engine.py +0 -1
  29. cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +6 -8
  30. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +7 -9
  31. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +10 -11
  32. cognee/infrastructure/databases/vector/embeddings/embedding_rate_limiter.py +544 -0
  33. cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +0 -2
  34. cognee/infrastructure/databases/vector/vector_db_interface.py +0 -35
  35. cognee/infrastructure/files/storage/s3_config.py +0 -2
  36. cognee/infrastructure/llm/LLMGateway.py +2 -5
  37. cognee/infrastructure/llm/config.py +0 -35
  38. cognee/infrastructure/llm/extraction/knowledge_graph/extract_content_graph.py +2 -2
  39. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py +8 -23
  40. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +16 -17
  41. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +37 -40
  42. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +36 -39
  43. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +1 -19
  44. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +9 -11
  45. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +21 -23
  46. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +34 -42
  47. cognee/modules/cognify/config.py +0 -2
  48. cognee/modules/data/deletion/prune_system.py +2 -52
  49. cognee/modules/data/methods/delete_dataset.py +0 -26
  50. cognee/modules/engine/models/__init__.py +0 -1
  51. cognee/modules/graph/cognee_graph/CogneeGraph.py +37 -85
  52. cognee/modules/graph/cognee_graph/CogneeGraphElements.py +3 -8
  53. cognee/modules/memify/memify.py +7 -1
  54. cognee/modules/pipelines/operations/pipeline.py +2 -18
  55. cognee/modules/retrieval/__init__.py +1 -1
  56. cognee/modules/retrieval/code_retriever.py +232 -0
  57. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +0 -4
  58. cognee/modules/retrieval/graph_completion_cot_retriever.py +0 -4
  59. cognee/modules/retrieval/graph_completion_retriever.py +0 -10
  60. cognee/modules/retrieval/graph_summary_completion_retriever.py +0 -4
  61. cognee/modules/retrieval/temporal_retriever.py +0 -4
  62. cognee/modules/retrieval/utils/brute_force_triplet_search.py +10 -42
  63. cognee/modules/run_custom_pipeline/run_custom_pipeline.py +1 -8
  64. cognee/modules/search/methods/get_search_type_tools.py +8 -54
  65. cognee/modules/search/methods/no_access_control_search.py +0 -4
  66. cognee/modules/search/methods/search.py +0 -21
  67. cognee/modules/search/types/SearchType.py +1 -1
  68. cognee/modules/settings/get_settings.py +0 -19
  69. cognee/modules/users/methods/get_authenticated_user.py +2 -2
  70. cognee/modules/users/models/DatasetDatabase.py +3 -15
  71. cognee/shared/logging_utils.py +0 -4
  72. cognee/tasks/code/enrich_dependency_graph_checker.py +35 -0
  73. cognee/tasks/code/get_local_dependencies_checker.py +20 -0
  74. cognee/tasks/code/get_repo_dependency_graph_checker.py +35 -0
  75. cognee/tasks/documents/__init__.py +1 -0
  76. cognee/tasks/documents/check_permissions_on_dataset.py +26 -0
  77. cognee/tasks/graph/extract_graph_from_data.py +10 -9
  78. cognee/tasks/repo_processor/__init__.py +2 -0
  79. cognee/tasks/repo_processor/get_local_dependencies.py +335 -0
  80. cognee/tasks/repo_processor/get_non_code_files.py +158 -0
  81. cognee/tasks/repo_processor/get_repo_file_dependencies.py +243 -0
  82. cognee/tasks/storage/add_data_points.py +2 -142
  83. cognee/tests/test_cognee_server_start.py +4 -2
  84. cognee/tests/test_conversation_history.py +1 -23
  85. cognee/tests/test_delete_bmw_example.py +60 -0
  86. cognee/tests/test_search_db.py +1 -37
  87. cognee/tests/unit/api/test_ontology_endpoint.py +89 -77
  88. cognee/tests/unit/infrastructure/mock_embedding_engine.py +7 -3
  89. cognee/tests/unit/infrastructure/test_embedding_rate_limiting_realistic.py +5 -0
  90. cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +2 -2
  91. cognee/tests/unit/modules/graph/cognee_graph_test.py +0 -406
  92. {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/METADATA +89 -76
  93. {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/RECORD +97 -118
  94. {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/WHEEL +1 -1
  95. cognee/api/v1/ui/node_setup.py +0 -360
  96. cognee/api/v1/ui/npm_utils.py +0 -50
  97. cognee/eval_framework/Dockerfile +0 -29
  98. cognee/infrastructure/databases/dataset_database_handler/__init__.py +0 -3
  99. cognee/infrastructure/databases/dataset_database_handler/dataset_database_handler_interface.py +0 -80
  100. cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +0 -18
  101. cognee/infrastructure/databases/dataset_database_handler/use_dataset_database_handler.py +0 -10
  102. cognee/infrastructure/databases/graph/kuzu/KuzuDatasetDatabaseHandler.py +0 -81
  103. cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +0 -168
  104. cognee/infrastructure/databases/utils/get_graph_dataset_database_handler.py +0 -10
  105. cognee/infrastructure/databases/utils/get_vector_dataset_database_handler.py +0 -10
  106. cognee/infrastructure/databases/utils/resolve_dataset_database_connection_info.py +0 -30
  107. cognee/infrastructure/databases/vector/lancedb/LanceDBDatasetDatabaseHandler.py +0 -50
  108. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/__init__.py +0 -5
  109. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/adapter.py +0 -153
  110. cognee/memify_pipelines/create_triplet_embeddings.py +0 -53
  111. cognee/modules/engine/models/Triplet.py +0 -9
  112. cognee/modules/retrieval/register_retriever.py +0 -10
  113. cognee/modules/retrieval/registered_community_retrievers.py +0 -1
  114. cognee/modules/retrieval/triplet_retriever.py +0 -182
  115. cognee/shared/rate_limiting.py +0 -30
  116. cognee/tasks/memify/get_triplet_datapoints.py +0 -289
  117. cognee/tests/integration/retrieval/test_triplet_retriever.py +0 -84
  118. cognee/tests/integration/tasks/test_add_data_points.py +0 -139
  119. cognee/tests/integration/tasks/test_get_triplet_datapoints.py +0 -69
  120. cognee/tests/test_dataset_database_handler.py +0 -137
  121. cognee/tests/test_dataset_delete.py +0 -76
  122. cognee/tests/test_edge_centered_payload.py +0 -170
  123. cognee/tests/test_pipeline_cache.py +0 -164
  124. cognee/tests/unit/infrastructure/llm/test_llm_config.py +0 -46
  125. cognee/tests/unit/modules/memify_tasks/test_get_triplet_datapoints.py +0 -214
  126. cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +0 -608
  127. cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +0 -83
  128. cognee/tests/unit/tasks/storage/test_add_data_points.py +0 -288
  129. {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/entry_points.txt +0 -0
  130. {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/licenses/LICENSE +0 -0
  131. {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/licenses/NOTICE.md +0 -0
@@ -1,15 +1,7 @@
 import asyncio
 from typing import Type
-from pydantic import BaseModel
-from tenacity import (
-    retry,
-    stop_after_delay,
-    wait_exponential_jitter,
-    retry_if_not_exception_type,
-    before_sleep_log,
-)
-
 from cognee.shared.logging_utils import get_logger
+
 from cognee.infrastructure.llm.config import get_llm_config
 from cognee.infrastructure.llm.structured_output_framework.baml.baml_src.extraction.create_dynamic_baml_type import (
     create_dynamic_baml_type,
@@ -18,18 +10,12 @@ from cognee.infrastructure.llm.structured_output_framework.baml.baml_client.type
     TypeBuilder,
 )
 from cognee.infrastructure.llm.structured_output_framework.baml.baml_client import b
-from cognee.shared.rate_limiting import llm_rate_limiter_context_manager
-import logging
+from pydantic import BaseModel
+
 
 logger = get_logger()
 
 
-@retry(
-    stop=stop_after_delay(128),
-    wait=wait_exponential_jitter(8, 128),
-    before_sleep=before_sleep_log(logger, logging.DEBUG),
-    reraise=True,
-)
 async def acreate_structured_output(
     text_input: str, system_prompt: str, response_model: Type[BaseModel]
 ):
@@ -59,12 +45,11 @@ async def acreate_structured_output(
     tb = TypeBuilder()
     type_builder = create_dynamic_baml_type(tb, tb.ResponseModel, response_model)
 
-    async with llm_rate_limiter_context_manager():
-        result = await b.AcreateStructuredOutput(
-            text_input=text_input,
-            system_prompt=system_prompt,
-            baml_options={"client_registry": config.baml_registry, "tb": type_builder},
-        )
+    result = await b.AcreateStructuredOutput(
+        text_input=text_input,
+        system_prompt=system_prompt,
+        baml_options={"client_registry": config.baml_registry, "tb": type_builder},
+    )
 
     # Transform BAML response to proper pydantic reponse model
     if response_model is str:
@@ -15,7 +15,6 @@ from tenacity import (
 from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import (
     LLMInterface,
 )
-from cognee.shared.rate_limiting import llm_rate_limiter_context_manager
 from cognee.infrastructure.llm.config import get_llm_config
 
 logger = get_logger()
@@ -46,13 +45,13 @@ class AnthropicAdapter(LLMInterface):
 
     @retry(
         stop=stop_after_delay(128),
-        wait=wait_exponential_jitter(8, 128),
+        wait=wait_exponential_jitter(2, 128),
         retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError),
         before_sleep=before_sleep_log(logger, logging.DEBUG),
         reraise=True,
     )
     async def acreate_structured_output(
-        self, text_input: str, system_prompt: str, response_model: Type[BaseModel], **kwargs
+        self, text_input: str, system_prompt: str, response_model: Type[BaseModel]
     ) -> BaseModel:
         """
         Generate a response from a user query.
@@ -70,17 +69,17 @@ class AnthropicAdapter(LLMInterface):
 
         - BaseModel: An instance of BaseModel containing the structured response.
         """
-        async with llm_rate_limiter_context_manager():
-            return await self.aclient(
-                model=self.model,
-                max_tokens=4096,
-                max_retries=2,
-                messages=[
-                    {
-                        "role": "user",
-                        "content": f"""Use the given format to extract information
-                        from the following input: {text_input}. {system_prompt}""",
-                    }
-                ],
-                response_model=response_model,
-            )
+
+        return await self.aclient(
+            model=self.model,
+            max_tokens=4096,
+            max_retries=5,
+            messages=[
+                {
+                    "role": "user",
+                    "content": f"""Use the given format to extract information
+                    from the following input: {text_input}. {system_prompt}""",
+                }
+            ],
+            response_model=response_model,
+        )
@@ -13,7 +13,6 @@ from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.ll
     LLMInterface,
 )
 import logging
-from cognee.shared.rate_limiting import llm_rate_limiter_context_manager
 from cognee.shared.logging_utils import get_logger
 from tenacity import (
     retry,
@@ -74,13 +73,13 @@ class GeminiAdapter(LLMInterface):
 
     @retry(
         stop=stop_after_delay(128),
-        wait=wait_exponential_jitter(8, 128),
+        wait=wait_exponential_jitter(2, 128),
         retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError),
         before_sleep=before_sleep_log(logger, logging.DEBUG),
         reraise=True,
     )
     async def acreate_structured_output(
-        self, text_input: str, system_prompt: str, response_model: Type[BaseModel], **kwargs
+        self, text_input: str, system_prompt: str, response_model: Type[BaseModel]
     ) -> BaseModel:
         """
         Generate a response from a user query.
@@ -106,25 +105,24 @@ class GeminiAdapter(LLMInterface):
         """
 
         try:
-            async with llm_rate_limiter_context_manager():
-                return await self.aclient.chat.completions.create(
-                    model=self.model,
-                    messages=[
-                        {
-                            "role": "user",
-                            "content": f"""{text_input}""",
-                        },
-                        {
-                            "role": "system",
-                            "content": system_prompt,
-                        },
-                    ],
-                    api_key=self.api_key,
-                    max_retries=2,
-                    api_base=self.endpoint,
-                    api_version=self.api_version,
-                    response_model=response_model,
-                )
+            return await self.aclient.chat.completions.create(
+                model=self.model,
+                messages=[
+                    {
+                        "role": "user",
+                        "content": f"""{text_input}""",
+                    },
+                    {
+                        "role": "system",
+                        "content": system_prompt,
+                    },
+                ],
+                api_key=self.api_key,
+                max_retries=5,
+                api_base=self.endpoint,
+                api_version=self.api_version,
+                response_model=response_model,
+            )
         except (
             ContentFilterFinishReasonError,
             ContentPolicyViolationError,
@@ -142,24 +140,23 @@ class GeminiAdapter(LLMInterface):
                 )
 
             try:
-                async with llm_rate_limiter_context_manager():
-                    return await self.aclient.chat.completions.create(
-                        model=self.fallback_model,
-                        messages=[
-                            {
-                                "role": "user",
-                                "content": f"""{text_input}""",
-                            },
-                            {
-                                "role": "system",
-                                "content": system_prompt,
-                            },
-                        ],
-                        max_retries=2,
-                        api_key=self.fallback_api_key,
-                        api_base=self.fallback_endpoint,
-                        response_model=response_model,
-                    )
+                return await self.aclient.chat.completions.create(
+                    model=self.fallback_model,
+                    messages=[
+                        {
+                            "role": "user",
+                            "content": f"""{text_input}""",
+                        },
+                        {
+                            "role": "system",
+                            "content": system_prompt,
+                        },
+                    ],
+                    max_retries=5,
+                    api_key=self.fallback_api_key,
+                    api_base=self.fallback_endpoint,
+                    response_model=response_model,
+                )
             except (
                 ContentFilterFinishReasonError,
                 ContentPolicyViolationError,
@@ -13,7 +13,6 @@ from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.ll
     LLMInterface,
 )
 import logging
-from cognee.shared.rate_limiting import llm_rate_limiter_context_manager
 from cognee.shared.logging_utils import get_logger
 from tenacity import (
     retry,
@@ -74,13 +73,13 @@ class GenericAPIAdapter(LLMInterface):
 
     @retry(
         stop=stop_after_delay(128),
-        wait=wait_exponential_jitter(8, 128),
+        wait=wait_exponential_jitter(2, 128),
         retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError),
         before_sleep=before_sleep_log(logger, logging.DEBUG),
         reraise=True,
     )
     async def acreate_structured_output(
-        self, text_input: str, system_prompt: str, response_model: Type[BaseModel], **kwargs
+        self, text_input: str, system_prompt: str, response_model: Type[BaseModel]
     ) -> BaseModel:
         """
         Generate a response from a user query.
@@ -106,24 +105,23 @@ class GenericAPIAdapter(LLMInterface):
         """
 
         try:
-            async with llm_rate_limiter_context_manager():
-                return await self.aclient.chat.completions.create(
-                    model=self.model,
-                    messages=[
-                        {
-                            "role": "user",
-                            "content": f"""{text_input}""",
-                        },
-                        {
-                            "role": "system",
-                            "content": system_prompt,
-                        },
-                    ],
-                    max_retries=2,
-                    api_key=self.api_key,
-                    api_base=self.endpoint,
-                    response_model=response_model,
-                )
+            return await self.aclient.chat.completions.create(
+                model=self.model,
+                messages=[
+                    {
+                        "role": "user",
+                        "content": f"""{text_input}""",
+                    },
+                    {
+                        "role": "system",
+                        "content": system_prompt,
+                    },
+                ],
+                max_retries=5,
+                api_key=self.api_key,
+                api_base=self.endpoint,
+                response_model=response_model,
+            )
         except (
             ContentFilterFinishReasonError,
             ContentPolicyViolationError,
@@ -141,24 +139,23 @@ class GenericAPIAdapter(LLMInterface):
                 ) from error
 
             try:
-                async with llm_rate_limiter_context_manager():
-                    return await self.aclient.chat.completions.create(
-                        model=self.fallback_model,
-                        messages=[
-                            {
-                                "role": "user",
-                                "content": f"""{text_input}""",
-                            },
-                            {
-                                "role": "system",
-                                "content": system_prompt,
-                            },
-                        ],
-                        max_retries=2,
-                        api_key=self.fallback_api_key,
-                        api_base=self.fallback_endpoint,
-                        response_model=response_model,
-                    )
+                return await self.aclient.chat.completions.create(
+                    model=self.fallback_model,
+                    messages=[
+                        {
+                            "role": "user",
+                            "content": f"""{text_input}""",
+                        },
+                        {
+                            "role": "system",
+                            "content": system_prompt,
+                        },
+                    ],
+                    max_retries=5,
+                    api_key=self.fallback_api_key,
+                    api_base=self.fallback_endpoint,
+                    response_model=response_model,
+                )
             except (
                 ContentFilterFinishReasonError,
                 ContentPolicyViolationError,
@@ -24,7 +24,6 @@ class LLMProvider(Enum):
     - CUSTOM: Represents a custom provider option.
     - GEMINI: Represents the Gemini provider.
     - MISTRAL: Represents the Mistral AI provider.
-    - BEDROCK: Represents the AWS Bedrock provider.
     """
 
     OPENAI = "openai"
@@ -33,7 +32,6 @@ class LLMProvider(Enum):
     CUSTOM = "custom"
     GEMINI = "gemini"
     MISTRAL = "mistral"
-    BEDROCK = "bedrock"
 
 
 def get_llm_client(raise_api_key_error: bool = True):
@@ -156,7 +154,7 @@ def get_llm_client(raise_api_key_error: bool = True):
         )
 
     elif provider == LLMProvider.MISTRAL:
-        if llm_config.llm_api_key is None and raise_api_key_error:
+        if llm_config.llm_api_key is None:
             raise LLMAPIKeyNotSetError()
 
         from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.mistral.adapter import (
@@ -171,21 +169,5 @@
             instructor_mode=llm_config.llm_instructor_mode.lower(),
         )
 
-    elif provider == LLMProvider.BEDROCK:
-        # if llm_config.llm_api_key is None and raise_api_key_error:
-        #     raise LLMAPIKeyNotSetError()
-
-        from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.bedrock.adapter import (
-            BedrockAdapter,
-        )
-
-        return BedrockAdapter(
-            model=llm_config.llm_model,
-            api_key=llm_config.llm_api_key,
-            max_completion_tokens=max_completion_tokens,
-            streaming=llm_config.llm_streaming,
-            instructor_mode=llm_config.llm_instructor_mode.lower(),
-        )
-
     else:
         raise UnsupportedLLMProviderError(provider)
@@ -10,7 +10,6 @@ from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.ll
     LLMInterface,
 )
 from cognee.infrastructure.llm.config import get_llm_config
-from cognee.shared.rate_limiting import llm_rate_limiter_context_manager
 
 import logging
 from tenacity import (
@@ -63,13 +62,13 @@ class MistralAdapter(LLMInterface):
 
     @retry(
         stop=stop_after_delay(128),
-        wait=wait_exponential_jitter(8, 128),
+        wait=wait_exponential_jitter(2, 128),
        retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError),
         before_sleep=before_sleep_log(logger, logging.DEBUG),
         reraise=True,
     )
     async def acreate_structured_output(
-        self, text_input: str, system_prompt: str, response_model: Type[BaseModel], **kwargs
+        self, text_input: str, system_prompt: str, response_model: Type[BaseModel]
     ) -> BaseModel:
         """
         Generate a response from the user query.
@@ -98,14 +97,13 @@ class MistralAdapter(LLMInterface):
             },
         ]
         try:
-            async with llm_rate_limiter_context_manager():
-                response = await self.aclient.chat.completions.create(
-                    model=self.model,
-                    max_tokens=self.max_completion_tokens,
-                    max_retries=2,
-                    messages=messages,
-                    response_model=response_model,
-                )
+            response = await self.aclient.chat.completions.create(
+                model=self.model,
+                max_tokens=self.max_completion_tokens,
+                max_retries=5,
+                messages=messages,
+                response_model=response_model,
+            )
             if response.choices and response.choices[0].message.content:
                 content = response.choices[0].message.content
                 return response_model.model_validate_json(content)
@@ -11,8 +11,6 @@ from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.ll
 )
 from cognee.infrastructure.files.utils.open_data_file import open_data_file
 from cognee.shared.logging_utils import get_logger
-from cognee.shared.rate_limiting import llm_rate_limiter_context_manager
-
 from tenacity import (
     retry,
     stop_after_delay,
@@ -70,13 +68,13 @@ class OllamaAPIAdapter(LLMInterface):
 
     @retry(
         stop=stop_after_delay(128),
-        wait=wait_exponential_jitter(8, 128),
+        wait=wait_exponential_jitter(2, 128),
         retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError),
         before_sleep=before_sleep_log(logger, logging.DEBUG),
         reraise=True,
     )
     async def acreate_structured_output(
-        self, text_input: str, system_prompt: str, response_model: Type[BaseModel], **kwargs
+        self, text_input: str, system_prompt: str, response_model: Type[BaseModel]
     ) -> BaseModel:
         """
         Generate a structured output from the LLM using the provided text and system prompt.
@@ -97,33 +95,33 @@ class OllamaAPIAdapter(LLMInterface):
 
         - BaseModel: A structured output that conforms to the specified response model.
         """
-        async with llm_rate_limiter_context_manager():
-            response = self.aclient.chat.completions.create(
-                model=self.model,
-                messages=[
-                    {
-                        "role": "user",
-                        "content": f"{text_input}",
-                    },
-                    {
-                        "role": "system",
-                        "content": system_prompt,
-                    },
-                ],
-                max_retries=2,
-                response_model=response_model,
-            )
+
+        response = self.aclient.chat.completions.create(
+            model=self.model,
+            messages=[
+                {
+                    "role": "user",
+                    "content": f"{text_input}",
+                },
+                {
+                    "role": "system",
+                    "content": system_prompt,
+                },
+            ],
+            max_retries=5,
+            response_model=response_model,
+        )
 
         return response
 
     @retry(
         stop=stop_after_delay(128),
-        wait=wait_exponential_jitter(8, 128),
+        wait=wait_exponential_jitter(2, 128),
         retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError),
         before_sleep=before_sleep_log(logger, logging.DEBUG),
         reraise=True,
     )
-    async def create_transcript(self, input_file: str, **kwargs) -> str:
+    async def create_transcript(self, input_file: str) -> str:
         """
         Generate an audio transcript from a user query.
 
@@ -162,7 +160,7 @@ class OllamaAPIAdapter(LLMInterface):
         before_sleep=before_sleep_log(logger, logging.DEBUG),
         reraise=True,
     )
-    async def transcribe_image(self, input_file: str, **kwargs) -> str:
+    async def transcribe_image(self, input_file: str) -> str:
         """
         Transcribe content from an image using base64 encoding.
 
@@ -22,7 +22,6 @@ from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.ll
 from cognee.infrastructure.llm.exceptions import (
     ContentPolicyFilterError,
 )
-from cognee.shared.rate_limiting import llm_rate_limiter_context_manager
 from cognee.infrastructure.files.utils.open_data_file import open_data_file
 from cognee.modules.observability.get_observe import get_observe
 from cognee.shared.logging_utils import get_logger
@@ -106,13 +105,13 @@ class OpenAIAdapter(LLMInterface):
     @observe(as_type="generation")
     @retry(
         stop=stop_after_delay(128),
-        wait=wait_exponential_jitter(8, 128),
+        wait=wait_exponential_jitter(2, 128),
         retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError),
         before_sleep=before_sleep_log(logger, logging.DEBUG),
         reraise=True,
     )
     async def acreate_structured_output(
-        self, text_input: str, system_prompt: str, response_model: Type[BaseModel], **kwargs
+        self, text_input: str, system_prompt: str, response_model: Type[BaseModel]
     ) -> BaseModel:
         """
         Generate a response from a user query.
@@ -136,9 +135,34 @@ class OpenAIAdapter(LLMInterface):
         """
 
         try:
-            async with llm_rate_limiter_context_manager():
+            return await self.aclient.chat.completions.create(
+                model=self.model,
+                messages=[
+                    {
+                        "role": "user",
+                        "content": f"""{text_input}""",
+                    },
+                    {
+                        "role": "system",
+                        "content": system_prompt,
+                    },
+                ],
+                api_key=self.api_key,
+                api_base=self.endpoint,
+                api_version=self.api_version,
+                response_model=response_model,
+                max_retries=self.MAX_RETRIES,
+            )
+        except (
+            ContentFilterFinishReasonError,
+            ContentPolicyViolationError,
+            InstructorRetryException,
+        ) as e:
+            if not (self.fallback_model and self.fallback_api_key):
+                raise e
+            try:
                 return await self.aclient.chat.completions.create(
-                    model=self.model,
+                    model=self.fallback_model,
                     messages=[
                         {
                             "role": "user",
@@ -149,40 +173,11 @@ class OpenAIAdapter(LLMInterface):
                             "content": system_prompt,
                         },
                     ],
-                    api_key=self.api_key,
-                    api_base=self.endpoint,
-                    api_version=self.api_version,
+                    api_key=self.fallback_api_key,
+                    # api_base=self.fallback_endpoint,
                     response_model=response_model,
                     max_retries=self.MAX_RETRIES,
-                    **kwargs,
                 )
-        except (
-            ContentFilterFinishReasonError,
-            ContentPolicyViolationError,
-            InstructorRetryException,
-        ) as e:
-            if not (self.fallback_model and self.fallback_api_key):
-                raise e
-            try:
-                async with llm_rate_limiter_context_manager():
-                    return await self.aclient.chat.completions.create(
-                        model=self.fallback_model,
-                        messages=[
-                            {
-                                "role": "user",
-                                "content": f"""{text_input}""",
-                            },
-                            {
-                                "role": "system",
-                                "content": system_prompt,
-                            },
-                        ],
-                        api_key=self.fallback_api_key,
-                        # api_base=self.fallback_endpoint,
-                        response_model=response_model,
-                        max_retries=self.MAX_RETRIES,
-                        **kwargs,
-                    )
             except (
                 ContentFilterFinishReasonError,
                 ContentPolicyViolationError,
@@ -207,7 +202,7 @@ class OpenAIAdapter(LLMInterface):
         reraise=True,
     )
     def create_structured_output(
-        self, text_input: str, system_prompt: str, response_model: Type[BaseModel], **kwargs
+        self, text_input: str, system_prompt: str, response_model: Type[BaseModel]
     ) -> BaseModel:
         """
         Generate a response from a user query.
@@ -247,7 +242,6 @@ class OpenAIAdapter(LLMInterface):
             api_version=self.api_version,
             response_model=response_model,
             max_retries=self.MAX_RETRIES,
-            **kwargs,
         )
 
     @retry(
@@ -257,7 +251,7 @@ class OpenAIAdapter(LLMInterface):
         before_sleep=before_sleep_log(logger, logging.DEBUG),
         reraise=True,
     )
-    async def create_transcript(self, input, **kwargs):
+    async def create_transcript(self, input):
         """
         Generate an audio transcript from a user query.
 
@@ -284,7 +278,6 @@ class OpenAIAdapter(LLMInterface):
             api_base=self.endpoint,
             api_version=self.api_version,
             max_retries=self.MAX_RETRIES,
-            **kwargs,
         )
 
         return transcription
@@ -296,7 +289,7 @@ class OpenAIAdapter(LLMInterface):
         before_sleep=before_sleep_log(logger, logging.DEBUG),
         reraise=True,
     )
-    async def transcribe_image(self, input, **kwargs) -> BaseModel:
+    async def transcribe_image(self, input) -> BaseModel:
         """
         Generate a transcription of an image from a user query.
 
@@ -341,5 +334,4 @@ class OpenAIAdapter(LLMInterface):
             api_version=self.api_version,
             max_completion_tokens=300,
             max_retries=self.MAX_RETRIES,
-            **kwargs,
         )
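
For reference, the adapter hunks above all converge on the same tenacity retry configuration (stop_after_delay(128), wait_exponential_jitter(2, 128), retry_if_not_exception_type, before_sleep_log). Below is a minimal, self-contained sketch of that pattern; flaky_call and the use of ValueError in place of litellm.exceptions.NotFoundError are illustrative stand-ins, not part of the package.

import asyncio
import logging

from tenacity import (
    retry,
    stop_after_delay,
    wait_exponential_jitter,
    retry_if_not_exception_type,
    before_sleep_log,
)

logger = logging.getLogger(__name__)


@retry(
    stop=stop_after_delay(128),                     # give up after 128 seconds overall
    wait=wait_exponential_jitter(2, 128),           # exponential backoff with jitter: 2 s initial, 128 s cap
    retry=retry_if_not_exception_type(ValueError),  # do not retry "not found"-style errors
    before_sleep=before_sleep_log(logger, logging.DEBUG),
    reraise=True,                                   # re-raise the last exception once retries are exhausted
)
async def flaky_call() -> str:
    # Replace with the real client call; raising an exception here triggers a retry.
    return "ok"


if __name__ == "__main__":
    print(asyncio.run(flaky_call()))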