cognee 0.5.1__py3-none-any.whl → 0.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/__init__.py +2 -0
- cognee/alembic/README +1 -0
- cognee/alembic/env.py +107 -0
- cognee/alembic/script.py.mako +26 -0
- cognee/alembic/versions/1a58b986e6e1_enable_delete_for_old_tutorial_notebooks.py +52 -0
- cognee/alembic/versions/1d0bb7fede17_add_pipeline_run_status.py +33 -0
- cognee/alembic/versions/1daae0df1866_incremental_loading.py +48 -0
- cognee/alembic/versions/211ab850ef3d_add_sync_operations_table.py +118 -0
- cognee/alembic/versions/45957f0a9849_add_notebook_table.py +46 -0
- cognee/alembic/versions/46a6ce2bd2b2_expand_dataset_database_with_json_.py +333 -0
- cognee/alembic/versions/482cd6517ce4_add_default_user.py +30 -0
- cognee/alembic/versions/76625596c5c3_expand_dataset_database_for_multi_user.py +98 -0
- cognee/alembic/versions/8057ae7329c2_initial_migration.py +25 -0
- cognee/alembic/versions/9e7a3cb85175_loader_separation.py +104 -0
- cognee/alembic/versions/a1b2c3d4e5f6_add_label_column_to_data.py +38 -0
- cognee/alembic/versions/ab7e313804ae_permission_system_rework.py +236 -0
- cognee/alembic/versions/b9274c27a25a_kuzu_11_migration.py +75 -0
- cognee/alembic/versions/c946955da633_multi_tenant_support.py +137 -0
- cognee/alembic/versions/e1ec1dcb50b6_add_last_accessed_to_data.py +51 -0
- cognee/alembic/versions/e4ebee1091e7_expand_data_model_info.py +140 -0
- cognee/alembic.ini +117 -0
- cognee/api/v1/add/add.py +2 -1
- cognee/api/v1/add/routers/get_add_router.py +2 -0
- cognee/api/v1/cognify/cognify.py +11 -6
- cognee/api/v1/cognify/routers/get_cognify_router.py +8 -0
- cognee/api/v1/config/config.py +60 -0
- cognee/api/v1/datasets/routers/get_datasets_router.py +46 -3
- cognee/api/v1/memify/routers/get_memify_router.py +3 -0
- cognee/api/v1/search/routers/get_search_router.py +21 -6
- cognee/api/v1/search/search.py +21 -5
- cognee/api/v1/sync/routers/get_sync_router.py +3 -3
- cognee/cli/commands/add_command.py +1 -1
- cognee/cli/commands/cognify_command.py +6 -0
- cognee/cli/commands/config_command.py +1 -1
- cognee/context_global_variables.py +5 -1
- cognee/eval_framework/answer_generation/answer_generation_executor.py +7 -8
- cognee/infrastructure/databases/cache/cache_db_interface.py +38 -1
- cognee/infrastructure/databases/cache/config.py +6 -0
- cognee/infrastructure/databases/cache/fscache/FsCacheAdapter.py +21 -0
- cognee/infrastructure/databases/cache/get_cache_engine.py +9 -3
- cognee/infrastructure/databases/cache/redis/RedisAdapter.py +60 -1
- cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +7 -0
- cognee/infrastructure/databases/graph/get_graph_engine.py +29 -1
- cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +62 -27
- cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +17 -4
- cognee/infrastructure/databases/relational/config.py +16 -1
- cognee/infrastructure/databases/relational/create_relational_engine.py +13 -3
- cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +26 -3
- cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +2 -0
- cognee/infrastructure/databases/vector/config.py +6 -0
- cognee/infrastructure/databases/vector/create_vector_engine.py +70 -16
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +64 -9
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +13 -2
- cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +16 -3
- cognee/infrastructure/databases/vector/models/ScoredResult.py +3 -3
- cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +16 -3
- cognee/infrastructure/databases/vector/pgvector/PGVectorDatasetDatabaseHandler.py +86 -0
- cognee/infrastructure/databases/vector/pgvector/create_db_and_tables.py +81 -2
- cognee/infrastructure/databases/vector/vector_db_interface.py +8 -0
- cognee/infrastructure/files/utils/get_data_file_path.py +33 -27
- cognee/infrastructure/llm/LLMGateway.py +0 -13
- cognee/infrastructure/llm/prompts/extract_query_time.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_event_entity_prompt.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_event_graph_prompt.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_graph_prompt.txt +2 -2
- cognee/infrastructure/llm/prompts/generate_graph_prompt_guided.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_graph_prompt_oneshot.txt +2 -2
- cognee/infrastructure/llm/prompts/generate_graph_prompt_simple.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_graph_prompt_strict.txt +1 -1
- cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt +6 -6
- cognee/infrastructure/llm/prompts/test.txt +1 -1
- cognee/infrastructure/llm/prompts/translate_content.txt +19 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +17 -12
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +31 -25
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +132 -7
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +29 -5
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/llama_cpp/adapter.py +191 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/llm_interface.py +2 -6
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +58 -13
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +0 -1
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +25 -131
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/types.py +10 -0
- cognee/modules/chunking/models/DocumentChunk.py +0 -1
- cognee/modules/cognify/config.py +2 -0
- cognee/modules/data/models/Data.py +3 -1
- cognee/modules/engine/models/Entity.py +0 -1
- cognee/modules/engine/operations/setup.py +6 -0
- cognee/modules/graph/cognee_graph/CogneeGraph.py +150 -37
- cognee/modules/graph/cognee_graph/CogneeGraphElements.py +48 -2
- cognee/modules/graph/utils/__init__.py +1 -0
- cognee/modules/graph/utils/get_entity_nodes_from_triplets.py +12 -0
- cognee/modules/notebooks/methods/__init__.py +1 -0
- cognee/modules/notebooks/methods/create_notebook.py +0 -34
- cognee/modules/notebooks/methods/create_tutorial_notebooks.py +191 -0
- cognee/modules/notebooks/methods/get_notebooks.py +12 -8
- cognee/modules/notebooks/tutorials/cognee-basics/cell-1.md +3 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-2.md +10 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-3.md +7 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-4.py +28 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-5.py +3 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-6.py +9 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-7.py +17 -0
- cognee/modules/notebooks/tutorials/cognee-basics/config.json +4 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-1.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-10.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-11.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-12.py +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-13.md +7 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-14.py +6 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-15.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-16.py +7 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-2.md +9 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-3.md +7 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-4.md +9 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-5.md +5 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-6.py +13 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-7.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-8.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-9.py +31 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/config.json +4 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/copilot_conversations.json +107 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/guido_contributions.json +976 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/my_developer_rules.md +79 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/pep_style_guide.md +74 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/zen_principles.md +74 -0
- cognee/modules/retrieval/EntityCompletionRetriever.py +51 -38
- cognee/modules/retrieval/__init__.py +0 -1
- cognee/modules/retrieval/base_retriever.py +66 -10
- cognee/modules/retrieval/chunks_retriever.py +57 -49
- cognee/modules/retrieval/coding_rules_retriever.py +12 -5
- cognee/modules/retrieval/completion_retriever.py +29 -28
- cognee/modules/retrieval/cypher_search_retriever.py +25 -20
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +42 -46
- cognee/modules/retrieval/graph_completion_cot_retriever.py +68 -51
- cognee/modules/retrieval/graph_completion_retriever.py +78 -63
- cognee/modules/retrieval/graph_summary_completion_retriever.py +2 -0
- cognee/modules/retrieval/lexical_retriever.py +34 -12
- cognee/modules/retrieval/natural_language_retriever.py +18 -15
- cognee/modules/retrieval/summaries_retriever.py +51 -34
- cognee/modules/retrieval/temporal_retriever.py +59 -49
- cognee/modules/retrieval/triplet_retriever.py +32 -33
- cognee/modules/retrieval/utils/access_tracking.py +88 -0
- cognee/modules/retrieval/utils/brute_force_triplet_search.py +99 -103
- cognee/modules/retrieval/utils/node_edge_vector_search.py +174 -0
- cognee/modules/search/methods/__init__.py +1 -0
- cognee/modules/search/methods/get_retriever_output.py +53 -0
- cognee/modules/search/methods/get_search_type_retriever_instance.py +252 -0
- cognee/modules/search/methods/search.py +90 -222
- cognee/modules/search/models/SearchResultPayload.py +67 -0
- cognee/modules/search/types/SearchResult.py +1 -8
- cognee/modules/search/types/SearchType.py +1 -2
- cognee/modules/search/types/__init__.py +1 -1
- cognee/modules/search/utils/__init__.py +1 -2
- cognee/modules/search/utils/transform_insights_to_graph.py +2 -2
- cognee/modules/search/utils/{transform_context_to_graph.py → transform_triplets_to_graph.py} +2 -2
- cognee/modules/users/authentication/default/default_transport.py +11 -1
- cognee/modules/users/authentication/get_api_auth_backend.py +2 -1
- cognee/modules/users/authentication/get_client_auth_backend.py +2 -1
- cognee/modules/users/methods/create_user.py +0 -9
- cognee/modules/users/permissions/methods/has_user_management_permission.py +29 -0
- cognee/modules/visualization/cognee_network_visualization.py +1 -1
- cognee/run_migrations.py +48 -0
- cognee/shared/exceptions/__init__.py +1 -3
- cognee/shared/exceptions/exceptions.py +11 -1
- cognee/shared/usage_logger.py +332 -0
- cognee/shared/utils.py +12 -5
- cognee/tasks/chunks/__init__.py +9 -0
- cognee/tasks/cleanup/cleanup_unused_data.py +172 -0
- cognee/tasks/graph/__init__.py +7 -0
- cognee/tasks/ingestion/data_item.py +8 -0
- cognee/tasks/ingestion/ingest_data.py +12 -1
- cognee/tasks/ingestion/save_data_item_to_storage.py +5 -0
- cognee/tasks/memify/__init__.py +8 -0
- cognee/tasks/memify/extract_usage_frequency.py +613 -0
- cognee/tasks/summarization/models.py +0 -2
- cognee/tasks/temporal_graph/__init__.py +0 -1
- cognee/tasks/translation/__init__.py +96 -0
- cognee/tasks/translation/config.py +110 -0
- cognee/tasks/translation/detect_language.py +190 -0
- cognee/tasks/translation/exceptions.py +62 -0
- cognee/tasks/translation/models.py +72 -0
- cognee/tasks/translation/providers/__init__.py +44 -0
- cognee/tasks/translation/providers/azure_provider.py +192 -0
- cognee/tasks/translation/providers/base.py +85 -0
- cognee/tasks/translation/providers/google_provider.py +158 -0
- cognee/tasks/translation/providers/llm_provider.py +143 -0
- cognee/tasks/translation/translate_content.py +282 -0
- cognee/tasks/web_scraper/default_url_crawler.py +6 -2
- cognee/tests/cli_tests/cli_unit_tests/test_cli_commands.py +1 -0
- cognee/tests/cli_tests/cli_unit_tests/test_cli_edge_cases.py +3 -0
- cognee/tests/integration/retrieval/test_brute_force_triplet_search_with_cognify.py +62 -0
- cognee/tests/integration/retrieval/test_chunks_retriever.py +351 -0
- cognee/tests/integration/retrieval/test_graph_completion_retriever.py +276 -0
- cognee/tests/integration/retrieval/test_graph_completion_retriever_context_extension.py +228 -0
- cognee/tests/integration/retrieval/test_graph_completion_retriever_cot.py +217 -0
- cognee/tests/integration/retrieval/test_rag_completion_retriever.py +319 -0
- cognee/tests/integration/retrieval/test_structured_output.py +258 -0
- cognee/tests/integration/retrieval/test_summaries_retriever.py +195 -0
- cognee/tests/integration/retrieval/test_temporal_retriever.py +336 -0
- cognee/tests/integration/retrieval/test_triplet_retriever.py +45 -1
- cognee/tests/integration/shared/test_usage_logger_integration.py +255 -0
- cognee/tests/tasks/translation/README.md +147 -0
- cognee/tests/tasks/translation/__init__.py +1 -0
- cognee/tests/tasks/translation/config_test.py +93 -0
- cognee/tests/tasks/translation/detect_language_test.py +118 -0
- cognee/tests/tasks/translation/providers_test.py +151 -0
- cognee/tests/tasks/translation/translate_content_test.py +213 -0
- cognee/tests/test_chromadb.py +1 -1
- cognee/tests/test_cleanup_unused_data.py +165 -0
- cognee/tests/test_custom_data_label.py +68 -0
- cognee/tests/test_delete_by_id.py +6 -6
- cognee/tests/test_extract_usage_frequency.py +308 -0
- cognee/tests/test_kuzu.py +17 -7
- cognee/tests/test_lancedb.py +3 -1
- cognee/tests/test_library.py +1 -1
- cognee/tests/test_neo4j.py +17 -7
- cognee/tests/test_neptune_analytics_vector.py +3 -1
- cognee/tests/test_permissions.py +172 -187
- cognee/tests/test_pgvector.py +3 -1
- cognee/tests/test_relational_db_migration.py +15 -1
- cognee/tests/test_remote_kuzu.py +3 -1
- cognee/tests/test_s3_file_storage.py +1 -1
- cognee/tests/test_search_db.py +345 -205
- cognee/tests/test_usage_logger_e2e.py +268 -0
- cognee/tests/unit/api/test_get_raw_data_endpoint.py +206 -0
- cognee/tests/unit/eval_framework/answer_generation_test.py +4 -3
- cognee/tests/unit/eval_framework/benchmark_adapters_test.py +25 -0
- cognee/tests/unit/eval_framework/corpus_builder_test.py +33 -4
- cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +2 -0
- cognee/tests/unit/infrastructure/databases/relational/test_RelationalConfig.py +69 -0
- cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +42 -2
- cognee/tests/unit/modules/graph/cognee_graph_test.py +329 -31
- cognee/tests/unit/modules/retrieval/chunks_retriever_test.py +122 -168
- cognee/tests/unit/modules/retrieval/conversation_history_test.py +338 -0
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +486 -157
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +693 -155
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +619 -200
- cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +300 -171
- cognee/tests/unit/modules/retrieval/summaries_retriever_test.py +184 -155
- cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +544 -79
- cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +476 -28
- cognee/tests/unit/modules/retrieval/test_completion.py +343 -0
- cognee/tests/unit/modules/retrieval/test_graph_summary_completion_retriever.py +157 -0
- cognee/tests/unit/modules/retrieval/test_node_edge_vector_search.py +273 -0
- cognee/tests/unit/modules/retrieval/test_user_qa_feedback.py +312 -0
- cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +267 -7
- cognee/tests/unit/modules/search/test_get_search_type_retriever_instance.py +125 -0
- cognee/tests/unit/modules/search/test_search.py +96 -20
- cognee/tests/unit/modules/search/test_search_prepare_search_result_contract.py +190 -0
- cognee/tests/unit/modules/users/test_tutorial_notebook_creation.py +511 -297
- cognee/tests/unit/shared/test_usage_logger.py +241 -0
- cognee/tests/unit/users/permissions/test_has_user_management_permission.py +46 -0
- {cognee-0.5.1.dist-info → cognee-0.5.2.dist-info}/METADATA +22 -17
- {cognee-0.5.1.dist-info → cognee-0.5.2.dist-info}/RECORD +258 -157
- cognee/api/.env.example +0 -5
- cognee/modules/retrieval/base_graph_retriever.py +0 -24
- cognee/modules/search/methods/get_search_type_tools.py +0 -223
- cognee/modules/search/methods/no_access_control_search.py +0 -62
- cognee/modules/search/utils/prepare_search_result.py +0 -63
- cognee/tests/test_feedback_enrichment.py +0 -174
- cognee/tests/unit/modules/retrieval/structured_output_test.py +0 -204
- {cognee-0.5.1.dist-info → cognee-0.5.2.dist-info}/WHEEL +0 -0
- {cognee-0.5.1.dist-info → cognee-0.5.2.dist-info}/entry_points.txt +0 -0
- {cognee-0.5.1.dist-info → cognee-0.5.2.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.5.1.dist-info → cognee-0.5.2.dist-info}/licenses/NOTICE.md +0 -0
cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/llama_cpp/adapter.py
ADDED
@@ -0,0 +1,191 @@
+"""Adapter for Instructor-backed Structured Output Framework for Llama CPP"""
+
+import litellm
+import logging
+import instructor
+from typing import Type, Optional
+from openai import AsyncOpenAI
+from pydantic import BaseModel
+
+from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import (
+    LLMInterface,
+)
+from cognee.shared.logging_utils import get_logger
+from cognee.shared.rate_limiting import llm_rate_limiter_context_manager
+
+from tenacity import (
+    retry,
+    stop_after_delay,
+    wait_exponential_jitter,
+    retry_if_not_exception_type,
+    before_sleep_log,
+)
+
+logger = get_logger()
+
+
+class LlamaCppAPIAdapter(LLMInterface):
+    """
+    Adapter for Llama CPP LLM provider with support for TWO modes:
+
+    1. SERVER MODE (OpenAI-compatible):
+       - Connects to llama-cpp-python server via HTTP (local or remote)
+       - Uses instructor.from_openai()
+       - Requires: endpoint, api_key, model
+
+    2. LOCAL MODE (In-process):
+       - Loads model directly using llama-cpp-python library
+       - Uses instructor.patch() on llama.Llama object
+       - Requires: model_path
+
+    Public methods:
+    - acreate_structured_output
+
+    Instance variables:
+    - name
+    - model (for server mode) or model_path (for local mode)
+    - mode_type: "server" or "local"
+    - max_completion_tokens
+    - aclient
+    """
+
+    name: str
+    model: Optional[str]
+    model_path: Optional[str]
+    mode_type: str  # "server" or "local"
+    default_instructor_mode = instructor.Mode.JSON
+
+    def __init__(
+        self,
+        name: str = "LlamaCpp",
+        max_completion_tokens: int = 2048,
+        instructor_mode: Optional[str] = None,
+        # Server mode parameters
+        endpoint: Optional[str] = None,
+        api_key: Optional[str] = None,
+        model: Optional[str] = None,
+        # Local mode parameters
+        model_path: Optional[str] = None,
+        n_ctx: int = 2048,
+        n_gpu_layers: int = 0,
+        chat_format: str = "chatml",
+    ):
+        self.name = name
+        self.max_completion_tokens = max_completion_tokens
+        self.instructor_mode = instructor_mode if instructor_mode else self.default_instructor_mode
+
+        # Determine which mode to use
+        if model_path:
+            self._init_local_mode(model_path, n_ctx, n_gpu_layers, chat_format)
+        elif endpoint:
+            self._init_server_mode(endpoint, api_key, model)
+        else:
+            raise ValueError(
+                "Must provide either 'model_path' (for local mode) or 'endpoint' (for server mode)"
+            )
+
+    def _init_local_mode(self, model_path: str, n_ctx: int, n_gpu_layers: int, chat_format: str):
+        """Initialize local mode using llama-cpp-python library directly"""
+        try:
+            import llama_cpp
+        except ImportError:
+            raise ImportError(
+                "llama-cpp-python is not installed. Install with: pip install llama-cpp-python"
+            )
+
+        logger.info(f"Initializing LlamaCpp in LOCAL mode with model: {model_path}")
+
+        self.mode_type = "local"
+        self.model_path = model_path
+        self.model = None
+
+        # Initialize llama-cpp-python with the model
+        self.llama = llama_cpp.Llama(
+            model_path=model_path,
+            n_gpu_layers=n_gpu_layers,  # -1 for all GPU, 0 for CPU only
+            chat_format=chat_format,
+            n_ctx=n_ctx,
+            verbose=False,
+        )
+
+        self.aclient = instructor.patch(
+            create=self.llama.create_chat_completion_openai_v1,
+            mode=instructor.Mode(self.instructor_mode),
+        )
+
+    def _init_server_mode(self, endpoint: str, api_key: Optional[str], model: Optional[str]):
+        """Initialize server mode connecting to llama-cpp-python server"""
+        logger.info(f"Initializing LlamaCpp in SERVER mode with endpoint: {endpoint}")
+
+        self.mode_type = "server"
+        self.model = model
+        self.model_path = None
+        self.endpoint = endpoint
+        self.api_key = api_key
+
+        # Use instructor.from_openai() for server mode (OpenAI-compatible API)
+        self.aclient = instructor.from_openai(
+            AsyncOpenAI(base_url=self.endpoint, api_key=self.api_key),
+            mode=instructor.Mode(self.instructor_mode),
+        )
+
+    @retry(
+        stop=stop_after_delay(128),
+        wait=wait_exponential_jitter(8, 128),
+        retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError),
+        before_sleep=before_sleep_log(logger, logging.DEBUG),
+        reraise=True,
+    )
+    async def acreate_structured_output(
+        self, text_input: str, system_prompt: str, response_model: Type[BaseModel], **kwargs
+    ) -> BaseModel:
+        """
+        Generate a structured output from the LLM using the provided text and system prompt.
+
+        Works in both local and server modes transparently.
+
+        Parameters:
+        -----------
+        - text_input (str): The input text provided by the user.
+        - system_prompt (str): The system prompt that guides the response generation.
+        - response_model (Type[BaseModel]): The model type that the response should conform to.
+
+        Returns:
+        --------
+        - BaseModel: A structured output that conforms to the specified response model.
+        """
+        async with llm_rate_limiter_context_manager():
+            # Prepare messages (system first, then user is more standard)
+            messages = [
+                {"role": "system", "content": system_prompt},
+                {"role": "user", "content": text_input},
+            ]
+
+            if self.mode_type == "server":
+                # Server mode: use async client with OpenAI-compatible API
+                response = await self.aclient.chat.completions.create(
+                    model=self.model,
+                    messages=messages,
+                    response_model=response_model,
+                    max_retries=2,
+                    max_completion_tokens=self.max_completion_tokens,
+                    **kwargs,
+                )
+
+            else:
+                import asyncio
+
+                # Local mode: instructor.patch() returns a SYNC callable
+                # Per docs: https://python.useinstructor.com/integrations/llama-cpp-python/
+                def _call_sync():
+                    return self.aclient(
+                        messages=messages,
+                        response_model=response_model,
+                        max_tokens=self.max_completion_tokens,
+                        **kwargs,
+                    )
+
+                # Run sync function in thread pool to avoid blocking
+                response = await asyncio.to_thread(_call_sync)
+
+            return response
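The adapter picks its mode from the constructor arguments alone (model_path wins over endpoint). A hedged usage sketch of both modes; the endpoint, API key, model name, GGUF path, and the Answer model below are illustrative placeholders, not values shipped in the wheel:

```python
# Hedged sketch, not package code: all names/paths below are placeholders.
import asyncio

from pydantic import BaseModel

from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llama_cpp.adapter import (
    LlamaCppAPIAdapter,
)


class Answer(BaseModel):
    text: str


async def main():
    # Server mode: point at a running llama-cpp-python server (OpenAI-compatible API).
    adapter = LlamaCppAPIAdapter(
        endpoint="http://localhost:8000/v1",  # assumed server URL
        api_key="sk-no-key-required",
        model="local-model",
    )
    result = await adapter.acreate_structured_output(
        text_input="What is a knowledge graph?",
        system_prompt="Answer in one sentence.",
        response_model=Answer,
    )
    print(result.text)

    # Local mode instead: load a GGUF file in-process (requires llama-cpp-python).
    # adapter = LlamaCppAPIAdapter(model_path="/models/llama-3-8b.Q4_K_M.gguf", n_gpu_layers=-1)


asyncio.run(main())
```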
cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/llm_interface.py
CHANGED
@@ -3,18 +3,14 @@
 from typing import Type, Protocol
 from abc import abstractmethod
 from pydantic import BaseModel
-from cognee.infrastructure.llm.LLMGateway import LLMGateway


 class LLMInterface(Protocol):
     """
-    Define an interface for LLM models with methods for structured output and prompt
-    display.
+    Define an interface for LLM models with methods for structured output, multimodal processing, and prompt display.

     Methods:
-    - acreate_structured_output(text_input: str, system_prompt: str, response_model:
-    Type[BaseModel])
-    - show_prompt(text_input: str, system_prompt: str)
+    - acreate_structured_output(text_input: str, system_prompt: str, response_model: Type[BaseModel])
     """

     @abstractmethod
cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py
CHANGED
@@ -1,13 +1,13 @@
 import litellm
 import instructor
 from pydantic import BaseModel
-from typing import Type
+from typing import Type, Optional
 from litellm import JSONSchemaValidationError
-
+from cognee.infrastructure.files.utils.open_data_file import open_data_file
 from cognee.shared.logging_utils import get_logger
 from cognee.modules.observability.get_observe import get_observe
-from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import (
-    LLMInterface,
+from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.generic_llm_api.adapter import (
+    GenericAPIAdapter,
 )
 from cognee.infrastructure.llm.config import get_llm_config
 from cognee.shared.rate_limiting import llm_rate_limiter_context_manager
@@ -20,12 +20,14 @@ from tenacity import (
     retry_if_not_exception_type,
     before_sleep_log,
 )
+from ..types import TranscriptionReturnType
+from mistralai import Mistral

 logger = get_logger()
 observe = get_observe()


-class MistralAdapter(LLMInterface):
+class MistralAdapter(GenericAPIAdapter):
     """
     Adapter for Mistral AI API, for structured output generation and prompt display.

@@ -34,10 +36,6 @@ class MistralAdapter(LLMInterface):
     - show_prompt
     """

-    name = "Mistral"
-    model: str
-    api_key: str
-    max_completion_tokens: int
     default_instructor_mode = "mistral_tools"

     def __init__(
@@ -46,12 +44,19 @@ class MistralAdapter(LLMInterface):
         model: str,
         max_completion_tokens: int,
         endpoint: str = None,
+        transcription_model: str = None,
+        image_transcribe_model: str = None,
         instructor_mode: str = None,
     ):
-
-
-
-
+        super().__init__(
+            api_key=api_key,
+            model=model,
+            max_completion_tokens=max_completion_tokens,
+            name="Mistral",
+            endpoint=endpoint,
+            transcription_model=transcription_model,
+            image_transcribe_model=image_transcribe_model,
+        )

         self.instructor_mode = instructor_mode if instructor_mode else self.default_instructor_mode

@@ -60,7 +65,9 @@ class MistralAdapter(LLMInterface):
             mode=instructor.Mode(self.instructor_mode),
             api_key=get_llm_config().llm_api_key,
         )
+        self.mistral_client = Mistral(api_key=self.api_key)

+    @observe(as_type="generation")
     @retry(
         stop=stop_after_delay(128),
         wait=wait_exponential_jitter(8, 128),
@@ -119,3 +126,41 @@ class MistralAdapter(LLMInterface):
         logger.error(f"Schema validation failed: {str(e)}")
         logger.debug(f"Raw response: {e.raw_response}")
         raise ValueError(f"Response failed schema validation: {str(e)}")
+
+    @observe(as_type="transcription")
+    @retry(
+        stop=stop_after_delay(128),
+        wait=wait_exponential_jitter(2, 128),
+        retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError),
+        before_sleep=before_sleep_log(logger, logging.DEBUG),
+        reraise=True,
+    )
+    async def create_transcript(self, input) -> Optional[TranscriptionReturnType]:
+        """
+        Generate an audio transcript from a user query.
+
+        This method creates a transcript from the specified audio file.
+        The audio file is processed and the transcription is retrieved from the API.
+
+        Parameters:
+        -----------
+        - input: The path to the audio file that needs to be transcribed.
+
+        Returns:
+        --------
+        The generated transcription of the audio file.
+        """
+        transcription_model = self.transcription_model
+        if self.transcription_model.startswith("mistral"):
+            transcription_model = self.transcription_model.split("/")[-1]
+        file_name = input.split("/")[-1]
+        async with open_data_file(input, mode="rb") as f:
+            transcription_response = self.mistral_client.audio.transcriptions.complete(
+                model=transcription_model,
+                file={
+                    "content": f,
+                    "file_name": file_name,
+                },
+            )
+
+        return TranscriptionReturnType(transcription_response.text, transcription_response)
cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py
CHANGED
@@ -12,7 +12,6 @@ from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.ll
 from cognee.infrastructure.files.utils.open_data_file import open_data_file
 from cognee.shared.logging_utils import get_logger
 from cognee.shared.rate_limiting import llm_rate_limiter_context_manager
-
 from tenacity import (
     retry,
     stop_after_delay,
cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py
CHANGED
@@ -1,4 +1,3 @@
-import base64
 import litellm
 import instructor
 from typing import Type
@@ -16,8 +15,8 @@ from tenacity import (
     before_sleep_log,
 )

-from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.llm_interface import (
-    LLMInterface,
+from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.generic_llm_api.adapter import (
+    GenericAPIAdapter,
 )
 from cognee.infrastructure.llm.exceptions import (
     ContentPolicyFilterError,
@@ -26,13 +25,16 @@ from cognee.shared.rate_limiting import llm_rate_limiter_context_manager
 from cognee.infrastructure.files.utils.open_data_file import open_data_file
 from cognee.modules.observability.get_observe import get_observe
 from cognee.shared.logging_utils import get_logger
+from cognee.infrastructure.llm.structured_output_framework.litellm_instructor.llm.types import (
+    TranscriptionReturnType,
+)

 logger = get_logger()

 observe = get_observe()


-class OpenAIAdapter(LLMInterface):
+class OpenAIAdapter(GenericAPIAdapter):
     """
     Adapter for OpenAI's GPT-3, GPT-4 API.

@@ -53,12 +55,7 @@ class OpenAIAdapter(LLMInterface):
     - MAX_RETRIES
     """

-    name = "OpenAI"
-    model: str
-    api_key: str
-    api_version: str
     default_instructor_mode = "json_schema_mode"
-
     MAX_RETRIES = 5

     """Adapter for OpenAI's GPT-3, GPT=4 API"""
@@ -66,17 +63,29 @@ class OpenAIAdapter(LLMInterface):
     def __init__(
         self,
         api_key: str,
-        endpoint: str,
-        api_version: str,
         model: str,
-        transcription_model: str,
         max_completion_tokens: int,
+        endpoint: str = None,
+        api_version: str = None,
+        transcription_model: str = None,
         instructor_mode: str = None,
         streaming: bool = False,
         fallback_model: str = None,
         fallback_api_key: str = None,
         fallback_endpoint: str = None,
     ):
+        super().__init__(
+            api_key=api_key,
+            model=model,
+            max_completion_tokens=max_completion_tokens,
+            name="OpenAI",
+            endpoint=endpoint,
+            api_version=api_version,
+            transcription_model=transcription_model,
+            fallback_model=fallback_model,
+            fallback_api_key=fallback_api_key,
+            fallback_endpoint=fallback_endpoint,
+        )
         self.instructor_mode = instructor_mode if instructor_mode else self.default_instructor_mode
         # TODO: With gpt5 series models OpenAI expects JSON_SCHEMA as a mode for structured outputs.
         # Make sure all new gpt models will work with this mode as well.
@@ -91,18 +100,8 @@ class OpenAIAdapter(LLMInterface):
         self.aclient = instructor.from_litellm(litellm.acompletion)
         self.client = instructor.from_litellm(litellm.completion)

-        self.transcription_model = transcription_model
-        self.model = model
-        self.api_key = api_key
-        self.endpoint = endpoint
-        self.api_version = api_version
-        self.max_completion_tokens = max_completion_tokens
         self.streaming = streaming

-        self.fallback_model = fallback_model
-        self.fallback_api_key = fallback_api_key
-        self.fallback_endpoint = fallback_endpoint
-
     @observe(as_type="generation")
     @retry(
         stop=stop_after_delay(128),
@@ -198,7 +197,7 @@ class OpenAIAdapter(LLMInterface):
             f"The provided input contains content that is not aligned with our content policy: {text_input}"
         ) from error

-    @observe
+    @observe(as_type="transcription")
     @retry(
         stop=stop_after_delay(128),
         wait=wait_exponential_jitter(2, 128),
@@ -206,58 +205,7 @@ class OpenAIAdapter(LLMInterface):
         before_sleep=before_sleep_log(logger, logging.DEBUG),
         reraise=True,
     )
-    def create_structured_output(
-        self, text_input: str, system_prompt: str, response_model: Type[BaseModel], **kwargs
-    ) -> BaseModel:
-        """
-        Generate a response from a user query.
-
-        This method creates structured output by sending a synchronous request to the OpenAI API
-        using the provided parameters to generate a completion based on the user input and
-        system prompt.
-
-        Parameters:
-        -----------
-
-        - text_input (str): The input text provided by the user for generating a response.
-        - system_prompt (str): The system's prompt to guide the model's response.
-        - response_model (Type[BaseModel]): The expected model type for the response.
-
-        Returns:
-        --------
-
-        - BaseModel: A structured output generated by the model, returned as an instance of
-        BaseModel.
-        """
-
-        return self.client.chat.completions.create(
-            model=self.model,
-            messages=[
-                {
-                    "role": "user",
-                    "content": f"""{text_input}""",
-                },
-                {
-                    "role": "system",
-                    "content": system_prompt,
-                },
-            ],
-            api_key=self.api_key,
-            api_base=self.endpoint,
-            api_version=self.api_version,
-            response_model=response_model,
-            max_retries=self.MAX_RETRIES,
-            **kwargs,
-        )
-
-    @retry(
-        stop=stop_after_delay(128),
-        wait=wait_exponential_jitter(2, 128),
-        retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError),
-        before_sleep=before_sleep_log(logger, logging.DEBUG),
-        reraise=True,
-    )
-    async def create_transcript(self, input, **kwargs):
+    async def create_transcript(self, input, **kwargs) -> TranscriptionReturnType:
         """
         Generate an audio transcript from a user query.

@@ -286,60 +234,6 @@ class OpenAIAdapter(LLMInterface):
             max_retries=self.MAX_RETRIES,
             **kwargs,
         )
+        return TranscriptionReturnType(transcription.text, transcription)

-
-
-    @retry(
-        stop=stop_after_delay(128),
-        wait=wait_exponential_jitter(2, 128),
-        retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError),
-        before_sleep=before_sleep_log(logger, logging.DEBUG),
-        reraise=True,
-    )
-    async def transcribe_image(self, input, **kwargs) -> BaseModel:
-        """
-        Generate a transcription of an image from a user query.
-
-        This method encodes the image and sends a request to the OpenAI API to obtain a
-        description of the contents of the image.
-
-        Parameters:
-        -----------
-
-        - input: The path to the image file that needs to be transcribed.
-
-        Returns:
-        --------
-
-        - BaseModel: A structured output generated by the model, returned as an instance of
-        BaseModel.
-        """
-        async with open_data_file(input, mode="rb") as image_file:
-            encoded_image = base64.b64encode(image_file.read()).decode("utf-8")
-
-        return litellm.completion(
-            model=self.model,
-            messages=[
-                {
-                    "role": "user",
-                    "content": [
-                        {
-                            "type": "text",
-                            "text": "What's in this image?",
-                        },
-                        {
-                            "type": "image_url",
-                            "image_url": {
-                                "url": f"data:image/jpeg;base64,{encoded_image}",
-                            },
-                        },
-                    ],
-                }
-            ],
-            api_key=self.api_key,
-            api_base=self.endpoint,
-            api_version=self.api_version,
-            max_completion_tokens=300,
-            max_retries=self.MAX_RETRIES,
-            **kwargs,
-        )
+    # transcribe_image is inherited from GenericAPIAdapter
cognee/modules/cognify/config.py
CHANGED
@@ -9,6 +9,7 @@ class CognifyConfig(BaseSettings):
     classification_model: object = DefaultContentPrediction
     summarization_model: object = SummarizedContent
     triplet_embedding: bool = False
+    chunks_per_batch: Optional[int] = None
     model_config = SettingsConfigDict(env_file=".env", extra="allow")

     def to_dict(self) -> dict:
@@ -16,6 +17,7 @@ class CognifyConfig(BaseSettings):
             "classification_model": self.classification_model,
             "summarization_model": self.summarization_model,
             "triplet_embedding": self.triplet_embedding,
+            "chunks_per_batch": self.chunks_per_batch,
         }

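Since chunks_per_batch is a pydantic-settings field, it can be driven from the environment or the .env file. A hedged sketch, assuming CognifyConfig sets no env_prefix (so the env name derives directly from the field name); the value 50 is an arbitrary example:

```python
# Hedged sketch: CHUNKS_PER_BATCH is the assumed env name for the new field.
import os

os.environ["CHUNKS_PER_BATCH"] = "50"

from cognee.modules.cognify.config import CognifyConfig

config = CognifyConfig()
assert config.chunks_per_batch == 50
print(config.to_dict()["chunks_per_batch"])  # -> 50
```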
cognee/modules/data/models/Data.py
CHANGED
@@ -13,7 +13,7 @@ class Data(Base):
     __tablename__ = "data"

     id = Column(UUID, primary_key=True, default=uuid4)
-
+    label = Column(String, nullable=True)
     name = Column(String)
     extension = Column(String)
     mime_type = Column(String)
@@ -36,6 +36,7 @@ class Data(Base):
     data_size = Column(Integer, nullable=True)  # File size in bytes
     created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
     updated_at = Column(DateTime(timezone=True), onupdate=lambda: datetime.now(timezone.utc))
+    last_accessed = Column(DateTime(timezone=True), nullable=True)

     datasets = relationship(
         "Dataset",
@@ -49,6 +50,7 @@ class Data(Base):
         return {
             "id": str(self.id),
             "name": self.name,
+            "label": self.label,
             "extension": self.extension,
             "mimeType": self.mime_type,
             "rawDataLocation": self.raw_data_location,