ragbits-core 0.16.0__py3-none-any.whl → 1.4.0.dev202512021005__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ragbits/core/__init__.py +21 -2
- ragbits/core/audit/__init__.py +15 -157
- ragbits/core/audit/metrics/__init__.py +83 -0
- ragbits/core/audit/metrics/base.py +198 -0
- ragbits/core/audit/metrics/logfire.py +19 -0
- ragbits/core/audit/metrics/otel.py +65 -0
- ragbits/core/audit/traces/__init__.py +171 -0
- ragbits/core/audit/{base.py → traces/base.py} +9 -5
- ragbits/core/audit/{cli.py → traces/cli.py} +8 -4
- ragbits/core/audit/traces/logfire.py +18 -0
- ragbits/core/audit/{otel.py → traces/otel.py} +5 -8
- ragbits/core/config.py +15 -0
- ragbits/core/embeddings/__init__.py +2 -1
- ragbits/core/embeddings/base.py +19 -0
- ragbits/core/embeddings/dense/base.py +10 -1
- ragbits/core/embeddings/dense/fastembed.py +22 -1
- ragbits/core/embeddings/dense/litellm.py +37 -10
- ragbits/core/embeddings/dense/local.py +15 -1
- ragbits/core/embeddings/dense/noop.py +11 -1
- ragbits/core/embeddings/dense/vertex_multimodal.py +14 -1
- ragbits/core/embeddings/sparse/bag_of_tokens.py +47 -17
- ragbits/core/embeddings/sparse/base.py +10 -1
- ragbits/core/embeddings/sparse/fastembed.py +25 -2
- ragbits/core/llms/__init__.py +3 -3
- ragbits/core/llms/base.py +612 -88
- ragbits/core/llms/exceptions.py +27 -0
- ragbits/core/llms/litellm.py +408 -83
- ragbits/core/llms/local.py +180 -41
- ragbits/core/llms/mock.py +88 -23
- ragbits/core/prompt/__init__.py +2 -2
- ragbits/core/prompt/_cli.py +32 -19
- ragbits/core/prompt/base.py +105 -19
- ragbits/core/prompt/{discovery/prompt_discovery.py → discovery.py} +1 -1
- ragbits/core/prompt/exceptions.py +22 -6
- ragbits/core/prompt/prompt.py +180 -98
- ragbits/core/sources/__init__.py +2 -0
- ragbits/core/sources/azure.py +1 -1
- ragbits/core/sources/base.py +8 -1
- ragbits/core/sources/gcs.py +1 -1
- ragbits/core/sources/git.py +1 -1
- ragbits/core/sources/google_drive.py +595 -0
- ragbits/core/sources/hf.py +71 -31
- ragbits/core/sources/local.py +1 -1
- ragbits/core/sources/s3.py +1 -1
- ragbits/core/utils/config_handling.py +13 -2
- ragbits/core/utils/function_schema.py +220 -0
- ragbits/core/utils/helpers.py +22 -0
- ragbits/core/utils/lazy_litellm.py +44 -0
- ragbits/core/vector_stores/base.py +18 -1
- ragbits/core/vector_stores/chroma.py +28 -11
- ragbits/core/vector_stores/hybrid.py +1 -1
- ragbits/core/vector_stores/hybrid_strategies.py +21 -8
- ragbits/core/vector_stores/in_memory.py +13 -4
- ragbits/core/vector_stores/pgvector.py +123 -47
- ragbits/core/vector_stores/qdrant.py +15 -7
- ragbits/core/vector_stores/weaviate.py +440 -0
- {ragbits_core-0.16.0.dist-info → ragbits_core-1.4.0.dev202512021005.dist-info}/METADATA +22 -6
- ragbits_core-1.4.0.dev202512021005.dist-info/RECORD +79 -0
- {ragbits_core-0.16.0.dist-info → ragbits_core-1.4.0.dev202512021005.dist-info}/WHEEL +1 -1
- ragbits/core/prompt/discovery/__init__.py +0 -3
- ragbits/core/prompt/lab/__init__.py +0 -0
- ragbits/core/prompt/lab/app.py +0 -262
- ragbits_core-0.16.0.dist-info/RECORD +0 -72
ragbits/core/prompt/base.py
CHANGED
@@ -1,11 +1,12 @@
+import json
 from abc import ABCMeta, abstractmethod
 from typing import Any, Generic
 
 from pydantic import BaseModel
-from typing_extensions import TypeVar
+from typing_extensions import Self, TypeVar
 
 ChatFormat = list[dict[str, Any]]
-OutputT = TypeVar("OutputT", default=str)
+PromptOutputT = TypeVar("PromptOutputT", default=str)
 
 
 class BasePrompt(metaclass=ABCMeta):
@@ -14,7 +15,6 @@ class BasePrompt(metaclass=ABCMeta):
     """
 
     @property
-    @abstractmethod
     def chat(self) -> ChatFormat:
         """
         Returns the conversation in the standard OpenAI chat format.
@@ -22,6 +22,9 @@ class BasePrompt(metaclass=ABCMeta):
         Returns:
             ChatFormat: A list of dictionaries, each containing the role and content of a message.
         """
+        if not hasattr(self, "_conversation_history"):
+            self._conversation_history: list[dict[str, Any]] = []
+        return self._conversation_history
 
     @property
     def json_mode(self) -> bool:
@@ -46,15 +49,108 @@ class BasePrompt(metaclass=ABCMeta):
         """
         return []
 
+    def list_pdfs(self) -> list[str]:  # noqa: PLR6301
+        """
+        Returns the PDFs in form of URLs or base64 encoded strings.
+
+        Returns:
+            list of PDFs
+        """
+        return []
+
+    def add_assistant_message(self, message: str | PromptOutputT) -> Self:
+        """
+        Add an assistant message to the conversation history.
+
+        Args:
+            message (str): The assistant message content.
+
+        Returns:
+            Prompt[PromptInputT, PromptOutputT]: The current prompt instance to allow chaining.
+        """
+        if not hasattr(self, "_conversation_history"):
+            self._conversation_history = []
+
+        if isinstance(message, BaseModel):
+            message = message.model_dump_json()
+        self._conversation_history.append({"role": "assistant", "content": str(message)})
+        return self
+
+    def add_tool_use_message(
+        self,
+        id: str,
+        name: str,
+        arguments: dict,
+        result: Any,  # noqa: ANN401
+    ) -> Self:
+        """
+        Add tool call messages to the conversation history.
 
-class BasePromptWithParser(Generic[OutputT], BasePrompt, metaclass=ABCMeta):
+        Args:
+            id (str): The id of the tool call.
+            name (str): The name of the tool.
+            arguments (dict): The arguments of the tool.
+            result (any): The tool call result.
+
+        Returns:
+            Prompt[PromptInputT, PromptOutputT]: The current prompt instance to allow chaining.
+        """
+        if not hasattr(self, "_conversation_history"):
+            self._conversation_history = []
+
+        self._conversation_history.extend(
+            [
+                {
+                    "role": "assistant",
+                    "content": None,
+                    "tool_calls": [
+                        {
+                            "id": id,
+                            "type": "function",
+                            "function": {
+                                "name": name,
+                                "arguments": json.dumps(arguments),
+                            },
+                        }
+                    ],
+                },
+                {
+                    "role": "tool",
+                    "tool_call_id": id,
+                    "content": str(result),
+                },
+            ]
+        )
+
+        return self
+
+    def add_user_message(self, message: str | dict[str, Any] | list[dict[str, Any]]) -> Self:
+        """
+        Add a user message to the conversation history.
+
+        Args:
+            message: The user message content. Can be:
+                - A string: Used directly as content
+                - A dictionary: With format {"type": "text", "text": "message"} or image content
+
+        Returns:
+            Prompt: The current prompt instance to allow chaining.
+        """
+        if not hasattr(self, "_conversation_history"):
+            self._conversation_history = []
+
+        self._conversation_history.append({"role": "user", "content": message})
+        return self
+
+
+class BasePromptWithParser(Generic[PromptOutputT], BasePrompt, metaclass=ABCMeta):
     """
     Base class for prompts that know how to parse the output from the LLM to their specific
     output type.
     """
 
     @abstractmethod
-    async def parse_response(self, response: str) -> OutputT:
+    async def parse_response(self, response: str) -> PromptOutputT:
         """
         Parse the response from the LLM to the desired output type.
 
@@ -62,7 +158,7 @@ class BasePromptWithParser(Generic[OutputT], BasePrompt, metaclass=ABCMeta):
             response (str): The response from the LLM.
 
         Returns:
-            OutputT: The parsed response.
+            PromptOutputT_co: The parsed response.
 
         Raises:
             ResponseParsingError: If the response cannot be parsed.
@@ -75,16 +171,6 @@ class SimplePrompt(BasePrompt):
     """
 
     def __init__(self, content: str | ChatFormat) -> None:
-        self._content = content
-
-    @property
-    def chat(self) -> ChatFormat:
-        """
-        Returns the conversation in the chat format.
-
-        Returns:
-            ChatFormat: A list of dictionaries, each containing the role and content of a message.
-        """
-        if isinstance(self._content, str):
-            return [{"role": "user", "content": self._content}]
-        return self._content
+        self._conversation_history: list[dict[str, Any]] = (
+            [{"role": "user", "content": content}] if isinstance(content, str) else content
+        )
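Taken together, these hunks move conversation handling up into BasePrompt: `chat` is now a lazily initialized property backed by `_conversation_history` instead of an abstract method, and `add_user_message`, `add_assistant_message`, and `add_tool_use_message` all mutate that shared list and return `Self` for chaining. A minimal sketch of how the new API composes (the tool-call id, name, and values below are hypothetical, not from the diff):

from ragbits.core.prompt.base import SimplePrompt

prompt = SimplePrompt("What is the weather in Paris?")
prompt.add_tool_use_message(
    id="call_1",                      # hypothetical tool-call id
    name="get_weather",               # hypothetical tool name
    arguments={"city": "Paris"},
    result={"temperature_c": 18},
).add_assistant_message("It is 18°C in Paris right now.")

# `chat` now simply returns `_conversation_history` in OpenAI chat format:
for message in prompt.chat:
    print(message["role"], message.get("content"))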
ragbits/core/prompt/exceptions.py
CHANGED

@@ -8,12 +8,28 @@ class PromptError(Exception):
         self.message = message
 
 
-class …
+class PromptWithAttachmentOfUnknownFormat(PromptError):
     """
-    Raised when there is an …
+    Raised when there is a file with an unknown format attached to the prompt.
     """
 
-    def __init__(…
-    …
-    …
-    …
+    def __init__(self) -> None:
+        super().__init__("Could not determine MIME type for the attachment file")
+
+
+class PromptWithAttachmentOfUnsupportedFormat(PromptError):
+    """
+    Raised when there is a file with an unsupported format attached to the prompt.
+    """
+
+    def __init__(self, mime_type: str) -> None:
+        super().__init__(f"Unsupported MIME type for the attachment file: {mime_type}")
+
+
+class PromptWithEmptyAttachment(PromptError):
+    """
+    Raised when there is an empty file attached to the prompt.
+    """
+
+    def __init__(self) -> None:
+        super().__init__("Attachment must have either bytes data or URL provided")
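All three new exception types derive from the existing PromptError (which stores the message on `self.message`), so callers can catch them individually or as a group. A sketch of the raising site, assuming `create_message_with_attachment` from the prompt.py diff below:

from ragbits.core.prompt.exceptions import (
    PromptWithAttachmentOfUnknownFormat,
    PromptWithAttachmentOfUnsupportedFormat,
    PromptWithEmptyAttachment,
)
from ragbits.core.prompt.prompt import Attachment, Prompt

try:
    # An Attachment with neither `data` nor `url` trips the first check
    # in the static Prompt.create_message_with_attachment (see below).
    Prompt.create_message_with_attachment(Attachment())
except PromptWithEmptyAttachment as exc:
    print(exc.message)  # "Attachment must have either bytes data or URL provided"
except (PromptWithAttachmentOfUnknownFormat, PromptWithAttachmentOfUnsupportedFormat):
    ...  # MIME type undetectable, or neither image/* nor application/pdf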
ragbits/core/prompt/prompt.py
CHANGED
@@ -1,24 +1,38 @@
 import asyncio
 import base64
-import …
+import mimetypes
 import textwrap
+import warnings
 from abc import ABCMeta
 from collections.abc import Awaitable, Callable
 from typing import Any, Generic, cast, get_args, get_origin, overload
 
+import filetype
 from jinja2 import Environment, Template, meta
 from pydantic import BaseModel
 from typing_extensions import TypeVar, get_original_bases
 
-from ragbits.core.prompt.base import BasePromptWithParser, ChatFormat, OutputT
-from ragbits.core.prompt.exceptions import …
+from ragbits.core.prompt.base import BasePromptWithParser, ChatFormat, PromptOutputT
+from ragbits.core.prompt.exceptions import (
+    PromptWithAttachmentOfUnknownFormat,
+    PromptWithAttachmentOfUnsupportedFormat,
+    PromptWithEmptyAttachment,
+)
 from ragbits.core.prompt.parsers import DEFAULT_PARSERS, build_pydantic_parser
 
-InputT = TypeVar("InputT", bound=BaseModel)
-FewShotExample = tuple[str | InputT, str | OutputT]
+PromptInputT = TypeVar("PromptInputT", bound=BaseModel | None)
+FewShotExample = tuple[str | PromptInputT, str | PromptOutputT]
 
 
-class Prompt(Generic[InputT, OutputT], BasePromptWithParser[OutputT], metaclass=ABCMeta):
+class Attachment(BaseModel):
+    """Represents an attachment that can be passed to a LLM."""
+
+    url: str | None = None
+    data: bytes | None = None
+    mime_type: str | None = None
+
+
+class Prompt(Generic[PromptInputT, PromptOutputT], BasePromptWithParser[PromptOutputT], metaclass=ABCMeta):
     """
     Generic class for prompts. It contains the system and user prompts, and additional messages.
 
@@ -31,15 +45,15 @@ class Prompt(Generic[InputT, OutputT], BasePromptWithParser[OutputT], metaclass=ABCMeta):
 
     # Additional messages to be added to the conversation after the system prompt,
     # pairs of user message and assistant response
-    few_shots: list[FewShotExample[InputT, OutputT]] = []
+    few_shots: list[FewShotExample[PromptInputT, PromptOutputT]] = []
 
     # function that parses the response from the LLM to specific output type
     # if not provided, the class tries to set it automatically based on the output type
-    response_parser: Callable[[str], OutputT | Awaitable[OutputT]]
+    response_parser: Callable[[str], PromptOutputT | Awaitable[PromptOutputT]]
 
     # Automatically set in __init_subclass__
-    input_type: type[InputT] | None
-    output_type: type[OutputT]
+    input_type: type[PromptInputT] | None
+    output_type: type[PromptOutputT]
     system_prompt_template: Template | None
     user_prompt_template: Template
     image_input_fields: list[str] | None = None
@@ -72,7 +86,7 @@ class Prompt(Generic[InputT, OutputT], BasePromptWithParser[OutputT], metaclass=ABCMeta):
         return Template(template)
 
     @classmethod
-    def _render_template(cls, template: Template, input_data: InputT | None) -> str:
+    def _render_template(cls, template: Template, input_data: PromptInputT | None) -> str:
         # Workaround for not being able to use `input is not None`
         # because of mypy issue: https://github.com/python/mypy/issues/12622
         context = {}
@@ -81,29 +95,39 @@ class Prompt(Generic[InputT, OutputT], BasePromptWithParser[OutputT], metaclass=ABCMeta):
         return template.render(**context)
 
     @classmethod
-    def …
-        …
+    def _get_attachments_from_input_data(cls, input_data: PromptInputT | None | str) -> list[Attachment]:
+        attachments: list[Attachment] = []
+
         if isinstance(input_data, BaseModel):
+            # to support backward compatibility with the old image_input_fields:
             image_input_fields = cls.image_input_fields or []
             for field in image_input_fields:
-                …
-                …
-                …
-                …
-                …
-                …
-                …
+                if image_for_field := getattr(input_data, field):
+                    iter_image = [image_for_field] if isinstance(image_for_field, (str | bytes)) else image_for_field
+                    attachments.extend(
+                        [
+                            Attachment(url=image) if isinstance(image, str) else Attachment(data=image)
+                            for image in iter_image
+                        ]
+                    )
+            for value in input_data.__dict__.values():
+                if isinstance(value, Attachment):
+                    attachments.append(value)
+                elif isinstance(value, list):
+                    attachments.extend([item for item in value if isinstance(item, Attachment)])
+
+        return attachments
 
     @classmethod
     def _format_message(cls, message: str) -> str:
         return textwrap.dedent(message).strip()
 
     @classmethod
-    def _detect_response_parser(cls) -> Callable[[str], OutputT | Awaitable[OutputT]]:
+    def _detect_response_parser(cls) -> Callable[[str], PromptOutputT | Awaitable[PromptOutputT]]:
         if hasattr(cls, "response_parser") and cls.response_parser is not None:
             return cls.response_parser
         if issubclass(cls.output_type, BaseModel):
-            return cast(Callable[[str], OutputT], build_pydantic_parser(cls.output_type))
+            return cast(Callable[[str], PromptOutputT], build_pydantic_parser(cls.output_type))
         if cls.output_type in DEFAULT_PARSERS:
             return DEFAULT_PARSERS[cls.output_type]
         raise ValueError(f"Response parser not provided for output type {cls.output_type}")
@@ -123,28 +147,58 @@ class Prompt(Generic[InputT, OutputT], BasePromptWithParser[OutputT], metaclass=ABCMeta):
         return super().__init_subclass__(**kwargs)
 
     @overload
-    def __init__(…
+    def __init__(
+        self: "Prompt[None, PromptOutputT]", input_data: None = None, history: ChatFormat | None = None
+    ) -> None: ...
 
     @overload
-    def __init__(…
+    def __init__(
+        self: "Prompt[PromptInputT, PromptOutputT]", input_data: PromptInputT, history: ChatFormat | None = None
+    ) -> None: ...
+
+    def __init__(self, input_data: PromptInputT | None = None, history: ChatFormat | None = None) -> None:
+        """
+        Initialize the Prompt instance.
 
-    …
-    …
+        Args:
+            input_data: The input data to render the prompt templates with. Must be a Pydantic model
+                instance if the prompt has an input type defined. If None and input_type is defined,
+                a ValueError will be raised.
+            history: Optional conversation history to initialize the prompt with. If provided,
+                should be in the standard OpenAI chat format.
+
+        Raises:
+            ValueError: If input_data is None when input_type is defined, or if input_data
+                is a string instead of a Pydantic model.
+        """
         if self.input_type and input_data is None:
             raise ValueError("Input data must be provided")
 
+        if isinstance(input_data, str):
+            raise ValueError("Input data must be of pydantic model type")
+
+        if self.image_input_fields:
+            warnings.warn(
+                message="The 'image_input_fields' attribute is deprecated. "
+                "Use 'Attachment' objects in the prompt input instead.",
+                category=UserWarning,
+                stacklevel=2,
+            )
+
         self.rendered_system_prompt = (
            self._render_template(self.system_prompt_template, input_data) if self.system_prompt_template else None
        )
-        self.rendered_user_prompt = self._render_template(self.user_prompt_template, input_data)
-        self.images = self._get_images_from_input_data(input_data)
+        self.attachments = self._get_attachments_from_input_data(input_data)
 
         # Additional few shot examples that can be added dynamically using methods
         # (in opposite to the static `few_shots` attribute which is defined in the class)
-        self._instance_few_shots: list[FewShotExample[InputT, OutputT]] = []
+        self._instance_few_shots: list[FewShotExample[PromptInputT, PromptOutputT]] = []
 
         # Additional conversation history that can be added dynamically using methods
-        self._conversation_history: list[dict[str, Any]] = []
+        self._conversation_history: list[dict[str, Any]] = history or []
+
+        self.add_user_message(input_data or self._render_template(self.user_prompt_template, input_data))
+        self.rendered_user_prompt = self.chat[-1]["content"]
         super().__init__()
 
     @property
@@ -155,12 +209,6 @@ class Prompt(Generic[InputT, OutputT], BasePromptWithParser[OutputT], metaclass=ABCMeta):
         Returns:
             ChatFormat: A list of dictionaries, each containing the role and content of a message.
         """
-        user_content = (
-            [{"type": "text", "text": self.rendered_user_prompt}]
-            + [self._create_message_with_image(image) for image in self.images]
-            if self.images
-            else self.rendered_user_prompt
-        )
         chat = [
             *(
                 [{"role": "system", "content": self.rendered_system_prompt}]
@@ -168,23 +216,24 @@ class Prompt(Generic[InputT, OutputT], BasePromptWithParser[OutputT], metaclass=ABCMeta):
                 else []
             ),
             *self.list_few_shots(),
-            {"role": "user", "content": user_content},
             *self._conversation_history,
         ]
         return chat
 
-    def add_few_shot(self, user_message: str | InputT, assistant_message: str | OutputT) -> "Prompt[InputT, OutputT]":
+    def add_few_shot(
+        self, user_message: str | PromptInputT, assistant_message: str | PromptOutputT
+    ) -> "Prompt[PromptInputT, PromptOutputT]":
         """
         Add a few-shot example to the conversation.
 
         Args:
-            user_message (str | InputT): The raw user message or input data that will be rendered using the
+            user_message (str | PromptInputT): The raw user message or input data that will be rendered using the
                 user prompt template.
-            assistant_message (str | OutputT): The raw assistant response or output data that will be cast to a string
-                or in case of a Pydantic model, to JSON.
+            assistant_message (str | PromptOutputT): The raw assistant response or output data that will be cast to a
+                string or in case of a Pydantic model, to JSON.
 
         Returns:
-            Prompt[InputT, OutputT]: The current prompt instance in order to allow chaining.
+            Prompt[PromptInputT, PromptOutputT]: The current prompt instance in order to allow chaining.
         """
         self._instance_few_shots.append((user_message, assistant_message))
         return self
@@ -201,13 +250,14 @@ class Prompt(Generic[InputT, OutputT], BasePromptWithParser[OutputT], metaclass=ABCMeta):
         for user_message, assistant_message in self.few_shots + self._instance_few_shots:
             if not isinstance(user_message, str):
                 rendered_text_message = self._render_template(self.user_prompt_template, user_message)
-                …
-                …
-                …
-                …
-                …
-                …
-                …
+                input_attachments = self._get_attachments_from_input_data(user_message)
+
+                user_parts: list[dict[str, Any]] = [{"type": "text", "text": rendered_text_message}]
+                for attachment in input_attachments:
+                    user_parts.append(self.create_message_with_attachment(attachment))
+
+                user_content = user_parts if len(user_parts) > 1 else rendered_text_message
+
             else:
                 user_content = user_message
 
@@ -220,56 +270,38 @@ class Prompt(Generic[InputT, OutputT], BasePromptWithParser[OutputT], metaclass=ABCMeta):
             result.append({"role": "assistant", "content": assistant_content})
         return result
 
-    def add_user_message(self, message: str | dict[str, Any] | InputT) -> "Prompt[InputT, OutputT]":
+    def add_user_message(self, message: str | dict[str, Any] | PromptInputT) -> "Prompt[PromptInputT, PromptOutputT]":  # type: ignore
         """
         Add a user message to the conversation history.
 
         Args:
-            message (str | dict[str, Any] | InputT): The user message content. Can be:
+            message (str | dict[str, Any] | PromptInputT): The user message content. Can be:
                 - A string: Used directly as content
                 - A dictionary: With format {"type": "text", "text": "message"} or image content
-                - An InputT model: Will be rendered using the user prompt template
+                - An PromptInputT model: Will be rendered using the user prompt template
 
         Returns:
-            Prompt[InputT, OutputT]: The current prompt instance to allow chaining.
+            Prompt[PromptInputT, PromptOutputT]: The current prompt instance to allow chaining.
         """
-        content: str | list[dict[str, Any]] | dict[str, Any]
+        content: str | list[dict[str, Any]] | dict[str, Any]
 
         if isinstance(message, BaseModel):
-            # Type checking to ensure we're passing InputT to the methods
-            input_model: InputT = cast(InputT, message)
+            # Type checking to ensure we're passing PromptInputT to the methods
+            input_model: PromptInputT = cast(PromptInputT, message)
 
             # Render the message using the template if it's an input model
             rendered_text = self._render_template(self.user_prompt_template, input_model)
-            …
+            input_attachments = self._get_attachments_from_input_data(input_model)
 
-            …
-                …
-                    …
-                ]
-            else:
-                content = rendered_text
-        else:
-            # Use the message directly if it's a string or dict
-            content = message
-
-        self._conversation_history.append({"role": "user", "content": content})
-        return self
-
-    def add_assistant_message(self, message: str | OutputT) -> "Prompt[InputT, OutputT]":
-        """
-        Add an assistant message to the conversation history.
+            content_list: list[dict[str, Any]] = [{"type": "text", "text": rendered_text}]
+            for attachment in input_attachments:
+                content_list.append(self.create_message_with_attachment(attachment))
 
-        …
-        …
+            content = content_list if len(content_list) > 1 else rendered_text
+        else:
+            content = cast(str | dict[str, Any], message)
 
-        …
-            Prompt[InputT, OutputT]: The current prompt instance to allow chaining.
-        """
-        if isinstance(message, BaseModel):
-            message = message.model_dump_json()
-        self._conversation_history.append({"role": "assistant", "content": str(message)})
-        return self
+        return super().add_user_message(content)
 
     def list_images(self) -> list[str]:
         """
@@ -281,25 +313,75 @@ class Prompt(Generic[InputT, OutputT], BasePromptWithParser[OutputT], metaclass=ABCMeta):
         return [
             content["image_url"]["url"]
             for message in self.chat
+            if message["content"]
             for content in message["content"]
             if isinstance(message["content"], list) and content["type"] == "image_url"
         ]
 
+    def list_pdfs(self) -> list[str]:  # noqa: PLR6301
+        """
+        Returns the PDFs in form of URLs or base64 encoded strings.
+
+        Returns:
+            list of PDFs
+        """
+        return [
+            content["file"].get("file_id") or content["file"]["file_data"]
+            for message in self.chat
+            if message["content"]
+            for content in message["content"]
+            if isinstance(message["content"], list) and content["type"] == "file"
+        ]
+
     @staticmethod
-    def …
-        …
-        …
-        …
-        …
-        …
-        …
-        …
-        …
-        …
-        …
-        …
-        …
-        …
+    def create_message_with_attachment(attachment: Attachment) -> dict[str, Any]:
+        """
+        Create a message with an attachment in the OpenAI chat format.
+
+        Args:
+            attachment (Attachment): The attachment to include in the message.
+
+        Returns:
+            dict[str, Any]: A dictionary representing the message with the attachment.
+        """
+        if not (attachment.data or attachment.url):
+            raise PromptWithEmptyAttachment()
+
+        def get_mime_type() -> str:
+            if attachment.mime_type:
+                return attachment.mime_type
+            if attachment.data:
+                detected = filetype.guess(attachment.data)
+                if detected:
+                    return detected.mime
+            if attachment.url:
+                guessed_type, _ = mimetypes.guess_type(attachment.url)
+                if guessed_type:
+                    return guessed_type
+            raise PromptWithAttachmentOfUnknownFormat()
+
+        def encode_data_url(data: bytes, mime: str) -> str:
+            return f"data:{mime};base64,{base64.b64encode(data).decode('utf-8')}"
+
+        mime_type = get_mime_type()
+
+        if mime_type.startswith("image/"):
+            return {
+                "type": "image_url",
+                "image_url": {
+                    "url": attachment.url or encode_data_url(attachment.data, mime_type)  # type: ignore[arg-type]
+                },
+            }
+
+        if mime_type == "application/pdf":
+            return {
+                "type": "file",
+                "file": {"file_id": attachment.url}
+                if attachment.url
+                else {"file_data": encode_data_url(attachment.data, mime_type)},  # type: ignore[arg-type]
+            }
+
+        raise PromptWithAttachmentOfUnsupportedFormat(mime_type)
 
     def output_schema(self) -> dict | type[BaseModel] | None:
         """
@@ -321,7 +403,7 @@ class Prompt(Generic[InputT, OutputT], BasePromptWithParser[OutputT], metaclass=ABCMeta):
         """
         return issubclass(self.output_type, BaseModel)
 
-    async def parse_response(self, response: str) -> OutputT:
+    async def parse_response(self, response: str) -> PromptOutputT:
         """
         Parse the response from the LLM to the desired output type.
 
@@ -329,7 +411,7 @@ class Prompt(Generic[InputT, OutputT], BasePromptWithParser[OutputT], metaclass=ABCMeta):
             response (str): The response from the LLM.
 
         Returns:
-            OutputT: The parsed response.
+            PromptOutputT: The parsed response.
 
         Raises:
             ResponseParsingError: If the response cannot be parsed.
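In short: `image_input_fields` is deprecated in favor of `Attachment` fields on the input model, which `_get_attachments_from_input_data` collects (including from lists of attachments) and `create_message_with_attachment` renders as `image_url` or `file` content parts. A sketch, assuming the usual ragbits convention that a `user_prompt` class attribute supplies the Jinja template (the input and prompt classes below are hypothetical):

from pydantic import BaseModel

from ragbits.core.prompt.prompt import Attachment, Prompt


class QuestionWithDocument(BaseModel):  # hypothetical input model
    question: str
    document: Attachment  # picked up by _get_attachments_from_input_data


class DocumentPrompt(Prompt[QuestionWithDocument, str]):  # hypothetical prompt
    user_prompt = "Answer using the attached file: {{ question }}"


pdf = Attachment(data=b"%PDF-1.7 ...", mime_type="application/pdf")
prompt = DocumentPrompt(QuestionWithDocument(question="What is the title?", document=pdf))

# The PDF is embedded in the user message as a `file` content part,
# so it is visible through the new list_pdfs() helper:
print(prompt.list_pdfs())  # ["data:application/pdf;base64,..."]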
ragbits/core/sources/__init__.py
CHANGED
@@ -6,11 +6,13 @@ from ragbits.core.sources.hf import HuggingFaceSource
 from ragbits.core.sources.local import LocalFileSource
 from ragbits.core.sources.s3 import S3Source
 from ragbits.core.sources.web import WebSource
+from ragbits.core.sources.google_drive import GoogleDriveSource
 
 __all__ = [
     "AzureBlobStorageSource",
     "GCSSource",
     "GitSource",
+    "GoogleDriveSource",
     "HuggingFaceSource",
     "LocalFileSource",
     "S3Source",