camel-ai 0.1.6.2__py3-none-any.whl → 0.1.6.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- camel/__init__.py +1 -1
- camel/configs/gemini_config.py +0 -1
- camel/configs/groq_config.py +1 -1
- camel/configs/mistral_config.py +14 -10
- camel/embeddings/mistral_embedding.py +5 -5
- camel/interpreters/docker_interpreter.py +1 -1
- camel/loaders/__init__.py +1 -2
- camel/loaders/base_io.py +118 -52
- camel/loaders/jina_url_reader.py +6 -6
- camel/loaders/unstructured_io.py +34 -295
- camel/models/__init__.py +2 -0
- camel/models/mistral_model.py +120 -26
- camel/models/model_factory.py +3 -3
- camel/models/openai_compatibility_model.py +105 -0
- camel/retrievers/auto_retriever.py +40 -52
- camel/retrievers/bm25_retriever.py +9 -6
- camel/retrievers/vector_retriever.py +29 -20
- camel/storages/object_storages/__init__.py +22 -0
- camel/storages/object_storages/amazon_s3.py +205 -0
- camel/storages/object_storages/azure_blob.py +166 -0
- camel/storages/object_storages/base.py +115 -0
- camel/storages/object_storages/google_cloud.py +152 -0
- camel/toolkits/retrieval_toolkit.py +6 -6
- camel/toolkits/search_toolkit.py +4 -4
- camel/types/enums.py +7 -0
- camel/utils/token_counting.py +7 -3
- {camel_ai-0.1.6.2.dist-info → camel_ai-0.1.6.5.dist-info}/METADATA +9 -5
- {camel_ai-0.1.6.2.dist-info → camel_ai-0.1.6.5.dist-info}/RECORD +29 -23
- {camel_ai-0.1.6.2.dist-info → camel_ai-0.1.6.5.dist-info}/WHEEL +0 -0
camel/loaders/unstructured_io.py
CHANGED
@@ -12,12 +12,19 @@
 # limitations under the License.
 # =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
 import uuid
-
+import warnings
+from typing import (
+    Any,
+    Dict,
+    List,
+    Literal,
+    Optional,
+    Tuple,
+    Union,
+)
 
 from unstructured.documents.elements import Element
 
-from camel.utils import dependencies_required
-
 
 class UnstructuredIO:
     r"""A class to handle various functionalities provided by the
@@ -25,56 +32,12 @@ class UnstructuredIO:
     extracting, staging, chunking data, and integrating with cloud
     services like S3 and Azure for data connection.
 
-
-
-    the Unstructured library.
+    References:
+        https://docs.unstructured.io/
     """
 
-
-
-    def __init__(self):
-        r"""Initializes the UnstructuredIO class and ensures the
-        installed version of Unstructured library meets the minimum
-        requirements.
-        """
-        self._ensure_unstructured_version(self.UNSTRUCTURED_MIN_VERSION)
-
-    @dependencies_required('unstructured')
-    def _ensure_unstructured_version(self, min_version: str) -> None:
-        r"""Validates that the installed 'Unstructured' library version
-        satisfies the specified minimum version requirement. This function is
-        essential for ensuring compatibility with features that depend on a
-        certain version of the 'Unstructured' package.
-
-        Args:
-            min_version (str): The minimum version required, specified in
-                `'major.minor.patch'` format.
-
-        Raises:
-            ImportError: If the 'Unstructured' package is not available in the
-                environment.
-            ValueError: If the current `'Unstructured'` version is older than
-                the required minimum version.
-
-        Notes:
-            Uses the 'packaging.version' module to parse and compare version
-                strings.
-        """
-        from packaging import version
-        from unstructured.__version__ import __version__
-
-        # Use packaging.version to compare versions
-        min_ver = version.parse(min_version)
-        installed_ver = version.parse(__version__)
-
-        if installed_ver < min_ver:
-            raise ValueError(
-                f"Require `unstructured>={min_version}`, "
-                f"you have {__version__}."
-            )
-
+    @staticmethod
     def create_element_from_text(
-        self,
         text: str,
         element_id: Optional[Union[str, uuid.UUID]] = None,
         embeddings: Optional[List[float]] = None,
@@ -89,8 +52,8 @@ class UnstructuredIO:
 
         Args:
             text (str): The text content for the element.
-            element_id (Union[str, uuid.UUID], optional): Unique
-
+            element_id (Optional[Union[str, uuid.UUID]], optional): Unique
+                identifier for the element. Defaults to `None`.
             embeddings (Optional[List[float]], optional): A list of float
                 numbers representing the text embeddings. Defaults to `None`.
             filename (Optional[str], optional): The name of the file the
@@ -120,16 +83,16 @@ class UnstructuredIO:
 
         return Text(
             text=text,
-            element_id=element_id
+            element_id=element_id or uuid.uuid4(),
             metadata=metadata,
             embeddings=embeddings,
         )
 
+    @staticmethod
     def parse_file_or_url(
-        self,
         input_path: str,
         **kwargs: Any,
-    ) -> List[Element]:
+    ) -> Union[List[Element], None]:
         r"""Loads a file or a URL and parses its contents into elements.
 
         Args:
@@ -137,12 +100,12 @@ class UnstructuredIO:
             **kwargs: Extra kwargs passed to the partition function.
 
         Returns:
-            List[Element]: List of elements after parsing the file
+            Union[List[Element],None]: List of elements after parsing the file
+                or URL if success.
 
         Raises:
             FileNotFoundError: If the file does not exist at the path
                 specified.
-            Exception: For any other issues during file or URL parsing.
 
         Notes:
             Available document types:
@@ -166,8 +129,9 @@ class UnstructuredIO:
             try:
                 elements = partition_html(url=input_path, **kwargs)
                 return elements
-            except Exception
-
+            except Exception:
+                warnings.warn(f"Failed to parse the URL: {input_path}")
+                return None
 
         else:
             # Handling file
@@ -184,13 +148,12 @@ class UnstructuredIO:
                 with open(input_path, "rb") as f:
                     elements = partition(file=f, **kwargs)
                     return elements
-            except Exception
-
-
-                ) from e
+            except Exception:
+                warnings.warn(f"Failed to partition the file: {input_path}")
+                return None
 
+    @staticmethod
     def clean_text_data(
-        self,
         text: str,
         clean_options: Optional[List[Tuple[str, Dict[str, Any]]]] = None,
     ) -> str:
@@ -253,7 +216,7 @@ class UnstructuredIO:
         )
         from unstructured.cleaners.translate import translate_text
 
-        cleaning_functions = {
+        cleaning_functions: Any = {
             "clean_extra_whitespace": clean_extra_whitespace,
             "clean_bullets": clean_bullets,
             "clean_ordered_bullets": clean_ordered_bullets,
@@ -291,8 +254,8 @@ class UnstructuredIO:
 
         return cleaned_text
 
+    @staticmethod
     def extract_data_from_text(
-        self,
         text: str,
         extract_type: Literal[
             'extract_datetimetz',
@@ -340,7 +303,7 @@ class UnstructuredIO:
             extract_us_phone_number,
         )
 
-        extraction_functions = {
+        extraction_functions: Any = {
             "extract_datetimetz": extract_datetimetz,
             "extract_email_address": extract_email_address,
             "extract_ip_address": extract_ip_address,
@@ -357,8 +320,8 @@ class UnstructuredIO:
 
         return extraction_functions[extract_type](text, **kwargs)
 
+    @staticmethod
     def stage_elements(
-        self,
         elements: List[Any],
         stage_type: Literal[
             'convert_to_csv',
@@ -416,7 +379,7 @@ class UnstructuredIO:
             weaviate,
         )
 
-        staging_functions = {
+        staging_functions: Any = {
             "convert_to_csv": base.convert_to_csv,
             "convert_to_dataframe": base.convert_to_dataframe,
             "convert_to_dict": base.convert_to_dict,
@@ -441,8 +404,9 @@ class UnstructuredIO:
 
         return staging_functions[stage_type](elements, **kwargs)
 
+    @staticmethod
     def chunk_elements(
-
+        elements: List[Any], chunk_type: str, **kwargs
     ) -> List[Element]:
         r"""Chunks elements by titles.
 
@@ -470,228 +434,3 @@ class UnstructuredIO:
 
         # Format chunks into a list of dictionaries (or your preferred format)
         return chunking_functions[chunk_type](elements, **kwargs)
-
-    def run_s3_ingest(
-        self,
-        s3_url: str,
-        output_dir: str,
-        num_processes: int = 2,
-        anonymous: bool = True,
-    ) -> None:
-        r"""Processes documents from an S3 bucket and stores structured
-        outputs locally.
-
-        Args:
-            s3_url (str): The URL of the S3 bucket.
-            output_dir (str): Local directory to store the processed outputs.
-            num_processes (int, optional): Number of processes to use.
-                (default: :obj:`2`)
-            anonymous (bool, optional): Flag to run anonymously if
-                required. (default: :obj:`True`)
-
-        Notes:
-            You need to install the necessary extras by using:
-            `pip install "unstructured[s3]"`.
-
-        References:
-            https://unstructured-io.github.io/unstructured/
-        """
-
-        from unstructured.ingest.interfaces import (
-            FsspecConfig,
-            PartitionConfig,
-            ProcessorConfig,
-            ReadConfig,
-        )
-        from unstructured.ingest.runner import S3Runner
-
-        runner = S3Runner(
-            processor_config=ProcessorConfig(
-                verbose=True,
-                output_dir=output_dir,
-                num_processes=num_processes,
-            ),
-            read_config=ReadConfig(),
-            partition_config=PartitionConfig(),
-            fsspec_config=FsspecConfig(remote_url=s3_url),
-        )
-        runner.run(anonymous=anonymous)
-
-    def run_azure_ingest(
-        self,
-        azure_url: str,
-        output_dir: str,
-        account_name: str,
-        num_processes: int = 2,
-    ) -> None:
-        r"""Processes documents from an Azure storage container and stores
-        structured outputs locally.
-
-        Args:
-            azure_url (str): The URL of the Azure storage container.
-            output_dir (str): Local directory to store the processed outputs.
-            account_name (str): Azure account name for accessing the container.
-            num_processes (int, optional): Number of processes to use.
-                (default: :obj:`2`)
-
-        Notes:
-            You need to install the necessary extras by using:
-            `pip install "unstructured[azure]"`.
-
-        References:
-            https://unstructured-io.github.io/unstructured/
-        """
-        from unstructured.ingest.interfaces import (
-            FsspecConfig,
-            PartitionConfig,
-            ProcessorConfig,
-            ReadConfig,
-        )
-        from unstructured.ingest.runner import AzureRunner
-
-        runner = AzureRunner(
-            processor_config=ProcessorConfig(
-                verbose=True,
-                output_dir=output_dir,
-                num_processes=num_processes,
-            ),
-            read_config=ReadConfig(),
-            partition_config=PartitionConfig(),
-            fsspec_config=FsspecConfig(remote_url=azure_url),
-        )
-        runner.run(account_name=account_name)
-
-    def run_github_ingest(
-        self,
-        repo_url: str,
-        git_branch: str,
-        output_dir: str,
-        num_processes: int = 2,
-    ) -> None:
-        r"""Processes documents from a GitHub repository and stores
-        structured outputs locally.
-
-        Args:
-            repo_url (str): URL of the GitHub repository.
-            git_branch (str): Git branch name to process.
-            output_dir (str): Local directory to store the processed outputs.
-            num_processes (int, optional): Number of processes to use.
-                (default: :obj:`2`)
-
-        Notes:
-            You need to install the necessary extras by using:
-            `pip install "unstructured[github]"`.
-
-        References:
-            https://unstructured-io.github.io/unstructured/
-        """
-        from unstructured.ingest.interfaces import (
-            PartitionConfig,
-            ProcessorConfig,
-            ReadConfig,
-        )
-        from unstructured.ingest.runner import GithubRunner
-
-        runner = GithubRunner(
-            processor_config=ProcessorConfig(
-                verbose=True,
-                output_dir=output_dir,
-                num_processes=num_processes,
-            ),
-            read_config=ReadConfig(),
-            partition_config=PartitionConfig(),
-        )
-        runner.run(url=repo_url, git_branch=git_branch)
-
-    def run_slack_ingest(
-        self,
-        channels: List[str],
-        token: str,
-        start_date: str,
-        end_date: str,
-        output_dir: str,
-        num_processes: int = 2,
-    ) -> None:
-        r"""Processes documents from specified Slack channels and stores
-        structured outputs locally.
-
-        Args:
-            channels (List[str]): List of Slack channel IDs.
-            token (str): Slack API token.
-            start_date (str): Start date for fetching data.
-            end_date (str): End date for fetching data.
-            output_dir (str): Local directory to store the processed outputs.
-            num_processes (int, optional): Number of processes to use.
-                (default: :obj:`2`)
-
-        Notes:
-            You need to install the necessary extras by using:
-            `pip install "unstructured[slack]"`.
-
-        References:
-            https://unstructured-io.github.io/unstructured/
-        """
-        from unstructured.ingest.interfaces import (
-            PartitionConfig,
-            ProcessorConfig,
-            ReadConfig,
-        )
-        from unstructured.ingest.runner import SlackRunner
-
-        runner = SlackRunner(
-            processor_config=ProcessorConfig(
-                verbose=True,
-                output_dir=output_dir,
-                num_processes=num_processes,
-            ),
-            read_config=ReadConfig(),
-            partition_config=PartitionConfig(),
-        )
-        runner.run(
-            channels=channels,
-            token=token,
-            start_date=start_date,
-            end_date=end_date,
-        )
-
-    def run_discord_ingest(
-        self,
-        channels: List[str],
-        token: str,
-        output_dir: str,
-        num_processes: int = 2,
-    ) -> None:
-        r"""Processes messages from specified Discord channels and stores
-        structured outputs locally.
-
-        Args:
-            channels (List[str]): List of Discord channel IDs.
-            token (str): Discord bot token.
-            output_dir (str): Local directory to store the processed outputs.
-            num_processes (int, optional): Number of processes to use.
-                (default: :obj:`2`)
-
-        Notes:
-            You need to install the necessary extras by using:
-            `pip install "unstructured[discord]"`.
-
-        References:
-            https://unstructured-io.github.io/unstructured/
-        """
-        from unstructured.ingest.interfaces import (
-            PartitionConfig,
-            ProcessorConfig,
-            ReadConfig,
-        )
-        from unstructured.ingest.runner import DiscordRunner
-
-        runner = DiscordRunner(
-            processor_config=ProcessorConfig(
-                verbose=True,
-                output_dir=output_dir,
-                num_processes=num_processes,
-            ),
-            read_config=ReadConfig(),
-            partition_config=PartitionConfig(),
-        )
-        runner.run(channels=channels, token=token)
camel/models/__init__.py
CHANGED
@@ -23,6 +23,7 @@ from .nemotron_model import NemotronModel
 from .ollama_model import OllamaModel
 from .open_source_model import OpenSourceModel
 from .openai_audio_models import OpenAIAudioModels
+from .openai_compatibility_model import OpenAICompatibilityModel
 from .openai_model import OpenAIModel
 from .stub_model import StubModel
 from .vllm_model import VLLMModel
@@ -45,4 +46,5 @@ __all__ = [
     'OllamaModel',
     'VLLMModel',
     'GeminiModel',
+    'OpenAICompatibilityModel',
 ]
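The new backend added in camel/models/openai_compatibility_model.py is exported from the package root, so it can be imported directly; its constructor signature is not part of this diff, so only the import is sketched here:

    from camel.models import OpenAICompatibilityModel  # new public export in 0.1.6.5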
camel/models/mistral_model.py
CHANGED
@@ -15,7 +15,10 @@ import os
 from typing import TYPE_CHECKING, Any, Dict, List, Optional
 
 if TYPE_CHECKING:
-    from mistralai.models
+    from mistralai.models import (
+        ChatCompletionResponse,
+        Messages,
+    )
 
 from camel.configs import MISTRAL_API_PARAMS
 from camel.messages import OpenAIMessage
@@ -23,16 +26,24 @@ from camel.models import BaseModelBackend
 from camel.types import ChatCompletion, ModelType
 from camel.utils import (
     BaseTokenCounter,
-
+    OpenAITokenCounter,
     api_keys_required,
 )
 
+try:
+    import os
+
+    if os.getenv("AGENTOPS_API_KEY") is not None:
+        from agentops import LLMEvent, record
+    else:
+        raise ImportError
+except (ImportError, AttributeError):
+    LLMEvent = None
+
 
 class MistralModel(BaseModelBackend):
     r"""Mistral API in a unified BaseModelBackend interface."""
 
-    # TODO: Support tool calling.
-
     def __init__(
         self,
         model_type: ModelType,
@@ -52,32 +63,37 @@ class MistralModel(BaseModelBackend):
                 mistral service. (default: :obj:`None`)
             url (Optional[str]): The url to the mistral service.
             token_counter (Optional[BaseTokenCounter]): Token counter to use
-                for the model. If not provided, `
+                for the model. If not provided, `OpenAITokenCounter` will be
                 used.
         """
         super().__init__(
             model_type, model_config_dict, api_key, url, token_counter
         )
         self._api_key = api_key or os.environ.get("MISTRAL_API_KEY")
+        self._url = url or os.environ.get("MISTRAL_SERVER_URL")
 
-        from mistralai
+        from mistralai import Mistral
 
-        self._client =
+        self._client = Mistral(api_key=self._api_key, server_url=self._url)
         self._token_counter: Optional[BaseTokenCounter] = None
 
-    def
+    def _to_openai_response(
         self, response: 'ChatCompletionResponse'
     ) -> ChatCompletion:
         tool_calls = None
-        if
+        if (
+            response.choices
+            and response.choices[0].message
+            and response.choices[0].message.tool_calls is not None
+        ):
             tool_calls = [
                 dict(
-                    id=tool_call.id,
+                    id=tool_call.id,  # type: ignore[union-attr]
                     function={
-                        "name": tool_call.function.name,
-                        "arguments": tool_call.function.arguments,
+                        "name": tool_call.function.name,  # type: ignore[union-attr]
+                        "arguments": tool_call.function.arguments,  # type: ignore[union-attr]
                     },
-                    type=tool_call.type
+                    type=tool_call.TYPE,  # type: ignore[union-attr]
                 )
                 for tool_call in response.choices[0].message.tool_calls
             ]
@@ -86,14 +102,14 @@ class MistralModel(BaseModelBackend):
             id=response.id,
             choices=[
                 dict(
-                    index=response.choices[0].index,
+                    index=response.choices[0].index,  # type: ignore[index]
                     message={
-                        "role": response.choices[0].message.role,
-                        "content": response.choices[0].message.content,
+                        "role": response.choices[0].message.role,  # type: ignore[index,union-attr]
+                        "content": response.choices[0].message.content,  # type: ignore[index,union-attr]
                         "tool_calls": tool_calls,
                     },
-                    finish_reason=response.choices[0].finish_reason
-                    if response.choices[0].finish_reason
+                    finish_reason=response.choices[0].finish_reason  # type: ignore[index]
+                    if response.choices[0].finish_reason  # type: ignore[index]
                     else None,
                 )
             ],
@@ -105,17 +121,79 @@ class MistralModel(BaseModelBackend):
 
         return obj
 
+    def _to_mistral_chatmessage(
+        self,
+        messages: List[OpenAIMessage],
+    ) -> List["Messages"]:
+        import uuid
+
+        from mistralai.models import (
+            AssistantMessage,
+            FunctionCall,
+            SystemMessage,
+            ToolCall,
+            ToolMessage,
+            UserMessage,
+        )
+
+        new_messages = []
+        for msg in messages:
+            tool_id = uuid.uuid4().hex[:9]
+            tool_call_id = uuid.uuid4().hex[:9]
+
+            role = msg.get("role")
+            function_call = msg.get("function_call")
+            content = msg.get("content")
+
+            mistral_function_call = None
+            if function_call:
+                mistral_function_call = FunctionCall(
+                    name=function_call.get("name"),  # type: ignore[attr-defined]
+                    arguments=function_call.get("arguments"),  # type: ignore[attr-defined]
+                )
+
+            tool_calls = None
+            if mistral_function_call:
+                tool_calls = [
+                    ToolCall(function=mistral_function_call, id=tool_id)
+                ]
+
+            if role == "user":
+                new_messages.append(UserMessage(content=content))  # type: ignore[arg-type]
+            elif role == "assistant":
+                new_messages.append(
+                    AssistantMessage(content=content, tool_calls=tool_calls)  # type: ignore[arg-type]
+                )
+            elif role == "system":
+                new_messages.append(SystemMessage(content=content))  # type: ignore[arg-type]
+            elif role in {"tool", "function"}:
+                new_messages.append(
+                    ToolMessage(
+                        content=content,  # type: ignore[arg-type]
+                        tool_call_id=tool_call_id,
+                        name=msg.get("name"),  # type: ignore[arg-type]
+                    )
+                )
+            else:
+                raise ValueError(f"Unsupported message role: {role}")
+
+        return new_messages  # type: ignore[return-value]
+
     @property
     def token_counter(self) -> BaseTokenCounter:
         r"""Initialize the token counter for the model backend.
 
+        # NOTE: Temporarily using `OpenAITokenCounter` due to a current issue
+        # with installing `mistral-common` alongside `mistralai`.
+        # Refer to: https://github.com/mistralai/mistral-common/issues/37
+
         Returns:
             BaseTokenCounter: The token counter following the model's
                 tokenization style.
         """
         if not self._token_counter:
-            self._token_counter =
-
+            self._token_counter = OpenAITokenCounter(
+                model=ModelType.GPT_4O_MINI
             )
         return self._token_counter
 
@@ -131,17 +209,33 @@ class MistralModel(BaseModelBackend):
             in OpenAI API format.
 
         Returns:
-            ChatCompletion
+            ChatCompletion.
         """
-
-
+        mistral_messages = self._to_mistral_chatmessage(messages)
+
+        response = self._client.chat.complete(
+            messages=mistral_messages,
             model=self.model_type.value,
             **self.model_config_dict,
         )
 
-
+        openai_response = self._to_openai_response(response)  # type: ignore[arg-type]
+
+        # Add AgentOps LLM Event tracking
+        if LLMEvent:
+            llm_event = LLMEvent(
+                thread_id=openai_response.id,
+                prompt=" ".join(
+                    [message.get("content") for message in messages]  # type: ignore[misc]
+                ),
+                prompt_tokens=openai_response.usage.prompt_tokens,  # type: ignore[union-attr]
+                completion=openai_response.choices[0].message.content,
+                completion_tokens=openai_response.usage.completion_tokens,  # type: ignore[union-attr]
+                model=self.model_type.value,
+            )
+            record(llm_event)
 
-        return
+        return openai_response
 
     def check_model_config(self):
         r"""Check whether the model configuration contains any
@@ -161,7 +255,7 @@ class MistralModel(BaseModelBackend):
     @property
     def stream(self) -> bool:
         r"""Returns whether the model is in stream mode, which sends partial
-        results each time.
+        results each time. Current it's not supported.
 
         Returns:
             bool: Whether the model is in stream mode.