camel-ai 0.1.6.2__py3-none-any.whl → 0.1.6.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -22,6 +22,7 @@ from camel.models.litellm_model import LiteLLMModel
22
22
  from camel.models.mistral_model import MistralModel
23
23
  from camel.models.ollama_model import OllamaModel
24
24
  from camel.models.open_source_model import OpenSourceModel
25
+ from camel.models.openai_compatibility_model import OpenAICompatibilityModel
25
26
  from camel.models.openai_model import OpenAIModel
26
27
  from camel.models.stub_model import StubModel
27
28
  from camel.models.vllm_model import VLLMModel
@@ -105,11 +106,10 @@ class ModelFactory:
105
106
  )
106
107
  elif model_platform.is_vllm:
107
108
  model_class = VLLMModel
108
- return model_class(
109
- model_type, model_config_dict, url, api_key, token_counter
110
- )
111
109
  elif model_platform.is_litellm:
112
110
  model_class = LiteLLMModel
111
+ elif model_platform.is_openai_compatibility_model:
112
+ model_class = OpenAICompatibilityModel
113
113
  else:
114
114
  raise ValueError(
115
115
  f"Unknown pair of model platform `{model_platform}` "
@@ -0,0 +1,105 @@
1
+ # =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
14
+
15
+ from typing import Any, Dict, List, Optional, Union
16
+
17
+ from openai import OpenAI, Stream
18
+
19
+ from camel.messages import OpenAIMessage
20
+ from camel.types import ChatCompletion, ChatCompletionChunk, ModelType
21
+ from camel.utils import (
22
+ BaseTokenCounter,
23
+ OpenAITokenCounter,
24
+ )
25
+
26
+
27
class OpenAICompatibilityModel:
    r"""Model backend for any service exposing an OpenAI-compatible API.

    Wraps an :obj:`openai.OpenAI` client pointed at a custom ``base_url``,
    so third-party providers that speak the OpenAI chat-completions
    protocol can be used through the same interface.
    """

    def __init__(
        self,
        model_type: str,
        model_config_dict: Dict[str, Any],
        api_key: str,
        url: str,
        token_counter: Optional[BaseTokenCounter] = None,
    ) -> None:
        r"""Constructor for model backend.

        Args:
            model_type (str): Model name for which a backend is created,
                passed verbatim as the ``model`` field of each request.
            model_config_dict (Dict[str, Any]): A dictionary that will
                be fed into openai.ChatCompletion.create().
            api_key (str): The API key for authenticating with the
                model service.
            url (str): The base URL of the OpenAI-compatible model
                service.
            token_counter (Optional[BaseTokenCounter]): Token counter to use
                for the model. If not provided, `OpenAITokenCounter(ModelType.
                GPT_3_5_TURBO)` will be used.
        """
        self.model_type = model_type
        self.model_config_dict = model_config_dict
        self._token_counter = token_counter
        # Client is bound to the provider's endpoint; 60 s timeout and up
        # to 3 retries per request.
        self._client = OpenAI(
            timeout=60,
            max_retries=3,
            api_key=api_key,
            base_url=url,
        )

    def run(
        self,
        messages: List[OpenAIMessage],
    ) -> Union[ChatCompletion, Stream[ChatCompletionChunk]]:
        r"""Runs inference of OpenAI chat completion.

        Args:
            messages (List[OpenAIMessage]): Message list with the chat history
                in OpenAI API format.

        Returns:
            Union[ChatCompletion, Stream[ChatCompletionChunk]]:
                `ChatCompletion` in the non-stream mode, or
                `Stream[ChatCompletionChunk]` in the stream mode.
        """
        # model_config_dict is splatted into the request, so a 'stream'
        # key there controls whether a Stream object is returned.
        response = self._client.chat.completions.create(
            messages=messages,
            model=self.model_type,
            **self.model_config_dict,
        )
        return response

    @property
    def token_counter(self) -> BaseTokenCounter:
        r"""Initialize the token counter for the model backend.

        Returns:
            OpenAITokenCounter: The token counter following the model's
                tokenization style.
        """
        # Lazily created; defaults to GPT-3.5-Turbo tokenization since the
        # remote model's tokenizer is unknown here.
        if not self._token_counter:
            self._token_counter = OpenAITokenCounter(ModelType.GPT_3_5_TURBO)
        return self._token_counter

    @property
    def stream(self) -> bool:
        r"""Returns whether the model is in stream mode, which sends partial
        results each time.

        Returns:
            bool: Whether the model is in stream mode.
        """
        return self.model_config_dict.get('stream', False)
@@ -15,7 +15,7 @@ import datetime
15
15
  import os
16
16
  import re
17
17
  from pathlib import Path
18
- from typing import List, Optional, Tuple, Union
18
+ from typing import Collection, List, Optional, Sequence, Tuple, Union
19
19
  from urllib.parse import urlparse
20
20
 
21
21
  from camel.embeddings import BaseEmbedding, OpenAIEmbedding
@@ -97,36 +97,36 @@ class AutoRetriever:
97
97
  f"Unsupported vector storage type: {self.storage_type}"
98
98
  )
99
99
 
100
- def _collection_name_generator(self, content_input_path: str) -> str:
100
+ def _collection_name_generator(self, content: str) -> str:
101
101
  r"""Generates a valid collection name from a given file path or URL.
102
102
 
103
103
  Args:
104
- content_input_path: str. The input URL or file path from which to
105
- generate the collection name.
104
+ contents (str): Local file path, remote URL or string content.
106
105
 
107
106
  Returns:
108
107
  str: A sanitized, valid collection name suitable for use.
109
108
  """
110
- # Check path type
111
- parsed_url = urlparse(content_input_path)
112
- self.is_url = all([parsed_url.scheme, parsed_url.netloc])
109
+ # Check if the content is URL
110
+ parsed_url = urlparse(content)
111
+ is_url = all([parsed_url.scheme, parsed_url.netloc])
113
112
 
114
113
  # Convert given path into a collection name, ensuring it only
115
114
  # contains numbers, letters, and underscores
116
- if self.is_url:
115
+ if is_url:
117
116
  # For URLs, remove https://, replace /, and any characters not
118
117
  # allowed by Milvus with _
119
118
  collection_name = re.sub(
120
119
  r'[^0-9a-zA-Z]+',
121
120
  '_',
122
- content_input_path.replace("https://", ""),
121
+ content.replace("https://", ""),
123
122
  )
124
- else:
123
+ elif os.path.exists(content):
125
124
  # For file paths, get the stem and replace spaces with _, also
126
125
  # ensuring only allowed characters are present
127
- collection_name = re.sub(
128
- r'[^0-9a-zA-Z]+', '_', Path(content_input_path).stem
129
- )
126
+ collection_name = re.sub(r'[^0-9a-zA-Z]+', '_', Path(content).stem)
127
+ else:
128
+ # the content is string input
129
+ collection_name = content[:10]
130
130
 
131
131
  # Ensure the collection name does not start or end with underscore
132
132
  collection_name = collection_name.strip("_")
@@ -193,18 +193,18 @@ class AutoRetriever:
193
193
  def run_vector_retriever(
194
194
  self,
195
195
  query: str,
196
- content_input_paths: Union[str, List[str]],
196
+ contents: Union[str, List[str]],
197
197
  top_k: int = DEFAULT_TOP_K_RESULTS,
198
198
  similarity_threshold: float = DEFAULT_SIMILARITY_THRESHOLD,
199
199
  return_detailed_info: bool = False,
200
- ) -> str:
200
+ ) -> dict[str, Sequence[Collection[str]]]:
201
201
  r"""Executes the automatic vector retriever process using vector
202
202
  storage.
203
203
 
204
204
  Args:
205
205
  query (str): Query string for information retriever.
206
- content_input_paths (Union[str, List[str]]): Paths to local
207
- files or remote URLs.
206
+ contents (Union[str, List[str]]): Local file paths, remote URLs or
207
+ string contents.
208
208
  top_k (int, optional): The number of top results to return during
209
209
  retrieve. Must be a positive integer. Defaults to
210
210
  `DEFAULT_TOP_K_RESULTS`.
@@ -216,31 +216,26 @@ class AutoRetriever:
216
216
  metadata. Defaults to `False`.
217
217
 
218
218
  Returns:
219
- string: By default, returns only the text information. If
220
- `return_detailed_info` is `True`, return detailed information
221
- including similarity score, content path and metadata.
219
+ dict[str, Sequence[Collection[str]]]: By default, returns
220
+ only the text information. If `return_detailed_info` is
221
+ `True`, return detailed information including similarity
222
+ score, content path and metadata.
222
223
 
223
224
  Raises:
224
225
  ValueError: If there's an vector storage existing with content
225
226
  name in the vector path but the payload is None. If
226
- `content_input_paths` is empty.
227
+ `contents` is empty.
227
228
  RuntimeError: If any errors occur during the retrieve process.
228
229
  """
229
- if not content_input_paths:
230
- raise ValueError("content_input_paths cannot be empty.")
230
+ if not contents:
231
+ raise ValueError("content cannot be empty.")
231
232
 
232
- content_input_paths = (
233
- [content_input_paths]
234
- if isinstance(content_input_paths, str)
235
- else content_input_paths
236
- )
233
+ contents = [contents] if isinstance(contents, str) else contents
237
234
 
238
235
  all_retrieved_info = []
239
- for content_input_path in content_input_paths:
236
+ for content in contents:
240
237
  # Generate a valid collection name
241
- collection_name = self._collection_name_generator(
242
- content_input_path
243
- )
238
+ collection_name = self._collection_name_generator(content)
244
239
  try:
245
240
  vector_storage_instance = self._initialize_vector_storage(
246
241
  collection_name
@@ -251,13 +246,11 @@ class AutoRetriever:
251
246
  file_is_modified = False # initialize with a default value
252
247
  if (
253
248
  vector_storage_instance.status().vector_count != 0
254
- and not self.is_url
249
+ and os.path.exists(content)
255
250
  ):
256
251
  # Get original modified date from file
257
252
  modified_date_from_file = (
258
- self._get_file_modified_date_from_file(
259
- content_input_path
260
- )
253
+ self._get_file_modified_date_from_file(content)
261
254
  )
262
255
  # Get modified date from vector storage
263
256
  modified_date_from_storage = (
@@ -280,18 +273,16 @@ class AutoRetriever:
280
273
  # Process and store the content to the vector storage
281
274
  vr = VectorRetriever(
282
275
  storage=vector_storage_instance,
283
- similarity_threshold=similarity_threshold,
284
276
  embedding_model=self.embedding_model,
285
277
  )
286
- vr.process(content_input_path)
278
+ vr.process(content)
287
279
  else:
288
280
  vr = VectorRetriever(
289
281
  storage=vector_storage_instance,
290
- similarity_threshold=similarity_threshold,
291
282
  embedding_model=self.embedding_model,
292
283
  )
293
284
  # Retrieve info by given query from the vector storage
294
- retrieved_info = vr.query(query, top_k)
285
+ retrieved_info = vr.query(query, top_k, similarity_threshold)
295
286
  all_retrieved_info.extend(retrieved_info)
296
287
  except Exception as e:
297
288
  raise RuntimeError(
@@ -318,20 +309,17 @@ class AutoRetriever:
318
309
  # Select the 'top_k' results
319
310
  all_retrieved_info = all_retrieved_info_sorted[:top_k]
320
311
 
321
- retrieved_infos = "\n".join(str(info) for info in all_retrieved_info)
322
- retrieved_infos_text = "\n".join(
323
- info['text'] for info in all_retrieved_info if 'text' in info
324
- )
312
+ text_retrieved_info = [item['text'] for item in all_retrieved_info]
325
313
 
326
- detailed_info = (
327
- f"Original Query:\n{{ {query} }}\n"
328
- f"Retrieved Context:\n{retrieved_infos}"
329
- )
314
+ detailed_info = {
315
+ "Original Query": query,
316
+ "Retrieved Context": all_retrieved_info,
317
+ }
330
318
 
331
- text_info = (
332
- f"Original Query:\n{{ {query} }}\n"
333
- f"Retrieved Context:\n{retrieved_infos_text}"
334
- )
319
+ text_info = {
320
+ "Original Query": query,
321
+ "Retrieved Context": text_retrieved_info,
322
+ }
335
323
 
336
324
  if return_detailed_info:
337
325
  return detailed_info
@@ -74,13 +74,16 @@ class BM25Retriever(BaseRetriever):
74
74
  elements = self.unstructured_modules.parse_file_or_url(
75
75
  content_input_path, **kwargs
76
76
  )
77
- self.chunks = self.unstructured_modules.chunk_elements(
78
- chunk_type=chunk_type, elements=elements
79
- )
77
+ if elements:
78
+ self.chunks = self.unstructured_modules.chunk_elements(
79
+ chunk_type=chunk_type, elements=elements
80
+ )
80
81
 
81
- # Convert chunks to a list of strings for tokenization
82
- tokenized_corpus = [str(chunk).split(" ") for chunk in self.chunks]
83
- self.bm25 = BM25Okapi(tokenized_corpus)
82
+ # Convert chunks to a list of strings for tokenization
83
+ tokenized_corpus = [str(chunk).split(" ") for chunk in self.chunks]
84
+ self.bm25 = BM25Okapi(tokenized_corpus)
85
+ else:
86
+ self.bm25 = None
84
87
 
85
88
  def query(
86
89
  self,
@@ -11,7 +11,10 @@
11
11
  # See the License for the specific language governing permissions and
12
12
  # limitations under the License.
13
13
  # =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
14
+ import os
15
+ import warnings
14
16
  from typing import Any, Dict, List, Optional
17
+ from urllib.parse import urlparse
15
18
 
16
19
  from camel.embeddings import BaseEmbedding, OpenAIEmbedding
17
20
  from camel.loaders import UnstructuredIO
@@ -38,24 +41,18 @@ class VectorRetriever(BaseRetriever):
38
41
  embedding_model (BaseEmbedding): Embedding model used to generate
39
42
  vector embeddings.
40
43
  storage (BaseVectorStorage): Vector storage to query.
41
- similarity_threshold (float, optional): The similarity threshold
42
- for filtering results. Defaults to `DEFAULT_SIMILARITY_THRESHOLD`.
43
44
  unstructured_modules (UnstructuredIO): A module for parsing files and
44
45
  URLs and chunking content based on specified parameters.
45
46
  """
46
47
 
47
48
  def __init__(
48
49
  self,
49
- similarity_threshold: float = DEFAULT_SIMILARITY_THRESHOLD,
50
50
  embedding_model: Optional[BaseEmbedding] = None,
51
51
  storage: Optional[BaseVectorStorage] = None,
52
52
  ) -> None:
53
53
  r"""Initializes the retriever class with an optional embedding model.
54
54
 
55
55
  Args:
56
- similarity_threshold (float, optional): The similarity threshold
57
- for filtering results. Defaults to
58
- `DEFAULT_SIMILARITY_THRESHOLD`.
59
56
  embedding_model (Optional[BaseEmbedding]): The embedding model
60
57
  instance. Defaults to `OpenAIEmbedding` if not provided.
61
58
  storage (BaseVectorStorage): Vector storage to query.
@@ -68,12 +65,11 @@ class VectorRetriever(BaseRetriever):
68
65
  vector_dim=self.embedding_model.get_output_dim()
69
66
  )
70
67
  )
71
- self.similarity_threshold = similarity_threshold
72
- self.unstructured_modules: UnstructuredIO = UnstructuredIO()
68
+ self.uio: UnstructuredIO = UnstructuredIO()
73
69
 
74
70
  def process(
75
71
  self,
76
- content_input_path: str,
72
+ content: str,
77
73
  chunk_type: str = "chunk_by_title",
78
74
  **kwargs: Any,
79
75
  ) -> None:
@@ -82,18 +78,27 @@ class VectorRetriever(BaseRetriever):
82
78
  vector storage.
83
79
 
84
80
  Args:
85
- content_input_path (str): File path or URL of the content to be
86
- processed.
81
+ contents (str): Local file path, remote URL or string content.
87
82
  chunk_type (str): Type of chunking going to apply. Defaults to
88
83
  "chunk_by_title".
89
84
  **kwargs (Any): Additional keyword arguments for content parsing.
90
85
  """
91
- elements = self.unstructured_modules.parse_file_or_url(
92
- content_input_path, **kwargs
93
- )
94
- chunks = self.unstructured_modules.chunk_elements(
95
- chunk_type=chunk_type, elements=elements
96
- )
86
+ # Check if the content is URL
87
+ parsed_url = urlparse(content)
88
+ is_url = all([parsed_url.scheme, parsed_url.netloc])
89
+ if is_url or os.path.exists(content):
90
+ elements = self.uio.parse_file_or_url(content, **kwargs)
91
+ else:
92
+ elements = [self.uio.create_element_from_text(text=content)]
93
+ if elements:
94
+ chunks = self.uio.chunk_elements(
95
+ chunk_type=chunk_type, elements=elements
96
+ )
97
+ if not elements:
98
+ warnings.warn(
99
+ f"No elements were extracted from the content: {content}"
100
+ )
101
+ return
97
102
  # Iterate to process and store embeddings, set batch of 50
98
103
  for i in range(0, len(chunks), 50):
99
104
  batch_chunks = chunks[i : i + 50]
@@ -105,7 +110,7 @@ class VectorRetriever(BaseRetriever):
105
110
  # Prepare the payload for each vector record, includes the content
106
111
  # path, chunk metadata, and chunk text
107
112
  for vector, chunk in zip(batch_vectors, batch_chunks):
108
- content_path_info = {"content path": content_input_path}
113
+ content_path_info = {"content path": content}
109
114
  chunk_metadata = {"metadata": chunk.metadata.to_dict()}
110
115
  chunk_text = {"text": str(chunk)}
111
116
  combined_dict = {
@@ -124,12 +129,16 @@ class VectorRetriever(BaseRetriever):
124
129
  self,
125
130
  query: str,
126
131
  top_k: int = DEFAULT_TOP_K_RESULTS,
132
+ similarity_threshold: float = DEFAULT_SIMILARITY_THRESHOLD,
127
133
  ) -> List[Dict[str, Any]]:
128
134
  r"""Executes a query in vector storage and compiles the retrieved
129
135
  results into a dictionary.
130
136
 
131
137
  Args:
132
138
  query (str): Query string for information retriever.
139
+ similarity_threshold (float, optional): The similarity threshold
140
+ for filtering results. Defaults to
141
+ `DEFAULT_SIMILARITY_THRESHOLD`.
133
142
  top_k (int, optional): The number of top results to return during
134
143
  retriever. Must be a positive integer. Defaults to 1.
135
144
 
@@ -161,7 +170,7 @@ class VectorRetriever(BaseRetriever):
161
170
  formatted_results = []
162
171
  for result in query_results:
163
172
  if (
164
- result.similarity >= self.similarity_threshold
173
+ result.similarity >= similarity_threshold
165
174
  and result.record.payload is not None
166
175
  ):
167
176
  result_dict = {
@@ -182,7 +191,7 @@ class VectorRetriever(BaseRetriever):
182
191
  'text': (
183
192
  f"No suitable information retrieved "
184
193
  f"from {content_path} with similarity_threshold"
185
- f" = {self.similarity_threshold}."
194
+ f" = {similarity_threshold}."
186
195
  )
187
196
  }
188
197
  ]
@@ -0,0 +1,22 @@
1
+ # =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
14
from .amazon_s3 import AmazonS3Storage
from .azure_blob import AzureBlobStorage
from .google_cloud import GoogleCloudStorage

# Public API of the object-storage subpackage: one backend class per
# supported cloud provider.
__all__ = [
    "AmazonS3Storage",
    "AzureBlobStorage",
    "GoogleCloudStorage",
]