camel-ai 0.1.6.8__py3-none-any.whl → 0.1.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of camel-ai might be problematic.

@@ -11,14 +11,23 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.
  # =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
+ import json
  import os
+ import time
+ import uuid
  from typing import Any, Dict, List, Optional, Union

+ import httpx
  from openai import OpenAI, Stream

- from camel.configs import SAMBA_API_PARAMS
+ from camel.configs import SAMBA_FAST_API_PARAMS, SAMBA_VERSE_API_PARAMS
  from camel.messages import OpenAIMessage
- from camel.types import ChatCompletion, ChatCompletionChunk, ModelType
+ from camel.types import (
+     ChatCompletion,
+     ChatCompletionChunk,
+     CompletionUsage,
+     ModelType,
+ )
  from camel.utils import (
      BaseTokenCounter,
      OpenAITokenCounter,
@@ -31,7 +40,7 @@ class SambaModel:

      def __init__(
          self,
-         model_type: ModelType,
+         model_type: str,
          model_config_dict: Dict[str, Any],
          api_key: Optional[str] = None,
          url: Optional[str] = None,
@@ -40,21 +49,29 @@ class SambaModel:
          r"""Constructor for SambaNova backend.

          Args:
-             model_type (ModelType): Model for which a SambaNova backend is
-                 created.
+             model_type (str): Model for which a SambaNova backend is
+                 created. Supported models via the Fast API are listed at
+                 `https://sambanova.ai/fast-api?api_ref=128521`; supported
+                 models via the SambaVerse API are listed at
+                 `https://sambaverse.sambanova.ai/models`.
              model_config_dict (Dict[str, Any]): A dictionary that will
                  be fed into API request.
              api_key (Optional[str]): The API key for authenticating with the
                  SambaNova service. (default: :obj:`None`)
-             url (Optional[str]): The url to the SambaNova service. (default:
-                 :obj:`"https://fast-api.snova.ai/v1/chat/completions"`)
+             url (Optional[str]): The url to the SambaNova service. Currently
+                 supports the SambaNova Fast API:
+                 :obj:`"https://fast-api.snova.ai/v1/chat/completions"` and
+                 the SambaVerse API:
+                 :obj:`"https://sambaverse.sambanova.ai/api/predict"`.
+                 (default: :obj:`"https://fast-api.snova.ai/v1/chat/completions"`)
              token_counter (Optional[BaseTokenCounter]): Token counter to use
                  for the model. If not provided, `OpenAITokenCounter(ModelType.
                  GPT_4O_MINI)` will be used.
          """
          self.model_type = model_type
          self._api_key = api_key or os.environ.get("SAMBA_API_KEY")
-         self._url = url or os.environ.get("SAMBA_API_BASE_URL")
+         self._url = url or os.environ.get(
+             "SAMBA_API_BASE_URL",
+             "https://fast-api.snova.ai/v1/chat/completions",
+         )
          self._token_counter = token_counter
          self.model_config_dict = model_config_dict
          self.check_model_config()
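A minimal usage sketch of the new constructor, assuming `SambaModel` is importable from `camel.models` and that the config keys shown are accepted by the corresponding `*_API_PARAMS` sets; the model names are illustrative only:

    from camel.models import SambaModel  # import path assumed

    # Fast API backend: url defaults to the Fast API endpoint.
    fast_model = SambaModel(
        model_type="llama3-70b",  # hypothetical Fast API model name
        model_config_dict={"stop": None, "stream_options": None},
        api_key="<SAMBA_API_KEY>",
    )

    # SambaVerse backend: pass the predict endpoint explicitly. The
    # "publisher/model" form matters because the code later calls
    # self.model_type.split('/')[1].
    verse_model = SambaModel(
        model_type="Mistral/Mistral-7B-Instruct-v0.2",
        model_config_dict={"max_tokens": 2048, "temperature": 0.7},
        api_key="<SAMBA_API_KEY>",
        url="https://sambaverse.sambanova.ai/api/predict",
    )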
@@ -79,12 +96,26 @@ class SambaModel:
              ValueError: If the model configuration dictionary contains any
                  unexpected arguments to SambaNova API.
          """
-         for param in self.model_config_dict:
-             if param not in SAMBA_API_PARAMS:
-                 raise ValueError(
-                     f"Unexpected argument `{param}` is "
-                     "input into SambaNova model backend."
-                 )
+         if self._url == "https://fast-api.snova.ai/v1/chat/completions":
+             for param in self.model_config_dict:
+                 if param not in SAMBA_FAST_API_PARAMS:
+                     raise ValueError(
+                         f"Unexpected argument `{param}` is "
+                         "input into SambaNova Fast API."
+                     )
+         elif self._url == "https://sambaverse.sambanova.ai/api/predict":
+             for param in self.model_config_dict:
+                 if param not in SAMBA_VERSE_API_PARAMS:
+                     raise ValueError(
+                         f"Unexpected argument `{param}` is "
+                         "input into SambaVerse API."
+                     )
+
+         else:
+             raise ValueError(
+                 f"{self._url} is not supported, please check the url to the"
+                 " SambaNova service"
+             )

      @api_keys_required("SAMBA_API_KEY")
      def run(  # type: ignore[misc]
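Because validation now branches on the URL, a config key that is valid for one endpoint can be rejected by the other. A sketch of the failure mode (the key name is made up):

    try:
        SambaModel(
            model_type="llama3-70b",
            # url defaults to the Fast API, so SAMBA_FAST_API_PARAMS applies
            model_config_dict={"not_a_real_param": 1},
        )
    except ValueError as e:
        # Unexpected argument `not_a_real_param` is input into SambaNova Fast API.
        print(e)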
@@ -125,38 +156,44 @@ class SambaModel:
              RuntimeError: If the HTTP request fails.
          """

-         import httpx
-
-         headers = {
-             "Authorization": f"Basic {self._api_key}",
-             "Content-Type": "application/json",
-         }
-
-         data = {
-             "messages": messages,
-             "max_tokens": self.token_limit,
-             "stop": self.model_config_dict.get("stop"),
-             "model": self.model_type.value,
-             "stream": True,
-             "stream_options": self.model_config_dict.get("stream_options"),
-         }
-
-         try:
-             with httpx.stream(
-                 "POST",
-                 self._url or "https://fast-api.snova.ai/v1/chat/completions",
-                 headers=headers,
-                 json=data,
-             ) as api_response:
-                 stream = Stream[ChatCompletionChunk](
-                     cast_to=ChatCompletionChunk,
-                     response=api_response,
-                     client=OpenAI(),
-                 )
-                 for chunk in stream:
-                     yield chunk
-         except httpx.HTTPError as e:
-             raise RuntimeError(f"HTTP request failed: {e!s}")
+         # Handle SambaNova's Fast API
+         if self._url == "https://fast-api.snova.ai/v1/chat/completions":
+             headers = {
+                 "Authorization": f"Basic {self._api_key}",
+                 "Content-Type": "application/json",
+             }
+
+             data = {
+                 "messages": messages,
+                 "max_tokens": self.token_limit,
+                 "stop": self.model_config_dict.get("stop"),
+                 "model": self.model_type,
+                 "stream": True,
+                 "stream_options": self.model_config_dict.get("stream_options"),
+             }
+
+             try:
+                 with httpx.stream(
+                     "POST",
+                     self._url,
+                     headers=headers,
+                     json=data,
+                 ) as api_response:
+                     stream = Stream[ChatCompletionChunk](
+                         cast_to=ChatCompletionChunk,
+                         response=api_response,
+                         client=OpenAI(api_key="required_but_not_used"),
+                     )
+                     for chunk in stream:
+                         yield chunk
+             except httpx.HTTPError as e:
+                 raise RuntimeError(f"HTTP request failed: {e!s}")
+
+         elif self._url == "https://sambaverse.sambanova.ai/api/predict":
+             raise ValueError(
+                 "https://sambaverse.sambanova.ai/api/predict doesn't support"
+                 " stream mode"
+             )

      def _run_non_streaming(
          self, messages: List[OpenAIMessage]
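Consuming the Fast API stream then looks like ordinary OpenAI chunk iteration; a sketch assuming a streaming config on the `fast_model` instance from the earlier example:

    messages = [{"role": "user", "content": "Hello"}]
    for chunk in fast_model.run(messages):
        delta = chunk.choices[0].delta.content
        if delta is not None:
            print(delta, end="")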
@@ -177,62 +214,138 @@ class SambaModel:
              expected data.
          """

-         import json
-
-         import httpx
-
-         headers = {
-             "Authorization": f"Basic {self._api_key}",
-             "Content-Type": "application/json",
-         }
-
-         data = {
-             "messages": messages,
-             "max_tokens": self.token_limit,
-             "stop": self.model_config_dict.get("stop"),
-             "model": self.model_type.value,
-             "stream": True,
-             "stream_options": self.model_config_dict.get("stream_options"),
-         }
-
-         try:
-             with httpx.stream(
-                 "POST",
-                 self._url or "https://fast-api.snova.ai/v1/chat/completions",
-                 headers=headers,
-                 json=data,
-             ) as api_response:
-                 samba_response = []
-                 for chunk in api_response.iter_text():
-                     if chunk.startswith('data: '):
-                         chunk = chunk[6:]
-                     if '[DONE]' in chunk:
-                         break
-                     json_data = json.loads(chunk)
-                     samba_response.append(json_data)
-                 return self._to_openai_response(samba_response)
-         except httpx.HTTPError as e:
-             raise RuntimeError(f"HTTP request failed: {e!s}")
-         except json.JSONDecodeError as e:
-             raise ValueError(f"Failed to decode JSON response: {e!s}")
-
-     def _to_openai_response(
+         # Handle SambaNova's Fast API
+         if self._url == "https://fast-api.snova.ai/v1/chat/completions":
+             headers = {
+                 "Authorization": f"Basic {self._api_key}",
+                 "Content-Type": "application/json",
+             }
+
+             data = {
+                 "messages": messages,
+                 "max_tokens": self.token_limit,
+                 "stop": self.model_config_dict.get("stop"),
+                 "model": self.model_type,
+                 "stream": True,
+                 "stream_options": self.model_config_dict.get("stream_options"),
+             }
+
+             try:
+                 with httpx.stream(
+                     "POST",
+                     self._url,
+                     headers=headers,
+                     json=data,
+                 ) as api_response:
+                     samba_response = []
+                     for chunk in api_response.iter_text():
+                         if chunk.startswith('data: '):
+                             chunk = chunk[6:]
+                         if '[DONE]' in chunk:
+                             break
+                         json_data = json.loads(chunk)
+                         samba_response.append(json_data)
+                     return self._fastapi_to_openai_response(samba_response)
+             except httpx.HTTPError as e:
+                 raise RuntimeError(f"HTTP request failed: {e!s}")
+             except json.JSONDecodeError as e:
+                 raise ValueError(f"Failed to decode JSON response: {e!s}")
+
+         # Handle SambaNova's Sambaverse API
+         else:
+             headers = {
+                 "Content-Type": "application/json",
+                 "key": str(self._api_key),
+                 "modelName": self.model_type,
+             }
+
+             data = {
+                 "instance": json.dumps(
+                     {
+                         "conversation_id": str(uuid.uuid4()),
+                         "messages": messages,
+                     }
+                 ),
+                 "params": {
+                     "do_sample": {"type": "bool", "value": "true"},
+                     "max_tokens_to_generate": {
+                         "type": "int",
+                         "value": str(self.model_config_dict.get("max_tokens")),
+                     },
+                     "process_prompt": {"type": "bool", "value": "true"},
+                     "repetition_penalty": {
+                         "type": "float",
+                         "value": str(
+                             self.model_config_dict.get("repetition_penalty")
+                         ),
+                     },
+                     "return_token_count_only": {
+                         "type": "bool",
+                         "value": "false",
+                     },
+                     "select_expert": {
+                         "type": "str",
+                         "value": self.model_type.split('/')[1],
+                     },
+                     "stop_sequences": {
+                         "type": "str",
+                         "value": self.model_config_dict.get("stop_sequences"),
+                     },
+                     "temperature": {
+                         "type": "float",
+                         "value": str(
+                             self.model_config_dict.get("temperature")
+                         ),
+                     },
+                     "top_k": {
+                         "type": "int",
+                         "value": str(self.model_config_dict.get("top_k")),
+                     },
+                     "top_p": {
+                         "type": "float",
+                         "value": str(self.model_config_dict.get("top_p")),
+                     },
+                 },
+             }
+
+             try:
+                 # Send the request and handle the response
+                 with httpx.Client() as client:
+                     response = client.post(
+                         self._url,  # type: ignore[arg-type]
+                         headers=headers,
+                         json=data,
+                     )
+
+                 raw_text = response.text
+                 # Split the string into two dictionaries
+                 dicts = raw_text.split('}\n{')
+
+                 # Keep only the last dictionary
+                 last_dict = '{' + dicts[-1]
+
+                 # Parse the dictionary
+                 last_dict = json.loads(last_dict)
+                 return self._sambaverse_to_openai_response(last_dict)  # type: ignore[arg-type]
+
+             except httpx.HTTPStatusError:
+                 raise RuntimeError(f"HTTP request failed: {raw_text}")
+
+     def _fastapi_to_openai_response(
          self, samba_response: List[Dict[str, Any]]
      ) -> ChatCompletion:
-         r"""Converts SambaNova response chunks into an OpenAI-compatible
-         response.
+         r"""Converts SambaNova Fast API response chunks into an
+         OpenAI-compatible response.

          Args:
              samba_response (List[Dict[str, Any]]): A list of dictionaries
-                 representing partial responses from the SambaNova API.
+                 representing partial responses from the SambaNova Fast API.

          Returns:
              ChatCompletion: A `ChatCompletion` object constructed from the
                  aggregated response data.
-
-         Raises:
-             ValueError: If the response data is invalid or incomplete.
          """
+
          # Step 1: Combine the content from each chunk
          full_content = ""
          for chunk in samba_response:
  for chunk in samba_response:
@@ -268,17 +381,64 @@ class SambaModel:

          return obj

+     def _sambaverse_to_openai_response(
+         self, samba_response: Dict[str, Any]
+     ) -> ChatCompletion:
+         r"""Converts SambaVerse API response into an OpenAI-compatible
+         response.
+
+         Args:
+             samba_response (Dict[str, Any]): A dictionary representing
+                 responses from the SambaVerse API.
+
+         Returns:
+             ChatCompletion: A `ChatCompletion` object constructed from the
+                 aggregated response data.
+         """
+         choices = [
+             dict(
+                 index=0,
+                 message={
+                     "role": 'assistant',
+                     "content": samba_response['result']['responses'][0][
+                         'completion'
+                     ],
+                 },
+                 finish_reason=samba_response['result']['responses'][0][
+                     'stop_reason'
+                 ],
+             )
+         ]
+
+         obj = ChatCompletion.construct(
+             id=None,
+             choices=choices,
+             created=int(time.time()),
+             model=self.model_type,
+             object="chat.completion",
+             # SambaVerse API only provide `total_tokens`
+             usage=CompletionUsage(
+                 completion_tokens=0,
+                 prompt_tokens=0,
+                 total_tokens=int(
+                     samba_response['result']['responses'][0][
+                         'total_tokens_count'
+                     ]
+                 ),
+             ),
+         )
+
+         return obj
+
      @property
      def token_limit(self) -> int:
-         r"""Returns the maximum token limit for a given model.
+         r"""Returns the maximum token limit for the given model.

          Returns:
              int: The maximum token limit for the given model.
          """
-         return (
-             self.model_config_dict.get("max_tokens")
-             or self.model_type.token_limit
-         )
+         max_tokens = self.model_config_dict["max_tokens"]
+         return max_tokens

      @property
      def stream(self) -> bool:
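Note the behavioral change in `token_limit` above: it now reads `max_tokens` straight from the config instead of falling back to the model type's built-in limit, so omitting the key raises `KeyError`. A sketch of the new contract, assuming `max_tokens` is among the accepted SambaVerse params:

    model = SambaModel(
        model_type="Mistral/Mistral-7B-Instruct-v0.2",  # illustrative
        model_config_dict={"max_tokens": 2048},  # now mandatory
        url="https://sambaverse.sambanova.ai/api/predict",
    )
    assert model.token_limit == 2048  # KeyError if "max_tokens" is missing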
@@ -12,6 +12,7 @@
  # limitations under the License.
  # =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
  import os
+ import subprocess
  from typing import Any, Dict, List, Optional, Union

  from openai import OpenAI, Stream
@@ -52,17 +53,38 @@ class VLLMModel:
          """
          self.model_type = model_type
          self.model_config_dict = model_config_dict
-         self._url = url or os.environ.get("VLLM_BASE_URL")
+         self._url = (
+             url
+             or os.environ.get("VLLM_BASE_URL")
+             or "http://localhost:8000/v1"
+         )
+         if not url and not os.environ.get("VLLM_BASE_URL"):
+             self._start_server()
          # Use OpenAI client as interface to call vLLM
          self._client = OpenAI(
              timeout=60,
              max_retries=3,
-             base_url=self._url or "http://localhost:8000/v1",
+             base_url=self._url,
              api_key=api_key,
          )
          self._token_counter = token_counter
          self.check_model_config()

+     def _start_server(self) -> None:
+         r"""Starts the vllm server in a subprocess."""
+         try:
+             subprocess.Popen(
+                 ["vllm", "server", "--port", "8000"],
+                 stdout=subprocess.PIPE,
+                 stderr=subprocess.PIPE,
+             )
+             print(
+                 f"vllm server started on http://localhost:8000/v1 "
+                 f"for {self.model_type} model"
+             )
+         except Exception as e:
+             print(f"Failed to start vllm server: {e}")
+
      @property
      def token_counter(self) -> BaseTokenCounter:
          r"""Initialize the token counter for the model backend.
@@ -25,15 +25,13 @@ from camel.storages import (
      VectorDBQuery,
  )
  from camel.types import StorageType
+ from camel.utils import Constants

  try:
      from unstructured.documents.elements import Element
  except ImportError:
      Element = None

- DEFAULT_TOP_K_RESULTS = 1
- DEFAULT_SIMILARITY_THRESHOLD = 0.75
-

  class AutoRetriever:
      r"""Facilitates the automatic retrieval of information using a
@@ -178,9 +176,10 @@ class AutoRetriever:
          self,
          query: str,
          contents: Union[str, List[str], Element, List[Element]],
-         top_k: int = DEFAULT_TOP_K_RESULTS,
-         similarity_threshold: float = DEFAULT_SIMILARITY_THRESHOLD,
+         top_k: int = Constants.DEFAULT_TOP_K_RESULTS,
+         similarity_threshold: float = Constants.DEFAULT_SIMILARITY_THRESHOLD,
          return_detailed_info: bool = False,
+         max_characters: int = 500,
      ) -> dict[str, Sequence[Collection[str]]]:
          r"""Executes the automatic vector retriever process using vector
          storage.
@@ -198,6 +197,8 @@ class AutoRetriever:
              return_detailed_info (bool, optional): Whether to return detailed
                  information including similarity score, content path and
                  metadata. Defaults to `False`.
+             max_characters (int): Max number of characters in each chunk.
+                 Defaults to `500`.

          Returns:
              dict[str, Sequence[Collection[str]]]: By default, returns
@@ -262,7 +263,7 @@ class AutoRetriever:
                  storage=vector_storage_instance,
                  embedding_model=self.embedding_model,
              )
-             vr.process(content)
+             vr.process(content=content, max_characters=max_characters)
          else:
              vr = VectorRetriever(
                  storage=vector_storage_instance,
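A sketch of the new `max_characters` knob flowing from `run_vector_retriever` into `VectorRetriever.process`, with defaults now coming from `camel.utils.Constants` (constructor arguments, import paths, and content are illustrative assumptions):

    from camel.retrievers import AutoRetriever  # import path assumed
    from camel.types import StorageType
    from camel.utils import Constants

    retriever = AutoRetriever(
        vector_storage_local_path="local_data/",  # hypothetical path
        storage_type=StorageType.QDRANT,
    )
    info = retriever.run_vector_retriever(
        query="What is CAMEL-AI?",
        contents="https://www.camel-ai.org/",
        top_k=Constants.DEFAULT_TOP_K_RESULTS,
        max_characters=300,  # chunks capped at 300 chars instead of 500
    )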
@@ -25,15 +25,13 @@ from camel.storages import (
      VectorDBQuery,
      VectorRecord,
  )
+ from camel.utils import Constants

  try:
      from unstructured.documents.elements import Element
  except ImportError:
      Element = None

- DEFAULT_TOP_K_RESULTS = 1
- DEFAULT_SIMILARITY_THRESHOLD = 0.75
-

  class VectorRetriever(BaseRetriever):
      r"""An implementation of the `BaseRetriever` by using vector storage and
@@ -76,6 +74,7 @@ class VectorRetriever(BaseRetriever):
          self,
          content: Union[str, Element],
          chunk_type: str = "chunk_by_title",
+         max_characters: int = 500,
          **kwargs: Any,
      ) -> None:
          r"""Processes content from a file or URL, divides it into chunks by
@@ -87,6 +86,8 @@ class VectorRetriever(BaseRetriever):
                  string content or Element object.
              chunk_type (str): Type of chunking going to apply. Defaults to
                  "chunk_by_title".
+             max_characters (int): Max number of characters in each chunk.
+                 Defaults to `500`.
              **kwargs (Any): Additional keyword arguments for content parsing.
          """
          if isinstance(content, Element):
@@ -101,7 +102,9 @@ class VectorRetriever(BaseRetriever):
              elements = [self.uio.create_element_from_text(text=content)]
          if elements:
              chunks = self.uio.chunk_elements(
-                 chunk_type=chunk_type, elements=elements
+                 chunk_type=chunk_type,
+                 elements=elements,
+                 max_characters=max_characters,
              )
          if not elements:
              warnings.warn(
@@ -142,8 +145,8 @@ class VectorRetriever(BaseRetriever):
      def query(
          self,
          query: str,
-         top_k: int = DEFAULT_TOP_K_RESULTS,
-         similarity_threshold: float = DEFAULT_SIMILARITY_THRESHOLD,
+         top_k: int = Constants.DEFAULT_TOP_K_RESULTS,
+         similarity_threshold: float = Constants.DEFAULT_SIMILARITY_THRESHOLD,
      ) -> List[Dict[str, Any]]:
          r"""Executes a query in vector storage and compiles the retrieved
          results into a dictionary.
@@ -154,7 +157,8 @@ class VectorRetriever(BaseRetriever):
                  for filtering results. Defaults to
                  `DEFAULT_SIMILARITY_THRESHOLD`.
              top_k (int, optional): The number of top results to return during
-                 retriever. Must be a positive integer. Defaults to 1.
+                 retriever. Must be a positive integer. Defaults to
+                 `DEFAULT_TOP_K_RESULTS`.

          Returns:
              List[Dict[str, Any]]: Concatenated list of the query results.
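The same parameter is available when driving `VectorRetriever` directly; a sketch with the storage and embedding setup elided (variable names assumed):

    vr = VectorRetriever(
        storage=vector_storage_instance,  # assumed pre-built storage
        embedding_model=embedding_model,  # assumed embedding instance
    )
    vr.process(content="local_data/paper.pdf", max_characters=500)
    results = vr.query(
        query="key contributions",
        top_k=Constants.DEFAULT_TOP_K_RESULTS,
    )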
@@ -29,6 +29,7 @@ from .weather_toolkit import WEATHER_FUNCS, WeatherToolkit
  from .slack_toolkit import SLACK_FUNCS, SlackToolkit
  from .dalle_toolkit import DALLE_FUNCS, DalleToolkit
  from .linkedin_toolkit import LINKEDIN_FUNCS, LinkedInToolkit
+ from .reddit_toolkit import REDDIT_FUNCS, RedditToolkit

  from .base import BaseToolkit
  from .code_execution import CodeExecutionToolkit
@@ -49,6 +50,7 @@ __all__ = [
      'SLACK_FUNCS',
      'DALLE_FUNCS',
      'LINKEDIN_FUNCS',
+     'REDDIT_FUNCS',
      'BaseToolkit',
      'GithubToolkit',
      'MathToolkit',
@@ -61,5 +63,6 @@ __all__ = [
      'RetrievalToolkit',
      'OpenAPIToolkit',
      'LinkedInToolkit',
+     'RedditToolkit',
      'CodeExecutionToolkit',
  ]
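With the new exports, the Reddit toolkit is importable at package level; a one-line sketch (package path inferred from the `__all__` list above):

    from camel.toolkits import REDDIT_FUNCS, RedditToolkit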