camel-ai 0.2.36__py3-none-any.whl → 0.2.38__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of camel-ai might be problematic.
- camel/__init__.py +1 -1
- camel/agents/__init__.py +2 -0
- camel/agents/repo_agent.py +579 -0
- camel/configs/aiml_config.py +20 -19
- camel/configs/anthropic_config.py +25 -27
- camel/configs/cohere_config.py +11 -10
- camel/configs/deepseek_config.py +16 -16
- camel/configs/gemini_config.py +8 -8
- camel/configs/groq_config.py +18 -19
- camel/configs/internlm_config.py +8 -8
- camel/configs/litellm_config.py +26 -24
- camel/configs/mistral_config.py +8 -8
- camel/configs/moonshot_config.py +11 -11
- camel/configs/nvidia_config.py +13 -13
- camel/configs/ollama_config.py +14 -15
- camel/configs/openai_config.py +3 -3
- camel/configs/openrouter_config.py +9 -9
- camel/configs/qwen_config.py +8 -8
- camel/configs/reka_config.py +12 -11
- camel/configs/samba_config.py +14 -14
- camel/configs/sglang_config.py +15 -16
- camel/configs/siliconflow_config.py +18 -17
- camel/configs/togetherai_config.py +18 -19
- camel/configs/vllm_config.py +18 -19
- camel/configs/yi_config.py +7 -8
- camel/configs/zhipuai_config.py +8 -9
- camel/datagen/evol_instruct/__init__.py +20 -0
- camel/datagen/evol_instruct/evol_instruct.py +424 -0
- camel/datagen/evol_instruct/scorer.py +166 -0
- camel/datagen/evol_instruct/templates.py +268 -0
- camel/datasets/static_dataset.py +25 -23
- camel/environments/models.py +10 -1
- camel/environments/single_step.py +296 -136
- camel/extractors/__init__.py +16 -1
- camel/interpreters/docker_interpreter.py +1 -1
- camel/interpreters/e2b_interpreter.py +1 -1
- camel/interpreters/subprocess_interpreter.py +1 -1
- camel/loaders/__init__.py +2 -2
- camel/loaders/{panda_reader.py → pandas_reader.py} +61 -30
- camel/memories/context_creators/score_based.py +198 -67
- camel/models/aiml_model.py +9 -3
- camel/models/anthropic_model.py +11 -3
- camel/models/azure_openai_model.py +9 -3
- camel/models/base_audio_model.py +6 -0
- camel/models/base_model.py +4 -0
- camel/models/deepseek_model.py +9 -3
- camel/models/gemini_model.py +9 -3
- camel/models/groq_model.py +9 -3
- camel/models/internlm_model.py +8 -2
- camel/models/model_factory.py +4 -0
- camel/models/moonshot_model.py +8 -2
- camel/models/nemotron_model.py +9 -3
- camel/models/nvidia_model.py +9 -3
- camel/models/ollama_model.py +9 -3
- camel/models/openai_audio_models.py +5 -3
- camel/models/openai_compatible_model.py +9 -3
- camel/models/openai_model.py +9 -3
- camel/models/openrouter_model.py +9 -3
- camel/models/qwen_model.py +9 -3
- camel/models/samba_model.py +9 -3
- camel/models/sglang_model.py +11 -4
- camel/models/siliconflow_model.py +8 -2
- camel/models/stub_model.py +2 -1
- camel/models/togetherai_model.py +9 -3
- camel/models/vllm_model.py +9 -3
- camel/models/yi_model.py +9 -3
- camel/models/zhipuai_model.py +9 -3
- camel/retrievers/auto_retriever.py +14 -0
- camel/storages/__init__.py +2 -0
- camel/storages/vectordb_storages/__init__.py +2 -0
- camel/storages/vectordb_storages/tidb.py +332 -0
- camel/toolkits/__init__.py +7 -0
- camel/toolkits/browser_toolkit.py +84 -61
- camel/toolkits/openai_agent_toolkit.py +131 -0
- camel/toolkits/searxng_toolkit.py +207 -0
- camel/toolkits/thinking_toolkit.py +230 -0
- camel/types/enums.py +4 -0
- camel/utils/chunker/code_chunker.py +9 -15
- camel/verifiers/base.py +28 -5
- camel/verifiers/python_verifier.py +321 -68
- {camel_ai-0.2.36.dist-info → camel_ai-0.2.38.dist-info}/METADATA +103 -8
- {camel_ai-0.2.36.dist-info → camel_ai-0.2.38.dist-info}/RECORD +84 -75
- {camel_ai-0.2.36.dist-info → camel_ai-0.2.38.dist-info}/WHEEL +0 -0
- {camel_ai-0.2.36.dist-info → camel_ai-0.2.38.dist-info}/licenses/LICENSE +0 -0
camel/environments/single_step.py
CHANGED

("…" marks removed-line text that the diff viewer did not preserve.)

@@ -12,12 +12,10 @@
 # limitations under the License.
 # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
 
-…
-from …
-from typing import Any, Dict, Optional, Tuple, Union
+import random
+from typing import Any, Dict, List, Optional, Tuple, Union
 
 from camel.datasets import BaseGenerator, DataPoint, StaticDataset
-from camel.extractors.base import BaseExtractor
 from camel.logger import get_logger
 from camel.verifiers.base import (
     BaseVerifier,

@@ -30,18 +28,23 @@ logger = get_logger(__name__)
 
 
 class SingleStepEnv:
-    r"""A …
+    r"""A lightweight environment for single-step RL with LLMs as policy.
+
+    This environment models a single interaction between an LLM-based agent
+    and a problem drawn from a dataset—such as a question-answering or
+    math problem—where the agent produces one response and receives feedback.
+
+    Core Flow:
+        - A question is sampled from a (possibly infinitely long) dataset.
+        - The LLM generates a single-step response (the action).
+        - The response is verified against the ground truth.
+        - A reward is computed based on correctness and optional custom logic.
 
     Key Features:
-        … (4 lines not preserved)
-        - Supports async setup, teardown, and cleanup of resources.
-        …
-    This class is intended as a foundation for RL experiments involving
-    LLM-based policies, ensuring structured interactions between model
-    actions and verification mechanisms.
+        - Batched evaluation with per-sample state tracking.
+        - Async setup and teardown for verifiers and related resources.
+        - Supports deterministic sampling via local RNG (optional seed).
+        - Extensible reward computation via subclassing.
     """
 
     PLACEHOLDER_OBS = Observation(

@@ -54,43 +57,47 @@ class SingleStepEnv:
         self,
         dataset: Union[StaticDataset, BaseGenerator],
         verifier: BaseVerifier,
-        extractor: BaseExtractor,
         **kwargs,
     ) -> None:
-        r"""Initialize the …
+        r"""Initialize the SingleStepEnv.
 
         Args:
-            dataset: Dataset to sample …
-            … (3 lines not preserved)
+            dataset (Union[StaticDataset, BaseGenerator]): Dataset to sample
+                problems from.
+            verifier (BaseVerifier): Verifier used to evaluate LLM responses
+                against ground-truth answers.
+            **kwargs: Optional metadata or configuration values.
+
+        Notes:
+            This class assumes all interactions are single-step: one question,
+            one LLM response, one reward.
         """
         self.dataset = dataset
         self.verifier = verifier
-        self.extractor = extractor
         self._metadata = kwargs
 
         # State tracking
         self._is_setup: bool = False
-        self.…
-        self.…
+        self._states: List[DataPoint] = []
+        self._states_done: List[bool] = []
+        self.current_batch_size: int = 0
 
     async def setup(self) -> None:
-        r"""Set up the environment by initializing the verifier …
+        r"""Set up the environment by initializing the verifier.
 
         This method ensures that the environment is ready for interaction.
-        It sets up necessary components, including the verifier …
+        It sets up necessary components, including the verifier.
 
         Raises:
            Exception: If setup fails due to an internal error.
        """
 
         if self._is_setup:
+            logger.warning("Environment has already been set up")
             return
 
         try:
             await self.verifier.setup()
-            await self.extractor.setup()
 
             self._is_setup = True
             logger.info('Environment setup completed successfully')

@@ -101,7 +108,7 @@ class SingleStepEnv:
     async def close(self) -> None:
         r"""Clean up and close all resources used by the environment.
 
-        This method shuts down the verifier …
+        This method shuts down the verifier, resets the internal
         state, and ensures that the environment is properly closed.
 
         Raises:

@@ -109,170 +116,323 @@ class SingleStepEnv:
         """
 
         if not self._is_setup:
+            logger.warning(
+                "Not closing environment - has not been set up yet."
+            )
             return
 
         try:
             self._is_setup = False
             await self.verifier.cleanup()
-            …
-            self.…
-            self.…
+            self._states = []
+            self._states_done = []
+            self.current_batch_size = 0
             logger.info('Environment closed successfully')
         except Exception as e:
             logger.error(f'Failed to close environment: {e}')
             raise
 
-    async def reset(…
-        …
+    async def reset(
+        self, batch_size: int = 1, seed: Optional[int] = None
+    ) -> Union[Observation, List[Observation]]:
+        r"""Resets the environment and starts a new episode.
 
-        This method samples a new data…
-        initial …
+        This method samples a new batch of data points from the dataset and
+        returns the corresponding initial observations.
+
+        If a seed is provided, a local random number generator is initialized
+        for deterministic sampling. The global random state is not affected.
+
+        Args:
+            batch_size (int): Number of data points to sample.
+                (default: :obj:`1`)
+            seed (Optional[int]): Seed for deterministic sampling. If None,
+                sampling is non-deterministic. (default: :obj:`None`)
 
         Returns:
-            Observation: …
-            …
+            Observation or List[Observation]: Initial observation(s) for the
+                episode.
 
         Raises:
-            …
+            RuntimeError: If called before all previous states are processed.
+            ValueError: If batch size exceeds dataset size.
+            TypeError: If the dataset is of an unsupported type.
         """
+        if batch_size <= 0:
+            raise ValueError("Batch size must be positive")
 
         if not self._is_setup:
+            logger.warning(
+                "reset() called on un-setup environment. Setting up..."
+            )
             await self.setup()
 
-        self.…
-        … (16 lines not preserved)
+        if self._batch_started() and not self._batch_done():
+            logger.error(
+                "Reset called before all states were processed. "
+                "Call step on remaining states first."
+            )
+            raise RuntimeError(
+                "reset() called before all states in batch were processed."
+            )
+
+        if seed is not None:
+            rng = random.Random(seed)
+        else:
+            rng = random.Random()
+
+        if isinstance(self.dataset, StaticDataset):
+            dataset_len = len(self.dataset)
+
+            if batch_size > dataset_len:
+                raise ValueError(
+                    f"Batch size {batch_size} is too large for dataset "
+                    f"of size {dataset_len}"
+                )
+
+            start_idx = rng.randint(0, dataset_len - batch_size)
+            idx_slice = slice(start_idx, start_idx + batch_size)
+            val = self.dataset[idx_slice]
+            self._states = [val] if isinstance(val, DataPoint) else val
+
+            self.current_batch_size = len(self._states)
+            self._states_done = [False] * self.current_batch_size
+
+            observations = [
+                Observation(question=sample.question, context={}, metadata={})
+                for sample in self._states
+            ]
+
+            return observations[0] if batch_size == 1 else observations
+
+        elif isinstance(self.dataset, BaseGenerator):
+            raise NotImplementedError(
+                "Reset not yet implemented for BaseGenerator datasets."
+            )
+
+        else:
+            raise TypeError(f"Unsupported dataset type: {type(self.dataset)}")
+
+    async def step(
+        self, action: Union[Action, List[Action]]
+    ) -> Union[
+        Tuple[Observation, float, bool, Dict[str, Any]],
+        List[Tuple[Observation, float, bool, Dict[str, Any]]],
+    ]:
+        r"""Process actions for a subset of states and update their finished
+        status.
 
         Args:
-            action …
-            …
+            action: Single action (for batch_size=1 or micro-batch of size 1)
+                or list of actions (for batch_size>=2 with multiple actions).
+                Each action must have an index for batch_size>=2, indicating
+                which state it corresponds to.
 
         Returns:
-            StepResult: …
-            …
-            … information.
+            Union[StepResult, List[StepResult]]: StepResult or list of
+                StepResults for the processed states.
 
         Raises:
-            RuntimeError: If …
-            …
+            RuntimeError: If environment isn't set up or episode has ended.
+            ValueError: If indices are invalid, duplicate, or correspond to
+                finished states.
         """
-        …
         if not self._is_setup:
             raise RuntimeError("Environment not set up. Call setup() first.")
-        if self.…
-            raise RuntimeError(…
-            …
+        if self._batch_done():
+            raise RuntimeError(
+                "Episodes have ended for batch. Call reset() first."
+            )
+        if not self._states:
             raise RuntimeError("No current observation. Call reset() first.")
 
-        # …
-        … (8 lines not preserved)
+        # Normalize actions into a list for uniform processing
+        if self.current_batch_size == 1:
+            if isinstance(action, list):
+                if len(action) != 1 or not isinstance(action[0], Action):
+                    raise ValueError(
+                        "For batch_size=1, expect a single Action or a "
+                        "list containing exactly one Action"
+                    )
+            elif not isinstance(action, Action):
+                raise ValueError(
+                    "For batch_size=1, expect a single Action or a "
+                    "list containing exactly one Action"
+                )
+            if isinstance(action, Action):
+                actions = [action]
+            else:
+                actions = action
+            if actions[0].index is None:
+                actions[0].index = 0
+            if actions[0].index != 0:
+                raise ValueError("For batch_size=1, index must be None or 0")
+
+        else:  # batch_size >= 2
+            if isinstance(action, Action):
+                if action.index is None:
+                    raise ValueError(
+                        "For batch_size>=2, each Action must have an index"
+                    )
+                if not isinstance(action.index, int):
+                    raise ValueError("Index must be an integer")
+                actions = [action]
+            elif isinstance(action, list):
+                if not action:  # Empty list
+                    raise ValueError("Action list cannot be empty")
+                actions = action
+                for act in actions:
+                    if not isinstance(act, Action):
+                        raise ValueError(
+                            "All elements in list must be Action objects"
+                        )
+                    if act.index is None:
+                        raise ValueError(
+                            "For batch_size>=2, each Action must have an index"
+                        )
+                    if not isinstance(act.index, int):
+                        raise ValueError("Index must be an integer")
+            else:
+                raise ValueError(
+                    "For batch_size>=2, expect an Action or list of Actions"
+                )
+
+        # Validate indices
+        indices: List[int] = []
+        for act in actions:
+            assert act.index is not None
+            indices.append(act.index)
+        if len(set(indices)) != len(indices):
+            raise ValueError("Duplicate state indices in actions.")
+        for idx in indices:
+            if idx < 0 or idx >= len(self._states):
+                raise ValueError(f"Invalid state index {idx}.")
+            if self._states_done[idx]:
+                raise ValueError(f"State at index {idx} is already finished.")
+
+        num_actions = len(actions)
+        if self.current_batch_size % num_actions != 0:
+            logger.warning(
+                f"Number of actions ({num_actions}) is not a divisor of "
+                f"total batch size ({self.current_batch_size})"
+            )
+
+        proposed_solutions = [act.llm_response for act in actions]
+        ground_truths: List[str] = []
+        for idx in indices:
+            ground_truths.append(self._states[idx].final_answer)
+
+        verification_results = await self.verifier.verify_batch(
+            solutions=proposed_solutions,
+            ground_truths=ground_truths,  # type: ignore [arg-type]
+            raise_on_error=True,
         )
 
-        …
-        …
-            action, extraction_result, verification_result
+        total_rewards, rewards_dicts = await self._compute_reward_batch(
+            proposed_solutions, verification_results
         )
 
-        … (15 lines not preserved)
+        # TODO Batch this
+        step_results = []
+        for i, action in enumerate(actions):
+            assert action.index is not None
+            idx = action.index
+            step_result = StepResult(
+                observation=self.PLACEHOLDER_OBS,
+                reward=total_rewards[i],
+                rewards_dict=rewards_dicts[i],
+                done=True,
+                info={
+                    "proposed_solution": proposed_solutions[i],
+                    "verification_result": verification_results[i],
+                    "state": self._states[idx],
+                },
+            )
+            step_results.append(step_result.as_tuple())
+            self._states_done[idx] = True
 
+        return step_results[0] if len(step_results) == 1 else step_results
+
+    async def _compute_reward_batch(
         self,
-        … (6 lines not preserved)
-        This method calculates the reward based on correctness and any
-        additional custom reward components.
+        proposed_solutions: List[str],
+        verification_results: List[VerificationResult],
+    ) -> Tuple[List[float], List[Dict[str, float]]]:
+        r"""Compute rewards for a batch of proposed solutions based on
+        verification results.
 
         Args:
-            … (4 lines not preserved)
-                the extracted response.
+            proposed_solutions (List[str]): List of LLM-generated responses to
+                evaluate.
+            verification_results (List[VerificationResult]): List of
+                verification outcomes for each solution.
 
         Returns:
-            Tuple …
-                - …
-                - …
-            …
-        Raises:
-            Exception: If an error occurs while computing rewards.
+            Tuple containing:
+                - List of total rewards for each solution.
+                - List of reward component dictionaries for each solution.
         """
+        if len(proposed_solutions) != len(verification_results):
+            raise ValueError(
+                f"Length mismatch: {len(proposed_solutions)} solutions vs "
+                f"{len(verification_results)} verification results"
+            )
 
-        …
+        total_rewards = []
+        rewards_dicts = []
 
-        …
-        …
-        )
+        for solution, verification_result in zip(
+            proposed_solutions, verification_results
+        ):
+            rewards: Dict[str, float] = {}
 
-        …
-        …
-        …
+            rewards["correctness"] = (
+                self.ACCURACY_REWARD if verification_result.status else 0.0
+            )
 
-        …
+            further_rewards = await self._compute_custom_reward(
+                solution, verification_result
+            )
+            rewards = {**rewards, **further_rewards}
 
-        …
+            total_reward = sum(rewards.values())
+            total_rewards.append(total_reward)
+            rewards_dicts.append(rewards)
+
+        return total_rewards, rewards_dicts
 
-    @abstractmethod
     async def _compute_custom_reward(
-        self,
-        action: Action,
-        extraction_result: str,
-        verification_result: VerificationResult,
+        self, proposed_solution: str, verification_result: VerificationResult
     ) -> Dict[str, float]:
-        r"""Compute additional custom reward components.
+        r"""Compute additional custom reward components for a single solution.
 
-        …
-        domain-specific reward calculations.
+        To be overridden by subclasses for domain-specific rewards.
 
         Args:
-            … (5 lines not preserved)
+            proposed_solution (str): The LLM-generated response.
+            verification_result (VerificationResult): The verification outcome.
+
+        Returns:
+            Dict[str, float]: Dictionary of custom reward components.
+        """
+        return {}
+
+    def _batch_done(self) -> bool:
+        r"""Check if all states in the current batch are done.
+
+        Returns:
+            bool: True if all states are marked as done, False otherwise.
+        """
+        return all(self._states_done)
+
+    def _batch_started(self) -> bool:
+        r"""Check if any state in the current batch is done.
 
         Returns:
-            …
-            …
+            bool: True if at least one state is marked as done, False
+                otherwise.
         """
-        …
+        return any(self._states_done)
 
     @property
     def metadata(self) -> Dict[str, Any]:
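Taken together, the single_step.py changes replace the old extractor-based, one-sample flow with a batched API: reset(batch_size=n, seed=...) samples n problems through a local RNG, and step() accepts an Action (or a micro-batch of indexed Actions), verifies each raw llm_response against its state's final_answer, and returns (observation, reward, done, info) tuples. A minimal usage sketch of the new flow follows; it assumes Action and SingleStepEnv are importable from camel.environments (they are defined in camel.environments.models and camel.environments.single_step), and the \boxed{42} response is a stand-in for a real LLM completion.

import asyncio

from camel.environments import Action, SingleStepEnv

async def run_batch(dataset, verifier):
    # dataset: StaticDataset, verifier: BaseVerifier (constructed elsewhere).
    env = SingleStepEnv(dataset, verifier)

    # Seeded reset: a local random.Random(seed) draws the batch, so the
    # global random state is untouched and the sample is reproducible.
    observations = await env.reset(batch_size=2, seed=42)

    # For batch_size >= 2, every Action must carry the index of the
    # state it answers.
    actions = [
        Action(index=i, llm_response=r"\boxed{42}")
        for i, _ in enumerate(observations)
    ]

    # step() verifies each response and returns one
    # (observation, reward, done, info) tuple per action.
    for obs, reward, done, info in await env.step(actions):
        print(reward, done, info["verification_result"].status)

    await env.close()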
camel/extractors/__init__.py
CHANGED

@@ -12,5 +12,20 @@
 # limitations under the License.
 # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
 from .base import BaseExtractor, BaseExtractorStrategy
+from .python_strategies import (
+    BoxedStrategy,
+    PythonDictStrategy,
+    PythonListStrategy,
+    PythonSetStrategy,
+    PythonTupleStrategy,
+)
 
-__all__ = […
+__all__ = [
+    "BaseExtractor",
+    "BaseExtractorStrategy",
+    "BoxedStrategy",
+    "PythonListStrategy",
+    "PythonDictStrategy",
+    "PythonSetStrategy",
+    "PythonTupleStrategy",
+]
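The new python_strategies module gives extractors concrete answer-parsing steps: a \boxed{...} grabber plus parsers for Python list, dict, set, and tuple literals. A sketch of how they might be composed, assuming BaseExtractor accepts a pipeline of stages (each stage a list of strategies) and exposes async setup/extract/cleanup as in CAMEL's extractor design; treat the exact constructor signature as an assumption.

import asyncio

from camel.extractors import BaseExtractor, BoxedStrategy, PythonListStrategy

async def demo() -> None:
    # Stage 1 pulls the \boxed{...} payload out of the raw response;
    # stage 2 then parses that payload as a Python list.
    extractor = BaseExtractor(
        pipeline=[[BoxedStrategy()], [PythonListStrategy()]]
    )
    await extractor.setup()

    extracted = await extractor.extract(r"Final answer: \boxed{[1, 2, 3]}")
    print(extracted)

    await extractor.cleanup()

asyncio.run(demo())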
camel/interpreters/docker_interpreter.py
CHANGED

@@ -210,7 +210,7 @@ class DockerInterpreter(BaseInterpreter):
         if self.require_confirm:
             logger.info(
                 f"The following {code_type} code will run on your "
-                "computer: {code}"
+                f"computer: {code}"
             )
             while True:
                 choice = input("Running code? [Y/n]:").lower()
camel/interpreters/e2b_interpreter.py
CHANGED

@@ -99,7 +99,7 @@ class E2BInterpreter(BaseInterpreter):
         if self.require_confirm:
             logger.info(
                 f"The following {code_type} code will run on your "
-                "e2b sandbox: {code}"
+                f"e2b sandbox: {code}"
             )
             while True:
                 choice = input("Running code? [Y/n]:").lower()
camel/interpreters/subprocess_interpreter.py
CHANGED

@@ -292,7 +292,7 @@ class SubprocessInterpreter(BaseInterpreter):
         if self.require_confirm:
             logger.info(
                 f"The following {code_type} code will run on your "
-                "computer: {code}"
+                f"computer: {code}"
             )
             while True:
                 choice = input("Running code? [Y/n]:").lower().strip()
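All three interpreter hunks fix the same bug: with Python's implicit string concatenation, the f prefix applies to each literal separately, so the second fragment was logged with a literal {code} placeholder instead of the actual code. A standalone illustration:

code_type = "python"
code = "print('hello')"

# Before the fix: only the first literal is an f-string, so {code}
# is not interpolated and shows up verbatim in the log message.
broken = f"The following {code_type} code will run on your " "computer: {code}"
print(broken)  # ... run on your computer: {code}

# After the fix: prefixing the second literal too interpolates the code.
fixed = f"The following {code_type} code will run on your " f"computer: {code}"
print(fixed)   # ... run on your computer: print('hello')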
camel/loaders/__init__.py
CHANGED

@@ -18,7 +18,7 @@ from .chunkr_reader import ChunkrReader
 from .firecrawl_reader import Firecrawl
 from .jina_url_reader import JinaURLReader
 from .mineru_extractor import MinerU
-from .…
+from .pandas_reader import PandasReader
 from .unstructured_io import UnstructuredIO
 
 __all__ = [

@@ -30,6 +30,6 @@ __all__ = [
     'Firecrawl',
     'Apify',
     'ChunkrReader',
-    '…
+    'PandasReader',
     'MinerU',
 ]
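The reader module is renamed from panda_reader.py to pandas_reader.py and the package now exports PandasReader, so imports against the old module path break on upgrade:

# 0.2.38: the reader lives in the renamed module and is exported
# from the package root.
from camel.loaders import PandasReader
# or: from camel.loaders.pandas_reader import PandasReader

# 0.2.36-style imports no longer resolve:
# from camel.loaders.panda_reader import ...  # ModuleNotFoundError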