PyPI - camel-ai - Versions diffs - 0.2.38__py3-none-any.whl → 0.2.39__py3-none-any.whl - Mend

camel-ai 0.2.38__py3-none-any.whl → 0.2.39__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of camel-ai might be problematic. Click here for more details.

Files changed (85)

camel/__init__.py +1 -1
camel/agents/chat_agent.py +4 -0
camel/agents/repo_agent.py +2 -2
camel/benchmarks/apibank.py +1 -1
camel/benchmarks/apibench.py +1 -1
camel/configs/__init__.py +3 -0
camel/configs/modelscope_config.py +59 -0
camel/datagen/self_improving_cot.py +1 -1
camel/datasets/__init__.py +2 -0
camel/datasets/base_generator.py +22 -9
camel/datasets/few_shot_generator.py +2 -3
camel/datasets/self_instruct_generator.py +415 -0
camel/embeddings/openai_compatible_embedding.py +13 -5
camel/environments/models.py +1 -1
camel/environments/single_step.py +155 -89
camel/interpreters/docker_interpreter.py +1 -1
camel/interpreters/internal_python_interpreter.py +1 -1
camel/loaders/unstructured_io.py +2 -1
camel/memories/blocks/chat_history_block.py +1 -1
camel/memories/context_creators/score_based.py +2 -2
camel/models/__init__.py +2 -0
camel/models/model_factory.py +119 -0
camel/models/modelscope_model.py +208 -0
camel/models/openai_audio_models.py +2 -2
camel/models/openai_model.py +49 -2
camel/models/togetherai_model.py +2 -2
camel/models/vllm_model.py +1 -1
camel/models/zhipuai_model.py +2 -2
camel/retrievers/vector_retriever.py +1 -1
camel/storages/graph_storages/neo4j_graph.py +1 -1
camel/storages/vectordb_storages/base.py +2 -2
camel/storages/vectordb_storages/milvus.py +2 -2
camel/storages/vectordb_storages/qdrant.py +2 -2
camel/tasks/task.py +2 -2
camel/toolkits/__init__.py +4 -1
camel/toolkits/arxiv_toolkit.py +2 -1
camel/toolkits/ask_news_toolkit.py +11 -3
camel/toolkits/audio_analysis_toolkit.py +2 -0
camel/toolkits/base.py +3 -0
camel/toolkits/code_execution.py +3 -1
camel/toolkits/dappier_toolkit.py +2 -1
camel/toolkits/data_commons_toolkit.py +2 -0
camel/toolkits/excel_toolkit.py +2 -0
camel/toolkits/file_write_toolkit.py +2 -0
camel/toolkits/github_toolkit.py +6 -4
camel/toolkits/google_scholar_toolkit.py +2 -0
camel/toolkits/human_toolkit.py +17 -1
camel/toolkits/image_analysis_toolkit.py +2 -0
camel/toolkits/linkedin_toolkit.py +2 -1
camel/toolkits/math_toolkit.py +2 -0
camel/toolkits/mcp_toolkit.py +42 -52
camel/toolkits/meshy_toolkit.py +20 -2
camel/toolkits/networkx_toolkit.py +2 -0
camel/toolkits/notion_toolkit.py +7 -0
camel/toolkits/openbb_toolkit.py +2 -1
camel/toolkits/pubmed_toolkit.py +2 -0
camel/toolkits/reddit_toolkit.py +2 -1
camel/toolkits/retrieval_toolkit.py +2 -1
camel/toolkits/search_toolkit.py +2 -1
camel/toolkits/semantic_scholar_toolkit.py +2 -0
camel/toolkits/slack_toolkit.py +2 -0
camel/toolkits/stripe_toolkit.py +2 -1
camel/toolkits/sympy_toolkit.py +2 -0
camel/toolkits/terminal_toolkit.py +2 -0
camel/toolkits/twitter_toolkit.py +2 -1
camel/toolkits/video_analysis_toolkit.py +2 -1
camel/toolkits/video_download_toolkit.py +2 -1
camel/toolkits/weather_toolkit.py +2 -0
camel/toolkits/whatsapp_toolkit.py +2 -1
camel/toolkits/zapier_toolkit.py +2 -1
camel/types/enums.py +65 -0
camel/types/unified_model_type.py +5 -0
camel/utils/__init__.py +2 -0
camel/utils/chunker/code_chunker.py +9 -9
camel/utils/commons.py +50 -30
camel/utils/constants.py +2 -2
camel/utils/mcp.py +79 -0
camel/verifiers/__init__.py +2 -0
camel/verifiers/base.py +15 -15
camel/verifiers/math_verifier.py +182 -0
camel/verifiers/python_verifier.py +18 -26
{camel_ai-0.2.38.dist-info → camel_ai-0.2.39.dist-info}/METADATA +3 -1
{camel_ai-0.2.38.dist-info → camel_ai-0.2.39.dist-info}/RECORD +85 -80
{camel_ai-0.2.38.dist-info → camel_ai-0.2.39.dist-info}/WHEEL +0 -0
{camel_ai-0.2.38.dist-info → camel_ai-0.2.39.dist-info}/licenses/LICENSE +0 -0

camel/__init__.py CHANGED Viewed

@@ -14,7 +14,7 @@
 from camel.logger import disable_logging, enable_logging, set_log_level
-__version__ = '0.2.38'
+__version__ = '0.2.39'
 __all__ = [
     '__version__',

camel/agents/chat_agent.py CHANGED Viewed

@@ -526,6 +526,10 @@ class ChatAgent(BaseAgent):
             message.content = response.output_messages[0].content
             if not self._try_format_message(message, response_format):
                 logger.warning(f"Failed to parse response: {message.content}")
+                logger.warning(
+                    "To improve reliability, consider using models "
+                    "that are better equipped to handle structured output"
+                )
     async def _aformat_response_if_needed(
         self,

camel/agents/repo_agent.py CHANGED Viewed

@@ -17,7 +17,7 @@ from string import Template
 from typing import TYPE_CHECKING, List, Optional, Tuple, Union
 if TYPE_CHECKING:
-    from github import Github
+    from github.MainClass import Github
 from pydantic import BaseModel
 from camel.agents import ChatAgent
@@ -219,7 +219,7 @@ class RepoAgent(ChatAgent):
             List[RepositoryInfo]: A list of objects containing information
                 about the all repositories, including the contents.
         """
-        from github import Github
+        from github.MainClass import Github
         github_client = Github(self.github_auth_token)
         res = []

camel/benchmarks/apibank.py CHANGED Viewed

@@ -48,7 +48,7 @@ def process_messages(
     Args:
         chat_history (List[Dict[str, Any]):
             A list of dictionaries representing the chat history.
-        prompt (str): A propmt to be set as the system message.
+        prompt (str): A prompt to be set as the system message.
     Returns:
         List[Dict[str, str]]: A list of dictionaries representing

camel/benchmarks/apibench.py CHANGED Viewed

@@ -30,7 +30,7 @@ logger = logging.getLogger(__name__)
 # Mapping of dataset names to file names
-# 'Oracle' retriver used here which means all the full
+# 'Oracle' retriever used here which means all the full
 # API documentation will be included in the prompt
 dataset_mapping = {
     "huggingface": {

camel/configs/__init__.py CHANGED Viewed

@@ -21,6 +21,7 @@ from .groq_config import GROQ_API_PARAMS, GroqConfig
 from .internlm_config import INTERNLM_API_PARAMS, InternLMConfig
 from .litellm_config import LITELLM_API_PARAMS, LiteLLMConfig
 from .mistral_config import MISTRAL_API_PARAMS, MistralConfig
+from .modelscope_config import MODELSCOPE_API_PARAMS, ModelScopeConfig
 from .moonshot_config import MOONSHOT_API_PARAMS, MoonshotConfig
 from .nvidia_config import NVIDIA_API_PARAMS, NvidiaConfig
 from .ollama_config import OLLAMA_API_PARAMS, OllamaConfig
@@ -85,6 +86,8 @@ __all__ = [
     'INTERNLM_API_PARAMS',
     'MoonshotConfig',
     "MOONSHOT_API_PARAMS",
+    'ModelScopeConfig',
+    'MODELSCOPE_API_PARAMS',
     'SiliconFlowConfig',
     'SILICONFLOW_API_PARAMS',
     'AIMLConfig',

camel/configs/modelscope_config.py ADDED Viewed

@@ -0,0 +1,59 @@
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+from __future__ import annotations
+from typing import Optional, Union
+from camel.configs.base_config import BaseConfig
+class ModelScopeConfig(BaseConfig):
+    r"""Defines the parameters for generating chat completions using the
+    ModelScope API. You can refer to the following link for more details:
+    https://www.modelscope.cn/docs/model-service/API-Inference/intro
+    Args:
+        tool_choice (Union[dict[str, str], str], optional): Controls which (if
+            any) tool is called by the model. :obj:`"none"` means the model
+            will not call any tool and instead generates a message.
+            :obj:`"auto"` means the model can pick between generating a
+            message or calling one or more tools. :obj:`"required"` or
+            specifying a particular tool via
+            {"type": "function", "function": {"name": "some_function"}}
+            can be used to guide the model to use tools more strongly.
+            (default: :obj:`None`)
+        max_tokens (int, optional): Specifies the maximum number of tokens
+            the model can generate. This sets an upper limit, but does not
+            guarantee that this number will always be reached.
+            (default: :obj:`None`)
+        top_p (float, optional): Controls the randomness of the generated
+            results. Lower values lead to less randomness, while higher
+            values increase randomness. (default: :obj:`None`)
+        temperature (float, optional): Controls the diversity and focus of
+            the generated results. Lower values make the output more focused,
+            while higher values make it more diverse. (default: :obj:`0.3`)
+        stream (bool, optional): If True, enables streaming output.
+            (default: :obj:`None`)
+    """
+    tool_choice: Optional[Union[dict[str, str], str]] = None
+    max_tokens: Optional[int] = None
+    top_p: Optional[float] = None
+    temperature: Optional[float] = None
+    stream: Optional[bool] = None
+MODELSCOPE_API_PARAMS = {
+    param for param in ModelScopeConfig.model_fields.keys()
+}

camel/datagen/self_improving_cot.py CHANGED Viewed

@@ -518,7 +518,7 @@ class SelfImprovingCoTPipeline:
             self.reason_agent.model_backend.model_config_dict['n'] = (
                 self.rejection_sampling_n
             )
-            # Generate multiple condidate traces in one call using parameter n
+            # Generate multiple candidate traces in one call using parameter n
             responses = self.reason_agent.step(prompt)
             # Extract cancidate traces
             candidate_traces = [choice.content for choice in responses.msgs]

camel/datasets/__init__.py CHANGED Viewed

@@ -14,6 +14,7 @@
 from .base_generator import BaseGenerator
 from .few_shot_generator import FewShotGenerator
 from .models import DataPoint
+from .self_instruct_generator import SelfInstructGenerator
 from .static_dataset import StaticDataset
 __all__ = [
@@ -21,4 +22,5 @@ __all__ = [
     "DataPoint",
     "FewShotGenerator",
     "StaticDataset",
+    "SelfInstructGenerator",
 ]

camel/datasets/base_generator.py CHANGED Viewed

@@ -39,6 +39,7 @@ class BaseGenerator(abc.ABC, IterableDataset):
     def __init__(
         self,
         seed: int = 42,
+        buffer: int = 20,
         cache: Union[str, Path, None] = None,
         data_path: Union[str, Path, None] = None,
         **kwargs,
@@ -47,6 +48,8 @@ class BaseGenerator(abc.ABC, IterableDataset):
         Args:
             seed (int): Random seed for reproducibility. (default: :obj:`42`)
+            buffer (int): Amount of DataPoints to be generated when the
+                iterator runs out of DataPoints in data. (default:  :obj:`20`)
             cache (Union[str, Path, None]): Optional path to save generated
                 datapoints during iteration. If None is provided, datapoints
                 will be discarded every 100 generations.
@@ -56,7 +59,7 @@ class BaseGenerator(abc.ABC, IterableDataset):
         """
         self._rng = random.Random(seed)
         self.cache = Path(cache) if cache else None
+        self._buffer = buffer
         self._data: List[DataPoint] = []
         self._batch_to_save: List[DataPoint] = []
@@ -72,15 +75,27 @@ class BaseGenerator(abc.ABC, IterableDataset):
                 )
     @abc.abstractmethod
-    async def generate_new(self, n: int, **kwargs) -> List[DataPoint]:
-        r"""Generate n new datapoints.
+    async def generate_new(self, n: int, **kwargs) -> None:
+        r"""Generate n new datapoints and append them to self._data.
+        Subclass implementations must generate the specified number of
+        datapoints and append them directly to the `self._data` list.
+        This method should not return the datapoints; the iterator
+        relies on `self._data` being populated.
         Args:
-            n (int): Number of datapoints to generate.
+            n (int): Number of datapoints to generate and append.
             **kwargs: Additional generation parameters.
         Returns:
-            List[DataPoint]: A list of newly generated datapoints.
+            None: This method should not return anything.
+        Example:
+            ```python
+            async def generate_new(self, n: int, **kwargs) -> None:
+                new_points = [DataPoint(...) for _ in range(n)]
+                self._data.extend(new_points)
+            ```
         """
         pass
@@ -99,8 +114,7 @@ class BaseGenerator(abc.ABC, IterableDataset):
         async def generator():
             while True:
                 if not self._data:
-                    new_datapoints = await self.generate_new(20)
-                    self._data.extend(new_datapoints)
+                    await self.generate_new(self._buffer)
                 datapoint = self._data.pop(0)
                 yield datapoint
                 self._batch_to_save.append(datapoint)
@@ -137,8 +151,7 @@ class BaseGenerator(abc.ABC, IterableDataset):
         while True:
             if not self._data:
-                new_datapoints = asyncio.run(self.generate_new(20))
-                self._data.extend(new_datapoints)
+                asyncio.run(self.generate_new(self._buffer))
             datapoint = self._data.pop(0)
             yield datapoint
             self._batch_to_save.append(datapoint)

camel/datasets/few_shot_generator.py CHANGED Viewed

@@ -126,7 +126,7 @@ class FewShotGenerator(BaseGenerator):
         max_retries: int = 10,
         num_examples: int = 3,
         **kwargs,
-    ) -> List[DataPoint]:
+    ) -> None:
         r"""Generates and validates `n` new datapoints through
         few-shot prompting, with a retry limit.
@@ -203,7 +203,7 @@ class FewShotGenerator(BaseGenerator):
                 try:
                     verifier_response = await self.verifier.verify(
                         solution=rationale,
-                        ground_truth=None,
+                        reference_answer=None,
                     )
                     if not verifier_response or not verifier_response.result:
                         raise ValueError(
@@ -255,4 +255,3 @@ class FewShotGenerator(BaseGenerator):
         # Thread-safe way to extend the data list
         async with asyncio.Lock():
             self._data.extend(valid_data_points)
-        return valid_data_points

camel/datasets/self_instruct_generator.py ADDED Viewed

@@ -0,0 +1,415 @@
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+import asyncio
+import random
+from datetime import datetime
+from typing import Iterable, List, Optional, cast
+from pydantic import BaseModel, Field, ValidationError
+from camel.agents import ChatAgent
+from camel.logger import get_logger
+from camel.models import ModelFactory
+from camel.types import ModelPlatformType, ModelType
+from camel.verifiers import BaseVerifier
+from .base_generator import BaseGenerator
+from .models import DataPoint
+from .static_dataset import StaticDataset
+logger = get_logger(__name__)
+DEFAULT_INSTRUCTION_SYSTEM_PROMPT = """
+You are a high-capacity instruction generation assistant.
+Your task is to generate a **new, creative, and challenging question** based on
+several examples.
+These examples may cover different domains or styles, but your goal is to:
+- **Understand their specific patterns** in structure, and complexity;
+- **Combine and synthesize** ideas from multiple examples, rather than copying
+  or lightly editing any single one;
+- **Intelligently integrate** multiple reasoning steps, constraints, or
+  concepts into a single, coherent question;
+- Ensure the new question is **non-trivial** and requires deep thinking or
+  multi-step reasoning.
+**Guidelines:**
+- Use the examples as inspiration for format, depth, and tone.
+- Your new question should be self-contained, logically sound, and answerable.
+- Do not repeat exact phrasings or create shallow combinations; instead,
+  produce something meaningfully new.
+- Avoid open-ended or subjective questions that depend on personal opinions or
+  discussion.
+- The generated question must have a **clear, objective, and verifiable
+  answer**.
+- Aim for increased depth or novelty through subtle combination or
+  transformation.
+- Keep the final output to a **single unified question** with one clear answer,
+  not a multi-part task.
+**Output Format (strict):**
+```
+Question: [Generated question]
+```
+"""
+DEFAULT_RATIONALE_SYSTEM_PROMPT = """You are an advanced Python code assistant.
+Your task is to **solve the given question by writing Python code only**,
+without any explanation or natural language output.
+The code must compute the answer **programmatically**, not by hardcoding or
+guessing the result.
+**Rules:**
+- Use Python code to perform the actual computation.
+- Use {package_list} to solve the problem. Do not import any other libraries.
+- **Do not hardcode the final answer** (e.g., avoid writing `print(1/2)` unless
+  that value is computed).
+- The result must be obtained through valid computation logic in code.
+- Do not include explanations. Output code only.
+- The entire code must be wrapped in triple backticks:
+```
+[Your Python code here]
+```
+Now, solve the following question using Python. Only output the code:
+"""
+class SelfInstructGenerator(BaseGenerator):
+    r"""A generator for creating synthetic datapoints using self-instruct.
+    It utilizes both a human-provided dataset (seed_dataset) and generated
+    machine instructions (machine_instructions) to produce new, synthetic
+    datapoints that include a question, a computed rationale (code), and a
+    final answer (from a verifier).
+    """
+    class QuestionSchema(BaseModel):
+        r"""Schema for the generated question.
+        Attributes:
+            question (str): The question generated by the model.
+        """
+        question: str = Field(description="The question generated")
+    class RationaleSchema(BaseModel):
+        r"""Schema for the generated rationale code.
+        Attributes:
+            code (str): The generated code without any formatting.
+        """
+        code: str = Field(
+            description="The generated code without any formatting"
+        )
+    def __init__(
+        self,
+        seed_dataset: StaticDataset,
+        verifier: BaseVerifier,
+        instruction_agent: Optional[ChatAgent] = None,
+        rationale_agent: Optional[ChatAgent] = None,
+        seed: int = 42,
+        **kwargs,
+    ):
+        r"""Initialize the self-instruct generator.
+        Args:
+            seed_dataset (StaticDataset): Dataset containing seed instructions.
+            verifier (BaseVerifier): Verifier instance to validate generated
+                solutions.
+            instruction_agent (Optional[ChatAgent]): Agent for generating
+                instructions. If not provided, a default agent will be created.
+            rationale_agent (Optional[ChatAgent]): Agent for generating
+                rationales. If not provided, a default agent will be created.
+            seed (int): Random seed for reproducibility. (default: :obj:`42`)
+            **kwargs: Additional keyword arguments passed to the BaseGenerator.
+        """
+        super().__init__(seed=seed, **kwargs)
+        self.seed_dataset = seed_dataset
+        self.verifier = verifier
+        # extract packages from verifier
+        self.packages: List[str] = getattr(
+            self.verifier, "required_packages", []
+        )
+        # create default agents if not provided
+        self.instruction_agent = (
+            instruction_agent or self.default_instruction_agent()
+        )
+        self.rationale_agent = (
+            rationale_agent or self.default_rationale_agent()
+        )
+        # Extract questions from the seed dataset as human_instructions
+        self.human_instructions: List[str] = [
+            dp.question
+            for dp in list(cast(Iterable[DataPoint], self.seed_dataset))
+        ]
+        self.machine_instructions: List[DataPoint] = []
+        # Create an instance-level lock for thread-safe updates to _data
+        self._lock = asyncio.Lock()
+        self._data = []  # Storage for generated DataPoint instances
+    def default_instruction_agent(self) -> ChatAgent:
+        r"""Create the default instruction generation agent.
+        This agent is configured with a moderate temperature setting to
+        encourage creative and diverse instruction generation behavior.
+        Returns:
+            ChatAgent: An agent with the default instruction prompt.
+        """
+        model = ModelFactory.create(
+            model_platform=ModelPlatformType.DEFAULT,
+            model_type=ModelType.DEFAULT,
+            model_config_dict={"temperature": 0.7},
+        )
+        return ChatAgent(
+            DEFAULT_INSTRUCTION_SYSTEM_PROMPT,
+            model=model,
+        )
+    def default_rationale_agent(self) -> ChatAgent:
+        r"""Create the default rationale generation agent.
+        This agent is configured with a deterministic (zero temperature)
+        setting to ensure consistent and precise rationale generation based on
+        a given instruction and package list.
+        Returns:
+            ChatAgent: An agent with the rationale prompt
+        """
+        model = ModelFactory.create(
+            model_platform=ModelPlatformType.DEFAULT,
+            model_type=ModelType.DEFAULT,
+            model_config_dict={"temperature": 0.0},
+        )
+        return ChatAgent(
+            DEFAULT_RATIONALE_SYSTEM_PROMPT.format(package_list=self.packages),
+            model=model,
+        )
+    @staticmethod
+    def format_support_block(dp: DataPoint) -> str:
+        r"""Format a DataPoint into a few-shot example block.
+        Args:
+            dp (DataPoint): A data point.
+        Returns:
+            str: A formatted string containing the question and its
+                corresponding code block in Markdown-style Python format.
+        """
+        support_q = dp.question.strip()
+        support_code = dp.rationale.strip() if dp.rationale else ""
+        return (
+            f"Question:\n{support_q}\n\n"
+            "Code:\n"
+            "```python\n"
+            f"{support_code}\n"
+            "```"
+        )
+    def generate_new_instruction(
+        self,
+        agent: ChatAgent,
+        support_human_dps: list[DataPoint],
+        support_machine_dps: list[DataPoint],
+    ) -> str:
+        r"""Generate a new instruction using self-instruct prompting.
+        Args:
+            agent (ChatAgent): The agent to use for generating the instruction.
+            support_human_dps (list[DataPoint]): List of human examples to
+                sample.
+            support_machine_dps (list[DataPoint]): List of machine examples to
+                sample.
+        Returns:
+            str: The newly generated question.
+        """
+        human_sample = [dp.question for dp in list(support_human_dps)]
+        machine_sample = [dp.question for dp in list(support_machine_dps)]
+        few_shot_examples = human_sample + machine_sample
+        # Build the prompt using the few-shot examples
+        prompt = "Below are some question examples:\n\n"
+        for idx, instr in enumerate(few_shot_examples, start=1):
+            prompt += f"Question {idx}: {instr}\n"
+        prompt += f"Question {len(few_shot_examples) + 1}:\n"
+        prompt += "Now generate a new question based on the given examples.\n"
+        question_template = f"Question: {prompt}"
+        response = cast(
+            SelfInstructGenerator.QuestionSchema,
+            agent.step(question_template, response_format=self.QuestionSchema)
+            .msgs[0]
+            .parsed,
+        )
+        return response.question
+    def generate_rationale(
+        self,
+        question: str,
+        agent: Optional[ChatAgent] = None,
+        support_human_dps: Optional[list[DataPoint]] = None,
+    ) -> str:
+        r"""Generate rationale code (solution) for the given question.
+        Args:
+            question (str): The question to be solved.
+            agent (Optional[ChatAgent]): The agent to use for generating the
+                rationale. If None is provided, the default rationale agent
+                will be used. (default: :obj:`None`)
+            support_human_dps (Optional[list[DataPoint]]): List of human
+                examples to sample. (default: :obj:`None`)
+        Returns:
+            str: The generated code solution as a string.
+        """
+        # Build few-shot example prompt
+        few_shot_prompt = ""
+        if support_human_dps:
+            few_shot_examples = [
+                self.format_support_block(dp) for dp in support_human_dps
+            ]
+            few_shot_prompt += "Below are example questions and solutions:\n\n"
+            few_shot_prompt += "\n\n".join(few_shot_examples)
+        few_shot_prompt += f"\n\nWrite code to solve the question:\n{question}"
+        response = cast(
+            SelfInstructGenerator.RationaleSchema,
+            (agent or self.default_rationale_agent())
+            .step(few_shot_prompt, response_format=self.RationaleSchema)
+            .msgs[0]
+            .parsed,
+        )
+        return response.code
+    async def generate_new(
+        self,
+        n: int,
+        max_retries: int = 10,
+        human_sample_count: int = 3,
+        machine_sample_count: int = 1,
+        **kwargs,
+    ) -> None:
+        r"""Generates and validates `n` new datapoints through
+        self-instruct prompting, with a retry limit.
+        Args:
+            n (int): The number of valid datapoints to generate.
+            max_retries (int): Maximum number of retries before stopping.
+                (default: :obj:`10`)
+            human_sample_count (int): Number of human examples to sample.
+                (default: :obj:`3`)
+            machine_sample_count (int): Number of machine examples to sample.
+                (default: :obj:`1`)
+            **kwargs: Additional keyword arguments.
+        Notes:
+            - Retries on validation failures until `n` valid datapoints exist
+                or `max_retries` is reached, whichever comes first.
+            - If retries are exhausted before reaching `n`, a `RuntimeError`
+                is raised.
+            - Metadata includes a timestamp for tracking datapoint creation.
+        """
+        valid_data_points: list[DataPoint] = []
+        retries = 0
+        while len(valid_data_points) < n and retries < max_retries:
+            try:
+                human_dps_list = list(cast(List[DataPoint], self.seed_dataset))
+                support_human_dps = random.sample(
+                    human_dps_list,
+                    min(human_sample_count, len(human_dps_list)),
+                )
+                machine_dps_list = list(self.machine_instructions)
+                support_machine_dps = []
+                if machine_dps_list and machine_sample_count > 0:
+                    support_machine_dps = random.sample(
+                        machine_dps_list,
+                        min(machine_sample_count, len(machine_dps_list)),
+                    )
+                question = self.generate_new_instruction(
+                    self.instruction_agent,
+                    support_human_dps,
+                    support_machine_dps,
+                )
+                rationale = self.generate_rationale(
+                    question, self.rationale_agent, support_human_dps
+                )
+                if not isinstance(rationale, str):
+                    raise TypeError(f"Rationale {rationale} is not a string.")
+                try:
+                    verifier_response = await self.verifier.verify(
+                        solution=rationale,
+                        reference_answer=None,
+                    )
+                    if not verifier_response or not verifier_response.result:
+                        raise ValueError(
+                            "Verifier unsuccessful, response: "
+                            f"{verifier_response}"
+                        )
+                except (ValueError, AttributeError) as e:
+                    logger.warning(
+                        f"Verifier issue: {e}, "
+                        f"retrying... ({retries + 1}/{max_retries})"
+                    )
+                    retries += 1
+                    continue
+                try:
+                    new_datapoint = DataPoint(
+                        question=question,
+                        rationale=rationale,
+                        final_answer=verifier_response.result,
+                        metadata={
+                            "synthetic": str(True),
+                            "created": datetime.now().isoformat(),
+                            "generator": "self_instruct",
+                        },
+                    )
+                except ValidationError as e:
+                    logger.warning(
+                        f"Datapoint validation failed: {e}, "
+                        f"retrying... ({retries + 1}/{max_retries})"
+                    )
+                    retries += 1
+                    continue
+                valid_data_points.append(new_datapoint)
+            except Exception as e:
+                logger.warning(
+                    f"Unexpected error: {e}, retrying..."
+                    f" ({retries + 1}/{max_retries})"
+                )
+                retries += 1
+        if len(valid_data_points) < n:
+            raise RuntimeError(
+                f"Failed to generate {n} valid datapoints "
+                f"after {max_retries} retries."
+            )
+        async with self._lock:
+            self._data.extend(valid_data_points)

camel-ai 0.2.38__py3-none-any.whl → 0.2.39__py3-none-any.whl

Potentially problematic release.

camel-ai 0.2.38__py3-none-any.whl → 0.2.39__py3-none-any.whl