camel-ai 0.2.21__py3-none-any.whl → 0.2.23a0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- camel/__init__.py +1 -1
- camel/agents/_types.py +41 -0
- camel/agents/_utils.py +188 -0
- camel/agents/chat_agent.py +556 -965
- camel/agents/knowledge_graph_agent.py +7 -1
- camel/agents/multi_hop_generator_agent.py +1 -1
- camel/configs/base_config.py +10 -13
- camel/configs/deepseek_config.py +4 -30
- camel/configs/gemini_config.py +5 -31
- camel/configs/openai_config.py +14 -32
- camel/configs/qwen_config.py +36 -36
- camel/datagen/self_improving_cot.py +79 -1
- camel/datagen/self_instruct/filter/instruction_filter.py +19 -3
- camel/datagen/self_instruct/self_instruct.py +7 -2
- camel/datasets/__init__.py +28 -0
- camel/datasets/base.py +969 -0
- camel/embeddings/openai_embedding.py +10 -1
- camel/environments/__init__.py +16 -0
- camel/environments/base.py +503 -0
- camel/extractors/__init__.py +16 -0
- camel/extractors/base.py +263 -0
- camel/interpreters/docker/Dockerfile +12 -0
- camel/interpreters/docker_interpreter.py +19 -1
- camel/interpreters/subprocess_interpreter.py +42 -17
- camel/loaders/__init__.py +2 -0
- camel/loaders/mineru_extractor.py +250 -0
- camel/memories/agent_memories.py +16 -1
- camel/memories/blocks/chat_history_block.py +10 -2
- camel/memories/blocks/vectordb_block.py +1 -0
- camel/memories/context_creators/score_based.py +20 -3
- camel/memories/records.py +10 -0
- camel/messages/base.py +8 -8
- camel/models/_utils.py +57 -0
- camel/models/aiml_model.py +48 -17
- camel/models/anthropic_model.py +41 -3
- camel/models/azure_openai_model.py +39 -3
- camel/models/base_model.py +132 -4
- camel/models/cohere_model.py +88 -11
- camel/models/deepseek_model.py +107 -63
- camel/models/gemini_model.py +133 -15
- camel/models/groq_model.py +72 -10
- camel/models/internlm_model.py +14 -3
- camel/models/litellm_model.py +9 -2
- camel/models/mistral_model.py +42 -5
- camel/models/model_manager.py +48 -3
- camel/models/moonshot_model.py +33 -4
- camel/models/nemotron_model.py +32 -3
- camel/models/nvidia_model.py +43 -3
- camel/models/ollama_model.py +139 -17
- camel/models/openai_audio_models.py +7 -1
- camel/models/openai_compatible_model.py +37 -3
- camel/models/openai_model.py +158 -46
- camel/models/qwen_model.py +61 -4
- camel/models/reka_model.py +53 -3
- camel/models/samba_model.py +209 -4
- camel/models/sglang_model.py +153 -14
- camel/models/siliconflow_model.py +16 -3
- camel/models/stub_model.py +46 -4
- camel/models/togetherai_model.py +38 -3
- camel/models/vllm_model.py +37 -3
- camel/models/yi_model.py +36 -3
- camel/models/zhipuai_model.py +38 -3
- camel/retrievers/__init__.py +3 -0
- camel/retrievers/hybrid_retrival.py +237 -0
- camel/toolkits/__init__.py +9 -2
- camel/toolkits/arxiv_toolkit.py +2 -1
- camel/toolkits/ask_news_toolkit.py +4 -2
- camel/toolkits/base.py +22 -3
- camel/toolkits/code_execution.py +2 -0
- camel/toolkits/dappier_toolkit.py +2 -1
- camel/toolkits/data_commons_toolkit.py +38 -12
- camel/toolkits/function_tool.py +13 -0
- camel/toolkits/github_toolkit.py +5 -1
- camel/toolkits/google_maps_toolkit.py +2 -1
- camel/toolkits/google_scholar_toolkit.py +2 -0
- camel/toolkits/human_toolkit.py +0 -3
- camel/toolkits/linkedin_toolkit.py +3 -2
- camel/toolkits/meshy_toolkit.py +3 -2
- camel/toolkits/mineru_toolkit.py +178 -0
- camel/toolkits/networkx_toolkit.py +240 -0
- camel/toolkits/notion_toolkit.py +2 -0
- camel/toolkits/openbb_toolkit.py +3 -2
- camel/toolkits/reddit_toolkit.py +11 -3
- camel/toolkits/retrieval_toolkit.py +6 -1
- camel/toolkits/semantic_scholar_toolkit.py +2 -1
- camel/toolkits/stripe_toolkit.py +8 -2
- camel/toolkits/sympy_toolkit.py +44 -1
- camel/toolkits/video_toolkit.py +2 -0
- camel/toolkits/whatsapp_toolkit.py +3 -2
- camel/toolkits/zapier_toolkit.py +191 -0
- camel/types/__init__.py +2 -2
- camel/types/agents/__init__.py +16 -0
- camel/types/agents/tool_calling_record.py +52 -0
- camel/types/enums.py +3 -0
- camel/types/openai_types.py +16 -14
- camel/utils/__init__.py +2 -1
- camel/utils/async_func.py +2 -2
- camel/utils/commons.py +114 -1
- camel/verifiers/__init__.py +23 -0
- camel/verifiers/base.py +340 -0
- camel/verifiers/models.py +82 -0
- camel/verifiers/python_verifier.py +202 -0
- {camel_ai-0.2.21.dist-info → camel_ai-0.2.23a0.dist-info}/METADATA +273 -256
- {camel_ai-0.2.21.dist-info → camel_ai-0.2.23a0.dist-info}/RECORD +106 -85
- {camel_ai-0.2.21.dist-info → camel_ai-0.2.23a0.dist-info}/WHEEL +1 -1
- {camel_ai-0.2.21.dist-info → camel_ai-0.2.23a0.dist-info}/LICENSE +0 -0
camel/agents/knowledge_graph_agent.py
CHANGED
@@ -144,6 +144,7 @@ class KnowledgeGraphAgent(ChatAgent):
         self,
         element: "Element",
         parse_graph_elements: bool = False,
+        prompt: Optional[str] = None,
     ) -> Union[str, GraphElement]:
         r"""Run the agent to extract node and relationship information.
 
@@ -151,6 +152,8 @@ class KnowledgeGraphAgent(ChatAgent):
             element (Element): The input element.
             parse_graph_elements (bool, optional): Whether to parse into
                 `GraphElement`. Defaults to `False`.
+            prompt (str, optional): The custom prompt to be used.
+                Defaults to `None`.
 
         Returns:
             Union[str, GraphElement]: The extracted node and relationship
@@ -160,7 +163,10 @@ class KnowledgeGraphAgent(ChatAgent):
         self.reset()
         self.element = element
 
-        knowledge_graph_prompt = TextPrompt(text_prompt)
+        # Use the provided prompt or fall back to the default text_prompt
+        final_prompt = prompt if prompt is not None else text_prompt
+
+        knowledge_graph_prompt = TextPrompt(final_prompt)
         knowledge_graph_generation = knowledge_graph_prompt.format(
             task=str(element)
         )
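The new `prompt` argument lets callers swap in a custom extraction prompt; since the method still calls `.format(task=str(element))`, a custom template should keep the `{task}` placeholder. A minimal usage sketch (the template wording and element construction here are illustrative, not from the package):

```python
from camel.agents import KnowledgeGraphAgent
from camel.loaders import UnstructuredIO

# Illustrative input element; any unstructured Element works.
uio = UnstructuredIO()
element = uio.create_element_from_text(
    text="CAMEL-AI maintains the camel-ai package."
)

agent = KnowledgeGraphAgent()
# Custom template (assumed wording); `{task}` is filled with str(element).
custom_prompt = (
    "Extract all nodes and relationships from the content below.\n"
    "Content: {task}"
)
result = agent.run(element, parse_graph_elements=False, prompt=custom_prompt)
# Omitting prompt (or passing None) falls back to the default text_prompt.
```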
camel/agents/multi_hop_generator_agent.py
CHANGED
@@ -75,7 +75,7 @@ class MultiHopGeneratorAgent(ProgrammableChatAgent):
         Supporting Facts: [List of relevant text segments used]
         """  # noqa: E501
         )
-        self.system_message = BaseMessage.make_assistant_message(
+        self._system_message = BaseMessage.make_assistant_message(
             role_name='Assistant', content=system_text
         )
 
camel/configs/base_config.py
CHANGED
@@ -66,6 +66,8 @@ class BaseConfig(ABC, BaseModel):
 
         This method converts the current configuration object to a dictionary
         representation, which can be used for serialization or other purposes.
+        The dictionary won't contain None values, as some API does not support
+        None values. (Like tool in OpenAI beta API)
 
         Returns:
             dict[str, Any]: A dictionary representation of the current
@@ -73,17 +75,12 @@ class BaseConfig(ABC, BaseModel):
         """
         config_dict = self.model_dump()
 
-        if self.tools:
-            from camel.toolkits import FunctionTool
-
-            tools_schema = []
-            for tool in self.tools:
-                if not isinstance(tool, FunctionTool):
-                    raise ValueError(
-                        f"The tool {tool} should "
-                        "be an instance of `FunctionTool`."
-                    )
-                tools_schema.append(tool.get_openai_tool_schema())
-            config_dict["tools"] = tools_schema
-        return config_dict
+        # Convert tools to OpenAI tool schema
+        config_dict["tools"] = (
+            [tool.get_openai_tool_schema() for tool in self.tools]
+            if self.tools
+            else None
+        )
 
+        # Remove None values
+        return {k: v for k, v in config_dict.items() if v is not None}
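In effect, optional parameters now default to `None` and are silently dropped instead of being serialized as sentinel values. A small sketch of the resulting behavior, using `ChatGPTConfig` as an example subclass:

```python
from camel.configs import ChatGPTConfig

config = ChatGPTConfig(temperature=0.2)
params = config.as_dict()

# Unset optional fields (max_tokens, stop, response_format, ...) are
# None by default, so they no longer appear in the serialized dict.
assert "max_tokens" not in params
# tools is None here as well, so it is stripped rather than sent to the API.
assert "tools" not in params
```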
camel/configs/deepseek_config.py
CHANGED
@@ -14,12 +14,11 @@
 
 from __future__ import annotations
 
-from typing import Any, Optional, Sequence, Type, Union
+from typing import Optional, Sequence, Type, Union
 
 from pydantic import BaseModel
 
 from camel.configs.base_config import BaseConfig
-from camel.types import NOT_GIVEN, NotGiven
 
 
 class DeepSeekConfig(BaseConfig):
@@ -89,10 +88,10 @@ class DeepSeekConfig(BaseConfig):
     temperature: float = 1.0  # deepseek default: 1.0
     top_p: float = 1.0
     stream: bool = False
-    stop: Union[str, Sequence[str], NotGiven] = NOT_GIVEN
-    max_tokens: Union[int, NotGiven] = NOT_GIVEN
+    stop: Optional[Union[str, Sequence[str]]] = None
+    max_tokens: Optional[int] = None
     presence_penalty: float = 0.0
-    response_format: Union[Type[BaseModel], dict, NotGiven] = NOT_GIVEN
+    response_format: Optional[Union[Type[BaseModel], dict]] = None
     frequency_penalty: float = 0.0
     tool_choice: Optional[Union[dict[str, str], str]] = None
     logprobs: bool = False
@@ -105,30 +104,5 @@ class DeepSeekConfig(BaseConfig):
         if self.stream:
             self.stream_options = {"include_usage": include_usage}
 
-    def as_dict(self) -> dict[str, Any]:
-        r"""Convert the current configuration to a dictionary.
-
-        This method converts the current configuration object to a dictionary
-        representation, which can be used for serialization or other purposes.
-
-        Returns:
-            dict[str, Any]: A dictionary representation of the current
-            configuration.
-        """
-        config_dict = self.model_dump()
-        if self.tools:
-            from camel.toolkits import FunctionTool
-
-            tools_schema = []
-            for tool in self.tools:
-                if not isinstance(tool, FunctionTool):
-                    raise ValueError(
-                        f"The tool {tool} should "
-                        "be an instance of `FunctionTool`."
-                    )
-                tools_schema.append(tool.get_openai_tool_schema())
-            config_dict["tools"] = NOT_GIVEN
-        return config_dict
-
 
 DEEPSEEK_API_PARAMS = {param for param in DeepSeekConfig.model_fields.keys()}
camel/configs/gemini_config.py
CHANGED
@@ -14,12 +14,11 @@
 
 from __future__ import annotations
 
-from typing import Any, Optional, Sequence, Type, Union
+from typing import Optional, Sequence, Type, Union
 
 from pydantic import BaseModel
 
 from camel.configs.base_config import BaseConfig
-from camel.types import NOT_GIVEN, NotGiven
 
 
 class GeminiConfig(BaseConfig):
@@ -80,35 +79,10 @@ class GeminiConfig(BaseConfig):
     top_p: float = 1.0
     n: int = 1
     stream: bool = False
-    stop: Union[str, Sequence[str], NotGiven] = NOT_GIVEN
-    max_tokens: Union[int, NotGiven] = NOT_GIVEN
-    response_format: Union[Type[BaseModel], dict, NotGiven] = NOT_GIVEN
-    tool_choice: Optional[Union[dict[str, str], str]] = None
-
-    def as_dict(self) -> dict[str, Any]:
-        r"""Convert the current configuration to a dictionary.
-
-        This method converts the current configuration object to a dictionary
-        representation, which can be used for serialization or other purposes.
-
-        Returns:
-            dict[str, Any]: A dictionary representation of the current
-            configuration.
-        """
-        config_dict = self.model_dump()
-        if self.tools:
-            from camel.toolkits import FunctionTool
-
-            tools_schema = []
-            for tool in self.tools:
-                if not isinstance(tool, FunctionTool):
-                    raise ValueError(
-                        f"The tool {tool} should "
-                        "be an instance of `FunctionTool`."
-                    )
-                tools_schema.append(tool.get_openai_tool_schema())
-            config_dict["tools"] = NOT_GIVEN
-        return config_dict
+    stop: Optional[Union[str, Sequence[str]]] = None
+    max_tokens: Optional[int] = None
+    response_format: Optional[Union[Type[BaseModel], dict]] = None
+    tool_choice: Optional[Union[dict[str, str], str]] = None
 
 
 Gemini_API_PARAMS = {param for param in GeminiConfig.model_fields.keys()}
camel/configs/openai_config.py
CHANGED
@@ -13,12 +13,11 @@
 # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
 from __future__ import annotations
 
-from typing import Any, Optional, Sequence, Type, Union
+from typing import Dict, Optional, Sequence, Type, Union
 
 from pydantic import BaseModel, Field
 
 from camel.configs.base_config import BaseConfig
-from camel.types import NOT_GIVEN, NotGiven
 
 
 class ChatGPTConfig(BaseConfig):
@@ -95,45 +94,28 @@ class ChatGPTConfig(BaseConfig):
             forces the model to call that tool. :obj:`"none"` is the default
             when no tools are present. :obj:`"auto"` is the default if tools
             are present.
+        reasoning_effort(str, optional): A parameter specifying the level of
+            reasoning used by certain model types. Valid values are :obj:
+            `"low"`, :obj:`"medium"`, or :obj:`"high"`. If set, it is only
+            applied to the model types that support it (e.g., :obj:`o1`,
+            :obj:`o1mini`, :obj:`o1preview`, :obj:`o3mini`). If not provided
+            or if the model type does not support it, this parameter is
+            ignored. (default: :obj:`None`)
     """
 
     temperature: float = 0.2  # openai default: 1.0
     top_p: float = 1.0
     n: int = 1
     stream: bool = False
-    stop: Union[str, Sequence[str], NotGiven] = NOT_GIVEN
-    max_tokens: Union[int, NotGiven] = NOT_GIVEN
+    stop: Optional[Union[str, Sequence[str]]] = None
+    max_tokens: Optional[int] = None
     presence_penalty: float = 0.0
-    response_format: Union[Type[BaseModel], dict, NotGiven] = NOT_GIVEN
+    response_format: Optional[Union[Type[BaseModel], Dict]] = None
    frequency_penalty: float = 0.0
-    logit_bias: dict = Field(default_factory=dict)
+    logit_bias: Dict = Field(default_factory=dict)
     user: str = ""
-    tool_choice: Optional[Union[dict[str, str], str]] = None
-
-    def as_dict(self) -> dict[str, Any]:
-        r"""Convert the current configuration to a dictionary.
-
-        This method converts the current configuration object to a dictionary
-        representation, which can be used for serialization or other purposes.
-
-        Returns:
-            dict[str, Any]: A dictionary representation of the current
-            configuration.
-        """
-        config_dict = self.model_dump()
-        if self.tools:
-            from camel.toolkits import FunctionTool
-
-            tools_schema = []
-            for tool in self.tools:
-                if not isinstance(tool, FunctionTool):
-                    raise ValueError(
-                        f"The tool {tool} should "
-                        "be an instance of `FunctionTool`."
-                    )
-                tools_schema.append(tool.get_openai_tool_schema())
-            config_dict["tools"] = NOT_GIVEN
-        return config_dict
+    tool_choice: Optional[Union[Dict[str, str], str]] = None
+    reasoning_effort: Optional[str] = None
 
 
 OPENAI_API_PARAMS = {param for param in ChatGPTConfig.model_fields.keys()}
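A sketch of wiring the new `reasoning_effort` option into a model backend; the `O3_MINI` enum member is assumed to exist, per the docstring's mention of o3mini:

```python
from camel.configs import ChatGPTConfig
from camel.models import ModelFactory
from camel.types import ModelPlatformType, ModelType

config = ChatGPTConfig(reasoning_effort="high")  # "low" | "medium" | "high"
model = ModelFactory.create(
    model_platform=ModelPlatformType.OPENAI,
    model_type=ModelType.O3_MINI,  # assumed enum member for o3-mini
    model_config_dict=config.as_dict(),
)
# On model types without reasoning support, the parameter is ignored.
```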
camel/configs/qwen_config.py
CHANGED
@@ -13,10 +13,9 @@
 # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
 from __future__ import annotations
 
-from typing import List, Optional, Union
+from typing import Dict, List, Optional, Union
 
 from camel.configs.base_config import BaseConfig
-from camel.types import NOT_GIVEN, NotGiven
 
 
 class QwenConfig(BaseConfig):
@@ -27,58 +26,59 @@ class QwenConfig(BaseConfig):
     Args:
         stream (bool, optional): Whether to stream the response.
            (default: :obj:`False`)
-        temperature (float, optional): Controls the diversity and focus of
-            the generated results. Lower values make the output more focused,
-            while higher values make it more diverse.
-            (default: :obj:`0.3`)
-        top_p (float, optional): Controls the diversity and focus of
+        temperature (float, optional): Controls the diversity and
+            focus of the generated results. Lower values make the output more
+            focused, while higher values make it more diverse.
+            (default: :obj:`0.3`)
+        top_p (float, optional): Controls the diversity and focus of
+            the generated results. Higher values make the output more diverse,
             while lower values make it more focused. (default: :obj:`0.9`)
-        presence_penalty (float, optional): Controls the repetition
+        presence_penalty (float, optional): Controls the repetition of
             content in the generated results. Positive values reduce the
             repetition of content, while negative values increase it.
             (default: :obj:`0.0`)
-        response_format (object, optional): Specifies the format of the
-            returned content. The available values are `{"type": "text"}`
-            or `{"type": "json_object"}`. Setting it to `{"type": "json_object"}`
-            will output a standard JSON string.
-            (default: :obj:`NOT_GIVEN`)
-        max_tokens (int, optional): Allows the model to
+        response_format (Optional[Dict[str, str]], optional): Specifies the
+            format of the returned content. The available values are
+            `{"type": "text"}` or `{"type": "json_object"}`. Setting it to
+            `{"type": "json_object"}` will output a standard JSON string.
+            (default: :obj:`None`)
+        max_tokens (Optional[int], optional): Allows the model to
             generate the maximum number of tokens.
-            (default: :obj:`NOT_GIVEN`)
-        seed (int, optional): Sets the seed parameter to make the text
-            generation process more deterministic, typically used to ensure
-            that the results are consistent across model runs. By passing the
-            same seed value (specified by you) in each model call while
-            keeping other parameters unchanged, the model is likely to return
-            the same result.
+            (default: :obj:`None`)
+        seed (Optional[int], optional): Sets the seed parameter to make the
+            text generation process more deterministic, typically used to
+            ensure that the results are consistent across model runs. By
+            passing the same seed value (specified by you) in each model call
+            while keeping other parameters unchanged, the model is likely to
+            return the same result.
             (default: :obj:`None`)
+        stop (Optional[Union[str, List]], optional): Using the stop parameter,
+            the model will automatically stop generating text when it is about
+            to include the specified string or token_id. You can use the stop
+            parameter to control the output of the model by passing sensitive
+            words. (default: :obj:`None`)
-        tools (list, optional): Specifies an array of tools that the model can
+        tools (List, optional): Specifies an array of tools that the model can
             call. It can contain one or more tool objects. During a function
             call process, the model will select one tool from the array.
             (default: :obj:`None`)
-        extra_body (dict, optional): Additional parameters to be sent to the
-            Qwen API. If you want to enable internet search, you can set this
-            parameter to `{"enable_search": True}`.
-            (default: :obj:`NOT_GIVEN`)
+        extra_body (Optional[Dict[str, str]], optional): Additional parameters
+            to be sent to the Qwen API. If you want to enable internet search,
+            you can set this parameter to `{"enable_search": True}`.
+            (default: :obj:`None`)
         include_usage (bool, optional): When streaming, specifies whether to
-            include usage information in `stream_options`. (default:
-            :obj:`True`)
+            include usage information in `stream_options`.
+            (default: :obj:`True`)
     """
 
     stream: bool = False
     temperature: float = 0.3
     top_p: float = 0.9
     presence_penalty: float = 0.0
-    response_format: Union[dict, NotGiven] = NOT_GIVEN
-    max_tokens: Union[int, NotGiven] = NOT_GIVEN
+    response_format: Optional[Dict[str, str]] = None
+    max_tokens: Optional[int] = None
     seed: Optional[int] = None
-    stop: Optional[Union[str, list]] = None
-    extra_body: Union[dict, NotGiven] = NOT_GIVEN
+    stop: Optional[Union[str, List]] = None
+    extra_body: Optional[Dict[str, str]] = None
 
     def __init__(self, include_usage: bool = True, **kwargs):
         super().__init__(**kwargs)
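For instance, the documented `extra_body` switch for Qwen's internet search would be passed like this (a sketch; it only shows how the config fields compose):

```python
from camel.configs import QwenConfig

config = QwenConfig(
    temperature=0.3,
    extra_body={"enable_search": True},  # documented Qwen-specific option
)
# response_format, max_tokens, stop, ... default to None and are
# stripped by BaseConfig.as_dict() before reaching the API client.
params = config.as_dict()
```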
camel/datagen/self_improving_cot.py
CHANGED
@@ -85,6 +85,7 @@ class SelfImprovingCoTPipeline:
         problems: List[Dict],
         max_iterations: int = 3,
         score_threshold: Union[float, Dict[str, float]] = 0.7,
+        rejection_sampling_n: Optional[int] = None,
         evaluate_agent: Optional[ChatAgent] = None,
         reward_model: Optional[BaseRewardModel] = None,
         output_path: Optional[str] = None,
@@ -111,6 +112,11 @@ class SelfImprovingCoTPipeline:
                 "coherence": 0.7}. If using reward model and threshold for a
                 dimension is not specified, will use the default value 0.7.
                 (default: :obj:`0.7`)
+            rejection_sampling_n (int, optional): Specifies the number of
+                samples to be drawn using the rejection sampling
+                method, where samples are accepted or rejected based on
+                a predefined condition to achieve a desired distribution.
+                (default: :obj: `None`)
             evaluate_agent (Optional[ChatAgent]): The chat agent used for
                 evaluating reasoning traces. (default: :obj:`None`)
             reward_model (BaseRewardModel, optional): Model used to evaluate
@@ -139,6 +145,7 @@ class SelfImprovingCoTPipeline:
         self.output_path = output_path
         self.max_iterations = max_iterations
         self.score_threshold = score_threshold
+        self.rejection_sampling_n = rejection_sampling_n
         self.reward_model = reward_model
         self.evaluator = (
             Evaluator(reward_model=reward_model) if reward_model else None
@@ -486,6 +493,71 @@ class SelfImprovingCoTPipeline:
 
         return evaluation.model_dump()
 
+    @retry_on_error()
+    def generate_reasoning_trace_rejection(self, problem: str) -> str:
+        r"""Generate multiple candidate reasoning traces for a problem and
+        select the best one based on evaluation.
+
+        Args:
+            problem (str): The problem text for generating a reasoning trace.
+
+        Returns:
+            str: The best candidate trace that meets quality criteria, or the
+                first candidate if none qualify.
+        """
+        few_shot_examples = (
+            f"Examples: {self.few_shot_examples}"
+            if self.few_shot_examples
+            else ""
+        )
+        prompt = self.REASONING_TEMPLATE.format(
+            problem=problem, few_shot_examples=few_shot_examples
+        )
+        responses, candidate_traces = None, []
+        if 'n' in self.reason_agent.model_backend.model_config_dict:
+            self.reason_agent.model_backend.model_config_dict['n'] = (
+                self.rejection_sampling_n
+            )
+            # Generate multiple condidate traces in one call using parameter n
+            responses = self.reason_agent.step(prompt)
+            # Extract cancidate traces
+            candidate_traces = [choice.content for choice in responses.msgs]
+        else:
+            sampling_n = (
+                self.rejection_sampling_n
+                if self.rejection_sampling_n is not None
+                else 1
+            )
+            for _i in range(sampling_n):
+                trace = self.generate_reasoning_trace(problem)
+                candidate_traces.append(trace)
+
+        best_trace = None
+        best_avg_score = 0.01
+        candidate_avg_scores = []
+        for trace in candidate_traces:
+            eval_results = self.evaluate_trace(problem, trace)
+            # Remove feedback from scores
+            scores = {k: v for k, v in eval_results.items() if k != "feedback"}
+            # Compute average score (assuming at least one score exists)
+            if scores:
+                avg_score = sum(scores.values()) / len(scores)
+            else:
+                avg_score = 0.0
+            candidate_avg_scores.append(avg_score)
+            # If the candidate meets the threshold and is the best, select it
+            if (
+                self._check_score_threshold(scores)
+                and avg_score > best_avg_score
+            ):
+                best_trace = trace
+                best_avg_score = avg_score
+        if best_trace is None:
+            best_trace = candidate_traces[
+                candidate_avg_scores.index(max(candidate_avg_scores))
+            ]
+        return best_trace
+
     @retry_on_error()
     def improve_trace(
         self,
@@ -602,7 +674,13 @@ class SelfImprovingCoTPipeline:
 
         problem_text = problem["problem"]
         solution_text = problem.get("solution", "")
-        current_trace = self.generate_reasoning_trace(problem_text)
+        current_trace = None
+        if self.rejection_sampling_n:
+            current_trace = self.generate_reasoning_trace_rejection(
+                problem_text
+            )
+        else:
+            current_trace = self.generate_reasoning_trace(problem_text)
         improvement_history = []
         scores = {}
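A sketch of enabling the new rejection-sampling path; `reason_agent` is assumed to be the pipeline's existing generator-agent parameter, and both agents need a configured model backend:

```python
from camel.agents import ChatAgent
from camel.datagen import SelfImprovingCoTPipeline

reason_agent = ChatAgent("You produce step-by-step reasoning traces.")
evaluate_agent = ChatAgent("You score reasoning traces for quality.")

pipeline = SelfImprovingCoTPipeline(
    reason_agent=reason_agent,
    evaluate_agent=evaluate_agent,
    problems=[{"problem": "If x + 3 = 7, what is x?"}],
    max_iterations=2,
    rejection_sampling_n=4,  # draw 4 candidates, keep the best-scoring one
)
results = pipeline.generate()
```

Note that backends supporting the `n` parameter draw all candidates in a single call; others fall back to `rejection_sampling_n` sequential generations.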
camel/datagen/self_instruct/filter/instruction_filter.py
CHANGED
@@ -11,14 +11,22 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
-from typing import Any, Dict, List
+from typing import Any, Dict, List, Tuple, Union
+
+from camel.logger import get_logger
 
 from .filter_function import FilterFunction, RewardModelFilter
 from .filter_registry import FILTER_REGISTRY
 
+logger = get_logger(__name__)
+
 
 class InstructionFilter:
-    def __init__(self, filters_config: Dict[str, Dict[str, Any]]):
+    def __init__(
+        self,
+        filters_config: Dict[str, Dict[str, Any]],
+        stop_on_first_failure: bool = False,
+    ):
         r"""Initialize the InstructionFilter with a dictionary of filter
         configurations.
 
@@ -37,12 +45,15 @@ class InstructionFilter:
             Each key in filters_config corresponds to a filter name
             (registered in FILTER_REGISTRY).
             Each value is a dict of parameters for that filter.
+            stop_on_first_failure (bool): If True, stops checking filters after
+                the first failure.
         """
         self.filters: List[FilterFunction] = []
         for filter_name, params in filters_config.items():
             if filter_name not in FILTER_REGISTRY:
                 raise ValueError(f"Unknown filter function: {filter_name}")
             self.filters.append(FILTER_REGISTRY[filter_name](params))
+        self.stop_on_first_failure: bool = stop_on_first_failure
 
     def add_filter(self, filter_function: FilterFunction):
         r"""Add a custom filter function to the InstructionFilter.
@@ -55,7 +66,7 @@ class InstructionFilter:
 
     def filter(
         self, prompt: str, instruction: str, return_details: bool = False
-    ):
+    ) -> Union[bool, Tuple[bool, List[str]]]:
         r"""Check if the given instruction passes all filter functions.
 
         Args:
@@ -75,6 +86,11 @@ class InstructionFilter:
             f.prompt = prompt
             if not f.apply(instruction):
                 failed_filters.append(type(f).__name__)
+                logger.warning(
+                    f"{type(f).__name__} failed instruction: {instruction}"
+                )
+                if self.stop_on_first_failure:
+                    break
 
         if return_details:
             return len(failed_filters) == 0, failed_filters
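A short sketch of the fail-fast behavior; the filter names and parameters below are illustrative and must match entries in FILTER_REGISTRY:

```python
from camel.datagen.self_instruct.filter.instruction_filter import (
    InstructionFilter,
)

# Hypothetical registry entries used for illustration.
filters_config = {
    "length": {"min_len": 5, "max_len": 200},
    "keyword": {"keywords": ["image", "video"]},
}
instruction_filter = InstructionFilter(
    filters_config, stop_on_first_failure=True
)
passed, failed = instruction_filter.filter(
    prompt="", instruction="Sort a list of integers.", return_details=True
)
# With stop_on_first_failure=True, `failed` holds at most one filter name,
# and a warning is logged for the failing filter.
```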
camel/datagen/self_instruct/self_instruct.py
CHANGED
@@ -45,6 +45,8 @@ class SelfInstructPipeline:
         filter_config (Optional[Dict[str, Dict[str, Any]]]): configuration
             for the filter functions registered in FILE_REGISTRY.
             (default::obj:`None`)
+        stop_on_first_failure (bool): If True, stops checking filters after
+            the first failure.
     """
 
     def __init__(
@@ -56,6 +58,7 @@ class SelfInstructPipeline:
         human_to_machine_ratio: tuple = (6, 2),
         instruction_filter: Optional[InstructionFilter] = None,
         filter_config: Optional[Dict[str, Dict[str, Any]]] = None,
+        stop_on_first_failure: bool = False,
     ):
         self.agent = agent
         self.num_machine_instructions = num_machine_instructions
@@ -80,7 +83,9 @@ class SelfInstructPipeline:
         config_to_use = (
             filter_config if filter_config is not None else default_config
         )
-        self.instruction_filter = InstructionFilter(config_to_use)
+        self.instruction_filter = InstructionFilter(
+            config_to_use, stop_on_first_failure
+        )
 
     def load_seed(self, path: str):
         r"""Load seed tasks from a file. Defaults to a predefined seed file if
@@ -361,7 +366,7 @@ class SelfInstructPipeline:
             in JSON format.
         """
         with open(self.data_output_path, 'w') as f:
-            json.dump(self.machine_tasks, f, indent=4)
+            json.dump(self.machine_tasks, f, indent=4, ensure_ascii=False)
 
     def generate(self):
         r"""Execute the entire pipeline to generate machine instructions
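The flag threads through the pipeline constructor to the `InstructionFilter` it builds; a minimal sketch, assuming a configured agent and the default seed tasks:

```python
from camel.agents import ChatAgent
from camel.datagen.self_instruct import SelfInstructPipeline

agent = ChatAgent()
pipeline = SelfInstructPipeline(
    agent=agent,
    num_machine_instructions=5,
    stop_on_first_failure=True,  # forwarded to InstructionFilter
)
pipeline.generate()
# Output JSON is now written with ensure_ascii=False, so non-ASCII
# instructions are stored verbatim instead of as \uXXXX escapes.
```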
camel/datasets/__init__.py
ADDED
@@ -0,0 +1,28 @@
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+from .base import (
+    BaseDataset,
+    DataPoint,
+    GenerativeDataset,
+    SeedDataset,
+    SyntheticDataset,
+)
+
+__all__ = [
+    "DataPoint",
+    "BaseDataset",
+    "SeedDataset",
+    "GenerativeDataset",
+    "SyntheticDataset",
+]
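As a quick check of the new public surface, the re-exported names can be imported directly from the package (the class definitions themselves live in camel/datasets/base.py):

```python
from camel.datasets import (
    BaseDataset,
    DataPoint,
    GenerativeDataset,
    SeedDataset,
    SyntheticDataset,
)
```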