PyPI - camel-ai - Versions diffs - 0.2.18__py3-none-any.whl → 0.2.19__py3-none-any.whl - Mend

camel-ai 0.2.18__py3-none-any.whl → 0.2.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of camel-ai might be problematic. Click here for more details.

Files changed (22) hide show

camel/__init__.py +1 -1
camel/agents/multi_hop_generator_agent.py +35 -3
camel/agents/programmed_agent_instruction.py +73 -18
camel/configs/gemini_config.py +1 -1
camel/configs/sglang_config.py +4 -0
camel/datagen/source2synth/__init__.py +31 -0
camel/{synthetic_datagen → datagen}/source2synth/data_processor.py +194 -29
camel/{synthetic_datagen → datagen}/source2synth/models.py +25 -0
camel/{synthetic_datagen → datagen}/source2synth/user_data_processor_config.py +9 -8
camel/embeddings/__init__.py +2 -0
camel/embeddings/jina_embedding.py +156 -0
camel/messages/func_message.py +1 -1
camel/models/deepseek_model.py +29 -11
camel/models/groq_model.py +0 -2
camel/models/openai_model.py +1 -9
camel/toolkits/search_toolkit.py +5 -6
camel/types/enums.py +68 -10
camel/utils/token_counting.py +1 -1
{camel_ai-0.2.18.dist-info → camel_ai-0.2.19.dist-info}/METADATA +5 -2
{camel_ai-0.2.18.dist-info → camel_ai-0.2.19.dist-info}/RECORD +22 -20
{camel_ai-0.2.18.dist-info → camel_ai-0.2.19.dist-info}/LICENSE +0 -0
{camel_ai-0.2.18.dist-info → camel_ai-0.2.19.dist-info}/WHEEL +0 -0

camel/__init__.py CHANGED Viewed

@@ -14,7 +14,7 @@
 from camel.logger import disable_logging, enable_logging, set_log_level
-__version__ = '0.2.18'
+__version__ = '0.2.19'
 __all__ = [
     '__version__',

camel/agents/multi_hop_generator_agent.py CHANGED Viewed

@@ -22,17 +22,36 @@ from camel.agents.programmed_agent_instruction import (
     ProgrammedAgentInstructionResult,
     programmable_capability,
 )
-from camel.messages import BaseMessage
-from camel.synthetic_datagen.source2synth.models import (
+from camel.datagen.source2synth.models import (
     ContextPrompt,
     MultiHopQA,
 )
+from camel.messages import BaseMessage
 class MultiHopGeneratorAgent(ProgrammableChatAgent):
+    r"""An agent specialized in generating multi-hop question-answer pairs.
+    This agent is designed to create complex questions that require multiple
+    steps of reasoning to answer. It analyzes context to identify related
+    facts and generates questions that require connecting these facts
+    logically.
+    Attributes:
+        model_config (ConfigDict): Configuration for model behavior.
+        system_message (BaseMessage): System message defining agent's role and
+            instructions.
+    """
     model_config = ConfigDict(arbitrary_types_allowed=True)
-    def __init__(self, **kwargs: Any):
+    def __init__(self, **kwargs: Any) -> None:
+        r"""Initialize the MultiHopGeneratorAgent.
+        Args:
+            **kwargs (Any): Additional keyword arguments to pass to parent
+                class.
+        """
         super().__init__(**kwargs)
         system_text: str = textwrap.dedent(
@@ -64,6 +83,19 @@ class MultiHopGeneratorAgent(ProgrammableChatAgent):
     def generate_multi_hop_qa(
         self, context: str
     ) -> ProgrammedAgentInstructionResult[MultiHopQA]:
+        r"""Generate a multi-hop question-answer pair from given context.
+        Args:
+            context (str): The input text context to generate QA from.
+        Returns:
+            ProgrammedAgentInstructionResult[MultiHopQA]: Result containing the
+                generated question, reasoning steps, answer, and supporting
+                facts.
+        Raises:
+            RuntimeError: If the agent fails to generate a response.
+        """
         context_prompt = ContextPrompt(
             main_context=context, related_contexts=None
         )

camel/agents/programmed_agent_instruction.py CHANGED Viewed

@@ -26,6 +26,16 @@ T = TypeVar('T')
 class ProgrammableAgentRequirement(Enum):
+    r"""Requirements for programmable agent state.
+    Defines the possible requirements that can be used to repair the state
+    of a programmable agent.
+    Attributes:
+        LAST_MESSAGE_NOT_USER (str): Requires that the last message in the
+            conversation was not from the user.
+    """
     LAST_MESSAGE_NOT_USER = "LAST_MESSAGE_NOT_USER"
@@ -34,6 +44,11 @@ class ProgrammedAgentInstructionResult(BaseModel, Generic[T]):
     Contains the messages exchanged during execution and the computed value.
     The value type is specified by the generic type parameter T.
+    Attributes:
+        user_message (BaseMessage): The message sent by the user.
+        agent_message (BaseMessage): The message sent by the agent.
+        value (T): The computed result value of type T.
     """
     user_message: BaseMessage
@@ -48,8 +63,7 @@ class AbstractProgrammableAgent(abc.ABC):
     A programmable agent is an agent that can be programmed to perform a
     specific function or task. This class defines the interface for a
-    programmable
-    agent.
+    programmable agent.
     These methods should be implemented in order to ensure the agent supports
     the necessary guarantees to enable a programming interface while
@@ -68,16 +82,15 @@ class AbstractProgrammableAgent(abc.ABC):
         An atomic operation is an operation that is guaranteed to
         be executed without interruption by any other operation.
-        If the operation fails or times out the agents state should be
-        unchanged.
+        Args:
+            callback (Callable[[], ProgrammedAgentInstructionResult[T]]): The
+                operation to execute atomically.
-        If an operation is already in progress, this method should throw an
-        exception. (It is up to the caller to do any queuing)
+        Returns:
+            ProgrammedAgentInstructionResult[T]: The result of the operation.
-        If the agent is in a state where it can perform the operation,
-        it must leave the agent in a state where it can perform the
-        operation again. Though if state changes in successful operation
-        improve its ability to perform the operation, it should keep them.
+        Raises:
+            RuntimeError: If an operation is already in progress.
         """
         raise NotImplementedError
@@ -86,10 +99,13 @@ class AbstractProgrammableAgent(abc.ABC):
         r"""Repair the state of the agent.
         Agents may have other non-atomic interfaces, such as a user interface,
-        or chat between other agents.
+        or chat between other agents. This method should restore the agent to
+        a state where it can perform operations according to the specified
+        requirement.
-        This method should restore the agent to a state where it can perform
-        operations according to the specified requirement.
+        Args:
+            requirement (ProgrammableAgentRequirement): The requirement to
+                repair the state for.
         """
         raise NotImplementedError
@@ -99,10 +115,16 @@ def programmable_capability(
 ) -> Callable[..., ProgrammedAgentInstructionResult[T]]:
     r"""Decorator for programmable agent capabilities.
-    Wraps a method to ensure it is executed atomically via the agent's
-    run_atomic interface.
-    The decorated method must return a ProgrammedAgentInstructionResult with
-    appropriate type parameter.
+    This decorator ensures that the decorated method is executed atomically
+    and maintains the agent's state guarantees.
+    Args:
+        func (Callable[..., ProgrammedAgentInstructionResult[T]]): The method
+            to decorate.
+    Returns:
+        Callable[..., ProgrammedAgentInstructionResult[T]]: The decorated
+            method that ensures atomic execution.
     """
     @wraps(func)
@@ -120,9 +142,20 @@ class ProgrammableChatAgent(ChatAgent, AbstractProgrammableAgent):
     Provides a default implementation of atomic execution using threading locks
     and basic state tracking for message roles. Implementing classes need to
     provide specific repair logic for their use cases.
+    Attributes:
+        _operation_lock (threading.Lock): Lock for ensuring atomic operations.
+        _last_message_role (Optional[str]): Role of the last message in the
+            conversation.
     """
-    def __init__(self, **kwargs: Any):
+    def __init__(self, **kwargs: Any) -> None:
+        r"""Initialize the ProgrammableChatAgent.
+        Args:
+            **kwargs (Any): Additional keyword arguments to pass to parent
+                class.
+        """
         super().__init__(**kwargs)
         self._operation_lock = threading.Lock()
         self._last_message_role: Optional[str] = None
@@ -130,6 +163,20 @@ class ProgrammableChatAgent(ChatAgent, AbstractProgrammableAgent):
     def run_atomic(
         self, callback: Callable[[], ProgrammedAgentInstructionResult[T]]
     ) -> ProgrammedAgentInstructionResult[T]:
+        r"""Run an atomic operation on the agent.
+        Ensures thread-safe execution of the callback function by using a lock.
+        Args:
+            callback (Callable[[], ProgrammedAgentInstructionResult[T]]): The
+                operation to execute atomically.
+        Returns:
+            ProgrammedAgentInstructionResult[T]: The result of the operation.
+        Raises:
+            RuntimeError: If an operation is already in progress.
+        """
         if not self._operation_lock.acquire(blocking=False):
             raise RuntimeError("Operation already in progress")
@@ -141,6 +188,14 @@ class ProgrammableChatAgent(ChatAgent, AbstractProgrammableAgent):
             self._operation_lock.release()
     def repair_state(self, requirement: ProgrammableAgentRequirement) -> None:
+        r"""Repair the state of the agent.
+        Implements basic state repair for message role requirements.
+        Args:
+            requirement (ProgrammableAgentRequirement): The requirement to
+                repair the state for.
+        """
         if requirement == ProgrammableAgentRequirement.LAST_MESSAGE_NOT_USER:
             if self._last_message_role == "user":
                 raise NotImplementedError(

camel/configs/gemini_config.py CHANGED Viewed

@@ -83,7 +83,7 @@ class GeminiConfig(BaseConfig):
     stop: Union[str, Sequence[str], NotGiven] = NOT_GIVEN
     max_tokens: Union[int, NotGiven] = NOT_GIVEN
     response_format: Union[Type[BaseModel], dict, NotGiven] = NOT_GIVEN
-    tool_choice: Optional[Union[dict[str, str], str]] = None
+    tool_choice: Optional[Union[dict[str, str], str, NotGiven]] = NOT_GIVEN
     def as_dict(self) -> dict[str, Any]:
         r"""Convert the current configuration to a dictionary.

camel/configs/sglang_config.py CHANGED Viewed

@@ -56,6 +56,10 @@ class SGLangConfig(BaseConfig):
             in the chat completion. The total length of input tokens and
             generated tokens is limited by the model's context length.
             (default: :obj:`None`)
+        tools (list[FunctionTool], optional): A list of tools the model may
+            call. Currently, only functions are supported as a tool. Use this
+            to provide a list of functions the model may generate JSON inputs
+            for. A max of 128 functions are supported.
     """
     stop: Union[str, Sequence[str], NotGiven] = NOT_GIVEN

camel/datagen/source2synth/__init__.py ADDED Viewed

@@ -0,0 +1,31 @@
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+from .data_processor import (
+    DataCurator,
+    ExampleConstructor,
+    UserDataProcessor,
+)
+from .models import MultiHopQA, ReasoningStep
+from .user_data_processor_config import (
+    ProcessorConfig,
+)
+__all__ = [
+    "DataCurator",
+    "ExampleConstructor",
+    "ProcessorConfig",
+    "UserDataProcessor",
+    "ReasoningStep",
+    "MultiHopQA",
+]

camel/{synthetic_datagen → datagen}/source2synth/data_processor.py RENAMED Viewed

@@ -15,33 +15,61 @@
 import random
 from typing import Any, Dict, List, Optional, Sequence
-import numpy as np
 from tqdm import tqdm
 from camel.agents.multi_hop_generator_agent import MultiHopGeneratorAgent
-from camel.logger import get_logger
-from camel.synthetic_datagen.source2synth.user_data_processor_config import (
+from camel.datagen.source2synth.user_data_processor_config import (
     ProcessorConfig,
 )
+from camel.logger import get_logger
 logger = get_logger(__name__)
 class UserDataProcessor:
-    r"""User Data Processor."""
+    r"""A processor for generating multi-hop question-answer pairs from user
+    data.
+    This class handles the processing of text data to generate multi-hop
+    question-answer pairs using either an AI model or rule-based approaches.
+    It manages the entire pipeline from text preprocessing to dataset curation.
+    Attributes:
+        config (ProcessorConfig): Configuration for data processing parameters.
+        rng (random.Random): Random number generator for reproducibility.
+        multi_hop_agent (Optional[MultiHopGeneratorAgent]): Agent for
+            generating QA pairs.
+    """
     def __init__(self, config: Optional[ProcessorConfig] = None):
+        r"""Initialize the UserDataProcessor.
+        Args:
+            config (Optional[ProcessorConfig], optional): Configuration for
+                data processing. (default: :obj:`None`)
+        """
         self.config = config or ProcessorConfig()
-        random.seed(self.config.seed)
-        np.random.seed(self.config.seed)
+        self.rng = random.Random(self.config.seed)
         self.multi_hop_agent = (
-            MultiHopGeneratorAgent() if self.config.use_ai_model else None
+            self.config.hop_generating_agent
+            if self.config.use_ai_model
+            else None
         )
     def process_text(
         self, text: str, source: str = "user_input"
     ) -> List[Dict[str, Any]]:
-        r"""Process a single text."""
+        r"""Process a single text to generate multi-hop QA pairs.
+        Args:
+            text (str): The input text to process.
+            source (str, optional): Source identifier for the text.
+                (default: :obj:`"user_input"`)
+        Returns:
+            List[Dict[str, Any]]: List of processed examples with QA pairs and
+                metadata.
+        """
         # Convert text to standard format
         raw_data = [
             {
@@ -55,7 +83,7 @@ class UserDataProcessor:
         examples = constructor.construct_examples(raw_data)
         # Manage data
-        curator = DataCurator(self.config)
+        curator = DataCurator(self.config, self.rng)
         final_dataset = curator.curate_dataset(examples)
         return final_dataset
@@ -63,7 +91,20 @@ class UserDataProcessor:
     def process_batch(
         self, texts: List[str], sources: Optional[List[str]] = None
     ) -> List[Dict[str, Any]]:
-        r"""Process multiple texts in batch."""
+        r"""Process multiple texts in batch to generate multi-hop QA pairs.
+        Args:
+            texts (List[str]): List of input texts to process.
+            sources (Optional[List[str]], optional): List of source
+                identifiers. (default: :obj:`None`)
+        Returns:
+            List[Dict[str, Any]]: List of processed examples with QA pairs and
+                metadata.
+        Raises:
+            ValueError: If length of sources doesn't match length of texts.
+        """
         if sources is None:
             sources = ["user_input"] * len(texts)
         elif len(sources) != len(texts):
@@ -82,27 +123,52 @@ class UserDataProcessor:
         examples = constructor.construct_examples(raw_data)
         # Manage data
-        curator = DataCurator(self.config)
+        curator = DataCurator(self.config, self.rng)
         final_dataset = curator.curate_dataset(examples)
         return final_dataset
 class ExampleConstructor:
-    r"""Example Constructor."""
+    r"""Constructs training examples from raw text data.
+    This class handles the construction of training examples by preprocessing
+    text, extracting information pairs, and generating question-answer pairs.
+    Attributes:
+        config (ProcessorConfig): Configuration for example construction.
+        multi_hop_agent (Optional[MultiHopGeneratorAgent]): Agent for QA
+            generation.
+    """
     def __init__(
         self,
         config: ProcessorConfig,
         multi_hop_agent: Optional[MultiHopGeneratorAgent] = None,
     ):
+        r"""Initialize the ExampleConstructor.
+        Args:
+            config (ProcessorConfig): Configuration for example construction.
+            multi_hop_agent (Optional[MultiHopGeneratorAgent], optional):
+                Agent for generating multi-hop QA pairs. (default: :obj:`None`)
+        """
         self.config = config
         self.multi_hop_agent = multi_hop_agent
     def construct_examples(
         self, raw_data: List[Dict[str, Any]]
     ) -> List[Dict[str, Any]]:
-        r"""Construct training examples."""
+        r"""Construct training examples from raw data.
+        Args:
+            raw_data (List[Dict[str, Any]]): List of raw data dictionaries
+                containing text and metadata.
+        Returns:
+            List[Dict[str, Any]]: List of constructed examples with QA pairs
+                and metadata.
+        """
         logger.info("Starting to construct training examples...")
         examples = []
@@ -135,7 +201,15 @@ class ExampleConstructor:
         return examples
     def _preprocess_text(self, text: str) -> str:
-        r"""Text preprocessing."""
+        r"""Preprocess input text for example construction.
+        Args:
+            text (str): Input text to preprocess.
+        Returns:
+            str: Preprocessed text, or empty string if text fails quality
+                checks.
+        """
         if not isinstance(text, str):
             return ''
@@ -156,7 +230,14 @@ class ExampleConstructor:
         return text
     def _check_text_quality(self, text: str) -> bool:
-        r"""Check text quality."""
+        r"""Check the quality of input text.
+        Args:
+            text (str): Text to check quality for.
+        Returns:
+            bool: True if text passes quality checks, False otherwise.
+        """
         # 1. Basic quality check
         if text.count('.') < 2:  # Must have at least 2 sentences
             return False
@@ -171,7 +252,15 @@ class ExampleConstructor:
         return True
     def _extract_info_pairs(self, text: str) -> List[Dict[str, Sequence[str]]]:
-        r"""Extract information pairs and relationships."""
+        r"""Extract information pairs and relationships from text.
+        Args:
+            text (str): Input text to extract information from.
+        Returns:
+            List[Dict[str, Sequence[str]]]: List of dictionaries containing
+                premise, intermediate, conclusion, and related contexts.
+        """
         # Split into sentences
         sentences = [s.strip() for s in text.split('.') if s.strip()]
         info_pairs = []
@@ -200,7 +289,15 @@ class ExampleConstructor:
     def _generate_qa_pairs(
         self, info_pairs: List[Dict[str, Sequence[str]]]
     ) -> List[Dict[str, str]]:
-        r"""Generate multi-hop question-answer pairs."""
+        r"""Generate multi-hop question-answer pairs from information pairs.
+        Args:
+            info_pairs (List[Dict[str, Sequence[str]]]): List of information
+                pairs extracted from text.
+        Returns:
+            List[Dict[str, str]]: List of generated QA pairs.
+        """
         qa_pairs = []
         for pair in info_pairs:
@@ -219,7 +316,15 @@ class ExampleConstructor:
         return qa_pairs
     def _calculate_complexity(self, qa_pairs: List[Dict[str, Any]]) -> float:
-        r"""Calculate complexity of QA pairs."""
+        r"""Calculate the complexity score for a set of QA pairs.
+        Args:
+            qa_pairs (List[Dict[str, Any]]): List of QA pairs to calculate
+                complexity for.
+        Returns:
+            float: Complexity score between 0.0 and 1.0.
+        """
         if not qa_pairs:
             return 0.0
@@ -233,10 +338,10 @@ class ExampleConstructor:
             supporting_facts_count = len(qa.get('supporting_facts', []))
             # 3. Question length
-            question_length = len(qa['question'].split())
+            question_length = len(qa.get('question', '').split())
             # 4. Answer length
-            answer_length = len(qa['answer'].split())
+            answer_length = len(qa.get('answer', '').split())
             # Calculate complexity of a single QA pair
             qa_complexity = (
@@ -256,15 +361,37 @@ class ExampleConstructor:
 class DataCurator:
-    r"""Data Manager."""
+    r"""Manages and curates datasets of multi-hop question-answer pairs.
+    This class handles dataset management tasks including quality filtering,
+    complexity filtering, deduplication, and dataset sampling.
-    def __init__(self, config: ProcessorConfig):
+    Attributes:
+        config (ProcessorConfig): Configuration for data curation parameters.
+        rng (random.Random): Random number generator for reproducible sampling.
+    """
+    def __init__(self, config: ProcessorConfig, rng: random.Random):
+        r"""Initialize the DataCurator.
+        Args:
+            config (ProcessorConfig): Configuration for data curation.
+            rng (random.Random): Random number generator for reproducibility.
+        """
         self.config = config
+        self.rng = rng
     def curate_dataset(
         self, examples: List[Dict[str, Any]]
     ) -> List[Dict[str, Any]]:
-        r"""Dataset management."""
+        r"""Manage and curate a dataset through multiple filtering stages.
+        Args:
+            examples (List[Dict[str, Any]]): List of examples to curate.
+        Returns:
+            List[Dict[str, Any]]: Curated dataset meeting quality criteria.
+        """
         logger.info("Starting dataset management...")
         # 1. Quality filtering
@@ -296,7 +423,14 @@ class DataCurator:
     def _quality_filter(
         self, examples: List[Dict[str, Any]]
     ) -> List[Dict[str, Any]]:
-        r"""Quality filtering."""
+        r"""Filter examples based on quality criteria.
+        Args:
+            examples (List[Dict[str, Any]]): List of examples to filter.
+        Returns:
+            List[Dict[str, Any]]: Examples that pass quality checks.
+        """
         filtered = []
         for example in examples:
@@ -314,7 +448,14 @@ class DataCurator:
         return filtered
     def _check_qa_quality(self, qa_pairs: List[Dict[str, str]]) -> bool:
-        r"""Check quality of QA pairs."""
+        r"""Check the quality of question-answer pairs.
+        Args:
+            qa_pairs (List[Dict[str, str]]): List of QA pairs to check.
+        Returns:
+            bool: True if QA pairs meet quality criteria, False otherwise.
+        """
         if not qa_pairs:
             return False
@@ -335,7 +476,17 @@ class DataCurator:
     def _complexity_filter(
         self, examples: List[Dict[str, Any]]
     ) -> List[Dict[str, Any]]:
-        r"""Complexity filtering."""
+        """
+        Filter examples based on complexity threshold.
+        Removes examples with complexity scores below the configured threshold.
+        Args:
+            examples (List[Dict[str, Any]]): List of examples to filter.
+        Returns:
+            List[Dict[str, Any]]: Examples meeting complexity threshold.
+        """
         return [
             example
             for example in examples
@@ -346,7 +497,14 @@ class DataCurator:
     def _remove_duplicates(
         self, examples: List[Dict[str, Any]]
     ) -> List[Dict[str, Any]]:
-        r"""Remove duplicates."""
+        r"""Remove duplicate examples from the dataset.
+        Args:
+            examples (List[Dict[str, Any]]): List of examples to deduplicate.
+        Returns:
+            List[Dict[str, Any]]: Deduplicated examples.
+        """
         seen = set()
         unique_examples = []
@@ -366,8 +524,15 @@ class DataCurator:
     def _sample_dataset(
         self, examples: List[Dict[str, Any]]
     ) -> List[Dict[str, Any]]:
-        r"""Sample to target dataset size."""
+        r"""Sample examples to match target dataset size.
+        Args:
+            examples (List[Dict[str, Any]]): List of examples to sample from.
+        Returns:
+            List[Dict[str, Any]]: Sampled dataset of target size or smaller.
+        """
         if len(examples) <= self.config.dataset_size:
             return examples
-        return random.sample(examples, self.config.dataset_size)
+        return self.rng.sample(examples, self.config.dataset_size)

camel-ai 0.2.18__py3-none-any.whl → 0.2.19__py3-none-any.whl

Potentially problematic release.

camel-ai 0.2.18__py3-none-any.whl → 0.2.19__py3-none-any.whl