camel-ai 0.2.25__py3-none-any.whl → 0.2.26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of camel-ai might be problematic.
- camel/__init__.py +1 -1
- camel/agents/chat_agent.py +4 -4
- camel/agents/knowledge_graph_agent.py +15 -3
- camel/configs/anthropic_config.py +0 -1
- camel/datasets/base.py +219 -17
- camel/environments/base.py +16 -8
- camel/extractors/__init__.py +2 -2
- camel/extractors/base.py +86 -64
- camel/extractors/python_strategies.py +226 -0
- camel/models/anthropic_model.py +19 -55
- camel/py.typed +0 -0
- camel/storages/graph_storages/graph_element.py +3 -1
- camel/storages/graph_storages/neo4j_graph.py +78 -4
- camel/toolkits/__init__.py +2 -0
- camel/toolkits/pubmed_toolkit.py +346 -0
- camel/toolkits/terminal_toolkit.py +2 -2
- {camel_ai-0.2.25.dist-info → camel_ai-0.2.26.dist-info}/METADATA +2 -1
- {camel_ai-0.2.25.dist-info → camel_ai-0.2.26.dist-info}/RECORD +20 -17
- {camel_ai-0.2.25.dist-info → camel_ai-0.2.26.dist-info}/WHEEL +0 -0
- {camel_ai-0.2.25.dist-info → camel_ai-0.2.26.dist-info}/licenses/LICENSE +0 -0
camel/__init__.py
CHANGED
camel/agents/chat_agent.py
CHANGED
@@ -699,12 +699,12 @@ class ChatAgent(BaseAgent):
         if not response and self.model_backend.num_models > 1:
             raise ModelProcessingError(
                 "Unable to process messages: none of the provided models "
-                "run
+                "run successfully."
             )
         elif not response:
             raise ModelProcessingError(
                 f"Unable to process messages: the only provided model "
-                f"did not run
+                f"did not run successfully. Error: {error_info}"
             )

         logger.info(

@@ -744,12 +744,12 @@ class ChatAgent(BaseAgent):
         if not response and self.model_backend.num_models > 1:
             raise ModelProcessingError(
                 "Unable to process messages: none of the provided models "
-                "run
+                "run successfully."
             )
         elif not response:
             raise ModelProcessingError(
                 f"Unable to process messages: the only provided model "
-                f"did not run
+                f"did not run successfully. Error: {error_info}"
             )

         logger.info(
camel/agents/knowledge_graph_agent.py
CHANGED

@@ -226,7 +226,8 @@ class KnowledgeGraphAgent(ChatAgent):
         node_pattern = r"Node\(id='(.*?)', type='(.*?)'\)"
         rel_pattern = (
             r"Relationship\(subj=Node\(id='(.*?)', type='(.*?)'\), "
-            r"obj=Node\(id='(.*?)', type='(.*?)'\),
+            r"obj=Node\(id='(.*?)', type='(.*?)'\), "
+            r"type='(.*?)'(?:, timestamp='(.*?)')?\)"
         )

         nodes = {}

@@ -243,13 +244,24 @@ class KnowledgeGraphAgent(ChatAgent):

         # Extract relationships
         for match in re.finditer(rel_pattern, input_string):
-
+            groups = match.groups()
+            if len(groups) == 6:
+                subj_id, subj_type, obj_id, obj_type, rel_type, timestamp = (
+                    groups
+                )
+            else:
+                subj_id, subj_type, obj_id, obj_type, rel_type = groups
+                timestamp = None
             properties = {'source': 'agent_created'}
             if subj_id in nodes and obj_id in nodes:
                 subj = nodes[subj_id]
                 obj = nodes[obj_id]
                 relationship = Relationship(
-                    subj=subj,
+                    subj=subj,
+                    obj=obj,
+                    type=rel_type,
+                    timestamp=timestamp,
+                    properties=properties,
                 )
                 if self._validate_relationship(relationship):
                     relationships.append(relationship)
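The relationship pattern above now tolerates an optional timestamp capture. A minimal sketch of how the updated regex behaves; the sample strings are illustrative, not taken from the package:

import re

rel_pattern = (
    r"Relationship\(subj=Node\(id='(.*?)', type='(.*?)'\), "
    r"obj=Node\(id='(.*?)', type='(.*?)'\), "
    r"type='(.*?)'(?:, timestamp='(.*?)')?\)"
)

with_ts = (
    "Relationship(subj=Node(id='CAMEL', type='Project'), "
    "obj=Node(id='AI', type='Field'), type='BelongsTo', "
    "timestamp='2024-01-01')"
)
without_ts = (
    "Relationship(subj=Node(id='CAMEL', type='Project'), "
    "obj=Node(id='AI', type='Field'), type='BelongsTo')"
)

for s in (with_ts, without_ts):
    # The final group is the timestamp capture; it is None when the
    # timestamp field is absent from the input string.
    print(re.search(rel_pattern, s).groups())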
camel/configs/anthropic_config.py
CHANGED

@@ -70,7 +70,6 @@ class AnthropicConfig(BaseConfig):
     stop_sequences: ClassVar[Union[List[str], NotGiven]] = []
     temperature: float = 1
     top_p: Union[float, NotGiven] = 0.7
-    top_k: Union[int, NotGiven] = 5
     stream: bool = False
     metadata: Union[dict, NotGiven] = NotGiven()
     thinking: Union[dict, NotGiven] = NotGiven()
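With the top_k field removed, configs written against 0.2.25 may need updating. A minimal sketch, assuming the class is importable from camel.configs.anthropic_config and that the config base class rejects unknown fields:

from camel.configs.anthropic_config import AnthropicConfig

# `top_k` no longer exists in 0.2.26; build the config from the
# surviving sampling fields shown in the diff.
config = AnthropicConfig(temperature=1.0, top_p=0.7, stream=False)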
camel/datasets/base.py
CHANGED
@@ -12,14 +12,17 @@
 # limitations under the License.
 # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========

+import json
 import os
 import random
+from pathlib import Path
 from typing import (
     Any,
     Callable,
     Dict,
     List,
     Optional,
+    Sized,
     TypeVar,
     Union,
 )

@@ -326,42 +329,241 @@ class SeedDataset(BaseDataset):
     r"""A dataset containing validated seed examples for data generation.
     Ensures that all items adhere to the DataPoint schema.

-    This class
-
+    This class can initialize from Hugging Face Datasets,
+    PyTorch Datasets, JSON file paths, or lists of dictionaries,
+    converting them into a consistent internal format.
     """

     def __init__(
         self,
-        data: List[Dict[str,
+        data: Union[HFDataset, Dataset, Path, List[Dict[str, Any]]],
         cache_dir: Optional[str] = None,
+        seed: Optional[int] = None,
         min_samples: int = 1,
+        strict: bool = False,
         **kwargs,
     ):
-        r"""Initialize the seed dataset.
+        r"""Initialize the seed dataset and validate integrity.

         Args:
-            data (List[Dict[str,
-
-
-            (
+            data (Union[HFDataset, Dataset, Path, List[Dict[str, Any]]]):
+                Input data, which can be:
+                - A Hugging Face Dataset (HFDataset)
+                - A PyTorch Dataset (torch.utils.data.Dataset)
+                - A Path object representing the path to a JSON file
+                - A list of dictionaries with DataPoint-compatible fields
+            seed (Optional[int]): Seed for reproducibility.
+                (default: :obj:`1`)
             min_samples (int): Minimum number of samples required.
                 (default: :obj:`1`)
+            strict (bool): Whether to raise an error on invalid datapoints
+                (True) or skip/filter them (False). (default: False)
             **kwargs: Additional dataset parameters.

         Raises:
-
-
+            TypeError: If the data type is not supported.
+            ValueError: If dataset size is less than min_samples or
+                if sample validation fails.
+            FileNotFoundError: If the JSON file path doesn't exist.
+            json.JSONDecodeError: If the JSON file is invalid.
         """
-
+        # Initialize BaseDataset with empty data, we'll populate it ourselves
+        super().__init__(data=[], cache_dir=cache_dir, **kwargs)
+
+        self._rng = random.Random(seed)
+        self._strict = strict
+
+        # Type checking and conversion into list of dicts to have a
+        # consistent internal format. Since Seed Dataset should be
+        # small, we can load it entirely into memory
+
+        self.data: List[DataPoint] = self._init_data(data)
+        self._length = len(self.data)
+
+        if self._length < min_samples:
             raise ValueError(
-
+                "The dataset does not contain enough samples. "
+                f"Need {max(0, min_samples)}, got {self._length}"
             )

-
-
-
-
-
+    def _init_data(
+        self, data: Union[HFDataset, Dataset, Path, List[Dict[str, Any]]]
+    ) -> List[DataPoint]:
+        if isinstance(data, HFDataset):
+            raw_data = self._init_from_hf_dataset(data)
+        elif isinstance(data, Dataset):
+            raw_data = self._init_from_pytorch_dataset(data)
+        elif isinstance(data, Path):
+            raw_data = self._init_from_json_path(data)
+        elif isinstance(data, list):
+            raw_data = self._init_from_list(data)
+        else:
+            raise TypeError("Unsupported data type")
+
+        def create_datapoint(
+            item: Dict[str, Any], idx: int
+        ) -> Optional[DataPoint]:
+            # Add type checks for required fields to make mypy happy
+            question = item.get('question')
+            if not isinstance(question, str):
+                if self._strict:
+                    raise ValueError(
+                        f"Sample at index {idx} has invalid 'question': "
+                        f"expected str, got {type(question)}"
+                    )
+                else:
+                    logger.warning(
+                        f"Skipping sample at index {idx}: invalid 'question'"
+                    )
+                    return None
+
+            rationale = item.get('rationale')
+            if not isinstance(rationale, str):
+                if self._strict:
+                    raise ValueError(
+                        f"Sample at index {idx} has invalid 'rationale': "
+                        f"expected str, got {type(rationale)}"
+                    )
+                else:
+                    logger.warning(
+                        f"Skipping sample at index {idx}: invalid 'rationale'"
+                    )
+                    return None
+
+            final_answer = item.get('final_answer')
+            if not isinstance(final_answer, str):
+                if self._strict:
+                    raise ValueError(
+                        f"Sample at index {idx} has invalid 'final_answer': "
+                        f"expected str, got {type(final_answer)}"
+                    )
+                else:
+                    logger.warning(
+                        f"Skipping sample at index {idx}: "
+                        "invalid 'final_answer'"
+                    )
+                    return None
+
+            try:
+                return DataPoint(
+                    question=question,
+                    rationale=rationale,
+                    final_answer=final_answer,
+                    metadata=item.get('metadata'),
+                    difficulty=item.get('difficulty'),
+                )
+            except ValidationError as e:
+                if self._strict:
+                    raise ValueError(
+                        f"Sample at index {idx} validation error: {e}"
+                    )
+                else:
+                    logger.warning(
+                        f"Skipping invalid sample at index {idx} "
+                        f"due to validation error: {e}"
+                    )
+                    return None
+
+        unfiltered_data = [
+            create_datapoint(item, i) for i, item in enumerate(raw_data)
+        ]
+        return [dp for dp in unfiltered_data if dp is not None]
+
+    def __len__(self) -> int:
+        r"""Return the size of the dataset."""
+        return self._length
+
+    def __getitem__(self, idx: int) -> DataPoint:
+        r"""Get an item from the dataset.
+
+        Args:
+            idx (int): Index of the item to get.
+
+        Returns:
+            DataPoint: DataPoint from the dataset with the given index.
+
+        Raises:
+            IndexError: If idx is out of bounds.
+        """
+        if idx < 0 or idx >= self._length:
+            raise IndexError(
+                f"Index {idx} out of bounds for dataset of size {self._length}"
+            )
+        return self.data[idx]
+
+    def sample(self) -> DataPoint:
+        r"""Sample a random datapoint from the dataset.
+
+        Returns:
+            DataPoint: A randomly sampled DataPoint.
+
+        Raises:
+            RuntimeError: If the dataset is empty.
+        """
+        if self._length == 0:
+            raise RuntimeError("Dataset is empty, cannot sample.")
+        idx = self._rng.randint(0, self._length - 1)
+        return self[idx]
+
+    @property
+    def metadata(self) -> Dict[str, Any]:
+        r"""Get dataset metadata."""
+        return self._metadata.copy()
+
+    def _init_from_hf_dataset(self, data: HFDataset) -> List[Dict[str, Any]]:
+        return [dict(item) for item in data]
+
+    def _init_from_pytorch_dataset(
+        self, data: Dataset
+    ) -> List[Dict[str, Any]]:
+        if not isinstance(data, Sized):
+            raise TypeError(
+                f"{type(data).__name__} does not implement `__len__()`."
+            )
+        raw_data = []
+
+        for i in range(len(data)):
+            item = data[i]
+            if not isinstance(item, dict):
+                raise TypeError(
+                    f"Item at index {i} is not a dict: "
+                    f"got {type(item).__name__}"
+                )
+            raw_data.append(dict(item))
+        return raw_data
+
+    def _init_from_json_path(self, data: Path) -> List[Dict[str, Any]]:
+        if not data.exists():
+            raise FileNotFoundError(f"JSON file not found: {data}")
+        try:
+            logger.debug(f"Loading JSON from {data}")
+            with data.open('r', encoding='utf-8') as f:
+                loaded_data = json.load(f)
+            logger.info(
+                f"Successfully loaded {len(loaded_data)} items from {data}"
+            )
+        except json.JSONDecodeError as e:
+            raise ValueError(f"Invalid JSON in file {data}: {e}")
+        if not isinstance(loaded_data, list):
+            raise ValueError("JSON file must contain a list of dictionaries")
+        for i, item in enumerate(loaded_data):
+            if not isinstance(item, dict):
+                raise ValueError(
+                    f"Expected a dictionary at index {i}, "
+                    f"got {type(item).__name__}"
+                )
+        return loaded_data
+
+    def _init_from_list(
+        self, data: List[Dict[str, Any]]
+    ) -> List[Dict[str, Any]]:
+        for i, item in enumerate(data):
+            if not isinstance(item, dict):
+                raise ValueError(
+                    f"Expected a dictionary at index {i}, "
+                    f"got {type(item).__name__}"
+                )
+        return data


 class SyntheticDataset(BaseDataset):
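The reworked SeedDataset accepts several input shapes. A minimal usage sketch; the DataPoint field names come from the diff, while the import path and file name are assumptions:

from pathlib import Path

from camel.datasets.base import SeedDataset  # import path assumed

seeds = [
    {
        "question": "What is 2 + 2?",
        "rationale": "Add the two integers.",
        "final_answer": "4",
    },
    {"question": "incomplete row"},  # filtered out when strict=False
]

# List input: invalid rows are logged and skipped because strict=False;
# strict=True would raise ValueError instead.
ds = SeedDataset(data=seeds, seed=42, min_samples=1, strict=False)
print(len(ds), ds.sample().final_answer)

# JSON input: the file must contain a list of dictionaries.
# ds = SeedDataset(data=Path("seeds.json"), min_samples=1)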
camel/environments/base.py
CHANGED
@@ -151,20 +151,26 @@ class BaseEnvironment(ABC):
         r"""Initialize the environment.

         Args:
-            dataset: Dataset to sample questions from.
-            verifier: Verifier to check responses.
-            extractor: Extractor to process LLM responses.
-            max_steps: Maximum steps per episode.
-
-
+            dataset (BaseDataset): Dataset to sample questions from.
+            verifier (BaseVerifier): Verifier to check responses.
+            extractor (BaseExtractor): Extractor to process LLM responses.
+            max_steps (Optional[int]): Maximum steps per episode. (default:
+                :obj:`None`)
+            teacher_agent (Optional[ChatAgent]): Optional agent for reward
+                shaping and hints. (default: :obj:`None`)
+            curriculum_config (Optional[Dict[str, Any]]): Configuration for
+                curriculum learning including:
                 - difficulty_levels: List of available difficulty levels
                 - promotion_threshold: Score needed to advance
                 - demotion_threshold: Score triggering level decrease
                 - min_questions_per_level: Questions before promotion
-
+                (default: :obj:`None`)
+            practice_env_config (Optional[Dict[str, Any]]): Configuration for
+                practice environments:
                 - max_practice_envs: Maximum concurrent environments
                 - difficulty_range: Allowed difficulty variation
                 - focus_areas: Specific skills to practice
+                (default: :obj:`None`)
             **kwargs: Additional environment parameters.
         """
         self.dataset = dataset

@@ -289,7 +295,9 @@ class BaseEnvironment(ABC):
         # extract verifiable part from llm response
         extraction_result = await self.extractor.extract(action.llm_response)

-        #
+        # Ensure extraction_result is a string
+        if extraction_result is None:
+            extraction_result = ""

         # verify the extracted
         verification_result = await self.verifier.verify(
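The curriculum and practice options documented above are plain dictionaries. A sketch with illustrative values; the keys come from the docstring, while the values and the concrete subclass are assumptions:

# Keys from the updated docstring; values are illustrative only.
curriculum_config = {
    "difficulty_levels": ["easy", "medium", "hard"],
    "promotion_threshold": 0.8,     # score needed to advance
    "demotion_threshold": 0.3,      # score triggering level decrease
    "min_questions_per_level": 10,  # questions before promotion
}

practice_env_config = {
    "max_practice_envs": 4,               # maximum concurrent environments
    "difficulty_range": 1,                # allowed difficulty variation
    "focus_areas": ["algebra", "logic"],  # specific skills to practice
}

# A concrete BaseEnvironment subclass would receive these at init time:
# env = MyEnvironment(dataset, verifier, extractor,
#                     curriculum_config=curriculum_config,
#                     practice_env_config=practice_env_config)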
camel/extractors/__init__.py
CHANGED
@@ -11,6 +11,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
-from .base import BaseExtractor
+from .base import BaseExtractor, BaseExtractorStrategy

-__all__ = ["BaseExtractor"]
+__all__ = ["BaseExtractor", "BaseExtractorStrategy"]
camel/extractors/base.py
CHANGED
@@ -12,11 +12,10 @@
 # limitations under the License.
 # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========

+import asyncio
 from abc import ABC, abstractmethod
 from types import TracebackType
-from typing import Any, Dict, Optional, Type
-
-from typing_extensions import Self
+from typing import Any, Dict, List, Optional, Type

 from camel.logger import get_logger
 from camel.utils import BatchProcessor

@@ -24,16 +23,36 @@ from camel.utils import BatchProcessor
 logger = get_logger(__name__)


-class BaseExtractor(ABC):
-    r"""
+class BaseExtractorStrategy(ABC):
+    r"""Abstract base class for extraction strategies."""
+
+    @abstractmethod
+    async def extract(self, text: str) -> Optional[str]:
+        r"""Asynchronously extracts relevant parts from text.
+
+        Args:
+            text (str): The input text to process.
+
+        Returns:
+            Optional[str]: Extracted str if successful, otherwise None.
+        """
+        pass
+
+
+class BaseExtractor:
+    r"""Base class for response extractors with a fixed strategy pipeline.

-
-
-
+    This extractor:
+    - Uses a **fixed multi-stage pipeline** of extraction strategies.
+    - Tries **each strategy in order** within a stage until one succeeds.
+    - Feeds the **output of one stage into the next** for processing.
+    - Supports **async execution** for efficient processing.
+    - Provides **batch processing and resource monitoring** options.
     """

     def __init__(
         self,
+        pipeline: List[List[BaseExtractorStrategy]],
         cache_templates: bool = True,
         max_cache_size: int = 1000,
         extraction_timeout: float = 30.0,

@@ -43,9 +62,12 @@ class BaseExtractor(ABC):
         memory_threshold: float = 85.0,
         **kwargs,
     ):
-        r"""Initialize the extractor.
+        r"""Initialize the extractor with a multi-stage strategy pipeline.

         Args:
+            pipeline (List[List[BaseExtractorStrategy]]):
+                A fixed list of lists where each list represents a stage
+                containing extractor strategies executed in order.
             cache_templates (bool): Whether to cache extraction templates.
                 (default: :obj:`True`)
             max_cache_size (int): Maximum number of templates to cache.

@@ -61,11 +83,8 @@ class BaseExtractor(ABC):
             memory_threshold (float): Memory usage percentage threshold for
                 scaling down. (default: :obj:`85.0`)
             **kwargs: Additional extractor parameters.
-
-        Raises:
-            ValueError: If invalid parameter values are provided
         """
-
+
         self._metadata = {
             'cache_templates': cache_templates,
             'max_cache_size': max_cache_size,

@@ -81,14 +100,7 @@ class BaseExtractor(ABC):
         self._cache: Dict[str, Any] = {}
         self._batch_processor: Optional[BatchProcessor] = None

-
-        self._cache_templates = cache_templates
-        self._max_cache_size = max_cache_size
-        self._extraction_timeout = extraction_timeout
-        self._batch_size = batch_size
-        self._monitoring_interval = monitoring_interval
-        self._cpu_threshold = cpu_threshold
-        self._memory_threshold = memory_threshold
+        self._pipeline = pipeline

     async def setup(self) -> None:
         r"""Set up the extractor with necessary resources.

@@ -106,17 +118,15 @@ class BaseExtractor(ABC):
             return

         try:
-
-            if self._cache_templates:
+            if self._metadata["cache_templates"]:
                 self._template_cache: Dict[str, Any] = {}

-
-            if self._batch_size > 1:
+            if self._metadata["batch_size"] > 1:
                 self._batch_processor = BatchProcessor(
-                    initial_batch_size=self._batch_size,
-                    monitoring_interval=self._monitoring_interval,
-                    cpu_threshold=self._cpu_threshold,
-                    memory_threshold=self._memory_threshold,
+                    initial_batch_size=self._metadata["batch_size"],
+                    monitoring_interval=self._metadata["monitoring_interval"],
+                    cpu_threshold=self._metadata["cpu_threshold"],
+                    memory_threshold=self._metadata["memory_threshold"],
                 )

             self._is_setup = True

@@ -171,13 +181,6 @@ class BaseExtractor(ABC):
             )

         # Preserve init config in metadata
-        self._metadata = {
-            'cache_templates': self._cache_templates,
-            'max_cache_size': self._max_cache_size,
-            'extraction_timeout': self._extraction_timeout,
-            'batch_size': self._batch_size,
-        }
-
         if not errors:
             logger.info(
                 f"{self.__class__.__name__} cleaned up successfully"

@@ -187,23 +190,19 @@ class BaseExtractor(ABC):
             errors.append(f"Unexpected error during cleanup: {e}")

         finally:
-            # Always mark as uninitialized, even if cleanup fails
             self._is_setup = False
             self._batch_processor = None

         if errors:
-            error_msg = (
-                f"Errors during {self.__class__.__name__} cleanup: "
-                f"{'; '.join(errors)}"
-            )
+            error_msg = f"Errors during cleanup: {'; '.join(errors)}"
             logger.error(error_msg)
             raise RuntimeError(error_msg)

-    async def __aenter__(self) -> Self:
+    async def __aenter__(self) -> "BaseExtractor":
         r"""Async context manager entry.

         Returns:
-
+            BaseExtractor: The initialized extractor instance.
         """
         await self.setup()
         return self

@@ -226,38 +225,61 @@ class BaseExtractor(ABC):
         """
         await self.cleanup()

-
-
-
-    ) -> str:
-        r"""Extract relevant parts from a response.
-
-        Extracts:
-        1. Final answer or output
-        2. Chain of thought reasoning steps
-        3. Difficulty assessment
+    async def extract(self, response: str) -> Optional[str]:
+        r"""Extracts a normalized, comparable part of the LLM response
+        using the fixed multi-stage strategy pipeline.

         Args:
-            response (str):
-            context (Optional[Dict[str, Any]]): Optional context for
-                extraction like:
-                - final_answer
-                - rationale
-                - complexity
+            response (str): The raw response text.

         Returns:
-            str: Extracted
+            Optional[str]: Extracted data if successful, otherwise None.

         Raises:
             ValueError: If response is empty or invalid.
-            NotImplementedError: If no implementation is provided.
             RuntimeError: If extractor is not initialized.
         """
         if not self._is_setup:
             raise RuntimeError(
-
-                "before extraction"
+                "Extractor must be initialized before extraction"
             )
         if not response or not response.strip():
             raise ValueError("Empty or whitespace-only response")
-
+
+        current_input = response  # Initial input
+
+        for stage in self._pipeline:
+            stage_success = (
+                False  # Track if any strategy in the stage succeeds
+            )
+
+            for strategy in stage:
+                try:
+                    # Apply the extraction timeout
+                    result = await asyncio.wait_for(
+                        strategy.extract(current_input),
+                        timeout=self._metadata["extraction_timeout"],
+                    )
+
+                    if result is not None:
+                        current_input = result  # Feed into next stage
+                        stage_success = True
+                        break  # Move to next stage if valid extraction occurs
+
+                except asyncio.TimeoutError:
+                    logger.warning(
+                        f"Strategy {strategy.__class__.__name__} timed out "
+                        f"after {self._metadata['extraction_timeout']} seconds"
+                    )
+                except Exception as e:
+                    logger.warning(
+                        f"Strategy {strategy.__class__.__name__} failed: {e}"
+                    )
+
+            if not stage_success:
+                logger.debug(
+                    "No strategy in stage succeeded, stopping extraction."
+                )
+                return None  # Stop processing if the stage fails
+
+        return current_input  # Final processed output
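The new pipeline contract is easiest to see end to end. A minimal sketch: RegexAnswerStrategy is a hypothetical strategy written here for illustration (the release ships concrete strategies in camel/extractors/python_strategies.py, per the file list above); the import path comes from the updated __init__.py:

import asyncio
import re
from typing import Optional

from camel.extractors import BaseExtractor, BaseExtractorStrategy


class RegexAnswerStrategy(BaseExtractorStrategy):
    """Hypothetical strategy: pull the text after 'Answer:'."""

    async def extract(self, text: str) -> Optional[str]:
        match = re.search(r"Answer:\s*(.+)", text)
        return match.group(1).strip() if match else None


async def main() -> None:
    # One stage with one strategy; stages run in order, each stage's
    # output feeding the next.
    extractor = BaseExtractor(pipeline=[[RegexAnswerStrategy()]])
    async with extractor:  # runs setup() on entry, cleanup() on exit
        result = await extractor.extract("Reasoning... Answer: 42")
    print(result)  # 42


asyncio.run(main())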