PyPI - camel-ai - Versions diffs - 0.2.24__py3-none-any.whl → 0.2.26__py3-none-any.whl - Mend

camel-ai 0.2.24__py3-none-any.whl → 0.2.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of camel-ai might be problematic. Click here for more details.

Files changed (23) hide show

camel/__init__.py +1 -1
camel/agents/chat_agent.py +4 -4
camel/agents/knowledge_graph_agent.py +15 -3
camel/configs/anthropic_config.py +0 -1
camel/configs/sglang_config.py +7 -5
camel/datasets/base.py +219 -17
camel/environments/base.py +16 -8
camel/extractors/__init__.py +2 -2
camel/extractors/base.py +86 -64
camel/extractors/python_strategies.py +226 -0
camel/interpreters/subprocess_interpreter.py +187 -46
camel/models/anthropic_model.py +19 -55
camel/models/sglang_model.py +35 -5
camel/py.typed +0 -0
camel/storages/graph_storages/graph_element.py +3 -1
camel/storages/graph_storages/neo4j_graph.py +78 -4
camel/toolkits/__init__.py +2 -0
camel/toolkits/pubmed_toolkit.py +346 -0
camel/toolkits/terminal_toolkit.py +2 -2
{camel_ai-0.2.24.dist-info → camel_ai-0.2.26.dist-info}/METADATA +2 -1
{camel_ai-0.2.24.dist-info → camel_ai-0.2.26.dist-info}/RECORD +23 -20
{camel_ai-0.2.24.dist-info → camel_ai-0.2.26.dist-info}/WHEEL +0 -0
{camel_ai-0.2.24.dist-info → camel_ai-0.2.26.dist-info}/licenses/LICENSE +0 -0

camel/extractors/base.py CHANGED Viewed

@@ -12,11 +12,10 @@
 # limitations under the License.
 # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+import asyncio
 from abc import ABC, abstractmethod
 from types import TracebackType
-from typing import Any, Dict, Optional, Type
-from typing_extensions import Self
+from typing import Any, Dict, List, Optional, Type
 from camel.logger import get_logger
 from camel.utils import BatchProcessor
@@ -24,16 +23,36 @@ from camel.utils import BatchProcessor
 logger = get_logger(__name__)
-class BaseExtractor(ABC):
-    r"""Base class for all response extractors.
+class BaseExtractorStrategy(ABC):
+    r"""Interface implemented by every extraction strategy.
+    A strategy exposes a single coroutine, ``extract``, which receives a
+    piece of text and returns either the extracted string or ``None``.
+    """
+    @abstractmethod
+    async def extract(self, text: str) -> Optional[str]:
+        r"""Asynchronously pull the relevant part out of ``text``.
+        Args:
+            text (str): The input text to process.
+        Returns:
+            Optional[str]: The extracted string on success, or None when
+                nothing could be extracted.
+        """
+        ...
+class BaseExtractor:
+    r"""Base class for response extractors with a fixed strategy pipeline.
-    An extractor takes the response and extracts the relevant parts,
-    converting them into a format that the verifier can handle.
-    Implements async context manager protocol for proper resource management.
+    This extractor:
+    - Uses a **fixed multi-stage pipeline** of extraction strategies.
+    - Tries **each strategy in order** within a stage until one succeeds.
+    - Feeds the **output of one stage into the next** for processing.
+    - Supports **async execution** for efficient processing.
+    - Provides **batch processing and resource monitoring** options.
     """
     def __init__(
         self,
+        pipeline: List[List[BaseExtractorStrategy]],
         cache_templates: bool = True,
         max_cache_size: int = 1000,
         extraction_timeout: float = 30.0,
@@ -43,9 +62,12 @@ class BaseExtractor(ABC):
         memory_threshold: float = 85.0,
         **kwargs,
     ):
-        r"""Initialize the extractor.
+        r"""Initialize the extractor with a multi-stage strategy pipeline.
         Args:
+            pipeline (List[List[BaseExtractorStrategy]]):
+                A fixed list of lists where each list represents a stage
+                containing extractor strategies executed in order.
             cache_templates (bool): Whether to cache extraction templates.
                 (default: :obj:`True`)
             max_cache_size (int): Maximum number of templates to cache.
@@ -61,11 +83,8 @@ class BaseExtractor(ABC):
             memory_threshold (float): Memory usage percentage threshold for
                 scaling down. (default: :obj:`85.0`)
             **kwargs: Additional extractor parameters.
-        Raises:
-            ValueError: If invalid parameter values are provided
         """
-        # Store all parameters in metadata dict for compatibility
         self._metadata = {
             'cache_templates': cache_templates,
             'max_cache_size': max_cache_size,
@@ -81,14 +100,7 @@ class BaseExtractor(ABC):
         self._cache: Dict[str, Any] = {}
         self._batch_processor: Optional[BatchProcessor] = None
-        # Store configuration parameters
-        self._cache_templates = cache_templates
-        self._max_cache_size = max_cache_size
-        self._extraction_timeout = extraction_timeout
-        self._batch_size = batch_size
-        self._monitoring_interval = monitoring_interval
-        self._cpu_threshold = cpu_threshold
-        self._memory_threshold = memory_threshold
+        self._pipeline = pipeline
     async def setup(self) -> None:
         r"""Set up the extractor with necessary resources.
@@ -106,17 +118,15 @@ class BaseExtractor(ABC):
             return
         try:
-            # Initialize template cache if enabled
-            if self._cache_templates:
+            if self._metadata["cache_templates"]:
                 self._template_cache: Dict[str, Any] = {}
-            # Set up batch processing if needed
-            if self._batch_size > 1:
+            if self._metadata["batch_size"] > 1:
                 self._batch_processor = BatchProcessor(
-                    initial_batch_size=self._batch_size,
-                    monitoring_interval=self._monitoring_interval,
-                    cpu_threshold=self._cpu_threshold,
-                    memory_threshold=self._memory_threshold,
+                    initial_batch_size=self._metadata["batch_size"],
+                    monitoring_interval=self._metadata["monitoring_interval"],
+                    cpu_threshold=self._metadata["cpu_threshold"],
+                    memory_threshold=self._metadata["memory_threshold"],
                 )
             self._is_setup = True
@@ -171,13 +181,6 @@ class BaseExtractor(ABC):
                     )
             # Preserve init config in metadata
-            self._metadata = {
-                'cache_templates': self._cache_templates,
-                'max_cache_size': self._max_cache_size,
-                'extraction_timeout': self._extraction_timeout,
-                'batch_size': self._batch_size,
-            }
             if not errors:
                 logger.info(
                     f"{self.__class__.__name__} cleaned up successfully"
@@ -187,23 +190,19 @@ class BaseExtractor(ABC):
             errors.append(f"Unexpected error during cleanup: {e}")
         finally:
-            # Always mark as uninitialized, even if cleanup fails
             self._is_setup = False
             self._batch_processor = None
         if errors:
-            error_msg = (
-                f"Errors during {self.__class__.__name__} cleanup: "
-                f"{'; '.join(errors)}"
-            )
+            error_msg = f"Errors during cleanup: {'; '.join(errors)}"
             logger.error(error_msg)
             raise RuntimeError(error_msg)
-    async def __aenter__(self) -> Self:
+    async def __aenter__(self) -> "BaseExtractor":
         r"""Async context manager entry.
         Returns:
-            Self reference for context manager usage.
+            BaseExtractor: The initialized extractor instance.
         """
         await self.setup()
         return self
@@ -226,38 +225,61 @@ class BaseExtractor(ABC):
         """
         await self.cleanup()
-    @abstractmethod
-    async def extract(
-        self, response: str, context: Optional[Dict[str, Any]] = None
-    ) -> str:
-        r"""Extract relevant parts from a response.
-        Extracts:
-        1. Final answer or output
-        2. Chain of thought reasoning steps
-        3. Difficulty assessment
+    async def extract(self, response: str) -> Optional[str]:
+        r"""Extracts a normalized, comparable part of the LLM response
+        using the fixed multi-stage strategy pipeline.
         Args:
-            response (str): Raw response from agent generation.
-            context (Optional[Dict[str, Any]]): Optional context for
-            extraction like:
-                - final_answer
-                - rationale
-                - complexity
+            response (str): The raw response text.
         Returns:
-            str: Extracted content string.
+            Optional[str]: Extracted data if successful, otherwise None.
         Raises:
             ValueError: If response is empty or invalid.
-            NotImplementedError: If no implementation is provided.
             RuntimeError: If extractor is not initialized.
         """
         if not self._is_setup:
             raise RuntimeError(
-                f"{self.__class__.__name__} must be initialized "
-                "before extraction"
+                "Extractor must be initialized before extraction"
             )
         if not response or not response.strip():
             raise ValueError("Empty or whitespace-only response")
-        raise NotImplementedError("Subclasses must implement extract()")
+        current_input = response  # Initial input
+        for stage in self._pipeline:
+            stage_success = (
+                False  # Track if any strategy in the stage succeeds
+            )
+            for strategy in stage:
+                try:
+                    # Apply the extraction timeout
+                    result = await asyncio.wait_for(
+                        strategy.extract(current_input),
+                        timeout=self._metadata["extraction_timeout"],
+                    )
+                    if result is not None:
+                        current_input = result  # Feed into next stage
+                        stage_success = True
+                        break  # Move to next stage if valid extraction occurs
+                except asyncio.TimeoutError:
+                    logger.warning(
+                        f"Strategy {strategy.__class__.__name__} timed out "
+                        f"after {self._metadata['extraction_timeout']} seconds"
+                    )
+                except Exception as e:
+                    logger.warning(
+                        f"Strategy {strategy.__class__.__name__} failed: {e}"
+                    )
+            if not stage_success:
+                logger.debug(
+                    "No strategy in stage succeeded, stopping extraction."
+                )
+                return None  # Stop processing if the stage fails
+        return current_input  # Final processed output

camel/extractors/python_strategies.py ADDED Viewed

@@ -0,0 +1,226 @@
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+import ast
+from typing import Optional
+from camel.extractors.base import BaseExtractorStrategy
+from camel.logger import get_logger
+logger = get_logger(__name__)
+class BoxedStrategy(BaseExtractorStrategy):
+    r"""Strategy that returns the body of the first \\boxed{...} group."""
+    async def extract(self, text: str) -> Optional[str]:
+        r"""Return the text enclosed by the first \\boxed{...} in ``text``.
+        Braces nested inside the box are matched, and any character that
+        follows a backslash is skipped, so escaped braces are not counted.
+        Args:
+            text (str): The input text to process.
+        Returns:
+            Optional[str]: The stripped content of the first \\boxed{}
+                group, or None when there is no group, nothing follows
+                the opening brace, or the braces never balance.
+        """
+        marker = "\\boxed{"
+        marker_pos = text.find(marker)
+        if marker_pos == -1:
+            logger.debug("No \\boxed{} content found in the response")
+            return None
+        body_start = marker_pos + len(marker)
+        if body_start >= len(text):
+            logger.debug("Malformed \\boxed{} (no content after opening)")
+            return None
+        # The marker itself contributes the first open brace.
+        depth = 1
+        skip_next = False
+        close_pos = None
+        for pos, char in enumerate(text[body_start:], start=body_start):
+            if skip_next:
+                # Character right after a backslash: never a delimiter.
+                skip_next = False
+            elif char == '\\':
+                skip_next = True
+            elif char == '{':
+                depth += 1
+            elif char == '}':
+                depth -= 1
+                # Depth can only reach zero on a closing brace.
+                if depth == 0:
+                    close_pos = pos
+                    break
+        if close_pos is None:
+            logger.debug("Unbalanced braces in \\boxed{} content")
+            return None
+        content = text[body_start:close_pos].strip()
+        logger.debug(f"Extracted boxed content: {content}")
+        return content
+class PythonListStrategy(BaseExtractorStrategy):
+    r"""Extracts and normalizes Python lists.
+    The text is parsed with ``ast.literal_eval`` and the elements are
+    sorted by their ``str`` form before being rendered with ``repr``.
+    """
+    async def extract(self, text: str) -> Optional[str]:
+        r"""Extract and normalize a Python list.
+        Args:
+            text (str): The input text to process.
+        Returns:
+            Optional[str]: ``repr`` of the list sorted by ``str(element)``
+                if the text is a list literal, else None.
+        """
+        text = text.strip()
+        if not (text.startswith('[') and text.endswith(']')):
+            logger.debug("Content is not a list format (missing brackets)")
+            return None
+        # Fix any escaped quotes before parsing
+        fixed_content = text.replace('\\"', '"')
+        try:
+            parsed = ast.literal_eval(fixed_content)
+        except (
+            SyntaxError,
+            ValueError,
+            TypeError,
+            MemoryError,
+            RecursionError,
+        ) as e:
+            # literal_eval may raise any of these on malformed input, e.g.
+            # TypeError for an unhashable dict key nested in the list; all
+            # of them mean "not a usable list literal".
+            logger.debug(f"Failed to parse as Python list: {e}")
+            return None
+        if not isinstance(parsed, list):
+            logger.debug(f"Content is not a list, got {type(parsed)}")
+            return None
+        # Sort the list for normalization
+        return repr(sorted(parsed, key=lambda x: str(x)))
+class PythonDictStrategy(BaseExtractorStrategy):
+    r"""Extracts and normalizes Python dictionaries.
+    The text is parsed with ``ast.literal_eval`` and the items are
+    re-inserted in order of ``str(key)`` before being rendered with
+    ``repr``.
+    """
+    async def extract(self, text: str) -> Optional[str]:
+        r"""Extract and normalize a Python dictionary.
+        Args:
+            text (str): The input text to process.
+        Returns:
+            Optional[str]: ``repr`` of the dictionary with its items
+                ordered by ``str(key)`` if the text is a dict literal,
+                else None.
+        """
+        text = text.strip()
+        if not (text.startswith('{') and text.endswith('}')):
+            logger.debug("Content is not a dictionary format (missing braces)")
+            return None
+        # Fix any escaped quotes before parsing
+        fixed_content = text.replace('\\"', '"')
+        try:
+            parsed = ast.literal_eval(fixed_content)
+        except (
+            SyntaxError,
+            ValueError,
+            TypeError,
+            MemoryError,
+            RecursionError,
+        ) as e:
+            # literal_eval may raise any of these on malformed input, e.g.
+            # TypeError for an unhashable key such as ``{[1]: 2}``; all of
+            # them mean "not a usable dict literal".
+            logger.debug(f"Failed to parse as Python dictionary: {e}")
+            return None
+        if not isinstance(parsed, dict):
+            # A brace-delimited literal can also be a set.
+            logger.debug(
+                f"Content is not a dictionary, got {type(parsed)}"
+            )
+            return None
+        # Sort the dictionary items for normalization
+        sorted_dict = dict(sorted(parsed.items(), key=lambda x: str(x[0])))
+        return repr(sorted_dict)
+class PythonSetStrategy(BaseExtractorStrategy):
+    r"""Extracts and normalizes Python sets.
+    Accepts both the literal form ``{1, 2, 3}`` and the call form
+    ``set([1, 2, 3])``. The result is rendered with its elements sorted
+    by their ``str`` form.
+    """
+    async def extract(self, text: str) -> Optional[str]:
+        r"""Extract and normalize a Python set.
+        Args:
+            text (str): The input text to process.
+        Returns:
+            Optional[str]: The set written as ``{a, b, ...}`` with the
+                elements ordered by ``str(element)`` (``set()`` when
+                empty) if the text denotes a set, else None.
+        """
+        text = text.strip()
+        # Check for set syntax: {1, 2, 3} or set([1, 2, 3])
+        is_literal = text.startswith('{') and text.endswith('}')
+        is_call = text.startswith('set(') and text.endswith(')')
+        if not (is_literal or is_call):
+            logger.debug("Content is not a set format")
+            return None
+        # Fix any escaped quotes before parsing
+        fixed_content = text.replace('\\"', '"')
+        try:
+            if is_call:
+                # literal_eval rejects calls with arguments, so check the
+                # call shape by hand and evaluate only its literal
+                # argument; nothing from the text is ever executed.
+                node = ast.parse(fixed_content, mode='eval').body
+                if not (
+                    isinstance(node, ast.Call)
+                    and isinstance(node.func, ast.Name)
+                    and node.func.id == 'set'
+                    and not node.keywords
+                    and len(node.args) <= 1
+                ):
+                    logger.debug("Content is not a set format")
+                    return None
+                parsed = (
+                    set(ast.literal_eval(node.args[0]))
+                    if node.args
+                    else set()
+                )
+            else:
+                parsed = ast.literal_eval(fixed_content)
+        except (
+            SyntaxError,
+            ValueError,
+            TypeError,
+            MemoryError,
+            RecursionError,
+        ) as e:
+            # TypeError covers unhashable elements such as ``{[1], 2}``
+            # and a non-iterable argument such as ``set(1)``.
+            logger.debug(f"Failed to parse as Python set: {e}")
+            return None
+        if not isinstance(parsed, set):
+            # ``{}`` and ``{k: v}`` are dictionaries, not sets.
+            logger.debug(f"Content is not a set, got {type(parsed)}")
+            return None
+        # Sort the set elements for normalization. The text is built from
+        # the sorted list itself: converting back to a set would discard
+        # the order, and set iteration order follows element hashes, which
+        # are randomized per process for strings by default.
+        sorted_items = sorted(parsed, key=lambda x: str(x))
+        if not sorted_items:
+            return "set()"
+        return "{" + ", ".join(repr(item) for item in sorted_items) + "}"
+class PythonTupleStrategy(BaseExtractorStrategy):
+    r"""Extracts and normalizes Python tuples.
+    The text is parsed with ``ast.literal_eval`` and the elements are
+    sorted by their ``str`` form before being rendered with ``repr``.
+    """
+    async def extract(self, text: str) -> Optional[str]:
+        r"""Extract and normalize a Python tuple.
+        Args:
+            text (str): The input text to process.
+        Returns:
+            Optional[str]: ``repr`` of the tuple sorted by
+                ``str(element)`` if the text is a tuple literal, else
+                None.
+        """
+        text = text.strip()
+        # Check for tuple syntax: (1, 2, 3) or (1,)
+        if not (text.startswith('(') and text.endswith(')')):
+            logger.debug("Content is not a tuple format (missing parentheses)")
+            return None
+        # Fix any escaped quotes before parsing
+        fixed_content = text.replace('\\"', '"')
+        try:
+            parsed = ast.literal_eval(fixed_content)
+        except (
+            SyntaxError,
+            ValueError,
+            TypeError,
+            MemoryError,
+            RecursionError,
+        ) as e:
+            # literal_eval may raise any of these on malformed input, e.g.
+            # TypeError for an unhashable dict key nested in the tuple;
+            # all of them mean "not a usable tuple literal".
+            logger.debug(f"Failed to parse as Python tuple: {e}")
+            return None
+        if not isinstance(parsed, tuple):
+            # A parenthesized scalar such as ``(1)`` is not a tuple.
+            logger.debug(f"Content is not a tuple, got {type(parsed)}")
+            return None
+        # Sort the tuple elements for normalization
+        return repr(tuple(sorted(parsed, key=lambda x: str(x))))

camel-ai 0.2.24__py3-none-any.whl → 0.2.26__py3-none-any.whl

Potentially problematic release.

camel-ai 0.2.24__py3-none-any.whl → 0.2.26__py3-none-any.whl