camel-ai 0.2.24__py3-none-any.whl → 0.2.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of camel-ai might be problematic. Click here for more details.

camel/extractors/base.py CHANGED
@@ -12,11 +12,10 @@
12
12
  # limitations under the License.
13
13
  # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
14
 
15
+ import asyncio
15
16
  from abc import ABC, abstractmethod
16
17
  from types import TracebackType
17
- from typing import Any, Dict, Optional, Type
18
-
19
- from typing_extensions import Self
18
+ from typing import Any, Dict, List, Optional, Type
20
19
 
21
20
  from camel.logger import get_logger
22
21
  from camel.utils import BatchProcessor
@@ -24,16 +23,36 @@ from camel.utils import BatchProcessor
24
23
  logger = get_logger(__name__)
25
24
 
26
25
 
27
- class BaseExtractor(ABC):
28
- r"""Base class for all response extractors.
26
class BaseExtractorStrategy(ABC):
    r"""Interface implemented by every single-step extraction strategy.

    A concrete strategy overrides :meth:`extract`, which receives a piece
    of text and hands back either the extracted string or ``None`` when
    it could not extract anything.
    """

    @abstractmethod
    async def extract(self, text: str) -> Optional[str]:
        r"""Asynchronously pull the relevant part out of ``text``.

        Args:
            text (str): The input text to process.

        Returns:
            Optional[str]: The extracted string on success, otherwise
                ``None``.
        """
40
+
41
+
42
+ class BaseExtractor:
43
+ r"""Base class for response extractors with a fixed strategy pipeline.
29
44
 
30
- An extractor takes the response and extracts the relevant parts,
31
- converting them into a format that the verifier can handle.
32
- Implements async context manager protocol for proper resource management.
45
+ This extractor:
46
+ - Uses a **fixed multi-stage pipeline** of extraction strategies.
47
+ - Tries **each strategy in order** within a stage until one succeeds.
48
+ - Feeds the **output of one stage into the next** for processing.
49
+ - Supports **async execution** for efficient processing.
50
+ - Provides **batch processing and resource monitoring** options.
33
51
  """
34
52
 
35
53
  def __init__(
36
54
  self,
55
+ pipeline: List[List[BaseExtractorStrategy]],
37
56
  cache_templates: bool = True,
38
57
  max_cache_size: int = 1000,
39
58
  extraction_timeout: float = 30.0,
@@ -43,9 +62,12 @@ class BaseExtractor(ABC):
43
62
  memory_threshold: float = 85.0,
44
63
  **kwargs,
45
64
  ):
46
- r"""Initialize the extractor.
65
+ r"""Initialize the extractor with a multi-stage strategy pipeline.
47
66
 
48
67
  Args:
68
+ pipeline (List[List[BaseExtractorStrategy]]):
69
+ A fixed list of lists where each list represents a stage
70
+ containing extractor strategies executed in order.
49
71
  cache_templates (bool): Whether to cache extraction templates.
50
72
  (default: :obj:`True`)
51
73
  max_cache_size (int): Maximum number of templates to cache.
@@ -61,11 +83,8 @@ class BaseExtractor(ABC):
61
83
  memory_threshold (float): Memory usage percentage threshold for
62
84
  scaling down. (default: :obj:`85.0`)
63
85
  **kwargs: Additional extractor parameters.
64
-
65
- Raises:
66
- ValueError: If invalid parameter values are provided
67
86
  """
68
- # Store all parameters in metadata dict for compatibility
87
+
69
88
  self._metadata = {
70
89
  'cache_templates': cache_templates,
71
90
  'max_cache_size': max_cache_size,
@@ -81,14 +100,7 @@ class BaseExtractor(ABC):
81
100
  self._cache: Dict[str, Any] = {}
82
101
  self._batch_processor: Optional[BatchProcessor] = None
83
102
 
84
- # Store configuration parameters
85
- self._cache_templates = cache_templates
86
- self._max_cache_size = max_cache_size
87
- self._extraction_timeout = extraction_timeout
88
- self._batch_size = batch_size
89
- self._monitoring_interval = monitoring_interval
90
- self._cpu_threshold = cpu_threshold
91
- self._memory_threshold = memory_threshold
103
+ self._pipeline = pipeline
92
104
 
93
105
  async def setup(self) -> None:
94
106
  r"""Set up the extractor with necessary resources.
@@ -106,17 +118,15 @@ class BaseExtractor(ABC):
106
118
  return
107
119
 
108
120
  try:
109
- # Initialize template cache if enabled
110
- if self._cache_templates:
121
+ if self._metadata["cache_templates"]:
111
122
  self._template_cache: Dict[str, Any] = {}
112
123
 
113
- # Set up batch processing if needed
114
- if self._batch_size > 1:
124
+ if self._metadata["batch_size"] > 1:
115
125
  self._batch_processor = BatchProcessor(
116
- initial_batch_size=self._batch_size,
117
- monitoring_interval=self._monitoring_interval,
118
- cpu_threshold=self._cpu_threshold,
119
- memory_threshold=self._memory_threshold,
126
+ initial_batch_size=self._metadata["batch_size"],
127
+ monitoring_interval=self._metadata["monitoring_interval"],
128
+ cpu_threshold=self._metadata["cpu_threshold"],
129
+ memory_threshold=self._metadata["memory_threshold"],
120
130
  )
121
131
 
122
132
  self._is_setup = True
@@ -171,13 +181,6 @@ class BaseExtractor(ABC):
171
181
  )
172
182
 
173
183
  # Preserve init config in metadata
174
- self._metadata = {
175
- 'cache_templates': self._cache_templates,
176
- 'max_cache_size': self._max_cache_size,
177
- 'extraction_timeout': self._extraction_timeout,
178
- 'batch_size': self._batch_size,
179
- }
180
-
181
184
  if not errors:
182
185
  logger.info(
183
186
  f"{self.__class__.__name__} cleaned up successfully"
@@ -187,23 +190,19 @@ class BaseExtractor(ABC):
187
190
  errors.append(f"Unexpected error during cleanup: {e}")
188
191
 
189
192
  finally:
190
- # Always mark as uninitialized, even if cleanup fails
191
193
  self._is_setup = False
192
194
  self._batch_processor = None
193
195
 
194
196
  if errors:
195
- error_msg = (
196
- f"Errors during {self.__class__.__name__} cleanup: "
197
- f"{'; '.join(errors)}"
198
- )
197
+ error_msg = f"Errors during cleanup: {'; '.join(errors)}"
199
198
  logger.error(error_msg)
200
199
  raise RuntimeError(error_msg)
201
200
 
202
async def __aenter__(self) -> "BaseExtractor":
    r"""Async context manager entry.

    Awaits :meth:`setup` and then hands the extractor itself to the
    ``async with`` block.

    Returns:
        BaseExtractor: The initialized extractor instance.
    """
    await self.setup()
    return self
@@ -226,38 +225,61 @@ class BaseExtractor(ABC):
226
225
  """
227
226
  await self.cleanup()
228
227
 
229
async def extract(self, response: str) -> Optional[str]:
    r"""Run the response through every stage of the strategy pipeline.

    Stages are processed in order. Within a stage the strategies are
    tried one after another until one returns something other than
    ``None``; that value becomes the input of the following stage. A
    strategy that exceeds the configured ``extraction_timeout`` or raises
    an exception is logged as a warning and counted as a miss.

    Args:
        response (str): The raw response text.

    Returns:
        Optional[str]: The output of the last stage (the unchanged
            response when the pipeline has no stages), or ``None`` as
            soon as one stage has no successful strategy.

    Raises:
        ValueError: If response is empty or whitespace-only.
        RuntimeError: If extractor is not initialized.
    """
    if not self._is_setup:
        raise RuntimeError("Extractor must be initialized before extraction")
    if not response or not response.strip():
        raise ValueError("Empty or whitespace-only response")

    stage_input = response
    for strategies in self._pipeline:
        for candidate in strategies:
            try:
                # Each attempt is bounded by the configured timeout.
                extracted = await asyncio.wait_for(
                    candidate.extract(stage_input),
                    timeout=self._metadata["extraction_timeout"],
                )
            except asyncio.TimeoutError:
                logger.warning(
                    f"Strategy {candidate.__class__.__name__} timed out "
                    f"after {self._metadata['extraction_timeout']} seconds"
                )
                continue
            except Exception as e:
                logger.warning(
                    f"Strategy {candidate.__class__.__name__} failed: {e}"
                )
                continue

            if extracted is not None:
                # First hit wins; its output feeds the next stage.
                stage_input = extracted
                break
        else:
            # Reached only when no strategy in this stage produced a value.
            logger.debug(
                "No strategy in stage succeeded, stopping extraction."
            )
            return None

    return stage_input
@@ -0,0 +1,226 @@
1
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
+
15
+ import ast
16
+ from typing import Optional
17
+
18
+ from camel.extractors.base import BaseExtractorStrategy
19
+ from camel.logger import get_logger
20
+
21
+ logger = get_logger(__name__)
22
+
23
+
24
class BoxedStrategy(BaseExtractorStrategy):
    r"""Strategy that pulls the payload out of a \\boxed{} environment."""

    async def extract(self, text: str) -> Optional[str]:
        r"""Return the content of the first \\boxed{} found in ``text``.

        Braces nested inside the box are balanced, and any character that
        directly follows a backslash is skipped so that it never counts
        as an opening or closing brace.

        Args:
            text (str): The input text to process.

        Returns:
            Optional[str]: The stripped content between the braces of the
                first \\boxed{} occurrence, or None when there is no
                \\boxed{}, nothing follows its opening brace, or its
                braces never balance.
        """
        opener = "\\boxed{"
        opener_pos = text.find(opener)
        if opener_pos < 0:
            logger.debug("No \\boxed{} content found in the response")
            return None

        body_start = opener_pos + len(opener)
        if body_start >= len(text):
            logger.debug("Malformed \\boxed{} (no content after opening)")
            return None

        depth = 1  # the brace that is part of the opener itself
        skip_next = False
        for pos, ch in enumerate(text[body_start:], start=body_start):
            if skip_next:
                # Character right after a backslash: ignore it entirely.
                skip_next = False
            elif ch == '\\':
                skip_next = True
            elif ch == '{':
                depth += 1
            elif ch == '}':
                depth -= 1
                if depth == 0:
                    # Matching closing brace of the box.
                    inner = text[body_start:pos].strip()
                    logger.debug(f"Extracted boxed content: {inner}")
                    return inner

        # The scan ended without the box ever being closed.
        logger.debug("Unbalanced braces in \\boxed{} content")
        return None
82
+
83
+
84
class PythonListStrategy(BaseExtractorStrategy):
    r"""Extracts and normalizes Python lists."""

    async def extract(self, text: str) -> Optional[str]:
        r"""Extract and normalize a Python list.

        The stripped text must start with ``[`` and end with ``]``. It is
        parsed with :func:`ast.literal_eval` after every ``\"`` has been
        replaced by ``"``, and the resulting list is sorted by the string
        form of its elements.

        Args:
            text (str): The input text to process.

        Returns:
            Optional[str]: ``repr`` of the sorted list, or None when the
                text is not bracketed, cannot be parsed as a literal, or
                parses to something other than a list.
        """
        text = text.strip()
        if not (text.startswith('[') and text.endswith(']')):
            logger.debug("Content is not a list format (missing brackets)")
            return None

        # Fix any escaped quotes before parsing
        fixed_content = text.replace('\\"', '"')
        try:
            parsed = ast.literal_eval(fixed_content)
        except (
            SyntaxError,
            ValueError,
            # Unhashable set element / dict key nested in the literal.
            TypeError,
            # Pathologically deep or large input.
            MemoryError,
            RecursionError,
        ) as e:
            logger.debug(f"Failed to parse as Python list: {e}")
            return None

        if not isinstance(parsed, list):
            # e.g. "[1], [2]" is bracketed but evaluates to a tuple.
            logger.debug(f"Content is not a list, got {type(parsed)}")
            return None

        # Sort the list for normalization
        return repr(sorted(parsed, key=str))
116
+
117
+
118
class PythonDictStrategy(BaseExtractorStrategy):
    r"""Extracts and normalizes Python dictionaries."""

    async def extract(self, text: str) -> Optional[str]:
        r"""Extract and normalize a Python dictionary.

        The stripped text must start with ``{`` and end with ``}``. It is
        parsed with :func:`ast.literal_eval` after every ``\"`` has been
        replaced by ``"``, and the items are re-inserted into a new dict
        ordered by the string form of their keys.

        Args:
            text (str): The input text to process.

        Returns:
            Optional[str]: ``repr`` of the key-sorted dictionary, or None
                when the text is not braced, cannot be parsed as a
                literal, or parses to something other than a dict.
        """
        text = text.strip()
        if not (text.startswith('{') and text.endswith('}')):
            logger.debug("Content is not a dictionary format (missing braces)")
            return None

        # Fix any escaped quotes before parsing
        fixed_content = text.replace('\\"', '"')
        try:
            parsed = ast.literal_eval(fixed_content)
        except (
            SyntaxError,
            ValueError,
            # Unhashable key such as ``{[1]: 2}``.
            TypeError,
            # Pathologically deep or large input.
            MemoryError,
            RecursionError,
        ) as e:
            logger.debug(f"Failed to parse as Python dictionary: {e}")
            return None

        if not isinstance(parsed, dict):
            logger.debug(
                f"Content is not a dictionary, got {type(parsed)}"
            )
            return None

        # Sort the dictionary items for normalization
        sorted_dict = dict(
            sorted(parsed.items(), key=lambda item: str(item[0]))
        )
        return repr(sorted_dict)
154
+
155
+
156
class PythonSetStrategy(BaseExtractorStrategy):
    r"""Extracts and normalizes Python sets."""

    async def extract(self, text: str) -> Optional[str]:
        r"""Extract and normalize a Python set.

        The stripped text must look like ``{...}`` or ``set(...)``. It is
        parsed with :func:`ast.literal_eval` after every ``\"`` has been
        replaced by ``"``. ``set(...)`` calls with arguments are not
        literals, so :func:`ast.literal_eval` rejects them and None is
        returned.

        The result is rendered in set-display syntax with the elements
        ordered by their string form. The string is built by hand
        because ``repr`` of a set follows hash order, which would make
        the output differ between processes for the same input.

        Args:
            text (str): The input text to process.

        Returns:
            Optional[str]: The normalized set as a string
                (``'set()'`` for an empty set), or None when the text
                does not have a set shape, cannot be parsed as a literal,
                or parses to something other than a set.
        """
        text = text.strip()
        # Check for set syntax: {1, 2, 3} or set(...)
        if not (
            (text.startswith('{') and text.endswith('}'))
            or (text.startswith('set(') and text.endswith(')'))
        ):
            logger.debug("Content is not a set format")
            return None

        # Fix any escaped quotes before parsing
        fixed_content = text.replace('\\"', '"')
        try:
            parsed = ast.literal_eval(fixed_content)
        except (
            SyntaxError,
            ValueError,
            # Unhashable element such as ``{[1]}``.
            TypeError,
            # Pathologically deep or large input.
            MemoryError,
            RecursionError,
        ) as e:
            logger.debug(f"Failed to parse as Python set: {e}")
            return None

        if not isinstance(parsed, set):
            logger.debug(f"Content is not a set, got {type(parsed)}")
            return None

        if not parsed:
            # "{}" would read back as a dict, so keep Python's own form.
            return repr(parsed)

        # Sort the set elements for normalization and keep that order in
        # the rendered string.
        ordered = sorted(parsed, key=str)
        return "{" + ", ".join(repr(item) for item in ordered) + "}"
192
+
193
+
194
class PythonTupleStrategy(BaseExtractorStrategy):
    r"""Extracts and normalizes Python tuples."""

    async def extract(self, text: str) -> Optional[str]:
        r"""Extract and normalize a Python tuple.

        The stripped text must start with ``(`` and end with ``)``. It is
        parsed with :func:`ast.literal_eval` after every ``\"`` has been
        replaced by ``"``, and the resulting tuple is sorted by the
        string form of its elements.

        Args:
            text (str): The input text to process.

        Returns:
            Optional[str]: ``repr`` of the sorted tuple, or None when the
                text is not parenthesized, cannot be parsed as a literal,
                or parses to something other than a tuple (for example
                ``(1)``, which is an int).
        """
        text = text.strip()
        # Check for tuple syntax: (1, 2, 3) or (1,)
        if not (text.startswith('(') and text.endswith(')')):
            logger.debug("Content is not a tuple format (missing parentheses)")
            return None

        # Fix any escaped quotes before parsing
        fixed_content = text.replace('\\"', '"')
        try:
            parsed = ast.literal_eval(fixed_content)
        except (
            SyntaxError,
            ValueError,
            # Unhashable set element / dict key nested in the literal.
            TypeError,
            # Pathologically deep or large input.
            MemoryError,
            RecursionError,
        ) as e:
            logger.debug(f"Failed to parse as Python tuple: {e}")
            return None

        if not isinstance(parsed, tuple):
            logger.debug(f"Content is not a tuple, got {type(parsed)}")
            return None

        # Sort the tuple elements for normalization
        return repr(tuple(sorted(parsed, key=str)))