camel-ai 0.2.34__py3-none-any.whl → 0.2.36__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of camel-ai might be problematic. Click here for more details.
- camel/__init__.py +1 -1
- camel/agents/_types.py +1 -1
- camel/agents/_utils.py +4 -4
- camel/agents/chat_agent.py +174 -29
- camel/configs/__init__.py +3 -0
- camel/configs/openai_config.py +20 -16
- camel/configs/openrouter_config.py +106 -0
- camel/datasets/base_generator.py +188 -27
- camel/datasets/few_shot_generator.py +2 -5
- camel/environments/single_step.py +1 -7
- camel/memories/agent_memories.py +49 -2
- camel/memories/base.py +23 -1
- camel/memories/blocks/chat_history_block.py +2 -1
- camel/memories/records.py +5 -0
- camel/models/__init__.py +2 -0
- camel/models/gemini_model.py +36 -0
- camel/models/groq_model.py +6 -3
- camel/models/model_factory.py +3 -0
- camel/models/openrouter_model.py +204 -0
- camel/models/stub_model.py +25 -0
- camel/retrievers/vector_retriever.py +12 -7
- camel/storages/__init__.py +2 -0
- camel/storages/key_value_storages/__init__.py +4 -1
- camel/storages/key_value_storages/json.py +3 -7
- camel/storages/key_value_storages/mem0_cloud.py +224 -0
- camel/storages/vectordb_storages/base.py +5 -1
- camel/storages/vectordb_storages/qdrant.py +3 -3
- camel/toolkits/__init__.py +2 -1
- camel/toolkits/browser_toolkit.py +43 -0
- camel/toolkits/code_execution.py +2 -1
- camel/toolkits/mcp_toolkit.py +30 -1
- camel/toolkits/memory_toolkit.py +129 -0
- camel/types/enums.py +24 -0
- camel/types/unified_model_type.py +5 -0
- camel/utils/chunker/__init__.py +22 -0
- camel/utils/chunker/base.py +24 -0
- camel/utils/chunker/code_chunker.py +193 -0
- camel/utils/chunker/uio_chunker.py +66 -0
- camel/utils/token_counting.py +133 -0
- camel/verifiers/__init__.py +1 -2
- camel/verifiers/base.py +133 -96
- camel/verifiers/models.py +0 -12
- camel/verifiers/python_verifier.py +25 -14
- {camel_ai-0.2.34.dist-info → camel_ai-0.2.36.dist-info}/METADATA +3 -1
- {camel_ai-0.2.34.dist-info → camel_ai-0.2.36.dist-info}/RECORD +47 -39
- {camel_ai-0.2.34.dist-info → camel_ai-0.2.36.dist-info}/WHEEL +0 -0
- {camel_ai-0.2.34.dist-info → camel_ai-0.2.36.dist-info}/licenses/LICENSE +0 -0
camel/utils/token_counting.py
CHANGED
|
@@ -90,6 +90,30 @@ class BaseTokenCounter(ABC):
|
|
|
90
90
|
"""
|
|
91
91
|
pass
|
|
92
92
|
|
|
93
|
+
@abstractmethod
|
|
94
|
+
def encode(self, text: str) -> List[int]:
|
|
95
|
+
r"""Encode text into token IDs.
|
|
96
|
+
|
|
97
|
+
Args:
|
|
98
|
+
text (str): The text to encode.
|
|
99
|
+
|
|
100
|
+
Returns:
|
|
101
|
+
List[int]: List of token IDs.
|
|
102
|
+
"""
|
|
103
|
+
pass
|
|
104
|
+
|
|
105
|
+
@abstractmethod
|
|
106
|
+
def decode(self, token_ids: List[int]) -> str:
|
|
107
|
+
r"""Decode token IDs back to text.
|
|
108
|
+
|
|
109
|
+
Args:
|
|
110
|
+
token_ids (List[int]): List of token IDs to decode.
|
|
111
|
+
|
|
112
|
+
Returns:
|
|
113
|
+
str: Decoded text.
|
|
114
|
+
"""
|
|
115
|
+
pass
|
|
116
|
+
|
|
93
117
|
|
|
94
118
|
class OpenAITokenCounter(BaseTokenCounter):
|
|
95
119
|
def __init__(self, model: UnifiedModelType):
|
|
@@ -227,6 +251,28 @@ class OpenAITokenCounter(BaseTokenCounter):
|
|
|
227
251
|
total = EXTRA_TOKENS + SQUARE_TOKENS * h * w
|
|
228
252
|
return total
|
|
229
253
|
|
|
254
|
+
def encode(self, text: str) -> List[int]:
|
|
255
|
+
r"""Encode text into token IDs.
|
|
256
|
+
|
|
257
|
+
Args:
|
|
258
|
+
text (str): The text to encode.
|
|
259
|
+
|
|
260
|
+
Returns:
|
|
261
|
+
List[int]: List of token IDs.
|
|
262
|
+
"""
|
|
263
|
+
return self.encoding.encode(text, disallowed_special=())
|
|
264
|
+
|
|
265
|
+
def decode(self, token_ids: List[int]) -> str:
|
|
266
|
+
r"""Decode token IDs back to text.
|
|
267
|
+
|
|
268
|
+
Args:
|
|
269
|
+
token_ids (List[int]): List of token IDs to decode.
|
|
270
|
+
|
|
271
|
+
Returns:
|
|
272
|
+
str: Decoded text.
|
|
273
|
+
"""
|
|
274
|
+
return self.encoding.decode(token_ids)
|
|
275
|
+
|
|
230
276
|
|
|
231
277
|
class AnthropicTokenCounter(BaseTokenCounter):
|
|
232
278
|
@dependencies_required('anthropic')
|
|
@@ -266,6 +312,33 @@ class AnthropicTokenCounter(BaseTokenCounter):
|
|
|
266
312
|
model=self.model,
|
|
267
313
|
).input_tokens
|
|
268
314
|
|
|
315
|
+
def encode(self, text: str) -> List[int]:
|
|
316
|
+
r"""Encode text into token IDs.
|
|
317
|
+
|
|
318
|
+
Args:
|
|
319
|
+
text (str): The text to encode.
|
|
320
|
+
|
|
321
|
+
Returns:
|
|
322
|
+
List[int]: List of token IDs.
|
|
323
|
+
"""
|
|
324
|
+
raise NotImplementedError(
|
|
325
|
+
"The Anthropic API does not provide direct access to token IDs. "
|
|
326
|
+
"Use count_tokens_from_messages() for token counting instead."
|
|
327
|
+
)
|
|
328
|
+
|
|
329
|
+
def decode(self, token_ids: List[int]) -> str:
|
|
330
|
+
r"""Decode token IDs back to text.
|
|
331
|
+
|
|
332
|
+
Args:
|
|
333
|
+
token_ids (List[int]): List of token IDs to decode.
|
|
334
|
+
|
|
335
|
+
Returns:
|
|
336
|
+
str: Decoded text.
|
|
337
|
+
"""
|
|
338
|
+
raise NotImplementedError(
|
|
339
|
+
"The Anthropic API does not provide functionality to decode token IDs."
|
|
340
|
+
)
|
|
341
|
+
|
|
269
342
|
|
|
270
343
|
class LiteLLMTokenCounter(BaseTokenCounter):
|
|
271
344
|
def __init__(self, model_type: UnifiedModelType):
|
|
@@ -319,6 +392,32 @@ class LiteLLMTokenCounter(BaseTokenCounter):
|
|
|
319
392
|
"""
|
|
320
393
|
return self.completion_cost(completion_response=response)
|
|
321
394
|
|
|
395
|
+
def encode(self, text: str) -> List[int]:
|
|
396
|
+
r"""Encode text into token IDs.
|
|
397
|
+
|
|
398
|
+
Args:
|
|
399
|
+
text (str): The text to encode.
|
|
400
|
+
|
|
401
|
+
Returns:
|
|
402
|
+
List[int]: List of token IDs.
|
|
403
|
+
"""
|
|
404
|
+
from litellm import encoding
|
|
405
|
+
|
|
406
|
+
return encoding.encode(text, disallowed_special=())
|
|
407
|
+
|
|
408
|
+
def decode(self, token_ids: List[int]) -> str:
|
|
409
|
+
r"""Decode token IDs back to text.
|
|
410
|
+
|
|
411
|
+
Args:
|
|
412
|
+
token_ids (List[int]): List of token IDs to decode.
|
|
413
|
+
|
|
414
|
+
Returns:
|
|
415
|
+
str: Decoded text.
|
|
416
|
+
"""
|
|
417
|
+
from litellm import encoding
|
|
418
|
+
|
|
419
|
+
return encoding.decode(token_ids)
|
|
420
|
+
|
|
322
421
|
|
|
323
422
|
class MistralTokenCounter(BaseTokenCounter):
|
|
324
423
|
def __init__(self, model_type: ModelType):
|
|
@@ -390,3 +489,37 @@ class MistralTokenCounter(BaseTokenCounter):
|
|
|
390
489
|
)
|
|
391
490
|
|
|
392
491
|
return mistral_request
|
|
492
|
+
|
|
493
|
+
def encode(self, text: str) -> List[int]:
|
|
494
|
+
r"""Encode text into token IDs.
|
|
495
|
+
|
|
496
|
+
Args:
|
|
497
|
+
text (str): The text to encode.
|
|
498
|
+
|
|
499
|
+
Returns:
|
|
500
|
+
List[int]: List of token IDs.
|
|
501
|
+
"""
|
|
502
|
+
# Use the Mistral tokenizer to encode the text
|
|
503
|
+
return self.tokenizer.encode_chat_completion(
|
|
504
|
+
ChatCompletionRequest(
|
|
505
|
+
model=self.model_type,
|
|
506
|
+
messages=[
|
|
507
|
+
{
|
|
508
|
+
"role": "user",
|
|
509
|
+
"content": text,
|
|
510
|
+
}
|
|
511
|
+
],
|
|
512
|
+
)
|
|
513
|
+
)
|
|
514
|
+
|
|
515
|
+
def decode(self, token_ids: List[int]) -> str:
|
|
516
|
+
r"""Decode token IDs back to text.
|
|
517
|
+
|
|
518
|
+
Args:
|
|
519
|
+
token_ids (List[int]): List of token IDs to decode.
|
|
520
|
+
|
|
521
|
+
Returns:
|
|
522
|
+
str: Decoded text.
|
|
523
|
+
"""
|
|
524
|
+
# Use the Mistral tokenizer to decode the tokens
|
|
525
|
+
return self.tokenizer.decode(token_ids)
|
camel/verifiers/__init__.py
CHANGED
|
@@ -12,12 +12,11 @@
|
|
|
12
12
|
# limitations under the License.
|
|
13
13
|
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
14
14
|
from .base import BaseVerifier
|
|
15
|
-
from .models import VerificationOutcome, VerifierInput
|
|
15
|
+
from .models import VerificationOutcome
|
|
16
16
|
from .python_verifier import PythonVerifier
|
|
17
17
|
|
|
18
18
|
__all__ = [
|
|
19
19
|
"BaseVerifier",
|
|
20
20
|
"VerificationOutcome",
|
|
21
|
-
"VerifierInput",
|
|
22
21
|
"PythonVerifier",
|
|
23
22
|
]
|
camel/verifiers/base.py
CHANGED
|
@@ -19,11 +19,7 @@ from typing import List, Optional
|
|
|
19
19
|
from camel.logger import get_logger
|
|
20
20
|
from camel.utils import BatchProcessor
|
|
21
21
|
|
|
22
|
-
from .models import (
|
|
23
|
-
VerificationOutcome,
|
|
24
|
-
VerificationResult,
|
|
25
|
-
VerifierInput,
|
|
26
|
-
)
|
|
22
|
+
from .models import VerificationOutcome, VerificationResult
|
|
27
23
|
|
|
28
24
|
logger = get_logger(__name__)
|
|
29
25
|
|
|
@@ -157,26 +153,33 @@ class BaseVerifier(ABC):
|
|
|
157
153
|
r"""Implement verifier-specific cleanup logic."""
|
|
158
154
|
pass
|
|
159
155
|
|
|
160
|
-
async def verify(
|
|
156
|
+
async def verify(
|
|
157
|
+
self, solution: str, ground_truth: Optional[str]
|
|
158
|
+
) -> VerificationResult:
|
|
161
159
|
r"""Perform verification with full error handling.
|
|
162
160
|
|
|
163
|
-
|
|
164
|
-
|
|
161
|
+
This method verifies the correctness of a generated solution by
|
|
162
|
+
comparing it against the provided ground truth. It handles
|
|
163
|
+
execution errors, timeouts, and retry attempts to ensure robust
|
|
164
|
+
validation.
|
|
165
165
|
|
|
166
166
|
Args:
|
|
167
|
-
|
|
167
|
+
solution (str): The generated response that needs verification.
|
|
168
|
+
ground_truth (Optional[str]): The expected correct answer to
|
|
169
|
+
compare against.
|
|
168
170
|
|
|
169
171
|
Returns:
|
|
170
|
-
VerificationResult:
|
|
171
|
-
- status
|
|
172
|
-
- result:
|
|
173
|
-
- duration: Time taken for verification
|
|
174
|
-
- metadata: Additional details
|
|
175
|
-
- error_message: Error description
|
|
172
|
+
VerificationResult: A structured object containing:
|
|
173
|
+
- status (SUCCESS/FAILURE/ERROR/TIMEOUT)
|
|
174
|
+
- result (str): The verification outcome or processed output.
|
|
175
|
+
- duration (float): Time taken for verification.
|
|
176
|
+
- metadata (dict): Additional details such as retry attempts.
|
|
177
|
+
- error_message (Optional[str]): Error description,
|
|
178
|
+
if applicable.
|
|
176
179
|
|
|
177
180
|
Raises:
|
|
178
181
|
RuntimeError: If verification fails unexpectedly.
|
|
179
|
-
asyncio.TimeoutError: If verification
|
|
182
|
+
asyncio.TimeoutError: If verification exceeds the time limit.
|
|
180
183
|
"""
|
|
181
184
|
if not self._is_setup:
|
|
182
185
|
logger.warning(
|
|
@@ -191,11 +194,13 @@ class BaseVerifier(ABC):
|
|
|
191
194
|
try:
|
|
192
195
|
verification_result = (
|
|
193
196
|
await asyncio.wait_for(
|
|
194
|
-
self._verify_implementation(
|
|
197
|
+
self._verify_implementation(solution, ground_truth),
|
|
195
198
|
timeout=self._timeout,
|
|
196
199
|
)
|
|
197
200
|
if self._timeout
|
|
198
|
-
else await self._verify_implementation(
|
|
201
|
+
else await self._verify_implementation(
|
|
202
|
+
solution, ground_truth
|
|
203
|
+
)
|
|
199
204
|
)
|
|
200
205
|
|
|
201
206
|
verification_result.duration = time.time() - start_time
|
|
@@ -240,101 +245,133 @@ class BaseVerifier(ABC):
|
|
|
240
245
|
|
|
241
246
|
@abstractmethod
|
|
242
247
|
async def _verify_implementation(
|
|
243
|
-
self,
|
|
248
|
+
self, solution: str, ground_truth: Optional[str]
|
|
244
249
|
) -> VerificationResult:
|
|
245
|
-
r"""
|
|
250
|
+
r"""Abstract method for verification logic.
|
|
251
|
+
|
|
252
|
+
Subclasses must implement this method to define how the solution
|
|
253
|
+
should be processed, evaluated, and compared to the ground truth.
|
|
246
254
|
|
|
247
255
|
Args:
|
|
248
|
-
|
|
256
|
+
solution (str): The generated response requiring verification.
|
|
257
|
+
ground_truth (Optional[str]): The expected reference output.
|
|
249
258
|
|
|
250
259
|
Returns:
|
|
251
|
-
VerificationResult:
|
|
260
|
+
VerificationResult: Contains verification status and details.
|
|
252
261
|
|
|
253
262
|
Raises:
|
|
254
|
-
NotImplementedError:
|
|
263
|
+
NotImplementedError: If the method is not implemented
|
|
264
|
+
in a subclass.
|
|
255
265
|
"""
|
|
256
266
|
raise NotImplementedError(
|
|
257
267
|
"Subclasses must implement _verify_implementation()"
|
|
258
268
|
)
|
|
259
269
|
|
|
270
|
+
async def verify_batch(
|
|
271
|
+
self,
|
|
272
|
+
solutions: List[str],
|
|
273
|
+
ground_truths: List[Optional[str]],
|
|
274
|
+
raise_on_error: bool = False,
|
|
275
|
+
) -> List[VerificationResult]:
|
|
276
|
+
r"""Verify multiple solutions in parallel with controlled concurrency.
|
|
260
277
|
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
r"""Verify multiple results in parallel with controlled concurrency.
|
|
278
|
+
This method verifies multiple generated solutions against their
|
|
279
|
+
respective ground truths using parallel execution. It handles
|
|
280
|
+
timeouts, execution errors, and batch processing optimizations.
|
|
265
281
|
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
282
|
+
Args:
|
|
283
|
+
solutions (List[str]): A list of generated solutions to be
|
|
284
|
+
verified.
|
|
285
|
+
ground_truths (List[Optional[str]]): A list of expected outputs for
|
|
286
|
+
comparison. Each element corresponds to a solution.
|
|
287
|
+
raise_on_error (bool, optional): If True, raises an exception if
|
|
288
|
+
any verification fails. (default: :obj:`False`)
|
|
270
289
|
|
|
271
|
-
|
|
272
|
-
|
|
290
|
+
Returns:
|
|
291
|
+
List[VerificationResult]: A list of verification results, one per
|
|
292
|
+
input solution.
|
|
273
293
|
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
# Get current batch parameters from processor with defaults if not
|
|
286
|
-
# present
|
|
287
|
-
max_workers = getattr(
|
|
288
|
-
self._batch_processor, 'max_workers', self._max_parallel or 1
|
|
289
|
-
)
|
|
290
|
-
batch_size = getattr(
|
|
291
|
-
self._batch_processor, 'batch_size', self._initial_batch_size or 10
|
|
292
|
-
)
|
|
293
|
-
semaphore = asyncio.Semaphore(max(1, max_workers))
|
|
294
|
-
|
|
295
|
-
async def _verify_with_semaphore(
|
|
296
|
-
response: VerifierInput,
|
|
297
|
-
) -> VerificationResult:
|
|
298
|
-
start_time = time.time()
|
|
299
|
-
try:
|
|
300
|
-
async with semaphore:
|
|
301
|
-
verification_result = await self.verify(response)
|
|
302
|
-
processing_time = time.time() - start_time
|
|
303
|
-
success = verification_result.status == VerificationOutcome.SUCCESS
|
|
304
|
-
self._batch_processor.adjust_batch_size(success, processing_time)
|
|
305
|
-
return verification_result
|
|
306
|
-
except Exception as e:
|
|
307
|
-
processing_time = time.time() - start_time
|
|
308
|
-
self._batch_processor.adjust_batch_size(False, processing_time)
|
|
309
|
-
logger.error(f"Verification failed: {e!s}", exc_info=True)
|
|
310
|
-
return VerificationResult(
|
|
311
|
-
status=VerificationOutcome.ERROR,
|
|
312
|
-
result="",
|
|
313
|
-
error_message=str(e),
|
|
314
|
-
metadata={"error_type": type(e).__name__},
|
|
294
|
+
Raises:
|
|
295
|
+
RuntimeError: If any verification fails and `raise_on_error` is
|
|
296
|
+
True.
|
|
297
|
+
asyncio.TimeoutError: If verifications time out after maximum
|
|
298
|
+
retries.
|
|
299
|
+
"""
|
|
300
|
+
|
|
301
|
+
if not self._is_setup:
|
|
302
|
+
logger.warning(
|
|
303
|
+
f"{self.__class__.__name__} not set up, calling setup()"
|
|
315
304
|
)
|
|
305
|
+
await self.setup()
|
|
316
306
|
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
batch_results = await asyncio.gather(*verification_tasks)
|
|
326
|
-
all_results.extend(batch_results)
|
|
327
|
-
except Exception as e:
|
|
328
|
-
logger.error(f"Batch verification failed: {e!s}", exc_info=True)
|
|
329
|
-
if raise_on_error:
|
|
330
|
-
raise RuntimeError(f"Batch verification failed: {e!s}") from e
|
|
307
|
+
# Retrieve batch processing settings
|
|
308
|
+
max_workers = getattr(
|
|
309
|
+
self._batch_processor, 'max_workers', self._max_parallel or 1
|
|
310
|
+
)
|
|
311
|
+
batch_size = getattr(
|
|
312
|
+
self._batch_processor, 'batch_size', self._initial_batch_size or 10
|
|
313
|
+
)
|
|
314
|
+
semaphore = asyncio.Semaphore(max(1, max_workers))
|
|
331
315
|
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
316
|
+
async def _verify_with_semaphore(
|
|
317
|
+
solution: str, ground_truth: Optional[str]
|
|
318
|
+
) -> VerificationResult:
|
|
319
|
+
start_time = time.time()
|
|
320
|
+
try:
|
|
321
|
+
async with semaphore:
|
|
322
|
+
verification_result = await self.verify(
|
|
323
|
+
solution, ground_truth
|
|
324
|
+
)
|
|
325
|
+
processing_time = time.time() - start_time
|
|
326
|
+
success = (
|
|
327
|
+
verification_result.status == VerificationOutcome.SUCCESS
|
|
328
|
+
)
|
|
329
|
+
self._batch_processor.adjust_batch_size(
|
|
330
|
+
success, processing_time
|
|
331
|
+
)
|
|
332
|
+
return verification_result
|
|
333
|
+
except Exception as e:
|
|
334
|
+
processing_time = time.time() - start_time
|
|
335
|
+
self._batch_processor.adjust_batch_size(False, processing_time)
|
|
336
|
+
logger.error(f"Verification failed: {e!s}", exc_info=True)
|
|
337
|
+
return VerificationResult(
|
|
338
|
+
status=VerificationOutcome.ERROR,
|
|
339
|
+
result="",
|
|
340
|
+
error_message=str(e),
|
|
341
|
+
metadata={"error_type": type(e).__name__},
|
|
342
|
+
)
|
|
343
|
+
|
|
344
|
+
# Process in batches
|
|
345
|
+
all_results: List[VerificationResult] = []
|
|
346
|
+
for i in range(0, len(solutions), batch_size):
|
|
347
|
+
batch_solutions = solutions[i : i + batch_size]
|
|
348
|
+
batch_ground_truths = ground_truths[i : i + batch_size]
|
|
339
349
|
|
|
340
|
-
|
|
350
|
+
verification_tasks = [
|
|
351
|
+
_verify_with_semaphore(solution, ground_truth)
|
|
352
|
+
for solution, ground_truth in zip(
|
|
353
|
+
batch_solutions, batch_ground_truths
|
|
354
|
+
)
|
|
355
|
+
]
|
|
356
|
+
try:
|
|
357
|
+
batch_results = await asyncio.gather(*verification_tasks)
|
|
358
|
+
all_results.extend(batch_results)
|
|
359
|
+
except Exception as e:
|
|
360
|
+
logger.error(
|
|
361
|
+
f"Batch verification failed: {e!s}", exc_info=True
|
|
362
|
+
)
|
|
363
|
+
if raise_on_error:
|
|
364
|
+
raise RuntimeError(
|
|
365
|
+
f"Batch verification failed: {e!s}"
|
|
366
|
+
) from e
|
|
367
|
+
|
|
368
|
+
if raise_on_error and any(
|
|
369
|
+
r.status
|
|
370
|
+
in {VerificationOutcome.ERROR, VerificationOutcome.TIMEOUT}
|
|
371
|
+
for r in all_results
|
|
372
|
+
):
|
|
373
|
+
error_msg = "One or more verifications failed"
|
|
374
|
+
logger.error(error_msg)
|
|
375
|
+
raise RuntimeError(error_msg)
|
|
376
|
+
|
|
377
|
+
return all_results
|
camel/verifiers/models.py
CHANGED
|
@@ -18,18 +18,6 @@ from typing import Any, Dict, Optional
|
|
|
18
18
|
from pydantic import BaseModel, Field
|
|
19
19
|
|
|
20
20
|
|
|
21
|
-
class VerifierInput(BaseModel):
|
|
22
|
-
r"""Structured input to the verifier"""
|
|
23
|
-
|
|
24
|
-
llm_response: str = Field(
|
|
25
|
-
description="The LLM response to be verified."
|
|
26
|
-
"Needs to be in a format that the verifier can handle."
|
|
27
|
-
)
|
|
28
|
-
ground_truth: Optional[str] = Field(
|
|
29
|
-
None, description="The ground truth data, if available."
|
|
30
|
-
)
|
|
31
|
-
|
|
32
|
-
|
|
33
21
|
class VerificationOutcome(Enum):
|
|
34
22
|
r"""Enum representing the status of a verification."""
|
|
35
23
|
|
camel/verifiers/python_verifier.py
CHANGED
|
@@ -20,10 +20,11 @@ import tempfile
|
|
|
20
20
|
import venv
|
|
21
21
|
from typing import List, Optional
|
|
22
22
|
|
|
23
|
+
from camel.extractors import BaseExtractor
|
|
23
24
|
from camel.logger import get_logger
|
|
24
25
|
from camel.verifiers import BaseVerifier
|
|
25
26
|
|
|
26
|
-
from .models import VerificationOutcome, VerificationResult
|
|
27
|
+
from .models import VerificationOutcome, VerificationResult
|
|
27
28
|
|
|
28
29
|
logger = get_logger(__name__)
|
|
29
30
|
|
|
@@ -47,6 +48,7 @@ class PythonVerifier(BaseVerifier):
|
|
|
47
48
|
self,
|
|
48
49
|
timeout: Optional[float] = 30.0,
|
|
49
50
|
required_packages: Optional[List[str]] = None,
|
|
51
|
+
extractor: Optional[BaseExtractor] = None,
|
|
50
52
|
):
|
|
51
53
|
r"""Initializes the PythonVerifier.
|
|
52
54
|
|
|
@@ -102,24 +104,33 @@ class PythonVerifier(BaseVerifier):
|
|
|
102
104
|
self.venv_path = None
|
|
103
105
|
|
|
104
106
|
async def _verify_implementation(
|
|
105
|
-
self,
|
|
107
|
+
self, solution: str, ground_truth: Optional[str]
|
|
106
108
|
) -> VerificationResult:
|
|
107
|
-
r"""Executes the LLM-generated
|
|
108
|
-
environment.
|
|
109
|
+
r"""Executes and verifies the LLM-generated Python solution in an
|
|
110
|
+
isolated virtual environment.
|
|
111
|
+
|
|
112
|
+
This method runs the given Python solution inside a controlled virtual
|
|
113
|
+
environment, captures its execution output, and optionally compares it
|
|
114
|
+
against a provided ground truth. Handles timeouts and execution errors.
|
|
109
115
|
|
|
110
116
|
Args:
|
|
111
|
-
|
|
112
|
-
|
|
117
|
+
solution (str): The Python code to execute and verify.
|
|
118
|
+
ground_truth (Optional[str]): The expected output for comparison.
|
|
119
|
+
If None, verification is based only on execution success.
|
|
113
120
|
|
|
114
121
|
Returns:
|
|
115
|
-
VerificationResult:
|
|
116
|
-
|
|
117
|
-
|
|
122
|
+
VerificationResult: A structured object containing:
|
|
123
|
+
- status (VerificationOutcome): SUCCESS, FAILURE, ERROR,
|
|
124
|
+
or TIMEOUT.
|
|
125
|
+
- result (str): The execution output of the solution.
|
|
126
|
+
- error_message (Optional[str]): Captured error message,
|
|
127
|
+
if any.
|
|
128
|
+
- duration (float, optional): Execution time (set externally).
|
|
118
129
|
|
|
119
130
|
Raises:
|
|
120
131
|
asyncio.TimeoutError: If execution exceeds the configured timeout.
|
|
121
|
-
Exception: Any unexpected errors
|
|
122
|
-
|
|
132
|
+
Exception: Any unexpected errors are caught and converted to an
|
|
133
|
+
ERROR verification result.
|
|
123
134
|
"""
|
|
124
135
|
if not self.venv_path:
|
|
125
136
|
return VerificationResult(
|
|
@@ -128,7 +139,7 @@ class PythonVerifier(BaseVerifier):
|
|
|
128
139
|
error_message="Virtual environment is not set up.",
|
|
129
140
|
)
|
|
130
141
|
|
|
131
|
-
script =
|
|
142
|
+
script = solution.strip()
|
|
132
143
|
venv_python = os.path.join(self.venv_path, self.bin_dir, "python")
|
|
133
144
|
|
|
134
145
|
if not os.path.exists(venv_python):
|
|
@@ -156,11 +167,11 @@ class PythonVerifier(BaseVerifier):
|
|
|
156
167
|
|
|
157
168
|
if process.returncode == 0:
|
|
158
169
|
# If ground truth is provided, compare it with the result
|
|
159
|
-
if
|
|
170
|
+
if ground_truth is not None:
|
|
160
171
|
# Normalize both strings by removing extra whitespace
|
|
161
172
|
normalized_output = ' '.join(output_result.strip().split())
|
|
162
173
|
normalized_truth = ' '.join(
|
|
163
|
-
str(
|
|
174
|
+
str(ground_truth).strip().split()
|
|
164
175
|
)
|
|
165
176
|
|
|
166
177
|
if normalized_output == normalized_truth:
|
{camel_ai-0.2.34.dist-info → camel_ai-0.2.36.dist-info}/METADATA
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: camel-ai
|
|
3
|
-
Version: 0.2.34
|
|
3
|
+
Version: 0.2.36
|
|
4
4
|
Summary: Communicative Agents for AI Society Study
|
|
5
5
|
Project-URL: Homepage, https://www.camel-ai.org/
|
|
6
6
|
Project-URL: Repository, https://github.com/camel-ai/camel
|
|
@@ -58,6 +58,7 @@ Requires-Dist: jupyter-client<9,>=8.6.2; extra == 'all'
|
|
|
58
58
|
Requires-Dist: linkup-sdk<0.3,>=0.2.1; extra == 'all'
|
|
59
59
|
Requires-Dist: litellm<2,>=1.38.1; extra == 'all'
|
|
60
60
|
Requires-Dist: mcp>=1.3.0; extra == 'all'
|
|
61
|
+
Requires-Dist: mem0ai>=0.1.67; extra == 'all'
|
|
61
62
|
Requires-Dist: mistralai<2,>=1.1.0; extra == 'all'
|
|
62
63
|
Requires-Dist: mock<6,>=5; extra == 'all'
|
|
63
64
|
Requires-Dist: mypy<2,>=1.5.1; extra == 'all'
|
|
@@ -225,6 +226,7 @@ Provides-Extra: storage
|
|
|
225
226
|
Requires-Dist: azure-storage-blob<13,>=12.21.0; extra == 'storage'
|
|
226
227
|
Requires-Dist: botocore<2,>=1.35.3; extra == 'storage'
|
|
227
228
|
Requires-Dist: google-cloud-storage<3,>=2.18.0; extra == 'storage'
|
|
229
|
+
Requires-Dist: mem0ai>=0.1.73; extra == 'storage'
|
|
228
230
|
Requires-Dist: nebula3-python==3.8.2; extra == 'storage'
|
|
229
231
|
Requires-Dist: neo4j<6,>=5.18.0; extra == 'storage'
|
|
230
232
|
Requires-Dist: pymilvus<3,>=2.4.0; extra == 'storage'
|