camel-ai 0.2.21__py3-none-any.whl → 0.2.23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of camel-ai might be problematic. Click here for more details.

Files changed (116):
  1. camel/__init__.py +1 -1
  2. camel/agents/_types.py +41 -0
  3. camel/agents/_utils.py +188 -0
  4. camel/agents/chat_agent.py +570 -965
  5. camel/agents/knowledge_graph_agent.py +7 -1
  6. camel/agents/multi_hop_generator_agent.py +1 -1
  7. camel/configs/base_config.py +10 -13
  8. camel/configs/deepseek_config.py +4 -30
  9. camel/configs/gemini_config.py +5 -31
  10. camel/configs/openai_config.py +14 -32
  11. camel/configs/qwen_config.py +36 -36
  12. camel/datagen/self_improving_cot.py +81 -3
  13. camel/datagen/self_instruct/filter/instruction_filter.py +19 -3
  14. camel/datagen/self_instruct/self_instruct.py +53 -4
  15. camel/datasets/__init__.py +28 -0
  16. camel/datasets/base.py +969 -0
  17. camel/embeddings/openai_embedding.py +10 -1
  18. camel/environments/__init__.py +16 -0
  19. camel/environments/base.py +503 -0
  20. camel/extractors/__init__.py +16 -0
  21. camel/extractors/base.py +263 -0
  22. camel/interpreters/docker/Dockerfile +12 -0
  23. camel/interpreters/docker_interpreter.py +19 -1
  24. camel/interpreters/subprocess_interpreter.py +42 -17
  25. camel/loaders/__init__.py +2 -0
  26. camel/loaders/mineru_extractor.py +250 -0
  27. camel/memories/agent_memories.py +16 -1
  28. camel/memories/blocks/chat_history_block.py +10 -2
  29. camel/memories/blocks/vectordb_block.py +1 -0
  30. camel/memories/context_creators/score_based.py +20 -3
  31. camel/memories/records.py +10 -0
  32. camel/messages/base.py +8 -8
  33. camel/models/__init__.py +2 -0
  34. camel/models/_utils.py +57 -0
  35. camel/models/aiml_model.py +48 -17
  36. camel/models/anthropic_model.py +41 -3
  37. camel/models/azure_openai_model.py +39 -3
  38. camel/models/base_audio_model.py +92 -0
  39. camel/models/base_model.py +132 -4
  40. camel/models/cohere_model.py +88 -11
  41. camel/models/deepseek_model.py +107 -63
  42. camel/models/fish_audio_model.py +18 -8
  43. camel/models/gemini_model.py +133 -15
  44. camel/models/groq_model.py +72 -10
  45. camel/models/internlm_model.py +14 -3
  46. camel/models/litellm_model.py +9 -2
  47. camel/models/mistral_model.py +42 -5
  48. camel/models/model_manager.py +57 -3
  49. camel/models/moonshot_model.py +33 -4
  50. camel/models/nemotron_model.py +32 -3
  51. camel/models/nvidia_model.py +43 -3
  52. camel/models/ollama_model.py +139 -17
  53. camel/models/openai_audio_models.py +87 -2
  54. camel/models/openai_compatible_model.py +37 -3
  55. camel/models/openai_model.py +158 -46
  56. camel/models/qwen_model.py +61 -4
  57. camel/models/reka_model.py +53 -3
  58. camel/models/samba_model.py +209 -4
  59. camel/models/sglang_model.py +153 -14
  60. camel/models/siliconflow_model.py +16 -3
  61. camel/models/stub_model.py +46 -4
  62. camel/models/togetherai_model.py +38 -3
  63. camel/models/vllm_model.py +37 -3
  64. camel/models/yi_model.py +36 -3
  65. camel/models/zhipuai_model.py +38 -3
  66. camel/retrievers/__init__.py +3 -0
  67. camel/retrievers/hybrid_retrival.py +237 -0
  68. camel/toolkits/__init__.py +20 -3
  69. camel/toolkits/arxiv_toolkit.py +2 -1
  70. camel/toolkits/ask_news_toolkit.py +4 -2
  71. camel/toolkits/audio_analysis_toolkit.py +238 -0
  72. camel/toolkits/base.py +22 -3
  73. camel/toolkits/code_execution.py +2 -0
  74. camel/toolkits/dappier_toolkit.py +2 -1
  75. camel/toolkits/data_commons_toolkit.py +38 -12
  76. camel/toolkits/excel_toolkit.py +172 -0
  77. camel/toolkits/function_tool.py +13 -0
  78. camel/toolkits/github_toolkit.py +5 -1
  79. camel/toolkits/google_maps_toolkit.py +2 -1
  80. camel/toolkits/google_scholar_toolkit.py +2 -0
  81. camel/toolkits/human_toolkit.py +0 -3
  82. camel/toolkits/image_analysis_toolkit.py +202 -0
  83. camel/toolkits/linkedin_toolkit.py +3 -2
  84. camel/toolkits/meshy_toolkit.py +3 -2
  85. camel/toolkits/mineru_toolkit.py +178 -0
  86. camel/toolkits/networkx_toolkit.py +240 -0
  87. camel/toolkits/notion_toolkit.py +2 -0
  88. camel/toolkits/openbb_toolkit.py +3 -2
  89. camel/toolkits/page_script.js +376 -0
  90. camel/toolkits/reddit_toolkit.py +11 -3
  91. camel/toolkits/retrieval_toolkit.py +6 -1
  92. camel/toolkits/semantic_scholar_toolkit.py +2 -1
  93. camel/toolkits/stripe_toolkit.py +8 -2
  94. camel/toolkits/sympy_toolkit.py +44 -1
  95. camel/toolkits/video_analysis_toolkit.py +407 -0
  96. camel/toolkits/{video_toolkit.py → video_download_toolkit.py} +21 -25
  97. camel/toolkits/web_toolkit.py +1307 -0
  98. camel/toolkits/whatsapp_toolkit.py +3 -2
  99. camel/toolkits/zapier_toolkit.py +191 -0
  100. camel/types/__init__.py +2 -2
  101. camel/types/agents/__init__.py +16 -0
  102. camel/types/agents/tool_calling_record.py +52 -0
  103. camel/types/enums.py +3 -0
  104. camel/types/openai_types.py +16 -14
  105. camel/utils/__init__.py +2 -1
  106. camel/utils/async_func.py +2 -2
  107. camel/utils/commons.py +114 -1
  108. camel/verifiers/__init__.py +23 -0
  109. camel/verifiers/base.py +340 -0
  110. camel/verifiers/models.py +82 -0
  111. camel/verifiers/python_verifier.py +202 -0
  112. camel_ai-0.2.23.dist-info/METADATA +671 -0
  113. {camel_ai-0.2.21.dist-info → camel_ai-0.2.23.dist-info}/RECORD +127 -99
  114. {camel_ai-0.2.21.dist-info → camel_ai-0.2.23.dist-info}/WHEEL +1 -1
  115. camel_ai-0.2.21.dist-info/METADATA +0 -528
  116. {camel_ai-0.2.21.dist-info → camel_ai-0.2.23.dist-info/licenses}/LICENSE +0 -0
@@ -0,0 +1,340 @@
1
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
+ import asyncio
15
+ import time
16
+ from abc import ABC, abstractmethod
17
+ from typing import List, Optional
18
+
19
+ from camel.logger import get_logger
20
+ from camel.utils import BatchProcessor
21
+
22
+ from .models import (
23
+ VerificationOutcome,
24
+ VerificationResult,
25
+ VerifierInput,
26
+ )
27
+
28
+ logger = get_logger(__name__)
29
+
30
+
31
class BaseVerifier(ABC):
    r"""Base class for all verifiers.

    Subclasses implement :meth:`_setup`, :meth:`_cleanup` and
    :meth:`_verify_implementation`; this class adds the lifecycle handling,
    the retry/timeout loop and batched execution around them.

    Example:
        ```python
        verifier = MyVerifier()
        await verifier.setup()
        result = await verifier.verify(response)
        await verifier.cleanup()
        ```

    Key Features:
    - Async verification with retry logic
    - Comprehensive error handling and logging
    - Configurable batch processing
    - Resource monitoring for adaptive scaling
    """

    def __init__(
        self,
        max_parallel: Optional[int] = None,
        timeout: Optional[float] = None,
        max_retries: int = 3,
        retry_delay: float = 1.0,
        initial_batch_size: Optional[int] = None,
        cpu_threshold: float = 80.0,
        memory_threshold: float = 85.0,
        **kwargs,
    ):
        r"""Initialize the verifier with configuration parameters.

        Args:
            max_parallel: Maximum number of parallel verifications. If None,
                determined dynamically based on system resources.
                (default: :obj:`None`)
            timeout: Timeout in seconds for each verification. (default:
                :obj:`None`)
            max_retries: Maximum number of retry attempts. Values below 1
                still result in a single attempt. (default: :obj:`3`)
            retry_delay: Delay between retries in seconds. (default:
                :obj:`1.0`)
            initial_batch_size: Initial size for batch processing. If None,
                defaults to 10. (default: :obj:`None`)
            cpu_threshold: CPU usage percentage threshold for scaling down.
                (default: :obj:`80.0`)
            memory_threshold: Memory usage percentage threshold for scaling
                down. (default: :obj:`85.0`)
            **kwargs: Additional verifier parameters. They are accepted for
                forward compatibility and not used by this base class.
        """
        self._is_setup: bool = False
        self._max_parallel: Optional[int] = max_parallel
        self._timeout: Optional[float] = timeout
        self._max_retries: int = max_retries
        self._retry_delay: float = retry_delay
        self._initial_batch_size: Optional[int] = initial_batch_size
        self._cpu_threshold: float = cpu_threshold
        self._memory_threshold: float = memory_threshold
        self._batch_processor: BatchProcessor = BatchProcessor()

    async def setup(self) -> None:
        r"""Set up the verifier with necessary resources.

        Initializes:
        1. A fresh batch processor
        2. Any verifier-specific resources (via :meth:`_setup`)

        Calling this method on an already set-up verifier is a no-op.

        Raises:
            RuntimeError: If setup fails or resources cannot be initialized.
        """
        if self._is_setup:
            logger.debug(f"{self.__class__.__name__} already initialized")
            return

        try:
            batch_size = max(1, self._initial_batch_size or 10)
            max_parallel = max(1, self._max_parallel or 1)
            # NOTE(review): the processor is built with its own defaults, so
            # the two values above are only reported in the log line below.
            # Confirm whether they should be forwarded to BatchProcessor.
            self._batch_processor = BatchProcessor()

            logger.info(
                f"{self.__class__.__name__} initialized with "
                f"batch_size={batch_size}, max_parallel={max_parallel}"
            )

            await self._setup()
            self._is_setup = True

        except Exception as e:
            error_msg = (
                f"Failed to initialize {self.__class__.__name__}: {e!s}"
            )
            logger.error(error_msg, exc_info=True)
            # ``cleanup()`` returns early while ``_is_setup`` is False, so it
            # cannot release what a half-finished ``_setup()`` allocated.
            # Invoke the subclass hook directly, best-effort, and never let a
            # cleanup problem hide the original setup failure.
            try:
                await self._cleanup()
            except Exception as cleanup_error:
                logger.warning(
                    f"Cleanup after failed setup of "
                    f"{self.__class__.__name__} also failed: "
                    f"{cleanup_error!s}"
                )
            raise RuntimeError(error_msg) from e

    @abstractmethod
    async def _setup(self) -> None:
        r"""Implement verifier-specific setup logic."""
        pass

    async def cleanup(self) -> None:
        r"""Clean up verifier resources.

        Ensures:
        1. Batch processor is reset
        2. Verifier-specific resources are released (via :meth:`_cleanup`)
        3. The verifier is marked as not set up, even if cleanup fails

        Calling this method on a verifier that is not set up is a no-op.

        Raises:
            RuntimeError: If cleanup fails.
        """
        if not self._is_setup:
            return

        try:
            self._batch_processor = BatchProcessor()
            await self._cleanup()
            logger.info(f"{self.__class__.__name__} cleaned up successfully")

        except Exception as e:
            error_msg = f"Failed to cleanup {self.__class__.__name__}: {e!s}"
            logger.error(error_msg, exc_info=True)
            raise RuntimeError(error_msg) from e

        finally:
            self._is_setup = False

    @abstractmethod
    async def _cleanup(self) -> None:
        r"""Implement verifier-specific cleanup logic."""
        pass

    async def verify(self, result: VerifierInput) -> VerificationResult:
        r"""Perform verification with retries and full error handling.

        The verifier is set up on demand if :meth:`setup` was not called.
        Each attempt runs :meth:`_verify_implementation`, bounded by the
        configured timeout when one is set. Timeouts and exceptions raised
        by an attempt are retried after ``retry_delay`` seconds; once the
        attempts are exhausted they are reported through the returned
        result rather than raised.

        Args:
            result: The response to verify.

        Returns:
            VerificationResult: Structured result containing:
                - status: SUCCESS/FAILURE/ERROR/TIMEOUT
                - result: Verification outcome description
                - duration: Time taken for verification, including retries
                - metadata: Additional details; ``"attempt"`` holds the
                  number of attempts made
                - error_message: Error description if applicable

        Raises:
            RuntimeError: If the on-demand setup fails.
        """
        if not self._is_setup:
            logger.warning(
                f"{self.__class__.__name__} not set up, calling setup()"
            )
            await self.setup()

        # Always try at least once, even if ``max_retries`` was configured
        # as zero or a negative number.
        max_attempts = max(1, self._max_retries)
        attempt = 0
        start_time = time.time()

        while attempt < max_attempts:
            try:
                if self._timeout:
                    verification_result = await asyncio.wait_for(
                        self._verify_implementation(result),
                        timeout=self._timeout,
                    )
                else:
                    verification_result = await self._verify_implementation(
                        result
                    )

                verification_result.duration = time.time() - start_time
                verification_result.metadata["attempt"] = attempt + 1
                return verification_result

            except asyncio.TimeoutError:
                attempt += 1
                if attempt == max_attempts:
                    return VerificationResult(
                        status=VerificationOutcome.TIMEOUT,
                        result="",
                        error_message="Verification timed out "
                        "after all retries.",
                        duration=time.time() - start_time,
                        metadata={"attempt": attempt},
                    )
                logger.warning(
                    f"Verification timeout on attempt {attempt}, retrying..."
                )
                await asyncio.sleep(self._retry_delay)

            except Exception as e:
                attempt += 1
                if attempt == max_attempts:
                    return VerificationResult(
                        status=VerificationOutcome.ERROR,
                        result="",
                        error_message=f"Verification failed: {e!s}",
                        duration=time.time() - start_time,
                        metadata={"attempt": attempt},
                    )
                await asyncio.sleep(self._retry_delay)

        # Defensive fallback: every loop iteration above returns or retries.
        return VerificationResult(
            status=VerificationOutcome.ERROR,
            result="",
            error_message="Unexpected code path reached",
            duration=time.time() - start_time,
            metadata={"attempt": attempt},
        )

    @abstractmethod
    async def _verify_implementation(
        self, result: VerifierInput
    ) -> VerificationResult:
        r"""Implement the actual verification logic.

        Args:
            result: The response to verify.

        Returns:
            VerificationResult: Containing the verification outcome.

        Raises:
            NotImplementedError: Must be implemented in subclasses.
        """
        raise NotImplementedError(
            "Subclasses must implement _verify_implementation()"
        )

    async def verify_batch(
        self, results: List[VerifierInput], raise_on_error: bool = False
    ) -> List[VerificationResult]:
        r"""Verify multiple results in parallel with controlled concurrency.

        Inputs are processed in consecutive batches; within a batch the
        verifications run concurrently, limited by a semaphore. The returned
        list has one entry per input, in input order.

        Args:
            results: List of responses to verify.
            raise_on_error: Whether to raise an exception if any verification
                fails. (default: :obj:`False`)

        Returns:
            List[VerificationResult]: One for each input response.

        Raises:
            RuntimeError: If any verification ends in ERROR or TIMEOUT (or
                fails unexpectedly) and raise_on_error is True, or if the
                on-demand setup fails.
        """
        if not self._is_setup:
            logger.warning(
                f"{self.__class__.__name__} not set up, calling setup()"
            )
            await self.setup()

        # An explicitly configured ``max_parallel`` takes precedence;
        # otherwise use whatever the batch processor exposes.
        if self._max_parallel:
            max_workers = self._max_parallel
        else:
            max_workers = getattr(self._batch_processor, 'max_workers', 1)
        batch_size = getattr(
            self._batch_processor, 'batch_size', self._initial_batch_size or 10
        )
        # ``range`` rejects a zero step, so never go below one item per batch.
        batch_size = max(1, int(batch_size))
        semaphore = asyncio.Semaphore(max(1, max_workers))

        async def _verify_with_semaphore(
            response: VerifierInput,
        ) -> VerificationResult:
            # The measured time includes waiting for the semaphore.
            start_time = time.time()
            try:
                async with semaphore:
                    verification_result = await self.verify(response)
                processing_time = time.time() - start_time
                success = (
                    verification_result.status == VerificationOutcome.SUCCESS
                )
                self._batch_processor.adjust_batch_size(
                    success, processing_time
                )
                return verification_result
            except Exception as e:
                processing_time = time.time() - start_time
                self._batch_processor.adjust_batch_size(False, processing_time)
                logger.error(f"Verification failed: {e!s}", exc_info=True)
                return VerificationResult(
                    status=VerificationOutcome.ERROR,
                    result="",
                    error_message=str(e),
                    metadata={"error_type": type(e).__name__},
                )

        # Process in batches
        all_results: List[VerificationResult] = []
        for i in range(0, len(results), batch_size):
            batch = results[i : i + batch_size]
            verification_tasks = [
                _verify_with_semaphore(item) for item in batch
            ]
            # ``return_exceptions=True`` keeps one slot per input even when a
            # task fails outside its own error handling, so a single failure
            # can no longer discard the results of the whole batch.
            outcomes = await asyncio.gather(
                *verification_tasks, return_exceptions=True
            )
            for outcome in outcomes:
                if isinstance(outcome, BaseException):
                    logger.error(
                        f"Batch verification failed: {outcome!s}",
                        exc_info=outcome,
                    )
                    if raise_on_error:
                        raise RuntimeError(
                            f"Batch verification failed: {outcome!s}"
                        ) from outcome
                    all_results.append(
                        VerificationResult(
                            status=VerificationOutcome.ERROR,
                            result="",
                            error_message=str(outcome),
                            metadata={"error_type": type(outcome).__name__},
                        )
                    )
                else:
                    all_results.append(outcome)

        if raise_on_error and any(
            r.status in {VerificationOutcome.ERROR, VerificationOutcome.TIMEOUT}
            for r in all_results
        ):
            error_msg = "One or more verifications failed"
            logger.error(error_msg)
            raise RuntimeError(error_msg)

        return all_results
@@ -0,0 +1,82 @@
1
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
+ from datetime import datetime
15
+ from enum import Enum
16
+ from typing import Any, Dict, Optional
17
+
18
+ from pydantic import BaseModel, Field
19
+
20
+
21
class VerifierInput(BaseModel):
    r"""Structured input to the verifier.

    Carries the LLM response that should be checked and, optionally, the
    ground truth it can be compared against.
    """

    # The two adjacent literals are concatenated, so the first one must end
    # with a space to keep the sentences apart in the field description.
    llm_response: str = Field(
        description="The LLM response to be verified. "
        "Needs to be in a format that the verifier can handle."
    )
    ground_truth: Optional[str] = Field(
        None, description="The ground truth data, if available."
    )
31
+
32
+
33
class VerificationOutcome(Enum):
    r"""Possible final statuses of a verification run."""

    SUCCESS = "success"
    FAILURE = "failure"
    ERROR = "error"
    TIMEOUT = "timeout"

    def __bool__(self):
        r"""Return True for SUCCESS and False for every other member."""
        # Member names are unique, so comparing the name is equivalent to an
        # identity check against the SUCCESS member.
        return self.name == "SUCCESS"
44
+
45
+
46
class VerificationResult(BaseModel):
    r"""Outcome record produced by a single verification.

    Only ``status`` and ``result`` are required; every other field falls
    back to its declared default.
    """

    # Required fields.
    status: VerificationOutcome = Field(
        description="Status of the verification",
    )
    result: str = Field(
        description="Verification result",
    )

    # Optional fields with defaults.
    duration: float = Field(
        description="Duration of verification in seconds",
        default=0.0,
    )
    timestamp: datetime = Field(
        description="When the verification was performed",
        # Evaluated per instance, at construction time.
        default_factory=datetime.now,
    )
    metadata: Dict[str, Any] = Field(
        description="Additional metadata about the verification",
        # A factory gives every instance its own dict.
        default_factory=dict,
    )
    error_message: Optional[str] = Field(
        description="Error message if verification failed",
        default=None,
    )
67
+
68
+
69
class VerifierConfig(BaseModel):
    r"""Settings that control how a verifier behaves.

    Every field has a default, so the model can be built without arguments.
    """

    enabled: bool = Field(
        default=True,
        description="Whether verification is enabled",
    )
    strict_mode: bool = Field(
        default=False,
        description="Whether to fail on any validation error",
    )
    timeout: Optional[float] = Field(
        default=None,
        description="Verification timeout in seconds",
    )
    max_retries: int = Field(
        default=3,
        description="Maximum number of retry attempts",
    )
    retry_delay: float = Field(
        default=1.0,
        description="Delay between retries in seconds",
    )
@@ -0,0 +1,202 @@
1
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
+
15
+ import asyncio
16
+ import os
17
+ import shutil
18
+ import subprocess
19
+ import tempfile
20
+ import venv
21
+ from typing import List, Optional
22
+
23
+ from camel.logger import get_logger
24
+ from camel.verifiers import BaseVerifier
25
+
26
+ from .models import VerificationOutcome, VerificationResult, VerifierInput
27
+
28
+ logger = get_logger(__name__)
29
+
30
+
31
class PythonVerifier(BaseVerifier):
    r"""The PythonVerifier class verifies Python-based implementations
    by executing them in an isolated virtual environment.

    Features:
    - Creates a temporary virtual environment with the standard ``venv``
        module.
    - Installs required packages before executing the provided script.
    - Executes the script and compares the output against a ground truth,
        if supplied.
    - Removes the virtual environment when the verifier is cleaned up.

    The verification process ensures that the code runs in a controlled
    environment, minimizing external dependencies and conflicts.
    """

    def __init__(
        self,
        timeout: Optional[float] = 30.0,
        required_packages: Optional[List[str]] = None,
    ):
        r"""Initializes the PythonVerifier.

        Args:
            timeout (Optional[float], optional): The execution timeout in
                seconds. (default: :obj:`30.0`)
            required_packages (Optional[List[str]], optional): A list of
                packages to install in the virtual environment.
                (default: :obj:`None`)
        """
        # TODO: Use CAMEL's Interpreter to execute the code
        super().__init__(timeout=timeout)
        self.venv_path: Optional[str] = None
        self.required_packages = required_packages or []

        if os.name == 'nt':  # Windows
            self.bin_dir = 'Scripts'
        else:  # Unix-like systems
            self.bin_dir = 'bin'

    def _venv_executable(self, venv_path: str, name: str) -> str:
        r"""Return the path of an executable inside the virtual environment.

        On Windows the files carry an ``.exe`` suffix; without it an
        ``os.path.exists`` check on the interpreter can never succeed.

        Args:
            venv_path (str): Root directory of the virtual environment.
            name (str): Base name of the executable, e.g. ``"python"``.

        Returns:
            str: Path to the executable for the current platform.
        """
        suffix = ".exe" if os.name == 'nt' else ""
        return os.path.join(venv_path, self.bin_dir, name + suffix)

    @staticmethod
    async def _kill_process(process: asyncio.subprocess.Process) -> None:
        r"""Kill a still-running child process and wait until it is reaped.

        Args:
            process (asyncio.subprocess.Process): The child process to stop.
        """
        try:
            process.kill()
        except ProcessLookupError:
            # The process exited between the check and the kill.
            return
        await process.wait()

    async def _setup(self) -> None:
        r"""Set up a virtual environment for execution
        and install required packages.

        A failed package installation is logged but does not abort setup.
        """
        # NOTE: ``venv.create`` and ``subprocess.run`` are synchronous and
        # block the event loop while they run.
        self.venv_path = tempfile.mkdtemp()
        venv.create(self.venv_path, with_pip=True)
        logger.info(f"Virtual environment created at {self.venv_path}")

        venv_pip = self._venv_executable(self.venv_path, "pip")

        if self.required_packages:
            try:
                subprocess.run(
                    [venv_pip, "install", *self.required_packages],
                    check=True,
                    capture_output=True,
                )
                logger.info(
                    "Installed required packages: "
                    f"{', '.join(self.required_packages)}"
                )
            except subprocess.CalledProcessError as e:
                logger.error(
                    "Failed to install required packages: "
                    f"{e.stderr.decode(errors='replace').strip()}"
                )

    async def _cleanup(self) -> None:
        r"""Clean up the virtual environment."""
        if self.venv_path:
            shutil.rmtree(self.venv_path)
            logger.info(f"Virtual environment at {self.venv_path} removed")
            self.venv_path = None

    async def _verify_implementation(
        self, result: VerifierInput
    ) -> VerificationResult:
        r"""Executes the LLM-generated response in a Python virtual
        environment.

        The script is passed to the environment's interpreter with ``-c``.
        A zero exit code counts as SUCCESS unless a ground truth is given
        and the whitespace-normalized output differs from it (FAILURE). A
        non-zero exit code yields ERROR with the captured stderr.

        Args:
            result (VerifierInput): Contains the LLM-generated Python code to
                execute and optional ground truth for comparison.

        Returns:
            VerificationResult: Contains verification status (SUCCESS/FAILURE/
                ERROR/TIMEOUT), execution output and error messages if any.
                Timeouts and unexpected errors are converted into TIMEOUT
                and ERROR results instead of being raised.
        """
        if not self.venv_path:
            return VerificationResult(
                status=VerificationOutcome.ERROR,
                result="",
                error_message="Virtual environment is not set up.",
            )

        script = result.llm_response.strip()
        venv_python = self._venv_executable(self.venv_path, "python")

        if not os.path.exists(venv_python):
            return VerificationResult(
                status=VerificationOutcome.ERROR,
                result="",
                error_message="Python binary not found in virtual environment",
            )

        process: Optional[asyncio.subprocess.Process] = None
        try:
            process = await asyncio.create_subprocess_exec(
                venv_python,
                "-c",
                script,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
            )

            stdout, stderr = await asyncio.wait_for(
                process.communicate(), timeout=self._timeout
            )

            # Undecodable bytes are replaced so that unusual program output
            # is reported instead of turning into an execution error.
            output_result = stdout.decode(errors="replace").strip()
            error_output = stderr.decode(errors="replace").strip()

            if process.returncode != 0:
                return VerificationResult(
                    status=VerificationOutcome.ERROR,
                    error_message=error_output,
                    result=output_result,
                )

            if result.ground_truth is None:
                return VerificationResult(
                    status=VerificationOutcome.SUCCESS,
                    result=output_result,
                )

            # Normalize both strings by collapsing runs of whitespace
            normalized_output = ' '.join(output_result.split())
            normalized_truth = ' '.join(str(result.ground_truth).split())

            if normalized_output == normalized_truth:
                return VerificationResult(
                    status=VerificationOutcome.SUCCESS,
                    result=output_result,
                )
            return VerificationResult(
                status=VerificationOutcome.FAILURE,
                error_message="Output doesn't match ground truth",
                result=output_result,
            )

        except asyncio.TimeoutError:
            return VerificationResult(
                status=VerificationOutcome.TIMEOUT,
                result="",
                error_message="Execution timed out.",
            )

        except Exception as e:
            return VerificationResult(
                status=VerificationOutcome.ERROR,
                result="",
                error_message=f"Execution error: {e}",
            )

        finally:
            # A timeout or cancellation only abandons ``communicate()``; the
            # child itself keeps running unless it is killed explicitly.
            if process is not None and process.returncode is None:
                await self._kill_process(process)