camel-ai 0.2.22__py3-none-any.whl → 0.2.23__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of camel-ai might be problematic. Click here for more details.
- camel/__init__.py +1 -1
- camel/agents/_types.py +41 -0
- camel/agents/_utils.py +188 -0
- camel/agents/chat_agent.py +570 -965
- camel/agents/knowledge_graph_agent.py +7 -1
- camel/agents/multi_hop_generator_agent.py +1 -1
- camel/configs/base_config.py +10 -13
- camel/configs/deepseek_config.py +4 -30
- camel/configs/gemini_config.py +5 -31
- camel/configs/openai_config.py +14 -32
- camel/configs/qwen_config.py +36 -36
- camel/datagen/self_improving_cot.py +81 -3
- camel/datagen/self_instruct/filter/instruction_filter.py +19 -3
- camel/datagen/self_instruct/self_instruct.py +52 -3
- camel/datasets/__init__.py +28 -0
- camel/datasets/base.py +969 -0
- camel/environments/__init__.py +16 -0
- camel/environments/base.py +503 -0
- camel/extractors/__init__.py +16 -0
- camel/extractors/base.py +263 -0
- camel/memories/agent_memories.py +16 -1
- camel/memories/blocks/chat_history_block.py +10 -2
- camel/memories/blocks/vectordb_block.py +1 -0
- camel/memories/context_creators/score_based.py +20 -3
- camel/memories/records.py +10 -0
- camel/messages/base.py +8 -8
- camel/models/__init__.py +2 -0
- camel/models/_utils.py +57 -0
- camel/models/aiml_model.py +48 -17
- camel/models/anthropic_model.py +41 -3
- camel/models/azure_openai_model.py +39 -3
- camel/models/base_audio_model.py +92 -0
- camel/models/base_model.py +88 -13
- camel/models/cohere_model.py +88 -11
- camel/models/deepseek_model.py +107 -45
- camel/models/fish_audio_model.py +18 -8
- camel/models/gemini_model.py +133 -15
- camel/models/groq_model.py +72 -10
- camel/models/internlm_model.py +14 -3
- camel/models/litellm_model.py +9 -2
- camel/models/mistral_model.py +42 -5
- camel/models/model_manager.py +57 -3
- camel/models/moonshot_model.py +33 -4
- camel/models/nemotron_model.py +32 -3
- camel/models/nvidia_model.py +43 -3
- camel/models/ollama_model.py +139 -17
- camel/models/openai_audio_models.py +87 -2
- camel/models/openai_compatible_model.py +37 -3
- camel/models/openai_model.py +158 -46
- camel/models/qwen_model.py +61 -4
- camel/models/reka_model.py +53 -3
- camel/models/samba_model.py +209 -4
- camel/models/sglang_model.py +153 -14
- camel/models/siliconflow_model.py +16 -3
- camel/models/stub_model.py +46 -4
- camel/models/togetherai_model.py +38 -3
- camel/models/vllm_model.py +37 -3
- camel/models/yi_model.py +36 -3
- camel/models/zhipuai_model.py +38 -3
- camel/retrievers/__init__.py +3 -0
- camel/retrievers/hybrid_retrival.py +237 -0
- camel/toolkits/__init__.py +15 -1
- camel/toolkits/arxiv_toolkit.py +2 -1
- camel/toolkits/ask_news_toolkit.py +4 -2
- camel/toolkits/audio_analysis_toolkit.py +238 -0
- camel/toolkits/base.py +22 -3
- camel/toolkits/code_execution.py +2 -0
- camel/toolkits/dappier_toolkit.py +2 -1
- camel/toolkits/data_commons_toolkit.py +38 -12
- camel/toolkits/excel_toolkit.py +172 -0
- camel/toolkits/function_tool.py +13 -0
- camel/toolkits/github_toolkit.py +5 -1
- camel/toolkits/google_maps_toolkit.py +2 -1
- camel/toolkits/google_scholar_toolkit.py +2 -0
- camel/toolkits/human_toolkit.py +0 -3
- camel/toolkits/image_analysis_toolkit.py +202 -0
- camel/toolkits/linkedin_toolkit.py +3 -2
- camel/toolkits/meshy_toolkit.py +3 -2
- camel/toolkits/mineru_toolkit.py +2 -2
- camel/toolkits/networkx_toolkit.py +240 -0
- camel/toolkits/notion_toolkit.py +2 -0
- camel/toolkits/openbb_toolkit.py +3 -2
- camel/toolkits/page_script.js +376 -0
- camel/toolkits/reddit_toolkit.py +11 -3
- camel/toolkits/retrieval_toolkit.py +6 -1
- camel/toolkits/semantic_scholar_toolkit.py +2 -1
- camel/toolkits/stripe_toolkit.py +8 -2
- camel/toolkits/sympy_toolkit.py +6 -1
- camel/toolkits/video_analysis_toolkit.py +407 -0
- camel/toolkits/{video_toolkit.py → video_download_toolkit.py} +21 -25
- camel/toolkits/web_toolkit.py +1307 -0
- camel/toolkits/whatsapp_toolkit.py +3 -2
- camel/toolkits/zapier_toolkit.py +191 -0
- camel/types/__init__.py +2 -2
- camel/types/agents/__init__.py +16 -0
- camel/types/agents/tool_calling_record.py +52 -0
- camel/types/enums.py +3 -0
- camel/types/openai_types.py +16 -14
- camel/utils/__init__.py +2 -1
- camel/utils/async_func.py +2 -2
- camel/utils/commons.py +114 -1
- camel/verifiers/__init__.py +23 -0
- camel/verifiers/base.py +340 -0
- camel/verifiers/models.py +82 -0
- camel/verifiers/python_verifier.py +202 -0
- camel_ai-0.2.23.dist-info/METADATA +671 -0
- {camel_ai-0.2.22.dist-info → camel_ai-0.2.23.dist-info}/RECORD +122 -97
- {camel_ai-0.2.22.dist-info → camel_ai-0.2.23.dist-info}/WHEEL +1 -1
- camel_ai-0.2.22.dist-info/METADATA +0 -527
- {camel_ai-0.2.22.dist-info → camel_ai-0.2.23.dist-info/licenses}/LICENSE +0 -0
camel/verifiers/base.py
ADDED
|
@@ -0,0 +1,340 @@
|
|
|
1
|
+
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
14
|
+
import asyncio
|
|
15
|
+
import time
|
|
16
|
+
from abc import ABC, abstractmethod
|
|
17
|
+
from typing import List, Optional
|
|
18
|
+
|
|
19
|
+
from camel.logger import get_logger
|
|
20
|
+
from camel.utils import BatchProcessor
|
|
21
|
+
|
|
22
|
+
from .models import (
|
|
23
|
+
VerificationOutcome,
|
|
24
|
+
VerificationResult,
|
|
25
|
+
VerifierInput,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
logger = get_logger(__name__)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class BaseVerifier(ABC):
    r"""Base class for all verifiers.

    Subclasses implement :meth:`_setup`, :meth:`_cleanup` and
    :meth:`_verify_implementation`; this class wraps them with lifecycle
    management, per-item retry/timeout handling and batched execution.

    Example:
        ```python
        verifier = MyVerifier()
        await verifier.setup()
        result = await verifier.verify(response)
        await verifier.cleanup()
        ```

    Key Features:
        - Async verification with retry logic
        - Errors and timeouts are converted into ``VerificationResult``
          objects instead of propagating
        - Batch processing with bounded concurrency
    """

    def __init__(
        self,
        max_parallel: Optional[int] = None,
        timeout: Optional[float] = None,
        max_retries: int = 3,
        retry_delay: float = 1.0,
        initial_batch_size: Optional[int] = None,
        cpu_threshold: float = 80.0,
        memory_threshold: float = 85.0,
        **kwargs,
    ):
        r"""Initialize the verifier with configuration parameters.

        Args:
            max_parallel: Maximum number of parallel verifications. Used as
                the fallback concurrency limit in :meth:`verify_batch`;
                ``None`` falls back to 1. (default: :obj:`None`)
            timeout: Timeout in seconds for each verification attempt. A
                falsy value (``None`` or ``0``) disables the timeout.
                (default: :obj:`None`)
            max_retries: Maximum number of verification attempts. Values
                below 1 are treated as 1. (default: :obj:`3`)
            retry_delay: Delay between attempts in seconds. (default:
                :obj:`1.0`)
            initial_batch_size: Initial size for batch processing. If None,
                defaults to 10. (default: :obj:`None`)
            cpu_threshold: CPU usage percentage threshold for scaling down.
                Stored only; not read by this class. (default:
                :obj:`80.0`)
            memory_threshold: Memory usage percentage threshold for scaling
                down. Stored only; not read by this class. (default:
                :obj:`85.0`)
            **kwargs: Additional verifier parameters. Accepted for
                subclass compatibility and ignored here.
        """
        self._is_setup: bool = False
        self._max_parallel: Optional[int] = max_parallel
        self._timeout: Optional[float] = timeout
        self._max_retries: int = max_retries
        self._retry_delay: float = retry_delay
        self._initial_batch_size: Optional[int] = initial_batch_size
        self._cpu_threshold: float = cpu_threshold
        self._memory_threshold: float = memory_threshold
        self._batch_processor: BatchProcessor = BatchProcessor()

    async def setup(self) -> None:
        r"""Set up the verifier with necessary resources.

        Creates a fresh batch processor and then runs the subclass hook
        :meth:`_setup`. Calling it on an already set-up verifier is a
        no-op.

        Raises:
            RuntimeError: If setup fails or resources cannot be initialized.
        """
        if self._is_setup:
            logger.debug(f"{self.__class__.__name__} already initialized")
            return

        try:
            batch_size = max(1, self._initial_batch_size or 10)
            max_parallel = max(1, self._max_parallel or 1)
            # NOTE(review): batch_size / max_parallel are only logged; the
            # processor is built with its own defaults. Confirm whether
            # they should be passed to BatchProcessor.
            self._batch_processor = BatchProcessor()

            logger.info(
                f"{self.__class__.__name__} initialized with "
                f"batch_size={batch_size}, max_parallel={max_parallel}"
            )

            await self._setup()
            self._is_setup = True

        except Exception as e:
            error_msg = (
                f"Failed to initialize {self.__class__.__name__}: {e!s}"
            )
            logger.error(error_msg, exc_info=True)
            # cleanup() returns early while _is_setup is False, so it would
            # never release what a partially completed _setup() acquired.
            # Invoke the subclass hook directly, best-effort.
            await self._release_after_failed_setup()
            raise RuntimeError(error_msg) from e

    async def _release_after_failed_setup(self) -> None:
        r"""Best-effort release of resources after :meth:`setup` failed.

        Any error raised by :meth:`_cleanup` is logged and suppressed so
        that the original setup failure is the one reported to the caller.
        """
        try:
            await self._cleanup()
        except Exception as cleanup_error:
            logger.warning(
                f"Cleanup after failed setup of "
                f"{self.__class__.__name__} also failed: {cleanup_error!s}"
            )
        finally:
            self._is_setup = False

    @abstractmethod
    async def _setup(self) -> None:
        r"""Implement verifier-specific setup logic."""
        pass

    async def cleanup(self) -> None:
        r"""Clean up verifier resources.

        Replaces the batch processor with a fresh instance and runs the
        subclass hook :meth:`_cleanup`. Does nothing if the verifier is
        not set up. The verifier is marked as not set up afterwards even
        when cleanup fails.

        Raises:
            RuntimeError: If cleanup fails.
        """
        if not self._is_setup:
            return

        try:
            self._batch_processor = BatchProcessor()
            await self._cleanup()
            logger.info(f"{self.__class__.__name__} cleaned up successfully")

        except Exception as e:
            error_msg = f"Failed to cleanup {self.__class__.__name__}: {e!s}"
            logger.error(error_msg, exc_info=True)
            raise RuntimeError(error_msg) from e

        finally:
            self._is_setup = False

    @abstractmethod
    async def _cleanup(self) -> None:
        r"""Implement verifier-specific cleanup logic."""
        pass

    async def verify(self, result: VerifierInput) -> VerificationResult:
        r"""Perform verification with retry and full error handling.

        Calls :meth:`setup` first if the verifier is not set up. Each
        attempt runs :meth:`_verify_implementation`, bounded by the
        configured timeout when one is set. A result returned by the
        implementation (whatever its status) ends the loop; a raised
        exception or a timeout triggers another attempt until the attempt
        limit is reached.

        Args:
            result: The response to verify.

        Returns:
            VerificationResult: Structured result containing:
                - status: SUCCESS/FAILURE/ERROR/TIMEOUT
                - result: Verification outcome description
                - duration: Time taken across all attempts
                - metadata: Includes ``"attempt"``, the 1-based attempt
                  number that produced the result
                - error_message: Error description if applicable

        Raises:
            RuntimeError: If the implicit :meth:`setup` call fails.
                Verification errors and timeouts are not raised; they are
                reported through the returned result's status.
        """
        if not self._is_setup:
            logger.warning(
                f"{self.__class__.__name__} not set up, calling setup()"
            )
            await self.setup()

        # Always make at least one attempt, even for max_retries <= 0;
        # otherwise the verification would never run at all.
        max_attempts = max(1, self._max_retries)
        attempt = 0
        start_time = time.time()

        while attempt < max_attempts:
            attempt += 1
            try:
                if self._timeout:
                    verification_result = await asyncio.wait_for(
                        self._verify_implementation(result),
                        timeout=self._timeout,
                    )
                else:
                    verification_result = await self._verify_implementation(
                        result
                    )

                verification_result.duration = time.time() - start_time
                verification_result.metadata["attempt"] = attempt
                return verification_result

            except asyncio.TimeoutError:
                if attempt >= max_attempts:
                    return VerificationResult(
                        status=VerificationOutcome.TIMEOUT,
                        result="",
                        error_message="Verification timed out "
                        "after all retries.",
                        duration=time.time() - start_time,
                        metadata={"attempt": attempt},
                    )
                logger.warning(
                    f"Verification timeout on attempt {attempt}, retrying..."
                )

            except Exception as e:
                if attempt >= max_attempts:
                    return VerificationResult(
                        status=VerificationOutcome.ERROR,
                        result="",
                        error_message=f"Verification failed: {e!s}",
                        duration=time.time() - start_time,
                        metadata={"attempt": attempt},
                    )
                logger.warning(
                    f"Verification failed on attempt {attempt}: {e!s}, "
                    "retrying..."
                )

            # Only reached when another attempt will follow.
            await asyncio.sleep(self._retry_delay)

        # Defensive fallback: every loop iteration returns or retries.
        return VerificationResult(
            status=VerificationOutcome.ERROR,
            result="",
            error_message="Unexpected code path reached",
            duration=time.time() - start_time,
            metadata={"attempt": attempt},
        )

    @abstractmethod
    async def _verify_implementation(
        self, result: VerifierInput
    ) -> VerificationResult:
        r"""Implement the actual verification logic.

        Args:
            result: The response to verify.

        Returns:
            VerificationResult: Containing the verification outcome.

        Raises:
            NotImplementedError: Must be implemented in subclasses.
        """
        raise NotImplementedError(
            "Subclasses must implement _verify_implementation()"
        )

    async def verify_batch(
        self, results: List[VerifierInput], raise_on_error: bool = False
    ) -> List[VerificationResult]:
        r"""Verify multiple results in parallel with controlled concurrency.

        Inputs are processed in consecutive slices of ``batch_size``; within
        a slice, verifications run concurrently, limited by a semaphore.
        The output order matches the input order.

        Args:
            results: List of responses to verify.
            raise_on_error: Whether to raise an exception if any verification
                fails. (default: :obj:`False`)

        Returns:
            List[VerificationResult]: One for each input response.

        Raises:
            RuntimeError: If any verification ends with status ERROR or
                TIMEOUT and raise_on_error is True, or if the implicit
                :meth:`setup` call fails.
        """
        if not self._is_setup:
            logger.warning(
                f"{self.__class__.__name__} not set up, calling setup()"
            )
            await self.setup()

        # Get current batch parameters from processor with defaults if not
        # present
        max_workers = getattr(
            self._batch_processor, 'max_workers', self._max_parallel or 1
        )
        batch_size = getattr(
            self._batch_processor, 'batch_size', self._initial_batch_size or 10
        )
        # Guard against None/0 so Semaphore() and range() cannot fail.
        max_workers = max(1, max_workers or 1)
        batch_size = max(1, batch_size or 10)
        semaphore = asyncio.Semaphore(max_workers)

        async def _verify_with_semaphore(
            response: VerifierInput,
        ) -> VerificationResult:
            start_time = time.time()
            try:
                async with semaphore:
                    verification_result = await self.verify(response)
                processing_time = time.time() - start_time
                success = (
                    verification_result.status == VerificationOutcome.SUCCESS
                )
                self._batch_processor.adjust_batch_size(
                    success, processing_time
                )
                return verification_result
            except Exception as e:
                processing_time = time.time() - start_time
                self._batch_processor.adjust_batch_size(False, processing_time)
                logger.error(f"Verification failed: {e!s}", exc_info=True)
                return VerificationResult(
                    status=VerificationOutcome.ERROR,
                    result="",
                    error_message=str(e),
                    metadata={"error_type": type(e).__name__},
                )

        # Process in batches
        all_results: List[VerificationResult] = []
        for i in range(0, len(results), batch_size):
            batch = results[i : i + batch_size]
            # return_exceptions=True keeps one outcome per input even when a
            # task raises, so the output stays aligned with `results`.
            outcomes = await asyncio.gather(
                *(_verify_with_semaphore(item) for item in batch),
                return_exceptions=True,
            )
            for outcome in outcomes:
                if isinstance(outcome, BaseException):
                    if not isinstance(outcome, Exception):
                        # Cancellation and similar must not be swallowed.
                        raise outcome
                    logger.error(
                        f"Batch verification failed: {outcome!s}",
                        exc_info=outcome,
                    )
                    if raise_on_error:
                        raise RuntimeError(
                            f"Batch verification failed: {outcome!s}"
                        ) from outcome
                    outcome = VerificationResult(
                        status=VerificationOutcome.ERROR,
                        result="",
                        error_message=str(outcome),
                        metadata={"error_type": type(outcome).__name__},
                    )
                all_results.append(outcome)

        if raise_on_error and any(
            r.status in {VerificationOutcome.ERROR, VerificationOutcome.TIMEOUT}
            for r in all_results
        ):
            error_msg = "One or more verifications failed"
            logger.error(error_msg)
            raise RuntimeError(error_msg)

        return all_results
|
|
camel/verifiers/models.py
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
14
|
+
from datetime import datetime
|
|
15
|
+
from enum import Enum
|
|
16
|
+
from typing import Any, Dict, Optional
|
|
17
|
+
|
|
18
|
+
from pydantic import BaseModel, Field
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class VerifierInput(BaseModel):
    r"""Structured input to the verifier.

    Attributes:
        llm_response: The text produced by the LLM that should be verified.
        ground_truth: Optional reference value to compare against;
            ``None`` when no reference is available.
    """

    llm_response: str = Field(
        # The two literals are concatenated; the trailing space keeps the
        # sentences from running together as "verified.Needs".
        description="The LLM response to be verified. "
        "Needs to be in a format that the verifier can handle."
    )
    ground_truth: Optional[str] = Field(
        None, description="The ground truth data, if available."
    )
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class VerificationOutcome(Enum):
    r"""Possible end states of a single verification.

    Each member carries a lowercase string value. Truthiness is overridden
    so an outcome can be used directly in a condition: only ``SUCCESS``
    evaluates to ``True``.
    """

    SUCCESS = "success"
    FAILURE = "failure"
    ERROR = "error"
    TIMEOUT = "timeout"

    def __bool__(self):
        r"""Return ``True`` for ``SUCCESS`` and ``False`` for any other
        member.
        """
        succeeded = self is type(self).SUCCESS
        return succeeded
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class VerificationResult(BaseModel):
    r"""Structured result from a verification.

    Attributes:
        status: Outcome of the verification.
        result: Verification result text.
        duration: Duration of the verification in seconds; ``0.0`` unless
            set.
        timestamp: Creation time of the record, taken from
            ``datetime.now`` when not supplied.
        metadata: Free-form additional details; a new empty dict per
            instance when not supplied.
        error_message: Description of the error, or ``None``.
    """

    status: VerificationOutcome = Field(
        description="Status of the verification",
    )
    result: str = Field(
        description="Verification result",
    )
    duration: float = Field(
        description="Duration of verification in seconds",
        default=0.0,
    )
    timestamp: datetime = Field(
        description="When the verification was performed",
        default_factory=datetime.now,
    )
    metadata: Dict[str, Any] = Field(
        description="Additional metadata about the verification",
        default_factory=dict,
    )
    error_message: Optional[str] = Field(
        description="Error message if verification failed",
        default=None,
    )
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
class VerifierConfig(BaseModel):
    r"""Configuration for verifier behavior.

    Attributes:
        enabled: Whether verification is enabled. Defaults to ``True``.
        strict_mode: Whether to fail on any validation error. Defaults to
            ``False``.
        timeout: Verification timeout in seconds, or ``None``.
        max_retries: Maximum number of retry attempts. Defaults to ``3``.
        retry_delay: Delay between retries in seconds. Defaults to
            ``1.0``.
    """

    enabled: bool = Field(
        default=True,
        description="Whether verification is enabled",
    )
    strict_mode: bool = Field(
        default=False,
        description="Whether to fail on any validation error",
    )
    timeout: Optional[float] = Field(
        default=None,
        description="Verification timeout in seconds",
    )
    max_retries: int = Field(
        default=3,
        description="Maximum number of retry attempts",
    )
    retry_delay: float = Field(
        default=1.0,
        description="Delay between retries in seconds",
    )
|
|
camel/verifiers/python_verifier.py
ADDED
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
14
|
+
|
|
15
|
+
import asyncio
|
|
16
|
+
import os
|
|
17
|
+
import shutil
|
|
18
|
+
import subprocess
|
|
19
|
+
import tempfile
|
|
20
|
+
import venv
|
|
21
|
+
from typing import List, Optional
|
|
22
|
+
|
|
23
|
+
from camel.logger import get_logger
|
|
24
|
+
from camel.verifiers import BaseVerifier
|
|
25
|
+
|
|
26
|
+
from .models import VerificationOutcome, VerificationResult, VerifierInput
|
|
27
|
+
|
|
28
|
+
logger = get_logger(__name__)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class PythonVerifier(BaseVerifier):
    r"""The PythonVerifier class verifies Python-based implementations
    by executing them in a dedicated virtual environment.

    Features:
    - Creates a virtual environment in a temporary directory.
    - Installs required packages before executing the provided script.
    - Executes the script and compares the output against a ground truth,
      if supplied.
    - Removes the virtual environment on cleanup.

    The virtual environment separates the script's package dependencies
    from the host interpreter's. The script still runs as an ordinary
    subprocess of the current user.
    """

    def __init__(
        self,
        timeout: Optional[float] = 30.0,
        required_packages: Optional[List[str]] = None,
    ):
        r"""Initializes the PythonVerifier.

        Args:
            timeout (Optional[float], optional): The execution timeout in
                seconds. (default: :obj:`30.0`)
            required_packages (Optional[List[str]], optional): A list of
                packages to install in the virtual environment.
                (default: :obj:`None`)
        """
        # TODO: Use CAMEL's Interpreter to execute the code
        super().__init__(timeout=timeout)
        self.venv_path: Optional[str] = None
        self.required_packages = required_packages or []

        if os.name == 'nt':  # Windows
            self.bin_dir = 'Scripts'
        else:  # Unix-like systems
            self.bin_dir = 'bin'

    def _venv_executable(self, venv_path: str, name: str) -> str:
        r"""Build the path of an executable inside the virtual environment.

        On Windows the ``.exe`` suffix is appended, because the files in
        ``Scripts`` are named e.g. ``python.exe`` and an existence check on
        the suffix-less path would fail.

        Args:
            venv_path (str): Root directory of the virtual environment.
            name (str): Executable name without suffix, e.g. ``"python"``.

        Returns:
            str: Path to the executable.
        """
        if os.name == 'nt' and not name.lower().endswith('.exe'):
            name = f"{name}.exe"
        return os.path.join(venv_path, self.bin_dir, name)

    async def _setup(self) -> None:
        r"""Set up a virtual environment for execution
        and install required packages.

        A failed package installation is logged but does not abort setup.
        If creating the environment itself fails, the temporary directory
        is removed again before the error propagates.
        """
        # NOTE(review): venv.create and subprocess.run are blocking calls
        # inside a coroutine; consider moving them to an executor.
        self.venv_path = tempfile.mkdtemp()
        try:
            venv.create(self.venv_path, with_pip=True)
        except Exception:
            # Do not leave an orphaned temp directory behind.
            shutil.rmtree(self.venv_path, ignore_errors=True)
            self.venv_path = None
            raise
        logger.info(f"Virtual environment created at {self.venv_path}")

        if not self.required_packages:
            return

        venv_pip = self._venv_executable(self.venv_path, "pip")
        try:
            subprocess.run(
                [venv_pip, "install", *self.required_packages],
                check=True,
                capture_output=True,
            )
            logger.info(
                "Installed required packages: "
                f"{', '.join(self.required_packages)}"
            )
        except subprocess.CalledProcessError as e:
            logger.error(
                "Failed to install required packages: "
                f"{e.stderr.decode().strip()}"
            )

    async def _cleanup(self) -> None:
        r"""Clean up the virtual environment."""
        if self.venv_path:
            shutil.rmtree(self.venv_path)
            logger.info(f"Virtual environment at {self.venv_path} removed")
            self.venv_path = None

    async def _verify_implementation(
        self, result: VerifierInput
    ) -> VerificationResult:
        r"""Executes the LLM-generated response in a Python virtual
        environment.

        The stripped response is passed to the environment's interpreter
        via ``-c``. With a zero exit code the run is SUCCESS, unless a
        ground truth is given and differs from stdout after whitespace
        normalization (FAILURE). A non-zero exit code yields ERROR with
        stderr as the error message.

        Args:
            result (VerifierInput): Contains the LLM-generated Python code to
                execute and optional ground truth for comparison.

        Returns:
            VerificationResult: Contains verification status (SUCCESS/FAILURE/
                ERROR/TIMEOUT), execution output and error messages if any.
                Timeouts and unexpected errors during execution are
                reported through the status instead of being raised.
        """
        venv_path = self.venv_path
        if not venv_path:
            return VerificationResult(
                status=VerificationOutcome.ERROR,
                result="",
                error_message="Virtual environment is not set up.",
            )

        script = result.llm_response.strip()
        venv_python = self._venv_executable(venv_path, "python")

        if not os.path.exists(venv_python):
            return VerificationResult(
                status=VerificationOutcome.ERROR,
                result="",
                error_message="Python binary not found in virtual environment",
            )

        process = None
        try:
            process = await asyncio.create_subprocess_exec(
                venv_python,
                "-c",
                script,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
            )

            stdout, stderr = await asyncio.wait_for(
                process.communicate(), timeout=self._timeout
            )

            output_result = stdout.decode().strip()
            error_output = stderr.decode().strip()

            if process.returncode != 0:
                return VerificationResult(
                    status=VerificationOutcome.ERROR,
                    error_message=error_output,
                    result=output_result,
                )

            if result.ground_truth is None:
                # Nothing to compare against: a clean exit is a success.
                return VerificationResult(
                    status=VerificationOutcome.SUCCESS,
                    result=output_result,
                )

            # Normalize both strings by removing extra whitespace
            normalized_output = ' '.join(output_result.strip().split())
            normalized_truth = ' '.join(
                str(result.ground_truth).strip().split()
            )

            if normalized_output == normalized_truth:
                return VerificationResult(
                    status=VerificationOutcome.SUCCESS,
                    result=output_result,
                )
            return VerificationResult(
                status=VerificationOutcome.FAILURE,
                error_message="Output doesn't match ground truth",
                result=output_result,
            )

        except asyncio.TimeoutError:
            return VerificationResult(
                status=VerificationOutcome.TIMEOUT,
                result="",
                error_message="Execution timed out.",
            )

        except Exception as e:
            return VerificationResult(
                status=VerificationOutcome.ERROR,
                result="",
                error_message=f"Execution error: {e}",
            )

        finally:
            # A timeout or an outer cancellation only abandons
            # communicate(); the child would keep running unless it is
            # terminated explicitly.
            if process is not None and process.returncode is None:
                try:
                    process.kill()
                except ProcessLookupError:
                    # The process exited between the check and the kill.
                    pass
                await process.wait()
|