camel-ai 0.2.36__py3-none-any.whl → 0.2.38__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of camel-ai might be problematic. Click here for more details.
- camel/__init__.py +1 -1
- camel/agents/__init__.py +2 -0
- camel/agents/repo_agent.py +579 -0
- camel/configs/aiml_config.py +20 -19
- camel/configs/anthropic_config.py +25 -27
- camel/configs/cohere_config.py +11 -10
- camel/configs/deepseek_config.py +16 -16
- camel/configs/gemini_config.py +8 -8
- camel/configs/groq_config.py +18 -19
- camel/configs/internlm_config.py +8 -8
- camel/configs/litellm_config.py +26 -24
- camel/configs/mistral_config.py +8 -8
- camel/configs/moonshot_config.py +11 -11
- camel/configs/nvidia_config.py +13 -13
- camel/configs/ollama_config.py +14 -15
- camel/configs/openai_config.py +3 -3
- camel/configs/openrouter_config.py +9 -9
- camel/configs/qwen_config.py +8 -8
- camel/configs/reka_config.py +12 -11
- camel/configs/samba_config.py +14 -14
- camel/configs/sglang_config.py +15 -16
- camel/configs/siliconflow_config.py +18 -17
- camel/configs/togetherai_config.py +18 -19
- camel/configs/vllm_config.py +18 -19
- camel/configs/yi_config.py +7 -8
- camel/configs/zhipuai_config.py +8 -9
- camel/datagen/evol_instruct/__init__.py +20 -0
- camel/datagen/evol_instruct/evol_instruct.py +424 -0
- camel/datagen/evol_instruct/scorer.py +166 -0
- camel/datagen/evol_instruct/templates.py +268 -0
- camel/datasets/static_dataset.py +25 -23
- camel/environments/models.py +10 -1
- camel/environments/single_step.py +296 -136
- camel/extractors/__init__.py +16 -1
- camel/interpreters/docker_interpreter.py +1 -1
- camel/interpreters/e2b_interpreter.py +1 -1
- camel/interpreters/subprocess_interpreter.py +1 -1
- camel/loaders/__init__.py +2 -2
- camel/loaders/{panda_reader.py → pandas_reader.py} +61 -30
- camel/memories/context_creators/score_based.py +198 -67
- camel/models/aiml_model.py +9 -3
- camel/models/anthropic_model.py +11 -3
- camel/models/azure_openai_model.py +9 -3
- camel/models/base_audio_model.py +6 -0
- camel/models/base_model.py +4 -0
- camel/models/deepseek_model.py +9 -3
- camel/models/gemini_model.py +9 -3
- camel/models/groq_model.py +9 -3
- camel/models/internlm_model.py +8 -2
- camel/models/model_factory.py +4 -0
- camel/models/moonshot_model.py +8 -2
- camel/models/nemotron_model.py +9 -3
- camel/models/nvidia_model.py +9 -3
- camel/models/ollama_model.py +9 -3
- camel/models/openai_audio_models.py +5 -3
- camel/models/openai_compatible_model.py +9 -3
- camel/models/openai_model.py +9 -3
- camel/models/openrouter_model.py +9 -3
- camel/models/qwen_model.py +9 -3
- camel/models/samba_model.py +9 -3
- camel/models/sglang_model.py +11 -4
- camel/models/siliconflow_model.py +8 -2
- camel/models/stub_model.py +2 -1
- camel/models/togetherai_model.py +9 -3
- camel/models/vllm_model.py +9 -3
- camel/models/yi_model.py +9 -3
- camel/models/zhipuai_model.py +9 -3
- camel/retrievers/auto_retriever.py +14 -0
- camel/storages/__init__.py +2 -0
- camel/storages/vectordb_storages/__init__.py +2 -0
- camel/storages/vectordb_storages/tidb.py +332 -0
- camel/toolkits/__init__.py +7 -0
- camel/toolkits/browser_toolkit.py +84 -61
- camel/toolkits/openai_agent_toolkit.py +131 -0
- camel/toolkits/searxng_toolkit.py +207 -0
- camel/toolkits/thinking_toolkit.py +230 -0
- camel/types/enums.py +4 -0
- camel/utils/chunker/code_chunker.py +9 -15
- camel/verifiers/base.py +28 -5
- camel/verifiers/python_verifier.py +321 -68
- {camel_ai-0.2.36.dist-info → camel_ai-0.2.38.dist-info}/METADATA +103 -8
- {camel_ai-0.2.36.dist-info → camel_ai-0.2.38.dist-info}/RECORD +84 -75
- {camel_ai-0.2.36.dist-info → camel_ai-0.2.38.dist-info}/WHEEL +0 -0
- {camel_ai-0.2.36.dist-info → camel_ai-0.2.38.dist-info}/licenses/LICENSE +0 -0
|
@@ -12,15 +12,17 @@
|
|
|
12
12
|
# limitations under the License.
|
|
13
13
|
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
14
14
|
|
|
15
|
+
import ast
|
|
15
16
|
import asyncio
|
|
16
17
|
import os
|
|
17
18
|
import shutil
|
|
18
19
|
import subprocess
|
|
20
|
+
import sys
|
|
19
21
|
import tempfile
|
|
20
22
|
import venv
|
|
21
|
-
from typing import List, Optional
|
|
23
|
+
from typing import List, Optional, Tuple
|
|
22
24
|
|
|
23
|
-
from camel.extractors import BaseExtractor
|
|
25
|
+
from camel.extractors.base import BaseExtractor
|
|
24
26
|
from camel.logger import get_logger
|
|
25
27
|
from camel.verifiers import BaseVerifier
|
|
26
28
|
|
|
@@ -46,13 +48,16 @@ class PythonVerifier(BaseVerifier):
|
|
|
46
48
|
|
|
47
49
|
def __init__(
|
|
48
50
|
self,
|
|
51
|
+
extractor: Optional[BaseExtractor] = None,
|
|
49
52
|
timeout: Optional[float] = 30.0,
|
|
50
53
|
required_packages: Optional[List[str]] = None,
|
|
51
|
-
|
|
54
|
+
**kwargs,
|
|
52
55
|
):
|
|
53
56
|
r"""Initializes the PythonVerifier.
|
|
54
57
|
|
|
55
58
|
Args:
|
|
59
|
+
extractor (Optional[BaseExtractor], optional): The extractor to use
|
|
60
|
+
for extracting code from the solution. (default: :obj:`None`)
|
|
56
61
|
timeout (Optional[float], optional): The execution timeout in
|
|
57
62
|
seconds. (default: :obj:`30.0`)
|
|
58
63
|
required_packages (Optional[List[str]], optional): A list of
|
|
@@ -60,7 +65,7 @@ class PythonVerifier(BaseVerifier):
|
|
|
60
65
|
(default: :obj:`None`)
|
|
61
66
|
"""
|
|
62
67
|
# TODO: Use CAMEL's Interpreter to execute the code
|
|
63
|
-
super().__init__(timeout=timeout)
|
|
68
|
+
super().__init__(extractor=extractor, timeout=timeout, **kwargs)
|
|
64
69
|
self.venv_path: Optional[str] = None
|
|
65
70
|
self.required_packages = required_packages or []
|
|
66
71
|
|
|
@@ -69,25 +74,39 @@ class PythonVerifier(BaseVerifier):
|
|
|
69
74
|
else: # Unix-like systems
|
|
70
75
|
self.bin_dir = 'bin'
|
|
71
76
|
|
|
72
|
-
async def _setup(self) -> None:
|
|
73
|
-
r"""Set up a virtual environment
|
|
74
|
-
|
|
75
|
-
|
|
77
|
+
async def _setup(self, **kwargs) -> None:
|
|
78
|
+
r"""Set up a virtual environment and install required packages."""
|
|
79
|
+
uv = kwargs.get('uv', True)
|
|
80
|
+
if uv and self._is_uv_environment():
|
|
81
|
+
logger.info("[UV] Detected uv environment. Using uv for setup.")
|
|
82
|
+
self._setup_with_uv()
|
|
83
|
+
return
|
|
84
|
+
|
|
76
85
|
self.venv_path = tempfile.mkdtemp()
|
|
77
|
-
|
|
78
|
-
|
|
86
|
+
try:
|
|
87
|
+
venv.create(self.venv_path, with_pip=True)
|
|
88
|
+
logger.info(f"Virtual environment created at {self.venv_path}")
|
|
89
|
+
except Exception as e:
|
|
90
|
+
logger.error(f"Failed to create virtual environment: {e}")
|
|
91
|
+
# Clean up resources before re-raising
|
|
92
|
+
if self.venv_path and os.path.exists(self.venv_path):
|
|
93
|
+
shutil.rmtree(self.venv_path)
|
|
94
|
+
self.venv_path = None
|
|
95
|
+
raise
|
|
79
96
|
|
|
80
97
|
venv_pip = os.path.join(self.venv_path, self.bin_dir, "pip")
|
|
81
98
|
|
|
82
99
|
if self.required_packages:
|
|
83
100
|
try:
|
|
101
|
+
# Add timeout to subprocess call
|
|
84
102
|
subprocess.run(
|
|
85
103
|
[venv_pip, "install", *self.required_packages],
|
|
86
104
|
check=True,
|
|
87
105
|
capture_output=True,
|
|
106
|
+
timeout=self._timeout,
|
|
88
107
|
)
|
|
89
108
|
logger.info(
|
|
90
|
-
"Installed required packages:"
|
|
109
|
+
"Installed required packages: "
|
|
91
110
|
f"{', '.join(self.required_packages)}"
|
|
92
111
|
)
|
|
93
112
|
except subprocess.CalledProcessError as e:
|
|
@@ -95,6 +114,101 @@ class PythonVerifier(BaseVerifier):
|
|
|
95
114
|
"Failed to install required packages: "
|
|
96
115
|
f"{e.stderr.decode().strip()}"
|
|
97
116
|
)
|
|
117
|
+
# Clean up resources before re-raising
|
|
118
|
+
if self.venv_path and os.path.exists(self.venv_path):
|
|
119
|
+
shutil.rmtree(self.venv_path)
|
|
120
|
+
self.venv_path = None
|
|
121
|
+
raise
|
|
122
|
+
except subprocess.TimeoutExpired:
|
|
123
|
+
logger.error(
|
|
124
|
+
f"Package installation timed out "
|
|
125
|
+
f"after {self._timeout} seconds"
|
|
126
|
+
)
|
|
127
|
+
if self.venv_path and os.path.exists(self.venv_path):
|
|
128
|
+
shutil.rmtree(self.venv_path)
|
|
129
|
+
self.venv_path = None
|
|
130
|
+
raise
|
|
131
|
+
|
|
132
|
+
def _is_uv_environment(self) -> bool:
|
|
133
|
+
r"""Detect whether the current Python runtime is managed by uv."""
|
|
134
|
+
return "UV_CACHE_DIR" in os.environ or "uv" in sys.executable
|
|
135
|
+
|
|
136
|
+
def _setup_with_uv(self) -> None:
    r"""Create virtual environment and install packages using uv.

    A fresh temporary directory is created and turned into a virtual
    environment with ``uv venv``. If ``self.required_packages`` is
    non-empty, the packages are then installed with
    ``uv pip install --python <venv python>``. Each uv invocation is
    bounded by ``self._timeout`` seconds.

    If any step fails, the temporary directory is removed and
    ``self.venv_path`` is reset to ``None`` before the exception
    propagates.

    Raises:
        subprocess.CalledProcessError: If a uv command exits with a
            non-zero status.
        subprocess.TimeoutExpired: If a uv command exceeds the timeout.
        OSError: If the ``uv`` executable cannot be launched.
    """
    venv_dir = tempfile.mkdtemp()
    self.venv_path = venv_dir
    ready = False
    try:
        try:
            subprocess.run(
                ["uv", "venv", venv_dir],
                check=True,
                capture_output=True,
                timeout=self._timeout,
            )
            logger.info(f"[UV] Virtual environment created at {venv_dir}")
        except subprocess.CalledProcessError as e:
            logger.error(
                "[UV] Failed to create virtual environment:\n"
                f"{e.stderr.decode().strip()}"
            )
            raise
        except subprocess.TimeoutExpired:
            logger.error(
                f"[UV] Virtual environment creation timed "
                f"out after {self._timeout} seconds"
            )
            raise

        if self.required_packages:
            venv_python = os.path.join(
                venv_dir,
                self.bin_dir,
                "python.exe" if os.name == 'nt' else "python",
            )
            try:
                subprocess.run(
                    [
                        "uv",
                        "pip",
                        "install",
                        "--python",
                        venv_python,
                        *self.required_packages,
                    ],
                    check=True,
                    capture_output=True,
                    timeout=self._timeout,
                )
                logger.info(
                    "[UV] Installed required packages via uv: "
                    f"{', '.join(self.required_packages)}"
                )
            except subprocess.CalledProcessError as e:
                logger.error(
                    "[UV] Failed to install required packages via uv:\n"
                    f"{e.stderr.decode().strip()}"
                )
                raise
            except subprocess.TimeoutExpired:
                logger.error(
                    f"[UV] Package installation timed "
                    f"out after {self._timeout} seconds"
                )
                raise
        ready = True
    except OSError as e:
        # Raised when `uv` itself cannot be started (e.g. not on PATH).
        logger.error(f"[UV] Failed to launch uv: {e}")
        raise
    finally:
        # Single cleanup point for every failure mode, so a half-built
        # environment is never left on disk or referenced by the verifier.
        if not ready:
            shutil.rmtree(venv_dir, ignore_errors=True)
            self.venv_path = None
|
|
98
212
|
|
|
99
213
|
async def _cleanup(self) -> None:
|
|
100
214
|
r"""Clean up the virtual environment."""
|
|
@@ -106,32 +220,27 @@ class PythonVerifier(BaseVerifier):
|
|
|
106
220
|
async def _verify_implementation(
|
|
107
221
|
self, solution: str, ground_truth: Optional[str]
|
|
108
222
|
) -> VerificationResult:
|
|
109
|
-
r"""Executes
|
|
110
|
-
|
|
223
|
+
r"""Executes the provided Python solution in an isolated environment
|
|
224
|
+
and verifies its output against an expected ground truth expression.
|
|
225
|
+
|
|
226
|
+
This method runs the solution in a subprocess inside a virtual
|
|
227
|
+
environment. The ground truth is assumed to be a pure Python
|
|
228
|
+
expression and is evaluated directly in the verifier process.
|
|
111
229
|
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
230
|
+
If both executions are successful, the actual output is compared
|
|
231
|
+
against the evaluated ground truth using semantic equality. If
|
|
232
|
+
evaluation fails, string comparison is used as a fallback.
|
|
115
233
|
|
|
116
234
|
Args:
|
|
117
|
-
solution (str): The Python code to execute and
|
|
118
|
-
|
|
119
|
-
|
|
235
|
+
solution (str): The Python code or expression to execute and
|
|
236
|
+
verify.
|
|
237
|
+
ground_truth (Optional[str]): The expected value as a Python
|
|
238
|
+
expression. If None, only execution success is verified.
|
|
120
239
|
|
|
121
240
|
Returns:
|
|
122
|
-
VerificationResult:
|
|
123
|
-
- status (VerificationOutcome): SUCCESS, FAILURE, ERROR,
|
|
124
|
-
or TIMEOUT.
|
|
125
|
-
- result (str): The execution output of the solution.
|
|
126
|
-
- error_message (Optional[str]): Captured error message,
|
|
127
|
-
if any.
|
|
128
|
-
- duration (float, optional): Execution time (set externally).
|
|
129
|
-
|
|
130
|
-
Raises:
|
|
131
|
-
asyncio.TimeoutError: If execution exceeds the configured timeout.
|
|
132
|
-
Exception: Any unexpected errors are caught and converted to an
|
|
133
|
-
ERROR verification result.
|
|
241
|
+
VerificationResult: Result of the verification process.
|
|
134
242
|
"""
|
|
243
|
+
# Check for virtual environment setup
|
|
135
244
|
if not self.venv_path:
|
|
136
245
|
return VerificationResult(
|
|
137
246
|
status=VerificationOutcome.ERROR,
|
|
@@ -139,9 +248,51 @@ class PythonVerifier(BaseVerifier):
|
|
|
139
248
|
error_message="Virtual environment is not set up.",
|
|
140
249
|
)
|
|
141
250
|
|
|
142
|
-
|
|
143
|
-
|
|
251
|
+
# If the solution is an expression, evaluate it directly
|
|
252
|
+
if self._is_expression(solution):
|
|
253
|
+
try:
|
|
254
|
+
sol_val = ast.literal_eval(solution)
|
|
255
|
+
except Exception as e:
|
|
256
|
+
return VerificationResult(
|
|
257
|
+
status=VerificationOutcome.ERROR,
|
|
258
|
+
result="",
|
|
259
|
+
error_message=f"Expression evaluation error: {e}",
|
|
260
|
+
)
|
|
144
261
|
|
|
262
|
+
if ground_truth is not None:
|
|
263
|
+
try:
|
|
264
|
+
gt_val = ast.literal_eval(ground_truth)
|
|
265
|
+
except Exception as e:
|
|
266
|
+
return VerificationResult(
|
|
267
|
+
status=VerificationOutcome.ERROR,
|
|
268
|
+
result="",
|
|
269
|
+
error_message=f"Ground truth evaluation error: {e}",
|
|
270
|
+
)
|
|
271
|
+
if sol_val == gt_val:
|
|
272
|
+
return VerificationResult(
|
|
273
|
+
status=VerificationOutcome.SUCCESS,
|
|
274
|
+
result=str(sol_val),
|
|
275
|
+
)
|
|
276
|
+
else:
|
|
277
|
+
return VerificationResult(
|
|
278
|
+
status=VerificationOutcome.FAILURE,
|
|
279
|
+
result=str(sol_val),
|
|
280
|
+
error_message="Output mismatch: "
|
|
281
|
+
f"{sol_val} != {gt_val}",
|
|
282
|
+
)
|
|
283
|
+
else:
|
|
284
|
+
return VerificationResult(
|
|
285
|
+
status=VerificationOutcome.SUCCESS,
|
|
286
|
+
result=str(sol_val),
|
|
287
|
+
)
|
|
288
|
+
|
|
289
|
+
# Otherwise, run the code block,
|
|
290
|
+
# which should already include a print(...) in the end
|
|
291
|
+
venv_python = os.path.join(
|
|
292
|
+
self.venv_path,
|
|
293
|
+
self.bin_dir,
|
|
294
|
+
"python.exe" if os.name == 'nt' else "python",
|
|
295
|
+
)
|
|
145
296
|
if not os.path.exists(venv_python):
|
|
146
297
|
return VerificationResult(
|
|
147
298
|
status=VerificationOutcome.ERROR,
|
|
@@ -150,64 +301,166 @@ class PythonVerifier(BaseVerifier):
|
|
|
150
301
|
)
|
|
151
302
|
|
|
152
303
|
try:
|
|
153
|
-
|
|
154
|
-
venv_python
|
|
155
|
-
"-c",
|
|
156
|
-
script,
|
|
157
|
-
stdout=asyncio.subprocess.PIPE,
|
|
158
|
-
stderr=asyncio.subprocess.PIPE,
|
|
159
|
-
)
|
|
160
|
-
|
|
161
|
-
stdout, stderr = await asyncio.wait_for(
|
|
162
|
-
process.communicate(), timeout=self._timeout
|
|
304
|
+
sol_out, sol_err, sol_code = await self._run_code_block(
|
|
305
|
+
solution, venv_python
|
|
163
306
|
)
|
|
307
|
+
if sol_code != 0:
|
|
308
|
+
return VerificationResult(
|
|
309
|
+
status=VerificationOutcome.ERROR,
|
|
310
|
+
result=sol_out,
|
|
311
|
+
error_message=f"Solution code error:\n{sol_err}",
|
|
312
|
+
)
|
|
164
313
|
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
normalized_output = ' '.join(output_result.strip().split())
|
|
173
|
-
normalized_truth = ' '.join(
|
|
174
|
-
str(ground_truth).strip().split()
|
|
314
|
+
if ground_truth is not None:
|
|
315
|
+
try:
|
|
316
|
+
# First, try to evaluate the output as-is.
|
|
317
|
+
sol_val = ast.literal_eval(sol_out)
|
|
318
|
+
except Exception as e:
|
|
319
|
+
logger.warning(
|
|
320
|
+
f"Direct eval failed: {e}. Trying repr on output."
|
|
175
321
|
)
|
|
322
|
+
try:
|
|
323
|
+
# Try to convert sol_out to a literal
|
|
324
|
+
# by wrapping it with repr.
|
|
325
|
+
# FIXME: may be unnecessary
|
|
326
|
+
sol_val = ast.literal_eval(repr(sol_out))
|
|
327
|
+
except Exception as e2:
|
|
328
|
+
logger.warning(
|
|
329
|
+
f"repr eval also failed: {e2}."
|
|
330
|
+
"Falling back to string comparison."
|
|
331
|
+
)
|
|
332
|
+
sol_val = None
|
|
176
333
|
|
|
177
|
-
|
|
334
|
+
if sol_val is not None:
|
|
335
|
+
try:
|
|
336
|
+
gt_val = ast.literal_eval(ground_truth)
|
|
337
|
+
except Exception as e:
|
|
338
|
+
return VerificationResult(
|
|
339
|
+
status=VerificationOutcome.ERROR,
|
|
340
|
+
result="",
|
|
341
|
+
error_message="Ground truth evaluation error:"
|
|
342
|
+
f"{e}",
|
|
343
|
+
)
|
|
344
|
+
if sol_val == gt_val:
|
|
178
345
|
return VerificationResult(
|
|
179
346
|
status=VerificationOutcome.SUCCESS,
|
|
180
|
-
result=
|
|
347
|
+
result=sol_out,
|
|
181
348
|
)
|
|
182
349
|
else:
|
|
183
350
|
return VerificationResult(
|
|
184
351
|
status=VerificationOutcome.FAILURE,
|
|
185
|
-
|
|
186
|
-
|
|
352
|
+
result=sol_out,
|
|
353
|
+
error_message="Output mismatch: "
|
|
354
|
+
f"{sol_val} != {gt_val}",
|
|
187
355
|
)
|
|
188
356
|
else:
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
357
|
+
# Fallback: string comparison
|
|
358
|
+
if sol_out.strip() == ground_truth.strip():
|
|
359
|
+
return VerificationResult(
|
|
360
|
+
status=VerificationOutcome.SUCCESS,
|
|
361
|
+
result=sol_out,
|
|
362
|
+
)
|
|
363
|
+
else:
|
|
364
|
+
return VerificationResult(
|
|
365
|
+
status=VerificationOutcome.FAILURE,
|
|
366
|
+
result=sol_out,
|
|
367
|
+
error_message="Fallback string mismatch: "
|
|
368
|
+
f"'{sol_out}' != '{ground_truth}'",
|
|
369
|
+
)
|
|
194
370
|
else:
|
|
195
371
|
return VerificationResult(
|
|
196
|
-
status=VerificationOutcome.
|
|
197
|
-
|
|
198
|
-
result=output_result,
|
|
372
|
+
status=VerificationOutcome.SUCCESS,
|
|
373
|
+
result=sol_out,
|
|
199
374
|
)
|
|
200
|
-
|
|
201
375
|
except asyncio.TimeoutError:
|
|
202
376
|
return VerificationResult(
|
|
203
377
|
status=VerificationOutcome.TIMEOUT,
|
|
204
378
|
result="",
|
|
205
379
|
error_message="Execution timed out.",
|
|
206
380
|
)
|
|
207
|
-
|
|
208
381
|
except Exception as e:
|
|
209
382
|
return VerificationResult(
|
|
210
383
|
status=VerificationOutcome.ERROR,
|
|
211
384
|
result="",
|
|
212
|
-
error_message=f"
|
|
385
|
+
error_message=f"Unexpected error: {e}",
|
|
213
386
|
)
|
|
387
|
+
|
|
388
|
+
async def _run_code_block(
|
|
389
|
+
self, code: str, venv_path: str
|
|
390
|
+
) -> Tuple[str, str, int]:
|
|
391
|
+
r"""Executes a block of Python code in the virtual environment.
|
|
392
|
+
|
|
393
|
+
The code is written to a temporary file, executed using the Python
|
|
394
|
+
interpreter from the specified virtual environment, and
|
|
395
|
+
its output and error streams are captured.
|
|
396
|
+
|
|
397
|
+
Args:
|
|
398
|
+
code (str): The Python code to execute.
|
|
399
|
+
venv_path (str): The path to the virtual environment's Python
|
|
400
|
+
binary.
|
|
401
|
+
|
|
402
|
+
Returns:
|
|
403
|
+
Tuple[str, str, int]: A tuple containing the stdout output,
|
|
404
|
+
stderr output, and return code from the executed script.
|
|
405
|
+
"""
|
|
406
|
+
# No longer checking for expressions since they're handled separately
|
|
407
|
+
with tempfile.NamedTemporaryFile(
|
|
408
|
+
"w+", suffix=".py", delete=False
|
|
409
|
+
) as tmp:
|
|
410
|
+
tmp.write(code)
|
|
411
|
+
tmp_path = tmp.name
|
|
412
|
+
|
|
413
|
+
proc = await asyncio.create_subprocess_exec(
|
|
414
|
+
venv_path,
|
|
415
|
+
tmp_path,
|
|
416
|
+
stdout=asyncio.subprocess.PIPE,
|
|
417
|
+
stderr=asyncio.subprocess.PIPE,
|
|
418
|
+
)
|
|
419
|
+
stdout, stderr = await asyncio.wait_for(
|
|
420
|
+
proc.communicate(), timeout=self._timeout
|
|
421
|
+
)
|
|
422
|
+
os.remove(tmp_path)
|
|
423
|
+
return (
|
|
424
|
+
stdout.decode().strip(),
|
|
425
|
+
stderr.decode().strip(),
|
|
426
|
+
proc.returncode if proc.returncode is not None else -1,
|
|
427
|
+
)
|
|
428
|
+
|
|
429
|
+
def _is_expression(self, code: str) -> bool:
|
|
430
|
+
r"""Determines whether a given string of code is a single expression.
|
|
431
|
+
|
|
432
|
+
This utility uses Python's AST module to parse the code and checks if
|
|
433
|
+
it consists of a single expression node.
|
|
434
|
+
|
|
435
|
+
Args:
|
|
436
|
+
code (str): The Python code to analyze.
|
|
437
|
+
|
|
438
|
+
Returns:
|
|
439
|
+
bool: True if the code is a single expression, False otherwise.
|
|
440
|
+
"""
|
|
441
|
+
# Skip empty or whitespace-only strings
|
|
442
|
+
if not code or code.isspace():
|
|
443
|
+
return False
|
|
444
|
+
|
|
445
|
+
try:
|
|
446
|
+
# First try parsing as an expression - this is more reliable than
|
|
447
|
+
# starting with literal_eval
|
|
448
|
+
tree = ast.parse(code.strip(), mode='eval')
|
|
449
|
+
# Check if it's a function call (like print()) - these should not
|
|
450
|
+
# be treated as expressions
|
|
451
|
+
if isinstance(tree.body, ast.Call):
|
|
452
|
+
return False
|
|
453
|
+
# If parsing succeeds in 'eval' mode and it's not a function call,
|
|
454
|
+
# it's a valid expression
|
|
455
|
+
return True
|
|
456
|
+
except SyntaxError:
|
|
457
|
+
# If parsing as expression fails, it's not a valid expression
|
|
458
|
+
return False
|
|
459
|
+
except Exception:
|
|
460
|
+
# For any other parsing errors, try literal_eval as fallback for
|
|
461
|
+
# simple literals
|
|
462
|
+
try:
|
|
463
|
+
ast.literal_eval(code)
|
|
464
|
+
return True
|
|
465
|
+
except Exception:
|
|
466
|
+
return False
|