code-analyser 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code_analyser-0.1.0.dist-info/METADATA +283 -0
- code_analyser-0.1.0.dist-info/RECORD +34 -0
- code_analyser-0.1.0.dist-info/WHEEL +4 -0
- code_analyser-0.1.0.dist-info/licenses/LICENSE +21 -0
- codelens/__init__.py +7 -0
- codelens/__main__.py +19 -0
- codelens/analyzers/__init__.py +30 -0
- codelens/analyzers/base.py +139 -0
- codelens/analyzers/manager.py +207 -0
- codelens/analyzers/python_analyzer.py +344 -0
- codelens/analyzers/similarity_analyzer.py +512 -0
- codelens/api/__init__.py +1 -0
- codelens/api/routes/__init__.py +1 -0
- codelens/api/routes/analysis.py +441 -0
- codelens/api/routes/reports.py +438 -0
- codelens/api/routes/rubrics.py +349 -0
- codelens/api/schemas.py +305 -0
- codelens/cli.py +297 -0
- codelens/core/__init__.py +1 -0
- codelens/core/config.py +91 -0
- codelens/db/__init__.py +1 -0
- codelens/db/database.py +57 -0
- codelens/main.py +111 -0
- codelens/models/__init__.py +14 -0
- codelens/models/assignments.py +105 -0
- codelens/models/reports.py +172 -0
- codelens/models/rubrics.py +76 -0
- codelens/services/__init__.py +37 -0
- codelens/services/batch_processor.py +508 -0
- codelens/services/code_executor.py +310 -0
- codelens/services/sandbox.py +375 -0
- codelens/services/similarity_service.py +449 -0
- codelens/utils/__init__.py +29 -0
- codelens/utils/helpers.py +217 -0
|
@@ -0,0 +1,310 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Service for executing student code safely with validation and testing
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
import structlog
|
|
9
|
+
|
|
10
|
+
from codelens.core.config import settings
|
|
11
|
+
|
|
12
|
+
from .sandbox import ExecutionResult, TestResult, sandbox
|
|
13
|
+
|
|
14
|
+
logger = structlog.get_logger()
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@dataclass
class CodeExecutionRequest:
    """Describes a single code execution job.

    Only ``code`` is mandatory; every other field falls back to a default.
    """

    # Source text to execute.
    code: str
    # Language identifier of ``code``.
    language: str = "python"
    # Optional text made available to the program as input.
    input_data: str | None = None
    # Optional per-request execution timeout.
    timeout: int | None = None
    # Optional per-request memory limit.
    memory_limit: str | None = None

    # --- Test execution options ---
    # Flag requesting that tests be run.
    run_tests: bool = False
    # Ready-made test module source, if any.
    test_code: str | None = None
    # Declarative test cases (one dict per case), if any.
    test_cases: list[dict[str, Any]] | None = None
    # Name of the test framework to use.
    test_framework: str = "pytest"
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass
class ValidationResult:
    """Outcome of validating a piece of code.

    ``issues`` and ``security_risks`` may be passed as ``None``; after
    construction both are always lists.
    """

    is_valid: bool = True
    issues: list[str] | None = None
    security_risks: list[str] | None = None

    def __post_init__(self) -> None:
        # Swap each ``None`` placeholder for a fresh, per-instance list.
        for attr_name in ("issues", "security_risks"):
            if getattr(self, attr_name) is None:
                setattr(self, attr_name, [])
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@dataclass
class CodeExecutionResponse:
    """Result envelope returned for a code execution request.

    ``success`` is the only required field; the remaining fields default to
    ``None`` and are filled in by whichever stage produced them.
    """

    # Overall success flag set by the producer of the response.
    success: bool
    # Result of a plain run, when one was performed.
    execution_result: ExecutionResult | None = None
    # Result of a test run, when one was performed.
    test_result: TestResult | None = None
    # Result of the validation step, when available.
    validation_result: ValidationResult | None = None
    # Human-readable failure description, when there is one.
    error_message: str | None = None
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class CodeExecutorService:
|
|
58
|
+
"""High-level service for executing student code safely"""
|
|
59
|
+
|
|
60
|
+
def __init__(self) -> None:
|
|
61
|
+
self.sandbox = sandbox
|
|
62
|
+
|
|
63
|
+
async def execute_code(self, request: CodeExecutionRequest) -> CodeExecutionResponse:
|
|
64
|
+
"""
|
|
65
|
+
Execute code with optional testing
|
|
66
|
+
|
|
67
|
+
Args:
|
|
68
|
+
request: CodeExecutionRequest with code and execution parameters
|
|
69
|
+
|
|
70
|
+
Returns:
|
|
71
|
+
CodeExecutionResponse with execution results
|
|
72
|
+
"""
|
|
73
|
+
if not self.sandbox or not self.sandbox.is_available():
|
|
74
|
+
return CodeExecutionResponse(
|
|
75
|
+
success=False,
|
|
76
|
+
error_message="Code execution sandbox not available"
|
|
77
|
+
)
|
|
78
|
+
|
|
79
|
+
# Validate code first
|
|
80
|
+
validation_result = await self._validate_code(request.code, request.language)
|
|
81
|
+
if not validation_result.is_valid:
|
|
82
|
+
return CodeExecutionResponse(
|
|
83
|
+
success=False,
|
|
84
|
+
validation_result=validation_result,
|
|
85
|
+
error_message="Code validation failed"
|
|
86
|
+
)
|
|
87
|
+
|
|
88
|
+
try:
|
|
89
|
+
execution_result = None
|
|
90
|
+
test_result = None
|
|
91
|
+
|
|
92
|
+
if request.language.lower() == "python":
|
|
93
|
+
# Execute Python code
|
|
94
|
+
if request.test_code or request.test_cases:
|
|
95
|
+
# Run with tests
|
|
96
|
+
test_result = await self._run_python_tests(request)
|
|
97
|
+
else:
|
|
98
|
+
# Simple execution
|
|
99
|
+
execution_result = await self.sandbox.execute_python_code(
|
|
100
|
+
code=request.code,
|
|
101
|
+
input_data=request.input_data
|
|
102
|
+
)
|
|
103
|
+
|
|
104
|
+
else:
|
|
105
|
+
return CodeExecutionResponse(
|
|
106
|
+
success=False,
|
|
107
|
+
error_message=f"Language '{request.language}' not supported for execution"
|
|
108
|
+
)
|
|
109
|
+
|
|
110
|
+
return CodeExecutionResponse(
|
|
111
|
+
success=True,
|
|
112
|
+
execution_result=execution_result,
|
|
113
|
+
test_result=test_result,
|
|
114
|
+
validation_result=validation_result
|
|
115
|
+
)
|
|
116
|
+
|
|
117
|
+
except Exception as e:
|
|
118
|
+
logger.error("Code execution failed", error=str(e))
|
|
119
|
+
return CodeExecutionResponse(
|
|
120
|
+
success=False,
|
|
121
|
+
error_message=f"Execution failed: {str(e)}"
|
|
122
|
+
)
|
|
123
|
+
|
|
124
|
+
async def _validate_code(self, code: str, language: str) -> ValidationResult:
|
|
125
|
+
"""
|
|
126
|
+
Validate code for security issues and basic correctness
|
|
127
|
+
|
|
128
|
+
Args:
|
|
129
|
+
code: Source code to validate
|
|
130
|
+
language: Programming language
|
|
131
|
+
|
|
132
|
+
Returns:
|
|
133
|
+
ValidationResult with validation details
|
|
134
|
+
"""
|
|
135
|
+
issues = []
|
|
136
|
+
security_risks = []
|
|
137
|
+
|
|
138
|
+
if language.lower() == "python":
|
|
139
|
+
# Check for dangerous imports and operations
|
|
140
|
+
dangerous_imports = [
|
|
141
|
+
"os", "sys", "subprocess", "socket", "urllib", "requests",
|
|
142
|
+
"http", "ftplib", "smtplib", "telnetlib", "imaplib", "nntplib",
|
|
143
|
+
"email", "json", "pickle", "marshal", "shelve", "dbm",
|
|
144
|
+
"sqlite3", "threading", "multiprocessing", "ctypes", "gc",
|
|
145
|
+
"__import__", "eval", "exec", "compile", "globals", "locals"
|
|
146
|
+
]
|
|
147
|
+
|
|
148
|
+
# Check for file operations
|
|
149
|
+
file_operations = ["open(", "file(", "with open"]
|
|
150
|
+
|
|
151
|
+
# Check for network operations
|
|
152
|
+
network_operations = ["socket.", "urllib.", "requests.", "http."]
|
|
153
|
+
|
|
154
|
+
# Check for system operations
|
|
155
|
+
system_operations = ["os.", "sys.", "subprocess.", "system("]
|
|
156
|
+
|
|
157
|
+
code_lower = code.lower()
|
|
158
|
+
|
|
159
|
+
# Check dangerous imports
|
|
160
|
+
for imp in dangerous_imports:
|
|
161
|
+
if f"import {imp}" in code or f"from {imp}" in code:
|
|
162
|
+
security_risks.append(f"Potentially dangerous import: {imp}")
|
|
163
|
+
|
|
164
|
+
# Check file operations
|
|
165
|
+
for op in file_operations:
|
|
166
|
+
if op in code_lower:
|
|
167
|
+
security_risks.append(f"File operation detected: {op}")
|
|
168
|
+
|
|
169
|
+
# Check network operations
|
|
170
|
+
for op in network_operations:
|
|
171
|
+
if op in code_lower:
|
|
172
|
+
security_risks.append(f"Network operation detected: {op}")
|
|
173
|
+
|
|
174
|
+
# Check system operations
|
|
175
|
+
for op in system_operations:
|
|
176
|
+
if op in code_lower:
|
|
177
|
+
security_risks.append(f"System operation detected: {op}")
|
|
178
|
+
|
|
179
|
+
# Check for eval/exec
|
|
180
|
+
if "eval(" in code or "exec(" in code:
|
|
181
|
+
security_risks.append("Dynamic code execution detected (eval/exec)")
|
|
182
|
+
|
|
183
|
+
# Check code length
|
|
184
|
+
if len(code) > settings.max_file_size:
|
|
185
|
+
issues.append(f"Code too large: {len(code)} bytes (max: {settings.max_file_size})")
|
|
186
|
+
|
|
187
|
+
# Basic syntax check
|
|
188
|
+
try:
|
|
189
|
+
compile(code, "<string>", "exec")
|
|
190
|
+
except SyntaxError as e:
|
|
191
|
+
issues.append(f"Syntax error: {e}")
|
|
192
|
+
|
|
193
|
+
else:
|
|
194
|
+
issues.append(f"Validation not implemented for language: {language}")
|
|
195
|
+
|
|
196
|
+
# Determine if validation passed
|
|
197
|
+
is_valid = len(issues) == 0 and len(security_risks) == 0
|
|
198
|
+
|
|
199
|
+
# For educational use, we might want to allow some "security risks" with warnings
|
|
200
|
+
if security_risks and not issues:
|
|
201
|
+
logger.warning("Code contains potential security risks", risks=security_risks)
|
|
202
|
+
# You could choose to allow execution with warnings
|
|
203
|
+
# is_valid = True # Uncomment to allow risky code
|
|
204
|
+
|
|
205
|
+
return ValidationResult(
|
|
206
|
+
is_valid=is_valid,
|
|
207
|
+
issues=issues,
|
|
208
|
+
security_risks=security_risks
|
|
209
|
+
)
|
|
210
|
+
|
|
211
|
+
async def _run_python_tests(self, request: CodeExecutionRequest) -> TestResult:
|
|
212
|
+
"""Run Python tests against student code"""
|
|
213
|
+
if not self.sandbox:
|
|
214
|
+
return TestResult(test_output="Sandbox not available")
|
|
215
|
+
|
|
216
|
+
if request.test_code:
|
|
217
|
+
# Use provided test code
|
|
218
|
+
return await self.sandbox.run_python_tests(
|
|
219
|
+
code=request.code,
|
|
220
|
+
test_code=request.test_code,
|
|
221
|
+
test_framework=request.test_framework
|
|
222
|
+
)
|
|
223
|
+
|
|
224
|
+
elif request.test_cases:
|
|
225
|
+
# Generate test code from test cases
|
|
226
|
+
test_code = self._generate_test_code_from_cases(request.test_cases, request.test_framework)
|
|
227
|
+
return await self.sandbox.run_python_tests(
|
|
228
|
+
code=request.code,
|
|
229
|
+
test_code=test_code,
|
|
230
|
+
test_framework=request.test_framework
|
|
231
|
+
)
|
|
232
|
+
|
|
233
|
+
else:
|
|
234
|
+
# No tests to run
|
|
235
|
+
return TestResult(test_output="No tests provided")
|
|
236
|
+
|
|
237
|
+
def _generate_test_code_from_cases(
|
|
238
|
+
self,
|
|
239
|
+
test_cases: list[dict[str, Any]],
|
|
240
|
+
framework: str = "pytest"
|
|
241
|
+
) -> str:
|
|
242
|
+
"""Generate test code from test case specifications"""
|
|
243
|
+
if framework == "pytest":
|
|
244
|
+
return self._generate_pytest_code(test_cases)
|
|
245
|
+
else:
|
|
246
|
+
return self._generate_unittest_code(test_cases)
|
|
247
|
+
|
|
248
|
+
def _generate_pytest_code(self, test_cases: list[dict[str, Any]]) -> str:
|
|
249
|
+
"""Generate pytest test code from test cases"""
|
|
250
|
+
test_code = "import pytest\nfrom code import *\n\n"
|
|
251
|
+
|
|
252
|
+
for i, test_case in enumerate(test_cases):
|
|
253
|
+
function_name = test_case.get("function", "main")
|
|
254
|
+
inputs = test_case.get("inputs", [])
|
|
255
|
+
expected = test_case.get("expected", None)
|
|
256
|
+
description = test_case.get("description", f"Test case {i+1}")
|
|
257
|
+
|
|
258
|
+
test_code += f"def test_case_{i+1}():\n"
|
|
259
|
+
test_code += f" \"\"\"{description}\"\"\"\n"
|
|
260
|
+
|
|
261
|
+
if inputs:
|
|
262
|
+
input_str = ", ".join([repr(inp) for inp in inputs])
|
|
263
|
+
test_code += f" result = {function_name}({input_str})\n"
|
|
264
|
+
else:
|
|
265
|
+
test_code += f" result = {function_name}()\n"
|
|
266
|
+
|
|
267
|
+
if expected is not None:
|
|
268
|
+
test_code += f" assert result == {repr(expected)}\n"
|
|
269
|
+
|
|
270
|
+
test_code += "\n"
|
|
271
|
+
|
|
272
|
+
return test_code
|
|
273
|
+
|
|
274
|
+
def _generate_unittest_code(self, test_cases: list[dict[str, Any]]) -> str:
|
|
275
|
+
"""Generate unittest test code from test cases"""
|
|
276
|
+
test_code = "import unittest\nfrom code import *\n\n"
|
|
277
|
+
test_code += "class TestCode(unittest.TestCase):\n"
|
|
278
|
+
|
|
279
|
+
for i, test_case in enumerate(test_cases):
|
|
280
|
+
function_name = test_case.get("function", "main")
|
|
281
|
+
inputs = test_case.get("inputs", [])
|
|
282
|
+
expected = test_case.get("expected", None)
|
|
283
|
+
description = test_case.get("description", f"Test case {i+1}")
|
|
284
|
+
|
|
285
|
+
test_code += f" def test_case_{i+1}(self):\n"
|
|
286
|
+
test_code += f" \"\"\"{description}\"\"\"\n"
|
|
287
|
+
|
|
288
|
+
if inputs:
|
|
289
|
+
input_str = ", ".join([repr(inp) for inp in inputs])
|
|
290
|
+
test_code += f" result = {function_name}({input_str})\n"
|
|
291
|
+
else:
|
|
292
|
+
test_code += f" result = {function_name}()\n"
|
|
293
|
+
|
|
294
|
+
if expected is not None:
|
|
295
|
+
test_code += f" self.assertEqual(result, {repr(expected)})\n"
|
|
296
|
+
|
|
297
|
+
test_code += "\n"
|
|
298
|
+
|
|
299
|
+
test_code += "\nif __name__ == '__main__':\n"
|
|
300
|
+
test_code += " unittest.main()\n"
|
|
301
|
+
|
|
302
|
+
return test_code
|
|
303
|
+
|
|
304
|
+
def is_available(self) -> bool:
|
|
305
|
+
"""Check if code execution is available"""
|
|
306
|
+
return bool(self.sandbox and self.sandbox.is_available())
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
# Global code executor instance
|
|
310
|
+
# Created at import time; __init__ only stores a reference to the module-level sandbox.
code_executor = CodeExecutorService()
|
|
@@ -0,0 +1,375 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Secure code execution sandbox using Docker containers
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import json
|
|
7
|
+
import tempfile
|
|
8
|
+
import time
|
|
9
|
+
from dataclasses import dataclass
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import Any
|
|
12
|
+
|
|
13
|
+
import docker # type: ignore[import-untyped]
|
|
14
|
+
import structlog
|
|
15
|
+
from docker.errors import ContainerError, DockerException, ImageNotFound # type: ignore[import-untyped]
|
|
16
|
+
|
|
17
|
+
from codelens.core.config import settings
|
|
18
|
+
|
|
19
|
+
logger = structlog.get_logger()
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass
class ExecutionResult:
    """Outcome of one sandboxed run.

    ``success`` is required; all other fields carry defaults.
    """

    # Whether the run is considered successful.
    success: bool
    # Captured standard output.
    stdout: str = ""
    # Captured standard error.
    stderr: str = ""
    # Exit status of the run.
    exit_code: int = 0
    # Elapsed time of the run.
    execution_time: float = 0.0
    # Formatted memory usage, when it could be determined.
    memory_used: str | None = None
    # Whether the run was stopped for exceeding its time limit.
    timed_out: bool = False
    # Description of an infrastructure-level failure, if any.
    error_message: str | None = None
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@dataclass
class TestResult:
    """Result of running tests against code

    ``failed_tests`` may be passed as ``None``; after construction it is
    always a list.
    """

    # The class name starts with "Test", so pytest would try to collect it
    # (and warn) in any test module that imports it. This opts out of
    # collection. It has no annotation, so it is not a dataclass field.
    __test__ = False

    # Number of tests that were run.
    total_tests: int = 0
    # Number of tests that passed.
    passed_tests: int = 0
    # One dict per failed test.
    failed_tests: list[dict[str, Any]] | None = None
    # Raw textual output of the test run.
    test_output: str = ""
    # Result of the underlying sandbox execution, if any.
    execution_result: ExecutionResult | None = None

    def __post_init__(self) -> None:
        # Give every instance its own list instead of a shared default.
        if self.failed_tests is None:
            self.failed_tests = []
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class DockerSandbox:
    """Docker-based sandbox for secure code execution

    Blocking Docker SDK calls are run in worker threads so the async methods
    do not stall the event loop.
    """

    # Extra seconds granted to the SDK-level wait so the worker thread ends on
    # its own even if killing a timed-out container fails.
    _WAIT_BACKSTOP_SECONDS = 5

    def __init__(self) -> None:
        self.client: docker.DockerClient | None = None
        self.image_name = settings.docker_image
        self.timeout = settings.analyzer.execution_timeout
        self.memory_limit = settings.analyzer.memory_limit
        self.cpu_limit = settings.analyzer.cpu_limit

        # Initialize Docker client; on failure the client stays None and
        # is_available() reports False.
        try:
            self.client = docker.from_env()
            self._ensure_image_available()
        except DockerException as e:
            logger.error("Failed to initialize Docker client", error=str(e))

    def _ensure_image_available(self) -> None:
        """Ensure the required Docker image is available, pulling it if missing"""
        if not self.client:
            return

        try:
            self.client.images.get(self.image_name)
            logger.info("Docker image available", image=self.image_name)
        except ImageNotFound:
            logger.info("Pulling Docker image", image=self.image_name)
            try:
                self.client.images.pull(self.image_name)
                logger.info("Docker image pulled successfully", image=self.image_name)
            except DockerException as e:
                # A failed pull is logged only; later runs report the Docker error.
                logger.error("Failed to pull Docker image", image=self.image_name, error=str(e))

    async def execute_python_code(
        self,
        code: str,
        input_data: str | None = None,
        working_dir: str | None = None
    ) -> ExecutionResult:
        """
        Execute Python code in a secure sandbox

        Args:
            code: Python code to execute
            input_data: Optional stdin input for the code
            working_dir: Optional working directory for execution
                (defaults to /workspace)

        Returns:
            ExecutionResult with execution details
        """
        if not self.client:
            return ExecutionResult(
                success=False,
                error_message="Docker client not available"
            )

        start_time = time.time()

        # NOTE(review): the temporary directory is created by the host process
        # while the container runs as uid 1000 - confirm that user can read it.
        with tempfile.TemporaryDirectory() as temp_dir:
            workspace = Path(temp_dir)
            # Python source defaults to UTF-8, so write it that way regardless
            # of the host locale.
            (workspace / "code.py").write_text(code, encoding="utf-8")

            # Create input file if provided
            stdin_path = None
            if input_data:
                (workspace / "input.txt").write_text(input_data, encoding="utf-8")
                stdin_path = "/workspace/input.txt"

            # Run code in Docker container (mounted read-only)
            return await self._run_container(
                command=["python", "/workspace/code.py"],
                volumes={temp_dir: {"bind": "/workspace", "mode": "ro"}},
                working_dir=working_dir or "/workspace",
                input_file=stdin_path,
                start_time=start_time
            )

    async def run_python_tests(
        self,
        code: str,
        test_code: str,
        test_framework: str = "pytest"
    ) -> TestResult:
        """
        Run tests against Python code

        Args:
            code: Student's Python code
            test_code: Test code to run
            test_framework: Testing framework to use ("pytest"; any other
                value runs unittest)

        Returns:
            TestResult with test execution details
        """
        if not self.client:
            return TestResult(
                test_output="Docker client not available",
                execution_result=ExecutionResult(
                    success=False,
                    error_message="Docker client not available"
                )
            )

        with tempfile.TemporaryDirectory() as temp_dir:
            workspace = Path(temp_dir)

            # Write code and test files
            (workspace / "code.py").write_text(code, encoding="utf-8")
            (workspace / "test_code.py").write_text(test_code, encoding="utf-8")

            # Create requirements file for test dependencies
            requirements = workspace / "requirements.txt"
            if test_framework == "pytest":
                requirements.write_text("pytest>=7.0.0\n", encoding="utf-8")
                test_command = [
                    "python", "-m", "pytest", "/workspace/test_code.py",
                    "-v", "--tb=short",
                    "--json-report", "--json-report-file=/workspace/report.json",
                ]
            else:
                # unittest (built-in, no extra requirements)
                requirements.write_text("", encoding="utf-8")
                test_command = ["python", "-m", "unittest", "/workspace/test_code.py", "-v"]

            # Install dependencies and run tests.
            # NOTE(review): containers run with networking disabled, so pip can
            # only succeed if the requirements (and the --json-report plugin)
            # are already present in the image - confirm.
            setup_command = (
                "cd /workspace && "
                "pip install --no-cache-dir -r requirements.txt && "
                + " ".join(test_command)
            )

            start_time = time.time()
            execution_result = await self._run_container(
                command=["bash", "-c", setup_command],
                volumes={temp_dir: {"bind": "/workspace", "mode": "rw"}},
                working_dir="/workspace",
                start_time=start_time
            )

            # Parse test results while the temporary directory still exists
            return await self._parse_test_results(temp_dir, test_framework, execution_result)

    async def _run_container(
        self,
        command: list[str],
        volumes: dict[str, dict[str, str]],
        working_dir: str = "/workspace",
        input_file: str | None = None,
        start_time: float | None = None
    ) -> ExecutionResult:
        """
        Run a command in a Docker container with security constraints

        Args:
            command: Command (argv list) to run in the container
            volumes: Volume mapping passed to the Docker SDK
            working_dir: Working directory inside the container
            input_file: Container path of a file to connect to the command's stdin
            start_time: Timestamp used as the start of the measured run
                (defaults to now)

        Returns:
            ExecutionResult; failures are reported through its fields instead
            of being raised
        """
        if start_time is None:
            start_time = time.time()

        if input_file:
            # Connect the file to stdin via a shell redirect: "$0" is the input
            # file and "$@" the original command, so nothing is re-quoted.
            command = ["sh", "-c", 'exec "$@" < "$0"', input_file, *command]

        # Container configuration with security limits. Auto-removal is not
        # requested: the container must outlive its exit so logs can be read;
        # it is removed explicitly in the ``finally`` block below.
        container_config = {
            "image": self.image_name,
            "command": command,
            "volumes": volumes,
            "working_dir": working_dir,
            "mem_limit": self.memory_limit,
            "memswap_limit": self.memory_limit,  # Disable swap
            "cpu_period": 100000,  # 100ms
            # NOTE(review): a cpu_limit of 1.0 yields half of cpu_period here -
            # confirm the intended unit of settings.analyzer.cpu_limit.
            "cpu_quota": int(50000 * float(self.cpu_limit)),  # CPU limit
            "network_disabled": True,  # No network access
            "read_only": False,  # Some operations need write access
            "stdout": True,
            "stderr": True,
            "tty": False,
            "user": "1000:1000",  # Non-root user
            # Security options
            "security_opt": ["no-new-privileges:true"],
            "cap_drop": ["ALL"],  # Drop all capabilities
            "tmpfs": {"/tmp": "noexec,nosuid,size=100m"},
        }

        container = None
        try:
            # Run container
            if not self.client:
                raise RuntimeError("Docker client not available")
            container = await asyncio.to_thread(
                self.client.containers.run, **container_config, detach=True
            )

            # Wait for completion; the timeout is enforced by asyncio so that
            # exceeding it raises asyncio.TimeoutError.
            try:
                wait_result = await asyncio.wait_for(
                    asyncio.to_thread(
                        container.wait,
                        timeout=self.timeout + self._WAIT_BACKSTOP_SECONDS
                    ),
                    timeout=self.timeout
                )
            except asyncio.TimeoutError:
                # Container timed out: stop it so it does not keep running.
                await asyncio.to_thread(self._kill_container, container)
                return ExecutionResult(
                    success=False,
                    stderr="Execution timed out",
                    exit_code=-1,
                    execution_time=self.timeout,
                    timed_out=True,
                    error_message=f"Code execution exceeded {self.timeout}s timeout"
                )

            exit_code = wait_result["StatusCode"]
            execution_time = time.time() - start_time
            stdout, stderr, memory_used = await asyncio.to_thread(
                self._collect_output, container
            )

            return ExecutionResult(
                success=(exit_code == 0),
                stdout=stdout,
                stderr=stderr,
                exit_code=exit_code,
                execution_time=execution_time,
                memory_used=memory_used,
                timed_out=False
            )

        except ContainerError as e:
            return ExecutionResult(
                success=False,
                stderr=e.stderr.decode("utf-8", errors="replace") if e.stderr else "",
                exit_code=e.exit_status,
                execution_time=time.time() - start_time,
                error_message=f"Container error: {str(e)}"
            )
        except DockerException as e:
            return ExecutionResult(
                success=False,
                error_message=f"Docker error: {str(e)}",
                execution_time=time.time() - start_time
            )
        except Exception as e:
            return ExecutionResult(
                success=False,
                error_message=f"Unexpected error: {str(e)}",
                execution_time=time.time() - start_time
            )
        finally:
            if container is not None:
                await asyncio.to_thread(self._dispose_container, container)

    def _collect_output(self, container: Any) -> tuple[str, str, str | None]:
        """Return (stdout, stderr, memory_used) of a finished container"""
        stdout = container.logs(stdout=True, stderr=False).decode("utf-8", errors="replace")
        stderr = container.logs(stdout=False, stderr=True).decode("utf-8", errors="replace")

        # Memory stats are best-effort: report None when they are unavailable.
        memory_used = None
        try:
            stats = container.stats(stream=False)
            usage = stats["memory_stats"].get("usage")
            if usage is not None:
                memory_used = f"{usage / 1024 / 1024:.1f}MB"
        except Exception as e:
            logger.debug("Could not read container memory stats", error=str(e))

        return stdout, stderr, memory_used

    def _kill_container(self, container: Any) -> None:
        """Kill a running container, ignoring failures (it may already be gone)"""
        try:
            container.kill()
        except Exception as e:
            logger.debug("Could not kill sandbox container", error=str(e))

    def _dispose_container(self, container: Any) -> None:
        """Force-remove a container; failures are logged, never raised"""
        try:
            container.remove(force=True)
        except Exception as e:
            logger.warning("Failed to remove sandbox container", error=str(e))

    async def _parse_test_results(
        self,
        temp_dir: str,
        test_framework: str,
        execution_result: ExecutionResult
    ) -> TestResult:
        """
        Parse test results from execution output

        Args:
            temp_dir: Host directory that was mounted as /workspace
            test_framework: "pytest" or anything else for unittest
            execution_result: Result of the container run

        Returns:
            TestResult populated with counts, failures and raw output
        """
        test_result = TestResult(execution_result=execution_result)
        test_result.test_output = execution_result.stdout

        if test_framework == "pytest":
            # Try to parse JSON report
            self._load_pytest_json_report(Path(temp_dir) / "report.json", test_result)

            # Fallback to parsing stdout
            if test_result.total_tests == 0:
                self._parse_pytest_stdout(execution_result.stdout, test_result)

        else:  # unittest
            # unittest's text runner reports on stderr by default, so both
            # streams are inspected and returned.
            combined = "\n".join(
                part for part in (execution_result.stdout, execution_result.stderr) if part
            )
            self._parse_unittest_output(combined, test_result)
            test_result.test_output = combined

        return test_result

    def _load_pytest_json_report(self, report_file: Path, test_result: TestResult) -> None:
        """Fill test_result from a pytest JSON report file, if it exists and is readable"""
        if not report_file.exists():
            return

        try:
            with open(report_file, encoding="utf-8") as f:
                report_data = json.load(f)

            summary = report_data.get("summary", {})
            test_result.total_tests = summary.get("total", 0)
            test_result.passed_tests = summary.get("passed", 0)

            # Parse failed tests
            if test_result.failed_tests is None:
                test_result.failed_tests = []
            for test in report_data.get("tests", []):
                if test.get("outcome") != "failed":
                    continue
                call_stage = test.get("call", {})
                test_result.failed_tests.append({
                    "name": test.get("nodeid", "unknown"),
                    "message": call_stage.get("longrepr", ""),
                    # Prefer output captured while the test ran; fall back to
                    # output captured during setup.
                    "output": call_stage.get("stdout") or test.get("setup", {}).get("stdout", "")
                })

        except (OSError, ValueError, KeyError, AttributeError, TypeError) as e:
            # json.JSONDecodeError is a ValueError; the other types cover an
            # unreadable file or a report with an unexpected shape.
            logger.warning("Failed to parse pytest JSON report", error=str(e))

    def _parse_pytest_stdout(self, output: str, test_result: TestResult) -> None:
        """
        Parse pytest stdout output for test counts

        Uses the last summary line, e.g. "2 failed, 3 passed in 0.12s" or
        "==== 5 passed in 0.10s ====". Leaves test_result untouched when no
        summary line is found.

        Args:
            output: Captured pytest output
            test_result: Object whose total_tests / passed_tests are updated
        """
        import re  # function-scope import: not part of the module's import block

        duration = re.compile(r"\bin \d+(?:\.\d+)?s\b")
        outcome = re.compile(r"(\d+) (passed|failed|errors?|skipped|xfailed|xpassed)\b")

        for line in reversed(output.splitlines()):
            if not duration.search(line):
                continue
            counts = outcome.findall(line)
            if not counts:
                continue
            test_result.passed_tests = sum(
                int(number) for number, name in counts if name == "passed"
            )
            test_result.total_tests = sum(int(number) for number, _ in counts)
            return

    def _parse_unittest_output(self, output: str, test_result: TestResult) -> None:
        """
        Parse unittest output for test counts

        Reads "Ran N tests in ..." for the total and the final "OK" /
        "FAILED (failures=1, errors=2)" line for everything that did not pass.

        Args:
            output: Captured unittest output
            test_result: Object whose total_tests / passed_tests are updated
        """
        import re  # function-scope import: not part of the module's import block

        not_passed = 0
        verdict_seen = False
        for line in output.splitlines():
            ran = re.match(r"Ran (\d+) tests?\b", line)
            if ran:
                # Example: "Ran 5 tests in 0.001s"
                test_result.total_tests = int(ran.group(1))
            elif line.startswith(("FAILED", "OK")):
                verdict_seen = True
                not_passed = sum(
                    int(number)
                    for number in re.findall(r"\b(?:failures|errors|skipped)=(\d+)", line)
                )

        # Without a verdict line the run is incomplete; do not claim passes.
        if verdict_seen:
            test_result.passed_tests = max(test_result.total_tests - not_passed, 0)

    def is_available(self) -> bool:
        """Check if Docker sandbox is available"""
        return self.client is not None
|
|
372
|
+
|
|
373
|
+
|
|
374
|
+
# Global sandbox instance
|
|
375
|
+
# None when settings.docker_enabled is falsy; otherwise DockerSandbox() is
# constructed at import time, which creates the Docker client and checks the image.
sandbox = DockerSandbox() if settings.docker_enabled else None
|