QuantumChecker 0.2.8__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- quantumchecker-0.3.0/PKG-INFO +53 -0
- quantumchecker-0.3.0/QuantumCheck/main.py +230 -0
- {quantumchecker-0.2.8 → quantumchecker-0.3.0}/QuantumCheck/powerbi_evaluator.py +8 -8
- {quantumchecker-0.2.8 → quantumchecker-0.3.0}/QuantumCheck/python_evaluator.py +58 -39
- {quantumchecker-0.2.8 → quantumchecker-0.3.0}/QuantumCheck/sql_evaluator.py +58 -41
- {quantumchecker-0.2.8 → quantumchecker-0.3.0}/QuantumCheck/ssis_evaluator.py +131 -28
- quantumchecker-0.3.0/QuantumChecker.egg-info/PKG-INFO +53 -0
- {quantumchecker-0.2.8 → quantumchecker-0.3.0}/QuantumChecker.egg-info/SOURCES.txt +2 -1
- quantumchecker-0.3.0/README.md +27 -0
- {quantumchecker-0.2.8 → quantumchecker-0.3.0}/setup.py +1 -1
- quantumchecker-0.3.0/tests/test.py +135 -0
- quantumchecker-0.3.0/tests/test2.py +31 -0
- quantumchecker-0.2.8/PKG-INFO +0 -138
- quantumchecker-0.2.8/QuantumCheck/main.py +0 -188
- quantumchecker-0.2.8/QuantumChecker.egg-info/PKG-INFO +0 -138
- quantumchecker-0.2.8/README.md +0 -112
- quantumchecker-0.2.8/tests/test.py +0 -388
- {quantumchecker-0.2.8 → quantumchecker-0.3.0}/QuantumCheck/__init__.py +0 -0
- {quantumchecker-0.2.8 → quantumchecker-0.3.0}/QuantumCheck/prompts.py +0 -0
- {quantumchecker-0.2.8 → quantumchecker-0.3.0}/QuantumChecker.egg-info/dependency_links.txt +0 -0
- {quantumchecker-0.2.8 → quantumchecker-0.3.0}/QuantumChecker.egg-info/requires.txt +0 -0
- {quantumchecker-0.2.8 → quantumchecker-0.3.0}/QuantumChecker.egg-info/top_level.txt +0 -0
- {quantumchecker-0.2.8 → quantumchecker-0.3.0}/setup.cfg +0 -0
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: QuantumChecker
|
|
3
|
+
Version: 0.3.0
|
|
4
|
+
Summary: A package to evaluate homework submissions in Python, SQL, PowerBI, and SSIS.
|
|
5
|
+
Author: Qobiljon
|
|
6
|
+
Author-email: qobiljonkhayrullayev@gmail.com
|
|
7
|
+
Classifier: Programming Language :: Python :: 3
|
|
8
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
9
|
+
Classifier: Operating System :: OS Independent
|
|
10
|
+
Requires-Python: >=3.6
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
Requires-Dist: requests>=2.31.0
|
|
13
|
+
Requires-Dist: tenacity>=8.2.3
|
|
14
|
+
Requires-Dist: pdf2image>=1.16.3
|
|
15
|
+
Requires-Dist: python-dotenv>=1.0.0
|
|
16
|
+
Requires-Dist: Pillow>=10.0.0
|
|
17
|
+
Requires-Dist: PyPDF2>=3.0.1
|
|
18
|
+
Dynamic: author
|
|
19
|
+
Dynamic: author-email
|
|
20
|
+
Dynamic: classifier
|
|
21
|
+
Dynamic: description
|
|
22
|
+
Dynamic: description-content-type
|
|
23
|
+
Dynamic: requires-dist
|
|
24
|
+
Dynamic: requires-python
|
|
25
|
+
Dynamic: summary
|
|
26
|
+
|
|
27
|
+
Sample usage:
|
|
28
|
+
```
|
|
29
|
+
import asyncio
|
|
30
|
+
from QuantumCheck.main import HomeworkEvaluator
|
|
31
|
+
|
|
32
|
+
async def main():
|
|
33
|
+
evaluator = HomeworkEvaluator()
|
|
34
|
+
question_content = """
|
|
35
|
+
Q1: What is a Python list? Explain with an example.
|
|
36
|
+
|
|
37
|
+
Q2: Write an SQL query to select all records from a table named 'students'.
|
|
38
|
+
"""
|
|
39
|
+
answer_path = "sample_submissions/student1_answer.py"
|
|
40
|
+
api_keys = ["your_api_key_1", "your_api_key_2"]
|
|
41
|
+
question_type = "python"
|
|
42
|
+
|
|
43
|
+
result = await evaluator.evaluate_from_content(
|
|
44
|
+
question_content=question_content,
|
|
45
|
+
answer_path=answer_path,
|
|
46
|
+
api_keys=api_keys,
|
|
47
|
+
question_type=question_type
|
|
48
|
+
)
|
|
49
|
+
print(result)
|
|
50
|
+
|
|
51
|
+
if __name__ == "__main__":
|
|
52
|
+
asyncio.run(main())
|
|
53
|
+
```
|
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
import zipfile
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from typing import List, Dict
|
|
6
|
+
from .python_evaluator import PythonEvaluator
|
|
7
|
+
from .sql_evaluator import SQLEvaluator
|
|
8
|
+
from .powerbi_evaluator import PowerBIEvaluator
|
|
9
|
+
from .ssis_evaluator import SSISEvaluator
|
|
10
|
+
import asyncio
|
|
11
|
+
|
|
12
|
+
_logger_cache = {}
|
|
13
|
+
|
|
14
|
+
class HomeworkEvaluator:
    """Dispatch a homework submission to a type-specific evaluator.

    The class picks an evaluator from ``EVALUATOR_REGISTRY`` (by the
    caller-supplied question type, falling back to the answer file's
    extension), then tries the supplied API keys one after another until
    one of them yields a usable evaluation. Keys reported as invalid are
    remembered for the lifetime of the instance; keys that hit a rate
    limit or a service outage are skipped until a cool-down has passed.
    """

    # Evaluator class used for each supported evaluation type.
    EVALUATOR_REGISTRY = {
        "python": PythonEvaluator,
        "sql": SQLEvaluator,
        "powerbi": PowerBIEvaluator,
        "ssis": SSISEvaluator
    }

    # File extension -> evaluation type. Lookups in this class always use a
    # lower-cased extension, so the ".DTSX" entry is never hit from here; it
    # is kept because this mapping is a public class attribute.
    EXTENSION_TO_TYPE = {
        ".py": "python",
        ".sql": "sql",
        ".pbit": "powerbi",
        ".pdf": "powerbi",
        ".dtsx": "ssis",
        ".DTSX": "ssis",
        ".txt": "text",
        ".md": "text"
    }

    # Display name reported in results when an evaluator does not expose
    # its own ``get_api_name`` method.
    API_NAME_MAPPING = {
        "python": "Google Gemini API",
        "sql": "Google Gemini API",
        "powerbi": "Google Gemini API",
        "ssis": "Google Gemini API",
        "text": "Google Gemini API"
    }

    def __init__(self, log_level: int = logging.INFO):
        """Create an evaluator front-end.

        Args:
            log_level: Level applied to loggers created by this instance.
        """
        self.log_level = log_level
        # eval_type -> 0-based index of the key that last succeeded.
        self._successful_key_cache = {}
        # key -> POSIX timestamp before which the key must not be used.
        self._rate_limit_delay = {}
        # Keys the API reported as invalid; never retried by this instance.
        self._invalid_key_cache = set()
        # Serialises the request throttle in evaluate_from_content.
        self._lock = asyncio.Lock()
        self._last_request_time = None
        # Fixed once per instance so logger names keep the
        # "<type>_<timestamp>" shape without creating a new logger on
        # every call (the logging module never frees named loggers).
        self._log_stamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')

    def _get_logger(self, log_type: str) -> logging.Logger:
        """Return the cached stream logger for ``log_type``, creating it once."""
        log_name = f"{log_type}_{self._log_stamp}"
        logger = _logger_cache.get(log_name)
        if logger is None:
            logger = logging.getLogger(log_name)
            logger.setLevel(self.log_level)
            if not logger.handlers:
                handler = logging.StreamHandler()
                handler.setFormatter(logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s"))
                logger.addHandler(handler)
            _logger_cache[log_name] = logger
        return logger

    def parse_questions(self, content: str) -> List[str]:
        """Split ``content`` into questions on blank lines.

        Args:
            content: Raw question text; questions are separated by an
                empty line.

        Returns:
            The non-empty, whitespace-stripped questions in order.

        Raises:
            ValueError: If no non-empty question is present.
        """
        questions = [q.strip() for q in content.split("\n\n") if q.strip()]
        if not questions:
            raise ValueError("No valid questions found in content")
        return questions

    def _detect_zip_content_type(self, zip_path: str, logger: logging.Logger) -> str:
        """Infer the evaluation type of a ZIP archive from its member extensions.

        Priority is python, sql, powerbi, ssis; anything else is "text".
        An archive that cannot be read as a ZIP is also reported as "text"
        (with a warning) instead of raising.
        """
        try:
            with zipfile.ZipFile(zip_path, "r") as zip_ref:
                extensions = {os.path.splitext(name)[1].lower() for name in zip_ref.namelist()}
        except zipfile.BadZipFile:
            logger.warning("Invalid ZIP file, treating content as text: %s", zip_path)
            return "text"

        file_types = {self.EXTENSION_TO_TYPE.get(ext, "text") for ext in extensions if ext}
        for candidate in ("python", "sql", "powerbi", "ssis"):
            if candidate in file_types:
                return candidate
        return "text"

    @staticmethod
    def _failure(feedback: str, issues: List[str]) -> Dict[str, object]:
        """Build the zero-score result returned when no evaluation was produced."""
        return {
            "score": 0,
            "feedback": feedback,
            "issues": issues,
            "recommendations": [],
            "used_api_key_index": None,
            "used_api_name": None
        }

    def _candidate_keys(self, api_keys: List[str], eval_type: str) -> list:
        """Return ``(1-based index, key)`` pairs to try, without duplicates.

        Keys already known to be invalid are left out. The key that last
        succeeded for ``eval_type`` is moved to the front rather than
        inserted a second time, so no key is attempted twice in one pass.
        """
        candidates = [(i + 1, key) for i, key in enumerate(api_keys) if key not in self._invalid_key_cache]
        cached_idx = self._successful_key_cache.get(eval_type)
        if cached_idx is not None and cached_idx < len(api_keys):
            preferred = (cached_idx + 1, api_keys[cached_idx])
            if preferred in candidates:
                candidates.remove(preferred)
                candidates.insert(0, preferred)
        return candidates

    async def evaluate_from_content(
        self,
        question_content: str,
        answer_path: str,
        api_keys: List[str],
        question_type: str,
        retry_count: int = 0
    ) -> Dict[str, object]:
        """Evaluate the submission at ``answer_path`` against ``question_content``.

        Args:
            question_content: Questions separated by blank lines.
            answer_path: Path to the answer file or ZIP archive.
            api_keys: API keys to try, in order of preference.
            question_type: Evaluation type; when it is not a key of
                ``EVALUATOR_REGISTRY`` the type is derived from the file.
            retry_count: Internal retry depth; at most three retries are
                made after waiting for a rate-limited key.

        Returns:
            A dict with ``score``, ``feedback``, ``issues``,
            ``recommendations``, ``used_api_key_index`` (1-based, or None on
            failure) and ``used_api_name``.
        """
        # Throttle: consecutive requests through this instance start at
        # least 30 seconds apart.
        async with self._lock:
            now = datetime.now()
            if self._last_request_time:
                elapsed = (now - self._last_request_time).total_seconds()
                if elapsed < 30:
                    await asyncio.sleep(30 - elapsed)
            self._last_request_time = datetime.now()

        try:
            questions = self.parse_questions(question_content)
        except ValueError as e:
            return self._failure(f"Error parsing question content: {str(e)}", [str(e)])

        answer_path = answer_path.strip()
        # Check existence before inspecting the file: opening a missing
        # ZIP would otherwise raise instead of producing a result.
        if not os.path.exists(answer_path):
            not_found = f"Answer file not found: {answer_path}"
            return self._failure(not_found, [not_found])

        ext = os.path.splitext(answer_path)[1].lower()
        if ext == ".zip":
            file_type = self._detect_zip_content_type(answer_path, self._get_logger("zip"))
        else:
            file_type = self.EXTENSION_TO_TYPE.get(ext, "text")

        eval_type = question_type if question_type in self.EVALUATOR_REGISTRY else file_type
        # Types without a dedicated evaluator (e.g. "text") use PythonEvaluator.
        evaluator_class = self.EVALUATOR_REGISTRY.get(eval_type, PythonEvaluator)

        available_keys = self._candidate_keys(api_keys, eval_type)
        if not available_keys:
            return self._failure("No valid API keys available.", ["All API keys are invalid or rate-limited."])

        last_error_messages = []
        for idx, key in available_keys:
            # The same key string may appear more than once in api_keys.
            if key in self._invalid_key_cache:
                continue

            delay_until = self._rate_limit_delay.get(key)
            if delay_until is not None:
                if datetime.now().timestamp() < delay_until:
                    continue
                del self._rate_limit_delay[key]

            try:
                # Constructed inside the try so a constructor error moves on
                # to the next key instead of aborting the whole evaluation.
                evaluator = evaluator_class(key)
                api_name = getattr(evaluator, 'get_api_name', lambda: self.API_NAME_MAPPING.get(eval_type, "Unknown API"))()

                # NOTE(review): evaluate() is a synchronous call and blocks
                # the event loop while it runs.
                evaluation = evaluator.evaluate(questions, answer_path, temp_dir=f"temp_extract_{os.getpid()}_{idx}")
                feedback = evaluation.get("feedback", "").lower()
                issues = " ".join(evaluation.get("issues", [])).lower()

                def mentions(*phrases):
                    return any(phrase in feedback or phrase in issues for phrase in phrases)

                if mentions("api key not valid", "api_key_invalid"):
                    last_error_messages.append(f"API key #{idx} invalid.")
                    self._invalid_key_cache.add(key)
                    continue

                if mentions("429", "too many requests", "rate limit"):
                    last_error_messages.append(f"API key #{idx} rate limited.")
                    self._rate_limit_delay[key] = datetime.now().timestamp() + 300
                    continue

                if mentions("503", "service unavailable"):
                    last_error_messages.append(f"API key #{idx} service unavailable.")
                    self._rate_limit_delay[key] = datetime.now().timestamp() + 7200
                    continue

                if evaluation.get("score", 0) == 0 and "evaluation not returned" in feedback:
                    last_error_messages.append(f"API key #{idx} returned invalid evaluation.")
                    continue

                self._successful_key_cache[eval_type] = idx - 1
                return {
                    "score": evaluation.get("score", 0),
                    "feedback": evaluation.get("feedback", "No feedback provided"),
                    "issues": evaluation.get("issues", []),
                    "recommendations": evaluation.get("recommendations", []),
                    "used_api_key_index": idx,
                    "used_api_name": api_name
                }

            except Exception as e:
                error_text = str(e)
                last_error_messages.append(f"Exception with key #{idx}: {error_text}")
                if "429" in error_text or "rate limit" in error_text.lower():
                    self._rate_limit_delay[key] = datetime.now().timestamp() + 300
                elif "503" in error_text or "service unavailable" in error_text.lower():
                    self._rate_limit_delay[key] = datetime.now().timestamp() + 7200
                continue

        # Only wait for keys that this call could actually use again.
        pending = [
            ts for key, ts in self._rate_limit_delay.items()
            if key in api_keys and key not in self._invalid_key_cache
        ]
        if retry_count < 3 and pending:
            wait_time = max(0, min(pending) - datetime.now().timestamp())
            await asyncio.sleep(wait_time + 1)
            return await self.evaluate_from_content(
                question_content=question_content,
                answer_path=answer_path,
                api_keys=api_keys,
                question_type=question_type,
                retry_count=retry_count + 1
            )

        return self._failure(
            "Evaluation failed with all API keys." if retry_count >= 3 else "All API keys are temporarily unavailable.",
            last_error_messages if last_error_messages else ["All API keys failed to evaluate the submission."]
        )
|
|
@@ -37,7 +37,7 @@ logging.basicConfig(
|
|
|
37
37
|
|
|
38
38
|
|
|
39
39
|
class GeminiFlashModel:
|
|
40
|
-
def __init__(self, api_key: str, model_name: str = "gemini-
|
|
40
|
+
def __init__(self, api_key: str, model_name: str = "gemini-2.0-flash"):
|
|
41
41
|
api_key = os.getenv("GEMINI_API_KEY") or api_key
|
|
42
42
|
if not api_key:
|
|
43
43
|
raise ValueError("API key not found in .env file or environment variables.")
|
|
@@ -285,11 +285,11 @@ class PowerBIEvaluator:
|
|
|
285
285
|
self.model = GeminiFlashModel(api_key)
|
|
286
286
|
self.processor = PowerBIProcessor()
|
|
287
287
|
|
|
288
|
-
def evaluate(self, questions: List[str], answer_path: str) -> Dict[str, any]:
|
|
288
|
+
def evaluate(self, questions: List[str], answer_path: str, temp_dir: str = "temp_extract") -> Dict[str, any]:
|
|
289
289
|
try:
|
|
290
290
|
_, ext = os.path.splitext(answer_path)
|
|
291
291
|
ext = ext.lower()
|
|
292
|
-
extract_path =
|
|
292
|
+
extract_path = temp_dir
|
|
293
293
|
pbit_path = None
|
|
294
294
|
pdf_path = None
|
|
295
295
|
if ext == ".zip":
|
|
@@ -322,8 +322,8 @@ class PowerBIEvaluator:
|
|
|
322
322
|
}
|
|
323
323
|
if pdf_path:
|
|
324
324
|
try:
|
|
325
|
-
self.processor.process_pdf(pdf_path)
|
|
326
|
-
visual_result = self.model.evaluate_visuals(questions[0], "outputimages")
|
|
325
|
+
image_paths = self.processor.process_pdf(pdf_path, output_dir=os.path.join(temp_dir, "outputimages"))
|
|
326
|
+
visual_result = self.model.evaluate_visuals(questions[0], os.path.join(temp_dir, "outputimages"))
|
|
327
327
|
result["score"] = int(0.7 * dax_result["score"] + 0.3 * visual_result["score"])
|
|
328
328
|
result["visual_score"] = visual_result["score"]
|
|
329
329
|
result["feedback"] += f"\n\nVisual Feedback:\n{visual_result['feedback']}"
|
|
@@ -344,10 +344,10 @@ class PowerBIEvaluator:
|
|
|
344
344
|
logger.info("[Final] Score (70%% DAX, 30%% Visuals): %d/100", result["score"])
|
|
345
345
|
return result
|
|
346
346
|
finally:
|
|
347
|
-
self.processor._cleanup(extract_path, "outputimages")
|
|
347
|
+
self.processor._cleanup(extract_path, os.path.join(temp_dir, "outputimages"))
|
|
348
348
|
except Exception as e:
|
|
349
349
|
logger.exception("Failed to evaluate Power BI file %s: %s", answer_path, str(e))
|
|
350
|
-
self.processor._cleanup(extract_path, "outputimages")
|
|
350
|
+
self.processor._cleanup(extract_path, os.path.join(temp_dir, "outputimages"))
|
|
351
351
|
return {
|
|
352
352
|
"score": 0,
|
|
353
353
|
"feedback": f"Error processing file: {str(e)}",
|
|
@@ -359,4 +359,4 @@ class PowerBIEvaluator:
|
|
|
359
359
|
|
|
360
360
|
|
|
361
361
|
class ProcessingError(Exception):
|
|
362
|
-
pass
|
|
362
|
+
pass
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import os
|
|
3
3
|
import zipfile
|
|
4
|
+
import shutil
|
|
4
5
|
from pprint import pprint
|
|
5
6
|
from typing import List, Dict
|
|
6
7
|
|
|
@@ -18,7 +19,7 @@ logger = logging.getLogger(__name__)
|
|
|
18
19
|
|
|
19
20
|
|
|
20
21
|
class GeminiFlashModel:
|
|
21
|
-
def __init__(self, api_key: str, model_name: str = "gemini-
|
|
22
|
+
def __init__(self, api_key: str, model_name: str = "gemini-2.0-flash"):
|
|
22
23
|
if not api_key:
|
|
23
24
|
raise ValueError("API key is required.")
|
|
24
25
|
self.api_key = api_key
|
|
@@ -107,50 +108,60 @@ class PythonAnswerParser:
|
|
|
107
108
|
return answers
|
|
108
109
|
|
|
109
110
|
@staticmethod
|
|
110
|
-
def parse_zip_file(zip_path: str) -> List[str]:
|
|
111
|
+
def parse_zip_file(zip_path: str, temp_dir: str) -> List[str]:
|
|
112
|
+
"""
|
|
113
|
+
Parse Python files from a ZIP file, extracting to the specified temp_dir.
|
|
114
|
+
|
|
115
|
+
Args:
|
|
116
|
+
zip_path: Path to the ZIP file
|
|
117
|
+
temp_dir: Directory to extract ZIP contents
|
|
118
|
+
|
|
119
|
+
Returns:
|
|
120
|
+
List of answer strings extracted from Python files
|
|
121
|
+
"""
|
|
111
122
|
combined_content = []
|
|
112
123
|
|
|
113
124
|
try:
|
|
125
|
+
# Create temporary extraction directory
|
|
126
|
+
os.makedirs(temp_dir, exist_ok=True)
|
|
114
127
|
with zipfile.ZipFile(zip_path, "r") as zip_ref:
|
|
115
|
-
temp_dir = "temp_python_extract"
|
|
116
|
-
os.makedirs(temp_dir, exist_ok=True)
|
|
117
128
|
zip_ref.extractall(temp_dir)
|
|
118
129
|
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
130
|
+
python_files = sorted(
|
|
131
|
+
[f for f in os.listdir(temp_dir) if f.endswith(".py")]
|
|
132
|
+
)
|
|
122
133
|
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
134
|
+
if not python_files:
|
|
135
|
+
logger.warning(f"No Python files found in ZIP: {zip_path}")
|
|
136
|
+
return []
|
|
126
137
|
|
|
127
|
-
|
|
128
|
-
|
|
138
|
+
for python_file in python_files:
|
|
139
|
+
with open(
|
|
129
140
|
os.path.join(temp_dir, python_file),
|
|
130
141
|
"r",
|
|
131
142
|
encoding="utf-8",
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
for f in os.listdir(temp_dir):
|
|
138
|
-
os.remove(os.path.join(temp_dir, f))
|
|
143
|
+
) as f:
|
|
144
|
+
content = f.read().strip()
|
|
145
|
+
if content:
|
|
146
|
+
combined_content.append(content)
|
|
139
147
|
|
|
140
|
-
|
|
148
|
+
if not combined_content:
|
|
149
|
+
logger.warning(f"No valid content found in Python files in ZIP: {zip_path}")
|
|
150
|
+
return []
|
|
141
151
|
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
return []
|
|
145
|
-
|
|
146
|
-
combined_text = "\n\n".join(combined_content)
|
|
147
|
-
return [a.strip() for a in combined_text.split("\n\n") if a.strip()]
|
|
152
|
+
combined_text = "\n\n".join(combined_content)
|
|
153
|
+
return [a.strip() for a in combined_text.split("\n\n") if a.strip()]
|
|
148
154
|
except zipfile.BadZipFile:
|
|
149
|
-
logger.error("Invalid ZIP file:
|
|
155
|
+
logger.error(f"Invalid ZIP file: {zip_path}")
|
|
150
156
|
return []
|
|
151
157
|
except Exception as e:
|
|
152
|
-
logger.error("Error processing ZIP file
|
|
158
|
+
logger.error(f"Error processing ZIP file {zip_path}: {str(e)}")
|
|
153
159
|
return []
|
|
160
|
+
finally:
|
|
161
|
+
# Clean up temporary directory
|
|
162
|
+
if os.path.exists(temp_dir):
|
|
163
|
+
shutil.rmtree(temp_dir, ignore_errors=True)
|
|
164
|
+
logger.info(f"Cleaned up temporary directory: {temp_dir}")
|
|
154
165
|
|
|
155
166
|
|
|
156
167
|
class PythonEvaluator:
|
|
@@ -158,19 +169,30 @@ class PythonEvaluator:
|
|
|
158
169
|
self.api_key = api_key
|
|
159
170
|
self.model = GeminiFlashModel(api_key)
|
|
160
171
|
|
|
161
|
-
def evaluate(self, questions: List[str], answer_path: str) -> Dict[str, any]:
|
|
172
|
+
def evaluate(self, questions: List[str], answer_path: str, temp_dir: str = None) -> Dict[str, any]:
|
|
173
|
+
"""
|
|
174
|
+
Evaluate a Python submission.
|
|
175
|
+
|
|
176
|
+
Args:
|
|
177
|
+
questions: List of questions to evaluate against
|
|
178
|
+
answer_path: Path to the answer file (ZIP or single file)
|
|
179
|
+
temp_dir: Optional directory for temporary ZIP extraction
|
|
180
|
+
|
|
181
|
+
Returns:
|
|
182
|
+
Dictionary containing score, feedback, issues, and recommendations
|
|
183
|
+
"""
|
|
162
184
|
try:
|
|
163
185
|
if answer_path.endswith(".zip"):
|
|
164
|
-
|
|
186
|
+
# Use provided temp_dir or generate a default one
|
|
187
|
+
temp_dir = temp_dir or f"temp_python_extract_{os.getpid()}"
|
|
188
|
+
answers = PythonAnswerParser.parse_zip_file(answer_path, temp_dir)
|
|
165
189
|
else:
|
|
166
190
|
with open(answer_path, "r", encoding="utf-8") as file:
|
|
167
191
|
content = file.read()
|
|
168
192
|
answers = PythonAnswerParser.parse_single_file(content)
|
|
169
193
|
|
|
170
194
|
logger.info(
|
|
171
|
-
"Processing
|
|
172
|
-
len(questions),
|
|
173
|
-
len(answers),
|
|
195
|
+
f"Processing {len(questions)} questions and {len(answers)} answers"
|
|
174
196
|
)
|
|
175
197
|
pprint(f"Processing {len(questions)} questions and {len(answers)} answers")
|
|
176
198
|
|
|
@@ -180,15 +202,12 @@ class PythonEvaluator:
|
|
|
180
202
|
f"Questions:\n{combined_questions}\n\nAnswers:\n{combined_answers}"
|
|
181
203
|
)
|
|
182
204
|
|
|
183
|
-
final_prompt = prompt_text_python(combined_raw_content)
|
|
184
|
-
with open("combined_python_prompt.txt", "w", encoding="utf-8") as f:
|
|
185
|
-
f.write(final_prompt)
|
|
186
|
-
|
|
187
205
|
return self.model.evaluate(combined_raw_content)
|
|
188
206
|
except Exception as e:
|
|
189
|
-
logger.error("Failed to process answers from
|
|
207
|
+
logger.error(f"Failed to process answers from {answer_path}: {str(e)}")
|
|
190
208
|
return {
|
|
191
209
|
"score": 0,
|
|
192
210
|
"feedback": f"Error processing answers: {str(e)}",
|
|
193
211
|
"issues": [str(e)],
|
|
194
|
-
|
|
212
|
+
"recommendations": []
|
|
213
|
+
}
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import os
|
|
3
3
|
import zipfile
|
|
4
|
+
import shutil
|
|
4
5
|
from pprint import pprint
|
|
5
6
|
from typing import List, Dict
|
|
6
7
|
|
|
@@ -18,7 +19,7 @@ logger = logging.getLogger(__name__)
|
|
|
18
19
|
|
|
19
20
|
|
|
20
21
|
class GeminiFlashModel:
|
|
21
|
-
def __init__(self, api_key: str, model_name: str = "gemini-
|
|
22
|
+
def __init__(self, api_key: str, model_name: str = "gemini-2.0-flash"):
|
|
22
23
|
if not api_key:
|
|
23
24
|
raise ValueError("API key is required.")
|
|
24
25
|
self.api_key = api_key
|
|
@@ -107,50 +108,60 @@ class SQLAnswerParser:
|
|
|
107
108
|
return answers
|
|
108
109
|
|
|
109
110
|
@staticmethod
|
|
110
|
-
def parse_zip_file(zip_path: str) -> List[str]:
|
|
111
|
+
def parse_zip_file(zip_path: str, temp_dir: str) -> List[str]:
|
|
112
|
+
"""
|
|
113
|
+
Parse SQL files from a ZIP file, extracting to the specified temp_dir.
|
|
114
|
+
|
|
115
|
+
Args:
|
|
116
|
+
zip_path: Path to the ZIP file
|
|
117
|
+
temp_dir: Directory to extract ZIP contents
|
|
118
|
+
|
|
119
|
+
Returns:
|
|
120
|
+
List of answer strings extracted from SQL files
|
|
121
|
+
"""
|
|
111
122
|
combined_content = []
|
|
112
123
|
|
|
113
124
|
try:
|
|
125
|
+
# Create temporary extraction directory
|
|
126
|
+
os.makedirs(temp_dir, exist_ok=True)
|
|
114
127
|
with zipfile.ZipFile(zip_path, "r") as zip_ref:
|
|
115
|
-
temp_dir = "temp_sql_extract"
|
|
116
|
-
os.makedirs(temp_dir, exist_ok=True)
|
|
117
128
|
zip_ref.extractall(temp_dir)
|
|
118
129
|
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
130
|
+
sql_files = sorted(
|
|
131
|
+
[f for f in os.listdir(temp_dir) if f.endswith(".sql")]
|
|
132
|
+
)
|
|
122
133
|
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
134
|
+
if not sql_files:
|
|
135
|
+
logger.warning(f"No SQL files found in ZIP: {zip_path}")
|
|
136
|
+
return []
|
|
126
137
|
|
|
127
|
-
|
|
128
|
-
|
|
138
|
+
for sql_file in sql_files:
|
|
139
|
+
with open(
|
|
129
140
|
os.path.join(temp_dir, sql_file),
|
|
130
141
|
"r",
|
|
131
142
|
encoding="utf-8",
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
143
|
+
) as f:
|
|
144
|
+
content = f.read().strip()
|
|
145
|
+
if content:
|
|
146
|
+
combined_content.append(content)
|
|
136
147
|
|
|
137
|
-
|
|
138
|
-
|
|
148
|
+
if not combined_content:
|
|
149
|
+
logger.warning(f"No valid content found in SQL files in ZIP: {zip_path}")
|
|
150
|
+
return []
|
|
139
151
|
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
if not combined_content:
|
|
143
|
-
logger.warning("No valid content found in SQL files")
|
|
144
|
-
return []
|
|
145
|
-
|
|
146
|
-
combined_text = "\n\n".join(combined_content)
|
|
147
|
-
return [a.strip() for a in combined_text.split("\n\n") if a.strip()]
|
|
152
|
+
combined_text = "\n\n".join(combined_content)
|
|
153
|
+
return [a.strip() for a in combined_text.split("\n\n") if a.strip()]
|
|
148
154
|
except zipfile.BadZipFile:
|
|
149
|
-
logger.error("Invalid ZIP file:
|
|
155
|
+
logger.error(f"Invalid ZIP file: {zip_path}")
|
|
150
156
|
return []
|
|
151
157
|
except Exception as e:
|
|
152
|
-
logger.error("Error processing ZIP file
|
|
158
|
+
logger.error(f"Error processing ZIP file {zip_path}: {str(e)}")
|
|
153
159
|
return []
|
|
160
|
+
finally:
|
|
161
|
+
# Clean up temporary directory
|
|
162
|
+
if os.path.exists(temp_dir):
|
|
163
|
+
shutil.rmtree(temp_dir, ignore_errors=True)
|
|
164
|
+
logger.info(f"Cleaned up temporary directory: {temp_dir}")
|
|
154
165
|
|
|
155
166
|
|
|
156
167
|
class SQLEvaluator:
|
|
@@ -158,19 +169,30 @@ class SQLEvaluator:
|
|
|
158
169
|
self.api_key = api_key
|
|
159
170
|
self.model = GeminiFlashModel(api_key)
|
|
160
171
|
|
|
161
|
-
def evaluate(self, questions: List[str], answer_path: str) -> Dict[str, any]:
|
|
172
|
+
def evaluate(self, questions: List[str], answer_path: str, temp_dir: str = None) -> Dict[str, any]:
|
|
173
|
+
"""
|
|
174
|
+
Evaluate an SQL submission.
|
|
175
|
+
|
|
176
|
+
Args:
|
|
177
|
+
questions: List of questions to evaluate against
|
|
178
|
+
answer_path: Path to the answer file (ZIP or single file)
|
|
179
|
+
temp_dir: Optional directory for temporary ZIP extraction
|
|
180
|
+
|
|
181
|
+
Returns:
|
|
182
|
+
Dictionary containing score, feedback, issues, and recommendations
|
|
183
|
+
"""
|
|
162
184
|
try:
|
|
163
185
|
if answer_path.endswith(".zip"):
|
|
164
|
-
|
|
186
|
+
# Use provided temp_dir or generate a default one
|
|
187
|
+
temp_dir = temp_dir or f"temp_sql_extract_{os.getpid()}"
|
|
188
|
+
answers = SQLAnswerParser.parse_zip_file(answer_path, temp_dir)
|
|
165
189
|
else:
|
|
166
190
|
with open(answer_path, "r", encoding="utf-8") as file:
|
|
167
191
|
content = file.read()
|
|
168
192
|
answers = SQLAnswerParser.parse_single_file(content)
|
|
169
193
|
|
|
170
194
|
logger.info(
|
|
171
|
-
"Processing
|
|
172
|
-
len(questions),
|
|
173
|
-
len(answers),
|
|
195
|
+
f"Processing {len(questions)} questions and {len(answers)} answers"
|
|
174
196
|
)
|
|
175
197
|
pprint(f"Processing {len(questions)} questions and {len(answers)} answers")
|
|
176
198
|
|
|
@@ -180,17 +202,12 @@ class SQLEvaluator:
|
|
|
180
202
|
f"Questions:\n{combined_questions}\n\nAnswers:\n{combined_answers}"
|
|
181
203
|
)
|
|
182
204
|
|
|
183
|
-
final_prompt = prompt_text_sql(combined_raw_content)
|
|
184
|
-
|
|
185
|
-
logger.info(
|
|
186
|
-
"Saved full combined content and prompt to 'combined_sql_full.txt'"
|
|
187
|
-
)
|
|
188
|
-
|
|
189
205
|
return self.model.evaluate(combined_raw_content)
|
|
190
206
|
except Exception as e:
|
|
191
|
-
logger.error("Failed to process answers from
|
|
207
|
+
logger.error(f"Failed to process answers from {answer_path}: {str(e)}")
|
|
192
208
|
return {
|
|
193
209
|
"score": 0,
|
|
194
210
|
"feedback": f"Error processing answers: {str(e)}",
|
|
195
211
|
"issues": [str(e)],
|
|
196
|
-
|
|
212
|
+
"recommendations": []
|
|
213
|
+
}
|