QuantumChecker 0.2.9__tar.gz → 0.3.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {quantumchecker-0.2.9 → quantumchecker-0.3.1}/PKG-INFO +2 -3
- quantumchecker-0.3.1/QuantumCheck/main.py +138 -0
- {quantumchecker-0.2.9 → quantumchecker-0.3.1}/QuantumCheck/python_evaluator.py +0 -4
- {quantumchecker-0.2.9 → quantumchecker-0.3.1}/QuantumCheck/sql_evaluator.py +0 -6
- {quantumchecker-0.2.9 → quantumchecker-0.3.1}/QuantumCheck/ssis_evaluator.py +0 -8
- {quantumchecker-0.2.9 → quantumchecker-0.3.1}/QuantumChecker.egg-info/PKG-INFO +2 -3
- {quantumchecker-0.2.9 → quantumchecker-0.3.1}/README.md +1 -2
- {quantumchecker-0.2.9 → quantumchecker-0.3.1}/setup.py +1 -1
- {quantumchecker-0.2.9 → quantumchecker-0.3.1}/tests/test2.py +15 -4
- quantumchecker-0.2.9/QuantumCheck/main.py +0 -222
- {quantumchecker-0.2.9 → quantumchecker-0.3.1}/QuantumCheck/__init__.py +0 -0
- {quantumchecker-0.2.9 → quantumchecker-0.3.1}/QuantumCheck/powerbi_evaluator.py +0 -0
- {quantumchecker-0.2.9 → quantumchecker-0.3.1}/QuantumCheck/prompts.py +0 -0
- {quantumchecker-0.2.9 → quantumchecker-0.3.1}/QuantumChecker.egg-info/SOURCES.txt +0 -0
- {quantumchecker-0.2.9 → quantumchecker-0.3.1}/QuantumChecker.egg-info/dependency_links.txt +0 -0
- {quantumchecker-0.2.9 → quantumchecker-0.3.1}/QuantumChecker.egg-info/requires.txt +0 -0
- {quantumchecker-0.2.9 → quantumchecker-0.3.1}/QuantumChecker.egg-info/top_level.txt +0 -0
- {quantumchecker-0.2.9 → quantumchecker-0.3.1}/setup.cfg +0 -0
- {quantumchecker-0.2.9 → quantumchecker-0.3.1}/tests/test.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: QuantumChecker
|
|
3
|
-
Version: 0.2.9
|
|
3
|
+
Version: 0.3.1
|
|
4
4
|
Summary: A package to evaluate homework submissions in Python, SQL, PowerBI, and SSIS.
|
|
5
5
|
Author: Qobiljon
|
|
6
6
|
Author-email: qobiljonkhayrullayev@gmail.com
|
|
@@ -37,13 +37,12 @@ Q1: What is a Python list? Explain with an example.
|
|
|
37
37
|
Q2: Write an SQL query to select all records from a table named 'students'.
|
|
38
38
|
"""
|
|
39
39
|
answer_path = "sample_submissions/student1_answer.py"
|
|
40
|
-
api_keys = ["your_api_key_1", "your_api_key_2"]
|
|
41
40
|
question_type = "python"
|
|
42
41
|
|
|
43
42
|
result = await evaluator.evaluate_from_content(
|
|
44
43
|
question_content=question_content,
|
|
45
44
|
answer_path=answer_path,
|
|
46
|
-
|
|
45
|
+
api_key="your_api_key",
|
|
47
46
|
question_type=question_type
|
|
48
47
|
)
|
|
49
48
|
print(result)
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
import zipfile
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from typing import List, Dict
|
|
6
|
+
from .python_evaluator import PythonEvaluator
|
|
7
|
+
from .sql_evaluator import SQLEvaluator
|
|
8
|
+
from .powerbi_evaluator import PowerBIEvaluator
|
|
9
|
+
from .ssis_evaluator import SSISEvaluator
|
|
10
|
+
import asyncio
|
|
11
|
+
|
|
12
|
+
_logger_cache = {}
|
|
13
|
+
|
|
14
|
+
class HomeworkEvaluator:
    """Dispatch a homework submission to the evaluator for its type.

    The evaluator is chosen from ``EVALUATOR_REGISTRY`` by the caller's
    ``question_type``; when that is not a registered type, the type is
    inferred from the answer file's extension (or, for a ``.zip``, from the
    extensions of its members).  Calls made through one instance are spaced
    at least ``MIN_REQUEST_INTERVAL_SECONDS`` apart.
    """

    EVALUATOR_REGISTRY = {
        "python": PythonEvaluator,
        "sql": SQLEvaluator,
        "powerbi": PowerBIEvaluator,
        "ssis": SSISEvaluator,
    }

    EXTENSION_TO_TYPE = {
        ".py": "python",
        ".sql": "sql",
        ".pbit": "powerbi",
        ".pdf": "powerbi",
        ".dtsx": "ssis",
        # Every lookup in this class lower-cases the extension first, so this
        # upper-case key is never hit from here.  It is kept only so external
        # code reading the mapping keeps working.
        ".DTSX": "ssis",
        ".txt": "text",
        ".md": "text",
    }

    # Which content type wins when a ZIP contains several kinds of file.
    ZIP_TYPE_PRIORITY = ("python", "sql", "powerbi", "ssis")

    # Minimum spacing, in seconds, between two evaluations on one instance.
    MIN_REQUEST_INTERVAL_SECONDS = 30

    def __init__(self, log_level: int = logging.INFO):
        """Create an evaluator front-end.

        Args:
            log_level: Level applied to every logger this instance hands out.
        """
        self.log_level = log_level
        # Serialises the throttle bookkeeping across concurrent coroutines.
        self._lock = asyncio.Lock()
        # datetime of the most recent admitted request, or None before the first.
        self._last_request_time = None

    def _get_logger(self, log_type: str) -> logging.Logger:
        """Return the shared logger for ``log_type``, creating it on first use.

        The logger is keyed by ``log_type`` alone.  Keying it by a timestamp
        as well would register a brand-new logger (and handler) for every
        distinct second, and the logging module never releases loggers.
        """
        logger = _logger_cache.get(log_type)
        if logger is None:
            logger = logging.getLogger(log_type)
            if not logger.handlers:
                handler = logging.StreamHandler()
                handler.setFormatter(
                    logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
                )
                logger.addHandler(handler)
            _logger_cache[log_type] = logger
        # Loggers are shared between instances, so re-apply this instance's level.
        logger.setLevel(self.log_level)
        return logger

    def parse_questions(self, content: str) -> List[str]:
        """Split ``content`` into questions on blank lines.

        Args:
            content: Raw question text; questions are separated by an empty line.

        Returns:
            The non-empty questions with surrounding whitespace removed.

        Raises:
            ValueError: If ``content`` holds no non-blank question.
        """
        stripped = (chunk.strip() for chunk in content.split("\n\n"))
        questions = [question for question in stripped if question]
        if not questions:
            raise ValueError("No valid questions found in content")
        return questions

    def _detect_zip_content_type(self, zip_path: str, logger: logging.Logger) -> str:
        """Infer the submission type from the member extensions of a ZIP.

        Args:
            zip_path: Path of the archive to inspect.
            logger: Logger used for a debug trace of what was found.

        Returns:
            The first entry of ``ZIP_TYPE_PRIORITY`` present in the archive,
            or ``"text"`` when none is.

        Raises:
            zipfile.BadZipFile: If the file is not a valid ZIP archive.
            OSError: If the file cannot be opened.
        """
        with zipfile.ZipFile(zip_path, "r") as zip_ref:
            extensions = {os.path.splitext(name)[1].lower() for name in zip_ref.namelist()}
        # Members without an extension (e.g. directories) are ignored.
        found_types = {self.EXTENSION_TO_TYPE.get(ext, "text") for ext in extensions if ext}
        logger.debug("ZIP %s contains types %s", zip_path, sorted(found_types))
        for candidate in self.ZIP_TYPE_PRIORITY:
            if candidate in found_types:
                return candidate
        return "text"

    def _detect_file_type(self, answer_path: str, logger: logging.Logger) -> str:
        """Map an answer path to a submission type by extension or ZIP content."""
        ext = os.path.splitext(answer_path)[1].lower()
        if ext == ".zip":
            return self._detect_zip_content_type(answer_path, logger)
        return self.EXTENSION_TO_TYPE.get(ext, "text")

    async def _throttle(self) -> None:
        """Wait until the minimum interval since the previous request has passed."""
        async with self._lock:
            if self._last_request_time:
                elapsed = (datetime.now() - self._last_request_time).total_seconds()
                remaining = self.MIN_REQUEST_INTERVAL_SECONDS - elapsed
                if remaining > 0:
                    await asyncio.sleep(remaining)
            self._last_request_time = datetime.now()

    @staticmethod
    def _failure_result(feedback: str, issues: List[str]) -> Dict[str, object]:
        """Build the zero-score result returned for every failure path."""
        return {
            "score": 0,
            "feedback": feedback,
            "issues": issues,
            "recommendations": [],
        }

    async def evaluate_from_content(
        self,
        question_content: str,
        answer_path: str,
        api_key: str,
        question_type: str
    ) -> Dict[str, object]:
        """Evaluate one submission and return a normalised result dict.

        Args:
            question_content: Question text, one question per blank-line block.
            answer_path: Path to the answer file or ZIP; surrounding
                whitespace is stripped.
            api_key: Key passed to the selected evaluator's constructor.
            question_type: Preferred evaluator key.  When it is not in
                ``EVALUATOR_REGISTRY`` the type is inferred from
                ``answer_path`` instead.

        Returns:
            A dict with ``score``, ``feedback``, ``issues`` and
            ``recommendations``.  Failures (unparseable questions, missing
            file, unreadable ZIP, evaluator error) are reported through this
            dict with ``score`` 0 rather than raised.
        """
        await self._throttle()

        logger = self._get_logger("QuantumCheck.main")

        try:
            questions = self.parse_questions(question_content)
        except ValueError as e:
            logger.error("Failed to parse question content: %s", e)
            return self._failure_result(
                f"Error parsing question content: {str(e)}", [str(e)]
            )

        answer_path = answer_path.strip()

        # Check existence before touching the file: opening a missing ZIP for
        # type detection would otherwise raise instead of reporting cleanly.
        if not os.path.exists(answer_path):
            message = f"Answer file not found: {answer_path}"
            logger.error(message)
            return self._failure_result(message, [message])

        try:
            if question_type in self.EVALUATOR_REGISTRY:
                eval_type = question_type
            else:
                # Only inspect the file when the caller's type is unusable;
                # a corrupt ZIP raises here and is reported below.
                eval_type = self._detect_file_type(answer_path, logger)

            # Types without a dedicated evaluator (e.g. "text") fall back to
            # the Python evaluator.
            evaluator_class = self.EVALUATOR_REGISTRY.get(eval_type, PythonEvaluator)
            evaluator = evaluator_class(api_key)

            # NOTE(review): this is a synchronous call inside a coroutine, so
            # it blocks the event loop while it runs.  It is left in place
            # because temp_dir is unique per process only, not per call.
            evaluation = evaluator.evaluate(
                questions, answer_path, temp_dir=f"temp_extract_{os.getpid()}"
            )
            return {
                "score": evaluation.get("score", 0),
                "feedback": evaluation.get("feedback", "No feedback provided"),
                "issues": evaluation.get("issues", []),
                "recommendations": evaluation.get("recommendations", []),
            }
        except Exception as e:
            # Boundary handler: callers expect a result dict, never an exception.
            logger.error("Evaluation of %s failed: %s", answer_path, e)
            return self._failure_result(f"Evaluation failed: {str(e)}", [str(e)])
|
|
@@ -202,10 +202,6 @@ class PythonEvaluator:
|
|
|
202
202
|
f"Questions:\n{combined_questions}\n\nAnswers:\n{combined_answers}"
|
|
203
203
|
)
|
|
204
204
|
|
|
205
|
-
final_prompt = prompt_text_python(combined_raw_content)
|
|
206
|
-
with open("combined_python_prompt.txt", "w", encoding="utf-8") as f:
|
|
207
|
-
f.write(final_prompt)
|
|
208
|
-
|
|
209
205
|
return self.model.evaluate(combined_raw_content)
|
|
210
206
|
except Exception as e:
|
|
211
207
|
logger.error(f"Failed to process answers from {answer_path}: {str(e)}")
|
|
@@ -202,12 +202,6 @@ class SQLEvaluator:
|
|
|
202
202
|
f"Questions:\n{combined_questions}\n\nAnswers:\n{combined_answers}"
|
|
203
203
|
)
|
|
204
204
|
|
|
205
|
-
final_prompt = prompt_text_sql(combined_raw_content)
|
|
206
|
-
|
|
207
|
-
logger.info(
|
|
208
|
-
"Saved full combined content and prompt to 'combined_sql_full.txt'"
|
|
209
|
-
)
|
|
210
|
-
|
|
211
205
|
return self.model.evaluate(combined_raw_content)
|
|
212
206
|
except Exception as e:
|
|
213
207
|
logger.error(f"Failed to process answers from {answer_path}: {str(e)}")
|
|
@@ -385,16 +385,8 @@ class SSISEvaluator:
|
|
|
385
385
|
f"Issues:\n{', '.join(issues) if issues else 'None'}"
|
|
386
386
|
)
|
|
387
387
|
|
|
388
|
-
final_prompt = prompt_text_ssis(combined_raw_content)
|
|
389
388
|
|
|
390
|
-
# Save final prompt to txt file for debugging
|
|
391
|
-
with open("last_ssis_prompt.txt", "w", encoding="utf-8") as f:
|
|
392
|
-
f.write(final_prompt)
|
|
393
|
-
logger.debug("Generated prompt: %s", final_prompt[:500])
|
|
394
|
-
|
|
395
|
-
# Evaluate using Gemini model
|
|
396
389
|
result = self.model.evaluate(combined_raw_content)
|
|
397
|
-
# Append parsing issues to result
|
|
398
390
|
result["issues"] = result.get("issues", []) + issues
|
|
399
391
|
return result
|
|
400
392
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: QuantumChecker
|
|
3
|
-
Version: 0.2.9
|
|
3
|
+
Version: 0.3.1
|
|
4
4
|
Summary: A package to evaluate homework submissions in Python, SQL, PowerBI, and SSIS.
|
|
5
5
|
Author: Qobiljon
|
|
6
6
|
Author-email: qobiljonkhayrullayev@gmail.com
|
|
@@ -37,13 +37,12 @@ Q1: What is a Python list? Explain with an example.
|
|
|
37
37
|
Q2: Write an SQL query to select all records from a table named 'students'.
|
|
38
38
|
"""
|
|
39
39
|
answer_path = "sample_submissions/student1_answer.py"
|
|
40
|
-
api_keys = ["your_api_key_1", "your_api_key_2"]
|
|
41
40
|
question_type = "python"
|
|
42
41
|
|
|
43
42
|
result = await evaluator.evaluate_from_content(
|
|
44
43
|
question_content=question_content,
|
|
45
44
|
answer_path=answer_path,
|
|
46
|
-
|
|
45
|
+
api_key="your_api_key",
|
|
47
46
|
question_type=question_type
|
|
48
47
|
)
|
|
49
48
|
print(result)
|
|
@@ -11,13 +11,12 @@ Q1: What is a Python list? Explain with an example.
|
|
|
11
11
|
Q2: Write an SQL query to select all records from a table named 'students'.
|
|
12
12
|
"""
|
|
13
13
|
answer_path = "sample_submissions/student1_answer.py"
|
|
14
|
-
api_keys = ["your_api_key_1", "your_api_key_2"]
|
|
15
14
|
question_type = "python"
|
|
16
15
|
|
|
17
16
|
result = await evaluator.evaluate_from_content(
|
|
18
17
|
question_content=question_content,
|
|
19
18
|
answer_path=answer_path,
|
|
20
|
-
|
|
19
|
+
api_key="your_api_key",
|
|
21
20
|
question_type=question_type
|
|
22
21
|
)
|
|
23
22
|
print(result)
|
|
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
|
|
|
2
2
|
|
|
3
3
|
setup(
|
|
4
4
|
name="QuantumChecker",
|
|
5
|
-
version="0.
|
|
5
|
+
version="0.3.1",
|
|
6
6
|
author="Qobiljon",
|
|
7
7
|
author_email="qobiljonkhayrullayev@gmail.com",
|
|
8
8
|
description="A package to evaluate homework submissions in Python, SQL, PowerBI, and SSIS.",
|
|
@@ -10,20 +10,31 @@ async def main():
|
|
|
10
10
|
Q2: What is the difference between a list and a tuple in Python?
|
|
11
11
|
"""
|
|
12
12
|
|
|
13
|
-
|
|
14
13
|
answer_path = "answer/python1.zip"
|
|
15
|
-
|
|
14
|
+
|
|
16
15
|
question_type = "python"
|
|
17
16
|
|
|
18
17
|
result = await evaluator.evaluate_from_content(
|
|
19
18
|
question_content=question_content,
|
|
20
19
|
answer_path=answer_path,
|
|
21
|
-
|
|
20
|
+
api_key="AIzaSyC2B_Q38DkCl6O8y4b5hAWEpb6aJHW6FcY",
|
|
22
21
|
question_type=question_type
|
|
23
22
|
)
|
|
24
23
|
|
|
24
|
+
result2 = await evaluator.evaluate_from_content(
|
|
25
|
+
question_content=question_content,
|
|
26
|
+
answer_path=answer_path,
|
|
27
|
+
api_key="AIzaSyC2B_Q38DkCl6O8y4b5hAWEpb6aJHW6FcY",
|
|
28
|
+
question_type=question_type
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
print("Evaluation Result:")
|
|
32
|
+
print(result["score"])
|
|
33
|
+
print(result["feedback"])
|
|
34
|
+
|
|
25
35
|
print("Evaluation Result:")
|
|
26
|
-
print(
|
|
36
|
+
print(result2["score"])
|
|
37
|
+
print(result2["feedback"])
|
|
27
38
|
|
|
28
39
|
|
|
29
40
|
if __name__ == "__main__":
|
|
@@ -1,222 +0,0 @@
|
|
|
1
|
-
import logging
|
|
2
|
-
import os
|
|
3
|
-
import zipfile
|
|
4
|
-
import random
|
|
5
|
-
from datetime import datetime
|
|
6
|
-
from typing import List, Dict, Optional
|
|
7
|
-
from .python_evaluator import PythonEvaluator
|
|
8
|
-
from .sql_evaluator import SQLEvaluator
|
|
9
|
-
from .powerbi_evaluator import PowerBIEvaluator
|
|
10
|
-
from .ssis_evaluator import SSISEvaluator
|
|
11
|
-
import asyncio
|
|
12
|
-
|
|
13
|
-
_logger_cache = {}
|
|
14
|
-
|
|
15
|
-
class HomeworkEvaluator:
|
|
16
|
-
EVALUATOR_REGISTRY = {
|
|
17
|
-
"python": PythonEvaluator,
|
|
18
|
-
"sql": SQLEvaluator,
|
|
19
|
-
"powerbi": PowerBIEvaluator,
|
|
20
|
-
"ssis": SSISEvaluator
|
|
21
|
-
}
|
|
22
|
-
|
|
23
|
-
EXTENSION_TO_TYPE = {
|
|
24
|
-
".py": "python",
|
|
25
|
-
".sql": "sql",
|
|
26
|
-
".pbit": "powerbi",
|
|
27
|
-
".pdf": "powerbi",
|
|
28
|
-
".dtsx": "ssis",
|
|
29
|
-
".DTSX": "ssis",
|
|
30
|
-
".txt": "text",
|
|
31
|
-
".md": "text"
|
|
32
|
-
}
|
|
33
|
-
|
|
34
|
-
API_NAME_MAPPING = {
|
|
35
|
-
"python": "Google Gemini API",
|
|
36
|
-
"sql": "Google Gemini API",
|
|
37
|
-
"powerbi": "Google Gemini API",
|
|
38
|
-
"ssis": "Google Gemini API",
|
|
39
|
-
"text": "Google Gemini API"
|
|
40
|
-
}
|
|
41
|
-
|
|
42
|
-
def __init__(self, log_level: int = logging.INFO):
|
|
43
|
-
self.log_level = log_level
|
|
44
|
-
self._successful_key_cache = {}
|
|
45
|
-
self._rate_limit_delay = {} # Track delay per key
|
|
46
|
-
|
|
47
|
-
def _get_logger(self, log_type: str) -> logging.Logger:
|
|
48
|
-
log_name = f"{log_type}_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}"
|
|
49
|
-
if log_name not in _logger_cache:
|
|
50
|
-
logger = logging.getLogger(log_name)
|
|
51
|
-
logger.setLevel(self.log_level)
|
|
52
|
-
if not logger.handlers:
|
|
53
|
-
handler = logging.StreamHandler()
|
|
54
|
-
handler.setFormatter(logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s"))
|
|
55
|
-
logger.addHandler(handler)
|
|
56
|
-
_logger_cache[log_name] = logger
|
|
57
|
-
return _logger_cache[log_name]
|
|
58
|
-
|
|
59
|
-
def parse_questions(self, content: str) -> List[str]:
|
|
60
|
-
logger = self._get_logger("QuantumCheck.main")
|
|
61
|
-
questions = [q.strip() for q in content.split("\n\n") if q.strip()]
|
|
62
|
-
logger.info(f"Parsed {len(questions)} questions from content")
|
|
63
|
-
if not questions:
|
|
64
|
-
raise ValueError("No valid questions found in content")
|
|
65
|
-
return questions
|
|
66
|
-
|
|
67
|
-
def _detect_zip_content_type(self, zip_path: str, logger: logging.Logger) -> str:
|
|
68
|
-
try:
|
|
69
|
-
with zipfile.ZipFile(zip_path, "r") as zip_ref:
|
|
70
|
-
extensions = {os.path.splitext(name)[1].lower() for name in zip_ref.namelist()}
|
|
71
|
-
file_types = [self.EXTENSION_TO_TYPE.get(ext, "text") for ext in extensions if ext]
|
|
72
|
-
logger.info(f"Detected extensions in ZIP {zip_path}: {extensions}, types: {file_types}")
|
|
73
|
-
if "python" in file_types:
|
|
74
|
-
logger.info(f"Selected file type: python from extension: .py in ZIP: {zip_path}")
|
|
75
|
-
return "python"
|
|
76
|
-
elif "sql" in file_types:
|
|
77
|
-
logger.info(f"Selected file type: sql from extension: .sql in ZIP: {zip_path}")
|
|
78
|
-
return "sql"
|
|
79
|
-
elif "powerbi" in file_types:
|
|
80
|
-
logger.info(f"Selected file type: powerbi from extension: .pbit or .pdf in ZIP: {zip_path}")
|
|
81
|
-
return "powerbi"
|
|
82
|
-
elif "ssis" in file_types:
|
|
83
|
-
logger.info(f"Selected file type: ssis from extension: .dtsx in ZIP: {zip_path}")
|
|
84
|
-
return "ssis"
|
|
85
|
-
else:
|
|
86
|
-
logger.info(f"Selected file type: text (default) in ZIP: {zip_path}")
|
|
87
|
-
return "text"
|
|
88
|
-
except zipfile.BadZipFile:
|
|
89
|
-
logger.error(f"Invalid ZIP file: {zip_path}")
|
|
90
|
-
raise ValueError(f"Invalid ZIP file: {zip_path}")
|
|
91
|
-
|
|
92
|
-
async def evaluate_from_content(
|
|
93
|
-
self,
|
|
94
|
-
question_content: str,
|
|
95
|
-
answer_path: str,
|
|
96
|
-
api_keys: List[str],
|
|
97
|
-
question_type: str
|
|
98
|
-
) -> Dict[str, any]:
|
|
99
|
-
try:
|
|
100
|
-
questions = self.parse_questions(question_content)
|
|
101
|
-
except ValueError as e:
|
|
102
|
-
logger = self._get_logger("QuantumCheck.main")
|
|
103
|
-
logger.error("Failed to parse question content: %s", str(e))
|
|
104
|
-
return {
|
|
105
|
-
"score": 0,
|
|
106
|
-
"feedback": f"Error parsing question content: {str(e)}",
|
|
107
|
-
"issues": [str(e)],
|
|
108
|
-
"recommendations": [],
|
|
109
|
-
"used_api_key_index": None,
|
|
110
|
-
"used_api_name": None
|
|
111
|
-
}
|
|
112
|
-
|
|
113
|
-
answer_path = answer_path.strip()
|
|
114
|
-
_, ext = os.path.splitext(answer_path)
|
|
115
|
-
ext = ext.lower()
|
|
116
|
-
|
|
117
|
-
# Determine file type, prioritizing question_type for evaluator selection
|
|
118
|
-
if ext == ".zip":
|
|
119
|
-
logger = self._get_logger("zip")
|
|
120
|
-
file_type = self._detect_zip_content_type(answer_path, logger)
|
|
121
|
-
else:
|
|
122
|
-
file_type = self.EXTENSION_TO_TYPE.get(ext, "text")
|
|
123
|
-
logger = self._get_logger(file_type)
|
|
124
|
-
|
|
125
|
-
# Use question_type if provided, else fallback to file_type
|
|
126
|
-
eval_type = question_type if question_type in self.EVALUATOR_REGISTRY else file_type
|
|
127
|
-
logger.info(f"Processing answer_path: {answer_path} with detected file type: {file_type}, evaluation type: {eval_type}")
|
|
128
|
-
|
|
129
|
-
if not os.path.exists(answer_path):
|
|
130
|
-
logger.error(f"Answer file not found: {answer_path}")
|
|
131
|
-
return {
|
|
132
|
-
"score": 0,
|
|
133
|
-
"feedback": f"Answer file not found: {answer_path}",
|
|
134
|
-
"issues": [f"Answer file not found: {answer_path}"],
|
|
135
|
-
"recommendations": [],
|
|
136
|
-
"used_api_key_index": None,
|
|
137
|
-
"used_api_name": None
|
|
138
|
-
}
|
|
139
|
-
|
|
140
|
-
evaluator_class = self.EVALUATOR_REGISTRY.get(eval_type, PythonEvaluator)
|
|
141
|
-
last_error_messages = []
|
|
142
|
-
|
|
143
|
-
# Shuffle keys for load balancing
|
|
144
|
-
key_order = [(i + 1, key) for i, key in enumerate(api_keys)]
|
|
145
|
-
random.shuffle(key_order)
|
|
146
|
-
|
|
147
|
-
# Try cached key with 30% probability to encourage rotation
|
|
148
|
-
cached_key_idx = self._successful_key_cache.get(eval_type)
|
|
149
|
-
if cached_key_idx is not None and cached_key_idx < len(api_keys) and random.random() < 0.3:
|
|
150
|
-
key_order.insert(0, (cached_key_idx + 1, api_keys[cached_key_idx]))
|
|
151
|
-
|
|
152
|
-
for idx, key in key_order:
|
|
153
|
-
# Check rate limit delay
|
|
154
|
-
if key in self._rate_limit_delay:
|
|
155
|
-
delay_until = self._rate_limit_delay[key]
|
|
156
|
-
current_time = datetime.now()
|
|
157
|
-
delay_until_time = datetime.fromtimestamp(delay_until)
|
|
158
|
-
if current_time < delay_until_time:
|
|
159
|
-
logger.info(f"API key #{idx} is rate-limited until {delay_until_time}, skipping.")
|
|
160
|
-
continue
|
|
161
|
-
else:
|
|
162
|
-
del self._rate_limit_delay[key]
|
|
163
|
-
|
|
164
|
-
logger.info(f"Trying API key #{idx}")
|
|
165
|
-
evaluator = evaluator_class(key)
|
|
166
|
-
api_name = getattr(evaluator, 'get_api_name', lambda: self.API_NAME_MAPPING.get(eval_type, "Unknown API"))()
|
|
167
|
-
logger.info(f"Using API: {api_name} for evaluation type: {eval_type}")
|
|
168
|
-
|
|
169
|
-
try:
|
|
170
|
-
evaluation = evaluator.evaluate(questions, answer_path, temp_dir=f"temp_extract_{os.getpid()}_{idx}")
|
|
171
|
-
|
|
172
|
-
feedback = evaluation.get("feedback", "").lower()
|
|
173
|
-
issues = " ".join(evaluation.get("issues", [])).lower()
|
|
174
|
-
|
|
175
|
-
# Check for invalid API key
|
|
176
|
-
if any(phrase in feedback or phrase in issues for phrase in ["api key not valid", "api_key_invalid"]):
|
|
177
|
-
logger.warning(f"API key #{idx} invalid, trying next key.")
|
|
178
|
-
last_error_messages.append(f"API key #{idx} invalid.")
|
|
179
|
-
continue
|
|
180
|
-
|
|
181
|
-
# Check for rate limit errors
|
|
182
|
-
if any(phrase in feedback or phrase in issues for phrase in ["429", "too many requests", "rate limit"]):
|
|
183
|
-
logger.warning(f"API key #{idx} hit rate limit, applying delay.")
|
|
184
|
-
last_error_messages.append(f"API key #{idx} rate limited.")
|
|
185
|
-
self._rate_limit_delay[key] = datetime.now().timestamp() + 45 # 45s delay
|
|
186
|
-
continue
|
|
187
|
-
|
|
188
|
-
# Check for invalid evaluation
|
|
189
|
-
if evaluation.get("score", 0) == 0 and "evaluation not returned" in feedback:
|
|
190
|
-
logger.warning(f"API key #{idx} returned invalid evaluation, trying next key.")
|
|
191
|
-
last_error_messages.append(f"API key #{idx} returned invalid evaluation.")
|
|
192
|
-
continue
|
|
193
|
-
|
|
194
|
-
# Cache successful key
|
|
195
|
-
self._successful_key_cache[eval_type] = idx - 1
|
|
196
|
-
logger.info(f"Evaluation succeeded with API key #{idx}: Score = {evaluation.get('score')}")
|
|
197
|
-
|
|
198
|
-
return {
|
|
199
|
-
"score": evaluation.get("score", 0),
|
|
200
|
-
"feedback": evaluation.get("feedback", "No feedback provided"),
|
|
201
|
-
"issues": evaluation.get("issues", []),
|
|
202
|
-
"recommendations": evaluation.get("recommendations", []),
|
|
203
|
-
"used_api_key_index": idx,
|
|
204
|
-
"used_api_name": api_name
|
|
205
|
-
}
|
|
206
|
-
|
|
207
|
-
except Exception as e:
|
|
208
|
-
logger.error(f"Exception using API key #{idx}: {str(e)}")
|
|
209
|
-
last_error_messages.append(f"Exception with key #{idx}: {str(e)}")
|
|
210
|
-
if "429" in str(e) or "rate limit" in str(e).lower():
|
|
211
|
-
self._rate_limit_delay[key] = datetime.now().timestamp() + 45
|
|
212
|
-
continue
|
|
213
|
-
|
|
214
|
-
logger.error("Evaluation failed with all API keys.")
|
|
215
|
-
return {
|
|
216
|
-
"score": 0,
|
|
217
|
-
"feedback": "Evaluation failed with all API keys.",
|
|
218
|
-
"issues": last_error_messages if last_error_messages else ["All API keys failed to evaluate the submission."],
|
|
219
|
-
"recommendations": [],
|
|
220
|
-
"used_api_key_index": None,
|
|
221
|
-
"used_api_name": None
|
|
222
|
-
}
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|