QuantumChecker 0.2.7__tar.gz → 0.2.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- quantumchecker-0.2.9/PKG-INFO +53 -0
- quantumchecker-0.2.9/QuantumCheck/main.py +222 -0
- {quantumchecker-0.2.7 → quantumchecker-0.2.9}/QuantumCheck/powerbi_evaluator.py +44 -47
- {quantumchecker-0.2.7 → quantumchecker-0.2.9}/QuantumCheck/prompts.py +41 -61
- quantumchecker-0.2.9/QuantumCheck/python_evaluator.py +217 -0
- quantumchecker-0.2.9/QuantumCheck/sql_evaluator.py +219 -0
- quantumchecker-0.2.9/QuantumCheck/ssis_evaluator.py +403 -0
- quantumchecker-0.2.9/QuantumChecker.egg-info/PKG-INFO +53 -0
- {quantumchecker-0.2.7 → quantumchecker-0.2.9}/QuantumChecker.egg-info/SOURCES.txt +2 -1
- quantumchecker-0.2.9/README.md +27 -0
- {quantumchecker-0.2.7 → quantumchecker-0.2.9}/setup.py +1 -1
- quantumchecker-0.2.9/tests/test.py +135 -0
- quantumchecker-0.2.9/tests/test2.py +30 -0
- quantumchecker-0.2.7/PKG-INFO +0 -34
- quantumchecker-0.2.7/QuantumCheck/main.py +0 -125
- quantumchecker-0.2.7/QuantumCheck/python_evaluator.py +0 -95
- quantumchecker-0.2.7/QuantumCheck/sql_evaluator.py +0 -97
- quantumchecker-0.2.7/QuantumCheck/ssis_evaluator.py +0 -136
- quantumchecker-0.2.7/QuantumChecker.egg-info/PKG-INFO +0 -34
- quantumchecker-0.2.7/README.md +0 -8
- quantumchecker-0.2.7/tests/test.py +0 -31
- {quantumchecker-0.2.7 → quantumchecker-0.2.9}/QuantumCheck/__init__.py +0 -0
- {quantumchecker-0.2.7 → quantumchecker-0.2.9}/QuantumChecker.egg-info/dependency_links.txt +0 -0
- {quantumchecker-0.2.7 → quantumchecker-0.2.9}/QuantumChecker.egg-info/requires.txt +0 -0
- {quantumchecker-0.2.7 → quantumchecker-0.2.9}/QuantumChecker.egg-info/top_level.txt +0 -0
- {quantumchecker-0.2.7 → quantumchecker-0.2.9}/setup.cfg +0 -0
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: QuantumChecker
|
|
3
|
+
Version: 0.2.9
|
|
4
|
+
Summary: A package to evaluate homework submissions in Python, SQL, PowerBI, and SSIS.
|
|
5
|
+
Author: Qobiljon
|
|
6
|
+
Author-email: qobiljonkhayrullayev@gmail.com
|
|
7
|
+
Classifier: Programming Language :: Python :: 3
|
|
8
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
9
|
+
Classifier: Operating System :: OS Independent
|
|
10
|
+
Requires-Python: >=3.6
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
Requires-Dist: requests>=2.31.0
|
|
13
|
+
Requires-Dist: tenacity>=8.2.3
|
|
14
|
+
Requires-Dist: pdf2image>=1.16.3
|
|
15
|
+
Requires-Dist: python-dotenv>=1.0.0
|
|
16
|
+
Requires-Dist: Pillow>=10.0.0
|
|
17
|
+
Requires-Dist: PyPDF2>=3.0.1
|
|
18
|
+
Dynamic: author
|
|
19
|
+
Dynamic: author-email
|
|
20
|
+
Dynamic: classifier
|
|
21
|
+
Dynamic: description
|
|
22
|
+
Dynamic: description-content-type
|
|
23
|
+
Dynamic: requires-dist
|
|
24
|
+
Dynamic: requires-python
|
|
25
|
+
Dynamic: summary
|
|
26
|
+
|
|
27
|
+
Sample usage:
|
|
28
|
+
```
|
|
29
|
+
import asyncio
|
|
30
|
+
from QuantumCheck.main import HomeworkEvaluator
|
|
31
|
+
|
|
32
|
+
async def main():
|
|
33
|
+
evaluator = HomeworkEvaluator()
|
|
34
|
+
question_content = """
|
|
35
|
+
Q1: What is a Python list? Explain with an example.
|
|
36
|
+
|
|
37
|
+
Q2: Write an SQL query to select all records from a table named 'students'.
|
|
38
|
+
"""
|
|
39
|
+
answer_path = "sample_submissions/student1_answer.py"
|
|
40
|
+
api_keys = ["your_api_key_1", "your_api_key_2"]
|
|
41
|
+
question_type = "python"
|
|
42
|
+
|
|
43
|
+
result = await evaluator.evaluate_from_content(
|
|
44
|
+
question_content=question_content,
|
|
45
|
+
answer_path=answer_path,
|
|
46
|
+
api_keys=api_keys,
|
|
47
|
+
question_type=question_type
|
|
48
|
+
)
|
|
49
|
+
print(result)
|
|
50
|
+
|
|
51
|
+
if __name__ == "__main__":
|
|
52
|
+
asyncio.run(main())
|
|
53
|
+
```
|
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
import zipfile
|
|
4
|
+
import random
|
|
5
|
+
from datetime import datetime
|
|
6
|
+
from typing import Any, Dict, List, Optional
|
|
7
|
+
from .python_evaluator import PythonEvaluator
|
|
8
|
+
from .sql_evaluator import SQLEvaluator
|
|
9
|
+
from .powerbi_evaluator import PowerBIEvaluator
|
|
10
|
+
from .ssis_evaluator import SSISEvaluator
|
|
11
|
+
import asyncio
|
|
12
|
+
|
|
13
|
+
_logger_cache = {}
|
|
14
|
+
|
|
15
|
+
class HomeworkEvaluator:
    """Dispatch a homework submission to a type-specific evaluator.

    The evaluator class is chosen from ``question_type`` (falling back to the
    type detected from the answer file's extension), and the supplied API keys
    are tried in a shuffled order until one produces a usable evaluation.
    """

    EVALUATOR_REGISTRY = {
        "python": PythonEvaluator,
        "sql": SQLEvaluator,
        "powerbi": PowerBIEvaluator,
        "ssis": SSISEvaluator,
    }

    # Extensions are lower-cased before lookup inside this class, so the
    # ".DTSX" entry is never hit here; it is kept for external readers of the
    # mapping.
    EXTENSION_TO_TYPE = {
        ".py": "python",
        ".sql": "sql",
        ".pbit": "powerbi",
        ".pdf": "powerbi",
        ".dtsx": "ssis",
        ".DTSX": "ssis",
        ".txt": "text",
        ".md": "text",
    }

    API_NAME_MAPPING = {
        "python": "Google Gemini API",
        "sql": "Google Gemini API",
        "powerbi": "Google Gemini API",
        "ssis": "Google Gemini API",
        "text": "Google Gemini API",
    }

    # Precedence used when a ZIP holds several file types, paired with the
    # extension label that appears in the log message.
    _ZIP_TYPE_PRIORITY = (
        ("python", ".py"),
        ("sql", ".sql"),
        ("powerbi", ".pbit or .pdf"),
        ("ssis", ".dtsx"),
    )

    # How long a key is skipped after a rate-limit response.
    _RATE_LIMIT_COOLDOWN_SECONDS = 45

    _INVALID_KEY_PHRASES = ("api key not valid", "api_key_invalid")
    _RATE_LIMIT_PHRASES = ("429", "too many requests", "rate limit")

    def __init__(self, log_level: int = logging.INFO):
        """Create an evaluator front-end.

        Args:
            log_level: Level applied to the loggers this instance hands out.
        """
        self.log_level = log_level
        # eval_type -> zero-based index of the key that last succeeded.
        self._successful_key_cache = {}
        # api key -> POSIX timestamp until which the key should be skipped.
        self._rate_limit_delay = {}

    def _get_logger(self, log_type: str) -> logging.Logger:
        """Return the shared stream logger for ``log_type``.

        The logger is keyed by ``log_type`` alone. Embedding a timestamp in
        the name (as before) produced a brand-new logger and handler for every
        second in which this was called, so the cache never hit and both the
        cache and the logging module's registry grew without bound.
        """
        logger = _logger_cache.get(log_type)
        if logger is None:
            logger = logging.getLogger(log_type)
            if not logger.handlers:
                handler = logging.StreamHandler()
                handler.setFormatter(
                    logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
                )
                logger.addHandler(handler)
            _logger_cache[log_type] = logger
        # Loggers are shared across instances, so apply this instance's level
        # on every request rather than only at creation time.
        logger.setLevel(self.log_level)
        return logger

    @staticmethod
    def _failure_result(feedback: str, issues: List[str]) -> Dict[str, Any]:
        """Build the zero-score result returned when no evaluation was produced."""
        return {
            "score": 0,
            "feedback": feedback,
            "issues": issues,
            "recommendations": [],
            "used_api_key_index": None,
            "used_api_name": None,
        }

    def parse_questions(self, content: str) -> List[str]:
        """Split ``content`` into questions on blank lines.

        Returns:
            The non-empty, stripped chunks separated by ``"\\n\\n"``.

        Raises:
            ValueError: If no non-empty chunk is found.
        """
        logger = self._get_logger("QuantumCheck.main")
        questions = [q.strip() for q in content.split("\n\n") if q.strip()]
        logger.info(f"Parsed {len(questions)} questions from content")
        if not questions:
            raise ValueError("No valid questions found in content")
        return questions

    def _detect_zip_content_type(self, zip_path: str, logger: logging.Logger) -> str:
        """Classify a ZIP archive by the extensions of its members.

        The first type in ``_ZIP_TYPE_PRIORITY`` that is present wins; unknown
        extensions count as ``"text"``, which is also the default.

        Raises:
            ValueError: If ``zip_path`` is not a valid ZIP file.
        """
        try:
            with zipfile.ZipFile(zip_path, "r") as zip_ref:
                extensions = {os.path.splitext(name)[1].lower() for name in zip_ref.namelist()}
        except zipfile.BadZipFile as exc:
            logger.error(f"Invalid ZIP file: {zip_path}")
            raise ValueError(f"Invalid ZIP file: {zip_path}") from exc

        # Members without an extension (e.g. directories) are ignored.
        file_types = [self.EXTENSION_TO_TYPE.get(ext, "text") for ext in extensions if ext]
        logger.info(f"Detected extensions in ZIP {zip_path}: {extensions}, types: {file_types}")

        for file_type, ext_label in self._ZIP_TYPE_PRIORITY:
            if file_type in file_types:
                logger.info(
                    f"Selected file type: {file_type} from extension: {ext_label} in ZIP: {zip_path}"
                )
                return file_type

        logger.info(f"Selected file type: text (default) in ZIP: {zip_path}")
        return "text"

    async def evaluate_from_content(
        self,
        question_content: str,
        answer_path: str,
        api_keys: List[str],
        question_type: str
    ) -> Dict[str, Any]:
        """Evaluate the answer file at ``answer_path`` against ``question_content``.

        Args:
            question_content: Questions separated by blank lines.
            answer_path: Path to the submission (a single file or a ZIP).
            api_keys: Candidate API keys; they are tried in shuffled order.
            question_type: Preferred evaluator key; used when it is present in
                ``EVALUATOR_REGISTRY``, otherwise the detected file type is used.

        Returns:
            A dict with ``score``, ``feedback``, ``issues``, ``recommendations``,
            ``used_api_key_index`` (1-based, or ``None`` on failure) and
            ``used_api_name``. Failures are reported through this dict rather
            than raised.

        NOTE(review): ``evaluator.evaluate`` is called synchronously, so this
        coroutine blocks the event loop while an evaluation runs. Left as is
        because the evaluators' thread-safety is not visible from here.
        """
        try:
            questions = self.parse_questions(question_content)
        except ValueError as e:
            logger = self._get_logger("QuantumCheck.main")
            logger.error("Failed to parse question content: %s", str(e))
            return self._failure_result(f"Error parsing question content: {str(e)}", [str(e)])

        answer_path = answer_path.strip()
        ext = os.path.splitext(answer_path)[1].lower()
        is_zip = ext == ".zip"

        if is_zip:
            logger = self._get_logger("zip")
            file_type = None  # resolved below, once the file is known to exist
        else:
            file_type = self.EXTENSION_TO_TYPE.get(ext, "text")
            logger = self._get_logger(file_type)

        # Check existence before opening the archive: a missing ZIP would
        # otherwise raise FileNotFoundError out of this coroutine.
        if not os.path.exists(answer_path):
            logger.error(f"Answer file not found: {answer_path}")
            message = f"Answer file not found: {answer_path}"
            return self._failure_result(message, [message])

        if is_zip:
            try:
                file_type = self._detect_zip_content_type(answer_path, logger)
            except ValueError as e:
                # Corrupt archive: report it like every other failure.
                return self._failure_result(f"Error processing file: {str(e)}", [str(e)])

        # question_type wins when it names a registered evaluator.
        eval_type = question_type if question_type in self.EVALUATOR_REGISTRY else file_type
        logger.info(
            f"Processing answer_path: {answer_path} with detected file type: {file_type}, "
            f"evaluation type: {eval_type}"
        )

        evaluator_class = self.EVALUATOR_REGISTRY.get(eval_type, PythonEvaluator)
        last_error_messages = []

        # Shuffle keys for load balancing; indices reported to callers are 1-based.
        key_order = [(i + 1, key) for i, key in enumerate(api_keys)]
        random.shuffle(key_order)

        # With 30% probability, move the last successful key to the front.
        # It is moved rather than copied so a failing key is not tried twice.
        cached_key_idx = self._successful_key_cache.get(eval_type)
        if cached_key_idx is not None and cached_key_idx < len(api_keys) and random.random() < 0.3:
            preferred = (cached_key_idx + 1, api_keys[cached_key_idx])
            key_order.remove(preferred)
            key_order.insert(0, preferred)

        for idx, key in key_order:
            delay_until = self._rate_limit_delay.get(key)
            if delay_until is not None:
                if datetime.now().timestamp() < delay_until:
                    delay_until_time = datetime.fromtimestamp(delay_until)
                    logger.info(f"API key #{idx} is rate-limited until {delay_until_time}, skipping.")
                    continue
                del self._rate_limit_delay[key]

            logger.info(f"Trying API key #{idx}")

            try:
                # Constructed inside the try so one bad key cannot abort rotation.
                evaluator = evaluator_class(key)
                api_name = getattr(
                    evaluator,
                    "get_api_name",
                    lambda: self.API_NAME_MAPPING.get(eval_type, "Unknown API"),
                )()
                logger.info(f"Using API: {api_name} for evaluation type: {eval_type}")

                evaluation = evaluator.evaluate(
                    questions, answer_path, temp_dir=f"temp_extract_{os.getpid()}_{idx}"
                )

                # Evaluators report API errors inside the result text, so the
                # failure kind is recognised by scanning feedback and issues.
                feedback = str(evaluation.get("feedback") or "").lower()
                issues = " ".join(str(item) for item in evaluation.get("issues") or []).lower()

                if any(phrase in feedback or phrase in issues for phrase in self._INVALID_KEY_PHRASES):
                    logger.warning(f"API key #{idx} invalid, trying next key.")
                    last_error_messages.append(f"API key #{idx} invalid.")
                    continue

                if any(phrase in feedback or phrase in issues for phrase in self._RATE_LIMIT_PHRASES):
                    logger.warning(f"API key #{idx} hit rate limit, applying delay.")
                    last_error_messages.append(f"API key #{idx} rate limited.")
                    self._rate_limit_delay[key] = (
                        datetime.now().timestamp() + self._RATE_LIMIT_COOLDOWN_SECONDS
                    )
                    continue

                if evaluation.get("score", 0) == 0 and "evaluation not returned" in feedback:
                    logger.warning(f"API key #{idx} returned invalid evaluation, trying next key.")
                    last_error_messages.append(f"API key #{idx} returned invalid evaluation.")
                    continue

                # Remember the zero-based position of the key that worked.
                self._successful_key_cache[eval_type] = idx - 1
                logger.info(f"Evaluation succeeded with API key #{idx}: Score = {evaluation.get('score')}")

                return {
                    "score": evaluation.get("score", 0),
                    "feedback": evaluation.get("feedback", "No feedback provided"),
                    "issues": evaluation.get("issues", []),
                    "recommendations": evaluation.get("recommendations", []),
                    "used_api_key_index": idx,
                    "used_api_name": api_name,
                }

            except Exception as e:
                # Boundary handler: any failure with this key moves on to the next.
                logger.error(f"Exception using API key #{idx}: {str(e)}")
                last_error_messages.append(f"Exception with key #{idx}: {str(e)}")
                if "429" in str(e) or "rate limit" in str(e).lower():
                    self._rate_limit_delay[key] = (
                        datetime.now().timestamp() + self._RATE_LIMIT_COOLDOWN_SECONDS
                    )
                continue

        logger.error("Evaluation failed with all API keys.")
        return self._failure_result(
            "Evaluation failed with all API keys.",
            last_error_messages if last_error_messages else ["All API keys failed to evaluate the submission."],
        )
|
|
@@ -15,7 +15,6 @@ import io
|
|
|
15
15
|
import base64
|
|
16
16
|
|
|
17
17
|
|
|
18
|
-
# Placeholder for prompts.py content
|
|
19
18
|
def prompt_text_powerbi(combined_content: str) -> str:
|
|
20
19
|
return f"""
|
|
21
20
|
Evaluate the following Power BI DAX question-answer pairs for correctness, clarity, and appropriateness.
|
|
@@ -46,19 +45,20 @@ class GeminiFlashModel:
|
|
|
46
45
|
self.model_name = model_name
|
|
47
46
|
self.endpoint = f"https://generativelanguage.googleapis.com/v1beta/models/{model_name}:generateContent"
|
|
48
47
|
|
|
49
|
-
@retry(
|
|
50
|
-
|
|
48
|
+
@retry(
|
|
49
|
+
stop=stop_after_attempt(3),
|
|
50
|
+
wait=wait_exponential(min=4, max=10),
|
|
51
|
+
retry=retry_if_exception_type((requests.exceptions.RequestException,))
|
|
52
|
+
)
|
|
51
53
|
def evaluate(self, question_answer_pairs: List[Dict[str, str]]) -> Dict[str, any]:
|
|
52
54
|
logger.info("Starting evaluation of %d Power BI question-answer pairs", len(question_answer_pairs))
|
|
53
55
|
combined_content = "\n\n".join(
|
|
54
56
|
f"Question {i}:\n{qa['question']}\n\nAnswer {i}:\n{qa['answer']}\n"
|
|
55
57
|
for i, qa in enumerate(question_answer_pairs, 1)
|
|
56
58
|
)
|
|
57
|
-
|
|
58
59
|
headers = {"Content-Type": "application/json"}
|
|
59
60
|
data = {"contents": [{"parts": [{"text": prompt_text_powerbi(combined_content)}]}]}
|
|
60
61
|
response = requests.post(f"{self.endpoint}?key={self.api_key}", headers=headers, json=data)
|
|
61
|
-
|
|
62
62
|
if response.status_code != 200:
|
|
63
63
|
logger.error("API request failed: Status %d, Response: %s", response.status_code, response.text)
|
|
64
64
|
raise Exception(f"API call failed: {response.status_code} - {response.text}")
|
|
@@ -69,8 +69,11 @@ class GeminiFlashModel:
|
|
|
69
69
|
generated_text = response_data["candidates"][0]["content"]["parts"][0]["text"]
|
|
70
70
|
return self._parse_response(generated_text)
|
|
71
71
|
|
|
72
|
-
@retry(
|
|
73
|
-
|
|
72
|
+
@retry(
|
|
73
|
+
stop=stop_after_attempt(3),
|
|
74
|
+
wait=wait_exponential(min=4, max=10),
|
|
75
|
+
retry=retry_if_exception_type((requests.exceptions.RequestException,))
|
|
76
|
+
)
|
|
74
77
|
def evaluate_visuals(self, question: str, image_folder: str) -> Dict[str, any]:
|
|
75
78
|
folder_path = Path(image_folder)
|
|
76
79
|
images = list(folder_path.glob("*.png"))[:3]
|
|
@@ -80,12 +83,12 @@ class GeminiFlashModel:
|
|
|
80
83
|
"Evaluate the Power BI report visuals based on the provided task. The visuals are professional dashboards designed for enterprise use.\n\n"
|
|
81
84
|
f"Task: {question}\n\n"
|
|
82
85
|
f"Screenshots: {[str(img.name) for img in images]}\n\n"
|
|
83
|
-
"Evaluate based on the following criteria, assigning a score out of 100
|
|
86
|
+
"Evaluate based on the following criteria, assigning a score out of 100:\n"
|
|
84
87
|
"- Clarity (30%): Are visuals clear, with readable labels, titles, and legends?\n"
|
|
85
88
|
"- Appropriateness (30%): Are chart types (e.g., bar, line, pie) suitable for the data and task?\n"
|
|
86
89
|
"- Color Usage (20%): Are colors consistent, accessible, and visually appealing? Consider contrast and colorblind accessibility.\n"
|
|
87
90
|
"- Interactivity (20%): Do visible slicers, filters, or tooltips enhance usability and data exploration?\n\n"
|
|
88
|
-
"Provide a score
|
|
91
|
+
"Provide a score for overall quality, considering the enterprise context. Avoid overly harsh penalties for minor issues.\n"
|
|
89
92
|
"Provide concise, supportive feedback for beginners, highlighting strengths and areas for improvement.\n\n"
|
|
90
93
|
"Structure the response as:\n"
|
|
91
94
|
"Score: [SCORE]/100\n"
|
|
@@ -231,9 +234,7 @@ class PowerBIProcessor:
|
|
|
231
234
|
measures.append({
|
|
232
235
|
"Table": table["name"],
|
|
233
236
|
"Name": measure["name"],
|
|
234
|
-
"Expression": " ".join(measure.get("expression", "")) if isinstance(measure.get("expression"),
|
|
235
|
-
list) else measure.get(
|
|
236
|
-
"expression", ""),
|
|
237
|
+
"Expression": " ".join(measure.get("expression", "")) if isinstance(measure.get("expression"), list) else measure.get("expression", ""),
|
|
237
238
|
"FormatString": measure.get("formatString", "")
|
|
238
239
|
})
|
|
239
240
|
return measures
|
|
@@ -242,19 +243,31 @@ class PowerBIProcessor:
|
|
|
242
243
|
def _get_tables_and_columns(tables: List[Dict]) -> List[Dict]:
|
|
243
244
|
table_info = []
|
|
244
245
|
for table in tables:
|
|
245
|
-
columns = [
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
246
|
+
columns = [
|
|
247
|
+
{
|
|
248
|
+
"Column Name": col["name"],
|
|
249
|
+
"Data Type": col.get("dataType", "Unknown"),
|
|
250
|
+
"Source Column": col.get("sourceColumn", "N/A"),
|
|
251
|
+
"Calculated": col.get("type") == "calculated"
|
|
252
|
+
}
|
|
253
|
+
for col in table.get("columns", [])
|
|
254
|
+
]
|
|
255
|
+
expressions = [part["source"]["expression"] for part in table.get("partitions", []) if part["source"].get("expression")]
|
|
250
256
|
table_info.append({"Table Name": table["name"], "Columns": columns, "Expressions": expressions})
|
|
251
257
|
return table_info
|
|
252
258
|
|
|
253
259
|
@staticmethod
|
|
254
260
|
def _get_relationships(relationships: List[Dict]) -> List[Dict]:
|
|
255
|
-
return [
|
|
256
|
-
|
|
257
|
-
|
|
261
|
+
return [
|
|
262
|
+
{
|
|
263
|
+
"From Table": rel["fromTable"],
|
|
264
|
+
"From Column": rel["fromColumn"],
|
|
265
|
+
"To Table": rel["toTable"],
|
|
266
|
+
"To Column": rel["toColumn"],
|
|
267
|
+
"Join Behavior": rel.get("joinOnDateBehavior", "N/A")
|
|
268
|
+
}
|
|
269
|
+
for rel in relationships
|
|
270
|
+
]
|
|
258
271
|
|
|
259
272
|
@staticmethod
|
|
260
273
|
def _cleanup(*paths: str):
|
|
@@ -272,15 +285,13 @@ class PowerBIEvaluator:
|
|
|
272
285
|
self.model = GeminiFlashModel(api_key)
|
|
273
286
|
self.processor = PowerBIProcessor()
|
|
274
287
|
|
|
275
|
-
def evaluate(self, questions: List[str], answer_path: str) -> Dict[str, any]:
|
|
288
|
+
def evaluate(self, questions: List[str], answer_path: str, temp_dir: str = "temp_extract") -> Dict[str, any]:
|
|
276
289
|
try:
|
|
277
290
|
_, ext = os.path.splitext(answer_path)
|
|
278
291
|
ext = ext.lower()
|
|
279
|
-
extract_path =
|
|
292
|
+
extract_path = temp_dir
|
|
280
293
|
pbit_path = None
|
|
281
294
|
pdf_path = None
|
|
282
|
-
|
|
283
|
-
# Handle input file type
|
|
284
295
|
if ext == ".zip":
|
|
285
296
|
pbit_path, pdf_path = self.processor.extract_zip(answer_path, extract_path)
|
|
286
297
|
elif ext == ".pbit":
|
|
@@ -296,61 +307,47 @@ class PowerBIEvaluator:
|
|
|
296
307
|
"dax_score": 0,
|
|
297
308
|
"visual_score": 0
|
|
298
309
|
}
|
|
299
|
-
|
|
300
310
|
try:
|
|
301
|
-
# Extract and process the data model from .pbit
|
|
302
311
|
data_model = self.processor.extract_datamodel(pbit_path)
|
|
303
312
|
model_data = self.processor.extract_model_data(data_model)
|
|
304
313
|
answers = [json.dumps(model_data)] * len(questions)
|
|
305
314
|
dax_result = self.model.evaluate([{"question": q, "answer": a} for q, a in zip(questions, answers)])
|
|
306
|
-
|
|
307
|
-
# Initialize result with DAX evaluation
|
|
308
315
|
result = {
|
|
309
316
|
"score": 0,
|
|
310
317
|
"feedback": f"DAX Feedback:\n{dax_result['feedback']}",
|
|
311
318
|
"issues": dax_result["issues"],
|
|
312
319
|
"recommendations": dax_result["recommendations"],
|
|
313
|
-
"dax_score": dax_result["score"],
|
|
314
|
-
"visual_score": 0
|
|
320
|
+
"dax_score": dax_result["score"],
|
|
321
|
+
"visual_score": 0
|
|
315
322
|
}
|
|
316
|
-
|
|
317
|
-
# Process PDF and evaluate visuals if present
|
|
318
323
|
if pdf_path:
|
|
319
324
|
try:
|
|
320
|
-
self.processor.process_pdf(pdf_path)
|
|
321
|
-
visual_result = self.model.evaluate_visuals(questions[0], "outputimages")
|
|
322
|
-
# Apply 70% DAX, 30% visuals scoring
|
|
325
|
+
image_paths = self.processor.process_pdf(pdf_path, output_dir=os.path.join(temp_dir, "outputimages"))
|
|
326
|
+
visual_result = self.model.evaluate_visuals(questions[0], os.path.join(temp_dir, "outputimages"))
|
|
323
327
|
result["score"] = int(0.7 * dax_result["score"] + 0.3 * visual_result["score"])
|
|
324
|
-
result["visual_score"] = visual_result["score"]
|
|
328
|
+
result["visual_score"] = visual_result["score"]
|
|
325
329
|
result["feedback"] += f"\n\nVisual Feedback:\n{visual_result['feedback']}"
|
|
326
330
|
result["issues"].extend([f"Visual: {i}" for i in visual_result.get("issues", [])])
|
|
327
331
|
result["recommendations"].extend(visual_result.get("recommendations", []))
|
|
328
332
|
except ProcessingError as e:
|
|
329
333
|
logger.warning("Failed to process PDF, proceeding with DAX evaluation only: %s", str(e))
|
|
330
|
-
# Use DAX score only, weighted at 100% if no visuals
|
|
331
334
|
result["score"] = dax_result["score"]
|
|
332
335
|
result["issues"].append(f"Visual evaluation skipped: {str(e)}")
|
|
333
|
-
result["recommendations"].append(
|
|
334
|
-
"Ensure a valid PDF is provided for visual evaluation if intended")
|
|
336
|
+
result["recommendations"].append("Ensure a valid PDF is provided for visual evaluation if intended")
|
|
335
337
|
else:
|
|
336
|
-
# No PDF provided, use DAX score only
|
|
337
338
|
result["score"] = dax_result["score"]
|
|
338
339
|
result["feedback"] += "\n\nVisual Feedback:\nNo visuals provided for evaluation."
|
|
339
340
|
result["issues"].append("No PDF provided for visual evaluation")
|
|
340
341
|
result["recommendations"].append("Include a PDF with report visuals for complete evaluation")
|
|
341
|
-
|
|
342
|
-
# Print scores with text labels to terminal
|
|
343
342
|
logger.info("[DAX] Score: %d/100", result["dax_score"])
|
|
344
343
|
logger.info("[Visual] Score: %d/100", result["visual_score"])
|
|
345
344
|
logger.info("[Final] Score (70%% DAX, 30%% Visuals): %d/100", result["score"])
|
|
346
|
-
|
|
347
345
|
return result
|
|
348
346
|
finally:
|
|
349
|
-
|
|
350
|
-
self.processor._cleanup(extract_path, "outputimages")
|
|
347
|
+
self.processor._cleanup(extract_path, os.path.join(temp_dir, "outputimages"))
|
|
351
348
|
except Exception as e:
|
|
352
349
|
logger.exception("Failed to evaluate Power BI file %s: %s", answer_path, str(e))
|
|
353
|
-
self.processor._cleanup(extract_path, "outputimages")
|
|
350
|
+
self.processor._cleanup(extract_path, os.path.join(temp_dir, "outputimages"))
|
|
354
351
|
return {
|
|
355
352
|
"score": 0,
|
|
356
353
|
"feedback": f"Error processing file: {str(e)}",
|
|
@@ -362,4 +359,4 @@ class PowerBIEvaluator:
|
|
|
362
359
|
|
|
363
360
|
|
|
364
361
|
class ProcessingError(Exception):
|
|
365
|
-
pass
|
|
362
|
+
pass
|
|
@@ -8,10 +8,10 @@ def prompt_text_python(combined_content):
|
|
|
8
8
|
"- Highlight both strengths and areas for improvement\n"
|
|
9
9
|
"- Identify major mistakes or misunderstandings (e.g., syntax errors, incorrect logic, missing components and conceptual part)\n"
|
|
10
10
|
"- Be concise but insightful\n\n"
|
|
11
|
-
"- If the student's answer is incomplete or too simplistic to fully address the question, "
|
|
11
|
+
"- If the student's answer is incomplete or too simplistic to fully address the question, you should decrease the mark for the missing answers"
|
|
12
12
|
"explain that the response lacks depth or coverage, but do not provide the missing or correct answer. "
|
|
13
13
|
"Encourage the student to research further or review the relevant concepts.\n"
|
|
14
|
-
"- If the student's submission is off-topic or unrelated to the question, "
|
|
14
|
+
"- If the student's submission is off-topic or unrelated to the question, give exatly 20 mark and "
|
|
15
15
|
"clearly state that the response does not address the question's requirements and "
|
|
16
16
|
"explain why it is irrelevant. Encourage the student to review the question carefully and "
|
|
17
17
|
"focus on the relevant Python concepts without providing the correct solution."
|
|
@@ -32,8 +32,6 @@ def prompt_text_python(combined_content):
|
|
|
32
32
|
"- Be honest but supportive\n"
|
|
33
33
|
"- Include specific examples from the provided answers if helpful\n"
|
|
34
34
|
"- Keep language beginner-friendly\n"
|
|
35
|
-
"- Do not give too low marks. You may add from 20 up to 25 additional marks for effort or "
|
|
36
|
-
"partial relevance, ensuring the score does not exceed 100."
|
|
37
35
|
)
|
|
38
36
|
|
|
39
37
|
|
|
@@ -57,7 +55,7 @@ def prompt_text_sql(combined_content: str):
|
|
|
57
55
|
"clearly state that it lacks sufficient detail or misses key components, but do not provide "
|
|
58
56
|
"the missing parts or solutions. Instead, suggest they revisit the relevant "
|
|
59
57
|
"concepts (e.g., joins, subqueries, indexing, if lacks) and encourage deeper exploration.\n"
|
|
60
|
-
"- If the student's submission is off-topic or unrelated to the question, "
|
|
58
|
+
"- If the student's submission is off-topic or unrelated to the question, give exactly 20 mark and "
|
|
61
59
|
"clearly state that the response does not address the question's requirements and "
|
|
62
60
|
"explain why it is irrelevant. Encourage the student to review the "
|
|
63
61
|
"question carefully and focus on the relevant SQL Server concepts without providing the correct solution."
|
|
@@ -76,6 +74,7 @@ def prompt_text_sql(combined_content: str):
|
|
|
76
74
|
f"{combined_content}\n"
|
|
77
75
|
"=== EVALUATION COMPLETE ===\n\n"
|
|
78
76
|
"Notes:\n"
|
|
77
|
+
"If question about other technology for example python then it is clearly off topic and should get 20 mark"
|
|
79
78
|
"- Be honest but supportive\n"
|
|
80
79
|
"- Include specific examples from the provided answers if helpful\n"
|
|
81
80
|
"- Keep language beginner-friendly\n"
|
|
@@ -83,62 +82,43 @@ def prompt_text_sql(combined_content: str):
|
|
|
83
82
|
"effort or partial relevance, ensuring the score does not exceed 100."
|
|
84
83
|
)
|
|
85
84
|
|
|
86
|
-
def prompt_text_ssis(combined_content):
|
|
85
|
+
def prompt_text_ssis(combined_content: str) -> str:
|
|
87
86
|
return (
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
"Provide feedback in this format:\n\n"
|
|
124
|
-
"=== COMPREHENSIVE EVALUATION ===\n\n"
|
|
125
|
-
"OVERALL SCORE: <score>/100\n\n"
|
|
126
|
-
"FEEDBACK SUMMARY:\n"
|
|
127
|
-
"- What was done well\n"
|
|
128
|
-
"- What needs improvement\n"
|
|
129
|
-
"- Any major issues (e.g., logic errors, misunderstandings, incomplete solutions)\n\n"
|
|
130
|
-
"KEY ADVICE:\n"
|
|
131
|
-
"- Top 2-3 suggestions to improve SSIS skills\n"
|
|
132
|
-
"- Concepts to revisit\n"
|
|
133
|
-
"- Encouragement to keep learning and improving\n\n"
|
|
134
|
-
f"{combined_content}\n"
|
|
135
|
-
"=== EVALUATION COMPLETE ===\n\n"
|
|
136
|
-
"Notes:\n"
|
|
137
|
-
"- Be honest but supportive\n"
|
|
138
|
-
"- Include specific examples from the provided summary if helpful\n"
|
|
139
|
-
"- Keep language beginner-friendly\n"
|
|
140
|
-
"- Do not give too low marks. From 5 up to 10 additional marks for effort or partial relevance, ensuring the score does not exceed 100."
|
|
141
|
-
)
|
|
87
|
+
|
|
88
|
+
"You are an SSIS data engineer evaluating a beginner-level SSIS package submission (1–2 months experience).\n\n"
|
|
89
|
+
"Evaluation Criteria:\n"
|
|
90
|
+
"- Assess correct and relevant use of SSIS components: Connection Managers, Control Flow tasks (e.g., Execute SQL Task), Data Flow tasks (e.g., Flat File Source to OLE DB Destination).\n"
|
|
91
|
+
"- Check if the submission attempts to solve the task using SSIS packages (.dtsx) and related concepts.\n"
|
|
92
|
+
"- Confirm proper linking of components and appropriate use of data types.\n"
|
|
93
|
+
"- Consider clarity, effort, and completeness.\n"
|
|
94
|
+
"- If scheduling (e.g., SQL Server Agent Job) is missing, note it but deduct no more than 5 points.\n\n"
|
|
95
|
+
"**STRICT RULE ON OFF-TOPIC SUBMISSIONS:**\n"
|
|
96
|
+
"- If the submission is off-topic (e.g., Python scripts, SQL queries, Power BI reports, or anything NOT an SSIS package or SSIS-related), assign exactly 20/100 points.\n"
|
|
97
|
+
"- Do NOT give any additional points or feedback related to SSIS components.\n"
|
|
98
|
+
"- Clearly state in feedback that the submission does not address the SSIS package requirement and advise focusing on SSIS for this task.\n\n"
|
|
99
|
+
"Scoring Guidelines:\n"
|
|
100
|
+
"- Begin with a baseline of 60/100 for any reasonable SSIS attempt.\n"
|
|
101
|
+
"- Add 5–10 points for extra effort or partial correctness.\n"
|
|
102
|
+
"- Never exceed 100 points.\n"
|
|
103
|
+
"- Always reward genuine effort unless off-topic.\n\n"
|
|
104
|
+
"Feedback Format:\n"
|
|
105
|
+
"=== COMPREHENSIVE EVALUATION ===\n"
|
|
106
|
+
"OVERALL SCORE: <score>/100\n\n"
|
|
107
|
+
"FEEDBACK SUMMARY:\n"
|
|
108
|
+
"- What was done well\n"
|
|
109
|
+
"- What needs improvement\n"
|
|
110
|
+
"- Major issues (including off-topic comments if applicable)\n\n"
|
|
111
|
+
"KEY ADVICE:\n"
|
|
112
|
+
"- 1–2 improvement tips\n"
|
|
113
|
+
"- Core SSIS concepts to review\n"
|
|
114
|
+
"- Encouragement to keep practicing\n\n"
|
|
115
|
+
f"{combined_content}\n"
|
|
116
|
+
"=== EVALUATION COMPLETE ===\n\n"
|
|
117
|
+
"Notes:\n"
|
|
118
|
+
"- Be kind, clear, and beginner-friendly.\n"
|
|
119
|
+
"- If off-topic, strictly enforce 20/100 score with no exceptions.\n"
|
|
120
|
+
"- Remind student clearly to read the question carefully and focus on SSIS.\n"
|
|
121
|
+
)
|
|
142
122
|
|
|
143
123
|
def prompt_text_powerbi(combined_content: str):
|
|
144
124
|
return (
|
|
@@ -156,7 +136,7 @@ def prompt_text_powerbi(combined_content: str):
|
|
|
156
136
|
"- Highlight strengths and areas to improve\n"
|
|
157
137
|
"- Mention if the submission is incomplete or off-topic, but don't provide missing solutions\n"
|
|
158
138
|
"- Do not penalize for efficiency, missing advanced features, or redundant tables\n"
|
|
159
|
-
"- Base score on relevance, correctness, and effort. Incomplete/off-topic work should be scored low, with a small boost for effort if applicable\n\n"
|
|
139
|
+
"- Base score on relevance, correctness, and effort. Incomplete/off-topic work should be scored low and and should not be given any feedbacks related, with a small boost for effort if applicable\n\n"
|
|
160
140
|
f"{combined_content}\n"
|
|
161
141
|
"=== EVALUATION COMPLETE ==="
|
|
162
142
|
)
|