QuantumChecker 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- QuantumCheck/__init__.py +0 -0
- QuantumCheck/main.py +72 -0
- QuantumCheck/powerbi_evaluator.py +327 -0
- QuantumCheck/prompts.py +140 -0
- QuantumCheck/python_evaluator.py +95 -0
- QuantumCheck/sql_evaluator.py +97 -0
- QuantumCheck/ssis_evaluator.py +136 -0
- quantumchecker-0.1.0.dist-info/METADATA +28 -0
- quantumchecker-0.1.0.dist-info/RECORD +12 -0
- quantumchecker-0.1.0.dist-info/WHEEL +5 -0
- quantumchecker-0.1.0.dist-info/licenses/LICENSE +21 -0
- quantumchecker-0.1.0.dist-info/top_level.txt +1 -0
QuantumCheck/__init__.py
ADDED
|
File without changes
|
QuantumCheck/main.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
from typing import List, Dict
|
|
4
|
+
from python_evaluator import PythonEvaluator
|
|
5
|
+
from sql_evaluator import SQLEvaluator
|
|
6
|
+
from powerbi_evaluator import PowerBIEvaluator
|
|
7
|
+
from ssis_evaluator import SSISEvaluator
|
|
8
|
+
|
|
9
|
+
# Root logging is configured when this module is imported: INFO and above,
# each record prefixed with a timestamp and its level name.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

# Logger used by everything defined in this module.
logger = logging.getLogger(__name__)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class HomeworkEvaluator:
    """Route a homework answer file to the evaluator matching its file type.

    The file type is derived from the answer file's extension through
    ``EXTENSION_TO_TYPE``; anything that is not python/sql/powerbi/ssis is
    graded by ``PythonEvaluator`` as plain text.
    """

    # Extension -> logical answer type. Lookups in this class use the
    # lower-cased extension, so the ".DTSX" entry is never hit from here; it is
    # kept only so external code reading this mapping sees the same keys.
    EXTENSION_TO_TYPE = {
        ".py": "python",
        ".sql": "sql",
        ".zip": "powerbi",
        ".dtsx": "ssis",
        ".DTSX": "ssis",
        ".txt": "text",
        ".md": "text",
    }

    @staticmethod
    def parse_questions(md_content: str) -> List[str]:
        """Split question text into individual questions.

        Questions are separated by an empty line. Windows (CRLF) and old Mac
        (CR) line endings are normalised first, otherwise a CRLF document would
        never contain the ``"\\n\\n"`` separator and collapse into one question.

        Args:
            md_content: Raw text holding all questions.

        Returns:
            The non-empty questions, each stripped of surrounding whitespace.

        Raises:
            ValueError: If no non-empty question is found.
        """
        normalized = md_content.replace("\r\n", "\n").replace("\r", "\n")
        questions = [q.strip() for q in normalized.strip().split("\n\n") if q.strip()]
        if not questions:
            raise ValueError("No valid questions found in the question content")
        return questions

    def evaluate_from_content(self, question_content: str, answer_path: str, api_key: str) -> Dict[str, object]:
        """Grade the answer file at ``answer_path`` against the given questions.

        Args:
            question_content: Raw question text, see ``parse_questions``.
            answer_path: Path of the answer file; surrounding whitespace is ignored.
            api_key: Key handed to the selected evaluator.

        Returns:
            ``{"mark": <score>, "feedback": <text>}`` taken from the
            evaluator's ``"score"`` and ``"feedback"`` entries.

        Raises:
            ValueError: If the questions cannot be parsed.
            FileNotFoundError: If ``answer_path`` does not exist.
        """
        try:
            questions = self.parse_questions(question_content)
        except Exception as e:
            logger.error("Failed to parse question content: %s", str(e))
            # Chain the cause so the original traceback is not lost.
            raise ValueError(f"Failed to parse question content: {str(e)}") from e

        answer_path = answer_path.strip()
        logger.info("Processing answer_path: %s", answer_path)
        _, ext = os.path.splitext(answer_path)
        ext = ext.lower()
        logger.info("Extracted extension: %s", ext)
        file_type = self.EXTENSION_TO_TYPE.get(ext, "text")
        logger.info("Detected file type: %s for file: %s", file_type, answer_path)

        if not os.path.exists(answer_path):
            logger.error("Answer file not found: %s", answer_path)
            raise FileNotFoundError(f"Answer file not found: {answer_path}")

        evaluator_classes = {
            "python": PythonEvaluator,
            "sql": SQLEvaluator,
            "powerbi": PowerBIEvaluator,
            "ssis": SSISEvaluator,
        }
        evaluator_cls = evaluator_classes.get(file_type)
        if evaluator_cls is None:
            # "text" answers (.txt/.md and unknown extensions) go through the
            # Python evaluator. Only warn when the extension really is unknown;
            # .txt and .md are recognised types.
            if ext not in self.EXTENSION_TO_TYPE:
                logger.warning("Unrecognized file extension '%s', defaulting to text (Python parser)", ext)
            evaluator_cls = PythonEvaluator

        evaluation = evaluator_cls(api_key).evaluate(questions, answer_path)

        return {
            "mark": evaluation["score"],
            "feedback": evaluation["feedback"],
        }
|
|
71
|
+
|
|
72
|
+
|
|
@@ -0,0 +1,327 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import logging
|
|
3
|
+
import os
|
|
4
|
+
import re
|
|
5
|
+
import shutil
|
|
6
|
+
import zipfile
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Dict, List
|
|
9
|
+
from pdf2image import convert_from_path
|
|
10
|
+
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
|
|
11
|
+
import requests
|
|
12
|
+
from dotenv import load_dotenv
|
|
13
|
+
from PIL import Image
|
|
14
|
+
import io
|
|
15
|
+
import base64
|
|
16
|
+
|
|
17
|
+
from prompts import prompt_text_powerbi
|
|
18
|
+
|
|
19
|
+
# Runs at import time, before anything below looks up environment variables.
load_dotenv()

# Logger used by everything defined in this module.
logger = logging.getLogger(__name__)

# Root logging: INFO and above, sent both to a log file one directory above the
# current working directory and to a stream handler.
logging.basicConfig(
    handlers=[logging.FileHandler("../powerbi_evaluator.log"), logging.StreamHandler()],
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
|
|
26
|
+
|
|
27
|
+
# Client for the Gemini generateContent endpoint used to grade Power BI submissions
|
|
28
|
+
class GeminiFlashModel:
    """Minimal client for the Gemini ``generateContent`` REST endpoint.

    ``evaluate`` grades question/answer text with the Power BI prompt and
    ``evaluate_visuals`` grades report screenshots. Both are retried up to
    three times when ``requests`` raises a ``RequestException``.
    """

    # (connect, read) timeout in seconds so a stalled connection cannot block
    # forever; a timeout is a RequestException and therefore retried.
    REQUEST_TIMEOUT = (10, 120)

    # Score line, tolerating Markdown decoration and decimals, e.g.
    # "OVERALL SCORE: 85/100", "**OVERALL SCORE:** 85/100", "## OVERALL SCORE: 85.5 / 100".
    _SCORE_LINE = re.compile(
        r"^[#>*_\s-]*OVERALL SCORE[*_\s]*:[*_\s]*(\d{1,3})(?:\.\d+)?\s*/\s*100"
    )

    def __init__(self, api_key: str, model_name: str = "gemini-1.5-flash"):
        """Store the key and build the endpoint URL for ``model_name``.

        The ``GEMINI_API_KEY`` environment variable, when set, takes precedence
        over the ``api_key`` argument.

        Raises:
            ValueError: If neither source provides a key.
        """
        api_key = os.getenv("GEMINI_API_KEY") or api_key
        if not api_key:
            raise ValueError("API key not found in .env file or environment variables.")
        self.api_key = api_key
        self.model_name = model_name
        self.endpoint = f"https://generativelanguage.googleapis.com/v1beta/models/{model_name}:generateContent"

    def _generate(self, parts: List[Dict]) -> str:
        """POST ``parts`` as a single content entry and return the first candidate's text.

        Raises:
            Exception: If the HTTP status is not 200.
            ValueError: If the response has no candidates or the first
                candidate carries no text.
        """
        # The key travels in a header rather than the query string so it does
        # not end up in URLs that get logged or embedded in error messages.
        headers = {"Content-Type": "application/json", "x-goog-api-key": self.api_key}
        data = {"contents": [{"parts": parts}]}
        response = requests.post(self.endpoint, headers=headers, json=data, timeout=self.REQUEST_TIMEOUT)

        if response.status_code != 200:
            logger.error("API request failed: Status %d, Response: %s", response.status_code, response.text)
            raise Exception(f"API call failed: {response.status_code} - {response.text}")
        response_data = response.json()
        if not response_data.get("candidates"):
            logger.error("API response missing candidates: %s", response_data)
            raise ValueError("No candidates in API response")
        try:
            return response_data["candidates"][0]["content"]["parts"][0]["text"]
        except (KeyError, IndexError, TypeError) as exc:
            # A candidate can come back without any text part.
            logger.error("API candidate carries no text: %s", response_data)
            raise ValueError("No text content in API response") from exc

    @retry(
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=4, max=10),
        retry=retry_if_exception_type((requests.exceptions.RequestException,))
    )
    def evaluate(self, question_answer_pairs: List[Dict[str, str]]) -> Dict[str, object]:
        """Grade all question/answer pairs in one request.

        Args:
            question_answer_pairs: Dicts with ``"question"`` and ``"answer"`` keys.

        Returns:
            The dict produced by ``_parse_response``.
        """
        logger.info("Starting evaluation of %d Power BI question-answer pairs", len(question_answer_pairs))
        combined_content = "\n\n".join(
            f"Question {i}:\n{qa['question']}\n\nAnswer {i}:\n{qa['answer']}\n"
            for i, qa in enumerate(question_answer_pairs, 1)
        )
        generated_text = self._generate([{"text": prompt_text_powerbi(combined_content)}])
        return self._parse_response(generated_text)

    @retry(
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=4, max=10),
        retry=retry_if_exception_type((requests.exceptions.RequestException,))
    )
    def evaluate_visuals(self, question: str, image_folder: str) -> Dict[str, object]:
        """Grade up to three PNG screenshots found in ``image_folder``.

        Returns:
            ``{"score": int, "feedback": str}``; the score is 0 and the
            feedback a placeholder when the reply does not contain them.

        Raises:
            ProcessingError: If the folder contains no ``*.png`` file.
        """
        # Sorted by name so the same three screenshots are chosen on every run;
        # the order glob yields entries in is not guaranteed.
        images = sorted(Path(image_folder).glob("*.png"))[:3]
        if not images:
            raise ProcessingError(f"No PNG images found in {image_folder}")
        # NOTE(review): the prompt does not state a scale for [SCORE], yet the
        # caller averages it with a /100 score — confirm the intended scale.
        prompt = (
            "Evaluate the Power BI report visuals based on the given task.\n\n"
            f"Task: {question}\n\n"
            f"Screenshots: {[str(img.name) for img in images]}\n\n"
            "Focus on:\n"
            "- Clarity: Are visuals clear and easy to understand?\n"
            "- Appropriateness: Are visual types suitable for the data and task?\n"
            "- Layout and Design: Is the layout organized with logical flow?\n"
            "- Readability: Are labels, titles, and legends clear and not overcrowded?\n"
            "- Color Usage: Are colors effective, consistent, and accessible?\n"
            "- Interactivity: (If visible) Do slicers or filters enhance usability?\n\n"
            "Do not consider DAX, data sources, or advanced efficiency.\n"
            "Provide feedback in a supportive manner for beginners.\n\n"
            "Structure as: Score: [SCORE], Feedback: [FEEDBACK]"
        )
        parts = [{"text": prompt}]
        for img in images:
            with Image.open(img) as pil_img:
                # Shrink in place to at most 1024x1024 before encoding.
                pil_img.thumbnail((1024, 1024))
                img_buffer = io.BytesIO()
                pil_img.save(img_buffer, format="PNG")
            parts.append({
                "inline_data": {
                    "mime_type": "image/png",
                    "data": base64.b64encode(img_buffer.getvalue()).decode('utf-8')
                }
            })
        output_text = self._generate(parts)
        score_match = re.search(r"Score:\s*(\d+)(?:/100)?", output_text)
        feedback_match = re.search(r"Feedback:\s*(.*)", output_text, re.DOTALL)
        return {
            "score": int(score_match.group(1)) if score_match else 0,
            "feedback": feedback_match.group(1).strip() if feedback_match else "No visual feedback generated"
        }

    def _parse_response(self, text: str) -> Dict[str, object]:
        """Extract the score and the feedback that follows it from the model reply.

        The first line matching ``OVERALL SCORE: <n>/100`` sets the score
        (capped at 100); every later line becomes feedback. Without such a line
        the defaults (score 0, placeholder feedback) are returned.
        """
        result = {"score": 0, "feedback": "Evaluation not returned by API.", "issues": [], "recommendations": []}
        if not isinstance(text, str):
            result["issues"].append(f"Unexpected API response text: {text!r}")
            return result

        score_found = False
        feedback_lines = []
        for raw_line in text.split("\n"):
            line = raw_line.strip()
            if score_found:
                feedback_lines.append(line)
                continue
            match = self._SCORE_LINE.match(line)
            if match:
                result["score"] = min(int(match.group(1)), 100)
                score_found = True
            elif "OVERALL SCORE" in line and "/100" in line:
                # Looks like the score line but holds no usable number.
                result["issues"].append("Failed to parse score from API response")
        if feedback_lines:
            result["feedback"] = "\n".join(feedback_lines).strip()
        return result
|
|
141
|
+
|
|
142
|
+
class PowerBIProcessor:
|
|
143
|
+
def extract_datamodel(self, pbit_file_path: str) -> Dict:
|
|
144
|
+
if not os.path.exists(pbit_file_path):
|
|
145
|
+
raise ProcessingError(f"PBIT file not found: {pbit_file_path}")
|
|
146
|
+
folder_path = os.path.dirname(pbit_file_path)
|
|
147
|
+
file_name = os.path.splitext(os.path.basename(pbit_file_path))[0]
|
|
148
|
+
zip_file = os.path.join(folder_path, f"{file_name}.zip")
|
|
149
|
+
export_path = os.path.join(folder_path, "export")
|
|
150
|
+
self._cleanup(zip_file, export_path)
|
|
151
|
+
try:
|
|
152
|
+
os.rename(pbit_file_path, zip_file)
|
|
153
|
+
if not zipfile.is_zipfile(zip_file):
|
|
154
|
+
raise ProcessingError(f"File is not a valid ZIP: {zip_file}")
|
|
155
|
+
with zipfile.ZipFile(zip_file, "r") as zip_ref:
|
|
156
|
+
zip_ref.extractall(export_path)
|
|
157
|
+
schema_path = os.path.join(export_path, "DataModelSchema")
|
|
158
|
+
txt_path = os.path.join(export_path, "DataModelSchema.txt")
|
|
159
|
+
os.rename(schema_path, txt_path)
|
|
160
|
+
with open(txt_path, "r", encoding="utf-16-le") as file:
|
|
161
|
+
return json.load(file)
|
|
162
|
+
except UnicodeDecodeError as e:
|
|
163
|
+
logger.error("Failed to decode DataModelSchema: %s", str(e))
|
|
164
|
+
raise ProcessingError(f"Invalid encoding in DataModelSchema: {e}")
|
|
165
|
+
except Exception as e:
|
|
166
|
+
raise ProcessingError(f"Failed to extract DataModelSchema: {e}")
|
|
167
|
+
finally:
|
|
168
|
+
self._cleanup(zip_file, export_path)
|
|
169
|
+
|
|
170
|
+
def extract_model_data(self, data: Dict) -> Dict:
|
|
171
|
+
try:
|
|
172
|
+
tables = data.get("model", {}).get("tables", [])
|
|
173
|
+
relationships = data.get("model", {}).get("relationships", [])
|
|
174
|
+
return {
|
|
175
|
+
"Calculated Measures": self._get_measures(tables),
|
|
176
|
+
"Tables": self._get_tables_and_columns(tables),
|
|
177
|
+
"Relationships": self._get_relationships(relationships)
|
|
178
|
+
}
|
|
179
|
+
except Exception as e:
|
|
180
|
+
raise ProcessingError(f"Failed to extract model data: {e}")
|
|
181
|
+
|
|
182
|
+
def process_pdf(self, pdf_path: str, output_dir: str = "outputimages", num_pages: int = 3) -> List[str]:
|
|
183
|
+
try:
|
|
184
|
+
if not os.path.exists(pdf_path):
|
|
185
|
+
raise ProcessingError(f"PDF file not found: {pdf_path}")
|
|
186
|
+
os.makedirs(output_dir, exist_ok=True)
|
|
187
|
+
pages = convert_from_path(pdf_path, first_page=1, last_page=num_pages)
|
|
188
|
+
image_paths = []
|
|
189
|
+
for i, page in enumerate(pages):
|
|
190
|
+
image_path = os.path.join(output_dir, f"page_{i + 1}.png")
|
|
191
|
+
page.save(image_path, "PNG")
|
|
192
|
+
image_paths.append(image_path)
|
|
193
|
+
os.remove(pdf_path)
|
|
194
|
+
return image_paths
|
|
195
|
+
except Exception as e:
|
|
196
|
+
raise ProcessingError(f"Failed to process PDF: {e}")
|
|
197
|
+
|
|
198
|
+
def extract_zip(self, zip_path: str, extract_path: str) -> tuple[str, str | None]:
|
|
199
|
+
try:
|
|
200
|
+
if not os.path.exists(zip_path):
|
|
201
|
+
raise ProcessingError(f"ZIP file not found: {zip_path}")
|
|
202
|
+
if not zipfile.is_zipfile(zip_path):
|
|
203
|
+
raise ProcessingError(f"File is not a valid ZIP: {zip_path}")
|
|
204
|
+
os.makedirs(extract_path, exist_ok=True)
|
|
205
|
+
with zipfile.ZipFile(zip_path, "r") as zip_ref:
|
|
206
|
+
zip_ref.extractall(extract_path)
|
|
207
|
+
pbit_files = list(Path(extract_path).glob("*.pbit"))
|
|
208
|
+
pdf_files = list(Path(extract_path).glob("*.pdf"))
|
|
209
|
+
if not pbit_files:
|
|
210
|
+
raise ProcessingError("ZIP file must contain at least one .pbit file")
|
|
211
|
+
if len(pbit_files) > 1:
|
|
212
|
+
raise ProcessingError("ZIP file contains multiple .pbit files")
|
|
213
|
+
pdf_path = str(pdf_files[0]) if pdf_files else None
|
|
214
|
+
return str(pbit_files[0]), pdf_path
|
|
215
|
+
except Exception as e:
|
|
216
|
+
raise ProcessingError(f"Failed to extract ZIP file: {e}")
|
|
217
|
+
|
|
218
|
+
@staticmethod
|
|
219
|
+
def _get_measures(tables: List[Dict]) -> List[Dict]:
|
|
220
|
+
measures = []
|
|
221
|
+
for table in tables:
|
|
222
|
+
if "measures" in table:
|
|
223
|
+
for measure in table["measures"]:
|
|
224
|
+
measures.append({
|
|
225
|
+
"Table": table["name"],
|
|
226
|
+
"Name": measure["name"],
|
|
227
|
+
"Expression": " ".join(measure.get("expression", "")) if isinstance(measure.get("expression"), list) else measure.get("expression", ""),
|
|
228
|
+
"FormatString": measure.get("formatString", "")
|
|
229
|
+
})
|
|
230
|
+
return measures
|
|
231
|
+
|
|
232
|
+
@staticmethod
|
|
233
|
+
def _get_tables_and_columns(tables: List[Dict]) -> List[Dict]:
|
|
234
|
+
table_info = []
|
|
235
|
+
for table in tables:
|
|
236
|
+
columns = [{"Column Name": col["name"], "Data Type": col.get("dataType", "Unknown"), "Source Column": col.get("sourceColumn", "N/A"), "Calculated": col.get("type") == "calculated"} for col in table.get("columns", [])]
|
|
237
|
+
expressions = [part["source"]["expression"] for part in table.get("partitions", []) if part["source"].get("expression")]
|
|
238
|
+
table_info.append({"Table Name": table["name"], "Columns": columns, "Expressions": expressions})
|
|
239
|
+
return table_info
|
|
240
|
+
|
|
241
|
+
@staticmethod
|
|
242
|
+
def _get_relationships(relationships: List[Dict]) -> List[Dict]:
|
|
243
|
+
return [{"From Table": rel["fromTable"], "From Column": rel["fromColumn"], "To Table": rel["toTable"], "To Column": rel["toColumn"], "Join Behavior": rel.get("joinOnDateBehavior", "N/A")} for rel in relationships]
|
|
244
|
+
|
|
245
|
+
@staticmethod
|
|
246
|
+
def _cleanup(*paths: str):
|
|
247
|
+
for path in paths:
|
|
248
|
+
if os.path.exists(path):
|
|
249
|
+
if os.path.isfile(path):
|
|
250
|
+
os.remove(path)
|
|
251
|
+
else:
|
|
252
|
+
shutil.rmtree(path, ignore_errors=True)
|
|
253
|
+
|
|
254
|
+
class PowerBIEvaluator:
    """Grade a Power BI submission: the data model always, report visuals when a PDF is supplied."""

    def __init__(self, api_key: str):
        self.api_key = api_key
        self.model = GeminiFlashModel(api_key)
        self.processor = PowerBIProcessor()

    def evaluate(self, questions: List[str], answer_path: str) -> Dict[str, object]:
        """Evaluate a ``.pbit`` file or a ``.zip`` bundling a ``.pbit`` and an optional PDF.

        The data model extracted from the ``.pbit`` is graded against every
        question. If the archive also contains a PDF, its first pages are
        graded as visuals and the final score is the integer mean of both
        scores. A failure of the visual step never discards the data-model
        result; it is reported under ``"issues"`` instead.

        Returns:
            Dict with ``"score"``, ``"feedback"``, ``"issues"`` and
            ``"recommendations"``. Errors are reported through this dict
            (score 0) rather than raised.
        """
        _, ext = os.path.splitext(answer_path)
        ext = ext.lower()
        if ext not in (".zip", ".pbit"):
            logger.error("Invalid file type for Power BI: %s", answer_path)
            return {
                "score": 0,
                "feedback": f"Invalid file type: {ext}. Expected .pbit or .zip",
                "issues": ["Invalid file type"],
                "recommendations": []
            }

        import tempfile  # local import: only this method needs it

        work_dir = None
        try:
            if ext == ".zip":
                # A fresh directory per call: leftovers from an earlier run or a
                # parallel evaluation can no longer leak into this one.
                work_dir = tempfile.mkdtemp(prefix="powerbi_extract_")
                pbit_path, pdf_path = self.processor.extract_zip(answer_path, work_dir)
            else:
                pbit_path, pdf_path = answer_path, None

            # Extract and process the data model from the .pbit
            data_model = self.processor.extract_datamodel(pbit_path)
            model_data = self.processor.extract_model_data(data_model)
            # Every question is paired with the same serialized model summary.
            answers = [json.dumps(model_data)] * len(questions)
            dax_result = self.model.evaluate([{"question": q, "answer": a} for q, a in zip(questions, answers)])

            result = {
                "score": dax_result["score"],
                "feedback": f"DAX Feedback:\n{dax_result['feedback']}",
                "issues": dax_result["issues"],
                "recommendations": dax_result["recommendations"]
            }

            # A PDF can only come from an archive, so work_dir is set here.
            if pdf_path:
                images_dir = os.path.join(work_dir, "outputimages")
                try:
                    self.processor.process_pdf(pdf_path, images_dir)
                    visual_result = self.model.evaluate_visuals(questions[0], images_dir)
                    result["score"] = (dax_result["score"] + visual_result["score"]) // 2
                    result["feedback"] += f"\n\nVisual Feedback:\n{visual_result['feedback']}"
                    result["issues"].extend([f"Visual: {i}" for i in visual_result.get("issues", [])])
                    result["recommendations"].extend(visual_result.get("recommendations", []))
                except Exception as e:
                    # Any failure of the optional visual step (conversion, API,
                    # retries exhausted) keeps the data-model grade.
                    logger.warning("Failed to process PDF, proceeding with DAX evaluation only: %s", str(e))
                    result["issues"].append(f"Visual evaluation skipped: {str(e)}")
                    result["recommendations"].append("Ensure a valid PDF is provided for visual evaluation if intended")

            return result
        except Exception as e:
            logger.exception("Failed to evaluate Power BI file %s: %s", answer_path, str(e))
            return {
                "score": 0,
                "feedback": f"Error processing file: {str(e)}",
                "issues": [str(e)],
                "recommendations": ["Check file formats and API connectivity", "Review logs for detailed errors"]
            }
        finally:
            # Single cleanup point for the extracted archive and rendered images.
            if work_dir is not None:
                self.processor._cleanup(work_dir)
|
|
325
|
+
|
|
326
|
+
class ProcessingError(Exception):
    """Raised when a Power BI submission file cannot be found, unpacked, read or converted."""
|
QuantumCheck/prompts.py
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
|
|
2
|
+
def prompt_text_python(combined_content):
    """Build the grading prompt for Python submissions.

    ``combined_content`` (the question/answer text) is inserted after the
    requested feedback template and before the closing notes.
    """
    role_and_rubric = (
        "You are an expert Python instructor evaluating beginner Python code. "
        "Focus on syntax, logic, code readability, and adherence to Python best practices (e.g., PEP 8).\n\n"
        "IMPORTANT: First, check if the student's answer is relevant to Python. "
        "If it is clearly from a different subject (e.g., SQL, Power BI), assign a low score (10–25/100) "
        "and explain the mismatch supportively.\n\n"
        "Your evaluation should:\n"
        "- Focus on clarity, correctness, and understanding of the Python content\n"
        "- Be constructive and encouraging (students are beginners)\n"
        "- Highlight both strengths and areas for improvement\n"
        "- Identify major mistakes or misunderstandings (e.g., syntax errors, incorrect logic, missing components)\n"
        "- Be concise but insightful\n\n"
    )
    feedback_template = (
        "Provide feedback in this format:\n\n"
        "=== COMPREHENSIVE EVALUATION ===\n\n"
        "OVERALL SCORE: <score>/100\n\n"
        "FEEDBACK SUMMARY:\n"
        "- What was done well\n"
        "- What needs improvement\n"
        "- Any major issues (e.g., logic errors, misunderstanding, incomplete solutions)\n\n"
        "KEY ADVICE:\n"
        "- Top 2-3 suggestions to improve Python skills\n"
        "- Highlight any concepts to revisit\n"
        "- Encourage further learning and effort\n\n"
    )
    closing_notes = (
        "=== EVALUATION COMPLETE ===\n\n"
        "Notes:\n"
        "- Be honest but supportive\n"
        "- Include specific examples from the provided answers if helpful\n"
        "- Keep language beginner-friendly\n"
        "- Do not give too low marks unless the answer is entirely unrelated."
    )
    return f"{role_and_rubric}{feedback_template}{combined_content}\n{closing_notes}"
|
|
34
|
+
|
|
35
|
+
def prompt_text_sql(combined_content):
    """Build the grading prompt for SQL submissions.

    ``combined_content`` (the question/answer text) is inserted after the
    requested feedback template, which also asks for a summary in Uzbek, and
    before the closing notes.
    """
    role_and_rubric = (
        "You are a SQL expert evaluating beginner SQL queries. "
        "Focus on query correctness, efficiency, proper use of SQL syntax, and alignment with the question's requirements.\n\n"
        "IMPORTANT: First, check if the student's answer is relevant to SQL. "
        "If the answer is clearly about a different subject (e.g., Python or Power BI), assign a low score (10–25/100) "
        "and explain the mismatch in a supportive way.\n\n"
        "Your evaluation should:\n"
        "- Focus on clarity, correctness, and understanding of the SQL content\n"
        "- Be constructive and encouraging (students are beginners)\n"
        "- Highlight both strengths and areas for improvement\n"
        "- Identify major mistakes or misunderstandings\n"
        "- Be concise but insightful\n"
        "- Check for query optimization and adherence to the question's intent\n\n"
    )
    feedback_template = (
        "Provide feedback in this format:\n\n"
        "=== COMPREHENSIVE EVALUATION ===\n\n"
        "OVERALL SCORE: <score>/100\n\n"
        "FEEDBACK SUMMARY:\n"
        "- What was done well\n"
        "- What needs improvement\n"
        "- Any major issues (e.g., logic errors, misunderstanding, incomplete solutions)\n\n"
        "KEY ADVICE:\n"
        "- Top 2-3 suggestions to improve SQL skills\n"
        "- Highlight any concepts to revisit\n"
        "- Encourage further learning and effort\n\n"
        "FEEDBACK SUMMARY (in Uzbek):\n"
        "- Nima yaxshi bajarilgan\n"
        "- Nimalar ustida ishlash kerak\n"
        "- Jiddiy xatoliklar yoki noto‘g‘ri tushunchalar\n\n"
    )
    closing_notes = (
        "=== EVALUATION COMPLETE ===\n\n"
        "Notes:\n"
        "- Be honest but supportive\n"
        "- Include specific examples from the provided answers if helpful\n"
        "- Keep language beginner-friendly\n"
        "- Do not give too low marks unless the subject is clearly unrelated."
    )
    return f"{role_and_rubric}{feedback_template}{combined_content}\n{closing_notes}"
|
|
72
|
+
|
|
73
|
+
def prompt_text_ssis(combined_content):
    """Build the grading prompt for SSIS package submissions.

    ``combined_content`` (the question/answer text) is inserted after the
    requested feedback template and before the closing notes.
    """
    role_and_rubric = (
        "You are a data engineer reviewing an SSIS package (.dtsx) summary. "
        "Evaluate the correctness of tasks, data flow, control flow, and configurations.\n\n"
        "IMPORTANT: First, check if the answer is related to SSIS. "
        "If the answer is clearly unrelated (e.g., contains Python or SQL code), assign a low score (10–25/100) "
        "and explain the mismatch supportively.\n\n"
        "Your evaluation should:\n"
        "- Assess how well the package addresses the question\n"
        "- Focus on clarity, accuracy, and understanding of key SSIS components\n"
        "- Be supportive and constructive\n"
        "- Highlight what was done well and what could be improved\n"
        "- Point out only major issues if necessary\n"
        "- Keep feedback clear and insightful\n"
        "- Do not penalize for lack of advanced scheduling (e.g., SQL Agent use)\n\n"
    )
    feedback_template = (
        "Provide feedback in this format:\n\n"
        "=== COMPREHENSIVE EVALUATION ===\n\n"
        "OVERALL SCORE: <score>/100\n\n"
        "FEEDBACK SUMMARY:\n"
        "- What was done well\n"
        "- What needs improvement\n"
        "- Any major issues (e.g., logic errors, misunderstandings, incomplete solutions)\n\n"
        "KEY ADVICE:\n"
        "- Top 2-3 suggestions to improve SSIS skills\n"
        "- Concepts to revisit\n"
        "- Encouragement to keep learning and improving\n\n"
    )
    closing_notes = (
        "=== EVALUATION COMPLETE ===\n\n"
        "Notes:\n"
        "- Be honest but supportive\n"
        "- Include specific examples if helpful\n"
        "- Keep language beginner-friendly\n"
        "- Give credit for effort, even if technically incorrect. Use low scores only if clearly unrelated."
    )
    return f"{role_and_rubric}{feedback_template}{combined_content}\n{closing_notes}"
|
|
107
|
+
|
|
108
|
+
def prompt_text_powerbi(combined_content):
    """Build the grading prompt for Power BI submissions.

    ``combined_content`` (the question/answer text) is inserted after the
    requested feedback template and before the closing notes.
    """
    role_and_rubric = (
        "You are a BI professional evaluating Power BI report solutions, including DAX formulas, "
        "data models, and visual design.\n\n"
        "IMPORTANT: First, check if the student's answer is related to Power BI. "
        "If it is clearly from a different domain (e.g., Python or SQL code), assign a low score (10–25/100) "
        "and clearly explain the mismatch.\n\n"
        "Your evaluation should:\n"
        "- Focus on clarity, correctness, and understanding of Power BI content\n"
        "- Be constructive and encouraging (students are beginners)\n"
        "- Highlight strengths and areas for improvement\n"
        "- Identify major mistakes (e.g., incorrect DAX, poor data modeling)\n"
        "- Be concise but insightful\n"
        "- Evaluate DAX, visuals, and data model structure\n"
        "- Avoid penalizing for advanced design features or best practices\n\n"
    )
    feedback_template = (
        "Provide feedback in this format:\n\n"
        "=== COMPREHENSIVE EVALUATION ===\n\n"
        "OVERALL SCORE: <score>/100\n\n"
        "FEEDBACK SUMMARY:\n"
        "- What was done well\n"
        "- What needs improvement\n"
        "- Any major issues (e.g., incorrect DAX, missing visuals, poor relationships)\n\n"
        "KEY ADVICE:\n"
        "- Top 2-3 suggestions to improve Power BI skills\n"
        "- Highlight any concepts to revisit\n"
        "- Encourage further learning and effort\n\n"
    )
    closing_notes = (
        "=== EVALUATION COMPLETE ===\n\n"
        "Notes:\n"
        "- Be honest but supportive\n"
        "- If the subject is mismatched, clearly state that in feedback and give a low score (10–25/100)\n"
        "- Keep language beginner-friendly"
    )
    return f"{role_and_rubric}{feedback_template}{combined_content}\n{closing_notes}"
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import requests
|
|
3
|
+
from prompts import prompt_text_python
|
|
4
|
+
from typing import List, Dict
|
|
5
|
+
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
|
|
6
|
+
|
|
7
|
+
# Logger used by everything defined in this module.
logger = logging.getLogger(__name__)
|
|
8
|
+
|
|
9
|
+
class GeminiFlashModel:
    """Minimal client for the Gemini ``generateContent`` REST endpoint, using the Python grading prompt."""

    # (connect, read) timeout in seconds so a stalled connection cannot block
    # forever; a timeout is a RequestException and therefore retried.
    REQUEST_TIMEOUT = (10, 120)

    def __init__(self, api_key: str, model_name: str = "gemini-1.5-flash"):
        """Store the key and build the endpoint URL for ``model_name``.

        Raises:
            ValueError: If ``api_key`` is empty.
        """
        if not api_key:
            raise ValueError("API key is required.")
        self.api_key = api_key
        self.model_name = model_name
        self.endpoint = f"https://generativelanguage.googleapis.com/v1beta/models/{self.model_name}:generateContent"

    @retry(
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=4, max=10),
        retry=retry_if_exception_type((requests.exceptions.RequestException,))
    )
    def evaluate(self, question_answer_pairs: List[Dict[str, str]]) -> Dict[str, object]:
        """Grade all question/answer pairs in one request.

        Args:
            question_answer_pairs: Dicts with ``"question"`` and ``"answer"`` keys.

        Returns:
            The dict produced by ``_parse_response``.

        Raises:
            Exception: If the HTTP status is not 200.
            ValueError: If the response has no candidates or the first
                candidate carries no text.
        """
        logger.info("Starting evaluation of %d Python question-answer pairs", len(question_answer_pairs))

        combined_content = "\n\n".join(
            f"Question {i}:\n{qa['question']}\n\nAnswer {i}:\n{qa['answer']}\n"
            for i, qa in enumerate(question_answer_pairs, 1)
        )

        # The key travels in a header rather than the query string so it does
        # not end up in URLs that get logged or embedded in error messages.
        headers = {"Content-Type": "application/json", "x-goog-api-key": self.api_key}
        data = {"contents": [{"parts": [{"text": prompt_text_python(combined_content)}]}]}

        response = requests.post(self.endpoint, headers=headers, json=data, timeout=self.REQUEST_TIMEOUT)

        if response.status_code != 200:
            raise Exception(f"API call failed: {response.status_code} - {response.text}")

        response_data = response.json()
        if not response_data.get("candidates"):
            raise ValueError("No candidates in API response")

        try:
            generated_text = response_data["candidates"][0]["content"]["parts"][0]["text"]
        except (KeyError, IndexError, TypeError) as exc:
            # A candidate can come back without any text part.
            raise ValueError("No text content in API response") from exc
        return self._parse_response(generated_text)

    def _parse_response(self, text: str) -> Dict[str, object]:
        """Extract the score and the feedback that follows it from the model reply.

        The first line matching ``OVERALL SCORE: <n>/100`` (Markdown decoration
        and decimals tolerated) sets the score, capped at 100; every later line
        becomes feedback. Without such a line the defaults (score 0,
        placeholder feedback) are returned.
        """
        import re  # local import: only this method needs it

        result = {"score": 0, "feedback": "Evaluation not returned by API.", "issues": [], "recommendations": []}
        if not isinstance(text, str):
            result["issues"].append(f"Unexpected API response text: {text!r}")
            return result

        score_line = re.compile(
            r"^[#>*_\s-]*OVERALL SCORE[*_\s]*:[*_\s]*(\d{1,3})(?:\.\d+)?\s*/\s*100"
        )
        score_found = False
        feedback_lines = []
        for raw_line in text.split("\n"):
            line = raw_line.strip()
            if score_found:
                feedback_lines.append(line)
                continue
            match = score_line.match(line)
            if match:
                result["score"] = min(int(match.group(1)), 100)
                score_found = True
            elif "OVERALL SCORE" in line and "/100" in line:
                # Looks like the score line but holds no usable number.
                result["issues"].append("Failed to parse score from API response")
        if feedback_lines:
            result["feedback"] = "\n".join(feedback_lines).strip()
        return result
|
|
69
|
+
|
|
70
|
+
class PythonAnswerParser:
    """Split the text of an answer file into one answer per question."""

    @staticmethod
    def parse(content: str, question_count: int) -> List[str]:
        """Return exactly one answer per question whenever ``question_count`` is positive.

        Answers are separated by an empty line (CRLF/CR line endings are
        normalised first). Too few answers are padded with
        ``"No answer provided."``. Too many chunks — common for code, which
        contains blank lines of its own — are folded into the last answer, so
        no part of the submission is dropped when answers are paired with
        questions.

        Args:
            content: Full text of the answer file.
            question_count: Number of questions being answered.
        """
        normalized = content.replace("\r\n", "\n").replace("\r", "\n")
        answers = [a.strip() for a in normalized.strip().split("\n\n") if a.strip()]
        if not answers:
            logger.warning("No valid answers found, returning placeholders")
        if 0 < question_count < len(answers):
            keep = question_count - 1
            answers = answers[:keep] + ["\n\n".join(answers[keep:])]
        return answers + ["No answer provided."] * (question_count - len(answers))
|
|
77
|
+
|
|
78
|
+
class PythonEvaluator:
    """Grade a text answer file against a list of questions using the Python prompt."""

    def __init__(self, api_key: str):
        self.api_key = api_key
        self.model = GeminiFlashModel(api_key)

    def evaluate(self, questions: List[str], answer_path: str) -> Dict[str, object]:
        """Read the answer file, pair its answers with the questions and grade them.

        Args:
            questions: Questions in order.
            answer_path: Path of a UTF-8 text file (a leading byte-order mark is ignored).

        Returns:
            The model's result dict. If the file cannot be read, a dict with
            score 0 describing the error, carrying the same four keys as a
            normal result.
        """
        try:
            # "utf-8-sig" reads plain UTF-8 as well and drops a leading BOM,
            # which would otherwise become part of the first answer.
            with open(answer_path, "r", encoding="utf-8-sig") as file:
                content = file.read()
        except Exception as e:
            logger.error("Failed to read answer file %s: %s", answer_path, str(e))
            return {
                "score": 0,
                "feedback": f"Error reading file: {str(e)}",
                "issues": [str(e)],
                "recommendations": [],
            }

        answers = PythonAnswerParser.parse(content, len(questions))
        if len(answers) != len(questions):
            logger.warning("Mismatch: %d questions but %d answers", len(questions), len(answers))

        # zip pairs by position and stops at the shorter of the two lists.
        return self.model.evaluate([{"question": q, "answer": a} for q, a in zip(questions, answers)])
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import requests
|
|
3
|
+
from typing import List, Dict
|
|
4
|
+
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
|
|
5
|
+
|
|
6
|
+
from prompts import prompt_text_sql
|
|
7
|
+
|
|
8
|
+
logger = logging.getLogger(__name__)
|
|
9
|
+
|
|
10
|
+
class GeminiFlashModel:
    """Client that asks a Gemini model to grade SQL question/answer pairs."""

    def __init__(self, api_key: str, model_name: str = "gemini-1.5-flash", timeout: float = 120.0):
        """Configure the client.

        Args:
            api_key: Gemini API key; must be non-empty.
            model_name: Model identifier used to build the endpoint URL.
            timeout: Seconds to wait for the HTTP request before giving up.

        Raises:
            ValueError: If *api_key* is empty.
        """
        if not api_key:
            raise ValueError("API key is required.")
        self.api_key = api_key
        self.model_name = model_name
        self.timeout = timeout
        self.endpoint = f"https://generativelanguage.googleapis.com/v1beta/models/{self.model_name}:generateContent"

    @retry(
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=4, max=10),
        retry=retry_if_exception_type((requests.exceptions.RequestException,))
    )
    def evaluate(self, question_answer_pairs: List[Dict[str, str]]) -> Dict[str, any]:
        """Send all question/answer pairs to the model in one prompt and parse the reply.

        Only ``requests.exceptions.RequestException`` (which includes request
        timeouts) triggers a retry; a non-200 response raises a plain
        ``Exception`` and is not retried.

        Args:
            question_answer_pairs: Dicts with the keys ``question`` and ``answer``.

        Returns:
            The result dict built by ``_parse_response``.

        Raises:
            Exception: If the API answers with a status code other than 200.
            ValueError: If the response contains no candidates.
        """
        logger.info("Starting evaluation of %d SQL question-answer pairs", len(question_answer_pairs))

        combined_content = "\n\n".join(
            f"Question {i}:\n{qa['question']}\n\nAnswer {i}:\n{qa['answer']}\n"
            for i, qa in enumerate(question_answer_pairs, 1)
        )

        # The key travels in a header rather than the query string so it does
        # not end up in URLs quoted by exception messages or logs.
        headers = {"Content-Type": "application/json", "x-goog-api-key": self.api_key}
        data = {"contents": [{"parts": [{"text": prompt_text_sql(combined_content)}]}]}

        # Without a timeout a stalled connection would block forever.
        response = requests.post(self.endpoint, headers=headers, json=data, timeout=self.timeout)

        if response.status_code != 200:
            raise Exception(f"API call failed: {response.status_code} - {response.text}")

        response_data = response.json()
        if not response_data.get("candidates"):
            raise ValueError("No candidates in API response")

        generated_text = response_data["candidates"][0]["content"]["parts"][0]["text"]
        return self._parse_response(generated_text)

    def _parse_response(self, text: str) -> Dict[str, any]:
        """Extract the overall score and the trailing feedback from a model reply.

        The reply is scanned line by line for the first line of the form
        ``OVERALL SCORE: <number>/100``. Markdown emphasis or heading markers
        wrapped around that line are ignored, and a decimal score is rounded
        to the nearest integer. Every line after the score line is collected
        as feedback; lines before it are discarded.

        Args:
            text: Raw text generated by the model.

        Returns:
            A dict with the keys ``score`` (int, 0 when no score was parsed),
            ``feedback`` (str), ``issues`` (list of parsing problems) and
            ``recommendations`` (always an empty list here). Errors raised
            while parsing are reported through ``issues``.
        """
        result = {"score": 0, "feedback": "Evaluation not returned by API.", "issues": [], "recommendations": []}
        # Characters models commonly wrap around a heading line in markdown output.
        decoration = "*_#` \t"
        try:
            score_found = False
            feedback_lines = []
            for raw_line in text.split("\n"):
                line = raw_line.strip()
                if score_found:
                    feedback_lines.append(line)
                    continue

                candidate = line.strip(decoration)
                if not (candidate.startswith("OVERALL SCORE:") and "/100" in candidate):
                    continue

                # Text between the first ":" and the following "/" is the score.
                raw_score = candidate.split(":", 1)[1].split("/", 1)[0].strip(decoration)
                try:
                    result["score"] = int(raw_score)
                except ValueError:
                    try:
                        # Accept decimal scores while keeping an int result.
                        result["score"] = int(round(float(raw_score)))
                    except (ValueError, OverflowError):
                        result["issues"].append("Failed to parse score from API response")
                        continue
                score_found = True

            if feedback_lines:
                result["feedback"] = "\n".join(feedback_lines).strip()
            return result
        except Exception as e:
            # Best-effort parser: report the problem instead of failing the evaluation.
            result["issues"].append(str(e))
            return result
|
|
71
|
+
|
|
72
|
+
class SQLAnswerParser:
    """Splits the text of a SQL submission into individual answers."""

    @staticmethod
    def parse(content: str, question_count: int) -> List[str]:
        """Return the answers found in *content*, padded up to *question_count*.

        The text is cut at blank lines and empty chunks are dropped. Missing
        answers are filled with ``"No answer provided."``; surplus answers
        are kept as they are.

        Args:
            content: Full text of the submitted file.
            question_count: Number of questions being graded.

        Returns:
            The list of answer strings.
        """
        trimmed_chunks = (piece.strip() for piece in content.strip().split("\n\n"))
        answers = list(filter(None, trimmed_chunks))

        if len(answers) == 0:
            logger.warning("No valid answers found, returning placeholders")

        # Multiplying by a negative count yields an empty list (no padding).
        padding = ["No answer provided."] * (question_count - len(answers))
        return answers + padding
|
|
79
|
+
|
|
80
|
+
class SQLEvaluator:
    """Grades a SQL homework file against a list of questions."""

    def __init__(self, api_key: str):
        """Store the API key and create the model client used for grading."""
        self.api_key = api_key
        self.model = GeminiFlashModel(api_key)

    def evaluate(self, questions: List[str], answer_path: str) -> Dict[str, any]:
        """Read the answer file, pair its answers with *questions* and grade them.

        Args:
            questions: Question texts, in order.
            answer_path: Path of the UTF-8 text file holding the answers.

        Returns:
            The result dict produced by the model client. If the file cannot
            be read, a result with ``score`` 0 describing the error is
            returned instead; it carries the same keys (``score``,
            ``feedback``, ``issues``, ``recommendations``) as a parsed result.
        """
        try:
            with open(answer_path, "r", encoding="utf-8") as file:
                content = file.read()
        except Exception as e:
            logger.error("Failed to read answer file %s: %s", answer_path, str(e))
            # Include "recommendations" so the error result has the same shape
            # as the results built by the model client's response parser.
            return {
                "score": 0,
                "feedback": f"Error reading file: {str(e)}",
                "issues": [str(e)],
                "recommendations": [],
            }

        answers = SQLAnswerParser.parse(content, len(questions))
        if len(answers) != len(questions):
            logger.warning("Mismatch: %d questions but %d answers", len(questions), len(answers))

        # zip() stops at the shorter sequence, so surplus answers are not sent.
        return self.model.evaluate([{"question": q, "answer": a} for q, a in zip(questions, answers)])
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import requests
|
|
3
|
+
import xml.etree.ElementTree as ET
|
|
4
|
+
from typing import List, Dict
|
|
5
|
+
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
|
|
6
|
+
|
|
7
|
+
from prompts import prompt_text_ssis
|
|
8
|
+
|
|
9
|
+
logger = logging.getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
class GeminiFlashModel:
    """Client that asks a Gemini model to grade SSIS question/answer pairs."""

    def __init__(self, api_key: str, model_name: str = "gemini-1.5-flash", timeout: float = 120.0):
        """Configure the client.

        Args:
            api_key: Gemini API key; must be non-empty.
            model_name: Model identifier used to build the endpoint URL.
            timeout: Seconds to wait for the HTTP request before giving up.

        Raises:
            ValueError: If *api_key* is empty.
        """
        if not api_key:
            raise ValueError("API key is required.")
        self.api_key = api_key
        self.model_name = model_name
        self.timeout = timeout
        self.endpoint = f"https://generativelanguage.googleapis.com/v1beta/models/{self.model_name}:generateContent"

    @retry(
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=4, max=10),
        retry=retry_if_exception_type((requests.exceptions.RequestException,))
    )
    def evaluate(self, question_answer_pairs: List[Dict[str, str]]) -> Dict[str, any]:
        """Send all question/answer pairs to the model in one prompt and parse the reply.

        Only ``requests.exceptions.RequestException`` (which includes request
        timeouts) triggers a retry; a non-200 response raises a plain
        ``Exception`` and is not retried.

        Args:
            question_answer_pairs: Dicts with the keys ``question`` and ``answer``.

        Returns:
            The result dict built by ``_parse_response``.

        Raises:
            Exception: If the API answers with a status code other than 200.
            ValueError: If the response contains no candidates.
        """
        logger.info("Starting evaluation of %d SSIS question-answer pairs", len(question_answer_pairs))

        combined_content = "\n\n".join(
            f"Question {i}:\n{qa['question']}\n\nAnswer {i}:\n{qa['answer']}\n"
            for i, qa in enumerate(question_answer_pairs, 1)
        )

        # The key travels in a header rather than the query string so it does
        # not end up in URLs quoted by exception messages or logs.
        headers = {"Content-Type": "application/json", "x-goog-api-key": self.api_key}
        data = {"contents": [{"parts": [{"text": prompt_text_ssis(combined_content)}]}]}

        # Without a timeout a stalled connection would block forever.
        response = requests.post(self.endpoint, headers=headers, json=data, timeout=self.timeout)

        if response.status_code != 200:
            raise Exception(f"API call failed: {response.status_code} - {response.text}")

        response_data = response.json()
        if not response_data.get("candidates"):
            raise ValueError("No candidates in API response")

        generated_text = response_data["candidates"][0]["content"]["parts"][0]["text"]
        return self._parse_response(generated_text)

    def _parse_response(self, text: str) -> Dict[str, any]:
        """Extract the overall score and the trailing feedback from a model reply.

        The reply is scanned line by line for the first line of the form
        ``OVERALL SCORE: <number>/100``. Markdown emphasis or heading markers
        wrapped around that line are ignored, and a decimal score is rounded
        to the nearest integer. Every line after the score line is collected
        as feedback; lines before it are discarded.

        Args:
            text: Raw text generated by the model.

        Returns:
            A dict with the keys ``score`` (int, 0 when no score was parsed),
            ``feedback`` (str), ``issues`` (list of parsing problems) and
            ``recommendations`` (always an empty list here). Errors raised
            while parsing are reported through ``issues``.
        """
        result = {"score": 0, "feedback": "Evaluation not returned by API.", "issues": [], "recommendations": []}
        # Characters models commonly wrap around a heading line in markdown output.
        decoration = "*_#` \t"
        try:
            score_found = False
            feedback_lines = []
            for raw_line in text.split("\n"):
                line = raw_line.strip()
                if score_found:
                    feedback_lines.append(line)
                    continue

                candidate = line.strip(decoration)
                if not (candidate.startswith("OVERALL SCORE:") and "/100" in candidate):
                    continue

                # Text between the first ":" and the following "/" is the score.
                raw_score = candidate.split(":", 1)[1].split("/", 1)[0].strip(decoration)
                try:
                    result["score"] = int(raw_score)
                except ValueError:
                    try:
                        # Accept decimal scores while keeping an int result.
                        result["score"] = int(round(float(raw_score)))
                    except (ValueError, OverflowError):
                        result["issues"].append("Failed to parse score from API response")
                        continue
                score_found = True

            if feedback_lines:
                result["feedback"] = "\n".join(feedback_lines).strip()
            return result
        except Exception as e:
            # Best-effort parser: report the problem instead of failing the evaluation.
            result["issues"].append(str(e))
            return result
|
|
70
|
+
|
|
71
|
+
class SSISAnswerParser:
    """Builds a plain-text summary of an SSIS (.dtsx) package file."""

    # Namespace URI that the ``DTS`` prefix is mapped to in the lookups below.
    _DTS_URI = "www.microsoft.com/SqlServer/Dts"
    _DTS_NS = {"DTS": _DTS_URI}

    @staticmethod
    def _dts_attr(elem, name: str, default: str) -> str:
        """Return attribute *name* of *elem*, preferring its DTS-qualified form.

        ElementTree stores a namespaced attribute under ``{uri}name``, so a
        lookup by the prefixed spelling ``DTS:name`` never matches.
        """
        value = elem.get("{%s}%s" % (SSISAnswerParser._DTS_URI, name))
        if value is None:
            value = elem.get(name)
        return default if value is None else value

    @staticmethod
    def _find_text(elem, path: str, default: str) -> str:
        """Return the text of the first match of *path* under *elem*, or *default*."""
        found = elem.find(path)
        if found is None or found.text is None:
            return default
        return found.text

    @staticmethod
    def parse(filepath: str, question_count: int) -> List[str]:
        """Summarise the package at *filepath*, returning one copy per question.

        The summary lists tasks, connections, data-flow components, data-flow
        paths and log providers found in the XML, is cut to 2000 characters,
        and is repeated *question_count* times so every question is paired
        with the same description. Elements with missing attributes, child
        nodes or text are reported with placeholder values instead of
        aborting the parse.

        Args:
            filepath: Path of the package file.
            question_count: Number of questions being graded.

        Returns:
            A list with *question_count* copies of the summary, or of an
            "Invalid SSIS package file" message when the XML cannot be parsed.

        Raises:
            OSError: If the file cannot be opened (not handled here).
        """
        ns = SSISAnswerParser._DTS_NS
        dts_attr = SSISAnswerParser._dts_attr
        find_text = SSISAnswerParser._find_text
        try:
            tree = ET.parse(filepath)
            root = tree.getroot()
            summary = []

            for elem in root.iter():
                if "Executable" in elem.tag or "task" in elem.tag.lower():
                    task_name = elem.get("Name")
                    if task_name is None:
                        # Fall back to the DTS-qualified object name.
                        task_name = dts_attr(elem, "ObjectName", "Unnamed Task")
                    task_type = elem.tag.split("}")[-1] if "}" in elem.tag else elem.tag
                    summary.append(f"Task: {task_name} ({task_type})")

            for conn in root.findall(".//DTS:ConnectionManager", namespaces=ns):
                conn_name = dts_attr(conn, "ObjectName", "Unnamed Connection")
                conn_type = conn.find(".//DTS:ObjectData", namespaces=ns)
                if conn_type is None:
                    continue
                # NOTE(review): conn_type is looked up as DTS:ObjectData, so its
                # tag always ends in "ObjectData" and neither substring test
                # below can match. The checks are kept as written; confirm
                # which element was meant to be inspected.
                if "FlatFileConnectionManager" in conn_type.tag:
                    file_path = find_text(conn_type, ".//Property[@Name='FileName']", "Unknown")
                    columns = [
                        find_text(col, ".//Property[@Name='Name']", "Unknown")
                        for col in conn_type.findall(".//Column")
                    ]
                    summary.append(f"Flat File Connection: {conn_name} (File: {file_path}, Columns: {', '.join(columns)})")
                elif "OleDbConnectionManager" in conn_type.tag:
                    conn_string = find_text(conn_type, ".//Property[@Name='ConnectionString']", "Unknown")
                    summary.append(f"SQL Server Connection: {conn_name} (ConnectionString: {conn_string})")

            for component in root.findall(".//component"):
                comp_name = component.get("name", "Unnamed Component")
                comp_type = component.get("componentClassID", "").split(".")[-1]
                if comp_type == "FlatFileSource":
                    summary.append(f"Flat File Source: {comp_name}")
                elif comp_type == "DataConversion":
                    # Skip unnamed columns; joining a None would raise TypeError.
                    output_cols = [
                        col.get("name")
                        for col in component.findall(".//outputColumn")
                        if col.get("name") is not None
                    ]
                    summary.append(f"Data Conversion: {comp_name} (Outputs: {', '.join(output_cols)})")
                elif comp_type == "OLEDBDestination":
                    table_name = find_text(component, ".//property[@name='TableName']", "Unknown")
                    summary.append(f"SQL Server Destination: {comp_name} (Table: {table_name})")

            for path in root.findall(".//path"):
                start_id = path.get("startId", "Unknown")
                end_id = path.get("endId", "Unknown")
                summary.append(f"Data Flow Path: {start_id} -> {end_id}")

            for log_provider in root.findall(".//DTS:LogProvider", namespaces=ns):
                log_name = dts_attr(log_provider, "ObjectName", "Unnamed Log")
                log_file = dts_attr(log_provider, "ConfigString", "Unknown")
                # Elements without text have text None; skip them.
                events = [
                    event.text.strip()
                    for event in log_provider.findall(".//LogEvent")
                    if event.text
                ]
                summary.append(f"Log Provider: {log_name} (File: {log_file}, Events: {', '.join(events)})")

            combined_summary = "\n".join(summary)[:2000] or "No components found in SSIS package"
            logger.info("Parsed SSIS summary: %s", combined_summary)
            return [combined_summary] * question_count
        except ET.ParseError as e:
            logger.error("Invalid SSIS package file: %s", str(e))
            return [f"Invalid SSIS package file: {str(e)}"] * question_count
|
|
126
|
+
|
|
127
|
+
class SSISEvaluator:
    """Grades an SSIS package file against a list of questions."""

    def __init__(self, api_key: str):
        """Store the API key and create the model client used for grading."""
        self.api_key = api_key
        self.model = GeminiFlashModel(api_key)

    def evaluate(self, questions: List[str], answer_path: str) -> Dict[str, any]:
        """Summarise the package at *answer_path* and grade it against *questions*.

        Args:
            questions: Question texts, in order.
            answer_path: Path of the SSIS package file.

        Returns:
            The result dict produced by the model client. If the package
            file cannot be opened, a result with ``score`` 0 describing the
            error is returned instead.
        """
        try:
            answers = SSISAnswerParser.parse(answer_path, len(questions))
        except OSError as e:
            # Report an unreadable file as a zero-score result rather than
            # letting the error escape to the caller.
            logger.error("Failed to read answer file %s: %s", answer_path, str(e))
            return {
                "score": 0,
                "feedback": f"Error reading file: {str(e)}",
                "issues": [str(e)],
                "recommendations": [],
            }

        if len(answers) != len(questions):
            logger.warning("Mismatch: %d questions but %d answers", len(questions), len(answers))
        return self.model.evaluate([{"question": q, "answer": a} for q, a in zip(questions, answers)])
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: QuantumChecker
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A package to evaluate homework submissions in Python, SQL, PowerBI, and SSIS.
|
|
5
|
+
Author-email: Qobiljon <qobiljonkhayrullayev@gmail.com>
|
|
6
|
+
Project-URL: Homepage, https://github.com/yourusername/homework_evaluator
|
|
7
|
+
Project-URL: Repository, https://github.com/yourusername/homework_evaluator
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Requires-Python: >=3.6
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
License-File: LICENSE
|
|
14
|
+
Requires-Dist: requests>=2.31.0
|
|
15
|
+
Requires-Dist: tenacity>=8.2.3
|
|
16
|
+
Requires-Dist: pdf2image>=1.16.3
|
|
17
|
+
Requires-Dist: python-dotenv>=1.0.0
|
|
18
|
+
Requires-Dist: Pillow>=10.0.0
|
|
19
|
+
Dynamic: license-file
|
|
20
|
+
|
|
21
|
+
# HomeworkEvaluator
|
|
22
|
+
|
|
23
|
+
A Python package to evaluate homework submissions in Python, SQL, PowerBI, and SSIS formats.
|
|
24
|
+
|
|
25
|
+
## Installation
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
pip install QuantumChecker
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
QuantumCheck/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
QuantumCheck/main.py,sha256=5gHOwjveQDKSYhFXXLP8FI-4V3oOuTR8UQa1ZC27MoQ,2839
|
|
3
|
+
QuantumCheck/powerbi_evaluator.py,sha256=8YB3NdZAsEIzeb7-gITnMNRhp-rjldX_xDIMOejGDNM,16355
|
|
4
|
+
QuantumCheck/prompts.py,sha256=yWAE1YTWTKTBFuKS7QQaYciNbUQgOLLx8JlfOSnkWrg,7451
|
|
5
|
+
QuantumCheck/python_evaluator.py,sha256=2nDe0QMJ_isEUK3kujQxopaG2BiGfJhFCiy99PTD8fE,4253
|
|
6
|
+
QuantumCheck/sql_evaluator.py,sha256=_kwGicJaBIGeqDs0MAqvKBF64jgXHlFdty2Wsuai0Ds,4239
|
|
7
|
+
QuantumCheck/ssis_evaluator.py,sha256=X24GM5erlBjpcjl82eGtNbqqT6RFnELRsnqHyyTQMIM,7155
|
|
8
|
+
quantumchecker-0.1.0.dist-info/licenses/LICENSE,sha256=4sSnqVo8Tpn1rhgW5ta6VOZyb7_UIJKUOZnhIgS_18Y,1096
|
|
9
|
+
quantumchecker-0.1.0.dist-info/METADATA,sha256=5t86yNT6I79X8yzxaJrAzVGoi0MNYcqsg8WaGJieuyk,966
|
|
10
|
+
quantumchecker-0.1.0.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
|
|
11
|
+
quantumchecker-0.1.0.dist-info/top_level.txt,sha256=F-dIBymOeeSbbMGjXSgZHiZ_Z8nzjgS5P78KY6nweQo,13
|
|
12
|
+
quantumchecker-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Qobiljon Xayrullayev
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
QuantumCheck
|