QuantumChecker 0.2.5__tar.gz → 0.2.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {quantumchecker-0.2.5 → quantumchecker-0.2.7}/PKG-INFO +2 -1
- {quantumchecker-0.2.5 → quantumchecker-0.2.7}/QuantumCheck/powerbi_evaluator.py +79 -122
- {quantumchecker-0.2.5 → quantumchecker-0.2.7}/QuantumCheck/prompts.py +12 -37
- {quantumchecker-0.2.5 → quantumchecker-0.2.7}/QuantumChecker.egg-info/PKG-INFO +2 -1
- {quantumchecker-0.2.5 → quantumchecker-0.2.7}/QuantumChecker.egg-info/requires.txt +1 -0
- {quantumchecker-0.2.5 → quantumchecker-0.2.7}/setup.py +3 -2
- {quantumchecker-0.2.5 → quantumchecker-0.2.7}/tests/test.py +6 -4
- {quantumchecker-0.2.5 → quantumchecker-0.2.7}/QuantumCheck/__init__.py +0 -0
- {quantumchecker-0.2.5 → quantumchecker-0.2.7}/QuantumCheck/main.py +0 -0
- {quantumchecker-0.2.5 → quantumchecker-0.2.7}/QuantumCheck/python_evaluator.py +0 -0
- {quantumchecker-0.2.5 → quantumchecker-0.2.7}/QuantumCheck/sql_evaluator.py +0 -0
- {quantumchecker-0.2.5 → quantumchecker-0.2.7}/QuantumCheck/ssis_evaluator.py +0 -0
- {quantumchecker-0.2.5 → quantumchecker-0.2.7}/QuantumChecker.egg-info/SOURCES.txt +0 -0
- {quantumchecker-0.2.5 → quantumchecker-0.2.7}/QuantumChecker.egg-info/dependency_links.txt +0 -0
- {quantumchecker-0.2.5 → quantumchecker-0.2.7}/QuantumChecker.egg-info/top_level.txt +0 -0
- {quantumchecker-0.2.5 → quantumchecker-0.2.7}/README.md +0 -0
- {quantumchecker-0.2.5 → quantumchecker-0.2.7}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: QuantumChecker
|
|
3
|
-
Version: 0.2.5
|
|
3
|
+
Version: 0.2.7
|
|
4
4
|
Summary: A package to evaluate homework submissions in Python, SQL, PowerBI, and SSIS.
|
|
5
5
|
Author: Qobiljon
|
|
6
6
|
Author-email: qobiljonkhayrullayev@gmail.com
|
|
@@ -14,6 +14,7 @@ Requires-Dist: tenacity>=8.2.3
|
|
|
14
14
|
Requires-Dist: pdf2image>=1.16.3
|
|
15
15
|
Requires-Dist: python-dotenv>=1.0.0
|
|
16
16
|
Requires-Dist: Pillow>=10.0.0
|
|
17
|
+
Requires-Dist: PyPDF2>=3.0.1
|
|
17
18
|
Dynamic: author
|
|
18
19
|
Dynamic: author-email
|
|
19
20
|
Dynamic: classifier
|
|
@@ -14,7 +14,19 @@ from PIL import Image
|
|
|
14
14
|
import io
|
|
15
15
|
import base64
|
|
16
16
|
|
|
17
|
-
|
|
17
|
+
|
|
18
|
+
# Placeholder for prompts.py content
|
|
19
|
+
def prompt_text_powerbi(combined_content: str) -> str:
|
|
20
|
+
return f"""
|
|
21
|
+
Evaluate the following Power BI DAX question-answer pairs for correctness, clarity, and appropriateness.
|
|
22
|
+
Provide an overall score out of 100 and concise feedback. Focus on DAX logic and structure.
|
|
23
|
+
Structure the response as:
|
|
24
|
+
OVERALL SCORE: [SCORE]/100
|
|
25
|
+
[FEEDBACK]
|
|
26
|
+
|
|
27
|
+
{combined_content}
|
|
28
|
+
"""
|
|
29
|
+
|
|
18
30
|
|
|
19
31
|
load_dotenv()
|
|
20
32
|
logger = logging.getLogger(__name__)
|
|
@@ -24,34 +36,27 @@ logging.basicConfig(
|
|
|
24
36
|
handlers=[logging.FileHandler("../powerbi_evaluator.log"), logging.StreamHandler()]
|
|
25
37
|
)
|
|
26
38
|
|
|
39
|
+
|
|
27
40
|
class GeminiFlashModel:
|
|
28
41
|
def __init__(self, api_key: str, model_name: str = "gemini-1.5-flash"):
|
|
29
|
-
logger.info("Initializing GeminiFlashModel with model: %s", model_name)
|
|
30
42
|
api_key = os.getenv("GEMINI_API_KEY") or api_key
|
|
31
43
|
if not api_key:
|
|
32
|
-
logger.error("API key not found in environment variables or provided argument")
|
|
33
44
|
raise ValueError("API key not found in .env file or environment variables.")
|
|
34
45
|
self.api_key = api_key
|
|
35
46
|
self.model_name = model_name
|
|
36
47
|
self.endpoint = f"https://generativelanguage.googleapis.com/v1beta/models/{model_name}:generateContent"
|
|
37
|
-
logger.info("GeminiFlashModel initialized successfully with endpoint: %s", self.endpoint)
|
|
38
48
|
|
|
39
|
-
@retry(
|
|
40
|
-
|
|
41
|
-
wait=wait_exponential(multiplier=1, min=4, max=10),
|
|
42
|
-
retry=retry_if_exception_type((requests.exceptions.RequestException,))
|
|
43
|
-
)
|
|
49
|
+
@retry(stop=stop_after_attempt(3), wait=wait_exponential(min=4, max=10),
|
|
50
|
+
retry=retry_if_exception_type((requests.exceptions.RequestException,)))
|
|
44
51
|
def evaluate(self, question_answer_pairs: List[Dict[str, str]]) -> Dict[str, any]:
|
|
45
|
-
logger.info("Starting evaluation of %d question-answer pairs", len(question_answer_pairs))
|
|
52
|
+
logger.info("Starting evaluation of %d Power BI question-answer pairs", len(question_answer_pairs))
|
|
46
53
|
combined_content = "\n\n".join(
|
|
47
54
|
f"Question {i}:\n{qa['question']}\n\nAnswer {i}:\n{qa['answer']}\n"
|
|
48
55
|
for i, qa in enumerate(question_answer_pairs, 1)
|
|
49
56
|
)
|
|
50
|
-
logger.debug("Prepared combined content for evaluation: %s", combined_content[:100] + "..." if len(combined_content) > 100 else combined_content)
|
|
51
57
|
|
|
52
58
|
headers = {"Content-Type": "application/json"}
|
|
53
59
|
data = {"contents": [{"parts": [{"text": prompt_text_powerbi(combined_content)}]}]}
|
|
54
|
-
logger.info("Sending API request to %s", self.endpoint)
|
|
55
60
|
response = requests.post(f"{self.endpoint}?key={self.api_key}", headers=headers, json=data)
|
|
56
61
|
|
|
57
62
|
if response.status_code != 200:
|
|
@@ -62,41 +67,32 @@ class GeminiFlashModel:
|
|
|
62
67
|
logger.error("API response missing candidates: %s", response_data)
|
|
63
68
|
raise ValueError("No candidates in API response")
|
|
64
69
|
generated_text = response_data["candidates"][0]["content"]["parts"][0]["text"]
|
|
65
|
-
logger.info("Received API response, parsing generated text")
|
|
66
70
|
return self._parse_response(generated_text)
|
|
67
71
|
|
|
68
|
-
@retry(
|
|
69
|
-
|
|
70
|
-
wait=wait_exponential(multiplier=1, min=4, max=10),
|
|
71
|
-
retry=retry_if_exception_type((requests.exceptions.RequestException,))
|
|
72
|
-
)
|
|
72
|
+
@retry(stop=stop_after_attempt(3), wait=wait_exponential(min=4, max=10),
|
|
73
|
+
retry=retry_if_exception_type((requests.exceptions.RequestException,)))
|
|
73
74
|
def evaluate_visuals(self, question: str, image_folder: str) -> Dict[str, any]:
|
|
74
|
-
logger.info("Starting visual evaluation for question: %s", question)
|
|
75
75
|
folder_path = Path(image_folder)
|
|
76
76
|
images = list(folder_path.glob("*.png"))[:3]
|
|
77
77
|
if not images:
|
|
78
|
-
logger.error("No PNG images found in folder: %s", image_folder)
|
|
79
78
|
raise ProcessingError(f"No PNG images found in {image_folder}")
|
|
80
|
-
logger.info("Found %d PNG images for evaluation: %s", len(images), [img.name for img in images])
|
|
81
|
-
|
|
82
79
|
prompt = (
|
|
83
|
-
"Evaluate the Power BI report visuals based on the
|
|
80
|
+
"Evaluate the Power BI report visuals based on the provided task. The visuals are professional dashboards designed for enterprise use.\n\n"
|
|
84
81
|
f"Task: {question}\n\n"
|
|
85
82
|
f"Screenshots: {[str(img.name) for img in images]}\n\n"
|
|
86
|
-
"
|
|
87
|
-
"- Clarity: Are visuals clear
|
|
88
|
-
"- Appropriateness: Are
|
|
89
|
-
"-
|
|
90
|
-
"-
|
|
91
|
-
"-
|
|
92
|
-
"
|
|
93
|
-
"
|
|
94
|
-
"
|
|
95
|
-
"
|
|
83
|
+
"Evaluate based on the following criteria, assigning a score out of 100:z\n"
|
|
84
|
+
"- Clarity (30%): Are visuals clear, with readable labels, titles, and legends?\n"
|
|
85
|
+
"- Appropriateness (30%): Are chart types (e.g., bar, line, pie) suitable for the data and task?\n"
|
|
86
|
+
"- Color Usage (20%): Are colors consistent, accessible, and visually appealing? Consider contrast and colorblind accessibility.\n"
|
|
87
|
+
"- Interactivity (20%): Do visible slicers, filters, or tooltips enhance usability and data exploration?\n\n"
|
|
88
|
+
"Provide a score (0-100) that reflects the overall quality, considering the enterprise context. Avoid overly harsh penalties for minor issues.\n"
|
|
89
|
+
"Provide concise, supportive feedback for beginners, highlighting strengths and areas for improvement.\n\n"
|
|
90
|
+
"Structure the response as:\n"
|
|
91
|
+
"Score: [SCORE]/100\n"
|
|
92
|
+
"Feedback: [FEEDBACK]"
|
|
96
93
|
)
|
|
97
94
|
parts = [{"text": prompt}]
|
|
98
95
|
for img in images:
|
|
99
|
-
logger.debug("Processing image: %s", img.name)
|
|
100
96
|
with Image.open(img) as pil_img:
|
|
101
97
|
pil_img.thumbnail((1024, 1024))
|
|
102
98
|
img_buffer = io.BytesIO()
|
|
@@ -109,28 +105,23 @@ class GeminiFlashModel:
|
|
|
109
105
|
})
|
|
110
106
|
headers = {"Content-Type": "application/json"}
|
|
111
107
|
data = {"contents": [{"parts": parts}]}
|
|
112
|
-
logger.info("Sending visual evaluation API request to %s", self.endpoint)
|
|
113
108
|
response = requests.post(f"{self.endpoint}?key={self.api_key}", headers=headers, json=data)
|
|
114
109
|
if response.status_code != 200:
|
|
115
|
-
logger.error("
|
|
110
|
+
logger.error("API request failed: Status %d, Response: %s", response.status_code, response.text)
|
|
116
111
|
raise Exception(f"API call failed: {response.status_code} - {response.text}")
|
|
117
112
|
response_data = response.json()
|
|
118
113
|
if not response_data.get("candidates"):
|
|
119
|
-
logger.error("
|
|
114
|
+
logger.error("API response missing candidates: %s", response_data)
|
|
120
115
|
raise ValueError("No candidates in API response")
|
|
121
116
|
output_text = response_data["candidates"][0]["content"]["parts"][0]["text"]
|
|
122
|
-
logger.info("Received visual API response, parsing output")
|
|
123
117
|
score_match = re.search(r"Score:\s*(\d+)(?:/100)?", output_text)
|
|
124
118
|
feedback_match = re.search(r"Feedback:\s*(.*)", output_text, re.DOTALL)
|
|
125
|
-
|
|
119
|
+
return {
|
|
126
120
|
"score": int(score_match.group(1)) if score_match else 0,
|
|
127
121
|
"feedback": feedback_match.group(1).strip() if feedback_match else "No visual feedback generated"
|
|
128
122
|
}
|
|
129
|
-
logger.info("Visual evaluation completed: Score=%d, Feedback=%s", result["score"], result["feedback"][:50] + "..." if len(result["feedback"]) > 50 else result["feedback"])
|
|
130
|
-
return result
|
|
131
123
|
|
|
132
124
|
def _parse_response(self, text: str) -> Dict[str, any]:
|
|
133
|
-
logger.info("Parsing API response text")
|
|
134
125
|
result = {"score": 0, "feedback": "Evaluation not returned by API.", "issues": [], "recommendations": []}
|
|
135
126
|
try:
|
|
136
127
|
lines = text.split("\n")
|
|
@@ -142,136 +133,97 @@ class GeminiFlashModel:
|
|
|
142
133
|
try:
|
|
143
134
|
result["score"] = int(line.split(":")[1].split("/")[0].strip())
|
|
144
135
|
score_found = True
|
|
145
|
-
logger.info("Parsed score: %d", result["score"])
|
|
146
136
|
except ValueError:
|
|
147
137
|
result["issues"].append("Failed to parse score from API response")
|
|
148
|
-
logger.error("Failed to parse score from response: %s", line)
|
|
149
138
|
continue
|
|
150
139
|
elif score_found:
|
|
151
140
|
feedback_lines.append(line)
|
|
152
141
|
if feedback_lines:
|
|
153
142
|
result["feedback"] = "\n".join(feedback_lines).strip()
|
|
154
|
-
logger.debug("Parsed feedback: %s", result["feedback"][:50] + "..." if len(result["feedback"]) > 50 else result["feedback"])
|
|
155
143
|
return result
|
|
156
144
|
except Exception as e:
|
|
157
145
|
result["issues"].append(str(e))
|
|
158
|
-
logger.error("Error parsing response: %s", str(e))
|
|
159
146
|
return result
|
|
160
147
|
|
|
148
|
+
|
|
161
149
|
class PowerBIProcessor:
|
|
162
150
|
def extract_datamodel(self, pbit_file_path: str) -> Dict:
|
|
163
|
-
logger.info("Extracting data model from PBIT file: %s", pbit_file_path)
|
|
164
151
|
if not os.path.exists(pbit_file_path):
|
|
165
|
-
logger.error("PBIT file does not exist: %s", pbit_file_path)
|
|
166
152
|
raise ProcessingError(f"PBIT file not found: {pbit_file_path}")
|
|
167
153
|
folder_path = os.path.dirname(pbit_file_path)
|
|
168
154
|
file_name = os.path.splitext(os.path.basename(pbit_file_path))[0]
|
|
169
155
|
zip_file = os.path.join(folder_path, f"{file_name}.zip")
|
|
170
156
|
export_path = os.path.join(folder_path, "export")
|
|
171
|
-
logger.debug("Cleaning up temporary files: %s, %s", zip_file, export_path)
|
|
172
157
|
self._cleanup(zip_file, export_path)
|
|
173
158
|
try:
|
|
174
|
-
logger.info("Renaming PBIT to ZIP: %s -> %s", pbit_file_path, zip_file)
|
|
175
159
|
os.rename(pbit_file_path, zip_file)
|
|
176
160
|
if not zipfile.is_zipfile(zip_file):
|
|
177
|
-
logger.error("File is not a valid ZIP: %s", zip_file)
|
|
178
161
|
raise ProcessingError(f"File is not a valid ZIP: {zip_file}")
|
|
179
|
-
logger.info("Extracting ZIP contents to: %s", export_path)
|
|
180
162
|
with zipfile.ZipFile(zip_file, "r") as zip_ref:
|
|
181
163
|
zip_ref.extractall(export_path)
|
|
182
164
|
schema_path = os.path.join(export_path, "DataModelSchema")
|
|
183
165
|
txt_path = os.path.join(export_path, "DataModelSchema.txt")
|
|
184
|
-
logger.debug("Renaming schema file: %s -> %s", schema_path, txt_path)
|
|
185
166
|
os.rename(schema_path, txt_path)
|
|
186
|
-
logger.info("Reading DataModelSchema file: %s", txt_path)
|
|
187
167
|
with open(txt_path, "r", encoding="utf-16-le") as file:
|
|
188
|
-
|
|
189
|
-
logger.info("Successfully extracted data model from PBIT file")
|
|
190
|
-
return data
|
|
168
|
+
return json.load(file)
|
|
191
169
|
except UnicodeDecodeError as e:
|
|
192
170
|
logger.error("Failed to decode DataModelSchema: %s", str(e))
|
|
193
171
|
raise ProcessingError(f"Invalid encoding in DataModelSchema: {e}")
|
|
194
172
|
except Exception as e:
|
|
195
|
-
logger.error("Failed to extract DataModelSchema: %s", str(e))
|
|
196
173
|
raise ProcessingError(f"Failed to extract DataModelSchema: {e}")
|
|
197
174
|
finally:
|
|
198
|
-
logger.debug("Cleaning up temporary files after extraction")
|
|
199
175
|
self._cleanup(zip_file, export_path)
|
|
200
176
|
|
|
201
177
|
def extract_model_data(self, data: Dict) -> Dict:
|
|
202
|
-
logger.info("Extracting model data from data model")
|
|
203
178
|
try:
|
|
204
179
|
tables = data.get("model", {}).get("tables", [])
|
|
205
180
|
relationships = data.get("model", {}).get("relationships", [])
|
|
206
|
-
|
|
181
|
+
return {
|
|
207
182
|
"Calculated Measures": self._get_measures(tables),
|
|
208
183
|
"Tables": self._get_tables_and_columns(tables),
|
|
209
184
|
"Relationships": self._get_relationships(relationships)
|
|
210
185
|
}
|
|
211
|
-
logger.info("Extracted model data: %d measures, %d tables, %d relationships",
|
|
212
|
-
len(result["Calculated Measures"]), len(result["Tables"]), len(result["Relationships"]))
|
|
213
|
-
return result
|
|
214
186
|
except Exception as e:
|
|
215
|
-
logger.error("Failed to extract model data: %s", str(e))
|
|
216
187
|
raise ProcessingError(f"Failed to extract model data: {e}")
|
|
217
188
|
|
|
218
189
|
def process_pdf(self, pdf_path: str, output_dir: str = "outputimages", num_pages: int = 3) -> List[str]:
|
|
219
|
-
logger.info("Processing PDF file: %s", pdf_path)
|
|
220
190
|
try:
|
|
221
191
|
if not os.path.exists(pdf_path):
|
|
222
|
-
logger.error("PDF file does not exist: %s", pdf_path)
|
|
223
192
|
raise ProcessingError(f"PDF file not found: {pdf_path}")
|
|
224
|
-
logger.debug("Creating output directory: %s", output_dir)
|
|
225
193
|
os.makedirs(output_dir, exist_ok=True)
|
|
226
|
-
logger.info("Converting PDF pages to images (max %d pages)", num_pages)
|
|
227
194
|
pages = convert_from_path(pdf_path, first_page=1, last_page=num_pages)
|
|
228
195
|
image_paths = []
|
|
229
196
|
for i, page in enumerate(pages):
|
|
230
197
|
image_path = os.path.join(output_dir, f"page_{i + 1}.png")
|
|
231
|
-
logger.debug("Saving page %d as PNG: %s", i + 1, image_path)
|
|
232
198
|
page.save(image_path, "PNG")
|
|
233
199
|
image_paths.append(image_path)
|
|
234
|
-
logger.info("Successfully processed %d pages from PDF", len(image_paths))
|
|
235
|
-
logger.debug("Removing original PDF file: %s", pdf_path)
|
|
236
200
|
os.remove(pdf_path)
|
|
237
201
|
return image_paths
|
|
238
202
|
except Exception as e:
|
|
239
|
-
logger.error("Failed to process PDF: %s", str(e))
|
|
240
203
|
raise ProcessingError(f"Failed to process PDF: {e}")
|
|
241
204
|
|
|
242
205
|
def extract_zip(self, zip_path: str, extract_path: str) -> tuple[str, str | None]:
|
|
243
|
-
logger.info("Extracting ZIP file: %s", zip_path)
|
|
244
206
|
try:
|
|
245
207
|
if not os.path.exists(zip_path):
|
|
246
|
-
logger.error("ZIP file does not exist: %s", zip_path)
|
|
247
208
|
raise ProcessingError(f"ZIP file not found: {zip_path}")
|
|
248
209
|
if not zipfile.is_zipfile(zip_path):
|
|
249
|
-
logger.error("File is not a valid ZIP: %s", zip_path)
|
|
250
210
|
raise ProcessingError(f"File is not a valid ZIP: {zip_path}")
|
|
251
|
-
logger.debug("Creating extraction directory: %s", extract_path)
|
|
252
211
|
os.makedirs(extract_path, exist_ok=True)
|
|
253
212
|
with zipfile.ZipFile(zip_path, "r") as zip_ref:
|
|
254
|
-
logger.info("Extracting ZIP contents to: %s", extract_path)
|
|
255
213
|
zip_ref.extractall(extract_path)
|
|
256
214
|
pbit_files = list(Path(extract_path).glob("*.pbit"))
|
|
257
215
|
pdf_files = list(Path(extract_path).glob("*.pdf"))
|
|
258
|
-
logger.info("Found %d PBIT files and %d PDF files in ZIP", len(pbit_files), len(pdf_files))
|
|
259
216
|
if not pbit_files:
|
|
260
|
-
logger.error("No PBIT files found in ZIP")
|
|
261
217
|
raise ProcessingError("ZIP file must contain at least one .pbit file")
|
|
262
218
|
if len(pbit_files) > 1:
|
|
263
|
-
logger.error("Multiple PBIT files found in ZIP: %s", [str(p) for p in pbit_files])
|
|
264
219
|
raise ProcessingError("ZIP file contains multiple .pbit files")
|
|
265
220
|
pdf_path = str(pdf_files[0]) if pdf_files else None
|
|
266
|
-
logger.info("Extracted PBIT file: %s, PDF file: %s", str(pbit_files[0]), pdf_path)
|
|
267
221
|
return str(pbit_files[0]), pdf_path
|
|
268
222
|
except Exception as e:
|
|
269
|
-
logger.error("Failed to extract ZIP file: %s", str(e))
|
|
270
223
|
raise ProcessingError(f"Failed to extract ZIP file: {e}")
|
|
271
224
|
|
|
272
225
|
@staticmethod
|
|
273
226
|
def _get_measures(tables: List[Dict]) -> List[Dict]:
|
|
274
|
-
logger.debug("Extracting measures from tables")
|
|
275
227
|
measures = []
|
|
276
228
|
for table in tables:
|
|
277
229
|
if "measures" in table:
|
|
@@ -279,53 +231,48 @@ class PowerBIProcessor:
|
|
|
279
231
|
measures.append({
|
|
280
232
|
"Table": table["name"],
|
|
281
233
|
"Name": measure["name"],
|
|
282
|
-
"Expression": " ".join(measure.get("expression", "")) if isinstance(measure.get("expression"),
|
|
234
|
+
"Expression": " ".join(measure.get("expression", "")) if isinstance(measure.get("expression"),
|
|
235
|
+
list) else measure.get(
|
|
236
|
+
"expression", ""),
|
|
283
237
|
"FormatString": measure.get("formatString", "")
|
|
284
238
|
})
|
|
285
|
-
logger.debug("Extracted %d measures", len(measures))
|
|
286
239
|
return measures
|
|
287
240
|
|
|
288
241
|
@staticmethod
|
|
289
242
|
def _get_tables_and_columns(tables: List[Dict]) -> List[Dict]:
|
|
290
|
-
logger.debug("Extracting tables and columns")
|
|
291
243
|
table_info = []
|
|
292
244
|
for table in tables:
|
|
293
|
-
columns = [{"Column Name": col["name"], "Data Type": col.get("dataType", "Unknown"),
|
|
294
|
-
|
|
245
|
+
columns = [{"Column Name": col["name"], "Data Type": col.get("dataType", "Unknown"),
|
|
246
|
+
"Source Column": col.get("sourceColumn", "N/A"), "Calculated": col.get("type") == "calculated"}
|
|
247
|
+
for col in table.get("columns", [])]
|
|
248
|
+
expressions = [part["source"]["expression"] for part in table.get("partitions", []) if
|
|
249
|
+
part["source"].get("expression")]
|
|
295
250
|
table_info.append({"Table Name": table["name"], "Columns": columns, "Expressions": expressions})
|
|
296
|
-
logger.debug("Extracted %d tables", len(table_info))
|
|
297
251
|
return table_info
|
|
298
252
|
|
|
299
253
|
@staticmethod
|
|
300
254
|
def _get_relationships(relationships: List[Dict]) -> List[Dict]:
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
return result
|
|
255
|
+
return [{"From Table": rel["fromTable"], "From Column": rel["fromColumn"], "To Table": rel["toTable"],
|
|
256
|
+
"To Column": rel["toColumn"], "Join Behavior": rel.get("joinOnDateBehavior", "N/A")} for rel in
|
|
257
|
+
relationships]
|
|
305
258
|
|
|
306
259
|
@staticmethod
|
|
307
260
|
def _cleanup(*paths: str):
|
|
308
|
-
logger.debug("Cleaning up paths: %s", paths)
|
|
309
261
|
for path in paths:
|
|
310
262
|
if os.path.exists(path):
|
|
311
263
|
if os.path.isfile(path):
|
|
312
|
-
logger.debug("Removing file: %s", path)
|
|
313
264
|
os.remove(path)
|
|
314
265
|
else:
|
|
315
|
-
logger.debug("Removing directory: %s", path)
|
|
316
266
|
shutil.rmtree(path, ignore_errors=True)
|
|
317
|
-
|
|
267
|
+
|
|
318
268
|
|
|
319
269
|
class PowerBIEvaluator:
|
|
320
270
|
def __init__(self, api_key: str):
|
|
321
|
-
logger.info("Initializing PowerBIEvaluator")
|
|
322
271
|
self.api_key = api_key
|
|
323
272
|
self.model = GeminiFlashModel(api_key)
|
|
324
273
|
self.processor = PowerBIProcessor()
|
|
325
|
-
logger.info("PowerBIEvaluator initialized successfully")
|
|
326
274
|
|
|
327
275
|
def evaluate(self, questions: List[str], answer_path: str) -> Dict[str, any]:
|
|
328
|
-
logger.info("Starting evaluation for file: %s with %d questions", answer_path, len(questions))
|
|
329
276
|
try:
|
|
330
277
|
_, ext = os.path.splitext(answer_path)
|
|
331
278
|
ext = ext.lower()
|
|
@@ -334,65 +281,72 @@ class PowerBIEvaluator:
|
|
|
334
281
|
pdf_path = None
|
|
335
282
|
|
|
336
283
|
# Handle input file type
|
|
337
|
-
logger.debug("Checking file extension: %s", ext)
|
|
338
284
|
if ext == ".zip":
|
|
339
|
-
logger.info("Processing ZIP file")
|
|
340
285
|
pbit_path, pdf_path = self.processor.extract_zip(answer_path, extract_path)
|
|
341
286
|
elif ext == ".pbit":
|
|
342
|
-
logger.info("Processing PBIT file directly")
|
|
343
287
|
pbit_path = answer_path
|
|
344
288
|
pdf_path = None
|
|
345
289
|
else:
|
|
346
|
-
logger.error("Invalid file type: %s",
|
|
290
|
+
logger.error("Invalid file type for Power BI: %s", answer_path)
|
|
347
291
|
return {
|
|
348
292
|
"score": 0,
|
|
349
293
|
"feedback": f"Invalid file type: {ext}. Expected .pbit or .zip",
|
|
350
294
|
"issues": ["Invalid file type"],
|
|
351
|
-
"recommendations": []
|
|
295
|
+
"recommendations": [],
|
|
296
|
+
"dax_score": 0,
|
|
297
|
+
"visual_score": 0
|
|
352
298
|
}
|
|
353
299
|
|
|
354
300
|
try:
|
|
355
301
|
# Extract and process the data model from .pbit
|
|
356
|
-
logger.info("Extracting data model from PBIT")
|
|
357
302
|
data_model = self.processor.extract_datamodel(pbit_path)
|
|
358
|
-
logger.info("Extracting model data")
|
|
359
303
|
model_data = self.processor.extract_model_data(data_model)
|
|
360
304
|
answers = [json.dumps(model_data)] * len(questions)
|
|
361
|
-
logger.info("Evaluating DAX with %d question-answer pairs", len(questions))
|
|
362
305
|
dax_result = self.model.evaluate([{"question": q, "answer": a} for q, a in zip(questions, answers)])
|
|
363
306
|
|
|
364
307
|
# Initialize result with DAX evaluation
|
|
365
308
|
result = {
|
|
366
|
-
"score":
|
|
309
|
+
"score": 0,
|
|
367
310
|
"feedback": f"DAX Feedback:\n{dax_result['feedback']}",
|
|
368
311
|
"issues": dax_result["issues"],
|
|
369
|
-
"recommendations": dax_result["recommendations"]
|
|
312
|
+
"recommendations": dax_result["recommendations"],
|
|
313
|
+
"dax_score": dax_result["score"], # Store DAX score
|
|
314
|
+
"visual_score": 0 # Default visual score
|
|
370
315
|
}
|
|
371
|
-
logger.info("DAX evaluation completed: Score=%d", dax_result["score"])
|
|
372
316
|
|
|
373
317
|
# Process PDF and evaluate visuals if present
|
|
374
318
|
if pdf_path:
|
|
375
|
-
logger.info("Processing PDF for visual evaluation: %s", pdf_path)
|
|
376
319
|
try:
|
|
377
|
-
|
|
378
|
-
logger.info("Evaluating visuals with question: %s", questions[0])
|
|
320
|
+
self.processor.process_pdf(pdf_path)
|
|
379
321
|
visual_result = self.model.evaluate_visuals(questions[0], "outputimages")
|
|
380
|
-
|
|
322
|
+
# Apply 70% DAX, 30% visuals scoring
|
|
323
|
+
result["score"] = int(0.7 * dax_result["score"] + 0.3 * visual_result["score"])
|
|
324
|
+
result["visual_score"] = visual_result["score"] # Store visual score
|
|
381
325
|
result["feedback"] += f"\n\nVisual Feedback:\n{visual_result['feedback']}"
|
|
382
326
|
result["issues"].extend([f"Visual: {i}" for i in visual_result.get("issues", [])])
|
|
383
327
|
result["recommendations"].extend(visual_result.get("recommendations", []))
|
|
384
|
-
logger.info("Visual evaluation completed: Score=%d", visual_result["score"])
|
|
385
328
|
except ProcessingError as e:
|
|
386
329
|
logger.warning("Failed to process PDF, proceeding with DAX evaluation only: %s", str(e))
|
|
330
|
+
# Use DAX score only, weighted at 100% if no visuals
|
|
331
|
+
result["score"] = dax_result["score"]
|
|
387
332
|
result["issues"].append(f"Visual evaluation skipped: {str(e)}")
|
|
388
|
-
result["recommendations"].append(
|
|
333
|
+
result["recommendations"].append(
|
|
334
|
+
"Ensure a valid PDF is provided for visual evaluation if intended")
|
|
389
335
|
else:
|
|
390
|
-
|
|
336
|
+
# No PDF provided, use DAX score only
|
|
337
|
+
result["score"] = dax_result["score"]
|
|
338
|
+
result["feedback"] += "\n\nVisual Feedback:\nNo visuals provided for evaluation."
|
|
339
|
+
result["issues"].append("No PDF provided for visual evaluation")
|
|
340
|
+
result["recommendations"].append("Include a PDF with report visuals for complete evaluation")
|
|
341
|
+
|
|
342
|
+
# Print scores with text labels to terminal
|
|
343
|
+
logger.info("[DAX] Score: %d/100", result["dax_score"])
|
|
344
|
+
logger.info("[Visual] Score: %d/100", result["visual_score"])
|
|
345
|
+
logger.info("[Final] Score (70%% DAX, 30%% Visuals): %d/100", result["score"])
|
|
391
346
|
|
|
392
|
-
logger.info("Evaluation completed successfully")
|
|
393
347
|
return result
|
|
394
348
|
finally:
|
|
395
|
-
|
|
349
|
+
# Cleanup temporary files and directories
|
|
396
350
|
self.processor._cleanup(extract_path, "outputimages")
|
|
397
351
|
except Exception as e:
|
|
398
352
|
logger.exception("Failed to evaluate Power BI file %s: %s", answer_path, str(e))
|
|
@@ -401,8 +355,11 @@ class PowerBIEvaluator:
|
|
|
401
355
|
"score": 0,
|
|
402
356
|
"feedback": f"Error processing file: {str(e)}",
|
|
403
357
|
"issues": [str(e)],
|
|
404
|
-
"recommendations": ["Check file formats and API connectivity", "Review logs for detailed errors"]
|
|
358
|
+
"recommendations": ["Check file formats and API connectivity", "Review logs for detailed errors"],
|
|
359
|
+
"dax_score": 0,
|
|
360
|
+
"visual_score": 0
|
|
405
361
|
}
|
|
406
362
|
|
|
363
|
+
|
|
407
364
|
class ProcessingError(Exception):
|
|
408
365
|
pass
|
|
@@ -32,7 +32,7 @@ def prompt_text_python(combined_content):
|
|
|
32
32
|
"- Be honest but supportive\n"
|
|
33
33
|
"- Include specific examples from the provided answers if helpful\n"
|
|
34
34
|
"- Keep language beginner-friendly\n"
|
|
35
|
-
"- Do not give too low marks. You may add from
|
|
35
|
+
"- Do not give too low marks. You may add from 20 up to 25 additional marks for effort or "
|
|
36
36
|
"partial relevance, ensuring the score does not exceed 100."
|
|
37
37
|
)
|
|
38
38
|
|
|
@@ -142,49 +142,23 @@ def prompt_text_ssis(combined_content):
|
|
|
142
142
|
|
|
143
143
|
def prompt_text_powerbi(combined_content: str):
|
|
144
144
|
return (
|
|
145
|
-
"You are a BI professional evaluating Power BI
|
|
146
|
-
"
|
|
147
|
-
"Your evaluation should:\n"
|
|
148
|
-
"- Focus on clarity, correctness, and understanding of Power BI content (DAX, data models, visuals)\n"
|
|
149
|
-
"- Be constructive and encouraging (students are beginners)\n"
|
|
150
|
-
"- Highlight strengths and areas for improvement\n"
|
|
151
|
-
"- Identify major mistakes (e.g., incorrect DAX, poor data modeling, unclear visuals)\n"
|
|
152
|
-
"- Be concise but insightful\n"
|
|
153
|
-
"- Evaluate proper configuration of data model relationships, correctness and logic of DAX formulas, and "
|
|
154
|
-
"clarity of visuals (e.g., appropriate chart types, layout, readability, proper filtering)\n"
|
|
155
|
-
"- Also assess whether the student’s submission demonstrates a proper understanding of "
|
|
156
|
-
"Power BI concepts being tested (e.g., data modeling, DAX calculations, visualization principles), not just technical correctness\n"
|
|
157
|
-
"- If the student's submission is incomplete or too simplistic to fully address the question, "
|
|
158
|
-
"clearly state that it lacks sufficient detail or misses key components, but do not provide "
|
|
159
|
-
"the missing parts or solutions. Instead, suggest they revisit the relevant "
|
|
160
|
-
"Power BI concepts (e.g., data modeling, DAX, or visualization) and encourage deeper exploration\n"
|
|
161
|
-
"- If the student's submission is off-topic or unrelated to the question, "
|
|
162
|
-
"clearly state that the response does not address the question's requirements and "
|
|
163
|
-
"explain why it is irrelevant. Encourage the student to review the question carefully and "
|
|
164
|
-
"focus on the relevant Power BI concepts without providing the correct solution\n"
|
|
165
|
-
"- Do not penalize for advanced efficiency, data source paths, or separate measure tables\n"
|
|
166
|
-
"- Do not lower marks for redundant date tables or missing advanced design features\n\n"
|
|
167
|
-
"Provide feedback in this format:\n\n"
|
|
145
|
+
"You are a BI professional evaluating a beginner student's Power BI submission, including DAX, data models, and visuals.\n\n"
|
|
146
|
+
"Please provide short, clear, and supportive feedback with the following structure:\n\n"
|
|
168
147
|
"=== COMPREHENSIVE EVALUATION ===\n\n"
|
|
169
148
|
"OVERALL SCORE: <score>/100\n\n"
|
|
170
149
|
"FEEDBACK SUMMARY:\n"
|
|
171
150
|
"- What was done well\n"
|
|
172
151
|
"- What needs improvement\n"
|
|
173
152
|
"- Any major issues (e.g., incorrect DAX, missing visuals, poor relationships)\n\n"
|
|
174
|
-
"
|
|
175
|
-
"-
|
|
176
|
-
"-
|
|
177
|
-
"-
|
|
178
|
-
|
|
153
|
+
"Evaluation guidelines:\n"
|
|
154
|
+
"- Focus on clarity, correctness, and understanding of Power BI concepts (DAX, modeling, visuals)\n"
|
|
155
|
+
"- Be concise, constructive, and beginner-friendly\n"
|
|
156
|
+
"- Highlight strengths and areas to improve\n"
|
|
157
|
+
"- Mention if the submission is incomplete or off-topic, but don't provide missing solutions\n"
|
|
158
|
+
"- Do not penalize for efficiency, missing advanced features, or redundant tables\n"
|
|
159
|
+
"- Base score on relevance, correctness, and effort. Incomplete/off-topic work should be scored low, with a small boost for effort if applicable\n\n"
|
|
179
160
|
f"{combined_content}\n"
|
|
180
|
-
"=== EVALUATION COMPLETE
|
|
181
|
-
"Notes:\n"
|
|
182
|
-
"- Be honest but supportive\n"
|
|
183
|
-
"- Include specific examples from the provided answers if helpful\n"
|
|
184
|
-
"- Keep language beginner-friendly\n"
|
|
185
|
-
"- Score submissions based on alignment with the question, effort, and technical correctness. "
|
|
186
|
-
"Off-topic or incomplete submissions should generally score low (e.g., 10-30/100), "
|
|
187
|
-
"but add from 5 up to 10 marks for effort or partial relevance, ensuring the score does not exceed 100."
|
|
161
|
+
"=== EVALUATION COMPLETE ==="
|
|
188
162
|
)
|
|
189
163
|
|
|
190
164
|
|
|
@@ -226,5 +200,6 @@ def prompt_text_powerbi(combined_content: str):
|
|
|
226
200
|
|
|
227
201
|
|
|
228
202
|
|
|
203
|
+
|
|
229
204
|
|
|
230
205
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: QuantumChecker
|
|
3
|
-
Version: 0.2.5
|
|
3
|
+
Version: 0.2.7
|
|
4
4
|
Summary: A package to evaluate homework submissions in Python, SQL, PowerBI, and SSIS.
|
|
5
5
|
Author: Qobiljon
|
|
6
6
|
Author-email: qobiljonkhayrullayev@gmail.com
|
|
@@ -14,6 +14,7 @@ Requires-Dist: tenacity>=8.2.3
|
|
|
14
14
|
Requires-Dist: pdf2image>=1.16.3
|
|
15
15
|
Requires-Dist: python-dotenv>=1.0.0
|
|
16
16
|
Requires-Dist: Pillow>=10.0.0
|
|
17
|
+
Requires-Dist: PyPDF2>=3.0.1
|
|
17
18
|
Dynamic: author
|
|
18
19
|
Dynamic: author-email
|
|
19
20
|
Dynamic: classifier
|
|
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
|
|
|
2
2
|
|
|
3
3
|
setup(
|
|
4
4
|
name="QuantumChecker",
|
|
5
|
-
version="0.2.5",
|
|
5
|
+
version="0.2.7",
|
|
6
6
|
author="Qobiljon",
|
|
7
7
|
author_email="qobiljonkhayrullayev@gmail.com",
|
|
8
8
|
description="A package to evaluate homework submissions in Python, SQL, PowerBI, and SSIS.",
|
|
@@ -17,6 +17,7 @@ setup(
|
|
|
17
17
|
"pdf2image>=1.16.3",
|
|
18
18
|
"python-dotenv>=1.0.0",
|
|
19
19
|
"Pillow>=10.0.0",
|
|
20
|
+
"PyPDF2>=3.0.1",
|
|
20
21
|
],
|
|
21
22
|
classifiers=[
|
|
22
23
|
"Programming Language :: Python :: 3",
|
|
@@ -24,4 +25,4 @@ setup(
|
|
|
24
25
|
"Operating System :: OS Independent",
|
|
25
26
|
],
|
|
26
27
|
include_package_data=True,
|
|
27
|
-
)
|
|
28
|
+
)
|
|
@@ -1,10 +1,12 @@
|
|
|
1
|
+
from pprint import pprint
|
|
2
|
+
|
|
1
3
|
from QuantumCheck import HomeworkEvaluator
|
|
2
4
|
|
|
3
5
|
if __name__ == "__main__":
|
|
4
6
|
evaluator = HomeworkEvaluator()
|
|
5
7
|
|
|
6
8
|
primary_api_key = "AIzaSyD0ptgEixhLLjCWjkyxhqDsUzO16ytQq2c"
|
|
7
|
-
question = "
|
|
9
|
+
question = "Create a dashboard"
|
|
8
10
|
|
|
9
11
|
backup_keys = [
|
|
10
12
|
"BACKUP_KEY_1",
|
|
@@ -15,13 +17,13 @@ if __name__ == "__main__":
|
|
|
15
17
|
]
|
|
16
18
|
|
|
17
19
|
result = evaluator.evaluate_from_content(
|
|
18
|
-
question_content="
|
|
19
|
-
answer_path="../tests/answer/
|
|
20
|
+
question_content="Fuck You",
|
|
21
|
+
answer_path="../tests/answer/real.zip",
|
|
20
22
|
api_key=primary_api_key,
|
|
21
23
|
backup_api_keys=backup_keys
|
|
22
24
|
)
|
|
23
25
|
|
|
24
|
-
|
|
26
|
+
pprint(result)
|
|
25
27
|
|
|
26
28
|
|
|
27
29
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|