QuantumChecker 0.2.6__tar.gz → 0.2.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {quantumchecker-0.2.6 → quantumchecker-0.2.7}/PKG-INFO +2 -1
- {quantumchecker-0.2.6 → quantumchecker-0.2.7}/QuantumCheck/powerbi_evaluator.py +93 -172
- {quantumchecker-0.2.6 → quantumchecker-0.2.7}/QuantumCheck/prompts.py +12 -37
- {quantumchecker-0.2.6 → quantumchecker-0.2.7}/QuantumChecker.egg-info/PKG-INFO +2 -1
- {quantumchecker-0.2.6 → quantumchecker-0.2.7}/QuantumChecker.egg-info/requires.txt +1 -0
- {quantumchecker-0.2.6 → quantumchecker-0.2.7}/setup.py +3 -2
- {quantumchecker-0.2.6 → quantumchecker-0.2.7}/tests/test.py +6 -4
- {quantumchecker-0.2.6 → quantumchecker-0.2.7}/QuantumCheck/__init__.py +0 -0
- {quantumchecker-0.2.6 → quantumchecker-0.2.7}/QuantumCheck/main.py +0 -0
- {quantumchecker-0.2.6 → quantumchecker-0.2.7}/QuantumCheck/python_evaluator.py +0 -0
- {quantumchecker-0.2.6 → quantumchecker-0.2.7}/QuantumCheck/sql_evaluator.py +0 -0
- {quantumchecker-0.2.6 → quantumchecker-0.2.7}/QuantumCheck/ssis_evaluator.py +0 -0
- {quantumchecker-0.2.6 → quantumchecker-0.2.7}/QuantumChecker.egg-info/SOURCES.txt +0 -0
- {quantumchecker-0.2.6 → quantumchecker-0.2.7}/QuantumChecker.egg-info/dependency_links.txt +0 -0
- {quantumchecker-0.2.6 → quantumchecker-0.2.7}/QuantumChecker.egg-info/top_level.txt +0 -0
- {quantumchecker-0.2.6 → quantumchecker-0.2.7}/README.md +0 -0
- {quantumchecker-0.2.6 → quantumchecker-0.2.7}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: QuantumChecker
|
|
3
|
-
Version: 0.2.6
|
|
3
|
+
Version: 0.2.7
|
|
4
4
|
Summary: A package to evaluate homework submissions in Python, SQL, PowerBI, and SSIS.
|
|
5
5
|
Author: Qobiljon
|
|
6
6
|
Author-email: qobiljonkhayrullayev@gmail.com
|
|
@@ -14,6 +14,7 @@ Requires-Dist: tenacity>=8.2.3
|
|
|
14
14
|
Requires-Dist: pdf2image>=1.16.3
|
|
15
15
|
Requires-Dist: python-dotenv>=1.0.0
|
|
16
16
|
Requires-Dist: Pillow>=10.0.0
|
|
17
|
+
Requires-Dist: PyPDF2>=3.0.1
|
|
17
18
|
Dynamic: author
|
|
18
19
|
Dynamic: author-email
|
|
19
20
|
Dynamic: classifier
|
|
@@ -13,9 +13,20 @@ from dotenv import load_dotenv
|
|
|
13
13
|
from PIL import Image
|
|
14
14
|
import io
|
|
15
15
|
import base64
|
|
16
|
-
import PyPDF2 # Added for PDF validation
|
|
17
16
|
|
|
18
|
-
|
|
17
|
+
|
|
18
|
+
# Placeholder for prompts.py content
|
|
19
|
+
def prompt_text_powerbi(combined_content: str) -> str:
|
|
20
|
+
return f"""
|
|
21
|
+
Evaluate the following Power BI DAX question-answer pairs for correctness, clarity, and appropriateness.
|
|
22
|
+
Provide an overall score out of 100 and concise feedback. Focus on DAX logic and structure.
|
|
23
|
+
Structure the response as:
|
|
24
|
+
OVERALL SCORE: [SCORE]/100
|
|
25
|
+
[FEEDBACK]
|
|
26
|
+
|
|
27
|
+
{combined_content}
|
|
28
|
+
"""
|
|
29
|
+
|
|
19
30
|
|
|
20
31
|
load_dotenv()
|
|
21
32
|
logger = logging.getLogger(__name__)
|
|
@@ -25,34 +36,27 @@ logging.basicConfig(
|
|
|
25
36
|
handlers=[logging.FileHandler("../powerbi_evaluator.log"), logging.StreamHandler()]
|
|
26
37
|
)
|
|
27
38
|
|
|
39
|
+
|
|
28
40
|
class GeminiFlashModel:
|
|
29
41
|
def __init__(self, api_key: str, model_name: str = "gemini-1.5-flash"):
|
|
30
|
-
logger.info("Initializing GeminiFlashModel with model: %s", model_name)
|
|
31
42
|
api_key = os.getenv("GEMINI_API_KEY") or api_key
|
|
32
43
|
if not api_key:
|
|
33
|
-
logger.error("API key not found in environment variables or provided argument")
|
|
34
44
|
raise ValueError("API key not found in .env file or environment variables.")
|
|
35
45
|
self.api_key = api_key
|
|
36
46
|
self.model_name = model_name
|
|
37
47
|
self.endpoint = f"https://generativelanguage.googleapis.com/v1beta/models/{model_name}:generateContent"
|
|
38
|
-
logger.info("GeminiFlashModel initialized successfully with endpoint: %s", self.endpoint)
|
|
39
48
|
|
|
40
|
-
@retry(
|
|
41
|
-
|
|
42
|
-
wait=wait_exponential(multiplier=1, min=4, max=10),
|
|
43
|
-
retry=retry_if_exception_type((requests.exceptions.RequestException,))
|
|
44
|
-
)
|
|
49
|
+
@retry(stop=stop_after_attempt(3), wait=wait_exponential(min=4, max=10),
|
|
50
|
+
retry=retry_if_exception_type((requests.exceptions.RequestException,)))
|
|
45
51
|
def evaluate(self, question_answer_pairs: List[Dict[str, str]]) -> Dict[str, any]:
|
|
46
|
-
logger.info("Starting evaluation of %d question-answer pairs", len(question_answer_pairs))
|
|
52
|
+
logger.info("Starting evaluation of %d Power BI question-answer pairs", len(question_answer_pairs))
|
|
47
53
|
combined_content = "\n\n".join(
|
|
48
54
|
f"Question {i}:\n{qa['question']}\n\nAnswer {i}:\n{qa['answer']}\n"
|
|
49
55
|
for i, qa in enumerate(question_answer_pairs, 1)
|
|
50
56
|
)
|
|
51
|
-
logger.debug("Prepared combined content for evaluation: %s", combined_content[:100] + "..." if len(combined_content) > 100 else combined_content)
|
|
52
57
|
|
|
53
58
|
headers = {"Content-Type": "application/json"}
|
|
54
59
|
data = {"contents": [{"parts": [{"text": prompt_text_powerbi(combined_content)}]}]}
|
|
55
|
-
logger.info("Sending API request to %s", self.endpoint)
|
|
56
60
|
response = requests.post(f"{self.endpoint}?key={self.api_key}", headers=headers, json=data)
|
|
57
61
|
|
|
58
62
|
if response.status_code != 200:
|
|
@@ -63,89 +67,61 @@ class GeminiFlashModel:
|
|
|
63
67
|
logger.error("API response missing candidates: %s", response_data)
|
|
64
68
|
raise ValueError("No candidates in API response")
|
|
65
69
|
generated_text = response_data["candidates"][0]["content"]["parts"][0]["text"]
|
|
66
|
-
logger.info("Received API response, parsing generated text")
|
|
67
70
|
return self._parse_response(generated_text)
|
|
68
71
|
|
|
69
|
-
@retry(
|
|
70
|
-
|
|
71
|
-
wait=wait_exponential(multiplier=1, min=4, max=10),
|
|
72
|
-
retry=retry_if_exception_type((requests.exceptions.RequestException,))
|
|
73
|
-
)
|
|
72
|
+
@retry(stop=stop_after_attempt(3), wait=wait_exponential(min=4, max=10),
|
|
73
|
+
retry=retry_if_exception_type((requests.exceptions.RequestException,)))
|
|
74
74
|
def evaluate_visuals(self, question: str, image_folder: str) -> Dict[str, any]:
|
|
75
|
-
logger.info("Starting visual evaluation for question: %s", question)
|
|
76
75
|
folder_path = Path(image_folder)
|
|
77
76
|
images = list(folder_path.glob("*.png"))[:3]
|
|
78
77
|
if not images:
|
|
79
|
-
logger.error("No PNG images found in folder: %s", image_folder)
|
|
80
78
|
raise ProcessingError(f"No PNG images found in {image_folder}")
|
|
81
|
-
logger.info("Found %d PNG images for evaluation: %s", len(images), [img.name for img in images])
|
|
82
|
-
|
|
83
79
|
prompt = (
|
|
84
|
-
"Evaluate the Power BI report visuals based on the
|
|
80
|
+
"Evaluate the Power BI report visuals based on the provided task. The visuals are professional dashboards designed for enterprise use.\n\n"
|
|
85
81
|
f"Task: {question}\n\n"
|
|
86
82
|
f"Screenshots: {[str(img.name) for img in images]}\n\n"
|
|
87
|
-
"
|
|
88
|
-
"- Clarity: Are visuals clear
|
|
89
|
-
"- Appropriateness: Are
|
|
90
|
-
"-
|
|
91
|
-
"-
|
|
92
|
-
"-
|
|
93
|
-
"
|
|
94
|
-
"
|
|
95
|
-
"
|
|
96
|
-
"
|
|
83
|
+
"Evaluate based on the following criteria, assigning a score out of 100:z\n"
|
|
84
|
+
"- Clarity (30%): Are visuals clear, with readable labels, titles, and legends?\n"
|
|
85
|
+
"- Appropriateness (30%): Are chart types (e.g., bar, line, pie) suitable for the data and task?\n"
|
|
86
|
+
"- Color Usage (20%): Are colors consistent, accessible, and visually appealing? Consider contrast and colorblind accessibility.\n"
|
|
87
|
+
"- Interactivity (20%): Do visible slicers, filters, or tooltips enhance usability and data exploration?\n\n"
|
|
88
|
+
"Provide a score (0-100) that reflects the overall quality, considering the enterprise context. Avoid overly harsh penalties for minor issues.\n"
|
|
89
|
+
"Provide concise, supportive feedback for beginners, highlighting strengths and areas for improvement.\n\n"
|
|
90
|
+
"Structure the response as:\n"
|
|
91
|
+
"Score: [SCORE]/100\n"
|
|
92
|
+
"Feedback: [FEEDBACK]"
|
|
97
93
|
)
|
|
98
94
|
parts = [{"text": prompt}]
|
|
99
95
|
for img in images:
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
"inline_data": {
|
|
111
|
-
"mime_type": "image/png",
|
|
112
|
-
"data": base64.b64encode(img_buffer.getvalue()).decode('utf-8')
|
|
113
|
-
}
|
|
114
|
-
})
|
|
115
|
-
except Exception as e:
|
|
116
|
-
logger.error("Failed to process image %s: %s", img.name, str(e))
|
|
117
|
-
raise ProcessingError(f"Failed to process image {img.name}: {str(e)}")
|
|
96
|
+
with Image.open(img) as pil_img:
|
|
97
|
+
pil_img.thumbnail((1024, 1024))
|
|
98
|
+
img_buffer = io.BytesIO()
|
|
99
|
+
pil_img.save(img_buffer, format="PNG")
|
|
100
|
+
parts.append({
|
|
101
|
+
"inline_data": {
|
|
102
|
+
"mime_type": "image/png",
|
|
103
|
+
"data": base64.b64encode(img_buffer.getvalue()).decode('utf-8')
|
|
104
|
+
}
|
|
105
|
+
})
|
|
118
106
|
headers = {"Content-Type": "application/json"}
|
|
119
107
|
data = {"contents": [{"parts": parts}]}
|
|
120
|
-
logger.info("Sending visual evaluation API request to %s", self.endpoint)
|
|
121
108
|
response = requests.post(f"{self.endpoint}?key={self.api_key}", headers=headers, json=data)
|
|
122
109
|
if response.status_code != 200:
|
|
123
|
-
logger.error("
|
|
110
|
+
logger.error("API request failed: Status %d, Response: %s", response.status_code, response.text)
|
|
124
111
|
raise Exception(f"API call failed: {response.status_code} - {response.text}")
|
|
125
112
|
response_data = response.json()
|
|
126
113
|
if not response_data.get("candidates"):
|
|
127
|
-
logger.error("
|
|
114
|
+
logger.error("API response missing candidates: %s", response_data)
|
|
128
115
|
raise ValueError("No candidates in API response")
|
|
129
116
|
output_text = response_data["candidates"][0]["content"]["parts"][0]["text"]
|
|
130
|
-
logger.info("Received visual API response, parsing output")
|
|
131
117
|
score_match = re.search(r"Score:\s*(\d+)(?:/100)?", output_text)
|
|
132
118
|
feedback_match = re.search(r"Feedback:\s*(.*)", output_text, re.DOTALL)
|
|
133
|
-
|
|
119
|
+
return {
|
|
134
120
|
"score": int(score_match.group(1)) if score_match else 0,
|
|
135
|
-
"feedback": feedback_match.group(1).strip() if feedback_match else "No visual feedback generated"
|
|
136
|
-
"issues": []
|
|
121
|
+
"feedback": feedback_match.group(1).strip() if feedback_match else "No visual feedback generated"
|
|
137
122
|
}
|
|
138
|
-
if not score_match:
|
|
139
|
-
result["issues"].append("Failed to parse score from visual API response")
|
|
140
|
-
logger.warning("Failed to parse score from visual API response")
|
|
141
|
-
if not feedback_match:
|
|
142
|
-
result["issues"].append("Failed to parse feedback from visual API response")
|
|
143
|
-
logger.warning("Failed to parse feedback from visual API response")
|
|
144
|
-
logger.info("Visual evaluation completed: Score=%d, Feedback=%s", result["score"], result["feedback"][:50] + "..." if len(result["feedback"]) > 50 else result["feedback"])
|
|
145
|
-
return result
|
|
146
123
|
|
|
147
124
|
def _parse_response(self, text: str) -> Dict[str, any]:
|
|
148
|
-
logger.info("Parsing API response text")
|
|
149
125
|
result = {"score": 0, "feedback": "Evaluation not returned by API.", "issues": [], "recommendations": []}
|
|
150
126
|
try:
|
|
151
127
|
lines = text.split("\n")
|
|
@@ -157,150 +133,97 @@ class GeminiFlashModel:
|
|
|
157
133
|
try:
|
|
158
134
|
result["score"] = int(line.split(":")[1].split("/")[0].strip())
|
|
159
135
|
score_found = True
|
|
160
|
-
logger.info("Parsed score: %d", result["score"])
|
|
161
136
|
except ValueError:
|
|
162
137
|
result["issues"].append("Failed to parse score from API response")
|
|
163
|
-
logger.error("Failed to parse score from response: %s", line)
|
|
164
138
|
continue
|
|
165
139
|
elif score_found:
|
|
166
140
|
feedback_lines.append(line)
|
|
167
141
|
if feedback_lines:
|
|
168
142
|
result["feedback"] = "\n".join(feedback_lines).strip()
|
|
169
|
-
logger.debug("Parsed feedback: %s", result["feedback"][:50] + "..." if len(result["feedback"]) > 50 else result["feedback"])
|
|
170
143
|
return result
|
|
171
144
|
except Exception as e:
|
|
172
145
|
result["issues"].append(str(e))
|
|
173
|
-
logger.error("Error parsing response: %s", str(e))
|
|
174
146
|
return result
|
|
175
147
|
|
|
148
|
+
|
|
176
149
|
class PowerBIProcessor:
|
|
177
150
|
def extract_datamodel(self, pbit_file_path: str) -> Dict:
|
|
178
|
-
logger.info("Extracting data model from PBIT file: %s", pbit_file_path)
|
|
179
151
|
if not os.path.exists(pbit_file_path):
|
|
180
|
-
logger.error("PBIT file does not exist: %s", pbit_file_path)
|
|
181
152
|
raise ProcessingError(f"PBIT file not found: {pbit_file_path}")
|
|
182
153
|
folder_path = os.path.dirname(pbit_file_path)
|
|
183
154
|
file_name = os.path.splitext(os.path.basename(pbit_file_path))[0]
|
|
184
155
|
zip_file = os.path.join(folder_path, f"{file_name}.zip")
|
|
185
156
|
export_path = os.path.join(folder_path, "export")
|
|
186
|
-
logger.debug("Cleaning up temporary files: %s, %s", zip_file, export_path)
|
|
187
157
|
self._cleanup(zip_file, export_path)
|
|
188
158
|
try:
|
|
189
|
-
logger.info("Renaming PBIT to ZIP: %s -> %s", pbit_file_path, zip_file)
|
|
190
159
|
os.rename(pbit_file_path, zip_file)
|
|
191
160
|
if not zipfile.is_zipfile(zip_file):
|
|
192
|
-
logger.error("File is not a valid ZIP: %s", zip_file)
|
|
193
161
|
raise ProcessingError(f"File is not a valid ZIP: {zip_file}")
|
|
194
|
-
logger.info("Extracting ZIP contents to: %s", export_path)
|
|
195
162
|
with zipfile.ZipFile(zip_file, "r") as zip_ref:
|
|
196
163
|
zip_ref.extractall(export_path)
|
|
197
164
|
schema_path = os.path.join(export_path, "DataModelSchema")
|
|
198
165
|
txt_path = os.path.join(export_path, "DataModelSchema.txt")
|
|
199
|
-
logger.debug("Renaming schema file: %s -> %s", schema_path, txt_path)
|
|
200
166
|
os.rename(schema_path, txt_path)
|
|
201
|
-
logger.info("Reading DataModelSchema file: %s", txt_path)
|
|
202
167
|
with open(txt_path, "r", encoding="utf-16-le") as file:
|
|
203
|
-
|
|
204
|
-
logger.info("Successfully extracted data model from PBIT file")
|
|
205
|
-
return data
|
|
168
|
+
return json.load(file)
|
|
206
169
|
except UnicodeDecodeError as e:
|
|
207
170
|
logger.error("Failed to decode DataModelSchema: %s", str(e))
|
|
208
171
|
raise ProcessingError(f"Invalid encoding in DataModelSchema: {e}")
|
|
209
172
|
except Exception as e:
|
|
210
|
-
logger.error("Failed to extract DataModelSchema: %s", str(e))
|
|
211
173
|
raise ProcessingError(f"Failed to extract DataModelSchema: {e}")
|
|
212
174
|
finally:
|
|
213
|
-
logger.debug("Cleaning up temporary files after extraction")
|
|
214
175
|
self._cleanup(zip_file, export_path)
|
|
215
176
|
|
|
216
177
|
def extract_model_data(self, data: Dict) -> Dict:
|
|
217
|
-
logger.info("Extracting model data from data model")
|
|
218
178
|
try:
|
|
219
179
|
tables = data.get("model", {}).get("tables", [])
|
|
220
180
|
relationships = data.get("model", {}).get("relationships", [])
|
|
221
|
-
|
|
181
|
+
return {
|
|
222
182
|
"Calculated Measures": self._get_measures(tables),
|
|
223
183
|
"Tables": self._get_tables_and_columns(tables),
|
|
224
184
|
"Relationships": self._get_relationships(relationships)
|
|
225
185
|
}
|
|
226
|
-
logger.info("Extracted model data: %d measures, %d tables, %d relationships",
|
|
227
|
-
len(result["Calculated Measures"]), len(result["Tables"]), len(result["Relationships"]))
|
|
228
|
-
return result
|
|
229
186
|
except Exception as e:
|
|
230
|
-
logger.error("Failed to extract model data: %s", str(e))
|
|
231
187
|
raise ProcessingError(f"Failed to extract model data: {e}")
|
|
232
188
|
|
|
233
189
|
def process_pdf(self, pdf_path: str, output_dir: str = "outputimages", num_pages: int = 3) -> List[str]:
|
|
234
|
-
logger.info("Processing PDF file: %s", pdf_path)
|
|
235
190
|
try:
|
|
236
191
|
if not os.path.exists(pdf_path):
|
|
237
|
-
logger.error("PDF file does not exist: %s", pdf_path)
|
|
238
192
|
raise ProcessingError(f"PDF file not found: {pdf_path}")
|
|
239
|
-
# Validate PDF
|
|
240
|
-
try:
|
|
241
|
-
with open(pdf_path, "rb") as f:
|
|
242
|
-
pdf_reader = PyPDF2.PdfReader(f)
|
|
243
|
-
if len(pdf_reader.pages) == 0:
|
|
244
|
-
logger.error("PDF is empty: %s", pdf_path)
|
|
245
|
-
raise ProcessingError(f"PDF is empty: {pdf_path}")
|
|
246
|
-
logger.info("PDF validated, contains %d pages", len(pdf_reader.pages))
|
|
247
|
-
except Exception as e:
|
|
248
|
-
logger.error("Invalid PDF file: %s", str(e))
|
|
249
|
-
raise ProcessingError(f"Invalid PDF file: {str(e)}")
|
|
250
|
-
logger.debug("Creating output directory: %s", output_dir)
|
|
251
193
|
os.makedirs(output_dir, exist_ok=True)
|
|
252
|
-
|
|
253
|
-
pages = convert_from_path(pdf_path, first_page=1, last_page=min(num_pages, len(pdf_reader.pages)))
|
|
254
|
-
if not pages:
|
|
255
|
-
logger.error("No pages converted from PDF: %s", pdf_path)
|
|
256
|
-
raise ProcessingError(f"No pages converted from PDF: {pdf_path}")
|
|
194
|
+
pages = convert_from_path(pdf_path, first_page=1, last_page=num_pages)
|
|
257
195
|
image_paths = []
|
|
258
196
|
for i, page in enumerate(pages):
|
|
259
197
|
image_path = os.path.join(output_dir, f"page_{i + 1}.png")
|
|
260
|
-
logger.debug("Saving page %d as PNG: %s", i + 1, image_path)
|
|
261
198
|
page.save(image_path, "PNG")
|
|
262
199
|
image_paths.append(image_path)
|
|
263
|
-
|
|
200
|
+
os.remove(pdf_path)
|
|
264
201
|
return image_paths
|
|
265
202
|
except Exception as e:
|
|
266
|
-
|
|
267
|
-
raise ProcessingError(f"Failed to process PDF: {str(e)}")
|
|
268
|
-
finally:
|
|
269
|
-
logger.debug("Not removing PDF file to allow debugging: %s", pdf_path)
|
|
203
|
+
raise ProcessingError(f"Failed to process PDF: {e}")
|
|
270
204
|
|
|
271
205
|
def extract_zip(self, zip_path: str, extract_path: str) -> tuple[str, str | None]:
|
|
272
|
-
logger.info("Extracting ZIP file: %s", zip_path)
|
|
273
206
|
try:
|
|
274
207
|
if not os.path.exists(zip_path):
|
|
275
|
-
logger.error("ZIP file does not exist: %s", zip_path)
|
|
276
208
|
raise ProcessingError(f"ZIP file not found: {zip_path}")
|
|
277
209
|
if not zipfile.is_zipfile(zip_path):
|
|
278
|
-
logger.error("File is not a valid ZIP: %s", zip_path)
|
|
279
210
|
raise ProcessingError(f"File is not a valid ZIP: {zip_path}")
|
|
280
|
-
logger.debug("Creating extraction directory: %s", extract_path)
|
|
281
211
|
os.makedirs(extract_path, exist_ok=True)
|
|
282
212
|
with zipfile.ZipFile(zip_path, "r") as zip_ref:
|
|
283
|
-
logger.info("Extracting ZIP contents to: %s", extract_path)
|
|
284
213
|
zip_ref.extractall(extract_path)
|
|
285
214
|
pbit_files = list(Path(extract_path).glob("*.pbit"))
|
|
286
215
|
pdf_files = list(Path(extract_path).glob("*.pdf"))
|
|
287
|
-
logger.info("Found %d PBIT files and %d PDF files in ZIP", len(pbit_files), len(pdf_files))
|
|
288
216
|
if not pbit_files:
|
|
289
|
-
logger.error("No PBIT files found in ZIP")
|
|
290
217
|
raise ProcessingError("ZIP file must contain at least one .pbit file")
|
|
291
218
|
if len(pbit_files) > 1:
|
|
292
|
-
logger.error("Multiple PBIT files found in ZIP: %s", [str(p) for p in pbit_files])
|
|
293
219
|
raise ProcessingError("ZIP file contains multiple .pbit files")
|
|
294
220
|
pdf_path = str(pdf_files[0]) if pdf_files else None
|
|
295
|
-
logger.info("Extracted PBIT file: %s, PDF file: %s", str(pbit_files[0]), pdf_path)
|
|
296
221
|
return str(pbit_files[0]), pdf_path
|
|
297
222
|
except Exception as e:
|
|
298
|
-
logger.error("Failed to extract ZIP file: %s", str(e))
|
|
299
223
|
raise ProcessingError(f"Failed to extract ZIP file: {e}")
|
|
300
224
|
|
|
301
225
|
@staticmethod
|
|
302
226
|
def _get_measures(tables: List[Dict]) -> List[Dict]:
|
|
303
|
-
logger.debug("Extracting measures from tables")
|
|
304
227
|
measures = []
|
|
305
228
|
for table in tables:
|
|
306
229
|
if "measures" in table:
|
|
@@ -308,53 +231,48 @@ class PowerBIProcessor:
|
|
|
308
231
|
measures.append({
|
|
309
232
|
"Table": table["name"],
|
|
310
233
|
"Name": measure["name"],
|
|
311
|
-
"Expression": " ".join(measure.get("expression", "")) if isinstance(measure.get("expression"),
|
|
234
|
+
"Expression": " ".join(measure.get("expression", "")) if isinstance(measure.get("expression"),
|
|
235
|
+
list) else measure.get(
|
|
236
|
+
"expression", ""),
|
|
312
237
|
"FormatString": measure.get("formatString", "")
|
|
313
238
|
})
|
|
314
|
-
logger.debug("Extracted %d measures", len(measures))
|
|
315
239
|
return measures
|
|
316
240
|
|
|
317
241
|
@staticmethod
|
|
318
242
|
def _get_tables_and_columns(tables: List[Dict]) -> List[Dict]:
|
|
319
|
-
logger.debug("Extracting tables and columns")
|
|
320
243
|
table_info = []
|
|
321
244
|
for table in tables:
|
|
322
|
-
columns = [{"Column Name": col["name"], "Data Type": col.get("dataType", "Unknown"),
|
|
323
|
-
|
|
245
|
+
columns = [{"Column Name": col["name"], "Data Type": col.get("dataType", "Unknown"),
|
|
246
|
+
"Source Column": col.get("sourceColumn", "N/A"), "Calculated": col.get("type") == "calculated"}
|
|
247
|
+
for col in table.get("columns", [])]
|
|
248
|
+
expressions = [part["source"]["expression"] for part in table.get("partitions", []) if
|
|
249
|
+
part["source"].get("expression")]
|
|
324
250
|
table_info.append({"Table Name": table["name"], "Columns": columns, "Expressions": expressions})
|
|
325
|
-
logger.debug("Extracted %d tables", len(table_info))
|
|
326
251
|
return table_info
|
|
327
252
|
|
|
328
253
|
@staticmethod
|
|
329
254
|
def _get_relationships(relationships: List[Dict]) -> List[Dict]:
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
return result
|
|
255
|
+
return [{"From Table": rel["fromTable"], "From Column": rel["fromColumn"], "To Table": rel["toTable"],
|
|
256
|
+
"To Column": rel["toColumn"], "Join Behavior": rel.get("joinOnDateBehavior", "N/A")} for rel in
|
|
257
|
+
relationships]
|
|
334
258
|
|
|
335
259
|
@staticmethod
|
|
336
260
|
def _cleanup(*paths: str):
|
|
337
|
-
logger.debug("Cleaning up paths: %s", paths)
|
|
338
261
|
for path in paths:
|
|
339
262
|
if os.path.exists(path):
|
|
340
263
|
if os.path.isfile(path):
|
|
341
|
-
logger.debug("Removing file: %s", path)
|
|
342
264
|
os.remove(path)
|
|
343
265
|
else:
|
|
344
|
-
logger.debug("Removing directory: %s", path)
|
|
345
266
|
shutil.rmtree(path, ignore_errors=True)
|
|
346
|
-
|
|
267
|
+
|
|
347
268
|
|
|
348
269
|
class PowerBIEvaluator:
|
|
349
270
|
def __init__(self, api_key: str):
|
|
350
|
-
logger.info("Initializing PowerBIEvaluator")
|
|
351
271
|
self.api_key = api_key
|
|
352
272
|
self.model = GeminiFlashModel(api_key)
|
|
353
273
|
self.processor = PowerBIProcessor()
|
|
354
|
-
logger.info("PowerBIEvaluator initialized successfully")
|
|
355
274
|
|
|
356
275
|
def evaluate(self, questions: List[str], answer_path: str) -> Dict[str, any]:
|
|
357
|
-
logger.info("Starting evaluation for file: %s with %d questions", answer_path, len(questions))
|
|
358
276
|
try:
|
|
359
277
|
_, ext = os.path.splitext(answer_path)
|
|
360
278
|
ext = ext.lower()
|
|
@@ -363,72 +281,72 @@ class PowerBIEvaluator:
|
|
|
363
281
|
pdf_path = None
|
|
364
282
|
|
|
365
283
|
# Handle input file type
|
|
366
|
-
logger.debug("Checking file extension: %s", ext)
|
|
367
284
|
if ext == ".zip":
|
|
368
|
-
logger.info("Processing ZIP file")
|
|
369
285
|
pbit_path, pdf_path = self.processor.extract_zip(answer_path, extract_path)
|
|
370
286
|
elif ext == ".pbit":
|
|
371
|
-
logger.info("Processing PBIT file directly")
|
|
372
287
|
pbit_path = answer_path
|
|
373
288
|
pdf_path = None
|
|
374
289
|
else:
|
|
375
|
-
logger.error("Invalid file type: %s",
|
|
290
|
+
logger.error("Invalid file type for Power BI: %s", answer_path)
|
|
376
291
|
return {
|
|
377
292
|
"score": 0,
|
|
378
293
|
"feedback": f"Invalid file type: {ext}. Expected .pbit or .zip",
|
|
379
294
|
"issues": ["Invalid file type"],
|
|
380
|
-
"recommendations": []
|
|
295
|
+
"recommendations": [],
|
|
296
|
+
"dax_score": 0,
|
|
297
|
+
"visual_score": 0
|
|
381
298
|
}
|
|
382
299
|
|
|
383
300
|
try:
|
|
384
301
|
# Extract and process the data model from .pbit
|
|
385
|
-
logger.info("Extracting data model from PBIT")
|
|
386
302
|
data_model = self.processor.extract_datamodel(pbit_path)
|
|
387
|
-
logger.info("Extracting model data")
|
|
388
303
|
model_data = self.processor.extract_model_data(data_model)
|
|
389
304
|
answers = [json.dumps(model_data)] * len(questions)
|
|
390
|
-
logger.info("Evaluating DAX with %d question-answer pairs", len(questions))
|
|
391
305
|
dax_result = self.model.evaluate([{"question": q, "answer": a} for q, a in zip(questions, answers)])
|
|
392
306
|
|
|
393
307
|
# Initialize result with DAX evaluation
|
|
394
308
|
result = {
|
|
395
|
-
"score":
|
|
309
|
+
"score": 0,
|
|
396
310
|
"feedback": f"DAX Feedback:\n{dax_result['feedback']}",
|
|
397
311
|
"issues": dax_result["issues"],
|
|
398
|
-
"recommendations": dax_result["recommendations"]
|
|
312
|
+
"recommendations": dax_result["recommendations"],
|
|
313
|
+
"dax_score": dax_result["score"], # Store DAX score
|
|
314
|
+
"visual_score": 0 # Default visual score
|
|
399
315
|
}
|
|
400
|
-
logger.info("DAX evaluation completed: Score=%d", dax_result["score"])
|
|
401
316
|
|
|
402
317
|
# Process PDF and evaluate visuals if present
|
|
403
318
|
if pdf_path:
|
|
404
|
-
logger.info("Processing PDF for visual evaluation: %s", pdf_path)
|
|
405
319
|
try:
|
|
406
|
-
|
|
407
|
-
if not image_paths:
|
|
408
|
-
logger.error("No images generated from PDF: %s", pdf_path)
|
|
409
|
-
raise ProcessingError("No images generated from PDF")
|
|
410
|
-
logger.info("Evaluating visuals with question: %s", questions[0])
|
|
320
|
+
self.processor.process_pdf(pdf_path)
|
|
411
321
|
visual_result = self.model.evaluate_visuals(questions[0], "outputimages")
|
|
412
|
-
|
|
322
|
+
# Apply 70% DAX, 30% visuals scoring
|
|
323
|
+
result["score"] = int(0.7 * dax_result["score"] + 0.3 * visual_result["score"])
|
|
324
|
+
result["visual_score"] = visual_result["score"] # Store visual score
|
|
413
325
|
result["feedback"] += f"\n\nVisual Feedback:\n{visual_result['feedback']}"
|
|
414
326
|
result["issues"].extend([f"Visual: {i}" for i in visual_result.get("issues", [])])
|
|
415
327
|
result["recommendations"].extend(visual_result.get("recommendations", []))
|
|
416
|
-
logger.info("Visual evaluation completed: Score=%d", visual_result["score"])
|
|
417
328
|
except ProcessingError as e:
|
|
418
329
|
logger.warning("Failed to process PDF, proceeding with DAX evaluation only: %s", str(e))
|
|
330
|
+
# Use DAX score only, weighted at 100% if no visuals
|
|
331
|
+
result["score"] = dax_result["score"]
|
|
419
332
|
result["issues"].append(f"Visual evaluation skipped: {str(e)}")
|
|
420
|
-
result["recommendations"].append(
|
|
421
|
-
|
|
422
|
-
logger.error("Unexpected error during visual evaluation: %s", str(e))
|
|
423
|
-
result["issues"].append(f"Visual evaluation failed: {str(e)}")
|
|
424
|
-
result["recommendations"].append("Check PDF file and API connectivity")
|
|
333
|
+
result["recommendations"].append(
|
|
334
|
+
"Ensure a valid PDF is provided for visual evaluation if intended")
|
|
425
335
|
else:
|
|
426
|
-
|
|
336
|
+
# No PDF provided, use DAX score only
|
|
337
|
+
result["score"] = dax_result["score"]
|
|
338
|
+
result["feedback"] += "\n\nVisual Feedback:\nNo visuals provided for evaluation."
|
|
339
|
+
result["issues"].append("No PDF provided for visual evaluation")
|
|
340
|
+
result["recommendations"].append("Include a PDF with report visuals for complete evaluation")
|
|
341
|
+
|
|
342
|
+
# Print scores with text labels to terminal
|
|
343
|
+
logger.info("[DAX] Score: %d/100", result["dax_score"])
|
|
344
|
+
logger.info("[Visual] Score: %d/100", result["visual_score"])
|
|
345
|
+
logger.info("[Final] Score (70%% DAX, 30%% Visuals): %d/100", result["score"])
|
|
427
346
|
|
|
428
|
-
logger.info("Evaluation completed successfully")
|
|
429
347
|
return result
|
|
430
348
|
finally:
|
|
431
|
-
|
|
349
|
+
# Cleanup temporary files and directories
|
|
432
350
|
self.processor._cleanup(extract_path, "outputimages")
|
|
433
351
|
except Exception as e:
|
|
434
352
|
logger.exception("Failed to evaluate Power BI file %s: %s", answer_path, str(e))
|
|
@@ -437,8 +355,11 @@ class PowerBIEvaluator:
|
|
|
437
355
|
"score": 0,
|
|
438
356
|
"feedback": f"Error processing file: {str(e)}",
|
|
439
357
|
"issues": [str(e)],
|
|
440
|
-
"recommendations": ["Check file formats and API connectivity", "Review logs for detailed errors"]
|
|
358
|
+
"recommendations": ["Check file formats and API connectivity", "Review logs for detailed errors"],
|
|
359
|
+
"dax_score": 0,
|
|
360
|
+
"visual_score": 0
|
|
441
361
|
}
|
|
442
362
|
|
|
363
|
+
|
|
443
364
|
class ProcessingError(Exception):
|
|
444
365
|
pass
|
|
@@ -32,7 +32,7 @@ def prompt_text_python(combined_content):
|
|
|
32
32
|
"- Be honest but supportive\n"
|
|
33
33
|
"- Include specific examples from the provided answers if helpful\n"
|
|
34
34
|
"- Keep language beginner-friendly\n"
|
|
35
|
-
"- Do not give too low marks. You may add from
|
|
35
|
+
"- Do not give too low marks. You may add from 20 up to 25 additional marks for effort or "
|
|
36
36
|
"partial relevance, ensuring the score does not exceed 100."
|
|
37
37
|
)
|
|
38
38
|
|
|
@@ -142,49 +142,23 @@ def prompt_text_ssis(combined_content):
|
|
|
142
142
|
|
|
143
143
|
def prompt_text_powerbi(combined_content: str):
|
|
144
144
|
return (
|
|
145
|
-
"You are a BI professional evaluating Power BI
|
|
146
|
-
"
|
|
147
|
-
"Your evaluation should:\n"
|
|
148
|
-
"- Focus on clarity, correctness, and understanding of Power BI content (DAX, data models, visuals)\n"
|
|
149
|
-
"- Be constructive and encouraging (students are beginners)\n"
|
|
150
|
-
"- Highlight strengths and areas for improvement\n"
|
|
151
|
-
"- Identify major mistakes (e.g., incorrect DAX, poor data modeling, unclear visuals)\n"
|
|
152
|
-
"- Be concise but insightful\n"
|
|
153
|
-
"- Evaluate proper configuration of data model relationships, correctness and logic of DAX formulas, and "
|
|
154
|
-
"clarity of visuals (e.g., appropriate chart types, layout, readability, proper filtering)\n"
|
|
155
|
-
"- Also assess whether the student’s submission demonstrates a proper understanding of "
|
|
156
|
-
"Power BI concepts being tested (e.g., data modeling, DAX calculations, visualization principles), not just technical correctness\n"
|
|
157
|
-
"- If the student's submission is incomplete or too simplistic to fully address the question, "
|
|
158
|
-
"clearly state that it lacks sufficient detail or misses key components, but do not provide "
|
|
159
|
-
"the missing parts or solutions. Instead, suggest they revisit the relevant "
|
|
160
|
-
"Power BI concepts (e.g., data modeling, DAX, or visualization) and encourage deeper exploration\n"
|
|
161
|
-
"- If the student's submission is off-topic or unrelated to the question, "
|
|
162
|
-
"clearly state that the response does not address the question's requirements and "
|
|
163
|
-
"explain why it is irrelevant. Encourage the student to review the question carefully and "
|
|
164
|
-
"focus on the relevant Power BI concepts without providing the correct solution\n"
|
|
165
|
-
"- Do not penalize for advanced efficiency, data source paths, or separate measure tables\n"
|
|
166
|
-
"- Do not lower marks for redundant date tables or missing advanced design features\n\n"
|
|
167
|
-
"Provide feedback in this format:\n\n"
|
|
145
|
+
"You are a BI professional evaluating a beginner student's Power BI submission, including DAX, data models, and visuals.\n\n"
|
|
146
|
+
"Please provide short, clear, and supportive feedback with the following structure:\n\n"
|
|
168
147
|
"=== COMPREHENSIVE EVALUATION ===\n\n"
|
|
169
148
|
"OVERALL SCORE: <score>/100\n\n"
|
|
170
149
|
"FEEDBACK SUMMARY:\n"
|
|
171
150
|
"- What was done well\n"
|
|
172
151
|
"- What needs improvement\n"
|
|
173
152
|
"- Any major issues (e.g., incorrect DAX, missing visuals, poor relationships)\n\n"
|
|
174
|
-
"
|
|
175
|
-
"-
|
|
176
|
-
"-
|
|
177
|
-
"-
|
|
178
|
-
|
|
153
|
+
"Evaluation guidelines:\n"
|
|
154
|
+
"- Focus on clarity, correctness, and understanding of Power BI concepts (DAX, modeling, visuals)\n"
|
|
155
|
+
"- Be concise, constructive, and beginner-friendly\n"
|
|
156
|
+
"- Highlight strengths and areas to improve\n"
|
|
157
|
+
"- Mention if the submission is incomplete or off-topic, but don't provide missing solutions\n"
|
|
158
|
+
"- Do not penalize for efficiency, missing advanced features, or redundant tables\n"
|
|
159
|
+
"- Base score on relevance, correctness, and effort. Incomplete/off-topic work should be scored low, with a small boost for effort if applicable\n\n"
|
|
179
160
|
f"{combined_content}\n"
|
|
180
|
-
"=== EVALUATION COMPLETE
|
|
181
|
-
"Notes:\n"
|
|
182
|
-
"- Be honest but supportive\n"
|
|
183
|
-
"- Include specific examples from the provided answers if helpful\n"
|
|
184
|
-
"- Keep language beginner-friendly\n"
|
|
185
|
-
"- Score submissions based on alignment with the question, effort, and technical correctness. "
|
|
186
|
-
"Off-topic or incomplete submissions should generally score low (e.g., 10-30/100), "
|
|
187
|
-
"but add from 5 up to 10 marks for effort or partial relevance, ensuring the score does not exceed 100."
|
|
161
|
+
"=== EVALUATION COMPLETE ==="
|
|
188
162
|
)
|
|
189
163
|
|
|
190
164
|
|
|
@@ -226,5 +200,6 @@ def prompt_text_powerbi(combined_content: str):
|
|
|
226
200
|
|
|
227
201
|
|
|
228
202
|
|
|
203
|
+
|
|
229
204
|
|
|
230
205
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: QuantumChecker
|
|
3
|
-
Version: 0.2.
|
|
3
|
+
Version: 0.2.7
|
|
4
4
|
Summary: A package to evaluate homework submissions in Python, SQL, PowerBI, and SSIS.
|
|
5
5
|
Author: Qobiljon
|
|
6
6
|
Author-email: qobiljonkhayrullayev@gmail.com
|
|
@@ -14,6 +14,7 @@ Requires-Dist: tenacity>=8.2.3
|
|
|
14
14
|
Requires-Dist: pdf2image>=1.16.3
|
|
15
15
|
Requires-Dist: python-dotenv>=1.0.0
|
|
16
16
|
Requires-Dist: Pillow>=10.0.0
|
|
17
|
+
Requires-Dist: PyPDF2>=3.0.1
|
|
17
18
|
Dynamic: author
|
|
18
19
|
Dynamic: author-email
|
|
19
20
|
Dynamic: classifier
|
|
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
|
|
|
2
2
|
|
|
3
3
|
setup(
|
|
4
4
|
name="QuantumChecker",
|
|
5
|
-
version="0.2.
|
|
5
|
+
version="0.2.7",
|
|
6
6
|
author="Qobiljon",
|
|
7
7
|
author_email="qobiljonkhayrullayev@gmail.com",
|
|
8
8
|
description="A package to evaluate homework submissions in Python, SQL, PowerBI, and SSIS.",
|
|
@@ -17,6 +17,7 @@ setup(
|
|
|
17
17
|
"pdf2image>=1.16.3",
|
|
18
18
|
"python-dotenv>=1.0.0",
|
|
19
19
|
"Pillow>=10.0.0",
|
|
20
|
+
"PyPDF2>=3.0.1",
|
|
20
21
|
],
|
|
21
22
|
classifiers=[
|
|
22
23
|
"Programming Language :: Python :: 3",
|
|
@@ -24,4 +25,4 @@ setup(
|
|
|
24
25
|
"Operating System :: OS Independent",
|
|
25
26
|
],
|
|
26
27
|
include_package_data=True,
|
|
27
|
-
)
|
|
28
|
+
)
|
|
@@ -1,10 +1,12 @@
|
|
|
1
|
+
from pprint import pprint
|
|
2
|
+
|
|
1
3
|
from QuantumCheck import HomeworkEvaluator
|
|
2
4
|
|
|
3
5
|
if __name__ == "__main__":
|
|
4
6
|
evaluator = HomeworkEvaluator()
|
|
5
7
|
|
|
6
8
|
primary_api_key = "AIzaSyD0ptgEixhLLjCWjkyxhqDsUzO16ytQq2c"
|
|
7
|
-
question = "
|
|
9
|
+
question = "Create a dashboard"
|
|
8
10
|
|
|
9
11
|
backup_keys = [
|
|
10
12
|
"BACKUP_KEY_1",
|
|
@@ -15,13 +17,13 @@ if __name__ == "__main__":
|
|
|
15
17
|
]
|
|
16
18
|
|
|
17
19
|
result = evaluator.evaluate_from_content(
|
|
18
|
-
question_content="
|
|
19
|
-
answer_path="../tests/answer/
|
|
20
|
+
question_content="Fuck You",
|
|
21
|
+
answer_path="../tests/answer/real.zip",
|
|
20
22
|
api_key=primary_api_key,
|
|
21
23
|
backup_api_keys=backup_keys
|
|
22
24
|
)
|
|
23
25
|
|
|
24
|
-
|
|
26
|
+
pprint(result)
|
|
25
27
|
|
|
26
28
|
|
|
27
29
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|