QuantumChecker 0.2.6__tar.gz → 0.2.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: QuantumChecker
3
- Version: 0.2.6
3
+ Version: 0.2.7
4
4
  Summary: A package to evaluate homework submissions in Python, SQL, PowerBI, and SSIS.
5
5
  Author: Qobiljon
6
6
  Author-email: qobiljonkhayrullayev@gmail.com
@@ -14,6 +14,7 @@ Requires-Dist: tenacity>=8.2.3
14
14
  Requires-Dist: pdf2image>=1.16.3
15
15
  Requires-Dist: python-dotenv>=1.0.0
16
16
  Requires-Dist: Pillow>=10.0.0
17
+ Requires-Dist: PyPDF2>=3.0.1
17
18
  Dynamic: author
18
19
  Dynamic: author-email
19
20
  Dynamic: classifier
@@ -13,9 +13,20 @@ from dotenv import load_dotenv
13
13
  from PIL import Image
14
14
  import io
15
15
  import base64
16
- import PyPDF2 # Added for PDF validation
17
16
 
18
- from .prompts import prompt_text_powerbi
17
+
18
+ # Placeholder for prompts.py content
19
+ def prompt_text_powerbi(combined_content: str) -> str:
20
+ return f"""
21
+ Evaluate the following Power BI DAX question-answer pairs for correctness, clarity, and appropriateness.
22
+ Provide an overall score out of 100 and concise feedback. Focus on DAX logic and structure.
23
+ Structure the response as:
24
+ OVERALL SCORE: [SCORE]/100
25
+ [FEEDBACK]
26
+
27
+ {combined_content}
28
+ """
29
+
19
30
 
20
31
  load_dotenv()
21
32
  logger = logging.getLogger(__name__)
@@ -25,34 +36,27 @@ logging.basicConfig(
25
36
  handlers=[logging.FileHandler("../powerbi_evaluator.log"), logging.StreamHandler()]
26
37
  )
27
38
 
39
+
28
40
  class GeminiFlashModel:
29
41
  def __init__(self, api_key: str, model_name: str = "gemini-1.5-flash"):
30
- logger.info("Initializing GeminiFlashModel with model: %s", model_name)
31
42
  api_key = os.getenv("GEMINI_API_KEY") or api_key
32
43
  if not api_key:
33
- logger.error("API key not found in environment variables or provided argument")
34
44
  raise ValueError("API key not found in .env file or environment variables.")
35
45
  self.api_key = api_key
36
46
  self.model_name = model_name
37
47
  self.endpoint = f"https://generativelanguage.googleapis.com/v1beta/models/{model_name}:generateContent"
38
- logger.info("GeminiFlashModel initialized successfully with endpoint: %s", self.endpoint)
39
48
 
40
- @retry(
41
- stop=stop_after_attempt(3),
42
- wait=wait_exponential(multiplier=1, min=4, max=10),
43
- retry=retry_if_exception_type((requests.exceptions.RequestException,))
44
- )
49
+ @retry(stop=stop_after_attempt(3), wait=wait_exponential(min=4, max=10),
50
+ retry=retry_if_exception_type((requests.exceptions.RequestException,)))
45
51
  def evaluate(self, question_answer_pairs: List[Dict[str, str]]) -> Dict[str, any]:
46
- logger.info("Starting evaluation of %d question-answer pairs", len(question_answer_pairs))
52
+ logger.info("Starting evaluation of %d Power BI question-answer pairs", len(question_answer_pairs))
47
53
  combined_content = "\n\n".join(
48
54
  f"Question {i}:\n{qa['question']}\n\nAnswer {i}:\n{qa['answer']}\n"
49
55
  for i, qa in enumerate(question_answer_pairs, 1)
50
56
  )
51
- logger.debug("Prepared combined content for evaluation: %s", combined_content[:100] + "..." if len(combined_content) > 100 else combined_content)
52
57
 
53
58
  headers = {"Content-Type": "application/json"}
54
59
  data = {"contents": [{"parts": [{"text": prompt_text_powerbi(combined_content)}]}]}
55
- logger.info("Sending API request to %s", self.endpoint)
56
60
  response = requests.post(f"{self.endpoint}?key={self.api_key}", headers=headers, json=data)
57
61
 
58
62
  if response.status_code != 200:
@@ -63,89 +67,61 @@ class GeminiFlashModel:
63
67
  logger.error("API response missing candidates: %s", response_data)
64
68
  raise ValueError("No candidates in API response")
65
69
  generated_text = response_data["candidates"][0]["content"]["parts"][0]["text"]
66
- logger.info("Received API response, parsing generated text")
67
70
  return self._parse_response(generated_text)
68
71
 
69
- @retry(
70
- stop=stop_after_attempt(3),
71
- wait=wait_exponential(multiplier=1, min=4, max=10),
72
- retry=retry_if_exception_type((requests.exceptions.RequestException,))
73
- )
72
+ @retry(stop=stop_after_attempt(3), wait=wait_exponential(min=4, max=10),
73
+ retry=retry_if_exception_type((requests.exceptions.RequestException,)))
74
74
  def evaluate_visuals(self, question: str, image_folder: str) -> Dict[str, any]:
75
- logger.info("Starting visual evaluation for question: %s", question)
76
75
  folder_path = Path(image_folder)
77
76
  images = list(folder_path.glob("*.png"))[:3]
78
77
  if not images:
79
- logger.error("No PNG images found in folder: %s", image_folder)
80
78
  raise ProcessingError(f"No PNG images found in {image_folder}")
81
- logger.info("Found %d PNG images for evaluation: %s", len(images), [img.name for img in images])
82
-
83
79
  prompt = (
84
- "Evaluate the Power BI report visuals based on the given task.\n\n"
80
+ "Evaluate the Power BI report visuals based on the provided task. The visuals are professional dashboards designed for enterprise use.\n\n"
85
81
  f"Task: {question}\n\n"
86
82
  f"Screenshots: {[str(img.name) for img in images]}\n\n"
87
- "Focus on:\n"
88
- "- Clarity: Are visuals clear and easy to understand?\n"
89
- "- Appropriateness: Are visual types suitable for the data and task?\n"
90
- "- Layout and Design: Is the layout organized with logical flow?\n"
91
- "- Readability: Are labels, titles, and legends clear and not overcrowded?\n"
92
- "- Color Usage: Are colors effective, consistent, and accessible?\n"
93
- "- Interactivity: (If visible) Do slicers or filters enhance usability?\n\n"
94
- "Do not consider DAX, data sources, or advanced efficiency.\n"
95
- "Provide feedback in a supportive manner for beginners.\n\n"
96
- "Structure as: Score: [SCORE], Feedback: [FEEDBACK]"
83
+ "Evaluate based on the following criteria, assigning a score out of 100:z\n"
84
+ "- Clarity (30%): Are visuals clear, with readable labels, titles, and legends?\n"
85
+ "- Appropriateness (30%): Are chart types (e.g., bar, line, pie) suitable for the data and task?\n"
86
+ "- Color Usage (20%): Are colors consistent, accessible, and visually appealing? Consider contrast and colorblind accessibility.\n"
87
+ "- Interactivity (20%): Do visible slicers, filters, or tooltips enhance usability and data exploration?\n\n"
88
+ "Provide a score (0-100) that reflects the overall quality, considering the enterprise context. Avoid overly harsh penalties for minor issues.\n"
89
+ "Provide concise, supportive feedback for beginners, highlighting strengths and areas for improvement.\n\n"
90
+ "Structure the response as:\n"
91
+ "Score: [SCORE]/100\n"
92
+ "Feedback: [FEEDBACK]"
97
93
  )
98
94
  parts = [{"text": prompt}]
99
95
  for img in images:
100
- logger.debug("Processing image: %s", img.name)
101
- try:
102
- with Image.open(img) as pil_img:
103
- if pil_img.size[0] == 0 or pil_img.size[1] == 0:
104
- logger.error("Invalid image dimensions for %s", img.name)
105
- raise ProcessingError(f"Invalid image dimensions for {img.name}")
106
- pil_img.thumbnail((1024, 1024))
107
- img_buffer = io.BytesIO()
108
- pil_img.save(img_buffer, format="PNG")
109
- parts.append({
110
- "inline_data": {
111
- "mime_type": "image/png",
112
- "data": base64.b64encode(img_buffer.getvalue()).decode('utf-8')
113
- }
114
- })
115
- except Exception as e:
116
- logger.error("Failed to process image %s: %s", img.name, str(e))
117
- raise ProcessingError(f"Failed to process image {img.name}: {str(e)}")
96
+ with Image.open(img) as pil_img:
97
+ pil_img.thumbnail((1024, 1024))
98
+ img_buffer = io.BytesIO()
99
+ pil_img.save(img_buffer, format="PNG")
100
+ parts.append({
101
+ "inline_data": {
102
+ "mime_type": "image/png",
103
+ "data": base64.b64encode(img_buffer.getvalue()).decode('utf-8')
104
+ }
105
+ })
118
106
  headers = {"Content-Type": "application/json"}
119
107
  data = {"contents": [{"parts": parts}]}
120
- logger.info("Sending visual evaluation API request to %s", self.endpoint)
121
108
  response = requests.post(f"{self.endpoint}?key={self.api_key}", headers=headers, json=data)
122
109
  if response.status_code != 200:
123
- logger.error("Visual API request failed: Status %d, Response: %s", response.status_code, response.text)
110
+ logger.error("API request failed: Status %d, Response: %s", response.status_code, response.text)
124
111
  raise Exception(f"API call failed: {response.status_code} - {response.text}")
125
112
  response_data = response.json()
126
113
  if not response_data.get("candidates"):
127
- logger.error("Visual API response missing candidates: %s", response_data)
114
+ logger.error("API response missing candidates: %s", response_data)
128
115
  raise ValueError("No candidates in API response")
129
116
  output_text = response_data["candidates"][0]["content"]["parts"][0]["text"]
130
- logger.info("Received visual API response, parsing output")
131
117
  score_match = re.search(r"Score:\s*(\d+)(?:/100)?", output_text)
132
118
  feedback_match = re.search(r"Feedback:\s*(.*)", output_text, re.DOTALL)
133
- result = {
119
+ return {
134
120
  "score": int(score_match.group(1)) if score_match else 0,
135
- "feedback": feedback_match.group(1).strip() if feedback_match else "No visual feedback generated",
136
- "issues": []
121
+ "feedback": feedback_match.group(1).strip() if feedback_match else "No visual feedback generated"
137
122
  }
138
- if not score_match:
139
- result["issues"].append("Failed to parse score from visual API response")
140
- logger.warning("Failed to parse score from visual API response")
141
- if not feedback_match:
142
- result["issues"].append("Failed to parse feedback from visual API response")
143
- logger.warning("Failed to parse feedback from visual API response")
144
- logger.info("Visual evaluation completed: Score=%d, Feedback=%s", result["score"], result["feedback"][:50] + "..." if len(result["feedback"]) > 50 else result["feedback"])
145
- return result
146
123
 
147
124
  def _parse_response(self, text: str) -> Dict[str, any]:
148
- logger.info("Parsing API response text")
149
125
  result = {"score": 0, "feedback": "Evaluation not returned by API.", "issues": [], "recommendations": []}
150
126
  try:
151
127
  lines = text.split("\n")
@@ -157,150 +133,97 @@ class GeminiFlashModel:
157
133
  try:
158
134
  result["score"] = int(line.split(":")[1].split("/")[0].strip())
159
135
  score_found = True
160
- logger.info("Parsed score: %d", result["score"])
161
136
  except ValueError:
162
137
  result["issues"].append("Failed to parse score from API response")
163
- logger.error("Failed to parse score from response: %s", line)
164
138
  continue
165
139
  elif score_found:
166
140
  feedback_lines.append(line)
167
141
  if feedback_lines:
168
142
  result["feedback"] = "\n".join(feedback_lines).strip()
169
- logger.debug("Parsed feedback: %s", result["feedback"][:50] + "..." if len(result["feedback"]) > 50 else result["feedback"])
170
143
  return result
171
144
  except Exception as e:
172
145
  result["issues"].append(str(e))
173
- logger.error("Error parsing response: %s", str(e))
174
146
  return result
175
147
 
148
+
176
149
  class PowerBIProcessor:
177
150
  def extract_datamodel(self, pbit_file_path: str) -> Dict:
178
- logger.info("Extracting data model from PBIT file: %s", pbit_file_path)
179
151
  if not os.path.exists(pbit_file_path):
180
- logger.error("PBIT file does not exist: %s", pbit_file_path)
181
152
  raise ProcessingError(f"PBIT file not found: {pbit_file_path}")
182
153
  folder_path = os.path.dirname(pbit_file_path)
183
154
  file_name = os.path.splitext(os.path.basename(pbit_file_path))[0]
184
155
  zip_file = os.path.join(folder_path, f"{file_name}.zip")
185
156
  export_path = os.path.join(folder_path, "export")
186
- logger.debug("Cleaning up temporary files: %s, %s", zip_file, export_path)
187
157
  self._cleanup(zip_file, export_path)
188
158
  try:
189
- logger.info("Renaming PBIT to ZIP: %s -> %s", pbit_file_path, zip_file)
190
159
  os.rename(pbit_file_path, zip_file)
191
160
  if not zipfile.is_zipfile(zip_file):
192
- logger.error("File is not a valid ZIP: %s", zip_file)
193
161
  raise ProcessingError(f"File is not a valid ZIP: {zip_file}")
194
- logger.info("Extracting ZIP contents to: %s", export_path)
195
162
  with zipfile.ZipFile(zip_file, "r") as zip_ref:
196
163
  zip_ref.extractall(export_path)
197
164
  schema_path = os.path.join(export_path, "DataModelSchema")
198
165
  txt_path = os.path.join(export_path, "DataModelSchema.txt")
199
- logger.debug("Renaming schema file: %s -> %s", schema_path, txt_path)
200
166
  os.rename(schema_path, txt_path)
201
- logger.info("Reading DataModelSchema file: %s", txt_path)
202
167
  with open(txt_path, "r", encoding="utf-16-le") as file:
203
- data = json.load(file)
204
- logger.info("Successfully extracted data model from PBIT file")
205
- return data
168
+ return json.load(file)
206
169
  except UnicodeDecodeError as e:
207
170
  logger.error("Failed to decode DataModelSchema: %s", str(e))
208
171
  raise ProcessingError(f"Invalid encoding in DataModelSchema: {e}")
209
172
  except Exception as e:
210
- logger.error("Failed to extract DataModelSchema: %s", str(e))
211
173
  raise ProcessingError(f"Failed to extract DataModelSchema: {e}")
212
174
  finally:
213
- logger.debug("Cleaning up temporary files after extraction")
214
175
  self._cleanup(zip_file, export_path)
215
176
 
216
177
  def extract_model_data(self, data: Dict) -> Dict:
217
- logger.info("Extracting model data from data model")
218
178
  try:
219
179
  tables = data.get("model", {}).get("tables", [])
220
180
  relationships = data.get("model", {}).get("relationships", [])
221
- result = {
181
+ return {
222
182
  "Calculated Measures": self._get_measures(tables),
223
183
  "Tables": self._get_tables_and_columns(tables),
224
184
  "Relationships": self._get_relationships(relationships)
225
185
  }
226
- logger.info("Extracted model data: %d measures, %d tables, %d relationships",
227
- len(result["Calculated Measures"]), len(result["Tables"]), len(result["Relationships"]))
228
- return result
229
186
  except Exception as e:
230
- logger.error("Failed to extract model data: %s", str(e))
231
187
  raise ProcessingError(f"Failed to extract model data: {e}")
232
188
 
233
189
  def process_pdf(self, pdf_path: str, output_dir: str = "outputimages", num_pages: int = 3) -> List[str]:
234
- logger.info("Processing PDF file: %s", pdf_path)
235
190
  try:
236
191
  if not os.path.exists(pdf_path):
237
- logger.error("PDF file does not exist: %s", pdf_path)
238
192
  raise ProcessingError(f"PDF file not found: {pdf_path}")
239
- # Validate PDF
240
- try:
241
- with open(pdf_path, "rb") as f:
242
- pdf_reader = PyPDF2.PdfReader(f)
243
- if len(pdf_reader.pages) == 0:
244
- logger.error("PDF is empty: %s", pdf_path)
245
- raise ProcessingError(f"PDF is empty: {pdf_path}")
246
- logger.info("PDF validated, contains %d pages", len(pdf_reader.pages))
247
- except Exception as e:
248
- logger.error("Invalid PDF file: %s", str(e))
249
- raise ProcessingError(f"Invalid PDF file: {str(e)}")
250
- logger.debug("Creating output directory: %s", output_dir)
251
193
  os.makedirs(output_dir, exist_ok=True)
252
- logger.info("Converting PDF pages to images (max %d pages)", num_pages)
253
- pages = convert_from_path(pdf_path, first_page=1, last_page=min(num_pages, len(pdf_reader.pages)))
254
- if not pages:
255
- logger.error("No pages converted from PDF: %s", pdf_path)
256
- raise ProcessingError(f"No pages converted from PDF: {pdf_path}")
194
+ pages = convert_from_path(pdf_path, first_page=1, last_page=num_pages)
257
195
  image_paths = []
258
196
  for i, page in enumerate(pages):
259
197
  image_path = os.path.join(output_dir, f"page_{i + 1}.png")
260
- logger.debug("Saving page %d as PNG: %s", i + 1, image_path)
261
198
  page.save(image_path, "PNG")
262
199
  image_paths.append(image_path)
263
- logger.info("Successfully processed %d pages from PDF", len(image_paths))
200
+ os.remove(pdf_path)
264
201
  return image_paths
265
202
  except Exception as e:
266
- logger.error("Failed to process PDF: %s", str(e))
267
- raise ProcessingError(f"Failed to process PDF: {str(e)}")
268
- finally:
269
- logger.debug("Not removing PDF file to allow debugging: %s", pdf_path)
203
+ raise ProcessingError(f"Failed to process PDF: {e}")
270
204
 
271
205
  def extract_zip(self, zip_path: str, extract_path: str) -> tuple[str, str | None]:
272
- logger.info("Extracting ZIP file: %s", zip_path)
273
206
  try:
274
207
  if not os.path.exists(zip_path):
275
- logger.error("ZIP file does not exist: %s", zip_path)
276
208
  raise ProcessingError(f"ZIP file not found: {zip_path}")
277
209
  if not zipfile.is_zipfile(zip_path):
278
- logger.error("File is not a valid ZIP: %s", zip_path)
279
210
  raise ProcessingError(f"File is not a valid ZIP: {zip_path}")
280
- logger.debug("Creating extraction directory: %s", extract_path)
281
211
  os.makedirs(extract_path, exist_ok=True)
282
212
  with zipfile.ZipFile(zip_path, "r") as zip_ref:
283
- logger.info("Extracting ZIP contents to: %s", extract_path)
284
213
  zip_ref.extractall(extract_path)
285
214
  pbit_files = list(Path(extract_path).glob("*.pbit"))
286
215
  pdf_files = list(Path(extract_path).glob("*.pdf"))
287
- logger.info("Found %d PBIT files and %d PDF files in ZIP", len(pbit_files), len(pdf_files))
288
216
  if not pbit_files:
289
- logger.error("No PBIT files found in ZIP")
290
217
  raise ProcessingError("ZIP file must contain at least one .pbit file")
291
218
  if len(pbit_files) > 1:
292
- logger.error("Multiple PBIT files found in ZIP: %s", [str(p) for p in pbit_files])
293
219
  raise ProcessingError("ZIP file contains multiple .pbit files")
294
220
  pdf_path = str(pdf_files[0]) if pdf_files else None
295
- logger.info("Extracted PBIT file: %s, PDF file: %s", str(pbit_files[0]), pdf_path)
296
221
  return str(pbit_files[0]), pdf_path
297
222
  except Exception as e:
298
- logger.error("Failed to extract ZIP file: %s", str(e))
299
223
  raise ProcessingError(f"Failed to extract ZIP file: {e}")
300
224
 
301
225
  @staticmethod
302
226
  def _get_measures(tables: List[Dict]) -> List[Dict]:
303
- logger.debug("Extracting measures from tables")
304
227
  measures = []
305
228
  for table in tables:
306
229
  if "measures" in table:
@@ -308,53 +231,48 @@ class PowerBIProcessor:
308
231
  measures.append({
309
232
  "Table": table["name"],
310
233
  "Name": measure["name"],
311
- "Expression": " ".join(measure.get("expression", "")) if isinstance(measure.get("expression"), list) else measure.get("expression", ""),
234
+ "Expression": " ".join(measure.get("expression", "")) if isinstance(measure.get("expression"),
235
+ list) else measure.get(
236
+ "expression", ""),
312
237
  "FormatString": measure.get("formatString", "")
313
238
  })
314
- logger.debug("Extracted %d measures", len(measures))
315
239
  return measures
316
240
 
317
241
  @staticmethod
318
242
  def _get_tables_and_columns(tables: List[Dict]) -> List[Dict]:
319
- logger.debug("Extracting tables and columns")
320
243
  table_info = []
321
244
  for table in tables:
322
- columns = [{"Column Name": col["name"], "Data Type": col.get("dataType", "Unknown"), "Source Column": col.get("sourceColumn", "N/A"), "Calculated": col.get("type") == "calculated"} for col in table.get("columns", [])]
323
- expressions = [part["source"]["expression"] for part in table.get("partitions", []) if part["source"].get("expression")]
245
+ columns = [{"Column Name": col["name"], "Data Type": col.get("dataType", "Unknown"),
246
+ "Source Column": col.get("sourceColumn", "N/A"), "Calculated": col.get("type") == "calculated"}
247
+ for col in table.get("columns", [])]
248
+ expressions = [part["source"]["expression"] for part in table.get("partitions", []) if
249
+ part["source"].get("expression")]
324
250
  table_info.append({"Table Name": table["name"], "Columns": columns, "Expressions": expressions})
325
- logger.debug("Extracted %d tables", len(table_info))
326
251
  return table_info
327
252
 
328
253
  @staticmethod
329
254
  def _get_relationships(relationships: List[Dict]) -> List[Dict]:
330
- logger.debug("Extracting relationships")
331
- result = [{"From Table": rel["fromTable"], "From Column": rel["fromColumn"], "To Table": rel["toTable"], "To Column": rel["toColumn"], "Join Behavior": rel.get("joinOnDateBehavior", "N/A")} for rel in relationships]
332
- logger.debug("Extracted %d relationships", len(result))
333
- return result
255
+ return [{"From Table": rel["fromTable"], "From Column": rel["fromColumn"], "To Table": rel["toTable"],
256
+ "To Column": rel["toColumn"], "Join Behavior": rel.get("joinOnDateBehavior", "N/A")} for rel in
257
+ relationships]
334
258
 
335
259
  @staticmethod
336
260
  def _cleanup(*paths: str):
337
- logger.debug("Cleaning up paths: %s", paths)
338
261
  for path in paths:
339
262
  if os.path.exists(path):
340
263
  if os.path.isfile(path):
341
- logger.debug("Removing file: %s", path)
342
264
  os.remove(path)
343
265
  else:
344
- logger.debug("Removing directory: %s", path)
345
266
  shutil.rmtree(path, ignore_errors=True)
346
- logger.debug("Cleanup completed")
267
+
347
268
 
348
269
  class PowerBIEvaluator:
349
270
  def __init__(self, api_key: str):
350
- logger.info("Initializing PowerBIEvaluator")
351
271
  self.api_key = api_key
352
272
  self.model = GeminiFlashModel(api_key)
353
273
  self.processor = PowerBIProcessor()
354
- logger.info("PowerBIEvaluator initialized successfully")
355
274
 
356
275
  def evaluate(self, questions: List[str], answer_path: str) -> Dict[str, any]:
357
- logger.info("Starting evaluation for file: %s with %d questions", answer_path, len(questions))
358
276
  try:
359
277
  _, ext = os.path.splitext(answer_path)
360
278
  ext = ext.lower()
@@ -363,72 +281,72 @@ class PowerBIEvaluator:
363
281
  pdf_path = None
364
282
 
365
283
  # Handle input file type
366
- logger.debug("Checking file extension: %s", ext)
367
284
  if ext == ".zip":
368
- logger.info("Processing ZIP file")
369
285
  pbit_path, pdf_path = self.processor.extract_zip(answer_path, extract_path)
370
286
  elif ext == ".pbit":
371
- logger.info("Processing PBIT file directly")
372
287
  pbit_path = answer_path
373
288
  pdf_path = None
374
289
  else:
375
- logger.error("Invalid file type: %s", ext)
290
+ logger.error("Invalid file type for Power BI: %s", answer_path)
376
291
  return {
377
292
  "score": 0,
378
293
  "feedback": f"Invalid file type: {ext}. Expected .pbit or .zip",
379
294
  "issues": ["Invalid file type"],
380
- "recommendations": []
295
+ "recommendations": [],
296
+ "dax_score": 0,
297
+ "visual_score": 0
381
298
  }
382
299
 
383
300
  try:
384
301
  # Extract and process the data model from .pbit
385
- logger.info("Extracting data model from PBIT")
386
302
  data_model = self.processor.extract_datamodel(pbit_path)
387
- logger.info("Extracting model data")
388
303
  model_data = self.processor.extract_model_data(data_model)
389
304
  answers = [json.dumps(model_data)] * len(questions)
390
- logger.info("Evaluating DAX with %d question-answer pairs", len(questions))
391
305
  dax_result = self.model.evaluate([{"question": q, "answer": a} for q, a in zip(questions, answers)])
392
306
 
393
307
  # Initialize result with DAX evaluation
394
308
  result = {
395
- "score": dax_result["score"],
309
+ "score": 0,
396
310
  "feedback": f"DAX Feedback:\n{dax_result['feedback']}",
397
311
  "issues": dax_result["issues"],
398
- "recommendations": dax_result["recommendations"]
312
+ "recommendations": dax_result["recommendations"],
313
+ "dax_score": dax_result["score"], # Store DAX score
314
+ "visual_score": 0 # Default visual score
399
315
  }
400
- logger.info("DAX evaluation completed: Score=%d", dax_result["score"])
401
316
 
402
317
  # Process PDF and evaluate visuals if present
403
318
  if pdf_path:
404
- logger.info("Processing PDF for visual evaluation: %s", pdf_path)
405
319
  try:
406
- image_paths = self.processor.process_pdf(pdf_path)
407
- if not image_paths:
408
- logger.error("No images generated from PDF: %s", pdf_path)
409
- raise ProcessingError("No images generated from PDF")
410
- logger.info("Evaluating visuals with question: %s", questions[0])
320
+ self.processor.process_pdf(pdf_path)
411
321
  visual_result = self.model.evaluate_visuals(questions[0], "outputimages")
412
- result["score"] = (dax_result["score"] + visual_result["score"]) // 2
322
+ # Apply 70% DAX, 30% visuals scoring
323
+ result["score"] = int(0.7 * dax_result["score"] + 0.3 * visual_result["score"])
324
+ result["visual_score"] = visual_result["score"] # Store visual score
413
325
  result["feedback"] += f"\n\nVisual Feedback:\n{visual_result['feedback']}"
414
326
  result["issues"].extend([f"Visual: {i}" for i in visual_result.get("issues", [])])
415
327
  result["recommendations"].extend(visual_result.get("recommendations", []))
416
- logger.info("Visual evaluation completed: Score=%d", visual_result["score"])
417
328
  except ProcessingError as e:
418
329
  logger.warning("Failed to process PDF, proceeding with DAX evaluation only: %s", str(e))
330
+ # Use DAX score only, weighted at 100% if no visuals
331
+ result["score"] = dax_result["score"]
419
332
  result["issues"].append(f"Visual evaluation skipped: {str(e)}")
420
- result["recommendations"].append("Ensure a valid PDF with Power BI visuals is provided")
421
- except Exception as e:
422
- logger.error("Unexpected error during visual evaluation: %s", str(e))
423
- result["issues"].append(f"Visual evaluation failed: {str(e)}")
424
- result["recommendations"].append("Check PDF file and API connectivity")
333
+ result["recommendations"].append(
334
+ "Ensure a valid PDF is provided for visual evaluation if intended")
425
335
  else:
426
- logger.info("No PDF provided, skipping visual evaluation")
336
+ # No PDF provided, use DAX score only
337
+ result["score"] = dax_result["score"]
338
+ result["feedback"] += "\n\nVisual Feedback:\nNo visuals provided for evaluation."
339
+ result["issues"].append("No PDF provided for visual evaluation")
340
+ result["recommendations"].append("Include a PDF with report visuals for complete evaluation")
341
+
342
+ # Print scores with text labels to terminal
343
+ logger.info("[DAX] Score: %d/100", result["dax_score"])
344
+ logger.info("[Visual] Score: %d/100", result["visual_score"])
345
+ logger.info("[Final] Score (70%% DAX, 30%% Visuals): %d/100", result["score"])
427
346
 
428
- logger.info("Evaluation completed successfully")
429
347
  return result
430
348
  finally:
431
- logger.debug("Cleaning up temporary files and directories")
349
+ # Cleanup temporary files and directories
432
350
  self.processor._cleanup(extract_path, "outputimages")
433
351
  except Exception as e:
434
352
  logger.exception("Failed to evaluate Power BI file %s: %s", answer_path, str(e))
@@ -437,8 +355,11 @@ class PowerBIEvaluator:
437
355
  "score": 0,
438
356
  "feedback": f"Error processing file: {str(e)}",
439
357
  "issues": [str(e)],
440
- "recommendations": ["Check file formats and API connectivity", "Review logs for detailed errors"]
358
+ "recommendations": ["Check file formats and API connectivity", "Review logs for detailed errors"],
359
+ "dax_score": 0,
360
+ "visual_score": 0
441
361
  }
442
362
 
363
+
443
364
  class ProcessingError(Exception):
444
365
  pass
@@ -32,7 +32,7 @@ def prompt_text_python(combined_content):
32
32
  "- Be honest but supportive\n"
33
33
  "- Include specific examples from the provided answers if helpful\n"
34
34
  "- Keep language beginner-friendly\n"
35
- "- Do not give too low marks. You may add from 5 up to 10 additional marks for effort or "
35
+ "- Do not give too low marks. You may add from 20 up to 25 additional marks for effort or "
36
36
  "partial relevance, ensuring the score does not exceed 100."
37
37
  )
38
38
 
@@ -142,49 +142,23 @@ def prompt_text_ssis(combined_content):
142
142
 
143
143
  def prompt_text_powerbi(combined_content: str):
144
144
  return (
145
- "You are a BI professional evaluating Power BI report solutions, including DAX formulas, "
146
- "data models, and visual design based on the given task.\n\n"
147
- "Your evaluation should:\n"
148
- "- Focus on clarity, correctness, and understanding of Power BI content (DAX, data models, visuals)\n"
149
- "- Be constructive and encouraging (students are beginners)\n"
150
- "- Highlight strengths and areas for improvement\n"
151
- "- Identify major mistakes (e.g., incorrect DAX, poor data modeling, unclear visuals)\n"
152
- "- Be concise but insightful\n"
153
- "- Evaluate proper configuration of data model relationships, correctness and logic of DAX formulas, and "
154
- "clarity of visuals (e.g., appropriate chart types, layout, readability, proper filtering)\n"
155
- "- Also assess whether the student’s submission demonstrates a proper understanding of "
156
- "Power BI concepts being tested (e.g., data modeling, DAX calculations, visualization principles), not just technical correctness\n"
157
- "- If the student's submission is incomplete or too simplistic to fully address the question, "
158
- "clearly state that it lacks sufficient detail or misses key components, but do not provide "
159
- "the missing parts or solutions. Instead, suggest they revisit the relevant "
160
- "Power BI concepts (e.g., data modeling, DAX, or visualization) and encourage deeper exploration\n"
161
- "- If the student's submission is off-topic or unrelated to the question, "
162
- "clearly state that the response does not address the question's requirements and "
163
- "explain why it is irrelevant. Encourage the student to review the question carefully and "
164
- "focus on the relevant Power BI concepts without providing the correct solution\n"
165
- "- Do not penalize for advanced efficiency, data source paths, or separate measure tables\n"
166
- "- Do not lower marks for redundant date tables or missing advanced design features\n\n"
167
- "Provide feedback in this format:\n\n"
145
+ "You are a BI professional evaluating a beginner student's Power BI submission, including DAX, data models, and visuals.\n\n"
146
+ "Please provide short, clear, and supportive feedback with the following structure:\n\n"
168
147
  "=== COMPREHENSIVE EVALUATION ===\n\n"
169
148
  "OVERALL SCORE: <score>/100\n\n"
170
149
  "FEEDBACK SUMMARY:\n"
171
150
  "- What was done well\n"
172
151
  "- What needs improvement\n"
173
152
  "- Any major issues (e.g., incorrect DAX, missing visuals, poor relationships)\n\n"
174
- "KEY ADVICE:\n"
175
- "- Top 2-3 suggestions to improve Power BI skills\n"
176
- "- Highlight any concepts to revisit\n"
177
- "- Encourage further learning and effort\n\n"
178
-
153
+ "Evaluation guidelines:\n"
154
+ "- Focus on clarity, correctness, and understanding of Power BI concepts (DAX, modeling, visuals)\n"
155
+ "- Be concise, constructive, and beginner-friendly\n"
156
+ "- Highlight strengths and areas to improve\n"
157
+ "- Mention if the submission is incomplete or off-topic, but don't provide missing solutions\n"
158
+ "- Do not penalize for efficiency, missing advanced features, or redundant tables\n"
159
+ "- Base score on relevance, correctness, and effort. Incomplete/off-topic work should be scored low, with a small boost for effort if applicable\n\n"
179
160
  f"{combined_content}\n"
180
- "=== EVALUATION COMPLETE ===\n\n"
181
- "Notes:\n"
182
- "- Be honest but supportive\n"
183
- "- Include specific examples from the provided answers if helpful\n"
184
- "- Keep language beginner-friendly\n"
185
- "- Score submissions based on alignment with the question, effort, and technical correctness. "
186
- "Off-topic or incomplete submissions should generally score low (e.g., 10-30/100), "
187
- "but add from 5 up to 10 marks for effort or partial relevance, ensuring the score does not exceed 100."
161
+ "=== EVALUATION COMPLETE ==="
188
162
  )
189
163
 
190
164
 
@@ -226,5 +200,6 @@ def prompt_text_powerbi(combined_content: str):
226
200
 
227
201
 
228
202
 
203
+
229
204
 
230
205
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: QuantumChecker
3
- Version: 0.2.6
3
+ Version: 0.2.7
4
4
  Summary: A package to evaluate homework submissions in Python, SQL, PowerBI, and SSIS.
5
5
  Author: Qobiljon
6
6
  Author-email: qobiljonkhayrullayev@gmail.com
@@ -14,6 +14,7 @@ Requires-Dist: tenacity>=8.2.3
14
14
  Requires-Dist: pdf2image>=1.16.3
15
15
  Requires-Dist: python-dotenv>=1.0.0
16
16
  Requires-Dist: Pillow>=10.0.0
17
+ Requires-Dist: PyPDF2>=3.0.1
17
18
  Dynamic: author
18
19
  Dynamic: author-email
19
20
  Dynamic: classifier
@@ -3,3 +3,4 @@ tenacity>=8.2.3
3
3
  pdf2image>=1.16.3
4
4
  python-dotenv>=1.0.0
5
5
  Pillow>=10.0.0
6
+ PyPDF2>=3.0.1
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
2
2
 
3
3
  setup(
4
4
  name="QuantumChecker",
5
- version="0.2.6",
5
+ version="0.2.7",
6
6
  author="Qobiljon",
7
7
  author_email="qobiljonkhayrullayev@gmail.com",
8
8
  description="A package to evaluate homework submissions in Python, SQL, PowerBI, and SSIS.",
@@ -17,6 +17,7 @@ setup(
17
17
  "pdf2image>=1.16.3",
18
18
  "python-dotenv>=1.0.0",
19
19
  "Pillow>=10.0.0",
20
+ "PyPDF2>=3.0.1",
20
21
  ],
21
22
  classifiers=[
22
23
  "Programming Language :: Python :: 3",
@@ -24,4 +25,4 @@ setup(
24
25
  "Operating System :: OS Independent",
25
26
  ],
26
27
  include_package_data=True,
27
- )
28
+ )
@@ -1,10 +1,12 @@
1
+ from pprint import pprint
2
+
1
3
  from QuantumCheck import HomeworkEvaluator
2
4
 
3
5
  if __name__ == "__main__":
4
6
  evaluator = HomeworkEvaluator()
5
7
 
6
8
  primary_api_key = "AIzaSyD0ptgEixhLLjCWjkyxhqDsUzO16ytQq2c"
7
- question = "How to write print in python"
9
+ question = "Create a dashboard"
8
10
 
9
11
  backup_keys = [
10
12
  "BACKUP_KEY_1",
@@ -15,13 +17,13 @@ if __name__ == "__main__":
15
17
  ]
16
18
 
17
19
  result = evaluator.evaluate_from_content(
18
- question_content="How to write print in python",
19
- answer_path="../tests/answer/answer.py",
20
+ question_content="Fuck You",
21
+ answer_path="../tests/answer/real.zip",
20
22
  api_key=primary_api_key,
21
23
  backup_api_keys=backup_keys
22
24
  )
23
25
 
24
- print(result)
26
+ pprint(result)
25
27
 
26
28
 
27
29
 
File without changes
File without changes