QuantumChecker 0.2.4__tar.gz → 0.2.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: QuantumChecker
3
- Version: 0.2.4
3
+ Version: 0.2.5
4
4
  Summary: A package to evaluate homework submissions in Python, SQL, PowerBI, and SSIS.
5
5
  Author: Qobiljon
6
6
  Author-email: qobiljonkhayrullayev@gmail.com
@@ -24,15 +24,17 @@ logging.basicConfig(
24
24
  handlers=[logging.FileHandler("../powerbi_evaluator.log"), logging.StreamHandler()]
25
25
  )
26
26
 
27
- # GeminiFlashModel class remains unchanged
28
27
  class GeminiFlashModel:
29
28
  def __init__(self, api_key: str, model_name: str = "gemini-1.5-flash"):
29
+ logger.info("Initializing GeminiFlashModel with model: %s", model_name)
30
30
  api_key = os.getenv("GEMINI_API_KEY") or api_key
31
31
  if not api_key:
32
+ logger.error("API key not found in environment variables or provided argument")
32
33
  raise ValueError("API key not found in .env file or environment variables.")
33
34
  self.api_key = api_key
34
35
  self.model_name = model_name
35
36
  self.endpoint = f"https://generativelanguage.googleapis.com/v1beta/models/{model_name}:generateContent"
37
+ logger.info("GeminiFlashModel initialized successfully with endpoint: %s", self.endpoint)
36
38
 
37
39
  @retry(
38
40
  stop=stop_after_attempt(3),
@@ -40,14 +42,16 @@ class GeminiFlashModel:
40
42
  retry=retry_if_exception_type((requests.exceptions.RequestException,))
41
43
  )
42
44
  def evaluate(self, question_answer_pairs: List[Dict[str, str]]) -> Dict[str, any]:
43
- logger.info("Starting evaluation of %d Power BI question-answer pairs", len(question_answer_pairs))
45
+ logger.info("Starting evaluation of %d question-answer pairs", len(question_answer_pairs))
44
46
  combined_content = "\n\n".join(
45
47
  f"Question {i}:\n{qa['question']}\n\nAnswer {i}:\n{qa['answer']}\n"
46
48
  for i, qa in enumerate(question_answer_pairs, 1)
47
49
  )
50
+ logger.debug("Prepared combined content for evaluation: %s", combined_content[:100] + "..." if len(combined_content) > 100 else combined_content)
48
51
 
49
52
  headers = {"Content-Type": "application/json"}
50
53
  data = {"contents": [{"parts": [{"text": prompt_text_powerbi(combined_content)}]}]}
54
+ logger.info("Sending API request to %s", self.endpoint)
51
55
  response = requests.post(f"{self.endpoint}?key={self.api_key}", headers=headers, json=data)
52
56
 
53
57
  if response.status_code != 200:
@@ -58,6 +62,7 @@ class GeminiFlashModel:
58
62
  logger.error("API response missing candidates: %s", response_data)
59
63
  raise ValueError("No candidates in API response")
60
64
  generated_text = response_data["candidates"][0]["content"]["parts"][0]["text"]
65
+ logger.info("Received API response, parsing generated text")
61
66
  return self._parse_response(generated_text)
62
67
 
63
68
  @retry(
@@ -66,10 +71,14 @@ class GeminiFlashModel:
66
71
  retry=retry_if_exception_type((requests.exceptions.RequestException,))
67
72
  )
68
73
  def evaluate_visuals(self, question: str, image_folder: str) -> Dict[str, any]:
74
+ logger.info("Starting visual evaluation for question: %s", question)
69
75
  folder_path = Path(image_folder)
70
76
  images = list(folder_path.glob("*.png"))[:3]
71
77
  if not images:
78
+ logger.error("No PNG images found in folder: %s", image_folder)
72
79
  raise ProcessingError(f"No PNG images found in {image_folder}")
80
+ logger.info("Found %d PNG images for evaluation: %s", len(images), [img.name for img in images])
81
+
73
82
  prompt = (
74
83
  "Evaluate the Power BI report visuals based on the given task.\n\n"
75
84
  f"Task: {question}\n\n"
@@ -87,6 +96,7 @@ class GeminiFlashModel:
87
96
  )
88
97
  parts = [{"text": prompt}]
89
98
  for img in images:
99
+ logger.debug("Processing image: %s", img.name)
90
100
  with Image.open(img) as pil_img:
91
101
  pil_img.thumbnail((1024, 1024))
92
102
  img_buffer = io.BytesIO()
@@ -99,23 +109,28 @@ class GeminiFlashModel:
99
109
  })
100
110
  headers = {"Content-Type": "application/json"}
101
111
  data = {"contents": [{"parts": parts}]}
112
+ logger.info("Sending visual evaluation API request to %s", self.endpoint)
102
113
  response = requests.post(f"{self.endpoint}?key={self.api_key}", headers=headers, json=data)
103
114
  if response.status_code != 200:
104
- logger.error("API request failed: Status %d, Response: %s", response.status_code, response.text)
115
+ logger.error("Visual API request failed: Status %d, Response: %s", response.status_code, response.text)
105
116
  raise Exception(f"API call failed: {response.status_code} - {response.text}")
106
117
  response_data = response.json()
107
118
  if not response_data.get("candidates"):
108
- logger.error("API response missing candidates: %s", response_data)
119
+ logger.error("Visual API response missing candidates: %s", response_data)
109
120
  raise ValueError("No candidates in API response")
110
121
  output_text = response_data["candidates"][0]["content"]["parts"][0]["text"]
122
+ logger.info("Received visual API response, parsing output")
111
123
  score_match = re.search(r"Score:\s*(\d+)(?:/100)?", output_text)
112
124
  feedback_match = re.search(r"Feedback:\s*(.*)", output_text, re.DOTALL)
113
- return {
125
+ result = {
114
126
  "score": int(score_match.group(1)) if score_match else 0,
115
127
  "feedback": feedback_match.group(1).strip() if feedback_match else "No visual feedback generated"
116
128
  }
129
+ logger.info("Visual evaluation completed: Score=%d, Feedback=%s", result["score"], result["feedback"][:50] + "..." if len(result["feedback"]) > 50 else result["feedback"])
130
+ return result
117
131
 
118
132
  def _parse_response(self, text: str) -> Dict[str, any]:
133
+ logger.info("Parsing API response text")
119
134
  result = {"score": 0, "feedback": "Evaluation not returned by API.", "issues": [], "recommendations": []}
120
135
  try:
121
136
  lines = text.split("\n")
@@ -127,96 +142,136 @@ class GeminiFlashModel:
127
142
  try:
128
143
  result["score"] = int(line.split(":")[1].split("/")[0].strip())
129
144
  score_found = True
145
+ logger.info("Parsed score: %d", result["score"])
130
146
  except ValueError:
131
147
  result["issues"].append("Failed to parse score from API response")
148
+ logger.error("Failed to parse score from response: %s", line)
132
149
  continue
133
150
  elif score_found:
134
151
  feedback_lines.append(line)
135
152
  if feedback_lines:
136
153
  result["feedback"] = "\n".join(feedback_lines).strip()
154
+ logger.debug("Parsed feedback: %s", result["feedback"][:50] + "..." if len(result["feedback"]) > 50 else result["feedback"])
137
155
  return result
138
156
  except Exception as e:
139
157
  result["issues"].append(str(e))
158
+ logger.error("Error parsing response: %s", str(e))
140
159
  return result
141
160
 
142
161
  class PowerBIProcessor:
143
162
  def extract_datamodel(self, pbit_file_path: str) -> Dict:
163
+ logger.info("Extracting data model from PBIT file: %s", pbit_file_path)
144
164
  if not os.path.exists(pbit_file_path):
165
+ logger.error("PBIT file does not exist: %s", pbit_file_path)
145
166
  raise ProcessingError(f"PBIT file not found: {pbit_file_path}")
146
167
  folder_path = os.path.dirname(pbit_file_path)
147
168
  file_name = os.path.splitext(os.path.basename(pbit_file_path))[0]
148
169
  zip_file = os.path.join(folder_path, f"{file_name}.zip")
149
170
  export_path = os.path.join(folder_path, "export")
171
+ logger.debug("Cleaning up temporary files: %s, %s", zip_file, export_path)
150
172
  self._cleanup(zip_file, export_path)
151
173
  try:
174
+ logger.info("Renaming PBIT to ZIP: %s -> %s", pbit_file_path, zip_file)
152
175
  os.rename(pbit_file_path, zip_file)
153
176
  if not zipfile.is_zipfile(zip_file):
177
+ logger.error("File is not a valid ZIP: %s", zip_file)
154
178
  raise ProcessingError(f"File is not a valid ZIP: {zip_file}")
179
+ logger.info("Extracting ZIP contents to: %s", export_path)
155
180
  with zipfile.ZipFile(zip_file, "r") as zip_ref:
156
181
  zip_ref.extractall(export_path)
157
182
  schema_path = os.path.join(export_path, "DataModelSchema")
158
183
  txt_path = os.path.join(export_path, "DataModelSchema.txt")
184
+ logger.debug("Renaming schema file: %s -> %s", schema_path, txt_path)
159
185
  os.rename(schema_path, txt_path)
186
+ logger.info("Reading DataModelSchema file: %s", txt_path)
160
187
  with open(txt_path, "r", encoding="utf-16-le") as file:
161
- return json.load(file)
188
+ data = json.load(file)
189
+ logger.info("Successfully extracted data model from PBIT file")
190
+ return data
162
191
  except UnicodeDecodeError as e:
163
192
  logger.error("Failed to decode DataModelSchema: %s", str(e))
164
193
  raise ProcessingError(f"Invalid encoding in DataModelSchema: {e}")
165
194
  except Exception as e:
195
+ logger.error("Failed to extract DataModelSchema: %s", str(e))
166
196
  raise ProcessingError(f"Failed to extract DataModelSchema: {e}")
167
197
  finally:
198
+ logger.debug("Cleaning up temporary files after extraction")
168
199
  self._cleanup(zip_file, export_path)
169
200
 
170
201
  def extract_model_data(self, data: Dict) -> Dict:
202
+ logger.info("Extracting model data from data model")
171
203
  try:
172
204
  tables = data.get("model", {}).get("tables", [])
173
205
  relationships = data.get("model", {}).get("relationships", [])
174
- return {
206
+ result = {
175
207
  "Calculated Measures": self._get_measures(tables),
176
208
  "Tables": self._get_tables_and_columns(tables),
177
209
  "Relationships": self._get_relationships(relationships)
178
210
  }
211
+ logger.info("Extracted model data: %d measures, %d tables, %d relationships",
212
+ len(result["Calculated Measures"]), len(result["Tables"]), len(result["Relationships"]))
213
+ return result
179
214
  except Exception as e:
215
+ logger.error("Failed to extract model data: %s", str(e))
180
216
  raise ProcessingError(f"Failed to extract model data: {e}")
181
217
 
182
218
  def process_pdf(self, pdf_path: str, output_dir: str = "outputimages", num_pages: int = 3) -> List[str]:
219
+ logger.info("Processing PDF file: %s", pdf_path)
183
220
  try:
184
221
  if not os.path.exists(pdf_path):
222
+ logger.error("PDF file does not exist: %s", pdf_path)
185
223
  raise ProcessingError(f"PDF file not found: {pdf_path}")
224
+ logger.debug("Creating output directory: %s", output_dir)
186
225
  os.makedirs(output_dir, exist_ok=True)
226
+ logger.info("Converting PDF pages to images (max %d pages)", num_pages)
187
227
  pages = convert_from_path(pdf_path, first_page=1, last_page=num_pages)
188
228
  image_paths = []
189
229
  for i, page in enumerate(pages):
190
230
  image_path = os.path.join(output_dir, f"page_{i + 1}.png")
231
+ logger.debug("Saving page %d as PNG: %s", i + 1, image_path)
191
232
  page.save(image_path, "PNG")
192
233
  image_paths.append(image_path)
234
+ logger.info("Successfully processed %d pages from PDF", len(image_paths))
235
+ logger.debug("Removing original PDF file: %s", pdf_path)
193
236
  os.remove(pdf_path)
194
237
  return image_paths
195
238
  except Exception as e:
239
+ logger.error("Failed to process PDF: %s", str(e))
196
240
  raise ProcessingError(f"Failed to process PDF: {e}")
197
241
 
198
242
  def extract_zip(self, zip_path: str, extract_path: str) -> tuple[str, str | None]:
243
+ logger.info("Extracting ZIP file: %s", zip_path)
199
244
  try:
200
245
  if not os.path.exists(zip_path):
246
+ logger.error("ZIP file does not exist: %s", zip_path)
201
247
  raise ProcessingError(f"ZIP file not found: {zip_path}")
202
248
  if not zipfile.is_zipfile(zip_path):
249
+ logger.error("File is not a valid ZIP: %s", zip_path)
203
250
  raise ProcessingError(f"File is not a valid ZIP: {zip_path}")
251
+ logger.debug("Creating extraction directory: %s", extract_path)
204
252
  os.makedirs(extract_path, exist_ok=True)
205
253
  with zipfile.ZipFile(zip_path, "r") as zip_ref:
254
+ logger.info("Extracting ZIP contents to: %s", extract_path)
206
255
  zip_ref.extractall(extract_path)
207
256
  pbit_files = list(Path(extract_path).glob("*.pbit"))
208
257
  pdf_files = list(Path(extract_path).glob("*.pdf"))
258
+ logger.info("Found %d PBIT files and %d PDF files in ZIP", len(pbit_files), len(pdf_files))
209
259
  if not pbit_files:
260
+ logger.error("No PBIT files found in ZIP")
210
261
  raise ProcessingError("ZIP file must contain at least one .pbit file")
211
262
  if len(pbit_files) > 1:
263
+ logger.error("Multiple PBIT files found in ZIP: %s", [str(p) for p in pbit_files])
212
264
  raise ProcessingError("ZIP file contains multiple .pbit files")
213
265
  pdf_path = str(pdf_files[0]) if pdf_files else None
266
+ logger.info("Extracted PBIT file: %s, PDF file: %s", str(pbit_files[0]), pdf_path)
214
267
  return str(pbit_files[0]), pdf_path
215
268
  except Exception as e:
269
+ logger.error("Failed to extract ZIP file: %s", str(e))
216
270
  raise ProcessingError(f"Failed to extract ZIP file: {e}")
217
271
 
218
272
  @staticmethod
219
273
  def _get_measures(tables: List[Dict]) -> List[Dict]:
274
+ logger.debug("Extracting measures from tables")
220
275
  measures = []
221
276
  for table in tables:
222
277
  if "measures" in table:
@@ -227,37 +282,50 @@ class PowerBIProcessor:
227
282
  "Expression": " ".join(measure.get("expression", "")) if isinstance(measure.get("expression"), list) else measure.get("expression", ""),
228
283
  "FormatString": measure.get("formatString", "")
229
284
  })
285
+ logger.debug("Extracted %d measures", len(measures))
230
286
  return measures
231
287
 
232
288
  @staticmethod
233
289
  def _get_tables_and_columns(tables: List[Dict]) -> List[Dict]:
290
+ logger.debug("Extracting tables and columns")
234
291
  table_info = []
235
292
  for table in tables:
236
293
  columns = [{"Column Name": col["name"], "Data Type": col.get("dataType", "Unknown"), "Source Column": col.get("sourceColumn", "N/A"), "Calculated": col.get("type") == "calculated"} for col in table.get("columns", [])]
237
294
  expressions = [part["source"]["expression"] for part in table.get("partitions", []) if part["source"].get("expression")]
238
295
  table_info.append({"Table Name": table["name"], "Columns": columns, "Expressions": expressions})
296
+ logger.debug("Extracted %d tables", len(table_info))
239
297
  return table_info
240
298
 
241
299
  @staticmethod
242
300
  def _get_relationships(relationships: List[Dict]) -> List[Dict]:
243
- return [{"From Table": rel["fromTable"], "From Column": rel["fromColumn"], "To Table": rel["toTable"], "To Column": rel["toColumn"], "Join Behavior": rel.get("joinOnDateBehavior", "N/A")} for rel in relationships]
301
+ logger.debug("Extracting relationships")
302
+ result = [{"From Table": rel["fromTable"], "From Column": rel["fromColumn"], "To Table": rel["toTable"], "To Column": rel["toColumn"], "Join Behavior": rel.get("joinOnDateBehavior", "N/A")} for rel in relationships]
303
+ logger.debug("Extracted %d relationships", len(result))
304
+ return result
244
305
 
245
306
  @staticmethod
246
307
  def _cleanup(*paths: str):
308
+ logger.debug("Cleaning up paths: %s", paths)
247
309
  for path in paths:
248
310
  if os.path.exists(path):
249
311
  if os.path.isfile(path):
312
+ logger.debug("Removing file: %s", path)
250
313
  os.remove(path)
251
314
  else:
315
+ logger.debug("Removing directory: %s", path)
252
316
  shutil.rmtree(path, ignore_errors=True)
317
+ logger.debug("Cleanup completed")
253
318
 
254
319
  class PowerBIEvaluator:
255
320
  def __init__(self, api_key: str):
321
+ logger.info("Initializing PowerBIEvaluator")
256
322
  self.api_key = api_key
257
323
  self.model = GeminiFlashModel(api_key)
258
324
  self.processor = PowerBIProcessor()
325
+ logger.info("PowerBIEvaluator initialized successfully")
259
326
 
260
327
  def evaluate(self, questions: List[str], answer_path: str) -> Dict[str, any]:
328
+ logger.info("Starting evaluation for file: %s with %d questions", answer_path, len(questions))
261
329
  try:
262
330
  _, ext = os.path.splitext(answer_path)
263
331
  ext = ext.lower()
@@ -266,13 +334,16 @@ class PowerBIEvaluator:
266
334
  pdf_path = None
267
335
 
268
336
  # Handle input file type
337
+ logger.debug("Checking file extension: %s", ext)
269
338
  if ext == ".zip":
339
+ logger.info("Processing ZIP file")
270
340
  pbit_path, pdf_path = self.processor.extract_zip(answer_path, extract_path)
271
341
  elif ext == ".pbit":
342
+ logger.info("Processing PBIT file directly")
272
343
  pbit_path = answer_path
273
344
  pdf_path = None
274
345
  else:
275
- logger.error("Invalid file type for Power BI: %s", answer_path)
346
+ logger.error("Invalid file type: %s", ext)
276
347
  return {
277
348
  "score": 0,
278
349
  "feedback": f"Invalid file type: {ext}. Expected .pbit or .zip",
@@ -282,9 +353,12 @@ class PowerBIEvaluator:
282
353
 
283
354
  try:
284
355
  # Extract and process the data model from .pbit
356
+ logger.info("Extracting data model from PBIT")
285
357
  data_model = self.processor.extract_datamodel(pbit_path)
358
+ logger.info("Extracting model data")
286
359
  model_data = self.processor.extract_model_data(data_model)
287
360
  answers = [json.dumps(model_data)] * len(questions)
361
+ logger.info("Evaluating DAX with %d question-answer pairs", len(questions))
288
362
  dax_result = self.model.evaluate([{"question": q, "answer": a} for q, a in zip(questions, answers)])
289
363
 
290
364
  # Initialize result with DAX evaluation
@@ -294,24 +368,31 @@ class PowerBIEvaluator:
294
368
  "issues": dax_result["issues"],
295
369
  "recommendations": dax_result["recommendations"]
296
370
  }
371
+ logger.info("DAX evaluation completed: Score=%d", dax_result["score"])
297
372
 
298
373
  # Process PDF and evaluate visuals if present
299
374
  if pdf_path:
375
+ logger.info("Processing PDF for visual evaluation: %s", pdf_path)
300
376
  try:
301
- self.processor.process_pdf(pdf_path)
377
+ image_paths = self.processor.process_pdf(pdf_path)
378
+ logger.info("Evaluating visuals with question: %s", questions[0])
302
379
  visual_result = self.model.evaluate_visuals(questions[0], "outputimages")
303
380
  result["score"] = (dax_result["score"] + visual_result["score"]) // 2
304
381
  result["feedback"] += f"\n\nVisual Feedback:\n{visual_result['feedback']}"
305
382
  result["issues"].extend([f"Visual: {i}" for i in visual_result.get("issues", [])])
306
383
  result["recommendations"].extend(visual_result.get("recommendations", []))
384
+ logger.info("Visual evaluation completed: Score=%d", visual_result["score"])
307
385
  except ProcessingError as e:
308
386
  logger.warning("Failed to process PDF, proceeding with DAX evaluation only: %s", str(e))
309
387
  result["issues"].append(f"Visual evaluation skipped: {str(e)}")
310
388
  result["recommendations"].append("Ensure a valid PDF is provided for visual evaluation if intended")
389
+ else:
390
+ logger.info("No PDF provided, skipping visual evaluation")
311
391
 
392
+ logger.info("Evaluation completed successfully")
312
393
  return result
313
394
  finally:
314
- # Cleanup temporary files and directories
395
+ logger.debug("Cleaning up temporary files and directories")
315
396
  self.processor._cleanup(extract_path, "outputimages")
316
397
  except Exception as e:
317
398
  logger.exception("Failed to evaluate Power BI file %s: %s", answer_path, str(e))
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: QuantumChecker
3
- Version: 0.2.4
3
+ Version: 0.2.5
4
4
  Summary: A package to evaluate homework submissions in Python, SQL, PowerBI, and SSIS.
5
5
  Author: Qobiljon
6
6
  Author-email: qobiljonkhayrullayev@gmail.com
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
2
2
 
3
3
  setup(
4
4
  name="QuantumChecker",
5
- version="0.2.4",
5
+ version="0.2.5",
6
6
  author="Qobiljon",
7
7
  author_email="qobiljonkhayrullayev@gmail.com",
8
8
  description="A package to evaluate homework submissions in Python, SQL, PowerBI, and SSIS.",
File without changes
File without changes