QuantumChecker 0.2.4__tar.gz → 0.2.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {quantumchecker-0.2.4 → quantumchecker-0.2.6}/PKG-INFO +1 -1
- {quantumchecker-0.2.4 → quantumchecker-0.2.6}/QuantumCheck/powerbi_evaluator.py +143 -26
- {quantumchecker-0.2.4 → quantumchecker-0.2.6}/QuantumChecker.egg-info/PKG-INFO +1 -1
- {quantumchecker-0.2.4 → quantumchecker-0.2.6}/setup.py +1 -1
- {quantumchecker-0.2.4 → quantumchecker-0.2.6}/QuantumCheck/__init__.py +0 -0
- {quantumchecker-0.2.4 → quantumchecker-0.2.6}/QuantumCheck/main.py +0 -0
- {quantumchecker-0.2.4 → quantumchecker-0.2.6}/QuantumCheck/prompts.py +0 -0
- {quantumchecker-0.2.4 → quantumchecker-0.2.6}/QuantumCheck/python_evaluator.py +0 -0
- {quantumchecker-0.2.4 → quantumchecker-0.2.6}/QuantumCheck/sql_evaluator.py +0 -0
- {quantumchecker-0.2.4 → quantumchecker-0.2.6}/QuantumCheck/ssis_evaluator.py +0 -0
- {quantumchecker-0.2.4 → quantumchecker-0.2.6}/QuantumChecker.egg-info/SOURCES.txt +0 -0
- {quantumchecker-0.2.4 → quantumchecker-0.2.6}/QuantumChecker.egg-info/dependency_links.txt +0 -0
- {quantumchecker-0.2.4 → quantumchecker-0.2.6}/QuantumChecker.egg-info/requires.txt +0 -0
- {quantumchecker-0.2.4 → quantumchecker-0.2.6}/QuantumChecker.egg-info/top_level.txt +0 -0
- {quantumchecker-0.2.4 → quantumchecker-0.2.6}/README.md +0 -0
- {quantumchecker-0.2.4 → quantumchecker-0.2.6}/setup.cfg +0 -0
- {quantumchecker-0.2.4 → quantumchecker-0.2.6}/tests/test.py +0 -0
|
@@ -13,6 +13,7 @@ from dotenv import load_dotenv
|
|
|
13
13
|
from PIL import Image
|
|
14
14
|
import io
|
|
15
15
|
import base64
|
|
16
|
+
import PyPDF2 # Added for PDF validation
|
|
16
17
|
|
|
17
18
|
from .prompts import prompt_text_powerbi
|
|
18
19
|
|
|
@@ -24,15 +25,17 @@ logging.basicConfig(
|
|
|
24
25
|
handlers=[logging.FileHandler("../powerbi_evaluator.log"), logging.StreamHandler()]
|
|
25
26
|
)
|
|
26
27
|
|
|
27
|
-
# GeminiFlashModel class remains unchanged
|
|
28
28
|
class GeminiFlashModel:
|
|
29
29
|
def __init__(self, api_key: str, model_name: str = "gemini-1.5-flash"):
|
|
30
|
+
logger.info("Initializing GeminiFlashModel with model: %s", model_name)
|
|
30
31
|
api_key = os.getenv("GEMINI_API_KEY") or api_key
|
|
31
32
|
if not api_key:
|
|
33
|
+
logger.error("API key not found in environment variables or provided argument")
|
|
32
34
|
raise ValueError("API key not found in .env file or environment variables.")
|
|
33
35
|
self.api_key = api_key
|
|
34
36
|
self.model_name = model_name
|
|
35
37
|
self.endpoint = f"https://generativelanguage.googleapis.com/v1beta/models/{model_name}:generateContent"
|
|
38
|
+
logger.info("GeminiFlashModel initialized successfully with endpoint: %s", self.endpoint)
|
|
36
39
|
|
|
37
40
|
@retry(
|
|
38
41
|
stop=stop_after_attempt(3),
|
|
@@ -40,14 +43,16 @@ class GeminiFlashModel:
|
|
|
40
43
|
retry=retry_if_exception_type((requests.exceptions.RequestException,))
|
|
41
44
|
)
|
|
42
45
|
def evaluate(self, question_answer_pairs: List[Dict[str, str]]) -> Dict[str, any]:
|
|
43
|
-
logger.info("Starting evaluation of %d
|
|
46
|
+
logger.info("Starting evaluation of %d question-answer pairs", len(question_answer_pairs))
|
|
44
47
|
combined_content = "\n\n".join(
|
|
45
48
|
f"Question {i}:\n{qa['question']}\n\nAnswer {i}:\n{qa['answer']}\n"
|
|
46
49
|
for i, qa in enumerate(question_answer_pairs, 1)
|
|
47
50
|
)
|
|
51
|
+
logger.debug("Prepared combined content for evaluation: %s", combined_content[:100] + "..." if len(combined_content) > 100 else combined_content)
|
|
48
52
|
|
|
49
53
|
headers = {"Content-Type": "application/json"}
|
|
50
54
|
data = {"contents": [{"parts": [{"text": prompt_text_powerbi(combined_content)}]}]}
|
|
55
|
+
logger.info("Sending API request to %s", self.endpoint)
|
|
51
56
|
response = requests.post(f"{self.endpoint}?key={self.api_key}", headers=headers, json=data)
|
|
52
57
|
|
|
53
58
|
if response.status_code != 200:
|
|
@@ -58,6 +63,7 @@ class GeminiFlashModel:
|
|
|
58
63
|
logger.error("API response missing candidates: %s", response_data)
|
|
59
64
|
raise ValueError("No candidates in API response")
|
|
60
65
|
generated_text = response_data["candidates"][0]["content"]["parts"][0]["text"]
|
|
66
|
+
logger.info("Received API response, parsing generated text")
|
|
61
67
|
return self._parse_response(generated_text)
|
|
62
68
|
|
|
63
69
|
@retry(
|
|
@@ -66,10 +72,14 @@ class GeminiFlashModel:
|
|
|
66
72
|
retry=retry_if_exception_type((requests.exceptions.RequestException,))
|
|
67
73
|
)
|
|
68
74
|
def evaluate_visuals(self, question: str, image_folder: str) -> Dict[str, any]:
|
|
75
|
+
logger.info("Starting visual evaluation for question: %s", question)
|
|
69
76
|
folder_path = Path(image_folder)
|
|
70
77
|
images = list(folder_path.glob("*.png"))[:3]
|
|
71
78
|
if not images:
|
|
79
|
+
logger.error("No PNG images found in folder: %s", image_folder)
|
|
72
80
|
raise ProcessingError(f"No PNG images found in {image_folder}")
|
|
81
|
+
logger.info("Found %d PNG images for evaluation: %s", len(images), [img.name for img in images])
|
|
82
|
+
|
|
73
83
|
prompt = (
|
|
74
84
|
"Evaluate the Power BI report visuals based on the given task.\n\n"
|
|
75
85
|
f"Task: {question}\n\n"
|
|
@@ -87,35 +97,55 @@ class GeminiFlashModel:
|
|
|
87
97
|
)
|
|
88
98
|
parts = [{"text": prompt}]
|
|
89
99
|
for img in images:
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
+
logger.debug("Processing image: %s", img.name)
|
|
101
|
+
try:
|
|
102
|
+
with Image.open(img) as pil_img:
|
|
103
|
+
if pil_img.size[0] == 0 or pil_img.size[1] == 0:
|
|
104
|
+
logger.error("Invalid image dimensions for %s", img.name)
|
|
105
|
+
raise ProcessingError(f"Invalid image dimensions for {img.name}")
|
|
106
|
+
pil_img.thumbnail((1024, 1024))
|
|
107
|
+
img_buffer = io.BytesIO()
|
|
108
|
+
pil_img.save(img_buffer, format="PNG")
|
|
109
|
+
parts.append({
|
|
110
|
+
"inline_data": {
|
|
111
|
+
"mime_type": "image/png",
|
|
112
|
+
"data": base64.b64encode(img_buffer.getvalue()).decode('utf-8')
|
|
113
|
+
}
|
|
114
|
+
})
|
|
115
|
+
except Exception as e:
|
|
116
|
+
logger.error("Failed to process image %s: %s", img.name, str(e))
|
|
117
|
+
raise ProcessingError(f"Failed to process image {img.name}: {str(e)}")
|
|
100
118
|
headers = {"Content-Type": "application/json"}
|
|
101
119
|
data = {"contents": [{"parts": parts}]}
|
|
120
|
+
logger.info("Sending visual evaluation API request to %s", self.endpoint)
|
|
102
121
|
response = requests.post(f"{self.endpoint}?key={self.api_key}", headers=headers, json=data)
|
|
103
122
|
if response.status_code != 200:
|
|
104
|
-
logger.error("API request failed: Status %d, Response: %s", response.status_code, response.text)
|
|
123
|
+
logger.error("Visual API request failed: Status %d, Response: %s", response.status_code, response.text)
|
|
105
124
|
raise Exception(f"API call failed: {response.status_code} - {response.text}")
|
|
106
125
|
response_data = response.json()
|
|
107
126
|
if not response_data.get("candidates"):
|
|
108
|
-
logger.error("API response missing candidates: %s", response_data)
|
|
127
|
+
logger.error("Visual API response missing candidates: %s", response_data)
|
|
109
128
|
raise ValueError("No candidates in API response")
|
|
110
129
|
output_text = response_data["candidates"][0]["content"]["parts"][0]["text"]
|
|
130
|
+
logger.info("Received visual API response, parsing output")
|
|
111
131
|
score_match = re.search(r"Score:\s*(\d+)(?:/100)?", output_text)
|
|
112
132
|
feedback_match = re.search(r"Feedback:\s*(.*)", output_text, re.DOTALL)
|
|
113
|
-
|
|
133
|
+
result = {
|
|
114
134
|
"score": int(score_match.group(1)) if score_match else 0,
|
|
115
|
-
"feedback": feedback_match.group(1).strip() if feedback_match else "No visual feedback generated"
|
|
135
|
+
"feedback": feedback_match.group(1).strip() if feedback_match else "No visual feedback generated",
|
|
136
|
+
"issues": []
|
|
116
137
|
}
|
|
138
|
+
if not score_match:
|
|
139
|
+
result["issues"].append("Failed to parse score from visual API response")
|
|
140
|
+
logger.warning("Failed to parse score from visual API response")
|
|
141
|
+
if not feedback_match:
|
|
142
|
+
result["issues"].append("Failed to parse feedback from visual API response")
|
|
143
|
+
logger.warning("Failed to parse feedback from visual API response")
|
|
144
|
+
logger.info("Visual evaluation completed: Score=%d, Feedback=%s", result["score"], result["feedback"][:50] + "..." if len(result["feedback"]) > 50 else result["feedback"])
|
|
145
|
+
return result
|
|
117
146
|
|
|
118
147
|
def _parse_response(self, text: str) -> Dict[str, any]:
|
|
148
|
+
logger.info("Parsing API response text")
|
|
119
149
|
result = {"score": 0, "feedback": "Evaluation not returned by API.", "issues": [], "recommendations": []}
|
|
120
150
|
try:
|
|
121
151
|
lines = text.split("\n")
|
|
@@ -127,96 +157,150 @@ class GeminiFlashModel:
|
|
|
127
157
|
try:
|
|
128
158
|
result["score"] = int(line.split(":")[1].split("/")[0].strip())
|
|
129
159
|
score_found = True
|
|
160
|
+
logger.info("Parsed score: %d", result["score"])
|
|
130
161
|
except ValueError:
|
|
131
162
|
result["issues"].append("Failed to parse score from API response")
|
|
163
|
+
logger.error("Failed to parse score from response: %s", line)
|
|
132
164
|
continue
|
|
133
165
|
elif score_found:
|
|
134
166
|
feedback_lines.append(line)
|
|
135
167
|
if feedback_lines:
|
|
136
168
|
result["feedback"] = "\n".join(feedback_lines).strip()
|
|
169
|
+
logger.debug("Parsed feedback: %s", result["feedback"][:50] + "..." if len(result["feedback"]) > 50 else result["feedback"])
|
|
137
170
|
return result
|
|
138
171
|
except Exception as e:
|
|
139
172
|
result["issues"].append(str(e))
|
|
173
|
+
logger.error("Error parsing response: %s", str(e))
|
|
140
174
|
return result
|
|
141
175
|
|
|
142
176
|
class PowerBIProcessor:
|
|
143
177
|
def extract_datamodel(self, pbit_file_path: str) -> Dict:
|
|
178
|
+
logger.info("Extracting data model from PBIT file: %s", pbit_file_path)
|
|
144
179
|
if not os.path.exists(pbit_file_path):
|
|
180
|
+
logger.error("PBIT file does not exist: %s", pbit_file_path)
|
|
145
181
|
raise ProcessingError(f"PBIT file not found: {pbit_file_path}")
|
|
146
182
|
folder_path = os.path.dirname(pbit_file_path)
|
|
147
183
|
file_name = os.path.splitext(os.path.basename(pbit_file_path))[0]
|
|
148
184
|
zip_file = os.path.join(folder_path, f"{file_name}.zip")
|
|
149
185
|
export_path = os.path.join(folder_path, "export")
|
|
186
|
+
logger.debug("Cleaning up temporary files: %s, %s", zip_file, export_path)
|
|
150
187
|
self._cleanup(zip_file, export_path)
|
|
151
188
|
try:
|
|
189
|
+
logger.info("Renaming PBIT to ZIP: %s -> %s", pbit_file_path, zip_file)
|
|
152
190
|
os.rename(pbit_file_path, zip_file)
|
|
153
191
|
if not zipfile.is_zipfile(zip_file):
|
|
192
|
+
logger.error("File is not a valid ZIP: %s", zip_file)
|
|
154
193
|
raise ProcessingError(f"File is not a valid ZIP: {zip_file}")
|
|
194
|
+
logger.info("Extracting ZIP contents to: %s", export_path)
|
|
155
195
|
with zipfile.ZipFile(zip_file, "r") as zip_ref:
|
|
156
196
|
zip_ref.extractall(export_path)
|
|
157
197
|
schema_path = os.path.join(export_path, "DataModelSchema")
|
|
158
198
|
txt_path = os.path.join(export_path, "DataModelSchema.txt")
|
|
199
|
+
logger.debug("Renaming schema file: %s -> %s", schema_path, txt_path)
|
|
159
200
|
os.rename(schema_path, txt_path)
|
|
201
|
+
logger.info("Reading DataModelSchema file: %s", txt_path)
|
|
160
202
|
with open(txt_path, "r", encoding="utf-16-le") as file:
|
|
161
|
-
|
|
203
|
+
data = json.load(file)
|
|
204
|
+
logger.info("Successfully extracted data model from PBIT file")
|
|
205
|
+
return data
|
|
162
206
|
except UnicodeDecodeError as e:
|
|
163
207
|
logger.error("Failed to decode DataModelSchema: %s", str(e))
|
|
164
208
|
raise ProcessingError(f"Invalid encoding in DataModelSchema: {e}")
|
|
165
209
|
except Exception as e:
|
|
210
|
+
logger.error("Failed to extract DataModelSchema: %s", str(e))
|
|
166
211
|
raise ProcessingError(f"Failed to extract DataModelSchema: {e}")
|
|
167
212
|
finally:
|
|
213
|
+
logger.debug("Cleaning up temporary files after extraction")
|
|
168
214
|
self._cleanup(zip_file, export_path)
|
|
169
215
|
|
|
170
216
|
def extract_model_data(self, data: Dict) -> Dict:
|
|
217
|
+
logger.info("Extracting model data from data model")
|
|
171
218
|
try:
|
|
172
219
|
tables = data.get("model", {}).get("tables", [])
|
|
173
220
|
relationships = data.get("model", {}).get("relationships", [])
|
|
174
|
-
|
|
221
|
+
result = {
|
|
175
222
|
"Calculated Measures": self._get_measures(tables),
|
|
176
223
|
"Tables": self._get_tables_and_columns(tables),
|
|
177
224
|
"Relationships": self._get_relationships(relationships)
|
|
178
225
|
}
|
|
226
|
+
logger.info("Extracted model data: %d measures, %d tables, %d relationships",
|
|
227
|
+
len(result["Calculated Measures"]), len(result["Tables"]), len(result["Relationships"]))
|
|
228
|
+
return result
|
|
179
229
|
except Exception as e:
|
|
230
|
+
logger.error("Failed to extract model data: %s", str(e))
|
|
180
231
|
raise ProcessingError(f"Failed to extract model data: {e}")
|
|
181
232
|
|
|
182
233
|
def process_pdf(self, pdf_path: str, output_dir: str = "outputimages", num_pages: int = 3) -> List[str]:
|
|
234
|
+
logger.info("Processing PDF file: %s", pdf_path)
|
|
183
235
|
try:
|
|
184
236
|
if not os.path.exists(pdf_path):
|
|
237
|
+
logger.error("PDF file does not exist: %s", pdf_path)
|
|
185
238
|
raise ProcessingError(f"PDF file not found: {pdf_path}")
|
|
239
|
+
# Validate PDF
|
|
240
|
+
try:
|
|
241
|
+
with open(pdf_path, "rb") as f:
|
|
242
|
+
pdf_reader = PyPDF2.PdfReader(f)
|
|
243
|
+
if len(pdf_reader.pages) == 0:
|
|
244
|
+
logger.error("PDF is empty: %s", pdf_path)
|
|
245
|
+
raise ProcessingError(f"PDF is empty: {pdf_path}")
|
|
246
|
+
logger.info("PDF validated, contains %d pages", len(pdf_reader.pages))
|
|
247
|
+
except Exception as e:
|
|
248
|
+
logger.error("Invalid PDF file: %s", str(e))
|
|
249
|
+
raise ProcessingError(f"Invalid PDF file: {str(e)}")
|
|
250
|
+
logger.debug("Creating output directory: %s", output_dir)
|
|
186
251
|
os.makedirs(output_dir, exist_ok=True)
|
|
187
|
-
pages
|
|
252
|
+
logger.info("Converting PDF pages to images (max %d pages)", num_pages)
|
|
253
|
+
pages = convert_from_path(pdf_path, first_page=1, last_page=min(num_pages, len(pdf_reader.pages)))
|
|
254
|
+
if not pages:
|
|
255
|
+
logger.error("No pages converted from PDF: %s", pdf_path)
|
|
256
|
+
raise ProcessingError(f"No pages converted from PDF: {pdf_path}")
|
|
188
257
|
image_paths = []
|
|
189
258
|
for i, page in enumerate(pages):
|
|
190
259
|
image_path = os.path.join(output_dir, f"page_{i + 1}.png")
|
|
260
|
+
logger.debug("Saving page %d as PNG: %s", i + 1, image_path)
|
|
191
261
|
page.save(image_path, "PNG")
|
|
192
262
|
image_paths.append(image_path)
|
|
193
|
-
|
|
263
|
+
logger.info("Successfully processed %d pages from PDF", len(image_paths))
|
|
194
264
|
return image_paths
|
|
195
265
|
except Exception as e:
|
|
196
|
-
|
|
266
|
+
logger.error("Failed to process PDF: %s", str(e))
|
|
267
|
+
raise ProcessingError(f"Failed to process PDF: {str(e)}")
|
|
268
|
+
finally:
|
|
269
|
+
logger.debug("Not removing PDF file to allow debugging: %s", pdf_path)
|
|
197
270
|
|
|
198
271
|
def extract_zip(self, zip_path: str, extract_path: str) -> tuple[str, str | None]:
|
|
272
|
+
logger.info("Extracting ZIP file: %s", zip_path)
|
|
199
273
|
try:
|
|
200
274
|
if not os.path.exists(zip_path):
|
|
275
|
+
logger.error("ZIP file does not exist: %s", zip_path)
|
|
201
276
|
raise ProcessingError(f"ZIP file not found: {zip_path}")
|
|
202
277
|
if not zipfile.is_zipfile(zip_path):
|
|
278
|
+
logger.error("File is not a valid ZIP: %s", zip_path)
|
|
203
279
|
raise ProcessingError(f"File is not a valid ZIP: {zip_path}")
|
|
280
|
+
logger.debug("Creating extraction directory: %s", extract_path)
|
|
204
281
|
os.makedirs(extract_path, exist_ok=True)
|
|
205
282
|
with zipfile.ZipFile(zip_path, "r") as zip_ref:
|
|
283
|
+
logger.info("Extracting ZIP contents to: %s", extract_path)
|
|
206
284
|
zip_ref.extractall(extract_path)
|
|
207
285
|
pbit_files = list(Path(extract_path).glob("*.pbit"))
|
|
208
286
|
pdf_files = list(Path(extract_path).glob("*.pdf"))
|
|
287
|
+
logger.info("Found %d PBIT files and %d PDF files in ZIP", len(pbit_files), len(pdf_files))
|
|
209
288
|
if not pbit_files:
|
|
289
|
+
logger.error("No PBIT files found in ZIP")
|
|
210
290
|
raise ProcessingError("ZIP file must contain at least one .pbit file")
|
|
211
291
|
if len(pbit_files) > 1:
|
|
292
|
+
logger.error("Multiple PBIT files found in ZIP: %s", [str(p) for p in pbit_files])
|
|
212
293
|
raise ProcessingError("ZIP file contains multiple .pbit files")
|
|
213
294
|
pdf_path = str(pdf_files[0]) if pdf_files else None
|
|
295
|
+
logger.info("Extracted PBIT file: %s, PDF file: %s", str(pbit_files[0]), pdf_path)
|
|
214
296
|
return str(pbit_files[0]), pdf_path
|
|
215
297
|
except Exception as e:
|
|
298
|
+
logger.error("Failed to extract ZIP file: %s", str(e))
|
|
216
299
|
raise ProcessingError(f"Failed to extract ZIP file: {e}")
|
|
217
300
|
|
|
218
301
|
@staticmethod
|
|
219
302
|
def _get_measures(tables: List[Dict]) -> List[Dict]:
|
|
303
|
+
logger.debug("Extracting measures from tables")
|
|
220
304
|
measures = []
|
|
221
305
|
for table in tables:
|
|
222
306
|
if "measures" in table:
|
|
@@ -227,37 +311,50 @@ class PowerBIProcessor:
|
|
|
227
311
|
"Expression": " ".join(measure.get("expression", "")) if isinstance(measure.get("expression"), list) else measure.get("expression", ""),
|
|
228
312
|
"FormatString": measure.get("formatString", "")
|
|
229
313
|
})
|
|
314
|
+
logger.debug("Extracted %d measures", len(measures))
|
|
230
315
|
return measures
|
|
231
316
|
|
|
232
317
|
@staticmethod
|
|
233
318
|
def _get_tables_and_columns(tables: List[Dict]) -> List[Dict]:
|
|
319
|
+
logger.debug("Extracting tables and columns")
|
|
234
320
|
table_info = []
|
|
235
321
|
for table in tables:
|
|
236
322
|
columns = [{"Column Name": col["name"], "Data Type": col.get("dataType", "Unknown"), "Source Column": col.get("sourceColumn", "N/A"), "Calculated": col.get("type") == "calculated"} for col in table.get("columns", [])]
|
|
237
323
|
expressions = [part["source"]["expression"] for part in table.get("partitions", []) if part["source"].get("expression")]
|
|
238
324
|
table_info.append({"Table Name": table["name"], "Columns": columns, "Expressions": expressions})
|
|
325
|
+
logger.debug("Extracted %d tables", len(table_info))
|
|
239
326
|
return table_info
|
|
240
327
|
|
|
241
328
|
@staticmethod
|
|
242
329
|
def _get_relationships(relationships: List[Dict]) -> List[Dict]:
|
|
243
|
-
|
|
330
|
+
logger.debug("Extracting relationships")
|
|
331
|
+
result = [{"From Table": rel["fromTable"], "From Column": rel["fromColumn"], "To Table": rel["toTable"], "To Column": rel["toColumn"], "Join Behavior": rel.get("joinOnDateBehavior", "N/A")} for rel in relationships]
|
|
332
|
+
logger.debug("Extracted %d relationships", len(result))
|
|
333
|
+
return result
|
|
244
334
|
|
|
245
335
|
@staticmethod
|
|
246
336
|
def _cleanup(*paths: str):
|
|
337
|
+
logger.debug("Cleaning up paths: %s", paths)
|
|
247
338
|
for path in paths:
|
|
248
339
|
if os.path.exists(path):
|
|
249
340
|
if os.path.isfile(path):
|
|
341
|
+
logger.debug("Removing file: %s", path)
|
|
250
342
|
os.remove(path)
|
|
251
343
|
else:
|
|
344
|
+
logger.debug("Removing directory: %s", path)
|
|
252
345
|
shutil.rmtree(path, ignore_errors=True)
|
|
346
|
+
logger.debug("Cleanup completed")
|
|
253
347
|
|
|
254
348
|
class PowerBIEvaluator:
|
|
255
349
|
def __init__(self, api_key: str):
|
|
350
|
+
logger.info("Initializing PowerBIEvaluator")
|
|
256
351
|
self.api_key = api_key
|
|
257
352
|
self.model = GeminiFlashModel(api_key)
|
|
258
353
|
self.processor = PowerBIProcessor()
|
|
354
|
+
logger.info("PowerBIEvaluator initialized successfully")
|
|
259
355
|
|
|
260
356
|
def evaluate(self, questions: List[str], answer_path: str) -> Dict[str, any]:
|
|
357
|
+
logger.info("Starting evaluation for file: %s with %d questions", answer_path, len(questions))
|
|
261
358
|
try:
|
|
262
359
|
_, ext = os.path.splitext(answer_path)
|
|
263
360
|
ext = ext.lower()
|
|
@@ -266,13 +363,16 @@ class PowerBIEvaluator:
|
|
|
266
363
|
pdf_path = None
|
|
267
364
|
|
|
268
365
|
# Handle input file type
|
|
366
|
+
logger.debug("Checking file extension: %s", ext)
|
|
269
367
|
if ext == ".zip":
|
|
368
|
+
logger.info("Processing ZIP file")
|
|
270
369
|
pbit_path, pdf_path = self.processor.extract_zip(answer_path, extract_path)
|
|
271
370
|
elif ext == ".pbit":
|
|
371
|
+
logger.info("Processing PBIT file directly")
|
|
272
372
|
pbit_path = answer_path
|
|
273
373
|
pdf_path = None
|
|
274
374
|
else:
|
|
275
|
-
logger.error("Invalid file type
|
|
375
|
+
logger.error("Invalid file type: %s", ext)
|
|
276
376
|
return {
|
|
277
377
|
"score": 0,
|
|
278
378
|
"feedback": f"Invalid file type: {ext}. Expected .pbit or .zip",
|
|
@@ -282,9 +382,12 @@ class PowerBIEvaluator:
|
|
|
282
382
|
|
|
283
383
|
try:
|
|
284
384
|
# Extract and process the data model from .pbit
|
|
385
|
+
logger.info("Extracting data model from PBIT")
|
|
285
386
|
data_model = self.processor.extract_datamodel(pbit_path)
|
|
387
|
+
logger.info("Extracting model data")
|
|
286
388
|
model_data = self.processor.extract_model_data(data_model)
|
|
287
389
|
answers = [json.dumps(model_data)] * len(questions)
|
|
390
|
+
logger.info("Evaluating DAX with %d question-answer pairs", len(questions))
|
|
288
391
|
dax_result = self.model.evaluate([{"question": q, "answer": a} for q, a in zip(questions, answers)])
|
|
289
392
|
|
|
290
393
|
# Initialize result with DAX evaluation
|
|
@@ -294,24 +397,38 @@ class PowerBIEvaluator:
|
|
|
294
397
|
"issues": dax_result["issues"],
|
|
295
398
|
"recommendations": dax_result["recommendations"]
|
|
296
399
|
}
|
|
400
|
+
logger.info("DAX evaluation completed: Score=%d", dax_result["score"])
|
|
297
401
|
|
|
298
402
|
# Process PDF and evaluate visuals if present
|
|
299
403
|
if pdf_path:
|
|
404
|
+
logger.info("Processing PDF for visual evaluation: %s", pdf_path)
|
|
300
405
|
try:
|
|
301
|
-
self.processor.process_pdf(pdf_path)
|
|
406
|
+
image_paths = self.processor.process_pdf(pdf_path)
|
|
407
|
+
if not image_paths:
|
|
408
|
+
logger.error("No images generated from PDF: %s", pdf_path)
|
|
409
|
+
raise ProcessingError("No images generated from PDF")
|
|
410
|
+
logger.info("Evaluating visuals with question: %s", questions[0])
|
|
302
411
|
visual_result = self.model.evaluate_visuals(questions[0], "outputimages")
|
|
303
412
|
result["score"] = (dax_result["score"] + visual_result["score"]) // 2
|
|
304
413
|
result["feedback"] += f"\n\nVisual Feedback:\n{visual_result['feedback']}"
|
|
305
414
|
result["issues"].extend([f"Visual: {i}" for i in visual_result.get("issues", [])])
|
|
306
415
|
result["recommendations"].extend(visual_result.get("recommendations", []))
|
|
416
|
+
logger.info("Visual evaluation completed: Score=%d", visual_result["score"])
|
|
307
417
|
except ProcessingError as e:
|
|
308
418
|
logger.warning("Failed to process PDF, proceeding with DAX evaluation only: %s", str(e))
|
|
309
419
|
result["issues"].append(f"Visual evaluation skipped: {str(e)}")
|
|
310
|
-
result["recommendations"].append("Ensure a valid PDF
|
|
420
|
+
result["recommendations"].append("Ensure a valid PDF with Power BI visuals is provided")
|
|
421
|
+
except Exception as e:
|
|
422
|
+
logger.error("Unexpected error during visual evaluation: %s", str(e))
|
|
423
|
+
result["issues"].append(f"Visual evaluation failed: {str(e)}")
|
|
424
|
+
result["recommendations"].append("Check PDF file and API connectivity")
|
|
425
|
+
else:
|
|
426
|
+
logger.info("No PDF provided, skipping visual evaluation")
|
|
311
427
|
|
|
428
|
+
logger.info("Evaluation completed successfully")
|
|
312
429
|
return result
|
|
313
430
|
finally:
|
|
314
|
-
|
|
431
|
+
logger.debug("Cleaning up temporary files and directories")
|
|
315
432
|
self.processor._cleanup(extract_path, "outputimages")
|
|
316
433
|
except Exception as e:
|
|
317
434
|
logger.exception("Failed to evaluate Power BI file %s: %s", answer_path, str(e))
|
|
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
|
|
|
2
2
|
|
|
3
3
|
setup(
|
|
4
4
|
name="QuantumChecker",
|
|
5
|
-
version="0.2.4",
|
|
5
|
+
version="0.2.6",
|
|
6
6
|
author="Qobiljon",
|
|
7
7
|
author_email="qobiljonkhayrullayev@gmail.com",
|
|
8
8
|
description="A package to evaluate homework submissions in Python, SQL, PowerBI, and SSIS.",
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|