QuantumChecker 0.2.7__tar.gz → 0.2.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26)
  1. quantumchecker-0.2.9/PKG-INFO +53 -0
  2. quantumchecker-0.2.9/QuantumCheck/main.py +222 -0
  3. {quantumchecker-0.2.7 → quantumchecker-0.2.9}/QuantumCheck/powerbi_evaluator.py +44 -47
  4. {quantumchecker-0.2.7 → quantumchecker-0.2.9}/QuantumCheck/prompts.py +41 -61
  5. quantumchecker-0.2.9/QuantumCheck/python_evaluator.py +217 -0
  6. quantumchecker-0.2.9/QuantumCheck/sql_evaluator.py +219 -0
  7. quantumchecker-0.2.9/QuantumCheck/ssis_evaluator.py +403 -0
  8. quantumchecker-0.2.9/QuantumChecker.egg-info/PKG-INFO +53 -0
  9. {quantumchecker-0.2.7 → quantumchecker-0.2.9}/QuantumChecker.egg-info/SOURCES.txt +2 -1
  10. quantumchecker-0.2.9/README.md +27 -0
  11. {quantumchecker-0.2.7 → quantumchecker-0.2.9}/setup.py +1 -1
  12. quantumchecker-0.2.9/tests/test.py +135 -0
  13. quantumchecker-0.2.9/tests/test2.py +30 -0
  14. quantumchecker-0.2.7/PKG-INFO +0 -34
  15. quantumchecker-0.2.7/QuantumCheck/main.py +0 -125
  16. quantumchecker-0.2.7/QuantumCheck/python_evaluator.py +0 -95
  17. quantumchecker-0.2.7/QuantumCheck/sql_evaluator.py +0 -97
  18. quantumchecker-0.2.7/QuantumCheck/ssis_evaluator.py +0 -136
  19. quantumchecker-0.2.7/QuantumChecker.egg-info/PKG-INFO +0 -34
  20. quantumchecker-0.2.7/README.md +0 -8
  21. quantumchecker-0.2.7/tests/test.py +0 -31
  22. {quantumchecker-0.2.7 → quantumchecker-0.2.9}/QuantumCheck/__init__.py +0 -0
  23. {quantumchecker-0.2.7 → quantumchecker-0.2.9}/QuantumChecker.egg-info/dependency_links.txt +0 -0
  24. {quantumchecker-0.2.7 → quantumchecker-0.2.9}/QuantumChecker.egg-info/requires.txt +0 -0
  25. {quantumchecker-0.2.7 → quantumchecker-0.2.9}/QuantumChecker.egg-info/top_level.txt +0 -0
  26. {quantumchecker-0.2.7 → quantumchecker-0.2.9}/setup.cfg +0 -0
@@ -0,0 +1,53 @@
1
+ Metadata-Version: 2.4
2
+ Name: QuantumChecker
3
+ Version: 0.2.9
4
+ Summary: A package to evaluate homework submissions in Python, SQL, PowerBI, and SSIS.
5
+ Author: Qobiljon
6
+ Author-email: qobiljonkhayrullayev@gmail.com
7
+ Classifier: Programming Language :: Python :: 3
8
+ Classifier: License :: OSI Approved :: MIT License
9
+ Classifier: Operating System :: OS Independent
10
+ Requires-Python: >=3.6
11
+ Description-Content-Type: text/markdown
12
+ Requires-Dist: requests>=2.31.0
13
+ Requires-Dist: tenacity>=8.2.3
14
+ Requires-Dist: pdf2image>=1.16.3
15
+ Requires-Dist: python-dotenv>=1.0.0
16
+ Requires-Dist: Pillow>=10.0.0
17
+ Requires-Dist: PyPDF2>=3.0.1
18
+ Dynamic: author
19
+ Dynamic: author-email
20
+ Dynamic: classifier
21
+ Dynamic: description
22
+ Dynamic: description-content-type
23
+ Dynamic: requires-dist
24
+ Dynamic: requires-python
25
+ Dynamic: summary
26
+
27
+ Sample usage:
28
+ ```
29
+ import asyncio
30
+ from QuantumCheck.main import HomeworkEvaluator
31
+
32
+ async def main():
33
+ evaluator = HomeworkEvaluator()
34
+ question_content = """
35
+ Q1: What is a Python list? Explain with an example.
36
+
37
+ Q2: Write an SQL query to select all records from a table named 'students'.
38
+ """
39
+ answer_path = "sample_submissions/student1_answer.py"
40
+ api_keys = ["your_api_key_1", "your_api_key_2"]
41
+ question_type = "python"
42
+
43
+ result = await evaluator.evaluate_from_content(
44
+ question_content=question_content,
45
+ answer_path=answer_path,
46
+ api_keys=api_keys,
47
+ question_type=question_type
48
+ )
49
+ print(result)
50
+
51
+ if __name__ == "__main__":
52
+ asyncio.run(main())
53
+ ```
@@ -0,0 +1,222 @@
1
+ import logging
2
+ import os
3
+ import zipfile
4
+ import random
5
+ from datetime import datetime
6
+ from typing import List, Dict, Optional
7
+ from .python_evaluator import PythonEvaluator
8
+ from .sql_evaluator import SQLEvaluator
9
+ from .powerbi_evaluator import PowerBIEvaluator
10
+ from .ssis_evaluator import SSISEvaluator
11
+ import asyncio
12
+
13
+ _logger_cache = {}
14
+
15
+ class HomeworkEvaluator:
16
+ EVALUATOR_REGISTRY = {
17
+ "python": PythonEvaluator,
18
+ "sql": SQLEvaluator,
19
+ "powerbi": PowerBIEvaluator,
20
+ "ssis": SSISEvaluator
21
+ }
22
+
23
+ EXTENSION_TO_TYPE = {
24
+ ".py": "python",
25
+ ".sql": "sql",
26
+ ".pbit": "powerbi",
27
+ ".pdf": "powerbi",
28
+ ".dtsx": "ssis",
29
+ ".DTSX": "ssis",
30
+ ".txt": "text",
31
+ ".md": "text"
32
+ }
33
+
34
+ API_NAME_MAPPING = {
35
+ "python": "Google Gemini API",
36
+ "sql": "Google Gemini API",
37
+ "powerbi": "Google Gemini API",
38
+ "ssis": "Google Gemini API",
39
+ "text": "Google Gemini API"
40
+ }
41
+
42
+ def __init__(self, log_level: int = logging.INFO):
43
+ self.log_level = log_level
44
+ self._successful_key_cache = {}
45
+ self._rate_limit_delay = {} # Track delay per key
46
+
47
+ def _get_logger(self, log_type: str) -> logging.Logger:
48
+ log_name = f"{log_type}_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}"
49
+ if log_name not in _logger_cache:
50
+ logger = logging.getLogger(log_name)
51
+ logger.setLevel(self.log_level)
52
+ if not logger.handlers:
53
+ handler = logging.StreamHandler()
54
+ handler.setFormatter(logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s"))
55
+ logger.addHandler(handler)
56
+ _logger_cache[log_name] = logger
57
+ return _logger_cache[log_name]
58
+
59
+ def parse_questions(self, content: str) -> List[str]:
60
+ logger = self._get_logger("QuantumCheck.main")
61
+ questions = [q.strip() for q in content.split("\n\n") if q.strip()]
62
+ logger.info(f"Parsed {len(questions)} questions from content")
63
+ if not questions:
64
+ raise ValueError("No valid questions found in content")
65
+ return questions
66
+
67
+ def _detect_zip_content_type(self, zip_path: str, logger: logging.Logger) -> str:
68
+ try:
69
+ with zipfile.ZipFile(zip_path, "r") as zip_ref:
70
+ extensions = {os.path.splitext(name)[1].lower() for name in zip_ref.namelist()}
71
+ file_types = [self.EXTENSION_TO_TYPE.get(ext, "text") for ext in extensions if ext]
72
+ logger.info(f"Detected extensions in ZIP {zip_path}: {extensions}, types: {file_types}")
73
+ if "python" in file_types:
74
+ logger.info(f"Selected file type: python from extension: .py in ZIP: {zip_path}")
75
+ return "python"
76
+ elif "sql" in file_types:
77
+ logger.info(f"Selected file type: sql from extension: .sql in ZIP: {zip_path}")
78
+ return "sql"
79
+ elif "powerbi" in file_types:
80
+ logger.info(f"Selected file type: powerbi from extension: .pbit or .pdf in ZIP: {zip_path}")
81
+ return "powerbi"
82
+ elif "ssis" in file_types:
83
+ logger.info(f"Selected file type: ssis from extension: .dtsx in ZIP: {zip_path}")
84
+ return "ssis"
85
+ else:
86
+ logger.info(f"Selected file type: text (default) in ZIP: {zip_path}")
87
+ return "text"
88
+ except zipfile.BadZipFile:
89
+ logger.error(f"Invalid ZIP file: {zip_path}")
90
+ raise ValueError(f"Invalid ZIP file: {zip_path}")
91
+
92
+ async def evaluate_from_content(
93
+ self,
94
+ question_content: str,
95
+ answer_path: str,
96
+ api_keys: List[str],
97
+ question_type: str
98
+ ) -> Dict[str, any]:
99
+ try:
100
+ questions = self.parse_questions(question_content)
101
+ except ValueError as e:
102
+ logger = self._get_logger("QuantumCheck.main")
103
+ logger.error("Failed to parse question content: %s", str(e))
104
+ return {
105
+ "score": 0,
106
+ "feedback": f"Error parsing question content: {str(e)}",
107
+ "issues": [str(e)],
108
+ "recommendations": [],
109
+ "used_api_key_index": None,
110
+ "used_api_name": None
111
+ }
112
+
113
+ answer_path = answer_path.strip()
114
+ _, ext = os.path.splitext(answer_path)
115
+ ext = ext.lower()
116
+
117
+ # Determine file type, prioritizing question_type for evaluator selection
118
+ if ext == ".zip":
119
+ logger = self._get_logger("zip")
120
+ file_type = self._detect_zip_content_type(answer_path, logger)
121
+ else:
122
+ file_type = self.EXTENSION_TO_TYPE.get(ext, "text")
123
+ logger = self._get_logger(file_type)
124
+
125
+ # Use question_type if provided, else fallback to file_type
126
+ eval_type = question_type if question_type in self.EVALUATOR_REGISTRY else file_type
127
+ logger.info(f"Processing answer_path: {answer_path} with detected file type: {file_type}, evaluation type: {eval_type}")
128
+
129
+ if not os.path.exists(answer_path):
130
+ logger.error(f"Answer file not found: {answer_path}")
131
+ return {
132
+ "score": 0,
133
+ "feedback": f"Answer file not found: {answer_path}",
134
+ "issues": [f"Answer file not found: {answer_path}"],
135
+ "recommendations": [],
136
+ "used_api_key_index": None,
137
+ "used_api_name": None
138
+ }
139
+
140
+ evaluator_class = self.EVALUATOR_REGISTRY.get(eval_type, PythonEvaluator)
141
+ last_error_messages = []
142
+
143
+ # Shuffle keys for load balancing
144
+ key_order = [(i + 1, key) for i, key in enumerate(api_keys)]
145
+ random.shuffle(key_order)
146
+
147
+ # Try cached key with 30% probability to encourage rotation
148
+ cached_key_idx = self._successful_key_cache.get(eval_type)
149
+ if cached_key_idx is not None and cached_key_idx < len(api_keys) and random.random() < 0.3:
150
+ key_order.insert(0, (cached_key_idx + 1, api_keys[cached_key_idx]))
151
+
152
+ for idx, key in key_order:
153
+ # Check rate limit delay
154
+ if key in self._rate_limit_delay:
155
+ delay_until = self._rate_limit_delay[key]
156
+ current_time = datetime.now()
157
+ delay_until_time = datetime.fromtimestamp(delay_until)
158
+ if current_time < delay_until_time:
159
+ logger.info(f"API key #{idx} is rate-limited until {delay_until_time}, skipping.")
160
+ continue
161
+ else:
162
+ del self._rate_limit_delay[key]
163
+
164
+ logger.info(f"Trying API key #{idx}")
165
+ evaluator = evaluator_class(key)
166
+ api_name = getattr(evaluator, 'get_api_name', lambda: self.API_NAME_MAPPING.get(eval_type, "Unknown API"))()
167
+ logger.info(f"Using API: {api_name} for evaluation type: {eval_type}")
168
+
169
+ try:
170
+ evaluation = evaluator.evaluate(questions, answer_path, temp_dir=f"temp_extract_{os.getpid()}_{idx}")
171
+
172
+ feedback = evaluation.get("feedback", "").lower()
173
+ issues = " ".join(evaluation.get("issues", [])).lower()
174
+
175
+ # Check for invalid API key
176
+ if any(phrase in feedback or phrase in issues for phrase in ["api key not valid", "api_key_invalid"]):
177
+ logger.warning(f"API key #{idx} invalid, trying next key.")
178
+ last_error_messages.append(f"API key #{idx} invalid.")
179
+ continue
180
+
181
+ # Check for rate limit errors
182
+ if any(phrase in feedback or phrase in issues for phrase in ["429", "too many requests", "rate limit"]):
183
+ logger.warning(f"API key #{idx} hit rate limit, applying delay.")
184
+ last_error_messages.append(f"API key #{idx} rate limited.")
185
+ self._rate_limit_delay[key] = datetime.now().timestamp() + 45 # 45s delay
186
+ continue
187
+
188
+ # Check for invalid evaluation
189
+ if evaluation.get("score", 0) == 0 and "evaluation not returned" in feedback:
190
+ logger.warning(f"API key #{idx} returned invalid evaluation, trying next key.")
191
+ last_error_messages.append(f"API key #{idx} returned invalid evaluation.")
192
+ continue
193
+
194
+ # Cache successful key
195
+ self._successful_key_cache[eval_type] = idx - 1
196
+ logger.info(f"Evaluation succeeded with API key #{idx}: Score = {evaluation.get('score')}")
197
+
198
+ return {
199
+ "score": evaluation.get("score", 0),
200
+ "feedback": evaluation.get("feedback", "No feedback provided"),
201
+ "issues": evaluation.get("issues", []),
202
+ "recommendations": evaluation.get("recommendations", []),
203
+ "used_api_key_index": idx,
204
+ "used_api_name": api_name
205
+ }
206
+
207
+ except Exception as e:
208
+ logger.error(f"Exception using API key #{idx}: {str(e)}")
209
+ last_error_messages.append(f"Exception with key #{idx}: {str(e)}")
210
+ if "429" in str(e) or "rate limit" in str(e).lower():
211
+ self._rate_limit_delay[key] = datetime.now().timestamp() + 45
212
+ continue
213
+
214
+ logger.error("Evaluation failed with all API keys.")
215
+ return {
216
+ "score": 0,
217
+ "feedback": "Evaluation failed with all API keys.",
218
+ "issues": last_error_messages if last_error_messages else ["All API keys failed to evaluate the submission."],
219
+ "recommendations": [],
220
+ "used_api_key_index": None,
221
+ "used_api_name": None
222
+ }
@@ -15,7 +15,6 @@ import io
15
15
  import base64
16
16
 
17
17
 
18
- # Placeholder for prompts.py content
19
18
  def prompt_text_powerbi(combined_content: str) -> str:
20
19
  return f"""
21
20
  Evaluate the following Power BI DAX question-answer pairs for correctness, clarity, and appropriateness.
@@ -46,19 +45,20 @@ class GeminiFlashModel:
46
45
  self.model_name = model_name
47
46
  self.endpoint = f"https://generativelanguage.googleapis.com/v1beta/models/{model_name}:generateContent"
48
47
 
49
- @retry(stop=stop_after_attempt(3), wait=wait_exponential(min=4, max=10),
50
- retry=retry_if_exception_type((requests.exceptions.RequestException,)))
48
+ @retry(
49
+ stop=stop_after_attempt(3),
50
+ wait=wait_exponential(min=4, max=10),
51
+ retry=retry_if_exception_type((requests.exceptions.RequestException,))
52
+ )
51
53
  def evaluate(self, question_answer_pairs: List[Dict[str, str]]) -> Dict[str, any]:
52
54
  logger.info("Starting evaluation of %d Power BI question-answer pairs", len(question_answer_pairs))
53
55
  combined_content = "\n\n".join(
54
56
  f"Question {i}:\n{qa['question']}\n\nAnswer {i}:\n{qa['answer']}\n"
55
57
  for i, qa in enumerate(question_answer_pairs, 1)
56
58
  )
57
-
58
59
  headers = {"Content-Type": "application/json"}
59
60
  data = {"contents": [{"parts": [{"text": prompt_text_powerbi(combined_content)}]}]}
60
61
  response = requests.post(f"{self.endpoint}?key={self.api_key}", headers=headers, json=data)
61
-
62
62
  if response.status_code != 200:
63
63
  logger.error("API request failed: Status %d, Response: %s", response.status_code, response.text)
64
64
  raise Exception(f"API call failed: {response.status_code} - {response.text}")
@@ -69,8 +69,11 @@ class GeminiFlashModel:
69
69
  generated_text = response_data["candidates"][0]["content"]["parts"][0]["text"]
70
70
  return self._parse_response(generated_text)
71
71
 
72
- @retry(stop=stop_after_attempt(3), wait=wait_exponential(min=4, max=10),
73
- retry=retry_if_exception_type((requests.exceptions.RequestException,)))
72
+ @retry(
73
+ stop=stop_after_attempt(3),
74
+ wait=wait_exponential(min=4, max=10),
75
+ retry=retry_if_exception_type((requests.exceptions.RequestException,))
76
+ )
74
77
  def evaluate_visuals(self, question: str, image_folder: str) -> Dict[str, any]:
75
78
  folder_path = Path(image_folder)
76
79
  images = list(folder_path.glob("*.png"))[:3]
@@ -80,12 +83,12 @@ class GeminiFlashModel:
80
83
  "Evaluate the Power BI report visuals based on the provided task. The visuals are professional dashboards designed for enterprise use.\n\n"
81
84
  f"Task: {question}\n\n"
82
85
  f"Screenshots: {[str(img.name) for img in images]}\n\n"
83
- "Evaluate based on the following criteria, assigning a score out of 100:z\n"
86
+ "Evaluate based on the following criteria, assigning a score out of 100:\n"
84
87
  "- Clarity (30%): Are visuals clear, with readable labels, titles, and legends?\n"
85
88
  "- Appropriateness (30%): Are chart types (e.g., bar, line, pie) suitable for the data and task?\n"
86
89
  "- Color Usage (20%): Are colors consistent, accessible, and visually appealing? Consider contrast and colorblind accessibility.\n"
87
90
  "- Interactivity (20%): Do visible slicers, filters, or tooltips enhance usability and data exploration?\n\n"
88
- "Provide a score (0-100) that reflects the overall quality, considering the enterprise context. Avoid overly harsh penalties for minor issues.\n"
91
+ "Provide a score for overall quality, considering the enterprise context. Avoid overly harsh penalties for minor issues.\n"
89
92
  "Provide concise, supportive feedback for beginners, highlighting strengths and areas for improvement.\n\n"
90
93
  "Structure the response as:\n"
91
94
  "Score: [SCORE]/100\n"
@@ -231,9 +234,7 @@ class PowerBIProcessor:
231
234
  measures.append({
232
235
  "Table": table["name"],
233
236
  "Name": measure["name"],
234
- "Expression": " ".join(measure.get("expression", "")) if isinstance(measure.get("expression"),
235
- list) else measure.get(
236
- "expression", ""),
237
+ "Expression": " ".join(measure.get("expression", "")) if isinstance(measure.get("expression"), list) else measure.get("expression", ""),
237
238
  "FormatString": measure.get("formatString", "")
238
239
  })
239
240
  return measures
@@ -242,19 +243,31 @@ class PowerBIProcessor:
242
243
  def _get_tables_and_columns(tables: List[Dict]) -> List[Dict]:
243
244
  table_info = []
244
245
  for table in tables:
245
- columns = [{"Column Name": col["name"], "Data Type": col.get("dataType", "Unknown"),
246
- "Source Column": col.get("sourceColumn", "N/A"), "Calculated": col.get("type") == "calculated"}
247
- for col in table.get("columns", [])]
248
- expressions = [part["source"]["expression"] for part in table.get("partitions", []) if
249
- part["source"].get("expression")]
246
+ columns = [
247
+ {
248
+ "Column Name": col["name"],
249
+ "Data Type": col.get("dataType", "Unknown"),
250
+ "Source Column": col.get("sourceColumn", "N/A"),
251
+ "Calculated": col.get("type") == "calculated"
252
+ }
253
+ for col in table.get("columns", [])
254
+ ]
255
+ expressions = [part["source"]["expression"] for part in table.get("partitions", []) if part["source"].get("expression")]
250
256
  table_info.append({"Table Name": table["name"], "Columns": columns, "Expressions": expressions})
251
257
  return table_info
252
258
 
253
259
  @staticmethod
254
260
  def _get_relationships(relationships: List[Dict]) -> List[Dict]:
255
- return [{"From Table": rel["fromTable"], "From Column": rel["fromColumn"], "To Table": rel["toTable"],
256
- "To Column": rel["toColumn"], "Join Behavior": rel.get("joinOnDateBehavior", "N/A")} for rel in
257
- relationships]
261
+ return [
262
+ {
263
+ "From Table": rel["fromTable"],
264
+ "From Column": rel["fromColumn"],
265
+ "To Table": rel["toTable"],
266
+ "To Column": rel["toColumn"],
267
+ "Join Behavior": rel.get("joinOnDateBehavior", "N/A")
268
+ }
269
+ for rel in relationships
270
+ ]
258
271
 
259
272
  @staticmethod
260
273
  def _cleanup(*paths: str):
@@ -272,15 +285,13 @@ class PowerBIEvaluator:
272
285
  self.model = GeminiFlashModel(api_key)
273
286
  self.processor = PowerBIProcessor()
274
287
 
275
- def evaluate(self, questions: List[str], answer_path: str) -> Dict[str, any]:
288
+ def evaluate(self, questions: List[str], answer_path: str, temp_dir: str = "temp_extract") -> Dict[str, any]:
276
289
  try:
277
290
  _, ext = os.path.splitext(answer_path)
278
291
  ext = ext.lower()
279
- extract_path = os.path.join(os.path.dirname(answer_path), "temp_extract")
292
+ extract_path = temp_dir
280
293
  pbit_path = None
281
294
  pdf_path = None
282
-
283
- # Handle input file type
284
295
  if ext == ".zip":
285
296
  pbit_path, pdf_path = self.processor.extract_zip(answer_path, extract_path)
286
297
  elif ext == ".pbit":
@@ -296,61 +307,47 @@ class PowerBIEvaluator:
296
307
  "dax_score": 0,
297
308
  "visual_score": 0
298
309
  }
299
-
300
310
  try:
301
- # Extract and process the data model from .pbit
302
311
  data_model = self.processor.extract_datamodel(pbit_path)
303
312
  model_data = self.processor.extract_model_data(data_model)
304
313
  answers = [json.dumps(model_data)] * len(questions)
305
314
  dax_result = self.model.evaluate([{"question": q, "answer": a} for q, a in zip(questions, answers)])
306
-
307
- # Initialize result with DAX evaluation
308
315
  result = {
309
316
  "score": 0,
310
317
  "feedback": f"DAX Feedback:\n{dax_result['feedback']}",
311
318
  "issues": dax_result["issues"],
312
319
  "recommendations": dax_result["recommendations"],
313
- "dax_score": dax_result["score"], # Store DAX score
314
- "visual_score": 0 # Default visual score
320
+ "dax_score": dax_result["score"],
321
+ "visual_score": 0
315
322
  }
316
-
317
- # Process PDF and evaluate visuals if present
318
323
  if pdf_path:
319
324
  try:
320
- self.processor.process_pdf(pdf_path)
321
- visual_result = self.model.evaluate_visuals(questions[0], "outputimages")
322
- # Apply 70% DAX, 30% visuals scoring
325
+ image_paths = self.processor.process_pdf(pdf_path, output_dir=os.path.join(temp_dir, "outputimages"))
326
+ visual_result = self.model.evaluate_visuals(questions[0], os.path.join(temp_dir, "outputimages"))
323
327
  result["score"] = int(0.7 * dax_result["score"] + 0.3 * visual_result["score"])
324
- result["visual_score"] = visual_result["score"] # Store visual score
328
+ result["visual_score"] = visual_result["score"]
325
329
  result["feedback"] += f"\n\nVisual Feedback:\n{visual_result['feedback']}"
326
330
  result["issues"].extend([f"Visual: {i}" for i in visual_result.get("issues", [])])
327
331
  result["recommendations"].extend(visual_result.get("recommendations", []))
328
332
  except ProcessingError as e:
329
333
  logger.warning("Failed to process PDF, proceeding with DAX evaluation only: %s", str(e))
330
- # Use DAX score only, weighted at 100% if no visuals
331
334
  result["score"] = dax_result["score"]
332
335
  result["issues"].append(f"Visual evaluation skipped: {str(e)}")
333
- result["recommendations"].append(
334
- "Ensure a valid PDF is provided for visual evaluation if intended")
336
+ result["recommendations"].append("Ensure a valid PDF is provided for visual evaluation if intended")
335
337
  else:
336
- # No PDF provided, use DAX score only
337
338
  result["score"] = dax_result["score"]
338
339
  result["feedback"] += "\n\nVisual Feedback:\nNo visuals provided for evaluation."
339
340
  result["issues"].append("No PDF provided for visual evaluation")
340
341
  result["recommendations"].append("Include a PDF with report visuals for complete evaluation")
341
-
342
- # Print scores with text labels to terminal
343
342
  logger.info("[DAX] Score: %d/100", result["dax_score"])
344
343
  logger.info("[Visual] Score: %d/100", result["visual_score"])
345
344
  logger.info("[Final] Score (70%% DAX, 30%% Visuals): %d/100", result["score"])
346
-
347
345
  return result
348
346
  finally:
349
- # Cleanup temporary files and directories
350
- self.processor._cleanup(extract_path, "outputimages")
347
+ self.processor._cleanup(extract_path, os.path.join(temp_dir, "outputimages"))
351
348
  except Exception as e:
352
349
  logger.exception("Failed to evaluate Power BI file %s: %s", answer_path, str(e))
353
- self.processor._cleanup(extract_path, "outputimages")
350
+ self.processor._cleanup(extract_path, os.path.join(temp_dir, "outputimages"))
354
351
  return {
355
352
  "score": 0,
356
353
  "feedback": f"Error processing file: {str(e)}",
@@ -362,4 +359,4 @@ class PowerBIEvaluator:
362
359
 
363
360
 
364
361
  class ProcessingError(Exception):
365
- pass
362
+ pass
@@ -8,10 +8,10 @@ def prompt_text_python(combined_content):
8
8
  "- Highlight both strengths and areas for improvement\n"
9
9
  "- Identify major mistakes or misunderstandings (e.g., syntax errors, incorrect logic, missing components and conceptual part)\n"
10
10
  "- Be concise but insightful\n\n"
11
- "- If the student's answer is incomplete or too simplistic to fully address the question, "
11
+ "- If the student's answer is incomplete or too simplistic to fully address the question, you should decrease the mark for the missing answers"
12
12
  "explain that the response lacks depth or coverage, but do not provide the missing or correct answer. "
13
13
  "Encourage the student to research further or review the relevant concepts.\n"
14
- "- If the student's submission is off-topic or unrelated to the question, "
14
+ "- If the student's submission is off-topic or unrelated to the question, give exatly 20 mark and "
15
15
  "clearly state that the response does not address the question's requirements and "
16
16
  "explain why it is irrelevant. Encourage the student to review the question carefully and "
17
17
  "focus on the relevant Python concepts without providing the correct solution."
@@ -32,8 +32,6 @@ def prompt_text_python(combined_content):
32
32
  "- Be honest but supportive\n"
33
33
  "- Include specific examples from the provided answers if helpful\n"
34
34
  "- Keep language beginner-friendly\n"
35
- "- Do not give too low marks. You may add from 20 up to 25 additional marks for effort or "
36
- "partial relevance, ensuring the score does not exceed 100."
37
35
  )
38
36
 
39
37
 
@@ -57,7 +55,7 @@ def prompt_text_sql(combined_content: str):
57
55
  "clearly state that it lacks sufficient detail or misses key components, but do not provide "
58
56
  "the missing parts or solutions. Instead, suggest they revisit the relevant "
59
57
  "concepts (e.g., joins, subqueries, indexing, if lacks) and encourage deeper exploration.\n"
60
- "- If the student's submission is off-topic or unrelated to the question, "
58
+ "- If the student's submission is off-topic or unrelated to the question, give exactly 20 mark and "
61
59
  "clearly state that the response does not address the question's requirements and "
62
60
  "explain why it is irrelevant. Encourage the student to review the "
63
61
  "question carefully and focus on the relevant SQL Server concepts without providing the correct solution."
@@ -76,6 +74,7 @@ def prompt_text_sql(combined_content: str):
76
74
  f"{combined_content}\n"
77
75
  "=== EVALUATION COMPLETE ===\n\n"
78
76
  "Notes:\n"
77
+ "If question about other technology for example python then it is clearly off topic and should get 20 mark"
79
78
  "- Be honest but supportive\n"
80
79
  "- Include specific examples from the provided answers if helpful\n"
81
80
  "- Keep language beginner-friendly\n"
@@ -83,62 +82,43 @@ def prompt_text_sql(combined_content: str):
83
82
  "effort or partial relevance, ensuring the score does not exceed 100."
84
83
  )
85
84
 
86
- def prompt_text_ssis(combined_content):
85
+ def prompt_text_ssis(combined_content: str) -> str:
87
86
  return (
88
- "You are a data engineer reviewing an SSIS package (.dtsx) summary. "
89
- "Evaluate how well the package addresses the question, focusing on the correctness of tasks, "
90
- "data flow, control flow, and configurations.\n\n"
91
- "Your evaluation should:\n"
92
- "- Assess how well the package addresses the question overall\n"
93
- "- Focus on clarity, accuracy, and a basic understanding of key SSIS components "
94
- "(e.g., Control Flow, Data Flow, Connection Managers)\n"
95
- "- Be supportive and constructive students are new to SSIS, so encourage learning and reward effort\n"
96
- "- Highlight what was done well and gently suggest what could be improved\n"
97
- "- Point out only major issues when necessary (e.g., missing essential components, "
98
- "incorrect configurations, or clear misunderstandings)\n"
99
- "- Keep feedback clear, concise, and insightful\n"
100
- "- Also assess whether the student’s submission demonstrates a proper understanding of "
101
- "SSIS concepts being tested (e.g., ETL processes, control flow sequencing, error handling), not just technical correctness\n"
102
- "- Check for proper use of control flow tasks, data flow transformations, precedence constraints, "
103
- "error handling (e.g., OnError events), and connection manager configurations\n"
104
- "- If the student's submission is incomplete or too simplistic to fully address the question, "
105
- "clearly state that it lacks sufficient detail or misses key components, "
106
- "but do not provide the missing parts or solutions. Instead, suggest they revisit the relevant "
107
- "SSIS concepts (e.g., control flow, data flow, error handling) and encourage deeper exploration\n"
108
- "- If the student's submission is off-topic or unrelated to the question, "
109
- "clearly state that the response does not address the question's requirements and "
110
- "explain why it is irrelevant. Encourage the student to review the question carefully and "
111
- "focus on the relevant SSIS concepts without providing the correct solution\n"
112
- "- Understand that simple packages may only use one Data Flow Task, and that’s perfectly fine\n"
113
- "- If scheduling (e.g., daily at 7 AM) is not included, just note it briefly — "
114
- "it may be handled by SQL Server Agent and should not impact the score significantly (no more than 5–10 points)\n\n"
115
- "When provided, check that:\n"
116
- "- Data flow connections are properly linked\n"
117
- "- Data types match the destination schema\n\n"
118
- "Important Scoring Note:\n"
119
- "Always give credit for effort, even if there are technical gaps. It’s better to nudge students forward "
120
- "than to discourage them. Start from a generous baseline and avoid very low scores unless the submission "
121
- "shows no attempt. Remember the student is not a pro programmer, so avoid low scores just because best "
122
- "practices weren’t followed exactly. Score mainly based on what was asked. "
123
- "Provide feedback in this format:\n\n"
124
- "=== COMPREHENSIVE EVALUATION ===\n\n"
125
- "OVERALL SCORE: <score>/100\n\n"
126
- "FEEDBACK SUMMARY:\n"
127
- "- What was done well\n"
128
- "- What needs improvement\n"
129
- "- Any major issues (e.g., logic errors, misunderstandings, incomplete solutions)\n\n"
130
- "KEY ADVICE:\n"
131
- "- Top 2-3 suggestions to improve SSIS skills\n"
132
- "- Concepts to revisit\n"
133
- "- Encouragement to keep learning and improving\n\n"
134
- f"{combined_content}\n"
135
- "=== EVALUATION COMPLETE ===\n\n"
136
- "Notes:\n"
137
- "- Be honest but supportive\n"
138
- "- Include specific examples from the provided summary if helpful\n"
139
- "- Keep language beginner-friendly\n"
140
- "- Do not give too low marks. From 5 up to 10 additional marks for effort or partial relevance, ensuring the score does not exceed 100."
141
- )
87
+
88
+ "You are an SSIS data engineer evaluating a beginner-level SSIS package submission (1–2 months experience).\n\n"
89
+ "Evaluation Criteria:\n"
90
+ "- Assess correct and relevant use of SSIS components: Connection Managers, Control Flow tasks (e.g., Execute SQL Task), Data Flow tasks (e.g., Flat File Source to OLE DB Destination).\n"
91
+ "- Check if the submission attempts to solve the task using SSIS packages (.dtsx) and related concepts.\n"
92
+ "- Confirm proper linking of components and appropriate use of data types.\n"
93
+ "- Consider clarity, effort, and completeness.\n"
94
+ "- If scheduling (e.g., SQL Server Agent Job) is missing, note it but deduct no more than 5 points.\n\n"
95
+ "**STRICT RULE ON OFF-TOPIC SUBMISSIONS:**\n"
96
+ "- If the submission is off-topic (e.g., Python scripts, SQL queries, Power BI reports, or anything NOT an SSIS package or SSIS-related), assign exactly 20/100 points.\n"
97
+ "- Do NOT give any additional points or feedback related to SSIS components.\n"
98
+ "- Clearly state in feedback that the submission does not address the SSIS package requirement and advise focusing on SSIS for this task.\n\n"
99
+ "Scoring Guidelines:\n"
100
+ "- Begin with a baseline of 60/100 for any reasonable SSIS attempt.\n"
101
+ "- Add 5–10 points for extra effort or partial correctness.\n"
102
+ "- Never exceed 100 points.\n"
103
+ "- Always reward genuine effort unless off-topic.\n\n"
104
+ "Feedback Format:\n"
105
+ "=== COMPREHENSIVE EVALUATION ===\n"
106
+ "OVERALL SCORE: <score>/100\n\n"
107
+ "FEEDBACK SUMMARY:\n"
108
+ "- What was done well\n"
109
+ "- What needs improvement\n"
110
+ "- Major issues (including off-topic comments if applicable)\n\n"
111
+ "KEY ADVICE:\n"
112
+ "- 1–2 improvement tips\n"
113
+ "- Core SSIS concepts to review\n"
114
+ "- Encouragement to keep practicing\n\n"
115
+ f"{combined_content}\n"
116
+ "=== EVALUATION COMPLETE ===\n\n"
117
+ "Notes:\n"
118
+ "- Be kind, clear, and beginner-friendly.\n"
119
+ "- If off-topic, strictly enforce 20/100 score with no exceptions.\n"
120
+ "- Remind student clearly to read the question carefully and focus on SSIS.\n"
121
+ )
142
122
 
143
123
  def prompt_text_powerbi(combined_content: str):
144
124
  return (
@@ -156,7 +136,7 @@ def prompt_text_powerbi(combined_content: str):
156
136
  "- Highlight strengths and areas to improve\n"
157
137
  "- Mention if the submission is incomplete or off-topic, but don't provide missing solutions\n"
158
138
  "- Do not penalize for efficiency, missing advanced features, or redundant tables\n"
159
- "- Base score on relevance, correctness, and effort. Incomplete/off-topic work should be scored low, with a small boost for effort if applicable\n\n"
139
+ "- Base score on relevance, correctness, and effort. Incomplete/off-topic work should be scored low and and should not be given any feedbacks related, with a small boost for effort if applicable\n\n"
160
140
  f"{combined_content}\n"
161
141
  "=== EVALUATION COMPLETE ==="
162
142
  )