QuantumChecker 0.3.4__tar.gz → 0.3.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {quantumchecker-0.3.4 → quantumchecker-0.3.6}/PKG-INFO +1 -1
- {quantumchecker-0.3.4 → quantumchecker-0.3.6}/QuantumCheck/powerbi_evaluator.py +76 -50
- {quantumchecker-0.3.4 → quantumchecker-0.3.6}/QuantumCheck/ssis_evaluator.py +0 -9
- {quantumchecker-0.3.4 → quantumchecker-0.3.6}/QuantumChecker.egg-info/PKG-INFO +1 -1
- {quantumchecker-0.3.4 → quantumchecker-0.3.6}/QuantumChecker.egg-info/SOURCES.txt +2 -1
- {quantumchecker-0.3.4 → quantumchecker-0.3.6}/setup.py +1 -1
- quantumchecker-0.3.6/tests/test.py +45 -0
- quantumchecker-0.3.6/tests/test2.py +25 -0
- quantumchecker-0.3.4/tests/test.py +0 -82
- {quantumchecker-0.3.4 → quantumchecker-0.3.6}/QuantumCheck/__init__.py +0 -0
- {quantumchecker-0.3.4 → quantumchecker-0.3.6}/QuantumCheck/main.py +0 -0
- {quantumchecker-0.3.4 → quantumchecker-0.3.6}/QuantumCheck/prompts.py +0 -0
- {quantumchecker-0.3.4 → quantumchecker-0.3.6}/QuantumCheck/python_evaluator.py +0 -0
- {quantumchecker-0.3.4 → quantumchecker-0.3.6}/QuantumCheck/sql_evaluator.py +0 -0
- {quantumchecker-0.3.4 → quantumchecker-0.3.6}/QuantumChecker.egg-info/dependency_links.txt +0 -0
- {quantumchecker-0.3.4 → quantumchecker-0.3.6}/QuantumChecker.egg-info/requires.txt +0 -0
- {quantumchecker-0.3.4 → quantumchecker-0.3.6}/QuantumChecker.egg-info/top_level.txt +0 -0
- {quantumchecker-0.3.4 → quantumchecker-0.3.6}/README.md +0 -0
- {quantumchecker-0.3.4 → quantumchecker-0.3.6}/setup.cfg +0 -0
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
import base64
|
|
2
|
+
import io
|
|
1
3
|
import json
|
|
2
4
|
import logging
|
|
3
5
|
import os
|
|
@@ -6,33 +8,33 @@ import shutil
|
|
|
6
8
|
import zipfile
|
|
7
9
|
from pathlib import Path
|
|
8
10
|
from typing import Dict, List
|
|
9
|
-
|
|
10
|
-
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
|
|
11
|
+
|
|
11
12
|
import requests
|
|
12
13
|
from dotenv import load_dotenv
|
|
14
|
+
from pdf2image import convert_from_path
|
|
13
15
|
from PIL import Image
|
|
14
|
-
import
|
|
15
|
-
|
|
16
|
+
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
|
|
17
|
+
|
|
16
18
|
|
|
17
19
|
|
|
18
20
|
def prompt_text_powerbi(combined_content: str) -> str:
|
|
19
21
|
return f"""
|
|
20
22
|
You are an expert Power BI instructor evaluating beginner-level DAX question-answer pairs.
|
|
21
|
-
|
|
23
|
+
|
|
22
24
|
Each answer contains a data model (in JSON format) extracted from a submitted .pbit file. Evaluate the technical correctness, relevance, and clarity of the DAX elements provided.
|
|
23
|
-
|
|
25
|
+
|
|
24
26
|
Use the following criteria to give a fair and supportive evaluation:
|
|
25
27
|
- Measures (40 points): Are calculated measures meaningful, syntactically valid, and aligned with the question?
|
|
26
28
|
- Relationships (20 points): Are key relationships between tables defined logically?
|
|
27
29
|
- Tables & Columns (20 points): Are relevant tables/columns present? Are naming conventions clear?
|
|
28
30
|
- Expressions (10 points): Are query partitions or expressions present and understandable?
|
|
29
31
|
- Overall structure (10 points): Does the model appear coherent and purposeful?
|
|
30
|
-
|
|
32
|
+
|
|
31
33
|
**Scoring Tolerance**:
|
|
32
34
|
- Be kind to beginners. Do not give extremely low scores unless the model is completely missing or incorrect.
|
|
33
35
|
- If measures exist and make some sense, award partial credit (e.g., 20–30 out of 40).
|
|
34
36
|
- A score below 30/100 should only be given if there’s little to no relevant content.
|
|
35
|
-
|
|
37
|
+
|
|
36
38
|
Structure your response exactly like this:
|
|
37
39
|
OVERALL SCORE: [SCORE]/100
|
|
38
40
|
[Brief feedback here — 3–5 sentences focused on strengths + areas to improve.]
|
|
@@ -51,6 +53,9 @@ logging.basicConfig(
|
|
|
51
53
|
)
|
|
52
54
|
|
|
53
55
|
|
|
56
|
+
# ==============================
|
|
57
|
+
# Gemini Flash Model
|
|
58
|
+
# ==============================
|
|
54
59
|
class GeminiFlashModel:
|
|
55
60
|
def __init__(self, api_key: str, model_name: str = "gemini-1.5-flash"):
|
|
56
61
|
api_key = os.getenv("GEMINI_API_KEY") or api_key
|
|
@@ -164,6 +169,7 @@ class GeminiFlashModel:
|
|
|
164
169
|
return result
|
|
165
170
|
|
|
166
171
|
|
|
172
|
+
|
|
167
173
|
class PowerBIProcessor:
|
|
168
174
|
def extract_datamodel(self, pbit_file_path: str) -> Dict:
|
|
169
175
|
if not os.path.exists(pbit_file_path):
|
|
@@ -174,7 +180,7 @@ class PowerBIProcessor:
|
|
|
174
180
|
export_path = os.path.join(folder_path, "export")
|
|
175
181
|
self._cleanup(zip_file, export_path)
|
|
176
182
|
try:
|
|
177
|
-
|
|
183
|
+
shutil.copy(pbit_file_path, zip_file)
|
|
178
184
|
if not zipfile.is_zipfile(zip_file):
|
|
179
185
|
raise ProcessingError(f"File is not a valid ZIP: {zip_file}")
|
|
180
186
|
with zipfile.ZipFile(zip_file, "r") as zip_ref:
|
|
@@ -215,7 +221,6 @@ class PowerBIProcessor:
|
|
|
215
221
|
image_path = os.path.join(output_dir, f"page_{i + 1}.png")
|
|
216
222
|
page.save(image_path, "PNG")
|
|
217
223
|
image_paths.append(image_path)
|
|
218
|
-
os.remove(pdf_path)
|
|
219
224
|
return image_paths
|
|
220
225
|
except Exception as e:
|
|
221
226
|
raise ProcessingError(f"Failed to process PDF: {e}")
|
|
@@ -249,7 +254,9 @@ class PowerBIProcessor:
|
|
|
249
254
|
measures.append({
|
|
250
255
|
"Table": table["name"],
|
|
251
256
|
"Name": measure["name"],
|
|
252
|
-
"Expression": " ".join(measure.get("expression", "")) if isinstance(measure.get("expression"), list) else measure.get("expression", ""),
|
|
257
|
+
"Expression": " ".join(measure.get("expression", "")) if isinstance(measure.get("expression"),
|
|
258
|
+
list) else measure.get(
|
|
259
|
+
"expression", ""),
|
|
253
260
|
"FormatString": measure.get("formatString", "")
|
|
254
261
|
})
|
|
255
262
|
return measures
|
|
@@ -267,7 +274,8 @@ class PowerBIProcessor:
|
|
|
267
274
|
}
|
|
268
275
|
for col in table.get("columns", [])
|
|
269
276
|
]
|
|
270
|
-
expressions = [part["source"]["expression"] for part in table.get("partitions", []) if part["source"].get("expression")]
|
|
277
|
+
expressions = [part["source"]["expression"] for part in table.get("partitions", []) if
|
|
278
|
+
part["source"].get("expression")]
|
|
271
279
|
table_info.append({"Table Name": table["name"], "Columns": columns, "Expressions": expressions})
|
|
272
280
|
return table_info
|
|
273
281
|
|
|
@@ -294,6 +302,7 @@ class PowerBIProcessor:
|
|
|
294
302
|
shutil.rmtree(path, ignore_errors=True)
|
|
295
303
|
|
|
296
304
|
|
|
305
|
+
|
|
297
306
|
class PowerBIEvaluator:
|
|
298
307
|
def __init__(self, api_key: str):
|
|
299
308
|
self.api_key = api_key
|
|
@@ -301,67 +310,81 @@ class PowerBIEvaluator:
|
|
|
301
310
|
self.processor = PowerBIProcessor()
|
|
302
311
|
|
|
303
312
|
def evaluate(self, questions: List[str], answer_path: str, temp_dir: str = "temp_extract") -> Dict[str, any]:
|
|
313
|
+
extract_path = temp_dir
|
|
314
|
+
outputimages = os.path.join(temp_dir, "outputimages")
|
|
304
315
|
try:
|
|
305
316
|
_, ext = os.path.splitext(answer_path)
|
|
306
317
|
ext = ext.lower()
|
|
307
318
|
extract_path = temp_dir
|
|
308
319
|
pbit_path = None
|
|
309
320
|
pdf_path = None
|
|
321
|
+
|
|
310
322
|
if ext == ".zip":
|
|
311
323
|
pbit_path, pdf_path = self.processor.extract_zip(answer_path, extract_path)
|
|
312
324
|
elif ext == ".pbit":
|
|
313
325
|
pbit_path = answer_path
|
|
314
|
-
|
|
326
|
+
elif ext == ".pdf":
|
|
327
|
+
pdf_path = answer_path
|
|
315
328
|
else:
|
|
316
|
-
logger.error("Invalid file type for Power BI: %s", answer_path)
|
|
317
329
|
return {
|
|
318
330
|
"score": 0,
|
|
319
|
-
"feedback": f"Invalid file type: {ext}. Expected .pbit or .zip",
|
|
331
|
+
"feedback": f"Invalid file type: {ext}. Expected .pbit, .pdf, or .zip",
|
|
320
332
|
"issues": ["Invalid file type"],
|
|
321
333
|
"recommendations": [],
|
|
322
334
|
"dax_score": 0,
|
|
323
335
|
"visual_score": 0
|
|
324
336
|
}
|
|
325
|
-
|
|
337
|
+
|
|
338
|
+
dax_result = None
|
|
339
|
+
visual_result = None
|
|
340
|
+
|
|
341
|
+
if pbit_path:
|
|
326
342
|
data_model = self.processor.extract_datamodel(pbit_path)
|
|
327
343
|
model_data = self.processor.extract_model_data(data_model)
|
|
328
344
|
answers = [json.dumps(model_data)] * len(questions)
|
|
329
345
|
dax_result = self.model.evaluate([{"question": q, "answer": a} for q, a in zip(questions, answers)])
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
346
|
+
|
|
347
|
+
if pdf_path:
|
|
348
|
+
image_paths = self.processor.process_pdf(pdf_path, output_dir=os.path.join(temp_dir, "outputimages"))
|
|
349
|
+
visual_result = self.model.evaluate_visuals(questions[0], os.path.join(temp_dir, "outputimages"))
|
|
350
|
+
|
|
351
|
+
result = {
|
|
352
|
+
"score": 0,
|
|
353
|
+
"feedback": "",
|
|
354
|
+
"issues": [],
|
|
355
|
+
"recommendations": [],
|
|
356
|
+
"dax_score": 0,
|
|
357
|
+
"visual_score": 0
|
|
358
|
+
}
|
|
359
|
+
|
|
360
|
+
if dax_result:
|
|
361
|
+
result["dax_score"] = dax_result["score"]
|
|
362
|
+
result["feedback"] += f"DAX Feedback:\n{dax_result['feedback']}"
|
|
363
|
+
result["issues"].extend(dax_result["issues"])
|
|
364
|
+
result["recommendations"].extend(dax_result["recommendations"])
|
|
365
|
+
|
|
366
|
+
if visual_result:
|
|
367
|
+
result["visual_score"] = visual_result["score"]
|
|
368
|
+
result["feedback"] += f"\n\nVisual Feedback:\n{visual_result['feedback']}"
|
|
369
|
+
result["issues"].extend([f"Visual: {i}" for i in visual_result.get("issues", [])])
|
|
370
|
+
result["recommendations"].extend(visual_result.get("recommendations", []))
|
|
371
|
+
|
|
372
|
+
if dax_result and visual_result:
|
|
373
|
+
result["score"] = int(0.7 * dax_result["score"] + 0.3 * visual_result["score"])
|
|
374
|
+
elif dax_result:
|
|
375
|
+
result["score"] = dax_result["score"]
|
|
376
|
+
result["feedback"] += "\n\nVisual Feedback:\nNo visuals provided for evaluation."
|
|
377
|
+
result["issues"].append("No PDF provided for visual evaluation")
|
|
378
|
+
result["recommendations"].append("Include a PDF with report visuals for complete evaluation")
|
|
379
|
+
elif visual_result:
|
|
380
|
+
result["score"] = visual_result["score"]
|
|
381
|
+
result["feedback"] = "No DAX provided for evaluation.\n\n" + result["feedback"]
|
|
382
|
+
result["issues"].append("No PBIT provided for DAX evaluation")
|
|
383
|
+
result["recommendations"].append("Include a PBIT file with data model for complete evaluation")
|
|
384
|
+
|
|
385
|
+
return result
|
|
386
|
+
|
|
363
387
|
except Exception as e:
|
|
364
|
-
logger.exception("Failed to evaluate Power BI file %s: %s", answer_path, str(e))
|
|
365
388
|
self.processor._cleanup(extract_path, os.path.join(temp_dir, "outputimages"))
|
|
366
389
|
return {
|
|
367
390
|
"score": 0,
|
|
@@ -371,6 +394,9 @@ class PowerBIEvaluator:
|
|
|
371
394
|
"dax_score": 0,
|
|
372
395
|
"visual_score": 0
|
|
373
396
|
}
|
|
397
|
+
finally:
|
|
398
|
+
self.processor._cleanup(extract_path, outputimages)
|
|
399
|
+
|
|
374
400
|
|
|
375
401
|
|
|
376
402
|
class ProcessingError(Exception):
|
|
@@ -7,7 +7,6 @@ from typing import List, Dict
|
|
|
7
7
|
import requests
|
|
8
8
|
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
|
|
9
9
|
from pprint import pprint
|
|
10
|
-
import json
|
|
11
10
|
import re
|
|
12
11
|
|
|
13
12
|
from .prompts import prompt_text_ssis
|
|
@@ -207,11 +206,6 @@ class SSISAnswerParser:
|
|
|
207
206
|
logger.warning("No valid answers found in single SSIS file")
|
|
208
207
|
answers = [combined_summary]
|
|
209
208
|
|
|
210
|
-
|
|
211
|
-
with open("parsed_ssis_summary.json", "w", encoding="utf-8") as f:
|
|
212
|
-
json.dump(structured_data, f, indent=2)
|
|
213
|
-
logger.info("Saved structured SSIS summary to 'parsed_ssis_summary.json'")
|
|
214
|
-
|
|
215
209
|
return {
|
|
216
210
|
"text_answers": answers,
|
|
217
211
|
"structured_data": structured_data
|
|
@@ -219,9 +213,6 @@ class SSISAnswerParser:
|
|
|
219
213
|
|
|
220
214
|
except ET.ParseError as e:
|
|
221
215
|
logger.error("Invalid SSIS package file: %s", str(e))
|
|
222
|
-
with open("debug_dtsx_content.txt", "w", encoding="utf-8") as f:
|
|
223
|
-
f.write(content)
|
|
224
|
-
logger.info("Saved raw .dtsx content to 'debug_dtsx_content.txt' for debugging")
|
|
225
216
|
return {"text_answers": ["Invalid SSIS package file"], "structured_data": {"issues": [str(e)]}}
|
|
226
217
|
except Exception as e:
|
|
227
218
|
logger.error("Unexpected error parsing .dtsx file: %s", str(e))
|
|
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
|
|
|
2
2
|
|
|
3
3
|
setup(
|
|
4
4
|
name="QuantumChecker",
|
|
5
|
-
version="0.3.4",
|
|
5
|
+
version="0.3.6",
|
|
6
6
|
author="Qobiljon",
|
|
7
7
|
author_email="qobiljonkhayrullayev@gmail.com",
|
|
8
8
|
description="A package to evaluate homework submissions in Python, SQL, PowerBI, and SSIS.",
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import os
|
|
3
|
+
import psutil
|
|
4
|
+
from pprint import pprint
|
|
5
|
+
from QuantumCheck import HomeworkEvaluator
|
|
6
|
+
|
|
7
|
+
API_KEY = "AIzaSyDw76DEINpfBVgwIEZLShhy97tvWg7BmzY"
|
|
8
|
+
|
|
9
|
+
question_sets = {
|
|
10
|
+
"python": "Write a Python function to calculate factorial.\nWrite a Python script to reverse a string.",
|
|
11
|
+
"powerbi": "Create a Power BI report with a bar chart.\nExplain DAX measures for sales analysis.",
|
|
12
|
+
"sql": "Write a SQL query to join two tables.\nWrite a SQL query for aggregate functions.",
|
|
13
|
+
"ssis": "Design an SSIS package for data import.\nExplain SSIS control flow tasks."
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
answer_paths = {
|
|
17
|
+
"python": ["../tests/answer/python1.zip"],
|
|
18
|
+
"powerbi": ["../tests/answer/homework2_last.pdf"],
|
|
19
|
+
"sql": ["../tests/answer/sql3.zip"],
|
|
20
|
+
"ssis": ["../tests/answer/answer.dtsx"]
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
async def main():
|
|
24
|
+
evaluator = HomeworkEvaluator()
|
|
25
|
+
process = psutil.Process(os.getpid())
|
|
26
|
+
|
|
27
|
+
for qtype, question in question_sets.items():
|
|
28
|
+
for ans in answer_paths[qtype]:
|
|
29
|
+
mem_before = process.memory_info().rss
|
|
30
|
+
evaluation = await evaluator.evaluate_from_content(
|
|
31
|
+
question_content=question,
|
|
32
|
+
answer_path=ans,
|
|
33
|
+
api_key=API_KEY,
|
|
34
|
+
question_type=qtype
|
|
35
|
+
)
|
|
36
|
+
mem_after = process.memory_info().rss
|
|
37
|
+
delta_mb = (mem_after - mem_before) / 1024**2
|
|
38
|
+
|
|
39
|
+
print(f"{qtype} | {ans}")
|
|
40
|
+
print(f"📈 Memory used for evaluation: {delta_mb:.2f} MB")
|
|
41
|
+
print(f"✅ Evaluation result: {pprint(evaluation)}")
|
|
42
|
+
print("-" * 40)
|
|
43
|
+
|
|
44
|
+
if __name__ == "__main__":
|
|
45
|
+
asyncio.run(main())
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
from pprint import pprint
|
|
3
|
+
from QuantumCheck import HomeworkEvaluator
|
|
4
|
+
|
|
5
|
+
API_KEY = "AIzaSyDw76DEINpfBVgwIEZLShhy97tvWg7BmzY"
|
|
6
|
+
|
|
7
|
+
question = "Create a Power BI report with a bar chart.\nExplain DAX measures for sales analysis."
|
|
8
|
+
answer_path = "../tests/answer/test.pdf"
|
|
9
|
+
|
|
10
|
+
async def main():
|
|
11
|
+
evaluator = HomeworkEvaluator()
|
|
12
|
+
evaluation = await evaluator.evaluate_from_content(
|
|
13
|
+
question_content=question,
|
|
14
|
+
answer_path=answer_path,
|
|
15
|
+
api_key=API_KEY,
|
|
16
|
+
question_type="powerbi"
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
print(f"PowerBI | {answer_path}")
|
|
20
|
+
print("✅ Evaluation result:")
|
|
21
|
+
pprint(evaluation)
|
|
22
|
+
print("-" * 40)
|
|
23
|
+
|
|
24
|
+
if __name__ == "__main__":
|
|
25
|
+
asyncio.run(main())
|
|
@@ -1,82 +0,0 @@
|
|
|
1
|
-
import asyncio
|
|
2
|
-
from QuantumCheck import HomeworkEvaluator
|
|
3
|
-
|
|
4
|
-
question_sets = {
|
|
5
|
-
"python_beginner": "Write a Python function to calculate factorial.\nWrite a Python script to reverse a string.",
|
|
6
|
-
"power_bi": "Create a Power BI report with a bar chart.\nExplain DAX measures for sales analysis.",
|
|
7
|
-
"sql": "Write a SQL query to join two tables.\nWrite a SQL query for aggregate functions.",
|
|
8
|
-
"ssis": "Design an SSIS package for data import.\nExplain SSIS control flow tasks."
|
|
9
|
-
}
|
|
10
|
-
|
|
11
|
-
answer_paths = {
|
|
12
|
-
"python": ["../tests/answer/python1.zip"],
|
|
13
|
-
"powerbi": ["../tests/answer/real.zip"],
|
|
14
|
-
"sql": ["../tests/answer/sql3.zip"],
|
|
15
|
-
"ssis": ["../tests/answer/answer.dtsx"]
|
|
16
|
-
}
|
|
17
|
-
|
|
18
|
-
question_type_mapping = {
|
|
19
|
-
"python_beginner": "python",
|
|
20
|
-
"power_bi": "powerbi",
|
|
21
|
-
"sql": "sql",
|
|
22
|
-
"ssis": "ssis"
|
|
23
|
-
}
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
def format_score(score):
|
|
27
|
-
if score >= 90:
|
|
28
|
-
return f"🟢 Excellent ({score}⭐)"
|
|
29
|
-
elif score >= 75:
|
|
30
|
-
return f"🟡 Good ({score})"
|
|
31
|
-
elif score >= 50:
|
|
32
|
-
return f"🟠 Pass ({score})"
|
|
33
|
-
else:
|
|
34
|
-
return f"🔴 Fail ({score})"
|
|
35
|
-
API_KEY = "<KEY>"
|
|
36
|
-
async def run_evaluation(evaluator, q_key, q_content, question_type, answer_path, index):
|
|
37
|
-
try:
|
|
38
|
-
evaluation = await evaluator.evaluate_from_content(
|
|
39
|
-
question_content=q_content,
|
|
40
|
-
answer_path=answer_path,
|
|
41
|
-
api_key=API_KEY,
|
|
42
|
-
question_type=question_type
|
|
43
|
-
)
|
|
44
|
-
score = evaluation.get("score", 0)
|
|
45
|
-
return (q_key, index, "success", score)
|
|
46
|
-
except Exception as e:
|
|
47
|
-
return (q_key, index, "error", str(e))
|
|
48
|
-
|
|
49
|
-
async def main():
|
|
50
|
-
evaluator = HomeworkEvaluator()
|
|
51
|
-
tasks = []
|
|
52
|
-
|
|
53
|
-
for q_key, q_content in question_sets.items():
|
|
54
|
-
question_type = question_type_mapping[q_key]
|
|
55
|
-
paths = answer_paths.get(question_type, [])
|
|
56
|
-
if not paths:
|
|
57
|
-
print(f"⚠️ No answer paths found for question type '{question_type}'")
|
|
58
|
-
continue
|
|
59
|
-
for i in range(10): # run each set 10 times
|
|
60
|
-
for path in paths:
|
|
61
|
-
task = run_evaluation(evaluator, q_key, q_content, question_type, path, i + 1)
|
|
62
|
-
tasks.append(task)
|
|
63
|
-
|
|
64
|
-
results = await asyncio.gather(*tasks)
|
|
65
|
-
|
|
66
|
-
# Group results by question key
|
|
67
|
-
grouped = {}
|
|
68
|
-
for q_key, index, status, output in results:
|
|
69
|
-
if q_key not in grouped:
|
|
70
|
-
grouped[q_key] = []
|
|
71
|
-
grouped[q_key].append((index, status, output))
|
|
72
|
-
|
|
73
|
-
# Sort and print all at once, grouped by question
|
|
74
|
-
for q_key in grouped:
|
|
75
|
-
print(f"\n📘 {q_key.upper()} Results")
|
|
76
|
-
for index, status, output in sorted(grouped[q_key], key=lambda x: x[0]):
|
|
77
|
-
if status == "success":
|
|
78
|
-
print(f" ⏱️ Run {index:02}: {format_score(output)}")
|
|
79
|
-
else:
|
|
80
|
-
print(f" ⏱️ Run {index:02}: ❌ Error - {output}")
|
|
81
|
-
|
|
82
|
-
asyncio.run(main())
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|