quizforge 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- quizforge/__init__.py +104 -0
- quizforge/bank.py +89 -0
- quizforge/certificate.py +84 -0
- quizforge/cli.py +134 -0
- quizforge/generate.py +193 -0
- quizforge/grade.py +119 -0
- quizforge/integrity.py +123 -0
- quizforge/llm.py +42 -0
- quizforge/sample.py +73 -0
- quizforge/schemas.py +85 -0
- quizforge/text.py +11 -0
- quizforge-0.2.0.dist-info/METADATA +104 -0
- quizforge-0.2.0.dist-info/RECORD +16 -0
- quizforge-0.2.0.dist-info/WHEEL +4 -0
- quizforge-0.2.0.dist-info/entry_points.txt +2 -0
- quizforge-0.2.0.dist-info/licenses/LICENSE +21 -0
quizforge/__init__.py
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
"""quizforge — generate a deep, mixed-format question bank from source material
|
|
2
|
+
and grade it. Deterministic where it can, LLM where it must. Bring your own
|
|
3
|
+
chat model.
|
|
4
|
+
|
|
5
|
+
Public API
|
|
6
|
+
----------
|
|
7
|
+
Generate:
|
|
8
|
+
generate_bank(material, llm, *, targets=..., existing=...) -> list[dict]
|
|
9
|
+
Sample:
|
|
10
|
+
sample_test(questions, blueprint=..., seen_ids=...) -> list[dict]
|
|
11
|
+
pick_spread(pool, want, seen_ids) -> list[dict]
|
|
12
|
+
DEFAULT_BLUEPRINT
|
|
13
|
+
Grade:
|
|
14
|
+
grade_fill_blank(question, user_answer) -> dict
|
|
15
|
+
grade_match(question, selections) -> dict
|
|
16
|
+
grade_open_answer(question, user_answer, llm) -> QuizGrade | None
|
|
17
|
+
QuizGrade
|
|
18
|
+
Integrity:
|
|
19
|
+
assess_speed(*, elapsed_seconds, question_types, passed) -> IntegrityFlag
|
|
20
|
+
expected_min_seconds(question_types) -> float
|
|
21
|
+
within_time_limit(elapsed_seconds, limit_seconds=...) -> bool
|
|
22
|
+
IntegrityFlag
|
|
23
|
+
Certificate:
|
|
24
|
+
make_certificate(*, learner_id, ..., score_pct, awarded_on) -> Certificate
|
|
25
|
+
verification_code(...) -> str | verify(certificate) -> bool
|
|
26
|
+
Certificate
|
|
27
|
+
Utilities:
|
|
28
|
+
normalize(text) -> str
|
|
29
|
+
structured_output(llm, schema) -> chain
|
|
30
|
+
"""
|
|
31
|
+
|
|
32
|
+
from .bank import Bank
|
|
33
|
+
from .certificate import (
|
|
34
|
+
Certificate,
|
|
35
|
+
is_eligible,
|
|
36
|
+
level_for,
|
|
37
|
+
make_certificate,
|
|
38
|
+
verification_code,
|
|
39
|
+
verify,
|
|
40
|
+
)
|
|
41
|
+
from .generate import (
|
|
42
|
+
DEFAULT_COVERAGE,
|
|
43
|
+
DEFAULT_DIFF_SPLIT,
|
|
44
|
+
DEFAULT_SYSTEM,
|
|
45
|
+
DEFAULT_TARGETS,
|
|
46
|
+
generate_bank,
|
|
47
|
+
)
|
|
48
|
+
from .grade import (
|
|
49
|
+
DEFAULT_GRADE_SYSTEM,
|
|
50
|
+
QuizGrade,
|
|
51
|
+
grade_fill_blank,
|
|
52
|
+
grade_match,
|
|
53
|
+
grade_open_answer,
|
|
54
|
+
)
|
|
55
|
+
from .integrity import (
|
|
56
|
+
DEFAULT_MIN_SECONDS_PER_TYPE,
|
|
57
|
+
DEFAULT_TIME_LIMIT_SECONDS,
|
|
58
|
+
IntegrityFlag,
|
|
59
|
+
assess_speed,
|
|
60
|
+
expected_min_seconds,
|
|
61
|
+
within_time_limit,
|
|
62
|
+
)
|
|
63
|
+
from .llm import structured_output
|
|
64
|
+
from .sample import DEFAULT_BLUEPRINT, DIFFICULTY_ORDER, pick_spread, sample_test
|
|
65
|
+
from .schemas import DIFFICULTIES, FORMAT_KEYS, FORMATS
|
|
66
|
+
from .text import normalize
|
|
67
|
+
|
|
68
|
+
__version__ = "0.2.0"
|
|
69
|
+
|
|
70
|
+
__all__ = [
|
|
71
|
+
"Bank",
|
|
72
|
+
"generate_bank",
|
|
73
|
+
"sample_test",
|
|
74
|
+
"pick_spread",
|
|
75
|
+
"grade_fill_blank",
|
|
76
|
+
"grade_match",
|
|
77
|
+
"grade_open_answer",
|
|
78
|
+
"QuizGrade",
|
|
79
|
+
"assess_speed",
|
|
80
|
+
"expected_min_seconds",
|
|
81
|
+
"within_time_limit",
|
|
82
|
+
"IntegrityFlag",
|
|
83
|
+
"DEFAULT_MIN_SECONDS_PER_TYPE",
|
|
84
|
+
"DEFAULT_TIME_LIMIT_SECONDS",
|
|
85
|
+
"Certificate",
|
|
86
|
+
"make_certificate",
|
|
87
|
+
"verification_code",
|
|
88
|
+
"verify",
|
|
89
|
+
"is_eligible",
|
|
90
|
+
"level_for",
|
|
91
|
+
"normalize",
|
|
92
|
+
"structured_output",
|
|
93
|
+
"DEFAULT_BLUEPRINT",
|
|
94
|
+
"DEFAULT_TARGETS",
|
|
95
|
+
"DEFAULT_DIFF_SPLIT",
|
|
96
|
+
"DEFAULT_SYSTEM",
|
|
97
|
+
"DEFAULT_COVERAGE",
|
|
98
|
+
"DEFAULT_GRADE_SYSTEM",
|
|
99
|
+
"DIFFICULTY_ORDER",
|
|
100
|
+
"DIFFICULTIES",
|
|
101
|
+
"FORMAT_KEYS",
|
|
102
|
+
"FORMATS",
|
|
103
|
+
"__version__",
|
|
104
|
+
]
|
quizforge/bank.py
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
"""Bank — a YAML-backed question bank with grow/sample/save convenience.
|
|
2
|
+
|
|
3
|
+
A bank file is YAML with (at least) a ``questions:`` list; any other top-level
|
|
4
|
+
keys are metadata, carried through untouched on save. An optional ``material:``
|
|
5
|
+
key holds the source text used to ground generation (or pass it explicitly).
|
|
6
|
+
|
|
7
|
+
Comments are NOT preserved on save (clean round-trip via PyYAML). If you keep a
|
|
8
|
+
heavily-commented bank file, generate into memory and write where you control
|
|
9
|
+
the formatting instead of calling ``save``.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from collections import Counter
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import List, Optional, Union
|
|
15
|
+
|
|
16
|
+
import yaml
|
|
17
|
+
|
|
18
|
+
from .generate import generate_bank
|
|
19
|
+
from .sample import sample_test
|
|
20
|
+
|
|
21
|
+
PathLike = Union[str, Path]


class Bank:
    """Load, grow, sample, and save a YAML-backed question bank.

    Attributes:
        questions: the question dicts, in bank order.
        meta: every other top-level key of the bank file (for example
            ``material``), carried through untouched on save.
    """

    def __init__(self, questions: Optional[List[dict]] = None, meta: Optional[dict] = None):
        # Shallow-copy both containers so the bank does not alias the caller's
        # list/dict objects (the question dicts themselves are shared).
        self.questions: List[dict] = list(questions or [])
        self.meta: dict = dict(meta or {})

    # ----- construction -----------------------------------------------------
    @classmethod
    def load(cls, path: PathLike) -> "Bank":
        """Read a bank from the YAML file at ``path``.

        An empty file yields an empty bank.

        Raises:
            ValueError: if the document is not a mapping, or if its
                ``questions`` value is present but not a list.
        """
        # Read as UTF-8 explicitly: ``save`` writes unescaped Unicode, so relying
        # on the locale's default encoding breaks round-trips on non-UTF-8 systems.
        data = yaml.safe_load(Path(path).read_text(encoding="utf-8")) or {}
        if not isinstance(data, dict):
            raise ValueError(f"{path}: expected a YAML mapping, got {type(data).__name__}")
        questions = data.pop("questions", []) or []
        if not isinstance(questions, list):
            # list(<mapping>) would silently keep only the keys; fail loudly instead.
            raise ValueError(
                f"{path}: expected 'questions' to be a list, got {type(questions).__name__}")
        return cls(questions=questions, meta=data)

    @classmethod
    def from_dict(cls, data: dict) -> "Bank":
        """Build a bank from an already-parsed mapping without mutating ``data``."""
        data = dict(data or {})
        return cls(questions=data.pop("questions", []) or [], meta=data)

    # ----- inspection -------------------------------------------------------
    @property
    def material(self) -> str:
        """The ``material`` metadata value, or ``""`` when absent or empty."""
        return self.meta.get("material", "") or ""

    def counts_by_type(self) -> dict:
        """Question count per ``type``; a question with no ``type`` counts as ``"mc"``."""
        return dict(Counter(q.get("type", "mc") for q in self.questions))

    def counts_by_difficulty(self) -> dict:
        """Question count per ``difficulty``; a missing value counts as ``"medium"``."""
        return dict(Counter(q.get("difficulty", "medium") for q in self.questions))

    def __len__(self) -> int:
        return len(self.questions)

    # ----- mutation ---------------------------------------------------------
    def add(self, new_questions: List[dict]) -> "Bank":
        """Append ``new_questions`` in place and return ``self`` for chaining."""
        self.questions.extend(new_questions)
        return self

    def grow(self, llm, *, targets: Optional[dict] = None,
             material: Optional[str] = None, **kwargs) -> List[dict]:
        """Generate the shortfall to reach ``targets`` and append it in place.

        Grounds on ``material`` if given, else the bank's ``material`` metadata.
        Extra keyword arguments are forwarded to ``generate_bank``.
        Returns just the newly added questions.

        Raises:
            ValueError: if neither source provides non-blank material.
        """
        mat = material if material is not None else self.material
        if not (mat or "").strip():
            raise ValueError("no material to ground generation — pass material=... "
                             "or set a 'material:' key in the bank file")
        new = generate_bank(mat, llm, targets=targets, existing=self.questions, **kwargs)
        self.add(new)
        return new

    def sample(self, blueprint: Optional[dict] = None, seen_ids=(), rng=None) -> List[dict]:
        """Draw one test from the bank by delegating to ``sample_test``."""
        return sample_test(self.questions, blueprint=blueprint, seen_ids=seen_ids, rng=rng)

    # ----- persistence ------------------------------------------------------
    def to_dict(self) -> dict:
        """Metadata keys first, then ``questions`` (the live list, not a copy)."""
        return {**self.meta, "questions": self.questions}

    def save(self, path: PathLike) -> None:
        """Write the bank to ``path`` as YAML (comments are not preserved)."""
        # allow_unicode=True emits raw non-ASCII characters, so the stream must
        # be UTF-8 regardless of the platform's default encoding.
        with open(path, "w", encoding="utf-8") as f:
            yaml.safe_dump(self.to_dict(), f, sort_keys=False, allow_unicode=True,
                           width=100, default_flow_style=False)
|
quizforge/certificate.py
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
"""Completion certificates — the data + a tamper-evident verification code.
|
|
2
|
+
|
|
3
|
+
quizforge owns the *facts* of a certificate (who, which topic, what level, when)
|
|
4
|
+
and a deterministic verification code derived from them, so any consumer can
|
|
5
|
+
re-derive the code to confirm a certificate wasn't altered. Rendering — PDF,
|
|
6
|
+
HTML, image — is deliberately left to the consumer, where the branding lives, so
|
|
7
|
+
this module stays dependency-light (pydantic only) and clockless (the caller
|
|
8
|
+
supplies ``awarded_on``, keeping generation reproducible).
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import hashlib
|
|
12
|
+
from typing import Optional
|
|
13
|
+
|
|
14
|
+
from pydantic import BaseModel, Field
|
|
15
|
+
|
|
16
|
+
PASS = "passed"
|
|
17
|
+
DISTINCTION = "distinction"
|
|
18
|
+
_SEP = "\x1f" # unit separator — unlikely to appear in any field
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class Certificate(BaseModel):
    """An earned completion certificate's facts + its verification code.

    Built by ``make_certificate`` and checked by ``verify``. Only
    ``learner_id``, ``topic_id``, ``awarded_on``, ``level`` and ``score_pct``
    are passed to ``verification_code``; ``learner_name`` and ``topic_title``
    are display fields and are not covered by the code.
    """

    # Identity fields.
    learner_id: str = Field(description="Stable learner identity (e.g. email).")
    learner_name: str = Field(description="Display name to print on the certificate.")
    # Topic fields.
    topic_id: str = Field(description="Topic/lesson identifier.")
    topic_title: str = Field(description="Human-readable topic title.")
    # Result fields. No range constraint is declared on score_pct here.
    score_pct: int = Field(description="Best score as a 0-100 percentage.")
    level: str = Field(description="'passed' or 'distinction'.")
    # Stored as an opaque string supplied by the caller; never parsed in this module.
    awarded_on: str = Field(description="Award date as the caller's display string.")
    verification_code: str = Field(description="Deterministic, tamper-evident code.")
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def verification_code(*, learner_id: str, topic_id: str, awarded_on: str, level: str,
                      score_pct: int, secret: str = "", prefix: str = "QF") -> str:
    """Return the short verification code for one set of certificate facts.

    The code is ``<prefix>-XXXX-XXXX``, where the eight characters are the
    first eight upper-case hex digits of the SHA-256 of the fields joined with
    ``_SEP``. ``learner_id`` is lower-cased and stripped before hashing; every
    other field is used verbatim. Identical inputs (including ``secret``)
    always give the identical code.
    """
    fields = (
        learner_id.lower().strip(),
        topic_id,
        awarded_on,
        level,
        str(score_pct),
        secret,
    )
    payload = _SEP.join(fields).encode("utf-8")
    hex_upper = hashlib.sha256(payload).hexdigest().upper()
    first_group, second_group = hex_upper[:4], hex_upper[4:8]
    return f"{prefix}-{first_group}-{second_group}"
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def is_eligible(score_pct: int, pass_threshold: float = 0.6) -> bool:
    """Tell whether ``score_pct`` (a 0-100 value) reaches ``pass_threshold`` (a 0-1 fraction)."""
    fraction = score_pct / 100.0
    return fraction >= pass_threshold
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def level_for(score_pct: int, distinction_threshold: float = 0.8) -> str:
    """Map a 0-100 score to ``DISTINCTION`` (at or above the threshold) or ``PASS``."""
    if score_pct / 100.0 >= distinction_threshold:
        return DISTINCTION
    return PASS
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def make_certificate(*, learner_id: str, learner_name: str, topic_id: str, topic_title: str,
                     score_pct: int, awarded_on: str, pass_threshold: float = 0.6,
                     distinction_threshold: float = 0.8, secret: str = "",
                     prefix: str = "QF") -> Certificate:
    """Assemble the :class:`Certificate` for a passing score.

    The level comes from ``level_for`` and the code from ``verification_code``
    using the same ``secret`` and ``prefix`` given here.

    Raises:
        ValueError: when ``is_eligible(score_pct, pass_threshold)`` is false.
    """
    eligible = is_eligible(score_pct, pass_threshold)
    if not eligible:
        threshold_pct = round(pass_threshold * 100)
        raise ValueError(f"score {score_pct}% is below the pass threshold "
                         f"{threshold_pct}% — no certificate")

    earned_level = level_for(score_pct, distinction_threshold)
    derived_code = verification_code(
        learner_id=learner_id,
        topic_id=topic_id,
        awarded_on=awarded_on,
        level=earned_level,
        score_pct=score_pct,
        secret=secret,
        prefix=prefix,
    )
    return Certificate(
        learner_id=learner_id,
        learner_name=learner_name,
        topic_id=topic_id,
        topic_title=topic_title,
        score_pct=score_pct,
        level=earned_level,
        awarded_on=awarded_on,
        verification_code=derived_code,
    )
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def verify(certificate: Certificate, secret: str = "", prefix: str = "QF") -> bool:
    """Re-derive the code from the certificate's fields and confirm it matches.

    Args:
        certificate: the certificate whose ``verification_code`` is checked.
        secret: must be the same secret used when the code was issued.
        prefix: must be the same prefix used when the code was issued.

    Returns:
        True when the stored code equals the freshly derived one.
    """
    import hmac  # local import: only this function needs it

    expected = verification_code(
        learner_id=certificate.learner_id, topic_id=certificate.topic_id,
        awarded_on=certificate.awarded_on, level=certificate.level,
        score_pct=certificate.score_pct, secret=secret, prefix=prefix)
    # With a non-empty secret the code acts as a keyed check value, so compare in
    # constant time rather than with ``==``. Encode to bytes because
    # compare_digest only accepts ASCII-only str, and a tampered code or a
    # custom prefix may contain non-ASCII characters.
    return hmac.compare_digest(expected.encode("utf-8"),
                               certificate.verification_code.encode("utf-8"))
|
quizforge/cli.py
ADDED
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
"""quizforge command line.
|
|
2
|
+
|
|
3
|
+
quizforge grow BANK [--material-file F] [--mc N ...] [--dry-run] --llm mod:factory
|
|
4
|
+
quizforge sample BANK [--seed N] [--json]
|
|
5
|
+
quizforge stats BANK
|
|
6
|
+
|
|
7
|
+
`grow` needs a chat model. Because quizforge is provider-neutral, you point it at
|
|
8
|
+
one you construct:
|
|
9
|
+
|
|
10
|
+
# a dotted path to a zero-arg callable returning a LangChain-style model
|
|
11
|
+
quizforge grow bank.yaml --llm myproject.models:make_llm
|
|
12
|
+
|
|
13
|
+
# or, if langchain-openai is installed and OPENAI_API_KEY is set
|
|
14
|
+
quizforge grow bank.yaml --openai-model gpt-4.1
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
import argparse
|
|
18
|
+
import importlib
|
|
19
|
+
import json
|
|
20
|
+
import logging
|
|
21
|
+
import random
|
|
22
|
+
import sys
|
|
23
|
+
from collections import Counter
|
|
24
|
+
from typing import Optional
|
|
25
|
+
|
|
26
|
+
from .bank import Bank
|
|
27
|
+
from .generate import DEFAULT_TARGETS
|
|
28
|
+
from .schemas import FORMAT_KEYS
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _resolve_llm(args):
|
|
32
|
+
"""Build the chat model from --llm dotted path or --openai-model."""
|
|
33
|
+
if args.llm:
|
|
34
|
+
module_path, _, attr = args.llm.partition(":")
|
|
35
|
+
if not module_path or not attr:
|
|
36
|
+
raise SystemExit("--llm must be 'module.path:callable' (a zero-arg factory)")
|
|
37
|
+
factory = getattr(importlib.import_module(module_path), attr)
|
|
38
|
+
return factory()
|
|
39
|
+
if args.openai_model:
|
|
40
|
+
try:
|
|
41
|
+
from langchain_openai import ChatOpenAI
|
|
42
|
+
except ImportError:
|
|
43
|
+
raise SystemExit("--openai-model needs 'pip install langchain-openai'")
|
|
44
|
+
return ChatOpenAI(model=args.openai_model, temperature=args.temperature)
|
|
45
|
+
raise SystemExit("grow needs a model: pass --llm module:factory or --openai-model NAME")
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _cmd_grow(args) -> int:
    """Handle ``quizforge grow``: generate the shortfall and write the bank back.

    Per-format targets come from the ``--<format>`` options that were actually
    given. With ``--dry-run`` the new questions are summarised (first three
    shown) and nothing is written. Returns the process exit status (always 0).
    """
    bank = Bank.load(args.bank)
    targets = {f: getattr(args, f) for f in FORMAT_KEYS if getattr(args, f) is not None}

    material = None
    if args.material_file:
        # Context manager closes the handle; UTF-8 keeps reads locale-independent.
        with open(args.material_file, encoding="utf-8") as fh:
            material = fh.read()

    llm = _resolve_llm(args)
    before = len(bank)

    gen_kwargs = {"batch_size": args.batch}
    # Forward ``coverage`` only when the option was given. Passing an explicit
    # None would override generate_bank's DEFAULT_COVERAGE and drop the hint.
    # An explicit empty string is still forwarded and disables the hint.
    if args.coverage is not None:
        gen_kwargs["coverage"] = args.coverage

    new = bank.grow(llm, targets=targets or None, material=material, **gen_kwargs)
    print(f"Generated {len(new)} new questions ({dict(Counter(q['type'] for q in new))}).")
    if args.dry_run:
        print(f"--dry-run: not writing. Bank would grow {before} -> {before + len(new)}.")
        for q in new[:3]:
            print(f" [{q['type']}/{q['difficulty']}] {q['prompt'][:90]}")
        return 0
    bank.save(args.bank)
    print(f"Wrote {args.bank}. Bank now {len(bank)} (was {before}).")
    return 0
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _cmd_sample(args) -> int:
    """Handle ``quizforge sample``: draw one test and print it.

    ``--seed`` makes the draw reproducible via a dedicated ``random.Random``.
    ``--json`` dumps the sampled questions to stdout; otherwise a numbered
    summary is printed. Returns the process exit status (always 0).
    """
    bank = Bank.load(args.bank)
    rng = None if args.seed is None else random.Random(args.seed)
    drawn = bank.sample(rng=rng)

    if not args.json:
        tally = Counter(item.get("type", "mc") for item in drawn)
        print(f"Sampled {len(drawn)} questions: {dict(tally)}")
        for number, q in enumerate(drawn, 1):
            print(f" {number:2d}. [{q.get('type')}/{q.get('difficulty')}] {q.get('prompt', '')[:90]}")
        return 0

    json.dump(drawn, sys.stdout, indent=2, ensure_ascii=False)
    print()  # json.dump does not end the output with a newline
    return 0
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _cmd_stats(args) -> int:
    """Handle ``quizforge stats``: print bank size and its format/difficulty mix.

    The material length is shown only when the bank carries material.
    Returns the process exit status (always 0).
    """
    loaded = Bank.load(args.bank)
    print(f"Bank: {args.bank}")
    print(f" total questions : {len(loaded)}")
    print(f" by format : {loaded.counts_by_type()}")
    print(f" by difficulty : {loaded.counts_by_difficulty()}")
    if not loaded.material:
        return 0
    print(f" material : {len(loaded.material)} chars")
    return 0
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def build_parser() -> argparse.ArgumentParser:
    """Construct the ``quizforge`` argument parser with its three subcommands.

    Each subparser stores its handler in ``func`` so the caller can dispatch
    with ``args.func(args)``. ``grow`` also gets one ``--<format>`` option per
    entry in ``FORMAT_KEYS`` (underscores become hyphens).
    """
    parser = argparse.ArgumentParser(
        prog="quizforge",
        description="Generate, sample, and grade mixed-format question banks.",
    )
    commands = parser.add_subparsers(dest="cmd", required=True)

    # --- grow ---
    grow = commands.add_parser("grow", help="generate the shortfall to reach per-format targets")
    grow.add_argument("bank", help="path to the bank YAML file")
    grow.add_argument("--material-file",
                      help="text file to ground generation (overrides bank 'material:')")
    grow.add_argument("--batch", type=int, default=8, help="questions per LLM call (default 8)")
    grow.add_argument("--coverage", default=None, help="coverage steering hint for the generator")
    grow.add_argument("--dry-run", action="store_true",
                      help="generate and summarize, but don't write")
    grow.add_argument("--llm", help="dotted path 'module:callable' to a zero-arg model factory")
    grow.add_argument("--openai-model", help="convenience: build ChatOpenAI with this model name")
    grow.add_argument("--temperature", type=float, default=0.4,
                      help="temperature for --openai-model")
    for fmt_key in FORMAT_KEYS:
        flag = f"--{fmt_key.replace('_', '-')}"
        grow.add_argument(flag, dest=fmt_key, type=int, default=None,
                          help=f"target TOTAL {fmt_key} questions (default {DEFAULT_TARGETS[fmt_key]})")
    grow.set_defaults(func=_cmd_grow)

    # --- sample ---
    sample = commands.add_parser("sample", help="draw one mixed-format test from the bank")
    sample.add_argument("bank")
    sample.add_argument("--seed", type=int, default=None, help="rng seed for a reproducible draw")
    sample.add_argument("--json", action="store_true", help="emit the sampled questions as JSON")
    sample.set_defaults(func=_cmd_sample)

    # --- stats ---
    stats = commands.add_parser("stats", help="show bank size and format/difficulty mix")
    stats.add_argument("bank")
    stats.set_defaults(func=_cmd_stats)

    return parser
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def main(argv: Optional[list] = None) -> int:
    """CLI entry point: configure logging, parse ``argv``, run the chosen subcommand.

    ``argv=None`` lets argparse read ``sys.argv``. Returns the handler's exit status.
    """
    # Message-only format so the generator's INFO progress lines print cleanly.
    logging.basicConfig(level=logging.INFO, format="%(message)s")
    parser = build_parser()
    namespace = parser.parse_args(argv)
    handler = namespace.func
    return handler(namespace)


if __name__ == "__main__":
    raise SystemExit(main())
|
quizforge/generate.py
ADDED
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
"""Generation — grow a question bank from source material with an injected LLM.
|
|
2
|
+
|
|
3
|
+
``generate_bank`` drafts questions strictly from the material you provide,
|
|
4
|
+
validates each one, dedupes against what's already in the bank (and within the
|
|
5
|
+
run), assigns sequential ids, and returns the NEW questions as plain dicts. It
|
|
6
|
+
never writes files and never duplicates an existing prompt, so it's safe to
|
|
7
|
+
re-run to "top up" a bank to target sizes.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import logging
|
|
11
|
+
from collections import Counter
|
|
12
|
+
from typing import List, Optional
|
|
13
|
+
|
|
14
|
+
from .llm import structured_output
|
|
15
|
+
from .schemas import DIFFICULTIES, FORMATS
|
|
16
|
+
from .text import normalize
|
|
17
|
+
|
|
18
|
+
# Package-wide logger name (a fixed string rather than ``__name__``).
logger = logging.getLogger("quizforge")

# Default TOTAL bank size per format — comfortably deep for a 20-question test
# (default blueprint draws mc8/fill4/match2/short4/freetext2) so unseen
# questions last across many retakes.
DEFAULT_TARGETS = {"mc": 40, "fill_blank": 20, "match": 12, "short": 16, "freetext": 12}
# Default number of questions requested per LLM call (``generate_bank``'s ``batch_size``).
DEFAULT_BATCH = 8
# Per-batch difficulty proportions. ``_diff_counts`` reads only "easy" and
# "hard"; medium receives whatever remains of the batch.
DEFAULT_DIFF_SPLIT = {"easy": 0.30, "medium": 0.45, "hard": 0.25}

# System message sent with every generation batch (``generate_bank``'s ``system``).
DEFAULT_SYSTEM = (
    "You are an expert quiz-content author. Write questions STRICTLY grounded in the provided "
    "material — do not invent facts beyond it. Questions must be accurate, unambiguous, and test "
    "real understanding, not trivia. For multiple choice, exactly one option is correct and the "
    "distractors are plausible. Vary difficulty as requested. Return strict JSON for the "
    "requested schema."
)

# Domain-neutral default. Callers with a specific emphasis (e.g. attack-scenario
# coverage for security training) pass their own ``coverage`` string.
DEFAULT_COVERAGE = (
    "Cover the breadth of the material. Frame at least some questions as realistic applied "
    "scenarios rather than abstract recall."
)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _diff_counts(n: int, split: dict) -> dict:
|
|
44
|
+
"""Split a batch of n into easy/medium/hard per ``split`` (sums to n)."""
|
|
45
|
+
easy = round(n * split.get("easy", 0))
|
|
46
|
+
hard = round(n * split.get("hard", 0))
|
|
47
|
+
medium = max(0, n - easy - hard)
|
|
48
|
+
return {"easy": easy, "medium": medium, "hard": hard}
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _valid_difficulty(d: str) -> str:
    """Normalise a difficulty label; anything not in ``DIFFICULTIES`` becomes ``"medium"``.

    The value is stripped and lower-cased before the lookup; ``None`` or an
    empty string also falls back to ``"medium"``.
    """
    cleaned = (d or "").strip().lower()
    if cleaned in DIFFICULTIES:
        return cleaned
    return "medium"
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _gen_batch(llm, schema, fmt_desc, material, n, existing_prompts, *,
               system, coverage, diff_split):
    """Ask the model for one batch of ``n`` questions and return the raw items.

    The human message contains the material, the requested count and
    difficulty mix, an optional ``COVERAGE:`` line, and up to 40 prompts to
    avoid (the last 40 in ``existing_prompts``' iteration order). Returns the
    ``questions`` attribute of the structured result, unvalidated.
    """
    mix = _diff_counts(n, diff_split)
    recent = list(existing_prompts)[-40:]
    avoid = "\n".join(f" - {p}" for p in recent) or " (none yet)"
    if coverage:
        coverage_line = f"COVERAGE: {coverage}\n"
    else:
        coverage_line = ""

    human = "".join([
        f"{material}\n\n",
        f"Write exactly {n} {fmt_desc}.\n",
        f"Difficulty mix: {mix['easy']} easy, {mix['medium']} medium, {mix['hard']} hard ",
        "(set each question's difficulty field accordingly).\n",
        coverage_line,
        "Ground every fact in the material above; do not invent details.\n",
        f"Do NOT duplicate or lightly reword any of these existing questions:\n{avoid}\n",
    ])

    messages = [("system", system), ("human", human)]
    result = structured_output(llm, schema).invoke(messages)
    return result.questions
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def _validate(qtype: str, q, seen_norms: set) -> Optional[dict]:
    """Coerce a generated question to a clean dict, or drop it (return None).

    Args:
        qtype: format key; ``"mc"``, ``"fill_blank"`` and ``"match"`` have
            dedicated rules, anything else is treated as short/freetext.
        q: the generated question object (attributes are read, not keys).
        seen_norms: normalized prompts already accepted; the new prompt is
            added to it only when the question is kept.

    Returns:
        The cleaned question dict (without ``id``), or None when the prompt is
        empty or a duplicate, or a format-specific requirement is not met.
    """
    prompt = (getattr(q, "prompt", "") or "").strip()
    if not prompt:
        return None
    norm = normalize(prompt)
    if not norm or norm in seen_norms:
        return None

    out = {"type": qtype, "difficulty": _valid_difficulty(q.difficulty), "prompt": prompt}
    if qtype == "mc":
        # Blank choices are dropped, so track where the answer lands in the
        # cleaned list. Reusing the original index would silently point at a
        # different option whenever a blank choice preceded the answer.
        choices = []
        answer_idx = None
        for original_idx, choice in enumerate(q.choices or []):
            text = (choice or "").strip()
            if not text:
                continue
            if original_idx == q.answer_idx:
                answer_idx = len(choices)
            choices.append(text)
        explanation = (q.explanation or "").strip()
        # answer_idx stays None when the index was missing, out of range, or
        # referred to a blank choice.
        if len(choices) < 3 or answer_idx is None or not explanation:
            return None
        out.update(choices=choices, answer_idx=answer_idx, explanation=explanation)
    elif qtype == "fill_blank":
        accepted = [a.strip() for a in (q.accepted_answers or []) if a and a.strip()]
        if not accepted:
            return None
        out.update(accepted_answers=accepted, explanation=(q.explanation or "").strip())
    elif qtype == "match":
        pairs = []
        for p in q.pairs or []:
            left = (p.left or "").strip()
            right = (p.right or "").strip()
            if left and right:
                pairs.append({"left": left, "right": right})
        rights = [p["right"].lower() for p in pairs]
        if not (2 <= len(pairs) <= 6) or len(set(rights)) != len(rights):  # distinct rights
            return None
        out.update(pairs=pairs, explanation=(q.explanation or "").strip())
    else:  # short / freetext
        rubric = [r.strip() for r in (q.rubric or []) if r and r.strip()]
        model_answer = (q.model_answer or "").strip()
        if not model_answer or not rubric:
            return None
        out.update(model_answer=model_answer, rubric=rubric)

    # Register the prompt only once the question has passed every check.
    seen_norms.add(norm)
    return out
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def _next_id_start(existing: List[dict], prefix: str = "q") -> int:
    """Highest ``<prefix><N>`` id in the existing bank, plus one (1 if none).

    Ids that do not start with ``prefix``, or whose remainder is not a decimal
    number, are ignored.
    """
    max_idx = 0
    for q in existing or []:
        qid = str(q.get("id", ""))
        suffix = qid[len(prefix):]
        # isdecimal (not isdigit): isdigit also accepts characters such as
        # superscript digits that int() cannot parse.
        if qid.startswith(prefix) and suffix.isdecimal():
            max_idx = max(max_idx, int(suffix))
    return max_idx + 1


def generate_bank(material: str, llm, *, targets: Optional[dict] = None,
                  existing: Optional[List[dict]] = None, batch_size: int = DEFAULT_BATCH,
                  diff_split: Optional[dict] = None, system: str = DEFAULT_SYSTEM,
                  coverage: str = DEFAULT_COVERAGE, max_batches_per_format: int = 6,
                  id_prefix: str = "q") -> List[dict]:
    """Generate the shortfall needed to bring a bank up to ``targets``.

    Args:
        material: the source text to ground questions in.
        llm: any LangChain-style chat model (sync ``with_structured_output``).
        targets: desired TOTAL count per format (defaults to ``DEFAULT_TARGETS``;
            given keys override the defaults, unknown formats are skipped).
        existing: the current bank (counted by type; prompts are not reused).
        batch_size: questions requested per LLM call.
        diff_split: easy/medium/hard proportions per batch.
        system / coverage: prompt steering (override ``coverage`` for a domain;
            a falsy ``coverage`` omits the coverage line entirely).
        max_batches_per_format: cap on LLM calls per format, counting batches
            that failed or whose items were rejected by validation.
        id_prefix: id scheme; ids continue after the highest existing ``<prefix><N>``.

    Returns:
        The NEW questions as dicts (with ``id``/``type``/``difficulty`` + fields).
        Does not mutate ``existing`` and does not include it in the result.
        May return fewer than the shortfall when batches fail or are rejected.
    """
    targets = {**DEFAULT_TARGETS, **(targets or {})}
    diff_split = diff_split or DEFAULT_DIFF_SPLIT
    existing = existing or []

    existing_by_type: dict = {}
    seen_norms: set = set()
    for q in existing:
        existing_by_type.setdefault(q.get("type", "mc"), []).append(q)
        seen_norms.add(normalize(q.get("prompt") or ""))

    all_new: List[dict] = []
    for fmt, target in targets.items():
        if fmt not in FORMATS:
            logger.warning("Unknown format %r in targets — skipping", fmt)
            continue
        have = len(existing_by_type.get(fmt, []))
        need = max(0, target - have)
        if need == 0:
            continue
        schema, fmt_desc = FORMATS[fmt]
        # An ordered list (not a set) so the "last 40" slice taken by _gen_batch
        # really is the most recent prompts, and so the request text is
        # reproducible. Questions without a prompt are skipped rather than
        # raising KeyError.
        prompts_seen = [q["prompt"] for q in existing_by_type.get(fmt, []) if q.get("prompt")]
        made = batches = 0
        while made < need and batches < max_batches_per_format:
            batches += 1  # counted up front so a failing batch still uses up the cap
            batch_n = min(batch_size, need - made)
            try:
                raw = _gen_batch(llm, schema, fmt_desc, material, batch_n, prompts_seen,
                                 system=system, coverage=coverage, diff_split=diff_split)
            except Exception as exc:  # noqa: BLE001 — skip a failed batch, keep going
                logger.warning("%s batch failed (%s): %s", fmt, type(exc).__name__, str(exc)[:160])
                continue
            for q in raw:
                v = _validate(fmt, q, seen_norms)
                if v:
                    all_new.append(v)
                    prompts_seen.append(v["prompt"])
                    made += 1
                    if made >= need:
                        break
        logger.info("%-11s have %2d / target %2d -> added %2d", fmt, have, target, made)

    # Assign sequential ids continuing after the existing bank (id first).
    # The scan uses id_prefix so a custom prefix does not restart at 1 and
    # collide with ids already in the bank.
    start = _next_id_start(existing, id_prefix)
    all_new = [{"id": f"{id_prefix}{i}", **q} for i, q in enumerate(all_new, start=start)]

    if all_new:
        logger.info("Generated %d new questions: %s | difficulty %s", len(all_new),
                    dict(Counter(q["type"] for q in all_new)),
                    dict(Counter(q["difficulty"] for q in all_new)))
    return all_new
|
quizforge/grade.py
ADDED
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
"""Grading — deterministic where we can, LLM where we must.
|
|
2
|
+
|
|
3
|
+
Multiple-choice is graded by the caller (it's a trivial index compare).
|
|
4
|
+
Fill-in-the-blank and match-the-following are graded deterministically here
|
|
5
|
+
(no LLM, fast and free). Open-ended answers are scored 0..1 with feedback by an
|
|
6
|
+
injected chat model against the question's model answer + rubric.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import logging
|
|
10
|
+
from typing import List, Optional
|
|
11
|
+
|
|
12
|
+
from pydantic import BaseModel, Field
|
|
13
|
+
|
|
14
|
+
from .llm import structured_output
|
|
15
|
+
from .text import normalize
|
|
16
|
+
|
|
17
|
+
logger = logging.getLogger("quizforge")
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def grade_fill_blank(question: dict, user_answer: str) -> dict:
    """Grade a fill-in-the-blank answer against ``accepted_answers`` without an LLM.

    All-or-nothing: score is 1.0 when the normalized answer is non-empty and
    equals any normalized accepted answer, otherwise 0.0. The result also
    echoes the accepted answers under ``"accepted"``.
    """
    accepted = question.get("accepted_answers") or []
    given = normalize(user_answer)
    if given:
        correct = given in {normalize(candidate) for candidate in accepted}
    else:
        # An answer that normalizes to empty is never correct.
        correct = False
    score = 1.0 if correct else 0.0
    return {"score": score, "correct": correct, "accepted": accepted}
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def grade_match(question: dict, selections: dict) -> dict:
    """Grade a match-the-following answer row by row without an LLM.

    ``selections`` is keyed by the left item's index as a string and holds the
    right-side label the learner picked; a missing key counts as an empty
    pick. The score is the fraction of rows whose normalized pick equals the
    normalized expected right side. A question with no pairs scores 0.0.
    """
    pairs = question.get("pairs") or []
    if not pairs:
        return {"score": 0.0, "correct_count": 0, "total": 0, "rows": []}

    rows = []
    for idx, pair in enumerate(pairs):
        expected = pair.get("right", "")
        picked = selections.get(str(idx), "")
        rows.append({
            "left": pair.get("left", ""),
            "right": expected,
            "chosen": picked,
            "correct": normalize(picked) == normalize(expected),
        })

    total = len(pairs)
    hits = sum(1 for row in rows if row["correct"])
    return {"score": hits / total, "correct_count": hits,
            "total": total, "rows": rows}
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
# Structured result of grading one open-ended answer. grade_open_answer binds
# this class to the injected model via structured_output() and also builds it
# directly for blank answers.
# NOTE(review): the docstring and Field descriptions are presumably surfaced to
# the model as part of the schema — treat them as prompt text, not as comments.
# Nothing here constrains ``score`` to 0..1; grade_open_answer clamps it.
class QuizGrade(BaseModel):
    """The model's read on an open-ended answer."""

    score: float = Field(description="Credit awarded, 0.0 to 1.0. 1.0 = covers the key points "
                                     "correctly; ~0.5 = partially correct or missing a key idea; 0.0 = wrong, "
                                     "empty, or just restates the question.")
    verdict: str = Field(description="One of: 'correct', 'partial', 'incorrect'")
    feedback: str = Field(description="2-3 sentences, encouraging coach tone. Say what was right, "
                                      "then the single most important thing to add or fix.")
    covered: List[str] = Field(description="Key points the answer got right (short phrases). Empty if none.")
    missed: List[str] = Field(description="Key points the answer missed or got wrong (short phrases). Empty if none.")
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
# Default system message for open-answer grading. grade_open_answer sends it as
# the ("system", ...) message unless the caller supplies its own ``system``.
DEFAULT_GRADE_SYSTEM = (
    "You are a training assessor grading a learner's open-ended quiz answer. Grade on "
    "CONCEPTS, not phrasing or grammar — credit the learner when they demonstrate the right "
    "understanding in their own words. Be fair but rigorous: award partial credit when an "
    "answer is on the right track but misses a key point, and award 0 when it is wrong, empty, "
    "evasive ('I don't know'), or merely restates the question. Do not be fooled by "
    "confident-sounding but incorrect answers. Return strict JSON for the QuizGrade schema."
)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _open_prompt(question: dict, user_answer: str) -> str:
    """Assemble the human-turn text sent to the grader for one open answer.

    Sections, in order: the question prompt, the reference answer (or
    ``(none provided)`` when it is missing or blank), the rubric bullets when
    the question has any, the learner's stripped answer, and a closing
    instruction. Sections are joined with newlines.

    Raises:
        KeyError: if ``question`` has no ``"prompt"`` key.
    """
    sections = [f"QUESTION:\n{question['prompt']}", ""]

    reference = (question.get("model_answer") or "").strip()
    if not reference:
        reference = "(none provided)"
    sections.append(f"MODEL ANSWER (what a strong response covers):\n{reference}")

    key_points = question.get("rubric") or []
    if key_points:
        sections.append("")
        sections.append("RUBRIC — key points to look for:")
        sections.extend(f" - {point}" for point in key_points)

    answer = (user_answer or "").strip()
    sections.extend([
        "",
        f"LEARNER'S ANSWER:\n{answer}",
        "",
        "Grade the learner's answer. score is fractional 0..1.",
    ])
    return "\n".join(sections)
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def grade_open_answer(question: dict, user_answer: str, llm,
                      system: str = DEFAULT_GRADE_SYSTEM) -> Optional[QuizGrade]:
    """Score an open-ended answer 0..1 with feedback using the injected ``llm``.

    A blank answer is graded locally (score 0.0, every rubric point listed as
    missed) without calling the model. Otherwise the model is asked for a
    :class:`QuizGrade` and its score is clamped into ``[0.0, 1.0]``.

    Args:
        question: an open-ended question dict (``prompt``, optional
            ``model_answer`` / ``rubric`` / ``id``).
        user_answer: the learner's text; ``None`` counts as blank.
        llm: chat model passed to ``structured_output``.
        system: system message for the grader.

    Returns:
        A :class:`QuizGrade`, or ``None`` if automated grading was unavailable
        or produced an unusable result (callers should exclude the question
        from the attempt's max score rather than penalize the learner for an
        outage). This function does not raise for model failures.
    """
    import math  # local import: only needed to reject NaN scores

    if not (user_answer or "").strip():
        return QuizGrade(score=0.0, verdict="incorrect", feedback="No answer was provided.",
                         covered=[], missed=list(question.get("rubric") or []))
    try:
        chain = structured_output(llm, QuizGrade)
        grade = chain.invoke([
            ("system", system),
            ("human", _open_prompt(question, user_answer)),
        ])
        # Some bindings hand back a plain dict rather than the schema instance.
        if isinstance(grade, dict):
            grade = QuizGrade(**grade)
        # Read the score inside the guard: a None/odd result raises here and is
        # reported as "grading unavailable" instead of crashing the quiz.
        score = float(grade.score)
        if math.isnan(score):
            # min(1.0, nan) evaluates to 1.0, so an unchecked NaN would be
            # clamped into full credit.
            raise ValueError("model returned a NaN score")
    except Exception as exc:  # noqa: BLE001 — degrade gracefully, never crash the quiz
        logger.warning("Open-answer grading failed for %s: %s: %s",
                       question.get("id", "?"), type(exc).__name__, str(exc)[:200])
        return None
    grade.score = max(0.0, min(1.0, score))
    return grade
|
quizforge/integrity.py
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
"""Integrity signals — flag attempts that are too fast to be genuine.
|
|
2
|
+
|
|
3
|
+
A deep, unseen-first bank (see :mod:`quizforge.sample`) already makes
|
|
4
|
+
answer-sharing hard. This adds a cheap, deterministic backstop: an attempt that
|
|
5
|
+
*passed* in less time than it physically takes to read and answer the questions
|
|
6
|
+
is suspicious — it points at a leaked answer key, a shared screenshot, or
|
|
7
|
+
automation, not knowledge.
|
|
8
|
+
|
|
9
|
+
This is intentionally a dumb arithmetic heuristic, not an LLM call: "is 40s
|
|
10
|
+
plausible for 20 questions including two written answers" is a timing question,
|
|
11
|
+
and an LLM adds nothing to it. The per-format floors and thresholds are tunable
|
|
12
|
+
so a caller can dial sensitivity to their own population.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from typing import Iterable, List, Optional
|
|
16
|
+
|
|
17
|
+
from pydantic import BaseModel, Field
|
|
18
|
+
|
|
19
|
+
# Minimum *realistic* seconds to genuinely read + answer one question of each
# format — not an absolute physical floor, but the point below which engagement
# is implausible. Open formats dominate because a passing written answer takes
# real time to compose. Deliberately generous to keep false positives low.
# expected_min_seconds() sums these per question; callers can override
# individual entries through its ``per_type`` argument.
DEFAULT_MIN_SECONDS_PER_TYPE = {
    "mc": 5.0,
    "fill_blank": 7.0,
    "match": 10.0,
    "short": 20.0,
    "freetext": 40.0,
}
# Seconds assumed for a format not in the table (unknown / custom).
DEFAULT_FALLBACK_SECONDS = 6.0
|
|
32
|
+
|
|
33
|
+
# Default wall-clock budget for one timed attempt: 30 minutes. This is the
# "too slow" counterpart to assess_speed's "too fast" check. Actually enforcing
# it (e.g. a countdown that auto-submits) is left to the caller's UI; this
# module only holds the policy value and the comparison.
DEFAULT_TIME_LIMIT_SECONDS = 1800


def within_time_limit(elapsed_seconds: int,
                      limit_seconds: int = DEFAULT_TIME_LIMIT_SECONDS) -> bool:
    """Report whether an attempt finished inside its time budget.

    Args:
        elapsed_seconds: seconds the attempt took. Zero or negative means no
            timing was recorded, which is never a violation.
        limit_seconds: the budget in seconds. Zero or negative means
            "no limit".

    Returns:
        ``True`` when the attempt is untimed, unlimited, or took no longer
        than ``limit_seconds``; ``False`` otherwise.
    """
    has_timing = not elapsed_seconds <= 0
    if has_timing and not limit_seconds <= 0:
        return elapsed_seconds <= limit_seconds
    return True
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
# Result object returned by assess_speed(). ``severity`` is a plain string;
# assess_speed only ever sets it to 'none', 'low' or 'high', but the model
# itself does not enforce that vocabulary.
class IntegrityFlag(BaseModel):
    """The verdict on a single attempt's timing."""

    suspicious: bool = Field(description="True if the pass was implausibly fast.")
    severity: str = Field(description="One of: 'none', 'low', 'high'.")
    elapsed_seconds: int = Field(description="Seconds the learner actually took.")
    expected_seconds: float = Field(description="Realistic minimum for these questions.")
    # assess_speed reports 0.0 here when no timing was recorded.
    speed_ratio: float = Field(description="elapsed / expected; <1 is faster than realistic.")
    reasons: List[str] = Field(default_factory=list, description="Human-readable explanation.")
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def expected_min_seconds(question_types: Iterable[str], per_type: Optional[dict] = None,
                         fallback: float = DEFAULT_FALLBACK_SECONDS) -> float:
    """Add up the realistic-minimum seconds for the given question formats.

    Args:
        question_types: one format key per question in the attempt.
        per_type: optional overrides layered on top of
            ``DEFAULT_MIN_SECONDS_PER_TYPE``.
        fallback: seconds charged for a format missing from the table.

    Returns:
        The summed floor in seconds (``0`` for an empty attempt).
    """
    floors = dict(DEFAULT_MIN_SECONDS_PER_TYPE)
    if per_type:
        floors.update(per_type)
    total = 0
    for qtype in question_types:
        total += floors.get(qtype, fallback)
    return total
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def assess_speed(*, elapsed_seconds: int, question_types: Iterable[str], passed: bool,
                 per_type: Optional[dict] = None, fallback: float = DEFAULT_FALLBACK_SECONDS,
                 low_ratio: float = 0.5, high_ratio: float = 0.25) -> IntegrityFlag:
    """Decide whether a passing attempt was finished implausibly quickly.

    Failed attempts and attempts with no recorded timing
    (``elapsed_seconds <= 0``) are never flagged.

    Args:
        elapsed_seconds: wall-clock seconds the attempt took.
        question_types: the format of every question in the attempt, e.g.
            ``["mc", "mc", "freetext", ...]``.
        passed: whether the attempt passed.
        per_type: per-format overrides for the realistic-minimum seconds.
        fallback: seconds assumed for a format missing from the table.
        low_ratio / high_ratio: ``elapsed / expected`` thresholds below which
            the attempt is rated 'low' / 'high' severity.

    Returns:
        An :class:`IntegrityFlag`. ``severity`` is 'high' when the ratio is
        below ``high_ratio``, 'low' when below ``low_ratio``, else 'none';
        ``suspicious`` is true for 'low' and 'high'.
    """
    formats = list(question_types)
    floor = expected_min_seconds(formats, per_type, fallback)

    def verdict(level, speed_ratio, notes):
        # Every outcome shares the same elapsed/expected bookkeeping.
        return IntegrityFlag(suspicious=level != "none", severity=level,
                             elapsed_seconds=int(elapsed_seconds),
                             expected_seconds=round(floor, 1),
                             speed_ratio=speed_ratio, reasons=notes)

    if elapsed_seconds <= 0:
        return verdict("none", 0.0, ["No timing was recorded for this attempt."])

    if not passed:
        observed = round(elapsed_seconds / floor, 2) if floor else 0.0
        return verdict("none", observed, ["Attempt did not pass — timing not assessed."])

    # With nothing to compare against, treat the pace as exactly on par.
    ratio = elapsed_seconds / floor if floor > 0 else 1.0

    level = "none"
    if ratio < low_ratio:
        level = "low"
    if ratio < high_ratio:
        level = "high"

    notes: List[str] = []
    if level != "none":
        notes.append(
            f"Passed in {int(elapsed_seconds)}s — {round(ratio * 100)}% of the "
            f"~{round(floor)}s it realistically takes to read and answer "
            f"{len(formats)} questions."
        )
    return verdict(level, round(ratio, 2), notes)
|
quizforge/llm.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
"""Provider-neutral structured-output helper.
|
|
2
|
+
|
|
3
|
+
quizforge never imports a specific LLM SDK. You pass in any object that quacks
|
|
4
|
+
like a LangChain chat model — i.e. it exposes ``with_structured_output(schema)``
|
|
5
|
+
returning something with an ``.invoke(messages)`` method, where ``messages`` is a
|
|
6
|
+
list of ``(role, content)`` tuples. That covers ``langchain-openai``,
|
|
7
|
+
``langchain-anthropic``, community wrappers, and your own shim.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import logging
|
|
11
|
+
from typing import Any
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger("quizforge")
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class _RetryingStructuredChain:
    """Binds a schema to the model and retries ``invoke`` when it raises.

    Any ``Exception`` from the bound model counts as retryable. There is no
    delay between attempts.
    """

    def __init__(self, llm: Any, schema: type, max_retries: int):
        """Bind ``schema`` via ``llm.with_structured_output`` and store the retry budget."""
        self._bound = llm.with_structured_output(schema)
        self._max_retries = max_retries

    def invoke(self, messages):
        """Call the bound model, retrying up to ``max_retries`` extra times.

        Returns whatever the bound model's ``invoke`` returns. If every
        attempt fails, the exception from the final attempt propagates
        unchanged.
        """
        # Always try at least once: a zero or negative retry budget must not
        # skip the call (and must not end in ``raise None``).
        attempts = max(1, self._max_retries + 1)
        for attempt in range(1, attempts + 1):
            try:
                return self._bound.invoke(messages)
            except Exception as exc:  # noqa: BLE001 — surface the last error after retries
                if attempt == attempts:
                    raise
                logger.warning("structured_output attempt %d failed (%s); retrying",
                               attempt, type(exc).__name__)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def structured_output(llm: Any, schema: type, max_retries: int = 1):
    """Wrap ``llm`` so that ``.invoke(messages)`` returns ``schema``-shaped output.

    The schema is bound through the model's own ``with_structured_output``.
    By default a failed call is retried once, so one flaky response does not
    sink the whole operation.

    Args:
        llm: any object exposing ``with_structured_output(schema)``.
        schema: the schema class to bind.
        max_retries: extra attempts allowed after the first failure.
    """
    chain = _RetryingStructuredChain(llm=llm, schema=schema, max_retries=max_retries)
    return chain
|
quizforge/sample.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
"""Sampling — draw a fresh, mixed-format test from a deep bank.
|
|
2
|
+
|
|
3
|
+
A deep bank (far more questions than any one test shows) plus unseen-first
|
|
4
|
+
sampling is what makes two learners rarely see the same test, which defeats
|
|
5
|
+
answer-sharing. No LLM here — pure, deterministic given an rng.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import random
|
|
9
|
+
from collections import defaultdict
|
|
10
|
+
from typing import Iterable, List, Optional
|
|
11
|
+
|
|
12
|
+
# How many questions of each format a single test draws; the counts sum to 20.
# Keys are the question ``type`` values that sample_test groups the bank by.
DEFAULT_BLUEPRINT = {"mc": 8, "fill_blank": 4, "match": 2, "short": 4, "freetext": 2}
|
|
14
|
+
DIFFICULTY_ORDER = ("easy", "medium", "hard")


def _spread_by_difficulty(questions: List[dict], limit: int) -> List[dict]:
    """Return up to ``limit`` questions, cycling easy -> medium -> hard.

    Questions keep their incoming order inside each difficulty; a missing
    ``difficulty`` counts as ``"medium"``. Difficulties outside
    ``DIFFICULTY_ORDER`` are only used once the canonical levels run dry, in
    first-appearance order.
    """
    if limit <= 0:
        return []
    buckets: dict = defaultdict(list)
    for question in questions:
        buckets[question.get("difficulty", "medium")].append(question)
    # Pull the canonical levels out; whatever stays in ``buckets`` is extra.
    lanes = [buckets.pop(level, []) for level in DIFFICULTY_ORDER]
    spread: List[dict] = []
    for depth in range(max(map(len, lanes), default=0)):
        spread.extend(lane[depth] for lane in lanes if depth < len(lane))
    for extras in buckets.values():
        spread.extend(extras)
    return spread[:limit]


def pick_spread(pool: List[dict], want: int, seen_ids: Iterable[str],
                rng: Optional[random.Random] = None) -> List[dict]:
    """Pick ``want`` questions from one format's pool, unseen-first + difficulty-spread.

    Every unseen question is used before any seen one is recycled. Within the
    unseen group, and then within the recycled group, questions are drawn
    round-robin easy/medium/hard so a draw doesn't come back all-hard or
    all-easy. The difficulty spread never pulls in a seen question while an
    unseen one is still available.

    Args:
        pool: questions of a single format.
        want: how many to pick; fewer are returned if the pool is smaller.
        seen_ids: ids the learner has already been shown (``None`` = none).
        rng: source of randomness; defaults to the ``random`` module.

    Returns:
        The picked questions, unseen ones first. ``pool`` is not modified.
    """
    rng = random if rng is None else rng
    if want <= 0 or not pool:
        return []
    seen = set(seen_ids or ())
    unseen = [q for q in pool if q.get("id") not in seen]
    reused = [q for q in pool if q.get("id") in seen]
    rng.shuffle(unseen)
    rng.shuffle(reused)

    picked = _spread_by_difficulty(unseen, want)
    # Only dip into already-seen questions for whatever is still missing.
    picked += _spread_by_difficulty(reused, want - len(picked))
    return picked
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def sample_test(questions: List[dict], blueprint: Optional[dict] = None,
                seen_ids: Iterable[str] = (), rng: Optional[random.Random] = None) -> List[dict]:
    """Draw a mixed-format test per ``blueprint``, unseen-first within each format.

    Falls short of the blueprint only when the bank lacks enough of a format; the
    test still assembles with whatever it can. Final order is shuffled so formats
    interleave instead of arriving grouped by type.

    Args:
        questions: the full bank; a question without a ``type`` counts as
            ``"mc"``.
        blueprint: ``{format: count}``; a missing or empty blueprint falls
            back to ``DEFAULT_BLUEPRINT``.
        seen_ids: ids the learner has already been shown. Any iterable works,
            including a one-shot generator.
        rng: source of randomness; defaults to the ``random`` module.

    Returns:
        The sampled questions in shuffled order.
    """
    rng = rng or random
    blueprint = blueprint or DEFAULT_BLUEPRINT
    # Materialize once: seen_ids is handed to pick_spread for every format, and
    # a generator would be exhausted after the first one.
    seen = set(seen_ids or ())
    by_type: dict = defaultdict(list)
    for q in questions:
        by_type[q.get("type", "mc")].append(q)

    picked: List[dict] = []
    for qtype, want in blueprint.items():
        picked.extend(pick_spread(by_type.get(qtype, []), want, seen, rng=rng))
    rng.shuffle(picked)
    return picked
|
quizforge/schemas.py
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
"""Generation schemas — the per-format shapes the LLM must return.
|
|
2
|
+
|
|
3
|
+
These describe what a freshly *generated* question looks like coming back from
|
|
4
|
+
the model. After validation, questions are stored as plain dicts (YAML-friendly,
|
|
5
|
+
template-friendly) with an added ``id`` and ``type``; see ``generate`` and the
|
|
6
|
+
format-field reference in the README.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from typing import List
|
|
10
|
+
|
|
11
|
+
from pydantic import BaseModel, Field
|
|
12
|
+
|
|
13
|
+
# The five question formats and the three difficulty levels the kernel
# understands. Both are plain tuples of the string keys used on question dicts.
FORMAT_KEYS = ("mc", "fill_blank", "match", "short", "freetext")
DIFFICULTIES = ("easy", "medium", "hard")
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
# Shape of one multiple-choice question as returned by the model.
# NOTE(review): the schema itself does not check that answer_idx is a valid
# index into choices, nor that difficulty is one of DIFFICULTIES — presumably
# the generator validates that after parsing; confirm.
class GenMC(BaseModel):
    """A generated multiple-choice question."""

    prompt: str
    choices: List[str] = Field(description="3-4 options; exactly one correct, others plausible-but-wrong")
    answer_idx: int = Field(description="0-based index of the correct choice")
    explanation: str = Field(description="Why the answer is right — a teaching sentence")
    difficulty: str = Field(description="easy | medium | hard")
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
# Shape of one fill-in-the-blank question as returned by the model.
# ``accepted_answers`` is the list grade_fill_blank later compares against.
class GenFill(BaseModel):
    """A generated fill-in-the-blank question."""

    prompt: str = Field(description="A sentence with a ______ blank where the answer belongs")
    accepted_answers: List[str] = Field(description="Every acceptable spelling/synonym/abbreviation of the answer")
    explanation: str
    difficulty: str
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# One left/right pairing inside a match-the-following question (see GenMatch).
class GenPair(BaseModel):
    left: str
    right: str
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
# Shape of one match-the-following question as returned by the model.
# ``pairs`` holds the correct left -> right associations.
class GenMatch(BaseModel):
    """A generated match-the-following question."""

    prompt: str
    pairs: List[GenPair] = Field(description="3-4 unambiguous one-to-one pairs; distinct right-side labels")
    explanation: str
    difficulty: str
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
# Shape of one open-ended question as returned by the model. Used for both the
# ``short`` and ``freetext`` formats (both map to OpenBatch in FORMATS).
class GenOpen(BaseModel):
    """A generated open-ended (short / free-response) question."""

    prompt: str
    model_answer: str = Field(description="A strong, concise reference answer")
    rubric: List[str] = Field(description="3-5 key points a correct answer must cover")
    difficulty: str
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
# Batch wrapper: a list of multiple-choice questions (the "mc" entry in FORMATS).
class MCBatch(BaseModel):
    questions: List[GenMC]
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
# Batch wrapper: a list of fill-in-the-blank questions (the "fill_blank" entry in FORMATS).
class FillBatch(BaseModel):
    questions: List[GenFill]
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
# Batch wrapper: a list of match-the-following questions (the "match" entry in FORMATS).
class MatchBatch(BaseModel):
    questions: List[GenMatch]
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
# Batch wrapper: a list of open-ended questions, shared by the "short" and
# "freetext" entries in FORMATS.
class OpenBatch(BaseModel):
    questions: List[GenOpen]
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
# Per-format: the batch schema the model fills, and a human-readable description
# spliced into the generation prompt. ``short`` and ``freetext`` share the open
# schema but differ in framing (one-liners vs. walk-me-through scenarios).
# Each value is a (batch_schema_class, description) tuple keyed by format name.
FORMATS = {
    "mc": (MCBatch, "multiple-choice questions with one best answer and 3 plausible distractors"),
    "fill_blank": (FillBatch, "fill-in-the-blank questions (a sentence with a ______ blank)"),
    "match": (MatchBatch, "match-the-following questions with 3-4 unambiguous pairs"),
    "short": (OpenBatch, "short-answer questions answerable in 1-2 sentences"),
    "freetext": (OpenBatch, "scenario / free-response questions ('walk me through how you'd…')"),
}
|
quizforge/text.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
"""Shared text normalization used by the deterministic graders and the
|
|
2
|
+
generator's dedup pass."""
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def normalize(text: str) -> str:
    """Casefold + collapse whitespace + strip surrounding punctuation.

    So 'ICA.', ' ica ', and 'ICA' all compare equal without an LLM call.

    ``None`` normalizes to the empty string. Any other non-string value (for
    example a numeric answer loaded from YAML) is converted with ``str()``
    first, so it compares equal to the same value typed as text.

    Args:
        text: the raw answer or label.

    Returns:
        The comparison key: casefolded, with runs of whitespace collapsed to
        single spaces and leading/trailing punctuation removed.
    """
    if text is None:
        return ""
    if not isinstance(text, str):
        text = str(text)
    # casefold() rather than lower(): it is the caseless-matching form, so
    # e.g. 'Straße' and 'STRASSE' compare equal.
    cleaned = " ".join(text.casefold().split())
    return cleaned.strip(" \t\n.,;:!?\"'`()[]")
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: quizforge
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Generate a deep, mixed-format question bank from source material and grade it — deterministic where it can, LLM where it must. Bring your own chat model.
|
|
5
|
+
Project-URL: Homepage, https://github.com/vinayvobbili/quizforge
|
|
6
|
+
Project-URL: Source, https://github.com/vinayvobbili/quizforge
|
|
7
|
+
Author: Vinay Vobbilichetty
|
|
8
|
+
License: MIT
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Keywords: assessment,education,grading,llm,question-bank,quiz,training
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Topic :: Education :: Testing
|
|
16
|
+
Requires-Python: >=3.10
|
|
17
|
+
Requires-Dist: pydantic>=2
|
|
18
|
+
Requires-Dist: pyyaml>=6
|
|
19
|
+
Provides-Extra: dev
|
|
20
|
+
Requires-Dist: pytest>=7; extra == 'dev'
|
|
21
|
+
Provides-Extra: openai
|
|
22
|
+
Requires-Dist: langchain-openai>=0.1; extra == 'openai'
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
|
|
25
|
+
# quizforge
|
|
26
|
+
|
|
27
|
+
Generate a deep, **mixed-format** question bank from any source material, then grade it — **deterministic where it can, LLM where it must**. Bring your own chat model.
|
|
28
|
+
|
|
29
|
+
quizforge is the engine behind a training/readiness feature: it drafts far more questions than any single test shows (multiple choice, fill-in-the-blank, match-the-following, short answer, and free-response scenarios), samples a fresh shuffled test on each attempt — so two learners rarely see the same one — and grades every format. MC/fill/match are graded instantly with no model call; open-ended answers are scored 0–1 with coaching feedback by an LLM you provide.
|
|
30
|
+
|
|
31
|
+
- **Model-agnostic** — pass any LangChain-style chat model (`with_structured_output`). No SDK is bundled.
|
|
32
|
+
- **Deep bank, anti-sharing sampling** — unseen-first, difficulty-spread draws per a configurable blueprint.
|
|
33
|
+
- **Cheap grading** — only open-ended answers cost a model call; everything else is local and free.
|
|
34
|
+
- **Plain dicts in, plain dicts out** — YAML/JSON-friendly, easy to store and template.
|
|
35
|
+
|
|
36
|
+
## Install
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
pip install quizforge
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
Bring a chat model from whichever provider you use, e.g.:
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
pip install langchain-openai # or langchain-anthropic, etc.
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
## Quickstart
|
|
49
|
+
|
|
50
|
+
### Generate a bank
|
|
51
|
+
|
|
52
|
+
```python
|
|
53
|
+
from quizforge import generate_bank
|
|
54
|
+
from langchain_openai import ChatOpenAI
|
|
55
|
+
|
|
56
|
+
llm = ChatOpenAI(model="gpt-4.1", temperature=0.4)
|
|
57
|
+
|
|
58
|
+
material = open("citrix_lesson.md").read()
|
|
59
|
+
new_questions = generate_bank(
|
|
60
|
+
material, llm,
|
|
61
|
+
targets={"mc": 40, "fill_blank": 20, "match": 12, "short": 16, "freetext": 12},
|
|
62
|
+
existing=[], # pass your current bank to top it up
|
|
63
|
+
coverage="At least half should be applied incident-response scenarios.",
|
|
64
|
+
)
|
|
65
|
+
# -> list of dicts with id/type/difficulty + per-format fields. Store as you like.
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
`generate_bank` only produces the *shortfall* to reach `targets`, validates each
|
|
69
|
+
question, and never duplicates an existing prompt — safe to re-run to grow a bank.
|
|
70
|
+
|
|
71
|
+
### Sample a test
|
|
72
|
+
|
|
73
|
+
```python
|
|
74
|
+
from quizforge import sample_test, DEFAULT_BLUEPRINT
|
|
75
|
+
|
|
76
|
+
test = sample_test(bank, blueprint=DEFAULT_BLUEPRINT, seen_ids=already_seen)
|
|
77
|
+
# DEFAULT_BLUEPRINT draws mc8 / fill4 / match2 / short4 / freetext2 = 20, shuffled.
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
### Grade
|
|
81
|
+
|
|
82
|
+
```python
|
|
83
|
+
from quizforge import grade_fill_blank, grade_match, grade_open_answer
|
|
84
|
+
|
|
85
|
+
grade_fill_blank(q, "ICA") # {"score": 1.0, "correct": True, ...}
|
|
86
|
+
grade_match(q, {"0": "RDP", "1": "ICA"}) # per-pair partial credit
|
|
87
|
+
grade_open_answer(q, learner_text, llm) # QuizGrade(score, verdict, feedback, ...) or None
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
`grade_open_answer` returns `None` if the model was unavailable — exclude that
|
|
91
|
+
question from the attempt's max score rather than penalizing the learner.
|
|
92
|
+
|
|
93
|
+
## Question shapes
|
|
94
|
+
|
|
95
|
+
Each question is a dict with `id`, `type`, `difficulty`, `prompt`, plus:
|
|
96
|
+
|
|
97
|
+
- `mc` — `choices: [str]`, `answer_idx: int`, `explanation: str`
|
|
98
|
+
- `fill_blank` — `accepted_answers: [str]`, `explanation: str`
|
|
99
|
+
- `match` — `pairs: [{left, right}]`, `explanation: str`
|
|
100
|
+
- `short` / `freetext` — `model_answer: str`, `rubric: [str]`
|
|
101
|
+
|
|
102
|
+
## License
|
|
103
|
+
|
|
104
|
+
MIT
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
quizforge/__init__.py,sha256=fgrMRAFDYItRLsSydsFZE2grIpyHnLf5ZfCCkoIGAGU,2666
|
|
2
|
+
quizforge/bank.py,sha256=8IpsET4x4Ulodxam_sGv4Ot65ECLwGsP2VqzAFQs-LM,3612
|
|
3
|
+
quizforge/certificate.py,sha256=iOOtXQGpACd1Us76XIXSBST-Z19FHOIWmYMJSCH-iUA,4247
|
|
4
|
+
quizforge/cli.py,sha256=NgC3ciNc8pdD0PdwwoKBoXoAEujbv8ZNdIXiYDiAW8o,5420
|
|
5
|
+
quizforge/generate.py,sha256=UCqEVvbwRFhrXqveXW21H3_qvUUfzDC4WY5txckXEFo,8638
|
|
6
|
+
quizforge/grade.py,sha256=hZ90Nnuqf1u7WPWJG9fHS9d-kLhzIkP5GxSLcIPSpos,5221
|
|
7
|
+
quizforge/integrity.py,sha256=1_DbL9W8jJkKE7ncRs-ddxrsSllaiLVModyXjwmSQQM,5928
|
|
8
|
+
quizforge/llm.py,sha256=Ycd6RT2yCkZy_4GvUsNJKWRrWwDUooBrKuoJ1Dl1Hxg,1684
|
|
9
|
+
quizforge/sample.py,sha256=2qTaGM2mjVO6yzKRstXZrO59ercWuakD5jbet06lKTk,2888
|
|
10
|
+
quizforge/schemas.py,sha256=4zkq9bl-AmiszF3i0r9iRA7N77Gz_a64j53XOP5buJA,2917
|
|
11
|
+
quizforge/text.py,sha256=jzFeT9DleOUhmRZpCFQIoZNIzqxSI1wc6fBJvl6VBNU,387
|
|
12
|
+
quizforge-0.2.0.dist-info/METADATA,sha256=NohUiaIHbU0Bzkv47Bj55X2iBys69ZdnQNbFDBpA1F4,4096
|
|
13
|
+
quizforge-0.2.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
14
|
+
quizforge-0.2.0.dist-info/entry_points.txt,sha256=HL6bvElX7W1gotbUwLyKBVtMkDi_tJlkQXvVzbaX6-4,49
|
|
15
|
+
quizforge-0.2.0.dist-info/licenses/LICENSE,sha256=Q7CXTchzC9hqR2Dr-9cRh3bM2kXTgXGwk-dO0rGvQsE,1076
|
|
16
|
+
quizforge-0.2.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Vinay Vobbilichetty
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|