quiz-gen 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- quiz_gen/__init__.py +23 -0
- quiz_gen/__version__.py +13 -0
- quiz_gen/agents/__init__.py +0 -0
- quiz_gen/agents/answer_generator.py +0 -0
- quiz_gen/agents/base_agent.py +0 -0
- quiz_gen/agents/orchestrator.py +0 -0
- quiz_gen/agents/question_generator.py +0 -0
- quiz_gen/agents/reviewer.py +0 -0
- quiz_gen/agents/validator.py +0 -0
- quiz_gen/cli.py +209 -0
- quiz_gen/config.py +0 -0
- quiz_gen/models/__init__.py +0 -0
- quiz_gen/models/chunk.py +0 -0
- quiz_gen/models/document.py +0 -0
- quiz_gen/models/question.py +0 -0
- quiz_gen/models/quiz.py +0 -0
- quiz_gen/parsers/__init__.py +13 -0
- quiz_gen/parsers/base.py +0 -0
- quiz_gen/parsers/html/eu_lex_parser.py +805 -0
- quiz_gen/parsers/pdf_parser.py +0 -0
- quiz_gen/parsers/utils.py +0 -0
- quiz_gen/storage/__init__.py +0 -0
- quiz_gen/storage/base.py +0 -0
- quiz_gen/storage/database.py +0 -0
- quiz_gen/storage/json_storage.py +0 -0
- quiz_gen/utils/__init__.py +0 -0
- quiz_gen/utils/helpers.py +0 -0
- quiz_gen/utils/logging.py +0 -0
- quiz_gen/validation/__init__.py +0 -0
- quiz_gen/validation/human_feedback.py +0 -0
- quiz_gen/validation/quality_checker.py +0 -0
- quiz_gen-0.1.5.dist-info/METADATA +395 -0
- quiz_gen-0.1.5.dist-info/RECORD +37 -0
- quiz_gen-0.1.5.dist-info/WHEEL +5 -0
- quiz_gen-0.1.5.dist-info/entry_points.txt +2 -0
- quiz_gen-0.1.5.dist-info/licenses/LICENSE +21 -0
- quiz_gen-0.1.5.dist-info/top_level.txt +1 -0
quiz_gen/__init__.py
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"""Quiz Gen AI - AI-powered quiz generator for regulatory and educational documentation."""
|
|
2
|
+
|
|
3
|
+
try:
|
|
4
|
+
from quiz_gen.__version__ import __version__, __author__, __email__
|
|
5
|
+
except ImportError:
|
|
6
|
+
__version__ = "0.1.0.dev"
|
|
7
|
+
__author__ = "Yauheniya Varabyova"
|
|
8
|
+
__email__ = "yauheniya.ai@gmail.com"
|
|
9
|
+
|
|
10
|
+
from quiz_gen.parsers.html.eu_lex_parser import (
|
|
11
|
+
EURLexParser,
|
|
12
|
+
RegulationChunk,
|
|
13
|
+
SectionType,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
__all__ = [
|
|
17
|
+
"__version__",
|
|
18
|
+
"__author__",
|
|
19
|
+
"__email__",
|
|
20
|
+
"EURLexParser",
|
|
21
|
+
"RegulationChunk",
|
|
22
|
+
"SectionType",
|
|
23
|
+
]
|
quiz_gen/__version__.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"""Version information for quiz-gen package."""
|
|
2
|
+
|
|
3
|
+
from importlib.metadata import PackageNotFoundError, metadata, version

# Values used when the installed distribution's metadata cannot be read
# (for example when running from a source checkout that was never installed).
# The version fallback matches the one quiz_gen/__init__.py uses.
_DIST_NAME = "quiz-gen"
_FALLBACK_VERSION = "0.1.0.dev"
_FALLBACK_AUTHOR = "Yauheniya Varabyova"
_FALLBACK_EMAIL = "yauheniya.ai@gmail.com"

# Guard the version lookup: other modules import this module directly, so an
# unguarded PackageNotFoundError here would abort their import as well.
try:
    __version__ = version(_DIST_NAME)
except PackageNotFoundError:
    __version__ = _FALLBACK_VERSION

# Author details are best-effort; any failure falls back to the constants.
try:
    _metadata = metadata(_DIST_NAME)
    __author__ = _metadata.get("Author", _FALLBACK_AUTHOR)
    __email__ = _metadata.get("Author-email", _FALLBACK_EMAIL)
except Exception:
    __author__ = _FALLBACK_AUTHOR
    __email__ = _FALLBACK_EMAIL
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
quiz_gen/cli.py
ADDED
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Command-line interface for quiz-gen package.
|
|
4
|
+
Parse EUR-Lex documents and extract structured content.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import argparse
|
|
8
|
+
import sys
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Optional
|
|
11
|
+
|
|
12
|
+
from quiz_gen.__version__ import __version__
|
|
13
|
+
from quiz_gen.parsers.html.eu_lex_parser import EURLexParser
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def create_parser() -> argparse.ArgumentParser:
    """Build the argument parser for the ``quiz-gen`` command.

    Returns:
        An ``argparse.ArgumentParser`` with one positional ``input`` argument
        and the output, filename, display, version and verbosity options.
    """
    usage_examples = """
Examples:
# Parse from URL
quiz-gen https://eur-lex.europa.eu/legal-content/EN/TXT/HTML/?uri=CELEX:32018R1139

# Parse local HTML file
quiz-gen data/documents/html/regulation.html

# Specify output directory
quiz-gen --output data/output regulation.html

# Specify custom output filenames
quiz-gen --chunks my_chunks.json --toc my_toc.json regulation.html

# Print TOC to console
quiz-gen --print-toc regulation.html
"""

    cli = argparse.ArgumentParser(
        prog="quiz-gen",
        description="Parse EUR-Lex regulatory documents and extract structured content into chunks and TOC.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_examples,
    )

    # (flags, keyword options) pairs, registered in the order they appear in --help.
    argument_specs = (
        (
            ("input",),
            {"help": "URL or path to local HTML file of EUR-Lex document"},
        ),
        (
            ("-o", "--output"),
            {
                "type": str,
                "default": "data/processed",
                "help": "Output directory for generated files (default: data/processed)",
            },
        ),
        (
            ("--chunks",),
            {
                "type": str,
                "help": "Custom filename for chunks JSON (default: <input>_chunks.json)",
            },
        ),
        (
            ("--toc",),
            {
                "type": str,
                "help": "Custom filename for TOC JSON (default: <input>_toc.json)",
            },
        ),
        (
            ("--print-toc",),
            {
                "action": "store_true",
                "help": "Print formatted table of contents to console",
            },
        ),
        (
            ("--no-save",),
            {
                "action": "store_true",
                "help": "Don't save output files, only display stats",
            },
        ),
        (
            ("-v", "--version"),
            {"action": "version", "version": f"%(prog)s {__version__}"},
        ),
        (
            ("--verbose",),
            {"action": "store_true", "help": "Enable verbose output"},
        ),
    )
    for flags, options in argument_specs:
        cli.add_argument(*flags, **options)

    return cli
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def get_default_filename(input_path: str, suffix: str) -> str:
    """Derive a default output filename from the input URL or file path.

    Args:
        input_path: The http(s) URL or local file path given on the command line.
        suffix: Label appended before the extension (e.g. ``"chunks"``, ``"toc"``).

    Returns:
        ``<celex>_<suffix>.json`` for a URL carrying a CELEX identifier,
        ``document_<suffix>.json`` for any other URL, and
        ``<file stem>_<suffix>.json`` for a local path.
    """
    # Same URL test that parse_document applies, so a local file whose name
    # merely begins with "http" is still treated as a file.
    if input_path.startswith(("http://", "https://")):
        # The colon after CELEX may be literal or percent-encoded in either case.
        for marker in ("CELEX:", "CELEX%3A", "CELEX%3a"):
            _, found, celex = input_path.rpartition(marker)
            if not found:
                continue
            # Drop trailing query parameters or a fragment after the identifier.
            for delimiter in "&?#":
                celex = celex.split(delimiter, 1)[0]
            if celex:
                return f"{celex}_{suffix}.json"
        return f"document_{suffix}.json"

    # Local file: use the name without its extension, replacing a
    # percent-encoded colon left over from a saved URL.
    stem = Path(input_path).stem
    for encoded_colon in ("%3A", "%3a"):
        stem = stem.replace(encoded_colon, "_")
    return f"{stem}_{suffix}.json"
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def parse_document(
    input_source: str,
    output_dir: str,
    chunks_filename: Optional[str] = None,
    toc_filename: Optional[str] = None,
    print_toc: bool = False,
    no_save: bool = False,
    verbose: bool = False
) -> int:
    """
    Parse a EUR-Lex document, print summary statistics and save the results.

    Args:
        input_source: http(s) URL or path to a local HTML file.
        output_dir: Directory for the output files; created if missing.
        chunks_filename: Filename for the chunks JSON; derived from
            ``input_source`` when omitted.
        toc_filename: Filename for the TOC JSON; derived from
            ``input_source`` when omitted.
        print_toc: Also print the table of contents via the parser.
        no_save: Skip writing output files and only show statistics.
        verbose: Print progress messages and, on failure, a traceback.

    Returns:
        0 on success, 1 on error
    """
    from collections import Counter

    try:
        # Determine if input is URL or file
        if input_source.startswith(("http://", "https://")):
            if verbose:
                print(f"Fetching document from URL: {input_source}")
            parser = EURLexParser(url=input_source)
        else:
            input_path = Path(input_source)
            if not input_path.exists():
                print(f"Error: File not found: {input_source}", file=sys.stderr)
                return 1

            if verbose:
                print(f"Reading document from file: {input_source}")

            with open(input_path, 'r', encoding='utf-8') as f:
                html_content = f.read()
            parser = EURLexParser(html_content=html_content)

        # Parse document
        if verbose:
            print("Parsing document...")
        chunks, toc = parser.parse()

        # A missing, None or empty title must not abort an otherwise
        # successful run, and the ellipsis is shown only when the title was
        # actually truncated.
        title = str(toc.get('title') or 'Unknown')
        shown_title = title if len(title) <= 80 else f"{title[:80]}..."

        # Print statistics
        print("\n✓ Successfully parsed document")
        print(f" Title: {shown_title}")
        print(f" Total chunks: {len(chunks)}")

        # Count by type
        types = Counter(c.section_type.value for c in chunks)
        for section_type, count in sorted(types.items()):
            print(f" {section_type}: {count}")

        # Print TOC if requested
        if print_toc:
            parser.print_toc()

        # Save files unless --no-save
        if not no_save:
            output_path = Path(output_dir)
            output_path.mkdir(parents=True, exist_ok=True)

            # Determine output filenames
            chunks_file = chunks_filename or get_default_filename(input_source, "chunks")
            toc_file = toc_filename or get_default_filename(input_source, "toc")

            chunks_path = output_path / chunks_file
            toc_path = output_path / toc_file

            # Save files
            parser.save_chunks(str(chunks_path))
            parser.save_toc(str(toc_path))

            print(f"\n✓ Files saved to: {output_dir}")

        return 0

    except Exception as e:
        # CLI boundary: report any failure as a message plus a non-zero
        # exit code instead of letting a traceback escape.
        print(f"Error: {e}", file=sys.stderr)
        if verbose:
            import traceback
            traceback.print_exc()
        return 1
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def main() -> int:
    """Run the CLI: parse the command-line arguments and process the document.

    Returns:
        The exit code produced by ``parse_document``.
    """
    options = create_parser().parse_args()

    # Map the parsed argparse attributes onto parse_document's parameters.
    call_kwargs = {
        "input_source": options.input,
        "output_dir": options.output,
        "chunks_filename": options.chunks,
        "toc_filename": options.toc,
        "print_toc": options.print_toc,
        "no_save": options.no_save,
        "verbose": options.verbose,
    }
    return parse_document(**call_kwargs)
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
# Script entry point: exit the process with main()'s return code.
if __name__ == "__main__":
    exit_code = main()
    sys.exit(exit_code)
|
quiz_gen/config.py
ADDED
|
File without changes
|
|
File without changes
|
quiz_gen/models/chunk.py
ADDED
|
File without changes
|
|
File without changes
|
|
File without changes
|
quiz_gen/models/quiz.py
ADDED
|
File without changes
|
quiz_gen/parsers/base.py
ADDED
|
File without changes
|