quiz-gen 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
quiz_gen/__init__.py ADDED
@@ -0,0 +1,23 @@
1
+ """Quiz Gen AI - AI-powered quiz generator for regulatory and educational documentation."""
2
+
3
+ try:
4
+ from quiz_gen.__version__ import __version__, __author__, __email__
5
+ except ImportError:
6
+ __version__ = "0.1.0.dev"
7
+ __author__ = "Yauheniya Varabyova"
8
+ __email__ = "yauheniya.ai@gmail.com"
9
+
10
+ from quiz_gen.parsers.html.eu_lex_parser import (
11
+ EURLexParser,
12
+ RegulationChunk,
13
+ SectionType,
14
+ )
15
+
16
+ __all__ = [
17
+ "__version__",
18
+ "__author__",
19
+ "__email__",
20
+ "EURLexParser",
21
+ "RegulationChunk",
22
+ "SectionType",
23
+ ]
@@ -0,0 +1,13 @@
1
+ """Version information for quiz-gen package."""
2
+
3
from importlib.metadata import PackageNotFoundError, metadata, version

# Values used when the installed distribution metadata cannot be read, e.g.
# when the package is run from a source checkout without being installed.
# The version fallback matches the one used in quiz_gen/__init__.py.
_FALLBACK_VERSION = "0.1.0.dev"
_FALLBACK_AUTHOR = "Yauheniya Varabyova"
_FALLBACK_EMAIL = "yauheniya.ai@gmail.com"

try:
    __version__ = version("quiz-gen")
except PackageNotFoundError:
    # Without this guard the module raises on import, which breaks every
    # module that imports __version__ directly (the CLI does).
    __version__ = _FALLBACK_VERSION

try:
    _metadata = metadata("quiz-gen")
    __author__ = _metadata.get("Author", _FALLBACK_AUTHOR)
    __email__ = _metadata.get("Author-email", _FALLBACK_EMAIL)
except Exception:
    # Author details are best-effort: any metadata problem falls back to the
    # hard-coded values rather than failing the import.
    __author__ = _FALLBACK_AUTHOR
    __email__ = _FALLBACK_EMAIL
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
quiz_gen/cli.py ADDED
@@ -0,0 +1,209 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Command-line interface for quiz-gen package.
4
+ Parse EUR-Lex documents and extract structured content.
5
+ """
6
+
7
+ import argparse
8
+ import sys
9
+ from pathlib import Path
10
+ from typing import Optional
11
+
12
+ from quiz_gen.__version__ import __version__
13
+ from quiz_gen.parsers.html.eu_lex_parser import EURLexParser
14
+
15
+
16
def create_parser() -> argparse.ArgumentParser:
    """Build the argument parser for the ``quiz-gen`` command.

    The parser takes one positional ``input`` (URL or local HTML path) plus
    options for the output directory, custom output filenames, TOC printing,
    skipping file output, the version banner and verbose mode.

    Returns:
        The fully configured ``argparse.ArgumentParser``.
    """
    # Shown verbatim after the option list (RawDescriptionHelpFormatter keeps
    # the line breaks as written).
    usage_examples = """
Examples:
# Parse from URL
quiz-gen https://eur-lex.europa.eu/legal-content/EN/TXT/HTML/?uri=CELEX:32018R1139

# Parse local HTML file
quiz-gen data/documents/html/regulation.html

# Specify output directory
quiz-gen --output data/output regulation.html

# Specify custom output filenames
quiz-gen --chunks my_chunks.json --toc my_toc.json regulation.html

# Print TOC to console
quiz-gen --print-toc regulation.html
"""

    cli = argparse.ArgumentParser(
        prog="quiz-gen",
        description="Parse EUR-Lex regulatory documents and extract structured content into chunks and TOC.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_examples,
    )

    # (flags, keyword options) pairs, registered in the order they should
    # appear in the generated help text.
    argument_specs = [
        (
            ("input",),
            {"help": "URL or path to local HTML file of EUR-Lex document"},
        ),
        (
            ("-o", "--output"),
            {
                "type": str,
                "default": "data/processed",
                "help": "Output directory for generated files (default: data/processed)",
            },
        ),
        (
            ("--chunks",),
            {
                "type": str,
                "help": "Custom filename for chunks JSON (default: <input>_chunks.json)",
            },
        ),
        (
            ("--toc",),
            {
                "type": str,
                "help": "Custom filename for TOC JSON (default: <input>_toc.json)",
            },
        ),
        (
            ("--print-toc",),
            {
                "action": "store_true",
                "help": "Print formatted table of contents to console",
            },
        ),
        (
            ("--no-save",),
            {
                "action": "store_true",
                "help": "Don't save output files, only display stats",
            },
        ),
        (
            ("-v", "--version"),
            {"action": "version", "version": f"%(prog)s {__version__}"},
        ),
        (
            ("--verbose",),
            {"action": "store_true", "help": "Enable verbose output"},
        ),
    ]
    for flags, options in argument_specs:
        cli.add_argument(*flags, **options)

    return cli
90
+
91
+
92
def get_default_filename(input_path: str, suffix: str) -> str:
    """Generate default filename from input path or URL.

    For an ``http://`` / ``https://`` URL the base name is the CELEX
    identifier that follows ``CELEX:`` (or its percent-encoded form
    ``CELEX%3A``); URLs without one map to ``document_<suffix>.json``.
    Any other input is treated as a local path and its stem is used.

    Args:
        input_path: URL or local file path given on the command line.
        suffix: Label appended to the base name, e.g. ``"chunks"`` or ``"toc"``.

    Returns:
        A filename of the form ``<base>_<suffix>.json``.
    """
    # Same URL test as parse_document, so a local file whose name merely
    # starts with "http" is not mistaken for a URL.
    if input_path.startswith(("http://", "https://")):
        for marker in ("CELEX:", "CELEX%3A", "CELEX%3a"):
            # rpartition: use the last occurrence of the marker in the URL.
            _, found, celex = input_path.rpartition(marker)
            if not found:
                continue
            # Keep only the identifier: drop further query parameters,
            # fragments and any additional colon-separated segment.
            for delimiter in ("&", "?", "#", ":"):
                celex = celex.split(delimiter)[0]
            if celex:
                return f"{celex}_{suffix}.json"
        return f"document_{suffix}.json"

    # Local file: use the filename without its extension.
    stem = Path(input_path).stem
    # Percent-encoded colons (either hex case) are replaced with "_".
    stem = stem.replace("%3A", "_").replace("%3a", "_")
    return f"{stem}_{suffix}.json"
108
+
109
+
110
def parse_document(
    input_source: str,
    output_dir: str,
    chunks_filename: Optional[str] = None,
    toc_filename: Optional[str] = None,
    print_toc: bool = False,
    no_save: bool = False,
    verbose: bool = False
) -> int:
    """
    Parse EUR-Lex document and save results.

    Args:
        input_source: ``http://`` / ``https://`` URL, or path to a local HTML file.
        output_dir: Directory for the generated JSON files (created if missing).
        chunks_filename: Filename for the chunks JSON; derived from
            ``input_source`` when omitted.
        toc_filename: Filename for the TOC JSON; derived from
            ``input_source`` when omitted.
        print_toc: Also print the table of contents via the parser.
        no_save: Skip writing any files and only print statistics.
        verbose: Print progress messages and, on failure, a traceback.

    Returns:
        0 on success, 1 on error
    """
    from collections import Counter

    try:
        # Determine if input is URL or file
        if input_source.startswith(("http://", "https://")):
            if verbose:
                print(f"Fetching document from URL: {input_source}")
            parser = EURLexParser(url=input_source)
        else:
            input_path = Path(input_source)
            if not input_path.exists():
                print(f"Error: File not found: {input_source}", file=sys.stderr)
                return 1

            if verbose:
                print(f"Reading document from file: {input_source}")

            with open(input_path, 'r', encoding='utf-8') as f:
                html_content = f.read()
            parser = EURLexParser(html_content=html_content)

        # Parse document
        if verbose:
            print("Parsing document...")
        chunks, toc = parser.parse()

        # A missing or empty title is shown as "Unknown"; the ellipsis is
        # added only when the title was actually cut at 80 characters.
        title = str(toc.get('title') or 'Unknown')
        if len(title) > 80:
            title = f"{title[:80]}..."

        # Print statistics
        print("\n✓ Successfully parsed document")
        print(f" Title: {title}")
        print(f" Total chunks: {len(chunks)}")

        # Count by type
        types = Counter(c.section_type.value for c in chunks)
        for section_type, count in sorted(types.items()):
            print(f" {section_type}: {count}")

        # Print TOC if requested
        if print_toc:
            parser.print_toc()

        # Save files unless --no-save
        if not no_save:
            output_path = Path(output_dir)
            output_path.mkdir(parents=True, exist_ok=True)

            # Determine output filenames
            chunks_file = chunks_filename or get_default_filename(input_source, "chunks")
            toc_file = toc_filename or get_default_filename(input_source, "toc")

            chunks_path = output_path / chunks_file
            toc_path = output_path / toc_file

            # Save files
            parser.save_chunks(str(chunks_path))
            parser.save_toc(str(toc_path))

            print(f"\n✓ Files saved to: {output_dir}")

        return 0

    except Exception as e:
        # CLI boundary: report any failure as a message and exit code 1
        # instead of letting the exception propagate.
        print(f"Error: {e}", file=sys.stderr)
        if verbose:
            import traceback
            traceback.print_exc()
        return 1
190
+
191
+
192
def main() -> int:
    """Run the quiz-gen command line and return its exit code.

    Parses the process arguments with the parser from ``create_parser`` and
    forwards every option to ``parse_document``.

    Returns:
        The integer returned by ``parse_document``.
    """
    options = create_parser().parse_args()
    exit_code = parse_document(
        options.input,
        options.output,
        chunks_filename=options.chunks,
        toc_filename=options.toc,
        print_toc=options.print_toc,
        no_save=options.no_save,
        verbose=options.verbose,
    )
    return exit_code


if __name__ == "__main__":
    sys.exit(main())
quiz_gen/config.py ADDED
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
@@ -0,0 +1,13 @@
1
+ """Document parsers for various formats."""
2
+
3
+ from quiz_gen.parsers.html.eu_lex_parser import (
4
+ EURLexParser,
5
+ RegulationChunk,
6
+ SectionType,
7
+ )
8
+
9
+ __all__ = [
10
+ "EURLexParser",
11
+ "RegulationChunk",
12
+ "SectionType",
13
+ ]
File without changes