qguider 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ {
2
+ "permissions": {
3
+ "allow": [
4
+ "Bash(uv run *)"
5
+ ]
6
+ }
7
+ }
@@ -0,0 +1 @@
1
+ SESSION="..."
@@ -0,0 +1,2 @@
1
+ # Auto detect text files and perform LF normalization
2
+ * text=auto
@@ -0,0 +1,7 @@
1
+ .env
2
+ __pycache__/
3
+ *.html
4
+ !fixtures/*.html
5
+ .venv/
6
+ pytest_cache/
7
+ qguider_data/
qguider-1.0.0/PKG-INFO ADDED
@@ -0,0 +1,214 @@
1
+ Metadata-Version: 2.4
2
+ Name: qguider
3
+ Version: 1.0.0
4
+ Summary: A Python package to download and scrape QGuides.
5
+ Requires-Python: >=3.10
6
+ Requires-Dist: beautifulsoup4
7
+ Requires-Dist: pydantic
8
+ Requires-Dist: python-dotenv
9
+ Requires-Dist: requests
10
+ Requires-Dist: rich>=15.0.0
11
+ Requires-Dist: tqdm
12
+ Provides-Extra: dev
13
+ Requires-Dist: pytest; extra == 'dev'
14
+ Description-Content-Type: text/markdown
15
+
16
+ # QGuider
17
+
18
+ QGuider is a Python library for downloading, parsing, and querying Harvard QGuides — the university's course evaluation reports. It scrapes QGuide HTML pages, normalizes the data into typed Pydantic models, and provides a fluent API for filtering and exporting results.
19
+
20
+ ## Features
21
+
22
+ - Download QGuides for multiple semesters with checkpointing and resume support
23
+ - Parse HTML reports into structured, typed models
24
+ - Filter by semester, subject, department, or instructor
25
+ - Aggregate multi-instructor courses into a single record
26
+ - Export to JSON or pandas DataFrame
27
+ - Import previously exported JSON back into model objects
28
+
29
+ ## Installation
30
+
31
+ ```bash
32
+ pip install qguider
33
+ ```
34
+
35
+ Or install from source:
36
+
37
+ ```bash
38
+ git clone https://github.com/ivanharvard/qguider
39
+ cd qguider
40
+ pip install .
41
+ ```
42
+
43
+ ## Setup
44
+
45
+ QGuider authenticates with the `SESSION` cookie from a browser session that is already logged in to the QGuide portal with Harvard Key.
46
+
47
+ 1. Log in to the [Harvard QGuide Portal](https://qreports.fas.harvard.edu/browse/index).
48
+ 2. Open your browser's developer tools: either press F12, or right-click anywhere on the page and click `Inspect`.
49
+ 3. Press the arrow pointing to the right, and then click on `Application`.
50
+ ![Inspect Element](images/inspect.png)
51
+ 4. Under Storage, find Cookies, and under Cookies, find the option that looks like `https://qreports.fas.har...`
52
+ ![Cookies](images/cookies.png)
53
+ 5. Find the row that's labeled `SESSION`. In that row, double click the cell under the column `Value`. Copy it to your clipboard.
54
+ 6. Create a `.env` file in your working directory if it does not already exist:
55
+ ```
56
+ SESSION="..."
57
+ ```
58
+ 7. Paste the value into your `.env`.
59
+ 8. When initializing the `QGuider`, pass in the path to your `.env` file.
60
+
61
+ This `SESSION` value is temporary! You will need to replace it every 30-40 minutes or so if you wish to keep downloading. You'll know it's time to replace it when a download attempt returns 0 QGuide listings.
62
+
63
+ ## Quick Start
64
+
65
+ ```python
66
+ import qguider
67
+
68
+ qgdr = qguider.QGuider(creds=".env")
69
+
70
+ results = (
71
+ qgdr.query()
72
+ .semesters("Fall 2024", "Spring 2025")
73
+ .download(checkpoint=True, checkpoint_interval=15)
74
+ .parse()
75
+ .agg(by="id") # all courses with the same id will be merged
76
+ )
77
+
78
+ qguider.exporter.to_json(results, "qguider_data/output.json")
79
+ ```
80
+
81
+ ## API Reference
82
+
83
+ ### `QGuider`
84
+
85
+ The top-level entry point.
86
+
87
+ ```python
88
+ qgdr = qguider.QGuider(creds=".env", outpath="qguider_data")
89
+ query = qgdr.query()
90
+ ```
91
+
92
+ - `creds` — path to a `.env` file containing credentials
93
+ - `outpath` — directory where downloaded HTML files are stored (default: `qguider_data`)
94
+
95
+ ### `Query` (fluent builder)
96
+
97
+ Chain filters before downloading:
98
+
99
+ | Method | Description |
100
+ |---|---|
101
+ | `.semesters("Fall 2024", ...)` | Filter by one or more semesters |
102
+ | `.subjects("CS", "MATH", ...)` | Filter by subject code |
103
+ | `.departments("Computer Science", ...)` | Filter by department name |
104
+ | `.instructor_last_name("Smith")` | Filter by instructor last name |
105
+ | `.search("algorithms")` | Free-text search |
106
+ | `.progress(rich_progress)` | Attach a Rich progress bar |
107
+ | `.outpath("path/")` | Override output directory |
108
+
109
+ After setting filters, call:
110
+
111
+ ```python
112
+ # Download HTML files to disk
113
+ .download(checkpoint=True, checkpoint_interval=15, report_failed=True)
114
+
115
+ # Parse previously downloaded files
116
+ .parse(skip_failed=True)
117
+
118
+ # Download and parse in one step
119
+ .run(checkpoint=True, skip_failed=True)
120
+ ```
121
+
122
+ ### `QGuideSet`
123
+
124
+ `download().parse()` returns a `QGuideSet`, a list-like container of `QGuide` objects.
125
+
126
+ ```python
127
+ len(results) # number of QGuides
128
+ results[0] # access by index
129
+ for guide in results: # iterate
130
+ print(guide.course.title)
131
+
132
+ # Merge entries that share the same QGuide ID (e.g., multi-instructor courses)
133
+ merged = results.agg(by="id")
134
+ ```
135
+
136
+ ### Data Models
137
+
138
+ Each `QGuide` contains:
139
+
140
+ | Field | Type | Description |
141
+ |---|---|---|
142
+ | `id` | `str` | Unique QGuide identifier |
143
+ | `course` | `Course` | Course metadata |
144
+ | `response_rate` | `ResponseRate` | Survey response counts and ratio |
145
+ | `course_feedback` | `CourseFeedback` | Likert ratings for overall course, materials, assignments, etc. |
146
+ | `instructor_feedback` | `list[InstructorFeedback]` | Per-instructor Likert ratings |
147
+ | `hours_per_week` | `HoursPerWeek` | Reported weekly workload distribution |
148
+ | `recommendation_strength` | `RecommendationStrength` | How strongly students recommend the course |
149
+ | `reasons_for_enrollment` | `ReasonsForEnrollment` | Distribution of enrollment motivations |
150
+ | `comments` | `list[Comment]` | Free-text student comments |
151
+
152
+ `Course` fields: `title`, `subject`, `department`, `number`, `section`, `instructors`, `semester`, `aliases`.
153
+
154
+ ### Exporting
155
+
156
+ ```python
157
+ # Write to JSON file
158
+ qguider.exporter.to_json(results, "output.json")
159
+
160
+ # Return JSON string without writing
161
+ json_str = qguider.exporter.to_json(results)
162
+
163
+ # Convert to pandas DataFrame (requires pandas)
164
+ df = qguider.exporter.to_dataframe(results)
165
+ ```
166
+
167
+ ### Importing
168
+
169
+ ```python
170
+ results = qguider.importer.from_json("output.json")
171
+ ```
172
+
173
+ ## CLI Example
174
+
175
+ A reference CLI is provided in [`examples/cli.py`](examples/cli.py):
176
+
177
+ ```bash
178
+ # Download and parse all semesters, write JSON
179
+ python -m examples.cli --download
180
+
181
+ # Download with Rich progress bar
182
+ python -m examples.cli --download --progress
183
+
184
+ # Parse previously downloaded HTML files
185
+ python -m examples.cli --parse
186
+
187
+ # Import from a previously exported JSON
188
+ python -m examples.cli --import
189
+
190
+ # Skip aggregation of multi-instructor courses
191
+ python -m examples.cli --download --no-agg
192
+
193
+ # Set logging verbosity
194
+ python -m examples.cli --download --log-level DEBUG
195
+
196
+ # Clear all downloaded data
197
+ python -m examples.cli --clear-all
198
+ ```
199
+
200
+ ## Supported Semesters
201
+
202
+ QGuider currently supports FAS (Faculty of Arts and Sciences) evaluations for:
203
+
204
+ - Fall 2023
205
+ - Spring 2024
206
+ - Fall 2024
207
+ - Spring 2025
208
+ - Fall 2025
209
+ - Spring 2026
210
+
211
+ ## Notes
212
+
213
+ - Downloaded HTML files are cached under `qguider_data/` by semester, department, and subject — re-running a download with `checkpoint=True` skips already-downloaded files.
214
+ - `agg(by="id")` merges records that share a QGuide ID, deduplicating instructor feedback and comments across entries for the same course offering.
@@ -0,0 +1,199 @@
1
+ # QGuider
2
+
3
+ QGuider is a Python library for downloading, parsing, and querying Harvard QGuides — the university's course evaluation reports. It scrapes QGuide HTML pages, normalizes the data into typed Pydantic models, and provides a fluent API for filtering and exporting results.
4
+
5
+ ## Features
6
+
7
+ - Download QGuides for multiple semesters with checkpointing and resume support
8
+ - Parse HTML reports into structured, typed models
9
+ - Filter by semester, subject, department, or instructor
10
+ - Aggregate multi-instructor courses into a single record
11
+ - Export to JSON or pandas DataFrame
12
+ - Import previously exported JSON back into model objects
13
+
14
+ ## Installation
15
+
16
+ ```bash
17
+ pip install qguider
18
+ ```
19
+
20
+ Or install from source:
21
+
22
+ ```bash
23
+ git clone https://github.com/ivanharvard/qguider
24
+ cd qguider
25
+ pip install .
26
+ ```
27
+
28
+ ## Setup
29
+
30
+ QGuider authenticates with the `SESSION` cookie from a browser session that is already logged in to the QGuide portal with Harvard Key.
31
+
32
+ 1. Log in to the [Harvard QGuide Portal](https://qreports.fas.harvard.edu/browse/index).
33
+ 2. Open your browser's developer tools: either press F12, or right-click anywhere on the page and click `Inspect`.
34
+ 3. Press the arrow pointing to the right, and then click on `Application`.
35
+ ![Inspect Element](images/inspect.png)
36
+ 4. Under Storage, find Cookies, and under Cookies, find the option that looks like `https://qreports.fas.har...`
37
+ ![Cookies](images/cookies.png)
38
+ 5. Find the row that's labeled `SESSION`. In that row, double click the cell under the column `Value`. Copy it to your clipboard.
39
+ 6. Create a `.env` file in your working directory if it does not already exist:
40
+ ```
41
+ SESSION="..."
42
+ ```
43
+ 7. Paste the value into your `.env`.
44
+ 8. When initializing the `QGuider`, pass in the path to your `.env` file.
45
+
46
+ This `SESSION` value is temporary! You will need to replace it every 30-40 minutes or so if you wish to keep downloading. You'll know it's time to replace it when a download attempt returns 0 QGuide listings.
47
+
48
+ ## Quick Start
49
+
50
+ ```python
51
+ import qguider
52
+
53
+ qgdr = qguider.QGuider(creds=".env")
54
+
55
+ results = (
56
+ qgdr.query()
57
+ .semesters("Fall 2024", "Spring 2025")
58
+ .download(checkpoint=True, checkpoint_interval=15)
59
+ .parse()
60
+ .agg(by="id") # all courses with the same id will be merged
61
+ )
62
+
63
+ qguider.exporter.to_json(results, "qguider_data/output.json")
64
+ ```
65
+
66
+ ## API Reference
67
+
68
+ ### `QGuider`
69
+
70
+ The top-level entry point.
71
+
72
+ ```python
73
+ qgdr = qguider.QGuider(creds=".env", outpath="qguider_data")
74
+ query = qgdr.query()
75
+ ```
76
+
77
+ - `creds` — path to a `.env` file containing credentials
78
+ - `outpath` — directory where downloaded HTML files are stored (default: `qguider_data`)
79
+
80
+ ### `Query` (fluent builder)
81
+
82
+ Chain filters before downloading:
83
+
84
+ | Method | Description |
85
+ |---|---|
86
+ | `.semesters("Fall 2024", ...)` | Filter by one or more semesters |
87
+ | `.subjects("CS", "MATH", ...)` | Filter by subject code |
88
+ | `.departments("Computer Science", ...)` | Filter by department name |
89
+ | `.instructor_last_name("Smith")` | Filter by instructor last name |
90
+ | `.search("algorithms")` | Free-text search |
91
+ | `.progress(rich_progress)` | Attach a Rich progress bar |
92
+ | `.outpath("path/")` | Override output directory |
93
+
94
+ After setting filters, call:
95
+
96
+ ```python
97
+ # Download HTML files to disk
98
+ .download(checkpoint=True, checkpoint_interval=15, report_failed=True)
99
+
100
+ # Parse previously downloaded files
101
+ .parse(skip_failed=True)
102
+
103
+ # Download and parse in one step
104
+ .run(checkpoint=True, skip_failed=True)
105
+ ```
106
+
107
+ ### `QGuideSet`
108
+
109
+ `download().parse()` returns a `QGuideSet`, a list-like container of `QGuide` objects.
110
+
111
+ ```python
112
+ len(results) # number of QGuides
113
+ results[0] # access by index
114
+ for guide in results: # iterate
115
+ print(guide.course.title)
116
+
117
+ # Merge entries that share the same QGuide ID (e.g., multi-instructor courses)
118
+ merged = results.agg(by="id")
119
+ ```
120
+
121
+ ### Data Models
122
+
123
+ Each `QGuide` contains:
124
+
125
+ | Field | Type | Description |
126
+ |---|---|---|
127
+ | `id` | `str` | Unique QGuide identifier |
128
+ | `course` | `Course` | Course metadata |
129
+ | `response_rate` | `ResponseRate` | Survey response counts and ratio |
130
+ | `course_feedback` | `CourseFeedback` | Likert ratings for overall course, materials, assignments, etc. |
131
+ | `instructor_feedback` | `list[InstructorFeedback]` | Per-instructor Likert ratings |
132
+ | `hours_per_week` | `HoursPerWeek` | Reported weekly workload distribution |
133
+ | `recommendation_strength` | `RecommendationStrength` | How strongly students recommend the course |
134
+ | `reasons_for_enrollment` | `ReasonsForEnrollment` | Distribution of enrollment motivations |
135
+ | `comments` | `list[Comment]` | Free-text student comments |
136
+
137
+ `Course` fields: `title`, `subject`, `department`, `number`, `section`, `instructors`, `semester`, `aliases`.
138
+
139
+ ### Exporting
140
+
141
+ ```python
142
+ # Write to JSON file
143
+ qguider.exporter.to_json(results, "output.json")
144
+
145
+ # Return JSON string without writing
146
+ json_str = qguider.exporter.to_json(results)
147
+
148
+ # Convert to pandas DataFrame (requires pandas)
149
+ df = qguider.exporter.to_dataframe(results)
150
+ ```
151
+
152
+ ### Importing
153
+
154
+ ```python
155
+ results = qguider.importer.from_json("output.json")
156
+ ```
157
+
158
+ ## CLI Example
159
+
160
+ A reference CLI is provided in [`examples/cli.py`](examples/cli.py):
161
+
162
+ ```bash
163
+ # Download and parse all semesters, write JSON
164
+ python -m examples.cli --download
165
+
166
+ # Download with Rich progress bar
167
+ python -m examples.cli --download --progress
168
+
169
+ # Parse previously downloaded HTML files
170
+ python -m examples.cli --parse
171
+
172
+ # Import from a previously exported JSON
173
+ python -m examples.cli --import
174
+
175
+ # Skip aggregation of multi-instructor courses
176
+ python -m examples.cli --download --no-agg
177
+
178
+ # Set logging verbosity
179
+ python -m examples.cli --download --log-level DEBUG
180
+
181
+ # Clear all downloaded data
182
+ python -m examples.cli --clear-all
183
+ ```
184
+
185
+ ## Supported Semesters
186
+
187
+ QGuider currently supports FAS (Faculty of Arts and Sciences) evaluations for:
188
+
189
+ - Fall 2023
190
+ - Spring 2024
191
+ - Fall 2024
192
+ - Spring 2025
193
+ - Fall 2025
194
+ - Spring 2026
195
+
196
+ ## Notes
197
+
198
+ - Downloaded HTML files are cached under `qguider_data/` by semester, department, and subject — re-running a download with `checkpoint=True` skips already-downloaded files.
199
+ - `agg(by="id")` merges records that share a QGuide ID, deduplicating instructor feedback and comments across entries for the same course offering.
@@ -0,0 +1,52 @@
1
+ # Used by cli.py. Not an example.
2
+
3
+ from collections import deque
4
+ import logging
5
+
6
+ from rich.console import Console
7
+ from rich.layout import Layout
8
+ from rich.live import Live
9
+ from rich.panel import Panel
10
+ from rich.progress import (
11
+ Progress,
12
+ BarColumn,
13
+ TextColumn,
14
+ TimeRemainingColumn,
15
+ )
16
+
17
+
18
def make_rich_ui(log_level: str = "INFO"):
    """Build the Rich console, layout and progress bar used by the example CLI.

    The layout stacks a fixed-height progress panel above a "Logs" panel.
    As a side effect the root logger is reconfigured: its existing handlers
    are removed and replaced by a single handler that mirrors the 20 most
    recent formatted records into the "Logs" panel.

    Args:
        log_level: Name of a standard ``logging`` level (case-insensitive).
            Any name that does not resolve to an integer attribute of the
            ``logging`` module falls back to ``INFO``.

    Returns:
        A ``(console, layout, progress, Live)`` tuple. ``Live`` is the
        ``rich.live.Live`` class itself (not an instance), so the caller
        can open the live display as a context manager.
    """
    # Imported locally so this function does not depend on edits to the
    # module-level import block.
    from rich.text import Text

    console = Console()
    log_lines = deque(maxlen=20)

    progress = Progress(
        TextColumn("[bold]{task.description}"),
        BarColumn(),
        TextColumn("{task.completed}/{task.total}"),
        TimeRemainingColumn(),
    )

    layout = Layout()
    layout.split_column(
        Layout(Panel(progress), name="progress", size=5),
        Layout(Panel("", title="Logs"), name="logs"),
    )

    class PanelLogHandler(logging.Handler):
        """Handler that re-renders the buffered log lines in the layout."""

        def emit(self, record):
            log_lines.append(self.format(record))
            # Wrap the text in ``Text`` so log messages are shown verbatim.
            # A plain ``str`` would be parsed as Rich console markup, and a
            # message containing e.g. "[/tmp/x]" could be restyled, dropped
            # or raise a markup error.
            layout["logs"].update(
                Panel(Text("\n".join(log_lines)), title="Logs")
            )

    handler = PanelLogHandler()
    handler.setFormatter(
        logging.Formatter("%(levelname)s:%(name)s:%(message)s")
    )

    # ``getattr`` may find a non-level attribute (e.g. the string
    # ``logging.BASIC_FORMAT``); only accept integer level constants.
    level = getattr(logging, log_level.upper(), logging.INFO)
    if not isinstance(level, int):
        level = logging.INFO

    root = logging.getLogger()
    root.handlers.clear()
    root.addHandler(handler)
    root.setLevel(level)

    return console, layout, progress, Live
@@ -0,0 +1,131 @@
1
+ import argparse
2
+ import qguider
3
+ import logging
4
+ from pathlib import Path
5
+ from examples._ui import make_rich_ui
6
+ import shutil
7
+
8
+
9
class ColorFormatter(logging.Formatter):
    """Formatter that prefixes each message with an ANSI-colored level tag.

    Output looks like ``<color>[LEVEL]<reset> message``. Levels without an
    entry in ``COLORS`` get an uncolored tag.
    """

    # ANSI escape sequence used for the ``[LEVEL]`` tag of each level.
    COLORS = {
        logging.DEBUG: "\033[37m",
        logging.INFO: "\033[36m",
        logging.WARNING: "\033[33m",
        logging.ERROR: "\033[31m",
        logging.CRITICAL: "\033[1;31m",
    }

    # ANSI escape sequence that restores the terminal's default style.
    RESET = "\033[0m"

    def format(self, record):
        """Return the colored level tag followed by the formatted record.

        The message part is produced by ``logging.Formatter.format`` so that
        exception tracebacks (``exc_info``) and ``stack_info`` attached to
        the record are appended instead of being silently discarded.
        """
        color = self.COLORS.get(record.levelno, "")
        body = super().format(record)
        return f"{color}[{record.levelname}]{self.RESET} {body}"
26
+
27
def configure_logging(level: str = "INFO"):
    """Route root-logger output through a single colored stream handler.

    Existing root handlers are removed first, then a ``StreamHandler`` using
    ``ColorFormatter`` is installed and the root level is set.

    Args:
        level: Name of a standard ``logging`` level (case-insensitive). Any
            name that does not resolve to an integer attribute of the
            ``logging`` module falls back to ``INFO``.
    """
    # ``getattr`` may find a non-level attribute (e.g. the string
    # ``logging.BASIC_FORMAT``), which would make ``setLevel`` raise; only
    # accept integer level constants.
    resolved = getattr(logging, level.upper(), logging.INFO)
    if not isinstance(resolved, int):
        resolved = logging.INFO

    root = logging.getLogger()
    root.handlers.clear()

    handler = logging.StreamHandler()
    handler.setFormatter(ColorFormatter())

    root.addHandler(handler)
    root.setLevel(resolved)
36
+
37
def download_all(progress = None, agg = False):
    """Download, parse and export QGuides for a fixed list of semesters.

    Credentials are read from ``.env``. Downloads use checkpointing every
    15 items and report failures; parsing skips failed entries. The result
    is written to ``qguider_data/all_qguides.json``.

    Args:
        progress: Value forwarded to the query's ``.progress(...)`` call.
        agg: When true, merge the parsed results with ``agg(by="id")``
            before exporting.
    """
    semesters = (
        "Fall 2023",
        "Spring 2024",
        "Fall 2024",
        "Spring 2025",
        "Fall 2025",
        "Spring 2026",
    )

    client = qguider.QGuider(creds=".env")
    query = client.query().semesters(*semesters).progress(progress)

    downloaded = query.download(
        checkpoint=True,
        checkpoint_interval=15,
        report_failed=True,
    )
    results = downloaded.parse(skip_failed=True)

    if agg:
        results = results.agg(by="id")

    qguider.exporter.to_json(results, "qguider_data/all_qguides.json")
66
+
67
def parse_all(progress=None):
    """Parse every ``*.html`` file under ``qguider_data`` and print a summary.

    For each file the parsed course and its instructors are printed.

    Args:
        progress: Optional progress object exposing ``add_task`` and
            ``advance`` (the CLI passes the Rich progress bar). When given,
            a "Parsing QGuides" task is advanced once per file, including a
            file whose parsing raises.

    Raises:
        Whatever ``QGuideParser(...).parse()`` raises; the first failure
        stops the loop after the progress task has been advanced.
    """
    import qguider.parser

    qguide_files = list(Path("qguider_data").rglob("*.html"))
    task_id = None

    if progress:
        task_id = progress.add_task("Parsing QGuides", total=len(qguide_files))

    for file in qguide_files:
        try:
            qguide = qguider.parser.QGuideParser(file).parse()
            # The README's data-model tables list instructors on ``Course``
            # and no ``instructor`` attribute on ``QGuide``, so read the
            # documented field.
            print(qguide.course, qguide.course.instructors)
        finally:
            if progress and task_id is not None:
                progress.advance(task_id)
83
+
84
def import_all():
    """Load ``qguider_data/all_qguides.json`` and print how many QGuides it holds."""
    imported = qguider.importer.from_json("qguider_data/all_qguides.json")
    count = len(imported)
    print(f"Imported {count} QGuides.")
87
+
88
def clear_all():
    """Delete the ``qguider_data`` directory tree and print a confirmation.

    Errors during removal (including a missing directory) are ignored.
    """
    data_dir = "qguider_data"
    shutil.rmtree(data_dir, ignore_errors=True)
    print("Cleared all downloaded data.")
91
+
92
if __name__ == "__main__":
    # Command-line entry point: parse the flags, optionally clear cached
    # data, then run the requested actions either inside a Rich live
    # display (--progress) or with plain colored logging.
    cli = argparse.ArgumentParser(description="Download and parse QGuides")
    cli.add_argument("--import", dest="do_import", action="store_true")
    for switch in ("--parse", "--download", "--progress", "--no-agg", "--clear-all"):
        cli.add_argument(switch, action="store_true")
    cli.add_argument("--log-level", default="INFO")

    args = cli.parse_args()

    def _run_selected(progress):
        """Run the requested actions in fixed order: download, parse, import."""
        if args.download:
            download_all(progress=progress, agg=not args.no_agg)

        if args.parse:
            parse_all(progress=progress)

        if args.do_import:
            import_all()

    if args.clear_all:
        clear_all()

    if not args.progress:
        configure_logging(args.log_level)
        _run_selected(None)
    else:
        console, layout, progress, Live = make_rich_ui(args.log_level)

        with Live(layout, console=console, refresh_per_second=10):
            _run_selected(progress)
130
+
131
+
Binary file
Binary file
@@ -0,0 +1,26 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "qguider"
7
+ version = "1.0.0"
8
+ description = "A Python package to download and scrape QGuides."
9
+ readme = "README.md"
10
+ requires-python = ">=3.10"
11
+ dependencies = [
12
+ "requests",
13
+ "beautifulsoup4",
14
+ "python-dotenv",
15
+ "pydantic",
16
+ "tqdm",
17
+ "rich>=15.0.0",
18
+ ]
19
+
20
+ [project.optional-dependencies]
21
+ dev = [
22
+ "pytest",
23
+ ]
24
+
25
+ [tool.pytest.ini_options]
26
+ testpaths = ["tests"]
@@ -0,0 +1,6 @@
1
+ from .parser import QGuideParser
2
+ from .api import QGuider
3
+ from .agg import QGuideSet
4
+ from . import exporter, importer
5
+
6
+ __all__ = ["QGuideParser", "QGuider", "QGuideSet", "exporter", "importer"]
@@ -0,0 +1,7 @@
1
+ import logging
2
+
3
+ logger = logging.getLogger("qguider")
4
+
5
+ # Prevent "No handler found" warnings while allowing the
6
+ # application using qguider to configure logging however it wants.
7
+ logger.addHandler(logging.NullHandler())