qguider 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- qguider-1.0.0/.claude/settings.local.json +7 -0
- qguider-1.0.0/.env.example +1 -0
- qguider-1.0.0/.gitattributes +2 -0
- qguider-1.0.0/.gitignore +7 -0
- qguider-1.0.0/PKG-INFO +214 -0
- qguider-1.0.0/README.md +199 -0
- qguider-1.0.0/examples/_ui.py +52 -0
- qguider-1.0.0/examples/cli.py +131 -0
- qguider-1.0.0/images/cookies.png +0 -0
- qguider-1.0.0/images/inspect.png +0 -0
- qguider-1.0.0/pyproject.toml +26 -0
- qguider-1.0.0/qguider/__init__.py +6 -0
- qguider-1.0.0/qguider/_logging.py +7 -0
- qguider-1.0.0/qguider/agg.py +82 -0
- qguider-1.0.0/qguider/api.py +11 -0
- qguider-1.0.0/qguider/downloader.py +464 -0
- qguider-1.0.0/qguider/exporter.py +35 -0
- qguider-1.0.0/qguider/importer.py +14 -0
- qguider-1.0.0/qguider/models.py +213 -0
- qguider-1.0.0/qguider/parser.py +459 -0
- qguider-1.0.0/qguider/query.py +110 -0
- qguider-1.0.0/tests/fixtures/cs50.json +715 -0
- qguider-1.0.0/tests/fixtures/phs2000a.json +292 -0
- qguider-1.0.0/tests/fixtures/zuluaa.json +223 -0
- qguider-1.0.0/tests/test_download.py +42 -0
- qguider-1.0.0/tests/test_parser.py +43 -0
- qguider-1.0.0/tests/test_query.py +47 -0
- qguider-1.0.0/uv.lock +556 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
SESSION="..."
|
qguider-1.0.0/.gitignore
ADDED
qguider-1.0.0/PKG-INFO
ADDED
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: qguider
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: A Python package to download and scrape QGuides.
|
|
5
|
+
Requires-Python: >=3.10
|
|
6
|
+
Requires-Dist: beautifulsoup4
|
|
7
|
+
Requires-Dist: pydantic
|
|
8
|
+
Requires-Dist: python-dotenv
|
|
9
|
+
Requires-Dist: requests
|
|
10
|
+
Requires-Dist: rich>=15.0.0
|
|
11
|
+
Requires-Dist: tqdm>=0.0.1
|
|
12
|
+
Provides-Extra: dev
|
|
13
|
+
Requires-Dist: pytest; extra == 'dev'
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
|
|
16
|
+
# QGuider
|
|
17
|
+
|
|
18
|
+
QGuider is a Python library for downloading, parsing, and querying Harvard QGuides — the university's course evaluation reports. It scrapes QGuide HTML pages, normalizes the data into typed Pydantic models, and provides a fluent API for filtering and exporting results.
|
|
19
|
+
|
|
20
|
+
## Features
|
|
21
|
+
|
|
22
|
+
- Download QGuides for multiple semesters with checkpointing and resume support
|
|
23
|
+
- Parse HTML reports into structured, typed models
|
|
24
|
+
- Filter by semester, subject, department, or instructor
|
|
25
|
+
- Aggregate multi-instructor courses into a single record
|
|
26
|
+
- Export to JSON or pandas DataFrame
|
|
27
|
+
- Import previously exported JSON back into model objects
|
|
28
|
+
|
|
29
|
+
## Installation
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
pip install qguider
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
Or install from source:
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
git clone https://github.com/ivanharvard/qguider
|
|
39
|
+
cd qguider
|
|
40
|
+
pip install .
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
## Setup
|
|
44
|
+
|
|
45
|
+
QGuider authenticates to the QGuide portal with the `SESSION` cookie from a browser session in which you are logged in with your Harvard Key. To obtain it:
|
|
46
|
+
|
|
47
|
+
1. Log in to the [Harvard QGuide Portal](https://qreports.fas.harvard.edu/browse/index).
|
|
48
|
+
2. Open your browser's developer tools: either press F12, or right-click anywhere on the page and click `Inspect`.
|
|
49
|
+
3. Press the arrow pointing to the right, and then click on `Application`.
|
|
50
|
+

|
|
51
|
+
4. Under Storage, find Cookies, and under Cookies, find the option that looks like `https://qreports.fas.har...`
|
|
52
|
+

|
|
53
|
+
5. Find the row that's labeled `SESSION`. In that row, double click the cell under the column `Value`. Copy it to your clipboard.
|
|
54
|
+
6. Create a `.env` file in your working directory if it does not already exist:
|
|
55
|
+
```
|
|
56
|
+
SESSION="..."
|
|
57
|
+
```
|
|
58
|
+
7. Paste the value into your `.env`.
|
|
59
|
+
8. When initializing the `QGuider`, pass in the path to your `.env` file.
|
|
60
|
+
|
|
61
|
+
This `SESSION` value is temporary! You will need to replace it every 30-40 minutes or so if you wish to keep downloading QGuides. You'll know it's time to replace it when a download attempt finds 0 QGuide listings.
|
|
62
|
+
|
|
63
|
+
## Quick Start
|
|
64
|
+
|
|
65
|
+
```python
|
|
66
|
+
import qguider
|
|
67
|
+
|
|
68
|
+
qgdr = qguider.QGuider(creds=".env")
|
|
69
|
+
|
|
70
|
+
results = (
|
|
71
|
+
qgdr.query()
|
|
72
|
+
.semesters("Fall 2024", "Spring 2025")
|
|
73
|
+
.download(checkpoint=True, checkpoint_interval=15)
|
|
74
|
+
.parse()
|
|
75
|
+
.agg(by="id") # all courses with the same id will be merged
|
|
76
|
+
)
|
|
77
|
+
|
|
78
|
+
qguider.exporter.to_json(results, "qguider_data/output.json")
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
## API Reference
|
|
82
|
+
|
|
83
|
+
### `QGuider`
|
|
84
|
+
|
|
85
|
+
The top-level entry point.
|
|
86
|
+
|
|
87
|
+
```python
|
|
88
|
+
qgdr = qguider.QGuider(creds=".env", outpath="qguider_data")
|
|
89
|
+
query = qgdr.query()
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
- `creds` — path to a `.env` file containing credentials
|
|
93
|
+
- `outpath` — directory where downloaded HTML files are stored (default: `qguider_data`)
|
|
94
|
+
|
|
95
|
+
### `Query` (fluent builder)
|
|
96
|
+
|
|
97
|
+
Chain filters before downloading:
|
|
98
|
+
|
|
99
|
+
| Method | Description |
|
|
100
|
+
|---|---|
|
|
101
|
+
| `.semesters("Fall 2024", ...)` | Filter by one or more semesters |
|
|
102
|
+
| `.subjects("CS", "MATH", ...)` | Filter by subject code |
|
|
103
|
+
| `.departments("Computer Science", ...)` | Filter by department name |
|
|
104
|
+
| `.instructor_last_name("Smith")` | Filter by instructor last name |
|
|
105
|
+
| `.search("algorithms")` | Free-text search |
|
|
106
|
+
| `.progress(rich_progress)` | Attach a Rich progress bar |
|
|
107
|
+
| `.outpath("path/")` | Override output directory |
|
|
108
|
+
|
|
109
|
+
After setting filters, call:
|
|
110
|
+
|
|
111
|
+
```python
|
|
112
|
+
# Download HTML files to disk
|
|
113
|
+
.download(checkpoint=True, checkpoint_interval=15, report_failed=True)
|
|
114
|
+
|
|
115
|
+
# Parse previously downloaded files
|
|
116
|
+
.parse(skip_failed=True)
|
|
117
|
+
|
|
118
|
+
# Download and parse in one step
|
|
119
|
+
.run(checkpoint=True, skip_failed=True)
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
### `QGuideSet`
|
|
123
|
+
|
|
124
|
+
`download().parse()` returns a `QGuideSet`, a list-like container of `QGuide` objects.
|
|
125
|
+
|
|
126
|
+
```python
|
|
127
|
+
len(results) # number of QGuides
|
|
128
|
+
results[0] # access by index
|
|
129
|
+
for guide in results: # iterate
|
|
130
|
+
print(guide.course.title)
|
|
131
|
+
|
|
132
|
+
# Merge entries that share the same QGuide ID (e.g., multi-instructor courses)
|
|
133
|
+
merged = results.agg(by="id")
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
### Data Models
|
|
137
|
+
|
|
138
|
+
Each `QGuide` contains:
|
|
139
|
+
|
|
140
|
+
| Field | Type | Description |
|
|
141
|
+
|---|---|---|
|
|
142
|
+
| `id` | `str` | Unique QGuide identifier |
|
|
143
|
+
| `course` | `Course` | Course metadata |
|
|
144
|
+
| `response_rate` | `ResponseRate` | Survey response counts and ratio |
|
|
145
|
+
| `course_feedback` | `CourseFeedback` | Likert ratings for overall course, materials, assignments, etc. |
|
|
146
|
+
| `instructor_feedback` | `list[InstructorFeedback]` | Per-instructor Likert ratings |
|
|
147
|
+
| `hours_per_week` | `HoursPerWeek` | Reported weekly workload distribution |
|
|
148
|
+
| `recommendation_strength` | `RecommendationStrength` | How strongly students recommend the course |
|
|
149
|
+
| `reasons_for_enrollment` | `ReasonsForEnrollment` | Distribution of enrollment motivations |
|
|
150
|
+
| `comments` | `list[Comment]` | Free-text student comments |
|
|
151
|
+
|
|
152
|
+
`Course` fields: `title`, `subject`, `department`, `number`, `section`, `instructors`, `semester`, `aliases`.
|
|
153
|
+
|
|
154
|
+
### Exporting
|
|
155
|
+
|
|
156
|
+
```python
|
|
157
|
+
# Write to JSON file
|
|
158
|
+
qguider.exporter.to_json(results, "output.json")
|
|
159
|
+
|
|
160
|
+
# Return JSON string without writing
|
|
161
|
+
json_str = qguider.exporter.to_json(results)
|
|
162
|
+
|
|
163
|
+
# Convert to pandas DataFrame (requires pandas)
|
|
164
|
+
df = qguider.exporter.to_dataframe(results)
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
### Importing
|
|
168
|
+
|
|
169
|
+
```python
|
|
170
|
+
results = qguider.importer.from_json("output.json")
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
## CLI Example
|
|
174
|
+
|
|
175
|
+
A reference CLI is provided in [`examples/cli.py`](examples/cli.py):
|
|
176
|
+
|
|
177
|
+
```bash
|
|
178
|
+
# Download and parse all semesters, write JSON
|
|
179
|
+
python -m examples.cli --download
|
|
180
|
+
|
|
181
|
+
# Download with Rich progress bar
|
|
182
|
+
python -m examples.cli --download --progress
|
|
183
|
+
|
|
184
|
+
# Parse previously downloaded HTML files
|
|
185
|
+
python -m examples.cli --parse
|
|
186
|
+
|
|
187
|
+
# Import from a previously exported JSON
|
|
188
|
+
python -m examples.cli --import
|
|
189
|
+
|
|
190
|
+
# Skip aggregation of multi-instructor courses
|
|
191
|
+
python -m examples.cli --download --no-agg
|
|
192
|
+
|
|
193
|
+
# Set logging verbosity
|
|
194
|
+
python -m examples.cli --download --log-level DEBUG
|
|
195
|
+
|
|
196
|
+
# Clear all downloaded data
|
|
197
|
+
python -m examples.cli --clear-all
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
## Supported Semesters
|
|
201
|
+
|
|
202
|
+
QGuider currently supports FAS (Faculty of Arts and Sciences) evaluations for:
|
|
203
|
+
|
|
204
|
+
- Fall 2023
|
|
205
|
+
- Spring 2024
|
|
206
|
+
- Fall 2024
|
|
207
|
+
- Spring 2025
|
|
208
|
+
- Fall 2025
|
|
209
|
+
- Spring 2026
|
|
210
|
+
|
|
211
|
+
## Notes
|
|
212
|
+
|
|
213
|
+
- Downloaded HTML files are cached under `qguider_data/` by semester, department, and subject — re-running a download with `checkpoint=True` skips already-downloaded files.
|
|
214
|
+
- `agg(by="id")` merges records that share a QGuide ID, deduplicating instructor feedback and comments across entries for the same course offering.
|
qguider-1.0.0/README.md
ADDED
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
# QGuider
|
|
2
|
+
|
|
3
|
+
QGuider is a Python library for downloading, parsing, and querying Harvard QGuides — the university's course evaluation reports. It scrapes QGuide HTML pages, normalizes the data into typed Pydantic models, and provides a fluent API for filtering and exporting results.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- Download QGuides for multiple semesters with checkpointing and resume support
|
|
8
|
+
- Parse HTML reports into structured, typed models
|
|
9
|
+
- Filter by semester, subject, department, or instructor
|
|
10
|
+
- Aggregate multi-instructor courses into a single record
|
|
11
|
+
- Export to JSON or pandas DataFrame
|
|
12
|
+
- Import previously exported JSON back into model objects
|
|
13
|
+
|
|
14
|
+
## Installation
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
pip install qguider
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
Or install from source:
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
git clone https://github.com/ivanharvard/qguider
|
|
24
|
+
cd qguider
|
|
25
|
+
pip install .
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
## Setup
|
|
29
|
+
|
|
30
|
+
QGuider authenticates to the QGuide portal with the `SESSION` cookie from a browser session in which you are logged in with your Harvard Key. To obtain it:
|
|
31
|
+
|
|
32
|
+
1. Log in to the [Harvard QGuide Portal](https://qreports.fas.harvard.edu/browse/index).
|
|
33
|
+
2. Open your browser's developer tools: either press F12, or right-click anywhere on the page and click `Inspect`.
|
|
34
|
+
3. Press the arrow pointing to the right, and then click on `Application`.
|
|
35
|
+

|
|
36
|
+
4. Under Storage, find Cookies, and under Cookies, find the option that looks like `https://qreports.fas.har...`
|
|
37
|
+

|
|
38
|
+
5. Find the row that's labeled `SESSION`. In that row, double click the cell under the column `Value`. Copy it to your clipboard.
|
|
39
|
+
6. Create a `.env` file in your working directory if it does not already exist:
|
|
40
|
+
```
|
|
41
|
+
SESSION="..."
|
|
42
|
+
```
|
|
43
|
+
7. Paste the value into your `.env`.
|
|
44
|
+
8. When initializing the `QGuider`, pass in the path to your `.env` file.
|
|
45
|
+
|
|
46
|
+
This `SESSION` value is temporary! You will need to replace it every 30-40 minutes or so if you wish to keep downloading QGuides. You'll know it's time to replace it when a download attempt finds 0 QGuide listings.
|
|
47
|
+
|
|
48
|
+
## Quick Start
|
|
49
|
+
|
|
50
|
+
```python
|
|
51
|
+
import qguider
|
|
52
|
+
|
|
53
|
+
qgdr = qguider.QGuider(creds=".env")
|
|
54
|
+
|
|
55
|
+
results = (
|
|
56
|
+
qgdr.query()
|
|
57
|
+
.semesters("Fall 2024", "Spring 2025")
|
|
58
|
+
.download(checkpoint=True, checkpoint_interval=15)
|
|
59
|
+
.parse()
|
|
60
|
+
.agg(by="id") # all courses with the same id will be merged
|
|
61
|
+
)
|
|
62
|
+
|
|
63
|
+
qguider.exporter.to_json(results, "qguider_data/output.json")
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
## API Reference
|
|
67
|
+
|
|
68
|
+
### `QGuider`
|
|
69
|
+
|
|
70
|
+
The top-level entry point.
|
|
71
|
+
|
|
72
|
+
```python
|
|
73
|
+
qgdr = qguider.QGuider(creds=".env", outpath="qguider_data")
|
|
74
|
+
query = qgdr.query()
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
- `creds` — path to a `.env` file containing credentials
|
|
78
|
+
- `outpath` — directory where downloaded HTML files are stored (default: `qguider_data`)
|
|
79
|
+
|
|
80
|
+
### `Query` (fluent builder)
|
|
81
|
+
|
|
82
|
+
Chain filters before downloading:
|
|
83
|
+
|
|
84
|
+
| Method | Description |
|
|
85
|
+
|---|---|
|
|
86
|
+
| `.semesters("Fall 2024", ...)` | Filter by one or more semesters |
|
|
87
|
+
| `.subjects("CS", "MATH", ...)` | Filter by subject code |
|
|
88
|
+
| `.departments("Computer Science", ...)` | Filter by department name |
|
|
89
|
+
| `.instructor_last_name("Smith")` | Filter by instructor last name |
|
|
90
|
+
| `.search("algorithms")` | Free-text search |
|
|
91
|
+
| `.progress(rich_progress)` | Attach a Rich progress bar |
|
|
92
|
+
| `.outpath("path/")` | Override output directory |
|
|
93
|
+
|
|
94
|
+
After setting filters, call:
|
|
95
|
+
|
|
96
|
+
```python
|
|
97
|
+
# Download HTML files to disk
|
|
98
|
+
.download(checkpoint=True, checkpoint_interval=15, report_failed=True)
|
|
99
|
+
|
|
100
|
+
# Parse previously downloaded files
|
|
101
|
+
.parse(skip_failed=True)
|
|
102
|
+
|
|
103
|
+
# Download and parse in one step
|
|
104
|
+
.run(checkpoint=True, skip_failed=True)
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
### `QGuideSet`
|
|
108
|
+
|
|
109
|
+
`download().parse()` returns a `QGuideSet`, a list-like container of `QGuide` objects.
|
|
110
|
+
|
|
111
|
+
```python
|
|
112
|
+
len(results) # number of QGuides
|
|
113
|
+
results[0] # access by index
|
|
114
|
+
for guide in results: # iterate
|
|
115
|
+
print(guide.course.title)
|
|
116
|
+
|
|
117
|
+
# Merge entries that share the same QGuide ID (e.g., multi-instructor courses)
|
|
118
|
+
merged = results.agg(by="id")
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
### Data Models
|
|
122
|
+
|
|
123
|
+
Each `QGuide` contains:
|
|
124
|
+
|
|
125
|
+
| Field | Type | Description |
|
|
126
|
+
|---|---|---|
|
|
127
|
+
| `id` | `str` | Unique QGuide identifier |
|
|
128
|
+
| `course` | `Course` | Course metadata |
|
|
129
|
+
| `response_rate` | `ResponseRate` | Survey response counts and ratio |
|
|
130
|
+
| `course_feedback` | `CourseFeedback` | Likert ratings for overall course, materials, assignments, etc. |
|
|
131
|
+
| `instructor_feedback` | `list[InstructorFeedback]` | Per-instructor Likert ratings |
|
|
132
|
+
| `hours_per_week` | `HoursPerWeek` | Reported weekly workload distribution |
|
|
133
|
+
| `recommendation_strength` | `RecommendationStrength` | How strongly students recommend the course |
|
|
134
|
+
| `reasons_for_enrollment` | `ReasonsForEnrollment` | Distribution of enrollment motivations |
|
|
135
|
+
| `comments` | `list[Comment]` | Free-text student comments |
|
|
136
|
+
|
|
137
|
+
`Course` fields: `title`, `subject`, `department`, `number`, `section`, `instructors`, `semester`, `aliases`.
|
|
138
|
+
|
|
139
|
+
### Exporting
|
|
140
|
+
|
|
141
|
+
```python
|
|
142
|
+
# Write to JSON file
|
|
143
|
+
qguider.exporter.to_json(results, "output.json")
|
|
144
|
+
|
|
145
|
+
# Return JSON string without writing
|
|
146
|
+
json_str = qguider.exporter.to_json(results)
|
|
147
|
+
|
|
148
|
+
# Convert to pandas DataFrame (requires pandas)
|
|
149
|
+
df = qguider.exporter.to_dataframe(results)
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
### Importing
|
|
153
|
+
|
|
154
|
+
```python
|
|
155
|
+
results = qguider.importer.from_json("output.json")
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
## CLI Example
|
|
159
|
+
|
|
160
|
+
A reference CLI is provided in [`examples/cli.py`](examples/cli.py):
|
|
161
|
+
|
|
162
|
+
```bash
|
|
163
|
+
# Download and parse all semesters, write JSON
|
|
164
|
+
python -m examples.cli --download
|
|
165
|
+
|
|
166
|
+
# Download with Rich progress bar
|
|
167
|
+
python -m examples.cli --download --progress
|
|
168
|
+
|
|
169
|
+
# Parse previously downloaded HTML files
|
|
170
|
+
python -m examples.cli --parse
|
|
171
|
+
|
|
172
|
+
# Import from a previously exported JSON
|
|
173
|
+
python -m examples.cli --import
|
|
174
|
+
|
|
175
|
+
# Skip aggregation of multi-instructor courses
|
|
176
|
+
python -m examples.cli --download --no-agg
|
|
177
|
+
|
|
178
|
+
# Set logging verbosity
|
|
179
|
+
python -m examples.cli --download --log-level DEBUG
|
|
180
|
+
|
|
181
|
+
# Clear all downloaded data
|
|
182
|
+
python -m examples.cli --clear-all
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
## Supported Semesters
|
|
186
|
+
|
|
187
|
+
QGuider currently supports FAS (Faculty of Arts and Sciences) evaluations for:
|
|
188
|
+
|
|
189
|
+
- Fall 2023
|
|
190
|
+
- Spring 2024
|
|
191
|
+
- Fall 2024
|
|
192
|
+
- Spring 2025
|
|
193
|
+
- Fall 2025
|
|
194
|
+
- Spring 2026
|
|
195
|
+
|
|
196
|
+
## Notes
|
|
197
|
+
|
|
198
|
+
- Downloaded HTML files are cached under `qguider_data/` by semester, department, and subject — re-running a download with `checkpoint=True` skips already-downloaded files.
|
|
199
|
+
- `agg(by="id")` merges records that share a QGuide ID, deduplicating instructor feedback and comments across entries for the same course offering.
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
# Used by cli.py. Not an example.
|
|
2
|
+
|
|
3
|
+
from collections import deque
|
|
4
|
+
import logging
|
|
5
|
+
|
|
6
|
+
from rich.console import Console
|
|
7
|
+
from rich.layout import Layout
|
|
8
|
+
from rich.live import Live
|
|
9
|
+
from rich.panel import Panel
|
|
10
|
+
from rich.progress import (
|
|
11
|
+
Progress,
|
|
12
|
+
BarColumn,
|
|
13
|
+
TextColumn,
|
|
14
|
+
TimeRemainingColumn,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def make_rich_ui(log_level: str = "INFO"):
    """Build the Rich live UI used by ``cli.py`` and route logging into it.

    Creates a two-row layout: a progress panel on top and a "Logs" panel
    below that shows the 20 most recent formatted log records. The root
    logger's existing handlers are removed and replaced by a handler that
    writes into the "Logs" panel.

    Args:
        log_level: Name of a ``logging`` level (case-insensitive). A name
            that is not an attribute of ``logging`` falls back to ``INFO``.

    Returns:
        A ``(console, layout, progress, Live)`` tuple. ``Live`` is the
        ``rich.live.Live`` class itself (not an instance), so the caller
        can open the live display as a context manager.
    """
    # Local import so this block stays self-contained. Text renders its
    # content literally, whereas a plain str handed to Panel is parsed as
    # Rich console markup and bracketed log text could be swallowed or
    # raise a markup error.
    from rich.text import Text

    console = Console()
    # Bounded buffer: only the most recent 20 log lines are displayed.
    log_lines = deque(maxlen=20)

    progress = Progress(
        TextColumn("[bold]{task.description}"),
        BarColumn(),
        TextColumn("{task.completed}/{task.total}"),
        TimeRemainingColumn(),
    )

    layout = Layout()
    layout.split_column(
        Layout(Panel(progress), name="progress", size=5),
        Layout(Panel("", title="Logs"), name="logs"),
    )

    class PanelLogHandler(logging.Handler):
        """Logging handler that mirrors recent records into the Logs panel."""

        def emit(self, record):
            log_lines.append(self.format(record))
            layout["logs"].update(
                Panel(Text("\n".join(log_lines)), title="Logs")
            )

    handler = PanelLogHandler()
    handler.setFormatter(
        logging.Formatter("%(levelname)s:%(name)s:%(message)s")
    )

    # Replace every existing root handler so log output only appears in
    # the panel.
    root = logging.getLogger()
    root.handlers.clear()
    root.addHandler(handler)
    root.setLevel(getattr(logging, log_level.upper(), logging.INFO))

    return console, layout, progress, Live
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import qguider
|
|
3
|
+
import logging
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from examples._ui import make_rich_ui
|
|
6
|
+
import shutil
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class ColorFormatter(logging.Formatter):
    """Formatter that prefixes each message with an ANSI-colored level tag.

    Output has the shape ``<color>[LEVEL]<reset> message``. Levels without
    an entry in ``COLORS`` get no color prefix (the reset code is still
    emitted).
    """

    # ANSI escape sequence used for the "[LEVEL]" tag of each standard level.
    COLORS = {
        logging.DEBUG: "\033[37m",
        logging.INFO: "\033[36m",
        logging.WARNING: "\033[33m",
        logging.ERROR: "\033[31m",
        logging.CRITICAL: "\033[1;31m",
    }

    # ANSI escape sequence that restores the terminal's default style.
    RESET = "\033[0m"

    def format(self, record):
        """Return the colored one-line message for *record*.

        When the record carries exception or stack information (for
        example from ``logger.exception(...)``), the formatted traceback
        is appended on the following lines instead of being dropped.
        """
        color = self.COLORS.get(record.levelno, "")
        text = (
            f"{color}[{record.levelname}]{self.RESET} "
            f"{record.getMessage()}"
        )
        if record.exc_info:
            text = f"{text}\n{self.formatException(record.exc_info)}"
        if record.stack_info:
            text = f"{text}\n{self.formatStack(record.stack_info)}"
        return text
|
|
26
|
+
|
|
27
|
+
def configure_logging(level: str = "INFO"):
    """Send root-logger output to a stream handler using ``ColorFormatter``.

    Any handlers already attached to the root logger are removed first.

    Args:
        level: Name of a ``logging`` level (case-insensitive). A name that
            is not an attribute of ``logging`` falls back to ``INFO``.
    """
    stream_handler = logging.StreamHandler()
    stream_handler.setFormatter(ColorFormatter())

    resolved_level = getattr(logging, level.upper(), logging.INFO)

    root_logger = logging.getLogger()
    root_logger.handlers.clear()
    root_logger.addHandler(stream_handler)
    root_logger.setLevel(resolved_level)
|
|
36
|
+
|
|
37
|
+
def download_all(progress = None, agg = False):
    """Download and parse six semesters of QGuides, then export them to JSON.

    Credentials are read from ``.env`` and the result is written to
    ``qguider_data/all_qguides.json``.

    Args:
        progress: Object handed to the query's ``.progress(...)`` call;
            ``None`` is passed through unchanged.
        agg: When true, the parsed results are merged with
            ``agg(by="id")`` before export.
    """
    semester_names = (
        "Fall 2023",
        "Spring 2024",
        "Fall 2024",
        "Spring 2025",
        "Fall 2025",
        "Spring 2026",
    )

    client = qguider.QGuider(creds=".env")
    query = client.query().semesters(*semester_names).progress(progress)
    downloaded = query.download(
        checkpoint=True,
        checkpoint_interval=15,
        report_failed=True,
    )
    guides = downloaded.parse(skip_failed=True)

    if agg:
        guides = guides.agg(by="id")

    qguider.exporter.to_json(guides, "qguider_data/all_qguides.json")
|
|
66
|
+
|
|
67
|
+
def parse_all(progress=None):
    """Parse every ``*.html`` file under ``qguider_data`` and print a summary.

    Args:
        progress: Optional progress object. When truthy, a
            "Parsing QGuides" task is added and advanced once per file,
            including files whose parse raises.
    """
    import qguider.parser

    html_paths = list(Path("qguider_data").rglob("*.html"))
    task = (
        progress.add_task("Parsing QGuides", total=len(html_paths))
        if progress
        else None
    )

    for html_path in html_paths:
        try:
            parsed = qguider.parser.QGuideParser(html_path).parse()
            # NOTE(review): the README's QGuide field table lists no
            # `instructor` attribute (only `course.instructors` and
            # `instructor_feedback`) - confirm this access against models.py.
            print(parsed.course, parsed.instructor)
        finally:
            # No except clause: a parse error still propagates after the
            # progress task has been advanced.
            if progress and task is not None:
                progress.advance(task)
|
|
83
|
+
|
|
84
|
+
def import_all():
    """Load ``qguider_data/all_qguides.json`` and print how many QGuides it holds."""
    guides = qguider.importer.from_json("qguider_data/all_qguides.json")
    count = len(guides)
    print(f"Imported {count} QGuides.")
|
|
87
|
+
|
|
88
|
+
def clear_all():
    """Remove the ``qguider_data`` directory tree and print a confirmation.

    Removal errors (including the directory not existing) are ignored, so
    the confirmation is printed regardless.
    """
    data_dir = "qguider_data"
    shutil.rmtree(data_dir, ignore_errors=True)
    print("Cleared all downloaded data.")
|
|
91
|
+
|
|
92
|
+
if __name__ == "__main__":
    # Command-line entry point: every action is an opt-in boolean flag.
    parser = argparse.ArgumentParser(description="Download and parse QGuides")
    # "import" is a Python keyword, so the flag is stored as args.do_import.
    parser.add_argument("--import", dest="do_import", action="store_true")
    for flag in ("--parse", "--download", "--progress", "--no-agg", "--clear-all"):
        parser.add_argument(flag, action="store_true")
    parser.add_argument("--log-level", default="INFO")

    args = parser.parse_args()

    # Clearing runs first so a combined "--clear-all --download" starts clean.
    if args.clear_all:
        clear_all()

    def run_selected(progress):
        """Run the requested actions in download -> parse -> import order."""
        if args.download:
            download_all(progress=progress, agg=not args.no_agg)

        if args.parse:
            parse_all(progress=progress)

        if args.do_import:
            import_all()

    if args.progress:
        # Rich UI mode: logging is routed into the live layout's log panel.
        console, layout, progress, Live = make_rich_ui(args.log_level)

        with Live(layout, console=console, refresh_per_second=10):
            run_selected(progress)
    else:
        # Plain mode: colored log lines on the default stream handler.
        configure_logging(args.log_level)
        run_selected(None)
|
|
130
|
+
|
|
131
|
+
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "qguider"
|
|
7
|
+
version = "1.0.0"
|
|
8
|
+
description = "A Python package to download and scrape QGuides."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
|
+
dependencies = [
|
|
12
|
+
"requests",
|
|
13
|
+
"beautifulsoup4",
|
|
14
|
+
"python-dotenv",
|
|
15
|
+
"pydantic",
|
|
16
|
+
"tqdm>=0.0.1",
|
|
17
|
+
"rich>=15.0.0",
|
|
18
|
+
]
|
|
19
|
+
|
|
20
|
+
[project.optional-dependencies]
|
|
21
|
+
dev = [
|
|
22
|
+
"pytest",
|
|
23
|
+
]
|
|
24
|
+
|
|
25
|
+
[tool.pytest.ini_options]
|
|
26
|
+
testpaths = ["tests"]
|