get-class-material 0.2.2__tar.gz → 0.2.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {get_class_material-0.2.2 → get_class_material-0.2.3}/PKG-INFO +1 -1
- {get_class_material-0.2.2 → get_class_material-0.2.3}/get_class_material.egg-info/PKG-INFO +1 -1
- {get_class_material-0.2.2 → get_class_material-0.2.3}/ntu_cool_materials/cli.py +104 -6
- {get_class_material-0.2.2 → get_class_material-0.2.3}/ntu_cool_materials/course_pipeline.py +60 -5
- {get_class_material-0.2.2 → get_class_material-0.2.3}/pyproject.toml +1 -1
- {get_class_material-0.2.2 → get_class_material-0.2.3}/LICENSE +0 -0
- {get_class_material-0.2.2 → get_class_material-0.2.3}/README.md +0 -0
- {get_class_material-0.2.2 → get_class_material-0.2.3}/get_class_material.egg-info/SOURCES.txt +0 -0
- {get_class_material-0.2.2 → get_class_material-0.2.3}/get_class_material.egg-info/dependency_links.txt +0 -0
- {get_class_material-0.2.2 → get_class_material-0.2.3}/get_class_material.egg-info/entry_points.txt +0 -0
- {get_class_material-0.2.2 → get_class_material-0.2.3}/get_class_material.egg-info/requires.txt +0 -0
- {get_class_material-0.2.2 → get_class_material-0.2.3}/get_class_material.egg-info/top_level.txt +0 -0
- {get_class_material-0.2.2 → get_class_material-0.2.3}/ntu_cool_materials/__init__.py +0 -0
- {get_class_material-0.2.2 → get_class_material-0.2.3}/ntu_cool_materials/__main__.py +0 -0
- {get_class_material-0.2.2 → get_class_material-0.2.3}/ntu_cool_materials/announcements.py +0 -0
- {get_class_material-0.2.2 → get_class_material-0.2.3}/ntu_cool_materials/browser_session.py +0 -0
- {get_class_material-0.2.2 → get_class_material-0.2.3}/ntu_cool_materials/canvas_client.py +0 -0
- {get_class_material-0.2.2 → get_class_material-0.2.3}/ntu_cool_materials/cool_video.py +0 -0
- {get_class_material-0.2.2 → get_class_material-0.2.3}/ntu_cool_materials/doctor.py +0 -0
- {get_class_material-0.2.2 → get_class_material-0.2.3}/ntu_cool_materials/i18n.py +0 -0
- {get_class_material-0.2.2 → get_class_material-0.2.3}/ntu_cool_materials/media_naming.py +0 -0
- {get_class_material-0.2.2 → get_class_material-0.2.3}/ntu_cool_materials/pages.py +0 -0
- {get_class_material-0.2.2 → get_class_material-0.2.3}/ntu_cool_materials/session_client.py +0 -0
- {get_class_material-0.2.2 → get_class_material-0.2.3}/ntu_cool_materials/storage.py +0 -0
- {get_class_material-0.2.2 → get_class_material-0.2.3}/ntu_cool_materials/sync.py +0 -0
- {get_class_material-0.2.2 → get_class_material-0.2.3}/ntu_cool_materials/text_extract.py +0 -0
- {get_class_material-0.2.2 → get_class_material-0.2.3}/ntu_cool_materials/youtube_cookies.py +0 -0
- {get_class_material-0.2.2 → get_class_material-0.2.3}/setup.cfg +0 -0
- {get_class_material-0.2.2 → get_class_material-0.2.3}/tests/test_announcements.py +0 -0
- {get_class_material-0.2.2 → get_class_material-0.2.3}/tests/test_browser_session.py +0 -0
- {get_class_material-0.2.2 → get_class_material-0.2.3}/tests/test_canvas_client.py +0 -0
- {get_class_material-0.2.2 → get_class_material-0.2.3}/tests/test_cool_video.py +0 -0
- {get_class_material-0.2.2 → get_class_material-0.2.3}/tests/test_media_naming.py +0 -0
- {get_class_material-0.2.2 → get_class_material-0.2.3}/tests/test_session_client.py +0 -0
- {get_class_material-0.2.2 → get_class_material-0.2.3}/tests/test_storage.py +0 -0
- {get_class_material-0.2.2 → get_class_material-0.2.3}/tests/test_youtube_cookies.py +0 -0
|
@@ -165,8 +165,9 @@ def _build_parser() -> argparse.ArgumentParser:
|
|
|
165
165
|
"pick",
|
|
166
166
|
help="Interactive: list your active courses, you pick one, it downloads everything.",
|
|
167
167
|
)
|
|
168
|
-
pick.add_argument("--out", default=
|
|
169
|
-
help="Output directory
|
|
168
|
+
pick.add_argument("--out", default=None,
|
|
169
|
+
help="Output directory. Default: ~/Documents/ntu-cool-gcm_material "
|
|
170
|
+
"(or ./ntu-cool-gcm_material if you already have one there).")
|
|
170
171
|
pick.add_argument("--headers-file", default=".secrets/ntu_cool_headers.txt")
|
|
171
172
|
pick.add_argument("--refresh-session", action="store_true",
|
|
172
173
|
help="Open the browser to refresh login before listing.")
|
|
@@ -179,6 +180,12 @@ def _build_parser() -> argparse.ArgumentParser:
|
|
|
179
180
|
pick.add_argument("--skip-pages", action="store_true")
|
|
180
181
|
pick.add_argument("--skip-youtube", action="store_true")
|
|
181
182
|
pick.add_argument("--skip-cool-videos", action="store_true")
|
|
183
|
+
pick.add_argument(
|
|
184
|
+
"--all-file-types", action="store_true",
|
|
185
|
+
help="Also download non-PDF files (.docx / .pptx / .xlsx / .zip / etc.). "
|
|
186
|
+
"Default skips them so a friend's .doc that breaks when force-renamed "
|
|
187
|
+
"to .pdf doesn't show up unsolicited.",
|
|
188
|
+
)
|
|
182
189
|
pick.add_argument(
|
|
183
190
|
"--keep-terminal", action="store_true",
|
|
184
191
|
help="Don't close the PowerShell window on quit (Windows only).",
|
|
@@ -193,8 +200,9 @@ def _build_parser() -> argparse.ArgumentParser:
|
|
|
193
200
|
help="Download every PDF, Page, YouTube link and NTU CDN video for a course.",
|
|
194
201
|
)
|
|
195
202
|
course.add_argument("--course-id", required=True, help="Canvas course id (e.g. 60804).")
|
|
196
|
-
course.add_argument("--out", default=
|
|
197
|
-
help="Output directory
|
|
203
|
+
course.add_argument("--out", default=None,
|
|
204
|
+
help="Output directory. Default: ~/Documents/ntu-cool-gcm_material "
|
|
205
|
+
"(or ./ntu-cool-gcm_material if you already have one there).")
|
|
198
206
|
course.add_argument(
|
|
199
207
|
"--headers-file",
|
|
200
208
|
default=".secrets/ntu_cool_headers.txt",
|
|
@@ -220,6 +228,10 @@ def _build_parser() -> argparse.ArgumentParser:
|
|
|
220
228
|
course.add_argument("--skip-pages", action="store_true")
|
|
221
229
|
course.add_argument("--skip-youtube", action="store_true")
|
|
222
230
|
course.add_argument("--skip-cool-videos", action="store_true")
|
|
231
|
+
course.add_argument(
|
|
232
|
+
"--all-file-types", action="store_true",
|
|
233
|
+
help="Also download non-PDF files (.docx / .pptx / .xlsx / .zip / etc.).",
|
|
234
|
+
)
|
|
223
235
|
|
|
224
236
|
sync = subparsers.add_parser("sync", help="Download course files and module metadata.")
|
|
225
237
|
sync.add_argument("--state", default="active", help="Canvas enrollment_state filter.")
|
|
@@ -373,6 +385,90 @@ def _close_parent_terminal_on_quit() -> None:
|
|
|
373
385
|
pass
|
|
374
386
|
|
|
375
387
|
|
|
388
|
+
def _windows_documents_folder() -> Path | None:
|
|
389
|
+
"""Ask Windows itself where the user's Documents folder really is.
|
|
390
|
+
|
|
391
|
+
`~/Documents` works on most installs but breaks in two real cases:
|
|
392
|
+
- OneDrive sync redirects "Documents" to ~/OneDrive/<localized>/...
|
|
393
|
+
Often that's ~/OneDrive/文件/ on a Traditional Chinese system.
|
|
394
|
+
- User moved Documents to D drive via Properties → Location.
|
|
395
|
+
|
|
396
|
+
Windows' SHGetKnownFolderPath returns the actual filesystem path for
|
|
397
|
+
FOLDERID_Documents, transparent to OneDrive redirect, locale, and any
|
|
398
|
+
user customization. Returns None on non-Windows or if the call fails
|
|
399
|
+
(caller falls back to ~/Documents)."""
|
|
400
|
+
if os.name != "nt":
|
|
401
|
+
return None
|
|
402
|
+
try:
|
|
403
|
+
import ctypes
|
|
404
|
+
from ctypes import wintypes
|
|
405
|
+
|
|
406
|
+
class _GUID(ctypes.Structure):
|
|
407
|
+
_fields_ = [
|
|
408
|
+
("Data1", wintypes.DWORD),
|
|
409
|
+
("Data2", wintypes.WORD),
|
|
410
|
+
("Data3", wintypes.WORD),
|
|
411
|
+
("Data4", ctypes.c_ubyte * 8),
|
|
412
|
+
]
|
|
413
|
+
|
|
414
|
+
# FOLDERID_Documents = {FDD39AD0-238F-46AF-ADB4-6C85480369C7}
|
|
415
|
+
folderid_documents = _GUID(
|
|
416
|
+
0xFDD39AD0, 0x238F, 0x46AF,
|
|
417
|
+
(ctypes.c_ubyte * 8)(0xAD, 0xB4, 0x6C, 0x85, 0x48, 0x03, 0x69, 0xC7),
|
|
418
|
+
)
|
|
419
|
+
|
|
420
|
+
SHGetKnownFolderPath = ctypes.windll.shell32.SHGetKnownFolderPath
|
|
421
|
+
SHGetKnownFolderPath.argtypes = [
|
|
422
|
+
ctypes.POINTER(_GUID), wintypes.DWORD, wintypes.HANDLE,
|
|
423
|
+
ctypes.POINTER(ctypes.c_wchar_p),
|
|
424
|
+
]
|
|
425
|
+
SHGetKnownFolderPath.restype = ctypes.HRESULT
|
|
426
|
+
|
|
427
|
+
path_ptr = ctypes.c_wchar_p()
|
|
428
|
+
hr = SHGetKnownFolderPath(
|
|
429
|
+
ctypes.byref(folderid_documents), 0, None, ctypes.byref(path_ptr),
|
|
430
|
+
)
|
|
431
|
+
if hr != 0 or not path_ptr.value:
|
|
432
|
+
return None
|
|
433
|
+
result = Path(path_ptr.value)
|
|
434
|
+
ctypes.windll.ole32.CoTaskMemFree(path_ptr)
|
|
435
|
+
return result
|
|
436
|
+
except Exception:
|
|
437
|
+
return None
|
|
438
|
+
|
|
439
|
+
|
|
440
|
+
def _default_documents_root() -> Path:
|
|
441
|
+
"""Where to put the materials folder by default (parent of ntu-cool-gcm_material).
|
|
442
|
+
|
|
443
|
+
Windows: SHGetKnownFolderPath if available (handles OneDrive + locale),
|
|
444
|
+
else ~/Documents.
|
|
445
|
+
Mac/Linux: ~/Documents.
|
|
446
|
+
"""
|
|
447
|
+
win_docs = _windows_documents_folder()
|
|
448
|
+
if win_docs is not None:
|
|
449
|
+
return win_docs
|
|
450
|
+
return Path.home() / "Documents"
|
|
451
|
+
|
|
452
|
+
|
|
453
|
+
def _resolve_output_dir(arg_value: str | None) -> Path:
|
|
454
|
+
"""Resolve where to put downloaded materials.
|
|
455
|
+
|
|
456
|
+
Precedence:
|
|
457
|
+
1. Explicit --out argument (user wins).
|
|
458
|
+
2. ./ntu-cool-gcm_material in the current directory IF it already
|
|
459
|
+
exists (legacy users from <0.2.6 who have stuff there already).
|
|
460
|
+
3. <Documents>/ntu-cool-gcm_material — Documents resolved via Windows'
|
|
461
|
+
own KnownFolders API so OneDrive redirects and zh-TW localization
|
|
462
|
+
go to the right place.
|
|
463
|
+
"""
|
|
464
|
+
if arg_value:
|
|
465
|
+
return Path(arg_value).expanduser()
|
|
466
|
+
legacy = Path("ntu-cool-gcm_material")
|
|
467
|
+
if legacy.exists() and legacy.is_dir():
|
|
468
|
+
return legacy
|
|
469
|
+
return _default_documents_root() / "ntu-cool-gcm_material"
|
|
470
|
+
|
|
471
|
+
|
|
376
472
|
def _maybe_set_up_youtube_cookies(cookies_path: Path) -> None:
|
|
377
473
|
"""If the user has no YouTube cookies, offer to set them up inline.
|
|
378
474
|
Skips silently when stdin isn't a tty (piped input / tests)."""
|
|
@@ -510,7 +606,7 @@ def _cmd_pick(base_url: str, args: argparse.Namespace) -> int:
|
|
|
510
606
|
try:
|
|
511
607
|
download_course(
|
|
512
608
|
course_id=course_id,
|
|
513
|
-
output_dir=
|
|
609
|
+
output_dir=_resolve_output_dir(args.out),
|
|
514
610
|
base_url=base_url,
|
|
515
611
|
headers_path=headers_path,
|
|
516
612
|
refresh_session=False,
|
|
@@ -523,6 +619,7 @@ def _cmd_pick(base_url: str, args: argparse.Namespace) -> int:
|
|
|
523
619
|
skip_pages=args.skip_pages,
|
|
524
620
|
skip_youtube=args.skip_youtube,
|
|
525
621
|
skip_cool_videos=args.skip_cool_videos,
|
|
622
|
+
all_file_types=args.all_file_types,
|
|
526
623
|
)
|
|
527
624
|
except RuntimeError as exc:
|
|
528
625
|
print(t(f"下載失敗: {exc}", f"download failed: {exc}"))
|
|
@@ -798,7 +895,7 @@ def _cmd_download_course(base_url: str, args: argparse.Namespace) -> int:
|
|
|
798
895
|
try:
|
|
799
896
|
download_course(
|
|
800
897
|
course_id=args.course_id,
|
|
801
|
-
output_dir=
|
|
898
|
+
output_dir=_resolve_output_dir(args.out),
|
|
802
899
|
base_url=base_url,
|
|
803
900
|
headers_path=headers_path,
|
|
804
901
|
refresh_session=args.refresh_session,
|
|
@@ -809,6 +906,7 @@ def _cmd_download_course(base_url: str, args: argparse.Namespace) -> int:
|
|
|
809
906
|
skip_pages=args.skip_pages,
|
|
810
907
|
skip_youtube=args.skip_youtube,
|
|
811
908
|
skip_cool_videos=args.skip_cool_videos,
|
|
909
|
+
all_file_types=args.all_file_types,
|
|
812
910
|
)
|
|
813
911
|
return 0
|
|
814
912
|
except RuntimeError as exc:
|
|
@@ -294,19 +294,66 @@ def _download_signed_url(url: str, target: Path) -> None:
|
|
|
294
294
|
|
|
295
295
|
# ---- per-stage workers ----
|
|
296
296
|
|
|
297
|
-
|
|
298
|
-
"""
|
|
297
|
+
_KNOWN_FILE_EXTS = {
|
|
298
|
+
".pdf", ".doc", ".docx", ".ppt", ".pptx", ".xls", ".xlsx", ".csv",
|
|
299
|
+
".zip", ".rar", ".7z", ".tar", ".gz",
|
|
300
|
+
".txt", ".md", ".rtf",
|
|
301
|
+
".jpg", ".jpeg", ".png", ".gif", ".bmp",
|
|
302
|
+
".mp3", ".wav", ".m4a",
|
|
303
|
+
".mp4", ".mov", ".avi", ".mkv",
|
|
304
|
+
}
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
def download_files(
|
|
308
|
+
plan: CoursePlan, client: CanvasSessionClient, *, all_file_types: bool = False,
|
|
309
|
+
) -> StageStats:
|
|
310
|
+
"""Download every File-type module item directly into the week directory.
|
|
311
|
+
|
|
312
|
+
Default: download every file but save with .pdf extension regardless of
|
|
313
|
+
the original type. Most NTU course material is PDF, so this gives a
|
|
314
|
+
uniform-looking output. Files that aren't really PDF (.docx, .xlsx, .zip,
|
|
315
|
+
etc.) still download fine — they just sit on disk with a .pdf extension.
|
|
316
|
+
Most apps still open them (Excel/Word sniff the binary), but if anything
|
|
317
|
+
won't open, re-run with `all_file_types=True` to get the real extensions.
|
|
318
|
+
|
|
319
|
+
With `all_file_types=True`, every file keeps its original extension
|
|
320
|
+
(.pdf / .docx / .pptx / .xlsx / .zip / etc.) — the cleanest behavior but
|
|
321
|
+
means the output isn't uniformly .pdf.
|
|
322
|
+
"""
|
|
299
323
|
headers = _session_headers(client)
|
|
300
324
|
stats = StageStats()
|
|
325
|
+
forced_pdf_count = 0
|
|
301
326
|
for week in plan.weeks:
|
|
302
327
|
for item in week.items:
|
|
303
328
|
if item.get("type") != "File":
|
|
304
329
|
continue
|
|
305
330
|
file_id = str(item.get("content_id") or "")
|
|
306
331
|
title = str(item.get("title") or "").strip() or f"item-{item.get('id')}"
|
|
307
|
-
|
|
332
|
+
|
|
333
|
+
# Real extension for this file: prefer Canvas's display_name in
|
|
334
|
+
# content_details, fall back to the title's suffix.
|
|
335
|
+
content_details = item.get("content_details") or {}
|
|
336
|
+
display_name = str(content_details.get("display_name") or "")
|
|
337
|
+
real_ext = Path(display_name).suffix.lower() if display_name else ""
|
|
338
|
+
if not real_ext:
|
|
339
|
+
real_ext = Path(title).suffix.lower()
|
|
340
|
+
|
|
341
|
+
# Decide what extension to use ON DISK.
|
|
342
|
+
if all_file_types:
|
|
343
|
+
use_ext = real_ext or ".pdf"
|
|
344
|
+
else:
|
|
345
|
+
use_ext = ".pdf"
|
|
346
|
+
if real_ext and real_ext != ".pdf":
|
|
347
|
+
forced_pdf_count += 1
|
|
348
|
+
|
|
349
|
+
# Strip any known extension from the title (case-insensitive) so we
|
|
350
|
+
# don't double up: "syllabus.pdf" + ".pdf" = "syllabus.pdf", not
|
|
351
|
+
# "syllabus.pdf.pdf"; "MS-02_C03-Ex.xlsx" + ".pdf" = "MS-02_C03-Ex.pdf",
|
|
352
|
+
# not "MS-02_C03-Ex.xlsx.pdf".
|
|
353
|
+
title_ext = Path(title).suffix.lower()
|
|
354
|
+
stem = title[:-len(title_ext)] if title_ext in _KNOWN_FILE_EXTS else title
|
|
308
355
|
safe_title = sanitize_teacher_title(stem)
|
|
309
|
-
target = week.week_dir / f"{safe_title}
|
|
356
|
+
target = week.week_dir / f"{safe_title}{use_ext}"
|
|
310
357
|
if target.exists():
|
|
311
358
|
stats.skipped += 1
|
|
312
359
|
continue
|
|
@@ -327,6 +374,13 @@ def download_files(plan: CoursePlan, client: CanvasSessionClient) -> StageStats:
|
|
|
327
374
|
except Exception as exc:
|
|
328
375
|
print(f" ✗ 失敗: {exc}")
|
|
329
376
|
stats.failed.append(f"{week.label}/{target.name}: {type(exc).__name__}: {exc}")
|
|
377
|
+
if forced_pdf_count > 0:
|
|
378
|
+
print(t(
|
|
379
|
+
f" 注意: {forced_pdf_count} 個原本不是 PDF 的檔案被存成 .pdf。"
|
|
380
|
+
f"如果有檔案打不開,加 --all-file-types 重抓會用真實副檔名。",
|
|
381
|
+
f" Note: {forced_pdf_count} non-PDF file(s) were saved with a .pdf extension. "
|
|
382
|
+
f"If any won't open, re-run with --all-file-types to keep their real extension.",
|
|
383
|
+
))
|
|
330
384
|
return stats
|
|
331
385
|
|
|
332
386
|
|
|
@@ -793,6 +847,7 @@ def download_course(
|
|
|
793
847
|
headless: bool = False,
|
|
794
848
|
skip_pdfs: bool = False, skip_pages: bool = False,
|
|
795
849
|
skip_youtube: bool = False, skip_cool_videos: bool = False,
|
|
850
|
+
all_file_types: bool = False,
|
|
796
851
|
sso_timeout_sec: int = 600,
|
|
797
852
|
) -> CoursePlan:
|
|
798
853
|
"""Top-level orchestrator. Opens at most ONE Playwright context for the entire run.
|
|
@@ -872,7 +927,7 @@ def download_course(
|
|
|
872
927
|
if not skip_pdfs:
|
|
873
928
|
print(t("\n[1/4] PDF 檔案", "\n[1/4] PDFs / Files"))
|
|
874
929
|
course_stats.pdfs = _run_with_session_retry(
|
|
875
|
-
lambda c: download_files(plan, c), t("PDF", "files")
|
|
930
|
+
lambda c: download_files(plan, c, all_file_types=all_file_types), t("PDF", "files")
|
|
876
931
|
)
|
|
877
932
|
print(t(
|
|
878
933
|
f" 下載 {course_stats.pdfs.done}、跳過 {course_stats.pdfs.skipped}、失敗 {len(course_stats.pdfs.failed)}",
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "get-class-material"
|
|
7
|
-
version = "0.2.2"
|
|
7
|
+
version = "0.2.3"
|
|
8
8
|
description = "One-command bulk downloader for NTU COOL (Canvas) course materials — PDFs, lecture videos, and Pages."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = "MIT"
|
|
File without changes
|
|
File without changes
|
{get_class_material-0.2.2 → get_class_material-0.2.3}/get_class_material.egg-info/SOURCES.txt
RENAMED
|
File without changes
|
|
File without changes
|
{get_class_material-0.2.2 → get_class_material-0.2.3}/get_class_material.egg-info/entry_points.txt
RENAMED
|
File without changes
|
{get_class_material-0.2.2 → get_class_material-0.2.3}/get_class_material.egg-info/requires.txt
RENAMED
|
File without changes
|
{get_class_material-0.2.2 → get_class_material-0.2.3}/get_class_material.egg-info/top_level.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|