get-class-material 0.2.2__tar.gz → 0.2.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. {get_class_material-0.2.2 → get_class_material-0.2.3}/PKG-INFO +1 -1
  2. {get_class_material-0.2.2 → get_class_material-0.2.3}/get_class_material.egg-info/PKG-INFO +1 -1
  3. {get_class_material-0.2.2 → get_class_material-0.2.3}/ntu_cool_materials/cli.py +104 -6
  4. {get_class_material-0.2.2 → get_class_material-0.2.3}/ntu_cool_materials/course_pipeline.py +60 -5
  5. {get_class_material-0.2.2 → get_class_material-0.2.3}/pyproject.toml +1 -1
  6. {get_class_material-0.2.2 → get_class_material-0.2.3}/LICENSE +0 -0
  7. {get_class_material-0.2.2 → get_class_material-0.2.3}/README.md +0 -0
  8. {get_class_material-0.2.2 → get_class_material-0.2.3}/get_class_material.egg-info/SOURCES.txt +0 -0
  9. {get_class_material-0.2.2 → get_class_material-0.2.3}/get_class_material.egg-info/dependency_links.txt +0 -0
  10. {get_class_material-0.2.2 → get_class_material-0.2.3}/get_class_material.egg-info/entry_points.txt +0 -0
  11. {get_class_material-0.2.2 → get_class_material-0.2.3}/get_class_material.egg-info/requires.txt +0 -0
  12. {get_class_material-0.2.2 → get_class_material-0.2.3}/get_class_material.egg-info/top_level.txt +0 -0
  13. {get_class_material-0.2.2 → get_class_material-0.2.3}/ntu_cool_materials/__init__.py +0 -0
  14. {get_class_material-0.2.2 → get_class_material-0.2.3}/ntu_cool_materials/__main__.py +0 -0
  15. {get_class_material-0.2.2 → get_class_material-0.2.3}/ntu_cool_materials/announcements.py +0 -0
  16. {get_class_material-0.2.2 → get_class_material-0.2.3}/ntu_cool_materials/browser_session.py +0 -0
  17. {get_class_material-0.2.2 → get_class_material-0.2.3}/ntu_cool_materials/canvas_client.py +0 -0
  18. {get_class_material-0.2.2 → get_class_material-0.2.3}/ntu_cool_materials/cool_video.py +0 -0
  19. {get_class_material-0.2.2 → get_class_material-0.2.3}/ntu_cool_materials/doctor.py +0 -0
  20. {get_class_material-0.2.2 → get_class_material-0.2.3}/ntu_cool_materials/i18n.py +0 -0
  21. {get_class_material-0.2.2 → get_class_material-0.2.3}/ntu_cool_materials/media_naming.py +0 -0
  22. {get_class_material-0.2.2 → get_class_material-0.2.3}/ntu_cool_materials/pages.py +0 -0
  23. {get_class_material-0.2.2 → get_class_material-0.2.3}/ntu_cool_materials/session_client.py +0 -0
  24. {get_class_material-0.2.2 → get_class_material-0.2.3}/ntu_cool_materials/storage.py +0 -0
  25. {get_class_material-0.2.2 → get_class_material-0.2.3}/ntu_cool_materials/sync.py +0 -0
  26. {get_class_material-0.2.2 → get_class_material-0.2.3}/ntu_cool_materials/text_extract.py +0 -0
  27. {get_class_material-0.2.2 → get_class_material-0.2.3}/ntu_cool_materials/youtube_cookies.py +0 -0
  28. {get_class_material-0.2.2 → get_class_material-0.2.3}/setup.cfg +0 -0
  29. {get_class_material-0.2.2 → get_class_material-0.2.3}/tests/test_announcements.py +0 -0
  30. {get_class_material-0.2.2 → get_class_material-0.2.3}/tests/test_browser_session.py +0 -0
  31. {get_class_material-0.2.2 → get_class_material-0.2.3}/tests/test_canvas_client.py +0 -0
  32. {get_class_material-0.2.2 → get_class_material-0.2.3}/tests/test_cool_video.py +0 -0
  33. {get_class_material-0.2.2 → get_class_material-0.2.3}/tests/test_media_naming.py +0 -0
  34. {get_class_material-0.2.2 → get_class_material-0.2.3}/tests/test_session_client.py +0 -0
  35. {get_class_material-0.2.2 → get_class_material-0.2.3}/tests/test_storage.py +0 -0
  36. {get_class_material-0.2.2 → get_class_material-0.2.3}/tests/test_youtube_cookies.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: get-class-material
3
- Version: 0.2.2
3
+ Version: 0.2.3
4
4
  Summary: One-command bulk downloader for NTU COOL (Canvas) course materials — PDFs, lecture videos, and Pages.
5
5
  Author-email: jabir <jabir95tsai@gmail.com>
6
6
  License-Expression: MIT
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: get-class-material
3
- Version: 0.2.2
3
+ Version: 0.2.3
4
4
  Summary: One-command bulk downloader for NTU COOL (Canvas) course materials — PDFs, lecture videos, and Pages.
5
5
  Author-email: jabir <jabir95tsai@gmail.com>
6
6
  License-Expression: MIT
@@ -165,8 +165,9 @@ def _build_parser() -> argparse.ArgumentParser:
165
165
  "pick",
166
166
  help="Interactive: list your active courses, you pick one, it downloads everything.",
167
167
  )
168
- pick.add_argument("--out", default="ntu-cool-gcm_material",
169
- help="Output directory (default: ntu-cool-gcm_material/, relative to current directory).")
168
+ pick.add_argument("--out", default=None,
169
+ help="Output directory. Default: ~/Documents/ntu-cool-gcm_material "
170
+ "(or ./ntu-cool-gcm_material if you already have one there).")
170
171
  pick.add_argument("--headers-file", default=".secrets/ntu_cool_headers.txt")
171
172
  pick.add_argument("--refresh-session", action="store_true",
172
173
  help="Open the browser to refresh login before listing.")
@@ -179,6 +180,12 @@ def _build_parser() -> argparse.ArgumentParser:
179
180
  pick.add_argument("--skip-pages", action="store_true")
180
181
  pick.add_argument("--skip-youtube", action="store_true")
181
182
  pick.add_argument("--skip-cool-videos", action="store_true")
183
+ pick.add_argument(
184
+ "--all-file-types", action="store_true",
185
+ help="Also download non-PDF files (.docx / .pptx / .xlsx / .zip / etc.). "
186
+ "Default skips them so a friend's .doc that breaks when force-renamed "
187
+ "to .pdf doesn't show up unsolicited.",
188
+ )
182
189
  pick.add_argument(
183
190
  "--keep-terminal", action="store_true",
184
191
  help="Don't close the PowerShell window on quit (Windows only).",
@@ -193,8 +200,9 @@ def _build_parser() -> argparse.ArgumentParser:
193
200
  help="Download every PDF, Page, YouTube link and NTU CDN video for a course.",
194
201
  )
195
202
  course.add_argument("--course-id", required=True, help="Canvas course id (e.g. 60804).")
196
- course.add_argument("--out", default="ntu-cool-gcm_material",
197
- help="Output directory (default: ntu-cool-gcm_material/, relative to current directory).")
203
+ course.add_argument("--out", default=None,
204
+ help="Output directory. Default: ~/Documents/ntu-cool-gcm_material "
205
+ "(or ./ntu-cool-gcm_material if you already have one there).")
198
206
  course.add_argument(
199
207
  "--headers-file",
200
208
  default=".secrets/ntu_cool_headers.txt",
@@ -220,6 +228,10 @@ def _build_parser() -> argparse.ArgumentParser:
220
228
  course.add_argument("--skip-pages", action="store_true")
221
229
  course.add_argument("--skip-youtube", action="store_true")
222
230
  course.add_argument("--skip-cool-videos", action="store_true")
231
+ course.add_argument(
232
+ "--all-file-types", action="store_true",
233
+ help="Also download non-PDF files (.docx / .pptx / .xlsx / .zip / etc.).",
234
+ )
223
235
 
224
236
  sync = subparsers.add_parser("sync", help="Download course files and module metadata.")
225
237
  sync.add_argument("--state", default="active", help="Canvas enrollment_state filter.")
@@ -373,6 +385,90 @@ def _close_parent_terminal_on_quit() -> None:
373
385
  pass
374
386
 
375
387
 
388
+ def _windows_documents_folder() -> Path | None:
389
+ """Ask Windows itself where the user's Documents folder really is.
390
+
391
+ `~/Documents` works on most installs but breaks in two real cases:
392
+ - OneDrive sync redirects "Documents" to ~/OneDrive/<localized>/...
393
+ Often that's ~/OneDrive/文件/ on a Traditional Chinese system.
394
+ - User moved Documents to D drive via Properties → Location.
395
+
396
+ Windows' SHGetKnownFolderPath returns the actual filesystem path for
397
+ FOLDERID_Documents, transparent to OneDrive redirect, locale, and any
398
+ user customization. Returns None on non-Windows or if the call fails
399
+ (caller falls back to ~/Documents)."""
400
+ if os.name != "nt":
401
+ return None
402
+ try:
403
+ import ctypes
404
+ from ctypes import wintypes
405
+
406
+ class _GUID(ctypes.Structure):
407
+ _fields_ = [
408
+ ("Data1", wintypes.DWORD),
409
+ ("Data2", wintypes.WORD),
410
+ ("Data3", wintypes.WORD),
411
+ ("Data4", ctypes.c_ubyte * 8),
412
+ ]
413
+
414
+ # FOLDERID_Documents = {FDD39AD0-238F-46AF-ADB4-6C85480369C7}
415
+ folderid_documents = _GUID(
416
+ 0xFDD39AD0, 0x238F, 0x46AF,
417
+ (ctypes.c_ubyte * 8)(0xAD, 0xB4, 0x6C, 0x85, 0x48, 0x03, 0x69, 0xC7),
418
+ )
419
+
420
+ SHGetKnownFolderPath = ctypes.windll.shell32.SHGetKnownFolderPath
421
+ SHGetKnownFolderPath.argtypes = [
422
+ ctypes.POINTER(_GUID), wintypes.DWORD, wintypes.HANDLE,
423
+ ctypes.POINTER(ctypes.c_wchar_p),
424
+ ]
425
+ SHGetKnownFolderPath.restype = ctypes.HRESULT
426
+
427
+ path_ptr = ctypes.c_wchar_p()
428
+ hr = SHGetKnownFolderPath(
429
+ ctypes.byref(folderid_documents), 0, None, ctypes.byref(path_ptr),
430
+ )
431
+ if hr != 0 or not path_ptr.value:
432
+ return None
433
+ result = Path(path_ptr.value)
434
+ ctypes.windll.ole32.CoTaskMemFree(path_ptr)
435
+ return result
436
+ except Exception:
437
+ return None
438
+
439
+
440
+ def _default_documents_root() -> Path:
441
+ """Where to put the materials folder by default (parent of ntu-cool-gcm_material).
442
+
443
+ Windows: SHGetKnownFolderPath if available (handles OneDrive + locale),
444
+ else ~/Documents.
445
+ Mac/Linux: ~/Documents.
446
+ """
447
+ win_docs = _windows_documents_folder()
448
+ if win_docs is not None:
449
+ return win_docs
450
+ return Path.home() / "Documents"
451
+
452
+
453
+ def _resolve_output_dir(arg_value: str | None) -> Path:
454
+ """Resolve where to put downloaded materials.
455
+
456
+ Precedence:
457
+ 1. Explicit --out argument (user wins).
458
+ 2. ./ntu-cool-gcm_material in the current directory IF it already
459
+ exists (legacy users from <0.2.3 who have stuff there already).
460
+ 3. <Documents>/ntu-cool-gcm_material — Documents resolved via Windows'
461
+ own KnownFolders API so OneDrive redirects and zh-TW localization
462
+ go to the right place.
463
+ """
464
+ if arg_value:
465
+ return Path(arg_value).expanduser()
466
+ legacy = Path("ntu-cool-gcm_material")
467
+ if legacy.exists() and legacy.is_dir():
468
+ return legacy
469
+ return _default_documents_root() / "ntu-cool-gcm_material"
470
+
471
+
376
472
  def _maybe_set_up_youtube_cookies(cookies_path: Path) -> None:
377
473
  """If the user has no YouTube cookies, offer to set them up inline.
378
474
  Skips silently when stdin isn't a tty (piped input / tests)."""
@@ -510,7 +606,7 @@ def _cmd_pick(base_url: str, args: argparse.Namespace) -> int:
510
606
  try:
511
607
  download_course(
512
608
  course_id=course_id,
513
- output_dir=Path(args.out),
609
+ output_dir=_resolve_output_dir(args.out),
514
610
  base_url=base_url,
515
611
  headers_path=headers_path,
516
612
  refresh_session=False,
@@ -523,6 +619,7 @@ def _cmd_pick(base_url: str, args: argparse.Namespace) -> int:
523
619
  skip_pages=args.skip_pages,
524
620
  skip_youtube=args.skip_youtube,
525
621
  skip_cool_videos=args.skip_cool_videos,
622
+ all_file_types=args.all_file_types,
526
623
  )
527
624
  except RuntimeError as exc:
528
625
  print(t(f"下載失敗: {exc}", f"download failed: {exc}"))
@@ -798,7 +895,7 @@ def _cmd_download_course(base_url: str, args: argparse.Namespace) -> int:
798
895
  try:
799
896
  download_course(
800
897
  course_id=args.course_id,
801
- output_dir=Path(args.out),
898
+ output_dir=_resolve_output_dir(args.out),
802
899
  base_url=base_url,
803
900
  headers_path=headers_path,
804
901
  refresh_session=args.refresh_session,
@@ -809,6 +906,7 @@ def _cmd_download_course(base_url: str, args: argparse.Namespace) -> int:
809
906
  skip_pages=args.skip_pages,
810
907
  skip_youtube=args.skip_youtube,
811
908
  skip_cool_videos=args.skip_cool_videos,
909
+ all_file_types=args.all_file_types,
812
910
  )
813
911
  return 0
814
912
  except RuntimeError as exc:
@@ -294,19 +294,66 @@ def _download_signed_url(url: str, target: Path) -> None:
294
294
 
295
295
  # ---- per-stage workers ----
296
296
 
297
- def download_files(plan: CoursePlan, client: CanvasSessionClient) -> StageStats:
298
- """Download every File-type module item directly into the week directory."""
297
+ _KNOWN_FILE_EXTS = {
298
+ ".pdf", ".doc", ".docx", ".ppt", ".pptx", ".xls", ".xlsx", ".csv",
299
+ ".zip", ".rar", ".7z", ".tar", ".gz",
300
+ ".txt", ".md", ".rtf",
301
+ ".jpg", ".jpeg", ".png", ".gif", ".bmp",
302
+ ".mp3", ".wav", ".m4a",
303
+ ".mp4", ".mov", ".avi", ".mkv",
304
+ }
305
+
306
+
307
+ def download_files(
308
+ plan: CoursePlan, client: CanvasSessionClient, *, all_file_types: bool = False,
309
+ ) -> StageStats:
310
+ """Download every File-type module item directly into the week directory.
311
+
312
+ Default: download every file but save with .pdf extension regardless of
313
+ the original type. Most NTU course material is PDF, so this gives a
314
+ uniform-looking output. Files that aren't really PDF (.docx, .xlsx, .zip,
315
+ etc.) still download fine — they just sit on disk with a .pdf extension.
316
+ Most apps still open them (Excel/Word sniff the binary), but if anything
317
+ won't open, re-run with `all_file_types=True` to get the real extensions.
318
+
319
+ With `all_file_types=True`, every file keeps its original extension
320
+ (.pdf / .docx / .pptx / .xlsx / .zip / etc.) — the cleanest behavior but
321
+ means the output isn't uniformly .pdf.
322
+ """
299
323
  headers = _session_headers(client)
300
324
  stats = StageStats()
325
+ forced_pdf_count = 0
301
326
  for week in plan.weeks:
302
327
  for item in week.items:
303
328
  if item.get("type") != "File":
304
329
  continue
305
330
  file_id = str(item.get("content_id") or "")
306
331
  title = str(item.get("title") or "").strip() or f"item-{item.get('id')}"
307
- stem = title[:-4] if title.lower().endswith(".pdf") else title
332
+
333
+ # Real extension for this file: prefer Canvas's display_name in
334
+ # content_details, fall back to the title's suffix.
335
+ content_details = item.get("content_details") or {}
336
+ display_name = str(content_details.get("display_name") or "")
337
+ real_ext = Path(display_name).suffix.lower() if display_name else ""
338
+ if not real_ext:
339
+ real_ext = Path(title).suffix.lower()
340
+
341
+ # Decide what extension to use ON DISK.
342
+ if all_file_types:
343
+ use_ext = real_ext or ".pdf"
344
+ else:
345
+ use_ext = ".pdf"
346
+ if real_ext and real_ext != ".pdf":
347
+ forced_pdf_count += 1
348
+
349
+ # Strip any known extension from the title (case-insensitive) so we
350
+ # don't double up: "syllabus.pdf" + ".pdf" = "syllabus.pdf", not
351
+ # "syllabus.pdf.pdf"; "MS-02_C03-Ex.xlsx" + ".pdf" = "MS-02_C03-Ex.pdf",
352
+ # not "MS-02_C03-Ex.xlsx.pdf".
353
+ title_ext = Path(title).suffix.lower()
354
+ stem = title[:-len(title_ext)] if title_ext in _KNOWN_FILE_EXTS else title
308
355
  safe_title = sanitize_teacher_title(stem)
309
- target = week.week_dir / f"{safe_title}.pdf"
356
+ target = week.week_dir / f"{safe_title}{use_ext}"
310
357
  if target.exists():
311
358
  stats.skipped += 1
312
359
  continue
@@ -327,6 +374,13 @@ def download_files(plan: CoursePlan, client: CanvasSessionClient) -> StageStats:
327
374
  except Exception as exc:
328
375
  print(f" ✗ 失敗: {exc}")
329
376
  stats.failed.append(f"{week.label}/{target.name}: {type(exc).__name__}: {exc}")
377
+ if forced_pdf_count > 0:
378
+ print(t(
379
+ f" 注意: {forced_pdf_count} 個原本不是 PDF 的檔案被存成 .pdf。"
380
+ f"如果有檔案打不開,加 --all-file-types 重抓會用真實副檔名。",
381
+ f" Note: {forced_pdf_count} non-PDF file(s) were saved with a .pdf extension. "
382
+ f"If any won't open, re-run with --all-file-types to keep their real extension.",
383
+ ))
330
384
  return stats
331
385
 
332
386
 
@@ -793,6 +847,7 @@ def download_course(
793
847
  headless: bool = False,
794
848
  skip_pdfs: bool = False, skip_pages: bool = False,
795
849
  skip_youtube: bool = False, skip_cool_videos: bool = False,
850
+ all_file_types: bool = False,
796
851
  sso_timeout_sec: int = 600,
797
852
  ) -> CoursePlan:
798
853
  """Top-level orchestrator. Opens at most ONE Playwright context for the entire run.
@@ -872,7 +927,7 @@ def download_course(
872
927
  if not skip_pdfs:
873
928
  print(t("\n[1/4] PDF 檔案", "\n[1/4] PDFs / Files"))
874
929
  course_stats.pdfs = _run_with_session_retry(
875
- lambda c: download_files(plan, c), t("PDF", "files")
930
+ lambda c: download_files(plan, c, all_file_types=all_file_types), t("PDF", "files")
876
931
  )
877
932
  print(t(
878
933
  f" 下載 {course_stats.pdfs.done}、跳過 {course_stats.pdfs.skipped}、失敗 {len(course_stats.pdfs.failed)}",
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "get-class-material"
7
- version = "0.2.2"
7
+ version = "0.2.3"
8
8
  description = "One-command bulk downloader for NTU COOL (Canvas) course materials — PDFs, lecture videos, and Pages."
9
9
  readme = "README.md"
10
10
  license = "MIT"