figgydeck 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
figgydeck/__init__.py ADDED
@@ -0,0 +1,22 @@
1
+ """figgydeck: turn textbooks into study decks (PowerPoint slides or Anki decks)."""
2
+
3
+ from importlib.metadata import PackageNotFoundError
4
+ from importlib.metadata import version as _pkg_version
5
+
6
+ from figgydeck.anki import build_apkg, build_combined_apkg
7
+ from figgydeck.extract import extract_chapter
8
+ from figgydeck.models import Chapter
9
+
10
+ # The PowerPoint builders (build_pptx, build_combined_pptx) are equal
11
+ # first-class outputs, but they live in figgydeck.pptx and are imported from
12
+ # there so that the top-level package never hard-imports the optional
13
+ # python-pptx dependency. Use `from figgydeck.pptx import build_pptx`.
14
+
15
+ # Version is defined once in pyproject.toml; read it back from the installed
16
+ # distribution metadata so the two can never drift.
17
+ try:
18
+ __version__ = _pkg_version("figgydeck")
19
+ except PackageNotFoundError: # running from a source tree that isn't installed
20
+ __version__ = "0.0.0+unknown"
21
+
22
+ __all__ = ["extract_chapter", "build_apkg", "build_combined_apkg", "Chapter"]
figgydeck/anki.py ADDED
@@ -0,0 +1,331 @@
1
+ """Anki deck builder: turn a manifest + images into a .apkg file."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import hashlib
6
+ import html
7
+ import re
8
+ import shutil
9
+ import tempfile
10
+ from pathlib import Path
11
+
12
+ import genanki
13
+
14
+ from figgydeck.models import Chapter, load_manifest
15
+
16
+ # Stable model ID — must not change between runs, or Anki will fail to
17
+ # update existing notes when users re-import.
18
+ _MODEL_ID = 1607392319
19
+ _DECK_ID_BASE = 2059400110
20
+
21
+
22
+ CARD_CSS = """
23
+ /* ---------- Light mode (default) ---------- */
24
+ .card {
25
+ font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
26
+ font-size: 17px; color: #2A1B23; background: #F8F4EC;
27
+ text-align: left; padding: 18px; line-height: 1.5;
28
+ }
29
+ .card-img {
30
+ display: block; max-width: 100%; max-height: 60vh;
31
+ margin: 0 auto 12px; border-radius: 6px;
32
+ box-shadow: 0 2px 12px rgba(0,0,0,0.15);
33
+ background: #ffffff; padding: 6px;
34
+ }
35
+ .tag-row {
36
+ text-align: center; font-size: 12px; letter-spacing: 2px;
37
+ text-transform: uppercase; color: #A26769;
38
+ font-weight: 600; margin-top: 8px;
39
+ }
40
+ .divider {
41
+ border: 0; border-top: 2px solid #C9A86A;
42
+ width: 60px; margin: 16px auto;
43
+ }
44
+ .label {
45
+ font-size: 11px; letter-spacing: 1.5px; text-transform: uppercase;
46
+ color: #A26769; font-weight: 600; margin-bottom: 4px;
47
+ }
48
+ .title {
49
+ font-family: Georgia, serif; font-size: 22px; font-weight: 700;
50
+ color: #6D2E46; margin: 0 0 14px 0; line-height: 1.3;
51
+ }
52
+ .caption { font-size: 15px; color: #2A1B23; margin: 0 0 16px 0; }
53
+ .meta {
54
+ font-size: 12px; color: #8A7079; font-style: italic;
55
+ border-top: 1px solid #D8C9B5; padding-top: 10px; margin-top: 12px;
56
+ }
57
+ .meta-line { margin: 2px 0; }
58
+ .meta b { color: #6D2E46; font-style: normal; }
59
+
60
+ /* ---------- Dark mode (.nightMode / .night_mode) ---------- */
61
+ .nightMode .card, .night_mode .card,
62
+ .card.nightMode, .card.night_mode {
63
+ color: #ECE2D0; background: #1F1419;
64
+ }
65
+ .nightMode .card-img, .night_mode .card-img,
66
+ .card.nightMode .card-img, .card.night_mode .card-img {
67
+ background: #ffffff; box-shadow: 0 2px 16px rgba(0,0,0,0.6);
68
+ }
69
+ .nightMode .tag-row, .night_mode .tag-row,
70
+ .card.nightMode .tag-row, .card.night_mode .tag-row { color: #E8B86F; }
71
+ .nightMode .divider, .night_mode .divider,
72
+ .card.nightMode .divider, .card.night_mode .divider { border-top-color: #E8B86F; }
73
+ .nightMode .label, .night_mode .label,
74
+ .card.nightMode .label, .card.night_mode .label { color: #E8B86F; }
75
+ .nightMode .title, .night_mode .title,
76
+ .card.nightMode .title, .card.night_mode .title { color: #F2C9A0; }
77
+ .nightMode .caption, .night_mode .caption,
78
+ .card.nightMode .caption, .card.night_mode .caption { color: #ECE2D0; }
79
+ .nightMode .meta, .night_mode .meta,
80
+ .card.nightMode .meta, .card.night_mode .meta {
81
+ color: #B5A39A; border-top-color: #4A2D38;
82
+ }
83
+ .nightMode .meta b, .night_mode .meta b,
84
+ .card.nightMode .meta b, .card.night_mode .meta b { color: #F2C9A0; }
85
+ """
86
+
87
+
88
def _build_model() -> genanki.Model:
    """Return the genanki note model used for every figgydeck card.

    The model has eight fields and one template, "Image -> Details". The
    front shows the image with its number underneath; the back repeats the
    front and adds a divider, the type label, the title and caption (each
    only when non-empty), and a book / chapter / page footer. The model id
    comes from ``_MODEL_ID`` and the styling from ``CARD_CSS``.
    """
    field_names = (
        "Number",   # "Fig X.Y" or "Tab X.Y"
        "Image",    # <img src="...">
        "Title",    # tables have one; figures may be empty
        "Caption",  # cleaned caption body
        "Book",
        "Chapter",
        "Page",
        "Type",     # "Figure" | "Table"
    )

    # The answer side starts with exactly the same markup as the question side.
    front = '{{Image}}<div class="tag-row">{{Number}}</div>'
    back = "".join([
        front,
        '<hr class="divider">',
        '<div class="label">{{Type}}</div>',
        '{{#Title}}<div class="title">{{Title}}</div>{{/Title}}',
        '{{#Caption}}<div class="caption">{{Caption}}</div>{{/Caption}}',
        '<div class="meta">',
        '<div class="meta-line"><b>Book:</b> {{Book}}</div>',
        '<div class="meta-line"><b>Chapter:</b> {{Chapter}}</div>',
        '<div class="meta-line"><b>Page:</b> {{Page}}</div>',
        '</div>',
    ])
    card_template = {"name": "Image -> Details", "qfmt": front, "afmt": back}

    return genanki.Model(
        _MODEL_ID,
        "figgydeck Card",
        fields=[{"name": name} for name in field_names],
        templates=[card_template],
        css=CARD_CSS,
    )
120
+
121
+
122
+ def _slugify(s: str) -> str:
123
+ s = s.lower()
124
+ s = re.sub(r"[^\w\s-]", "", s)
125
+ s = re.sub(r"[-\s]+", "-", s).strip("-")
126
+ return s
127
+
128
+
129
def _add_notes(
    deck: genanki.Deck,
    model: genanki.Model,
    manifest: list[dict],
    images_dir: Path,
    book_title: str,
    chapter_title: str,
    media_files: list[str],
    *,
    media_prefix: str = "",
    stage_dir: Path | None = None,
    log=lambda *a, **k: None,
) -> int:
    """Add one note per usable manifest entry to `deck`.

    An entry is skipped (and counted) when it has no ``image_filename`` or
    when that file does not exist under `images_dir`.

    Args:
        deck: Deck that receives the notes.
        model: Note model used for every note.
        manifest: Figure/table entries; each is read for ``image_filename``,
            ``type``, ``number``, ``page`` and optionally ``title`` /
            ``caption``.
        images_dir: Folder the ``image_filename`` values are relative to.
        book_title: Book title shown on the card and used for tag/GUID slugs.
        chapter_title: Chapter title, used the same way.
        media_files: Output list; the path of every image used is appended
            so the caller can attach it to the package.
        media_prefix: When non-empty, each image is copied into `stage_dir`
            as ``f"{media_prefix}__{name}"`` and the card references that
            name, keeping basenames unique across merged chapters. When
            empty, images are referenced in place.
        stage_dir: Destination for the prefixed copies; required whenever
            `media_prefix` is set.
        log: Callable used for per-entry skip messages.

    Returns:
        The number of skipped entries.

    Raises:
        ValueError: If `media_prefix` is set but `stage_dir` is None (raised
            when the first usable image is reached).
    """
    book_tag = _slugify(book_title)
    chapter_tag = _slugify(chapter_title)
    # The book/chapter text is identical on every card, so escape it once.
    safe_book = html.escape(book_title)
    safe_chapter = html.escape(chapter_title)

    skipped = 0
    for entry in manifest:
        filename = entry.get("image_filename")
        if not filename:
            log(f" skip {entry['type']} {entry['number']}: no image")
            skipped += 1
            continue

        source = images_dir / filename
        if not source.exists():
            log(f" skip {entry['type']} {entry['number']}: missing {source}")
            skipped += 1
            continue

        if not media_prefix:
            # Single-chapter build: reference the image where it already is.
            media_name = source.name
            media_files.append(str(source))
        elif stage_dir is None:
            raise ValueError("media_prefix requires stage_dir")
        else:
            # Merged build: stage a copy under a chapter-unique basename.
            media_name = f"{media_prefix}__{source.name}"
            staged = stage_dir / media_name
            shutil.copy(source, staged)
            media_files.append(str(staged))

        kind = entry["type"].capitalize()
        label_prefix = "Tab" if kind == "Table" else "Fig"
        number_label = f"{label_prefix} {entry['number']}"

        field_values = [
            number_label,
            f'<img class="card-img" src="{html.escape(media_name)}">',
            html.escape(entry.get("title") or ""),
            html.escape(entry.get("caption") or ""),
            safe_book,
            safe_chapter,
            f"p. {entry['page']}",
            kind,
        ]
        note_tags = [
            f"book::{book_tag}",
            f"chapter::{chapter_tag}",
            f"type::{kind.lower()}",
        ]
        # Stable GUID: re-running figgydeck and re-importing into Anki
        # updates existing notes rather than creating duplicates.
        note_guid = genanki.guid_for(book_tag, chapter_tag, kind, entry["number"])

        deck.add_note(
            genanki.Note(model=model, fields=field_values, tags=note_tags, guid=note_guid)
        )

    return skipped
204
+
205
+
206
def _deck_for(book_title: str, chapter_title: str, deck_name: str) -> genanki.Deck:
    """Build a ``genanki.Deck`` named `deck_name` with a reproducible id.

    The id is ``_DECK_ID_BASE`` plus an offset in ``[0, 100000)`` taken from
    the first four bytes of the SHA-256 of ``"<book-slug>::<chapter-slug>"``.
    SHA-256 is used instead of the builtin ``hash()`` because the latter is
    salted per process (``PYTHONHASHSEED``), so it would not give the same id
    on every run.
    """
    identity = f"{_slugify(book_title)}::{_slugify(chapter_title)}"
    leading_bytes = hashlib.sha256(identity.encode()).digest()[:4]
    offset = int.from_bytes(leading_bytes, "big") % 100000
    return genanki.Deck(_DECK_ID_BASE + offset, deck_name)
219
+
220
+
221
def build_apkg(
    manifest: list[dict] | str | Path,
    images_dir: str | Path,
    book_title: str,
    chapter_title: str,
    output_path: str | Path,
    *,
    verbose: bool = True,
) -> Path:
    """Write a single-chapter Anki ``.apkg`` and return its path.

    Args:
        manifest: The manifest list (as produced by `extract_chapter()`) or a
            path to a `manifest.json` file; resolved via `load_manifest`.
        images_dir: Folder holding the images the manifest refers to.
        book_title: Display title of the book; shown on every card back.
        chapter_title: Display title of the chapter.
        output_path: Destination of the `.apkg` file.
        verbose: When true, progress and a summary are printed.

    Returns:
        `output_path` as a `Path`.
    """
    entries = load_manifest(manifest)
    source_dir = Path(images_dir)
    target = Path(output_path)

    if verbose:
        log = print
    else:
        log = lambda *a, **k: None  # noqa: E731 - silent stand-in for print

    deck = _deck_for(book_title, chapter_title, f"{book_title} :: {chapter_title}")
    media: list[str] = []
    skipped = _add_notes(
        deck,
        _build_model(),
        entries,
        source_dir,
        book_title,
        chapter_title,
        media,
        log=log,
    )

    package = genanki.Package(deck)
    package.media_files = media
    package.write_to_file(str(target))

    log(f"\nWrote: {target}")
    log(f" notes added: {len(deck.notes)}")
    log(f" skipped: {skipped}")
    return target
268
+
269
+
270
def build_combined_apkg(
    chapters: list[Chapter],
    book_title: str,
    output_path: str | Path,
    *,
    verbose: bool = True,
) -> Path:
    """Write one ``.apkg`` containing a subdeck per chapter; return its path.

    Args:
        chapters: `Chapter` records. Each one's `manifest` may be the list
            from `extract_chapter()` or a path to a `manifest.json`.
        book_title: Book title, used as the parent deck. Every chapter deck
            is named ``"{book_title}::{chapter_title}"``; "::" without
            spaces is Anki's subdeck separator, so chapters nest under it.
        output_path: Destination of the combined `.apkg`.
        verbose: When true, progress and a summary are printed.

    Returns:
        `output_path` as a `Path`.

    Different chapters may contain images with the same basename (for
    example ``img-000.png``). Because genanki keys media by basename, each
    chapter's images are first copied into a temporary staging directory
    under a chapter-specific prefix.
    """
    target = Path(output_path)
    if verbose:
        log = print
    else:
        log = lambda *a, **k: None  # noqa: E731 - silent stand-in for print

    model = _build_model()
    decks: list[genanki.Deck] = []
    media: list[str] = []

    # The staged copies must still exist when the package is written, so the
    # write happens inside the ``with`` block.
    with tempfile.TemporaryDirectory(prefix="figgydeck-apkg-") as tmp:
        staging = Path(tmp)
        skipped_total = 0

        for index, chapter in enumerate(chapters):
            entries = load_manifest(chapter.manifest)
            title = chapter.title
            deck = _deck_for(book_title, title, f"{book_title}::{title}")
            # Index-prefixed so media basenames stay unique even if two
            # chapters share a title (and thus a slug).
            prefix = f"ch{index}-{_slugify(title)}"
            skipped_total += _add_notes(
                deck,
                model,
                entries,
                Path(chapter.images_dir),
                book_title,
                title,
                media,
                media_prefix=prefix,
                stage_dir=staging,
                log=log,
            )
            decks.append(deck)

        notes_total = sum(len(deck.notes) for deck in decks)

        package = genanki.Package(decks)
        package.media_files = media
        package.write_to_file(str(target))

        log(f"\nWrote: {target}")
        log(f" chapters: {len(chapters)}")
        log(f" notes added: {notes_total}")
        log(f" skipped: {skipped_total}")

    return target
figgydeck/clean.py ADDED
@@ -0,0 +1,83 @@
1
+ """Caption cleanup: strip running headers, decode ligatures, normalize whitespace."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+
7
# Common PDF font ligatures (Unicode "Alphabetic Presentation Forms",
# U+FB00..U+FB06) mapped to the plain letters they stand for.
#
# The keys are spelled as explicit escapes on purpose: editors, diff viewers
# and Unicode normalizers readily fold the ligature glyphs into ASCII, which
# would silently turn every entry into a no-op such as "fi" -> "fi".
LIGATURES = {
    "\ufb01": "fi",
    "\ufb02": "fl",
    "\ufb00": "ff",
    "\ufb03": "ffi",
    "\ufb04": "ffl",
    # U+FB05 is formally "long s + t", but it is decoded as "ft" here, as is
    # its decomposed spelling (U+017F followed by "t").
    "\ufb05": "ft",
    "\u017ft": "ft",
    "\ufb06": "st",
}
12
+
13
# A running header/footer is a banner of consecutive ALL-CAPS words (the book or
# chapter title) that PDF extraction interleaves into the caption text. We treat
# any run of >= 3 consecutive capitalized tokens -- with short lowercase
# connectors like "of"/"the" allowed between them -- as such a banner and strip
# it. This is a heuristic: ordinary captions rarely contain three consecutive
# all-caps words with no intervening punctuation (acronym lists like "DNA, RNA"
# are punctuated, so they don't match and are preserved).
#
# The trailing (?![a-z]) keeps the match from ending inside an ordinary
# capitalized word. Without it the last "caps word" could be just the first
# letter of e.g. "Pi" or "Figure", so "ATP ADP Pi" lost "ATP ADP P" and a
# banner fused to the next word ("...RATFigure") also swallowed the "F".
_CAPS_WORD = r"[A-Z][A-Z0-9&.'\-]*"
_CONNECTOR = r"(?:of|the|and|in|for|to|a|an|&)"
RUNNING_HEADER_RE = re.compile(
    rf"{_CAPS_WORD}(?:(?:\s+{_CONNECTOR})*\s+{_CAPS_WORD}){{2,}}(?![a-z])"
)
25
+
26
+ # Per-line footnote patterns are publisher-specific; none are enabled by default.
27
+ # Add re.compile(...) entries here to drop footnote lines for a particular layout.
28
+ FOOTNOTE_LINE_PATTERNS: list[re.Pattern[str]] = []
29
+
30
+
31
def clean_caption(text: str) -> str:
    """Turn a raw extracted caption into readable single-line text.

    An empty (falsy) input yields ``""``. Otherwise the passes run in this
    order:
        1. Replace every ``LIGATURES`` key with its plain-letter value
        2. Replace running-header matches with a space
        3. Drop blank lines and lines matching ``FOOTNOTE_LINE_PATTERNS``,
           then join the remaining lines with single spaces
        4. De-hyphenate: remove a hyphen together with the whitespace after it
        5. Rewrite ``NNNNeNNNN`` year ranges with an en-dash
        6. Collapse whitespace
        7. Strip running headers a second time, then collapse whitespace again
    """
    if not text:
        return ""

    cleaned = text

    # 1. Ligatures
    for glyph, plain in LIGATURES.items():
        cleaned = cleaned.replace(glyph, plain)

    # 2. Running headers (mid-text occurrences)
    cleaned = RUNNING_HEADER_RE.sub(" ", cleaned)

    # 3. Per-line footnote filtering
    kept_lines = []
    for raw_line in cleaned.split("\n"):
        line = raw_line.strip()
        if not line:
            continue
        if any(pattern.match(line) for pattern in FOOTNOTE_LINE_PATTERNS):
            continue
        kept_lines.append(line)
    cleaned = " ".join(kept_lines)

    # 4. De-hyphenate line-wrapped words
    cleaned = re.sub(r"-\s+", "", cleaned)

    # 5. Elsevier renders en-dash as "e": "1968e1969" -> "1968–1969"
    cleaned = re.sub(r"(\d{4})e(\d{4})", r"\1–\2", cleaned)

    # 6. Collapse whitespace
    cleaned = re.sub(r"\s+", " ", cleaned).strip()

    # 7. Second running-header pass: catches banners that only became adjacent
    #    once line breaks were collapsed (header bled onto the caption tail).
    cleaned = RUNNING_HEADER_RE.sub(" ", cleaned)
    return re.sub(r"\s+", " ", cleaned).strip()
73
+
74
+
75
def split_camel(s: str) -> str:
    """Insert the spaces missing from a run-together CamelCase string.

    pdfplumber sometimes joins characters without inferring the spaces
    between words, producing text like "RatStrainsandStocksOriginated...".
    A space is inserted wherever a lowercase letter is directly followed by
    an uppercase one; whitespace runs are then collapsed and the ends trimmed.
    """
    spaced = re.sub(r"([a-z])([A-Z])", r"\1 \2", s)
    return re.sub(r"\s+", " ", spaced).strip()
figgydeck/cli.py ADDED
@@ -0,0 +1,212 @@
1
+ """figgydeck command-line interface."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import re
7
+ import shutil
8
+ import sys
9
+ import tempfile
10
+ from pathlib import Path
11
+
12
+ from figgydeck.models import Chapter
13
+
14
+
15
+ def _slug(s: str) -> str:
16
+ """Collapse whitespace and strip punctuation, keeping alphanumerics.
17
+
18
+ "Ch. 1: Historical Foundations" -> "Ch1HistoricalFoundations"
19
+ """
20
+ s = re.sub(r"[^\w\s]", "", s)
21
+ s = re.sub(r"\s+", "", s.title())
22
+ return s
23
+
24
+
25
def _safe_filename(book: str, chapter: str) -> str:
    """Return a filename stem of the form ``<book slug>_<chapter slug>``.

    Examples:
        "Example Textbook (1st ed., 2020)", "Ch. 3: Cell Structure"
        → "ExampleTextbook1StEd2020_Ch3CellStructure"
    """
    return "_".join((_slug(book), _slug(chapter)))
33
+
34
+
35
+ def _chapter_title(pdf_path: Path, explicit: str | None) -> str:
36
+ """Resolve a chapter's display title.
37
+
38
+ Uses `explicit` when given; otherwise derives one from the PDF filename:
39
+ a ``ch01``-style stem becomes ``"Chapter 1"``; anything else is title-cased
40
+ with ``-``/``_`` turned into spaces.
41
+ """
42
+ if explicit:
43
+ return explicit
44
+ stem = pdf_path.stem
45
+ # Match a leading chapter marker and split off any descriptive remainder:
46
+ # "ch01" -> "Chapter 1"
47
+ # "Chapter-3-Biology-and-Diseases..." -> "Chapter 3: Biology and Diseases..."
48
+ # "Chapter 11 - Microbiological..." -> "Chapter 11: Microbiological..."
49
+ m = re.match(r"\s*ch(?:apter)?[\s._-]*0*(\d+)[\s._-]*(.*)$", stem, re.IGNORECASE)
50
+ if m:
51
+ num = int(m.group(1))
52
+ rest = re.sub(r"[\s_-]+", " ", m.group(2)).strip(" _-.")
53
+ return f"Chapter {num}: {rest}" if rest else f"Chapter {num}"
54
+ pretty = re.sub(r"[-_]+", " ", stem).strip()
55
+ return pretty.title() if pretty else stem
56
+
57
+
58
+ _VALID_FORMATS = ("apkg", "pptx")
59
+
60
+
61
+ def _format_list(s: str) -> list[str]:
62
+ """Parse a single --format value: comma-separated formats with validation."""
63
+ parts = [p.strip().lower() for p in s.split(",") if p.strip()]
64
+ if not parts:
65
+ raise argparse.ArgumentTypeError("--format value cannot be empty")
66
+ bad = [p for p in parts if p not in _VALID_FORMATS]
67
+ if bad:
68
+ raise argparse.ArgumentTypeError(
69
+ f"unknown format(s): {', '.join(bad)} "
70
+ f"(choose from: {', '.join(_VALID_FORMATS)})"
71
+ )
72
+ return parts
73
+
74
+
75
def main(argv: list[str] | None = None) -> int:
    """Run the figgydeck command line and return a process exit code.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read
            ``sys.argv``.

    Returns:
        ``0`` after the requested decks were written, or ``2`` when
        ``--format`` is missing, an input PDF does not exist, or the number
        of ``--chapter`` values differs from the number of PDFs (an error
        message goes to stderr in each case). Parse errors detected by
        argparse itself exit through argparse, not through this return value.
    """
    parser = argparse.ArgumentParser(
        prog="figgydeck",
        description="Turn textbooks into study decks: extract figures (and "
                    "optionally tables) and captions from chapter PDFs and "
                    "package them as Anki decks or PowerPoint slides.",
    )
    add = parser.add_argument
    add("pdfs", nargs="+", metavar="PDF", help="One or more source chapter PDFs")
    add("--book", required=True, help="Book title (e.g. 'Example Textbook (1st ed., 2020)')")
    add(
        "--chapter", action="append", default=None, metavar="TITLE",
        help="Chapter title (e.g. 'Ch. 1: Historical Foundations'). Repeatable: "
             "give one per PDF, in order. If omitted, titles are derived from "
             "each PDF's filename.",
    )
    add("--output", "-o", default="./out", help="Output directory (default: ./out)")
    add(
        "--format", "-f", action="append", type=_format_list, default=None,
        metavar="FMT[,FMT...]",
        help="Output format(s) to build (required). Repeatable, or "
             f"comma-separated. Choices: {', '.join(_VALID_FORMATS)}.",
    )
    add("--combine", action="store_true",
        help="Merge all input PDFs into a single artifact per format.")
    add("--full-res", action="store_true",
        help="Embed full-resolution PNGs in .pptx (default: downscaled "
             "JPEGs, which keep the file small enough for Google Slides).")
    add("--tables", action="store_true",
        help="Also extract tables (default: figures only).")
    add("--save-manifest", action="store_true",
        help="Also write manifest.json (the structured extraction "
             "result) into the output directory.")
    add("--save-images", action="store_true",
        help="Also write the extracted figure/table images (an "
             "images/ folder) into the output directory.")
    add("--quiet", "-q", action="store_true", help="Suppress progress logging")
    args = parser.parse_args(argv)

    # An explicit output format is required — the decks are the only
    # deliverables, so we won't guess which one you want.
    if not args.format:
        print(
            f"error: --format is required (choose from: {', '.join(_VALID_FORMATS)}); "
            "e.g. --format apkg or --format apkg,pptx",
            file=sys.stderr,
        )
        return 2

    pdf_paths = [Path(raw) for raw in args.pdfs]
    missing = [path for path in pdf_paths if not path.exists()]
    if missing:
        for path in missing:
            print(f"error: {path} does not exist", file=sys.stderr)
        return 2

    # Resolve one chapter title per PDF: all-or-nothing on --chapter.
    given_titles = args.chapter
    if given_titles is None:
        given_titles = [None] * len(pdf_paths)
    elif len(given_titles) != len(pdf_paths):
        print(
            f"error: got {len(args.chapter)} --chapter value(s) for "
            f"{len(pdf_paths)} PDF(s); supply either none or exactly one per PDF",
            file=sys.stderr,
        )
        return 2
    titles = [
        _chapter_title(path, given)
        for path, given in zip(pdf_paths, given_titles, strict=True)
    ]

    out_dir = Path(args.output)
    out_dir.mkdir(parents=True, exist_ok=True)
    verbose = not args.quiet

    # Flatten + dedupe requested formats while preserving order.
    formats = list(dict.fromkeys(fmt for chunk in args.format for fmt in chunk))

    # Bind builders up front so a missing optional dep (python-pptx) fails
    # before the slow extraction step. Imported at call time so tests can
    # patch the module-level functions.
    apkg_single = apkg_combined = None
    pptx_single = pptx_combined = None
    if "apkg" in formats:
        from figgydeck.anki import build_apkg as apkg_single
        from figgydeck.anki import build_combined_apkg as apkg_combined
    if "pptx" in formats:
        from figgydeck.pptx import build_combined_pptx as pptx_combined
        from figgydeck.pptx import build_pptx as pptx_single

    from figgydeck.extract import extract_chapter

    single_pdf = len(pdf_paths) == 1
    keep_intermediates = args.save_manifest or args.save_images

    # Extract into a temporary working dir. The built decks embed their own
    # images, so the raw manifest.json / images/ are intermediates — surfaced
    # into out_dir only when --save-manifest / --save-images ask for them. A
    # single PDF extracts flat; multiple PDFs each get their own subdir so fixed
    # image names (img-000.png, ...) don't collide.
    with tempfile.TemporaryDirectory(prefix="figgydeck-extract-") as tmp:
        work = Path(tmp)
        chapters: list[Chapter] = []

        for index, (pdf, title) in enumerate(zip(pdf_paths, titles, strict=True)):
            subdir = "" if single_pdf else f"{index:02d}_{_slug(title)}"
            extract_dir = work / subdir if subdir else work
            manifest = extract_chapter(
                pdf, extract_dir, include_tables=args.tables, verbose=verbose
            )
            chapters.append(Chapter(manifest, extract_dir / "images", title))

            if not keep_intermediates:
                continue
            dest = out_dir / subdir if subdir else out_dir
            dest.mkdir(parents=True, exist_ok=True)
            if args.save_manifest:
                shutil.copy(extract_dir / "manifest.json", dest / "manifest.json")
            if args.save_images:
                shutil.copytree(extract_dir / "images", dest / "images", dirs_exist_ok=True)

        # Write requested formats (inside the temp dir so extracted images exist).
        optimize = not args.full_res
        if args.combine:
            base = f"{_slug(args.book)}_Combined"
            if apkg_combined is not None:
                apkg_combined(chapters, args.book, out_dir / f"{base}.apkg", verbose=verbose)
            if pptx_combined is not None:
                pptx_combined(
                    chapters, args.book, out_dir / f"{base}.pptx",
                    optimize_images=optimize, verbose=verbose,
                )
        else:
            for chapter in chapters:
                base = _safe_filename(args.book, chapter.title)
                if apkg_single is not None:
                    apkg_single(
                        chapter.manifest, chapter.images_dir, args.book, chapter.title,
                        out_dir / f"{base}.apkg", verbose=verbose,
                    )
                if pptx_single is not None:
                    pptx_single(
                        chapter.manifest, chapter.images_dir, args.book, chapter.title,
                        out_dir / f"{base}.pptx",
                        optimize_images=optimize, verbose=verbose,
                    )

    return 0
209
+
210
+
211
+ if __name__ == "__main__":
212
+ raise SystemExit(main())