PyPI - notability-extractor - Versions diffs - 0.1.0__py3-none-any.whl - Mend

notability-extractor 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (41) hide show

notability_extractor/__init__.py +3 -0
notability_extractor/__main__.py +3 -0
notability_extractor/anki.py +297 -0
notability_extractor/archive/__init__.py +1 -0
notability_extractor/archive/backup.py +198 -0
notability_extractor/archive/config.py +109 -0
notability_extractor/archive/filter.py +44 -0
notability_extractor/archive/scheduler.py +65 -0
notability_extractor/archive/scheduler_install.py +186 -0
notability_extractor/archive/store.py +217 -0
notability_extractor/build/__init__.py +1 -0
notability_extractor/build/flashcards.py +91 -0
notability_extractor/build/notes.py +31 -0
notability_extractor/build/reader.py +108 -0
notability_extractor/build/summaries.py +38 -0
notability_extractor/cli.py +263 -0
notability_extractor/extract/__init__.py +1 -0
notability_extractor/extract/exporter.py +45 -0
notability_extractor/extract/http_cache.py +87 -0
notability_extractor/extract/nbn.py +78 -0
notability_extractor/extract/platform_check.py +35 -0
notability_extractor/gui/__init__.py +0 -0
notability_extractor/gui/app.py +68 -0
notability_extractor/gui/main_window.py +119 -0
notability_extractor/gui/pages/__init__.py +0 -0
notability_extractor/gui/pages/export.py +123 -0
notability_extractor/gui/pages/library.py +203 -0
notability_extractor/gui/pages/notes.py +102 -0
notability_extractor/gui/pages/settings.py +349 -0
notability_extractor/gui/pages/summaries.py +101 -0
notability_extractor/gui/theme.py +61 -0
notability_extractor/gui/widgets/__init__.py +0 -0
notability_extractor/gui/widgets/card_editor.py +180 -0
notability_extractor/gui/widgets/tag_filter.py +101 -0
notability_extractor/gui/widgets/tag_input.py +161 -0
notability_extractor/model.py +76 -0
notability_extractor/utils.py +80 -0
notability_extractor-0.1.0.dist-info/METADATA +205 -0
notability_extractor-0.1.0.dist-info/RECORD +41 -0
notability_extractor-0.1.0.dist-info/WHEEL +4 -0
notability_extractor-0.1.0.dist-info/entry_points.txt +3 -0

notability_extractor/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+"""notability-extractor: pull flashcards out of Notability and into Anki."""
+# Package version string, exposed as notability_extractor.__version__.
+__version__ = "0.1.0"

notability_extractor/__main__.py ADDED Viewed

@@ -0,0 +1,3 @@
+from notability_extractor.cli import main
+# Entry point for `python -m notability_extractor`: hand control to the CLI.
+main()

notability_extractor/anki.py ADDED Viewed

@@ -0,0 +1,297 @@
+"""
+Build and write an Anki .apkg package from a list of front/back card dicts.
+An .apkg is a ZIP file containing:
+  - collection.anki2  -- a minimal SQLite database understood by Anki 2.1+
+  - media             -- a JSON object mapping media filenames (empty here)
+Reference: https://github.com/ankidroid/Anki-Android/wiki/Database-Structure
+"""
+import base64
+import json
+import random
+import sqlite3
+import tempfile
+import time
+import zipfile
+from pathlib import Path
+from typing import Any
+from notability_extractor.utils import field_checksum, get_logger
+log = get_logger(__name__)
+# Fixed ids written into every generated collection: the note model, the
+# single deck, and the deck-config entry that the deck points at via "conf".
+_BASIC_MODEL_ID = 1702000000000
+_DECK_ID = 1702000000001
+_CONF_ID = 1
+# DDL for the five tables created in collection.anki2 (cards, col, graves,
+# notes, revlog). The INSERT statements in _build_collection are positional,
+# so they rely on the column order declared here.
+_SCHEMA = """
+CREATE TABLE IF NOT EXISTS cards (
+    id      integer PRIMARY KEY,
+    nid     integer NOT NULL,
+    did     integer NOT NULL,
+    ord     integer NOT NULL,
+    mod     integer NOT NULL,
+    usn     integer NOT NULL,
+    type    integer NOT NULL,
+    queue   integer NOT NULL,
+    due     integer NOT NULL,
+    ivl     integer NOT NULL,
+    factor  integer NOT NULL,
+    reps    integer NOT NULL,
+    lapses  integer NOT NULL,
+    left    integer NOT NULL,
+    odue    integer NOT NULL,
+    odid    integer NOT NULL,
+    flags   integer NOT NULL,
+    data    text    NOT NULL
+);
+CREATE TABLE IF NOT EXISTS col (
+    id      integer PRIMARY KEY,
+    crt     integer NOT NULL,
+    mod     integer NOT NULL,
+    scm     integer NOT NULL,
+    ver     integer NOT NULL,
+    dty     integer NOT NULL,
+    usn     integer NOT NULL,
+    ls      integer NOT NULL,
+    conf    text    NOT NULL,
+    models  text    NOT NULL,
+    decks   text    NOT NULL,
+    dconf   text    NOT NULL,
+    tags    text    NOT NULL
+);
+CREATE TABLE IF NOT EXISTS graves (
+    usn     integer NOT NULL,
+    oid     integer NOT NULL,
+    type    integer NOT NULL
+);
+CREATE TABLE IF NOT EXISTS notes (
+    id      integer PRIMARY KEY,
+    guid    text    NOT NULL,
+    mid     integer NOT NULL,
+    mod     integer NOT NULL,
+    usn     integer NOT NULL,
+    tags    text    NOT NULL,
+    flds    text    NOT NULL,
+    sfld    text    NOT NULL,
+    csum    integer NOT NULL,
+    flags   integer NOT NULL,
+    data    text    NOT NULL
+);
+CREATE TABLE IF NOT EXISTS revlog (
+    id      integer PRIMARY KEY,
+    cid     integer NOT NULL,
+    usn     integer NOT NULL,
+    ease    integer NOT NULL,
+    ivl     integer NOT NULL,
+    lastIvl integer NOT NULL,
+    factor  integer NOT NULL,
+    time    integer NOT NULL,
+    type    integer NOT NULL
+);
+"""
+def _guid() -> str:
+    """Return a random note GUID: 9 random bytes, base64-encoded as ASCII text."""
+    raw = random.randbytes(9)
+    encoded = base64.b64encode(raw)
+    return encoded.decode("ascii")
+def _build_collection(
+    conn: sqlite3.Connection, cards: list[dict[str, Any]], deck_name: str, now: int
+) -> None:
+    """Create the Anki tables in *conn* and populate them from *cards*.
+    Writes the single ``col`` row (collection config, one note model, one deck,
+    one deck config) and then one ``notes`` row plus one ``cards`` row per
+    entry of *cards*. Commits before returning.
+    Args:
+        conn: open SQLite connection to the (new) collection database.
+        cards: dicts providing ``"front"`` and ``"back"`` values.
+        deck_name: name stored on the single deck.
+        now: timestamp used for every crt/mod/scm field and as the base of the
+            note ids; write_apkg passes ``int(time.time())``.
+    """
+    conn.executescript(_SCHEMA)
+    # Note model: two fields (Front, Back) rendered by a single card template.
+    model = {
+        str(_BASIC_MODEL_ID): {
+            "id": _BASIC_MODEL_ID,
+            "name": "Notability Basic",
+            "type": 0,
+            "mod": now,
+            "usn": -1,
+            "sortf": 0,
+            "did": None,
+            "tmpls": [
+                {
+                    "name": "Card 1",
+                    "ord": 0,
+                    "qfmt": "{{Front}}",
+                    "afmt": "{{FrontSide}}<hr id=answer>{{Back}}",
+                    "bqfmt": "",
+                    "bafmt": "",
+                    "did": None,
+                    "bfont": "",
+                    "bsize": 0,
+                }
+            ],
+            "flds": [
+                {
+                    "name": "Front",
+                    "ord": 0,
+                    "sticky": False,
+                    "rtl": False,
+                    "font": "Arial",
+                    "size": 20,
+                },
+                {
+                    "name": "Back",
+                    "ord": 1,
+                    "sticky": False,
+                    "rtl": False,
+                    "font": "Arial",
+                    "size": 20,
+                },
+            ],
+            "css": ".card { font-family: arial; font-size: 20px; text-align: center; }",
+            "latexPre": "",
+            "latexPost": "",
+            "tags": [],
+            "vers": [],
+        }
+    }
+    # The one deck every card is filed under; it uses the deck config below.
+    deck = {
+        str(_DECK_ID): {
+            "id": _DECK_ID,
+            "name": deck_name,
+            "desc": "",
+            "mod": now,
+            "usn": -1,
+            "collapsed": False,
+            "browserCollapsed": False,
+            "extendNew": 0,
+            "extendRev": 0,
+            "conf": _CONF_ID,
+            "dyn": 0,
+            "newToday": [0, 0],
+            "revToday": [0, 0],
+            "lrnToday": [0, 0],
+            "timeToday": [0, 0],
+        }
+    }
+    # Deck configuration referenced by the deck's "conf" entry.
+    dconf = {
+        str(_CONF_ID): {
+            "id": _CONF_ID,
+            "name": "Default",
+            "replayq": True,
+            "lapse": {"delays": [10], "leechAction": 0, "leechFails": 8, "minInt": 1, "mult": 0},
+            "rev": {
+                "ease4": 1.3,
+                "fuzz": 0.05,
+                "ivlFct": 1,
+                "maxIvl": 36500,
+                "minSpace": 1,
+                "perDay": 100,
+            },
+            "new": {
+                "bury": True,
+                "delays": [1, 10],
+                "initialFactor": 2500,
+                "ints": [1, 4, 7],
+                "order": 1,
+                "perDay": 20,
+                "separate": True,
+            },
+            "timer": 0,
+            "autoplay": True,
+            "mod": now,
+            "usn": -1,
+        }
+    }
+    # Collection-level settings, stored as JSON in col.conf.
+    col_conf = {
+        "nextPos": 1,
+        "estTimes": True,
+        "activeDecks": [_DECK_ID],
+        "sortType": "noteFld",
+        "timeLim": 0,
+        "sortBackwards": False,
+        "addToCur": True,
+        "curDeck": _DECK_ID,
+        "newBury": True,
+        "newSpread": 0,
+        "dueCounts": True,
+        "curModel": str(_BASIC_MODEL_ID),
+        "collapseTime": 1200,
+    }
+    # Positional values follow the col column order in _SCHEMA:
+    # id, crt, mod, scm, ver, dty, usn, ls, conf, models, decks, dconf, tags
+    conn.execute(
+        "INSERT INTO col VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?)",
+        (
+            1,
+            now,
+            now,
+            now,
+            11,
+            0,
+            -1,
+            0,
+            json.dumps(col_conf),
+            json.dumps(model),
+            json.dumps(deck),
+            json.dumps(dconf),
+            "{}",
+        ),
+    )
+    for i, card in enumerate(cards):
+        # ids are derived from the timestamp plus the card's position in the list
+        note_id = now * 1000 + i
+        card_id = note_id + 1
+        front, back = card["front"], card["back"]
+        # the two field values are stored in one column, separated by \x1f
+        # NOTE(review): a front/back that itself contains \x1f would shift the
+        # field boundary -- confirm inputs are sanitized upstream.
+        flds = f"{front}\x1f{back}"
+        # notes columns: id, guid, mid, mod, usn, tags, flds, sfld, csum, flags, data
+        conn.execute(
+            "INSERT INTO notes VALUES (?,?,?,?,?,?,?,?,?,?,?)",
+            (
+                note_id,
+                _guid(),
+                _BASIC_MODEL_ID,
+                now,
+                -1,
+                "",
+                flds,
+                front,
+                field_checksum(front),
+                0,
+                "",
+            ),
+        )
+        # cards columns: id, nid, did, ord, mod, usn, type, queue, due, then
+        # ivl..flags all zero and an empty data string; due is the list position
+        conn.execute(
+            "INSERT INTO cards VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)",
+            (card_id, note_id, _DECK_ID, 0, now, -1, 0, 0, i, 0, 0, 0, 0, 0, 0, 0, 0, ""),
+        )
+    conn.commit()
+    log.debug("Inserted %d notes into temporary Anki collection", len(cards))
+def write_apkg(cards: list[dict[str, Any]], deck_name: str, out_path: Path) -> None:
+    """
+    Write *cards* as an Anki .apkg file at *out_path*.
+    Each card in *cards* must have ``"front"`` and ``"back"`` string keys.
+    The collection database is built in a temporary directory and then zipped
+    together with an empty ``media`` map; parent directories of *out_path* are
+    created if missing.
+    Raises ValueError when *cards* is empty.
+    """
+    if not cards:
+        raise ValueError("No cards to write -- the .apkg would be empty.")
+    now = int(time.time())
+    with tempfile.TemporaryDirectory() as tmpdir:
+        db_path = Path(tmpdir) / "collection.anki2"
+        conn = sqlite3.connect(str(db_path))
+        # Close the connection even when building fails: an open handle keeps
+        # the database file locked on Windows, which would make the temporary
+        # directory cleanup fail and mask the original error.
+        try:
+            _build_collection(conn, cards, deck_name, now)
+        finally:
+            conn.close()
+        out_path.parent.mkdir(parents=True, exist_ok=True)
+        with zipfile.ZipFile(str(out_path), "w", zipfile.ZIP_DEFLATED) as zf:
+            zf.write(str(db_path), "collection.anki2")
+            zf.writestr("media", "{}")
+    log.info(
+        "Wrote %d cards to Anki package: %s  (deck: '%s')",
+        len(cards),
+        out_path,
+        deck_name,
+    )

notability_extractor/archive/__init__.py ADDED Viewed

@@ -0,0 +1 @@
+"""Archive layer: JSONL CRUD, filtering, backups, scheduler."""

notability_extractor/archive/backup.py ADDED Viewed

@@ -0,0 +1,198 @@
+"""Backup operations: snapshot, restore, export, import, prune."""
+from __future__ import annotations
+import hashlib
+import json
+import shutil
+from dataclasses import dataclass
+from datetime import UTC, datetime
+from pathlib import Path
+from typing import Literal
+from notability_extractor.archive.store import DEFAULT_ARCHIVE
+from notability_extractor.archive.store import load as _load_archive
+from notability_extractor.archive.store import merge as _merge_cards
+from notability_extractor.archive.store import save_all as _save_all
+from notability_extractor.model import Card
+from notability_extractor.utils import get_logger
+log = get_logger(__name__)
+# Default directory for timestamped archive snapshots, under the user's home.
+DEFAULT_BACKUPS = Path.home() / ".notability_extractor" / "backups"
+@dataclass(frozen=True)
+class Snapshot:
+    """One backup file on disk together with the timestamp parsed from its name."""
+    # location of the cards-<stamp>.jsonl file
+    path: Path
+    # timezone-aware (UTC) time encoded in the filename
+    timestamp: datetime
+def snapshot(
+    path: Path = DEFAULT_ARCHIVE,
+    backups_dir: Path = DEFAULT_BACKUPS,
+) -> Path | None:
+    """Save a timestamped copy of the archive into *backups_dir*.
+    Gives back the new snapshot's path, or None when no snapshot was made: the
+    archive file is missing, its hash equals that of the newest existing
+    snapshot, or an OSError occurred. OSErrors are logged and swallowed so a
+    failing backup never aborts a save flow.
+    """
+    if not path.is_file():
+        return None
+    try:
+        backups_dir.mkdir(parents=True, exist_ok=True)
+        digest = _hash(path)
+        newest = _latest_snapshot(backups_dir)
+        unchanged = newest is not None and _hash(newest.path) == digest
+        if unchanged:
+            return None
+        stamp = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
+        destination = backups_dir / f"cards-{stamp}.jsonl"
+        shutil.copy2(path, destination)
+    except OSError as exc:
+        log.error("Backup snapshot failed: %s", exc)
+        return None
+    else:
+        return destination
+def list_snapshots(backups_dir: Path = DEFAULT_BACKUPS) -> list[Snapshot]:
+    """List the snapshots found in *backups_dir*, most recent first.
+    Only files matching ``cards-*.jsonl`` whose name carries a parseable
+    timestamp are included; a missing directory gives an empty list.
+    """
+    if not backups_dir.is_dir():
+        return []
+    stamped = ((p, _parse_timestamp(p.name)) for p in backups_dir.glob("cards-*.jsonl"))
+    found = [Snapshot(path=p, timestamp=ts) for p, ts in stamped if ts is not None]
+    return sorted(found, key=lambda s: s.timestamp, reverse=True)
+def prune(backups_dir: Path = DEFAULT_BACKUPS, keep: int = 10) -> int:
+    """Delete oldest snapshots so only `keep` remain.
+    Files that cannot be removed are logged and skipped. Returns the number of
+    snapshots actually deleted, which is lower than the number of candidates
+    when some removals fail.
+    """
+    snaps = list_snapshots(backups_dir)
+    if len(snaps) <= keep:
+        return 0
+    deleted = 0
+    # list_snapshots() is newest-first, so everything past index `keep` is old
+    for stale in snaps[keep:]:
+        try:
+            stale.path.unlink()
+        except OSError as exc:
+            log.warning("Failed to prune %s: %s", stale.path, exc)
+        else:
+            deleted += 1
+    return deleted
+def restore_snapshot(
+    snapshot_name: str,
+    archive_path: Path = DEFAULT_ARCHIVE,
+    backups_dir: Path = DEFAULT_BACKUPS,
+) -> None:
+    """Overwrite the archive with the snapshot called *snapshot_name*.
+    SAFETY: the current archive is snapshotted into *backups_dir* before it is
+    replaced, so an accidental restore can itself be rolled back.
+    Raises FileNotFoundError when *backups_dir* holds no such snapshot file.
+    """
+    chosen = backups_dir / snapshot_name
+    if chosen.is_file():
+        # safety net first, then swap in the snapshot's contents
+        snapshot(archive_path, backups_dir)
+        archive_path.parent.mkdir(parents=True, exist_ok=True)
+        shutil.copy2(chosen, archive_path)
+        return
+    raise FileNotFoundError(f"No snapshot at {chosen}")
+def export_archive(
+    target: Path,
+    fmt: Literal["jsonl", "json"] = "jsonl",
+    archive_path: Path = DEFAULT_ARCHIVE,
+) -> None:
+    """Dump archive to target, creating parent directories as needed.
+    fmt="jsonl" is a byte-copy of the archive file. Any other value takes the
+    JSON path: a pretty-printed UTF-8 document of the form
+    {"exported_at": <ISO timestamp>, "cards": [...]}.
+    """
+    target.parent.mkdir(parents=True, exist_ok=True)
+    if fmt == "jsonl":
+        shutil.copy2(archive_path, target)
+        return
+    cards = _load_archive(archive_path)
+    payload = {
+        "exported_at": datetime.now(UTC).isoformat(),
+        "cards": [
+            {
+                "id": c.id,
+                "created_at": c.created_at.isoformat(),
+                "updated_at": c.updated_at.isoformat(),
+                "question": c.card.question,
+                "options": c.card.options,
+                "correct_answer": c.card.correct_answer,
+                "source_file": c.card.source_file,
+                "index": c.card.index,
+                "tags": c.card.tags,
+            }
+            for c in cards
+        ],
+    }
+    # ensure_ascii=False emits raw non-ASCII characters, so the encoding must
+    # be pinned: the platform default (e.g. cp1252 on Windows) can fail to
+    # encode them or produce a file that is not valid UTF-8 JSON.
+    target.write_text(
+        json.dumps(payload, indent=2, ensure_ascii=False) + "\n",
+        encoding="utf-8",
+    )
+def import_archive(
+    source: Path,
+    mode: Literal["merge", "replace"] = "merge",
+    archive_path: Path = DEFAULT_ARCHIVE,
+) -> tuple[int, int]:
+    """Read cards from *source* (.jsonl or pretty .json) into the archive.
+    With mode="replace" the existing archive is snapshotted and emptied before
+    the incoming cards are merged in; otherwise they are merged into the
+    archive as it stands. Returns the pair reported by the store's merge.
+    """
+    parsed = _read_cards_for_import(source)
+    replacing = mode == "replace"
+    if replacing:
+        # keep a way back before wiping the current contents
+        snapshot(archive_path)
+        _save_all([], archive_path)
+    outcome = _merge_cards(parsed, archive_path)
+    return outcome
+def _hash(path: Path) -> str:
+    """Return the hex SHA-256 digest of the file at *path*, read in 8192-byte chunks."""
+    digest = hashlib.sha256()
+    with path.open("rb") as stream:
+        while block := stream.read(8192):
+            digest.update(block)
+    return digest.hexdigest()
+def _latest_snapshot(backups_dir: Path) -> Snapshot | None:
+    """Return the newest snapshot in *backups_dir*, or None when there are none."""
+    ordered = list_snapshots(backups_dir)
+    if not ordered:
+        return None
+    return ordered[0]
+def _parse_timestamp(filename: str) -> datetime | None:
+    """Extract the UTC timestamp from a ``cards-YYYYMMDD-HHMMSS.jsonl`` filename.
+    Returns None when the name does not carry a timestamp in that format.
+    """
+    core = filename.removeprefix("cards-").removesuffix(".jsonl")
+    try:
+        parsed = datetime.strptime(core, "%Y%m%d-%H%M%S")
+    except ValueError:
+        return None
+    return parsed.replace(tzinfo=UTC)
+def _read_cards_for_import(source: Path) -> list[Card]:
+    """Parse *source* into a list of Card objects.
+    A ``.json`` file (suffix matched case-insensitively) is read as a single
+    document: either ``{"cards": [...]}`` or a bare list of rows. Every other
+    extension is read as JSONL, one object per non-blank line. Rows that are
+    not JSON objects are skipped.
+    Raises json.JSONDecodeError on malformed JSON and KeyError when a row
+    lacks "question" or "correct_answer".
+    """
+    # JSON interchange files are UTF-8; do not depend on the locale encoding
+    text = source.read_text(encoding="utf-8")
+    # The choice is made on the extension alone -- content is not sniffed.
+    if source.suffix.lower() == ".json":
+        payload = json.loads(text)
+        rows = payload.get("cards", []) if isinstance(payload, dict) else payload
+    else:
+        # .jsonl or anything else: parse line by line
+        rows = [json.loads(line) for line in text.splitlines() if line.strip()]
+    out: list[Card] = []
+    for r in rows:
+        # r is a dict parsed from JSON; we guard with isinstance for mypy
+        if not isinstance(r, dict):
+            continue
+        out.append(
+            Card(
+                question=str(r["question"]),
+                options=(
+                    {str(k): str(v) for k, v in r["options"].items()}
+                    if isinstance(r.get("options"), dict)
+                    else {}
+                ),
+                correct_answer=str(r["correct_answer"]),
+                source_file=str(r.get("source_file", "imported")),
+                index=int(str(r.get("index", 0))),
+                tags=list(r.get("tags", [])) if isinstance(r.get("tags"), list) else [],
+            )
+        )
+    return out

notability_extractor/archive/config.py ADDED Viewed

@@ -0,0 +1,109 @@
+"""Persistent GUI/CLI config at ~/.notability_extractor/config.json.
+Schema is a flat dict. Reads are tolerant -- missing keys fall back to
+sensible defaults. Writes are atomic via tmpfile + os.replace.
+"""
+from __future__ import annotations
+import copy
+import json
+import os
+import tempfile
+from pathlib import Path
+from typing import Any
+DEFAULT_CONFIG_PATH = Path.home() / ".notability_extractor" / "config.json"
+# Keys and what they do:
+#   theme      - color scheme: light | dark | auto (follows OS)
+#   font_size  - base point size for the GUI (applied to QApplication)
+#   log_level  - info | debug; debug logs every archive mutation for auditing
+#   deck_name  - Anki deck name used when building .apkg
+#   input_dir  - path to the Notability export dir; empty string = unset
+#   export_dir - where backup snapshots are written
+#   schedule   - headless backup cadence: off | hourly | daily | weekly
+#   retention  - how many snapshots to keep
+_DEFAULTS: dict[str, Any] = {
+    "theme": "auto",
+    "font_size": 11,
+    "log_level": "info",
+    "deck_name": "Notability Flashcards",
+    "input_dir": "",
+    "export_dir": str(Path.home() / "Documents" / "notability-backups"),
+    "schedule": "off",
+    "retention": 10,
+    # tag_colors: { "biology": "#2d7d4a", ... } - chip color per tag, global
+    "tag_colors": {},
+}
+def load(path: Path = DEFAULT_CONFIG_PATH) -> dict[str, Any]:
+    """Load config from disk, falling back to defaults for missing keys.
+    Returns a fresh, deep-copied dict each call -- callers can mutate it
+    freely (including nested values such as tag_colors) without affecting
+    the module defaults or the on-disk state. A file that cannot be read,
+    decoded as UTF-8 or parsed as JSON yields the defaults instead of raising;
+    a JSON document that is not an object is ignored.
+    """
+    # deepcopy, not dict(): a shallow copy would hand out the one shared
+    # "tag_colors" dict, so a caller's mutation would leak into later loads
+    out = copy.deepcopy(_DEFAULTS)
+    if not path.is_file():
+        return out
+    try:
+        # the config is stored as UTF-8; do not depend on the locale encoding
+        raw = json.loads(path.read_text(encoding="utf-8"))
+    except (OSError, ValueError):
+        # corrupt config -- fall back to defaults rather than crash the app.
+        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
+        return out
+    if isinstance(raw, dict):
+        out.update(raw)
+    return out
+def save(cfg: dict[str, Any], path: Path = DEFAULT_CONFIG_PATH) -> None:
+    """Atomic write of config to disk.
+    The payload is written to a temp file in the target directory, fsynced,
+    then moved over *path* with os.replace. If anything fails before the
+    replace completes, the temp file is removed so no ``<name>.*.tmp``
+    leftovers accumulate next to the config; the original error is re-raised.
+    """
+    path.parent.mkdir(parents=True, exist_ok=True)
+    payload = json.dumps(cfg, indent=2, ensure_ascii=False) + "\n"
+    tmp_name: str | None = None
+    try:
+        with tempfile.NamedTemporaryFile(
+            mode="w",
+            dir=path.parent,
+            prefix=path.name + ".",
+            suffix=".tmp",
+            delete=False,
+            encoding="utf-8",
+        ) as tmp:
+            tmp_name = tmp.name
+            tmp.write(payload)
+            tmp.flush()
+            os.fsync(tmp.fileno())
+        os.replace(tmp_name, path)
+    except BaseException:
+        # delete=False means nobody else will clean this up for us
+        if tmp_name is not None:
+            try:
+                os.unlink(tmp_name)
+            except OSError:
+                # best effort only -- the error being re-raised matters more
+                pass
+        raise
+def get(key: str, path: Path = DEFAULT_CONFIG_PATH) -> Any:
+    """Convenience: read one key from the config at *path*.
+    Falls back to the built-in default for *key*, or None when there is none.
+    """
+    cfg = load(path)
+    fallback = _DEFAULTS.get(key)
+    return cfg.get(key, fallback)
+def set_value(key: str, value: Any, path: Path = DEFAULT_CONFIG_PATH) -> None:
+    """Convenience: read the config at *path*, set *key* to *value*, write it back."""
+    updated = {**load(path), key: value}
+    save(updated, path)
+def get_tag_color(tag: str, path: Path = DEFAULT_CONFIG_PATH) -> str | None:
+    """Look up the chip color saved for *tag*.
+    Returns None (meaning: use the default color) when no color is stored,
+    or when the stored mapping or value does not have the expected type.
+    """
+    stored = load(path).get("tag_colors", {})
+    if not isinstance(stored, dict):
+        return None
+    color = stored.get(tag)
+    if isinstance(color, str):
+        return color
+    return None
+def set_tag_color(tag: str, color: str, path: Path = DEFAULT_CONFIG_PATH) -> None:
+    """Persist a chip color for this tag. Applied globally everywhere tag appears.
+    A stored "tag_colors" value that is not a dict is discarded and replaced
+    by a fresh mapping holding only this tag.
+    """
+    cfg = load(path)
+    existing = cfg.get("tag_colors")
+    # Work on a copy so the dict handed out by load() -- which may be shared
+    # with the module-level defaults -- is never mutated in place.
+    tag_colors = dict(existing) if isinstance(existing, dict) else {}
+    tag_colors[tag] = color
+    cfg["tag_colors"] = tag_colors
+    save(cfg, path)

notability_extractor/archive/filter.py ADDED Viewed

@@ -0,0 +1,44 @@
+"""Pure filter helpers over a list[ArchivedCard]. No I/O."""
+from __future__ import annotations
+from typing import Literal
+from notability_extractor.model import ArchivedCard
+def by_tags(
+    cards: list[ArchivedCard],
+    tags: list[str],
+    mode: Literal["any", "all"] = "any",
+) -> list[ArchivedCard]:
+    """Keep the cards whose tags match *tags*.
+    mode='all' requires every requested tag on the card (intersection); any
+    other mode keeps cards carrying at least one of them (union). An empty
+    *tags* list keeps everything. Always returns a new list.
+    """
+    if not tags:
+        return list(cards)
+    required = set(tags)
+    selected: list[ArchivedCard] = []
+    for entry in cards:
+        card_tags = entry.card.tags
+        if mode == "all":
+            matches = required.issubset(card_tags)
+        else:
+            matches = not required.isdisjoint(card_tags)
+        if matches:
+            selected.append(entry)
+    return selected
+def by_text(cards: list[ArchivedCard], query: str) -> list[ArchivedCard]:
+    """Keep cards whose question or any option text contains *query*.
+    The match is a case-insensitive substring test; an empty query keeps
+    everything. Always returns a new list.
+    """
+    if not query:
+        return list(cards)
+    needle = query.lower()
+    return [
+        c
+        for c in cards
+        if needle in c.card.question.lower()
+        or any(needle in text.lower() for text in c.card.options.values())
+    ]
+def all_tags(cards: list[ArchivedCard]) -> list[str]:
+    """Sorted unique tags across all cards. Feeds tag-input autocomplete.
+    Ordering is case-insensitive; tags that differ only by case are ordered
+    by their exact spelling so the result is the same on every run.
+    """
+    unique = {tag for c in cards for tag in c.card.tags}
+    # The exact-spelling tie-break matters because the input is a set: with
+    # key=str.lower alone, "Bio" vs "bio" would come out in set-iteration order,
+    # which changes between interpreter runs under hash randomization.
+    return sorted(unique, key=lambda tag: (tag.lower(), tag))