epub2anki 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,32 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [ "main" ]
6
+ pull_request:
7
+ branches: [ "main" ]
8
+
9
+ jobs:
10
+ test-and-lint:
11
+ runs-on: ubuntu-latest
12
+ steps:
13
+ - uses: actions/checkout@v4
14
+
15
+ - name: Install uv
16
+ uses: astral-sh/setup-uv@v5
17
+ with:
18
+ enable-cache: true
19
+
20
+ - name: Set up Python
21
+ uses: actions/setup-python@v5
22
+ with:
23
+ python-version-file: ".python-version"
24
+
25
+ - name: Install dependencies
26
+ run: uv sync --all-extras --dev
27
+
28
+ - name: Run pre-commit hooks
29
+ uses: pre-commit/action@v3.0.1
30
+
31
+ - name: Run tests with pytest
32
+ run: uv run pytest
@@ -0,0 +1,41 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ release:
5
+ types: [published]
6
+
7
+ jobs:
8
+ pypi-publish:
9
+ name: Build and publish Python package to PyPI
10
+ runs-on: ubuntu-latest
11
+ environment:
12
+ name: pypi
13
+ url: https://pypi.org/p/epub2anki
14
+
15
+
16
+ # Required for PyPI Trusted Publisher authentication
17
+ permissions:
18
+ id-token: write
19
+ contents: read
20
+
21
+ steps:
22
+ - name: Checkout repository
23
+ uses: actions/checkout@v4
24
+
25
+ - name: Install uv
26
+ uses: astral-sh/setup-uv@v5
27
+ with:
28
+ enable-cache: true
29
+
30
+ - name: Set up Python
31
+ uses: actions/setup-python@v5
32
+ with:
33
+ python-version-file: ".python-version"
34
+
35
+ - name: Build sdist and wheel
36
+ run: uv build
37
+
38
+ - name: Publish package distributions to PyPI
39
+ uses: pypa/gh-action-pypi-publish@release/v1
40
+ with:
41
+ packages-dir: dist/
@@ -0,0 +1,17 @@
1
+ # Python-generated files
2
+ __pycache__/
3
+ *.py[oc]
4
+ build/
5
+ dist/
6
+ wheels/
7
+ *.egg-info
8
+
9
+ # Virtual environments
10
+ .venv
11
+ .DS_Store
12
+ .env
13
+ .envrc
14
+ .pytest_cache/
15
+ cache/
16
+ books/
17
+ decks/
@@ -0,0 +1,14 @@
1
+ repos:
2
+ - repo: https://github.com/pre-commit/pre-commit-hooks
3
+ rev: v4.6.0
4
+ hooks:
5
+ - id: trailing-whitespace
6
+ - id: end-of-file-fixer
7
+ - id: check-added-large-files
8
+ - id: check-toml
9
+ - repo: https://github.com/astral-sh/ruff-pre-commit
10
+ rev: v0.3.4
11
+ hooks:
12
+ - id: ruff
13
+ args: [--fix]
14
+ - id: ruff-format
@@ -0,0 +1 @@
1
+ 3.13
@@ -0,0 +1,106 @@
1
+ Metadata-Version: 2.4
2
+ Name: epub2anki
3
+ Version: 0.1.0
4
+ Summary: Convert books and ebooks into Anki flashcards using Anthropic's Claude API.
5
+ Author-email: Matteo Gätzner <matteo.gatzner@gmail.com>
6
+ Requires-Python: >=3.13
7
+ Requires-Dist: anthropic>=0.86.0
8
+ Requires-Dist: ebooklib>=0.20
9
+ Requires-Dist: genanki>=0.13.1
10
+ Requires-Dist: instructor>=1.14.5
11
+ Requires-Dist: markdown>=3.10.2
12
+ Requires-Dist: markitdown>=0.1.5
13
+ Requires-Dist: pydantic>=2.12.5
14
+ Requires-Dist: tqdm>=4.67.3
15
+ Description-Content-Type: text/markdown
16
+
17
+ # epub2anki
18
+
19
+ Convert books and ebooks into Anki flashcards using Anthropic's Claude API.
20
+
21
+ ## Overview
22
+
23
+ `epub2anki` transforms your EPUB books into Anki decks (`.apkg` files). It parses the table of contents and internal book structure, divides the text into manageable chunks, and requests an LLM (Claude) to generate comprehensive and useful Anki flashcards.
24
+
25
+ ## Features
26
+
27
+ - **Structural Parsing:** Uses the EPUB's Table of Contents to intelligently split the book into coherent sections.
28
+ - **LLM Flashcard Generation:** Uses Anthropic's API to construct high-quality flashcards summarizing key concepts.
29
+ - **Batch Processing:** Can utilize Anthropic's Batch API for up to 50% cost savings on API calls.
30
+ - **Resilient Caching:** Uses SQLite to cache generated notes, so if the process is interrupted, you won't be charged twice for previously processed sections!
31
+ - **Direct Anki Export:** Outputs a ready-to-import `.apkg` file.
32
+
33
+ ## Prerequisites
34
+
35
+ - Python 3.13+
36
+ - An [Anthropic API Key](https://console.anthropic.com/) set as `ANTHROPIC_API_KEY` in your environment.
37
+
38
+ ## Installation
39
+
40
+ You can install `epub2anki` using `pip` or `uv`:
41
+
42
+ ```bash
43
+ pip install epub2anki
44
+ ```
45
+
46
+ Or using `uv` (recommended):
47
+
48
+ ```bash
49
+ uv tool install epub2anki
50
+ ```
51
+
52
+ ## Usage
53
+
54
+ Basic usage:
55
+
56
+ ```bash
57
+ export ANTHROPIC_API_KEY="your-api-key-here"
58
+ epub2anki path/to/your/book.epub
59
+ ```
60
+
61
+ This will parse the EPUB, split it into chunks of ~50,000 characters, generate flashcards using the `claude-haiku-4-5` model, and finally place a `<book-name>.apkg` file in the `decks/` directory.
62
+
63
+ ### Advanced Usage & Batching
64
+
65
+ To save 50% on API costs, use the `--batch` flag. This will submit all generation requests to the Anthropic Batch API:
66
+
67
+ ```bash
68
+ epub2anki path/to/your/book.epub --batch
69
+ ```
70
+ *Note: The Batch API operates asynchronously and takes 5 minutes to 24 hours to finish. `epub2anki` will submit the batch and return a Batch ID.*
71
+
72
+ Once your batch is ready (you can check your Anthropic Console), run the script again using `--fetch-batch`:
73
+
74
+ ```bash
75
+ epub2anki path/to/your/book.epub --fetch-batch msgbat_XXXXXXX
76
+ ```
77
+ This will retrieve the completed responses from Anthropic, save them into the local cache, and generate your `.apkg` deck.
78
+
79
+ ### Command-Line Arguments
80
+
81
+ ```
82
+ usage: epub2anki [-h] [--batch] [--fetch-batch FETCH_BATCH] [--deck-id DECK_ID]
83
+ [--chunk-size CHUNK_SIZE] [--model MODEL] [--retries RETRIES]
84
+ [--db-path DB_PATH] [--output-dir OUTPUT_DIR]
85
+ [--rate-max-requests RATE_MAX_REQUESTS] [--rate-max-input RATE_MAX_INPUT]
86
+ [--rate-max-output RATE_MAX_OUTPUT] [--rate-window RATE_WINDOW]
87
+ book_path
88
+
89
+ Generate Anki flashcards from EPUB books using an LLM.
90
+
91
+ positional arguments:
92
+ book_path Path to the EPUB book.
93
+
94
+ options:
95
+ -h, --help show this help message and exit
96
+ --batch Use Anthropic's async Batch API for 50% lower costs.
97
+ --fetch-batch ID Fetch an existing batch ID from Anthropic and build the deck.
98
+ --deck-id DECK_ID Unique integer ID for the Anki deck.
99
+ --chunk-size SIZE Maximum text size per LLM prompt (default: 50000).
100
+ --model MODEL Anthropic model (default: claude-haiku-4-5).
101
+ --output-dir DIR Directory to save the finished .apkg file (default: decks).
102
+ ```
103
+
104
+ ## License
105
+
106
+ MIT License
@@ -0,0 +1,90 @@
1
+ # epub2anki
2
+
3
+ Convert books and ebooks into Anki flashcards using Anthropic's Claude API.
4
+
5
+ ## Overview
6
+
7
+ `epub2anki` transforms your EPUB books into Anki decks (`.apkg` files). It parses the table of contents and internal book structure, divides the text into manageable chunks, and requests an LLM (Claude) to generate comprehensive and useful Anki flashcards.
8
+
9
+ ## Features
10
+
11
+ - **Structural Parsing:** Uses the EPUB's Table of Contents to intelligently split the book into coherent sections.
12
+ - **LLM Flashcard Generation:** Uses Anthropic's API to construct high-quality flashcards summarizing key concepts.
13
+ - **Batch Processing:** Can utilize Anthropic's Batch API for up to 50% cost savings on API calls.
14
+ - **Resilient Caching:** Uses SQLite to cache generated notes, so if the process is interrupted, you won't be charged twice for previously processed sections!
15
+ - **Direct Anki Export:** Outputs a ready-to-import `.apkg` file.
16
+
17
+ ## Prerequisites
18
+
19
+ - Python 3.13+
20
+ - An [Anthropic API Key](https://console.anthropic.com/) set as `ANTHROPIC_API_KEY` in your environment.
21
+
22
+ ## Installation
23
+
24
+ You can install `epub2anki` using `pip` or `uv`:
25
+
26
+ ```bash
27
+ pip install epub2anki
28
+ ```
29
+
30
+ Or using `uv` (recommended):
31
+
32
+ ```bash
33
+ uv tool install epub2anki
34
+ ```
35
+
36
+ ## Usage
37
+
38
+ Basic usage:
39
+
40
+ ```bash
41
+ export ANTHROPIC_API_KEY="your-api-key-here"
42
+ epub2anki path/to/your/book.epub
43
+ ```
44
+
45
+ This will parse the EPUB, split it into chunks of ~50,000 characters, generate flashcards using the `claude-haiku-4-5` model, and finally place a `<book-name>.apkg` file in the `decks/` directory.
46
+
47
+ ### Advanced Usage & Batching
48
+
49
+ To save 50% on API costs, use the `--batch` flag. This will submit all generation requests to the Anthropic Batch API:
50
+
51
+ ```bash
52
+ epub2anki path/to/your/book.epub --batch
53
+ ```
54
+ *Note: The Batch API operates asynchronously and takes 5 minutes to 24 hours to finish. `epub2anki` will submit the batch and return a Batch ID.*
55
+
56
+ Once your batch is ready (you can check your Anthropic Console), run the script again using `--fetch-batch`:
57
+
58
+ ```bash
59
+ epub2anki path/to/your/book.epub --fetch-batch msgbat_XXXXXXX
60
+ ```
61
+ This will retrieve the completed responses from Anthropic, save them into the local cache, and generate your `.apkg` deck.
62
+
63
+ ### Command-Line Arguments
64
+
65
+ ```
66
+ usage: epub2anki [-h] [--batch] [--fetch-batch FETCH_BATCH] [--deck-id DECK_ID]
67
+ [--chunk-size CHUNK_SIZE] [--model MODEL] [--retries RETRIES]
68
+ [--db-path DB_PATH] [--output-dir OUTPUT_DIR]
69
+ [--rate-max-requests RATE_MAX_REQUESTS] [--rate-max-input RATE_MAX_INPUT]
70
+ [--rate-max-output RATE_MAX_OUTPUT] [--rate-window RATE_WINDOW]
71
+ book_path
72
+
73
+ Generate Anki flashcards from EPUB books using an LLM.
74
+
75
+ positional arguments:
76
+ book_path Path to the EPUB book.
77
+
78
+ options:
79
+ -h, --help show this help message and exit
80
+ --batch Use Anthropic's async Batch API for 50% lower costs.
81
+ --fetch-batch ID Fetch an existing batch ID from Anthropic and build the deck.
82
+ --deck-id DECK_ID Unique integer ID for the Anki deck.
83
+ --chunk-size SIZE Maximum text size per LLM prompt (default: 50000).
84
+ --model MODEL Anthropic model (default: claude-haiku-4-5).
85
+ --output-dir DIR Directory to save the finished .apkg file (default: decks).
86
+ ```
87
+
88
+ ## License
89
+
90
+ MIT License
@@ -0,0 +1,33 @@
1
+ [project]
2
+ name = "epub2anki"
3
+ version = "0.1.0"
4
+ description = "Convert books and ebooks into Anki flashcards using Anthropic's Claude API."
5
+ authors = [
6
+ { name = "Matteo Gätzner", email = "matteo.gatzner@gmail.com" }
7
+ ]
8
+ readme = "README.md"
9
+ requires-python = ">=3.13"
10
+ dependencies = [
11
+ "anthropic>=0.86.0",
12
+ "ebooklib>=0.20",
13
+ "genanki>=0.13.1",
14
+ "instructor>=1.14.5",
15
+ "markdown>=3.10.2",
16
+ "markitdown>=0.1.5",
17
+ "pydantic>=2.12.5",
18
+ "tqdm>=4.67.3",
19
+ ]
20
+ [project.scripts]
21
+ epub2anki = "epub2anki.main:main"
22
+
23
+ [build-system]
24
+ requires = ["hatchling"]
25
+ build-backend = "hatchling.build"
26
+
27
+ [dependency-groups]
28
+ dev = [
29
+ "pdbpp>=0.12.1",
30
+ "pytest>=9.0.2",
31
+ "pre-commit>=3.7.1",
32
+ "ruff>=0.3.4",
33
+ ]
File without changes
@@ -0,0 +1,127 @@
1
+ import json
2
+ import sqlite3
3
+ from functools import cache
4
+ from pathlib import Path
5
+
6
+ import genanki
7
+
8
# Hard-coded identifier passed to genanki for the note type defined below.
ANKI_MODEL_ID = 1847192314

# Two-field (Front/Back) note type with a single card template: the question
# side shows the front, the answer side repeats it above a divider and the back.
SIMPLE_ANKI_MODEL = genanki.Model(
    model_id=ANKI_MODEL_ID,
    name="Standard Model",
    fields=[{"name": "Front"}, {"name": "Back"}],
    templates=[
        {
            "name": "Standard Card",
            "qfmt": "{{Front}}",
            "afmt": '{{FrontSide}}<hr id="answer">{{Back}}',
        },
    ],
)
25
+
26
+
27
@cache
def init_db(db_path: Path) -> sqlite3.Connection:
    """Open (or create) the SQLite cache database and ensure its tables exist.

    The function is memoized with ``functools.cache``: calling it again with an
    equal ``db_path`` returns the very same connection object instead of
    opening a second one.

    Args:
        db_path (Path): The path to the SQLite database file. Missing parent
            directories are created before connecting.

    Returns:
        sqlite3.Connection: The established database connection.
    """
    # sqlite3.connect() creates the database file itself but not the directory
    # that contains it; without this a first run against a not-yet-existing
    # cache directory fails with "unable to open database file".
    Path(db_path).parent.mkdir(parents=True, exist_ok=True)

    conn = sqlite3.connect(db_path)
    # One row per (book, section): the prompt that was sent, the model used and
    # the generated notes serialized as JSON.
    conn.execute(
        """
        CREATE TABLE IF NOT EXISTS generated_notes (
            book_name TEXT,
            section_path TEXT,
            prompt TEXT,
            notes_json TEXT,
            model TEXT,
            PRIMARY KEY (book_name, section_path)
        )
        """
    )
    # Rows keyed by href, each holding an HTML payload and a size.
    conn.execute(
        """
        CREATE TABLE IF NOT EXISTS subtrees (
            href TEXT PRIMARY KEY,
            html TEXT,
            size INTEGER
        )
        """
    )
    conn.commit()
    return conn
61
+
62
+
63
def get_cached_notes(
    conn: sqlite3.Connection, book_name: str, section_path: str
) -> list[genanki.Note]:
    """Load the cached Anki notes for one section of a book.

    Args:
        conn (sqlite3.Connection): Active database connection.
        book_name (str): The name of the parsed book.
        section_path (str): The specific path of the section within the book.

    Returns:
        list[genanki.Note]: The notes rebuilt from the cached JSON, or an empty
            list when the section has no cache entry.
    """
    query = "SELECT notes_json FROM generated_notes WHERE book_name = ? AND section_path = ?"
    cached_row = conn.cursor().execute(query, (book_name, section_path)).fetchone()
    if not cached_row:
        return []

    # Each cached entry is a dict with "front", "back" and "tags" keys.
    return [
        genanki.Note(
            model=SIMPLE_ANKI_MODEL,
            fields=[entry["front"], entry["back"]],
            tags=entry["tags"],
        )
        for entry in json.loads(cached_row[0])
    ]
96
+
97
+
98
def save_notes_to_cache(
    conn: sqlite3.Connection,
    book_name: str,
    section_path: str,
    prompt: str,
    model: str,
    notes: list[genanki.Note],
):
    """Store the generated notes for one book section in the database cache.

    An existing row for the same (book_name, section_path) pair is replaced.

    Args:
        conn (sqlite3.Connection): Active database connection.
        book_name (str): The name of the parsed book.
        section_path (str): The specific path of the section within the book.
        prompt (str): The LLM prompt used for generation.
        model (str): The AI model name used for generation.
        notes (list[genanki.Note]): The list of generated notes to cache.
    """
    # Only the first two fields (front, back) and the tags are persisted.
    serializable = []
    for note in notes:
        serializable.append(
            {"front": note.fields[0], "back": note.fields[1], "tags": note.tags}  # type: ignore
        )

    upsert_sql = """
        INSERT OR REPLACE INTO generated_notes (book_name, section_path, prompt, model, notes_json)
        VALUES (?, ?, ?, ?, ?)
        """
    row_values = (book_name, section_path, prompt, model, json.dumps(serializable))
    conn.execute(upsert_sql, row_values)
    conn.commit()