book-condenser 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. book_condenser-0.1.0/CHANGELOG.md +10 -0
  2. book_condenser-0.1.0/CONTRIBUTING.md +29 -0
  3. book_condenser-0.1.0/LICENSE +74 -0
  4. book_condenser-0.1.0/MANIFEST.in +9 -0
  5. book_condenser-0.1.0/NOTICE +6 -0
  6. book_condenser-0.1.0/PKG-INFO +208 -0
  7. book_condenser-0.1.0/README.md +172 -0
  8. book_condenser-0.1.0/SECURITY.md +16 -0
  9. book_condenser-0.1.0/chapter_map_example.json +6 -0
  10. book_condenser-0.1.0/examples/chapter_map.json +7 -0
  11. book_condenser-0.1.0/pyproject.toml +68 -0
  12. book_condenser-0.1.0/requirements.txt +5 -0
  13. book_condenser-0.1.0/setup.cfg +4 -0
  14. book_condenser-0.1.0/src/book_condenser/__init__.py +9 -0
  15. book_condenser-0.1.0/src/book_condenser/__main__.py +7 -0
  16. book_condenser-0.1.0/src/book_condenser/cli.py +6 -0
  17. book_condenser-0.1.0/src/book_condenser/core.py +2547 -0
  18. book_condenser-0.1.0/src/book_condenser/exporters.py +11 -0
  19. book_condenser-0.1.0/src/book_condenser/llm.py +6 -0
  20. book_condenser-0.1.0/src/book_condenser/loaders.py +13 -0
  21. book_condenser-0.1.0/src/book_condenser/models.py +40 -0
  22. book_condenser-0.1.0/src/book_condenser/pipeline.py +12 -0
  23. book_condenser-0.1.0/src/book_condenser/selection.py +20 -0
  24. book_condenser-0.1.0/src/book_condenser/text.py +13 -0
  25. book_condenser-0.1.0/src/book_condenser.egg-info/PKG-INFO +208 -0
  26. book_condenser-0.1.0/src/book_condenser.egg-info/SOURCES.txt +29 -0
  27. book_condenser-0.1.0/src/book_condenser.egg-info/dependency_links.txt +1 -0
  28. book_condenser-0.1.0/src/book_condenser.egg-info/entry_points.txt +2 -0
  29. book_condenser-0.1.0/src/book_condenser.egg-info/requires.txt +11 -0
  30. book_condenser-0.1.0/src/book_condenser.egg-info/top_level.txt +1 -0
  31. book_condenser-0.1.0/tests/test_core.py +148 -0
@@ -0,0 +1,10 @@
1
+ # Changelog
2
+
3
+ ## 0.1.0
4
+
5
+ - Initial public-release preparation.
6
+ - Added installable Python package metadata and `book-condenser` CLI entry point.
7
+ - Added PolyForm Noncommercial 1.0.0 license, security policy, contribution guide, and release documentation.
8
+ - Added focused unit tests, ruff linting, and GitHub Actions CI.
9
+ - Removed generated/private book artifacts from the release tree.
10
+
@@ -0,0 +1,29 @@
1
+ # Contributing
2
+
3
+ Thanks for helping improve Book Condenser.
4
+
5
+ ## Local Setup
6
+
7
+ ```bash
8
+ python -m venv .venv
9
+ source .venv/bin/activate
10
+ pip install -e ".[dev]"
11
+ ```
12
+
13
+ Run the local checks before opening a pull request:
14
+
15
+ ```bash
16
+ ruff check .
17
+ pytest
18
+ python -m build
19
+ twine check dist/*
20
+ ```
21
+
22
+ ## Test Data
23
+
24
+ Do not commit copyrighted books, generated abridgements, or full parsed source text. Use synthetic fixtures or public-domain material that is clearly safe to redistribute.
25
+
26
+ ## Style
27
+
28
+ Keep behavior-preserving refactors separate from functional changes when possible. The CLI is the public interface, so changes to flags, defaults, output names, or file formats should include tests and README updates.
29
+
@@ -0,0 +1,74 @@
1
+ PolyForm Noncommercial License 1.0.0
2
+
3
+ <https://polyformproject.org/licenses/noncommercial/1.0.0>
4
+
5
+ ## Acceptance
6
+
7
+ In order to get any license under these terms, you must agree to them as both strict obligations and conditions to all your licenses.
8
+
9
+ ## Copyright License
10
+
11
+ The licensor grants you a copyright license for the software to do everything you might do with the software that would otherwise infringe the licensor's copyright in it for any permitted purpose. However, you may only distribute the software according to Distribution License and make changes or new works based on the software according to Changes and New Works License.
12
+
13
+ ## Distribution License
14
+
15
+ The licensor grants you an additional copyright license to distribute copies of the software. Your license to distribute covers distributing the software with changes and new works permitted by Changes and New Works License.
16
+
17
+ ## Notices
18
+
19
+ You must ensure that anyone who gets a copy of any part of the software from you also gets a copy of these terms or the URL for them above, as well as copies of any plain-text lines beginning with `Required Notice:` that the licensor provided with the software. For example:
20
+
21
+ Required Notice: Copyright Yoyodyne, Inc. (http://example.com)
22
+
23
+ ## Changes and New Works License
24
+
25
+ The licensor grants you an additional copyright license to make changes and new works based on the software for any permitted purpose.
26
+
27
+ ## Patent License
28
+
29
+ The licensor grants you a patent license for the software that covers patent claims the licensor can license, or becomes able to license, that you would infringe by using the software.
30
+
31
+ ## Noncommercial Purposes
32
+
33
+ Any noncommercial purpose is a permitted purpose.
34
+
35
+ ## Personal Uses
36
+
37
+ Personal use for research, experiment, and testing for the benefit of public knowledge, personal study, private entertainment, hobby projects, amateur pursuits, or religious observance, without any anticipated commercial application, is use for a permitted purpose.
38
+
39
+ ## Noncommercial Organizations
40
+
41
+ Use by any charitable organization, educational institution, public research organization, public safety or health organization, environmental protection organization, or government institution is use for a permitted purpose regardless of the source of funding or obligations resulting from the funding.
42
+
43
+ ## Fair Use
44
+
45
+ You may have "fair use" rights for the software under the law. These terms do not limit them.
46
+
47
+ ## No Other Rights
48
+
49
+ These terms do not allow you to sublicense or transfer any of your licenses to anyone else, or prevent the licensor from granting licenses to anyone else. These terms do not imply any other licenses.
50
+
51
+ ## Patent Defense
52
+
53
+ If you make any written claim that the software infringes or contributes to infringement of any patent, your patent license for the software granted under these terms ends immediately. If your company makes such a claim, your patent license ends immediately for work on behalf of your company.
54
+
55
+ ## Violations
56
+
57
+ The first time you are notified in writing that you have violated any of these terms, or done anything with the software not covered by your licenses, your licenses can nonetheless continue if you come into full compliance with these terms, and take practical steps to correct past violations, within 32 days of receiving notice. Otherwise, all your licenses end immediately.
58
+
59
+ ## No Liability
60
+
61
+ As far as the law allows, the software comes as is, without any warranty or condition, and the licensor will not be liable to you for any damages arising out of these terms or the use or nature of the software, under any kind of legal claim.
62
+
63
+ ## Definitions
64
+
65
+ The licensor is the individual or entity offering these terms, and the software is the software the licensor makes available under these terms.
66
+
67
+ You refers to the individual or entity agreeing to these terms.
68
+
69
+ Your company is any legal entity, sole proprietorship, or other kind of organization that you work for, plus all organizations that have control over, are under the control of, or are under common control with that organization. Control means ownership of substantially all the assets of an entity, or the power to direct its management and policies by vote, contract, or otherwise. Control can be direct or indirect.
70
+
71
+ Your licenses are all the licenses granted to you for the software under these terms.
72
+
73
+ Use means anything you do with the software requiring one of your licenses.
74
+
@@ -0,0 +1,9 @@
1
+ include CHANGELOG.md
2
+ include CONTRIBUTING.md
3
+ include NOTICE
4
+ include SECURITY.md
5
+ include requirements.txt
6
+ include chapter_map_example.json
7
+ recursive-include examples *.json
8
+ recursive-include tests *.py
9
+
@@ -0,0 +1,6 @@
1
+ Book Condenser is licensed under the PolyForm Noncommercial License 1.0.0.
2
+
3
+ Book Condenser processes user-provided source documents and may produce outputs containing substantial verbatim text from those documents. You are responsible for ensuring that you have the legal right to process each source document and to store, distribute, or otherwise use generated outputs.
4
+
5
+ This project does not provide legal advice. If you are unsure whether a use is allowed, consult a qualified professional before processing or sharing copyrighted material.
6
+
@@ -0,0 +1,208 @@
1
+ Metadata-Version: 2.4
2
+ Name: book-condenser
3
+ Version: 0.1.0
4
+ Summary: Read the essential book: create extractive abridgements that preserve the author's original passages.
5
+ Author: Khalid
6
+ License-Expression: LicenseRef-PolyForm-Noncommercial-1.0.0
7
+ Project-URL: Homepage, https://github.com/khalid/book-condenser
8
+ Project-URL: Repository, https://github.com/khalid/book-condenser
9
+ Project-URL: Issues, https://github.com/khalid/book-condenser/issues
10
+ Project-URL: License, https://polyformproject.org/licenses/noncommercial/1.0.0
11
+ Keywords: books,epub,pdf,abridgement,extractive,openai,cli
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Environment :: Console
14
+ Classifier: Intended Audience :: End Users/Desktop
15
+ Classifier: Natural Language :: English
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Programming Language :: Python :: 3.13
21
+ Classifier: Topic :: Text Processing
22
+ Requires-Python: >=3.10
23
+ Description-Content-Type: text/markdown
24
+ License-File: LICENSE
25
+ Requires-Dist: openai>=2.21.0
26
+ Requires-Dist: pydantic>=2.7.0
27
+ Requires-Dist: pymupdf>=1.24.0
28
+ Requires-Dist: python-docx>=1.1.0
29
+ Requires-Dist: reportlab>=4.2.0
30
+ Provides-Extra: dev
31
+ Requires-Dist: build>=1.2.0; extra == "dev"
32
+ Requires-Dist: pytest>=8.0.0; extra == "dev"
33
+ Requires-Dist: ruff>=0.8.0; extra == "dev"
34
+ Requires-Dist: twine>=5.0.0; extra == "dev"
35
+ Dynamic: license-file
36
+
37
+ # Book Condenser
38
+
39
+ ## Read the Essential Book
40
+
41
+ Book Condenser creates an extractive abridgement of a nonfiction book. An AI model identifies the original passages that carry the book's central argument, evidence, concepts, turning points, and conclusions. The software then assembles those passages verbatim into a shorter, beautifully formatted reading edition.
42
+
43
+ This approach preserves what makes a serious book valuable: the author's reasoning, voice, and choice of evidence. Many nonfiction books develop their core ideas through repetition, extended examples, and supporting detail. By retaining the passages that do the essential intellectual work, Book Condenser makes the book more efficient to read while keeping the reader in direct contact with the original text.
44
+
45
+ The result is a condensed, tablet-friendly PDF designed for focused reading: shorter than the source, richer than a summary, and faithful to the author.
46
+
47
+ This tool is intended for books you hold the rights to process, public-domain works, or other material you are legally allowed to transform and store. Generated outputs may contain substantial verbatim source text.
48
+
49
+ ## Features
50
+
51
+ - Supports EPUB, PDF, DOCX, TXT, and Markdown input.
52
+ - Validates parsing with `--parse-only` before making API calls.
53
+ - Preserves chronology and argument structure through subtype-aware selection rules.
54
+ - Protects broad coverage with `--coverage-mode all` and per-section concentration limits.
55
+ - Produces `reading_abridgement.pdf` as the primary reader-facing output.
56
+ - Writes audit artifacts so users can inspect selected passages, scores, coverage, and quality-control decisions.
57
+
58
+ ## Installation
59
+
60
+ From PyPI after release:
61
+
62
+ ```bash
63
+ pip install book-condenser
64
+ ```
65
+
66
+ For local development from a checkout:
67
+
68
+ ```bash
69
+ python -m venv .venv
70
+ source .venv/bin/activate
71
+ pip install -e ".[dev]"
72
+ ```
73
+
74
+ Set your OpenAI API key in the environment before running the full pipeline:
75
+
76
+ ```bash
77
+ export OPENAI_API_KEY="your-api-key-here"
78
+ ```
79
+
80
+ You can also set `OPENAI_MODEL`; otherwise the CLI defaults to `gpt-5-mini`.
81
+
82
+ ## Quick Start
83
+
84
+ Validate parsing before any API calls:
85
+
86
+ ```bash
87
+ book-condenser path/to/public-domain-book.epub \
88
+ --output-dir out/example \
89
+ --parse-only
90
+ ```
91
+
92
+ Review `out/example/parsed_structure_report.md`. Continue only if the detected chapters and back matter look plausible.
93
+
94
+ Generate a reading edition:
95
+
96
+ ```bash
97
+ book-condenser path/to/public-domain-book.epub \
98
+ --output-dir out/example \
99
+ --target-ratio 0.25 \
100
+ --coverage-mode all \
101
+ --chapter-max-share 0.08 \
102
+ --apply-qc
103
+ ```
104
+
105
+ For PDFs with unreliable bookmarks, provide a manual chapter map:
106
+
107
+ ```bash
108
+ book-condenser path/to/public-domain-book.pdf \
109
+ --chapter-map examples/chapter_map.json \
110
+ --output-dir out/example \
111
+ --parse-only
112
+ ```
113
+
114
+ The root `book_condenser.py` file in a source checkout is a compatibility launcher and is not part of the packaged distribution. Prefer the installed `book-condenser` command for normal use.
115
+
116
+ ## Key Controls
117
+
118
+ | Argument | Purpose | Default |
119
+ |---|---|---:|
120
+ | `--target-ratio` | Target proportion of source words retained | `0.25` |
121
+ | `--candidate-ratio` | Candidate pool before global pruning | `0.42` |
122
+ | `--coverage-mode` | Section coverage rule: `all`, `major`, or `none` | `all` |
123
+ | `--chapter-max-share` | Maximum nominal share of final text from one chapter | `0.08` |
124
+ | `--chapter-map` | Manual PDF section/page map when bookmarks are unreliable | none |
125
+ | `--parse-only` | Validate structure and cleanup without API calls | off |
126
+ | `--apply-qc` | Apply final model review within constraints | off |
127
+ | `--pdf-page-size` | `small-tablet`, `a5`, or `large-tablet` | `small-tablet` |
128
+ | `--pdf-font-size` | Body type size between 11 and 20 pt | `14.0` |
129
+ | `--pdf-font` | `auto`, `georgia`, `dejavu serif`, or `times` | `auto` |
130
+ | `--no-docx` | Skip optional DOCX output | off |
131
+
132
+ ## Outputs
133
+
134
+ ```text
135
+ out/example/
136
+ parsed_structure_report.md
137
+ book_metadata.json
138
+ book_paragraphs.jsonl
139
+ structural_overview.json
140
+ chapter_candidates/
141
+ scored_candidates.json
142
+ global_selection.json
143
+ quality_control.json
144
+ selection_audit.md
145
+ reading_abridgement.md
146
+ reading_abridgement.pdf
147
+ reading_abridgement.docx
148
+ ```
149
+
150
+ `reading_abridgement.pdf` is the primary reading edition. `selection_audit.md` records subtype classification, chapter balance, selected passage functions, scores, protected anchors, and locations.
151
+
152
+ Treat the entire output directory as private by default. It can contain verbatim source text, local paths, and model-generated analysis.
153
+
154
+ ## Manual Chapter Map Format
155
+
156
+ Pages are 1-indexed. `end_page` is optional; when omitted, the section ends on the page before the next section's `start_page`.
157
+
158
+ ```json
159
+ [
160
+ {"title": "Prologue", "start_page": 1, "end_page": 8},
161
+ {"title": "Chapter One", "start_page": 9},
162
+ {"title": "Chapter Two", "start_page": 28},
163
+ {"title": "Bibliography", "start_page": 410}
164
+ ]
165
+ ```
166
+
167
+ Back matter headings are retained in the parse audit but excluded from selection and source-word budgeting.
168
+
169
+ ## Source Format Guidance
170
+
171
+ Prefer EPUB when available. PDFs may require a manual chapter map and inspection of the parse-only report. If a PDF is scanned or image-only, run OCR first.
172
+
173
+ The parser supports EPUB 2 `toc.ncx`, EPUB 3 navigation documents, semantic back-matter signals, anchored subsections, PDF bookmarks, visible-heading fallback, and common PDF text cleanup.
174
+
175
+ ## Cost and Privacy
176
+
177
+ Full runs send selected source excerpts and structural context to the configured OpenAI model. Use `--parse-only` to inspect local parsing before any API calls. Larger books, higher `--candidate-ratio`, and `--apply-qc` increase token usage and cost.
178
+
179
+ Do not process confidential, copyrighted, or sensitive books unless your API/provider settings and legal rights allow that use.
180
+
181
+ ## Development
182
+
183
+ Run checks locally:
184
+
185
+ ```bash
186
+ ruff check .
187
+ pytest
188
+ python -m build
189
+ twine check dist/*
190
+ ```
191
+
192
+ The package exposes `book-condenser` as a console script and `python -m book_condenser` as a module entry point.
193
+
194
+ ## Release Checklist
195
+
196
+ 1. Confirm the repository root is this project directory, not a parent home directory.
197
+ 2. Verify no `.env`, `books/`, `out/`, generated abridgements, or copyrighted fixtures are tracked.
198
+ 3. Run `ruff check .`, `pytest`, `python -m build`, and `twine check dist/*`.
199
+ 4. Configure PyPI trusted publishing for `khalidlabs/book-condenser` using the `Publish to PyPI` workflow.
200
+ 5. Publish a GitHub release or run the publish workflow manually after package install and CLI smoke tests pass.
201
+
202
+ ## License
203
+
204
+ Book Condenser is licensed under the [PolyForm Noncommercial License 1.0.0](LICENSE). Commercial use is not permitted by this license without a separate commercial license from the licensor.
205
+
206
+ ## Disclaimer
207
+
208
+ Book Condenser is provided as-is and does not provide legal advice. You are responsible for ensuring that your source material and generated outputs comply with copyright law, contract terms, platform policies, and any other obligations that apply to your use.
@@ -0,0 +1,172 @@
1
+ # Book Condenser
2
+
3
+ ## Read the Essential Book
4
+
5
+ Book Condenser creates an extractive abridgement of a nonfiction book. An AI model identifies the original passages that carry the book's central argument, evidence, concepts, turning points, and conclusions. The software then assembles those passages verbatim into a shorter, beautifully formatted reading edition.
6
+
7
+ This approach preserves what makes a serious book valuable: the author's reasoning, voice, and choice of evidence. Many nonfiction books develop their core ideas through repetition, extended examples, and supporting detail. By retaining the passages that do the essential intellectual work, Book Condenser makes the book more efficient to read while keeping the reader in direct contact with the original text.
8
+
9
+ The result is a condensed, tablet-friendly PDF designed for focused reading: shorter than the source, richer than a summary, and faithful to the author.
10
+
11
+ This tool is intended for books you hold the rights to process, public-domain works, or other material you are legally allowed to transform and store. Generated outputs may contain substantial verbatim source text.
12
+
13
+ ## Features
14
+
15
+ - Supports EPUB, PDF, DOCX, TXT, and Markdown input.
16
+ - Validates parsing with `--parse-only` before making API calls.
17
+ - Preserves chronology and argument structure through subtype-aware selection rules.
18
+ - Protects broad coverage with `--coverage-mode all` and per-section concentration limits.
19
+ - Produces `reading_abridgement.pdf` as the primary reader-facing output.
20
+ - Writes audit artifacts so users can inspect selected passages, scores, coverage, and quality-control decisions.
21
+
22
+ ## Installation
23
+
24
+ From PyPI after release:
25
+
26
+ ```bash
27
+ pip install book-condenser
28
+ ```
29
+
30
+ For local development from a checkout:
31
+
32
+ ```bash
33
+ python -m venv .venv
34
+ source .venv/bin/activate
35
+ pip install -e ".[dev]"
36
+ ```
37
+
38
+ Set your OpenAI API key in the environment before running the full pipeline:
39
+
40
+ ```bash
41
+ export OPENAI_API_KEY="your-api-key-here"
42
+ ```
43
+
44
+ You can also set `OPENAI_MODEL`; otherwise the CLI defaults to `gpt-5-mini`.
45
+
46
+ ## Quick Start
47
+
48
+ Validate parsing before any API calls:
49
+
50
+ ```bash
51
+ book-condenser path/to/public-domain-book.epub \
52
+ --output-dir out/example \
53
+ --parse-only
54
+ ```
55
+
56
+ Review `out/example/parsed_structure_report.md`. Continue only if the detected chapters and back matter look plausible.
57
+
58
+ Generate a reading edition:
59
+
60
+ ```bash
61
+ book-condenser path/to/public-domain-book.epub \
62
+ --output-dir out/example \
63
+ --target-ratio 0.25 \
64
+ --coverage-mode all \
65
+ --chapter-max-share 0.08 \
66
+ --apply-qc
67
+ ```
68
+
69
+ For PDFs with unreliable bookmarks, provide a manual chapter map:
70
+
71
+ ```bash
72
+ book-condenser path/to/public-domain-book.pdf \
73
+ --chapter-map examples/chapter_map.json \
74
+ --output-dir out/example \
75
+ --parse-only
76
+ ```
77
+
78
+ The root `book_condenser.py` file in a source checkout is a compatibility launcher and is not part of the packaged distribution. Prefer the installed `book-condenser` command for normal use.
79
+
80
+ ## Key Controls
81
+
82
+ | Argument | Purpose | Default |
83
+ |---|---|---:|
84
+ | `--target-ratio` | Target proportion of source words retained | `0.25` |
85
+ | `--candidate-ratio` | Candidate pool before global pruning | `0.42` |
86
+ | `--coverage-mode` | Section coverage rule: `all`, `major`, or `none` | `all` |
87
+ | `--chapter-max-share` | Maximum nominal share of final text from one chapter | `0.08` |
88
+ | `--chapter-map` | Manual PDF section/page map when bookmarks are unreliable | none |
89
+ | `--parse-only` | Validate structure and cleanup without API calls | off |
90
+ | `--apply-qc` | Apply final model review within constraints | off |
91
+ | `--pdf-page-size` | `small-tablet`, `a5`, or `large-tablet` | `small-tablet` |
92
+ | `--pdf-font-size` | Body type size between 11 and 20 pt | `14.0` |
93
+ | `--pdf-font` | `auto`, `georgia`, `dejavu serif`, or `times` | `auto` |
94
+ | `--no-docx` | Skip optional DOCX output | off |
95
+
96
+ ## Outputs
97
+
98
+ ```text
99
+ out/example/
100
+ parsed_structure_report.md
101
+ book_metadata.json
102
+ book_paragraphs.jsonl
103
+ structural_overview.json
104
+ chapter_candidates/
105
+ scored_candidates.json
106
+ global_selection.json
107
+ quality_control.json
108
+ selection_audit.md
109
+ reading_abridgement.md
110
+ reading_abridgement.pdf
111
+ reading_abridgement.docx
112
+ ```
113
+
114
+ `reading_abridgement.pdf` is the primary reading edition. `selection_audit.md` records subtype classification, chapter balance, selected passage functions, scores, protected anchors, and locations.
115
+
116
+ Treat the entire output directory as private by default. It can contain verbatim source text, local paths, and model-generated analysis.
117
+
118
+ ## Manual Chapter Map Format
119
+
120
+ Pages are 1-indexed. `end_page` is optional; when omitted, the section ends on the page before the next section's `start_page`.
121
+
122
+ ```json
123
+ [
124
+ {"title": "Prologue", "start_page": 1, "end_page": 8},
125
+ {"title": "Chapter One", "start_page": 9},
126
+ {"title": "Chapter Two", "start_page": 28},
127
+ {"title": "Bibliography", "start_page": 410}
128
+ ]
129
+ ```
130
+
131
+ Back matter headings are retained in the parse audit but excluded from selection and source-word budgeting.
132
+
133
+ ## Source Format Guidance
134
+
135
+ Prefer EPUB when available. PDFs may require a manual chapter map and inspection of the parse-only report. If a PDF is scanned or image-only, run OCR first.
136
+
137
+ The parser supports EPUB 2 `toc.ncx`, EPUB 3 navigation documents, semantic back-matter signals, anchored subsections, PDF bookmarks, visible-heading fallback, and common PDF text cleanup.
138
+
139
+ ## Cost and Privacy
140
+
141
+ Full runs send selected source excerpts and structural context to the configured OpenAI model. Use `--parse-only` to inspect local parsing before any API calls. Larger books, higher `--candidate-ratio`, and `--apply-qc` increase token usage and cost.
142
+
143
+ Do not process confidential, copyrighted, or sensitive books unless your API/provider settings and legal rights allow that use.
144
+
145
+ ## Development
146
+
147
+ Run checks locally:
148
+
149
+ ```bash
150
+ ruff check .
151
+ pytest
152
+ python -m build
153
+ twine check dist/*
154
+ ```
155
+
156
+ The package exposes `book-condenser` as a console script and `python -m book_condenser` as a module entry point.
157
+
158
+ ## Release Checklist
159
+
160
+ 1. Confirm the repository root is this project directory, not a parent home directory.
161
+ 2. Verify no `.env`, `books/`, `out/`, generated abridgements, or copyrighted fixtures are tracked.
162
+ 3. Run `ruff check .`, `pytest`, `python -m build`, and `twine check dist/*`.
163
+ 4. Configure PyPI trusted publishing for `khalidlabs/book-condenser` using the `Publish to PyPI` workflow.
164
+ 5. Publish a GitHub release or run the publish workflow manually after package install and CLI smoke tests pass.
165
+
166
+ ## License
167
+
168
+ Book Condenser is licensed under the [PolyForm Noncommercial License 1.0.0](LICENSE). Commercial use is not permitted by this license without a separate commercial license from the licensor.
169
+
170
+ ## Disclaimer
171
+
172
+ Book Condenser is provided as-is and does not provide legal advice. You are responsible for ensuring that your source material and generated outputs comply with copyright law, contract terms, platform policies, and any other obligations that apply to your use.
@@ -0,0 +1,16 @@
1
+ # Security Policy
2
+
3
+ ## Reporting Issues
4
+
5
+ Please report security issues privately to the project maintainer before opening a public issue. Include the affected version, a clear reproduction path, and any relevant logs with secrets removed.
6
+
7
+ ## Secrets
8
+
9
+ Book Condenser reads API credentials from environment variables such as `OPENAI_API_KEY`. Do not commit `.env` files, shell history, generated logs, or output artifacts containing credentials.
10
+
11
+ If an API key is exposed, revoke it with the provider immediately, create a replacement key, and remove the exposed value from the repository and any published history.
12
+
13
+ ## Generated Artifacts
14
+
15
+ Generated files may contain substantial verbatim text from source books and local filesystem paths. Treat `out/`, `books/`, and similar working directories as private unless you have verified that every file is safe to publish.
16
+
@@ -0,0 +1,6 @@
1
+ [
2
+ {"title": "Prologue", "start_page": 1, "end_page": 8},
3
+ {"title": "Chapter One", "start_page": 9},
4
+ {"title": "Chapter Two", "start_page": 28},
5
+ {"title": "List of Interviews and Written Correspondence", "start_page": 410}
6
+ ]
@@ -0,0 +1,7 @@
1
+ [
2
+ {"title": "Prologue", "start_page": 1, "end_page": 8},
3
+ {"title": "Chapter One", "start_page": 9},
4
+ {"title": "Chapter Two", "start_page": 28},
5
+ {"title": "Bibliography", "start_page": 410}
6
+ ]
7
+
@@ -0,0 +1,68 @@
1
+ [build-system]
2
+ requires = ["setuptools>=77", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "book-condenser"
7
+ version = "0.1.0"
8
+ description = "Read the essential book: create extractive abridgements that preserve the author's original passages."
9
+ readme = "README.md"
10
+ requires-python = ">=3.10"
11
+ license = "LicenseRef-PolyForm-Noncommercial-1.0.0"
12
+ license-files = ["LICENSE"]
13
+ authors = [
14
+ { name = "Khalid" }
15
+ ]
16
+ keywords = ["books", "epub", "pdf", "abridgement", "extractive", "openai", "cli"]
17
+ classifiers = [
18
+ "Development Status :: 3 - Alpha",
19
+ "Environment :: Console",
20
+ "Intended Audience :: End Users/Desktop",
21
+ "Natural Language :: English",
22
+ "Programming Language :: Python :: 3",
23
+ "Programming Language :: Python :: 3.10",
24
+ "Programming Language :: Python :: 3.11",
25
+ "Programming Language :: Python :: 3.12",
26
+ "Programming Language :: Python :: 3.13",
27
+ "Topic :: Text Processing",
28
+ ]
29
+ dependencies = [
30
+ "openai>=2.21.0",
31
+ "pydantic>=2.7.0",
32
+ "pymupdf>=1.24.0",
33
+ "python-docx>=1.1.0",
34
+ "reportlab>=4.2.0",
35
+ ]
36
+
37
+ [project.optional-dependencies]
38
+ dev = [
39
+ "build>=1.2.0",
40
+ "pytest>=8.0.0",
41
+ "ruff>=0.8.0",
42
+ "twine>=5.0.0",
43
+ ]
44
+
45
+ [project.scripts]
46
+ book-condenser = "book_condenser.cli:main"
47
+
48
+ [project.urls]
49
+ Homepage = "https://github.com/khalid/book-condenser"
50
+ Repository = "https://github.com/khalid/book-condenser"
51
+ Issues = "https://github.com/khalid/book-condenser/issues"
52
+ License = "https://polyformproject.org/licenses/noncommercial/1.0.0"
53
+
54
+ [tool.setuptools.packages.find]
55
+ where = ["src"]
56
+
57
+ [tool.pytest.ini_options]
58
+ testpaths = ["tests"]
59
+ pythonpath = ["src"]
60
+
61
+ [tool.ruff]
62
+ line-length = 120
63
+ target-version = "py310"
64
+
65
+ [tool.ruff.lint]
66
+ select = ["E", "F", "I", "UP", "B"]
67
+ ignore = ["E501"]
68
+
@@ -0,0 +1,5 @@
1
+ openai>=2.21.0
2
+ pydantic>=2.7.0
3
+ pymupdf>=1.24.0
4
+ python-docx>=1.1.0
5
+ reportlab>=4.2.0
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,9 @@
1
+ """Book Condenser package."""
2
+
3
+ from .core import __doc__ as __doc__
4
+ from .core import main
5
+
6
+ __version__ = "0.1.0"
7
+
8
+ __all__ = ["__version__", "main"]
9
+
@@ -0,0 +1,7 @@
1
+ """Run the Book Condenser command line interface."""
2
+
3
+ from .core import main
4
+
5
+ if __name__ == "__main__":
6
+ raise SystemExit(main())
7
+
@@ -0,0 +1,6 @@
1
+ """Console-script entry point."""
2
+
3
+ from .core import main
4
+
5
+ __all__ = ["main"]
6
+