inkmd 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. inkmd-0.1.0/LICENSE +21 -0
  2. inkmd-0.1.0/PKG-INFO +293 -0
  3. inkmd-0.1.0/README.md +241 -0
  4. inkmd-0.1.0/pyproject.toml +50 -0
  5. inkmd-0.1.0/setup.cfg +4 -0
  6. inkmd-0.1.0/src/inkmd/__init__.py +59 -0
  7. inkmd-0.1.0/src/inkmd/__main__.py +4 -0
  8. inkmd-0.1.0/src/inkmd/_kerning_data.py +4659 -0
  9. inkmd-0.1.0/src/inkmd/ast.py +172 -0
  10. inkmd-0.1.0/src/inkmd/cli.py +75 -0
  11. inkmd-0.1.0/src/inkmd/fonts.py +461 -0
  12. inkmd-0.1.0/src/inkmd/layout.py +844 -0
  13. inkmd-0.1.0/src/inkmd/parser.py +1727 -0
  14. inkmd-0.1.0/src/inkmd/pdf.py +557 -0
  15. inkmd-0.1.0/src/inkmd/render.py +800 -0
  16. inkmd-0.1.0/src/inkmd.egg-info/PKG-INFO +293 -0
  17. inkmd-0.1.0/src/inkmd.egg-info/SOURCES.txt +43 -0
  18. inkmd-0.1.0/src/inkmd.egg-info/dependency_links.txt +1 -0
  19. inkmd-0.1.0/src/inkmd.egg-info/entry_points.txt +2 -0
  20. inkmd-0.1.0/src/inkmd.egg-info/requires.txt +3 -0
  21. inkmd-0.1.0/src/inkmd.egg-info/top_level.txt +1 -0
  22. inkmd-0.1.0/tests/test_autolinks.py +321 -0
  23. inkmd-0.1.0/tests/test_blockquotes_codeblocks.py +322 -0
  24. inkmd-0.1.0/tests/test_cli.py +145 -0
  25. inkmd-0.1.0/tests/test_code_wrap.py +102 -0
  26. inkmd-0.1.0/tests/test_commonmark_inline.py +274 -0
  27. inkmd-0.1.0/tests/test_compile.py +241 -0
  28. inkmd-0.1.0/tests/test_fonts.py +157 -0
  29. inkmd-0.1.0/tests/test_headings.py +267 -0
  30. inkmd-0.1.0/tests/test_inline_parser.py +155 -0
  31. inkmd-0.1.0/tests/test_kerning.py +173 -0
  32. inkmd-0.1.0/tests/test_layout.py +136 -0
  33. inkmd-0.1.0/tests/test_links.py +258 -0
  34. inkmd-0.1.0/tests/test_lists.py +334 -0
  35. inkmd-0.1.0/tests/test_parser.py +117 -0
  36. inkmd-0.1.0/tests/test_pdf_emission.py +204 -0
  37. inkmd-0.1.0/tests/test_render.py +141 -0
  38. inkmd-0.1.0/tests/test_strikethrough.py +194 -0
  39. inkmd-0.1.0/tests/test_styled_layout.py +186 -0
  40. inkmd-0.1.0/tests/test_styled_pdf.py +201 -0
  41. inkmd-0.1.0/tests/test_tables.py +333 -0
  42. inkmd-0.1.0/tests/test_text_pdf.py +185 -0
  43. inkmd-0.1.0/tests/test_thematic_break.py +137 -0
  44. inkmd-0.1.0/tests/test_times.py +127 -0
  45. inkmd-0.1.0/tests/test_zipapp.py +82 -0
inkmd-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Dylan Moir
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
inkmd-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,293 @@
1
+ Metadata-Version: 2.4
2
+ Name: inkmd
3
+ Version: 0.1.0
4
+ Summary: Pure-Python markdown to PDF compiler. Zero system dependencies. Deterministic.
5
+ Author: Dylan Moir
6
+ License: MIT License
7
+
8
+ Copyright (c) 2026 Dylan Moir
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+ Project-URL: Homepage, https://github.com/eagredev/inkmd
29
+ Project-URL: Repository, https://github.com/eagredev/inkmd
30
+ Project-URL: Issues, https://github.com/eagredev/inkmd/issues
31
+ Keywords: markdown,pdf,compiler,deterministic,zero-dependencies
32
+ Classifier: Development Status :: 4 - Beta
33
+ Classifier: Intended Audience :: Developers
34
+ Classifier: License :: OSI Approved :: MIT License
35
+ Classifier: Operating System :: OS Independent
36
+ Classifier: Programming Language :: Python :: 3
37
+ Classifier: Programming Language :: Python :: 3 :: Only
38
+ Classifier: Programming Language :: Python :: 3.9
39
+ Classifier: Programming Language :: Python :: 3.10
40
+ Classifier: Programming Language :: Python :: 3.11
41
+ Classifier: Programming Language :: Python :: 3.12
42
+ Classifier: Programming Language :: Python :: 3.13
43
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
44
+ Classifier: Topic :: Text Processing :: Markup :: Markdown
45
+ Classifier: Topic :: Printing
46
+ Requires-Python: >=3.9
47
+ Description-Content-Type: text/markdown
48
+ License-File: LICENSE
49
+ Provides-Extra: dev
50
+ Requires-Dist: pytest>=7; extra == "dev"
51
+ Dynamic: license-file
52
+
53
+ # inkmd
54
+
55
+ **Markdown to PDF, pure Python, zero dependencies. MIT-licensed. Deterministic.**
56
+
57
+ ```sh
58
+ pip install inkmd
59
+ inkmd in.md -o out.pdf
60
+ ```
61
+
62
+ That's the whole install. No system packages, no fonts to install, no Chrome binary, no `apt-get`. Works the same on macOS, Linux, Windows, Alpine, AWS Lambda, a locked-down CI runner, or a Steam Deck.
63
+
64
+ <p align="center">
65
+ <img src="docs/images/hero-sample.png" alt="A quarterly report rendered by inkmd, showing headings, a styled paragraph with strikethrough, a blockquote, a right-aligned table with tinted header, a bulleted list, and a fenced Python code block with a grey background." width="640">
66
+ <br>
67
+ <em><a href="examples/hero-sample.md">examples/hero-sample.md</a> rendered through inkmd: headings, inline styles, strikethrough, blockquote, GFM table, list, fenced code, autolinked URL and email all in one page.</em>
68
+ <br>
69
+ <em>See also <a href="examples/inkmd-brief.md">examples/inkmd-brief.md</a>, a two-page project brief written in inkmd-renderable markdown.</em>
70
+ </p>
71
+
72
+ ## What you get
73
+
74
+ - **A single pure-Python wheel.** No native extensions, no system libraries. Installs in under a second.
75
+ - **Faithful CommonMark plus the parts of GFM people actually use:** tables, autolinks, strikethrough, fenced code with language tags. The [supported features](#supported-markdown) section has the full matrix.
76
+ - **PDFs that look right.** Real AFM-driven kerning emitted via TJ arrays, clickable links, tinted code-block backgrounds, blockquote rules that stack for nested quotes, table alignment, headings that breathe.
77
+ - **Byte-identical output for the same input.** No clocks, no random IDs. Useful for version control, signed PDFs, audit trails, reproducible CI.
78
+ - **Two layers of API:** a CLI and two library functions, `compile()` and `render_file()`. The whole public surface is those two functions.
79
+
80
+ ## Why this exists
81
+
82
+ Markdown to PDF is a solved problem in theory and a minefield in practice. Every other tool brings heavy system dependencies that don't survive the trip into an Alpine container, a Lambda function, or a Windows machine without admin rights.
83
+
84
+ | Tool | What goes wrong |
85
+ |------|-----------------|
86
+ | **wkhtmltopdf** | Deprecated since 2023. Unpatched CVEs. |
87
+ | **Chrome headless / Puppeteer** | 200MB+ install. 5 to 15s cold-start latency. |
88
+ | **WeasyPrint** | Needs Pango, cairo, GObject (350 to 550MB of system packages). Breaks on Alpine and Windows. |
89
+ | **Pandoc + LaTeX** | 3GB texlive install. |
90
+ | **PyMuPDF-based tools** | Don't build on Alpine musl. |
91
+ | **`borb`** | AGPL, so unusable in closed-source or commercial projects without a paid licence. |
92
+
93
+ `inkmd` runs anywhere Python runs. It's the markdown-to-PDF compiler you'd write yourself with a free weekend if you didn't want to take a dependency on a browser.
94
+
95
+ ## Use cases
96
+
97
+ - **CI documentation pipelines.** Compile READMEs, release notes, or changelogs to PDF as a build artefact, in a stripped-down container, without `apt-get`.
98
+ - **Agent-generated documents.** LLM agents that need to deliver a PDF (CVs, reports, summaries) can call `inkmd.compile()` directly. No subprocess, no shell-out, no Chrome.
99
+ - **Reproducible audit trails.** Hash the markdown, hash the PDF, and the same input gives the same output bytes. Useful for compliance, signed reports, version-controlled docs.
100
+ - **Serverless rendering.** Lambda plus zero system dependencies equals a PDF endpoint that cold-starts in well under a second.
101
+ - **Restricted environments.** Locked-down CI runners, embedded hardware, anywhere installing a 200MB browser isn't an option.
102
+
103
+ ## Status
104
+
105
+ **v0.1, feature-complete, MIT-licensed.** 501 tests across 24 files. Stdlib-only, Python 3.9+. Byte-deterministic output. The [torture test](examples/torture-test.md) covers everything `inkmd` can render.
106
+
107
+ ## Install
108
+
109
+ From PyPI:
110
+
111
+ ```sh
112
+ pip install inkmd
113
+ ```
114
+
115
+ Or grab the single-file zipapp (no `pip` install required). Each tagged release attaches an `inkmd.pyz` of around 300 KB that you can drop anywhere Python 3.9+ is available:
116
+
117
+ ```sh
118
+ curl -L -o inkmd.pyz https://github.com/eagredev/inkmd/releases/latest/download/inkmd.pyz
119
+ python inkmd.pyz in.md -o out.pdf
120
+ ```
121
+
122
+ Or build it yourself from a checkout:
123
+
124
+ ```sh
125
+ python scripts/build_zipapp.py # produces dist/inkmd.pyz
126
+ ```
127
+
128
+ ## Usage
129
+
130
+ ### CLI
131
+
132
+ ```sh
133
+ inkmd in.md -o out.pdf # file in, file out
134
+ inkmd in.md > out.pdf # file in, stdout out
135
+ inkmd < in.md > out.pdf # stdin in, stdout out
136
+ inkmd in.md -o out.pdf --page-size A4 --family times
137
+ inkmd in.md -o out.pdf --no-autolinks
138
+ inkmd --version
139
+ ```
140
+
141
+ ### Library
142
+
143
+ ```python
144
+ import inkmd
145
+
146
+ # Compile markdown text to PDF bytes
147
+ pdf_bytes = inkmd.compile(md_text)
148
+
149
+ # Or convert files directly
150
+ inkmd.render_file("in.md", "out.pdf")
151
+
152
+ # Options (same on both functions)
153
+ pdf_bytes = inkmd.compile(
154
+ md_text,
155
+ page_size="A4", # or "letter" (default)
156
+ family="times", # or "helvetica" (default)
157
+ autolinks=False, # opt out of GFM bare-URL/email detection
158
+ )
159
+ ```
160
+
161
+ The public API is intentionally narrow: two functions, no classes to instantiate, no state to manage. The CLI is a thin argparse wrapper around `compile()`.
162
+
163
+ ## Supported markdown
164
+
165
+ ### CommonMark
166
+
167
+ | Feature | inkmd |
168
+ |---------|:---:|
169
+ | Paragraphs with line wrapping | Yes |
170
+ | ATX headings (`#` to `######`) | Yes |
171
+ | Setext headings (`===` / `---`) | Yes |
172
+ | Ordered lists, arbitrary `start` | Yes |
173
+ | Unordered lists (`-` / `*` / `+`) | Yes |
174
+ | Nested lists, mixed marker types | Yes |
175
+ | Tight vs. loose list detection | Yes |
176
+ | Blockquotes | Yes |
177
+ | Nested and multi-paragraph blockquotes | Yes |
178
+ | Blockquotes wrapping any block type | Yes |
179
+ | Fenced code blocks | Yes |
180
+ | Code block language tag (info string) | Yes |
181
+ | Indented code blocks | Yes |
182
+ | Code spans (`` `code` ``) | Yes |
183
+ | Emphasis (`*`, `_`) | Yes |
184
+ | Strong emphasis (`**`, `__`) | Yes |
185
+ | Triple `***` becomes nested italic-bold | Yes |
186
+ | Rule of 3 plus intraword-underscore | Yes |
187
+ | Backslash escapes | Yes |
188
+ | Thematic breaks | Yes |
189
+ | Inline links `[text](url)` | Yes |
190
+ | Inline link titles | Yes |
191
+ | Angle-bracket autolinks `<url>` | Yes |
192
+ | Images `![](...)` | v0.2 |
193
+ | Reference-style links | v0.2 |
194
+ | HTML blocks / inline HTML | not planned |
195
+
196
+ ### GFM extensions
197
+
198
+ | Feature | inkmd |
199
+ |---------|:---:|
200
+ | Pipe tables | Yes |
201
+ | Table column alignments | Yes |
202
+ | Bare URL autolinks (`https://...`, `www....`) | Yes |
203
+ | Bare host autolinks (`host.tld/path`) | Yes |
204
+ | Email autolinks | Yes |
205
+ | Strikethrough `~~text~~` | Yes |
206
+ | Task lists `- [ ]` / `- [x]` | v0.2 |
207
+
208
+ ### Visual output
209
+
210
+ - Clickable PDF `/Link` annotations on every URL, inline links and autolinks alike.
211
+ - Blue underlined link text.
212
+ - Light-grey background tint behind fenced code blocks.
213
+ - Thin grey vertical rules for blockquotes. Stacked side-by-side for nested quotes.
214
+ - Tinted table headers with full grid borders and per-column alignment.
215
+ - AFM-correct kerning emitted via TJ arrays (Helvetica and Times both fully kerned).
216
+ - Strikethrough drawn as a thin horizontal bar at glyph mid-height.
217
+
218
+ ### Typography
219
+
220
+ - Helvetica family (default) or Times family. Code uses Courier.
221
+ - Standard PDF letter and A4 page sizes.
222
+ - WinAnsi character encoding: em-dash, en-dash, curly quotes, ellipsis, most Western European glyphs.
223
+ - Codepoints outside WinAnsi (CJK, Cyrillic, emoji, most non-Latin scripts) render as `?` in v0.1. v0.2 lifts this with font embedding.
224
+
225
+ ## Determinism
226
+
227
+ `inkmd` produces **byte-identical** PDF output for the same markdown input on every platform, every Python version, every run. No real-time clocks, no random IDs, no platform-dependent iteration order.
228
+
229
+ If you hash the markdown and the PDF, the relationship is stable forever. Useful for version-controlled documents, signed/hashed PDFs, reproducible CI builds, and audit trails.
230
+
231
+ ## What `inkmd` doesn't do yet
232
+
233
+ | Feature | When | Why |
234
+ |---------|------|-----|
235
+ | Images | v0.2 | Needs decoding plus embedding logic; out of scope for v0.1 |
236
+ | TTF / OTF font embedding | v0.2 | v0.1 uses PDF's 14 base fonts. Tiny output, no font files to ship, but limits codepoints to WinAnsi |
237
+ | Task lists | v0.2 | GFM extension; needs list-marker prefix scan |
238
+ | Headers, footers, page numbers | v0.2 | Needs a per-page chrome system |
239
+ | Page-splitting for oversized tables | v0.2 | Tables currently place atomically and overflow if taller than a page |
240
+ | Tables inside blockquotes | v0.2 | Table detection runs at document level only |
241
+ | Tagged PDF / PDF/UA accessibility | v0.3+ | Under consideration |
242
+ | PDF/A archival format | n/a | Not planned |
243
+ | Math (LaTeX-style) | n/a | Out of scope. Use Pandoc + LaTeX. |
244
+ | HTML passthrough | n/a | Out of scope by design. `inkmd` is markdown to PDF, not HTML to PDF. |
245
+ | Themes / CSS | n/a | Out of scope. Markdown's value is its constraints. |
246
+
247
+ ## How it works
248
+
249
+ Four layers, each strictly above the previous:
250
+
251
+ 1. **`parser`** is a single-pass container-aware block parser plus a CommonMark inline tokeniser. Produces a frozen-dataclass AST.
252
+ 2. **`render`** lowers AST blocks to `RenderedBlock` records with runs, spacing, indent, decorations. Carries font and link state through inline nesting.
253
+ 3. **`layout`** wraps runs into pages, positions each `PositionedRun` against the page coordinate system, emits background rectangles for code blocks, vertical rules for blockquotes, underline plus annotation pairs for links, and bars for strikethrough.
254
+ 4. **`pdf`** serialises pages into PDF bytes. Text via `Tj`/`TJ`-with-kerning, graphics via `rg`/`re`/`f`, link annotations via per-page `/Annots` arrays.
255
+
256
+ No layer imports a higher one. The whole pipeline is around 3,500 lines of pure-Python logic plus 4,700 lines of generated AFM kerning tables. That's it. For a deeper walk-through (the emphasis algorithm, AFM kerning, determinism mechanics), see [`docs/internals.md`](docs/internals.md). The complexity profile is in [`LIZARD-AUDIT.md`](LIZARD-AUDIT.md).
257
+
258
+ <details>
259
+ <summary><strong>A note on font rendering in v0.1</strong></summary>
260
+
261
+ `inkmd` v0.1 uses PDF's **14 base fonts** (Helvetica, Times, Courier, Symbol, ZapfDingbats and their variants). These are spec-mandated to be available in every conforming PDF reader, so we don't ship any font files. The output stays tiny and dependency-free.
262
+
263
+ The trade-off is that the *actual rendering* depends on which Helvetica (or Times, etc.) the reader's system provides:
264
+
265
+ - **macOS** ships Helvetica Neue (real Helvetica). Renders as designed.
266
+ - **Windows** with Adobe Reader ships real Helvetica. Renders as designed.
267
+ - **Linux** typically substitutes Nimbus Sans (URW++'s free Helvetica clone). Renders very similarly but with slightly different side bearings, so spacing between glyphs can look subtly different.
268
+ - **Mobile** (iOS / Android) ships system Helvetica or Roboto variants. Mostly fine.
269
+
270
+ The advance widths are correct everywhere (PDF readers honour the AFM-published metrics), so layout (page breaks, line wrapping, paragraph flow) is identical across systems. What varies is the precise glyph shape *within* each advance-width box, which can produce slightly different visual spacing.
271
+
272
+ For most use cases this is fine. If you need pixel-identical rendering across every system (signed or archival documents, for example), wait for **v0.2 font embedding**, which will bundle font outlines inside each PDF.
273
+
274
+ </details>
275
+
276
+ ## Roadmap
277
+
278
+ - **v0.1**: Core CommonMark + GFM subset, library + CLI, MIT, deterministic. **Shipped.**
279
+ - **v0.2**: Font embedding (full Unicode), images, task lists, headers/footers/page numbers, page-splitting for oversized tables, tables-in-blockquotes.
280
+ - **v0.3**: Tagged PDF, accessibility, TOC generation, cross-references.
281
+ - **post-v1.0**: Optimisations, additional page sizes, PDF/A consideration.
282
+
283
+ ## Licence
284
+
285
+ MIT. See [LICENSE](LICENSE).
286
+
287
+ ## Acknowledgements
288
+
289
+ The 14 standard PDF fonts and their AFM metric files are public-domain artefacts published by Adobe ([adobe-type-tools/Core14_AFMs](https://github.com/adobe-type-tools/Core14_AFMs)). PDF format reference: ISO 32000-1.
290
+
291
+ ## About
292
+
293
+ Built by [Dylan Moir](https://www.linkedin.com/in/dylanmoir/) with Claude as a pair-programming collaborator. If `inkmd` saves you a fight with WeasyPrint or a 200 MB Chrome install in your CI, a star on the repo is plenty.
inkmd-0.1.0/README.md ADDED
@@ -0,0 +1,241 @@
1
+ # inkmd
2
+
3
+ **Markdown to PDF, pure Python, zero dependencies. MIT-licensed. Deterministic.**
4
+
5
+ ```sh
6
+ pip install inkmd
7
+ inkmd in.md -o out.pdf
8
+ ```
9
+
10
+ That's the whole install. No system packages, no fonts to install, no Chrome binary, no `apt-get`. Works the same on macOS, Linux, Windows, Alpine, AWS Lambda, a locked-down CI runner, or a Steam Deck.
11
+
12
+ <p align="center">
13
+ <img src="docs/images/hero-sample.png" alt="A quarterly report rendered by inkmd, showing headings, a styled paragraph with strikethrough, a blockquote, a right-aligned table with tinted header, a bulleted list, and a fenced Python code block with a grey background." width="640">
14
+ <br>
15
+ <em><a href="examples/hero-sample.md">examples/hero-sample.md</a> rendered through inkmd: headings, inline styles, strikethrough, blockquote, GFM table, list, fenced code, autolinked URL and email all in one page.</em>
16
+ <br>
17
+ <em>See also <a href="examples/inkmd-brief.md">examples/inkmd-brief.md</a>, a two-page project brief written in inkmd-renderable markdown.</em>
18
+ </p>
19
+
20
+ ## What you get
21
+
22
+ - **A single pure-Python wheel.** No native extensions, no system libraries. Installs in under a second.
23
+ - **Faithful CommonMark plus the parts of GFM people actually use:** tables, autolinks, strikethrough, fenced code with language tags. The [supported features](#supported-markdown) section has the full matrix.
24
+ - **PDFs that look right.** Real AFM-driven kerning emitted via TJ arrays, clickable links, tinted code-block backgrounds, blockquote rules that stack for nested quotes, table alignment, headings that breathe.
25
+ - **Byte-identical output for the same input.** No clocks, no random IDs. Useful for version control, signed PDFs, audit trails, reproducible CI.
26
+ - **Two layers of API:** a CLI and two library functions, `compile()` and `render_file()`. The whole public surface is those two functions.
27
+
28
+ ## Why this exists
29
+
30
+ Markdown to PDF is a solved problem in theory and a minefield in practice. Every other tool brings heavy system dependencies that don't survive the trip into an Alpine container, a Lambda function, or a Windows machine without admin rights.
31
+
32
+ | Tool | What goes wrong |
33
+ |------|-----------------|
34
+ | **wkhtmltopdf** | Deprecated since 2023. Unpatched CVEs. |
35
+ | **Chrome headless / Puppeteer** | 200MB+ install. 5 to 15s cold-start latency. |
36
+ | **WeasyPrint** | Needs Pango, cairo, GObject (350 to 550MB of system packages). Breaks on Alpine and Windows. |
37
+ | **Pandoc + LaTeX** | 3GB texlive install. |
38
+ | **PyMuPDF-based tools** | Don't build on Alpine musl. |
39
+ | **`borb`** | AGPL, so unusable in closed-source or commercial projects without a paid licence. |
40
+
41
+ `inkmd` runs anywhere Python runs. It's the markdown-to-PDF compiler you'd write yourself with a free weekend if you didn't want to take a dependency on a browser.
42
+
43
+ ## Use cases
44
+
45
+ - **CI documentation pipelines.** Compile READMEs, release notes, or changelogs to PDF as a build artefact, in a stripped-down container, without `apt-get`.
46
+ - **Agent-generated documents.** LLM agents that need to deliver a PDF (CVs, reports, summaries) can call `inkmd.compile()` directly. No subprocess, no shell-out, no Chrome.
47
+ - **Reproducible audit trails.** Hash the markdown, hash the PDF, and the same input gives the same output bytes. Useful for compliance, signed reports, version-controlled docs.
48
+ - **Serverless rendering.** Lambda plus zero system dependencies equals a PDF endpoint that cold-starts in well under a second.
49
+ - **Restricted environments.** Locked-down CI runners, embedded hardware, anywhere installing a 200MB browser isn't an option.
50
+
51
+ ## Status
52
+
53
+ **v0.1, feature-complete, MIT-licensed.** 501 tests across 24 files. Stdlib-only, Python 3.9+. Byte-deterministic output. The [torture test](examples/torture-test.md) covers everything `inkmd` can render.
54
+
55
+ ## Install
56
+
57
+ From PyPI:
58
+
59
+ ```sh
60
+ pip install inkmd
61
+ ```
62
+
63
+ Or grab the single-file zipapp (no `pip` install required). Each tagged release attaches an `inkmd.pyz` of around 300 KB that you can drop anywhere Python 3.9+ is available:
64
+
65
+ ```sh
66
+ curl -L -o inkmd.pyz https://github.com/eagredev/inkmd/releases/latest/download/inkmd.pyz
67
+ python inkmd.pyz in.md -o out.pdf
68
+ ```
69
+
70
+ Or build it yourself from a checkout:
71
+
72
+ ```sh
73
+ python scripts/build_zipapp.py # produces dist/inkmd.pyz
74
+ ```
75
+
76
+ ## Usage
77
+
78
+ ### CLI
79
+
80
+ ```sh
81
+ inkmd in.md -o out.pdf # file in, file out
82
+ inkmd in.md > out.pdf # file in, stdout out
83
+ inkmd < in.md > out.pdf # stdin in, stdout out
84
+ inkmd in.md -o out.pdf --page-size A4 --family times
85
+ inkmd in.md -o out.pdf --no-autolinks
86
+ inkmd --version
87
+ ```
88
+
89
+ ### Library
90
+
91
+ ```python
92
+ import inkmd
93
+
94
+ # Compile markdown text to PDF bytes
95
+ pdf_bytes = inkmd.compile(md_text)
96
+
97
+ # Or convert files directly
98
+ inkmd.render_file("in.md", "out.pdf")
99
+
100
+ # Options (same on both functions)
101
+ pdf_bytes = inkmd.compile(
102
+ md_text,
103
+ page_size="A4", # or "letter" (default)
104
+ family="times", # or "helvetica" (default)
105
+ autolinks=False, # opt out of GFM bare-URL/email detection
106
+ )
107
+ ```
108
+
109
+ The public API is intentionally narrow: two functions, no classes to instantiate, no state to manage. The CLI is a thin argparse wrapper around `compile()`.
110
+
111
+ ## Supported markdown
112
+
113
+ ### CommonMark
114
+
115
+ | Feature | inkmd |
116
+ |---------|:---:|
117
+ | Paragraphs with line wrapping | Yes |
118
+ | ATX headings (`#` to `######`) | Yes |
119
+ | Setext headings (`===` / `---`) | Yes |
120
+ | Ordered lists, arbitrary `start` | Yes |
121
+ | Unordered lists (`-` / `*` / `+`) | Yes |
122
+ | Nested lists, mixed marker types | Yes |
123
+ | Tight vs. loose list detection | Yes |
124
+ | Blockquotes | Yes |
125
+ | Nested and multi-paragraph blockquotes | Yes |
126
+ | Blockquotes wrapping any block type | Yes |
127
+ | Fenced code blocks | Yes |
128
+ | Code block language tag (info string) | Yes |
129
+ | Indented code blocks | Yes |
130
+ | Code spans (`` `code` ``) | Yes |
131
+ | Emphasis (`*`, `_`) | Yes |
132
+ | Strong emphasis (`**`, `__`) | Yes |
133
+ | Triple `***` becomes nested italic-bold | Yes |
134
+ | Rule of 3 plus intraword-underscore | Yes |
135
+ | Backslash escapes | Yes |
136
+ | Thematic breaks | Yes |
137
+ | Inline links `[text](url)` | Yes |
138
+ | Inline link titles | Yes |
139
+ | Angle-bracket autolinks `<url>` | Yes |
140
+ | Images `![](...)` | v0.2 |
141
+ | Reference-style links | v0.2 |
142
+ | HTML blocks / inline HTML | not planned |
143
+
144
+ ### GFM extensions
145
+
146
+ | Feature | inkmd |
147
+ |---------|:---:|
148
+ | Pipe tables | Yes |
149
+ | Table column alignments | Yes |
150
+ | Bare URL autolinks (`https://...`, `www....`) | Yes |
151
+ | Bare host autolinks (`host.tld/path`) | Yes |
152
+ | Email autolinks | Yes |
153
+ | Strikethrough `~~text~~` | Yes |
154
+ | Task lists `- [ ]` / `- [x]` | v0.2 |
155
+
156
+ ### Visual output
157
+
158
+ - Clickable PDF `/Link` annotations on every URL, inline links and autolinks alike.
159
+ - Blue underlined link text.
160
+ - Light-grey background tint behind fenced code blocks.
161
+ - Thin grey vertical rules for blockquotes. Stacked side-by-side for nested quotes.
162
+ - Tinted table headers with full grid borders and per-column alignment.
163
+ - AFM-correct kerning emitted via TJ arrays (Helvetica and Times both fully kerned).
164
+ - Strikethrough drawn as a thin horizontal bar at glyph mid-height.
165
+
166
+ ### Typography
167
+
168
+ - Helvetica family (default) or Times family. Code uses Courier.
169
+ - Standard PDF letter and A4 page sizes.
170
+ - WinAnsi character encoding: em-dash, en-dash, curly quotes, ellipsis, most Western European glyphs.
171
+ - Codepoints outside WinAnsi (CJK, Cyrillic, emoji, most non-Latin scripts) render as `?` in v0.1. v0.2 lifts this with font embedding.
172
+
173
+ ## Determinism
174
+
175
+ `inkmd` produces **byte-identical** PDF output for the same markdown input on every platform, every Python version, every run. No real-time clocks, no random IDs, no platform-dependent iteration order.
176
+
177
+ If you hash the markdown and the PDF, the relationship is stable forever. Useful for version-controlled documents, signed/hashed PDFs, reproducible CI builds, and audit trails.
178
+
179
+ ## What `inkmd` doesn't do yet
180
+
181
+ | Feature | When | Why |
182
+ |---------|------|-----|
183
+ | Images | v0.2 | Needs decoding plus embedding logic; out of scope for v0.1 |
184
+ | TTF / OTF font embedding | v0.2 | v0.1 uses PDF's 14 base fonts. Tiny output, no font files to ship, but limits codepoints to WinAnsi |
185
+ | Task lists | v0.2 | GFM extension; needs list-marker prefix scan |
186
+ | Headers, footers, page numbers | v0.2 | Needs a per-page chrome system |
187
+ | Page-splitting for oversized tables | v0.2 | Tables currently place atomically and overflow if taller than a page |
188
+ | Tables inside blockquotes | v0.2 | Table detection runs at document level only |
189
+ | Tagged PDF / PDF/UA accessibility | v0.3+ | Under consideration |
190
+ | PDF/A archival format | n/a | Not planned |
191
+ | Math (LaTeX-style) | n/a | Out of scope. Use Pandoc + LaTeX. |
192
+ | HTML passthrough | n/a | Out of scope by design. `inkmd` is markdown to PDF, not HTML to PDF. |
193
+ | Themes / CSS | n/a | Out of scope. Markdown's value is its constraints. |
194
+
195
+ ## How it works
196
+
197
+ Four layers, each strictly above the previous:
198
+
199
+ 1. **`parser`** is a single-pass container-aware block parser plus a CommonMark inline tokeniser. Produces a frozen-dataclass AST.
200
+ 2. **`render`** lowers AST blocks to `RenderedBlock` records with runs, spacing, indent, decorations. Carries font and link state through inline nesting.
201
+ 3. **`layout`** wraps runs into pages, positions each `PositionedRun` against the page coordinate system, emits background rectangles for code blocks, vertical rules for blockquotes, underline plus annotation pairs for links, and bars for strikethrough.
202
+ 4. **`pdf`** serialises pages into PDF bytes. Text via `Tj`/`TJ`-with-kerning, graphics via `rg`/`re`/`f`, link annotations via per-page `/Annots` arrays.
203
+
204
+ No layer imports a higher one. The whole pipeline is around 3,500 lines of pure-Python logic plus 4,700 lines of generated AFM kerning tables. That's it. For a deeper walk-through (the emphasis algorithm, AFM kerning, determinism mechanics), see [`docs/internals.md`](docs/internals.md). The complexity profile is in [`LIZARD-AUDIT.md`](LIZARD-AUDIT.md).
205
+
206
+ <details>
207
+ <summary><strong>A note on font rendering in v0.1</strong></summary>
208
+
209
+ `inkmd` v0.1 uses PDF's **14 base fonts** (Helvetica, Times, Courier, Symbol, ZapfDingbats and their variants). These are spec-mandated to be available in every conforming PDF reader, so we don't ship any font files. The output stays tiny and dependency-free.
210
+
211
+ The trade-off is that the *actual rendering* depends on which Helvetica (or Times, etc.) the reader's system provides:
212
+
213
+ - **macOS** ships Helvetica Neue (real Helvetica). Renders as designed.
214
+ - **Windows** with Adobe Reader ships real Helvetica. Renders as designed.
215
+ - **Linux** typically substitutes Nimbus Sans (URW++'s free Helvetica clone). Renders very similarly but with slightly different side bearings, so spacing between glyphs can look subtly different.
216
+ - **Mobile** (iOS / Android) ships system Helvetica or Roboto variants. Mostly fine.
217
+
218
+ The advance widths are correct everywhere (PDF readers honour the AFM-published metrics), so layout (page breaks, line wrapping, paragraph flow) is identical across systems. What varies is the precise glyph shape *within* each advance-width box, which can produce slightly different visual spacing.
219
+
220
+ For most use cases this is fine. If you need pixel-identical rendering across every system (signed or archival documents, for example), wait for **v0.2 font embedding**, which will bundle font outlines inside each PDF.
221
+
222
+ </details>
223
+
224
+ ## Roadmap
225
+
226
+ - **v0.1**: Core CommonMark + GFM subset, library + CLI, MIT, deterministic. **Shipped.**
227
+ - **v0.2**: Font embedding (full Unicode), images, task lists, headers/footers/page numbers, page-splitting for oversized tables, tables-in-blockquotes.
228
+ - **v0.3**: Tagged PDF, accessibility, TOC generation, cross-references.
229
+ - **post-v1.0**: Optimisations, additional page sizes, PDF/A consideration.
230
+
231
+ ## Licence
232
+
233
+ MIT. See [LICENSE](LICENSE).
234
+
235
+ ## Acknowledgements
236
+
237
+ The 14 standard PDF fonts and their AFM metric files are public-domain artefacts published by Adobe ([adobe-type-tools/Core14_AFMs](https://github.com/adobe-type-tools/Core14_AFMs)). PDF format reference: ISO 32000-1.
238
+
239
+ ## About
240
+
241
+ Built by [Dylan Moir](https://www.linkedin.com/in/dylanmoir/) with Claude as a pair-programming collaborator. If `inkmd` saves you a fight with WeasyPrint or a 200 MB Chrome install in your CI, a star on the repo is plenty.
@@ -0,0 +1,50 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "inkmd"
7
+ version = "0.1.0"
8
+ description = "Pure-Python markdown to PDF compiler. Zero system dependencies. Deterministic."
9
+ readme = "README.md"
10
+ requires-python = ">=3.9"
11
+ license = { file = "LICENSE" }
12
+ authors = [
13
+ { name = "Dylan Moir" },
14
+ ]
15
+ keywords = ["markdown", "pdf", "compiler", "deterministic", "zero-dependencies"]
16
+ classifiers = [
17
+ "Development Status :: 4 - Beta",
18
+ "Intended Audience :: Developers",
19
+ "License :: OSI Approved :: MIT License",
20
+ "Operating System :: OS Independent",
21
+ "Programming Language :: Python :: 3",
22
+ "Programming Language :: Python :: 3 :: Only",
23
+ "Programming Language :: Python :: 3.9",
24
+ "Programming Language :: Python :: 3.10",
25
+ "Programming Language :: Python :: 3.11",
26
+ "Programming Language :: Python :: 3.12",
27
+ "Programming Language :: Python :: 3.13",
28
+ "Topic :: Software Development :: Libraries :: Python Modules",
29
+ "Topic :: Text Processing :: Markup :: Markdown",
30
+ "Topic :: Printing",
31
+ ]
32
+ dependencies = []
33
+
34
+ [project.optional-dependencies]
35
+ dev = ["pytest>=7"]
36
+
37
+ [project.scripts]
38
+ inkmd = "inkmd.cli:main"
39
+
40
+ [project.urls]
41
+ Homepage = "https://github.com/eagredev/inkmd"
42
+ Repository = "https://github.com/eagredev/inkmd"
43
+ Issues = "https://github.com/eagredev/inkmd/issues"
44
+
45
+ [tool.setuptools.packages.find]
46
+ where = ["src"]
47
+
48
+ [tool.pytest.ini_options]
49
+ testpaths = ["tests"]
50
+ addopts = "-ra --strict-markers"
inkmd-0.1.0/setup.cfg ADDED
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,59 @@
1
+ """inkmd — pure-Python markdown to PDF compiler.
2
+
3
+ Public API:
4
+
5
+ inkmd.compile(md_text: str, page_size: str = "letter", family: str = "helvetica", *, autolinks: bool = True) -> bytes
6
+ Parse markdown into PDF bytes.
7
+
8
+ inkmd.render_file(in_path, out_path, page_size: str = "letter", family: str = "helvetica", *, autolinks: bool = True) -> None
9
+ Read a markdown file, write a PDF file.
10
+
11
+ Font family choices: 'helvetica' (default, sans-serif) or 'times' (serif).
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ from pathlib import Path
17
+
18
+ from inkmd.parser import parse
19
+ from inkmd.pdf import styled_pdf
20
+ from inkmd.render import FAMILIES, render_document
21
+
22
+
23
+ __version__ = "0.1.0"
24
+
25
+
26
def compile(
    md_text: str,
    page_size: str = "letter",
    family: str = "helvetica",
    *,
    autolinks: bool = True,
) -> bytes:
    """Turn markdown source text into the bytes of a PDF document.

    The text is parsed, rendered with the selected font family, and the
    result is handed to ``styled_pdf`` to produce the final output.

    Args:
        md_text: Markdown source.
        page_size: Page-size name, forwarded unchanged to ``styled_pdf``.
        family: Font family name; must be a key of ``FAMILIES``.
        autolinks: GFM-style detection of bare URLs and email addresses
            (default True). Pass False for strict CommonMark, where bare
            URLs render as plain text and only `<url>` / `[text](url)`
            produce links.

    Returns:
        The value produced by ``styled_pdf`` (the PDF as bytes).

    Raises:
        ValueError: If ``family`` is not a key of ``FAMILIES``.
    """
    if family in FAMILIES:
        document = parse(md_text, autolinks=autolinks)
        rendered = render_document(document, family=FAMILIES[family])
        return styled_pdf(rendered, page_size=page_size)
    # Unknown family: fail before any parsing work is done.
    raise ValueError(f"unknown family {family!r}; available: {tuple(FAMILIES)}")
45
+
46
+
47
def render_file(
    in_path: str | Path,
    out_path: str | Path,
    page_size: str = "letter",
    family: str = "helvetica",
    *,
    autolinks: bool = True,
) -> None:
    """Read markdown from ``in_path``; write PDF to ``out_path``.

    The input file is decoded as UTF-8. A leading byte-order mark, if
    present, is dropped so it does not reach the parser as a stray
    U+FEFF character in front of the first block.

    Args:
        in_path: Path of the markdown file to read.
        out_path: Path of the PDF file to write.
        page_size: Forwarded to ``compile``.
        family: Forwarded to ``compile``.
        autolinks: Forwarded to ``compile``.

    Raises:
        OSError: If the input cannot be read or the output cannot be written.
        UnicodeDecodeError: If the input is not valid UTF-8.
        ValueError: Propagated from ``compile`` for an unknown ``family``.
    """
    # "utf-8-sig" decodes BOM-less UTF-8 exactly like "utf-8" but strips a
    # leading BOM, which some editors (notably on Windows) prepend.
    md = Path(in_path).read_text(encoding="utf-8-sig")
    # Compile before touching the output path so a failed compile never
    # creates or truncates the destination file.
    pdf_bytes = compile(md, page_size=page_size, family=family, autolinks=autolinks)
    Path(out_path).write_bytes(pdf_bytes)
@@ -0,0 +1,4 @@
1
+ from inkmd.cli import main
2
+
3
if __name__ == "__main__":
    # Run the CLI entry point and hand its return value to SystemExit.
    status = main()
    raise SystemExit(status)