@brahim.ariani/md2pdf-cli 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +116 -0
- package/bin/md2pdf.js +23 -0
- package/lib/frontmatter.js +61 -0
- package/lib/highlight.js +38 -0
- package/lib/index.js +85 -11
- package/lib/sanitize.js +25 -0
- package/lib/styles.js +52 -0
- package/lib/toc.js +75 -0
- package/package.json +11 -3
package/README.md
CHANGED
|
@@ -31,8 +31,16 @@ If `output.pdf` is omitted, the output filename is derived from the input (e.g.
|
|
|
31
31
|
| `--title <text>` | Document title (defaults to the input filename) |
|
|
32
32
|
| `--css <file>` | Path to a custom CSS file (replaces the default styles) |
|
|
33
33
|
| `--format <size>` | Page format: `A4`, `Letter`, `Legal`, ... Default: `A4` |
|
|
34
|
+
| `--toc` | Prepend an auto-generated table of contents |
|
|
35
|
+
| `--toc-depth <n>` | Deepest heading level included in the TOC. Default: `3` |
|
|
36
|
+
| `--toc-title <text>` | TOC heading text. Default: `Contents` |
|
|
37
|
+
| `--highlight` | Syntax-highlight fenced code blocks with Shiki |
|
|
38
|
+
| `--code-theme <name>` | Shiki theme for code blocks. Default: `github-light` |
|
|
39
|
+
| `--cover` | Render a title page from YAML front matter |
|
|
40
|
+
| `--no-cover` | Never render a title page (overrides front matter) |
|
|
34
41
|
| `--no-page-numbers` | Disable the page-number footer |
|
|
35
42
|
| `--no-math` | Disable KaTeX equation rendering |
|
|
43
|
+
| `--no-sanitize` | Disable HTML sanitization (**unsafe**, see below) |
|
|
36
44
|
| `--keep-html` | Keep the intermediate `.tmp.html` file for debugging |
|
|
37
45
|
| `-h`, `--help` | Show usage |
|
|
38
46
|
|
|
@@ -43,6 +51,9 @@ md2pdf research.md
|
|
|
43
51
|
md2pdf research.md out/research.pdf
|
|
44
52
|
md2pdf report.md report.pdf --title "Quarterly Report" --css theme.css
|
|
45
53
|
md2pdf notes.md notes.pdf --format Letter --no-page-numbers
|
|
54
|
+
md2pdf book.md book.pdf --toc --toc-depth 2 --toc-title "Table of Contents"
|
|
55
|
+
md2pdf code.md code.pdf --highlight --code-theme github-dark
|
|
56
|
+
md2pdf paper.md paper.pdf --cover --toc
|
|
46
57
|
md2pdf paper.md paper.pdf # equations rendered by default
|
|
47
58
|
md2pdf draft.md draft.pdf --no-math # treat $...$ as literal text
|
|
48
59
|
```
|
|
@@ -61,6 +72,99 @@ $$
|
|
|
61
72
|
|
|
62
73
|
Equations are rendered server-side with KaTeX, so the PDF is self-contained and prints identically on any machine.
|
|
63
74
|
|
|
75
|
+
## Table of contents
|
|
76
|
+
|
|
77
|
+
Pass `--toc` to prepend an auto-generated, clickable table of contents built
|
|
78
|
+
from the document headings. Each heading also receives a stable `id` slug, so
|
|
79
|
+
the TOC links resolve as in-document bookmarks.
|
|
80
|
+
|
|
81
|
+
```bash
|
|
82
|
+
md2pdf report.md report.pdf --toc # depth 3 (default)
|
|
83
|
+
md2pdf report.md report.pdf --toc --toc-depth 2 # only h1 + h2
|
|
84
|
+
md2pdf report.md report.pdf --toc --toc-title "Sommaire"
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
The TOC is placed on its own page (it ends with a page break). You can fully
|
|
88
|
+
restyle it via `--css` by targeting `nav.toc`, `nav.toc .toc-title`, etc.
|
|
89
|
+
|
|
90
|
+
## Front matter & cover page
|
|
91
|
+
|
|
92
|
+
Markdown files may start with a YAML front-matter block. It is parsed, stripped
|
|
93
|
+
from the body, and used to enrich the document:
|
|
94
|
+
|
|
95
|
+
```markdown
|
|
96
|
+
---
|
|
97
|
+
title: Quarterly Report
|
|
98
|
+
subtitle: Q2 2026 Financial Overview
|
|
99
|
+
author:
|
|
100
|
+
- Brahim Ariani
|
|
101
|
+
- Finance Team
|
|
102
|
+
date: 2026-05-31
|
|
103
|
+
cover: true
|
|
104
|
+
---
|
|
105
|
+
|
|
106
|
+
# Introduction
|
|
107
|
+
...
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
- `title` becomes the HTML document title (a `--title` flag still wins).
|
|
111
|
+
- With `--cover` (or `cover: true` in the front matter), a dedicated **title
|
|
112
|
+
page** is rendered from `title`, `subtitle`, `author`/`authors` and `date`,
|
|
113
|
+
followed by a page break. `--no-cover` disables it even if the front matter
|
|
114
|
+
requests one.
|
|
115
|
+
|
|
116
|
+
```bash
|
|
117
|
+
md2pdf paper.md paper.pdf --cover
|
|
118
|
+
md2pdf paper.md paper.pdf --cover --toc # title page, then a TOC page
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
Restyle the cover via `--css` by targeting `section.cover`, `.cover-title`,
|
|
122
|
+
`.cover-subtitle`, `.cover-author`, `.cover-date`.
|
|
123
|
+
|
|
124
|
+
## Syntax highlighting
|
|
125
|
+
|
|
126
|
+
Pass `--highlight` to colorize fenced code blocks with
|
|
127
|
+
[Shiki](https://shiki.style/) (the same engine that powers VS Code). Colors are
|
|
128
|
+
inlined into the HTML, so the PDF stays self-contained and prints identically
|
|
129
|
+
everywhere — no client-side JavaScript or web fonts required.
|
|
130
|
+
|
|
131
|
+
```bash
|
|
132
|
+
md2pdf code.md code.pdf --highlight
|
|
133
|
+
md2pdf code.md code.pdf --highlight --code-theme github-dark
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
Only the languages actually used in the document are loaded, keeping conversion
|
|
137
|
+
fast. Use any Shiki theme name (e.g. `github-light`, `github-dark`, `nord`,
|
|
138
|
+
`dracula`, `min-light`). Unknown languages fall back to a plain, escaped code
|
|
139
|
+
block, and an unknown theme falls back to `github-light`.
|
|
140
|
+
|
|
141
|
+
## Security / HTML sanitization
|
|
142
|
+
|
|
143
|
+
Markdown allows raw HTML, which means an untrusted `.md` file can embed
|
|
144
|
+
`<script>`, `<iframe>`, or event-handler attributes (`onerror`, `onclick`, ...).
|
|
145
|
+
Because the document is rendered through a real browser (Chromium) before being
|
|
146
|
+
printed, such payloads would otherwise execute.
|
|
147
|
+
|
|
148
|
+
To prevent this, the HTML produced from your Markdown is **sanitized by default**
|
|
149
|
+
with [DOMPurify](https://github.com/cure53/DOMPurify) before it ever reaches the
|
|
150
|
+
browser. Scripts, event handlers, and dangerous URIs (`javascript:`, ...) are
|
|
151
|
+
stripped, while legitimate content — headings, tables, code blocks, images,
|
|
152
|
+
links and KaTeX/MathML/SVG math — is preserved.
|
|
153
|
+
|
|
154
|
+
If you fully trust the input and need to keep raw HTML (custom `<script>`,
|
|
155
|
+
embeds, etc.), you can opt out:
|
|
156
|
+
|
|
157
|
+
```bash
|
|
158
|
+
md2pdf trusted.md trusted.pdf --no-sanitize
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
```js
|
|
162
|
+
await convert({ input: 'trusted.md', output: 'trusted.pdf', sanitize: false });
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
> Only disable sanitization for content you control. Never run `--no-sanitize`
|
|
166
|
+
> on files from untrusted sources.
|
|
167
|
+
|
|
64
168
|
## Programmatic API
|
|
65
169
|
|
|
66
170
|
```js
|
|
@@ -74,6 +178,11 @@ await convert({
|
|
|
74
178
|
// css: '/* inline CSS string */',
|
|
75
179
|
format: 'A4',
|
|
76
180
|
pageNumbers: true,
|
|
181
|
+
sanitize: true,
|
|
182
|
+
toc: true,
|
|
183
|
+
tocDepth: 3,
|
|
184
|
+
highlight: true,
|
|
185
|
+
codeTheme: 'github-light',
|
|
77
186
|
});
|
|
78
187
|
```
|
|
79
188
|
|
|
@@ -90,6 +199,13 @@ await convert({
|
|
|
90
199
|
| `margin` | `object` | 22mm / 18mm | `{ top, bottom, left, right }` |
|
|
91
200
|
| `pageNumbers` | `boolean` | `true` | Render `n / total` in the footer |
|
|
92
201
|
| `math` | `boolean` | `true` | Render `$...$` and `$$...$$` as KaTeX |
|
|
202
|
+
| `sanitize` | `boolean` | `true` | Sanitize generated HTML (strip scripts/handlers) |
|
|
203
|
+
| `toc` | `boolean` | `false` | Prepend an auto-generated table of contents |
|
|
204
|
+
| `tocDepth` | `number` | `3` | Deepest heading level included in the TOC |
|
|
205
|
+
| `tocTitle` | `string` | `'Contents'` | TOC heading text |
|
|
206
|
+
| `highlight` | `boolean` | `false` | Syntax-highlight code blocks with Shiki |
|
|
207
|
+
| `codeTheme` | `string` | `'github-light'` | Shiki theme name for code blocks |
|
|
208
|
+
| `cover` | `boolean` | front matter | Render a title page (`true`/`false` overrides YAML) |
|
|
93
209
|
| `headerTemplate` | `string` | empty | Puppeteer header HTML |
|
|
94
210
|
| `footerTemplate` | `string` | page numbers | Puppeteer footer HTML |
|
|
95
211
|
| `puppeteerOptions` | `object` | `{}` | Extra options passed to `puppeteer.launch` |
|
package/bin/md2pdf.js
CHANGED
|
@@ -12,8 +12,16 @@ Options:
|
|
|
12
12
|
--title <text> Document title (defaults to input filename)
|
|
13
13
|
--css <file> Path to a custom CSS file (replaces the default styles)
|
|
14
14
|
--format <size> Page format (A4, Letter, ...). Default: A4
|
|
15
|
+
--toc Prepend an auto-generated table of contents
|
|
16
|
+
--toc-depth <n> Max heading level included in the TOC. Default: 3
|
|
17
|
+
--toc-title <text> TOC heading text. Default: "Contents"
|
|
18
|
+
--highlight Syntax-highlight fenced code blocks (Shiki)
|
|
19
|
+
--code-theme <name> Shiki theme for code blocks. Default: github-light
|
|
20
|
+
--cover Render a title page from YAML front matter
|
|
21
|
+
--no-cover Never render a title page (overrides front matter)
|
|
15
22
|
--no-page-numbers Disable footer page numbers
|
|
16
23
|
--no-math Disable KaTeX equation rendering ($...$ / $$...$$)
|
|
24
|
+
--no-sanitize Disable HTML sanitization (UNSAFE: allows raw HTML/scripts)
|
|
17
25
|
--keep-html Keep the intermediate .tmp.html file
|
|
18
26
|
-h, --help Show this help
|
|
19
27
|
|
|
@@ -31,10 +39,18 @@ function parseArgs(argv) {
|
|
|
31
39
|
if (a === '-h' || a === '--help') { args.flags.help = true; continue; }
|
|
32
40
|
if (a === '--no-page-numbers') { args.flags.pageNumbers = false; continue; }
|
|
33
41
|
if (a === '--no-math') { args.flags.math = false; continue; }
|
|
42
|
+
if (a === '--no-sanitize' || a === '--unsafe') { args.flags.sanitize = false; continue; }
|
|
34
43
|
if (a === '--keep-html') { args.flags.keepHtml = true; continue; }
|
|
44
|
+
if (a === '--toc') { args.flags.toc = true; continue; }
|
|
45
|
+
if (a === '--highlight') { args.flags.highlight = true; continue; }
|
|
46
|
+
if (a === '--cover') { args.flags.cover = true; continue; }
|
|
47
|
+
if (a === '--no-cover') { args.flags.cover = false; continue; }
|
|
35
48
|
if (a === '--title') { args.flags.title = argv[++i]; continue; }
|
|
36
49
|
if (a === '--css') { args.flags.cssFile = argv[++i]; continue; }
|
|
37
50
|
if (a === '--format') { args.flags.format = argv[++i]; continue; }
|
|
51
|
+
if (a === '--toc-depth') { args.flags.tocDepth = parseInt(argv[++i], 10); continue; }
|
|
52
|
+
if (a === '--toc-title') { args.flags.tocTitle = argv[++i]; continue; }
|
|
53
|
+
if (a === '--code-theme') { args.flags.codeTheme = argv[++i]; continue; }
|
|
38
54
|
if (a.startsWith('--')) {
|
|
39
55
|
console.error(`Unknown option: ${a}`);
|
|
40
56
|
process.exit(2);
|
|
@@ -66,6 +82,13 @@ function parseArgs(argv) {
|
|
|
66
82
|
format: args.flags.format || 'A4',
|
|
67
83
|
pageNumbers: args.flags.pageNumbers !== false,
|
|
68
84
|
math: args.flags.math !== false,
|
|
85
|
+
sanitize: args.flags.sanitize !== false,
|
|
86
|
+
toc: !!args.flags.toc,
|
|
87
|
+
tocDepth: Number.isInteger(args.flags.tocDepth) ? args.flags.tocDepth : 3,
|
|
88
|
+
tocTitle: args.flags.tocTitle,
|
|
89
|
+
highlight: !!args.flags.highlight,
|
|
90
|
+
codeTheme: args.flags.codeTheme,
|
|
91
|
+
cover: args.flags.cover,
|
|
69
92
|
keepHtml: !!args.flags.keepHtml,
|
|
70
93
|
});
|
|
71
94
|
if (result.brokenImages && result.brokenImages.length) {
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const matter = require('gray-matter');
|
|
4
|
+
|
|
5
|
+
// Escapes a value so it can be interpolated into HTML text or into a
// double-quoted attribute value.
//
// s: any value; it is coerced with String() first.
// Returns the string with &, <, > and " replaced by their named entities.
function escapeHtml(s) {
  // The ampersand is replaced first so the entities produced by the later
  // replacements are not escaped a second time.
  return String(s)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;');
}
|
|
12
|
+
|
|
13
|
+
// Separates a leading YAML front-matter block from the Markdown body.
//
// rawMd: the full text of the Markdown file.
// Returns { data, content }: `data` is the parsed metadata (an empty object
// when the parser yields nothing object-like) and `content` is the Markdown
// that remains once the front matter is removed.
function parseFrontMatter(rawMd) {
  try {
    const parsed = matter(rawMd);
    const meta = parsed.data;
    const hasMeta = Boolean(meta) && typeof meta === 'object';
    return { data: hasMeta ? meta : {}, content: parsed.content };
  } catch (_) {
    // Best effort: if the parser throws (e.g. malformed YAML), keep the whole
    // file as body text instead of failing the conversion.
    return { data: {}, content: rawMd };
  }
}
|
|
24
|
+
|
|
25
|
+
// Formats a front-matter date for display on the cover page.
//
// value: a Date, or any other value.
// Returns the YYYY-MM-DD prefix of the ISO (UTC) timestamp for a valid Date;
// every other value, including an invalid Date, is returned as String(value).
function formatDate(value) {
  // Check the timestamp explicitly rather than relying on the coercing global
  // isNaN(); an invalid Date has a NaN time value.
  if (value instanceof Date && !Number.isNaN(value.getTime())) {
    return value.toISOString().slice(0, 10);
  }
  return String(value);
}
|
|
31
|
+
|
|
32
|
+
// Normalizes the `author` / `authors` front-matter field to a list of names.
//
// data: front-matter metadata object; `author` takes precedence over
//       `authors` when both are present.
// Returns an array of trimmed, non-empty strings (empty when neither field is
// set). A scalar value is treated as a one-element list.
function normalizeAuthors(data) {
  const raw = data.author != null ? data.author : data.authors;
  if (raw == null) return [];
  const entries = Array.isArray(raw) ? raw : [raw];
  return entries
    // Drop null/undefined entries before stringifying; otherwise an empty YAML
    // list item would be rendered as the literal author "null".
    .filter((entry) => entry != null)
    .map((entry) => String(entry).trim())
    .filter((name) => name !== '');
}
|
|
37
|
+
|
|
38
|
+
// Renders the title page markup from front-matter metadata.
//
// data: metadata object; `title`, `subtitle`, `author`/`authors` and `date`
//       are used when present.
// Returns a <section class="cover"> element containing one child per
// available field (in that order), or '' when no field has a value.
function buildCoverHtml(data = {}) {
  const element = (tag, className, inner) =>
    `<${tag} class="${className}">${inner}</${tag}>`;

  const children = [];
  if (data.title) {
    children.push(element('h1', 'cover-title', escapeHtml(data.title)));
  }
  if (data.subtitle) {
    children.push(element('p', 'cover-subtitle', escapeHtml(data.subtitle)));
  }

  const names = normalizeAuthors(data);
  if (names.length) {
    const joined = names.map(escapeHtml).join(', ');
    children.push(element('p', 'cover-author', joined));
  }

  // Only null/undefined and the empty string count as "no date".
  const hasDate = !(data.date == null || data.date === '');
  if (hasDate) {
    children.push(element('p', 'cover-date', escapeHtml(formatDate(data.date))));
  }

  if (children.length === 0) return '';
  return `<section class="cover">${children.join('')}</section>`;
}
|
|
60
|
+
|
|
61
|
+
module.exports = { parseFrontMatter, buildCoverHtml };
|
package/lib/highlight.js
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const DEFAULT_THEME = 'github-light';
|
|
4
|
+
|
|
5
|
+
// Shiki is ESM-only; load it lazily via dynamic import so this CommonJS
|
|
6
|
+
// package keeps working and pays the cost only when highlighting is enabled.
|
|
7
|
+
let shikiPromise = null;
|
|
8
|
+
// Returns the promise for the dynamically imported `shiki` module, starting
// the import on the first call and handing back the same promise afterwards.
function loadShiki() {
  if (shikiPromise) {
    return shikiPromise;
  }
  shikiPromise = import('shiki');
  return shikiPromise;
}
|
|
12
|
+
|
|
13
|
+
// Builds a highlighter preloaded with the requested theme and languages.
//
// Options:
//   theme: theme name; replaced by DEFAULT_THEME when it is not an own key of
//          shiki.bundledThemes.
//   langs: language names found in the document; duplicates are removed and
//          names that are not own keys of shiki.bundledLanguages or
//          shiki.bundledLanguagesAlias are dropped.
//
// Resolves to { theme, supports(lang), toHtml(code, lang) }. `toHtml` calls
// the already-created highlighter directly, so it can be used from marked's
// synchronous `code` renderer.
async function createCodeHighlighter({ theme = DEFAULT_THEME, langs = [] } = {}) {
  const shiki = await loadShiki();

  // Own-property checks only: the `in` operator also matches inherited names
  // such as "constructor" or "toString", which would then be passed to Shiki
  // as if they were real themes/languages.
  const isOwn = (registry, key) =>
    typeof key === 'string' && Object.hasOwn(registry, key);

  const safeTheme = isOwn(shiki.bundledThemes, theme) ? theme : DEFAULT_THEME;
  const safeLangs = Array.from(new Set(langs)).filter(
    (lang) =>
      isOwn(shiki.bundledLanguages, lang) || isOwn(shiki.bundledLanguagesAlias, lang)
  );

  const highlighter = await shiki.createHighlighter({
    themes: [safeTheme],
    langs: safeLangs,
  });
  const loaded = new Set(highlighter.getLoadedLanguages());

  return {
    theme: safeTheme,
    supports: (lang) => !!lang && loaded.has(lang),
    toHtml: (code, lang) =>
      highlighter.codeToHtml(code, { lang, theme: safeTheme }),
  };
}
|
|
37
|
+
|
|
38
|
+
module.exports = { createCodeHighlighter, DEFAULT_THEME };
|
package/lib/index.js
CHANGED
|
@@ -2,16 +2,69 @@
|
|
|
2
2
|
|
|
3
3
|
const fs = require('fs');
|
|
4
4
|
const path = require('path');
|
|
5
|
-
const {
|
|
5
|
+
const { Marked } = require('marked');
|
|
6
6
|
const markedKatex = require('marked-katex-extension');
|
|
7
7
|
const puppeteer = require('puppeteer');
|
|
8
8
|
const { defaultCss, katexCssLink } = require('./styles');
|
|
9
|
+
const { sanitizeHtml } = require('./sanitize');
|
|
10
|
+
const { collectHeadings, buildTocHtml, slugify } = require('./toc');
|
|
11
|
+
const { createCodeHighlighter } = require('./highlight');
|
|
12
|
+
const { parseFrontMatter, buildCoverHtml } = require('./frontmatter');
|
|
9
13
|
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
14
|
+
// Gathers the language tag of every fenced code block in a token tree.
//
// tokens: iterable of lexer tokens; nested tokens are reached through each
//         token's `tokens`, `items` and `rows` properties.
// out:    Set that receives the names (a fresh Set by default).
// Returns `out`. Only the first whitespace-separated word of a fence's info
// string is recorded.
function collectCodeLangs(tokens, out = new Set()) {
  for (const node of tokens) {
    const isFencedWithLang = node.type === 'code' && node.lang;
    if (isFencedWithLang) {
      const [firstWord] = node.lang.trim().split(/\s+/);
      out.add(firstWord);
    }

    if (node.tokens) {
      collectCodeLangs(node.tokens, out);
    }
    if (node.items) {
      collectCodeLangs(node.items, out);
    }
    if (node.rows) {
      // Each row is itself a list of cells that is walked like any token list.
      for (const cells of node.rows) {
        collectCodeLangs(cells, out);
      }
    }
  }
  return out;
}
|
|
27
|
+
|
|
28
|
+
// Converts Markdown to an HTML body plus optional table-of-contents markup.
//
// A fresh Marked instance is created on every call so the KaTeX extension,
// the heading counter and the highlighter stay local to this invocation.
//
// Options: md (Markdown source), math (enable the KaTeX extension), toc /
// tocDepth / tocTitle (table of contents), highlight / codeTheme (code
// highlighting).
// Resolves to { body, tocHtml }; tocHtml is '' unless `toc` is truthy.
async function renderMarkdown({ md, math, toc, tocDepth, tocTitle, highlight, codeTheme }) {
  const marked = new Marked();
  marked.setOptions({ gfm: true, breaks: false });
  if (math) {
    marked.use(markedKatex({ throwOnError: false, output: 'html', nonStandard: true }));
  }

  // Lex once up front to discover the headings and the code languages in use.
  const lexed = marked.lexer(md);
  const headings = collectHeadings(lexed);
  const slugByPosition = headings.map(({ slug }) => slug);

  const highlighter = highlight
    ? await createCodeHighlighter({
        theme: codeTheme,
        langs: Array.from(collectCodeLangs(lexed)),
      })
    : null;

  // Number of headings rendered so far; selects the precomputed slug.
  // NOTE(review): this assumes collectHeadings yields exactly one entry per
  // heading, in the order the renderer is invoked — confirm for headings
  // nested inside blockquotes or lists.
  let rendered = 0;
  const renderer = {
    heading(text, level) {
      const precomputed = slugByPosition[rendered];
      rendered += 1;
      const slug = precomputed || slugify(text) || `section-${rendered}`;
      return `<h${level} id="${slug}">${text}</h${level}>\n`;
    },
  };

  if (highlighter) {
    renderer.code = (text, infostring) => {
      const [lang] = (infostring || '').trim().split(/\s+/);
      if (!highlighter.supports(lang)) {
        // Language not loaded: emit a plain, escaped code block.
        return `<pre><code>${escapeHtml(text)}</code></pre>\n`;
      }
      return highlighter.toHtml(text, lang);
    };
  }
  marked.use({ renderer });

  const body = marked.parse(md);
  let tocHtml = '';
  if (toc) {
    tocHtml = buildTocHtml(headings, { title: tocTitle, depth: tocDepth });
  }
  return { body, tocHtml };
}
|
|
16
69
|
|
|
17
70
|
function buildHtml({ body, title, css, math }) {
|
|
@@ -97,6 +150,13 @@ async function convert(options) {
|
|
|
97
150
|
puppeteerOptions = {},
|
|
98
151
|
keepHtml = false,
|
|
99
152
|
math = true,
|
|
153
|
+
sanitize = true,
|
|
154
|
+
toc = false,
|
|
155
|
+
tocDepth = 3,
|
|
156
|
+
tocTitle = 'Contents',
|
|
157
|
+
highlight = false,
|
|
158
|
+
codeTheme = 'github-light',
|
|
159
|
+
cover,
|
|
100
160
|
} = options;
|
|
101
161
|
|
|
102
162
|
if (!input) throw new Error('`input` is required');
|
|
@@ -110,17 +170,31 @@ async function convert(options) {
|
|
|
110
170
|
}
|
|
111
171
|
|
|
112
172
|
const rawMd = fs.readFileSync(inputAbs, 'utf8');
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
const
|
|
116
|
-
|
|
173
|
+
const { data: frontMatter, content } = parseFrontMatter(rawMd);
|
|
174
|
+
const md = math ? normalizeBlockMath(content) : content;
|
|
175
|
+
const { body: parsedBody, tocHtml } = await renderMarkdown({
|
|
176
|
+
md,
|
|
177
|
+
math,
|
|
178
|
+
toc,
|
|
179
|
+
tocDepth,
|
|
180
|
+
tocTitle,
|
|
181
|
+
highlight,
|
|
182
|
+
codeTheme,
|
|
183
|
+
});
|
|
184
|
+
|
|
185
|
+
const wantCover =
|
|
186
|
+
cover === true || (cover == null && frontMatter.cover === true);
|
|
187
|
+
const coverHtml = wantCover ? buildCoverHtml(frontMatter) : '';
|
|
188
|
+
const fullBody = [coverHtml, tocHtml, parsedBody].filter(Boolean).join('\n');
|
|
189
|
+
const body = sanitize ? sanitizeHtml(fullBody) : fullBody;
|
|
117
190
|
|
|
118
191
|
let resolvedCss = css || defaultCss;
|
|
119
192
|
if (cssFile) {
|
|
120
193
|
resolvedCss = fs.readFileSync(path.resolve(cssFile), 'utf8');
|
|
121
194
|
}
|
|
122
195
|
|
|
123
|
-
const docTitle =
|
|
196
|
+
const docTitle =
|
|
197
|
+
title || frontMatter.title || path.basename(inputAbs, path.extname(inputAbs));
|
|
124
198
|
const html = buildHtml({ body, title: docTitle, css: resolvedCss, math });
|
|
125
199
|
|
|
126
200
|
const tmpHtmlPath = outputAbs.replace(/\.pdf$/i, '') + '.tmp.html';
|
package/lib/sanitize.js
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const { JSDOM } = require('jsdom');
|
|
4
|
+
const createDOMPurify = require('dompurify');
|
|
5
|
+
|
|
6
|
+
let purifier = null;
|
|
7
|
+
|
|
8
|
+
// Returns the shared DOMPurify instance, creating it on first use from the
// window of an empty JSDOM document and reusing it on later calls.
function getPurifier() {
  if (!purifier) {
    const dom = new JSDOM('');
    purifier = createDOMPurify(dom.window);
  }
  return purifier;
}
|
|
14
|
+
|
|
15
|
+
// Runs generated HTML through the shared DOMPurify instance.
//
// html: markup to clean.
// Returns whatever DOMPurify.sanitize returns for the configuration below:
// the html, svg, svgFilters and mathMl profiles are enabled, the `target`
// attribute is additionally allowed, <style> elements are forbidden and
// data-* attributes are not allowed.
//
// NOTE(review): DOMPurify's default SANITIZE_DOM setting appears to drop
// `id` values that shadow document/form properties (e.g. "title", "name");
// confirm that heading ids relied on for in-document links are unaffected.
function sanitizeHtml(html) {
  const options = {
    USE_PROFILES: { html: true, svg: true, svgFilters: true, mathMl: true },
    ADD_ATTR: ['target'],
    FORBID_TAGS: ['style'],
    ALLOW_DATA_ATTR: false,
  };
  return getPurifier().sanitize(html, options);
}
|
|
24
|
+
|
|
25
|
+
module.exports = { sanitizeHtml };
|
package/lib/styles.js
CHANGED
|
@@ -95,6 +95,58 @@ img + em, p > em {
|
|
|
95
95
|
margin-bottom: 14px;
|
|
96
96
|
}
|
|
97
97
|
a { color: #1d4ed8; text-decoration: none; }
|
|
98
|
+
nav.toc {
|
|
99
|
+
page-break-after: always;
|
|
100
|
+
break-after: page;
|
|
101
|
+
margin-bottom: 8px;
|
|
102
|
+
}
|
|
103
|
+
nav.toc .toc-title {
|
|
104
|
+
margin-top: 0;
|
|
105
|
+
border-bottom: 1px solid #bcccdc;
|
|
106
|
+
padding-bottom: 4px;
|
|
107
|
+
}
|
|
108
|
+
nav.toc ul {
|
|
109
|
+
list-style: none;
|
|
110
|
+
margin: 4px 0;
|
|
111
|
+
padding-left: 18px;
|
|
112
|
+
}
|
|
113
|
+
nav.toc > ul { padding-left: 0; }
|
|
114
|
+
nav.toc li { margin-bottom: 3px; }
|
|
115
|
+
nav.toc a { color: #102a43; }
|
|
116
|
+
section.cover {
|
|
117
|
+
page-break-after: always;
|
|
118
|
+
break-after: page;
|
|
119
|
+
display: flex;
|
|
120
|
+
flex-direction: column;
|
|
121
|
+
align-items: center;
|
|
122
|
+
justify-content: center;
|
|
123
|
+
text-align: center;
|
|
124
|
+
min-height: 80vh;
|
|
125
|
+
}
|
|
126
|
+
section.cover .cover-title {
|
|
127
|
+
font-size: 30pt;
|
|
128
|
+
border: none;
|
|
129
|
+
margin: 0 0 8px 0;
|
|
130
|
+
padding: 0;
|
|
131
|
+
}
|
|
132
|
+
section.cover .cover-subtitle {
|
|
133
|
+
font-size: 15pt;
|
|
134
|
+
color: #475569;
|
|
135
|
+
margin: 0 0 28px 0;
|
|
136
|
+
text-align: center;
|
|
137
|
+
}
|
|
138
|
+
section.cover .cover-author {
|
|
139
|
+
font-size: 13pt;
|
|
140
|
+
color: #102a43;
|
|
141
|
+
margin: 0 0 6px 0;
|
|
142
|
+
text-align: center;
|
|
143
|
+
}
|
|
144
|
+
section.cover .cover-date {
|
|
145
|
+
font-size: 11pt;
|
|
146
|
+
color: #64748b;
|
|
147
|
+
margin: 0;
|
|
148
|
+
text-align: center;
|
|
149
|
+
}
|
|
98
150
|
.katex { font-size: 1.05em; }
|
|
99
151
|
.katex-display { margin: 14px 0; overflow-x: auto; overflow-y: hidden; page-break-inside: avoid; }
|
|
100
152
|
.katex-display > .katex { display: inline-block; text-align: center; max-width: 100%; }
|
package/lib/toc.js
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// Escapes a value so it can be interpolated into HTML text or into a
// double-quoted attribute value.
//
// s: any value; it is coerced with String() first.
// Returns the string with &, <, > and " replaced by their named entities.
function escapeHtml(s) {
  // The ampersand is replaced first so the entities produced by the later
  // replacements are not escaped a second time.
  return String(s)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;');
}
|
|
10
|
+
|
|
11
|
+
// Derives a URL-fragment-friendly slug from heading text.
//
// text: any value; it is coerced with String() first.
// Returns the lowercased text with HTML tags removed, punctuation stripped,
// whitespace runs turned into single hyphens and leading/trailing hyphens
// trimmed. The result may be '' when nothing usable remains.
function slugify(text) {
  return String(text)
    .toLowerCase()
    .trim()
    .replace(/<[^>]*>/g, '')
    // Keep letters, combining marks and digits from every script (plus "_",
    // whitespace and "-"). An ASCII-only \w filter would turn "Résumé" into
    // "rsum" and drop non-Latin headings entirely.
    .replace(/[^\p{L}\p{M}\p{N}_\s-]/gu, '')
    .replace(/\s+/g, '-')
    .replace(/-+/g, '-')
    .replace(/^-+|-+$/g, '');
}
|
|
21
|
+
|
|
22
|
+
// Lists every heading in a lexer token tree, in document order, and assigns
// each one a unique slug.
//
// tokens: array of lexer tokens. Headings nested inside other tokens (reached
//         through `tokens` and `items`, e.g. blockquotes and lists) are
//         included, so the result has one entry per heading in the document.
// Returns an array of { level, text, slug }. A heading whose text slugifies to
// '' uses the base slug "section"; repeated slugs get "-1", "-2", ...
function collectHeadings(tokens) {
  const taken = new Set(); // every slug handed out so far
  const counters = new Map(); // base slug -> last numeric suffix used
  const out = [];

  const visit = (list) => {
    for (const token of list) {
      if (token.type === 'heading') {
        const text = token.text;
        const base = slugify(text) || 'section';
        let slug = base;
        let suffix = counters.get(base) ?? 0;
        // Keep bumping the suffix until the slug is unused, so "A", "A",
        // "A-1" cannot yield the id "a-1" twice.
        while (taken.has(slug)) {
          suffix += 1;
          slug = `${base}-${suffix}`;
        }
        counters.set(base, suffix);
        taken.add(slug);
        out.push({ level: token.depth, text, slug });
        continue;
      }
      // Descend into container tokens so nested headings are not skipped.
      if (Array.isArray(token.tokens)) visit(token.tokens);
      if (Array.isArray(token.items)) visit(token.items);
    }
  };

  visit(tokens);
  return out;
}
|
|
40
|
+
|
|
41
|
+
// Turns a flat, ordered list of headings into a tree based on `level`.
//
// items: array of objects with a numeric `level`.
// Returns the top-level nodes. Every node is a shallow copy of its item with
// a `children` array; an item becomes a child of the closest preceding item
// whose level is strictly lower.
function nest(items) {
  const topLevel = [];
  const ancestors = []; // chain of currently open parents, deepest last

  for (const item of items) {
    const node = { ...item, children: [] };

    // Close every open parent that is not strictly shallower than this item.
    while (ancestors.length > 0 && ancestors[ancestors.length - 1].level >= item.level) {
      ancestors.pop();
    }

    const siblings =
      ancestors.length > 0 ? ancestors[ancestors.length - 1].children : topLevel;
    siblings.push(node);
    ancestors.push(node);
  }

  return topLevel;
}
|
|
54
|
+
|
|
55
|
+
// Renders a heading tree as nested <ul> lists of in-document links.
//
// nodes: array of { slug, text, children } nodes.
// Returns '' for an empty list; otherwise a <ul> with one <li> per node, each
// holding a link to "#<slug>" followed by the rendered children.
function renderNodes(nodes) {
  if (!nodes.length) return '';
  const listItems = nodes.reduce((markup, node) => {
    const link = `<a href="#${node.slug}">${escapeHtml(node.text)}</a>`;
    const nested = renderNodes(node.children);
    return `${markup}<li>${link}${nested}</li>`;
  }, '');
  return `<ul>${listItems}</ul>`;
}
|
|
65
|
+
|
|
66
|
+
// Builds the table-of-contents markup for a list of headings.
//
// headings: array of { level, text, slug } entries.
// Options:
//   title: heading text shown above the list (default 'Contents'); a falsy
//          title omits the <h2>.
//   depth: deepest heading level to include (default 3).
// Returns a <nav class="toc"> element, or '' when no heading is within depth.
function buildTocHtml(headings, { title = 'Contents', depth = 3 } = {}) {
  const included = headings.filter(({ level }) => level <= depth);
  if (included.length === 0) return '';

  let markup = '<nav class="toc">';
  if (title) {
    markup += `<h2 class="toc-title">${escapeHtml(title)}</h2>`;
  }
  markup += renderNodes(nest(included));
  return `${markup}</nav>`;
}
|
|
74
|
+
|
|
75
|
+
module.exports = { slugify, collectHeadings, buildTocHtml };
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@brahim.ariani/md2pdf-cli",
|
|
3
|
-
"version": "1.
|
|
3
|
+
"version": "1.1.0",
|
|
4
4
|
"description": "Convert Markdown files to beautifully styled PDFs using marked and puppeteer.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"markdown",
|
|
@@ -31,13 +31,21 @@
|
|
|
31
31
|
"node": ">=18"
|
|
32
32
|
},
|
|
33
33
|
"scripts": {
|
|
34
|
-
"test": "node test/smoke.js"
|
|
34
|
+
"test": "node test/sanitize.js && node test/toc.js && node test/highlight.js && node test/frontmatter.js && node test/smoke.js",
|
|
35
|
+
"test:sanitize": "node test/sanitize.js",
|
|
36
|
+
"test:toc": "node test/toc.js",
|
|
37
|
+
"test:highlight": "node test/highlight.js",
|
|
38
|
+
"test:frontmatter": "node test/frontmatter.js"
|
|
35
39
|
},
|
|
36
40
|
"dependencies": {
|
|
41
|
+
"dompurify": "^3.4.7",
|
|
42
|
+
"gray-matter": "^4.0.3",
|
|
43
|
+
"jsdom": "^29.1.1",
|
|
37
44
|
"katex": "^0.16.9",
|
|
38
45
|
"marked": "^12.0.0",
|
|
39
46
|
"marked-katex-extension": "^5.0.0",
|
|
40
|
-
"puppeteer": "^24.15.0"
|
|
47
|
+
"puppeteer": "^24.15.0",
|
|
48
|
+
"shiki": "^4.1.0"
|
|
41
49
|
},
|
|
42
50
|
"repository": {
|
|
43
51
|
"type": "git",
|