@grainulation/mill 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. package/CHANGELOG.md +12 -0
  2. package/LICENSE +21 -0
  3. package/README.md +76 -0
  4. package/bin/mill.js +320 -0
  5. package/lib/exporters/csv.js +83 -0
  6. package/lib/exporters/json-ld.js +44 -0
  7. package/lib/exporters/markdown.js +116 -0
  8. package/lib/exporters/pdf.js +104 -0
  9. package/lib/formats/bibtex.js +76 -0
  10. package/lib/formats/changelog.js +102 -0
  11. package/lib/formats/csv.js +92 -0
  12. package/lib/formats/dot.js +129 -0
  13. package/lib/formats/evidence-matrix.js +87 -0
  14. package/lib/formats/executive-summary.js +130 -0
  15. package/lib/formats/github-issues.js +89 -0
  16. package/lib/formats/graphml.js +118 -0
  17. package/lib/formats/html-report.js +181 -0
  18. package/lib/formats/jira-csv.js +89 -0
  19. package/lib/formats/json-ld.js +28 -0
  20. package/lib/formats/markdown.js +118 -0
  21. package/lib/formats/ndjson.js +25 -0
  22. package/lib/formats/obsidian.js +136 -0
  23. package/lib/formats/opml.js +108 -0
  24. package/lib/formats/ris.js +70 -0
  25. package/lib/formats/rss.js +100 -0
  26. package/lib/formats/sankey.js +72 -0
  27. package/lib/formats/slide-deck.js +200 -0
  28. package/lib/formats/sql.js +116 -0
  29. package/lib/formats/static-site.js +169 -0
  30. package/lib/formats/treemap.js +65 -0
  31. package/lib/formats/typescript-defs.js +147 -0
  32. package/lib/formats/yaml.js +144 -0
  33. package/lib/formats.js +60 -0
  34. package/lib/index.js +14 -0
  35. package/lib/json-ld-common.js +72 -0
  36. package/lib/publishers/clipboard.js +70 -0
  37. package/lib/publishers/static.js +152 -0
  38. package/lib/serve-mcp.js +340 -0
  39. package/lib/server.js +535 -0
  40. package/package.json +53 -0
  41. package/public/grainulation-tokens.css +321 -0
  42. package/public/index.html +891 -0
package/CHANGELOG.md ADDED
@@ -0,0 +1,12 @@
1
+ # Changelog
2
+
3
+ ## 1.0.0
4
+
5
+ Initial release.
6
+
7
+ - 24 export formats (CSV, Markdown, JSON-LD, NDJSON, BibTeX, RIS, YAML, SQL, GraphML, DOT, and more)
8
+ - Web workbench UI with format preview, copy, and download
9
+ - SSE live-reload when source files change
10
+ - `mill serve` with `--source` flag for cross-directory compilation reading
11
+ - `mill export`, `mill convert`, `mill publish`, `mill formats` CLI commands
12
+ - Zero runtime dependencies
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 grainulation contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,76 @@
1
+ # mill
2
+
3
+ Turn sprint evidence into shareable artifacts.
4
+
5
+ ## Quick start
6
+
7
+ ```bash
8
+ # Export a brief to PDF
9
+ npx @grainulation/mill export --format pdf output/brief.html
10
+
11
+ # Claims to CSV
12
+ npx @grainulation/mill export --format csv claims.json
13
+
14
+ # HTML to clean Markdown
15
+ npx @grainulation/mill convert --from html --to markdown output/brief.html
16
+
17
+ # Build a static site from sprint outputs
18
+ npx @grainulation/mill publish --target static output/
19
+
20
+ # Claims to JSON-LD
21
+ npx @grainulation/mill export --format json-ld claims.json -o claims.jsonld
22
+
23
+ # Copy to clipboard
24
+ npx @grainulation/mill publish --target clipboard output/brief.html
25
+ ```
26
+
27
+ ## Export formats
28
+
29
+ 24 built-in formats in `lib/formats/`. Most commonly used:
30
+
31
+ | Format | Input | Output | Notes |
32
+ |--------|-------|--------|-------|
33
+ | `pdf` | HTML, Markdown | PDF | Uses `npx md-to-pdf` or puppeteer |
34
+ | `csv` | claims.json | CSV | Flat columns, semicolon-joined tags |
35
+ | `markdown` | HTML | Markdown | Zero-dep tag stripping, handles wheat output |
36
+ | `json-ld` | claims.json | JSON-LD | schema.org vocab, wheat namespace |
37
+ | `html-report` | claims.json | HTML | Self-contained interactive report |
38
+ | `slide-deck` | claims.json | HTML | Scroll-snap presentation |
39
+ | `github-issues` | claims.json | JSON | GitHub Issues payloads |
40
+ | `jira-csv` | claims.json | CSV | Jira-compatible import |
41
+ | `yaml` | claims.json | YAML | YAML export |
42
+ | `ndjson` | claims.json | NDJSON | Newline-delimited JSON |
43
+
44
+ Run `mill formats` to see all 24 formats and publish targets.
45
+
46
+ ## Publish targets
47
+
48
+ | Target | Input | Output | Notes |
49
+ |--------|-------|--------|-------|
50
+ | `static` | directory | `_site/` | Dark-themed index + copied artifacts |
51
+ | `clipboard` | file or dir | clipboard | Uses pbcopy/xclip/clip |
52
+
53
+ ## Zero dependencies
54
+
55
+ Mill has no installed npm dependencies. Heavy operations (PDF generation) run via `npx` on demand, pulling packages only when needed.
56
+
57
+ ## Works standalone
58
+
59
+ Mill reads sprint output files directly. It does not require wheat to be installed -- give it HTML, Markdown, or claims JSON and it produces shareable formats.
60
+
61
+ ## CLI reference
62
+
63
+ ```
64
+ mill export --format <fmt> <file> Export to target format
65
+ mill publish --target <dest> <dir> Publish sprint outputs
66
+ mill convert --from <fmt> --to <fmt> <file> Convert between formats
67
+ mill formats List available formats
68
+ mill serve [--port 9094] [--source <dir>] Start the export workbench UI
69
+ mill serve-mcp Start the MCP server on stdio
70
+ ```
71
+
72
+ All commands accept `-o <path>` to set the output location.
73
+
74
+ ## License
75
+
76
+ MIT
package/bin/mill.js ADDED
@@ -0,0 +1,320 @@
1
+ #!/usr/bin/env node
2
+
3
+ 'use strict';
4
+
5
+ const path = require('node:path');
6
+ const { parseArgs } = require('node:util');
7
+ const { fork } = require('node:child_process');
8
+
9
// Absolute path of the package's lib/ directory (sibling of bin/).
const LIB_DIR = path.join(__dirname, '..', 'lib');

// `--version` or `-v` anywhere on the command line: print the version from
// package.json and exit successfully before any command dispatch happens.
const wantsVersion = ['--version', '-v'].some((flag) => process.argv.includes(flag));
if (wantsVersion) {
  const { version } = require(path.join(__dirname, '..', 'package.json'));
  console.log(version);
  process.exit(0);
}
17
+
18
// Diagnostics are switched on by passing `--verbose` anywhere on the command line.
const verbose = process.argv.includes('--verbose');

/**
 * Write one timestamped diagnostic line to stderr; a no-op unless `--verbose` was given.
 *
 * @param {...*} a - Message parts, joined with single spaces.
 * @returns {void}
 */
function vlog(...a) {
  if (verbose) {
    const stamp = new Date().toISOString();
    process.stderr.write(`[${stamp}] mill: ${a.join(' ')}\n`);
  }
}
24
+
25
// Registry of CLI subcommands, keyed by the name typed on the command line.
// Each entry carries a one-line description and the function that runs it.
// `serve-mcp` has no handler here: main() loads lib/serve-mcp.js for it directly.
const COMMANDS = {
  export: {
    description: 'Export artifacts to a target format',
    handler: runExport,
  },
  publish: {
    description: 'Publish sprint outputs to a destination',
    handler: runPublish,
  },
  convert: {
    description: 'Convert between artifact formats',
    handler: runConvert,
  },
  formats: {
    description: 'List available export formats',
    handler: runFormats,
  },
  serve: {
    description: 'Start the export workbench UI',
    handler: runServe,
  },
  'serve-mcp': {
    description: 'Start the MCP server on stdio',
    handler: null,
  },
};
33
+
34
// Help text printed for `mill`, `mill help`, `mill --help` and `mill -h`.
// The surrounding blank lines of the template literal are removed by .trim().
+ const USAGE = `
35
+ mill -- turn sprint evidence into shareable artifacts
36
+
37
+ Usage:
38
+ mill serve [--port 9094] [--source <dir>] Start the export workbench UI
39
+ mill serve-mcp Start the MCP server on stdio
40
+ mill export --format <fmt> <file> Export artifact to target format
41
+ mill publish --target <dest> <dir> Publish sprint outputs
42
+ mill convert --from <fmt> --to <fmt> <file> Convert between formats
43
+ mill formats List available formats
44
+
45
+ Export formats:
46
+ pdf HTML or Markdown to PDF (via npx md-to-pdf)
47
+ csv Claims JSON to CSV
48
+ markdown HTML artifacts to clean Markdown
49
+ json-ld Claims JSON to JSON-LD for semantic web
50
+
51
+ Publish targets:
52
+ static Generate a static site from sprint outputs
53
+ clipboard Copy formatted output to system clipboard
54
+
55
+ Examples:
56
+ npx @grainulation/mill serve --port 9094 --source /path/to/sprint
57
+ npx @grainulation/mill export --format pdf output/brief.html
58
+ npx @grainulation/mill export --format csv claims.json
59
+ npx @grainulation/mill export --format json-ld claims.json -o claims.jsonld
60
+ npx @grainulation/mill publish --target static output/
61
+ npx @grainulation/mill convert --from html --to markdown output/brief.html
62
+ `.trim();
63
+
64
/**
 * CLI entry point: read the subcommand from process.argv and dispatch to it.
 *
 * - No arguments, `help`, `--help` or `-h` print USAGE and exit 0.
 * - `serve` forks lib/server.js via runServe(); its flags are forwarded untouched.
 * - `serve-mcp` loads lib/serve-mcp.js and runs it against the current directory.
 * - Any other name must be an own key of COMMANDS with a function handler;
 *   otherwise an "unknown command" error is printed and the process exits 1.
 * - `--help` / `-h` placed after a subcommand (before any `--`) also prints USAGE.
 *
 * Handlers may be async; a rejection (or synchronous throw) is reported as a
 * single `mill: <message>` line with exit code 1 instead of surfacing as an
 * unhandled promise rejection.
 *
 * @returns {void}
 */
function main() {
  const args = process.argv.slice(2);
  const [command, ...rest] = args;

  vlog('startup', `command=${command || '(none)'}`, `cwd=${process.cwd()}`);

  if (args.length === 0 || command === '--help' || command === '-h' || command === 'help') {
    console.log(USAGE);
    process.exit(0);
  }

  // serve forks the server module; every remaining flag belongs to the child.
  if (command === 'serve') {
    runServe(rest);
    return;
  }

  // serve-mcp starts the MCP server on stdio
  if (command === 'serve-mcp') {
    const serveMcp = require('../lib/serve-mcp.js');
    serveMcp.run(process.cwd());
    return;
  }

  // Own-property lookup only: a plain `COMMANDS[command]` would also resolve
  // names inherited from Object.prototype (e.g. "constructor", "toString")
  // and then crash when calling their non-existent `.handler`.
  const entry = Object.hasOwn(COMMANDS, command) ? COMMANDS[command] : undefined;
  if (typeof entry?.handler !== 'function') {
    console.error(`mill: unknown command: ${command}`);
    console.error(`Run "mill --help" for usage.`);
    process.exit(1);
  }

  // The subcommands' option parsers do not define a help flag, so honour it
  // here. Anything after a literal `--` is positional and is left alone.
  const dashDash = rest.indexOf('--');
  const flags = dashDash === -1 ? rest : rest.slice(0, dashDash);
  if (flags.includes('--help') || flags.includes('-h')) {
    console.log(USAGE);
    process.exit(0);
  }

  Promise.resolve()
    .then(() => entry.handler(rest))
    .catch((err) => {
      console.error(`mill: ${err instanceof Error ? err.message : String(err)}`);
      if (err instanceof Error && err.stack) vlog(err.stack);
      process.exit(1);
    });
}
103
+
104
/**
 * `mill export`: convert one input file with the exporter selected by --format.
 *
 * Options: `--format/-f <fmt>` (required), `--output/-o <path>`, `--json`.
 * The first positional argument is the input file. On success the exporter's
 * `message` is printed (or the whole result as JSON with `--json`). Every
 * failure path prints a diagnostic and exits with status 1.
 *
 * @param {string[]} args - Arguments following the `export` subcommand.
 * @returns {Promise<void>}
 */
async function runExport(args) {
  let values;
  let positionals;
  try {
    ({ values, positionals } = parseArgs({
      args,
      options: {
        format: { type: 'string', short: 'f' },
        output: { type: 'string', short: 'o' },
        json: { type: 'boolean', default: false },
      },
      allowPositionals: true,
    }));
  } catch (err) {
    if (err.code === 'ERR_PARSE_ARGS_UNKNOWN_OPTION') {
      // Node words this as "Unknown option '--x'" (single quotes); accept
      // either quote style so the offending flag is actually reported.
      const flag = err.message.match(/option ['"]([^'"]+)['"]/i)?.[1] || 'unknown';
      console.error(`mill: unknown option: ${flag}. Run "mill --help" for usage.`);
      process.exit(1);
    }
    if (typeof err.code === 'string' && err.code.startsWith('ERR_PARSE_ARGS_')) {
      // Remaining parser errors, e.g. `--format` given without a value.
      console.error(`mill: ${err.message}`);
      process.exit(1);
    }
    throw err;
  }

  if (!values.format) {
    console.error('mill: missing --format. Options: pdf, csv, markdown, json-ld');
    process.exit(1);
  }

  const inputFile = positionals[0];
  if (!inputFile) {
    console.error('mill: missing input file.');
    process.exit(1);
  }

  const inputPath = path.resolve(inputFile);
  const format = values.format;
  // null lets the exporter derive an output path next to the input file.
  const outputPath = values.output ? path.resolve(values.output) : null;

  const formats = require('../lib/formats.js');
  const exporter = formats.getExporter(format);

  if (!exporter) {
    console.error(`mill: unknown format: ${format}`);
    console.error(`Available: ${formats.listExportFormats().join(', ')}`);
    process.exit(1);
  }

  try {
    const result = await exporter.export(inputPath, outputPath);
    console.log(values.json ? JSON.stringify(result) : result.message);
  } catch (err) {
    if (values.json) {
      // In JSON mode the error is reported on stdout as a JSON object.
      console.log(JSON.stringify({ error: err.message }));
    } else {
      console.error(`mill: export failed: ${err.message}`);
    }
    process.exit(1);
  }
}
167
+
168
/**
 * `mill publish`: hand an input directory to the publisher selected by --target.
 *
 * Options: `--target/-t <dest>` (required), `--output/-o <path>`, `--json`.
 * The first positional argument is the input path. On success the publisher's
 * `message` is printed (or the whole result as JSON with `--json`). Every
 * failure path prints a diagnostic and exits with status 1.
 *
 * @param {string[]} args - Arguments following the `publish` subcommand.
 * @returns {Promise<void>}
 */
async function runPublish(args) {
  let values;
  let positionals;
  try {
    ({ values, positionals } = parseArgs({
      args,
      options: {
        target: { type: 'string', short: 't' },
        output: { type: 'string', short: 'o' },
        json: { type: 'boolean', default: false },
      },
      allowPositionals: true,
    }));
  } catch (err) {
    if (err.code === 'ERR_PARSE_ARGS_UNKNOWN_OPTION') {
      // Node words this as "Unknown option '--x'" (single quotes); accept
      // either quote style so the offending flag is actually reported.
      const flag = err.message.match(/option ['"]([^'"]+)['"]/i)?.[1] || 'unknown';
      console.error(`mill: unknown option: ${flag}. Run "mill --help" for usage.`);
      process.exit(1);
    }
    if (typeof err.code === 'string' && err.code.startsWith('ERR_PARSE_ARGS_')) {
      // Remaining parser errors, e.g. `--target` given without a value.
      console.error(`mill: ${err.message}`);
      process.exit(1);
    }
    throw err;
  }

  if (!values.target) {
    console.error('mill: missing --target. Options: static, clipboard');
    process.exit(1);
  }

  const inputDir = positionals[0];
  if (!inputDir) {
    console.error('mill: missing input directory.');
    process.exit(1);
  }

  const inputPath = path.resolve(inputDir);
  const target = values.target;
  // null lets the publisher choose its own default destination.
  const outputPath = values.output ? path.resolve(values.output) : null;

  const formats = require('../lib/formats.js');
  const publisher = formats.getPublisher(target);

  if (!publisher) {
    console.error(`mill: unknown target: ${target}`);
    console.error(`Available: ${formats.listPublishTargets().join(', ')}`);
    process.exit(1);
  }

  try {
    const result = await publisher.publish(inputPath, outputPath);
    console.log(values.json ? JSON.stringify(result) : result.message);
  } catch (err) {
    if (values.json) {
      // In JSON mode the error is reported on stdout as a JSON object.
      console.log(JSON.stringify({ error: err.message }));
    } else {
      console.error(`mill: publish failed: ${err.message}`);
    }
    process.exit(1);
  }
}
229
+
230
/**
 * `mill convert`: sugar over export -- run the exporter named by --to on the input file.
 *
 * Options: `--from <fmt>` and `--to <fmt>` (both required), `--output/-o <path>`,
 * `--json`. Note that `--from` is only checked for presence; the exporter
 * chosen by `--to` reads the input file directly. Every failure path prints a
 * diagnostic and exits with status 1.
 *
 * @param {string[]} args - Arguments following the `convert` subcommand.
 * @returns {Promise<void>}
 */
async function runConvert(args) {
  let values;
  let positionals;
  try {
    ({ values, positionals } = parseArgs({
      args,
      options: {
        from: { type: 'string' },
        to: { type: 'string' },
        output: { type: 'string', short: 'o' },
        json: { type: 'boolean', default: false },
      },
      allowPositionals: true,
    }));
  } catch (err) {
    if (err.code === 'ERR_PARSE_ARGS_UNKNOWN_OPTION') {
      // Node words this as "Unknown option '--x'" (single quotes); accept
      // either quote style so the offending flag is actually reported.
      const flag = err.message.match(/option ['"]([^'"]+)['"]/i)?.[1] || 'unknown';
      console.error(`mill: unknown option: ${flag}. Run "mill --help" for usage.`);
      process.exit(1);
    }
    if (typeof err.code === 'string' && err.code.startsWith('ERR_PARSE_ARGS_')) {
      // Remaining parser errors, e.g. `--to` given without a value.
      console.error(`mill: ${err.message}`);
      process.exit(1);
    }
    throw err;
  }

  if (!values.from || !values.to) {
    console.error('mill: missing --from and/or --to format.');
    process.exit(1);
  }

  const inputFile = positionals[0];
  if (!inputFile) {
    console.error('mill: missing input file.');
    process.exit(1);
  }

  const inputPath = path.resolve(inputFile);
  // null lets the exporter derive an output path next to the input file.
  const outputPath = values.output ? path.resolve(values.output) : null;

  // Convert is sugar: detect source, export to target
  const formats = require('../lib/formats.js');
  const exporter = formats.getExporter(values.to);

  if (!exporter) {
    console.error(`mill: unknown target format: ${values.to}`);
    // Same hint the export command gives for an unrecognised format.
    console.error(`Available: ${formats.listExportFormats().join(', ')}`);
    process.exit(1);
  }

  try {
    const result = await exporter.export(inputPath, outputPath);
    console.log(values.json ? JSON.stringify(result) : result.message);
  } catch (err) {
    if (values.json) {
      // In JSON mode the error is reported on stdout as a JSON object.
      console.log(JSON.stringify({ error: err.message }));
    } else {
      console.error(`mill: convert failed: ${err.message}`);
    }
    process.exit(1);
  }
}
291
+
292
/**
 * `mill formats`: list the export formats and publish targets known to lib/formats.js.
 *
 * With `--json` a single JSON object ({ export_formats, publish_targets }) is
 * printed; otherwise two headed plain-text lists.
 *
 * @param {string[]} [args] - Arguments following the `formats` subcommand.
 * @returns {void}
 */
function runFormats(args) {
  const wantsJson = (args || []).includes('--json');
  const formats = require('../lib/formats.js');

  if (wantsJson) {
    const payload = {
      export_formats: formats.listExportFormats(),
      publish_targets: formats.listPublishTargets(),
    };
    console.log(JSON.stringify(payload));
    return;
  }

  // Print a heading followed by one entry per line.
  const printList = (heading, names) => {
    console.log(heading);
    for (const name of names) {
      console.log(` ${name}`);
    }
  };

  printList('Export formats:', formats.listExportFormats());
  printList('\nPublish targets:', formats.listPublishTargets());
}
311
+
312
/**
 * `mill serve`: fork lib/server.js as a child process and mirror its lifetime.
 *
 * The child shares this process's stdio. When it exits, this process exits
 * with the same code (0 when no code is reported). SIGTERM and SIGINT
 * received here are passed on to the child.
 *
 * @param {string[]} args - Arguments forwarded verbatim to the server script.
 * @returns {void}
 */
function runServe(args) {
  const child = fork(path.join(LIB_DIR, 'server.js'), args, { stdio: 'inherit' });

  child.on('exit', (code) => {
    process.exit(code ?? 0);
  });

  for (const signal of ['SIGTERM', 'SIGINT']) {
    process.on(signal, () => child.kill(signal));
  }
}
319
+
320
// Script entry: dispatch the CLI as soon as the file is executed.
+ main();
package/lib/exporters/csv.js ADDED
@@ -0,0 +1,83 @@
1
+ 'use strict';
2
+
3
+ const fs = require('node:fs');
4
+ const path = require('node:path');
5
+
6
+ /**
7
+ * Export claims.json to CSV.
8
+ * Handles nested fields by flattening to dot-notation columns.
9
+ */
10
+
11
// Output columns in order; the same list doubles as the CSV header row.
const CSV_COLUMNS = [
  'id', 'type', 'text', 'confidence', 'evidence_tier',
  'source', 'status', 'created', 'tags',
];
22
+
23
/**
 * Render one value as a CSV field.
 *
 * - `null` / `undefined` become the empty field.
 * - Text starting with a formula trigger (`=`, `+`, `-`, `@`, tab, CR) gets a
 *   leading apostrophe so spreadsheet applications treat it as text (CWE-1236).
 *   Values that are JavaScript numbers are exempt: their string form cannot
 *   carry a formula payload, and prefixing would corrupt negative numbers.
 * - Fields containing a comma, double quote, CR or LF are wrapped in double
 *   quotes with embedded quotes doubled.
 *
 * @param {*} value - Raw cell value.
 * @returns {string} The escaped field, without any trailing separator.
 */
function escapeCsvField(value) {
  if (value == null) return '';
  let str = String(value);
  // CWE-1236: Prevent CSV injection by prefixing formula-triggering characters
  if (typeof value !== 'number' && /^[=+\-@\t\r]/.test(str)) {
    str = `'${str}`;
  }
  // A bare carriage return is a line break to many CSV readers, so it must
  // force quoting just like "\n".
  if (/[",\r\n]/.test(str)) {
    return `"${str.replace(/"/g, '""')}"`;
  }
  return str;
}
35
+
36
/**
 * Build one CSV data line from a claim, following the CSV_COLUMNS order.
 *
 * `tags` arrays are joined with "; " (anything else yields an empty cell).
 * `evidence_tier` and `source` prefer the nested `claim.evidence` values and
 * fall back to the flat properties of the same name. Every other column is
 * read straight off the claim.
 *
 * @param {object} claim - A single claim record.
 * @returns {string} Comma-joined, escaped fields (no trailing newline).
 */
function claimToRow(claim) {
  const cells = [];
  for (const col of CSV_COLUMNS) {
    let raw;
    switch (col) {
      case 'tags':
        raw = Array.isArray(claim.tags) ? claim.tags.join('; ') : '';
        break;
      case 'evidence_tier':
        raw = claim.evidence?.tier ?? claim.evidence_tier ?? '';
        break;
      case 'source':
        raw = claim.evidence?.source ?? claim.source ?? '';
        break;
      default:
        raw = claim[col];
    }
    cells.push(escapeCsvField(raw));
  }
  return cells.join(',');
}
50
+
51
/**
 * Pick the file to write: the explicit path when one is given, otherwise the
 * input file's own directory and base name with a `.csv` extension.
 *
 * @param {string} inputPath - Path of the file being exported.
 * @param {?string} explicit - Caller-supplied output path, if any.
 * @returns {string} Path to write the CSV to.
 */
function deriveOutputPath(inputPath, explicit) {
  return explicit || path.join(
    path.dirname(inputPath),
    `${path.basename(inputPath, path.extname(inputPath))}.csv`,
  );
}
57
+
58
/**
 * Read a claims JSON file and write it out as CSV.
 *
 * The input may be either a bare array of claims or an object with a `claims`
 * array. Any other JSON shape (including `null` or a non-array `claims`) is
 * treated as containing no claims.
 *
 * @param {string} inputPath - Path of the claims JSON file.
 * @param {?string} outputPath - Explicit destination; derived from inputPath when falsy.
 * @returns {Promise<{outputPath: string, message: string}>} Where the CSV was written and a summary line.
 * @throws {Error} When the file cannot be read or parsed, or holds no claims.
 */
async function exportCsv(inputPath, outputPath) {
  const raw = fs.readFileSync(inputPath, 'utf-8');
  const data = JSON.parse(raw);

  // Accept either an array or { claims: [...] }. Optional chaining keeps a
  // JSON `null` from raising an unrelated TypeError, and the second isArray
  // check rejects a `claims` value that cannot be mapped over.
  const candidate = Array.isArray(data) ? data : data?.claims;
  const claims = Array.isArray(candidate) ? candidate : [];

  if (claims.length === 0) {
    throw new Error('No claims found in input file.');
  }

  const header = CSV_COLUMNS.join(',');
  const rows = claims.map(claimToRow);
  const csv = [header, ...rows].join('\n') + '\n';

  const out = deriveOutputPath(inputPath, outputPath);
  fs.writeFileSync(out, csv, 'utf-8');

  return { outputPath: out, message: `CSV written to ${out} (${claims.length} claims)` };
}
78
+
79
// Module surface: an identifying name, a one-line description, and the
// `export(inputPath, outputPath)` entry point implemented by exportCsv.
+ module.exports = {
80
+ name: 'csv',
81
+ description: 'Export claims JSON to CSV',
82
+ export: exportCsv,
83
+ };
package/lib/exporters/json-ld.js ADDED
@@ -0,0 +1,44 @@
1
+ 'use strict';
2
+
3
+ const fs = require('node:fs');
4
+ const path = require('node:path');
5
+ const { buildReport } = require('../json-ld-common.js');
6
+
7
+ /**
8
+ * Export claims.json to JSON-LD format.
9
+ * Uses shared schema.org/Report vocabulary from json-ld-common.js.
10
+ */
11
+
12
/**
 * Resolve the destination file: an explicit path wins; otherwise reuse the
 * input's directory and base name and swap the extension for `.jsonld`.
 *
 * @param {string} inputPath - Path of the file being exported.
 * @param {?string} explicit - Caller-supplied output path, if any.
 * @returns {string} Path to write the JSON-LD document to.
 */
function deriveOutputPath(inputPath, explicit) {
  const fromInput = () => {
    const stem = path.basename(inputPath, path.extname(inputPath));
    return path.join(path.dirname(inputPath), `${stem}.jsonld`);
  };
  return explicit ? explicit : fromInput();
}
18
+
19
/**
 * Read a claims JSON file and write it out as a JSON-LD document.
 *
 * The input may be either a bare array of claims or an object with a `claims`
 * array (plus optional `meta` and `certificate`). Any other JSON shape,
 * including `null` or a non-array `claims`, is treated as containing no claims.
 * The document is written to a temporary sibling file and then renamed into
 * place, so a failed write does not leave a truncated output file.
 *
 * @param {string} inputPath - Path of the claims JSON file.
 * @param {?string} outputPath - Explicit destination; derived from inputPath when falsy.
 * @returns {Promise<{outputPath: string, message: string}>} Where the document was written and a summary line.
 * @throws {Error} When the file cannot be read or parsed, holds no claims, or cannot be written.
 */
async function exportJsonLd(inputPath, outputPath) {
  const raw = fs.readFileSync(inputPath, 'utf-8');
  const data = JSON.parse(raw);

  // Optional chaining keeps a JSON `null` from raising an unrelated TypeError;
  // the second isArray check rejects a `claims` value that is not a list.
  const candidate = Array.isArray(data) ? data : data?.claims;
  const claims = Array.isArray(candidate) ? candidate : [];

  if (claims.length === 0) {
    throw new Error('No claims found in input file.');
  }

  // For a bare array input both lookups are undefined and the defaults apply.
  const meta = data.meta || { sprint: 'unknown', question: 'Wheat Sprint Claims' };
  const certificate = data.certificate || {};
  const doc = buildReport(meta, claims, certificate);

  const out = deriveOutputPath(inputPath, outputPath);
  const tmp = `${out}.tmp.${process.pid}`;
  try {
    fs.writeFileSync(tmp, `${JSON.stringify(doc, null, 2)}\n`, 'utf-8');
    fs.renameSync(tmp, out);
  } catch (err) {
    // Do not leave the temporary file behind when the write or rename fails.
    fs.rmSync(tmp, { force: true });
    throw err;
  }

  return { outputPath: out, message: `JSON-LD written to ${out} (${claims.length} claims)` };
}
39
+
40
// Module surface: an identifying name, a one-line description, and the
// `export(inputPath, outputPath)` entry point implemented by exportJsonLd.
+ module.exports = {
41
+ name: 'json-ld',
42
+ description: 'Export claims JSON to JSON-LD for semantic web',
43
+ export: exportJsonLd,
44
+ };
package/lib/exporters/markdown.js ADDED
@@ -0,0 +1,116 @@
1
+ 'use strict';
2
+
3
+ const fs = require('node:fs');
4
+ const path = require('node:path');
5
+
6
+ /**
7
+ * Convert HTML artifacts to clean Markdown.
8
+ * Uses a minimal tag-stripping approach -- no dependencies.
9
+ * Handles the common patterns from wheat sprint HTML output.
10
+ */
11
+
12
+ function htmlToMarkdown(html) {
13
+ let md = html;
14
+
15
+ // Remove doctype, head, scripts, styles
16
+ md = md.replace(/<!DOCTYPE[^>]*>/gi, '');
17
+ md = md.replace(/<head[\s\S]*?<\/head>/gi, '');
18
+ md = md.replace(/<script[\s\S]*?<\/script>/gi, '');
19
+ md = md.replace(/<style[\s\S]*?<\/style>/gi, '');
20
+
21
+ // Headings
22
+ md = md.replace(/<h1[^>]*>([\s\S]*?)<\/h1>/gi, (_, c) => `# ${strip(c)}\n\n`);
23
+ md = md.replace(/<h2[^>]*>([\s\S]*?)<\/h2>/gi, (_, c) => `## ${strip(c)}\n\n`);
24
+ md = md.replace(/<h3[^>]*>([\s\S]*?)<\/h3>/gi, (_, c) => `### ${strip(c)}\n\n`);
25
+ md = md.replace(/<h4[^>]*>([\s\S]*?)<\/h4>/gi, (_, c) => `#### ${strip(c)}\n\n`);
26
+ md = md.replace(/<h5[^>]*>([\s\S]*?)<\/h5>/gi, (_, c) => `##### ${strip(c)}\n\n`);
27
+ md = md.replace(/<h6[^>]*>([\s\S]*?)<\/h6>/gi, (_, c) => `###### ${strip(c)}\n\n`);
28
+
29
+ // Bold, italic, code
30
+ md = md.replace(/<strong[^>]*>([\s\S]*?)<\/strong>/gi, '**$1**');
31
+ md = md.replace(/<b[^>]*>([\s\S]*?)<\/b>/gi, '**$1**');
32
+ md = md.replace(/<em[^>]*>([\s\S]*?)<\/em>/gi, '*$1*');
33
+ md = md.replace(/<i[^>]*>([\s\S]*?)<\/i>/gi, '*$1*');
34
+ md = md.replace(/<code[^>]*>([\s\S]*?)<\/code>/gi, '`$1`');
35
+
36
+ // Pre/code blocks
37
+ md = md.replace(/<pre[^>]*><code[^>]*>([\s\S]*?)<\/code><\/pre>/gi, (_, c) => {
38
+ return '\n```\n' + decodeEntities(c) + '\n```\n\n';
39
+ });
40
+ md = md.replace(/<pre[^>]*>([\s\S]*?)<\/pre>/gi, (_, c) => {
41
+ return '\n```\n' + decodeEntities(c) + '\n```\n\n';
42
+ });
43
+
44
+ // Links
45
+ md = md.replace(/<a[^>]+href="([^"]*)"[^>]*>([\s\S]*?)<\/a>/gi, '[$2]($1)');
46
+
47
+ // Images
48
+ md = md.replace(/<img[^>]+src="([^"]*)"[^>]*alt="([^"]*)"[^>]*\/?>/gi, '![$2]($1)');
49
+ md = md.replace(/<img[^>]+src="([^"]*)"[^>]*\/?>/gi, '![]($1)');
50
+
51
+ // Lists
52
+ md = md.replace(/<li[^>]*>([\s\S]*?)<\/li>/gi, (_, c) => `- ${strip(c).trim()}\n`);
53
+ md = md.replace(/<\/?[ou]l[^>]*>/gi, '\n');
54
+
55
+ // Paragraphs and breaks
56
+ md = md.replace(/<p[^>]*>([\s\S]*?)<\/p>/gi, (_, c) => `${strip(c).trim()}\n\n`);
57
+ md = md.replace(/<br\s*\/?>/gi, '\n');
58
+ md = md.replace(/<hr\s*\/?>/gi, '\n---\n\n');
59
+
60
+ // Blockquotes
61
+ md = md.replace(/<blockquote[^>]*>([\s\S]*?)<\/blockquote>/gi, (_, c) => {
62
+ return strip(c).trim().split('\n').map((l) => `> ${l}`).join('\n') + '\n\n';
63
+ });
64
+
65
+ // Strip remaining tags
66
+ md = md.replace(/<[^>]+>/g, '');
67
+
68
+ // Decode common entities
69
+ md = decodeEntities(md);
70
+
71
+ // Normalize whitespace
72
+ md = md.replace(/\n{3,}/g, '\n\n');
73
+ md = md.trim() + '\n';
74
+
75
+ return md;
76
+ }
77
+
78
+ function strip(html) {
79
+ return html.replace(/<[^>]+>/g, '');
80
+ }
81
+
82
+ function decodeEntities(str) {
83
+ return str
84
+ .replace(/&amp;/g, '&')
85
+ .replace(/&lt;/g, '<')
86
+ .replace(/&gt;/g, '>')
87
+ .replace(/&quot;/g, '"')
88
+ .replace(/&#39;/g, "'")
89
+ .replace(/&nbsp;/g, ' ');
90
+ }
91
+
92
/**
 * Choose where the Markdown goes: the explicit path if provided, else a `.md`
 * file beside the input that shares its base name.
 *
 * @param {string} inputPath - Path of the file being converted.
 * @param {?string} explicit - Caller-supplied output path, if any.
 * @returns {string} Path to write the Markdown to.
 */
function deriveOutputPath(inputPath, explicit) {
  if (!explicit) {
    const folder = path.dirname(inputPath);
    const stem = path.basename(inputPath, path.extname(inputPath));
    return path.join(folder, `${stem}.md`);
  }
  return explicit;
}
98
+
99
/**
 * Convert an HTML file to Markdown and write the result to disk.
 *
 * A warning is written to stderr when the input, ignoring leading whitespace,
 * does not begin with `<`; conversion is attempted regardless.
 *
 * @param {string} inputPath - Path of the HTML file to read.
 * @param {?string} outputPath - Explicit destination; derived from inputPath when falsy.
 * @returns {Promise<{outputPath: string, message: string}>} Where the Markdown was written and a summary line.
 */
async function exportMarkdown(inputPath, outputPath) {
  const source = fs.readFileSync(inputPath, 'utf-8');

  // "<!DOCTYPE" and "<html" both begin with "<", so one prefix test suffices.
  const looksLikeHtml = source.trimStart().startsWith('<');
  if (!looksLikeHtml) {
    process.stderr.write('Warning: Input does not appear to be HTML. Markdown conversion may produce unexpected results.\n');
  }

  const markdown = htmlToMarkdown(source);
  const out = deriveOutputPath(inputPath, outputPath);
  fs.writeFileSync(out, markdown, 'utf-8');

  return { outputPath: out, message: `Markdown written to ${out}` };
}
110
+
111
// Module surface: an identifying name, a one-line description, the
// `export(inputPath, outputPath)` entry point implemented by exportMarkdown,
// and the underlying htmlToMarkdown converter for direct use on strings.
+ module.exports = {
112
+ name: 'markdown',
113
+ description: 'Convert HTML artifacts to clean Markdown',
114
+ export: exportMarkdown,
115
+ htmlToMarkdown,
116
+ };