@holdenmatt/md-parser 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Matt Holden
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,69 @@
1
+ # md-parser
2
+
3
+ Markdown parser primitives for frontmatter, body text, sections, and code blocks.
4
+
5
+ `@holdenmatt/md-parser` provides shared Markdown parser primitives for generic document structure. It parses untyped frontmatter, frontmatter-free body text, flat heading sections, and code blocks without assigning application meaning to the content.
6
+
7
+ ## Install
8
+
9
+ ```sh
10
+ npm install @holdenmatt/md-parser
11
+ ```
12
+
13
+ ## Usage
14
+
15
+ ```ts
16
+ import { parse, stringify } from "@holdenmatt/md-parser";
17
+
18
+ const document = parse(`---
19
+ title: Example
20
+ ---
21
+
22
+ ## Notes
23
+
24
+ Body text.
25
+ `);
26
+
27
+ document.raw; // original markdown
28
+ document.frontmatter; // Record<string, unknown>
29
+ document.body; // markdown without frontmatter
30
+ document.sections[0]?.heading; // "Notes"
31
+ document.codeBlocks; // fenced and indented code blocks
32
+
33
+ const markdown = stringify({
34
+ frontmatter: { title: "Example" },
35
+ body: "## Notes\n\nBody text.\n",
36
+ });
37
+ ```
38
+
39
+ ## API
40
+
41
+ ### `parse(markdown)`
42
+
43
+ Parses a Markdown string and returns a `MarkdownDocument`.
44
+
45
+ ### `parseFile(path)`
46
+
47
+ Reads a UTF-8 Markdown file and returns a `Promise<MarkdownDocument>`.
48
+
49
+ ### `stringify({ frontmatter, body })`
50
+
51
+ Serializes frontmatter and body text back into Markdown. When frontmatter is empty or omitted, the body is returned unchanged.
52
+
53
+ ### `MarkdownDocument`
54
+
55
+ `parse` returns one canonical document shape:
56
+
57
+ ```ts
58
+ type MarkdownDocument = {
59
+ raw: string;
60
+ frontmatter: Record<string, unknown>;
61
+ body: string;
62
+ sections: MarkdownSection[];
63
+ codeBlocks: MarkdownCodeBlock[];
64
+ };
65
+ ```
66
+
67
+ Frontmatter is intentionally untyped. Application packages can refine it with their own schemas, or use [`@holdenmatt/md-schema`](https://github.com/holdenmatt/md-schema) for typed frontmatter parsing.
68
+
69
+ Parse and file-read failures throw `MarkdownParseError`. See [SPEC.md](./SPEC.md) for the structural parsing contract.
package/SPEC.md ADDED
@@ -0,0 +1,39 @@
1
+ # md-parser spec
2
+
3
+ md-parser parses one Markdown document into stable structural views and can stringify frontmatter plus body text back into Markdown.
4
+
5
+ ## Scope
6
+
7
+ - md-parser parses document structure, not document meaning.
8
+ - Frontmatter remains untyped; application packages refine it.
9
+ - The package does not define application semantics for headings, code block languages, directives, macros, or other Markdown content.
10
+
11
+ ## Parse
12
+
13
+ - `parse(markdown)` returns `{ raw, frontmatter, body, sections, codeBlocks }`.
14
+ - `raw` is the original input string.
15
+ - `frontmatter` is the parsed YAML data, or `{}` when the document has no frontmatter or the frontmatter is not a mapping.
16
+ - Invalid frontmatter fails.
17
+ - `body` is Markdown with frontmatter removed.
18
+ - `parseFile(path)` reads UTF-8 text before parsing.
19
+ - Parse and file failures throw `MarkdownParseError`.
20
+ - The public `MarkdownDocument` shape does not expose the internal Markdown AST.
21
+
22
+ ## Sections
23
+
24
+ - Sections are a flat list built from body headings.
25
+ - A section owns content until the next heading of the same or shallower depth.
26
+ - Section body text is trimmed of surrounding blank lines.
27
+ - Section headings are plain readable text.
28
+
29
+ ## Code Blocks
30
+
31
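+ - Code blocks are collected from the parsed body structure in document order, including blocks nested inside other content such as lists or blockquotes.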
32
+ - Each block exposes `info`, `language`, `meta`, and `value`.
33
+ - Code block language and metadata are not interpreted.
34
+
35
+ ## Stringify
36
+
37
+ - `stringify({ frontmatter, body })` serializes frontmatter and body text.
38
+ - When frontmatter is empty or omitted, the body is returned unchanged.
39
+ - Stringify does not use `sections` or `codeBlocks`.
package/dist/index.d.ts ADDED
@@ -0,0 +1,4 @@
1
+ export { MarkdownParseError, parse, parseFile } from "./parse.js";
2
+ export { stringify } from "./stringify.js";
3
+ export type { MarkdownCodeBlock, MarkdownDocument, MarkdownSection } from "./parse.js";
4
+ export type { MarkdownStringifyInput } from "./stringify.js";
package/dist/index.js ADDED
@@ -0,0 +1,2 @@
1
+ export { MarkdownParseError, parse, parseFile } from "./parse.js";
2
+ export { stringify } from "./stringify.js";
package/dist/parse.d.ts ADDED
@@ -0,0 +1,55 @@
1
+ /** Error thrown by `parse` and `parseFile` when frontmatter parsing, body parsing, or file reading fails. */
2
+ export declare class MarkdownParseError<TCode extends string = string> extends Error {
3
+     /** Machine-readable error code, such as `FRONTMATTER_PARSE_ERROR`, `MARKDOWN_PARSE_ERROR`, or `FILE_READ_ERROR`. */
4
+     readonly code: TCode;
5
+     /** Underlying error or diagnostic details passed to the constructor; `undefined` when none were given. */
6
+     readonly details: unknown;
7
+     constructor(code: TCode, message: string, details?: unknown);
8
+ }
9
+ /**
10
+  * Parsed markdown document: the raw input plus structural views (frontmatter, body, sections, code blocks) derived from it.
11
+  */
12
+ export type MarkdownDocument = {
13
+     /** Original input markdown, exactly as passed to `parse`. */
14
+     raw: string;
15
+     /** Parsed frontmatter data, or an empty object when there is no frontmatter or it is not a non-array object. */
16
+     frontmatter: Record<string, unknown>;
17
+     /** Markdown body with any frontmatter removed. */
18
+     body: string;
19
+     /** Flat list of sections, one per heading that is a direct child of the document root. */
20
+     sections: MarkdownSection[];
21
+     /** Code blocks found anywhere in the body, in depth-first document order. */
22
+     codeBlocks: MarkdownCodeBlock[];
23
+ };
24
+ /**
25
+  * Markdown body content owned by a single heading.
26
+  */
27
+ export type MarkdownSection = {
28
+     /** Heading content flattened to plain text (inline markup removed). */
29
+     heading: string;
30
+     /** Markdown heading depth, from 1 through 6. */
31
+     depth: number;
32
+     /** Content after the heading up to the next heading of the same or shallower depth, with surrounding blank lines trimmed. */
33
+     body: string;
34
+ };
35
+ /**
36
+  * Markdown code block from the body; language and metadata are reported as written and never interpreted.
37
+  */
38
+ export type MarkdownCodeBlock = {
39
+     /** Info string rebuilt by joining the non-empty language and meta with a single space; empty when both are absent. */
40
+     info: string;
41
+     /** Code fence language, or `undefined` when the block has none. */
42
+     language: string | undefined;
43
+     /** Code fence metadata after the language, or `undefined` when the block has none. */
44
+     meta: string | undefined;
45
+     /** Code block content. */
46
+     value: string;
47
+ };
48
+ /**
49
+  * Parse a markdown string into one canonical document shape; throws `MarkdownParseError` when the frontmatter or body cannot be parsed.
50
+  */
51
+ export declare function parse(markdown: string): MarkdownDocument;
52
+ /**
53
+  * Read the file at `path` as UTF-8 and parse it; the promise rejects with `MarkdownParseError` on read or parse failure.
54
+  */
55
+ export declare function parseFile(path: string): Promise<MarkdownDocument>;
package/dist/parse.js ADDED
@@ -0,0 +1,138 @@
1
+ import { readFile } from "node:fs/promises";
2
+ import matter from "gray-matter";
3
+ import { unified } from "unified";
4
+ import remarkParse from "remark-parse";
5
+ /** Error thrown when frontmatter parsing, body parsing, or file reading fails; `name` is always "MarkdownParseError". */
6
+ export class MarkdownParseError extends Error {
7
+     /** Machine-readable error code, such as `FRONTMATTER_PARSE_ERROR`, `MARKDOWN_PARSE_ERROR`, or `FILE_READ_ERROR`. */
8
+     code;
9
+     /** Underlying error or diagnostic details passed to the constructor; `undefined` when none were given. */
10
+     details;
11
+     constructor(code, message, details) {
12
+         super(message);
13
+         this.name = "MarkdownParseError";
14
+         this.code = code;
15
+         this.details = details;
16
+     }
17
+ }
18
+ const markdownParser = unified().use(remarkParse); // module-level processor, built once and reused by every parse() call
19
+ /**
20
+ * Parse markdown into one canonical document shape.
21
+ */
22
+ export function parse(markdown) {
23
+ let parsed;
24
+ try {
25
+ parsed = matter(markdown);
26
+ }
27
+ catch (error) {
28
+ throw new MarkdownParseError("FRONTMATTER_PARSE_ERROR", "Could not parse markdown frontmatter.", error);
29
+ }
30
+ try {
31
+ const ast = markdownParser.parse(parsed.content);
32
+ return {
33
+ raw: markdown,
34
+ frontmatter: toFrontmatterRecord(parsed.data),
35
+ body: parsed.content,
36
+ sections: extractSections(parsed.content, ast),
37
+ codeBlocks: extractCodeBlocks(ast),
38
+ };
39
+ }
40
+ catch (error) {
41
+ throw new MarkdownParseError("MARKDOWN_PARSE_ERROR", "Could not parse markdown body.", error);
42
+ }
43
+ }
44
+ /**
45
+  * Read the file at `path` as UTF-8 and parse it; errors raised by `parse` are rethrown as-is, any other failure is wrapped as `FILE_READ_ERROR`.
46
+  */
47
+ export async function parseFile(path) {
48
+     try {
49
+         return parse(await readFile(path, "utf8"));
50
+     }
51
+     catch (error) {
52
+         if (error instanceof MarkdownParseError) // already a parse failure: keep its original code
53
+             throw error;
54
+         throw new MarkdownParseError("FILE_READ_ERROR", `Could not read file: ${path}`, error);
55
+     }
56
+ }
57
+ /**
58
+  * Build flat sections from the root's direct heading children, slicing `body` from each heading's end offset to the start of the next heading of equal or smaller depth (or the end of the body).
59
+  */
60
+ function extractSections(body, ast) {
61
+     const headings = ast.children
62
+         .map((node, index) => (node.type === "heading" ? { node, index } : undefined))
63
+         .filter((item) => item !== undefined);
64
+     return headings.flatMap(({ node }, index) => {
65
+         const start = node.position?.end.offset;
66
+         if (start === undefined)
67
+             return []; // no position data for this heading: emit no section for it
68
+         const next = headings.slice(index + 1).find((heading) => heading.node.depth <= node.depth); // first later heading that closes this section
69
+         const end = next?.node.position?.start.offset ?? body.length; // no closing heading (or no offset for it): run to the end of the body
70
+         return [
71
+             {
72
+                 heading: textFromNode(node),
73
+                 depth: node.depth,
74
+                 body: trimBlankLines(body.slice(start, end)),
75
+             },
76
+         ];
77
+     });
78
+ }
79
+ /**
80
+  * Collect every `code` node in the tree in depth-first order, without assigning meaning to its language or metadata.
81
+  */
82
+ function extractCodeBlocks(ast) {
83
+     const blocks = [];
84
+     visit(ast, (node) => {
85
+         if (node.type !== "code")
86
+             return;
87
+         const code = node;
88
+         const language = code.lang ?? undefined; // normalize null to undefined
89
+         const meta = code.meta ?? undefined; // normalize null to undefined
90
+         const info = [language, meta].filter((value) => value !== undefined && value !== "").join(" "); // rebuilt from the parts; "" when both are missing
91
+         blocks.push({
92
+             info,
93
+             language,
94
+             meta,
95
+             value: code.value,
96
+         });
97
+     });
98
+     return blocks;
99
+ }
100
+ /**
101
+  * Walk the tree depth-first, calling `visitor` on each node before descending into its `children`.
102
+  */
103
+ function visit(node, visitor) {
104
+     visitor(node);
105
+     if (!("children" in node)) // leaf node: nothing to descend into
106
+         return;
107
+     for (const child of node.children) {
108
+         visit(child, visitor);
109
+     }
110
+ }
111
+ /**
112
+  * Collapse a node to plain text: a string `value` is returned as-is, otherwise the children's text is concatenated; nodes with neither yield "".
113
+  */
114
+ function textFromNode(node) {
115
+     if ("value" in node && typeof node.value === "string")
116
+         return node.value;
117
+     if (!("children" in node))
118
+         return "";
119
+     return node.children.map((child) => textFromNode(child)).join(""); // joined with no separator
120
+ }
121
+ /**
122
+  * Strip leading whitespace-only lines and trailing line breaks (with any spaces or tabs after them); indentation on the first content line is kept.
123
+  */
124
+ function trimBlankLines(value) {
125
+     return value.replace(/^(?:[ \t]*\r?\n)+/, "").replace(/(?:\r?\n[ \t]*)+$/, ""); // both LF and CRLF line endings are handled
126
+ }
127
+ /**
128
+  * Return `value` unchanged when it is a non-array object; otherwise fall back to a fresh empty object.
129
+  */
130
+ function toFrontmatterRecord(value) {
131
+     return isPlainRecord(value) ? value : {};
132
+ }
133
+ /**
134
+  * Return true for any non-null, non-array value whose `typeof` is "object"; the prototype is not inspected.
135
+  */
136
+ function isPlainRecord(value) {
137
+     return typeof value === "object" && value !== null && !Array.isArray(value);
138
+ }
package/dist/stringify.d.ts ADDED
@@ -0,0 +1,13 @@
1
+ /**
2
+  * Input accepted by `stringify`: optional frontmatter plus the markdown body.
3
+  */
4
+ export type MarkdownStringifyInput = {
5
+     /** Frontmatter data to write; when omitted or empty, `stringify` returns the body alone. */
6
+     frontmatter?: Record<string, unknown>;
7
+     /** Markdown body text. */
8
+     body: string;
9
+ };
10
+ /**
11
+  * Serialize frontmatter and body back into markdown; the body is returned unchanged when frontmatter is omitted or has no keys.
12
+  */
13
+ export declare function stringify(input: MarkdownStringifyInput): string;
package/dist/stringify.js ADDED
@@ -0,0 +1,15 @@
1
+ import matter from "gray-matter";
2
+ /**
3
+  * Serialize frontmatter and body back into markdown: return `input.body` untouched when there is no frontmatter to emit, otherwise delegate to gray-matter's `stringify`.
4
+  */
5
+ export function stringify(input) {
6
+     if (!hasFrontmatter(input.frontmatter))
7
+         return input.body;
8
+     return matter.stringify(input.body, input.frontmatter);
9
+ }
10
+ /**
11
+  * Return true only when `value` is defined and has at least one own enumerable key.
12
+  */
13
+ function hasFrontmatter(value) {
14
+     return value !== undefined && Object.keys(value).length > 0;
15
+ }
package/package.json ADDED
@@ -0,0 +1,45 @@
1
+ {
2
+ "name": "@holdenmatt/md-parser",
3
+ "version": "0.1.0",
4
+ "description": "Markdown parser primitives for frontmatter, body text, sections, and code blocks.",
5
+ "license": "MIT",
6
+ "repository": {
7
+ "type": "git",
8
+ "url": "git+https://github.com/holdenmatt/md-parser.git"
9
+ },
10
+ "files": [
11
+ "dist",
12
+ "SPEC.md"
13
+ ],
14
+ "type": "module",
15
+ "exports": {
16
+ ".": {
17
+ "types": "./dist/index.d.ts",
18
+ "import": "./dist/index.js"
19
+ }
20
+ },
21
+ "scripts": {
22
+ "build": "tsc",
23
+ "prepare": "tsc",
24
+ "test": "vitest run",
25
+ "typecheck": "tsc --noEmit",
26
+ "lint": "oxlint",
27
+ "format": "oxfmt --write .",
28
+ "format:check": "oxfmt --check .",
29
+ "check": "pnpm format:check && pnpm lint && pnpm test && pnpm typecheck && pnpm build"
30
+ },
31
+ "dependencies": {
32
+ "gray-matter": "^4.0.3",
33
+ "remark-parse": "^11.0.0",
34
+ "unified": "^11.0.5"
35
+ },
36
+ "devDependencies": {
37
+ "@types/mdast": "^4.0.4",
38
+ "@types/node": "^26.0.1",
39
+ "oxfmt": "^0.56.0",
40
+ "oxlint": "^1.71.0",
41
+ "typescript": "^6.0.3",
42
+ "vitest": "^4.1.9"
43
+ },
44
+ "packageManager": "pnpm@10.33.0"
45
+ }