quill-matter 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +175 -0
- package/cli/index.ts +214 -0
- package/package.json +90 -0
- package/src/detector.ts +53 -0
- package/src/excerpt.ts +63 -0
- package/src/extractor.ts +120 -0
- package/src/index.ts +510 -0
- package/src/parsers.ts +73 -0
- package/src/sanitizer.ts +50 -0
- package/src/stringify.ts +151 -0
- package/src/types.ts +166 -0
- package/src/validator.ts +28 -0
- package/src/wasm-loader.ts +166 -0
package/src/extractor.ts
ADDED
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
import type { DelimiterPair, ExtractionResult } from "./types.js";
|
|
2
|
+
import { DEFAULT_DELIMITERS, ExtractionError } from "./types.js";
|
|
3
|
+
|
|
4
|
+
/**
 * Split a Markdown source into its front matter block and the body that
 * follows it.
 *
 * Delimiter pairs are attempted in the order given and the first pair that
 * yields an extraction wins. Pairs may be symmetric (`---`, `+++`) or
 * asymmetric (e.g. `{ open: "<!--", close: "-->" }`).
 *
 * @param source     Raw Markdown source string. A leading BOM (U+FEFF) is
 *                   ignored so that it cannot hide the opening delimiter.
 * @param delimiters Delimiter pairs to try, in priority order. Defaults to
 *   `[{ open: "---", close: "---" }, { open: "---", close: "..." }, { open: "+++", close: "+++" }]`.
 *
 * @returns The raw (unparsed) data, the body content and the delimiter pair
 *   that matched, or `null` when the source is empty / whitespace-only or
 *   does not start with any of the opening delimiters.
 * @throws {ExtractionError} When a pair has an empty `open` or `close`, or
 *   when the source starts with an opening delimiter but none of the
 *   corresponding closing delimiters is found.
 */
export function extractFrontMatter(
  source: string,
  delimiters: readonly DelimiterPair[] = DEFAULT_DELIMITERS,
): ExtractionResult | null {
  if (!source || source.trim().length === 0) {
    return null;
  }

  // Editors such as Windows Notepad prepend U+FEFF; drop it before matching.
  const text = source.charCodeAt(0) === 0xfeff ? source.slice(1) : source;

  // Reject malformed pairs up front, before any extraction is attempted.
  for (const { open, close } of delimiters) {
    if (!open || !close) {
      throw new ExtractionError(
        "Invalid delimiter pair: both open and close must be non-empty strings.",
      );
    }
  }

  // First pair that produces an extraction wins.
  for (const candidate of delimiters) {
    const extracted = tryExtract(text, candidate);
    if (extracted !== undefined) {
      return extracted;
    }
  }

  // Nothing extracted. Work out whether that is "no front matter at all" or
  // "an opening delimiter without its close".
  const opened = delimiters.filter(
    ({ open }) => text.startsWith(`${open}\n`) || text.startsWith(`${open}\r\n`),
  );
  if (opened.length === 0) {
    return null;
  }

  // Report only the close delimiters paired with an opening that matched.
  const tried = [...new Set(opened.map((d) => d.close))].map((c) => `"${c}"`).join(", ");
  throw new ExtractionError(
    `Unclosed front matter block: opening delimiter found but no closing delimiter (tried: ${tried}).`,
  );
}
|
|
75
|
+
|
|
76
|
+
// ---------------------------------------------------------------------------
|
|
77
|
+
// Internal helpers
|
|
78
|
+
// ---------------------------------------------------------------------------
|
|
79
|
+
|
|
80
|
+
/**
 * Attempt to extract front matter from `source` using one delimiter pair.
 *
 * The source must begin with `pair.open` followed by a line break (`\n` or
 * `\r\n`). The block ends at the first later line that starts with
 * `pair.close` and has nothing but spaces/tabs after it (or ends the input).
 * Lines that merely start with the close delimiter (e.g. `----` or `---x`
 * for a `---` close) are skipped rather than treated as the terminator.
 *
 * @param source Source text. BOM handling is the caller's responsibility.
 * @param pair   Delimiter pair to try.
 * @returns `{ rawData, content, delimiter }` on success, where `rawData` is
 *   the trimmed text between the delimiters and `content` is everything
 *   after the closing delimiter (minus one directly following line break);
 *   `undefined` when the opening does not match or no closing line exists.
 */
function tryExtract(source: string, pair: DelimiterPair): ExtractionResult | undefined {
  const { open, close } = pair;

  // The file must start with the opening delimiter followed by a newline.
  if (!source.startsWith(`${open}\n`) && !source.startsWith(`${open}\r\n`)) {
    return undefined; // signal "not this delimiter"
  }

  // Index of the first character after the opening line. The character right
  // after `open` is either "\r" (CRLF) or "\n" (LF), guaranteed by the check
  // above, so probing that position is sufficient.
  const openEnd = source[open.length] === "\r" ? open.length + 2 : open.length + 1;

  // True when only horizontal whitespace separates `from` from a line break
  // or the end of the input.
  const endsLine = (from: number): boolean => {
    let i = from;
    while (source[i] === " " || source[i] === "\t") {
      i += 1;
    }
    return (
      i >= source.length ||
      source[i] === "\n" ||
      (source[i] === "\r" && source[i + 1] === "\n")
    );
  };

  // Searching for "\n" + close also covers CRLF input ("\r\n" + close contains
  // it). Start at the opening line's own "\n" so an empty block such as
  // "---\n---\n" is found.
  const needle = `\n${close}`;
  let closeIdx = source.indexOf(needle, openEnd - 1);
  while (closeIdx !== -1 && !endsLine(closeIdx + needle.length)) {
    closeIdx = source.indexOf(needle, closeIdx + 1);
  }

  if (closeIdx === -1) {
    // Close delimiter not found — let the next pair be tried.
    return undefined;
  }

  // For an empty block closeIdx < openEnd and slice() yields "". trim() also
  // removes the "\r" left in front of the closing line on CRLF input.
  const rawData = source.slice(openEnd, closeIdx).trim();

  // Skip one line break directly after the closing delimiter, if present.
  let bodyStart = closeIdx + needle.length;
  if (source[bodyStart] === "\n") {
    bodyStart += 1;
  } else if (source[bodyStart] === "\r" && source[bodyStart + 1] === "\n") {
    bodyStart += 2;
  }

  return { rawData, content: source.slice(bodyStart), delimiter: pair };
}
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,510 @@
|
|
|
1
|
+
import type { DetectionResult } from "./detector.js";
|
|
2
|
+
import { detectFormatWithPreparsed } from "./detector.js";
|
|
3
|
+
import { extractExcerpt } from "./excerpt.js";
|
|
4
|
+
import { extractFrontMatter } from "./extractor.js";
|
|
5
|
+
import { createParserAdapter } from "./parsers.js";
|
|
6
|
+
import { sanitizeKeys } from "./sanitizer.js";
|
|
7
|
+
import type {
|
|
8
|
+
DelimiterPair,
|
|
9
|
+
ExcerptOptions,
|
|
10
|
+
ExtractionResult,
|
|
11
|
+
FrontMatterFormat,
|
|
12
|
+
ParseResult,
|
|
13
|
+
ParseResultEmpty,
|
|
14
|
+
ParseResultError,
|
|
15
|
+
ParseResultSuccess,
|
|
16
|
+
} from "./types.js";
|
|
17
|
+
import { DEFAULT_DELIMITERS, FrontMatterError } from "./types.js";
|
|
18
|
+
import type { WasmParsers } from "./wasm-loader.js";
|
|
19
|
+
import { getWasmParsers, getWasmParsersSync } from "./wasm-loader.js";
|
|
20
|
+
|
|
21
|
+
// ---------------------------------------------------------------------------
|
|
22
|
+
// Lazy-loaded validator (avoids requiring valibot at import time)
|
|
23
|
+
// ---------------------------------------------------------------------------
|
|
24
|
+
|
|
25
|
+
/** Valibot GenericSchema — declared locally to avoid requiring valibot at import time. */
type AnySchema = { readonly "~standard": unknown };

/** Cached `validate` function from `./validator.js`; `null` until first loaded. */
let _validateFn: ((data: unknown, schema: AnySchema) => unknown) | null = null;

/**
 * Validate `data` against `schema`, importing the validator module on first
 * use and caching its `validate` export for later calls (including the
 * synchronous path).
 *
 * The result is cast to `T`; the cast itself is not checked here.
 */
async function lazyValidate<T>(data: unknown, schema: AnySchema): Promise<T> {
  if (_validateFn === null) {
    const { validate } = await import("./validator.js");
    _validateFn = validate as (data: unknown, schema: AnySchema) => unknown;
  }
  return _validateFn(data, schema) as T;
}
|
|
39
|
+
|
|
40
|
+
/**
 * Validate `data` against `schema` with the already-cached validator.
 *
 * @throws {FrontMatterError} When the validator module has not been loaded
 *   yet (nothing has populated the cache).
 */
function lazyValidateSync<T>(data: unknown, schema: AnySchema): T {
  const cached = _validateFn;
  if (cached === null) {
    throw new FrontMatterError(
      "Schema validation in sync mode requires the validator to be pre-loaded. " +
        "Call `parseFrontMatter()` once with a schema first, or " +
        "pre-load with: await import('quill-matter/validator')",
    );
  }
  return cached(data, schema) as T;
}
|
|
51
|
+
|
|
52
|
+
// ---------------------------------------------------------------------------
|
|
53
|
+
// Constants
|
|
54
|
+
// ---------------------------------------------------------------------------
|
|
55
|
+
|
|
56
|
+
/**
 * Maximum allowed input size in bytes (1 MB).
 * Both the TS layer and the Rust WASM layer enforce the same byte-level limit.
 */
const MAX_INPUT_SIZE = 1024 * 1024;

/** Shared encoder instance, reused for every size measurement. */
const encoder = new TextEncoder();

/** Byte length of a string when encoded as UTF-8. */
function byteLength(s: string): number {
  return encoder.encode(s).byteLength;
}
|
|
63
|
+
|
|
64
|
+
// ---------------------------------------------------------------------------
|
|
65
|
+
// Quick detection
|
|
66
|
+
// ---------------------------------------------------------------------------
|
|
67
|
+
|
|
68
|
+
/**
 * Check whether a string contains a front matter block.
 *
 * This is a **fast** check — it does not load WASM, does not parse the data,
 * and performs only minimal string matching: the source must start with an
 * opening delimiter line and a later line must consist of the paired closing
 * delimiter (optionally followed by spaces/tabs).
 *
 * Useful for filtering files before calling the full `parseFrontMatter()` pipeline.
 *
 * @param source     Source text; a leading BOM (U+FEFF) is ignored.
 * @param delimiters Delimiter pairs to test. Defaults to `DEFAULT_DELIMITERS`.
 * @returns `true` when any pair has both its opening and closing line present.
 * @throws {FrontMatterError} When a pair has an empty `open` or `close`
 *   (only checked for non-empty `source`).
 *
 * @example
 * ```ts
 * import { hasFrontMatter } from "quill-matter";
 *
 * if (hasFrontMatter(source)) {
 *   const result = await parseFrontMatter(source);
 * }
 * ```
 */
export function hasFrontMatter(
  source: string,
  delimiters: readonly DelimiterPair[] = DEFAULT_DELIMITERS,
): boolean {
  if (!source) return false;

  // Validate delimiter pairs (same rule as extractFrontMatter).
  for (const { open, close } of delimiters) {
    if (!open || !close) {
      throw new FrontMatterError(
        "Invalid delimiter pair: both open and close must be non-empty strings.",
      );
    }
  }

  // Strip BOM for consistency with extractFrontMatter.
  const s = source.charCodeAt(0) === 0xfeff ? source.slice(1) : source;

  // True when only horizontal whitespace separates `from` from a line break
  // or the end of the input.
  const endsLine = (from: number): boolean => {
    let i = from;
    while (s[i] === " " || s[i] === "\t") {
      i += 1;
    }
    return i >= s.length || s[i] === "\n" || (s[i] === "\r" && s[i + 1] === "\n");
  };

  for (const { open, close } of delimiters) {
    if (!s.startsWith(`${open}\n`) && !s.startsWith(`${open}\r\n`)) {
      continue;
    }

    // Begin at the opening line's "\n" so an empty block ("---\n---") counts.
    // "\n" + close also matches CRLF input, so one search pattern is enough.
    const needle = `\n${close}`;
    const searchFrom = s[open.length] === "\r" ? open.length + 1 : open.length;
    for (
      let at = s.indexOf(needle, searchFrom);
      at !== -1;
      at = s.indexOf(needle, at + 1)
    ) {
      // Require the closing delimiter to sit on its own line.
      if (endsLine(at + needle.length)) return true;
    }
  }
  return false;
}
|
|
113
|
+
|
|
114
|
+
// ---------------------------------------------------------------------------
|
|
115
|
+
// Public API
|
|
116
|
+
// ---------------------------------------------------------------------------
|
|
117
|
+
|
|
118
|
+
/**
 * Options accepted by `parseFrontMatter` and `parseFrontMatterSync`.
 */
export interface ParseOptions<T = Record<string, unknown>> {
  /**
   * Optional Valibot schema used to validate the parsed data. When given,
   * the returned `data` is the validator's output.
   *
   * Note: without a schema the generic `T` is not checked at runtime — the
   * parsed value is simply cast, so the caller must ensure the cast is sound.
   */
  schema?: AnySchema;

  /**
   * Force a specific format instead of relying on auto-detection.
   */
  format?: FrontMatterFormat;

  /**
   * Custom delimiter pairs to try when extracting front matter.
   * Defaults to `[{ open: "---", close: "---" }, { open: "---", close: "..." }, { open: "+++", close: "+++" }]`.
   */
  delimiters?: DelimiterPair[];

  /**
   * When `false`, **parse and validation** errors are caught and returned as
   * `result.error` instead of being thrown; `result.data` is then `{}`.
   *
   * **Note:** extraction errors (e.g. unclosed delimiters) and the input-size
   * check run before that error handling and always throw, regardless of
   * this setting.
   *
   * @default true
   */
  strict?: boolean;

  /**
   * Extract an excerpt from the content.
   * - `true` — Use default separator `<!-- more -->`, fallback to first paragraph.
   * - `{ separator: "..." }` — Use custom separator.
   *
   * @default false
   */
  excerpt?: boolean | ExcerptOptions;
}
|
|
162
|
+
|
|
163
|
+
/**
 * Parse front matter from a Markdown source string (async).
 *
 * Pipeline: extract → detect format → parse via WASM → validate (optional).
 *
 * WASM parsers are obtained through `getWasmParsers()` and schema validation
 * goes through the lazily imported validator.
 *
 * @example
 * ```ts
 * import { parseFrontMatter } from "quill-matter";
 *
 * const result = await parseFrontMatter(`---
 * title: Hello World
 * ---
 * # Content here`);
 *
 * console.log(result.data);    // { title: "Hello World" }
 * console.log(result.format);  // "yaml"
 * console.log(result.content); // "# Content here"
 * ```
 *
 * @remarks
 * Because this is an `async` function, errors thrown synchronously by the
 * shared core surface as a rejected promise. Parse and validation errors are
 * handled according to the `strict` option.
 */
export async function parseFrontMatter<T = Record<string, unknown>>(
  source: string,
  options?: ParseOptions<T>,
): Promise<ParseResult<T>> {
  const asyncCallbacks: ParseCallbacks<T> = {
    getWasm: () => getWasmParsers(),
    validate: (data, schema) => lazyValidate<T>(data, schema),
  };
  return parseFrontMatterCore<T>(source, options, asyncCallbacks);
}
|
|
198
|
+
|
|
199
|
+
/**
 * Parse front matter synchronously.
 *
 * **Requires** `await initWasm()` to have been called first. Using a
 * `schema` additionally requires the validator module to be loaded already.
 *
 * @throws {Error} if WASM module is not initialized.
 *
 * @example
 * ```ts
 * import { initWasm, parseFrontMatterSync } from "quill-matter";
 *
 * await initWasm(); // once at startup
 * const result = parseFrontMatterSync(source);
 * ```
 */
export function parseFrontMatterSync<T = Record<string, unknown>>(
  source: string,
  options?: ParseOptions<T>,
): ParseResult<T> {
  const syncCallbacks: ParseCallbacks<T> = {
    getWasm: () => getWasmParsersSync(),
    validate: (data, schema) => lazyValidateSync<T>(data, schema),
  };
  // Both callbacks are synchronous, so the core's result is narrowed to the
  // non-promise branch of its return type.
  return parseFrontMatterCore<T>(source, options, syncCallbacks) as ParseResult<T>;
}
|
|
223
|
+
|
|
224
|
+
// ---------------------------------------------------------------------------
|
|
225
|
+
// Shared core implementation
|
|
226
|
+
// ---------------------------------------------------------------------------
|
|
227
|
+
|
|
228
|
+
/**
 * Runtime-specific hooks supplied by the async / sync entry points. Each may
 * answer immediately or with a promise.
 */
interface ParseCallbacks<T> {
  /** Obtain the WASM parsers. */
  getWasm: () => WasmParsers | Promise<WasmParsers>;
  /** Validate parsed data against a schema and return the typed result. */
  validate: (data: unknown, schema: AnySchema) => T | Promise<T>;
}
|
|
233
|
+
|
|
234
|
+
/**
 * Core parsing logic shared between the async and sync public APIs.
 *
 * The `callbacks` parameter abstracts away the only two differences:
 * how to obtain the WASM module and how to run validation. The result is a
 * promise whenever either callback answered with one, otherwise a plain
 * value.
 *
 * Error handling: the size check and front matter extraction run first and
 * always throw. Everything after that (obtaining WASM, parsing, key
 * sanitising, validation) is routed through `handleError`, which rethrows in
 * strict mode and otherwise produces an error result. This covers the case
 * where the raw data is available synchronously (pre-parsed data) but
 * validation is asynchronous.
 *
 * @param source    Raw source text.
 * @param options   Parse options; `strict` defaults to `true`.
 * @param callbacks Runtime-specific WASM / validation hooks.
 * @throws {FrontMatterError} When the UTF-8 size of `source` exceeds
 *   `MAX_INPUT_SIZE`.
 */
function parseFrontMatterCore<T = Record<string, unknown>>(
  source: string,
  options: ParseOptions<T> | undefined,
  callbacks: ParseCallbacks<T>,
): ParseResult<T> | Promise<ParseResult<T>> {
  const size = byteLength(source);
  if (size > MAX_INPUT_SIZE) {
    throw new FrontMatterError(`Input too large: ${size} bytes (max: ${MAX_INPUT_SIZE})`);
  }

  const extraction = extractFrontMatter(source, options?.delimiters);

  // No front matter, or an empty block: nothing to parse.
  if (extraction === null || extraction.rawData.length === 0) {
    const body = extraction?.content ?? source;
    const fmt = options?.format ?? "yaml";
    const excerpt = options?.excerpt ? extractExcerpt(body, options.excerpt) : undefined;
    return {
      data: {} as Record<string, never>,
      content: body,
      format: fmt,
      isEmpty: true,
      excerpt,
    } satisfies ParseResultEmpty;
  }

  const detection = detectFormatWithPreparsed(extraction.rawData, extraction.delimiter);
  const format = options?.format ?? detection.format;
  const strict = options?.strict !== false;
  const excerpt = options?.excerpt
    ? extractExcerpt(extraction.content, options.excerpt)
    : undefined;

  // Single error path for both synchronous throws and promise rejections.
  const onError = (err: unknown): ParseResult<T> =>
    handleError(err, strict, extraction, format, excerpt);

  try {
    // Resolve raw data — may be sync or async depending on WASM loading.
    const rawOrPromise = resolveRawData<T>(extraction, detection, format, callbacks);

    // Build the result once raw data is available.
    const buildResult = (raw: unknown): ParseResult<T> | Promise<ParseResult<T>> => {
      const sanitised = sanitizeKeys(raw);

      if (options?.schema) {
        const validated = callbacks.validate(sanitised, options.schema);
        // Handle both sync and async validation.
        if (validated instanceof Promise) {
          return validated.then((data) => makeSuccess<T>(data, extraction, format, excerpt));
        }
        return makeSuccess<T>(validated, extraction, format, excerpt);
      }

      return makeSuccess<T>(sanitised as T, extraction, format, excerpt);
    };

    const result =
      rawOrPromise instanceof Promise
        ? rawOrPromise.then(buildResult)
        : buildResult(rawOrPromise);

    // Attach the rejection handler to whatever promise comes out, so async
    // validation after synchronously available data honours `strict` too.
    return result instanceof Promise ? result.catch(onError) : result;
  } catch (err) {
    return onError(err);
  }
}
|
|
302
|
+
|
|
303
|
+
/**
 * Produce the parsed (still unsanitised) front matter value.
 *
 * When the requested format equals the detected one and detection already
 * carries `preparsedData`, that value is returned as-is and WASM is never
 * requested. Otherwise the WASM parsers are obtained through
 * `callbacks.getWasm()` and `extraction.rawData` is parsed with the adapter
 * for `format` — synchronously or via a promise, mirroring what `getWasm`
 * returned.
 */
function resolveRawData<T>(
  extraction: ExtractionResult,
  detection: DetectionResult,
  format: FrontMatterFormat,
  callbacks: ParseCallbacks<T>,
): unknown | Promise<unknown> {
  const sameFormat = format === detection.format;
  if (sameFormat && "preparsedData" in detection && detection.preparsedData !== undefined) {
    return detection.preparsedData;
  }

  const parseWith = (wasm: WasmParsers): unknown =>
    createParserAdapter(format, wasm).parse(extraction.rawData);

  const wasmOrPromise = callbacks.getWasm();
  return wasmOrPromise instanceof Promise
    ? wasmOrPromise.then(parseWith)
    : parseWith(wasmOrPromise);
}
|
|
329
|
+
|
|
330
|
+
/**
 * Assemble the success result from parsed data plus the body content and
 * raw front matter text taken from `extraction`. `isEmpty` is always `false`.
 */
function makeSuccess<T>(
  data: T,
  extraction: ExtractionResult,
  format: FrontMatterFormat,
  excerpt: string | undefined,
): ParseResultSuccess<T> {
  const { content, rawData } = extraction;
  const success: ParseResultSuccess<T> = {
    data,
    content,
    format,
    isEmpty: false,
    excerpt,
    rawData,
  };
  return success;
}
|
|
345
|
+
|
|
346
|
+
/**
 * Apply the `strict` policy to a parse / validation failure.
 *
 * In strict mode the original error is rethrown untouched. Otherwise an
 * error result is returned with empty `data`, the extracted body, and a
 * fresh `FrontMatterError` whose message has been passed through
 * `sanitizeErrorMessage` (to avoid leaking internal paths, stack traces or
 * Rust panic details to consumers).
 */
function handleError(
  err: unknown,
  strict: boolean,
  extraction: ExtractionResult,
  format: FrontMatterFormat,
  excerpt: string | undefined,
): ParseResultError {
  if (strict) {
    throw err;
  }

  const message = sanitizeErrorMessage(err instanceof Error ? err.message : String(err));

  const failure: ParseResultError = {
    data: {} as Record<string, never>,
    content: extraction.content,
    format,
    isEmpty: false,
    error: new FrontMatterError(message),
    excerpt,
    rawData: extraction.rawData,
  };
  return failure;
}
|
|
370
|
+
|
|
371
|
+
/**
 * Reduce an error message to something safe to hand back to consumers.
 *
 * Steps, in order: replace path-like runs (an optional drive letter, then a
 * `/` or `\` followed by word characters, dots, slashes or hyphens) with
 * `<path>`; drop stack-trace lines (`\n   at ...`); cap the text at 300
 * characters, appending `…` when it was cut; finally trim surrounding
 * whitespace.
 */
function sanitizeErrorMessage(message: string): string {
  const MAX_LENGTH = 300;

  const withoutPaths = message.replace(/(?:[A-Za-z]:)?[/\\][\w./\-]+/g, "<path>");
  const withoutStack = withoutPaths.replace(/\n\s+at\s+.+/g, "");
  const bounded =
    withoutStack.length > MAX_LENGTH ? `${withoutStack.slice(0, MAX_LENGTH)}…` : withoutStack;

  return bounded.trim();
}
|
|
383
|
+
|
|
384
|
+
// ---------------------------------------------------------------------------
|
|
385
|
+
// Re-exports
|
|
386
|
+
// ---------------------------------------------------------------------------
|
|
387
|
+
|
|
388
|
+
export { detectFormat } from "./detector.js";
|
|
389
|
+
export { extractExcerpt } from "./excerpt.js";
|
|
390
|
+
export { extractFrontMatter } from "./extractor.js";
|
|
391
|
+
|
|
392
|
+
export { stringifyFrontMatter, stringifyFrontMatterSync } from "./stringify.js";
|
|
393
|
+
export type {
|
|
394
|
+
DelimiterPair,
|
|
395
|
+
ExcerptOptions,
|
|
396
|
+
ExtractionResult,
|
|
397
|
+
FrontMatterFormat,
|
|
398
|
+
ParseResult,
|
|
399
|
+
ParseResultEmpty,
|
|
400
|
+
ParseResultError,
|
|
401
|
+
ParseResultSuccess,
|
|
402
|
+
ParserAdapter,
|
|
403
|
+
StringifyOptions,
|
|
404
|
+
} from "./types.js";
|
|
405
|
+
export {
|
|
406
|
+
DEFAULT_DELIMITERS,
|
|
407
|
+
ExtractionError,
|
|
408
|
+
FrontMatterError,
|
|
409
|
+
ParseError,
|
|
410
|
+
ValidationError,
|
|
411
|
+
} from "./types.js";
|
|
412
|
+
export { initWasm } from "./wasm-loader.js";
|
|
413
|
+
|
|
414
|
+
// Note: `validate` is NOT re-exported here to avoid pulling in the valibot
|
|
415
|
+
// dependency at import time. Use the subpath import instead: `quill-matter/validator`.
|
|
416
|
+
// ---------------------------------------------------------------------------
|
|
417
|
+
// Runtime detection & File API
|
|
418
|
+
// ---------------------------------------------------------------------------
|
|
419
|
+
|
|
420
|
+
declare const Bun:
|
|
421
|
+
| { file(path: string | URL): { text(): Promise<string>; exists(): Promise<boolean> } }
|
|
422
|
+
| undefined;
|
|
423
|
+
declare const Deno: { readTextFile(path: string | URL): Promise<string> } | undefined;
|
|
424
|
+
|
|
425
|
+
/**
 * Read a file (or fetch a URL) and parse its front matter.
 *
 * The text is obtained through `readFileContent` — which picks `fetch`, Bun,
 * Deno or `node:fs` depending on the input and runtime — and then handed to
 * `parseFrontMatter`.
 *
 * @param path - File path (string) or URL to read.
 * @param options - Parse options (format, delimiters, schema, etc).
 * @returns The parsed front matter result.
 * @throws {Error} If the content cannot be read.
 */
export async function readFrontMatter<T = Record<string, unknown>>(
  path: string | URL,
  options?: ParseOptions<T>,
): Promise<ParseResult<T>> {
  const text = await readFileContent(path);
  const parsed = await parseFrontMatter<T>(text, options);
  return parsed;
}
|
|
443
|
+
|
|
444
|
+
/**
 * Read and parse several files concurrently.
 *
 * All reads are started at once and combined with `Promise.all`, so the
 * returned promise rejects as soon as any single file fails.
 *
 * @param paths - Array of file paths or URLs.
 * @param options - Parse options (shared across all files).
 * @returns Array of results in the same order as inputs.
 */
export async function readFrontMatterMany<T = Record<string, unknown>>(
  paths: (string | URL)[],
  options?: ParseOptions<T>,
): Promise<ParseResult<T>[]> {
  const pending = paths.map((entry) => readFrontMatter<T>(entry, options));
  return Promise.all(pending);
}
|
|
457
|
+
|
|
458
|
+
/**
 * Load the text behind `path`, choosing the mechanism by input and runtime:
 *
 * 1. `http:` / `https:` inputs (URL objects or strings) use `fetch`.
 * 2. If the `Bun` global exists, `Bun.file(path).text()` is used.
 * 3. If the `Deno` global exists, `Deno.readTextFile(path)` is used.
 * 4. Otherwise `node:fs/promises` is imported dynamically.
 *
 * @throws {Error} `Failed to fetch ...` for a non-OK HTTP response, or
 *   `File not found: ...` for any failure in the Bun read or in the
 *   `node:fs` fallback (including a failed dynamic import). Deno errors
 *   propagate unchanged.
 */
async function readFileContent(path: string | URL): Promise<string> {
  // 1. Fetch (HTTP/HTTPS) - prioritize for all runtimes
  const isRemote =
    path instanceof URL
      ? path.protocol === "http:" || path.protocol === "https:"
      : typeof path === "string" && /^https?:/.test(path);

  if (isRemote) {
    const response = await fetch(path);
    if (response.ok) {
      return response.text();
    }
    throw new Error(
      `Failed to fetch ${String(path)}: ${response.status} ${response.statusText}`,
    );
  }

  // 2. Bun (Local Files & file: URLs)
  if (typeof Bun !== "undefined") {
    // Bun.file() handles absolute/relative paths and file:// URLs correctly
    const handle = Bun.file(path);
    try {
      return await handle.text();
    } catch {
      throw new Error(`File not found: ${String(path)}`);
    }
  }

  // 3. Deno (Local Files & file: URLs)
  if (typeof Deno !== "undefined") {
    return Deno.readTextFile(path);
  }

  // 4. Fallback: node:fs (covers Vitest workers where Bun global is unavailable)
  try {
    const fs = await import("node:fs/promises");

    // Convert URL to path string to avoid type conflicts between Deno and Node URL types
    let filePath: string;
    if (typeof path === "string") {
      filePath = path;
    } else if (path.protocol !== "file:") {
      // Non-file URLs are passed on by their href.
      filePath = path.href;
    } else {
      const nodeUrl = await import("node:url");
      // Deno's URL type and Node's URL type have incompatible TypeScript definitions
      // (searchParams property differs), but they're runtime-compatible.
      // @ts-expect-error - Suppress type error: Deno URL vs Node URL type mismatch
      filePath = nodeUrl.fileURLToPath(path);
    }

    return await fs.readFile(filePath, "utf-8");
  } catch {
    throw new Error(`File not found: ${String(path)}`);
  }
}
|
package/src/parsers.ts
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
import type { FrontMatterFormat, ParserAdapter } from "./types.js";
|
|
2
|
+
import { ParseError } from "./types.js";
|
|
3
|
+
import type { WasmParsers } from "./wasm-loader.js";
|
|
4
|
+
|
|
5
|
+
/**
 * Create a `ParserAdapter` for the given format backed by the WASM parsers.
 *
 * The parser function is looked up as an *own* property of `FORMAT_TO_FN`,
 * so a runtime value that is not a known format (including names inherited
 * from `Object.prototype`, such as `"toString"`) never resolves to a callable.
 *
 * @param format Front matter format to parse.
 * @param wasm   WASM parser bindings passed to the format's parse function.
 * @returns An adapter whose `parse(input)` returns the parsed value.
 *   `parse` throws a `ParseError` when the format is unsupported or when the
 *   underlying parser throws; in the latter case line/column are filled in
 *   when they can be recovered from the parser's message.
 */
export function createParserAdapter(format: FrontMatterFormat, wasm: WasmParsers): ParserAdapter {
  const fn: ParseFn | undefined = Object.prototype.hasOwnProperty.call(FORMAT_TO_FN, format)
    ? FORMAT_TO_FN[format]
    : undefined;

  return {
    parse(input: string): unknown {
      if (fn === undefined) {
        // Only reachable when the static type was bypassed at runtime.
        throw new ParseError(
          `Unsupported front matter format: ${String(format)}`,
          format,
          undefined,
          undefined,
        );
      }

      try {
        return fn(wasm, input);
      } catch (err) {
        const message = err instanceof Error ? err.message : String(err);
        const { line, column } = extractErrorPosition(message);
        throw new ParseError(
          `Failed to parse ${format.toUpperCase()}: ${message}`,
          format,
          line,
          column,
        );
      }
    },
  };
}
|
|
28
|
+
|
|
29
|
+
// ---------------------------------------------------------------------------
|
|
30
|
+
// Internal
|
|
31
|
+
// ---------------------------------------------------------------------------
|
|
32
|
+
|
|
33
|
+
/** Signature shared by the per-format parse functions. */
type ParseFn = (wasm: WasmParsers, input: string) => unknown;

/** Maps each supported format to the WASM binding that parses it. */
const FORMAT_TO_FN: Record<FrontMatterFormat, ParseFn> = {
  yaml(wasm, input) {
    return wasm.parse_yaml(input);
  },
  json(wasm, input) {
    return wasm.parse_json(input);
  },
  toml(wasm, input) {
    return wasm.parse_toml(input);
  },
};
|
|
40
|
+
|
|
41
|
+
/**
 * Recover a line / column pair from a parser error message.
 *
 * Only the first 500 characters are inspected, which bounds the regex work
 * on unusually long messages. Two shapes are recognised, in this order:
 * - `line X, column Y` / `line X column Y` (case-insensitive)
 * - `X:Y`, unless the number before the colon directly follows a `.` or
 *   another digit, or the number after it runs into a `.`
 *   (so `1.2:3` and `5:10.2` are ignored).
 *
 * Note that a clock-style value such as `12:34:56` still matches the second
 * shape and is reported as line 12, column 34.
 *
 * @returns `{ line, column }` when a shape matched, otherwise `{}`.
 */
function extractErrorPosition(message: string): { line?: number; column?: number } {
  const head = message.slice(0, 500);

  const shapes = [
    /line\s+(\d+)[,\s]+column\s+(\d+)/i,
    /(?<![.\d])(\d+):(\d+)(?!\d*\.)/,
  ];

  for (const shape of shapes) {
    const found = shape.exec(head);
    if (found) {
      return {
        line: Number.parseInt(found[1], 10),
        column: Number.parseInt(found[2], 10),
      };
    }
  }

  return {};
}
|