quill-matter 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,120 @@
1
+ import type { DelimiterPair, ExtractionResult } from "./types.js";
2
+ import { DEFAULT_DELIMITERS, ExtractionError } from "./types.js";
3
+
4
/**
 * Extract the front matter block and the remaining Markdown body from a source
 * string.
 *
 * Supports symmetric delimiters (`---`, `+++`) and asymmetric pairs
 * (e.g. `{ open: "<!--", close: "-->" }`).
 *
 * When several pairs share the opening delimiter found at the top of the
 * source (e.g. `---`/`---` and `---`/`...`), every one of them is tried and
 * the pair whose block ends first wins. This keeps a later `---` line in the
 * Markdown body from swallowing a block that was closed with `...`. Ties go
 * to the pair listed first.
 *
 * @param source      Raw Markdown source string. A leading BOM (U+FEFF) is ignored.
 * @param delimiters  Optional array of delimiter pairs to try. Defaults to
 *   `[{ open: "---", close: "---" }, { open: "---", close: "..." }, { open: "+++", close: "+++" }]`.
 *
 * @returns An `ExtractionResult` with the raw (unparsed) data, body content,
 *   and delimiter used — or `null` if no front matter is present.
 * @throws {ExtractionError} If a delimiter pair has an empty `open` or `close`,
 *   or if the source starts with an opening delimiter but none of the matching
 *   closing delimiters is found.
 */
export function extractFrontMatter(
  source: string,
  delimiters: readonly DelimiterPair[] = DEFAULT_DELIMITERS,
): ExtractionResult | null {
  if (!source || source.trim().length === 0) {
    return null;
  }

  // Strip UTF-8 BOM — editors like Windows Notepad prepend \uFEFF which
  // would prevent the opening delimiter from matching.
  const text = source.charCodeAt(0) === 0xfeff ? source.slice(1) : source;

  // Validate delimiter pairs.
  for (const { open, close } of delimiters) {
    if (!open || !close) {
      throw new ExtractionError(
        "Invalid delimiter pair: both open and close must be non-empty strings.",
      );
    }
  }

  // Only pairs whose opening delimiter sits on the very first line can match.
  const candidates = delimiters.filter(
    ({ open }) => text.startsWith(`${open}\n`) || text.startsWith(`${open}\r\n`),
  );
  if (candidates.length === 0) {
    // No front matter found.
    return null;
  }

  // Try every candidate and keep the one that leaves the most body text,
  // i.e. the one whose block closes earliest. The strict `>` keeps the
  // first-listed pair on ties.
  let best: ExtractionResult | undefined;
  for (const pair of candidates) {
    const result = tryExtract(text, pair);
    if (result === undefined) continue;
    if (best === undefined || result.content.length > best.content.length) {
      best = result;
    }
  }
  if (best !== undefined) {
    return best;
  }

  // The file starts with a known opening delimiter but none of the
  // corresponding closing delimiters were found.
  const closes = [...new Set(candidates.map((d) => d.close))];
  throw new ExtractionError(
    `Unclosed front matter block: opening delimiter found but no closing delimiter (tried: ${closes.map((c) => `"${c}"`).join(", ")}).`,
  );
}
75
+
76
+ // ---------------------------------------------------------------------------
77
+ // Internal helpers
78
+ // ---------------------------------------------------------------------------
79
+
80
/**
 * Try to extract a front matter block delimited by `pair`.
 *
 * The source must begin with `pair.open` immediately followed by a line break
 * (`\n` or `\r\n`). The block ends at the first later line that consists of
 * `pair.close`, optionally followed by spaces or tabs, and then a line break
 * or the end of the input. Lines that merely start with the closing delimiter
 * (e.g. `----` or `---foo`) do not end the block.
 *
 * @param source  Source text, already stripped of any BOM by the caller.
 * @param pair    Delimiter pair to try.
 * @returns The trimmed raw data, the body that follows the closing line, and
 *   `pair` itself — or `undefined` when this pair does not apply (opening
 *   delimiter absent, or no closing line found).
 */
function tryExtract(source: string, pair: DelimiterPair): ExtractionResult | undefined {
  const { open, close } = pair;

  // The file must start with the opening delimiter followed by a newline.
  const opensWithLf = source.startsWith(`${open}\n`);
  if (!opensWithLf && !source.startsWith(`${open}\r\n`)) {
    return undefined; // signal "not this delimiter"
  }

  // Index of the first character after `open` + line break.
  const openEnd = open.length + (opensWithLf ? 1 : 2);

  // Search for "\n" + close. A "\r\n" + close occurrence contains this needle
  // too, so one search covers both line-ending styles. Start at openEnd - 1
  // (the line break that ends the opening line) so an empty block such as
  // "---\n---\n" is still found.
  const needle = `\n${close}`;
  let closeIdx = source.indexOf(needle, openEnd - 1);
  let bodyStart = -1;
  while (closeIdx !== -1) {
    let cursor = closeIdx + needle.length;
    // Tolerate trailing horizontal whitespace after the closing delimiter.
    while (source[cursor] === " " || source[cursor] === "\t") {
      cursor += 1;
    }
    if (cursor >= source.length) {
      bodyStart = source.length;
      break;
    }
    if (source[cursor] === "\n") {
      bodyStart = cursor + 1;
      break;
    }
    if (source[cursor] === "\r" && source[cursor + 1] === "\n") {
      bodyStart = cursor + 2;
      break;
    }
    // The delimiter text was only a prefix of a longer line; keep looking.
    closeIdx = source.indexOf(needle, closeIdx + 1);
  }

  if (closeIdx === -1) {
    // Close delimiter not found — let the next pair be tried.
    return undefined;
  }

  // For an empty block closeIdx is openEnd - 1, so the slice is "".
  // trim() also removes the "\r" left in front of the closing line in CRLF files.
  const rawData = source.slice(openEnd, closeIdx).trim();
  const content = source.slice(bodyStart);

  return { rawData, content, delimiter: pair };
}
package/src/index.ts ADDED
@@ -0,0 +1,510 @@
1
+ import type { DetectionResult } from "./detector.js";
2
+ import { detectFormatWithPreparsed } from "./detector.js";
3
+ import { extractExcerpt } from "./excerpt.js";
4
+ import { extractFrontMatter } from "./extractor.js";
5
+ import { createParserAdapter } from "./parsers.js";
6
+ import { sanitizeKeys } from "./sanitizer.js";
7
+ import type {
8
+ DelimiterPair,
9
+ ExcerptOptions,
10
+ ExtractionResult,
11
+ FrontMatterFormat,
12
+ ParseResult,
13
+ ParseResultEmpty,
14
+ ParseResultError,
15
+ ParseResultSuccess,
16
+ } from "./types.js";
17
+ import { DEFAULT_DELIMITERS, FrontMatterError } from "./types.js";
18
+ import type { WasmParsers } from "./wasm-loader.js";
19
+ import { getWasmParsers, getWasmParsersSync } from "./wasm-loader.js";
20
+
21
+ // ---------------------------------------------------------------------------
22
+ // Lazy-loaded validator (avoids requiring valibot at import time)
23
+ // ---------------------------------------------------------------------------
24
+
25
/**
 * Structural stand-in for a Valibot schema. Declared locally so that importing
 * this module does not require valibot.
 */
type AnySchema = { readonly "~standard": unknown };

/** Cached validator function; stays `null` until the validator module has been imported. */
let _validateFn: null | ((data: unknown, schema: AnySchema) => unknown) = null;
29
+
30
/**
 * Validate `data` against `schema`, importing `./validator.js` the first time
 * it is needed and reusing the cached function on later calls.
 */
async function lazyValidate<T>(data: unknown, schema: AnySchema): Promise<T> {
  if (_validateFn === null) {
    const { validate } = await import("./validator.js");
    _validateFn = validate as (data: unknown, schema: AnySchema) => unknown;
  }
  return _validateFn(data, schema) as T;
}
39
+
40
/**
 * Run the already-cached validator without awaiting anything.
 *
 * @throws {FrontMatterError} If the validator has not been loaded yet.
 */
function lazyValidateSync<T>(data: unknown, schema: AnySchema): T {
  const validate = _validateFn;
  if (validate === null) {
    throw new FrontMatterError(
      "Schema validation in sync mode requires the validator to be pre-loaded. " +
        "Call `parseFrontMatter()` once with a schema first, or " +
        "pre-load with: await import('quill-matter/validator')",
    );
  }
  return validate(data, schema) as T;
}
51
+
52
+ // ---------------------------------------------------------------------------
53
+ // Constants
54
+ // ---------------------------------------------------------------------------
55
+
56
/**
 * Upper bound on the accepted input, in bytes (1 MB).
 * According to the original note, the Rust WASM layer enforces the same
 * byte-level limit as this TypeScript layer.
 */
const MAX_INPUT_SIZE = 1024 * 1024;

/** Encoder instance shared by every `byteLength` call. */
const encoder = new TextEncoder();

/** Number of bytes `s` occupies once encoded as UTF-8. */
const byteLength = (s: string): number => {
  const bytes = encoder.encode(s);
  return bytes.length;
};
63
+
64
+ // ---------------------------------------------------------------------------
65
+ // Quick detection
66
+ // ---------------------------------------------------------------------------
67
+
68
/**
 * Check whether a string contains a front matter block.
 *
 * This is a cheap check: it only performs string matching on `source` and
 * does not parse the data.
 *
 * A block is recognised when the source (after an optional BOM) starts with
 * an opening delimiter followed by a line break, and a later line consists of
 * the paired closing delimiter, optionally followed by spaces or tabs. Lines
 * that merely start with the closing delimiter (e.g. `----`) do not count.
 *
 * Useful for filtering files before calling the full `parseFrontMatter()` pipeline.
 *
 * @param source      Text to inspect.
 * @param delimiters  Delimiter pairs to look for. Defaults to `DEFAULT_DELIMITERS`.
 * @returns `true` when any pair both opens and closes a block.
 * @throws {FrontMatterError} If a delimiter pair has an empty `open` or `close`.
 *
 * @example
 * ```ts
 * import { hasFrontMatter } from "quill-matter";
 *
 * if (hasFrontMatter(source)) {
 *   const result = await parseFrontMatter(source);
 * }
 * ```
 */
export function hasFrontMatter(
  source: string,
  delimiters: readonly DelimiterPair[] = DEFAULT_DELIMITERS,
): boolean {
  if (!source) return false;

  // Validate delimiter pairs (consistent with extractFrontMatter).
  for (const { open, close } of delimiters) {
    if (!open || !close) {
      throw new FrontMatterError(
        "Invalid delimiter pair: both open and close must be non-empty strings.",
      );
    }
  }

  // Strip BOM for consistency with extractFrontMatter.
  const s = source.charCodeAt(0) === 0xfeff ? source.slice(1) : source;

  return delimiters.some(({ open, close }) => {
    const opensWithLf = s.startsWith(`${open}\n`);
    if (!opensWithLf && !s.startsWith(`${open}\r\n`)) return false;

    const openEnd = open.length + (opensWithLf ? 1 : 2);

    // "\n" + close also matches inside "\r\n" + close, so one search handles
    // both line-ending styles.
    const needle = `\n${close}`;
    let idx = s.indexOf(needle, openEnd - 1);
    while (idx !== -1) {
      let cursor = idx + needle.length;
      while (s[cursor] === " " || s[cursor] === "\t") cursor += 1;
      const atLineEnd =
        cursor >= s.length ||
        s[cursor] === "\n" ||
        (s[cursor] === "\r" && s[cursor + 1] === "\n");
      if (atLineEnd) return true;
      // Only a prefix of a longer line — keep scanning.
      idx = s.indexOf(needle, idx + 1);
    }
    return false;
  });
}
113
+
114
+ // ---------------------------------------------------------------------------
115
+ // Public API
116
+ // ---------------------------------------------------------------------------
117
+
118
/**
 * Settings accepted by `parseFrontMatter` and `parseFrontMatterSync`.
 */
export interface ParseOptions<T = Record<string, unknown>> {
  /**
   * Valibot schema used to validate the parsed data. When present, the
   * returned `data` is the validated value.
   *
   * Without a schema the generic `T` is not checked at runtime; the caller
   * is responsible for the cast being sound.
   */
  schema?: AnySchema;

  /**
   * Parse with this format instead of the auto-detected one.
   */
  format?: FrontMatterFormat;

  /**
   * Delimiter pairs to try when extracting front matter.
   * Defaults to `[{ open: "---", close: "---" }, { open: "---", close: "..." }, { open: "+++", close: "+++" }]`.
   */
  delimiters?: DelimiterPair[];

  /**
   * Set to `false` to have **parse and validation** errors returned as
   * `result.error` (with `result.data` set to `{}`) instead of thrown.
   *
   * **Note:** Extraction errors (e.g. unclosed delimiters) are structural
   * and always throw, whatever this setting is.
   *
   * @default true
   */
  strict?: boolean;

  /**
   * Extract an excerpt from the content.
   * - `true` — Use default separator `<!-- more -->`, fallback to first paragraph.
   * - `{ separator: "..." }` — Use custom separator.
   *
   * @default false
   */
  excerpt?: boolean | ExcerptOptions;
}
162
+
163
/**
 * Asynchronously parse the front matter of a Markdown source string.
 *
 * Pipeline: extract → detect format → parse via WASM → validate (optional).
 *
 * WASM parsers are obtained through `getWasmParsers()` and, when a schema is
 * given, validation goes through the lazily imported validator.
 *
 * @example
 * ```ts
 * import { parseFrontMatter } from "quill-matter";
 *
 * const result = await parseFrontMatter(`---
 * title: Hello World
 * ---
 * # Content here`);
 *
 * console.log(result.data);    // { title: "Hello World" }
 * console.log(result.format);  // "yaml"
 * console.log(result.content); // "# Content here"
 * ```
 *
 * @remarks
 * Parse and validation failures are routed through the shared core, which
 * applies the `strict` option. Anything the core throws synchronously
 * surfaces as a rejection of the returned promise.
 */
export async function parseFrontMatter<T = Record<string, unknown>>(
  source: string,
  options?: ParseOptions<T>,
): Promise<ParseResult<T>> {
  const asyncCallbacks: ParseCallbacks<T> = {
    getWasm: () => getWasmParsers(),
    validate: (data, schema) => lazyValidate<T>(data, schema),
  };
  return parseFrontMatterCore<T>(source, options, asyncCallbacks);
}
198
+
199
/**
 * Synchronous counterpart of `parseFrontMatter`.
 *
 * **Requires** `await initWasm()` to have been called first. When a schema is
 * supplied, the validator must already be loaded as well.
 *
 * @throws {Error} if WASM module is not initialized.
 *
 * @example
 * ```ts
 * import { initWasm, parseFrontMatterSync } from "quill-matter";
 *
 * await initWasm(); // once at startup
 * const result = parseFrontMatterSync(source);
 * ```
 */
export function parseFrontMatterSync<T = Record<string, unknown>>(
  source: string,
  options?: ParseOptions<T>,
): ParseResult<T> {
  const syncCallbacks: ParseCallbacks<T> = {
    getWasm: () => getWasmParsersSync(),
    validate: (data, schema) => lazyValidateSync<T>(data, schema),
  };
  // Both callbacks are synchronous here, so the core's result is narrowed to
  // the non-promise form.
  const outcome = parseFrontMatterCore<T>(source, options, syncCallbacks);
  return outcome as ParseResult<T>;
}
223
+
224
+ // ---------------------------------------------------------------------------
225
+ // Shared core implementation
226
+ // ---------------------------------------------------------------------------
227
+
228
/**
 * Hooks supplied by the async and sync entry points so the shared core can
 * stay agnostic about how WASM is obtained and how validation is run.
 */
interface ParseCallbacks<T> {
  /** Provide the WASM parsers, either directly or as a promise. */
  getWasm: () => WasmParsers | Promise<WasmParsers>;
  /** Validate `data` against `schema`, either directly or as a promise. */
  validate: (data: unknown, schema: AnySchema) => T | Promise<T>;
}
233
+
234
/**
 * Core parsing logic shared between the async and sync public APIs.
 *
 * The `callbacks` parameter abstracts away the only two differences:
 * how to obtain the WASM module and how to run validation.
 *
 * Steps: enforce the size limit, extract the block, short-circuit when there
 * is no front matter (or it is empty), detect the format, obtain the raw
 * data, sanitise its keys, optionally validate, and build the result.
 *
 * Every failure after extraction — synchronous or asynchronous, from parsing
 * or from validation — is routed through `handleError`, so `strict: false`
 * applies uniformly.
 *
 * @param source     Raw Markdown source.
 * @param options    Caller-supplied options, if any.
 * @param callbacks  Runtime-specific WASM and validation hooks.
 * @returns The parse result, wrapped in a promise when any step was async.
 * @throws {FrontMatterError} If the input exceeds `MAX_INPUT_SIZE` bytes.
 *   Extraction errors propagate too; both are thrown regardless of `strict`.
 */
function parseFrontMatterCore<T = Record<string, unknown>>(
  source: string,
  options: ParseOptions<T> | undefined,
  callbacks: ParseCallbacks<T>,
): ParseResult<T> | Promise<ParseResult<T>> {
  const size = byteLength(source);
  if (size > MAX_INPUT_SIZE) {
    throw new FrontMatterError(`Input too large: ${size} bytes (max: ${MAX_INPUT_SIZE})`);
  }

  const extraction = extractFrontMatter(source, options?.delimiters);

  // No block, or a block with nothing in it: report an empty result.
  if (extraction === null || extraction.rawData.length === 0) {
    const body = extraction?.content ?? source;
    const fmt = options?.format ?? "yaml";
    const excerpt = options?.excerpt ? extractExcerpt(body, options.excerpt) : undefined;
    return {
      data: {} as Record<string, never>,
      content: body,
      format: fmt,
      isEmpty: true,
      excerpt,
    } satisfies ParseResultEmpty;
  }

  const detection = detectFormatWithPreparsed(extraction.rawData, extraction.delimiter);
  const format = options?.format ?? detection.format;
  const strict = options?.strict !== false;
  const excerpt = options?.excerpt
    ? extractExcerpt(extraction.content, options.excerpt)
    : undefined;

  // Single error path shared by the sync `catch` and the promise `.catch`.
  const onError = (err: unknown): ParseResultError =>
    handleError(err, strict, extraction, format, excerpt);

  // Build the result once raw data is available.
  const buildResult = (raw: unknown): ParseResult<T> | Promise<ParseResult<T>> => {
    const sanitised = sanitizeKeys(raw);

    if (!options?.schema) {
      return makeSuccess<T>(sanitised as T, extraction, format, excerpt);
    }

    const validated = callbacks.validate(sanitised, options.schema);
    // Handle both sync and async validation.
    if (validated instanceof Promise) {
      return validated.then((data) => makeSuccess<T>(data, extraction, format, excerpt));
    }
    return makeSuccess<T>(validated, extraction, format, excerpt);
  };

  try {
    // Resolve raw data — may be sync or async depending on WASM loading.
    const rawOrPromise = resolveRawData<T>(extraction, detection, format, callbacks);
    const result =
      rawOrPromise instanceof Promise ? rawOrPromise.then(buildResult) : buildResult(rawOrPromise);

    // Attach the handler to whatever promise comes out. This also covers the
    // case where raw data was available synchronously but validation is
    // async, which previously rejected even with `strict: false`.
    return result instanceof Promise ? result.catch(onError) : result;
  } catch (err) {
    return onError(err);
  }
}
302
+
303
/**
 * Produce the parsed (not yet sanitised) data for an extraction.
 *
 * If detection already produced `preparsedData` and the requested format is
 * the detected one, that value is returned as-is. Otherwise the WASM parsers
 * are obtained through `callbacks.getWasm()` and the raw text is parsed with
 * the adapter for `format`. The result is a promise only when `getWasm()`
 * returned one.
 */
function resolveRawData<T>(
  extraction: ExtractionResult,
  detection: DetectionResult,
  format: FrontMatterFormat,
  callbacks: ParseCallbacks<T>,
): unknown | Promise<unknown> {
  if (
    format === detection.format &&
    "preparsedData" in detection &&
    detection.preparsedData !== undefined
  ) {
    return detection.preparsedData;
  }

  const parseWith = (wasm: WasmParsers): unknown =>
    createParserAdapter(format, wasm).parse(extraction.rawData);

  const wasmOrPromise = callbacks.getWasm();
  return wasmOrPromise instanceof Promise ? wasmOrPromise.then(parseWith) : parseWith(wasmOrPromise);
}
329
+
330
/**
 * Assemble the success variant of a parse result from validated data and the
 * extraction it came from.
 */
function makeSuccess<T>(
  data: T,
  extraction: ExtractionResult,
  format: FrontMatterFormat,
  excerpt: string | undefined,
): ParseResultSuccess<T> {
  const { content, rawData } = extraction;
  return { data, content, format, isEmpty: false, excerpt, rawData };
}
345
+
346
/**
 * Apply the `strict` policy to a parse or validation failure.
 *
 * In strict mode the original error is rethrown untouched. Otherwise an error
 * result is returned: `data` is `{}`, and `error` is a `FrontMatterError`
 * whose message has been passed through `sanitizeErrorMessage` so that paths
 * and stack frames are not handed back to consumers.
 */
function handleError(
  err: unknown,
  strict: boolean,
  extraction: ExtractionResult,
  format: FrontMatterFormat,
  excerpt: string | undefined,
): ParseResultError {
  if (strict) {
    throw err;
  }

  const message = sanitizeErrorMessage(err instanceof Error ? err.message : String(err));
  const { content, rawData } = extraction;

  return {
    data: {} as Record<string, never>,
    content,
    format,
    isEmpty: false,
    error: new FrontMatterError(message),
    excerpt,
    rawData,
  };
}
370
+
371
/**
 * Scrub an error message before it is handed back to consumers: path-like
 * substrings become `<path>`, stack-frame lines are removed, and anything
 * beyond 300 characters is cut and marked with an ellipsis.
 */
function sanitizeErrorMessage(message: string): string {
  // Path-like runs: optional drive letter, a slash or backslash, then path characters.
  const withoutPaths = message.replace(/(?:[A-Za-z]:)?[/\\][\w./\-]+/g, "<path>");
  // Stack trace lines ("\n    at …").
  const withoutStack = withoutPaths.replace(/\n\s+at\s+.+/g, "");
  // Cap the length so overly detailed errors are not exposed in full.
  const capped = withoutStack.length > 300 ? `${withoutStack.slice(0, 300)}…` : withoutStack;
  return capped.trim();
}
383
+
384
+ // ---------------------------------------------------------------------------
385
+ // Re-exports
386
+ // ---------------------------------------------------------------------------
387
+
388
+ export { detectFormat } from "./detector.js";
389
+ export { extractExcerpt } from "./excerpt.js";
390
+ export { extractFrontMatter } from "./extractor.js";
391
+
392
+ export { stringifyFrontMatter, stringifyFrontMatterSync } from "./stringify.js";
393
+ export type {
394
+ DelimiterPair,
395
+ ExcerptOptions,
396
+ ExtractionResult,
397
+ FrontMatterFormat,
398
+ ParseResult,
399
+ ParseResultEmpty,
400
+ ParseResultError,
401
+ ParseResultSuccess,
402
+ ParserAdapter,
403
+ StringifyOptions,
404
+ } from "./types.js";
405
+ export {
406
+ DEFAULT_DELIMITERS,
407
+ ExtractionError,
408
+ FrontMatterError,
409
+ ParseError,
410
+ ValidationError,
411
+ } from "./types.js";
412
+ export { initWasm } from "./wasm-loader.js";
413
+
414
+ // Note: `validate` is NOT re-exported here to avoid pulling in the valibot
415
+ // dependency at import time. Use the subpath import instead: `quill-matter/validator`.
416
+ // ---------------------------------------------------------------------------
417
+ // Runtime detection & File API
418
+ // ---------------------------------------------------------------------------
419
+
420
/**
 * Ambient declaration of the Bun global, limited to the members listed here.
 * Typed as possibly `undefined` so callers must check for it before use.
 */
declare const Bun:
  | undefined
  | { file(path: string | URL): { text(): Promise<string>; exists(): Promise<boolean> } };

/** Ambient declaration of the Deno global, limited to `readTextFile`; possibly `undefined`. */
declare const Deno: undefined | { readTextFile(path: string | URL): Promise<string> };
424
+
425
/**
 * Load a document from a file path or URL and parse its front matter.
 *
 * Reading is delegated to `readFileContent`, which picks a strategy based on
 * the input (http/https URLs are fetched) and the available runtime globals.
 * The text is then handed to `parseFrontMatter`.
 *
 * @param path - File path (string) or URL to read.
 * @param options - Parse options (format, delimiters, schema, etc).
 * @returns The parsed front matter result.
 * @throws {Error} If the content cannot be read.
 */
export async function readFrontMatter<T = Record<string, unknown>>(
  path: string | URL,
  options?: ParseOptions<T>,
): Promise<ParseResult<T>> {
  return parseFrontMatter<T>(await readFileContent(path), options);
}
443
+
444
/**
 * Read and parse multiple files in parallel.
 *
 * All reads are started together via `Promise.all`, so the returned promise
 * rejects as soon as any single file fails to read or parse.
 *
 * @param paths - File paths or URLs. The array is only read, never mutated,
 *   so readonly arrays and tuples are accepted.
 * @param options - Parse options (shared across all files).
 * @returns Array of results in the same order as inputs.
 */
export async function readFrontMatterMany<T = Record<string, unknown>>(
  paths: readonly (string | URL)[],
  options?: ParseOptions<T>,
): Promise<ParseResult<T>[]> {
  return Promise.all(paths.map((p) => readFrontMatter<T>(p, options)));
}
457
+
458
/**
 * Build the "File not found" error thrown by `readFileContent`, keeping the
 * underlying failure reachable through the `cause` property.
 */
function fileNotFoundError(path: string | URL, cause: unknown): Error {
  const error: Error & { cause?: unknown } = new Error(`File not found: ${String(path)}`);
  error.cause = cause;
  return error;
}

/**
 * Read the text behind a path or URL, choosing a strategy in this order:
 *
 * 1. `fetch` for http/https URLs (URL objects or strings), in every runtime.
 * 2. `Bun.file()` when the `Bun` global exists.
 * 3. `Deno.readTextFile()` when the `Deno` global exists.
 * 4. `node:fs/promises` otherwise.
 *
 * @param path  File path, `file:` URL, or http(s) URL.
 * @returns The content as a string.
 * @throws {Error} "Failed to fetch …" when an HTTP response is not OK;
 *   "File not found: …" when the Bun or Node branch fails for any reason. The
 *   original failure (missing file, permission error, unavailable `node:fs`
 *   import, …) is attached as `cause`. Errors from the Deno branch propagate
 *   unchanged.
 */
async function readFileContent(path: string | URL): Promise<string> {
  // 1. Fetch (HTTP/HTTPS) - prioritize for all runtimes
  const isHttp =
    (path instanceof URL && (path.protocol === "http:" || path.protocol === "https:")) ||
    (typeof path === "string" && /^https?:/.test(path));
  if (isHttp) {
    const res = await fetch(path);
    if (!res.ok) {
      throw new Error(`Failed to fetch ${String(path)}: ${res.status} ${res.statusText}`);
    }
    return res.text();
  }

  // 2. Bun (Local Files & file: URLs)
  if (typeof Bun !== "undefined") {
    // Bun.file() handles absolute/relative paths and file:// URLs correctly
    const file = Bun.file(path);
    try {
      return await file.text();
    } catch (err) {
      throw fileNotFoundError(path, err);
    }
  }

  // 3. Deno (Local Files & file: URLs)
  if (typeof Deno !== "undefined") {
    return Deno.readTextFile(path);
  }

  // 4. Fallback: node:fs (covers Vitest workers where Bun global is unavailable)
  try {
    const { readFile } = await import("node:fs/promises");
    // Convert URL to path string to avoid type conflicts between Deno and Node URL types
    let filePath: string;
    if (typeof path === "string") {
      filePath = path;
    } else if (path.protocol === "file:") {
      const { fileURLToPath } = await import("node:url");
      // Deno's URL type and Node's URL type have incompatible TypeScript definitions
      // (searchParams property differs), but they're runtime-compatible.
      // @ts-expect-error - Suppress type error: Deno URL vs Node URL type mismatch
      filePath = fileURLToPath(path);
    } else {
      // Any other URL scheme: pass the href through as the path.
      filePath = path.href;
    }
    return await readFile(filePath, "utf-8");
  } catch (err) {
    throw fileNotFoundError(path, err);
  }
}
package/src/parsers.ts ADDED
@@ -0,0 +1,73 @@
1
+ import type { FrontMatterFormat, ParserAdapter } from "./types.js";
2
+ import { ParseError } from "./types.js";
3
+ import type { WasmParsers } from "./wasm-loader.js";
4
+
5
/**
 * Build a `ParserAdapter` that parses `format` input with the given WASM
 * parsers.
 *
 * Anything thrown while parsing is rethrown as a `ParseError` whose message is
 * prefixed with the upper-cased format name and which carries the line and
 * column recovered from the underlying message, when one can be found.
 */
export function createParserAdapter(format: FrontMatterFormat, wasm: WasmParsers): ParserAdapter {
  const fn = FORMAT_TO_FN[format];

  const parse = (input: string): unknown => {
    try {
      return fn(wasm, input);
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      const position = extractErrorPosition(message);
      throw new ParseError(
        `Failed to parse ${format.toUpperCase()}: ${message}`,
        format,
        position.line,
        position.column,
      );
    }
  };

  return { parse };
}
28
+
29
+ // ---------------------------------------------------------------------------
30
+ // Internal
31
+ // ---------------------------------------------------------------------------
32
+
33
/** A function that parses `input` using one of the WASM parser entry points. */
type ParseFn = (wasm: WasmParsers, input: string) => unknown;

/** Maps each supported format to the WASM entry point that parses it. */
const FORMAT_TO_FN: Record<FrontMatterFormat, ParseFn> = {
  yaml: (wasm, input) => wasm.parse_yaml(input),
  json: (wasm, input) => wasm.parse_json(input),
  toml: (wasm, input) => wasm.parse_toml(input),
};
40
+
41
/**
 * Recover a line/column pair from a parser error message.
 *
 * Two shapes are recognised, tried in this order:
 * - "line 5, column 10" / "line 5 column 10" (case-insensitive)
 * - "5:10"
 *
 * Only the first 500 characters of the message are examined.
 *
 * @returns `{ line, column }` for the first shape that matches, otherwise `{}`.
 */
function extractErrorPosition(message: string): { line?: number; column?: number } {
  // Bound the amount of text the regexes run over.
  const msg = message.slice(0, 500);

  const patterns = [
    // "line X, column Y" or "line X column Y"
    /line\s+(\d+)[,\s]+column\s+(\d+)/i,
    // "X:Y" — the lookbehind rejects a line number preceded by "." or another
    // digit, and the lookahead rejects a column that runs into a "." (as in
    // "1.2:3" or "1:2.3").
    /(?<![.\d])(\d+):(\d+)(?!\d*\.)/,
  ];

  for (const pattern of patterns) {
    const found = pattern.exec(msg);
    if (found) {
      return {
        line: Number.parseInt(found[1], 10),
        column: Number.parseInt(found[2], 10),
      };
    }
  }

  return {};
}