brustjs 0.1.38-alpha → 0.1.40-alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,469 @@
1
+ import { Marked, type Tokens } from 'marked'
2
+
3
+ // ---------------------------------------------------------------------------
4
+ // Types
5
+ // ---------------------------------------------------------------------------
6
+
7
/**
 * Hydration triggers accepted by the reserved `hydrate` attribute of a
 * component tag. The array is the runtime source of truth; the type below is
 * derived from it so the two cannot drift apart.
 */
const HYDRATE_MODES = ['load', 'idle', 'visible', 'interaction'] as const

/** Union of the literal strings listed in `HYDRATE_MODES`. */
export type MdHydrate = (typeof HYDRATE_MODES)[number]
10
+
11
/**
 * Record of a single island component embedded in a markdown page.
 * Behavior (x-data) components are described by `MdBehaviorUse` instead.
 */
export interface MdIslandUse {
  /** Component name exactly as written in the tag. */
  name: string
  /**
   * Page-local island index, counted from 0. The emit step offsets it past
   * the islands that belong to the wrapper itself.
   */
  instanceLocal: number
  /** Attributes parsed from the tag, with the reserved `hydrate` / `csr` removed. */
  props: Record<string, unknown>
  /** Hydration trigger for this instance. */
  hydrate: MdHydrate
  /** Whether the tag requested the `csr` host variant. */
  csr: boolean
  /** Line of the tag inside the markdown body (after frontmatter), counted from 1. */
  line: number
}
22
+
23
/** Record of a single behavior (x-data) component embedded in a markdown page. */
export interface MdBehaviorUse {
  /** Component name exactly as written in the tag. */
  name: string
  /** Directive name placed in the host element's `x-data` attribute. */
  directive: string
  /** Line of the tag inside the markdown body (after frontmatter), counted from 1. */
  line: number
  /**
   * Literal props from the tag. Only strings and numbers get this far (other
   * kinds are rejected at extract time); the emit step substitutes them inline
   * into the component's compiled body.
   */
  props: Record<string, unknown>
  /**
   * The placeholder host markup, character for character as it was injected
   * into the rendered HTML. The emit step (the owner of compileJsx) replaces
   * this exact string with the fully inlined component body. Because it
   * embeds the per-render nonce, user prose cannot collide with it.
   */
  marker: string
}
38
+
39
/**
 * Outcome of looking a component name up in the registry, discriminated on
 * `kind`: an island carries the id written to `data-brust-island`, a behavior
 * carries the directive written to `x-data`.
 */
export type MdComponentResolution =
  | { kind: 'island'; id: string }
  | { kind: 'behavior'; directive: string }
42
+
43
/** Input accepted by `renderMdPage`. */
export interface RenderMdPageOptions {
  /** The markdown text to render; frontmatter must already be removed. */
  body: string
  /** Path of the markdown file; used as the `file` part of `file:line` error messages. */
  absPath: string
  /**
   * Registry lookup for a component name seen at `line`. Returning `null`
   * means the name is unknown, which makes `renderMdPage` throw with
   * `file:line` when the tag is self-closing.
   */
  resolve: (name: string, line: number) => MdComponentResolution | null
}
50
+
51
+ // ---------------------------------------------------------------------------
52
+ // Shiki (optional peer dep) — lazy, cached, one warning per build
53
+ // ---------------------------------------------------------------------------
54
+
55
/**
 * The slice of the shiki module surface this file relies on: one async
 * function that turns source text into highlighted HTML for a language and a
 * light/dark theme pair.
 */
interface ShikiLike {
  codeToHtml(
    code: string,
    options: { lang: string; themes: { light: string; dark: string } },
  ): Promise<string>
}
61
+
62
/** Production importer: performs the real dynamic `import('shiki')`. */
function defaultShikiImporter(): Promise<ShikiLike> {
  return import('shiki') as Promise<ShikiLike>
}

/** Importer currently in effect; tests may swap it through the test seam. */
let shikiImporter: () => Promise<ShikiLike> = defaultShikiImporter
/**
 * Memoized load. `undefined` means no attempt has been made yet; once set, the
 * promise resolves to the module, or to `null` when it is unavailable.
 */
let shikiLoad: Promise<ShikiLike | null> | undefined = undefined
/** Set once the "shiki is not installed" warning has been printed. */
let warnedShikiMissing = false
68
+
69
/**
 * Test seam. Installs `importer` in place of the dynamic `import('shiki')`
 * (or restores the default when given `null`) and clears the cached load and
 * the warned-once flag so the next highlight starts from a clean slate.
 */
export function __setShikiImporterForTests(importer: (() => Promise<ShikiLike>) | null): void {
  warnedShikiMissing = false
  shikiLoad = undefined
  shikiImporter = importer === null ? defaultShikiImporter : importer
}
75
+
76
/**
 * Returns the memoized shiki load, starting it on first use. A rejected
 * import is converted to `null`, so callers never see a rejection.
 */
function loadShiki(): Promise<ShikiLike | null> {
  shikiLoad ??= shikiImporter().catch(() => null)
  return shikiLoad
}
82
+
83
/**
 * Escapes the four characters `&`, `<`, `>` and `"` as HTML entities. Done in
 * a single pass, so an ampersand produced by one substitution is never
 * escaped a second time.
 */
function escapeHtml(text: string): string {
  const entities: Record<string, string> = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
  }
  return text.replace(/[&<>"]/g, (ch) => entities[ch] as string)
}
90
+
91
/**
 * Builds the unhighlighted `<pre><code>` form of a code fence. The code is
 * HTML-escaped; a non-empty `lang` adds an escaped `language-…` class.
 */
function fallbackCodeBlock(code: string, lang: string): string {
  let classAttr = ''
  if (lang !== '') {
    classAttr = ` class="language-${escapeHtml(lang)}"`
  }
  return '<pre><code' + classAttr + '>' + escapeHtml(code) + '</code></pre>'
}
95
+
96
/**
 * Turns one code fence into HTML.
 *
 * - No language on the fence: escape-only `<pre><code>` block, shiki is not
 *   loaded at all.
 * - shiki available: highlighted with the github-light / github-dark theme
 *   pair; if shiki throws for this fence (e.g. unknown language) the fence
 *   silently degrades to the escape-only block.
 * - shiki unavailable: escape-only block, plus a console warning the first
 *   time this happens.
 *
 * @param code Raw text of the fence.
 * @param lang Fence info string; only its first whitespace-delimited word is used.
 */
export async function highlightCode(code: string, lang: string): Promise<string> {
  const [firstWord] = (lang ?? '').trim().split(/\s+/)
  const language = firstWord ?? ''
  if (language === '') return fallbackCodeBlock(code, '')

  const shiki = await loadShiki()
  if (shiki !== null) {
    try {
      return await shiki.codeToHtml(code, {
        lang: language,
        themes: { light: 'github-light', dark: 'github-dark' },
      })
    } catch {
      // Per-fence degradation: no warning for an unsupported language.
      return fallbackCodeBlock(code, language)
    }
  }

  if (!warnedShikiMissing) {
    warnedShikiMissing = true
    console.warn(
      '[brust md] shiki is not installed — code fences are emitted without syntax highlighting (add the optional `shiki` dependency to enable it)',
    )
  }
  return fallbackCodeBlock(code, language)
}
124
+
125
+ // ---------------------------------------------------------------------------
126
+ // Jinja-brace neutralization
127
+ // ---------------------------------------------------------------------------
128
+
129
/**
 * Character sequences minijinja treats as syntax when they appear in template
 * text: variable and block delimiters plus the comment opener `{#`. A lone
 * `#}` is inert outside a comment, so it needs no entry of its own.
 */
const JINJA_DELIM_RE = /\{\{|\}\}|\{%|%\}|\{#/

/**
 * Makes md-origin HTML inert for minijinja.
 *
 * Every maximal run of the characters `{`, `}`, `%`, `#` that contains a
 * delimiter is replaced, as a whole, by a string-literal expression that
 * renders back to the original text (`{{` becomes `{{ "{{" }}`). Runs without
 * a delimiter are left untouched.
 *
 * Wrapping the whole run rather than each delimiter matters: a lone `{`
 * directly in front of a delimiter (`{}}`, as in JSX `onClick={() => {}}`)
 * would otherwise sit against the replacement's own `{{` and form `{{{ …`,
 * which the template engine reads as the start of a map literal. A run can
 * only contain `{`, `}`, `%`, `#`, so it never needs escaping inside the
 * double-quoted literal.
 *
 * Single pass — the replacements themselves are never re-matched.
 * Component-host markup is injected AFTER this pass so its jinja stays live.
 *
 * @param html Rendered markdown HTML.
 * @returns The same HTML with all jinja syntax neutralized.
 */
export function neutralizeBraces(html: string): string {
  return html.replace(/[{}%#]+/g, (run) => (JINJA_DELIM_RE.test(run) ? `{{ "${run}" }}` : run))
}
145
+
146
+ // ---------------------------------------------------------------------------
147
+ // Component-tag transform (line-level, outside code fences)
148
+ // ---------------------------------------------------------------------------
149
+
150
/**
 * Builds the stand-in for the `n`-th extracted component-tag line: a
 * single-line HTML comment of the form `<!--brust-md-slot:NONCE:N-->`.
 *
 * Why a comment: per CommonMark a comment line is an HTML block (type 2) that
 * may interrupt a paragraph, so marked emits it raw and block-level — never
 * wrapped in `<p>` — even when the tag line directly abuts paragraph text
 * (probed against marked 18). The comment holds no jinja delimiters, so
 * `neutralizeBraces` leaves it alone and it can be swapped for the
 * (live-jinja) host markup afterwards.
 *
 * Why a nonce: it is chosen per call, so user content that happens to contain
 * the literal placeholder text (e.g. docs ABOUT this mechanism) can never be
 * substituted.
 */
function slotPlaceholder(nonce: string, n: number): string {
  return `<!--brust-md-slot:${nonce}:${n}-->`
}

/**
 * Matches a line that begins with `<Name`, where `Name` starts with a capital
 * letter and is followed by whitespace, `/` or `>`. A match only makes the
 * line a *candidate* component tag; capture group 1 is the name.
 */
const TAG_OPEN_RE = /^<([A-Z][A-Za-z0-9]*)(?=[\s/>])/
165
+
166
/** Everything `extractComponentTags` hands back to the pipeline. */
interface ExtractedTags {
  /** The markdown body in which each component-tag line has been replaced by a slot placeholder. */
  source: string
  /** Host markup, indexed by the number embedded in the matching placeholder. */
  hosts: string[]
  /** Island uses, in document order. */
  islands: MdIslandUse[]
  /** Behavior uses, in document order. */
  behaviors: MdBehaviorUse[]
}
174
+
175
/**
 * Scans the markdown body line by line and pulls out component tags.
 *
 * A component tag is a line that starts (column 0) with `<Name` and, ignoring
 * trailing whitespace, ends with `/>`. Lines inside fenced code blocks
 * (backtick or tilde fences, up to three leading spaces) are never treated as
 * tags. Each extracted line is replaced by a slot placeholder and its host
 * markup is recorded under the same index.
 *
 * @param body Markdown source without frontmatter.
 * @param absPath File path used in `file:line` error messages.
 * @param resolve Registry lookup; `null` means "unknown name".
 * @param nonce Per-render nonce embedded in placeholders and behavior markers.
 * @returns The rewritten source plus hosts and island/behavior records.
 * @throws Error with a `file:line` prefix when
 *   - a registry component is used without self-closing,
 *   - a self-closing capitalized tag is not in the registry,
 *   - the attributes are malformed (raised by `parseTagAttrs`),
 *   - a behavior component carries `hydrate`/`csr`, a non string/number prop,
 *     or a string prop containing jinja syntax.
 */
function extractComponentTags(
  body: string,
  absPath: string,
  resolve: RenderMdPageOptions['resolve'],
  nonce: string,
): ExtractedTags {
  // Hoisted out of the line loop; neither regex is global, so both are stateless.
  const fenceRe = /^ {0,3}(`{3,}|~{3,})/
  // Anything minijinja would interpret in raw host markup, including the
  // comment opener `{#` (a lone `#}` is inert and stays allowed).
  const liveJinjaRe = /\{\{|\}\}|\{%|%\}|\{#/

  const lines = body.split('\n')
  const hosts: string[] = []
  const islands: MdIslandUse[] = []
  const behaviors: MdBehaviorUse[] = []
  let instanceLocal = 0

  // Currently open code fence, if any.
  let fence: { char: string; len: number } | null = null
  for (let i = 0; i < lines.length; i++) {
    const raw = lines[i] as string
    const line = raw.endsWith('\r') ? raw.slice(0, -1) : raw
    const lineNo = i + 1

    const fenceRun = fenceRe.exec(line)?.[1]
    if (fence !== null) {
      // A fence closes on a line holding nothing but a run of the same
      // character at least as long as the opener.
      if (
        fenceRun !== undefined &&
        fenceRun[0] === fence.char &&
        fenceRun.length >= fence.len &&
        line.trim() === fenceRun
      ) {
        fence = null
      }
      continue // inside (or closing) a fence — tags are shielded
    }
    if (fenceRun !== undefined) {
      // A fence-opening line is never a component tag.
      fence = { char: fenceRun[0] as string, len: fenceRun.length }
      continue
    }

    const trimmed = line.trimEnd()
    const open = TAG_OPEN_RE.exec(trimmed)
    if (open === null) continue
    const name = open[1] as string

    if (!trimmed.endsWith('/>')) {
      // Non-self-closing usage of a registry name is an error; unknown names
      // are left for markdown (could be prose-level inline HTML).
      if (resolve(name, lineNo) !== null) {
        throw new Error(
          `${absPath}:${lineNo} — <${name}> must be self-closing in markdown (children are not supported in v1)`,
        )
      }
      continue
    }

    const resolution = resolve(name, lineNo)
    if (resolution === null) {
      throw new Error(`${absPath}:${lineNo} — <${name}> is not in mdRoutes components registry`)
    }

    // Attribute region: everything between `<Name` and the closing `/>`.
    const attrText = trimmed.slice(open[0].length, trimmed.length - 2)
    const { props, hydrate, csr } = parseTagAttrs(attrText, name, absPath, lineNo)

    let host: string
    if (resolution.kind === 'behavior') {
      // Behavior components have no hydration model — silently dropping these
      // would mislead authors into thinking they did something.
      if (hydrate !== 'load' || csr) {
        throw new Error(
          `${absPath}:${lineNo} — <${name}> is a native behavior component; hydrate/csr do not apply`,
        )
      }
      // The emit step compiles the component's body through the native-inline
      // path and substitutes the result over the placeholder below. That path
      // can only inline-substitute string/number literals (bool/object props
      // are rejected by the JSX compiler), and a string carrying jinja syntax
      // would land RAW in the compiled host — live jinja that can't be
      // neutralized after the fact. Validate both here, where file:line is
      // at hand.
      for (const [k, v] of Object.entries(props)) {
        if (typeof v !== 'string' && typeof v !== 'number') {
          throw new Error(
            `${absPath}:${lineNo} — <${name}> prop "${k}" must be a string or number literal ` +
              `(behavior component bodies are inlined statically; got ${typeof v})`,
          )
        }
        if (typeof v === 'string' && liveJinjaRe.test(v)) {
          throw new Error(
            `${absPath}:${lineNo} — <${name}> prop "${k}" contains jinja delimiters, which cannot ` +
              'be inlined into a md behavior host',
          )
        }
      }
      // Placeholder host: substituted whole-tag by the emit step (which owns
      // compileJsx — this module must stay free of it). The nonce makes the
      // marker impossible to author in md prose; the index disambiguates
      // multiple uses of the same component on one page.
      host = `<div x-data="${resolution.directive}" data-brust-md-behavior="${nonce}:${behaviors.length}"></div>`
      behaviors.push({ name, directive: resolution.directive, line: lineNo, props, marker: host })
    } else {
      const n = instanceLocal
      const common = `<div data-brust-island="${resolution.id}" data-brust-props="{{ island_${n}_props }}" data-brust-hydrate="${hydrate}"`
      // csr hosts are emitted empty; otherwise the host wraps the
      // `island_N_html` template variable.
      host = csr
        ? `${common} data-brust-csr></div>`
        : `${common}>{{ island_${n}_html | safe }}</div>`
      islands.push({ name, instanceLocal: n, props, hydrate, csr, line: lineNo })
      instanceLocal++
    }
    lines[i] = slotPlaceholder(nonce, hosts.length)
    hosts.push(host)
  }

  return { source: lines.join('\n'), hosts, islands, behaviors }
}
288
+
289
/** Result of parsing the attribute region of one component tag. */
interface ParsedTagAttrs {
  /** All attributes except the reserved `hydrate` and `csr`. */
  props: Record<string, unknown>
  /** Value of the `hydrate` attribute, `'load'` when absent. */
  hydrate: MdHydrate
  /** False when `csr` is absent or is JSON `false`; true otherwise. */
  csr: boolean
}
294
+
295
/**
 * Parses the attr region of a component tag. Forms:
 *   - `p="str"`     → string (verbatim, no escapes)
 *   - `p={42}`      → JSON-parsed scalar
 *   - `p={{"a":1}}` → JSON object (the `{…}` content is JSON.parse'd)
 *   - `flag`        → true
 * Reserved names `hydrate` / `csr` are pulled out of props.
 *
 * Attribute names start with an ASCII letter and continue with word
 * characters or `-`. Duplicate detection looks at own properties only, so
 * names that also exist on `Object.prototype` (`constructor`, `toString`, …)
 * are ordinary attributes and are not misreported as duplicates.
 *
 * @param text Text between the tag name and the closing `/>`.
 * @param tagName Component name, used in error messages.
 * @param absPath File path, used in error messages.
 * @param line 1-based line number, used in error messages.
 * @returns Props plus the extracted `hydrate` (default `'load'`) and `csr`
 *   (true unless absent or JSON `false`).
 * @throws Error prefixed with `file:line — <Tag>` for a malformed attribute,
 *   an unterminated string, unbalanced braces, invalid JSON, a duplicate
 *   attribute, or an unsupported `hydrate` value.
 */
function parseTagAttrs(
  text: string,
  tagName: string,
  absPath: string,
  line: number,
): ParsedTagAttrs {
  const props: Record<string, unknown> = {}
  // Own-property test: `in` would also see members inherited from Object.prototype.
  const hasProp = (key: string): boolean => Object.prototype.hasOwnProperty.call(props, key)
  const fail = (msg: string): never => {
    throw new Error(`${absPath}:${line} — <${tagName}> ${msg}`)
  }

  let i = 0
  while (i < text.length) {
    // Skip whitespace between attributes.
    while (i < text.length && /\s/.test(text[i] as string)) i++
    if (i >= text.length) break
    const nameMatch = /^[a-zA-Z][\w-]*/.exec(text.slice(i))
    if (nameMatch === null) return fail(`has a malformed attribute near: ${text.slice(i).trim()}`)
    const attrName = nameMatch[0]
    i += attrName.length

    // A bare attribute name is a boolean flag.
    let value: unknown = true
    if (text[i] === '=') {
      i++
      const open = text[i]
      if (open === '"') {
        // Quoted string: taken verbatim up to the next double quote.
        const close = text.indexOf('"', i + 1)
        if (close === -1) return fail(`attribute "${attrName}" has an unterminated string value`)
        value = text.slice(i + 1, close)
        i = close + 1
      } else if (open === '{') {
        // Braced value: the content between the outer braces is JSON.
        const end = scanBalancedBraces(text, i)
        if (end === -1) return fail(`attribute "${attrName}" has unbalanced braces`)
        const inner = text.slice(i + 1, end)
        try {
          value = JSON.parse(inner)
        } catch {
          return fail(`attribute "${attrName}" is not valid JSON: {${inner}}`)
        }
        i = end + 1
      } else {
        return fail(`attribute "${attrName}" value must be "…" or {…}`)
      }
    }
    if (hasProp(attrName)) {
      return fail(`has duplicate attribute "${attrName}"`)
    }
    props[attrName] = value
  }

  let hydrate: MdHydrate = 'load'
  if (hasProp('hydrate')) {
    const h = props.hydrate
    delete props.hydrate
    if (typeof h !== 'string' || !HYDRATE_MODES.includes(h as MdHydrate)) {
      return fail(`hydrate must be one of ${HYDRATE_MODES.join('|')}, got: ${JSON.stringify(h)}`)
    }
    hydrate = h as MdHydrate
  }
  let csr = false
  if (hasProp('csr')) {
    // Only an explicit JSON `false` turns csr off; every other value enables it.
    csr = props.csr !== false
    delete props.csr
  }
  return { props, hydrate, csr }
}
370
+
371
/**
 * Finds the `}` that closes the `{` located at `start`.
 *
 * Nested braces are counted. Braces inside JSON double-quoted strings are
 * ignored, and a backslash inside a string skips the character after it.
 *
 * @returns Index of the closing brace, or -1 when the braces never balance.
 */
function scanBalancedBraces(text: string, start: number): number {
  let open = 0
  let quoted = false
  let pos = start
  while (pos < text.length) {
    const c = text[pos]
    if (quoted) {
      if (c === '\\') {
        pos++ // step over the escaped character as well
      } else if (c === '"') {
        quoted = false
      }
    } else {
      switch (c) {
        case '"':
          quoted = true
          break
        case '{':
          open++
          break
        case '}':
          open--
          if (open === 0) return pos
          break
      }
    }
    pos++
  }
  return -1
}
395
+
396
+ // ---------------------------------------------------------------------------
397
+ // Markdown → HTML (marked, GFM, heading ids, shiki fences)
398
+ // ---------------------------------------------------------------------------
399
+
400
/**
 * Derives a heading id from heading text: lower-cased, trimmed, whitespace
 * runs collapsed to `-`, and everything except letters, combining marks,
 * digits, `_` and `-` removed.
 *
 * The character classes are Unicode-aware, so headings in non-Latin scripts
 * or with accented letters keep their text instead of collapsing to an empty
 * or truncated slug. ASCII-only input produces the same result as an
 * ASCII `\w` filter would.
 */
function slugify(text: string): string {
  return text
    .toLowerCase()
    .trim()
    .replace(/\s+/g, '-')
    .replace(/[^\p{L}\p{M}\p{N}_-]/gu, '')
}
407
+
408
/**
 * Renders markdown to HTML with marked (GFM, async mode).
 *
 * - Code fences are highlighted during the async token walk, and the result
 *   is looked up again by the synchronous `code` renderer.
 * - Headings receive an `id` derived from their plain text. Ids are unique
 *   within the page: the first use of a slug is the slug itself, repeats get
 *   `-2`, `-3`, …, and a candidate that has already been handed out (for
 *   example a literal "foo-2" heading after two "foo" headings) is skipped. A
 *   heading whose slug is empty gets no `id` attribute at all, since an empty
 *   id is not valid HTML.
 *
 * @param source Markdown text (component tags already replaced by placeholders).
 * @returns The rendered HTML.
 */
async function renderMarkdown(source: string): Promise<string> {
  // Fresh instance per page: heading-id dedupe state is page-local.
  const slugCounts = new Map<string, number>()
  const usedIds = new Set<string>()
  const highlighted = new WeakMap<Tokens.Code, string>()

  const marked = new Marked()
  marked.use({
    gfm: true,
    async: true,
    walkTokens: async (token) => {
      if (token.type === 'code') {
        const code = token as Tokens.Code
        highlighted.set(code, await highlightCode(code.text, code.lang ?? ''))
      }
    },
    renderer: {
      code(token: Tokens.Code): string {
        // Falls back to the escape-only block if the walk did not visit this token.
        return `${highlighted.get(token) ?? fallbackCodeBlock(token.text, token.lang ?? '')}\n`
      },
      heading({ tokens, depth }: Tokens.Heading): string {
        const text = this.parser.parseInline(tokens, this.parser.textRenderer)
        const inner = this.parser.parseInline(tokens)
        const base = slugify(text)
        if (base === '') {
          return `<h${depth}>${inner}</h${depth}>\n`
        }
        // `seen` = how many earlier headings shared this slug.
        let seen = slugCounts.get(base) ?? 0
        let id = seen === 0 ? base : `${base}-${seen + 1}`
        while (usedIds.has(id)) {
          seen++
          id = `${base}-${seen + 1}`
        }
        slugCounts.set(base, seen + 1)
        usedIds.add(id)
        return `<h${depth} id="${id}">${inner}</h${depth}>\n`
      },
    },
  })
  return marked.parse(source)
}
439
+
440
+ // ---------------------------------------------------------------------------
441
+ // Pipeline
442
+ // ---------------------------------------------------------------------------
443
+
444
/**
 * Renders one markdown page body to jinja-safe HTML.
 *
 * Order is load-bearing (locked by tests):
 *   1. extract component-tag lines to opaque placeholders (marked never sees them)
 *   2. render markdown (GFM, heading ids, shiki fences)
 *   3. neutralize jinja braces over the rendered HTML
 *   4. substitute placeholders with host markup — its jinja stays live
 *
 * @param opts Markdown body, file path for diagnostics, and the registry lookup.
 * @returns The final HTML plus the island and behavior uses found on the page.
 * @throws Error with a `file:line` prefix for any invalid component tag.
 */
export async function renderMdPage(
  opts: RenderMdPageOptions,
): Promise<{ html: string; islands: MdIslandUse[]; behaviors: MdBehaviorUse[] }> {
  // Fresh per render so authored content cannot contain a matching placeholder.
  const nonce = Math.random().toString(16).slice(2, 10)
  const { source, hosts, islands, behaviors } = extractComponentTags(
    opts.body,
    opts.absPath,
    opts.resolve,
    nonce,
  )
  const rendered = await renderMarkdown(source)
  let html = neutralizeBraces(rendered)
  for (const [n, host] of hosts.entries()) {
    // Replacer function instead of a replacement string: a string would have
    // `$$`, `$&`, `` $` `` and `$'` inside the host markup interpreted as
    // substitution patterns.
    html = html.replaceAll(slotPlaceholder(nonce, n), () => host)
  }
  return { html, islands, behaviors }
}