@hypermedia-components/core 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,12 +2,16 @@
2
2
  * Install the editable-code behavior on the given root.
3
3
  *
4
4
  * Enhances every `.hc-code[data-editable]` once and re-scans subtrees
5
- * delivered by htmx (`htmx:load`). Repeated calls on the same root return the
6
- * same uninstaller.
5
+ * delivered by htmx (`htmx:load`). A field gets a synced line-number gutter
6
+ * when `data-gutter="line-numbers"` is set, and a live syntax-highlight overlay
7
+ * when `data-lang` resolves to a registered grammar (built-in or via
8
+ * `registerCodeLanguage()`). Repeated calls on the same root return the same
9
+ * uninstaller.
7
10
  *
8
11
  * @param {Document|Element} [root=document]
9
12
  * The scope to scan. Defaults to the global document when available.
10
13
  * @returns {() => void} an idempotent uninstaller that removes the synced
11
- * gutters and listeners it added.
14
+ * gutters, overlays, and listeners it added.
12
15
  */
13
16
  export function installCodeEditor(root?: Document | Element): () => void;
17
+ export { registerCodeLanguage } from "./code-syntax.js";
@@ -1,22 +1,37 @@
1
1
  // installCodeEditor — upgrade an editable `hc-code` field with a synced
2
- // line-number gutter (issue #255).
2
+ // line-number gutter (#255) and an optional live syntax-highlight overlay
3
+ // (#264).
3
4
  //
4
- // <div class="hc-code" data-editable data-gutter="line-numbers">
5
+ // <div class="hc-code" data-editable data-gutter="line-numbers" data-lang="sql">
5
6
  // <textarea class="hc-code__input" name="content" spellcheck="false">SELECT 1</textarea>
6
7
  // </div>
7
8
  //
8
9
  // The value lives in a real <textarea name>, so it submits in forms, works
9
10
  // with htmx (hx-post / hx-include / hx-vals), and degrades to a plain
10
- // monospace textarea when this script is absent. When `data-gutter="line-numbers"`
11
- // is set, the behavior inserts a `.hc-code__gutter` element before the
11
+ // monospace textarea when this script is absent.
12
+ //
13
+ // `data-gutter="line-numbers"` inserts a `.hc-code__gutter` element before the
12
14
  // textarea and keeps it in sync: it re-numbers on input and matches the
13
- // textarea's vertical scroll. To keep the numbers aligned with the lines it
14
- // sets the textarea to not soft-wrap (`wrap="off"`), so long lines scroll
15
- // horizontally rather than pushing the numbers out of step.
15
+ // textarea's vertical scroll.
16
+ //
17
+ // `data-lang` opts into a live highlight overlay: when the value resolves to a
18
+ // registered grammar (see code-syntax.js — built-ins plus
19
+ // registerCodeLanguage()), the behavior inserts a decorative, aria-hidden
20
+ // `.hc-code__highlight` layer behind the textarea, re-tokenizes on input
21
+ // (throttled to one render per animation frame), and matches the textarea's
22
+ // scrollTop/scrollLeft. The textarea text is rendered transparent over the
23
+ // layer (CSS), so the coloured spans show through while the caret stays
24
+ // visible. An unknown `data-lang` (no grammar) leaves the field a plain
25
+ // textarea — no overlay, no transparent text, no regression to #255.
26
+ //
27
+ // To keep both overlays aligned with the lines the behavior sets the textarea
28
+ // to not soft-wrap (`wrap="off"`), so long lines scroll horizontally rather
29
+ // than pushing the numbers or tokens out of step.
16
30
  //
17
- // Syntax highlighting is out of scope (a CSP-safe overlay is a possible
18
- // follow-up). installCodeEditor(root = document) is idempotent and returns an
19
- // uninstaller; fields swapped in by htmx are enhanced on `htmx:load`.
31
+ // installCodeEditor(root = document) is idempotent and returns an uninstaller;
32
+ // fields swapped in by htmx are enhanced on `htmx:load`.
33
+
34
+ import { tokenizeCode, resolveCodeLanguage } from './code-syntax.js';
20
35
 
21
36
  const INSTALL_KEY = '__hcCodeEditorUninstall';
22
37
 
@@ -29,44 +44,117 @@ function lineNumbers(count) {
29
44
  function enhance(container) {
30
45
  const textarea = container.querySelector('.hc-code__input');
31
46
  if (!textarea) return null;
32
- if (container.dataset.gutter !== 'line-numbers') return () => {};
33
- if (container.querySelector('.hc-code__gutter')) return () => {};
34
47
 
35
- // Keep line numbers aligned: a soft-wrapped line would span several rows
36
- // while the gutter counts one. Horizontal scroll instead.
48
+ const wantGutter = container.dataset.gutter === 'line-numbers';
49
+ const lang = container.dataset.lang;
50
+ const wantHighlight = !!resolveCodeLanguage(lang);
51
+
52
+ if (!wantGutter && !wantHighlight) return () => {};
53
+ // Already enhanced (defensive — the installer's WeakSet is the primary guard).
54
+ if (container.querySelector('.hc-code__gutter') || container.querySelector('.hc-code__highlight')) {
55
+ return () => {};
56
+ }
57
+
58
+ const doc = container.ownerDocument;
59
+ const view = doc.defaultView;
60
+
61
+ // Keep the gutter numbers and overlay tokens aligned: a soft-wrapped line
62
+ // would span several rows while the gutter counts one and the overlay (pre)
63
+ // does not wrap. Horizontal scroll instead.
37
64
  const prevWrap = textarea.getAttribute('wrap');
38
65
  textarea.setAttribute('wrap', 'off');
39
66
 
40
- const gutter = container.ownerDocument.createElement('div');
41
- gutter.className = 'hc-code__gutter';
42
- gutter.setAttribute('aria-hidden', 'true');
43
- container.insertBefore(gutter, textarea);
44
-
67
+ // --- Line-number gutter -------------------------------------------------
68
+ let gutter = null;
45
69
  let lastCount = 0;
70
+ if (wantGutter) {
71
+ gutter = doc.createElement('div');
72
+ gutter.className = 'hc-code__gutter';
73
+ gutter.setAttribute('aria-hidden', 'true');
74
+ container.insertBefore(gutter, textarea);
75
+ }
46
76
  const renumber = () => {
77
+ if (!gutter) return;
47
78
  const count = Math.max(1, textarea.value.split('\n').length);
48
79
  if (count !== lastCount) {
49
80
  gutter.textContent = lineNumbers(count);
50
81
  lastCount = count;
51
82
  }
52
83
  };
84
+
85
+ // --- Live highlight overlay --------------------------------------------
86
+ let highlight = null;
87
+ if (wantHighlight) {
88
+ highlight = doc.createElement('div');
89
+ highlight.className = 'hc-code__highlight';
90
+ highlight.setAttribute('aria-hidden', 'true');
91
+ container.insertBefore(highlight, textarea);
92
+ }
93
+ const renderHighlight = () => {
94
+ if (!highlight) return;
95
+ // Fall back to a single plain run if the grammar can't reconstruct this
96
+ // buffer, so the (transparent) textarea text always stays backed by
97
+ // visible overlay text.
98
+ const tokens = tokenizeCode(lang, textarea.value) || [{ tok: '', text: textarea.value }];
99
+ const frag = doc.createDocumentFragment();
100
+ for (let i = 0; i < tokens.length; i += 1) {
101
+ const t = tokens[i];
102
+ if (t.tok) {
103
+ const span = doc.createElement('span');
104
+ span.className = 'hc-code__tok';
105
+ span.setAttribute('data-tok', t.tok);
106
+ span.textContent = t.text;
107
+ frag.appendChild(span);
108
+ } else {
109
+ frag.appendChild(doc.createTextNode(t.text));
110
+ }
111
+ }
112
+ highlight.textContent = '';
113
+ highlight.appendChild(frag);
114
+ };
115
+
116
+ // Throttle re-tokenization to one render per frame so large buffers stay
117
+ // responsive while typing.
118
+ let frame = 0;
119
+ const scheduleRender = () => {
120
+ if (!highlight) return;
121
+ if (!view || !view.requestAnimationFrame) {
122
+ renderHighlight();
123
+ return;
124
+ }
125
+ if (frame) return;
126
+ frame = view.requestAnimationFrame(() => {
127
+ frame = 0;
128
+ renderHighlight();
129
+ });
130
+ };
131
+
53
132
  const syncScroll = () => {
54
- gutter.scrollTop = textarea.scrollTop;
133
+ if (gutter) gutter.scrollTop = textarea.scrollTop;
134
+ if (highlight) {
135
+ highlight.scrollTop = textarea.scrollTop;
136
+ highlight.scrollLeft = textarea.scrollLeft;
137
+ }
55
138
  };
139
+
56
140
  const onInput = () => {
57
141
  renumber();
142
+ scheduleRender();
58
143
  syncScroll();
59
144
  };
60
145
 
61
146
  textarea.addEventListener('input', onInput);
62
147
  textarea.addEventListener('scroll', syncScroll);
63
148
  renumber();
149
+ renderHighlight();
64
150
  syncScroll();
65
151
 
66
152
  return () => {
153
+ if (frame && view && view.cancelAnimationFrame) view.cancelAnimationFrame(frame);
67
154
  textarea.removeEventListener('input', onInput);
68
155
  textarea.removeEventListener('scroll', syncScroll);
69
- gutter.remove();
156
+ if (gutter) gutter.remove();
157
+ if (highlight) highlight.remove();
70
158
  if (prevWrap == null) textarea.removeAttribute('wrap');
71
159
  else textarea.setAttribute('wrap', prevWrap);
72
160
  };
@@ -76,13 +164,16 @@ function enhance(container) {
76
164
  * Install the editable-code behavior on the given root.
77
165
  *
78
166
  * Enhances every `.hc-code[data-editable]` once and re-scans subtrees
79
- * delivered by htmx (`htmx:load`). Repeated calls on the same root return the
80
- * same uninstaller.
167
+ * delivered by htmx (`htmx:load`). A field gets a synced line-number gutter
168
+ * when `data-gutter="line-numbers"` is set, and a live syntax-highlight overlay
169
+ * when `data-lang` resolves to a registered grammar (built-in or via
170
+ * `registerCodeLanguage()`). Repeated calls on the same root return the same
171
+ * uninstaller.
81
172
  *
82
173
  * @param {Document|Element} [root=document]
83
174
  * The scope to scan. Defaults to the global document when available.
84
175
  * @returns {() => void} an idempotent uninstaller that removes the synced
85
- * gutters and listeners it added.
176
+ * gutters, overlays, and listeners it added.
86
177
  */
87
178
  export function installCodeEditor(root = (typeof document !== 'undefined' ? document : null)) {
88
179
  if (!root) return () => {};
@@ -119,3 +210,5 @@ export function installCodeEditor(root = (typeof document !== 'undefined' ? docu
119
210
  root[INSTALL_KEY] = uninstall;
120
211
  return uninstall;
121
212
  }
213
+
214
+ export { registerCodeLanguage } from './code-syntax.js';
@@ -0,0 +1,42 @@
1
+ /**
2
+ * Register a tokenizer for `installCodeEditor()`'s live highlight overlay,
3
+ * keyed by the value of a field's `data-lang`. Registering a built-in name
4
+ * (`sql`, `json`, `yaml`, `html`, …) overrides it. Names are case-insensitive.
5
+ *
6
+ * The tokenizer must return tokens whose `text` parts reconstruct the input
7
+ * exactly; if they don't, the overlay safely declines to highlight that buffer
8
+ * rather than desync from the textarea.
9
+ *
10
+ * @param {string} name e.g. `"tql-sql"`.
11
+ * @param {(text: string) => Array<{tok: string, text: string}>} tokenizer
12
+ * @returns {() => void} an uninstaller that removes this registration
13
+ * (restoring any built-in of the same name).
14
+ */
15
+ export function registerCodeLanguage(name: string, tokenizer: (text: string) => Array<{
16
+ tok: string;
17
+ text: string;
18
+ }>): () => void;
19
+ /**
20
+ * Resolve the tokenizer for a `data-lang` value: a consumer registration wins,
21
+ * then a built-in grammar, else `null` (no highlighting → plain textarea).
22
+ *
23
+ * @param {string} [name]
24
+ * @returns {((text: string) => Array<{tok: string, text: string}>) | null}
25
+ */
26
+ export function resolveCodeLanguage(name?: string): ((text: string) => Array<{
27
+ tok: string;
28
+ text: string;
29
+ }>) | null;
30
+ /**
31
+ * Tokenize `text` as `lang`, returning `null` when there is no grammar, the
32
+ * tokenizer throws, or its tokens fail to reconstruct the source exactly. A
33
+ * `null` return tells the overlay to stay out of the way.
34
+ *
35
+ * @param {string} lang
36
+ * @param {string} text
37
+ * @returns {Array<{tok: string, text: string}> | null}
38
+ */
39
+ export function tokenizeCode(lang: string, text: string): Array<{
40
+ tok: string;
41
+ text: string;
42
+ }> | null;
@@ -0,0 +1,308 @@
1
+ // @hypermedia-components/core — code tokenizers for the live editable
2
+ // highlight overlay (issue #264).
3
+ //
4
+ // A tokenizer is `(text) => Array<{ tok, text }>`. The `text` parts,
5
+ // concatenated in order, must reconstruct the input exactly; `tok` is one of
6
+ // the hc-code `data-tok` values
7
+ //
8
+ // keyword | string | number | comment | operator | identifier
9
+ // property | tag | attribute | meta
10
+ //
11
+ // or a falsy value (`''` / `null`) for plain, uncoloured text. `installCodeEditor()`
12
+ // renders each token as a `<span class="hc-code__tok" data-tok="…">` (or a bare
13
+ // text node) into the overlay, coloured from the same `--hc-code-tok-*` palette
14
+ // as the server-tokenized read-only path (#261), so the editor matches the
15
+ // read-only / diff surfaces.
16
+ //
17
+ // Built-in grammars (`sql`, `json`, `yaml`, `html`) cover common cases. Register
18
+ // your own with `registerCodeLanguage(name, tokenizer)` — a dialect tokenizer
19
+ // can classify constructs a generic grammar can't (e.g. TesseraQL's 2-way SQL
20
+ // directives `/*%if … */` as `meta`). Everything here is CSP-safe: pure JS,
21
+ // no `eval` / `new Function`, no network.
22
+
23
+ // Consumer registrations live on a globalThis-keyed singleton so every inlined
24
+ // copy of this module (hc.js, hc.behaviors.js each bundle one) reads and writes
25
+ // the same registry — the same reason the i18n catalog is a singleton (#216).
26
+ // Built-ins are resolved as a fallback below, so registering a built-in name
27
+ // overrides it without mutating shared state.
28
+ const STATE_KEY = Symbol.for('hypermedia-components.code-languages');
29
+ const registry = globalThis[STATE_KEY] || (globalThis[STATE_KEY] = new Map());
30
+
31
+ const norm = (name) => String(name).toLowerCase();
32
+
33
/**
 * Tokenize `text` by trying an ordered list of rules at every position.
 *
 * Each rule is `{ tok, re }`, where `re` is expected to carry the `y` (sticky)
 * flag. At the current position the first rule that produces a non-empty
 * match starting exactly there wins and becomes one token. Characters no rule
 * claims are collected into plain (`tok: ''`) runs, so concatenating every
 * token's `text` reproduces the input exactly.
 *
 * @param {string} text
 * @param {Array<{tok: string, re: RegExp}>} rules
 * @returns {Array<{tok: string, text: string}>}
 */
function scan(text, rules) {
  const tokens = [];
  const total = text.length;
  let pos = 0;
  // Start of the pending run of unmatched characters.
  let plainStart = 0;

  const emitPlain = (end) => {
    if (end > plainStart) {
      tokens.push({ tok: '', text: text.slice(plainStart, end) });
    }
  };

  while (pos < total) {
    let hit = null;
    for (const rule of rules) {
      const { tok, re } = rule;
      // The regexes are reused across positions, so always reset the cursor.
      re.lastIndex = pos;
      const found = re.exec(text);
      // Reject matches that start elsewhere (non-sticky regex) or are empty
      // (would never advance).
      if (found && found.index === pos && found[0].length > 0) {
        hit = { tok, text: found[0] };
        break;
      }
    }

    if (hit === null) {
      // Nothing matched: this character joins the pending plain run.
      pos += 1;
      continue;
    }

    emitPlain(pos);
    tokens.push(hit);
    pos += hit.text.length;
    plainStart = pos;
  }

  emitPlain(total);
  return tokens;
}
76
+
77
// --- SQL ------------------------------------------------------------------
// A generic SQL grammar. 2-way-SQL block comments (`/* … */`) read as plain
// comments here; a dialect that wants its directives highlighted as `meta`
// registers its own tokenizer.
//
// The keyword list is terminated by `(?![\w$])` rather than `\b`: the
// identifier rule below accepts `$` inside a name, so a bare word boundary
// would split e.g. `count$total` into keyword `count` + `$` + `total`.
const SQL_KEYWORDS =
  /(?:select|from|where|insert|into|values|update|set|delete|create|table|alter|drop|join|inner|left|right|outer|full|cross|on|group|by|order|having|limit|offset|union|all|distinct|as|and|or|not|in|is|null|like|between|exists|case|when|then|else|end|asc|desc|primary|key|foreign|references|default|index|view|with|returning|using|cast|coalesce|count|sum|avg|min|max|true|false)(?![\w$])/iy;

/**
 * Tokenize `text` with the generic SQL grammar.
 *
 * Rule order is significant (first match wins in `scan`): comments and
 * strings are tried first so their contents are not re-tokenized, and
 * keywords are tried before the catch-all identifier rule.
 *
 * @param {string} text
 * @returns {Array<{tok: string, text: string}>}
 */
function tokenizeSql(text) {
  return scan(text, [
    // `-- …` to end of line.
    { tok: 'comment', re: /--[^\n]*/y },
    // `/* … */`, shortest match.
    { tok: 'comment', re: /\/\*[\s\S]*?\*\//y },
    // Single-quoted string; `''` is an escaped quote.
    { tok: 'string', re: /'(?:[^']|'')*'/y },
    { tok: 'number', re: /\b\d+(?:\.\d+)?\b/y },
    { tok: 'keyword', re: SQL_KEYWORDS },
    { tok: 'identifier', re: /[A-Za-z_][\w$]*/y },
    { tok: 'operator', re: /[-+*/%=<>!,;.()|&^~?@:[\]{}]+/y },
  ]);
}
95
+
96
// --- JSON -----------------------------------------------------------------
/**
 * Tokenize `text` as JSON. Rules are tried in the order listed; the object-key
 * rule must precede the generic string rule so keys win.
 *
 * @param {string} text
 * @returns {Array<{tok: string, text: string}>}
 */
function tokenizeJson(text) {
  // A string immediately before a colon is an object key → `property`.
  const objectKey = { tok: 'property', re: /"(?:[^"\\]|\\.)*"(?=\s*:)/y };
  const stringLiteral = { tok: 'string', re: /"(?:[^"\\]|\\.)*"/y };
  const numberLiteral = { tok: 'number', re: /-?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?/y };
  const literalWord = { tok: 'keyword', re: /\b(?:true|false|null)\b/y };
  const punctuation = { tok: 'operator', re: /[{}[\]:,]/y };

  return scan(text, [objectKey, stringLiteral, numberLiteral, literalWord, punctuation]);
}
107
+
108
// --- YAML -----------------------------------------------------------------
/**
 * Tokenize `text` as YAML. Rules are tried in the order listed (first match
 * wins in `scan`); text no rule claims stays plain.
 *
 * @param {string} text
 * @returns {Array<{tok: string, text: string}>}
 */
function tokenizeYaml(text) {
  return scan(text, [
    { tok: 'comment', re: /#[^\n]*/y },
    // Single-quoted scalar; `''` is an escaped quote.
    { tok: 'string', re: /'(?:[^']|'')*'/y },
    { tok: 'string', re: /"(?:[^"\\]|\\.)*"/y },
    // Unquoted mapping key: a scalar followed by a colon + whitespace, or by a
    // colon at the very end of the input.
    { tok: 'property', re: /[A-Za-z_][\w.\- ]*?(?=:(?:\s|$))/y },
    // The optional sign goes BEFORE the word boundary. With `\b-?\d+` the
    // boundary had to sit in front of `-`, which only exists after a word
    // character, so `key: -5` never had its sign highlighted.
    { tok: 'number', re: /-?\b\d+(?:\.\d+)?\b/y },
    { tok: 'keyword', re: /\b(?:true|false|null|yes|no|on|off)\b/iy },
    // Flow/mapping punctuation, or a sequence dash (with the whitespace that
    // precedes it) followed by whitespace.
    { tok: 'operator', re: /[:?,[\]{}]|(?:^|\s)-(?=\s)/y },
  ]);
}
121
+
122
// --- HTML -----------------------------------------------------------------
// Stateful: tag/attribute classification depends on being inside a `<…>`.
/**
 * Tokenize HTML/XML-like markup.
 *
 * Outside a tag everything is plain text except comments (`comment`) and
 * `<!…>` declarations (`meta`). Inside a tag the pieces are classified as
 * `tag` (`<name`, `</name`, `/`, `>`), `attribute`, `operator` (`=`) and
 * `string` (attribute values). Whitespace inside a tag stays plain.
 *
 * After an `=`, an unquoted value is consumed as ONE `string` token up to the
 * next whitespace or `>`, so values such as `href=/docs/api` or `q=a=b` are
 * not broken apart at `/` or `=`.
 *
 * The `text` parts of the returned tokens concatenate back to the input.
 *
 * @param {string} text
 * @returns {Array<{tok: string, text: string}>}
 */
function tokenizeHtml(text) {
  const out = [];
  let plain = '';
  // Emit the pending run of plain characters, if any.
  const flush = () => {
    if (plain) {
      out.push({ tok: '', text: plain });
      plain = '';
    }
  };
  // Emit a classified token, flushing pending plain text first to keep order.
  const push = (tok, t) => {
    if (t) {
      flush();
      out.push({ tok, text: t });
    }
  };

  const reComment = /<!--[\s\S]*?-->/y;
  const reDoctype = /<![^>]*>/y;
  const reTagOpen = /<\/?[A-Za-z][\w:-]*/y;
  const reAttrName = /[^\s=/>]+/y;
  const reValDq = /"[^"]*"/y;
  const reValSq = /'[^']*'/y;
  const reValBare = /[^\s>]+/y;

  // Sticky match of `re` exactly at `at`; '' when it does not match there.
  // (None of the patterns above can match the empty string.)
  const matchAt = (re, at) => {
    re.lastIndex = at;
    const m = re.exec(text);
    return m ? m[0] : '';
  };

  const n = text.length;
  let i = 0;
  while (i < n) {
    if (text[i] === '<') {
      const comment = matchAt(reComment, i);
      if (comment) {
        push('comment', comment);
        i += comment.length;
        continue;
      }
      const doctype = matchAt(reDoctype, i);
      if (doctype) {
        push('meta', doctype);
        i += doctype.length;
        continue;
      }
      const open = matchAt(reTagOpen, i);
      if (open) {
        push('tag', open);
        i += open.length;
        // True between an `=` and the value that follows it.
        let expectValue = false;
        while (i < n && text[i] !== '>') {
          const c = text[i];
          if (/\s/.test(c)) {
            plain += c;
            i += 1;
            continue;
          }
          // `/` and `=` are only structural when we are not reading a value.
          if (!expectValue && c === '/') {
            push('tag', '/');
            i += 1;
            continue;
          }
          if (!expectValue && c === '=') {
            push('operator', '=');
            i += 1;
            expectValue = true;
            continue;
          }
          let tok = 'string';
          let piece = '';
          if (c === '"') piece = matchAt(reValDq, i);
          else if (c === "'") piece = matchAt(reValSq, i);
          if (!piece) {
            // Not a (terminated) quoted string.
            if (expectValue) {
              piece = matchAt(reValBare, i);
            } else {
              tok = 'attribute';
              piece = matchAt(reAttrName, i);
            }
          }
          if (piece) {
            push(tok, piece);
            i += piece.length;
            expectValue = false;
          } else {
            // Defensive: always make progress.
            plain += c;
            i += 1;
          }
        }
        // The loop stops at `>` or at end of input (unterminated tag).
        if (i < n) {
          push('tag', '>');
          i += 1;
        }
        continue;
      }
    }
    // Plain text, or a `<` that does not start a comment, declaration or tag.
    plain += text[i];
    i += 1;
  }
  flush();
  return out;
}
231
+
232
// Built-in grammars, keyed by the normalized `data-lang` value.
const BUILTINS = {
  sql: tokenizeSql,
  json: tokenizeJson,
  // `yml` is an alias for the YAML grammar.
  yaml: tokenizeYaml,
  yml: tokenizeYaml,
  // `xml` reuses the HTML tokenizer.
  html: tokenizeHtml,
  xml: tokenizeHtml,
};
240
+
241
/**
 * Add a tokenizer that `installCodeEditor()`'s live highlight overlay will use
 * for fields whose `data-lang` equals `name` (compared case-insensitively).
 * Using a built-in name (`sql`, `json`, `yaml`, `html`, …) overrides the
 * built-in for as long as the registration is in place.
 *
 * The tokenizer's tokens must concatenate (`text` parts, in order) back to the
 * exact input; when they don't, the overlay safely declines to highlight that
 * buffer rather than desync from the textarea.
 *
 * @param {string} name e.g. `"tql-sql"`.
 * @param {(text: string) => Array<{tok: string, text: string}>} tokenizer
 * @returns {() => void} an uninstaller that removes this registration
 *   (restoring any built-in of the same name).
 * @throws {TypeError} when `name` is not a non-empty string or `tokenizer` is
 *   not a function.
 */
export function registerCodeLanguage(name, tokenizer) {
  const hasValidName = typeof name === 'string' && name !== '';
  if (!hasValidName) {
    throw new TypeError('registerCodeLanguage(name, tokenizer): name must be a non-empty string');
  }
  if (typeof tokenizer !== 'function') {
    throw new TypeError('registerCodeLanguage(name, tokenizer): tokenizer must be a function');
  }

  const key = norm(name);
  registry.set(key, tokenizer);

  const unregister = () => {
    // Leave the entry alone if someone else has since registered this name.
    if (registry.get(key) !== tokenizer) return;
    registry.delete(key);
  };
  return unregister;
}
268
+
269
/**
 * Resolve the tokenizer for a `data-lang` value: a consumer registration wins,
 * then a built-in grammar, else `null` (no highlighting → plain textarea).
 *
 * Built-ins are looked up as OWN properties only. A plain `BUILTINS[key]` read
 * would also return members inherited from `Object.prototype`, so values such
 * as `data-lang="constructor"` or `"toString"` would resolve to a truthy
 * non-grammar and make callers treat an unknown language as highlightable.
 *
 * @param {string} [name]
 * @returns {((text: string) => Array<{tok: string, text: string}>) | null}
 */
export function resolveCodeLanguage(name) {
  if (!name) return null;
  const key = norm(name);
  const registered = registry.get(key);
  if (registered) return registered;
  return Object.prototype.hasOwnProperty.call(BUILTINS, key) ? BUILTINS[key] : null;
}
281
+
282
/**
 * Run the grammar registered for `lang` over `text` and validate its output.
 *
 * Returns `null` — telling the overlay to stay out of the way — when no
 * grammar resolves for `lang`, when the tokenizer throws, when it returns a
 * non-array, when any entry lacks a string `text`, or when the `text` parts do
 * not concatenate back to `text` exactly. Otherwise the tokenizer's own array
 * is returned.
 *
 * @param {string} lang
 * @param {string} text
 * @returns {Array<{tok: string, text: string}> | null}
 */
export function tokenizeCode(lang, text) {
  const tokenizer = resolveCodeLanguage(lang);
  if (!tokenizer) return null;

  let result;
  try {
    result = tokenizer(text);
  } catch {
    // A throwing (possibly consumer-supplied) grammar must not break the editor.
    return null;
  }
  if (!Array.isArray(result)) return null;

  const parts = [];
  for (const entry of result) {
    if (!entry || typeof entry.text !== 'string') return null;
    parts.push(entry.text);
  }
  // Only accept output that reproduces the source character for character.
  return parts.join('') === text ? result : null;
}