@fuzdev/fuz_code 0.45.1 → 0.46.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. package/README.md +1 -0
  2. package/dist/Code.svelte +2 -2
  3. package/dist/Code.svelte.d.ts +2 -2
  4. package/dist/CodeHighlight.svelte +18 -54
  5. package/dist/CodeHighlight.svelte.d.ts +4 -4
  6. package/dist/CodeHighlight.svelte.d.ts.map +1 -1
  7. package/dist/CodeTextarea.svelte +149 -0
  8. package/dist/CodeTextarea.svelte.d.ts +43 -0
  9. package/dist/CodeTextarea.svelte.d.ts.map +1 -0
  10. package/dist/grammar_markdown.js +3 -3
  11. package/dist/grammar_markup.d.ts +8 -7
  12. package/dist/grammar_markup.d.ts.map +1 -1
  13. package/dist/grammar_markup.js +8 -7
  14. package/dist/highlight_manager.d.ts +21 -7
  15. package/dist/highlight_manager.d.ts.map +1 -1
  16. package/dist/highlight_manager.js +130 -74
  17. package/dist/range_highlighting.svelte.d.ts +39 -0
  18. package/dist/range_highlighting.svelte.d.ts.map +1 -0
  19. package/dist/range_highlighting.svelte.js +57 -0
  20. package/dist/svelte_preprocess_fuz_code.d.ts +4 -4
  21. package/dist/svelte_preprocess_fuz_code.d.ts.map +1 -1
  22. package/dist/svelte_preprocess_fuz_code.js +3 -3
  23. package/dist/syntax_styler.d.ts +40 -32
  24. package/dist/syntax_styler.d.ts.map +1 -1
  25. package/dist/syntax_styler.js +81 -49
  26. package/dist/syntax_token.d.ts +4 -4
  27. package/dist/syntax_token.js +2 -2
  28. package/dist/tokenize_syntax.d.ts +2 -4
  29. package/dist/tokenize_syntax.d.ts.map +1 -1
  30. package/dist/tokenize_syntax.js +2 -4
  31. package/package.json +27 -29
  32. package/src/lib/grammar_markdown.ts +3 -3
  33. package/src/lib/grammar_markup.ts +8 -7
  34. package/src/lib/highlight_manager.ts +154 -84
  35. package/src/lib/range_highlighting.svelte.ts +100 -0
  36. package/src/lib/svelte_preprocess_fuz_code.ts +6 -6
  37. package/src/lib/syntax_styler.ts +98 -53
  38. package/src/lib/syntax_token.ts +4 -4
  39. package/src/lib/tokenize_syntax.ts +2 -4
@@ -3,6 +3,13 @@ import {tokenize_syntax} from './tokenize_syntax.js';
3
3
 
4
4
  export type AddSyntaxGrammar = (syntax_styler: SyntaxStyler) => void;
5
5
 
6
+ /**
7
+ * Maps a matched `&`, `<`, or non-breaking space in text content to its
8
+ * HTML-safe form. Used as the replacer in `stringify_token` for leaf strings
9
+ * (non-breaking spaces are normalized to a regular space).
10
+ */
11
+ const escape_text_char = (ch: string): string => (ch === '&' ? '&amp;' : ch === '<' ? '&lt;' : ' ');
12
+
6
13
  /**
7
14
  * Based on Prism (https://github.com/PrismJS/prism)
8
15
  * by Lea Verou (https://lea.verou.me/)
@@ -92,15 +99,14 @@ export class SyntaxStyler {
92
99
  * - Custom grammar: `stylize(code, 'ts', customGrammar)` - uses custom grammar but keeps 'ts' label
93
100
  * - Extended grammar: `stylize(code, 'custom', this.extend_grammar('ts', extension))` - new language variant
94
101
  *
95
- * @param text - The source code to syntax highlight.
96
- * @param lang - Language identifier (e.g., 'ts', 'css', 'html'). Used for:
97
- * - Grammar lookup when `grammar` is undefined
98
- * - Hook context (`lang` field passed to hooks)
99
- * - Language identification in output
100
- * @param grammar - Optional custom grammar object. When undefined, automatically
101
- * looks up the grammar via `this.get_lang(lang)`. Provide this to use a custom
102
- * or modified grammar instead of the registered one.
103
- *
102
+ * @param text - the source code to syntax highlight
103
+ * @param lang - language identifier (e.g., 'ts', 'css', 'html'), used for:
104
+ * - grammar lookup when `grammar` is undefined
105
+ * - hook context (`lang` field passed to hooks)
106
+ * - language identification in output
107
+ * @param grammar - optional custom `SyntaxGrammar` object; when undefined, automatically
108
+ * looks up the grammar via `this.get_lang(lang)`; provide this to use a custom
109
+ * or modified grammar instead of the registered one
104
110
  * @returns HTML string with syntax highlighting using CSS classes (`.token_*`)
105
111
  *
106
112
  * @example
@@ -128,7 +134,42 @@ export class SyntaxStyler {
128
134
  lang: string,
129
135
  grammar: SyntaxGrammar | undefined = this.get_lang(lang),
130
136
  ): string {
131
- var ctx: HookBeforeTokenizeCallbackContext = {
137
+ // stringify with the post-hook `lang`, which a `before_tokenize` hook may
138
+ // have rewritten (it flows into each token's `wrap` hook context)
139
+ const c = this.#tokenize_hooked(text, lang, grammar);
140
+ return this.stringify_token(c.tokens, c.lang);
141
+ }
142
+
143
+ /**
144
+ * Tokenizes `text` into a `SyntaxTokenStream`, running the `before_tokenize`
145
+ * and `after_tokenize` hooks. This is the tokenization half of `stylize` — use
146
+ * it when you need the token stream itself (e.g. CSS Custom Highlight API range
147
+ * highlighting) rather than HTML.
148
+ *
149
+ * @param text - source to tokenize
150
+ * @param lang - language identifier; passed to the tokenize hooks
151
+ * @param grammar - grammar to tokenize with; defaults to `this.get_lang(lang)`
152
+ * @returns the resulting token stream
153
+ */
154
+ tokenize(
155
+ text: string,
156
+ lang: string,
157
+ grammar: SyntaxGrammar | undefined = this.get_lang(lang),
158
+ ): SyntaxTokenStream {
159
+ return this.#tokenize_hooked(text, lang, grammar).tokens;
160
+ }
161
+
162
+ /**
163
+ * Runs `before_tokenize` → `tokenize_syntax` → `after_tokenize`, returning the
164
+ * resolved context. Shared by `stylize` (which also needs the post-hook `lang`)
165
+ * and `tokenize` (which only needs `tokens`).
166
+ */
167
+ #tokenize_hooked(
168
+ text: string,
169
+ lang: string,
170
+ grammar: SyntaxGrammar,
171
+ ): HookAfterTokenizeCallbackContext {
172
+ const ctx: HookBeforeTokenizeCallbackContext = {
132
173
  code: text,
133
174
  grammar,
134
175
  lang,
@@ -138,7 +179,7 @@ export class SyntaxStyler {
138
179
  const c = ctx as any as HookAfterTokenizeCallbackContext;
139
180
  c.tokens = tokenize_syntax(c.code, c.grammar);
140
181
  this.run_hook_after_tokenize(c);
141
- return this.stringify_token(c.tokens, c.lang);
182
+ return c;
142
183
  }
143
184
 
144
185
  /**
@@ -158,7 +199,7 @@ export class SyntaxStyler {
158
199
  * };
159
200
  * ```
160
201
  *
161
- * then the `style` token will be added (and processed) at the end. `insert_before` allows you to insert tokens
202
+ * then the `style` token will be added (and processed) at the end. `grammar_insert_before` allows you to insert tokens
162
203
  * before existing tokens. For the CSS example above, you would use it like this:
163
204
  *
164
205
  * ```js
@@ -185,12 +226,12 @@ export class SyntaxStyler {
185
226
  *
186
227
  * ## Limitations
187
228
  *
188
- * The main problem `insert_before` has to solve is iteration order. Since ES2015, the iteration order for object
229
+ * The main problem `grammar_insert_before` has to solve is iteration order. Since ES2015, the iteration order for object
189
230
  * properties is guaranteed to be the insertion order (except for integer keys) but some browsers behave
190
- * differently when keys are deleted and re-inserted. So `insert_before` can't be implemented by temporarily
231
+ * differently when keys are deleted and re-inserted. So `grammar_insert_before` can't be implemented by temporarily
191
232
  * deleting properties which is necessary to insert at arbitrary positions.
192
233
  *
193
- * To solve this problem, `insert_before` doesn't actually insert the given tokens into the target object.
234
+ * To solve this problem, `grammar_insert_before` doesn't actually insert the given tokens into the target object.
194
235
  * Instead, it will create a new object and replace all references to the target object with the new one. This
195
236
  * can be done without temporarily deleting properties, so the iteration order is well-defined.
196
237
  *
@@ -205,16 +246,13 @@ export class SyntaxStyler {
205
246
  * assert(newMarkup === syntax_styler.get_lang('markup'));
206
247
  * ```
207
248
  *
208
- * @param inside - The property of `root` (e.g. a language id in `syntax_styler.langs`) that contains the
209
- * object to be modified.
210
- * @param before - The key to insert before.
211
- * @param insert - An object containing the key-value pairs to be inserted.
212
- * @param root - The object containing `inside`, i.e. the object that contains the
213
- * object to be modified.
214
- *
215
- * Defaults to `syntax_styler.langs`.
216
- *
217
- * @returns the new grammar object
249
+ * @param inside - the property of `root` (e.g. a language id in `syntax_styler.langs`) that contains the
250
+ * object to be modified
251
+ * @param before - the key to insert before
252
+ * @param insert - an object containing the key-value pairs to be inserted
253
+ * @param root - the object containing `inside`, i.e. the object that contains the
254
+ * object to be modified; defaults to `syntax_styler.langs`
255
+ * @returns the new `SyntaxGrammar` object
218
256
  */
219
257
  grammar_insert_before(
220
258
  inside: string,
@@ -261,16 +299,15 @@ export class SyntaxStyler {
261
299
  *
262
300
  * Runs the `wrap` hook on each `SyntaxToken`.
263
301
  *
264
- * @param o - The token or token stream to be converted.
265
- * @param lang - The name of current language.
266
- * @returns The HTML representation of the token or token stream.
302
+ * @param o - the token or `SyntaxTokenStream` to be converted
303
+ * @param lang - the name of current language
304
+ * @returns HTML representation of the token or token stream
267
305
  */
268
306
  stringify_token(o: string | SyntaxToken | SyntaxTokenStream, lang: string): string {
269
307
  if (typeof o === 'string') {
270
- return o
271
- .replace(/&/g, '&amp;')
272
- .replace(/</g, '&lt;')
273
- .replace(/\u00a0/g, ' ');
308
+ // single pass over the leaf text (only `&` and `<` need escaping in text
309
+ // content; `\u00a0` is normalized to a regular space)
310
+ return o.replace(/[&<\u00a0]/g, escape_text_char);
274
311
  }
275
312
  if (Array.isArray(o)) {
276
313
  var s = '';
@@ -280,21 +317,29 @@ export class SyntaxStyler {
280
317
  return s;
281
318
  }
282
319
 
320
+ var content = this.stringify_token(o.content, lang);
321
+
322
+ // build the class list once; aliases are always an array after normalization
323
+ var classes = `token_${o.type}`;
324
+ for (const a of o.alias) {
325
+ classes += ` token_${a}`;
326
+ }
327
+
328
+ // fast path: with no `wrap` hooks the tag is always a plain <span> with no
329
+ // attributes, so skip the per-token context object and hook dispatch
330
+ if (this.hooks_wrap.length === 0) {
331
+ return '<span class="' + classes + '">' + content + '</span>';
332
+ }
333
+
283
334
  var ctx: HookWrapCallbackContext = {
284
335
  type: o.type,
285
- content: this.stringify_token(o.content, lang),
336
+ content,
286
337
  tag: 'span',
287
- classes: [`token_${o.type}`],
338
+ classes: classes.split(' '),
288
339
  attributes: {},
289
340
  lang,
290
341
  };
291
342
 
292
- var aliases = o.alias;
293
- // alias is always an array after normalization
294
- for (const a of aliases) {
295
- ctx.classes.push(`token_${a}`);
296
- }
297
-
298
343
  this.run_hook_wrap(ctx);
299
344
 
300
345
  var attributes = '';
@@ -332,9 +377,9 @@ export class SyntaxStyler {
332
377
  * Therefore, it is encouraged to order overwriting tokens according to the positions of the overwritten tokens.
333
378
  * Furthermore, all non-overwriting tokens should be placed after the overwriting ones.
334
379
  *
335
- * @param base_id - The id of the language to extend. This has to be a key in `syntax_styler.langs`.
336
- * @param extension - The new tokens to append.
337
- * @returns the new grammar
380
+ * @param base_id - the id of the language to extend, must be a key in `syntax_styler.langs`
381
+ * @param extension - the new tokens to append
382
+ * @returns the new `SyntaxGrammar`
338
383
  */
339
384
  extend_grammar(base_id: string, extension: SyntaxGrammarRaw): SyntaxGrammar {
340
385
  // Merge normalized base with un-normalized extension
@@ -346,7 +391,7 @@ export class SyntaxStyler {
346
391
  }
347
392
 
348
393
  /**
349
- * Normalize a single pattern to have consistent shape.
394
+ * Normalizes a single pattern to have consistent shape.
350
395
  * This ensures all patterns have the same object shape for V8 optimization.
351
396
  */
352
397
  #normalize_pattern(
@@ -387,15 +432,15 @@ export class SyntaxStyler {
387
432
  }
388
433
 
389
434
  /**
390
- * Normalize a grammar to have consistent object shapes.
435
+ * Normalizes a grammar to have consistent object shapes.
391
436
  * This performs several optimizations:
392
- * 1. Merges `rest` property into main grammar
393
- * 2. Ensures all pattern values are arrays
394
- * 3. Normalizes all pattern objects to have consistent shapes
395
- * 4. Adds global flag to greedy patterns
437
+ * 1. Merges `rest` property into main grammar.
438
+ * 2. Ensures all pattern values are arrays.
439
+ * 3. Normalizes all pattern objects to have consistent shapes.
440
+ * 4. Adds global flag to greedy patterns.
396
441
  *
397
442
  * This is called once at registration time to avoid runtime overhead.
398
- * @param visited - Set of grammar object IDs already normalized (for circular references)
443
+ * @param visited - set of grammar object IDs already normalized (for circular references)
399
444
  */
400
445
  #normalize_grammar(grammar: SyntaxGrammarRaw, visited: Set<number>): void {
401
446
  // Check if we've already normalized this grammar (circular reference)
@@ -510,14 +555,14 @@ export interface SyntaxGrammarTokenRaw {
510
555
  */
511
556
  alias?: string | Array<string>;
512
557
  /**
513
- * The nested grammar of this token.
558
+ * The nested `SyntaxGrammarRaw` of this token.
514
559
  */
515
560
  inside?: SyntaxGrammarRaw | null;
516
561
  }
517
562
 
518
563
  /**
519
564
  * Grammar token with all properties required.
520
- * This is the normalized representation used at runtime.
565
+ * This is the normalized representation of `SyntaxGrammarTokenRaw` used at runtime.
521
566
  */
522
567
  export interface SyntaxGrammarToken {
523
568
  pattern: RegExp;
@@ -529,7 +574,7 @@ export interface SyntaxGrammarToken {
529
574
 
530
575
  /**
531
576
  * A grammar after normalization.
532
- * All values are arrays of normalized tokens with consistent shapes.
577
+ * All values are arrays of normalized `SyntaxGrammarToken` with consistent shapes.
533
578
  */
534
579
  export type SyntaxGrammar = Record<string, Array<SyntaxGrammarToken>>;
535
580
 
@@ -2,14 +2,14 @@ export class SyntaxToken {
2
2
  /**
3
3
  * The type of the token.
4
4
  *
5
- * This is usually the key of a pattern in a `Grammar`.
5
+ * This is usually the key of a pattern in a `SyntaxGrammar`.
6
6
  */
7
7
  type: string;
8
8
 
9
9
  /**
10
10
  * The strings or tokens contained by this token.
11
11
  *
12
- * This will be a token stream if the pattern matched also defined an `inside` grammar.
12
+ * This will be a `SyntaxTokenStream` if the pattern matched also defined an `inside` grammar.
13
13
  */
14
14
  content: string | SyntaxTokenStream;
15
15
 
@@ -38,8 +38,8 @@ export class SyntaxToken {
38
38
  /**
39
39
  * A token stream is an array of strings and `SyntaxToken` objects.
40
40
  *
41
- * Syntax token streams have to fulfill a few properties that are assumed by most functions (mostly internal ones) that process
42
- * them.
41
+ * `SyntaxTokenStream` values have to fulfill a few properties that are assumed by most functions
42
+ * (mostly internal ones) that process them.
43
43
  *
44
44
  * 1. No adjacent strings.
45
45
  * 2. No empty strings.
@@ -10,11 +10,9 @@ import {SyntaxToken, type SyntaxTokenStream} from './syntax_token.js';
10
10
  * This method could be useful in other contexts as well, as a very crude parser.
11
11
  *
12
12
  * @param text - a string with the code to be styled
13
- * @param grammar - an object containing the tokens to use
14
- *
13
+ * @param grammar - a `SyntaxGrammar` object containing the tokens to use.
15
14
  * Usually a language definition like `syntax_styler.get_lang('markup')`.
16
- *
17
- * @returns an array of strings and tokens, a token stream
15
+ * @returns a `SyntaxTokenStream` array of strings and tokens
18
16
  *
19
17
  * @example
20
18
  * ```ts