@fuzdev/fuz_code 0.37.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. package/LICENSE +25 -0
  2. package/README.md +185 -0
  3. package/dist/Code.svelte +146 -0
  4. package/dist/Code.svelte.d.ts +79 -0
  5. package/dist/Code.svelte.d.ts.map +1 -0
  6. package/dist/CodeHighlight.svelte +205 -0
  7. package/dist/CodeHighlight.svelte.d.ts +101 -0
  8. package/dist/CodeHighlight.svelte.d.ts.map +1 -0
  9. package/dist/code_sample.d.ts +8 -0
  10. package/dist/code_sample.d.ts.map +1 -0
  11. package/dist/code_sample.js +2 -0
  12. package/dist/grammar_clike.d.ts +12 -0
  13. package/dist/grammar_clike.d.ts.map +1 -0
  14. package/dist/grammar_clike.js +43 -0
  15. package/dist/grammar_css.d.ts +11 -0
  16. package/dist/grammar_css.d.ts.map +1 -0
  17. package/dist/grammar_css.js +70 -0
  18. package/dist/grammar_js.d.ts +11 -0
  19. package/dist/grammar_js.d.ts.map +1 -0
  20. package/dist/grammar_js.js +180 -0
  21. package/dist/grammar_json.d.ts +11 -0
  22. package/dist/grammar_json.d.ts.map +1 -0
  23. package/dist/grammar_json.js +35 -0
  24. package/dist/grammar_markdown.d.ts +8 -0
  25. package/dist/grammar_markdown.d.ts.map +1 -0
  26. package/dist/grammar_markdown.js +228 -0
  27. package/dist/grammar_markup.d.ts +31 -0
  28. package/dist/grammar_markup.d.ts.map +1 -0
  29. package/dist/grammar_markup.js +192 -0
  30. package/dist/grammar_svelte.d.ts +12 -0
  31. package/dist/grammar_svelte.d.ts.map +1 -0
  32. package/dist/grammar_svelte.js +150 -0
  33. package/dist/grammar_ts.d.ts +11 -0
  34. package/dist/grammar_ts.d.ts.map +1 -0
  35. package/dist/grammar_ts.js +95 -0
  36. package/dist/highlight_manager.d.ts +25 -0
  37. package/dist/highlight_manager.d.ts.map +1 -0
  38. package/dist/highlight_manager.js +139 -0
  39. package/dist/highlight_priorities.d.ts +3 -0
  40. package/dist/highlight_priorities.d.ts.map +1 -0
  41. package/dist/highlight_priorities.gen.d.ts +4 -0
  42. package/dist/highlight_priorities.gen.d.ts.map +1 -0
  43. package/dist/highlight_priorities.gen.js +58 -0
  44. package/dist/highlight_priorities.js +55 -0
  45. package/dist/syntax_styler.d.ts +277 -0
  46. package/dist/syntax_styler.d.ts.map +1 -0
  47. package/dist/syntax_styler.js +426 -0
  48. package/dist/syntax_styler_global.d.ts +3 -0
  49. package/dist/syntax_styler_global.d.ts.map +1 -0
  50. package/dist/syntax_styler_global.js +18 -0
  51. package/dist/syntax_token.d.ts +34 -0
  52. package/dist/syntax_token.d.ts.map +1 -0
  53. package/dist/syntax_token.js +27 -0
  54. package/dist/theme.css +98 -0
  55. package/dist/theme_highlight.css +160 -0
  56. package/dist/theme_variables.css +20 -0
  57. package/dist/tokenize_syntax.d.ts +28 -0
  58. package/dist/tokenize_syntax.d.ts.map +1 -0
  59. package/dist/tokenize_syntax.js +194 -0
  60. package/package.json +117 -0
  61. package/src/lib/code_sample.ts +10 -0
  62. package/src/lib/grammar_clike.ts +48 -0
  63. package/src/lib/grammar_css.ts +84 -0
  64. package/src/lib/grammar_js.ts +215 -0
  65. package/src/lib/grammar_json.ts +38 -0
  66. package/src/lib/grammar_markdown.ts +289 -0
  67. package/src/lib/grammar_markup.ts +225 -0
  68. package/src/lib/grammar_svelte.ts +165 -0
  69. package/src/lib/grammar_ts.ts +114 -0
  70. package/src/lib/highlight_manager.ts +182 -0
  71. package/src/lib/highlight_priorities.gen.ts +71 -0
  72. package/src/lib/highlight_priorities.ts +110 -0
  73. package/src/lib/syntax_styler.ts +583 -0
  74. package/src/lib/syntax_styler_global.ts +20 -0
  75. package/src/lib/syntax_token.ts +49 -0
  76. package/src/lib/tokenize_syntax.ts +270 -0
@@ -0,0 +1,583 @@
1
+ import {SyntaxToken, type SyntaxTokenStream} from './syntax_token.js';
2
+ import {tokenize_syntax} from './tokenize_syntax.js';
3
+
4
+ export type AddSyntaxGrammar = (syntax_styler: SyntaxStyler) => void;
5
+
6
+ /**
7
+ * Based on Prism (https://github.com/PrismJS/prism)
8
+ * by Lea Verou (https://lea.verou.me/)
9
+ *
10
+ * MIT license
11
+ *
12
+ * @see LICENSE
13
+ */
14
+ export class SyntaxStyler {
15
+ langs: Record<string, SyntaxGrammar | undefined> = {
16
+ plaintext: {},
17
+ };
18
+
19
+ // constructor() {
20
+ // TODO this API? problem is the grammars rely on mutating existing grammars in the `syntax_styler`,
21
+ // so for now adding grammars will remain inherently stateful
22
+ // export interface SyntaxStylerOptions {
23
+ // grammars?: AddGrammar[];
24
+ // }
25
+ // options: SyntaxStylerOptions = {}
26
+ // const {grammars} = options;
27
+ // if (grammars) {
28
+ // for (const add_grammar of grammars) {
29
+ // this.langs[id] =
30
+ // add_grammar(this);
31
+ // }
32
+ // }
33
+ // }
34
+
35
+ add_lang(id: string, grammar: SyntaxGrammarRaw, aliases?: Array<string>): void {
36
+ // Normalize grammar once at registration for optimal runtime performance
37
+ // Use a visited set to handle circular references
38
+ this.normalize_grammar(grammar, new Set());
39
+ // After normalization, grammar has the shape of SyntaxGrammar
40
+ const normalized = grammar as unknown as SyntaxGrammar;
41
+ this.langs[id] = normalized;
42
+ if (aliases !== undefined) {
43
+ for (var alias of aliases) {
44
+ this.langs[alias] = normalized;
45
+ }
46
+ }
47
+ }
48
+
49
+ add_extended_lang(
50
+ base_id: string,
51
+ extension_id: string,
52
+ extension: SyntaxGrammarRaw,
53
+ aliases?: Array<string>,
54
+ ): SyntaxGrammar {
55
+ // extend_grammar returns already normalized grammar
56
+ var grammar = this.extend_grammar(base_id, extension);
57
+ // Store the normalized grammar directly
58
+ this.langs[extension_id] = grammar;
59
+ if (aliases !== undefined) {
60
+ for (var alias of aliases) {
61
+ this.langs[alias] = grammar;
62
+ }
63
+ }
64
+ return grammar;
65
+ }
66
+
67
+ get_lang(id: string): SyntaxGrammar {
68
+ var lang = this.langs[id];
69
+ if (lang === undefined) {
70
+ throw Error(`The language "${id}" has no grammar.`);
71
+ }
72
+ return lang;
73
+ }
74
+
75
+ /**
76
+ * Generates HTML with syntax highlighting from source code.
77
+ *
78
+ * **Process:**
79
+ * 1. Runs `before_tokenize` hook
80
+ * 2. Tokenizes code using the provided or looked-up grammar
81
+ * 3. Runs `after_tokenize` hook
82
+ * 4. Runs `wrap` hook on each token
83
+ * 5. Converts tokens to HTML with CSS classes
84
+ *
85
+ * **Parameter Relationship:**
86
+ * - `lang` is ALWAYS required for hook context and identification
87
+ * - `grammar` is optional; when undefined, automatically looks up via `this.get_lang(lang)`
88
+ * - When both are provided, `grammar` is used for tokenization, `lang` for metadata
89
+ *
90
+ * **Use cases:**
91
+ * - Standard usage: `stylize(code, 'ts')` - uses registered TypeScript grammar
92
+ * - Custom grammar: `stylize(code, 'ts', customGrammar)` - uses custom grammar but keeps 'ts' label
93
+ * - Extended grammar: `stylize(code, 'custom', this.extend_grammar('ts', extension))` - new language variant
94
+ *
95
+ * @param text - The source code to syntax highlight.
96
+ * @param lang - Language identifier (e.g., 'ts', 'css', 'html'). Used for:
97
+ * - Grammar lookup when `grammar` is undefined
98
+ * - Hook context (`lang` field passed to hooks)
99
+ * - Language identification in output
100
+ * @param grammar - Optional custom grammar object. When undefined, automatically
101
+ * looks up the grammar via `this.get_lang(lang)`. Provide this to use a custom
102
+ * or modified grammar instead of the registered one.
103
+ *
104
+ * @returns HTML string with syntax highlighting using CSS classes (`.token_*`)
105
+ *
106
+ * @example
107
+ * // Standard usage - uses registered grammar
108
+ * stylize('var foo = true;', 'ts');
109
+ *
110
+ * @example
111
+ * // Custom grammar - overrides registered grammar
112
+ * const customGrammar = { keyword: [...], string: [...] };
113
+ * stylize('var foo = false;', 'ts', customGrammar);
114
+ *
115
+ * @example
116
+ * // Extended grammar - builds on existing grammar
117
+ * const extended = this.extend_grammar('ts', { customToken: [...] });
118
+ * stylize('var foo = 42;', 'ts-extended', extended);
119
+ */
120
+ stylize(
121
+ text: string,
122
+ lang: string,
123
+ grammar: SyntaxGrammar | undefined = this.get_lang(lang),
124
+ ): string {
125
+ var ctx: HookBeforeTokenizeCallbackContext = {
126
+ code: text,
127
+ grammar,
128
+ lang,
129
+ tokens: undefined,
130
+ };
131
+ this.run_hook_before_tokenize(ctx);
132
+ const c = ctx as any as HookAfterTokenizeCallbackContext;
133
+ c.tokens = tokenize_syntax(c.code, c.grammar);
134
+ this.run_hook_after_tokenize(c);
135
+ return this.stringify_token(c.tokens, c.lang);
136
+ }
137
+
138
+ /**
139
+ * Inserts tokens _before_ another token in a language definition or any other grammar.
140
+ *
141
+ * ## Usage
142
+ *
143
+ * This helper method makes it easy to modify existing languages. For example, the CSS language definition
144
+ * not only defines CSS styling for CSS documents, but also needs to define styling for CSS embedded
145
+ * in HTML through `<style>` elements. To do this, it needs to modify `syntax_styler.get_lang('markup')` and add the
146
+ * appropriate tokens. However, `syntax_styler.get_lang('markup')` is a regular JS object literal, so if you do
147
+ * this:
148
+ *
149
+ * ```js
150
+ * syntax_styler.get_lang('markup').style = {
151
+ * // token
152
+ * };
153
+ * ```
154
+ *
155
+ * then the `style` token will be added (and processed) at the end. `insert_before` allows you to insert tokens
156
+ * before existing tokens. For the CSS example above, you would use it like this:
157
+ *
158
+ * ```js
159
+ * grammar_insert_before('markup', 'cdata', {
160
+ * 'style': {
161
+ * // token
162
+ * }
163
+ * });
164
+ * ```
165
+ *
166
+ * ## Special cases
167
+ *
168
+ * If the grammars of `inside` and `insert` have tokens with the same name, the tokens in `inside`'s grammar
169
+ * will be ignored.
170
+ *
171
+ * This behavior can be used to insert tokens after `before`:
172
+ *
173
+ * ```js
174
+ * grammar_insert_before('markup', 'comment', {
175
+ * 'comment': syntax_styler.get_lang('markup').comment,
176
+ * // tokens after 'comment'
177
+ * });
178
+ * ```
179
+ *
180
+ * ## Limitations
181
+ *
182
+ * The main problem `insert_before` has to solve is iteration order. Since ES2015, the iteration order for object
183
+ * properties is guaranteed to be the insertion order (except for integer keys) but some browsers behave
184
+ * differently when keys are deleted and re-inserted. So `insert_before` can't be implemented by temporarily
185
+ * deleting properties which is necessary to insert at arbitrary positions.
186
+ *
187
+ * To solve this problem, `insert_before` doesn't actually insert the given tokens into the target object.
188
+ * Instead, it will create a new object and replace all references to the target object with the new one. This
189
+ * can be done without temporarily deleting properties, so the iteration order is well-defined.
190
+ *
191
+ * However, only references that can be reached from `syntax_styler.langs` or `insert` will be replaced. I.e. if
192
+ * you hold the target object in a variable, then the value of the variable will not change.
193
+ *
194
+ * ```js
195
+ * var oldMarkup = syntax_styler.get_lang('markup');
196
+ * var newMarkup = grammar_insert_before('markup', 'comment', { ... });
197
+ *
198
+ * assert(oldMarkup !== syntax_styler.get_lang('markup'));
199
+ * assert(newMarkup === syntax_styler.get_lang('markup'));
200
+ * ```
201
+ *
202
+ * @param inside - The property of `root` (e.g. a language id in `syntax_styler.langs`) that contains the
203
+ * object to be modified.
204
+ * @param before - The key to insert before.
205
+ * @param insert - An object containing the key-value pairs to be inserted.
206
+ * @param root - The object containing `inside`, i.e. the object that contains the
207
+ * object to be modified.
208
+ *
209
+ * Defaults to `syntax_styler.langs`.
210
+ *
211
+ * @returns the new grammar object
212
+ */
213
+ grammar_insert_before(
214
+ inside: string,
215
+ before: string,
216
+ insert: SyntaxGrammarRaw,
217
+ root: Record<string, any> = this.langs,
218
+ ): SyntaxGrammar {
219
+ var grammar = root[inside];
220
+ var updated: SyntaxGrammarRaw = {};
221
+
222
+ for (var token in grammar) {
223
+ if (token === before) {
224
+ for (var new_token in insert) {
225
+ updated[new_token] = insert[new_token];
226
+ }
227
+ }
228
+
229
+ // Do not insert tokens which also occur in insert.
230
+ if (!Object.hasOwn(insert, token)) {
231
+ updated[token] = grammar[token];
232
+ }
233
+ }
234
+
235
+ // Normalize the updated grammar to ensure inserted patterns have consistent shape
236
+ this.normalize_grammar(updated, new Set());
237
+
238
+ // After normalization, cast to SyntaxGrammar
239
+ const normalized = updated as unknown as SyntaxGrammar;
240
+ var old = root[inside];
241
+ root[inside] = normalized;
242
+
243
+ // Update references in other language definitions
244
+ depth_first_search(this.langs, (o, key, value) => {
245
+ if (value === old && key !== inside) {
246
+ o[key] = normalized;
247
+ }
248
+ });
249
+
250
+ return normalized;
251
+ }
252
+
253
+ /**
254
+ * Converts the given token or token stream to an HTML representation.
255
+ *
256
+ * Runs the `wrap` hook on each `SyntaxToken`.
257
+ *
258
+ * @param o - The token or token stream to be converted.
259
+ * @param lang - The name of current language.
260
+ * @returns The HTML representation of the token or token stream.
261
+ */
262
+ stringify_token(o: string | SyntaxToken | SyntaxTokenStream, lang: string): string {
263
+ if (typeof o === 'string') {
264
+ return o
265
+ .replace(/&/g, '&amp;')
266
+ .replace(/</g, '&lt;')
267
+ .replace(/\u00a0/g, ' ');
268
+ }
269
+ if (Array.isArray(o)) {
270
+ var s = '';
271
+ for (var e of o) {
272
+ s += this.stringify_token(e, lang);
273
+ }
274
+ return s;
275
+ }
276
+
277
+ var ctx: HookWrapCallbackContext = {
278
+ type: o.type,
279
+ content: this.stringify_token(o.content, lang),
280
+ tag: 'span',
281
+ classes: [`token_${o.type}`],
282
+ attributes: {},
283
+ lang,
284
+ };
285
+
286
+ var aliases = o.alias;
287
+ // alias is always an array after normalization
288
+ for (const a of aliases) {
289
+ ctx.classes.push(`token_${a}`);
290
+ }
291
+
292
+ this.run_hook_wrap(ctx);
293
+
294
+ var attributes = '';
295
+ for (var name in ctx.attributes) {
296
+ attributes += ' ' + name + '="' + (ctx.attributes[name] || '').replace(/"/g, '&quot;') + '"';
297
+ }
298
+
299
+ return (
300
+ '<' +
301
+ ctx.tag +
302
+ ' class="' +
303
+ ctx.classes.join(' ') +
304
+ '"' +
305
+ attributes +
306
+ '>' +
307
+ ctx.content +
308
+ '</' +
309
+ ctx.tag +
310
+ '>'
311
+ );
312
+ }
313
+
314
+ /**
315
+ * Creates a deep copy of the language with the given id and appends the given tokens.
316
+ *
317
+ * If a token in `extension` also appears in the copied language, then the existing token in the copied language
318
+ * will be overwritten at its original position.
319
+ *
320
+ * ## Best practices
321
+ *
322
+ * Since the position of overwriting tokens (token in `extension` that overwrite tokens in the copied language)
323
+ * doesn't matter, they can technically be in any order. However, this can be confusing to others that trying to
324
+ * understand the language definition because, normally, the order of tokens matters in the grammars.
325
+ *
326
+ * Therefore, it is encouraged to order overwriting tokens according to the positions of the overwritten tokens.
327
+ * Furthermore, all non-overwriting tokens should be placed after the overwriting ones.
328
+ *
329
+ * @param base_id - The id of the language to extend. This has to be a key in `syntax_styler.langs`.
330
+ * @param extension - The new tokens to append.
331
+ * @returns the new grammar
332
+ */
333
+ extend_grammar(base_id: string, extension: SyntaxGrammarRaw): SyntaxGrammar {
334
+ // Merge normalized base with un-normalized extension
335
+ const extended = {...structuredClone(this.get_lang(base_id)), ...extension};
336
+ // Normalize the extension parts
337
+ this.normalize_grammar(extended as SyntaxGrammarRaw, new Set());
338
+ // Return as SyntaxGrammar
339
+ return extended as unknown as SyntaxGrammar;
340
+ }
341
+
342
+ /**
343
+ * Normalize a single pattern to have consistent shape.
344
+ * This ensures all patterns have the same object shape for V8 optimization.
345
+ */
346
+ private normalize_pattern(
347
+ pattern: RegExp | SyntaxGrammarTokenRaw,
348
+ visited: Set<number>,
349
+ ): SyntaxGrammarToken {
350
+ const p = pattern instanceof RegExp ? {pattern} : pattern;
351
+
352
+ let regex = p.pattern;
353
+
354
+ // Add global flag if greedy and not already present
355
+ if ((p.greedy ?? false) && !regex.global) {
356
+ const flags = regex.flags;
357
+ regex = new RegExp(regex.source, flags.includes('g') ? flags : flags + 'g');
358
+ }
359
+
360
+ // Normalize alias to always be an array
361
+ let normalized_alias: Array<string> = [];
362
+ if (p.alias) {
363
+ normalized_alias = Array.isArray(p.alias) ? p.alias : [p.alias];
364
+ }
365
+
366
+ // Recursively normalize the inside grammar if present
367
+ let normalized_inside: SyntaxGrammar | null = null;
368
+ if (p.inside) {
369
+ this.normalize_grammar(p.inside, visited);
370
+ // After normalization, cast to SyntaxGrammar
371
+ normalized_inside = p.inside as unknown as SyntaxGrammar;
372
+ }
373
+
374
+ return {
375
+ pattern: regex,
376
+ lookbehind: p.lookbehind ?? false,
377
+ greedy: p.greedy ?? false,
378
+ alias: normalized_alias,
379
+ inside: normalized_inside,
380
+ };
381
+ }
382
+
383
+ /**
384
+ * Normalize a grammar to have consistent object shapes.
385
+ * This performs several optimizations:
386
+ * 1. Merges `rest` property into main grammar
387
+ * 2. Ensures all pattern values are arrays
388
+ * 3. Normalizes all pattern objects to have consistent shapes
389
+ * 4. Adds global flag to greedy patterns
390
+ *
391
+ * This is called once at registration time to avoid runtime overhead.
392
+ * @param visited - Set of grammar object IDs already normalized (for circular references)
393
+ */
394
+ private normalize_grammar(grammar: SyntaxGrammarRaw, visited: Set<number>): void {
395
+ // Check if we've already normalized this grammar (circular reference)
396
+ const grammar_id = id_of(grammar);
397
+ if (visited.has(grammar_id)) {
398
+ return;
399
+ }
400
+ visited.add(grammar_id);
401
+
402
+ // Step 1: Merge rest into grammar first
403
+ if (grammar.rest) {
404
+ for (const token in grammar.rest) {
405
+ if (!grammar[token]) {
406
+ // Don't overwrite existing tokens
407
+ grammar[token] = grammar.rest[token];
408
+ }
409
+ }
410
+ delete grammar.rest;
411
+ }
412
+
413
+ // Step 2: Normalize all patterns
414
+ for (const key in grammar) {
415
+ if (key === 'rest') continue;
416
+
417
+ const value = grammar[key];
418
+ if (!value) {
419
+ grammar[key] = [];
420
+ continue;
421
+ }
422
+
423
+ // Always store as array of normalized patterns
424
+ const patterns = Array.isArray(value) ? value : [value];
425
+ grammar[key] = patterns.map((p) => this.normalize_pattern(p, visited));
426
+ }
427
+ }
428
+
429
+ // TODO add some builtins
430
+ plugins: Record<string, any> = {};
431
+
432
+ // TODO maybe extend/compose an event listener?
433
+ hooks_before_tokenize: Array<HookBeforeTokenizeCallback> = [];
434
+ hooks_after_tokenize: Array<HookAfterTokenizeCallback> = [];
435
+ hooks_wrap: Array<HookWrapCallback> = [];
436
+
437
+ add_hook_before_tokenize(cb: HookBeforeTokenizeCallback): void {
438
+ this.hooks_before_tokenize.push(cb);
439
+ }
440
+ add_hook_after_tokenize(cb: HookAfterTokenizeCallback): void {
441
+ this.hooks_after_tokenize.push(cb);
442
+ }
443
+ add_hook_wrap(cb: HookWrapCallback): void {
444
+ this.hooks_wrap.push(cb);
445
+ }
446
+
447
+ run_hook_before_tokenize(ctx: HookBeforeTokenizeCallbackContext): void {
448
+ for (var cb of this.hooks_before_tokenize) {
449
+ cb(ctx);
450
+ }
451
+ }
452
+ run_hook_after_tokenize(ctx: HookAfterTokenizeCallbackContext): void {
453
+ for (var cb of this.hooks_after_tokenize) {
454
+ cb(ctx);
455
+ }
456
+ }
457
+ run_hook_wrap(ctx: HookWrapCallbackContext): void {
458
+ for (var cb of this.hooks_wrap) {
459
+ cb(ctx);
460
+ }
461
+ }
462
+ }
463
+
464
+ export type SyntaxGrammarValueRaw =
465
+ | RegExp
466
+ | SyntaxGrammarTokenRaw
467
+ | Array<RegExp | SyntaxGrammarTokenRaw>;
468
+
469
+ export type SyntaxGrammarRaw = Record<string, SyntaxGrammarValueRaw | undefined> & {
470
+ rest?: SyntaxGrammarRaw | undefined;
471
+ };
472
+
473
+ /**
474
+ * The expansion of a simple `RegExp` literal to support additional properties.
475
+ *
476
+ * The `inside` grammar will be used to tokenize the text value of each token of this kind.
477
+ *
478
+ * This can be used to make nested and even recursive language definitions.
479
+ *
480
+ * Note: This can cause infinite recursion. Be careful when you embed different languages or even the same language into
481
+ * each another.
482
+ *
483
+ * Note: Grammar authors can use optional properties, but they will be normalized
484
+ * to required properties at registration time for optimal performance.
485
+ */
486
+ export interface SyntaxGrammarTokenRaw {
487
+ /**
488
+ * The regular expression of the token.
489
+ */
490
+ pattern: RegExp;
491
+ /**
492
+ * If `true`, then the first capturing group of `pattern` will (effectively)
493
+ * behave as a lookbehind group meaning that the captured text will not be part of the matched text of the new token.
494
+ * @default false
495
+ */
496
+ lookbehind?: boolean;
497
+ /**
498
+ * Whether the token is greedy.
499
+ * @default false
500
+ */
501
+ greedy?: boolean;
502
+ /**
503
+ * An optional alias or list of aliases.
504
+ */
505
+ alias?: string | Array<string>;
506
+ /**
507
+ * The nested grammar of this token.
508
+ */
509
+ inside?: SyntaxGrammarRaw | null;
510
+ }
511
+
512
+ /**
513
+ * Grammar token with all properties required.
514
+ * This is the normalized representation used at runtime.
515
+ */
516
+ export interface SyntaxGrammarToken {
517
+ pattern: RegExp;
518
+ lookbehind: boolean;
519
+ greedy: boolean;
520
+ alias: Array<string>;
521
+ inside: SyntaxGrammar | null;
522
+ }
523
+
524
+ /**
525
+ * A grammar after normalization.
526
+ * All values are arrays of normalized tokens with consistent shapes.
527
+ */
528
+ export type SyntaxGrammar = Record<string, Array<SyntaxGrammarToken>>;
529
+
530
+ const depth_first_search = (
531
+ o: any,
532
+ cb: (obj: any, key: string, value: any) => void,
533
+ visited: Set<number> = new Set(),
534
+ ): void => {
535
+ for (var key in o) {
536
+ cb(o, key, o[key]);
537
+
538
+ var property = o[key];
539
+
540
+ if (
541
+ property &&
542
+ typeof property === 'object' &&
543
+ !(property instanceof RegExp) &&
544
+ !visited.has(id_of(property))
545
+ ) {
546
+ visited.add(id_of(property));
547
+ depth_first_search(property, cb, visited);
548
+ }
549
+ }
550
+ };
551
+
552
+ export type HookBeforeTokenizeCallback = (ctx: HookBeforeTokenizeCallbackContext) => void;
553
+ export type HookAfterTokenizeCallback = (ctx: HookAfterTokenizeCallbackContext) => void;
554
+ export type HookWrapCallback = (ctx: HookWrapCallbackContext) => void;
555
+
556
+ export interface HookBeforeTokenizeCallbackContext {
557
+ code: string;
558
+ grammar: SyntaxGrammar;
559
+ lang: string;
560
+ tokens: undefined;
561
+ }
562
+ export interface HookAfterTokenizeCallbackContext {
563
+ code: string;
564
+ grammar: SyntaxGrammar;
565
+ lang: string;
566
+ tokens: SyntaxTokenStream;
567
+ }
568
+ export interface HookWrapCallbackContext {
569
+ type: string;
570
+ content: string;
571
+ tag: string;
572
+ classes: Array<string>;
573
+ attributes: Record<string, string>;
574
+ lang: string;
575
+ }
576
+
577
+ var unique_id = 0;
578
+
579
+ /**
580
+ * Returns a unique number for the given object. Later calls will still return the same number.
581
+ */
582
+ const ID = Symbol('id');
583
+ const id_of = (obj: any): number => (obj[ID] ??= ++unique_id);
@@ -0,0 +1,20 @@
1
+ import {SyntaxStyler} from './syntax_styler.js';
2
+ import {add_grammar_markup} from './grammar_markup.js';
3
+ import {add_grammar_css} from './grammar_css.js';
4
+ import {add_grammar_clike} from './grammar_clike.js';
5
+ import {add_grammar_js} from './grammar_js.js';
6
+ import {add_grammar_ts} from './grammar_ts.js';
7
+ import {add_grammar_svelte} from './grammar_svelte.js';
8
+ import {add_grammar_json} from './grammar_json.js';
9
+ import {add_grammar_markdown} from './grammar_markdown.js';
10
+
11
+ export const syntax_styler_global = new SyntaxStyler();
12
+
13
+ add_grammar_markup(syntax_styler_global);
14
+ add_grammar_css(syntax_styler_global);
15
+ add_grammar_clike(syntax_styler_global);
16
+ add_grammar_js(syntax_styler_global);
17
+ add_grammar_ts(syntax_styler_global);
18
+ add_grammar_svelte(syntax_styler_global);
19
+ add_grammar_json(syntax_styler_global);
20
+ add_grammar_markdown(syntax_styler_global);
@@ -0,0 +1,49 @@
1
+ export class SyntaxToken {
2
+ /**
3
+ * The type of the token.
4
+ *
5
+ * This is usually the key of a pattern in a `Grammar`.
6
+ */
7
+ type: string;
8
+
9
+ /**
10
+ * The strings or tokens contained by this token.
11
+ *
12
+ * This will be a token stream if the pattern matched also defined an `inside` grammar.
13
+ */
14
+ content: string | SyntaxTokenStream;
15
+
16
+ /**
17
+ * The alias(es) of the token.
18
+ * Always an array, even if empty or single value.
19
+ */
20
+ alias: Array<string>;
21
+
22
+ length: number;
23
+
24
+ constructor(
25
+ type: string,
26
+ content: string | SyntaxTokenStream,
27
+ alias: string | Array<string> | undefined,
28
+ matched_str: string = '',
29
+ ) {
30
+ this.type = type;
31
+ this.content = content;
32
+ // Normalize alias to always be an array
33
+ this.alias = alias ? (Array.isArray(alias) ? alias : [alias]) : [];
34
+ this.length = matched_str.length;
35
+ }
36
+ }
37
+
38
+ /**
39
+ * A token stream is an array of strings and `SyntaxToken` objects.
40
+ *
41
+ * Syntax token streams have to fulfill a few properties that are assumed by most functions (mostly internal ones) that process
42
+ * them.
43
+ *
44
+ * 1. No adjacent strings.
45
+ * 2. No empty strings.
46
+ *
47
+ * The only exception here is the token stream that only contains the empty string and nothing else.
48
+ */
49
+ export type SyntaxTokenStream = Array<string | SyntaxToken>;