@fuzdev/fuz_code 0.45.1 → 0.46.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -0
- package/dist/Code.svelte +2 -2
- package/dist/Code.svelte.d.ts +2 -2
- package/dist/CodeHighlight.svelte +18 -54
- package/dist/CodeHighlight.svelte.d.ts +4 -4
- package/dist/CodeHighlight.svelte.d.ts.map +1 -1
- package/dist/CodeTextarea.svelte +149 -0
- package/dist/CodeTextarea.svelte.d.ts +43 -0
- package/dist/CodeTextarea.svelte.d.ts.map +1 -0
- package/dist/grammar_markdown.js +3 -3
- package/dist/grammar_markup.d.ts +8 -7
- package/dist/grammar_markup.d.ts.map +1 -1
- package/dist/grammar_markup.js +8 -7
- package/dist/highlight_manager.d.ts +21 -7
- package/dist/highlight_manager.d.ts.map +1 -1
- package/dist/highlight_manager.js +130 -74
- package/dist/range_highlighting.svelte.d.ts +39 -0
- package/dist/range_highlighting.svelte.d.ts.map +1 -0
- package/dist/range_highlighting.svelte.js +57 -0
- package/dist/svelte_preprocess_fuz_code.d.ts +4 -4
- package/dist/svelte_preprocess_fuz_code.d.ts.map +1 -1
- package/dist/svelte_preprocess_fuz_code.js +3 -3
- package/dist/syntax_styler.d.ts +40 -32
- package/dist/syntax_styler.d.ts.map +1 -1
- package/dist/syntax_styler.js +81 -49
- package/dist/syntax_token.d.ts +4 -4
- package/dist/syntax_token.js +2 -2
- package/dist/tokenize_syntax.d.ts +2 -4
- package/dist/tokenize_syntax.d.ts.map +1 -1
- package/dist/tokenize_syntax.js +2 -4
- package/package.json +27 -29
- package/src/lib/grammar_markdown.ts +3 -3
- package/src/lib/grammar_markup.ts +8 -7
- package/src/lib/highlight_manager.ts +154 -84
- package/src/lib/range_highlighting.svelte.ts +100 -0
- package/src/lib/svelte_preprocess_fuz_code.ts +6 -6
- package/src/lib/syntax_styler.ts +98 -53
- package/src/lib/syntax_token.ts +4 -4
- package/src/lib/tokenize_syntax.ts +2 -4
package/src/lib/syntax_styler.ts
CHANGED
|
@@ -3,6 +3,13 @@ import {tokenize_syntax} from './tokenize_syntax.js';
|
|
|
3
3
|
|
|
4
4
|
export type AddSyntaxGrammar = (syntax_styler: SyntaxStyler) => void;
|
|
5
5
|
|
|
6
|
+
/**
|
|
7
|
+
* Maps a matched `&`, `<`, or non-breaking space in text content to its
|
|
8
|
+
* HTML-safe form. Used as the replacer in `stringify_token` for leaf strings
|
|
9
|
+
* (non-breaking spaces are normalized to a regular space).
|
|
10
|
+
*/
|
|
11
|
+
const escape_text_char = (ch: string): string => (ch === '&' ? '&amp;' : ch === '<' ? '&lt;' : ' ');
|
|
12
|
+
|
|
6
13
|
/**
|
|
7
14
|
* Based on Prism (https://github.com/PrismJS/prism)
|
|
8
15
|
* by Lea Verou (https://lea.verou.me/)
|
|
@@ -92,15 +99,14 @@ export class SyntaxStyler {
|
|
|
92
99
|
* - Custom grammar: `stylize(code, 'ts', customGrammar)` - uses custom grammar but keeps 'ts' label
|
|
93
100
|
* - Extended grammar: `stylize(code, 'custom', this.extend_grammar('ts', extension))` - new language variant
|
|
94
101
|
*
|
|
95
|
-
* @param text -
|
|
96
|
-
* @param lang -
|
|
97
|
-
* -
|
|
98
|
-
* -
|
|
99
|
-
* -
|
|
100
|
-
* @param grammar -
|
|
101
|
-
* looks up the grammar via `this.get_lang(lang)
|
|
102
|
-
* or modified grammar instead of the registered one
|
|
103
|
-
*
|
|
102
|
+
* @param text - the source code to syntax highlight
|
|
103
|
+
* @param lang - language identifier (e.g., 'ts', 'css', 'html'), used for:
|
|
104
|
+
* - grammar lookup when `grammar` is undefined
|
|
105
|
+
* - hook context (`lang` field passed to hooks)
|
|
106
|
+
* - language identification in output
|
|
107
|
+
* @param grammar - optional custom `SyntaxGrammar` object; when undefined, automatically
|
|
108
|
+
* looks up the grammar via `this.get_lang(lang)`; provide this to use a custom
|
|
109
|
+
* or modified grammar instead of the registered one
|
|
104
110
|
* @returns HTML string with syntax highlighting using CSS classes (`.token_*`)
|
|
105
111
|
*
|
|
106
112
|
* @example
|
|
@@ -128,7 +134,42 @@ export class SyntaxStyler {
|
|
|
128
134
|
lang: string,
|
|
129
135
|
grammar: SyntaxGrammar | undefined = this.get_lang(lang),
|
|
130
136
|
): string {
|
|
131
|
-
|
|
137
|
+
// stringify with the post-hook `lang`, which a `before_tokenize` hook may
|
|
138
|
+
// have rewritten (it flows into each token's `wrap` hook context)
|
|
139
|
+
const c = this.#tokenize_hooked(text, lang, grammar);
|
|
140
|
+
return this.stringify_token(c.tokens, c.lang);
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
/**
|
|
144
|
+
* Tokenizes `text` into a `SyntaxTokenStream`, running the `before_tokenize`
|
|
145
|
+
* and `after_tokenize` hooks. This is the tokenization half of `stylize` — use
|
|
146
|
+
* it when you need the token stream itself (e.g. CSS Custom Highlight API range
|
|
147
|
+
* highlighting) rather than HTML.
|
|
148
|
+
*
|
|
149
|
+
* @param text - source to tokenize
|
|
150
|
+
* @param lang - language identifier; passed to the tokenize hooks
|
|
151
|
+
* @param grammar - grammar to tokenize with; defaults to `this.get_lang(lang)`
|
|
152
|
+
* @returns the resulting token stream
|
|
153
|
+
*/
|
|
154
|
+
tokenize(
|
|
155
|
+
text: string,
|
|
156
|
+
lang: string,
|
|
157
|
+
grammar: SyntaxGrammar | undefined = this.get_lang(lang),
|
|
158
|
+
): SyntaxTokenStream {
|
|
159
|
+
return this.#tokenize_hooked(text, lang, grammar).tokens;
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
/**
|
|
163
|
+
* Runs `before_tokenize` → `tokenize_syntax` → `after_tokenize`, returning the
|
|
164
|
+
* resolved context. Shared by `stylize` (which also needs the post-hook `lang`)
|
|
165
|
+
* and `tokenize` (which only needs `tokens`).
|
|
166
|
+
*/
|
|
167
|
+
#tokenize_hooked(
|
|
168
|
+
text: string,
|
|
169
|
+
lang: string,
|
|
170
|
+
grammar: SyntaxGrammar,
|
|
171
|
+
): HookAfterTokenizeCallbackContext {
|
|
172
|
+
const ctx: HookBeforeTokenizeCallbackContext = {
|
|
132
173
|
code: text,
|
|
133
174
|
grammar,
|
|
134
175
|
lang,
|
|
@@ -138,7 +179,7 @@ export class SyntaxStyler {
|
|
|
138
179
|
const c = ctx as any as HookAfterTokenizeCallbackContext;
|
|
139
180
|
c.tokens = tokenize_syntax(c.code, c.grammar);
|
|
140
181
|
this.run_hook_after_tokenize(c);
|
|
141
|
-
return
|
|
182
|
+
return c;
|
|
142
183
|
}
|
|
143
184
|
|
|
144
185
|
/**
|
|
@@ -158,7 +199,7 @@ export class SyntaxStyler {
|
|
|
158
199
|
* };
|
|
159
200
|
* ```
|
|
160
201
|
*
|
|
161
|
-
* then the `style` token will be added (and processed) at the end. `
|
|
202
|
+
* then the `style` token will be added (and processed) at the end. `grammar_insert_before` allows you to insert tokens
|
|
162
203
|
* before existing tokens. For the CSS example above, you would use it like this:
|
|
163
204
|
*
|
|
164
205
|
* ```js
|
|
@@ -185,12 +226,12 @@ export class SyntaxStyler {
|
|
|
185
226
|
*
|
|
186
227
|
* ## Limitations
|
|
187
228
|
*
|
|
188
|
-
* The main problem `
|
|
229
|
+
* The main problem `grammar_insert_before` has to solve is iteration order. Since ES2015, the iteration order for object
|
|
189
230
|
* properties is guaranteed to be the insertion order (except for integer keys) but some browsers behave
|
|
190
|
-
* differently when keys are deleted and re-inserted. So `
|
|
231
|
+
* differently when keys are deleted and re-inserted. So `grammar_insert_before` can't be implemented by temporarily
|
|
191
232
|
* deleting properties which is necessary to insert at arbitrary positions.
|
|
192
233
|
*
|
|
193
|
-
* To solve this problem, `
|
|
234
|
+
* To solve this problem, `grammar_insert_before` doesn't actually insert the given tokens into the target object.
|
|
194
235
|
* Instead, it will create a new object and replace all references to the target object with the new one. This
|
|
195
236
|
* can be done without temporarily deleting properties, so the iteration order is well-defined.
|
|
196
237
|
*
|
|
@@ -205,16 +246,13 @@ export class SyntaxStyler {
|
|
|
205
246
|
* assert(newMarkup === syntax_styler.get_lang('markup'));
|
|
206
247
|
* ```
|
|
207
248
|
*
|
|
208
|
-
* @param inside -
|
|
209
|
-
* object to be modified
|
|
210
|
-
* @param before -
|
|
211
|
-
* @param insert -
|
|
212
|
-
* @param root -
|
|
213
|
-
* object to be modified.
|
|
214
|
-
*
|
|
215
|
-
* Defaults to `syntax_styler.langs`.
|
|
216
|
-
*
|
|
217
|
-
* @returns the new grammar object
|
|
249
|
+
* @param inside - the property of `root` (e.g. a language id in `syntax_styler.langs`) that contains the
|
|
250
|
+
* object to be modified
|
|
251
|
+
* @param before - the key to insert before
|
|
252
|
+
* @param insert - an object containing the key-value pairs to be inserted
|
|
253
|
+
* @param root - the object containing `inside`, i.e. the object that contains the
|
|
254
|
+
* object to be modified; defaults to `syntax_styler.langs`
|
|
255
|
+
* @returns the new `SyntaxGrammar` object
|
|
218
256
|
*/
|
|
219
257
|
grammar_insert_before(
|
|
220
258
|
inside: string,
|
|
@@ -261,16 +299,15 @@ export class SyntaxStyler {
|
|
|
261
299
|
*
|
|
262
300
|
* Runs the `wrap` hook on each `SyntaxToken`.
|
|
263
301
|
*
|
|
264
|
-
* @param o -
|
|
265
|
-
* @param lang -
|
|
266
|
-
* @returns
|
|
302
|
+
* @param o - the token or `SyntaxTokenStream` to be converted
|
|
303
|
+
* @param lang - the name of current language
|
|
304
|
+
* @returns HTML representation of the token or token stream
|
|
267
305
|
*/
|
|
268
306
|
stringify_token(o: string | SyntaxToken | SyntaxTokenStream, lang: string): string {
|
|
269
307
|
if (typeof o === 'string') {
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
.replace(/\u00a0/g, ' ');
|
|
308
|
+
// single pass over the leaf text (only `&` and `<` need escaping in text
|
|
309
|
+
// content; `\u00a0` is normalized to a regular space)
|
|
310
|
+
return o.replace(/[&<\u00a0]/g, escape_text_char);
|
|
274
311
|
}
|
|
275
312
|
if (Array.isArray(o)) {
|
|
276
313
|
var s = '';
|
|
@@ -280,21 +317,29 @@ export class SyntaxStyler {
|
|
|
280
317
|
return s;
|
|
281
318
|
}
|
|
282
319
|
|
|
320
|
+
var content = this.stringify_token(o.content, lang);
|
|
321
|
+
|
|
322
|
+
// build the class list once; aliases are always an array after normalization
|
|
323
|
+
var classes = `token_${o.type}`;
|
|
324
|
+
for (const a of o.alias) {
|
|
325
|
+
classes += ` token_${a}`;
|
|
326
|
+
}
|
|
327
|
+
|
|
328
|
+
// fast path: with no `wrap` hooks the tag is always a plain <span> with no
|
|
329
|
+
// attributes, so skip the per-token context object and hook dispatch
|
|
330
|
+
if (this.hooks_wrap.length === 0) {
|
|
331
|
+
return '<span class="' + classes + '">' + content + '</span>';
|
|
332
|
+
}
|
|
333
|
+
|
|
283
334
|
var ctx: HookWrapCallbackContext = {
|
|
284
335
|
type: o.type,
|
|
285
|
-
content
|
|
336
|
+
content,
|
|
286
337
|
tag: 'span',
|
|
287
|
-
classes:
|
|
338
|
+
classes: classes.split(' '),
|
|
288
339
|
attributes: {},
|
|
289
340
|
lang,
|
|
290
341
|
};
|
|
291
342
|
|
|
292
|
-
var aliases = o.alias;
|
|
293
|
-
// alias is always an array after normalization
|
|
294
|
-
for (const a of aliases) {
|
|
295
|
-
ctx.classes.push(`token_${a}`);
|
|
296
|
-
}
|
|
297
|
-
|
|
298
343
|
this.run_hook_wrap(ctx);
|
|
299
344
|
|
|
300
345
|
var attributes = '';
|
|
@@ -332,9 +377,9 @@ export class SyntaxStyler {
|
|
|
332
377
|
* Therefore, it is encouraged to order overwriting tokens according to the positions of the overwritten tokens.
|
|
333
378
|
* Furthermore, all non-overwriting tokens should be placed after the overwriting ones.
|
|
334
379
|
*
|
|
335
|
-
* @param base_id -
|
|
336
|
-
* @param extension -
|
|
337
|
-
* @returns the new
|
|
380
|
+
* @param base_id - the id of the language to extend, must be a key in `syntax_styler.langs`
|
|
381
|
+
* @param extension - the new tokens to append
|
|
382
|
+
* @returns the new `SyntaxGrammar`
|
|
338
383
|
*/
|
|
339
384
|
extend_grammar(base_id: string, extension: SyntaxGrammarRaw): SyntaxGrammar {
|
|
340
385
|
// Merge normalized base with un-normalized extension
|
|
@@ -346,7 +391,7 @@ export class SyntaxStyler {
|
|
|
346
391
|
}
|
|
347
392
|
|
|
348
393
|
/**
|
|
349
|
-
*
|
|
394
|
+
* Normalizes a single pattern to have consistent shape.
|
|
350
395
|
* This ensures all patterns have the same object shape for V8 optimization.
|
|
351
396
|
*/
|
|
352
397
|
#normalize_pattern(
|
|
@@ -387,15 +432,15 @@ export class SyntaxStyler {
|
|
|
387
432
|
}
|
|
388
433
|
|
|
389
434
|
/**
|
|
390
|
-
*
|
|
435
|
+
* Normalizes a grammar to have consistent object shapes.
|
|
391
436
|
* This performs several optimizations:
|
|
392
|
-
* 1. Merges `rest` property into main grammar
|
|
393
|
-
* 2. Ensures all pattern values are arrays
|
|
394
|
-
* 3. Normalizes all pattern objects to have consistent shapes
|
|
395
|
-
* 4. Adds global flag to greedy patterns
|
|
437
|
+
* 1. Merges `rest` property into main grammar.
|
|
438
|
+
* 2. Ensures all pattern values are arrays.
|
|
439
|
+
* 3. Normalizes all pattern objects to have consistent shapes.
|
|
440
|
+
* 4. Adds global flag to greedy patterns.
|
|
396
441
|
*
|
|
397
442
|
* This is called once at registration time to avoid runtime overhead.
|
|
398
|
-
* @param visited -
|
|
443
|
+
* @param visited - set of grammar object IDs already normalized (for circular references)
|
|
399
444
|
*/
|
|
400
445
|
#normalize_grammar(grammar: SyntaxGrammarRaw, visited: Set<number>): void {
|
|
401
446
|
// Check if we've already normalized this grammar (circular reference)
|
|
@@ -510,14 +555,14 @@ export interface SyntaxGrammarTokenRaw {
|
|
|
510
555
|
*/
|
|
511
556
|
alias?: string | Array<string>;
|
|
512
557
|
/**
|
|
513
|
-
* The nested
|
|
558
|
+
* The nested `SyntaxGrammarRaw` of this token.
|
|
514
559
|
*/
|
|
515
560
|
inside?: SyntaxGrammarRaw | null;
|
|
516
561
|
}
|
|
517
562
|
|
|
518
563
|
/**
|
|
519
564
|
* Grammar token with all properties required.
|
|
520
|
-
* This is the normalized representation used at runtime.
|
|
565
|
+
* This is the normalized representation of `SyntaxGrammarTokenRaw` used at runtime.
|
|
521
566
|
*/
|
|
522
567
|
export interface SyntaxGrammarToken {
|
|
523
568
|
pattern: RegExp;
|
|
@@ -529,7 +574,7 @@ export interface SyntaxGrammarToken {
|
|
|
529
574
|
|
|
530
575
|
/**
|
|
531
576
|
* A grammar after normalization.
|
|
532
|
-
* All values are arrays of normalized
|
|
577
|
+
* All values are arrays of normalized `SyntaxGrammarToken` with consistent shapes.
|
|
533
578
|
*/
|
|
534
579
|
export type SyntaxGrammar = Record<string, Array<SyntaxGrammarToken>>;
|
|
535
580
|
|
package/src/lib/syntax_token.ts
CHANGED
|
@@ -2,14 +2,14 @@ export class SyntaxToken {
|
|
|
2
2
|
/**
|
|
3
3
|
* The type of the token.
|
|
4
4
|
*
|
|
5
|
-
* This is usually the key of a pattern in a `
|
|
5
|
+
* This is usually the key of a pattern in a `SyntaxGrammar`.
|
|
6
6
|
*/
|
|
7
7
|
type: string;
|
|
8
8
|
|
|
9
9
|
/**
|
|
10
10
|
* The strings or tokens contained by this token.
|
|
11
11
|
*
|
|
12
|
-
* This will be a
|
|
12
|
+
* This will be a `SyntaxTokenStream` if the pattern matched also defined an `inside` grammar.
|
|
13
13
|
*/
|
|
14
14
|
content: string | SyntaxTokenStream;
|
|
15
15
|
|
|
@@ -38,8 +38,8 @@ export class SyntaxToken {
|
|
|
38
38
|
/**
|
|
39
39
|
* A token stream is an array of strings and `SyntaxToken` objects.
|
|
40
40
|
*
|
|
41
|
-
*
|
|
42
|
-
* them.
|
|
41
|
+
* `SyntaxTokenStream` values have to fulfill a few properties that are assumed by most functions
|
|
42
|
+
* (mostly internal ones) that process them.
|
|
43
43
|
*
|
|
44
44
|
* 1. No adjacent strings.
|
|
45
45
|
* 2. No empty strings.
|
|
package/src/lib/tokenize_syntax.ts
CHANGED
|
@@ -10,11 +10,9 @@ import {SyntaxToken, type SyntaxTokenStream} from './syntax_token.js';
|
|
|
10
10
|
* This method could be useful in other contexts as well, as a very crude parser.
|
|
11
11
|
*
|
|
12
12
|
* @param text - a string with the code to be styled
|
|
13
|
-
* @param grammar -
|
|
14
|
-
*
|
|
13
|
+
* @param grammar - a `SyntaxGrammar` object containing the tokens to use.
|
|
15
14
|
* Usually a language definition like `syntax_styler.get_lang('markup')`.
|
|
16
|
-
*
|
|
17
|
-
* @returns an array of strings and tokens, a token stream
|
|
15
|
+
* @returns a `SyntaxTokenStream` array of strings and tokens
|
|
18
16
|
*
|
|
19
17
|
* @example
|
|
20
18
|
* ```ts
|