npm - @fuzdev/fuz_code - Versions diffs - 0.45.1 → 0.46.1 - Mend

@fuzdev/fuz_code 0.45.1 → 0.46.1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (39)

package/README.md +1 -0
package/dist/Code.svelte +2 -2
package/dist/Code.svelte.d.ts +2 -2
package/dist/CodeHighlight.svelte +18 -54
package/dist/CodeHighlight.svelte.d.ts +4 -4
package/dist/CodeHighlight.svelte.d.ts.map +1 -1
package/dist/CodeTextarea.svelte +149 -0
package/dist/CodeTextarea.svelte.d.ts +43 -0
package/dist/CodeTextarea.svelte.d.ts.map +1 -0
package/dist/grammar_markdown.js +3 -3
package/dist/grammar_markup.d.ts +8 -7
package/dist/grammar_markup.d.ts.map +1 -1
package/dist/grammar_markup.js +8 -7
package/dist/highlight_manager.d.ts +21 -7
package/dist/highlight_manager.d.ts.map +1 -1
package/dist/highlight_manager.js +130 -74
package/dist/range_highlighting.svelte.d.ts +39 -0
package/dist/range_highlighting.svelte.d.ts.map +1 -0
package/dist/range_highlighting.svelte.js +57 -0
package/dist/svelte_preprocess_fuz_code.d.ts +4 -4
package/dist/svelte_preprocess_fuz_code.d.ts.map +1 -1
package/dist/svelte_preprocess_fuz_code.js +3 -3
package/dist/syntax_styler.d.ts +40 -32
package/dist/syntax_styler.d.ts.map +1 -1
package/dist/syntax_styler.js +81 -49
package/dist/syntax_token.d.ts +4 -4
package/dist/syntax_token.js +2 -2
package/dist/tokenize_syntax.d.ts +2 -4
package/dist/tokenize_syntax.d.ts.map +1 -1
package/dist/tokenize_syntax.js +2 -4
package/package.json +27 -29
package/src/lib/grammar_markdown.ts +3 -3
package/src/lib/grammar_markup.ts +8 -7
package/src/lib/highlight_manager.ts +154 -84
package/src/lib/range_highlighting.svelte.ts +100 -0
package/src/lib/svelte_preprocess_fuz_code.ts +6 -6
package/src/lib/syntax_styler.ts +98 -53
package/src/lib/syntax_token.ts +4 -4
package/src/lib/tokenize_syntax.ts +2 -4

package/src/lib/syntax_styler.ts (changed)

@@ -3,6 +3,13 @@ import {tokenize_syntax} from './tokenize_syntax.js';
 export type AddSyntaxGrammar = (syntax_styler: SyntaxStyler) => void;
+/**
+ * Maps a matched `&`, `<`, or non-breaking space in text content to its
+ * HTML-safe form. Used as the replacer in `stringify_token` for leaf strings
+ * (non-breaking spaces are normalized to a regular space).
+ */
+const escape_text_char = (ch: string): string => (ch === '&' ? '&amp;' : ch === '<' ? '&lt;' : ' ');
 /**
  * Based on Prism (https://github.com/PrismJS/prism)
  * by Lea Verou (https://lea.verou.me/)
@@ -92,15 +99,14 @@ export class SyntaxStyler {
 	 * - Custom grammar: `stylize(code, 'ts', customGrammar)` - uses custom grammar but keeps 'ts' label
 	 * - Extended grammar: `stylize(code, 'custom', this.extend_grammar('ts', extension))` - new language variant
 	 *
-	 * @param text - The source code to syntax highlight.
-	 * @param lang - Language identifier (e.g., 'ts', 'css', 'html'). Used for:
-	 *   - Grammar lookup when `grammar` is undefined
-	 *   - Hook context (`lang` field passed to hooks)
-	 *   - Language identification in output
-	 * @param grammar - Optional custom grammar object. When undefined, automatically
-	 *   looks up the grammar via `this.get_lang(lang)`. Provide this to use a custom
-	 *   or modified grammar instead of the registered one.
-	 *
+	 * @param text - the source code to syntax highlight
+	 * @param lang - language identifier (e.g., 'ts', 'css', 'html'), used for:
+	 *   - grammar lookup when `grammar` is undefined
+	 *   - hook context (`lang` field passed to hooks)
+	 *   - language identification in output
+	 * @param grammar - optional custom `SyntaxGrammar` object; when undefined, automatically
+	 *   looks up the grammar via `this.get_lang(lang)`; provide this to use a custom
+	 *   or modified grammar instead of the registered one
 	 * @returns HTML string with syntax highlighting using CSS classes (`.token_*`)
 	 *
 	 * @example
@@ -128,7 +134,42 @@ export class SyntaxStyler {
 		lang: string,
 		grammar: SyntaxGrammar | undefined = this.get_lang(lang),
 	): string {
-		var ctx: HookBeforeTokenizeCallbackContext = {
+		// stringify with the post-hook `lang`, which a `before_tokenize` hook may
+		// have rewritten (it flows into each token's `wrap` hook context)
+		const c = this.#tokenize_hooked(text, lang, grammar);
+		return this.stringify_token(c.tokens, c.lang);
+	}
+	/**
+	 * Tokenizes `text` into a `SyntaxTokenStream`, running the `before_tokenize`
+	 * and `after_tokenize` hooks. This is the tokenization half of `stylize` — use
+	 * it when you need the token stream itself (e.g. CSS Custom Highlight API range
+	 * highlighting) rather than HTML.
+	 *
+	 * @param text - source to tokenize
+	 * @param lang - language identifier; passed to the tokenize hooks
+	 * @param grammar - grammar to tokenize with; defaults to `this.get_lang(lang)`
+	 * @returns the resulting token stream
+	 */
+	tokenize(
+		text: string,
+		lang: string,
+		grammar: SyntaxGrammar | undefined = this.get_lang(lang),
+	): SyntaxTokenStream {
+		return this.#tokenize_hooked(text, lang, grammar).tokens;
+	}
+	/**
+	 * Runs `before_tokenize` → `tokenize_syntax` → `after_tokenize`, returning the
+	 * resolved context. Shared by `stylize` (which also needs the post-hook `lang`)
+	 * and `tokenize` (which only needs `tokens`).
+	 */
+	#tokenize_hooked(
+		text: string,
+		lang: string,
+		grammar: SyntaxGrammar,
+	): HookAfterTokenizeCallbackContext {
+		const ctx: HookBeforeTokenizeCallbackContext = {
 			code: text,
 			grammar,
 			lang,
@@ -138,7 +179,7 @@ export class SyntaxStyler {
 		const c = ctx as any as HookAfterTokenizeCallbackContext;
 		c.tokens = tokenize_syntax(c.code, c.grammar);
 		this.run_hook_after_tokenize(c);
-		return this.stringify_token(c.tokens, c.lang);
+		return c;
 	}
 	/**
@@ -158,7 +199,7 @@ export class SyntaxStyler {
 	 * };
 	 * ```
 	 *
-	 * then the `style` token will be added (and processed) at the end. `insert_before` allows you to insert tokens
+	 * then the `style` token will be added (and processed) at the end. `grammar_insert_before` allows you to insert tokens
 	 * before existing tokens. For the CSS example above, you would use it like this:
 	 *
 	 * ```js
@@ -185,12 +226,12 @@ export class SyntaxStyler {
 	 *
 	 * ## Limitations
 	 *
-	 * The main problem `insert_before` has to solve is iteration order. Since ES2015, the iteration order for object
+	 * The main problem `grammar_insert_before` has to solve is iteration order. Since ES2015, the iteration order for object
 	 * properties is guaranteed to be the insertion order (except for integer keys) but some browsers behave
-	 * differently when keys are deleted and re-inserted. So `insert_before` can't be implemented by temporarily
+	 * differently when keys are deleted and re-inserted. So `grammar_insert_before` can't be implemented by temporarily
 	 * deleting properties which is necessary to insert at arbitrary positions.
 	 *
-	 * To solve this problem, `insert_before` doesn't actually insert the given tokens into the target object.
+	 * To solve this problem, `grammar_insert_before` doesn't actually insert the given tokens into the target object.
 	 * Instead, it will create a new object and replace all references to the target object with the new one. This
 	 * can be done without temporarily deleting properties, so the iteration order is well-defined.
 	 *
@@ -205,16 +246,13 @@ export class SyntaxStyler {
 	 * assert(newMarkup === syntax_styler.get_lang('markup'));
 	 * ```
 	 *
-	 * @param inside - The property of `root` (e.g. a language id in `syntax_styler.langs`) that contains the
-	 * object to be modified.
-	 * @param before - The key to insert before.
-	 * @param insert - An object containing the key-value pairs to be inserted.
-	 * @param root - The object containing `inside`, i.e. the object that contains the
-	 * object to be modified.
-	 *
-	 * Defaults to `syntax_styler.langs`.
-	 *
-	 * @returns the new grammar object
+	 * @param inside - the property of `root` (e.g. a language id in `syntax_styler.langs`) that contains the
+	 * object to be modified
+	 * @param before - the key to insert before
+	 * @param insert - an object containing the key-value pairs to be inserted
+	 * @param root - the object containing `inside`, i.e. the object that contains the
+	 * object to be modified; defaults to `syntax_styler.langs`
+	 * @returns the new `SyntaxGrammar` object
 	 */
 	grammar_insert_before(
 		inside: string,
@@ -261,16 +299,15 @@ export class SyntaxStyler {
 	 *
 	 * Runs the `wrap` hook on each `SyntaxToken`.
 	 *
-	 * @param o - The token or token stream to be converted.
-	 * @param lang - The name of current language.
-	 * @returns The HTML representation of the token or token stream.
+	 * @param o - the token or `SyntaxTokenStream` to be converted
+	 * @param lang - the name of current language
+	 * @returns HTML representation of the token or token stream
 	 */
 	stringify_token(o: string | SyntaxToken | SyntaxTokenStream, lang: string): string {
 		if (typeof o === 'string') {
-			return o
-				.replace(/&/g, '&amp;')
-				.replace(/</g, '&lt;')
-				.replace(/\u00a0/g, ' ');
+			// single pass over the leaf text (only `&` and `<` need escaping in text
+			// content; `\u00a0` is normalized to a regular space)
+			return o.replace(/[&<\u00a0]/g, escape_text_char);
 		}
 		if (Array.isArray(o)) {
 			var s = '';
@@ -280,21 +317,29 @@ export class SyntaxStyler {
 			return s;
 		}
+		var content = this.stringify_token(o.content, lang);
+		// build the class list once; aliases are always an array after normalization
+		var classes = `token_${o.type}`;
+		for (const a of o.alias) {
+			classes += ` token_${a}`;
+		}
+		// fast path: with no `wrap` hooks the tag is always a plain <span> with no
+		// attributes, so skip the per-token context object and hook dispatch
+		if (this.hooks_wrap.length === 0) {
+			return '<span class="' + classes + '">' + content + '</span>';
+		}
 		var ctx: HookWrapCallbackContext = {
 			type: o.type,
-			content: this.stringify_token(o.content, lang),
+			content,
 			tag: 'span',
-			classes: [`token_${o.type}`],
+			classes: classes.split(' '),
 			attributes: {},
 			lang,
 		};
-		var aliases = o.alias;
-		// alias is always an array after normalization
-		for (const a of aliases) {
-			ctx.classes.push(`token_${a}`);
-		}
 		this.run_hook_wrap(ctx);
 		var attributes = '';
@@ -332,9 +377,9 @@ export class SyntaxStyler {
 	 * Therefore, it is encouraged to order overwriting tokens according to the positions of the overwritten tokens.
 	 * Furthermore, all non-overwriting tokens should be placed after the overwriting ones.
 	 *
-	 * @param base_id - The id of the language to extend. This has to be a key in `syntax_styler.langs`.
-	 * @param extension - The new tokens to append.
-	 * @returns the new grammar
+	 * @param base_id - the id of the language to extend, must be a key in `syntax_styler.langs`
+	 * @param extension - the new tokens to append
+	 * @returns the new `SyntaxGrammar`
 	 */
 	extend_grammar(base_id: string, extension: SyntaxGrammarRaw): SyntaxGrammar {
 		// Merge normalized base with un-normalized extension
@@ -346,7 +391,7 @@ export class SyntaxStyler {
 	}
 	/**
-	 * Normalize a single pattern to have consistent shape.
+	 * Normalizes a single pattern to have consistent shape.
 	 * This ensures all patterns have the same object shape for V8 optimization.
 	 */
 	#normalize_pattern(
@@ -387,15 +432,15 @@ export class SyntaxStyler {
 	}
 	/**
-	 * Normalize a grammar to have consistent object shapes.
+	 * Normalizes a grammar to have consistent object shapes.
 	 * This performs several optimizations:
-	 * 1. Merges `rest` property into main grammar
-	 * 2. Ensures all pattern values are arrays
-	 * 3. Normalizes all pattern objects to have consistent shapes
-	 * 4. Adds global flag to greedy patterns
+	 * 1. Merges `rest` property into main grammar.
+	 * 2. Ensures all pattern values are arrays.
+	 * 3. Normalizes all pattern objects to have consistent shapes.
+	 * 4. Adds global flag to greedy patterns.
 	 *
 	 * This is called once at registration time to avoid runtime overhead.
-	 * @param visited - Set of grammar object IDs already normalized (for circular references)
+	 * @param visited - set of grammar object IDs already normalized (for circular references)
 	 */
 	#normalize_grammar(grammar: SyntaxGrammarRaw, visited: Set<number>): void {
 		// Check if we've already normalized this grammar (circular reference)
@@ -510,14 +555,14 @@ export interface SyntaxGrammarTokenRaw {
 	 */
 	alias?: string | Array<string>;
 	/**
-	 * The nested grammar of this token.
+	 * The nested `SyntaxGrammarRaw` of this token.
 	 */
 	inside?: SyntaxGrammarRaw | null;
 }
 /**
  * Grammar token with all properties required.
- * This is the normalized representation used at runtime.
+ * This is the normalized representation of `SyntaxGrammarTokenRaw` used at runtime.
  */
 export interface SyntaxGrammarToken {
 	pattern: RegExp;
@@ -529,7 +574,7 @@ export interface SyntaxGrammarToken {
 /**
  * A grammar after normalization.
- * All values are arrays of normalized tokens with consistent shapes.
+ * All values are arrays of normalized `SyntaxGrammarToken` with consistent shapes.
  */
 export type SyntaxGrammar = Record<string, Array<SyntaxGrammarToken>>;

package/src/lib/syntax_token.ts (changed)

@@ -2,14 +2,14 @@ export class SyntaxToken {
 	/**
 	 * The type of the token.
 	 *
-	 * This is usually the key of a pattern in a `Grammar`.
+	 * This is usually the key of a pattern in a `SyntaxGrammar`.
 	 */
 	type: string;
 	/**
 	 * The strings or tokens contained by this token.
 	 *
-	 * This will be a token stream if the pattern matched also defined an `inside` grammar.
+	 * This will be a `SyntaxTokenStream` if the pattern matched also defined an `inside` grammar.
 	 */
 	content: string | SyntaxTokenStream;
@@ -38,8 +38,8 @@ export class SyntaxToken {
 /**
  * A token stream is an array of strings and `SyntaxToken` objects.
  *
- * Syntax token streams have to fulfill a few properties that are assumed by most functions (mostly internal ones) that process
- * them.
+ * `SyntaxTokenStream` values have to fulfill a few properties that are assumed by most functions
+ * (mostly internal ones) that process them.
  *
  * 1. No adjacent strings.
  * 2. No empty strings.

package/src/lib/tokenize_syntax.ts (changed)

@@ -10,11 +10,9 @@ import {SyntaxToken, type SyntaxTokenStream} from './syntax_token.js';
  * This method could be useful in other contexts as well, as a very crude parser.
  *
  * @param text - a string with the code to be styled
- * @param grammar - an object containing the tokens to use
- *
+ * @param grammar - a `SyntaxGrammar` object containing the tokens to use.
  * Usually a language definition like `syntax_styler.get_lang('markup')`.
- *
- * @returns an array of strings and tokens, a token stream
+ * @returns a `SyntaxTokenStream` array of strings and tokens
  *
  * @example
  * ```ts