npm - @humanspeak/svelte-markdown - Versions diffs - 0.7.4 → 0.7.5 - Mend

@humanspeak/svelte-markdown 0.7.4 → 0.7.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

package/dist/Parser.svelte +62 -5
package/dist/Parser.svelte.d.ts +37 -0
package/dist/SvelteMarkdown.svelte +47 -37
package/dist/SvelteMarkdown.svelte.d.ts +5 -10
package/dist/index.d.ts +2 -1
package/dist/renderers/TableCell.svelte +5 -2
package/dist/types.d.ts +28 -0
package/dist/types.js +20 -0
package/dist/utils/markdown-parser.d.ts +66 -2
package/dist/utils/markdown-parser.js +25 -0
package/dist/utils/token-cleanup.d.ts +122 -7
package/dist/utils/token-cleanup.js +196 -68
package/package.json +1 -1

package/dist/Parser.svelte CHANGED Viewed

@@ -1,4 +1,42 @@
 <script lang="ts">
+    /**
+     * @component Parser
+     *
+     * Recursive markdown token parser that transforms tokens into Svelte components.
+     * This component is the core rendering engine of the markdown system, handling
+     * the transformation of parsed markdown tokens into their corresponding Svelte components.
+     *
+     * @example
+     * ```svelte
+     * <Parser
+     *   tokens={parsedTokens}
+     *   renderers={customRenderers}
+     *   type="paragraph"
+     * />
+     * ```
+     *
+     * Features:
+     * - Recursive token parsing
+     * - Custom renderer support
+     * - Special handling for tables, lists, and HTML content
+     * - Type-safe component rendering
+     *
+     * @typedef {Object} Props
+     * @property {string} [type] - Token type for direct component rendering
+     * @property {Token[] | TokensList} [tokens] - Markdown tokens to be rendered
+     * @property {Tokens.TableCell[]} [header] - Table header cells for table rendering
+     * @property {Tokens.TableCell[][]} [rows] - Table row cells for table rendering
+     * @property {boolean} [ordered=false] - Whether the list is ordered (for list rendering)
+     * @property {Renderers} renderers - Component mapping for markdown elements
+     *
+     * Implementation Notes:
+     * - Uses recursive rendering for nested tokens
+     * - Implements special logic for tables, lists, and HTML content
+     * - Handles component prop spreading carefully to avoid conflicts
+     * - Maintains type safety through TypeScript interfaces
+     *
+     */
     import Parser from './Parser.svelte'
     import Html from './renderers/html/index.js'
     import type {
@@ -44,10 +82,11 @@
             <renderers.tablehead {...rest}>
                 <renderers.tablerow {...rest}>
                     {#each header ?? [] as headerItem, i}
+                        {@const { align: _align, ...cellRest } = rest}
                         <renderers.tablecell
                             header={true}
-                            align={(rest.align as string[])[i] || 'center'}
-                            {...rest}
+                            align={(rest.align as string[])[i]}
+                            {...cellRest}
                         >
                             <Parser tokens={headerItem.tokens} {renderers} />
                         </renderers.tablecell>
@@ -58,12 +97,30 @@
                 {#each rows ?? [] as row}
                     <renderers.tablerow {...rest}>
                         {#each row ?? [] as cells, i}
+                            {@const { align: _align, ...cellRest } = rest}
                             <renderers.tablecell
                                 header={false}
-                                align={(rest.align as string[])[i] ?? 'center'}
-                                {...rest}
+                                align={(rest.align as string[])[i]}
+                                {...cellRest}
                             >
-                                <Parser tokens={cells.tokens} {renderers} />
+                                {#if cells.type === 'html'}
+                                    {@const { tag, ...localRest } = cells}
+                                    {@const htmlTag = cells.tag as keyof typeof Html}
+                                    {#if htmlTag in Html}
+                                        {@const HtmlComponent = Html[htmlTag]}
+                                        <HtmlComponent {...cells}>
+                                            {#if cells.tokens?.length}
+                                                <Parser
+                                                    tokens={cells.tokens}
+                                                    {renderers}
+                                                    {...localRest}
+                                                />
+                                            {/if}
+                                        </HtmlComponent>
+                                    {/if}
+                                {:else}
+                                    <Parser tokens={cells.tokens} {renderers} />
+                                {/if}
                             </renderers.tablecell>
                         {/each}
                     </renderers.tablerow>

package/dist/Parser.svelte.d.ts CHANGED Viewed

@@ -1,3 +1,40 @@
+/**
+     * @component Parser
+     *
+     * Recursive markdown token parser that transforms tokens into Svelte components.
+     * This component is the core rendering engine of the markdown system, handling
+     * the transformation of parsed markdown tokens into their corresponding Svelte components.
+     *
+     * @example
+     * ```svelte
+     * <Parser
+     *   tokens={parsedTokens}
+     *   renderers={customRenderers}
+     *   type="paragraph"
+     * />
+     * ```
+     *
+     * Features:
+     * - Recursive token parsing
+     * - Custom renderer support
+     * - Special handling for tables, lists, and HTML content
+     * - Type-safe component rendering
+     *
+     * @typedef {Object} Props
+     * @property {string} [type] - Token type for direct component rendering
+     * @property {Token[] | TokensList} [tokens] - Markdown tokens to be rendered
+     * @property {Tokens.TableCell[]} [header] - Table header cells for table rendering
+     * @property {Tokens.TableCell[][]} [rows] - Table row cells for table rendering
+     * @property {boolean} [ordered=false] - Whether the list is ordered (for list rendering)
+     * @property {Renderers} renderers - Component mapping for markdown elements
+     *
+     * Implementation Notes:
+     * - Uses recursive rendering for nested tokens
+     * - Implements special logic for tables, lists, and HTML content
+     * - Handles component prop spreading carefully to avoid conflicts
+     * - Maintains type safety through TypeScript interfaces
+     *
+     */
 import Parser from './Parser.svelte';
 import type { Renderers, Token, TokensList, Tokens } from './utils/markdown-parser.js';
 declare const Parser: import("svelte").Component<{

package/dist/SvelteMarkdown.svelte CHANGED Viewed

@@ -23,6 +23,29 @@
  @property {function} [parsed] - Callback function called with the parsed tokens
 -->
 <script lang="ts">
+    /**
+     * Component Evolution & Design Notes:
+     *
+     * 1. Core Purpose:
+     * - Serves as the main entry point for markdown rendering in Svelte
+     * - Handles both string input and pre-parsed tokens for flexibility
+     *
+     * 2. Key Design Decisions:
+     * - Uses a separate Parser component for actual rendering to maintain separation of concerns
+     * - Implements token cleanup via shrinkHtmlTokens to optimize HTML token handling
+     * - Derives tokens with Svelte 5's $derived.by and reports them to the parsed callback via $effect
+     *
+     * 3. Performance Considerations:
+     * - Tokens are a $derived value, so they are recomputed only when their inputs change
+     * - The Lexer instance is created lazily on first use and then reused
+     * - Pre-parsed token arrays are passed through as-is; only string sources are lexed
+     *
+     * 4. Extensibility:
+     * - Supports custom renderers through composition pattern
+     * - Allows parser configuration via options prop
+     * - Provides parsed callback for external token access
+     */
     import {
         Lexer,
         defaultOptions,
@@ -30,19 +53,11 @@
         Slugger,
         type Token,
         type TokensList,
-        type SvelteMarkdownOptions,
-        type Renderers
+        type SvelteMarkdownOptions
     } from './utils/markdown-parser.js'
     import Parser from './Parser.svelte'
     import { shrinkHtmlTokens } from './utils/token-cleanup.js'
-    interface Props {
-        source: Token[] | string
-        renderers?: Partial<Renderers>
-        options?: SvelteMarkdownOptions
-        isInline?: boolean
-        parsed?: (tokens: Token[] | TokensList) => void // eslint-disable-line no-unused-vars
-    }
+    import { type SvelteMarkdownProps } from './types.js'
     const {
         source = [],
@@ -51,34 +66,31 @@
         isInline = false,
         parsed = () => {},
         ...rest
-    }: Props & {
+    }: SvelteMarkdownProps & {
         [key: string]: unknown
     } = $props()
-    // @ts-expect-error - Intentionally not using $state for tokens
-    let tokens: Token[] | undefined // eslint-disable-line svelte/valid-compile
-    let previousSource = $state<string | Token[] | undefined>(undefined)
-    let lexer: Lexer
-    const slugger = source ? new Slugger() : undefined
     const combinedOptions = { ...defaultOptions, ...options }
+    const slugger = source ? new Slugger() : undefined
+    let lexer: Lexer
-    $effect.pre(() => {
-        if (source === previousSource) return
-        previousSource = source
-        if (Array.isArray(source)) {
-            tokens = shrinkHtmlTokens(source) as Token[]
-        } else {
+    const tokens = $derived.by(() => {
+        if (!lexer) {
             lexer = new Lexer(combinedOptions)
-            tokens = shrinkHtmlTokens(
-                isInline ? lexer.inlineTokens(source as string) : lexer.lex(source as string)
-            )
         }
-    })
+        if (Array.isArray(source)) {
+            return source as Token[]
+        }
+        return source
+            ? (shrinkHtmlTokens(
+                  isInline ? lexer.inlineTokens(source as string) : lexer.lex(source as string)
+              ) as Token[])
+            : []
+    }) satisfies Token[] | TokensList | undefined
     $effect(() => {
         if (!tokens) return
-        parsed($state.snapshot(tokens))
+        parsed(tokens)
     })
     const combinedRenderers = {
@@ -91,12 +103,10 @@
     }
 </script>
-{#key source}
-    <Parser
-        {tokens}
-        {...rest}
-        options={combinedOptions}
-        slug={(val: string): string => (slugger ? slugger.slug(val) : '')}
-        renderers={combinedRenderers}
-    />
-{/key}
+<Parser
+    {tokens}
+    {...rest}
+    options={combinedOptions}
+    slug={(val: string): string => (slugger ? slugger.slug(val) : '')}
+    renderers={combinedRenderers}
+/>

package/dist/SvelteMarkdown.svelte.d.ts CHANGED Viewed

@@ -1,4 +1,7 @@
-import { type Token, type TokensList, type SvelteMarkdownOptions, type Renderers } from './utils/markdown-parser.js';
+import { type SvelteMarkdownProps } from './types.js';
+type $$ComponentProps = SvelteMarkdownProps & {
+    [key: string]: unknown;
+};
 /**
  * A Svelte component that renders Markdown content into HTML using a customizable parser.
  * Supports both string input and pre-parsed markdown tokens, with configurable rendering
@@ -21,14 +24,6 @@ import { type Token, type TokensList, type SvelteMarkdownOptions, type Renderers
  * @property {boolean} [isInline=false] - Whether to parse the content as inline markdown
  * @property {function} [parsed] - Callback function called with the parsed tokens
  */
-declare const SvelteMarkdown: import("svelte").Component<{
-    source: Token[] | string;
-    renderers?: Partial<Renderers>;
-    options?: SvelteMarkdownOptions;
-    isInline?: boolean;
-    parsed?: (tokens: Token[] | TokensList) => void;
-} & {
-    [key: string]: unknown;
-}, {}, "">;
+declare const SvelteMarkdown: import("svelte").Component<$$ComponentProps, {}, "">;
 type SvelteMarkdown = ReturnType<typeof SvelteMarkdown>;
 export default SvelteMarkdown;

package/dist/index.d.ts CHANGED Viewed

@@ -1,4 +1,5 @@
 import type { SvelteMarkdownOptions, Token, TokensList } from './utils/markdown-parser.js';
 import SvelteMarkdown from './SvelteMarkdown.svelte';
+import type { SvelteMarkdownProps } from './types.js';
 export default SvelteMarkdown;
-export type { SvelteMarkdownOptions, Token, TokensList };
+export type { SvelteMarkdownOptions, SvelteMarkdownProps, Token, TokensList };

package/dist/renderers/TableCell.svelte CHANGED Viewed

@@ -8,10 +8,13 @@
     }
     const { header, align, children }: Props = $props()
+    // Build an inline "text-align" style string when an alignment is specified
+    const style = $derived(align ? `text-align: ${align}` : undefined)
 </script>
 {#if header}
-    <th {align}>{@render children?.()}</th>
+    <th {style}>{@render children?.()}</th>
 {:else}
-    <td {align}>{@render children?.()}</td>
+    <td {style}>{@render children?.()}</td>
 {/if}

package/dist/types.d.ts ADDED Viewed

@@ -0,0 +1,28 @@
+/**
+ * Type definitions for the Svelte Markdown component.
+ *
+ * This module provides TypeScript type definitions for the core functionality
+ * of the Svelte Markdown parser and renderer. It defines the primary interface
+ * for component props and integrates with the marked library's token system.
+ *
+ * Typical usage example:
+ * ```typescript
+ * import type { SvelteMarkdownProps } from './types';
+ *
+ * const markdownProps: SvelteMarkdownProps = {
+ *   source: "# Hello World",
+ *   isInline: false
+ * };
+ * ```
+ *
+ * @packageDocumentation
+ */
+import type { Token, TokensList } from 'marked';
+import type { Renderers, SvelteMarkdownOptions } from './utils/markdown-parser.js';
+export type SvelteMarkdownProps = {
+    source: Token[] | string;
+    renderers?: Partial<Renderers>;
+    options?: SvelteMarkdownOptions;
+    isInline?: boolean;
+    parsed?: (tokens: Token[] | TokensList) => void;
+};

package/dist/types.js ADDED Viewed

@@ -0,0 +1,20 @@
+/**
+ * Type definitions for the Svelte Markdown component.
+ *
+ * This module provides TypeScript type definitions for the core functionality
+ * of the Svelte Markdown parser and renderer. It defines the primary interface
+ * for component props and integrates with the marked library's token system.
+ *
+ * Typical usage example:
+ * ```typescript
+ * import type { SvelteMarkdownProps } from './types';
+ *
+ * const markdownProps: SvelteMarkdownProps = {
+ *   source: "# Hello World",
+ *   isInline: false
+ * };
+ * ```
+ *
+ * @packageDocumentation
+ */
+export {};

package/dist/utils/markdown-parser.d.ts CHANGED Viewed

@@ -3,10 +3,25 @@ export { Lexer, type Token, type Tokens, type TokensList } from 'marked';
 import type { Component } from 'svelte';
 import { type HtmlRenderers } from '../renderers/html/index.js';
 /**
- * Type definition for markdown renderers
- * Maps each markdown element to its corresponding Svelte component
+ * Represents a Svelte component that can be used as a renderer.
+ * Allows for flexible component types while maintaining type safety.
+ *
+ * @typedef {Component<any, any, any> | undefined | null} RendererComponent
  */
 export type RendererComponent = Component<any, any, any> | undefined | null;
+/**
+ * Comprehensive mapping of markdown elements to their renderer components.
+ * Structured in categories for better organization and maintainability.
+ *
+ * Categories:
+ * - HTML: Special renderer for HTML content
+ * - Block elements: Major structural elements
+ * - Table elements: Table-specific components
+ * - Inline elements: Text-level components
+ * - List variations: Specialized list item renderers
+ *
+ * @interface Renderers
+ */
 export type Renderers = {
     html: HtmlRenderers;
     heading: RendererComponent;
@@ -32,12 +47,48 @@ export type Renderers = {
     orderedlistitem: RendererComponent;
     unorderedlistitem: RendererComponent;
 };
+/**
+ * Default renderer configuration mapping markdown elements to Svelte components.
+ * Provides out-of-the-box rendering capabilities while allowing for customization.
+ *
+ * Implementation notes:
+ * - Components are imported statically from '../renderers/index.js' (not lazy-loaded)
+ * - Null values indicate optional renderers
+ * - Components are type-checked against the Renderers interface
+ *
+ * @const {Renderers}
+ */
 export declare const defaultRenderers: Renderers;
+/**
+ * Configuration options for SvelteMarkdown parser.
+ * Extends marked options with additional Svelte-specific configurations.
+ *
+ * @interface SvelteMarkdownOptions
+ *
+ * @property {string|null} baseUrl - Base URL for relative links
+ * @property {boolean} breaks - Enable line breaks in output
+ * @property {boolean} gfm - Enable GitHub Flavored Markdown
+ * @property {boolean} headerIds - Auto-generate header IDs
+ * @property {string} headerPrefix - Prefix for header IDs
+ * @property {Function|null} highlight - Syntax highlighting function
+ * @property {string} langPrefix - Prefix for code block language classes
+ * @property {boolean} mangle - Encode email addresses
+ * @property {boolean} pedantic - Conform to original markdown spec
+ * @property {Object|null} renderer - Custom renderer
+ * @property {boolean} sanitize - Sanitize HTML input
+ * @property {Function|null} sanitizer - Custom sanitizer function
+ * @property {boolean} silent - Suppress error output
+ * @property {boolean} smartLists - Use smarter list behavior
+ * @property {boolean} smartypants - Use smart punctuation
+ * @property {Object|null} tokenizer - Custom tokenizer
+ * @property {boolean} xhtml - Generate XHTML-compliant tags
+ */
 export type SvelteMarkdownOptions = {
     baseUrl: string | null;
     breaks: boolean;
     gfm: boolean;
     headerIds: boolean;
+    tables: boolean;
     headerPrefix: string;
     highlight: null;
     langPrefix: string;
@@ -52,4 +103,17 @@ export type SvelteMarkdownOptions = {
     tokenizer: null;
     xhtml: boolean;
 };
+/**
+ * Default configuration options for the markdown parser.
+ * Provides sensible defaults while allowing for customization.
+ *
+ * Notable defaults:
+ * - GitHub Flavored Markdown enabled
+ * - Header IDs generated automatically
+ * - No syntax highlighting by default
+ * - HTML sanitization disabled
+ * - Standard markdown parsing rules
+ *
+ * @const {SvelteMarkdownOptions}
+ */
 export declare const defaultOptions: SvelteMarkdownOptions;

package/dist/utils/markdown-parser.js CHANGED Viewed

@@ -2,6 +2,17 @@ export { default as Slugger } from 'github-slugger';
 export { Lexer } from 'marked';
 import {} from '../renderers/html/index.js';
 import { Blockquote, Br, Code, Codespan, Del, Em, Heading, Hr, Html, Image, Link, List, ListItem, Paragraph, Strong, Table, TableBody, TableCell, TableHead, TableRow, Text } from '../renderers/index.js';
+/**
+ * Default renderer configuration mapping markdown elements to Svelte components.
+ * Provides out-of-the-box rendering capabilities while allowing for customization.
+ *
+ * Implementation notes:
+ * - Components are imported statically from '../renderers/index.js' (not lazy-loaded)
+ * - Null values indicate optional renderers
+ * - Components are type-checked against the Renderers interface
+ *
+ * @const {Renderers}
+ */
 export const defaultRenderers = {
     heading: Heading,
     paragraph: Paragraph,
@@ -27,10 +38,24 @@ export const defaultRenderers = {
     code: Code,
     br: Br
 };
+/**
+ * Default configuration options for the markdown parser.
+ * Provides sensible defaults while allowing for customization.
+ *
+ * Notable defaults:
+ * - GitHub Flavored Markdown enabled
+ * - Header IDs generated automatically
+ * - No syntax highlighting by default
+ * - HTML sanitization disabled
+ * - Standard markdown parsing rules
+ *
+ * @const {SvelteMarkdownOptions}
+ */
 export const defaultOptions = {
     baseUrl: null,
     breaks: false,
     gfm: true,
+    tables: true,
     headerIds: true,
     headerPrefix: '',
     highlight: null,

package/dist/utils/token-cleanup.d.ts CHANGED Viewed

@@ -1,17 +1,132 @@
 import type { Token } from 'marked';
 /**
- * Determines if a string contains an HTML opening or closing tag
- * @param raw - The string to check for HTML tags
- * @returns Object containing the tag name and whether it's an opening tag, or null if no tag found
+ * Analyzes a string to determine if it contains an HTML tag and its characteristics.
+ *
+ * @param {string} raw - Raw string potentially containing an HTML tag
+ * @returns {Object|null} Returns null if no tag found, otherwise returns:
+ *    {
+ *      tag: string      - The name of the HTML tag
+ *      isOpening: bool  - True if opening tag, false if closing
+ *    }
+ *
+ * @example
+ * isHtmlOpenTag('<div class="test">') // Returns { tag: 'div', isOpening: true }
+ * isHtmlOpenTag('</span>') // Returns { tag: 'span', isOpening: false }
+ * isHtmlOpenTag('plain text') // Returns null
  */
 export declare const isHtmlOpenTag: (raw: string) => {
     tag: string;
     isOpening: boolean;
 } | null;
 /**
- * Main function to process and shrink HTML tokens
- * Breaks down complex HTML structures into manageable tokens
- * @param tokens - Array of tokens to process
- * @returns Processed array of tokens with nested structure
+ * Parses HTML attributes from a tag string into a structured object.
+ * Handles single- and double-quoted values, unterminated quoted values, and valueless (boolean) attributes.
+ *
+ * @param {string} raw - Raw HTML tag string containing attributes
+ * @returns {Record<string, string>} Map of attribute names to their values
+ *
+ * @example
+ * extractAttributes('<div class="foo" id="bar">')
+ * // Returns { class: 'foo', id: 'bar' }
+ *
+ * @internal
+ */
+export declare const extractAttributes: (raw: string) => Record<string, string>;
+/**
+ * Converts an HTML string into a sequence of tokens using htmlparser2.
+ * Handles complex nested structures while maintaining proper order and relationships.
+ *
+ * Key features:
+ * - Preserves original HTML structure without automatic tag closing
+ * - Handles self-closing tags with proper XML syntax (e.g., <br/> instead of <br>)
+ * - Gracefully handles malformed HTML by preserving the original structure
+ * - Maintains attribute information in opening tags
+ * - Processes text content between tags
+ *
+ * @param {string} html - HTML string to be parsed
+ * @returns {Token[]} Array of tokens representing the HTML structure
+ *
+ * @example
+ * // Well-formed HTML
+ * parseHtmlBlock('<div>Hello <span>world</span></div>')
+ * // Returns [
+ * //   { type: 'html', raw: '<div>', ... },
+ * //   { type: 'text', raw: 'Hello ', ... },
+ * //   { type: 'html', raw: '<span>', ... },
+ * //   { type: 'text', raw: 'world', ... },
+ * //   { type: 'html', raw: '</span>', ... },
+ * //   { type: 'html', raw: '</div>', ... }
+ * // ]
+ *
+ * // Self-closing tags
+ * parseHtmlBlock('<div>Before<br/>After</div>')
+ * // Returns [
+ * //   { type: 'html', raw: '<div>', ... },
+ * //   { type: 'text', raw: 'Before', ... },
+ * //   { type: 'html', raw: '<br/>', ... },
+ * //   { type: 'text', raw: 'After', ... },
+ * //   { type: 'html', raw: '</div>', ... }
+ * // ]
+ *
+ * // Malformed HTML
+ * parseHtmlBlock('<div>Unclosed')
+ * // Returns [
+ * //   { type: 'html', raw: '<div>', ... },
+ * //   { type: 'text', raw: 'Unclosed', ... }
+ * // ]
+ *
+ * @internal
+ */
+export declare const parseHtmlBlock: (html: string) => Token[];
+/**
+ * Determines if an HTML string contains multiple distinct tags.
+ * Used as a preprocessing step to optimize token processing.
+ *
+ * @param {string} html - HTML string to analyze
+ * @returns {boolean} True if multiple tags are present
+ *
+ * @internal
+ */
+export declare const containsMultipleTags: (html: string) => boolean;
+/**
+ * Primary entry point for HTML token processing. Transforms flat token arrays
+ * into properly nested structures while preserving HTML semantics.
+ *
+ * Key features:
+ * - Breaks down complex HTML structures into atomic tokens
+ * - Maintains attribute information
+ * - Preserves proper nesting relationships
+ * - Handles malformed HTML gracefully
+ *
+ * @param {Token[]} tokens - Array of tokens to process
+ * @returns {Token[]} Processed and properly nested token array
+ *
+ * @example
+ * const tokens = [
+ *   { type: 'html', raw: '<div class="wrapper">' },
+ *   { type: 'text', raw: 'content' },
+ *   { type: 'html', raw: '</div>' }
+ * ];
+ * shrinkHtmlTokens(tokens);
+ * // Returns nested structure with proper token relationships
+ *
+ * @public
  */
 export declare const shrinkHtmlTokens: (tokens: Token[]) => Token[];
+/**
+ * Core token processing logic that handles the complexities of HTML nesting.
+ * Uses a stack-based approach to match opening and closing tags while
+ * maintaining proper hierarchical relationships.
+ *
+ * Implementation details:
+ * - Maintains a stack of opening tags
+ * - Processes nested tokens recursively
+ * - Preserves HTML attributes
+ * - Handles malformed HTML gracefully
+ *
+ * @param {Token[]} tokens - Tokens to be processed
+ * @returns {Token[]} Processed tokens with proper nesting structure
+ *
+ * @internal
+ */
+export declare const processHtmlTokens: (tokens: Token[]) => Token[];

package/dist/utils/token-cleanup.js CHANGED Viewed

@@ -1,15 +1,31 @@
 import { Parser } from 'htmlparser2';
 /**
- * Regular expression pattern to match HTML tags
- * Matches both opening and closing tags with optional attributes
- * Example matches: <div>, </div>, <img src="...">, <input type="text"/>
+ * Matches HTML tags with comprehensive coverage of edge cases.
+ * Pattern breakdown:
+ * - <\/?         : Matches opening < and optional /
+ * - [a-zA-Z]     : Tag must start with letter
+ * - [a-zA-Z0-9-] : Subsequent chars can be letters, numbers, or hyphens
+ * - (?:\s+[^>]*)?: Optional attributes
+ * - >            : Closing bracket
+ *
+ * @const {RegExp}
  */
 const HTML_TAG_PATTERN = /<\/?([a-zA-Z][a-zA-Z0-9-]{0,})(?:\s+[^>]*)?>/;
 const htmlTagRegex = new RegExp(HTML_TAG_PATTERN);
 /**
- * Determines if a string contains an HTML opening or closing tag
- * @param raw - The string to check for HTML tags
- * @returns Object containing the tag name and whether it's an opening tag, or null if no tag found
+ * Analyzes a string to determine if it contains an HTML tag and its characteristics.
+ *
+ * @param {string} raw - Raw string potentially containing an HTML tag
+ * @returns {Object|null} Returns null if no tag found, otherwise returns:
+ *    {
+ *      tag: string      - The name of the HTML tag
+ *      isOpening: bool  - True if opening tag, false if closing
+ *    }
+ *
+ * @example
+ * isHtmlOpenTag('<div class="test">') // Returns { tag: 'div', isOpening: true }
+ * isHtmlOpenTag('</span>') // Returns { tag: 'span', isOpening: false }
+ * isHtmlOpenTag('plain text') // Returns null
  */
 export const isHtmlOpenTag = (raw) => {
     // First check if the string contains any HTML tags at all (faster than full regex match)
@@ -22,36 +38,89 @@ export const isHtmlOpenTag = (raw) => {
     return { tag: match[1], isOpening: !raw.startsWith('</') };
 };
 /**
- * Extracts HTML attributes from a tag string
- * @param raw - The raw HTML tag string (e.g., '<div class="example" id="test">')
- * @returns An object containing key-value pairs of attributes
+ * Parses HTML attributes from a tag string into a structured object.
+ * Handles both single and double quoted attributes.
+ *
+ * @param {string} raw - Raw HTML tag string containing attributes
+ * @returns {Record<string, string>} Map of attribute names to their values
+ *
+ * @example
+ * extractAttributes('<div class="foo" id="bar">')
+ * // Returns { class: 'foo', id: 'bar' }
+ *
+ * @internal
  */
-const extractAttributes = (raw) => {
+export const extractAttributes = (raw) => {
     const attributes = {};
-    // Match pattern: attribute="value" or attribute='value'
-    const attributeRegex = /(\w+)=["']([^"']*?)["']/g;
+    // First pass: handle regular and unclosed quoted attributes
+    const quotedRegex = /([a-zA-Z][\w-]*?)=["']([^"']*?)(?:["']|$)/g;
     let match;
-    // Continue finding matches until we've processed all attributes
-    while ((match = attributeRegex.exec(raw)) !== null) {
+    while ((match = quotedRegex.exec(raw)) !== null) {
         const [, key, value] = match;
         attributes[key] = value.trim();
     }
+    // Second pass: handle boolean attributes
+    const booleanRegex = /(?:^|\s)([a-zA-Z][\w-]*?)(?=[\s>]|$)/g;
+    while ((match = booleanRegex.exec(raw)) !== null) {
+        const [, key] = match;
+        if (key && !attributes[key]) {
+            attributes[key] = '';
+        }
+    }
     return attributes;
 };
 /**
- * Parses an HTML string into an array of tokens
- * Uses htmlparser2 to properly handle nested tags and text content
- * @param html - The HTML string to parse
- * @returns Array of tokens representing the HTML structure
+ * Converts an HTML string into a sequence of tokens using htmlparser2.
+ * Handles complex nested structures while maintaining proper order and relationships.
+ *
+ * Key features:
+ * - Preserves original HTML structure without automatic tag closing
+ * - Handles self-closing tags with proper XML syntax (e.g., <br/> instead of <br>)
+ * - Gracefully handles malformed HTML by preserving the original structure
+ * - Maintains attribute information in opening tags
+ * - Processes text content between tags
+ *
+ * @param {string} html - HTML string to be parsed
+ * @returns {Token[]} Array of tokens representing the HTML structure
+ *
+ * @example
+ * // Well-formed HTML
+ * parseHtmlBlock('<div>Hello <span>world</span></div>')
+ * // Returns [
+ * //   { type: 'html', raw: '<div>', ... },
+ * //   { type: 'text', raw: 'Hello ', ... },
+ * //   { type: 'html', raw: '<span>', ... },
+ * //   { type: 'text', raw: 'world', ... },
+ * //   { type: 'html', raw: '</span>', ... },
+ * //   { type: 'html', raw: '</div>', ... }
+ * // ]
+ *
+ * // Self-closing tags
+ * parseHtmlBlock('<div>Before<br/>After</div>')
+ * // Returns [
+ * //   { type: 'html', raw: '<div>', ... },
+ * //   { type: 'text', raw: 'Before', ... },
+ * //   { type: 'html', raw: '<br/>', ... },
+ * //   { type: 'text', raw: 'After', ... },
+ * //   { type: 'html', raw: '</div>', ... }
+ * // ]
+ *
+ * // Malformed HTML
+ * parseHtmlBlock('<div>Unclosed')
+ * // Returns [
+ * //   { type: 'html', raw: '<div>', ... },
+ * //   { type: 'text', raw: 'Unclosed', ... }
+ * // ]
+ *
+ * @internal
  */
-const parseHtmlBlock = (html) => {
+export const parseHtmlBlock = (html) => {
     const tokens = [];
-    // Buffer for accumulating text content between tags
     let currentText = '';
+    const selfClosingTags = /^(br|hr|img|input|link|meta|area|base|col|embed|keygen|param|source|track|wbr)$/i;
+    const openTags = [];
     const parser = new Parser({
-        // Called when an opening tag is encountered (<div>, <span>, etc.)
         onopentag: (name, attributes) => {
-            // If we have accumulated any text, create a text token first
             if (currentText.trim()) {
                 tokens.push({
                     type: 'text',
@@ -60,23 +129,32 @@ const parseHtmlBlock = (html) => {
                 });
                 currentText = '';
             }
-            // Create a token for the opening tag with its attributes
-            tokens.push({
-                type: 'html',
-                raw: `<${name}${Object.entries(attributes)
-                    .map(([key, value]) => ` ${key}="${value}"`)
-                    .join('')}>`,
-                tag: name,
-                attributes
-            });
+            openTags.push(name);
+            if (selfClosingTags.test(name)) {
+                tokens.push({
+                    type: 'html',
+                    raw: `<${name}${Object.entries(attributes)
+                        .map(([key, value]) => ` ${key}="${value}"`)
+                        .join('')}/>`,
+                    tag: name,
+                    attributes
+                });
+            }
+            else {
+                tokens.push({
+                    type: 'html',
+                    raw: `<${name}${Object.entries(attributes)
+                        .map(([key, value]) => ` ${key}="${value}"`)
+                        .join('')}>`,
+                    tag: name,
+                    attributes
+                });
+            }
         },
-        // Called for text content between tags
         ontext: (text) => {
             currentText += text;
         },
-        // Called when a closing tag is encountered (</div>, </span>, etc.)
         onclosetag: (name) => {
-            // Push any accumulated text before the closing tag
             if (currentText.trim()) {
                 tokens.push({
                     type: 'text',
@@ -85,41 +163,95 @@ const parseHtmlBlock = (html) => {
                 });
                 currentText = '';
             }
-            // Create a token for the closing tag
-            tokens.push({
-                type: 'html',
-                raw: `</${name}>`,
-                tag: name
-            });
+            // Only add closing tag if we found its opening tag
+            // and it's not a self-closing tag
+            if (openTags.includes(name) && !selfClosingTags.test(name)) {
+                if (html.includes(`</${name}>`)) {
+                    tokens.push({
+                        type: 'html',
+                        raw: `</${name}>`,
+                        tag: name
+                    });
+                }
+                openTags.splice(openTags.indexOf(name), 1);
+            }
         }
+    }, {
+        xmlMode: true,
+        // Add this to prevent automatic tag closing
+        recognizeSelfClosing: true
     });
-    // Process the HTML string
     parser.write(html);
     parser.end();
+    if (currentText.trim()) {
+        tokens.push({
+            type: 'text',
+            raw: currentText,
+            text: currentText
+        });
+    }
     return tokens;
 };
 /**
- * Checks if an HTML string contains multiple tags
- * Used to determine if further parsing is needed
- * @param html - The HTML string to check
- * @returns boolean indicating if multiple tags are present
+ * Determines whether an HTML string holds more than one opening or closing tag.
+ * shrinkHtmlTokens uses the result to decide whether an html token needs splitting.
+ *
+ * @param {string} html - HTML string to analyze
+ * @returns {boolean} True if multiple tags are present
+ *
+ * @internal
  */
-const containsMultipleTags = (html) => {
+export const containsMultipleTags = (html) => {
     // Count opening-style tags (self-closing forms such as <br/> also match) and closing tags
     const openingTags = html.match(/<[a-zA-Z][^>]*>/g) || [];
     const closingTags = html.match(/<\/[a-zA-Z][^>]*>/g) || [];
     return openingTags.length > 1 || closingTags.length > 1;
 };
 /**
- * Main function to process and shrink HTML tokens
- * Breaks down complex HTML structures into manageable tokens
- * @param tokens - Array of tokens to process
- * @returns Processed array of tokens with nested structure
+ * Primary entry point for HTML token processing. Transforms flat token arrays
+ * into properly nested structures while preserving HTML semantics.
+ *
+ * Key features:
+ * - Breaks down complex HTML structures into atomic tokens
+ * - Maintains attribute information
+ * - Preserves proper nesting relationships
+ * - Handles malformed HTML gracefully
+ *
+ * @param {Token[]} tokens - Array of tokens to process
+ * @returns {Token[]} Processed and properly nested token array
+ *
+ * @example
+ * const tokens = [
+ *   { type: 'html', raw: '<div class="wrapper">' },
+ *   { type: 'text', raw: 'content' },
+ *   { type: 'html', raw: '</div>' }
+ * ];
+ * shrinkHtmlTokens(tokens);
+ * // Returns nested structure with proper token relationships
+ *
+ * @public
  */
 export const shrinkHtmlTokens = (tokens) => {
     const result = [];
     for (const token of tokens) {
-        if (token.type === 'html' && containsMultipleTags(token.raw)) {
+        if (token.type === 'table') {
+            // Process header cells
+            if (token.header) {
+                token.header = token.header.map((cell) => ({
+                    ...cell,
+                    tokens: cell.tokens ? shrinkHtmlTokens(cell.tokens) : []
+                }));
+            }
+            // Process row cells
+            if (token.rows) {
+                token.rows = token.rows.map((row) => row.map((cell) => ({
+                    ...cell,
+                    tokens: cell.tokens ? shrinkHtmlTokens(cell.tokens) : []
+                })));
+            }
+            result.push(token);
+        }
+        else if (token.type === 'html' && containsMultipleTags(token.raw)) {
             // Parse HTML with multiple tags into separate tokens
             result.push(...parseHtmlBlock(token.raw));
         }
@@ -131,26 +263,22 @@ export const shrinkHtmlTokens = (tokens) => {
     return processHtmlTokens(result);
 };
 /**
- * Processes HTML tokens to create a nested structure
- * Handles matching opening and closing tags, maintains proper nesting
- * and preserves attributes
+ * Core token processing logic that handles the complexities of HTML nesting.
+ * Uses a stack-based approach to match opening and closing tags while
+ * maintaining proper hierarchical relationships.
  *
- * @param tokens - Array of tokens to process
- * @returns Processed array of tokens with proper nesting structure
+ * Implementation details:
+ * - Maintains a stack of opening tags
+ * - Processes nested tokens recursively
+ * - Preserves HTML attributes
+ * - Handles malformed HTML gracefully
  *
- * @example
- * Input tokens: [
- *   { type: 'html', raw: '<div>' },
- *   { type: 'text', raw: 'Hello' },
- *   { type: 'html', raw: '</div>' }
- * ]
- * Output: [
- *   { type: 'html', tag: 'div', tokens: [
- *     { type: 'text', raw: 'Hello' }
- *   ]}
- * ]
+ * @param {Token[]} tokens - Tokens to be processed
+ * @returns {Token[]} Processed tokens with proper nesting structure
+ *
+ * @internal
  */
-const processHtmlTokens = (tokens) => {
+export const processHtmlTokens = (tokens) => {
     const result = [];
     // Stack to keep track of opening tags and their positions
     const stack = [];

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
     "name": "@humanspeak/svelte-markdown",
-    "version": "0.7.4",
+    "version": "0.7.5",
     "description": "A powerful, customizable markdown renderer for Svelte with TypeScript support",
     "type": "module",
     "svelte": "./dist/index.js",