npm - @tiptap/markdown - Versions diffs - 3.10.1 → 3.10.3 - Mend

@tiptap/markdown 3.10.1 → 3.10.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "@tiptap/markdown",
   "description": "markdown parser and serializer for tiptap",
-  "version": "3.10.1",
+  "version": "3.10.3",
   "homepage": "https://tiptap.dev",
   "keywords": [
     "tiptap",
@@ -37,12 +37,12 @@
     "marked": "^16.1.2"
   },
   "devDependencies": {
-    "@tiptap/core": "^3.10.1",
-    "@tiptap/pm": "^3.10.1"
+    "@tiptap/core": "^3.10.3",
+    "@tiptap/pm": "^3.10.3"
   },
   "peerDependencies": {
-    "@tiptap/core": "^3.10.1",
-    "@tiptap/pm": "^3.10.1"
+    "@tiptap/core": "^3.10.3",
+    "@tiptap/pm": "^3.10.3"
   },
   "repository": {
     "type": "git",

package/src/MarkdownManager.ts CHANGED Viewed

@@ -20,6 +20,7 @@ import {
   findMarksToClose,
   findMarksToCloseAtEnd,
   findMarksToOpen,
+  isTaskItem,
   reopenMarksAfterNode,
   wrapInMarkdownBlock,
 } from './utils.js'
@@ -303,6 +304,11 @@ export class MarkdownManager {
       return null
     }
+    // Special handling for 'list' tokens that may contain mixed bullet/task items
+    if (token.type === 'list') {
+      return this.parseListToken(token)
+    }
     const handlers = this.getHandlersForToken(token.type)
     const helpers = this.createParseHelpers()
@@ -339,7 +345,169 @@ export class MarkdownManager {
   private lastParseResult: JSONContent | JSONContent[] | null = null
   /**
-   * Create the helper functions that are passed to parse handlers.
+   * Parse a list token, handling mixed bullet and task list items by splitting them into separate lists.
+   * This ensures that consecutive task items and bullet items are grouped and parsed as separate list nodes.
+   *
+   * Lists with no items, lists that are not mixed (only task items or only regular items), and
+   * lists parsed while no handler is registered for `taskList` are passed unchanged to
+   * `parseTokenWithHandlers`.
+   *
+   * For mixed lists, every task item is rewritten into a `taskItem` token: `text` and `mainContent`
+   * come from the first line of the item's raw markdown, `tokens` are the inline tokens of that
+   * line, and `nestedTokens` are lexed from the remaining lines after their common indentation has
+   * been removed. Consecutive items of the same kind are collected into groups, and each group is
+   * parsed through `parseToken` as a copy of the original token with `type` and `items` replaced.
+   *
+   * @param token The list token to parse
+   * @returns For mixed lists, a flat array with the parsed nodes of every group, or null if no
+   * group produced a result. Otherwise, whatever `parseTokenWithHandlers` returns for the token.
+   */
+  private parseListToken(token: MarkdownToken): JSONContent | JSONContent[] | null {
+    if (!token.items || token.items.length === 0) {
+      // No items, so there is nothing to split: parse normally
+      return this.parseTokenWithHandlers(token)
+    }
+    const hasTask = token.items.some(item => isTaskItem(item).isTask)
+    const hasNonTask = token.items.some(item => !isTaskItem(item).isTask)
+    if (!hasTask || !hasNonTask || this.getHandlersForToken('taskList').length === 0) {
+      // Not mixed or no taskList extension, parse normally
+      return this.parseTokenWithHandlers(token)
+    }
+    // Mixed list with taskList extension available: split into separate lists
+    type TaskListItemToken = MarkdownToken & { type: 'taskItem'; checked?: boolean; indentLevel?: number }
+    const groups: { type: 'list' | 'taskList'; items: (MarkdownToken | TaskListItemToken)[] }[] = []
+    let currentGroup: (MarkdownToken | TaskListItemToken)[] = []
+    let currentType: 'list' | 'taskList' | null = null
+    for (let i = 0; i < token.items.length; i += 1) {
+      const item = token.items[i]
+      const { isTask, checked, indentLevel } = isTaskItem(item)
+      let processedItem = item // regular items are forwarded untouched
+      if (isTask) {
+        // Transform list_item into taskItem token
+        const raw = item.raw || item.text || ''
+        // Split raw content by lines to separate main content from nested
+        const lines = raw.split('\n')
+        // Extract main content from the first line (the text after the "[ ]" / "[x]" checkbox)
+        const firstLineMatch = lines[0].match(/^\s*[-+*]\s+\[([ xX])\]\s+(.*)$/)
+        const mainContent = firstLineMatch ? firstLineMatch[2] : ''
+        // Parse nested content from remaining lines
+        let nestedTokens: MarkdownToken[] = []
+        if (lines.length > 1) {
+          // Join all lines after the first
+          const nestedRaw = lines.slice(1).join('\n')
+          // Only parse if there's actual content
+          if (nestedRaw.trim()) {
+            // Find minimum indentation of non-empty lines
+            const nestedLines = lines.slice(1)
+            const nonEmptyLines = nestedLines.filter(line => line.trim())
+            if (nonEmptyLines.length > 0) {
+              const minIndent = Math.min(...nonEmptyLines.map(line => line.length - line.trimStart().length))
+              // Remove common indentation while preserving structure
+              const trimmedLines = nestedLines.map(line => {
+                if (!line.trim()) {
+                  return '' // Keep empty lines
+                }
+                return line.slice(minIndent)
+              })
+              const nestedContent = trimmedLines.join('\n').trim()
+              // Use the lexer to parse nested content
+              if (nestedContent) {
+                // Use the full lexer pipeline to ensure inline tokens are populated
+                nestedTokens = this.markedInstance.lexer(`${nestedContent}\n`)
+              }
+            }
+          }
+        }
+        processedItem = {
+          type: 'taskItem',
+          raw: '',
+          mainContent,
+          indentLevel,
+          checked: checked ?? false,
+          text: mainContent,
+          tokens: this.lexer.inlineTokens(mainContent),
+          nestedTokens,
+        }
+      }
+      const itemType: 'list' | 'taskList' = isTask ? 'taskList' : 'list'
+      if (currentType !== itemType) {
+        if (currentGroup.length > 0) {
+          groups.push({ type: currentType!, items: currentGroup }) // non-null: a non-empty group implies currentType was assigned
+        }
+        currentGroup = [processedItem]
+        currentType = itemType
+      } else {
+        currentGroup.push(processedItem)
+      }
+    }
+    if (currentGroup.length > 0) {
+      groups.push({ type: currentType!, items: currentGroup }) // flush the trailing group
+    }
+    // Parse each group as a separate token
+    const results: JSONContent[] = []
+    for (let i = 0; i < groups.length; i += 1) {
+      const group = groups[i]
+      const subToken = { ...token, type: group.type, items: group.items }
+      const parsed = this.parseToken(subToken) // each group holds a single kind of item, so it is no longer a mixed list
+      if (parsed) {
+        if (Array.isArray(parsed)) {
+          results.push(...parsed)
+        } else {
+          results.push(parsed)
+        }
+      }
+    }
+    return results.length > 0 ? results : null
+  }
+  /**
+   * Parse a token using registered handlers (extracted for reuse).
+   *
+   * The handlers registered for `token.type` are tried in order. Handlers without a
+   * `parseMarkdown` function are skipped. The first handler whose normalized result is truthy
+   * and not an empty array wins. Its result is handed out of the `find` callback through
+   * `this.lastParseResult`, which is reset to null before returning.
+   *
+   * @param token The token to parse
+   * @returns The normalized result of the first successful handler, the result of
+   * `parseFallbackToken` if no handler succeeded, or null if the token has no type
+   */
+  private parseTokenWithHandlers(token: MarkdownToken): JSONContent | JSONContent[] | null {
+    if (!token.type) {
+      return null
+    }
+    const handlers = this.getHandlersForToken(token.type)
+    const helpers = this.createParseHelpers()
+    // Try each handler until one returns a valid result
+    const result = handlers.find(handler => {
+      if (!handler.parseMarkdown) {
+        return false
+      }
+      const parseResult = handler.parseMarkdown(token, helpers)
+      const normalized = this.normalizeParseResult(parseResult)
+      // Check if this handler returned a valid result (not null/empty array)
+      if (normalized && (!Array.isArray(normalized) || normalized.length > 0)) {
+        // Store result for return (find() only yields the handler, not its output)
+        this.lastParseResult = normalized
+        return true
+      }
+      return false
+    })
+    // If a handler worked, return its result
+    if (result && this.lastParseResult) {
+      const toReturn = this.lastParseResult
+      this.lastParseResult = null // Clean up
+      return toReturn
+    }
+    // If no handler worked, try fallback parsing
+    return this.parseFallbackToken(token)
+  }
+  /**
+   * Creates helper functions for parsing markdown tokens.
+   * @returns An object containing helper functions for parsing.
    */
   private createParseHelpers(): MarkdownParseHelpers {
     return {
@@ -375,6 +543,13 @@ export class MarkdownManager {
     }
   }
+  /**
+   * Escape special regex characters in a string.
+   *
+   * Every occurrence of one of the characters `.`, `*`, `+`, `?`, `^`, `$`, `{`, `}`, `(`, `)`,
+   * `|`, `[`, `]` or a backslash is prefixed with a backslash, so the result can be embedded in a
+   * `RegExp` source and match the input literally.
+   *
+   * @param str The string to escape
+   * @returns The input with the characters listed above backslash-escaped
+   */
+  private escapeRegex(str: string): string {
+    return str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
+  }
   /**
    * Parse inline tokens (bold, italic, links, etc.) into text nodes with marks.
    * This is the complex part that handles mark nesting and boundaries.
@@ -382,14 +557,81 @@ export class MarkdownManager {
   private parseInlineTokens(tokens: MarkdownToken[]): JSONContent[] {
     const result: JSONContent[] = []
-    // Process tokens sequentially
-    tokens.forEach(token => {
+    // Process tokens sequentially using an index so we can look ahead and
+    // merge split inline HTML fragments like: text / <em> / inner / </em> / text
+    for (let i = 0; i < tokens.length; i += 1) {
+      const token = tokens[i]
       if (token.type === 'text') {
         // Create text node
         result.push({
           type: 'text',
           text: token.text || '',
         })
+      } else if (token.type === 'html') {
+        // Handle possible split inline HTML by attempting to detect an
+        // opening tag and searching forward for a matching closing tag.
+        const raw = (token.raw ?? token.text ?? '').toString()
+        // Quick checks for opening vs. closing tag
+        const isClosing = /^<\/[\s]*[\w-]+/i.test(raw)
+        const openMatch = raw.match(/^<[\s]*([\w-]+)(\s|>|\/|$)/i)
+        if (!isClosing && openMatch && !/\/>$/.test(raw)) {
+          // Try to find the corresponding closing html token for this tag
+          const tagName = openMatch[1]
+          const escapedTagName = this.escapeRegex(tagName)
+          const closingRegex = new RegExp(`^<\\/\\s*${escapedTagName}\\b`, 'i')
+          let foundIndex = -1
+          // Collect intermediate raw parts to reconstruct full HTML fragment
+          const parts: string[] = [raw]
+          for (let j = i + 1; j < tokens.length; j += 1) {
+            const t = tokens[j]
+            const tRaw = (t.raw ?? t.text ?? '').toString()
+            parts.push(tRaw)
+            if (t.type === 'html' && closingRegex.test(tRaw)) {
+              foundIndex = j
+              break
+            }
+          }
+          if (foundIndex !== -1) {
+            // Merge opening + inner + closing into one html fragment and parse
+            const mergedRaw = parts.join('')
+            const mergedToken = {
+              type: 'html',
+              raw: mergedRaw,
+              text: mergedRaw,
+              block: false,
+            } as unknown as MarkdownToken
+            const parsed = this.parseHTMLToken(mergedToken)
+            if (parsed) {
+              const normalized = this.normalizeParseResult(parsed as any)
+              if (Array.isArray(normalized)) {
+                result.push(...normalized)
+              } else if (normalized) {
+                result.push(normalized)
+              }
+            }
+            // Advance i to the closing token
+            i = foundIndex
+            continue
+          }
+        }
+        // Fallback: single html token parse
+        const parsedSingle = this.parseHTMLToken(token)
+        if (parsedSingle) {
+          const normalized = this.normalizeParseResult(parsedSingle as any)
+          if (Array.isArray(normalized)) {
+            result.push(...normalized)
+          } else if (normalized) {
+            result.push(normalized)
+          }
+        }
       } else if (token.type) {
         // Handle inline marks (bold, italic, etc.)
         const markHandler = this.getHandlerForToken(token.type)
@@ -415,7 +657,7 @@ export class MarkdownManager {
           result.push(...this.parseInlineTokens(token.tokens))
         }
       }
-    })
+    }
     return result
   }
@@ -685,8 +927,12 @@ export class MarkdownManager {
         // Render the node
         const nodeContent = this.renderNodeToMarkdown(node, parentNode, i, level)
-        // Reopen marks after the node
-        const afterMarkdown = reopenMarksAfterNode(marksToReopen, activeMarks, this.getMarkOpening.bind(this))
+        // Reopen marks after the node, but NOT after a hard break
+        // Hard breaks should terminate marks (they create a line break where marks don't continue)
+        const afterMarkdown =
+          node.type === 'hardBreak'
+            ? ''
+            : reopenMarksAfterNode(marksToReopen, activeMarks, this.getMarkOpening.bind(this))
         result.push(beforeMarkdown + nodeContent + afterMarkdown)
       }

package/src/utils.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import type { Content } from '@tiptap/core'
+import type { Content, MarkdownToken } from '@tiptap/core'
 import type { Fragment, Node } from '@tiptap/pm/model'
 import type { ContentType } from './types'
@@ -26,7 +26,6 @@ export function wrapInMarkdownBlock(prefix: string, content: string) {
 /**
  * Identifies marks that need to be closed (active but not in current node).
- * Returns the mark types in reverse order for proper closing sequence.
  */
 export function findMarksToClose(activeMarks: Map<string, any>, currentMarks: Map<string, any>): string[] {
   const marksToClose: string[] = []
@@ -35,7 +34,7 @@ export function findMarksToClose(activeMarks: Map<string, any>, currentMarks: Ma
       marksToClose.push(markType)
     }
   })
-  return marksToClose.reverse()
+  return marksToClose
 }
 /**
@@ -88,7 +87,7 @@ export function findMarksToCloseAtEnd(
     }
   }
-  return marksToCloseAtEnd.reverse()
+  return marksToCloseAtEnd
 }
 /**
@@ -133,6 +132,31 @@ export function reopenMarksAfterNode(
   return afterMarkdown
 }
+/**
+ * Check if a markdown list item token is a task item and extract its state.
+ *
+ * The item's `raw` text is inspected, falling back to `text` and then to an empty string.
+ * Only the bullet markers `-`, `+` and `*` are recognized, the checkbox must contain a single
+ * space, `x` or `X`, and the closing bracket must be followed by at least one whitespace character.
+ *
+ * @param item The list item token to check
+ * @returns Object containing isTask flag, checked state, and indentation level. `checked` is only
+ * present for task items. `indentLevel` is the number of leading whitespace characters of a task
+ * item and is always 0 for non-task items.
+ *
+ * @example
+ * ```ts
+ * isTaskItem({ raw: '- [ ] Task' }) // { isTask: true, checked: false, indentLevel: 0 }
+ * isTaskItem({ raw: '  - [x] Done' }) // { isTask: true, checked: true, indentLevel: 2 }
+ * isTaskItem({ raw: '- Regular' }) // { isTask: false, indentLevel: 0 }
+ * ```
+ */
+export function isTaskItem(item: MarkdownToken): { isTask: boolean; checked?: boolean; indentLevel: number } {
+  const raw = item.raw || item.text || ''
+  // Match patterns like "- [ ] " or "  - [x] "
+  const match = raw.match(/^(\s*)[-+*]\s+\[([ xX])\]\s+/)
+  if (match) {
+    return { isTask: true, checked: match[2].toLowerCase() === 'x', indentLevel: match[1].length }
+  }
+  return { isTask: false, indentLevel: 0 }
+}
 /**
  * Assumes the content type based off the content.
  * @param content The content to assume the type for.