@tiptap/markdown 3.10.1 → 3.10.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@tiptap/markdown",
3
3
  "description": "markdown parser and serializer for tiptap",
4
- "version": "3.10.1",
4
+ "version": "3.10.3",
5
5
  "homepage": "https://tiptap.dev",
6
6
  "keywords": [
7
7
  "tiptap",
@@ -37,12 +37,12 @@
37
37
  "marked": "^16.1.2"
38
38
  },
39
39
  "devDependencies": {
40
- "@tiptap/core": "^3.10.1",
41
- "@tiptap/pm": "^3.10.1"
40
+ "@tiptap/core": "^3.10.3",
41
+ "@tiptap/pm": "^3.10.3"
42
42
  },
43
43
  "peerDependencies": {
44
- "@tiptap/core": "^3.10.1",
45
- "@tiptap/pm": "^3.10.1"
44
+ "@tiptap/core": "^3.10.3",
45
+ "@tiptap/pm": "^3.10.3"
46
46
  },
47
47
  "repository": {
48
48
  "type": "git",
@@ -20,6 +20,7 @@ import {
20
20
  findMarksToClose,
21
21
  findMarksToCloseAtEnd,
22
22
  findMarksToOpen,
23
+ isTaskItem,
23
24
  reopenMarksAfterNode,
24
25
  wrapInMarkdownBlock,
25
26
  } from './utils.js'
@@ -303,6 +304,11 @@ export class MarkdownManager {
303
304
  return null
304
305
  }
305
306
 
307
+ // Special handling for 'list' tokens that may contain mixed bullet/task items
308
+ if (token.type === 'list') {
309
+ return this.parseListToken(token)
310
+ }
311
+
306
312
  const handlers = this.getHandlersForToken(token.type)
307
313
  const helpers = this.createParseHelpers()
308
314
 
@@ -339,7 +345,169 @@ export class MarkdownManager {
339
345
  private lastParseResult: JSONContent | JSONContent[] | null = null
340
346
 
341
347
  /**
342
- * Create the helper functions that are passed to parse handlers.
348
+ * Parse a list token, handling mixed bullet and task list items by splitting them into separate lists.
349
+ * This ensures that consecutive task items and bullet items are grouped and parsed as separate list nodes.
350
+ *
351
+ * @param token The list token to parse
352
+ * @returns Array of parsed list nodes, or null if parsing fails
353
+ */
354
+ private parseListToken(token: MarkdownToken): JSONContent | JSONContent[] | null {
355
+ if (!token.items || token.items.length === 0) {
356
+ // No items, parse normally
357
+ return this.parseTokenWithHandlers(token)
358
+ }
359
+
360
+ const hasTask = token.items.some(item => isTaskItem(item).isTask)
361
+ const hasNonTask = token.items.some(item => !isTaskItem(item).isTask)
362
+
363
+ if (!hasTask || !hasNonTask || this.getHandlersForToken('taskList').length === 0) {
364
+ // Not mixed or no taskList extension, parse normally
365
+ return this.parseTokenWithHandlers(token)
366
+ }
367
+
368
+ // Mixed list with taskList extension available: split into separate lists
369
+ type TaskListItemToken = MarkdownToken & { type: 'taskItem'; checked?: boolean; indentLevel?: number }
370
+ const groups: { type: 'list' | 'taskList'; items: (MarkdownToken | TaskListItemToken)[] }[] = []
371
+ let currentGroup: (MarkdownToken | TaskListItemToken)[] = []
372
+ let currentType: 'list' | 'taskList' | null = null
373
+
374
+ for (let i = 0; i < token.items.length; i += 1) {
375
+ const item = token.items[i]
376
+ const { isTask, checked, indentLevel } = isTaskItem(item)
377
+ let processedItem = item
378
+
379
+ if (isTask) {
380
+ // Transform list_item into taskItem token
381
+ const raw = item.raw || item.text || ''
382
+
383
+ // Split raw content by lines to separate main content from nested
384
+ const lines = raw.split('\n')
385
+
386
+ // Extract main content from the first line
387
+ const firstLineMatch = lines[0].match(/^\s*[-+*]\s+\[([ xX])\]\s+(.*)$/)
388
+ const mainContent = firstLineMatch ? firstLineMatch[2] : ''
389
+
390
+ // Parse nested content from remaining lines
391
+ let nestedTokens: MarkdownToken[] = []
392
+ if (lines.length > 1) {
393
+ // Join all lines after the first
394
+ const nestedRaw = lines.slice(1).join('\n')
395
+
396
+ // Only parse if there's actual content
397
+ if (nestedRaw.trim()) {
398
+ // Find minimum indentation of non-empty lines
399
+ const nestedLines = lines.slice(1)
400
+ const nonEmptyLines = nestedLines.filter(line => line.trim())
401
+ if (nonEmptyLines.length > 0) {
402
+ const minIndent = Math.min(...nonEmptyLines.map(line => line.length - line.trimStart().length))
403
+ // Remove common indentation while preserving structure
404
+ const trimmedLines = nestedLines.map(line => {
405
+ if (!line.trim()) {
406
+ return '' // Keep empty lines
407
+ }
408
+ return line.slice(minIndent)
409
+ })
410
+ const nestedContent = trimmedLines.join('\n').trim()
411
+ // Use the lexer to parse nested content
412
+ if (nestedContent) {
413
+ // Use the full lexer pipeline to ensure inline tokens are populated
414
+ nestedTokens = this.markedInstance.lexer(`${nestedContent}\n`)
415
+ }
416
+ }
417
+ }
418
+ }
419
+
420
+ processedItem = {
421
+ type: 'taskItem',
422
+ raw: '',
423
+ mainContent,
424
+ indentLevel,
425
+ checked: checked ?? false,
426
+ text: mainContent,
427
+ tokens: this.lexer.inlineTokens(mainContent),
428
+ nestedTokens,
429
+ }
430
+ }
431
+
432
+ const itemType: 'list' | 'taskList' = isTask ? 'taskList' : 'list'
433
+
434
+ if (currentType !== itemType) {
435
+ if (currentGroup.length > 0) {
436
+ groups.push({ type: currentType!, items: currentGroup })
437
+ }
438
+ currentGroup = [processedItem]
439
+ currentType = itemType
440
+ } else {
441
+ currentGroup.push(processedItem)
442
+ }
443
+ }
444
+
445
+ if (currentGroup.length > 0) {
446
+ groups.push({ type: currentType!, items: currentGroup })
447
+ }
448
+
449
+ // Parse each group as a separate token
450
+ const results: JSONContent[] = []
451
+ for (let i = 0; i < groups.length; i += 1) {
452
+ const group = groups[i]
453
+ const subToken = { ...token, type: group.type, items: group.items }
454
+ const parsed = this.parseToken(subToken)
455
+ if (parsed) {
456
+ if (Array.isArray(parsed)) {
457
+ results.push(...parsed)
458
+ } else {
459
+ results.push(parsed)
460
+ }
461
+ }
462
+ }
463
+
464
+ return results.length > 0 ? results : null
465
+ }
466
+
467
+ /**
468
+ * Parse a token using registered handlers (extracted for reuse).
469
+ */
470
+ private parseTokenWithHandlers(token: MarkdownToken): JSONContent | JSONContent[] | null {
471
+ if (!token.type) {
472
+ return null
473
+ }
474
+
475
+ const handlers = this.getHandlersForToken(token.type)
476
+ const helpers = this.createParseHelpers()
477
+
478
+ // Try each handler until one returns a valid result
479
+ const result = handlers.find(handler => {
480
+ if (!handler.parseMarkdown) {
481
+ return false
482
+ }
483
+
484
+ const parseResult = handler.parseMarkdown(token, helpers)
485
+ const normalized = this.normalizeParseResult(parseResult)
486
+
487
+ // Check if this handler returned a valid result (not null/empty array)
488
+ if (normalized && (!Array.isArray(normalized) || normalized.length > 0)) {
489
+ // Store result for return
490
+ this.lastParseResult = normalized
491
+ return true
492
+ }
493
+
494
+ return false
495
+ })
496
+
497
+ // If a handler worked, return its result
498
+ if (result && this.lastParseResult) {
499
+ const toReturn = this.lastParseResult
500
+ this.lastParseResult = null // Clean up
501
+ return toReturn
502
+ }
503
+
504
+ // If no handler worked, try fallback parsing
505
+ return this.parseFallbackToken(token)
506
+ }
507
+
508
+ /**
509
+ * Creates helper functions for parsing markdown tokens.
510
+ * @returns An object containing helper functions for parsing.
343
511
  */
344
512
  private createParseHelpers(): MarkdownParseHelpers {
345
513
  return {
@@ -375,6 +543,13 @@ export class MarkdownManager {
375
543
  }
376
544
  }
377
545
 
546
+ /**
547
+ * Escape special regex characters in a string.
548
+ */
549
+ private escapeRegex(str: string): string {
550
+ return str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
551
+ }
552
+
378
553
  /**
379
554
  * Parse inline tokens (bold, italic, links, etc.) into text nodes with marks.
380
555
  * This is the complex part that handles mark nesting and boundaries.
@@ -382,14 +557,81 @@ export class MarkdownManager {
382
557
  private parseInlineTokens(tokens: MarkdownToken[]): JSONContent[] {
383
558
  const result: JSONContent[] = []
384
559
 
385
- // Process tokens sequentially
386
- tokens.forEach(token => {
560
+ // Process tokens sequentially using an index so we can lookahead and
561
+ // merge split inline HTML fragments like: text / <em> / inner / </em> / text
562
+ for (let i = 0; i < tokens.length; i += 1) {
563
+ const token = tokens[i]
564
+
387
565
  if (token.type === 'text') {
388
566
  // Create text node
389
567
  result.push({
390
568
  type: 'text',
391
569
  text: token.text || '',
392
570
  })
571
+ } else if (token.type === 'html') {
572
+ // Handle possible split inline HTML by attempting to detect an
573
+ // opening tag and searching forward for a matching closing tag.
574
+ const raw = (token.raw ?? token.text ?? '').toString()
575
+
576
+ // Quick checks for opening vs. closing tag
577
+ const isClosing = /^<\/[\s]*[\w-]+/i.test(raw)
578
+ const openMatch = raw.match(/^<[\s]*([\w-]+)(\s|>|\/|$)/i)
579
+
580
+ if (!isClosing && openMatch && !/\/>$/.test(raw)) {
581
+ // Try to find the corresponding closing html token for this tag
582
+ const tagName = openMatch[1]
583
+ const escapedTagName = this.escapeRegex(tagName)
584
+ const closingRegex = new RegExp(`^<\\/\\s*${escapedTagName}\\b`, 'i')
585
+ let foundIndex = -1
586
+
587
+ // Collect intermediate raw parts to reconstruct full HTML fragment
588
+ const parts: string[] = [raw]
589
+ for (let j = i + 1; j < tokens.length; j += 1) {
590
+ const t = tokens[j]
591
+ const tRaw = (t.raw ?? t.text ?? '').toString()
592
+ parts.push(tRaw)
593
+ if (t.type === 'html' && closingRegex.test(tRaw)) {
594
+ foundIndex = j
595
+ break
596
+ }
597
+ }
598
+
599
+ if (foundIndex !== -1) {
600
+ // Merge opening + inner + closing into one html fragment and parse
601
+ const mergedRaw = parts.join('')
602
+ const mergedToken = {
603
+ type: 'html',
604
+ raw: mergedRaw,
605
+ text: mergedRaw,
606
+ block: false,
607
+ } as unknown as MarkdownToken
608
+
609
+ const parsed = this.parseHTMLToken(mergedToken)
610
+ if (parsed) {
611
+ const normalized = this.normalizeParseResult(parsed as any)
612
+ if (Array.isArray(normalized)) {
613
+ result.push(...normalized)
614
+ } else if (normalized) {
615
+ result.push(normalized)
616
+ }
617
+ }
618
+
619
+ // Advance i to the closing token
620
+ i = foundIndex
621
+ continue
622
+ }
623
+ }
624
+
625
+ // Fallback: single html token parse
626
+ const parsedSingle = this.parseHTMLToken(token)
627
+ if (parsedSingle) {
628
+ const normalized = this.normalizeParseResult(parsedSingle as any)
629
+ if (Array.isArray(normalized)) {
630
+ result.push(...normalized)
631
+ } else if (normalized) {
632
+ result.push(normalized)
633
+ }
634
+ }
393
635
  } else if (token.type) {
394
636
  // Handle inline marks (bold, italic, etc.)
395
637
  const markHandler = this.getHandlerForToken(token.type)
@@ -415,7 +657,7 @@ export class MarkdownManager {
415
657
  result.push(...this.parseInlineTokens(token.tokens))
416
658
  }
417
659
  }
418
- })
660
+ }
419
661
 
420
662
  return result
421
663
  }
@@ -685,8 +927,12 @@ export class MarkdownManager {
685
927
  // Render the node
686
928
  const nodeContent = this.renderNodeToMarkdown(node, parentNode, i, level)
687
929
 
688
- // Reopen marks after the node
689
- const afterMarkdown = reopenMarksAfterNode(marksToReopen, activeMarks, this.getMarkOpening.bind(this))
930
+ // Reopen marks after the node, but NOT after a hard break
931
+ // Hard breaks should terminate marks (they create a line break where marks don't continue)
932
+ const afterMarkdown =
933
+ node.type === 'hardBreak'
934
+ ? ''
935
+ : reopenMarksAfterNode(marksToReopen, activeMarks, this.getMarkOpening.bind(this))
690
936
 
691
937
  result.push(beforeMarkdown + nodeContent + afterMarkdown)
692
938
  }
package/src/utils.ts CHANGED
@@ -1,4 +1,4 @@
1
- import type { Content } from '@tiptap/core'
1
+ import type { Content, MarkdownToken } from '@tiptap/core'
2
2
  import type { Fragment, Node } from '@tiptap/pm/model'
3
3
 
4
4
  import type { ContentType } from './types'
@@ -26,7 +26,6 @@ export function wrapInMarkdownBlock(prefix: string, content: string) {
26
26
 
27
27
  /**
28
28
  * Identifies marks that need to be closed (active but not in current node).
29
- * Returns the mark types in reverse order for proper closing sequence.
30
29
  */
31
30
  export function findMarksToClose(activeMarks: Map<string, any>, currentMarks: Map<string, any>): string[] {
32
31
  const marksToClose: string[] = []
@@ -35,7 +34,7 @@ export function findMarksToClose(activeMarks: Map<string, any>, currentMarks: Ma
35
34
  marksToClose.push(markType)
36
35
  }
37
36
  })
38
- return marksToClose.reverse()
37
+ return marksToClose
39
38
  }
40
39
 
41
40
  /**
@@ -88,7 +87,7 @@ export function findMarksToCloseAtEnd(
88
87
  }
89
88
  }
90
89
 
91
- return marksToCloseAtEnd.reverse()
90
+ return marksToCloseAtEnd
92
91
  }
93
92
 
94
93
  /**
@@ -133,6 +132,31 @@ export function reopenMarksAfterNode(
133
132
  return afterMarkdown
134
133
  }
135
134
 
135
+ /**
136
+ * Check if a markdown list item token is a task item and extract its state.
137
+ *
138
+ * @param item The list item token to check
139
+ * @returns Object containing isTask flag, checked state, and indentation level
140
+ *
141
+ * @example
142
+ * ```ts
143
+ * isTaskItem({ raw: '- [ ] Task' }) // { isTask: true, checked: false, indentLevel: 0 }
144
+ * isTaskItem({ raw: ' - [x] Done' }) // { isTask: true, checked: true, indentLevel: 2 }
145
+ * isTaskItem({ raw: '- Regular' }) // { isTask: false, indentLevel: 0 }
146
+ * ```
147
+ */
148
+ export function isTaskItem(item: MarkdownToken): { isTask: boolean; checked?: boolean; indentLevel: number } {
149
+ const raw = item.raw || item.text || ''
150
+
151
+ // Match patterns like "- [ ] " or " - [x] "
152
+ const match = raw.match(/^(\s*)[-+*]\s+\[([ xX])\]\s+/)
153
+
154
+ if (match) {
155
+ return { isTask: true, checked: match[2].toLowerCase() === 'x', indentLevel: match[1].length }
156
+ }
157
+ return { isTask: false, indentLevel: 0 }
158
+ }
159
+
136
160
  /**
137
161
  * Assumes the content type based off the content.
138
162
  * @param content The content to assume the type for.