@tiptap/markdown 3.20.6 → 3.22.0

This diff represents the changes between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
@@ -3,17 +3,21 @@ import {
3
3
  type ExtendableConfig,
4
4
  type JSONContent,
5
5
  type MarkdownExtensionSpec,
6
+ type MarkdownLexerConfiguration,
6
7
  type MarkdownParseHelpers,
7
8
  type MarkdownParseResult,
8
9
  type MarkdownRendererHelpers,
9
10
  type MarkdownToken,
10
11
  type MarkdownTokenizer,
11
12
  type RenderContext,
13
+ callOrReturn,
14
+ decodeHtmlEntities,
15
+ encodeHtmlEntities,
12
16
  flattenExtensions,
13
17
  generateJSON,
14
18
  getExtensionField,
15
19
  } from '@tiptap/core'
16
- import { type Lexer, type Token, type TokenizerExtension, marked } from 'marked'
20
+ import { type Lexer, type Token, type TokenizerExtension, type TokenizerThis, marked } from 'marked'
17
21
 
18
22
  import {
19
23
  closeMarksBeforeNode,
@@ -27,13 +31,15 @@ import {
27
31
 
28
32
  export class MarkdownManager {
29
33
  private markedInstance: typeof marked
30
- private lexer: Lexer
34
+ private activeParseLexer: Lexer | null = null
31
35
  private registry: Map<string, MarkdownExtensionSpec[]>
32
36
  private nodeTypeRegistry: Map<string, MarkdownExtensionSpec[]>
33
37
  private indentStyle: 'space' | 'tab'
34
38
  private indentSize: number
35
39
  private baseExtensions: AnyExtension[] = []
36
40
  private extensions: AnyExtension[] = []
41
+ /** Set of extension names whose `code` spec property is truthy (nodes and marks). */
42
+ private codeTypes: Set<string> = new Set()
37
43
 
38
44
  /**
39
45
  * Create a MarkdownManager.
@@ -49,7 +55,6 @@ export class MarkdownManager {
49
55
  extensions: AnyExtension[]
50
56
  }) {
51
57
  this.markedInstance = options?.marked ?? marked
52
- this.lexer = new this.markedInstance.Lexer()
53
58
  this.indentStyle = options?.indentation?.style ?? 'space'
54
59
  this.indentSize = options?.indentation?.size ?? 2
55
60
  this.baseExtensions = options?.extensions || []
@@ -65,9 +70,8 @@ export class MarkdownManager {
65
70
  if (options?.extensions) {
66
71
  this.baseExtensions = options.extensions
67
72
  const flattened = flattenExtensions(options.extensions)
68
- flattened.forEach(ext => this.registerExtension(ext, false))
73
+ flattened.forEach(ext => this.registerExtension(ext))
69
74
  }
70
- this.lexer = new this.markedInstance.Lexer() // Reset lexer to include all tokenizers
71
75
  }
72
76
 
73
77
  /** Returns the underlying marked instance. */
@@ -95,11 +99,19 @@ export class MarkdownManager {
95
99
  * `markdownName`, `parseMarkdown`, `renderMarkdown` and `priority` from the
96
100
  * extension config (using the same resolution used across the codebase).
97
101
  */
98
- registerExtension(extension: AnyExtension, recreateLexer: boolean = true): void {
102
+ registerExtension(extension: AnyExtension): void {
99
103
  // Keep track of all extensions for HTML parsing
100
104
  this.extensions.push(extension)
101
105
 
106
+ // Track extensions that declare `code: true` so we can skip HTML entity
107
+ // encoding inside code contexts without hardcoding specific type names.
108
+ const isCode = callOrReturn(getExtensionField(extension, 'code'))
109
+
102
110
  const name = extension.name
111
+
112
+ if (isCode) {
113
+ this.codeTypes.add(name)
114
+ }
103
115
  const tokenName =
104
116
  (getExtensionField(extension, 'markdownTokenName') as ExtendableConfig['markdownTokenName']) || name
105
117
  const parseMarkdown = getExtensionField(extension, 'parseMarkdown') as ExtendableConfig['parseMarkdown'] | undefined
@@ -143,13 +155,24 @@ export class MarkdownManager {
143
155
  // Register custom tokenizer with marked.js
144
156
  if (tokenizer && this.hasMarked()) {
145
157
  this.registerTokenizer(tokenizer)
158
+ }
159
+ }
146
160
 
147
- if (recreateLexer) {
148
- this.lexer = new this.markedInstance.Lexer() // Reset lexer to include new tokenizer
149
- }
161
+ private createLexer(): Lexer {
162
+ return new this.markedInstance.Lexer()
163
+ }
164
+
165
+ private createTokenizerHelpers(lexer: Lexer): MarkdownLexerConfiguration {
166
+ return {
167
+ inlineTokens: (src: string) => lexer.inlineTokens(src),
168
+ blockTokens: (src: string) => lexer.blockTokens(src),
150
169
  }
151
170
  }
152
171
 
172
+ private tokenizeInline(src: string): MarkdownToken[] {
173
+ return (this.activeParseLexer ?? this.createLexer()).inlineTokens(src) as MarkdownToken[]
174
+ }
175
+
153
176
  /**
154
177
  * Register a custom tokenizer with marked.js for parsing non-standard markdown syntax.
155
178
  */
@@ -159,27 +182,15 @@ export class MarkdownManager {
159
182
  }
160
183
 
161
184
  const { name, start, level = 'inline', tokenize } = tokenizer
162
-
163
- // Helper functions that use a fresh lexer instance with all registered extensions
164
- const tokenizeInline = (src: string) => {
165
- return this.lexer.inlineTokens(src)
166
- }
167
-
168
- const tokenizeBlock = (src: string) => {
169
- return this.lexer.blockTokens(src)
170
- }
171
-
172
- const helper = {
173
- inlineTokens: tokenizeInline,
174
- blockTokens: tokenizeBlock,
175
- }
185
+ const createTokenizerHelpers = this.createTokenizerHelpers.bind(this)
186
+ const createLexer = this.createLexer.bind(this)
176
187
 
177
188
  let startCb: (src: string) => number
178
189
 
179
190
  if (!start) {
180
191
  startCb = (src: string) => {
181
192
  // For other tokenizers, try to find a match and return its position
182
- const result = tokenize(src, [], helper)
193
+ const result = tokenize(src, [], this.createTokenizerHelpers(this.createLexer()))
183
194
  if (result && result.raw) {
184
195
  const index = src.indexOf(result.raw)
185
196
  return index
@@ -195,7 +206,8 @@ export class MarkdownManager {
195
206
  name,
196
207
  level,
197
208
  start: startCb,
198
- tokenizer: (src, tokens) => {
209
+ tokenizer(this: TokenizerThis, src, tokens) {
210
+ const helper = this.lexer ? createTokenizerHelpers(this.lexer) : createTokenizerHelpers(createLexer())
199
211
  const result = tokenize(src, tokens, helper)
200
212
 
201
213
  if (result && result.type) {
@@ -289,16 +301,26 @@ export class MarkdownManager {
289
301
  throw new Error('No marked instance available for parsing')
290
302
  }
291
303
 
292
- // Use marked to tokenize the markdown
293
- const tokens = this.markedInstance.lexer(markdown)
304
+ const previousParseLexer = this.activeParseLexer
305
+ const parseLexer = this.createLexer()
294
306
 
295
- // Convert tokens to Tiptap JSON
296
- const content = this.parseTokens(tokens, true)
307
+ this.activeParseLexer = parseLexer
297
308
 
298
- // Return a document node containing the parsed content
299
- return {
300
- type: 'doc',
301
- content,
309
+ try {
310
+ // Use a parse-scoped lexer so follow-up inline tokenization can reuse
311
+ // the same configured lexer state without sharing it across parses.
312
+ const tokens = parseLexer.lex(markdown) as MarkdownToken[]
313
+
314
+ // Convert tokens to Tiptap JSON
315
+ const content = this.parseTokens(tokens, true)
316
+
317
+ // Return a document node containing the parsed content
318
+ return {
319
+ type: 'doc',
320
+ content,
321
+ }
322
+ } finally {
323
+ this.activeParseLexer = previousParseLexer
302
324
  }
303
325
  }
304
326
 
@@ -491,7 +513,7 @@ export class MarkdownManager {
491
513
  indentLevel,
492
514
  checked: checked ?? false,
493
515
  text: mainContent,
494
- tokens: this.lexer.inlineTokens(mainContent),
516
+ tokens: this.tokenizeInline(mainContent),
495
517
  nestedTokens,
496
518
  }
497
519
  }
@@ -631,9 +653,10 @@ export class MarkdownManager {
631
653
  const token = tokens[i]
632
654
 
633
655
  if (token.type === 'text') {
656
+ // Create text node – decode HTML entities so that e.g. `&lt;` displays as `<` in the editor
634
657
  result.push({
635
658
  type: 'text',
636
- text: token.text || '',
659
+ text: decodeHtmlEntities(token.text || ''),
637
660
  })
638
661
  } else if (token.type === 'html') {
639
662
  // Handle possible split inline HTML by attempting to detect an
@@ -797,7 +820,7 @@ export class MarkdownManager {
797
820
  case 'text':
798
821
  return {
799
822
  type: 'text',
800
- text: token.text || '',
823
+ text: decodeHtmlEntities(token.text || ''),
801
824
  }
802
825
 
803
826
  case 'html':
@@ -875,6 +898,18 @@ export class MarkdownManager {
875
898
  }
876
899
  }
877
900
 
901
+ /**
902
+ * Encode HTML entities in text unless the node is inside a code context
903
+ * (code mark or code-block parent) where literal characters should be preserved.
904
+ */
905
+ private encodeTextForMarkdown(text: string, node: JSONContent, parentNode?: JSONContent): string {
906
+ const isInsideCode =
907
+ (parentNode?.type != null && this.codeTypes.has(parentNode.type)) ||
908
+ (node.marks || []).some(m => this.codeTypes.has(typeof m === 'string' ? m : m.type))
909
+
910
+ return isInsideCode ? text : encodeHtmlEntities(text)
911
+ }
912
+
878
913
  renderNodeToMarkdown(
879
914
  node: JSONContent,
880
915
  parentNode?: JSONContent,
@@ -885,7 +920,7 @@ export class MarkdownManager {
885
920
  // if node is a text node, we simply return it's text content
886
921
  // marks are handled at the array level in renderNodesWithMarkBoundaries
887
922
  if (node.type === 'text') {
888
- return node.text || ''
923
+ return this.encodeTextForMarkdown(node.text || '', node, parentNode)
889
924
  }
890
925
 
891
926
  if (!node.type) {
@@ -982,7 +1017,7 @@ export class MarkdownManager {
982
1017
  }
983
1018
 
984
1019
  if (node.type === 'text') {
985
- let textContent = node.text || ''
1020
+ let textContent = this.encodeTextForMarkdown(node.text || '', node, parentNode)
986
1021
  const currentMarks = new Map((node.marks || []).map(mark => [mark.type, mark]))
987
1022
 
988
1023
  // Find marks that need to be closed and opened