@hcengineering/text-markdown 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/parser.ts ADDED
@@ -0,0 +1,853 @@
1
+ //
2
+ // Copyright © 2025 Hardcore Engineering Inc.
3
+ //
4
+ // Licensed under the Eclipse Public License, Version 2.0 (the "License");
5
+ // you may not use this file except in compliance with the License. You may
6
+ // obtain a copy of the License at https://www.eclipse.org/legal/epl-2.0
7
+ //
8
+ // Unless required by applicable law or agreed to in writing, software
9
+ // distributed under the License is distributed on an "AS IS" BASIS,
10
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ //
12
+ // See the License for the specific language governing permissions and
13
+ // limitations under the License.
14
+ //
15
+
16
+ import { Attrs, MarkupMark, MarkupMarkType, MarkupNode, MarkupNodeType } from '@hcengineering/text-core'
17
+ import { htmlToMarkup } from '@hcengineering/text-html'
18
+ import MarkdownIt, { type Token } from 'markdown-it'
19
+ import type { RuleCore } from 'markdown-it/lib/parser_core'
20
+ import type StateCore from 'markdown-it/lib/rules_core/state_core'
21
+
22
+ import { addToSet, removeFromSet, sameSet } from './marks'
23
+ import { nodeContent } from './node'
24
+
25
+ type SpecRule<T> = T | ((tok: Token, state: MarkdownParseState) => T)
26
+
27
+ function readSpec<T> (rule: SpecRule<T>, tok: Token, state: MarkdownParseState): T {
28
+ if (typeof rule === 'function') {
29
+ return (rule as (tok: Token, state: MarkdownParseState) => T)(tok, state)
30
+ }
31
+ return rule
32
+ }
33
+
34
// Rule describing how a block-level markdown-it token maps to a markup node.
interface ParsingBlockRule {
  // Target node type, or a function deriving it from the token.
  block: SpecRule<MarkupNodeType>
  // Optional attribute extractor for the opening token.
  getAttrs?: (tok: Token, state: MarkdownParseState) => Attrs
  // When true, block content is additionally wrapped in a paragraph node.
  wrapContent?: boolean
  // When true, the token carries its content directly and has no close token.
  noCloseToken?: boolean
}

// Rule producing a leaf node (image, horizontal rule, hard break).
interface ParsingNodeRule {
  node: MarkupNodeType
  getAttrs?: (tok: Token, state: MarkdownParseState) => Attrs
}

// Rule producing an inline mark (emphasis, bold, code, ...).
interface ParsingMarkRule {
  mark: MarkupMarkType
  getAttrs?: (tok: Token, state: MarkdownParseState) => Attrs
  noCloseToken?: boolean
}

// Rule that decides at parse time whether a token yields a node or a mark.
interface ParsingSpecialRule {
  type: (state: MarkdownParseState, tok: Token) => { type: MarkupMarkType | MarkupNodeType, node: boolean }
  getAttrs?: (tok: Token, state: MarkdownParseState) => Attrs
}

// Marker rule for token types that are silently skipped.
// eslint-disable-next-line @typescript-eslint/no-empty-interface
interface ParsingIgnoreRule {
  // empty
}

// A handler consumes one token and mutates the running parse state.
type HandlerRecord = (state: MarkdownParseState, tok: Token) => void
type HandlersRecord = Record<string, HandlerRecord>
64
+
65
+ // ****************************************************************
66
+ // Markdown parser
67
+ // ****************************************************************
68
+ function isText (a: MarkupNode, b: MarkupNode): boolean {
69
+ return (a.type === MarkupNodeType.text || a.type === MarkupNodeType.reference) && b.type === MarkupNodeType.text
70
+ }
71
+ function maybeMerge (a: MarkupNode, b: MarkupNode): MarkupNode | undefined {
72
+ if (isText(a, b) && (sameSet(a.marks, b.marks) || (a.text === '' && (a.marks?.length ?? 0) === 0))) {
73
+ if (a.text === '' && (a.marks?.length ?? 0) === 0) {
74
+ return { ...b }
75
+ }
76
+ return { ...a, text: (a.text ?? '') + (b.text ?? '') }
77
+ }
78
+ return undefined
79
+ }
80
+
81
// A node under construction on the parse stack.
interface StateElement {
  // Node type to emit when this element is closed.
  type: MarkupNodeType
  // Child nodes accumulated so far.
  content: MarkupNode[]
  // Attributes captured when the node was opened.
  attrs: Attrs
}
86
+
87
+ // Object used to track the context of a running parse.
88
+ class MarkdownParseState {
89
+ stack: StateElement[]
90
+ marks: MarkupMark[]
91
+ tokenHandlers: Record<string, (state: MarkdownParseState, tok: Token) => void>
92
+
93
+ constructor (
94
+ tokenHandlers: Record<string, (state: MarkdownParseState, tok: Token) => void>,
95
+ readonly refUrl: string,
96
+ readonly imageUrl: string
97
+ ) {
98
+ this.stack = [{ type: MarkupNodeType.doc, attrs: {}, content: [] }]
99
+ this.marks = []
100
+ this.tokenHandlers = tokenHandlers
101
+ }
102
+
103
+ top (): StateElement | undefined {
104
+ return this.stack[this.stack.length - 1]
105
+ }
106
+
107
+ push (elt: MarkupNode): void {
108
+ if (this.stack.length > 0) {
109
+ const tt = this.top()
110
+ tt?.content.push(elt)
111
+ }
112
+ }
113
+
114
+ mergeWithLast (nodes: MarkupNode[], node: MarkupNode): boolean {
115
+ const last = nodes[nodes.length - 1]
116
+ let merged: MarkupNode | undefined
117
+ if (last !== undefined && (merged = maybeMerge(last, node)) !== undefined) {
118
+ nodes[nodes.length - 1] = merged
119
+ return true
120
+ }
121
+ return false
122
+ }
123
+
124
+ // Adds the given text to the current position in the document,
125
+ // using the current marks as styling.
126
+ addText (text?: string): void {
127
+ const top = this.top()
128
+ if (text === undefined || top === undefined || text.length === 0) {
129
+ return
130
+ }
131
+
132
+ const node: MarkupNode = {
133
+ type: MarkupNodeType.text,
134
+ text
135
+ }
136
+ if (this.marks !== undefined) {
137
+ node.marks = this.marks
138
+ }
139
+
140
+ const nodes = top.content
141
+
142
+ if (!this.mergeWithLast(nodes, node)) {
143
+ nodes.push(node)
144
+ }
145
+ }
146
+
147
+ // Adds the given mark to the set of active marks.
148
+ openMark (mark: MarkupMark): void {
149
+ this.marks = addToSet(mark, this.marks)
150
+ }
151
+
152
+ // Removes the given mark from the set of active marks.
153
+ closeMark (mark: MarkupMarkType): void {
154
+ this.marks = removeFromSet(mark, this.marks)
155
+ }
156
+
157
+ parseTokens (toks: Token[] | null): void {
158
+ const _toks = [...(toks ?? [])]
159
+ while (_toks.length > 0) {
160
+ const tok = _toks.shift()
161
+ if (tok === undefined) {
162
+ break
163
+ }
164
+ // Check if we need to merge some content into
165
+ // Merge <sub> </sub> into one html token
166
+ if (tok.type === 'html_inline' && tok.content.trim() === '<sub>') {
167
+ while (_toks.length > 0) {
168
+ const _tok = _toks.shift()
169
+ if (_tok !== undefined) {
170
+ tok.content += _tok.content
171
+ if (_tok.type === 'html_inline' && _tok.content.trim() === '</sub>') {
172
+ break
173
+ }
174
+ }
175
+ }
176
+ }
177
+
178
+ const handler = this.tokenHandlers[tok.type]
179
+ if (handler === undefined) {
180
+ throw new Error(`Token type '${String(tok.type)} not supported by Markdown parser`)
181
+ }
182
+ handler(this, tok)
183
+ }
184
+ }
185
+
186
+ // Add a node at the current position.
187
+ addNode (type: MarkupNodeType, attrs: Attrs, content: MarkupNode[] = []): MarkupNode {
188
+ const node: MarkupNode = { type, content }
189
+
190
+ if (Object.keys(attrs ?? {}).length > 0) {
191
+ node.attrs = attrs
192
+ }
193
+ if (this.marks.length > 0) {
194
+ node.marks = this.marks
195
+ }
196
+ this.push(node)
197
+ return node
198
+ }
199
+
200
+ // Wrap subsequent content in a node of the given type.
201
+ openNode (type: MarkupNodeType, attrs: Attrs): void {
202
+ this.stack.push({ type, attrs, content: [] })
203
+ }
204
+
205
+ // Close and return the node that is currently on top of the stack.
206
+ closeNode (): MarkupNode {
207
+ if (this.marks.length > 0) this.marks = []
208
+ const info = this.stack.pop()
209
+ if (info !== undefined) {
210
+ return this.addNode(info.type, info.attrs, info.content)
211
+ }
212
+ return { type: MarkupNodeType.doc }
213
+ }
214
+ }
215
+
216
+ function attrs (
217
+ spec: ParsingBlockRule | ParsingMarkRule | ParsingNodeRule,
218
+ token: Token,
219
+ state: MarkdownParseState
220
+ ): Attrs {
221
+ return spec.getAttrs?.(token, state) ?? {}
222
+ }
223
+
224
+ // Code content is represented as a single token with a `content`
225
+ // property in Markdown-it.
226
+ function noCloseToken (spec: ParsingBlockRule | ParsingMarkRule, type: string): boolean {
227
+ return (spec.noCloseToken ?? false) || ['code_inline', 'code_block', 'fence'].indexOf(type) > 0
228
+ }
229
+
230
+ function withoutTrailingNewline (str: string): string {
231
+ return str[str.length - 1] === '\n' ? str.slice(0, str.length - 1) : str
232
+ }
233
+
234
+ function addSpecBlock (
235
+ handlers: HandlersRecord,
236
+ spec: ParsingBlockRule,
237
+ type: string,
238
+ specBlock: SpecRule<MarkupNodeType>
239
+ ): void {
240
+ if (noCloseToken(spec, type)) {
241
+ handlers[type] = newSimpleBlockHandler(specBlock, spec)
242
+ } else {
243
+ handlers[type + '_open'] = (state, tok) => {
244
+ state.openNode(readSpec(specBlock, tok, state), attrs(spec, tok, state))
245
+ if (spec.wrapContent === true) {
246
+ state.openNode(MarkupNodeType.paragraph, {})
247
+ }
248
+ }
249
+ handlers[type + '_close'] = (state) => {
250
+ if (spec.wrapContent === true) {
251
+ state.closeNode()
252
+ }
253
+ state.closeNode()
254
+ }
255
+ }
256
+ }
257
+ function newSimpleBlockHandler (specBlock: SpecRule<MarkupNodeType>, spec: ParsingBlockRule): HandlerRecord {
258
+ return (state, tok) => {
259
+ state.openNode(readSpec(specBlock, tok, state), attrs(spec, tok, state))
260
+ state.addText(withoutTrailingNewline(tok.content))
261
+ state.closeNode()
262
+ }
263
+ }
264
+
265
+ function addSpecMark (handlers: HandlersRecord, spec: ParsingMarkRule, type: string, specMark: MarkupMarkType): void {
266
+ if (noCloseToken(spec, type)) {
267
+ handlers[type] = newSimpleMarkHandler(spec, specMark)
268
+ } else {
269
+ handlers[type + '_open'] = (state, tok) => {
270
+ state.openMark({ type: specMark, attrs: attrs(spec, tok, state) })
271
+ }
272
+ handlers[type + '_close'] = (state) => {
273
+ state.closeMark(specMark)
274
+ }
275
+ }
276
+ }
277
+ function addSpecialRule (handlers: HandlersRecord, spec: ParsingSpecialRule, type: string): void {
278
+ handlers[type + '_open'] = (state, tok) => {
279
+ const type = spec.type(state, tok)
280
+ if (type.node) {
281
+ state.openNode(type.type as MarkupNodeType, spec.getAttrs?.(tok, state) ?? {})
282
+ } else {
283
+ state.openMark({ type: type.type as MarkupMarkType, attrs: spec.getAttrs?.(tok, state) ?? {} })
284
+ }
285
+ }
286
+ handlers[type + '_close'] = (state, tok) => {
287
+ const type = spec.type(state, tok)
288
+ if (type.node) {
289
+ state.closeNode()
290
+ } else {
291
+ state.closeMark(type.type as MarkupMarkType)
292
+ }
293
+ }
294
+ }
295
+ function addIgnoreRule (handlers: HandlersRecord, spec: ParsingIgnoreRule, type: string): void {
296
+ handlers[type + '_open'] = (state, tok) => {}
297
+ handlers[type + '_close'] = (state, tok) => {}
298
+ }
299
+ function newSimpleMarkHandler (spec: ParsingMarkRule, specMark: MarkupMarkType): HandlerRecord {
300
+ return (state: MarkdownParseState, tok: Token): void => {
301
+ state.openMark({ attrs: attrs(spec, tok, state), type: specMark })
302
+ state.addText(withoutTrailingNewline(tok.content))
303
+ state.closeMark(specMark)
304
+ }
305
+ }
306
+
307
// Build the full token-type → handler table from the rule tables, plus
// special-cased handlers for raw HTML and plain text tokens.
function tokenHandlers (
  tokensBlock: Record<string, ParsingBlockRule>,
  tokensNode: Record<string, ParsingNodeRule>,
  tokensMark: Record<string, ParsingMarkRule>,
  specialRules: Record<string, ParsingSpecialRule>,
  ignoreRules: Record<string, ParsingIgnoreRule>,
  htmlParser: HtmlParser
): HandlersRecord {
  const handlers: HandlersRecord = {}

  Object.entries(tokensBlock).forEach(([type, spec]) => {
    addSpecBlock(handlers, spec, type, spec.block)
  })
  Object.entries(tokensNode).forEach(([type, spec]) => {
    addSpecNode(handlers, type, spec)
  })
  Object.entries(tokensMark).forEach(([type, spec]) => {
    addSpecMark(handlers, spec, type, spec.mark)
  })
  Object.entries(specialRules).forEach(([type, spec]) => {
    addSpecialRule(handlers, spec, type)
  })
  Object.entries(ignoreRules).forEach(([type, spec]) => {
    addIgnoreRule(handlers, spec, type)
  })

  // Inline HTML is delegated to the HTML parser; on failure the raw
  // content is kept as plain text.
  handlers.html_inline = (state: MarkdownParseState, tok: Token) => {
    try {
      const top = state.top()
      // A closing </a> while an embed node is open terminates the embed;
      // its accumulated content is discarded.
      if (tok.content.trim() === '</a>' && top?.type === MarkupNodeType.embed) {
        top.content = []
        state.closeNode()
        return
      }
      const markup = htmlParser(tok.content)
      if (markup.content !== undefined) {
        // unwrap content from wrapping paragraph
        const shouldUnwrap =
          markup.content.length === 1 &&
          markup.content[0].type === MarkupNodeType.paragraph &&
          top?.type === MarkupNodeType.paragraph

        const content = nodeContent(shouldUnwrap ? markup.content[0] : markup)
        for (const c of content) {
          // An embed opens a node that stays on the stack until its
          // closing </a> arrives (handled above).
          if (c.type === MarkupNodeType.embed) {
            state.openNode(MarkupNodeType.embed, c.attrs ?? {})
            continue
          }
          state.push(c)
        }
      }
    } catch (err: any) {
      console.error(err)
      state.addText(tok.content)
    }
  }
  // Block-level HTML: parse the fragment and splice its children in;
  // fall back to plain text on parser failure.
  handlers.html_block = (state: MarkdownParseState, tok: Token) => {
    try {
      const model = htmlParser(tok.content)
      const content = nodeContent(model)
      for (const c of content) {
        state.push(c)
      }
    } catch (err: any) {
      console.error(err)
      state.addText(tok.content)
    }
  }

  addTextHandlers(handlers)

  return handlers
}
380
+
381
+ function addTextHandlers (handlers: HandlersRecord): void {
382
+ handlers.text = (state, tok) => {
383
+ state.addText(tok.content)
384
+ }
385
+ handlers.inline = (state, tok) => {
386
+ state.parseTokens(tok.children)
387
+ }
388
+ handlers.softbreak = (state) => {
389
+ state.addText('\n')
390
+ }
391
+ }
392
+
393
+ function addSpecNode (handlers: HandlersRecord, type: string, spec: ParsingNodeRule): void {
394
+ handlers[type] = (state: MarkdownParseState, tok: Token) => state.addNode(spec.node, attrs(spec, tok, state))
395
+ }
396
+
397
+ function tokAttrGet (token: Token, name: string): string | undefined {
398
+ const attr = token.attrGet(name)
399
+ if (attr != null) {
400
+ return attr
401
+ }
402
+ // try iterate attrs
403
+ for (const [k, v] of token.attrs ?? []) {
404
+ if (k === name) {
405
+ return v
406
+ }
407
+ }
408
+ }
409
+
410
+ function tokToAttrs (token: Token, ...names: string[]): Record<string, string> {
411
+ const result: Record<string, string> = {}
412
+ for (const name of names) {
413
+ const attr = token.attrGet(name)
414
+ if (attr !== null) {
415
+ result[name] = attr
416
+ }
417
+ }
418
+ return result
419
+ }
420
+
421
+ function todoItemMetaAttrsGet (tok: Token): Record<string, string> {
422
+ const userid = tokAttrGet(tok, 'userid')
423
+ const todoid = tokAttrGet(tok, 'todoid')
424
+
425
+ const result: Record<string, string> = {}
426
+
427
+ if (userid !== undefined) {
428
+ result.userid = userid
429
+ }
430
+ if (todoid !== undefined) {
431
+ result.todoid = todoid
432
+ }
433
+
434
+ return result
435
+ }
436
+
437
// ::- A configuration of a Markdown parser. Maps markdown-it block token
// names to the markup node types (and attributes) they produce.
const tokensBlock: Record<string, ParsingBlockRule> = {
  blockquote: { block: MarkupNodeType.blockquote },
  paragraph: { block: MarkupNodeType.paragraph },
  list_item: { block: MarkupNodeType.list_item },
  task_item: { block: MarkupNodeType.taskItem, getAttrs: (tok) => ({ 'data-type': 'taskItem' }) },
  bullet_list: {
    block: MarkupNodeType.bullet_list,
    // Preserve the original bullet character so serialization round-trips.
    getAttrs: (tok) => ({
      bullet: tok.markup
    })
  },
  todo_list: {
    block: MarkupNodeType.todoList,
    getAttrs: (tok) => ({
      bullet: tok.markup
    })
  },
  todo_item: {
    // Attributes ('checked', 'todoid', 'userid') are set on the token by
    // convertTodoItem during the core list rule.
    block: MarkupNodeType.todoItem,
    getAttrs: (tok) => ({
      checked: tokAttrGet(tok, 'checked') === 'true',
      ...todoItemMetaAttrsGet(tok)
    })
  },
  ordered_list: {
    block: MarkupNodeType.ordered_list,
    getAttrs: (tok: Token) => ({ order: tokAttrGet(tok, 'start') ?? '1' })
  },
  task_list: {
    block: MarkupNodeType.taskList,
    getAttrs: (tok: Token) => ({ order: tokAttrGet(tok, 'start') ?? '1', 'data-type': 'taskList' })
  },
  heading: {
    // tok.tag is 'h1'..'h6'; the digit becomes the heading level.
    block: MarkupNodeType.heading,
    getAttrs: (tok: Token) => ({ level: Number(tok.tag.slice(1)), marker: tok.markup })
  },
  code_block: {
    // Mermaid fences become dedicated mermaid nodes.
    block: (tok) => {
      if (tok.info === 'mermaid') {
        return MarkupNodeType.mermaid
      }
      return MarkupNodeType.code_block
    },
    getAttrs: (tok: Token) => {
      return { language: tok.info ?? '' }
    },
    noCloseToken: true
  },
  fence: {
    block: (tok) => {
      if (tok.info === 'mermaid') {
        return MarkupNodeType.mermaid
      }
      return MarkupNodeType.code_block
    },
    getAttrs: (tok: Token) => {
      return { language: tok.info ?? '' }
    },
    noCloseToken: true
  },
  sub: {
    block: MarkupNodeType.subLink,
    // NOTE(review): this getAttrs mirrors the code-block rules and returns
    // `language` from tok.info — looks copied from `fence`; confirm subLink
    // actually consumes this attribute.
    getAttrs: (tok: Token) => {
      return { language: tok.info ?? '' }
    },
    noCloseToken: false
  },
  table: {
    block: MarkupNodeType.table,
    noCloseToken: false
  },
  th: {
    block: MarkupNodeType.table_header,
    getAttrs: (tok: Token) => {
      return {
        colspan: Number(tok.attrGet('colspan') ?? '1'),
        rowspan: Number(tok.attrGet('rowspan') ?? '1')
      }
    },
    // Cell content must live inside a paragraph node.
    wrapContent: true,
    noCloseToken: false
  },
  tr: {
    block: MarkupNodeType.table_row,
    noCloseToken: false
  },
  td: {
    block: MarkupNodeType.table_cell,
    getAttrs: (tok: Token) => {
      return {
        colspan: Number(tok.attrGet('colspan') ?? '1'),
        rowspan: Number(tok.attrGet('rowspan') ?? '1')
      }
    },
    wrapContent: true,
    noCloseToken: false
  }
}
536
// Leaf-node rules: tokens that map directly to childless nodes.
const tokensNode: Record<string, ParsingNodeRule> = {
  hr: { node: MarkupNodeType.horizontal_rule },
  image: {
    node: MarkupNodeType.image,
    getAttrs: (tok: Token, state) => {
      const result = tokToAttrs(tok, 'src', 'title', 'alt', 'data')
      // Fall back to the token's text children when the alt attribute is empty.
      result.alt = convertStringLikeToken(tok, result.alt)
      // Platform-hosted images carry file id and dimensions as query params.
      if (result.src.startsWith(state.imageUrl)) {
        const url = new URL(result.src)
        result['data-type'] = 'image'
        const file = url.searchParams.get('file')
        if (file != null) {
          result['file-id'] = file
        }

        const width = url.searchParams.get('width')
        if (width != null) {
          result.width = width
        }

        const height = url.searchParams.get('height')
        if (height != null) {
          result.height = height
        }
      }
      return result
    }
  },
  hardbreak: { node: MarkupNodeType.hard_break }
}
566
// Inline mark rules: markdown-it inline tokens → markup mark types.
const tokensMark: Record<string, ParsingMarkRule> = {
  em: {
    mark: MarkupMarkType.em,
    // Keep the original marker ('*' or '_') so serialization round-trips.
    getAttrs: (tok: Token, state: MarkdownParseState) => {
      return { marker: tok.markup }
    }
  },
  bold: {
    mark: MarkupMarkType.bold,
    getAttrs: (tok: Token, state: MarkdownParseState) => {
      return { marker: tok.markup }
    }
  },
  strong: {
    // 'strong' and 'bold' both map to the bold mark.
    mark: MarkupMarkType.bold,
    getAttrs: (tok: Token, state: MarkdownParseState) => {
      return { marker: tok.markup }
    }
  },
  s: { mark: MarkupMarkType.strike },
  u: { mark: MarkupMarkType.underline },
  code_inline: {
    // code_inline carries its text in `content`, so no close token exists.
    mark: MarkupMarkType.code,
    noCloseToken: true
  }
}
592
+
593
+ const specialRule: Record<string, ParsingSpecialRule> = {
594
+ link: {
595
+ type: (state, tok) => {
596
+ const href = tok.attrGet('href')
597
+ if ((href?.startsWith(state.refUrl) ?? false) || state.stack[state.stack.length - 1]?.type === 'reference') {
598
+ return { type: MarkupNodeType.reference, node: true }
599
+ }
600
+ return { type: MarkupMarkType.link, node: false, close: true }
601
+ },
602
+ getAttrs: (tok: Token, state) => {
603
+ const attrs = tokToAttrs(tok, 'href', 'title')
604
+ if (attrs.href !== undefined) {
605
+ try {
606
+ const url = new URL(attrs.href)
607
+ if (attrs.href.startsWith(state.refUrl) ?? false) {
608
+ return {
609
+ label: url.searchParams?.get('label') ?? '',
610
+ id: url.searchParams?.get('_id') ?? '',
611
+ objectclass: url.searchParams?.get('_class') ?? ''
612
+ }
613
+ }
614
+ } catch (err: any) {
615
+ // ignore
616
+ }
617
+ }
618
+ return attrs
619
+ }
620
+ }
621
+ }
622
+
623
// Table head/body wrapper tokens carry no markup of their own — skip them.
const ignoreRule: Record<string, ParsingIgnoreRule> = {
  thead: {},
  tbody: {}
}
627
+
628
// Token-type predicates used by the todo-list core rule.
export const isInlineToken = (token?: Token): boolean => token?.type === 'inline'

export const isParagraphToken = (token?: Token): boolean => token?.type === 'paragraph_open'

export const isListItemToken = (token?: Token): boolean => token?.type === 'list_item_open'
633
+
634
// Env shape used by the task-list core rule.
export interface TaskListEnv {
  tasklists: number
}

// markdown-it core state narrowed to carry the task-list env.
interface TaskListStateCore extends StateCore {
  env: TaskListEnv
}
641
+
642
// The leading whitespace in a list item (token.content) is already trimmed off by markdown-it.
// The regex below checks for '[ ] ' or '[x] ' or '[X] ' at the start of the string token.content,
// where the space is either a normal space or a non-breaking space (character 160 = \u00A0).
const startsWithTodoMarkdown = (token: Token): boolean => /^\[[xX \u00A0]\][ \u00A0]/.test(token.content)
// True when the todo marker is checked ('[x] ' or '[X] ').
const isCheckedTodoItem = (token: Token): boolean => /^\[[xX]\][ \u00A0]/.test(token.content)
647
+
648
// Parses an HTML fragment into a markup node tree.
export type HtmlParser = (html: string) => MarkupNode

export interface MarkdownParserOptions {
  // Base URL identifying platform-internal reference links.
  refUrl: string
  // Base URL identifying platform-hosted images.
  imageUrl: string
  // Override for the HTML fragment parser (defaults to htmlToMarkup).
  htmlParser?: HtmlParser
}
655
+
656
// Markdown → markup parser: tokenizes with markdown-it (plus two custom
// core rules) and folds the token stream into a MarkupNode tree.
export class MarkdownParser {
  tokenizer: MarkdownIt
  tokenHandlers: Record<string, (state: MarkdownParseState, tok: Token) => void>
  htmlParser: HtmlParser

  constructor (private readonly options: MarkdownParserOptions) {
    this.tokenizer = MarkdownIt('default', {
      html: true
    })
    // Custom core rules run after inline parsing: todo-list conversion
    // and HTML-comment preservation.
    this.tokenizer.core.ruler.after('inline', 'task_list', this.listRule)
    this.tokenizer.core.ruler.after('inline', 'html_comment', this.htmlCommentRule)

    this.htmlParser = options.htmlParser ?? htmlToMarkup
    this.tokenHandlers = tokenHandlers(tokensBlock, tokensNode, tokensMark, specialRule, ignoreRule, this.htmlParser)
  }

  // Parse markdown text into a markup document node.
  parse (text: string): MarkupNode {
    const state = new MarkdownParseState(this.tokenHandlers, this.options.refUrl, this.options.imageUrl)
    let doc: MarkupNode

    const tokens = this.tokenizer.parse(text, {})

    state.parseTokens(tokens)
    // Drain the stack; the last closeNode yields the document node.
    do {
      doc = state.closeNode()
    } while (state.stack.length > 0)
    return doc
  }

  htmlCommentRule: RuleCore = (state: StateCore): boolean => {
    const tokens = state.tokens
    for (let i = 0; i < tokens.length; i++) {
      // Prosemirror entirely ignores comments when parsing, so
      // here we replaces html comment tag with a custom tag so the comments got parsed as a node
      if (tokens[i].type === 'html_block' || tokens[i].type === 'html_inline') {
        const content = tokens[i].content.replaceAll('<!--', '<comment>').replaceAll('-->', '</comment>')
        tokens[i].content = content
      }
    }
    return true
  }

  listRule: RuleCore = (state: TaskListStateCore): boolean => {
    const tokens = state.tokens
    // Saved (listCloseIdx, itemCloseIdx) pairs for enclosing lists while
    // a nested list is being processed.
    const states: Array<{ closeIdx: number, lastItemIdx: number }> = []

    // step #1 - convert list items to todo items
    for (let open = 0; open < tokens.length; open++) {
      if (isTodoListItem(tokens, open)) {
        convertTodoItem(tokens, open)
      }
    }

    // step #2 - convert lists to proper type
    // listCloseIdx and itemCloseIdx tracks position of the list and item close tokens
    // because we insert items into the list, the variables keep the position from the
    // end of the list so we don't have to count inserts
    let listCloseIdx = -1
    let itemCloseIdx = -1

    // Walk backwards so indices counted from the end stay valid across splices.
    for (let i = tokens.length - 1; i >= 0; i--) {
      if (tokens[i].type === 'bullet_list_close') {
        states.push({ closeIdx: listCloseIdx, lastItemIdx: itemCloseIdx })
        listCloseIdx = tokens.length - i
        itemCloseIdx = -1
      } else if (tokens[i].type === 'list_item_close' || tokens[i].type === 'todo_item_close') {
        // when found item close token of different type, split the list
        if (itemCloseIdx === -1) {
          itemCloseIdx = tokens.length - i
        } else if (tokens[i].type !== tokens[tokens.length - itemCloseIdx].type) {
          // Insert a close/open pair after position i, splitting the
          // current list into two homogeneous lists.
          const bulletListOpen = new state.Token('bullet_list_open', 'ul', 1)
          bulletListOpen.markup = tokens[i + 1].markup
          tokens.splice(i + 1, 0, bulletListOpen)
          tokens.splice(i + 1, 0, new state.Token('bullet_list_close', 'ul', -1))
          convertTodoList(tokens, i + 2, tokens.length - listCloseIdx, tokens.length - itemCloseIdx)
          listCloseIdx = tokens.length - i - 1
          itemCloseIdx = tokens.length - i
        }
      } else if (tokens[i].type === 'bullet_list_open') {
        if (itemCloseIdx !== -1) {
          convertTodoList(tokens, i, tokens.length - listCloseIdx, tokens.length - itemCloseIdx)
        }

        // Restore bookkeeping for the enclosing list, if any.
        const prevState = states.pop() ?? { closeIdx: -1, lastItemIdx: -1 }
        listCloseIdx = prevState.closeIdx
        itemCloseIdx = prevState.lastItemIdx
      }
    }

    return true
  }
}
748
+
749
+ function convertTodoList (tokens: Token[], open: number, close: number, item: number): void {
750
+ if (tokens[open].type !== 'bullet_list_open') {
751
+ throw new Error('bullet_list_open token expected')
752
+ }
753
+ if (tokens[close].type !== 'bullet_list_close') {
754
+ throw new Error('bullet_list_close token expected')
755
+ }
756
+
757
+ if (tokens[item].type === 'todo_item_close') {
758
+ tokens[open].type = 'todo_list_open'
759
+ tokens[close].type = 'todo_list_close'
760
+ }
761
+ }
762
+
763
+ function convertTodoItem (tokens: Token[], open: number): boolean {
764
+ const close = findListItemCloseToken(tokens, open)
765
+ if (close !== -1) {
766
+ tokens[open].type = 'todo_item_open'
767
+ tokens[close].type = 'todo_item_close'
768
+
769
+ const inline = tokens[open + 2]
770
+
771
+ if (tokens[open].attrs == null) {
772
+ tokens[open].attrs = []
773
+ }
774
+
775
+ ;(tokens[open].attrs as any).push(['checked', isCheckedTodoItem(inline) ? 'true' : 'false'])
776
+
777
+ if (inline.children !== null) {
778
+ const newContent = inline.children[0].content.slice(4)
779
+ if (newContent.length > 0) {
780
+ inline.children[0].content = newContent
781
+ } else {
782
+ inline.children = inline.children.slice(1)
783
+ }
784
+
785
+ const metaTok = inline.children.find(
786
+ (tok) => tok.type === 'html_inline' && tok.content.startsWith('<!--') && tok.content.endsWith('-->')
787
+ )
788
+ if (metaTok !== undefined) {
789
+ const metaValues = metaTok.content.slice(5, -4).split(',')
790
+ for (const mv of metaValues) {
791
+ if (mv.startsWith('todoid')) {
792
+ ;(tokens[open].attrs as any).push(['todoid', mv.slice(7)])
793
+ }
794
+ if (mv.startsWith('userid')) {
795
+ ;(tokens[open].attrs as any).push(['userid', mv.slice(7)])
796
+ }
797
+ }
798
+ }
799
+ }
800
+
801
+ return true
802
+ }
803
+
804
+ return false
805
+ }
806
+
807
+ function findListItemCloseToken (tokens: Token[], open: number): number {
808
+ if (tokens[open].type !== 'list_item_open') {
809
+ throw new Error('list_item_open token expected')
810
+ }
811
+
812
+ const level = tokens[open].level
813
+ for (let close = open + 1; close < tokens.length; close++) {
814
+ if (tokens[close].type === 'list_item_close' && tokens[close].level === level) {
815
+ return close
816
+ }
817
+ }
818
+
819
+ return -1
820
+ }
821
+
822
+ // todo token structure
823
+ // tokens[i].type === list_item_open
824
+ // tokens[i + 1].type === paragraph
825
+ // tokens[i + 2].type === inline
826
+ function isTodoListItem (tokens: Token[], pos: number): boolean {
827
+ return (
828
+ isListItemToken(tokens[pos]) &&
829
+ isParagraphToken(tokens[pos + 1]) &&
830
+ isInlineToken(tokens[pos + 2]) &&
831
+ startsWithTodoMarkdown(tokens[pos + 2])
832
+ )
833
+ }
834
+
835
+ function convertStringLikeToken (tok: Token, attrValue?: string): string {
836
+ if (typeof attrValue === 'string' && attrValue !== '') {
837
+ return attrValue
838
+ }
839
+ const children = tok.children ?? []
840
+ let out = ''
841
+ for (const child of children) {
842
+ switch (child.type) {
843
+ case 'text':
844
+ out += child.content
845
+ break
846
+ case 'hardbreak':
847
+ out += '\n'
848
+ break
849
+ }
850
+ }
851
+
852
+ return out
853
+ }