@hcengineering/text-markdown 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/__tests__/markdown.test.js +1044 -0
- package/lib/__tests__/markdown.test.js.map +7 -0
- package/lib/compare.js +100 -0
- package/lib/compare.js.map +7 -0
- package/lib/index.js +47 -0
- package/lib/index.js.map +7 -0
- package/lib/marks.js +59 -0
- package/lib/marks.js.map +7 -0
- package/lib/node.js +34 -0
- package/lib/node.js.map +7 -0
- package/lib/parser.js +724 -0
- package/lib/parser.js.map +7 -0
- package/lib/serializer.js +614 -0
- package/lib/serializer.js.map +7 -0
- package/package.json +59 -0
- package/src/__tests__/markdown.test.ts +1076 -0
- package/src/compare.ts +119 -0
- package/src/index.ts +47 -0
- package/src/marks.ts +46 -0
- package/src/node.ts +24 -0
- package/src/parser.ts +853 -0
- package/src/serializer.ts +833 -0
- package/tsconfig.json +12 -0
- package/types/__tests__/markdown.test.d.ts +9 -0
- package/types/__tests__/markdown.test.d.ts.map +1 -0
- package/types/compare.d.ts +10 -0
- package/types/compare.d.ts.map +1 -0
- package/types/index.d.ts +14 -0
- package/types/index.d.ts.map +1 -0
- package/types/marks.d.ts +8 -0
- package/types/marks.d.ts.map +1 -0
- package/types/node.d.ts +4 -0
- package/types/node.d.ts.map +1 -0
- package/types/parser.d.ts +50 -0
- package/types/parser.d.ts.map +1 -0
- package/types/serializer.d.ts +102 -0
- package/types/serializer.d.ts.map +1 -0
package/src/parser.ts
ADDED
|
@@ -0,0 +1,853 @@
|
|
|
1
|
+
//
|
|
2
|
+
// Copyright © 2025 Hardcore Engineering Inc.
|
|
3
|
+
//
|
|
4
|
+
// Licensed under the Eclipse Public License, Version 2.0 (the "License");
|
|
5
|
+
// you may not use this file except in compliance with the License. You may
|
|
6
|
+
// obtain a copy of the License at https://www.eclipse.org/legal/epl-2.0
|
|
7
|
+
//
|
|
8
|
+
// Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
10
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
//
|
|
12
|
+
// See the License for the specific language governing permissions and
|
|
13
|
+
// limitations under the License.
|
|
14
|
+
//
|
|
15
|
+
|
|
16
|
+
import { Attrs, MarkupMark, MarkupMarkType, MarkupNode, MarkupNodeType } from '@hcengineering/text-core'
|
|
17
|
+
import { htmlToMarkup } from '@hcengineering/text-html'
|
|
18
|
+
import MarkdownIt, { type Token } from 'markdown-it'
|
|
19
|
+
import type { RuleCore } from 'markdown-it/lib/parser_core'
|
|
20
|
+
import type StateCore from 'markdown-it/lib/rules_core/state_core'
|
|
21
|
+
|
|
22
|
+
import { addToSet, removeFromSet, sameSet } from './marks'
|
|
23
|
+
import { nodeContent } from './node'
|
|
24
|
+
|
|
25
|
+
type SpecRule<T> = T | ((tok: Token, state: MarkdownParseState) => T)
|
|
26
|
+
|
|
27
|
+
function readSpec<T> (rule: SpecRule<T>, tok: Token, state: MarkdownParseState): T {
|
|
28
|
+
if (typeof rule === 'function') {
|
|
29
|
+
return (rule as (tok: Token, state: MarkdownParseState) => T)(tok, state)
|
|
30
|
+
}
|
|
31
|
+
return rule
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
interface ParsingBlockRule {
|
|
35
|
+
block: SpecRule<MarkupNodeType>
|
|
36
|
+
getAttrs?: (tok: Token, state: MarkdownParseState) => Attrs
|
|
37
|
+
wrapContent?: boolean
|
|
38
|
+
noCloseToken?: boolean
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
interface ParsingNodeRule {
|
|
42
|
+
node: MarkupNodeType
|
|
43
|
+
getAttrs?: (tok: Token, state: MarkdownParseState) => Attrs
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
interface ParsingMarkRule {
|
|
47
|
+
mark: MarkupMarkType
|
|
48
|
+
getAttrs?: (tok: Token, state: MarkdownParseState) => Attrs
|
|
49
|
+
noCloseToken?: boolean
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
interface ParsingSpecialRule {
|
|
53
|
+
type: (state: MarkdownParseState, tok: Token) => { type: MarkupMarkType | MarkupNodeType, node: boolean }
|
|
54
|
+
getAttrs?: (tok: Token, state: MarkdownParseState) => Attrs
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
// eslint-disable-next-line @typescript-eslint/no-empty-interface
|
|
58
|
+
interface ParsingIgnoreRule {
|
|
59
|
+
// empty
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
type HandlerRecord = (state: MarkdownParseState, tok: Token) => void
|
|
63
|
+
type HandlersRecord = Record<string, HandlerRecord>
|
|
64
|
+
|
|
65
|
+
// ****************************************************************
|
|
66
|
+
// Markdown parser
|
|
67
|
+
// ****************************************************************
|
|
68
|
+
function isText (a: MarkupNode, b: MarkupNode): boolean {
|
|
69
|
+
return (a.type === MarkupNodeType.text || a.type === MarkupNodeType.reference) && b.type === MarkupNodeType.text
|
|
70
|
+
}
|
|
71
|
+
function maybeMerge (a: MarkupNode, b: MarkupNode): MarkupNode | undefined {
|
|
72
|
+
if (isText(a, b) && (sameSet(a.marks, b.marks) || (a.text === '' && (a.marks?.length ?? 0) === 0))) {
|
|
73
|
+
if (a.text === '' && (a.marks?.length ?? 0) === 0) {
|
|
74
|
+
return { ...b }
|
|
75
|
+
}
|
|
76
|
+
return { ...a, text: (a.text ?? '') + (b.text ?? '') }
|
|
77
|
+
}
|
|
78
|
+
return undefined
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
interface StateElement {
|
|
82
|
+
type: MarkupNodeType
|
|
83
|
+
content: MarkupNode[]
|
|
84
|
+
attrs: Attrs
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
// Object used to track the context of a running parse.
|
|
88
|
+
class MarkdownParseState {
|
|
89
|
+
stack: StateElement[]
|
|
90
|
+
marks: MarkupMark[]
|
|
91
|
+
tokenHandlers: Record<string, (state: MarkdownParseState, tok: Token) => void>
|
|
92
|
+
|
|
93
|
+
constructor (
|
|
94
|
+
tokenHandlers: Record<string, (state: MarkdownParseState, tok: Token) => void>,
|
|
95
|
+
readonly refUrl: string,
|
|
96
|
+
readonly imageUrl: string
|
|
97
|
+
) {
|
|
98
|
+
this.stack = [{ type: MarkupNodeType.doc, attrs: {}, content: [] }]
|
|
99
|
+
this.marks = []
|
|
100
|
+
this.tokenHandlers = tokenHandlers
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
top (): StateElement | undefined {
|
|
104
|
+
return this.stack[this.stack.length - 1]
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
push (elt: MarkupNode): void {
|
|
108
|
+
if (this.stack.length > 0) {
|
|
109
|
+
const tt = this.top()
|
|
110
|
+
tt?.content.push(elt)
|
|
111
|
+
}
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
mergeWithLast (nodes: MarkupNode[], node: MarkupNode): boolean {
|
|
115
|
+
const last = nodes[nodes.length - 1]
|
|
116
|
+
let merged: MarkupNode | undefined
|
|
117
|
+
if (last !== undefined && (merged = maybeMerge(last, node)) !== undefined) {
|
|
118
|
+
nodes[nodes.length - 1] = merged
|
|
119
|
+
return true
|
|
120
|
+
}
|
|
121
|
+
return false
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
// Adds the given text to the current position in the document,
|
|
125
|
+
// using the current marks as styling.
|
|
126
|
+
addText (text?: string): void {
|
|
127
|
+
const top = this.top()
|
|
128
|
+
if (text === undefined || top === undefined || text.length === 0) {
|
|
129
|
+
return
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
const node: MarkupNode = {
|
|
133
|
+
type: MarkupNodeType.text,
|
|
134
|
+
text
|
|
135
|
+
}
|
|
136
|
+
if (this.marks !== undefined) {
|
|
137
|
+
node.marks = this.marks
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
const nodes = top.content
|
|
141
|
+
|
|
142
|
+
if (!this.mergeWithLast(nodes, node)) {
|
|
143
|
+
nodes.push(node)
|
|
144
|
+
}
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
// Adds the given mark to the set of active marks.
|
|
148
|
+
openMark (mark: MarkupMark): void {
|
|
149
|
+
this.marks = addToSet(mark, this.marks)
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
// Removes the given mark from the set of active marks.
|
|
153
|
+
closeMark (mark: MarkupMarkType): void {
|
|
154
|
+
this.marks = removeFromSet(mark, this.marks)
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
parseTokens (toks: Token[] | null): void {
|
|
158
|
+
const _toks = [...(toks ?? [])]
|
|
159
|
+
while (_toks.length > 0) {
|
|
160
|
+
const tok = _toks.shift()
|
|
161
|
+
if (tok === undefined) {
|
|
162
|
+
break
|
|
163
|
+
}
|
|
164
|
+
// Check if we need to merge some content into
|
|
165
|
+
// Merge <sub> </sub> into one html token
|
|
166
|
+
if (tok.type === 'html_inline' && tok.content.trim() === '<sub>') {
|
|
167
|
+
while (_toks.length > 0) {
|
|
168
|
+
const _tok = _toks.shift()
|
|
169
|
+
if (_tok !== undefined) {
|
|
170
|
+
tok.content += _tok.content
|
|
171
|
+
if (_tok.type === 'html_inline' && _tok.content.trim() === '</sub>') {
|
|
172
|
+
break
|
|
173
|
+
}
|
|
174
|
+
}
|
|
175
|
+
}
|
|
176
|
+
}
|
|
177
|
+
|
|
178
|
+
const handler = this.tokenHandlers[tok.type]
|
|
179
|
+
if (handler === undefined) {
|
|
180
|
+
throw new Error(`Token type '${String(tok.type)} not supported by Markdown parser`)
|
|
181
|
+
}
|
|
182
|
+
handler(this, tok)
|
|
183
|
+
}
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
// Add a node at the current position.
|
|
187
|
+
addNode (type: MarkupNodeType, attrs: Attrs, content: MarkupNode[] = []): MarkupNode {
|
|
188
|
+
const node: MarkupNode = { type, content }
|
|
189
|
+
|
|
190
|
+
if (Object.keys(attrs ?? {}).length > 0) {
|
|
191
|
+
node.attrs = attrs
|
|
192
|
+
}
|
|
193
|
+
if (this.marks.length > 0) {
|
|
194
|
+
node.marks = this.marks
|
|
195
|
+
}
|
|
196
|
+
this.push(node)
|
|
197
|
+
return node
|
|
198
|
+
}
|
|
199
|
+
|
|
200
|
+
// Wrap subsequent content in a node of the given type.
|
|
201
|
+
openNode (type: MarkupNodeType, attrs: Attrs): void {
|
|
202
|
+
this.stack.push({ type, attrs, content: [] })
|
|
203
|
+
}
|
|
204
|
+
|
|
205
|
+
// Close and return the node that is currently on top of the stack.
|
|
206
|
+
closeNode (): MarkupNode {
|
|
207
|
+
if (this.marks.length > 0) this.marks = []
|
|
208
|
+
const info = this.stack.pop()
|
|
209
|
+
if (info !== undefined) {
|
|
210
|
+
return this.addNode(info.type, info.attrs, info.content)
|
|
211
|
+
}
|
|
212
|
+
return { type: MarkupNodeType.doc }
|
|
213
|
+
}
|
|
214
|
+
}
|
|
215
|
+
|
|
216
|
+
function attrs (
|
|
217
|
+
spec: ParsingBlockRule | ParsingMarkRule | ParsingNodeRule,
|
|
218
|
+
token: Token,
|
|
219
|
+
state: MarkdownParseState
|
|
220
|
+
): Attrs {
|
|
221
|
+
return spec.getAttrs?.(token, state) ?? {}
|
|
222
|
+
}
|
|
223
|
+
|
|
224
|
+
// Code content is represented as a single token with a `content`
|
|
225
|
+
// property in Markdown-it.
|
|
226
|
+
function noCloseToken (spec: ParsingBlockRule | ParsingMarkRule, type: string): boolean {
|
|
227
|
+
return (spec.noCloseToken ?? false) || ['code_inline', 'code_block', 'fence'].indexOf(type) > 0
|
|
228
|
+
}
|
|
229
|
+
|
|
230
|
+
function withoutTrailingNewline (str: string): string {
|
|
231
|
+
return str[str.length - 1] === '\n' ? str.slice(0, str.length - 1) : str
|
|
232
|
+
}
|
|
233
|
+
|
|
234
|
+
function addSpecBlock (
|
|
235
|
+
handlers: HandlersRecord,
|
|
236
|
+
spec: ParsingBlockRule,
|
|
237
|
+
type: string,
|
|
238
|
+
specBlock: SpecRule<MarkupNodeType>
|
|
239
|
+
): void {
|
|
240
|
+
if (noCloseToken(spec, type)) {
|
|
241
|
+
handlers[type] = newSimpleBlockHandler(specBlock, spec)
|
|
242
|
+
} else {
|
|
243
|
+
handlers[type + '_open'] = (state, tok) => {
|
|
244
|
+
state.openNode(readSpec(specBlock, tok, state), attrs(spec, tok, state))
|
|
245
|
+
if (spec.wrapContent === true) {
|
|
246
|
+
state.openNode(MarkupNodeType.paragraph, {})
|
|
247
|
+
}
|
|
248
|
+
}
|
|
249
|
+
handlers[type + '_close'] = (state) => {
|
|
250
|
+
if (spec.wrapContent === true) {
|
|
251
|
+
state.closeNode()
|
|
252
|
+
}
|
|
253
|
+
state.closeNode()
|
|
254
|
+
}
|
|
255
|
+
}
|
|
256
|
+
}
|
|
257
|
+
function newSimpleBlockHandler (specBlock: SpecRule<MarkupNodeType>, spec: ParsingBlockRule): HandlerRecord {
|
|
258
|
+
return (state, tok) => {
|
|
259
|
+
state.openNode(readSpec(specBlock, tok, state), attrs(spec, tok, state))
|
|
260
|
+
state.addText(withoutTrailingNewline(tok.content))
|
|
261
|
+
state.closeNode()
|
|
262
|
+
}
|
|
263
|
+
}
|
|
264
|
+
|
|
265
|
+
function addSpecMark (handlers: HandlersRecord, spec: ParsingMarkRule, type: string, specMark: MarkupMarkType): void {
|
|
266
|
+
if (noCloseToken(spec, type)) {
|
|
267
|
+
handlers[type] = newSimpleMarkHandler(spec, specMark)
|
|
268
|
+
} else {
|
|
269
|
+
handlers[type + '_open'] = (state, tok) => {
|
|
270
|
+
state.openMark({ type: specMark, attrs: attrs(spec, tok, state) })
|
|
271
|
+
}
|
|
272
|
+
handlers[type + '_close'] = (state) => {
|
|
273
|
+
state.closeMark(specMark)
|
|
274
|
+
}
|
|
275
|
+
}
|
|
276
|
+
}
|
|
277
|
+
function addSpecialRule (handlers: HandlersRecord, spec: ParsingSpecialRule, type: string): void {
|
|
278
|
+
handlers[type + '_open'] = (state, tok) => {
|
|
279
|
+
const type = spec.type(state, tok)
|
|
280
|
+
if (type.node) {
|
|
281
|
+
state.openNode(type.type as MarkupNodeType, spec.getAttrs?.(tok, state) ?? {})
|
|
282
|
+
} else {
|
|
283
|
+
state.openMark({ type: type.type as MarkupMarkType, attrs: spec.getAttrs?.(tok, state) ?? {} })
|
|
284
|
+
}
|
|
285
|
+
}
|
|
286
|
+
handlers[type + '_close'] = (state, tok) => {
|
|
287
|
+
const type = spec.type(state, tok)
|
|
288
|
+
if (type.node) {
|
|
289
|
+
state.closeNode()
|
|
290
|
+
} else {
|
|
291
|
+
state.closeMark(type.type as MarkupMarkType)
|
|
292
|
+
}
|
|
293
|
+
}
|
|
294
|
+
}
|
|
295
|
+
function addIgnoreRule (handlers: HandlersRecord, spec: ParsingIgnoreRule, type: string): void {
|
|
296
|
+
handlers[type + '_open'] = (state, tok) => {}
|
|
297
|
+
handlers[type + '_close'] = (state, tok) => {}
|
|
298
|
+
}
|
|
299
|
+
function newSimpleMarkHandler (spec: ParsingMarkRule, specMark: MarkupMarkType): HandlerRecord {
|
|
300
|
+
return (state: MarkdownParseState, tok: Token): void => {
|
|
301
|
+
state.openMark({ attrs: attrs(spec, tok, state), type: specMark })
|
|
302
|
+
state.addText(withoutTrailingNewline(tok.content))
|
|
303
|
+
state.closeMark(specMark)
|
|
304
|
+
}
|
|
305
|
+
}
|
|
306
|
+
|
|
307
|
+
function tokenHandlers (
|
|
308
|
+
tokensBlock: Record<string, ParsingBlockRule>,
|
|
309
|
+
tokensNode: Record<string, ParsingNodeRule>,
|
|
310
|
+
tokensMark: Record<string, ParsingMarkRule>,
|
|
311
|
+
specialRules: Record<string, ParsingSpecialRule>,
|
|
312
|
+
ignoreRules: Record<string, ParsingIgnoreRule>,
|
|
313
|
+
htmlParser: HtmlParser
|
|
314
|
+
): HandlersRecord {
|
|
315
|
+
const handlers: HandlersRecord = {}
|
|
316
|
+
|
|
317
|
+
Object.entries(tokensBlock).forEach(([type, spec]) => {
|
|
318
|
+
addSpecBlock(handlers, spec, type, spec.block)
|
|
319
|
+
})
|
|
320
|
+
Object.entries(tokensNode).forEach(([type, spec]) => {
|
|
321
|
+
addSpecNode(handlers, type, spec)
|
|
322
|
+
})
|
|
323
|
+
Object.entries(tokensMark).forEach(([type, spec]) => {
|
|
324
|
+
addSpecMark(handlers, spec, type, spec.mark)
|
|
325
|
+
})
|
|
326
|
+
Object.entries(specialRules).forEach(([type, spec]) => {
|
|
327
|
+
addSpecialRule(handlers, spec, type)
|
|
328
|
+
})
|
|
329
|
+
Object.entries(ignoreRules).forEach(([type, spec]) => {
|
|
330
|
+
addIgnoreRule(handlers, spec, type)
|
|
331
|
+
})
|
|
332
|
+
|
|
333
|
+
handlers.html_inline = (state: MarkdownParseState, tok: Token) => {
|
|
334
|
+
try {
|
|
335
|
+
const top = state.top()
|
|
336
|
+
if (tok.content.trim() === '</a>' && top?.type === MarkupNodeType.embed) {
|
|
337
|
+
top.content = []
|
|
338
|
+
state.closeNode()
|
|
339
|
+
return
|
|
340
|
+
}
|
|
341
|
+
const markup = htmlParser(tok.content)
|
|
342
|
+
if (markup.content !== undefined) {
|
|
343
|
+
// unwrap content from wrapping paragraph
|
|
344
|
+
const shouldUnwrap =
|
|
345
|
+
markup.content.length === 1 &&
|
|
346
|
+
markup.content[0].type === MarkupNodeType.paragraph &&
|
|
347
|
+
top?.type === MarkupNodeType.paragraph
|
|
348
|
+
|
|
349
|
+
const content = nodeContent(shouldUnwrap ? markup.content[0] : markup)
|
|
350
|
+
for (const c of content) {
|
|
351
|
+
if (c.type === MarkupNodeType.embed) {
|
|
352
|
+
state.openNode(MarkupNodeType.embed, c.attrs ?? {})
|
|
353
|
+
continue
|
|
354
|
+
}
|
|
355
|
+
state.push(c)
|
|
356
|
+
}
|
|
357
|
+
}
|
|
358
|
+
} catch (err: any) {
|
|
359
|
+
console.error(err)
|
|
360
|
+
state.addText(tok.content)
|
|
361
|
+
}
|
|
362
|
+
}
|
|
363
|
+
handlers.html_block = (state: MarkdownParseState, tok: Token) => {
|
|
364
|
+
try {
|
|
365
|
+
const model = htmlParser(tok.content)
|
|
366
|
+
const content = nodeContent(model)
|
|
367
|
+
for (const c of content) {
|
|
368
|
+
state.push(c)
|
|
369
|
+
}
|
|
370
|
+
} catch (err: any) {
|
|
371
|
+
console.error(err)
|
|
372
|
+
state.addText(tok.content)
|
|
373
|
+
}
|
|
374
|
+
}
|
|
375
|
+
|
|
376
|
+
addTextHandlers(handlers)
|
|
377
|
+
|
|
378
|
+
return handlers
|
|
379
|
+
}
|
|
380
|
+
|
|
381
|
+
function addTextHandlers (handlers: HandlersRecord): void {
|
|
382
|
+
handlers.text = (state, tok) => {
|
|
383
|
+
state.addText(tok.content)
|
|
384
|
+
}
|
|
385
|
+
handlers.inline = (state, tok) => {
|
|
386
|
+
state.parseTokens(tok.children)
|
|
387
|
+
}
|
|
388
|
+
handlers.softbreak = (state) => {
|
|
389
|
+
state.addText('\n')
|
|
390
|
+
}
|
|
391
|
+
}
|
|
392
|
+
|
|
393
|
+
function addSpecNode (handlers: HandlersRecord, type: string, spec: ParsingNodeRule): void {
|
|
394
|
+
handlers[type] = (state: MarkdownParseState, tok: Token) => state.addNode(spec.node, attrs(spec, tok, state))
|
|
395
|
+
}
|
|
396
|
+
|
|
397
|
+
function tokAttrGet (token: Token, name: string): string | undefined {
|
|
398
|
+
const attr = token.attrGet(name)
|
|
399
|
+
if (attr != null) {
|
|
400
|
+
return attr
|
|
401
|
+
}
|
|
402
|
+
// try iterate attrs
|
|
403
|
+
for (const [k, v] of token.attrs ?? []) {
|
|
404
|
+
if (k === name) {
|
|
405
|
+
return v
|
|
406
|
+
}
|
|
407
|
+
}
|
|
408
|
+
}
|
|
409
|
+
|
|
410
|
+
function tokToAttrs (token: Token, ...names: string[]): Record<string, string> {
|
|
411
|
+
const result: Record<string, string> = {}
|
|
412
|
+
for (const name of names) {
|
|
413
|
+
const attr = token.attrGet(name)
|
|
414
|
+
if (attr !== null) {
|
|
415
|
+
result[name] = attr
|
|
416
|
+
}
|
|
417
|
+
}
|
|
418
|
+
return result
|
|
419
|
+
}
|
|
420
|
+
|
|
421
|
+
function todoItemMetaAttrsGet (tok: Token): Record<string, string> {
|
|
422
|
+
const userid = tokAttrGet(tok, 'userid')
|
|
423
|
+
const todoid = tokAttrGet(tok, 'todoid')
|
|
424
|
+
|
|
425
|
+
const result: Record<string, string> = {}
|
|
426
|
+
|
|
427
|
+
if (userid !== undefined) {
|
|
428
|
+
result.userid = userid
|
|
429
|
+
}
|
|
430
|
+
if (todoid !== undefined) {
|
|
431
|
+
result.todoid = todoid
|
|
432
|
+
}
|
|
433
|
+
|
|
434
|
+
return result
|
|
435
|
+
}
|
|
436
|
+
|
|
437
|
+
// ::- A configuration of a Markdown parser. Such a parser uses
|
|
438
|
+
const tokensBlock: Record<string, ParsingBlockRule> = {
|
|
439
|
+
blockquote: { block: MarkupNodeType.blockquote },
|
|
440
|
+
paragraph: { block: MarkupNodeType.paragraph },
|
|
441
|
+
list_item: { block: MarkupNodeType.list_item },
|
|
442
|
+
task_item: { block: MarkupNodeType.taskItem, getAttrs: (tok) => ({ 'data-type': 'taskItem' }) },
|
|
443
|
+
bullet_list: {
|
|
444
|
+
block: MarkupNodeType.bullet_list,
|
|
445
|
+
getAttrs: (tok) => ({
|
|
446
|
+
bullet: tok.markup
|
|
447
|
+
})
|
|
448
|
+
},
|
|
449
|
+
todo_list: {
|
|
450
|
+
block: MarkupNodeType.todoList,
|
|
451
|
+
getAttrs: (tok) => ({
|
|
452
|
+
bullet: tok.markup
|
|
453
|
+
})
|
|
454
|
+
},
|
|
455
|
+
todo_item: {
|
|
456
|
+
block: MarkupNodeType.todoItem,
|
|
457
|
+
getAttrs: (tok) => ({
|
|
458
|
+
checked: tokAttrGet(tok, 'checked') === 'true',
|
|
459
|
+
...todoItemMetaAttrsGet(tok)
|
|
460
|
+
})
|
|
461
|
+
},
|
|
462
|
+
ordered_list: {
|
|
463
|
+
block: MarkupNodeType.ordered_list,
|
|
464
|
+
getAttrs: (tok: Token) => ({ order: tokAttrGet(tok, 'start') ?? '1' })
|
|
465
|
+
},
|
|
466
|
+
task_list: {
|
|
467
|
+
block: MarkupNodeType.taskList,
|
|
468
|
+
getAttrs: (tok: Token) => ({ order: tokAttrGet(tok, 'start') ?? '1', 'data-type': 'taskList' })
|
|
469
|
+
},
|
|
470
|
+
heading: {
|
|
471
|
+
block: MarkupNodeType.heading,
|
|
472
|
+
getAttrs: (tok: Token) => ({ level: Number(tok.tag.slice(1)), marker: tok.markup })
|
|
473
|
+
},
|
|
474
|
+
code_block: {
|
|
475
|
+
block: (tok) => {
|
|
476
|
+
if (tok.info === 'mermaid') {
|
|
477
|
+
return MarkupNodeType.mermaid
|
|
478
|
+
}
|
|
479
|
+
return MarkupNodeType.code_block
|
|
480
|
+
},
|
|
481
|
+
getAttrs: (tok: Token) => {
|
|
482
|
+
return { language: tok.info ?? '' }
|
|
483
|
+
},
|
|
484
|
+
noCloseToken: true
|
|
485
|
+
},
|
|
486
|
+
fence: {
|
|
487
|
+
block: (tok) => {
|
|
488
|
+
if (tok.info === 'mermaid') {
|
|
489
|
+
return MarkupNodeType.mermaid
|
|
490
|
+
}
|
|
491
|
+
return MarkupNodeType.code_block
|
|
492
|
+
},
|
|
493
|
+
getAttrs: (tok: Token) => {
|
|
494
|
+
return { language: tok.info ?? '' }
|
|
495
|
+
},
|
|
496
|
+
noCloseToken: true
|
|
497
|
+
},
|
|
498
|
+
sub: {
|
|
499
|
+
block: MarkupNodeType.subLink,
|
|
500
|
+
getAttrs: (tok: Token) => {
|
|
501
|
+
return { language: tok.info ?? '' }
|
|
502
|
+
},
|
|
503
|
+
noCloseToken: false
|
|
504
|
+
},
|
|
505
|
+
table: {
|
|
506
|
+
block: MarkupNodeType.table,
|
|
507
|
+
noCloseToken: false
|
|
508
|
+
},
|
|
509
|
+
th: {
|
|
510
|
+
block: MarkupNodeType.table_header,
|
|
511
|
+
getAttrs: (tok: Token) => {
|
|
512
|
+
return {
|
|
513
|
+
colspan: Number(tok.attrGet('colspan') ?? '1'),
|
|
514
|
+
rowspan: Number(tok.attrGet('rowspan') ?? '1')
|
|
515
|
+
}
|
|
516
|
+
},
|
|
517
|
+
wrapContent: true,
|
|
518
|
+
noCloseToken: false
|
|
519
|
+
},
|
|
520
|
+
tr: {
|
|
521
|
+
block: MarkupNodeType.table_row,
|
|
522
|
+
noCloseToken: false
|
|
523
|
+
},
|
|
524
|
+
td: {
|
|
525
|
+
block: MarkupNodeType.table_cell,
|
|
526
|
+
getAttrs: (tok: Token) => {
|
|
527
|
+
return {
|
|
528
|
+
colspan: Number(tok.attrGet('colspan') ?? '1'),
|
|
529
|
+
rowspan: Number(tok.attrGet('rowspan') ?? '1')
|
|
530
|
+
}
|
|
531
|
+
},
|
|
532
|
+
wrapContent: true,
|
|
533
|
+
noCloseToken: false
|
|
534
|
+
}
|
|
535
|
+
}
|
|
536
|
+
const tokensNode: Record<string, ParsingNodeRule> = {
|
|
537
|
+
hr: { node: MarkupNodeType.horizontal_rule },
|
|
538
|
+
image: {
|
|
539
|
+
node: MarkupNodeType.image,
|
|
540
|
+
getAttrs: (tok: Token, state) => {
|
|
541
|
+
const result = tokToAttrs(tok, 'src', 'title', 'alt', 'data')
|
|
542
|
+
result.alt = convertStringLikeToken(tok, result.alt)
|
|
543
|
+
if (result.src.startsWith(state.imageUrl)) {
|
|
544
|
+
const url = new URL(result.src)
|
|
545
|
+
result['data-type'] = 'image'
|
|
546
|
+
const file = url.searchParams.get('file')
|
|
547
|
+
if (file != null) {
|
|
548
|
+
result['file-id'] = file
|
|
549
|
+
}
|
|
550
|
+
|
|
551
|
+
const width = url.searchParams.get('width')
|
|
552
|
+
if (width != null) {
|
|
553
|
+
result.width = width
|
|
554
|
+
}
|
|
555
|
+
|
|
556
|
+
const height = url.searchParams.get('height')
|
|
557
|
+
if (height != null) {
|
|
558
|
+
result.height = height
|
|
559
|
+
}
|
|
560
|
+
}
|
|
561
|
+
return result
|
|
562
|
+
}
|
|
563
|
+
},
|
|
564
|
+
hardbreak: { node: MarkupNodeType.hard_break }
|
|
565
|
+
}
|
|
566
|
+
const tokensMark: Record<string, ParsingMarkRule> = {
|
|
567
|
+
em: {
|
|
568
|
+
mark: MarkupMarkType.em,
|
|
569
|
+
getAttrs: (tok: Token, state: MarkdownParseState) => {
|
|
570
|
+
return { marker: tok.markup }
|
|
571
|
+
}
|
|
572
|
+
},
|
|
573
|
+
bold: {
|
|
574
|
+
mark: MarkupMarkType.bold,
|
|
575
|
+
getAttrs: (tok: Token, state: MarkdownParseState) => {
|
|
576
|
+
return { marker: tok.markup }
|
|
577
|
+
}
|
|
578
|
+
},
|
|
579
|
+
strong: {
|
|
580
|
+
mark: MarkupMarkType.bold,
|
|
581
|
+
getAttrs: (tok: Token, state: MarkdownParseState) => {
|
|
582
|
+
return { marker: tok.markup }
|
|
583
|
+
}
|
|
584
|
+
},
|
|
585
|
+
s: { mark: MarkupMarkType.strike },
|
|
586
|
+
u: { mark: MarkupMarkType.underline },
|
|
587
|
+
code_inline: {
|
|
588
|
+
mark: MarkupMarkType.code,
|
|
589
|
+
noCloseToken: true
|
|
590
|
+
}
|
|
591
|
+
}
|
|
592
|
+
|
|
593
|
+
const specialRule: Record<string, ParsingSpecialRule> = {
|
|
594
|
+
link: {
|
|
595
|
+
type: (state, tok) => {
|
|
596
|
+
const href = tok.attrGet('href')
|
|
597
|
+
if ((href?.startsWith(state.refUrl) ?? false) || state.stack[state.stack.length - 1]?.type === 'reference') {
|
|
598
|
+
return { type: MarkupNodeType.reference, node: true }
|
|
599
|
+
}
|
|
600
|
+
return { type: MarkupMarkType.link, node: false, close: true }
|
|
601
|
+
},
|
|
602
|
+
getAttrs: (tok: Token, state) => {
|
|
603
|
+
const attrs = tokToAttrs(tok, 'href', 'title')
|
|
604
|
+
if (attrs.href !== undefined) {
|
|
605
|
+
try {
|
|
606
|
+
const url = new URL(attrs.href)
|
|
607
|
+
if (attrs.href.startsWith(state.refUrl) ?? false) {
|
|
608
|
+
return {
|
|
609
|
+
label: url.searchParams?.get('label') ?? '',
|
|
610
|
+
id: url.searchParams?.get('_id') ?? '',
|
|
611
|
+
objectclass: url.searchParams?.get('_class') ?? ''
|
|
612
|
+
}
|
|
613
|
+
}
|
|
614
|
+
} catch (err: any) {
|
|
615
|
+
// ignore
|
|
616
|
+
}
|
|
617
|
+
}
|
|
618
|
+
return attrs
|
|
619
|
+
}
|
|
620
|
+
}
|
|
621
|
+
}
|
|
622
|
+
|
|
623
|
+
const ignoreRule: Record<string, ParsingIgnoreRule> = {
|
|
624
|
+
thead: {},
|
|
625
|
+
tbody: {}
|
|
626
|
+
}
|
|
627
|
+
|
|
628
|
+
export const isInlineToken = (token?: Token): boolean => token?.type === 'inline'
|
|
629
|
+
|
|
630
|
+
export const isParagraphToken = (token?: Token): boolean => token?.type === 'paragraph_open'
|
|
631
|
+
|
|
632
|
+
export const isListItemToken = (token?: Token): boolean => token?.type === 'list_item_open'
|
|
633
|
+
|
|
634
|
+
export interface TaskListEnv {
|
|
635
|
+
tasklists: number
|
|
636
|
+
}
|
|
637
|
+
|
|
638
|
+
interface TaskListStateCore extends StateCore {
|
|
639
|
+
env: TaskListEnv
|
|
640
|
+
}
|
|
641
|
+
|
|
642
|
+
// The leading whitespace in a list item (token.content) is already trimmed off by markdown-it.
|
|
643
|
+
// The regex below checks for '[ ] ' or '[x] ' or '[X] ' at the start of the string token.content,
|
|
644
|
+
// where the space is either a normal space or a non-breaking space (character 160 = \u00A0).
|
|
645
|
+
const startsWithTodoMarkdown = (token: Token): boolean => /^\[[xX \u00A0]\][ \u00A0]/.test(token.content)
|
|
646
|
+
const isCheckedTodoItem = (token: Token): boolean => /^\[[xX]\][ \u00A0]/.test(token.content)
|
|
647
|
+
|
|
648
|
+
export type HtmlParser = (html: string) => MarkupNode
|
|
649
|
+
|
|
650
|
+
export interface MarkdownParserOptions {
|
|
651
|
+
refUrl: string
|
|
652
|
+
imageUrl: string
|
|
653
|
+
htmlParser?: HtmlParser
|
|
654
|
+
}
|
|
655
|
+
|
|
656
|
+
// Parses markdown text into MarkupNode documents using markdown-it for
// tokenization plus two custom core rules (todo lists, html comments).
export class MarkdownParser {
  tokenizer: MarkdownIt
  tokenHandlers: Record<string, (state: MarkdownParseState, tok: Token) => void>
  htmlParser: HtmlParser

  constructor (private readonly options: MarkdownParserOptions) {
    this.tokenizer = MarkdownIt('default', {
      html: true
    })
    // Custom core rules registered to run right after the built-in 'inline' rule.
    this.tokenizer.core.ruler.after('inline', 'task_list', this.listRule)
    this.tokenizer.core.ruler.after('inline', 'html_comment', this.htmlCommentRule)

    this.htmlParser = options.htmlParser ?? htmlToMarkup
    this.tokenHandlers = tokenHandlers(tokensBlock, tokensNode, tokensMark, specialRule, ignoreRule, this.htmlParser)
  }

  // Parse markdown text into a markup document node.
  parse (text: string): MarkupNode {
    const state = new MarkdownParseState(this.tokenHandlers, this.options.refUrl, this.options.imageUrl)
    let doc: MarkupNode

    const tokens = this.tokenizer.parse(text, {})

    state.parseTokens(tokens)
    // Unwind any nodes left open on the stack; the last one closed is the doc.
    do {
      doc = state.closeNode()
    } while (state.stack.length > 0)
    return doc
  }

  htmlCommentRule: RuleCore = (state: StateCore): boolean => {
    const tokens = state.tokens
    for (let i = 0; i < tokens.length; i++) {
      // Prosemirror entirely ignores comments when parsing, so here we replace
      // html comment tags with a custom tag so the comments get parsed as a node
      if (tokens[i].type === 'html_block' || tokens[i].type === 'html_inline') {
        const content = tokens[i].content.replaceAll('<!--', '<comment>').replaceAll('-->', '</comment>')
        tokens[i].content = content
      }
    }
    return true
  }

  // Rewrites markdown-it's bullet-list tokens into todo-list tokens, splitting
  // lists that mix plain items with todo items.
  listRule: RuleCore = (state: TaskListStateCore): boolean => {
    const tokens = state.tokens
    // Saved (closeIdx, lastItemIdx) pairs for enclosing lists while a nested
    // list is being processed.
    const states: Array<{ closeIdx: number, lastItemIdx: number }> = []

    // step #1 - convert list items to todo items
    for (let open = 0; open < tokens.length; open++) {
      if (isTodoListItem(tokens, open)) {
        convertTodoItem(tokens, open)
      }
    }

    // step #2 - convert lists to proper type
    // listCloseIdx and itemCloseIdx tracks position of the list and item close tokens
    // because we insert items into the list, the variables keep the position from the
    // end of the list so we don't have to count inserts
    let listCloseIdx = -1
    let itemCloseIdx = -1

    // Walk backwards so close tokens are seen before their open tokens.
    for (let i = tokens.length - 1; i >= 0; i--) {
      if (tokens[i].type === 'bullet_list_close') {
        states.push({ closeIdx: listCloseIdx, lastItemIdx: itemCloseIdx })
        listCloseIdx = tokens.length - i
        itemCloseIdx = -1
      } else if (tokens[i].type === 'list_item_close' || tokens[i].type === 'todo_item_close') {
        // when found item close token of different type, split the list
        if (itemCloseIdx === -1) {
          itemCloseIdx = tokens.length - i
        } else if (tokens[i].type !== tokens[tokens.length - itemCloseIdx].type) {
          // Insert a close/open pair after position i to cut the list in two.
          const bulletListOpen = new state.Token('bullet_list_open', 'ul', 1)
          bulletListOpen.markup = tokens[i + 1].markup
          tokens.splice(i + 1, 0, bulletListOpen)
          tokens.splice(i + 1, 0, new state.Token('bullet_list_close', 'ul', -1))
          convertTodoList(tokens, i + 2, tokens.length - listCloseIdx, tokens.length - itemCloseIdx)
          listCloseIdx = tokens.length - i - 1
          itemCloseIdx = tokens.length - i
        }
      } else if (tokens[i].type === 'bullet_list_open') {
        if (itemCloseIdx !== -1) {
          convertTodoList(tokens, i, tokens.length - listCloseIdx, tokens.length - itemCloseIdx)
        }

        // Restore tracking state of the enclosing list, if any.
        const prevState = states.pop() ?? { closeIdx: -1, lastItemIdx: -1 }
        listCloseIdx = prevState.closeIdx
        itemCloseIdx = prevState.lastItemIdx
      }
    }

    return true
  }
}
|
|
748
|
+
|
|
749
|
+
function convertTodoList (tokens: Token[], open: number, close: number, item: number): void {
|
|
750
|
+
if (tokens[open].type !== 'bullet_list_open') {
|
|
751
|
+
throw new Error('bullet_list_open token expected')
|
|
752
|
+
}
|
|
753
|
+
if (tokens[close].type !== 'bullet_list_close') {
|
|
754
|
+
throw new Error('bullet_list_close token expected')
|
|
755
|
+
}
|
|
756
|
+
|
|
757
|
+
if (tokens[item].type === 'todo_item_close') {
|
|
758
|
+
tokens[open].type = 'todo_list_open'
|
|
759
|
+
tokens[close].type = 'todo_list_close'
|
|
760
|
+
}
|
|
761
|
+
}
|
|
762
|
+
|
|
763
|
+
// Convert a `list_item_open`…`list_item_close` pair whose content begins with
// a todo marker ('[ ] ' / '[x] ') into todo_item tokens, moving the checkbox
// state and any `<!-- todoid=…,userid=… -->` metadata into token attrs.
// Returns true when a matching close token was found and converted.
function convertTodoItem (tokens: Token[], open: number): boolean {
  const close = findListItemCloseToken(tokens, open)
  if (close !== -1) {
    tokens[open].type = 'todo_item_open'
    tokens[close].type = 'todo_item_close'

    // Token layout assumed: open, paragraph_open, inline (see isTodoListItem).
    const inline = tokens[open + 2]

    if (tokens[open].attrs == null) {
      tokens[open].attrs = []
    }

    ;(tokens[open].attrs as any).push(['checked', isCheckedTodoItem(inline) ? 'true' : 'false'])

    if (inline.children !== null) {
      // Strip the 4-character todo marker ('[x] ') from the first text child;
      // if the marker was the whole child, drop the child entirely.
      const newContent = inline.children[0].content.slice(4)
      if (newContent.length > 0) {
        inline.children[0].content = newContent
      } else {
        inline.children = inline.children.slice(1)
      }

      // Metadata travels as an inline html comment: <!-- todoid=…,userid=… -->
      const metaTok = inline.children.find(
        (tok) => tok.type === 'html_inline' && tok.content.startsWith('<!--') && tok.content.endsWith('-->')
      )
      if (metaTok !== undefined) {
        // slice(5, -4) drops the '<!-- ' prefix and ' -->' suffix;
        // slice(7) drops the 'todoid=' / 'userid=' key prefix.
        const metaValues = metaTok.content.slice(5, -4).split(',')
        for (const mv of metaValues) {
          if (mv.startsWith('todoid')) {
            ;(tokens[open].attrs as any).push(['todoid', mv.slice(7)])
          }
          if (mv.startsWith('userid')) {
            ;(tokens[open].attrs as any).push(['userid', mv.slice(7)])
          }
        }
      }
    }

    return true
  }

  return false
}
|
|
806
|
+
|
|
807
|
+
function findListItemCloseToken (tokens: Token[], open: number): number {
|
|
808
|
+
if (tokens[open].type !== 'list_item_open') {
|
|
809
|
+
throw new Error('list_item_open token expected')
|
|
810
|
+
}
|
|
811
|
+
|
|
812
|
+
const level = tokens[open].level
|
|
813
|
+
for (let close = open + 1; close < tokens.length; close++) {
|
|
814
|
+
if (tokens[close].type === 'list_item_close' && tokens[close].level === level) {
|
|
815
|
+
return close
|
|
816
|
+
}
|
|
817
|
+
}
|
|
818
|
+
|
|
819
|
+
return -1
|
|
820
|
+
}
|
|
821
|
+
|
|
822
|
+
// todo token structure
|
|
823
|
+
// tokens[i].type === list_item_open
|
|
824
|
+
// tokens[i + 1].type === paragraph
|
|
825
|
+
// tokens[i + 2].type === inline
|
|
826
|
+
function isTodoListItem (tokens: Token[], pos: number): boolean {
|
|
827
|
+
return (
|
|
828
|
+
isListItemToken(tokens[pos]) &&
|
|
829
|
+
isParagraphToken(tokens[pos + 1]) &&
|
|
830
|
+
isInlineToken(tokens[pos + 2]) &&
|
|
831
|
+
startsWithTodoMarkdown(tokens[pos + 2])
|
|
832
|
+
)
|
|
833
|
+
}
|
|
834
|
+
|
|
835
|
+
function convertStringLikeToken (tok: Token, attrValue?: string): string {
|
|
836
|
+
if (typeof attrValue === 'string' && attrValue !== '') {
|
|
837
|
+
return attrValue
|
|
838
|
+
}
|
|
839
|
+
const children = tok.children ?? []
|
|
840
|
+
let out = ''
|
|
841
|
+
for (const child of children) {
|
|
842
|
+
switch (child.type) {
|
|
843
|
+
case 'text':
|
|
844
|
+
out += child.content
|
|
845
|
+
break
|
|
846
|
+
case 'hardbreak':
|
|
847
|
+
out += '\n'
|
|
848
|
+
break
|
|
849
|
+
}
|
|
850
|
+
}
|
|
851
|
+
|
|
852
|
+
return out
|
|
853
|
+
}
|