@farvardin/lezer-parser-markdown 1.6.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json ADDED
@@ -0,0 +1,37 @@
1
+ {
2
+ "name": "@farvardin/lezer-parser-markdown",
3
+ "version": "1.6.3",
4
+ "description": "Incremental Markdown parser that consumes and emits Lezer trees (txt2tags version)",
5
+ "main": "dist/index.cjs",
6
+ "type": "module",
7
+ "exports": {
8
+ "import": "./dist/index.js",
9
+ "require": "./dist/index.cjs"
10
+ },
11
+ "module": "dist/index.js",
12
+ "types": "dist/index.d.ts",
13
+ "author": "Éric Forgeot",
14
+ "license": "MIT",
15
+ "devDependencies": {
16
+ "ist": "^1.1.1",
17
+ "mocha": "^10.2.0",
18
+ "@lezer/html": "^1.0.0",
19
+ "getdocs-ts": "^0.1.0",
20
+ "builddocs": "^1.0.0",
21
+ "@marijn/buildtool": "^0.1.6"
22
+ },
23
+ "dependencies": {
24
+ "@lezer/common": "^1.5.0",
25
+ "@lezer/highlight": "^1.0.0"
26
+ },
27
+ "repository": {
28
+ "type" : "git",
29
+ "url" : "https://github.com/farvardin/lezer-parser-markdown.git"
30
+ },
31
+ "scripts": {
32
+ "watch": "node build.js --watch",
33
+ "prepare": "node build.js",
34
+ "test": "mocha",
35
+ "build-readme": "node bin/build-readme.cjs"
36
+ }
37
+ }
package/publish.sh ADDED
@@ -0,0 +1 @@
1
+ npm publish --access public
package/src/README.md ADDED
@@ -0,0 +1,83 @@
1
+ <!-- /README.md is generated from /src/README.md -->
2
+
3
+ # @lezer/markdown
4
+
5
+ This is an incremental Markdown ([CommonMark](https://commonmark.org/)
6
+ with support for extensions) parser that integrates well with the
7
+ [Lezer](https://lezer.codemirror.net/) parser system. It does not in
8
+ fact use the Lezer runtime (that runs LR parsers, and Markdown can't
9
+ really be parsed that way), but it produces Lezer-style compact syntax
10
+ trees and consumes fragments of such trees for its incremental
11
+ parsing.
12
+
13
+ Note that this only _parses_ the document, producing a data structure
14
+ that represents its syntactic form, and doesn't help with outputting
15
+ HTML. Also, in order to be single-pass and incremental, it doesn't do
16
+ some things that a conforming CommonMark parser is expected to
17
+ do—specifically, it doesn't validate link references, so it'll parse
18
+ `[a][b]` and similar as a link, even if no `[b]` reference is
19
+ declared.
20
+
21
+ The
22
+ [@codemirror/lang-markdown](https://github.com/codemirror/lang-markdown)
23
+ package integrates this parser with CodeMirror to provide Markdown
24
+ editor support.
25
+
26
+ The code is licensed under an MIT license.
27
+
28
+ ## Interface
29
+
30
+ @parser
31
+
32
+ @MarkdownParser
33
+
34
+ @MarkdownConfig
35
+
36
+ @MarkdownExtension
37
+
38
+ @parseCode
39
+
40
+ ### GitHub Flavored Markdown
41
+
42
+ @GFM
43
+
44
+ @Table
45
+
46
+ @TaskList
47
+
48
+ @Strikethrough
49
+
50
+ @Autolink
51
+
52
+ ### Other extensions
53
+
54
+ @Subscript
55
+
56
+ @Superscript
57
+
58
+ @Emoji
59
+
60
+ ### Extension
61
+
62
+ The parser can, to a certain extent, be extended to handle additional
63
+ syntax.
64
+
65
+ @NodeSpec
66
+
67
+ @BlockContext
68
+
69
+ @BlockParser
70
+
71
+ @LeafBlockParser
72
+
73
+ @Line
74
+
75
+ @LeafBlock
76
+
77
+ @InlineContext
78
+
79
+ @InlineParser
80
+
81
+ @DelimiterType
82
+
83
+ @Element
package/src/extension.ts ADDED
@@ -0,0 +1,301 @@
1
+ import {InlineContext, BlockContext, MarkdownConfig,
2
+ LeafBlockParser, LeafBlock, Line, Element, space, Punctuation} from "./markdown"
3
+ import {tags as t} from "@lezer/highlight"
4
+
5
// Delimiter type registered for every `~~` run: resolved pairs become
// `Strikethrough` nodes and the markers themselves `StrikethroughMark`.
const StrikethroughDelim = {resolve: "Strikethrough", mark: "StrikethroughMark"}

/// An extension that implements
/// [GFM-style](https://github.github.com/gfm/#strikethrough-extension-)
/// Strikethrough syntax using `~~` delimiters.
export const Strikethrough: MarkdownConfig = {
  defineNodes: [
    {name: "Strikethrough", style: {"Strikethrough/...": t.strikethrough}},
    {name: "StrikethroughMark", style: t.processingInstruction}
  ],
  parseInline: [{
    name: "Strikethrough",
    parse(cx, next, pos) {
      // Need two tildes at `pos` that are not directly followed by a third
      if (next != 126 /* '~' */) return -1
      if (cx.char(pos + 1) != 126) return -1
      if (cx.char(pos + 2) == 126) return -1

      const charBefore = cx.slice(pos - 1, pos)
      const charAfter = cx.slice(pos + 2, pos + 3)
      // An empty slice is treated the same as whitespace
      const spaceBefore = /\s|^$/.test(charBefore)
      const spaceAfter = /\s|^$/.test(charAfter)
      const punctBefore = Punctuation.test(charBefore)
      const punctAfter = Punctuation.test(charAfter)

      // Flanking rules: the marker can open when something non-blank
      // follows it, and close when something non-blank precedes it.
      // Adjacent punctuation only counts when the other side is
      // whitespace or punctuation too.
      const canOpen = !spaceAfter && (!punctAfter || spaceBefore || punctBefore)
      const canClose = !spaceBefore && (!punctBefore || spaceAfter || punctAfter)
      return cx.addDelimiter(StrikethroughDelim, pos, pos + 2, canOpen, canClose)
    },
    after: "Emphasis"
  }]
}
32
+
33
// Scan `line`, starting at index `startI`, as a table row and return
// the number of cells it holds. When `elts` is given, `TableCell` and
// `TableDelimiter` elements are appended to it, with their positions
// shifted by `offset`.
function parseRow(cx: BlockContext, line: string, startI = 0, elts?: Element[], offset = 0) {
  let cells = 0, sawPipe = false, escaped = false
  // Extent of the non-blank content of the cell being scanned, -1 when
  // no content has been seen since the last delimiter.
  let from = -1, to = -1
  const emitCell = (into: Element[]) => {
    const inline = cx.parser.parseInline(line.slice(from, to), offset + from)
    into.push(cx.elt("TableCell", offset + from, offset + to, inline))
  }

  for (let i = startI; i < line.length; i++) {
    const code = line.charCodeAt(i)
    if (code == 124 /* '|' */ && !escaped) {
      // A pipe that opens the row with nothing before it closes no cell
      if (sawPipe || from >= 0) cells++
      sawPipe = true
      if (elts) {
        if (from >= 0) emitCell(elts)
        elts.push(cx.elt("TableDelimiter", i + offset, i + offset + 1))
      }
      from = to = -1
    } else if (escaped || (code != 32 && code != 9)) {
      // Non-blank (or escaped) character: extend the current cell
      if (from < 0) from = i
      to = i + 1
    }
    // A backslash escapes the next character unless it is itself escaped
    escaped = !escaped && code == 92
  }
  // Trailing content without a closing pipe still forms a cell
  if (from >= 0) {
    cells++
    if (elts) emitCell(elts)
  }
  return cells
}
64
+
65
// Tell whether `str` contains a `|` at or after index `start` that is
// not preceded by a backslash.
function hasPipe(str: string, start: number) {
  let i = start
  while (i < str.length) {
    const code = str.charCodeAt(i)
    if (code == 124 /* '|' */) return true
    // After a backslash, step over the character it escapes as well
    i += code == 92 /* '\\' */ ? 2 : 1
  }
  return false
}
73
+
74
// Matches a delimiter row: an optional leading pipe, one or more
// pipe-terminated `:---:`-style cells, and an optional unterminated
// last cell.
const delimiterLine = /^\|?(\s*:?-+:?\s*\|)+(\s*:?-+:?\s*)?$/

class TableParser implements LeafBlockParser {
  // `null` until the block's second line has been inspected, `false`
  // once the block is known not to be a table, and otherwise the list
  // of elements (header, delimiter row, body rows) gathered so far.
  rows: false | null | Element[] = null

  nextLine(cx: BlockContext, line: Line, leaf: LeafBlock) {
    const rows = this.rows
    if (rows == null) {
      // Second line: assume "not a table" until every check passes
      this.rows = false
      const startsLikeDelimiter = line.next == 45 || line.next == 58 || line.next == 124 /* '-:|' */
      if (!startsLikeDelimiter) return false
      const delimText = line.text.slice(line.pos)
      if (!delimiterLine.test(delimText)) return false
      // The header and the delimiter row must have the same cell count
      const header: Element[] = []
      const headerCells = parseRow(cx, leaf.content, 0, header, leaf.start)
      if (headerCells != parseRow(cx, delimText, 0)) return false
      this.rows = [
        cx.elt("TableHeader", leaf.start, leaf.start + leaf.content.length, header),
        cx.elt("TableDelimiter", cx.lineStart + line.pos, cx.lineStart + line.text.length)
      ]
    } else if (rows) {
      // Any later line of a recognized table is a body row
      const cells: Element[] = []
      parseRow(cx, line.text, line.pos, cells, cx.lineStart)
      rows.push(cx.elt("TableRow", cx.lineStart + line.pos, cx.lineStart + line.text.length, cells))
    }
    return false
  }

  finish(cx: BlockContext, leaf: LeafBlock) {
    if (this.rows) {
      const table = cx.elt("Table", leaf.start, leaf.start + leaf.content.length, this.rows as readonly Element[])
      cx.addLeafElement(leaf, table)
      return true
    }
    return false
  }
}
107
+
108
/// This extension provides
/// [GFM-style](https://github.github.com/gfm/#tables-extension-)
/// tables, using syntax like this:
///
/// ```
/// | head 1 | head 2 |
/// | ---    | ---    |
/// | cell 1 | cell 2 |
/// ```
export const Table: MarkdownConfig = {
  defineNodes: [
    {name: "Table", block: true},
    {name: "TableHeader", style: {"TableHeader/...": t.heading}},
    "TableRow",
    {name: "TableCell", style: t.content},
    {name: "TableDelimiter", style: t.processingInstruction},
  ],
  parseBlock: [{
    name: "Table",
    // Only leaf blocks whose first line contains an unescaped pipe are
    // candidates for becoming a table.
    leaf(_, leaf) {
      if (!hasPipe(leaf.content, 0)) return null
      return new TableParser
    },
    // End the current leaf block when this line and the next one form a
    // table header plus a delimiter row with matching cell counts.
    endLeaf(cx, line, leaf) {
      const alreadyTable = leaf.parsers.some(p => p instanceof TableParser)
      if (alreadyTable) return false
      if (!hasPipe(line.text, line.basePos)) return false
      const following = cx.peekLine()
      if (!delimiterLine.test(following)) return false
      return parseRow(cx, line.text, line.basePos) == parseRow(cx, following, line.basePos)
    },
    before: "SetextHeading"
  }]
}
136
+
137
// Leaf parser that turns a paragraph starting with a three-character
// task marker into a `Task` element when the block is finished.
class TaskParser implements LeafBlockParser {
  // Further lines never end or alter the block
  nextLine() { return false }

  finish(cx: BlockContext, leaf: LeafBlock) {
    // The marker occupies the first three characters of the content;
    // everything after it is parsed as inline content.
    const markerEnd = leaf.start + 3
    const marker = cx.elt("TaskMarker", leaf.start, markerEnd)
    const rest = cx.parser.parseInline(leaf.content.slice(3), markerEnd)
    const task = cx.elt("Task", leaf.start, leaf.start + leaf.content.length, [marker, ...rest])
    cx.addLeafElement(leaf, task)
    return true
  }
}
148
+
149
/// Extension providing
/// [GFM-style](https://github.github.com/gfm/#task-list-items-extension-)
/// task list items, where list items can be prefixed with `[ ]` or
/// `[x]` to add a checkbox.
export const TaskList: MarkdownConfig = {
  defineNodes: [
    {name: "Task", block: true, style: t.list},
    {name: "TaskMarker", style: t.atom}
  ],
  parseBlock: [{
    name: "TaskList",
    leaf(cx, leaf) {
      // The content must open with `[ ]`, `[x]` or `[X]` followed by a
      // space or tab, and the block must sit directly inside a list item.
      if (!/^\[[ xX]\][ \t]/.test(leaf.content)) return null
      if (cx.parentType().name != "ListItem") return null
      return new TaskParser
    },
    after: "SetextHeading"
  }]
}
166
+
167
// Recognizes what may start an autolink: `www.`, `http://`/`https://`,
// an email local part of up to 100 characters followed by `@`, or a
// `mailto:`/`xmpp:` scheme. Sticky, so `lastIndex` selects the position.
const autolinkRE = /(www\.)|(https?:\/\/)|([\w.+-]{1,100}@)|(mailto:|xmpp:)/gy
// A dotted domain name with an optional path (sticky).
const urlRE = /[\w-]+(\.[\w-]+)+(\/[^\s<]*)?/gy
// The last two dot-separated words of a domain, up to the end or a slash.
const lastTwoDomainWords = /[\w-]+\.[\w-]+($|\/)/
// An email address (sticky).
const emailRE = /[\w.+-]+@[\w-]+(\.[\w.-]+)+/gy
// The `/resource` suffix that may follow an xmpp address (sticky).
const xmppResourceRE = /\/[a-zA-Z\d@.]+/gy

// Count how many times `ch` occurs in `str` between `from` (inclusive)
// and `to` (exclusive).
function count(str: string, from: number, to: number, ch: string) {
  let total = 0
  for (let i = from; i < to; i++) total += str[i] == ch ? 1 : 0
  return total
}

// Find where the URL starting at `from` in `text` ends, or return -1
// when no acceptable URL starts there.
function autolinkURLEnd(text: string, from: number) {
  urlRE.lastIndex = from
  const url = urlRE.exec(text)
  if (!url) return -1
  // Underscores are not accepted in the last two words of the domain
  if (lastTwoDomainWords.exec(url[0])![0].includes("_")) return -1

  let end = from + url[0].length
  for (;;) {
    const last = text[end - 1]
    // Trailing punctuation is not part of the link
    if (/[?!.,:*_~]/.test(last)) { end--; continue }
    if (last == ")") {
      // Drop a closing parenthesis only while they outnumber the opening ones
      if (count(text, from, end, ")") > count(text, from, end, "(")) { end--; continue }
      break
    }
    if (last == ";") {
      // Something that looks like an entity reference at the end is cut off
      const entity = /&(?:#\d+|#x[a-f\d]+|\w+);$/.exec(text.slice(from, end))
      if (entity) { end = from + entity.index; continue }
    }
    break
  }
  return end
}

// Find where the email address starting at `from` in `text` ends, or
// return -1 when there is none or it ends in `_` or `-`. A final dot is
// left out of the address.
function autolinkEmailEnd(text: string, from: number) {
  emailRE.lastIndex = from
  const found = emailRE.exec(text)
  if (!found) return -1
  const address = found[0], tail = address[address.length - 1]
  if (tail == "_" || tail == "-") return -1
  const end = from + address.length
  return tail == "." ? end - 1 : end
}
204
+
205
/// Extension that implements autolinking for
/// `www.`/`http://`/`https://`/`mailto:`/`xmpp:` URLs and email
/// addresses.
export const Autolink: MarkdownConfig = {
  parseInline: [{
    name: "Autolink",
    parse(cx, next, absPos) {
      // Work with positions relative to the inline section's text
      const pos = absPos - cx.offset
      // A link can't start directly after a word character
      if (pos && /\w/.test(cx.text[pos - 1])) return -1
      autolinkRE.lastIndex = pos
      const prefix = autolinkRE.exec(cx.text)
      if (!prefix) return -1

      let end = -1
      if (prefix[1] || prefix[2]) { // www., http://
        end = autolinkURLEnd(cx.text, pos + prefix[0].length)
        if (end > -1 && cx.hasOpenLink) {
          // Inside an open link, stop before an unbalanced bracket
          const noBracket = /([^\[\]]|\[[^\]]*\])*/.exec(cx.text.slice(pos, end))
          end = pos + noBracket![0].length
        }
      } else if (prefix[3]) { // email address
        end = autolinkEmailEnd(cx.text, pos)
      } else { // mailto:/xmpp:
        end = autolinkEmailEnd(cx.text, pos + prefix[0].length)
        if (end > -1 && prefix[0] == "xmpp:") {
          // An xmpp address may carry a trailing `/resource`
          xmppResourceRE.lastIndex = end
          const resource = xmppResourceRE.exec(cx.text)
          if (resource) end = resource.index + resource[0].length
        }
      }
      if (end < 0) return -1

      cx.addElement(cx.elt("URL", absPos, end + cx.offset))
      return end + cx.offset
    }
  }]
}
239
+
240
/// Extension bundle containing [`Table`](#Table),
/// [`TaskList`](#TaskList), [`Strikethrough`](#Strikethrough), and
/// [`Autolink`](#Autolink).
export const GFM = [
  Table,
  TaskList,
  Strikethrough,
  Autolink
]
244
+
245
// Build an inline parse function for a construct delimited by single
// `ch` characters. `node` names the node created for the whole
// construct and `mark` the node created for each delimiter.
function parseSubSuper(ch: number, node: string, mark: string) {
  return (cx: InlineContext, next: number, pos: number) => {
    // Must start with exactly one delimiter character
    if (next != ch || cx.char(pos + 1) == ch) return -1
    const children = [cx.elt(mark, pos, pos + 1)]
    let i = pos + 1
    while (i < cx.end) {
      const code = cx.char(i)
      if (code == ch) {
        // Closing delimiter found: emit the node with both marks
        const closing = cx.elt(mark, i, i + 1)
        return cx.addElement(cx.elt(node, pos, i + 1, [...children, closing]))
      }
      if (code == 92 /* '\\' */) {
        // A backslash and the character after it form an escape
        children.push(cx.elt("Escape", i, i + 2))
        i++
      }
      // The content may not contain whitespace
      if (space(code)) break
      i++
    }
    return -1
  }
}
260
+
261
/// Extension providing
/// [Pandoc-style](https://pandoc.org/MANUAL.html#superscripts-and-subscripts)
/// superscript using `^` markers.
export const Superscript: MarkdownConfig = {
  defineNodes: [
    {
      name: "Superscript",
      style: t.special(t.content)
    },
    {
      name: "SuperscriptMark",
      style: t.processingInstruction
    }
  ],
  parseInline: [
    {
      name: "Superscript",
      // Content enclosed in single carets
      parse: parseSubSuper(94 /* '^' */, "Superscript", "SuperscriptMark")
    }
  ]
}
274
+
275
/// Extension providing
/// [Pandoc-style](https://pandoc.org/MANUAL.html#superscripts-and-subscripts)
/// subscript using `~` markers.
export const Subscript: MarkdownConfig = {
  defineNodes: [
    {
      name: "Subscript",
      style: t.special(t.content)
    },
    {
      name: "SubscriptMark",
      style: t.processingInstruction
    }
  ],
  parseInline: [
    {
      name: "Subscript",
      // Content enclosed in single tildes
      parse: parseSubSuper(126 /* '~' */, "Subscript", "SubscriptMark")
    }
  ]
}
288
+
289
/// Extension that parses two colons with only letters, underscores,
/// and numbers between them as `Emoji` nodes.
export const Emoji: MarkdownConfig = {
  defineNodes: [{name: "Emoji", style: t.character}],
  parseInline: [{
    name: "Emoji",
    parse(cx, next, pos) {
      if (next != 58 /* ':' */) return -1
      // Look for a non-empty shortcode and its closing colon right
      // after the opening colon.
      const shortcode = /^[a-zA-Z_0-9]+:/.exec(cx.slice(pos + 1, cx.end))
      if (!shortcode) return -1
      return cx.addElement(cx.elt("Emoji", pos, pos + 1 + shortcode[0].length))
    }
  }]
}
package/src/index.ts ADDED
@@ -0,0 +1,5 @@
1
+ export {parser, MarkdownParser, MarkdownConfig, MarkdownExtension,
2
+ NodeSpec, InlineParser, BlockParser, LeafBlockParser,
3
+ Line, Element, LeafBlock, DelimiterType, BlockContext, InlineContext} from "./markdown"
4
+ export {parseCode} from "./nest"
5
+ export {Table, TaskList, Strikethrough, Autolink, GFM, Subscript, Superscript, Emoji} from "./extension"