markdown-parser 0.0.7 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/{MIT-LICENSE.txt → LICENSE} +4 -3
- package/README.md +73 -153
- package/dist/index.d.ts +112 -0
- package/dist/index.js +2699 -0
- package/package.json +36 -34
- package/.npmignore +0 -21
- package/.travis.yml +0 -4
- package/Gruntfile.js +0 -33
- package/src/markdown-grammar.pegjs +0 -295
- package/src/markdown-parser.js +0 -36
- package/src/parser-browser.js +0 -3462
- package/src/parser.js +0 -3462
- package/test/main.js +0 -76
|
@@ -1,5 +1,6 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Nimesh Nayaju
|
|
3
4
|
|
|
4
5
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
5
6
|
of this software and associated documentation files (the "Software"), to deal
|
|
@@ -17,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
|
17
18
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
18
19
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
19
20
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
20
|
-
SOFTWARE.
|
|
21
|
+
SOFTWARE.
|
package/README.md
CHANGED
|
@@ -1,175 +1,95 @@
|
|
|
1
|
-
#
|
|
1
|
+
# `markdown-parser`
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
A markdown parser with streaming support, suitable for incrementally parsing LLM markdown streams. It parses markdown into a structured, fully typed tree of nodes, following the [CommonMark](https://commonmark.org/) specification. Because parsing can be incremental, you can feed it input chunk by chunk as it arrives and receive only the blocks that have become finalized.
|
|
4
4
|
|
|
5
|
-
##
|
|
5
|
+
## Installation
|
|
6
6
|
|
|
7
|
-
|
|
7
|
+
```bash
|
|
8
|
+
npm install markdown-parser
|
|
9
|
+
```
|
|
8
10
|
|
|
9
|
-
##
|
|
11
|
+
## Usage
|
|
10
12
|
|
|
11
|
-
|
|
13
|
+
```typescript
|
|
14
|
+
import { MarkdownParser } from "markdown-parser";
|
|
15
|
+
|
|
16
|
+
const parser = new MarkdownParser();
|
|
17
|
+
|
|
18
|
+
// Parse complete markdown
|
|
19
|
+
const nodes = parser.parse("# Hello World\nThis is a paragraph.");
|
|
20
|
+
// [
|
|
21
|
+
// { type: "heading", level: 1, children: [{ type: "text", text: "Hello World" }] },
|
|
22
|
+
// { type: "paragraph", children: [{ type: "text", text: "This is a paragraph." }] }
|
|
23
|
+
// ]
|
|
24
|
+
|
|
25
|
+
// Parse with streaming mode (for incremental content)
|
|
26
|
+
const partialNodes = parser.parse("# Hello World\nThis", { stream: true });
|
|
27
|
+
// Emits heading, but not the paragraph (still open)
|
|
28
|
+
// [
|
|
29
|
+
// { type: "heading", level: 1, children: [{ type: "text", text: "Hello World" }] },
|
|
30
|
+
// ]
|
|
31
|
+
|
|
32
|
+
// Continue parsing as more content arrives
|
|
33
|
+
const moreNodes = parser.parse(" is a paragraph.\n\nThis is another paragraph.", { stream: true });
|
|
34
|
+
// Emits the paragraph
|
|
35
|
+
// [
|
|
36
|
+
// { type: "paragraph", children: [{ type: "text", text: "This is a paragraph." }] }
|
|
37
|
+
// ]
|
|
38
|
+
|
|
39
|
+
const finalNodes = parser.parse("", { stream: false });
|
|
40
|
+
// Closes anything still open and emits remaining blocks
|
|
41
|
+
// [
|
|
42
|
+
// { type: "paragraph", children: [{ type: "text", text: "This is another paragraph." }] }
|
|
43
|
+
// ]
|
|
44
|
+
```
|
|
12
45
|
|
|
13
|
-
|
|
46
|
+
When `stream` is `false` (the default), the parser finalizes all open blocks at the end of the input and returns the full set of blocks for that input. In streaming mode, the parser keeps internal state across calls and returns only the blocks that have become closed and stable since the previous call.
|
|
14
47
|
|
|
15
|
-
|
|
16
|
-
- bold
|
|
17
|
-
- code
|
|
18
|
-
- heading
|
|
19
|
-
- italic
|
|
20
|
-
- reference (link, images)
|
|
21
|
-
- lists
|
|
22
|
-
- listsOrdered
|
|
23
|
-
- sections
|
|
24
|
-
- strikethroughs
|
|
25
|
-
- tasks
|
|
48
|
+
## API
|
|
26
49
|
|
|
27
|
-
|
|
50
|
+
### `MarkdownParser`
|
|
28
51
|
|
|
29
|
-
|
|
30
|
-
npm install markdown-parser
|
|
31
|
-
~~~
|
|
52
|
+
The main parser class that converts markdown text into a structured block AST (headings, paragraphs, lists, etc.).
|
|
32
53
|
|
|
33
|
-
|
|
54
|
+
#### `parse(text: string, options?: { stream?: boolean }): BlockNode[]`
|
|
34
55
|
|
|
35
|
-
|
|
36
|
-
var Parser = require('markdown-parser');
|
|
37
|
-
|
|
38
|
-
var parser = new Parser();
|
|
39
|
-
|
|
40
|
-
// options repository url
|
|
41
|
-
// var parser = new Parser({html_url: "https://github.com/darul75/markdown-parser"});
|
|
42
|
-
|
|
43
|
-
// fetch by api or what else
|
|
44
|
-
var markdownContent = ".....";
|
|
45
|
-
|
|
46
|
-
parser.parse(markdownContent, function(err, result) {
|
|
47
|
-
// result example
|
|
48
|
-
/*
|
|
49
|
-
{
|
|
50
|
-
"bolds": [
|
|
51
|
-
"No JQUERY dependency needed anymore"
|
|
52
|
-
],
|
|
53
|
-
"codes": [
|
|
54
|
-
{
|
|
55
|
-
"type": "html",
|
|
56
|
-
"code": "<script type=\"text/javascript\" src=\"angular.min.js\"></script>"
|
|
57
|
-
},
|
|
58
|
-
{
|
|
59
|
-
"type": "html",
|
|
60
|
-
"code": "<link rel=\"stylesheet\" type=\"text/css\" href=\"ng-slider.min.css\">"
|
|
61
|
-
}
|
|
62
|
-
],
|
|
63
|
-
"headings": [
|
|
64
|
-
" Options",
|
|
65
|
-
" Build",
|
|
66
|
-
" Issue",
|
|
67
|
-
" Metrics",
|
|
68
|
-
" License"
|
|
69
|
-
],
|
|
70
|
-
"italics": [],
|
|
71
|
-
"references": [
|
|
72
|
-
{
|
|
73
|
-
"title": "![NPM version",
|
|
74
|
-
"href": "https://badge.fury.io/js/ng-slider.png",
|
|
75
|
-
"image": true
|
|
76
|
-
},
|
|
77
|
-
{
|
|
78
|
-
"title": "angular slider demo",
|
|
79
|
-
"href": "http://darul75.github.io/ng-slider/images/slider1.png \"angular slider demo screenshot\"",
|
|
80
|
-
"image": true
|
|
81
|
-
}
|
|
82
|
-
],
|
|
83
|
-
"lists": [
|
|
84
|
-
[
|
|
85
|
-
"`from`: start value",
|
|
86
|
-
"`to`: end value",
|
|
87
|
-
"`step`: step value",
|
|
88
|
-
"`dimension`: string, example \" $\"",
|
|
89
|
-
"`scale`: array for scale",
|
|
90
|
-
"`round`: how many numbers allowed after comma",
|
|
91
|
-
"`smooth`: true/false; false snaps the button to value",
|
|
92
|
-
"`vertical`: true/false; vertical slider, default false",
|
|
93
|
-
"`css`: hash object"
|
|
94
|
-
],
|
|
95
|
-
[
|
|
96
|
-
"`callback` : function triggering current value, can be useful"
|
|
97
|
-
],
|
|
98
|
-
[
|
|
99
|
-
"2.1.3: bug fixes, refactoring, inline options param",
|
|
100
|
-
"2.1.2: bug fixes, changes in z-index via CSS and not js",
|
|
101
|
-
"2.1.1: override css, colors...",
|
|
102
|
-
"2.1.0: bug fixes",
|
|
103
|
-
"2.0.0: no JQuery"
|
|
104
|
-
]
|
|
105
|
-
],
|
|
106
|
-
"listsOrdered": [],
|
|
107
|
-
"sections": [
|
|
108
|
-
"Why",
|
|
109
|
-
"Screenshot",
|
|
110
|
-
"Demo",
|
|
111
|
-
"How to use it",
|
|
112
|
-
"Installation",
|
|
113
|
-
"RELEASE"
|
|
114
|
-
],
|
|
115
|
-
"strikethroughs": [],
|
|
116
|
-
"tasks": []
|
|
117
|
-
}
|
|
118
|
-
|
|
119
|
-
*/
|
|
120
|
-
|
|
121
|
-
});
|
|
122
|
-
```
|
|
123
|
-
|
|
124
|
-
## Return
|
|
125
|
-
```json
|
|
126
|
-
{
|
|
127
|
-
"bolds": [],
|
|
128
|
-
"codes": [],
|
|
129
|
-
"headings": [],
|
|
130
|
-
"italics": [],
|
|
131
|
-
"references": [],
|
|
132
|
-
"lists": [],
|
|
133
|
-
"listsOrdered": [],
|
|
134
|
-
"sections": [],
|
|
135
|
-
"strikethroughs": [],
|
|
136
|
-
"tasks": []
|
|
137
|
-
}
|
|
138
|
-
```
|
|
56
|
+
Parses markdown text and returns an array of block nodes.
|
|
139
57
|
|
|
140
|
-
|
|
58
|
+
- `text` - The markdown text to parse
|
|
59
|
+
- `options.stream` - When `true`, enables streaming mode which buffers incomplete blocks until they can be fully parsed. Defaults to `false`.
|
|
141
60
|
|
|
142
|
-
|
|
61
|
+
## Supported Nodes
|
|
143
62
|
|
|
144
|
-
|
|
145
|
-
- [ ] Finish tests
|
|
146
|
-
- [ ] Browserify
|
|
147
|
-
- [ ] Missing markdown
|
|
63
|
+
The parser provides 100% support for the CommonMark specification, and includes full support for GitHub Flavored Markdown (GFM) tables.
|
|
148
64
|
|
|
149
|
-
|
|
65
|
+
### Block nodes
|
|
150
66
|
|
|
151
|
-
[
|
|
67
|
+
- [x] Heading (ATX and setext style)
|
|
68
|
+
- [x] Paragraph
|
|
69
|
+
- [x] Code block (fenced and indented)
|
|
70
|
+
- [x] Thematic break (horizontal rule)
|
|
71
|
+
- [x] HTML block
|
|
72
|
+
- [x] Blockquote
|
|
73
|
+
- [x] List (ordered and unordered)
|
|
74
|
+
- [x] Link reference definitions
|
|
75
|
+
- [x] Table (GFM)
|
|
152
76
|
|
|
153
|
-
|
|
77
|
+
### Inline nodes
|
|
154
78
|
|
|
155
|
-
|
|
79
|
+
- [x] Text
|
|
80
|
+
- [x] Code span
|
|
81
|
+
- [x] Hard break
|
|
82
|
+
- [x] Soft break
|
|
83
|
+
- [x] HTML (inline)
|
|
84
|
+
- [x] Autolink
|
|
85
|
+
- [x] Link
|
|
86
|
+
- [x] Image
|
|
87
|
+
- [x] Emphasis
|
|
88
|
+
- [x] Strong
|
|
156
89
|
|
|
157
|
-
|
|
90
|
+
## Some notes on the implementation
|
|
158
91
|
|
|
159
|
-
|
|
160
|
-
of this software and associated documentation files (the "Software"), to deal
|
|
161
|
-
in the Software without restriction, including without limitation the rights
|
|
162
|
-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
163
|
-
copies of the Software, and to permit persons to whom the Software is
|
|
164
|
-
furnished to do so, subject to the following conditions:
|
|
92
|
+
The implementation is inspired by various other markdown parsers, including [commonmark.js](https://github.com/commonmark/commonmark.js), [markdown-it](https://github.com/markdown-it/markdown-it), and [marked.js](https://github.com/markedjs/marked). In fact, the implementation is structurally very similar to how commonmark.js goes about parsing; the only major difference is how we decide which lines to parse when streaming is set to true. I started with a much simpler and more readable implementation of the parser, but it became complicated once I added support for container blocks (blockquotes and lists), so I ended up with a slightly more complex, but more robust and extensible, solution.
|
|
165
93
|
|
|
166
|
-
|
|
167
|
-
all copies or substantial portions of the Software.
|
|
94
|
+
The CommonMark specification allows link reference definitions to appear after the links that use them. Therefore, when streaming is enabled, it is important to consider that a link reference might not resolve, since its definition could arrive in a later chunk of the input.
|
|
168
95
|
|
|
169
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
170
|
-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
171
|
-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
172
|
-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
173
|
-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
174
|
-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
175
|
-
THE SOFTWARE.
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
interface CodeSpanNode {
|
|
2
|
+
type: "code-span";
|
|
3
|
+
text: string;
|
|
4
|
+
}
|
|
5
|
+
interface TextNode {
|
|
6
|
+
type: "text";
|
|
7
|
+
text: string;
|
|
8
|
+
}
|
|
9
|
+
interface HardBreakNode {
|
|
10
|
+
type: "hardbreak";
|
|
11
|
+
}
|
|
12
|
+
interface SoftBreakNode {
|
|
13
|
+
type: "softbreak";
|
|
14
|
+
}
|
|
15
|
+
interface StrongNode {
|
|
16
|
+
type: "strong";
|
|
17
|
+
children: Array<InlineNode>;
|
|
18
|
+
}
|
|
19
|
+
interface EmphasisNode {
|
|
20
|
+
type: "emphasis";
|
|
21
|
+
children: Array<InlineNode>;
|
|
22
|
+
}
|
|
23
|
+
interface LinkNode {
|
|
24
|
+
type: "link";
|
|
25
|
+
href: string;
|
|
26
|
+
title?: string;
|
|
27
|
+
children: Array<InlineNode>;
|
|
28
|
+
}
|
|
29
|
+
interface ImageNode {
|
|
30
|
+
type: "image";
|
|
31
|
+
href: string;
|
|
32
|
+
title?: string;
|
|
33
|
+
children: Array<InlineNode>;
|
|
34
|
+
}
|
|
35
|
+
interface HtmlTagNode {
|
|
36
|
+
type: "html";
|
|
37
|
+
content: string;
|
|
38
|
+
}
|
|
39
|
+
type InlineNode = TextNode | CodeSpanNode | HardBreakNode | SoftBreakNode | StrongNode | EmphasisNode | LinkNode | ImageNode | HtmlTagNode;
|
|
40
|
+
|
|
41
|
+
declare class MarkdownParser {
|
|
42
|
+
private splitter;
|
|
43
|
+
private root;
|
|
44
|
+
private nextLineIndex;
|
|
45
|
+
private nextNodeIndex;
|
|
46
|
+
private referenceDefinitions;
|
|
47
|
+
parse(input: string, options?: {
|
|
48
|
+
stream?: boolean;
|
|
49
|
+
}): BlockNode[];
|
|
50
|
+
private parseLine;
|
|
51
|
+
private parseReferenceLinkDefinitions;
|
|
52
|
+
private convertInternalBlockToPublicBlock;
|
|
53
|
+
}
|
|
54
|
+
interface TableNode {
|
|
55
|
+
type: "table";
|
|
56
|
+
head: {
|
|
57
|
+
cells: Array<{
|
|
58
|
+
align: "left" | "right" | "center" | undefined;
|
|
59
|
+
children: Array<InlineNode>;
|
|
60
|
+
}>;
|
|
61
|
+
};
|
|
62
|
+
body: {
|
|
63
|
+
rows: Array<{
|
|
64
|
+
cells: Array<{
|
|
65
|
+
align: "left" | "right" | "center" | undefined;
|
|
66
|
+
children: Array<InlineNode>;
|
|
67
|
+
}>;
|
|
68
|
+
}>;
|
|
69
|
+
};
|
|
70
|
+
}
|
|
71
|
+
interface ThematicBreakNode {
|
|
72
|
+
type: "thematic-break";
|
|
73
|
+
}
|
|
74
|
+
interface CodeBlockNode {
|
|
75
|
+
type: "code-block";
|
|
76
|
+
info?: string;
|
|
77
|
+
content: string;
|
|
78
|
+
}
|
|
79
|
+
interface BlockquoteNode {
|
|
80
|
+
type: "blockquote";
|
|
81
|
+
children: Array<BlockNode>;
|
|
82
|
+
}
|
|
83
|
+
type ListNode = {
|
|
84
|
+
type: "list";
|
|
85
|
+
tight: boolean;
|
|
86
|
+
items: Array<{
|
|
87
|
+
children: Array<BlockNode>;
|
|
88
|
+
}>;
|
|
89
|
+
} & ({
|
|
90
|
+
kind: "ordered";
|
|
91
|
+
start: number;
|
|
92
|
+
} | {
|
|
93
|
+
kind: "unordered";
|
|
94
|
+
marker: string;
|
|
95
|
+
});
|
|
96
|
+
interface HtmlBlockNode {
|
|
97
|
+
type: "html-block";
|
|
98
|
+
content: string;
|
|
99
|
+
}
|
|
100
|
+
interface HeadingNode {
|
|
101
|
+
type: "heading";
|
|
102
|
+
level: 1 | 2 | 3 | 4 | 5 | 6;
|
|
103
|
+
children: Array<InlineNode>;
|
|
104
|
+
}
|
|
105
|
+
interface ParagraphNode {
|
|
106
|
+
type: "paragraph";
|
|
107
|
+
children: Array<InlineNode>;
|
|
108
|
+
}
|
|
109
|
+
type BlockNode = TableNode | ThematicBreakNode | CodeBlockNode | HeadingNode | ParagraphNode | BlockquoteNode | ListNode | HtmlBlockNode;
|
|
110
|
+
|
|
111
|
+
export { MarkdownParser };
|
|
112
|
+
export type { BlockNode, InlineNode };
|