@minamorl/markdown-next 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +212 -0
- package/lib/src/parser.d.ts +38 -0
- package/lib/src/parser.js +411 -0
- package/package.json +41 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 minamorl
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
# @minamorl/markdown-next
|
|
2
|
+
|
|
3
|
+
A markdown parser that outputs either HTML strings or an AST you can render however you want. Built with [Parsimmon](https://github.com/jneen/parsimmon).
|
|
4
|
+
|
|
5
|
+
What makes it different:
|
|
6
|
+
- **AST output** for React/Vue/custom renderers (not just HTML strings)
|
|
7
|
+
- **Aozora bunko ruby** syntax: `|漢字《かんじ》`
|
|
8
|
+
- **HTML passthrough**: write `<cite>` or `<ruby>` directly in your markdown
|
|
9
|
+
- **Plugin system**: define custom `@[plugin:args]` blocks
|
|
10
|
+
|
|
11
|
+
## Installation
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
npm install @minamorl/markdown-next
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
## Basic Usage
|
|
18
|
+
|
|
19
|
+
The simplest way to get HTML:
|
|
20
|
+
|
|
21
|
+
```typescript
|
|
22
|
+
import { parse } from '@minamorl/markdown-next'
|
|
23
|
+
|
|
24
|
+
parse('# Hello **world**')
|
|
25
|
+
// '<h1>Hello <strong>world</strong></h1>'
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
## Getting an AST Instead
|
|
29
|
+
|
|
30
|
+
If you're building a React app or need fine-grained control over rendering, use the AST output:
|
|
31
|
+
|
|
32
|
+
```typescript
|
|
33
|
+
import { Parser, asAST } from '@minamorl/markdown-next'
|
|
34
|
+
|
|
35
|
+
const parser = new Parser({ export: asAST })
|
|
36
|
+
const ast = parser.parse('# Hello **world**')
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
The AST uses a simple tuple format: `[tagName, attributes, children]`
|
|
40
|
+
|
|
41
|
+
```typescript
|
|
42
|
+
// Input: '# Hello **world**'
|
|
43
|
+
// Output:
|
|
44
|
+
[
|
|
45
|
+
['h1', null, ['Hello ', ['strong', null, 'world']]]
|
|
46
|
+
]
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
### Rendering AST in React
|
|
50
|
+
|
|
51
|
+
Here's a minimal renderer:
|
|
52
|
+
|
|
53
|
+
```tsx
|
|
54
|
+
import { Parser, asAST, ASTNode } from '@minamorl/markdown-next'
|
|
55
|
+
|
|
56
|
+
function renderNode(node: ASTNode, key: number = 0): React.ReactNode {
|
|
57
|
+
if (typeof node === 'string') return node
|
|
58
|
+
if (!Array.isArray(node)) return null
|
|
59
|
+
|
|
60
|
+
const [tag, attrs, children] = node
|
|
61
|
+
|
|
62
|
+
// Self-closing tags
|
|
63
|
+
if (tag === 'img' || tag === 'br' || tag === 'hr') {
|
|
64
|
+
return React.createElement(tag, { ...attrs, key })
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
// Recursively render children
|
|
68
|
+
const kids = Array.isArray(children)
|
|
69
|
+
? children.map((child, i) => renderNode(child, i))
|
|
70
|
+
: renderNode(children, 0)
|
|
71
|
+
|
|
72
|
+
return React.createElement(tag, { ...attrs, key }, kids)
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
// Usage
|
|
76
|
+
const parser = new Parser({ export: asAST })
|
|
77
|
+
|
|
78
|
+
function Markdown({ content }: { content: string }) {
|
|
79
|
+
const ast = parser.parse(content)
|
|
80
|
+
return <>{ast.map((node, i) => renderNode(node, i))}</>
|
|
81
|
+
}
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
## Japanese Ruby Annotations
|
|
85
|
+
|
|
86
|
+
Supports [Aozora bunko](https://www.aozora.gr.jp/) ruby syntax, commonly used in Japanese publishing:
|
|
87
|
+
|
|
88
|
+
```typescript
|
|
89
|
+
parse('|漢字《かんじ》')
|
|
90
|
+
// '<p><ruby>漢字<rt>かんじ</rt></ruby></p>'
|
|
91
|
+
|
|
92
|
+
parse('This is |日本語《にほんご》 text')
|
|
93
|
+
// '<p>This is <ruby>日本語<rt>にほんご</rt></ruby> text</p>'
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
The `|` (full-width pipe) marks the start of the base text, and `《》` contains the ruby reading.
|
|
97
|
+
|
|
98
|
+
## HTML Passthrough
|
|
99
|
+
|
|
100
|
+
Sometimes you need HTML that markdown doesn't support. Just write it:
|
|
101
|
+
|
|
102
|
+
```typescript
|
|
103
|
+
parse('<cite>The Art of Computer Programming</cite>')
|
|
104
|
+
// '<p><cite>The Art of Computer Programming</cite></p>'
|
|
105
|
+
|
|
106
|
+
parse('<ruby>東京<rt>とうきょう</rt></ruby>')
|
|
107
|
+
// '<p><ruby>東京<rt>とうきょう</rt></ruby></p>'
|
|
108
|
+
|
|
109
|
+
parse('<details><summary>Click me</summary>Hidden content</details>')
|
|
110
|
+
// '<p><details><summary>Click me</summary>Hidden content</details></p>'
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
## Plugins
|
|
114
|
+
|
|
115
|
+
Define custom syntax with plugins. Useful for embeds, custom components, or domain-specific markup.
|
|
116
|
+
|
|
117
|
+
```typescript
|
|
118
|
+
const parser = new Parser({
|
|
119
|
+
export: asHTML,
|
|
120
|
+
plugins: {
|
|
121
|
+
youtube: (args) => {
|
|
122
|
+
return `<iframe src="https://youtube.com/embed/${args}" frameborder="0"></iframe>`
|
|
123
|
+
},
|
|
124
|
+
note: (args, content, mapper) => {
|
|
125
|
+
return mapper('div', { class: `note note-${args || 'info'}` })(content)
|
|
126
|
+
}
|
|
127
|
+
}
|
|
128
|
+
})
|
|
129
|
+
|
|
130
|
+
// Inline plugin
|
|
131
|
+
parser.parse('Check this video: @[youtube:dQw4w9WgXcQ]')
|
|
132
|
+
|
|
133
|
+
// Block plugin with content
|
|
134
|
+
parser.parse(`
|
|
135
|
+
@[note:warning]
|
|
136
|
+
This is important
|
|
137
|
+
`)
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
Plugin function signature:
|
|
141
|
+
```typescript
|
|
142
|
+
type Plugin = (
|
|
143
|
+
args: string, // Everything after the colon
|
|
144
|
+
content: Content, // Parsed content (for block plugins)
|
|
145
|
+
mapper: ElementMapper, // Helper to create elements
|
|
146
|
+
join: JoinFunction // Helper to join content
|
|
147
|
+
) => string | ASTNode
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
## Supported Markdown
|
|
151
|
+
|
|
152
|
+
| Syntax | Example |
|
|
153
|
+
|--------|---------|
|
|
154
|
+
| Headers | `# H1` through `###### H6` |
|
|
155
|
+
| Alt headers | `Header\n===` or `Header\n---` |
|
|
156
|
+
| Bold | `**text**` or `__text__` |
|
|
157
|
+
| Italic | `*text*` or `_text_` |
|
|
158
|
+
| Code | `` `inline` `` or fenced blocks |
|
|
159
|
+
| Links | `[text](url)` |
|
|
160
|
+
| Images | `![alt](src)` |
|
|
161
|
+
| Lists | `- item` or `1. item` |
|
|
162
|
+
| Nested lists | 2-space indent |
|
|
163
|
+
| Blockquotes | `> text` |
|
|
164
|
+
| Tables | `| a | b |` with `|---|---|` separator |
|
|
165
|
+
| Ruby | `|base《reading》` |
|
|
166
|
+
| HTML | Any valid HTML tag |
|
|
167
|
+
|
|
168
|
+
## API Reference
|
|
169
|
+
|
|
170
|
+
### `parse(markdown: string): string`
|
|
171
|
+
|
|
172
|
+
Quick function for HTML output.
|
|
173
|
+
|
|
174
|
+
### `Parser<T>`
|
|
175
|
+
|
|
176
|
+
```typescript
|
|
177
|
+
const parser = new Parser({
|
|
178
|
+
export: asHTML, // or asAST
|
|
179
|
+
plugins: { ... } // optional
|
|
180
|
+
})
|
|
181
|
+
|
|
182
|
+
parser.parse(markdown) // returns T (string for asHTML, ASTNode[] for asAST)
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
### `asHTML`
|
|
186
|
+
|
|
187
|
+
Export type that produces HTML strings.
|
|
188
|
+
|
|
189
|
+
### `asAST`
|
|
190
|
+
|
|
191
|
+
Export type that produces an array of AST nodes.
|
|
192
|
+
|
|
193
|
+
### `ASTNode`
|
|
194
|
+
|
|
195
|
+
```typescript
|
|
196
|
+
type ASTNode = string | [string, Record<string, string> | null, ASTNode | ASTNode[]]
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
## Why Another Markdown Parser?
|
|
200
|
+
|
|
201
|
+
I needed:
|
|
202
|
+
1. Ruby annotation support for Japanese text
|
|
203
|
+
2. AST output for React (not just HTML strings)
|
|
204
|
+
3. Extensibility without forking
|
|
205
|
+
|
|
206
|
+
Most parsers give you HTML. That's fine until you need to render in React, handle custom components, or process the structure programmatically. The AST output solves this.
|
|
207
|
+
|
|
208
|
+
The ruby and HTML passthrough features came from working with Japanese content where standard markdown falls short.
|
|
209
|
+
|
|
210
|
+
## License
|
|
211
|
+
|
|
212
|
+
MIT
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import P = require("parsimmon");
|
|
2
|
+
export interface ListTree {
|
|
3
|
+
type: "ul" | "ol" | "shadow";
|
|
4
|
+
children: Array<ListTree>;
|
|
5
|
+
value: string | null;
|
|
6
|
+
parent: ListTree | null;
|
|
7
|
+
}
|
|
8
|
+
/**
 * Handler invoked for `@[name:args]` plugin syntax. It is called with the
 * argument string, the block content (`null` when used inline), the active
 * export's `mapper`, and the active export's `join` function.
 */
export type Plugin<T> = (args: string, content: any, mapper: Mapper<T>, join: Function) => string;
|
|
9
|
+
/**
 * Element factory: given a tag name and optional attributes, returns a
 * function that wraps `children` into the export's node representation `T`.
 */
export type Mapper<T> = (tagName: string, attributes?: any) => (children: string | T | null) => T;
|
|
10
|
+
export interface ExportType<T> {
|
|
11
|
+
mapper: Mapper<T>;
|
|
12
|
+
join: Function;
|
|
13
|
+
postprocess: (x: any) => any;
|
|
14
|
+
}
|
|
15
|
+
export declare class Parser<T> {
|
|
16
|
+
opts: {
|
|
17
|
+
export: ExportType<T>;
|
|
18
|
+
plugins?: {
|
|
19
|
+
[pluginName: string]: Plugin<T>;
|
|
20
|
+
};
|
|
21
|
+
};
|
|
22
|
+
liLevelBefore: number | null;
|
|
23
|
+
liLevel: number | null;
|
|
24
|
+
rootTree: ListTree;
|
|
25
|
+
currentTree: ListTree;
|
|
26
|
+
acceptables: P.Parser<T>;
|
|
27
|
+
constructor(opts: {
|
|
28
|
+
export: ExportType<T>;
|
|
29
|
+
plugins?: {
|
|
30
|
+
[pluginName: string]: Plugin<T>;
|
|
31
|
+
};
|
|
32
|
+
});
|
|
33
|
+
create(): void;
|
|
34
|
+
parse(s: string): any;
|
|
35
|
+
}
|
|
36
|
+
export declare const asHTML: ExportType<string>;
|
|
37
|
+
export declare const asAST: ExportType<any>;
|
|
38
|
+
export declare const parse: (s: string) => any;
|
|
@@ -0,0 +1,411 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.parse = exports.asAST = exports.asHTML = exports.Parser = void 0;
|
|
4
|
+
const P = require("parsimmon");
|
|
5
|
+
class Parser {
|
|
6
|
+
constructor(opts) {
|
|
7
|
+
this.opts = opts;
|
|
8
|
+
this.liLevelBefore = null;
|
|
9
|
+
this.liLevel = null;
|
|
10
|
+
this.rootTree = {
|
|
11
|
+
value: null,
|
|
12
|
+
children: [],
|
|
13
|
+
type: "shadow",
|
|
14
|
+
parent: null
|
|
15
|
+
};
|
|
16
|
+
this.currentTree = {
|
|
17
|
+
value: null,
|
|
18
|
+
children: [],
|
|
19
|
+
type: "shadow",
|
|
20
|
+
parent: null
|
|
21
|
+
};
|
|
22
|
+
this.create();
|
|
23
|
+
}
|
|
24
|
+
create() {
|
|
25
|
+
// Return the flag letters of a regular expression, read from its string form
// (everything after the final "/").
function flags(re) {
    const text = `${re}`;
    const lastSlash = text.lastIndexOf('/');
    return text.substring(lastSlash + 1);
}
|
|
29
|
+
// Negative look-ahead parser: consumes nothing and succeeds when `re` does
// NOT match at the current position; fails when it does (and the requested
// capture group is present).
function ignore(re, group = 0) {
    const anchored = RegExp('^(?:' + re.source + ')', flags(re));
    const expected = '' + re;
    return P((input, i) => {
        const match = anchored.exec(input.slice(i));
        const captured = match ? match[group] : null;
        if (match && captured != null) {
            return P.makeFailure(i + match[0].length, captured);
        }
        return P.makeSuccess(i, expected);
    });
}
|
|
45
|
+
const whitespace = P.regexp(/\s+/m);
|
|
46
|
+
const asterisk = P.string("*");
|
|
47
|
+
const sharp = P.string("#");
|
|
48
|
+
const plainStr = P.regexp(/[^`_\*\r\n]+/);
|
|
49
|
+
const codePlainStr = P.regexp(/[^`\r\n]+/);
|
|
50
|
+
const linebreak = P.string("\r\n").or(P.string("\n")).or(P.string("\r"));
|
|
51
|
+
const equal = P.string("=");
|
|
52
|
+
const minus = P.string("-");
|
|
53
|
+
const join = this.opts.export.join;
|
|
54
|
+
const mapper = this.opts.export.mapper;
|
|
55
|
+
const token = (p) => {
|
|
56
|
+
return p.skip(P.regexp(/\s*/m));
|
|
57
|
+
};
|
|
58
|
+
const h1Special = P.regexp(/^(.*)\n\=+/, 1)
|
|
59
|
+
.skip(P.alt(P.eof, P.string("\n")))
|
|
60
|
+
.map(mapper("h1"));
|
|
61
|
+
const h2Special = P.regexp(/^(.*)\n\-+/, 1)
|
|
62
|
+
.skip(P.alt(P.eof, P.string("\n")))
|
|
63
|
+
.map(mapper("h2"));
|
|
64
|
+
const h1 = token(P.seq(sharp, whitespace).then(plainStr)).map(mapper("h1"));
|
|
65
|
+
const h2 = token(P.seq(sharp.times(2), whitespace).then(plainStr)).map(mapper("h2"));
|
|
66
|
+
const h3 = token(P.seq(sharp.times(3), whitespace).then(plainStr)).map(mapper("h3"));
|
|
67
|
+
const h4 = token(P.seq(sharp.times(4), whitespace).then(plainStr)).map(mapper("h4"));
|
|
68
|
+
const h5 = token(P.seq(sharp.times(5), whitespace).then(plainStr)).map(mapper("h5"));
|
|
69
|
+
const h6 = token(P.seq(sharp.times(6), whitespace).then(plainStr)).map(mapper("h6"));
|
|
70
|
+
const strongStart = P.string("**").or(P.string("__"));
|
|
71
|
+
const strongEnd = strongStart;
|
|
72
|
+
const strong = strongStart
|
|
73
|
+
.then(plainStr)
|
|
74
|
+
.map(mapper("strong"))
|
|
75
|
+
.skip(strongEnd);
|
|
76
|
+
const emStart = P.string("*").or(P.string("_"));
|
|
77
|
+
const emEnd = emStart;
|
|
78
|
+
const em = emStart
|
|
79
|
+
.then(plainStr)
|
|
80
|
+
.map(mapper("em"))
|
|
81
|
+
.skip(emEnd);
|
|
82
|
+
const anchor = P.seqMap(P.string("["), P.regexp(/[^\]\r\n]+/), P.string("]("), P.regexp(/[^\)\r\n]+/), P.string(")"), (_1, label, _2, href, _3) => {
|
|
83
|
+
return mapper("a", { href })(label);
|
|
84
|
+
});
|
|
85
|
+
const img = P.seqMap(P.string("!["), P.regexp(/[^\]\r\n]+/), P.string("]("), P.regexp(/[^\)\r\n]+/), P.string(")"), (_1, alt, _2, src, _3) => {
|
|
86
|
+
return mapper("img", { src, alt })(null);
|
|
87
|
+
});
|
|
88
|
+
const codeStart = P.string("`");
|
|
89
|
+
const codeEnd = P.string("`");
|
|
90
|
+
const code = codeStart
|
|
91
|
+
.then(codePlainStr)
|
|
92
|
+
.map(mapper("code"))
|
|
93
|
+
.skip(codeEnd);
|
|
94
|
+
const pluginInline = P.seqMap(P.string("@["), P.regexp(/[a-zA-Z]+/), P.regexp(/:{0,1}([^\]]*)/, 1), P.string("]"), (_1, pluginName, args, _2) => {
|
|
95
|
+
return this.opts.plugins && this.opts.plugins[pluginName] ?
|
|
96
|
+
this.opts.plugins[pluginName](args, null, mapper, join) : join([_1, pluginName, args, _2]);
|
|
97
|
+
});
|
|
98
|
+
// Aozora bunko ruby format: |text《ruby》
|
|
99
|
+
const aozoraRuby = P.seqMap(P.string("|"), P.regexp(/[^《]+/), P.string("《"), P.regexp(/[^》]+/), P.string("》"), (_pipe, base, _open, ruby, _close) => {
|
|
100
|
+
return mapper("ruby")(join([base, mapper("rt")(ruby)]));
|
|
101
|
+
});
|
|
102
|
+
// HTML element parser - converts <tag>content</tag> to AST format
|
|
103
|
+
const htmlSelfClosing = P.regexp(/<(br|hr)\s*\/?>/).map((match) => {
|
|
104
|
+
var _a;
|
|
105
|
+
const tag = ((_a = match.match(/<(\w+)/)) === null || _a === void 0 ? void 0 : _a[1]) || 'br';
|
|
106
|
+
return mapper(tag)(null);
|
|
107
|
+
});
|
|
108
|
+
const htmlElement = P.lazy(() => {
|
|
109
|
+
// Match opening tag with optional attributes
|
|
110
|
+
const openTag = P.regexp(/<([a-zA-Z][a-zA-Z0-9]*)((?:\s+[^>]*)?)>/, 1);
|
|
111
|
+
// Content can include nested HTML elements or text
|
|
112
|
+
const content = P.alt(htmlElement, htmlSelfClosing, P.regexp(/[^<]+/)).many();
|
|
113
|
+
return P.seqMap(openTag, P.index, content, P.regexp(/<\/([a-zA-Z][a-zA-Z0-9]*)>/, 1), (tag, _idx, children, closeTag) => {
|
|
114
|
+
if (tag.toLowerCase() !== closeTag.toLowerCase()) {
|
|
115
|
+
// Mismatched tags - return as text
|
|
116
|
+
return join(['<' + tag + '>', ...children, '</' + closeTag + '>']);
|
|
117
|
+
}
|
|
118
|
+
return mapper(tag)(join(children));
|
|
119
|
+
});
|
|
120
|
+
});
|
|
121
|
+
// Inline-level alternatives, tried in order. The plain-text run stops at any
// character listed in the negated class; the final /./ consumes a single
// character when nothing else matched.
// The hyphen in the class is escaped so that "=" and "-" are excluded
// literally. Unescaped, "=-\[" was a range from "=" to "[", which also kept
// ">", "?" and every uppercase ASCII letter out of plain-text runs and forced
// them to be matched one character at a time.
const inline = P.alt(pluginInline, aozoraRuby, anchor, img, em, strong, code, htmlElement, htmlSelfClosing, P.regexp(/[^\r\n<=\-\[\]\*\`\@|]+/), P.regexp(/./));
|
|
122
|
+
// Table cell content - supports inline elements
|
|
123
|
+
const tableCellInline = P.alt(anchor, img, em, strong, code, P.regexp(/[^\r\n\[\]\*|`]+/));
|
|
124
|
+
// Split one `|cell|cell|` table line into its trimmed cell strings.
// A line that is not wrapped in pipes on both ends yields an empty array.
const parseTableRow = (input) => {
    const line = input.trim();
    const wrappedInPipes = line.startsWith('|') && line.endsWith('|');
    if (!wrappedInPipes) {
        return [];
    }
    // Drop the outer pipes, then cut on the remaining ones.
    const cells = [];
    for (const raw of line.slice(1, -1).split('|')) {
        cells.push(raw.trim());
    }
    return cells;
};
|
|
135
|
+
// Table row regex - matches |...|
|
|
136
|
+
const tableRowLine = P.regexp(/^\|[^\r\n]+\|/, 0);
|
|
137
|
+
// Shape of a table separator line such as |---|:---:|---|; the colons are
// optional alignment markers.
const separatorRowPattern = /^\|\s*:?-+:?\s*(\|\s*:?-+:?\s*)*\|$/;
// True when `row`, ignoring surrounding whitespace, is a separator line.
const isSeparatorRow = (row) => separatorRowPattern.test(row.trim());
|
|
143
|
+
// Run the inline cell parser over one cell's text. Blank cells give '';
// text the cell parser cannot handle is returned trimmed and unparsed.
const parseCellContent = (content) => {
    const text = content ? content.trim() : '';
    if (text === '') {
        return '';
    }
    const outcome = tableCellInline.atLeast(1).map(join).parse(text);
    return outcome.status ? outcome.value : text;
};
|
|
155
|
+
// Table parser: a header row, a separator row, then zero or more body rows.
// Validation runs inside `chain` so that an invalid candidate makes this
// parser fail and the enclosing P.alt can try its next alternative. Returning
// P.makeFailure(...) from a seqMap callback did not fail the parse: the reply
// object simply became the parsed value.
const table = P.seq(
    tableRowLine.skip(linebreak),
    tableRowLine.skip(linebreak),
    tableRowLine.skip(linebreak.atMost(1)).atLeast(0)
).chain(([headerLine, sepLine, bodyLines]) => {
    // The second line must be a |---|---| style separator.
    if (!isSeparatorRow(sepLine)) {
        return P.fail('Not a valid table separator');
    }
    const headerCells = parseTableRow(headerLine);
    if (headerCells.length === 0) {
        return P.fail('No header cells');
    }
    const bodyCells = bodyLines.map(parseTableRow);
    return P.succeed(mapper("table")(join([
        mapper("tr")(join(headerCells.map(h => mapper("th")(parseCellContent(h))))),
        join(bodyCells.map(row => mapper("tr")(join(row.map(cell => mapper("td")(parseCellContent(cell)))))))
    ])));
});
|
|
171
|
+
const inlines = inline.atLeast(1).map(join);
|
|
172
|
+
const paragraphBegin = inlines;
|
|
173
|
+
const paragraphEnd = ignore(/```\n.*\n```/);
|
|
174
|
+
const paragraphLine = P.lazy(() => P.alt(P.seq(paragraphBegin, linebreak.skip(paragraphEnd).result(mapper("br")(null)), paragraphLine).map(join), inlines));
|
|
175
|
+
const paragraph = paragraphLine
|
|
176
|
+
.map(mapper("p"));
|
|
177
|
+
const listIndent = P.string(" ");
|
|
178
|
+
const liSingleLine = P.regexp(/[^`\r\n]*/);
|
|
179
|
+
const ulStart = P.string("- ").or(P.string("* "));
|
|
180
|
+
const olStart = P.regexp(/[0-9]+\. /);
|
|
181
|
+
let liLevel = [1];
|
|
182
|
+
let counter = 0;
|
|
183
|
+
const initializeList = () => {
|
|
184
|
+
this.rootTree = this.currentTree = {
|
|
185
|
+
value: null,
|
|
186
|
+
children: [],
|
|
187
|
+
type: "shadow",
|
|
188
|
+
parent: null
|
|
189
|
+
};
|
|
190
|
+
liLevel = [1];
|
|
191
|
+
counter = 0;
|
|
192
|
+
};
|
|
193
|
+
const listLineContent = () => {
|
|
194
|
+
return P.seqMap(listIndent.many(), P.index, ulStart.or(olStart), liSingleLine, (_1, index, start, str) => {
|
|
195
|
+
let nodeType;
|
|
196
|
+
// detect which types of content
|
|
197
|
+
liLevel.push(index.column);
|
|
198
|
+
nodeType = ((start == "* ") || (start == "- ")) ? "ul" : "ol";
|
|
199
|
+
counter += 1;
|
|
200
|
+
return { counter, nodeType, str, liLevel, index };
|
|
201
|
+
})
|
|
202
|
+
.skip(linebreak.atMost(1))
|
|
203
|
+
.chain(v => {
|
|
204
|
+
if (v.liLevel.filter(x => x % 2 !== 1).length > 0) {
|
|
205
|
+
initializeList();
|
|
206
|
+
return P.fail("Invalid indentation");
|
|
207
|
+
}
|
|
208
|
+
return P.succeed(v);
|
|
209
|
+
})
|
|
210
|
+
.map(v => {
|
|
211
|
+
const liLevelBefore = liLevel[v.counter - 1];
|
|
212
|
+
const liLevelCurrent = liLevel[v.counter];
|
|
213
|
+
if (liLevelBefore === liLevelCurrent) {
|
|
214
|
+
this.currentTree.children.push({
|
|
215
|
+
value: v.str,
|
|
216
|
+
children: [],
|
|
217
|
+
type: v.nodeType,
|
|
218
|
+
parent: this.currentTree
|
|
219
|
+
});
|
|
220
|
+
}
|
|
221
|
+
else if (liLevelBefore < liLevelCurrent) {
|
|
222
|
+
const currentTreeIndex = this.currentTree.children.length - 1;
|
|
223
|
+
this.currentTree = this.currentTree.children[currentTreeIndex];
|
|
224
|
+
this.currentTree.children.push({
|
|
225
|
+
children: [],
|
|
226
|
+
type: v.nodeType,
|
|
227
|
+
parent: this.currentTree,
|
|
228
|
+
value: v.str
|
|
229
|
+
});
|
|
230
|
+
}
|
|
231
|
+
else if (liLevelBefore > liLevelCurrent) {
|
|
232
|
+
const unindetationStep = (liLevelBefore - liLevelCurrent) / 2;
|
|
233
|
+
for (let i = 0; i < unindetationStep; i++) {
|
|
234
|
+
if (this.currentTree.parent !== null) {
|
|
235
|
+
this.currentTree = this.currentTree.parent;
|
|
236
|
+
}
|
|
237
|
+
}
|
|
238
|
+
this.currentTree.children.push({
|
|
239
|
+
type: v.nodeType,
|
|
240
|
+
children: [],
|
|
241
|
+
parent: this.currentTree,
|
|
242
|
+
value: v.str
|
|
243
|
+
});
|
|
244
|
+
}
|
|
245
|
+
const _nodeType = v.nodeType;
|
|
246
|
+
return _nodeType;
|
|
247
|
+
});
|
|
248
|
+
};
|
|
249
|
+
const lists = P.lazy(() => {
|
|
250
|
+
return listLineContent().atLeast(1).map(nodeTypes => {
|
|
251
|
+
this.rootTree.type = nodeTypes[0];
|
|
252
|
+
const result = treeToHtml(this.rootTree);
|
|
253
|
+
// initialization
|
|
254
|
+
initializeList();
|
|
255
|
+
return result;
|
|
256
|
+
});
|
|
257
|
+
});
|
|
258
|
+
// Render a list tree node through the active mapper/join.
//  - "shadow" nodes contribute only their rendered children;
//  - nodes without a value become a list container of their own type;
//  - nodes with a value become an <li>, with a nested list appended when
//    they have children (the nested list takes the first child's type).
const treeToHtml = (treeOrNode) => {
    const { type, value, children } = treeOrNode;
    if (type === "shadow") {
        return join(children.map(treeToHtml));
    }
    if (value === null) {
        return mapper(type)(join(children.map(treeToHtml)));
    }
    const asListItem = mapper("li");
    if (children.length === 0) {
        return asListItem(value);
    }
    const nestedList = mapper(children[0].type)(join(children.map(treeToHtml)));
    return asListItem(join([value, nestedList]));
};
|
|
274
|
+
const codeBlockBegin = P.regexp(/^```/);
|
|
275
|
+
const codeBlockEnd = P.regexp(/^```/);
|
|
276
|
+
const codeBlockDefinitionStr = P.regexp(/[^`\r\n]*/);
|
|
277
|
+
const codeBlockStr = P.regexp(/[^\r\n]+/);
|
|
278
|
+
const codeBlock = P.seqMap(codeBlockBegin, codeBlockDefinitionStr, linebreak, linebreak.or(codeBlockStr.lookahead(linebreak)).many(), codeBlockEnd, (_1, definition, _2, code, _3) => {
|
|
279
|
+
if (code.length > 0) {
|
|
280
|
+
code.pop();
|
|
281
|
+
}
|
|
282
|
+
if (definition === "")
|
|
283
|
+
return mapper("pre")(mapper("code")(join(code)));
|
|
284
|
+
return mapper("pre", { "data-language": definition })(mapper("code")(join(code)));
|
|
285
|
+
});
|
|
286
|
+
const blockquoteBegin = P.string("> ");
|
|
287
|
+
// Parse blockquote content using inlines to support HTML tags and ruby
|
|
288
|
+
const blockquoteInline = P.alt(pluginInline, aozoraRuby, anchor, img, em, strong, code, htmlElement, htmlSelfClosing, P.regexp(/[^\r\n<|\[\]\*\`\@]+/), P.regexp(/./));
|
|
289
|
+
const blockquoteContent = blockquoteInline.atLeast(1).map(join);
|
|
290
|
+
const blockquoteLine = P.lazy(() => {
|
|
291
|
+
let blockquoteLevel = 0;
|
|
292
|
+
return P.seqMap(blockquoteBegin.then(blockquoteBegin.many().map(x => blockquoteLevel = x.length)), blockquoteContent, linebreak.atMost(1), (_1, text, _2) => {
|
|
293
|
+
return { text, blockquoteLevel };
|
|
294
|
+
});
|
|
295
|
+
});
|
|
296
|
+
// Turn the flat list of parsed blockquote lines ({ text, blockquoteLevel })
// into a tree. Going deeper opens one text-less wrapper node holding the
// line; going shallower climbs `depth - level` parents (stopping at a node
// that has no parent) before appending; the same level appends in place.
const createBlockquoteTree = (x) => {
    const root = { text: null, children: [] };
    let cursor = root;
    let depth = 0;
    x.forEach((line) => {
        const level = line.blockquoteLevel;
        if (level > depth) {
            const wrapper = { text: null, children: [], parent: cursor };
            wrapper.children.push({ text: line.text, children: [], parent: wrapper });
            cursor.children.push(wrapper);
            cursor = wrapper;
            depth = level;
            return;
        }
        if (level < depth) {
            for (let remaining = depth - level; remaining > 0; remaining--) {
                if (cursor.parent) {
                    cursor = cursor.parent;
                }
            }
            depth = level;
        }
        cursor.children.push({ text: line.text, children: [], parent: cursor });
    });
    return root;
};
|
|
328
|
+
// Render a blockquote tree. A text node followed by another text node gets a
// trailing <br>; text-less nodes with children render recursively as nested
// blockquotes. The pieces are folded pairwise with `join` and wrapped in
// <blockquote>. `isRoot` is accepted but not consulted.
const parseBlockquoteTree = (tree, isRoot = false) => {
    const pieces = [];
    tree.children.forEach((child, idx) => {
        if (child.text === null) {
            if (child.children.length !== 0) {
                pieces.push(parseBlockquoteTree(child));
            }
            return;
        }
        const next = tree.children[idx + 1];
        const followedByText = Boolean(next) && next.text !== null;
        pieces.push(followedByText ? join([child.text, mapper("br")(null)]) : child.text);
    });
    return mapper("blockquote")(pieces.reduce((a, b) => join([a, b])));
};
|
|
346
|
+
const blockquote = P.lazy(() => {
|
|
347
|
+
return blockquoteLine.atLeast(1).map(x => {
|
|
348
|
+
return parseBlockquoteTree(createBlockquoteTree(x), true);
|
|
349
|
+
});
|
|
350
|
+
});
|
|
351
|
+
// Block plugin: "@[name:args]" on its own line, followed by one or more
// indented content lines. When a plugin named `name` is registered it is
// called with (args, content, mapper, join); otherwise the matched source
// pieces are joined back together unchanged.
// The raw argument match still carries its leading ":". It is stripped
// before the plugin is called so block plugins receive the same `args` form
// as inline plugins; the unregistered fallback keeps the raw text.
const pluginBlock = P.seqMap(
    P.string("@["),
    P.regexp(/[a-zA-Z]+/),
    P.regexp(/(:[^\]]*)*/),
    P.string("]\n"),
    P.seq(P.string(" ").result(""), P.regexp(/[^\r\n]+/), linebreak.atMost(1).result("\n")).map(join).atLeast(1).map(join),
    (_1, pluginName, rawArgs, _2, content) => {
        const plugin = this.opts.plugins && this.opts.plugins[pluginName];
        if (!plugin) {
            return join([_1, pluginName, rawArgs, _2, content]);
        }
        const args = rawArgs.charAt(0) === ":" ? rawArgs.slice(1) : rawArgs;
        return plugin(args, content, mapper, join);
    }
);
|
|
355
|
+
const block = P.alt(P.regexp(/\s+/).result(""), pluginBlock, h1Special, h2Special, h6, h5, h4, h3, h2, h1, table, codeBlock, lists, blockquote, paragraph, linebreak.result(""));
|
|
356
|
+
this.acceptables = P.alt(block).many().map(join);
|
|
357
|
+
}
|
|
358
|
+
parse(s) {
|
|
359
|
+
this.liLevelBefore = this.liLevel = null;
|
|
360
|
+
this.rootTree = this.currentTree = {
|
|
361
|
+
value: null,
|
|
362
|
+
children: [],
|
|
363
|
+
type: "shadow",
|
|
364
|
+
parent: null
|
|
365
|
+
};
|
|
366
|
+
const parsed = this.acceptables.parse(s.trim());
|
|
367
|
+
if (parsed.status === true && parsed.hasOwnProperty("value"))
|
|
368
|
+
return this.opts.export.postprocess(parsed.value);
|
|
369
|
+
console.error(s.trim());
|
|
370
|
+
console.error(parsed);
|
|
371
|
+
throw new Error("Parsing was failed.");
|
|
372
|
+
}
|
|
373
|
+
}
|
|
374
|
+
exports.Parser = Parser;
|
|
375
|
+
/**
 * Replace the five HTML-significant characters (& < > " ') in `text` with
 * their character references.
 *
 * @param {string} text Raw text to escape.
 * @returns {string} The escaped text; other characters are left untouched.
 */
function escapeHtml(text) {
    // The replacements must be entity strings. Mapping each character to
    // itself performs no escaping at all.
    const map = {
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#039;'
    };
    return text.replace(/[&<>"']/g, (m) => map[m]);
}
|
|
385
|
+
exports.asHTML = {
|
|
386
|
+
mapper: (tag, args) => children => [
|
|
387
|
+
"<" + tag,
|
|
388
|
+
args ? " " + Object.keys(args).map(x => `${x}="${escapeHtml(String(args[x]))}"`).join(" ") : "",
|
|
389
|
+
children !== null && children !== "" ? ">" + children + "</" + tag + ">" : " />"
|
|
390
|
+
].join(""),
|
|
391
|
+
join: (x) => x.join(""),
|
|
392
|
+
postprocess: (x) => x
|
|
393
|
+
};
|
|
394
|
+
exports.asAST = {
|
|
395
|
+
mapper: (tag, args) => children => [
|
|
396
|
+
tag,
|
|
397
|
+
args ? args : null,
|
|
398
|
+
children
|
|
399
|
+
],
|
|
400
|
+
join: (x) => x, // identical
|
|
401
|
+
postprocess: (obj) => {
|
|
402
|
+
return obj.filter((x) => (x !== ''));
|
|
403
|
+
}
|
|
404
|
+
};
|
|
405
|
+
// Convenience entry point: parse `s` with a fresh Parser configured for the
// HTML export and return its result.
const parse = (s) => new Parser({ export: exports.asHTML }).parse(s);
exports.parse = parse;
|
package/package.json
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@minamorl/markdown-next",
|
|
3
|
+
"description": "Markdown parser with Aozora bunko ruby support and HTML passthrough",
|
|
4
|
+
"main": "./lib/src/parser.js",
|
|
5
|
+
"types": "./lib/src/parser.d.ts",
|
|
6
|
+
"scripts": {
|
|
7
|
+
"build": "tsc",
|
|
8
|
+
"example": "cd examples && webpack",
|
|
9
|
+
"prepublish": "pnpm run build",
|
|
10
|
+
"test": "pnpm run build && mocha lib/test"
|
|
11
|
+
},
|
|
12
|
+
"author": "minamorl",
|
|
13
|
+
"license": "MIT",
|
|
14
|
+
"files": [
|
|
15
|
+
"lib/src/",
|
|
16
|
+
"LICENSE"
|
|
17
|
+
],
|
|
18
|
+
"devDependencies": {
|
|
19
|
+
"@types/mocha": "^10.0.10",
|
|
20
|
+
"@types/node": "^20.0.0",
|
|
21
|
+
"@types/power-assert": "^1.5.12",
|
|
22
|
+
"mocha": "^11.7.5",
|
|
23
|
+
"power-assert": "^1.6.1",
|
|
24
|
+
"typescript": "^5.9.3",
|
|
25
|
+
"webpack": "^5.102.1",
|
|
26
|
+
"webpack-cli": "^6.0.1"
|
|
27
|
+
},
|
|
28
|
+
"dependencies": {
|
|
29
|
+
"@types/parsimmon": "^1.10.9",
|
|
30
|
+
"parsimmon": "^1.18.1"
|
|
31
|
+
},
|
|
32
|
+
"repository": {
|
|
33
|
+
"type": "git",
|
|
34
|
+
"url": "git+https://github.com/minamorl/markdown-next.git"
|
|
35
|
+
},
|
|
36
|
+
"bugs": {
|
|
37
|
+
"url": "https://github.com/minamorl/markdown-next/issues"
|
|
38
|
+
},
|
|
39
|
+
"homepage": "https://github.com/minamorl/markdown-next#readme",
|
|
40
|
+
"version": "2.0.1"
|
|
41
|
+
}
|