commonmarker 0.2.1 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of commonmarker might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/README.md +4 -2
- data/Rakefile +0 -5
- data/ext/commonmarker/cmark/CMakeLists.txt +1 -1
- data/ext/commonmarker/cmark/Makefile +12 -7
- data/ext/commonmarker/cmark/README.md +12 -8
- data/ext/commonmarker/cmark/api_test/main.c +18 -2
- data/ext/commonmarker/cmark/benchmarks.md +4 -4
- data/ext/commonmarker/cmark/build/CMakeFiles/CMakeError.log +12 -12
- data/ext/commonmarker/cmark/build/CMakeFiles/CMakeOutput.log +106 -106
- data/ext/commonmarker/cmark/build/CMakeFiles/Makefile2 +7 -7
- data/ext/commonmarker/cmark/build/CMakeFiles/progress.marks +1 -1
- data/ext/commonmarker/cmark/build/api_test/CMakeFiles/api_test.dir/build.make +1 -1
- data/ext/commonmarker/cmark/build/api_test/CMakeFiles/api_test.dir/link.txt +1 -1
- data/ext/commonmarker/cmark/build/api_test/CMakeFiles/progress.marks +1 -1
- data/ext/commonmarker/cmark/build/src/CMakeFiles/cmark.dir/DependInfo.cmake +2 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/cmark.dir/build.make +61 -9
- data/ext/commonmarker/cmark/build/src/CMakeFiles/cmark.dir/cmake_clean.cmake +2 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/cmark.dir/link.txt +1 -1
- data/ext/commonmarker/cmark/build/src/CMakeFiles/cmark.dir/progress.make +2 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark.dir/DependInfo.cmake +3 -1
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark.dir/build.make +81 -29
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark.dir/cmake_clean.cmake +3 -1
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark.dir/link.txt +1 -1
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark.dir/progress.make +19 -17
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/C.includecache +56 -10
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/DependInfo.cmake +2 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/blocks.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/build.make +60 -8
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/cmake_clean.cmake +2 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/cmark.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/commonmark.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/depend.internal +27 -1
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/depend.make +27 -1
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/flags.make +1 -1
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/html.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/inlines.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/latex.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/link.txt +1 -1
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/man.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/progress.make +19 -17
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/render.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/scanners.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/progress.marks +1 -1
- data/ext/commonmarker/cmark/build/src/Makefile +66 -0
- data/ext/commonmarker/cmark/build/src/cmake_install.cmake +3 -3
- data/ext/commonmarker/cmark/build/src/cmark_version.h +2 -2
- data/ext/commonmarker/cmark/build/src/libcmark.a +0 -0
- data/ext/commonmarker/cmark/build/src/libcmark.pc +1 -1
- data/ext/commonmarker/cmark/changelog.txt +144 -0
- data/ext/commonmarker/cmark/man/make_man_page.py +3 -3
- data/ext/commonmarker/cmark/man/man1/cmark.1 +10 -2
- data/ext/commonmarker/cmark/man/man3/cmark.3 +106 -85
- data/ext/commonmarker/cmark/src/CMakeLists.txt +5 -2
- data/ext/commonmarker/cmark/src/blocks.c +76 -9
- data/ext/commonmarker/cmark/src/cmark.c +9 -2
- data/ext/commonmarker/cmark/src/cmark.h +16 -3
- data/ext/commonmarker/cmark/src/commonmark.c +162 -309
- data/ext/commonmarker/cmark/src/html.c +30 -10
- data/ext/commonmarker/cmark/src/inlines.c +80 -72
- data/ext/commonmarker/cmark/src/latex.c +430 -0
- data/ext/commonmarker/cmark/src/main.c +12 -4
- data/ext/commonmarker/cmark/src/man.c +118 -156
- data/ext/commonmarker/cmark/src/node.h +1 -0
- data/ext/commonmarker/cmark/src/render.c +186 -0
- data/ext/commonmarker/cmark/src/render.h +66 -0
- data/ext/commonmarker/cmark/src/scanners.c +14586 -8944
- data/ext/commonmarker/cmark/src/scanners.h +16 -2
- data/ext/commonmarker/cmark/src/scanners.re +93 -9
- data/ext/commonmarker/cmark/test/__pycache__/cmark.cpython-34.pyc +0 -0
- data/ext/commonmarker/cmark/test/__pycache__/normalize.cpython-34.pyc +0 -0
- data/ext/commonmarker/cmark/test/smart_punct.txt +74 -10
- data/ext/commonmarker/cmark/test/spec.txt +726 -92
- data/ext/commonmarker/cmark/test/spec_tests.py +16 -13
- data/lib/commonmarker/config.rb +2 -0
- data/lib/commonmarker/version.rb +1 -1
- data/test/test_helper.rb +1 -1
- data/test/test_spec.rb +11 -10
- metadata +9 -6
- data/ext/commonmarker/cmark/algorithm.md +0 -116
- data/ext/commonmarker/cmark/src/debug.h +0 -36
- data/test/spec_tests.json +0 -4482
@@ -10,7 +10,13 @@ bufsize_t _scan_scheme(const unsigned char *p);
|
|
10
10
|
bufsize_t _scan_autolink_uri(const unsigned char *p);
|
11
11
|
bufsize_t _scan_autolink_email(const unsigned char *p);
|
12
12
|
bufsize_t _scan_html_tag(const unsigned char *p);
|
13
|
-
bufsize_t
|
13
|
+
bufsize_t _scan_html_block_start(const unsigned char *p);
|
14
|
+
bufsize_t _scan_html_block_start_7(const unsigned char *p);
|
15
|
+
bufsize_t _scan_html_block_end_1(const unsigned char *p);
|
16
|
+
bufsize_t _scan_html_block_end_2(const unsigned char *p);
|
17
|
+
bufsize_t _scan_html_block_end_3(const unsigned char *p);
|
18
|
+
bufsize_t _scan_html_block_end_4(const unsigned char *p);
|
19
|
+
bufsize_t _scan_html_block_end_5(const unsigned char *p);
|
14
20
|
bufsize_t _scan_link_url(const unsigned char *p);
|
15
21
|
bufsize_t _scan_link_title(const unsigned char *p);
|
16
22
|
bufsize_t _scan_spacechars(const unsigned char *p);
|
@@ -20,12 +26,19 @@ bufsize_t _scan_hrule(const unsigned char *p);
|
|
20
26
|
bufsize_t _scan_open_code_fence(const unsigned char *p);
|
21
27
|
bufsize_t _scan_close_code_fence(const unsigned char *p);
|
22
28
|
bufsize_t _scan_entity(const unsigned char *p);
|
29
|
+
bufsize_t _scan_dangerous_url(const unsigned char *p);
|
23
30
|
|
24
31
|
#define scan_scheme(c, n) _scan_at(&_scan_scheme, c, n)
|
25
32
|
#define scan_autolink_uri(c, n) _scan_at(&_scan_autolink_uri, c, n)
|
26
33
|
#define scan_autolink_email(c, n) _scan_at(&_scan_autolink_email, c, n)
|
27
34
|
#define scan_html_tag(c, n) _scan_at(&_scan_html_tag, c, n)
|
28
|
-
#define
|
35
|
+
#define scan_html_block_start(c, n) _scan_at(&_scan_html_block_start, c, n)
|
36
|
+
#define scan_html_block_start_7(c, n) _scan_at(&_scan_html_block_start_7, c, n)
|
37
|
+
#define scan_html_block_end_1(c, n) _scan_at(&_scan_html_block_end_1, c, n)
|
38
|
+
#define scan_html_block_end_2(c, n) _scan_at(&_scan_html_block_end_2, c, n)
|
39
|
+
#define scan_html_block_end_3(c, n) _scan_at(&_scan_html_block_end_3, c, n)
|
40
|
+
#define scan_html_block_end_4(c, n) _scan_at(&_scan_html_block_end_4, c, n)
|
41
|
+
#define scan_html_block_end_5(c, n) _scan_at(&_scan_html_block_end_5, c, n)
|
29
42
|
#define scan_link_url(c, n) _scan_at(&_scan_link_url, c, n)
|
30
43
|
#define scan_link_title(c, n) _scan_at(&_scan_link_title, c, n)
|
31
44
|
#define scan_spacechars(c, n) _scan_at(&_scan_spacechars, c, n)
|
@@ -35,6 +48,7 @@ bufsize_t _scan_entity(const unsigned char *p);
|
|
35
48
|
#define scan_open_code_fence(c, n) _scan_at(&_scan_open_code_fence, c, n)
|
36
49
|
#define scan_close_code_fence(c, n) _scan_at(&_scan_close_code_fence, c, n)
|
37
50
|
#define scan_entity(c, n) _scan_at(&_scan_entity, c, n)
|
51
|
+
#define scan_dangerous_url(c, n) _scan_at(&_scan_dangerous_url, c, n)
|
38
52
|
|
39
53
|
#ifdef __cplusplus
|
40
54
|
}
|
@@ -30,9 +30,9 @@ bufsize_t _scan_at(bufsize_t (*scanner)(const unsigned char *), cmark_chunk *c,
|
|
30
30
|
|
31
31
|
escaped_char = [\\][!"#$%&'()*+,./:;<=>?@[\\\]^_`{|}~-];
|
32
32
|
|
33
|
-
tagname = [A-Za-z][A-Za-z0-9]*;
|
33
|
+
tagname = [A-Za-z][A-Za-z0-9-]*;
|
34
34
|
|
35
|
-
blocktagname = '
|
35
|
+
blocktagname = 'address'|'article'|'aside'|'base'|'basefont'|'blockquote'|'body'|'caption'|'center'|'col'|'colgroup'|'dd'|'details'|'dialog'|'dir'|'div'|'dl'|'dt'|'fieldset'|'figcaption'|'figure'|'footer'|'form'|'frame'|'frameset'|'h1'|'head'|'header'|'hr'|'html'|'legend'|'li'|'link'|'main'|'menu'|'menuitem'|'meta'|'nav'|'noframes'|'ol'|'optgroup'|'option'|'p'|'param'|'pre'|'section'|'source'|'title'|'summary'|'table'|'tbody'|'td'|'tfoot'|'th'|'thead'|'title'|'tr'|'track'|'ul';
|
36
36
|
|
37
37
|
attributename = [a-zA-Z_:][a-zA-Z0-9:._-]*;
|
38
38
|
|
@@ -117,16 +117,85 @@ bufsize_t _scan_html_tag(const unsigned char *p)
|
|
117
117
|
*/
|
118
118
|
}
|
119
119
|
|
120
|
-
// Try to match an HTML block tag
|
121
|
-
//
|
122
|
-
|
120
|
+
// Try to match an HTML block tag start line, returning
|
121
|
+
// an integer code for the type of block (1-6, matching the spec).
|
122
|
+
// #7 is handled by a separate function, below.
|
123
|
+
bufsize_t _scan_html_block_start(const unsigned char *p)
|
124
|
+
{
|
125
|
+
const unsigned char *marker = NULL;
|
126
|
+
/*!re2c
|
127
|
+
[<] ('script'|'pre'|'style') (spacechar | [>]) { return 1; }
|
128
|
+
'<!--' { return 2; }
|
129
|
+
'<?' { return 3; }
|
130
|
+
'<!' [A-Z] { return 4; }
|
131
|
+
'<![CDATA[' { return 5; }
|
132
|
+
[<] [/]? blocktagname (spacechar | [/]? [>]) { return 6; }
|
133
|
+
.? { return 0; }
|
134
|
+
*/
|
135
|
+
}
|
136
|
+
|
137
|
+
// Try to match an HTML block tag start line of type 7, returning
|
138
|
+
// 7 if successful, 0 if not.
|
139
|
+
bufsize_t _scan_html_block_start_7(const unsigned char *p)
|
140
|
+
{
|
141
|
+
const unsigned char *marker = NULL;
|
142
|
+
/*!re2c
|
143
|
+
[<] (opentag | closetag) [\t\n\f ]* [\r\n] { return 7; }
|
144
|
+
.? { return 0; }
|
145
|
+
*/
|
146
|
+
}
|
147
|
+
|
148
|
+
// Try to match an HTML block end line of type 1
|
149
|
+
bufsize_t _scan_html_block_end_1(const unsigned char *p)
|
150
|
+
{
|
151
|
+
const unsigned char *marker = NULL;
|
152
|
+
const unsigned char *start = p;
|
153
|
+
/*!re2c
|
154
|
+
.* [<] [/] ('script'|'pre'|'style') [>] { return (bufsize_t)(p - start); }
|
155
|
+
.? { return 0; }
|
156
|
+
*/
|
157
|
+
}
|
158
|
+
|
159
|
+
// Try to match an HTML block end line of type 2
|
160
|
+
bufsize_t _scan_html_block_end_2(const unsigned char *p)
|
161
|
+
{
|
162
|
+
const unsigned char *marker = NULL;
|
163
|
+
const unsigned char *start = p;
|
164
|
+
/*!re2c
|
165
|
+
.* '-->' { return (bufsize_t)(p - start); }
|
166
|
+
.? { return 0; }
|
167
|
+
*/
|
168
|
+
}
|
169
|
+
|
170
|
+
// Try to match an HTML block end line of type 3
|
171
|
+
bufsize_t _scan_html_block_end_3(const unsigned char *p)
|
172
|
+
{
|
173
|
+
const unsigned char *marker = NULL;
|
174
|
+
const unsigned char *start = p;
|
175
|
+
/*!re2c
|
176
|
+
.* '?>' { return (bufsize_t)(p - start); }
|
177
|
+
.? { return 0; }
|
178
|
+
*/
|
179
|
+
}
|
180
|
+
|
181
|
+
// Try to match an HTML block end line of type 4
|
182
|
+
bufsize_t _scan_html_block_end_4(const unsigned char *p)
|
183
|
+
{
|
184
|
+
const unsigned char *marker = NULL;
|
185
|
+
const unsigned char *start = p;
|
186
|
+
/*!re2c
|
187
|
+
.* '>' { return (bufsize_t)(p - start); }
|
188
|
+
.? { return 0; }
|
189
|
+
*/
|
190
|
+
}
|
191
|
+
|
192
|
+
// Try to match an HTML block end line of type 5
|
193
|
+
bufsize_t _scan_html_block_end_5(const unsigned char *p)
|
123
194
|
{
|
124
195
|
const unsigned char *marker = NULL;
|
125
196
|
const unsigned char *start = p;
|
126
197
|
/*!re2c
|
127
|
-
|
128
|
-
[<] blocktagname (spacechar | [/>]) { return (bufsize_t)(p - start); }
|
129
|
-
[<] [!?] { return (bufsize_t)(p - start); }
|
198
|
+
.* ']]>' { return (bufsize_t)(p - start); }
|
130
199
|
.? { return 0; }
|
131
200
|
*/
|
132
201
|
}
|
@@ -141,7 +210,7 @@ bufsize_t _scan_link_url(const unsigned char *p)
|
|
141
210
|
const unsigned char *start = p;
|
142
211
|
/*!re2c
|
143
212
|
[ \r\n]* [<] ([^<>\r\n\\\x00] | escaped_char | [\\])* [>] { return (bufsize_t)(p - start); }
|
144
|
-
[ \r\n]* (reg_char+ | escaped_char | in_parens_nosp | [\\])* { return (bufsize_t)(p - start); }
|
213
|
+
[ \r\n]* (reg_char+ | escaped_char | in_parens_nosp | [\\][^()])* { return (bufsize_t)(p - start); }
|
145
214
|
.? { return 0; }
|
146
215
|
*/
|
147
216
|
}
|
@@ -164,6 +233,7 @@ bufsize_t _scan_link_title(const unsigned char *p)
|
|
164
233
|
// Match space characters, including newlines.
|
165
234
|
bufsize_t _scan_spacechars(const unsigned char *p)
|
166
235
|
{
|
236
|
+
const unsigned char *marker = NULL;
|
167
237
|
const unsigned char *start = p; \
|
168
238
|
/*!re2c
|
169
239
|
[ \t\v\f\r\n]* { return (bufsize_t)(p - start); }
|
@@ -245,3 +315,17 @@ bufsize_t _scan_entity(const unsigned char *p)
|
|
245
315
|
.? { return 0; }
|
246
316
|
*/
|
247
317
|
}
|
318
|
+
|
319
|
+
// Returns positive value if a URL begins in a way that is potentially
|
320
|
+
// dangerous, with javascript:, vbscript:, file:, or data:, otherwise 0.
|
321
|
+
bufsize_t _scan_dangerous_url(const unsigned char *p)
|
322
|
+
{
|
323
|
+
const unsigned char *marker = NULL;
|
324
|
+
const unsigned char *start = p;
|
325
|
+
/*!re2c
|
326
|
+
'data:image/' ('png'|'gif'|'jpeg'|'webp') { return 0; }
|
327
|
+
'javascript:' | 'vbscript:' | 'file:' | 'data:' { return (bufsize_t)(p - start); }
|
328
|
+
.? { return 0; }
|
329
|
+
*/
|
330
|
+
}
|
331
|
+
|
Binary file
|
Binary file
|
@@ -1,5 +1,9 @@
|
|
1
1
|
## Smart punctuation
|
2
2
|
|
3
|
+
Open quotes are matched with closed quotes.
|
4
|
+
The same method is used for matching openers and closers
|
5
|
+
as is used in emphasis parsing:
|
6
|
+
|
3
7
|
.
|
4
8
|
"Hello," said the spider.
|
5
9
|
"'Shelob' is my name."
|
@@ -28,6 +32,10 @@ So is ‘pine.’</p>
|
|
28
32
|
<p>‘He said, “I want to go.”’</p>
|
29
33
|
.
|
30
34
|
|
35
|
+
A single quote that isn't an open quote matched
|
36
|
+
with a close quote will be treated as an
|
37
|
+
apostrophe:
|
38
|
+
|
31
39
|
.
|
32
40
|
Were you alive in the 70's?
|
33
41
|
.
|
@@ -40,12 +48,19 @@ Here is some quoted '`code`' and a "[quoted link](url)".
|
|
40
48
|
<p>Here is some quoted ‘<code>code</code>’ and a “<a href="url">quoted link</a>”.</p>
|
41
49
|
.
|
42
50
|
|
51
|
+
Here the first `'` is treated as an apostrophe, not
|
52
|
+
an open quote, because the final single quote is matched
|
53
|
+
by the single quote before `jolly`:
|
54
|
+
|
43
55
|
.
|
44
56
|
'tis the season to be 'jolly'
|
45
57
|
.
|
46
58
|
<p>’tis the season to be ‘jolly’</p>
|
47
59
|
.
|
48
60
|
|
61
|
+
An unmatched double quote will be interpreted as a
|
62
|
+
left double quote, to facilitate this style:
|
63
|
+
|
49
64
|
.
|
50
65
|
"A paragraph with no closing quote.
|
51
66
|
|
@@ -55,40 +70,89 @@ Here is some quoted '`code`' and a "[quoted link](url)".
|
|
55
70
|
<p>“Second paragraph by same speaker, in fiction.”</p>
|
56
71
|
.
|
57
72
|
|
73
|
+
Quotes that are escaped come out as literal straight
|
74
|
+
quotes:
|
75
|
+
|
58
76
|
.
|
59
77
|
\"This is not smart.\"
|
60
78
|
This isn\'t either.
|
79
|
+
5\'8\"
|
61
80
|
.
|
62
81
|
<p>"This is not smart."
|
63
|
-
This isn't either
|
82
|
+
This isn't either.
|
83
|
+
5'8"</p>
|
64
84
|
.
|
65
85
|
|
86
|
+
Two hyphens form an en-dash, three an em-dash.
|
87
|
+
|
66
88
|
.
|
67
|
-
Some dashes:
|
68
|
-
|
89
|
+
Some dashes: em---em
|
90
|
+
en--en
|
91
|
+
em --- em
|
92
|
+
en -- en
|
93
|
+
2--3
|
69
94
|
.
|
70
|
-
<p>Some dashes:
|
71
|
-
|
95
|
+
<p>Some dashes: em—em
|
96
|
+
en–en
|
97
|
+
em — em
|
98
|
+
en – en
|
99
|
+
2–3</p>
|
72
100
|
.
|
73
101
|
|
102
|
+
A sequence of more than three hyphens is
|
103
|
+
parsed as a sequence of em and/or en dashes,
|
104
|
+
with no hyphens. If possible, a homogeneous
|
105
|
+
sequence of dashes is used (so, 10 hyphens
|
106
|
+
= 5 en dashes, and 9 hyphens = 3 em dashes).
|
107
|
+
When a heterogeneous sequence must be used,
|
108
|
+
the em dashes come first, followed by the en
|
109
|
+
dashes, and as few en dashes as possible are
|
110
|
+
used (so, 7 hyphens = 2 em dashes and 1 en
|
111
|
+
dash).
|
112
|
+
|
74
113
|
.
|
75
|
-
|
76
|
-
|
77
|
-
|
114
|
+
one-
|
115
|
+
two--
|
116
|
+
three---
|
117
|
+
four----
|
118
|
+
five-----
|
119
|
+
six------
|
120
|
+
seven-------
|
121
|
+
eight--------
|
122
|
+
nine---------
|
123
|
+
thirteen-------------.
|
124
|
+
.
|
125
|
+
<p>one-
|
126
|
+
two–
|
127
|
+
three—
|
128
|
+
four––
|
129
|
+
five—–
|
130
|
+
six——
|
131
|
+
seven—––
|
132
|
+
eight––––
|
133
|
+
nine———
|
134
|
+
thirteen———––.</p>
|
78
135
|
.
|
79
136
|
|
137
|
+
Hyphens can be escaped:
|
138
|
+
|
80
139
|
.
|
81
|
-
|
140
|
+
Escaped hyphens: \-- \-\-\-.
|
82
141
|
.
|
83
|
-
<p>
|
142
|
+
<p>Escaped hyphens: -- ---.</p>
|
84
143
|
.
|
85
144
|
|
145
|
+
Three periods form an ellipsis:
|
146
|
+
|
86
147
|
.
|
87
148
|
Ellipses...and...and....
|
88
149
|
.
|
89
150
|
<p>Ellipses…and…and….</p>
|
90
151
|
.
|
91
152
|
|
153
|
+
Periods can be escaped if ellipsis-formation
|
154
|
+
is not wanted:
|
155
|
+
|
92
156
|
.
|
93
157
|
No ellipses\.\.\.
|
94
158
|
.
|
@@ -1,8 +1,8 @@
|
|
1
1
|
---
|
2
2
|
title: CommonMark Spec
|
3
3
|
author: John MacFarlane
|
4
|
-
version: 0.
|
5
|
-
date:
|
4
|
+
version: 0.21
|
5
|
+
date:
|
6
6
|
license: '[CC-BY-SA 4.0](http://creativecommons.org/licenses/by-sa/4.0/)'
|
7
7
|
...
|
8
8
|
|
@@ -237,7 +237,7 @@ or more [unicode whitespace character]s.
|
|
237
237
|
|
238
238
|
A [space](@space) is `U+0020`.
|
239
239
|
|
240
|
-
A [non-
|
240
|
+
A [non-whitespace character](@non-space-character) is any character
|
241
241
|
that is not a [whitespace character].
|
242
242
|
|
243
243
|
An [ASCII punctuation character](@ascii-punctuation-character)
|
@@ -474,7 +474,7 @@ a------
|
|
474
474
|
<p>---a---</p>
|
475
475
|
.
|
476
476
|
|
477
|
-
It is required that all of the [non-
|
477
|
+
It is required that all of the [non-whitespace character]s be the same.
|
478
478
|
So, this is not a horizontal rule:
|
479
479
|
|
480
480
|
.
|
@@ -564,7 +564,7 @@ consists of a string of characters, parsed as inline content, between an
|
|
564
564
|
opening sequence of 1--6 unescaped `#` characters and an optional
|
565
565
|
closing sequence of any number of `#` characters. The opening sequence
|
566
566
|
of `#` characters cannot be followed directly by a
|
567
|
-
[non-
|
567
|
+
[non-whitespace character]. The optional closing sequence of `#`s must be
|
568
568
|
preceded by a [space] and may be followed by spaces only. The opening
|
569
569
|
`#` character may be indented 0-3 spaces. The raw contents of the
|
570
570
|
header are stripped of leading and trailing spaces before being parsed
|
@@ -696,7 +696,7 @@ Spaces are allowed after the closing sequence:
|
|
696
696
|
.
|
697
697
|
|
698
698
|
A sequence of `#` characters with a
|
699
|
-
[non-
|
699
|
+
[non-whitespace character] following it
|
700
700
|
is not a closing sequence, but counts as part of the contents of the
|
701
701
|
header:
|
702
702
|
|
@@ -765,7 +765,7 @@ ATX headers can be empty:
|
|
765
765
|
## Setext headers
|
766
766
|
|
767
767
|
A [setext header](@setext-header)
|
768
|
-
consists of a line of text, containing at least one [non-
|
768
|
+
consists of a line of text, containing at least one [non-whitespace character],
|
769
769
|
with no more than 3 spaces indentation, followed by a [setext header
|
770
770
|
underline]. The line of text must be
|
771
771
|
one that, were it not followed by the setext header underline,
|
@@ -1348,7 +1348,8 @@ aaa
|
|
1348
1348
|
</code></pre>
|
1349
1349
|
.
|
1350
1350
|
|
1351
|
-
Unclosed code blocks are closed by the end of the document
|
1351
|
+
Unclosed code blocks are closed by the end of the document
|
1352
|
+
(or the enclosing [block quote] or [list item]):
|
1352
1353
|
|
1353
1354
|
.
|
1354
1355
|
```
|
@@ -1368,6 +1369,19 @@ aaa
|
|
1368
1369
|
</code></pre>
|
1369
1370
|
.
|
1370
1371
|
|
1372
|
+
.
|
1373
|
+
> ```
|
1374
|
+
> aaa
|
1375
|
+
|
1376
|
+
bbb
|
1377
|
+
.
|
1378
|
+
<blockquote>
|
1379
|
+
<pre><code>aaa
|
1380
|
+
</code></pre>
|
1381
|
+
</blockquote>
|
1382
|
+
<p>bbb</p>
|
1383
|
+
.
|
1384
|
+
|
1371
1385
|
A code block can have all empty lines as its content:
|
1372
1386
|
|
1373
1387
|
.
|
@@ -1593,27 +1607,65 @@ Closing code fences cannot have [info string]s:
|
|
1593
1607
|
|
1594
1608
|
## HTML blocks
|
1595
1609
|
|
1596
|
-
An [HTML block
|
1597
|
-
|
1598
|
-
|
1599
|
-
|
1600
|
-
|
1601
|
-
|
1602
|
-
|
1603
|
-
|
1604
|
-
|
1605
|
-
|
1606
|
-
|
1607
|
-
|
1608
|
-
|
1609
|
-
|
1610
|
-
|
1611
|
-
|
1612
|
-
|
1613
|
-
|
1614
|
-
|
1615
|
-
|
1616
|
-
|
1610
|
+
An [HTML block](@html-block) is a group of lines that is treated
|
1611
|
+
as raw HTML (and will not be escaped in HTML output).
|
1612
|
+
|
1613
|
+
There are seven kinds of [HTML block], which can be defined
|
1614
|
+
by their start and end conditions. The block begins with a line that
|
1615
|
+
meets a [start condition](@start-condition) (after up to three spaces
|
1616
|
+
optional indentation). It ends with the first subsequent line that
|
1617
|
+
meets a matching [end condition](@end-condition), or the last line of
|
1618
|
+
the document, if no line is encountered that meets the
|
1619
|
+
[end condition]. If the first line meets both the [start condition]
|
1620
|
+
and the [end condition], the block will contain just that line.
|
1621
|
+
|
1622
|
+
1. **Start condition:** line begins with the string `<script`,
|
1623
|
+
`<pre`, or `<style` (case-insensitive), followed by whitespace,
|
1624
|
+
the string `>`, or the end of the line.\
|
1625
|
+
**End condition:** line contains an end tag
|
1626
|
+
`</script>`, `</pre>`, or `</style>` (case-insensitive; it
|
1627
|
+
need not match the start tag).
|
1628
|
+
|
1629
|
+
2. **Start condition:** line begins with the string `<!--`.\
|
1630
|
+
**End condition:** line contains the string `-->`.
|
1631
|
+
|
1632
|
+
3. **Start condition:** line begins with the string `<?`.\
|
1633
|
+
**End condition:** line contains the string `?>`.
|
1634
|
+
|
1635
|
+
4. **Start condition:** line begins with the string `<!`
|
1636
|
+
followed by an uppercase ASCII letter.\
|
1637
|
+
**End condition:** line contains the character `>`.
|
1638
|
+
|
1639
|
+
5. **Start condition:** line begins with the string
|
1640
|
+
`<![CDATA[`.\
|
1641
|
+
**End condition:** line contains the string `]]>`.
|
1642
|
+
|
1643
|
+
6. **Start condition:** line begins with the string `<` or `</`
|
1644
|
+
followed by one of the strings (case-insensitive) `address`,
|
1645
|
+
`article`, `aside`, `base`, `basefont`, `blockquote`, `body`,
|
1646
|
+
`caption`, `center`, `col`, `colgroup`, `dd`, `details`, `dialog`,
|
1647
|
+
`dir`, `div`, `dl`, `dt`, `fieldset`, `figcaption`, `figure`,
|
1648
|
+
`footer`, `form`, `frame`, `frameset`, `h1`, `head`, `header`, `hr`,
|
1649
|
+
`html`, `legend`, `li`, `link`, `main`, `menu`, `menuitem`, `meta`,
|
1650
|
+
`nav`, `noframes`, `ol`, `optgroup`, `option`, `p`, `param`, `pre`,
|
1651
|
+
`section`, `source`, `summary`, `table`, `tbody`, `td`,
|
1652
|
+
`tfoot`, `th`, `thead`, `title`, `tr`, `track`, `ul`, followed
|
1653
|
+
by [whitespace], the end of the line, the string `>`, or
|
1654
|
+
the string `/>`.\
|
1655
|
+
**End condition:** line is followed by a [blank line].
|
1656
|
+
|
1657
|
+
7. **Start condition:** line begins with an [open tag]
|
1658
|
+
(with any [tag name]) followed only by [whitespace] or the end
|
1659
|
+
of the line.\
|
1660
|
+
**End condition:** line is followed by a [blank line].
|
1661
|
+
|
1662
|
+
All types of [HTML block]s except type 7 may interrupt
|
1663
|
+
a paragraph. Blocks of type 7 may not interrupt a paragraph.
|
1664
|
+
(This restriction is intended to prevent unwanted interpretation
|
1665
|
+
of long tags inside a wrapped paragraph as starting HTML blocks.)
|
1666
|
+
|
1667
|
+
Some simple examples follow. Here are some basic HTML blocks
|
1668
|
+
of type 6:
|
1617
1669
|
|
1618
1670
|
.
|
1619
1671
|
<table>
|
@@ -1646,6 +1698,16 @@ okay.
|
|
1646
1698
|
<foo><a>
|
1647
1699
|
.
|
1648
1700
|
|
1701
|
+
A block can also start with a closing tag:
|
1702
|
+
|
1703
|
+
.
|
1704
|
+
</div>
|
1705
|
+
*foo*
|
1706
|
+
.
|
1707
|
+
</div>
|
1708
|
+
*foo*
|
1709
|
+
.
|
1710
|
+
|
1649
1711
|
Here we have two HTML blocks with a Markdown paragraph between them:
|
1650
1712
|
|
1651
1713
|
.
|
@@ -1660,7 +1722,94 @@ Here we have two HTML blocks with a Markdown paragraph between them:
|
|
1660
1722
|
</DIV>
|
1661
1723
|
.
|
1662
1724
|
|
1663
|
-
|
1725
|
+
The tag on the first line can be partial, as long
|
1726
|
+
as it is split where there would be whitespace:
|
1727
|
+
|
1728
|
+
.
|
1729
|
+
<div id="foo"
|
1730
|
+
class="bar">
|
1731
|
+
</div>
|
1732
|
+
.
|
1733
|
+
<div id="foo"
|
1734
|
+
class="bar">
|
1735
|
+
</div>
|
1736
|
+
.
|
1737
|
+
|
1738
|
+
.
|
1739
|
+
<div id="foo" class="bar
|
1740
|
+
baz">
|
1741
|
+
</div>
|
1742
|
+
.
|
1743
|
+
<div id="foo" class="bar
|
1744
|
+
baz">
|
1745
|
+
</div>
|
1746
|
+
.
|
1747
|
+
|
1748
|
+
An open tag need not be closed:
|
1749
|
+
.
|
1750
|
+
<div>
|
1751
|
+
*foo*
|
1752
|
+
|
1753
|
+
*bar*
|
1754
|
+
.
|
1755
|
+
<div>
|
1756
|
+
*foo*
|
1757
|
+
<p><em>bar</em></p>
|
1758
|
+
.
|
1759
|
+
|
1760
|
+
|
1761
|
+
A partial tag need not even be completed (garbage
|
1762
|
+
in, garbage out):
|
1763
|
+
|
1764
|
+
.
|
1765
|
+
<div id="foo"
|
1766
|
+
*hi*
|
1767
|
+
.
|
1768
|
+
<div id="foo"
|
1769
|
+
*hi*
|
1770
|
+
.
|
1771
|
+
|
1772
|
+
.
|
1773
|
+
<div class
|
1774
|
+
foo
|
1775
|
+
.
|
1776
|
+
<div class
|
1777
|
+
foo
|
1778
|
+
.
|
1779
|
+
|
1780
|
+
The initial tag doesn't even need to be a valid
|
1781
|
+
tag, as long as it starts like one:
|
1782
|
+
|
1783
|
+
.
|
1784
|
+
<div *???-&&&-<---
|
1785
|
+
*foo*
|
1786
|
+
.
|
1787
|
+
<div *???-&&&-<---
|
1788
|
+
*foo*
|
1789
|
+
.
|
1790
|
+
|
1791
|
+
In type 6 blocks, the initial tag need not be on a line by
|
1792
|
+
itself:
|
1793
|
+
|
1794
|
+
.
|
1795
|
+
<div><a href="bar">*foo*</a></div>
|
1796
|
+
.
|
1797
|
+
<div><a href="bar">*foo*</a></div>
|
1798
|
+
.
|
1799
|
+
|
1800
|
+
.
|
1801
|
+
<table><tr><td>
|
1802
|
+
foo
|
1803
|
+
</td></tr></table>
|
1804
|
+
.
|
1805
|
+
<table><tr><td>
|
1806
|
+
foo
|
1807
|
+
</td></tr></table>
|
1808
|
+
.
|
1809
|
+
|
1810
|
+
Everything until the next blank line or end of document
|
1811
|
+
gets included in the HTML block. So, in the following
|
1812
|
+
example, what looks like a Markdown code block
|
1664
1813
|
is actually part of the HTML block, which continues until a blank
|
1665
1814
|
line or the end of the document is reached:
|
1666
1815
|
|
@@ -1676,43 +1825,267 @@ int x = 33;
|
|
1676
1825
|
```
|
1677
1826
|
.
|
1678
1827
|
|
1679
|
-
|
1828
|
+
To start an [HTML block] with a tag that is *not* in the
|
1829
|
+
list of block-level tags in (6), you must put the tag by
|
1830
|
+
itself on the first line (and it must be complete):
|
1831
|
+
|
1832
|
+
.
|
1833
|
+
<a href="foo">
|
1834
|
+
*bar*
|
1835
|
+
</a>
|
1836
|
+
.
|
1837
|
+
<a href="foo">
|
1838
|
+
*bar*
|
1839
|
+
</a>
|
1840
|
+
.
|
1841
|
+
|
1842
|
+
In type 7 blocks, the [tag name] can be anything:
|
1843
|
+
|
1844
|
+
.
|
1845
|
+
<Warning>
|
1846
|
+
*bar*
|
1847
|
+
</Warning>
|
1848
|
+
.
|
1849
|
+
<Warning>
|
1850
|
+
*bar*
|
1851
|
+
</Warning>
|
1852
|
+
.
|
1853
|
+
|
1854
|
+
.
|
1855
|
+
<i class="foo">
|
1856
|
+
*bar*
|
1857
|
+
</i>
|
1858
|
+
.
|
1859
|
+
<i class="foo">
|
1860
|
+
*bar*
|
1861
|
+
</i>
|
1862
|
+
.
|
1863
|
+
|
1864
|
+
These rules are designed to allow us to work with tags that
|
1865
|
+
can function as either block-level or inline-level tags.
|
1866
|
+
The `<del>` tag is a nice example. We can surround content with
|
1867
|
+
`<del>` tags in three different ways. In this case, we get a raw
|
1868
|
+
HTML block, because the `<del>` tag is on a line by itself:
|
1869
|
+
|
1870
|
+
.
|
1871
|
+
<del>
|
1872
|
+
*foo*
|
1873
|
+
</del>
|
1874
|
+
.
|
1875
|
+
<del>
|
1876
|
+
*foo*
|
1877
|
+
</del>
|
1878
|
+
.
|
1879
|
+
|
1880
|
+
In this case, we get a raw HTML block that just includes
|
1881
|
+
the `<del>` tag (because it ends with the following blank
|
1882
|
+
line). So the contents get interpreted as CommonMark:
|
1883
|
+
|
1884
|
+
.
|
1885
|
+
<del>
|
1886
|
+
|
1887
|
+
*foo*
|
1888
|
+
|
1889
|
+
</del>
|
1890
|
+
.
|
1891
|
+
<del>
|
1892
|
+
<p><em>foo</em></p>
|
1893
|
+
</del>
|
1894
|
+
.
|
1895
|
+
|
1896
|
+
Finally, in this case, the `<del>` tags are interpreted
|
1897
|
+
as [raw HTML] *inside* the CommonMark paragraph. (Because
|
1898
|
+
the tag is not on a line by itself, we get inline HTML
|
1899
|
+
rather than an [HTML block].)
|
1900
|
+
|
1901
|
+
.
|
1902
|
+
<del>*foo*</del>
|
1903
|
+
.
|
1904
|
+
<p><del><em>foo</em></del></p>
|
1905
|
+
.
|
1906
|
+
|
1907
|
+
HTML tags designed to contain literal content
|
1908
|
+
(`script`, `style`, `pre`), comments, processing instructions,
|
1909
|
+
and declarations are treated somewhat differently.
|
1910
|
+
Instead of ending at the first blank line, these blocks
|
1911
|
+
end at the first line containing a corresponding end tag.
|
1912
|
+
As a result, these blocks can contain blank lines:
|
1913
|
+
|
1914
|
+
A pre tag (type 1):
|
1915
|
+
|
1916
|
+
.
|
1917
|
+
<pre language="haskell"><code>
|
1918
|
+
import Text.HTML.TagSoup
|
1919
|
+
|
1920
|
+
main :: IO ()
|
1921
|
+
main = print $ parseTags tags
|
1922
|
+
</code></pre>
|
1923
|
+
.
|
1924
|
+
<pre language="haskell"><code>
|
1925
|
+
import Text.HTML.TagSoup
|
1926
|
+
|
1927
|
+
main :: IO ()
|
1928
|
+
main = print $ parseTags tags
|
1929
|
+
</code></pre>
|
1930
|
+
.
|
1931
|
+
|
1932
|
+
A script tag (type 1):
|
1933
|
+
|
1934
|
+
.
|
1935
|
+
<script type="text/javascript">
|
1936
|
+
// JavaScript example
|
1937
|
+
|
1938
|
+
document.getElementById("demo").innerHTML = "Hello JavaScript!";
|
1939
|
+
</script>
|
1940
|
+
.
|
1941
|
+
<script type="text/javascript">
|
1942
|
+
// JavaScript example
|
1943
|
+
|
1944
|
+
document.getElementById("demo").innerHTML = "Hello JavaScript!";
|
1945
|
+
</script>
|
1946
|
+
.
|
1947
|
+
|
1948
|
+
A style tag (type 1):
|
1949
|
+
|
1950
|
+
.
|
1951
|
+
<style
|
1952
|
+
type="text/css">
|
1953
|
+
h1 {color:red;}
|
1954
|
+
|
1955
|
+
p {color:blue;}
|
1956
|
+
</style>
|
1957
|
+
.
|
1958
|
+
<style
|
1959
|
+
type="text/css">
|
1960
|
+
h1 {color:red;}
|
1961
|
+
|
1962
|
+
p {color:blue;}
|
1963
|
+
</style>
|
1964
|
+
.
|
1965
|
+
|
1966
|
+
If there is no matching end tag, the block will end at the
|
1967
|
+
end of the document (or the enclosing [block quote] or
|
1968
|
+
[list item]):
|
1969
|
+
|
1970
|
+
.
|
1971
|
+
<style
|
1972
|
+
type="text/css">
|
1973
|
+
|
1974
|
+
foo
|
1975
|
+
.
|
1976
|
+
<style
|
1977
|
+
type="text/css">
|
1978
|
+
|
1979
|
+
foo
|
1980
|
+
.
|
1981
|
+
|
1982
|
+
.
|
1983
|
+
> <div>
|
1984
|
+
> foo
|
1985
|
+
|
1986
|
+
bar
|
1987
|
+
.
|
1988
|
+
<blockquote>
|
1989
|
+
<div>
|
1990
|
+
foo
|
1991
|
+
</blockquote>
|
1992
|
+
<p>bar</p>
|
1993
|
+
.
|
1994
|
+
|
1995
|
+
.
|
1996
|
+
- <div>
|
1997
|
+
- foo
|
1998
|
+
.
|
1999
|
+
<ul>
|
2000
|
+
<li>
|
2001
|
+
<div>
|
2002
|
+
</li>
|
2003
|
+
<li>foo</li>
|
2004
|
+
</ul>
|
2005
|
+
.
|
2006
|
+
|
2007
|
+
The end tag can occur on the same line as the start tag:
|
2008
|
+
|
2009
|
+
.
|
2010
|
+
<style>p{color:red;}</style>
|
2011
|
+
*foo*
|
2012
|
+
.
|
2013
|
+
<style>p{color:red;}</style>
|
2014
|
+
<p><em>foo</em></p>
|
2015
|
+
.
|
2016
|
+
|
2017
|
+
.
|
2018
|
+
<!-- foo -->*bar*
|
2019
|
+
*baz*
|
2020
|
+
.
|
2021
|
+
<!-- foo -->*bar*
|
2022
|
+
<p><em>baz</em></p>
|
2023
|
+
.
|
2024
|
+
|
2025
|
+
Note that anything on the last line after the
|
2026
|
+
end tag will be included in the [HTML block]:
|
2027
|
+
|
2028
|
+
.
|
2029
|
+
<script>
|
2030
|
+
foo
|
2031
|
+
</script>1. *bar*
|
2032
|
+
.
|
2033
|
+
<script>
|
2034
|
+
foo
|
2035
|
+
</script>1. *bar*
|
2036
|
+
.
|
2037
|
+
|
2038
|
+
A comment (type 2):
|
1680
2039
|
|
1681
2040
|
.
|
1682
2041
|
<!-- Foo
|
2042
|
+
|
1683
2043
|
bar
|
1684
2044
|
baz -->
|
1685
2045
|
.
|
1686
2046
|
<!-- Foo
|
2047
|
+
|
1687
2048
|
bar
|
1688
2049
|
baz -->
|
1689
2050
|
.
|
1690
2051
|
|
1691
|
-
|
2052
|
+
|
2053
|
+
A processing instruction (type 3):
|
1692
2054
|
|
1693
2055
|
.
|
1694
2056
|
<?php
|
2057
|
+
|
1695
2058
|
echo '>';
|
2059
|
+
|
1696
2060
|
?>
|
1697
2061
|
.
|
1698
2062
|
<?php
|
2063
|
+
|
1699
2064
|
echo '>';
|
2065
|
+
|
1700
2066
|
?>
|
1701
2067
|
.
|
1702
2068
|
|
1703
|
-
|
2069
|
+
A declaration (type 4):
|
2070
|
+
|
2071
|
+
.
|
2072
|
+
<!DOCTYPE html>
|
2073
|
+
.
|
2074
|
+
<!DOCTYPE html>
|
2075
|
+
.
|
2076
|
+
|
2077
|
+
CDATA (type 5):
|
1704
2078
|
|
1705
2079
|
.
|
1706
2080
|
<![CDATA[
|
1707
2081
|
function matchwo(a,b)
|
1708
2082
|
{
|
1709
|
-
if (a < b && a < 0) then
|
1710
|
-
|
1711
|
-
|
1712
|
-
}
|
1713
|
-
|
1714
|
-
|
1715
|
-
return 0;
|
2083
|
+
if (a < b && a < 0) then {
|
2084
|
+
return 1;
|
2085
|
+
|
2086
|
+
} else {
|
2087
|
+
|
2088
|
+
return 0;
|
1716
2089
|
}
|
1717
2090
|
}
|
1718
2091
|
]]>
|
@@ -1720,13 +2093,12 @@ else
|
|
1720
2093
|
<![CDATA[
|
1721
2094
|
function matchwo(a,b)
|
1722
2095
|
{
|
1723
|
-
if (a < b && a < 0) then
|
1724
|
-
|
1725
|
-
|
1726
|
-
}
|
1727
|
-
|
1728
|
-
|
1729
|
-
return 0;
|
2096
|
+
if (a < b && a < 0) then {
|
2097
|
+
return 1;
|
2098
|
+
|
2099
|
+
} else {
|
2100
|
+
|
2101
|
+
return 0;
|
1730
2102
|
}
|
1731
2103
|
}
|
1732
2104
|
]]>
|
@@ -1744,8 +2116,18 @@ The opening tag can be indented 1-3 spaces, but not 4:
|
|
1744
2116
|
</code></pre>
|
1745
2117
|
.
|
1746
2118
|
|
1747
|
-
|
1748
|
-
|
2119
|
+
.
|
2120
|
+
<div>
|
2121
|
+
|
2122
|
+
<div>
|
2123
|
+
.
|
2124
|
+
<div>
|
2125
|
+
<pre><code>&lt;div&gt;
|
2126
|
+
</code></pre>
|
2127
|
+
.
|
2128
|
+
|
2129
|
+
An HTML block of types 1--6 can interrupt a paragraph, and need not be
|
2130
|
+
preceded by a blank line.
|
1749
2131
|
|
1750
2132
|
.
|
1751
2133
|
Foo
|
@@ -1759,8 +2141,8 @@ bar
|
|
1759
2141
|
</div>
|
1760
2142
|
.
|
1761
2143
|
|
1762
|
-
However, a following blank line is
|
1763
|
-
a document:
|
2144
|
+
However, a following blank line is needed, except at the end of
|
2145
|
+
a document, and except for blocks of types 1--5, above:
|
1764
2146
|
|
1765
2147
|
.
|
1766
2148
|
<div>
|
@@ -1774,14 +2156,16 @@ bar
|
|
1774
2156
|
*foo*
|
1775
2157
|
.
|
1776
2158
|
|
1777
|
-
|
2159
|
+
HTML blocks of type 7 cannot interrupt a paragraph:
|
1778
2160
|
|
1779
2161
|
.
|
1780
|
-
|
1781
|
-
|
2162
|
+
Foo
|
2163
|
+
<a href="bar">
|
2164
|
+
baz
|
1782
2165
|
.
|
1783
|
-
<
|
1784
|
-
|
2166
|
+
<p>Foo
|
2167
|
+
<a href="bar">
|
2168
|
+
baz</p>
|
1785
2169
|
.
|
1786
2170
|
|
1787
2171
|
This rule differs from John Gruber's original Markdown syntax
|
@@ -1800,8 +2184,8 @@ here:
|
|
1800
2184
|
- It requires a matching end tag, which it also does not allow to
|
1801
2185
|
be indented.
|
1802
2186
|
|
1803
|
-
|
1804
|
-
|
2187
|
+
Most Markdown implementations (including some of Gruber's own) do not
|
2188
|
+
respect all of these restrictions.
|
1805
2189
|
|
1806
2190
|
There is one respect, however, in which Gruber's rule is more liberal
|
1807
2191
|
than the one given here, since it allows blank lines to occur inside
|
@@ -1812,6 +2196,8 @@ if no matching end tag is found. Second, it provides a very simple
|
|
1812
2196
|
and flexible way of including Markdown content inside HTML tags:
|
1813
2197
|
simply separate the Markdown from the HTML using blank lines:
|
1814
2198
|
|
2199
|
+
Compare:
|
2200
|
+
|
1815
2201
|
.
|
1816
2202
|
<div>
|
1817
2203
|
|
@@ -1824,8 +2210,6 @@ simply separate the Markdown from the HTML using blank lines:
|
|
1824
2210
|
</div>
|
1825
2211
|
.
|
1826
2212
|
|
1827
|
-
Compare:
|
1828
|
-
|
1829
2213
|
.
|
1830
2214
|
<div>
|
1831
2215
|
*Emphasized* text.
|
@@ -1869,11 +2253,37 @@ Hi
|
|
1869
2253
|
</table>
|
1870
2254
|
.
|
1871
2255
|
|
1872
|
-
|
1873
|
-
|
1874
|
-
|
2256
|
+
There are problems, however, if the inner tags are indented
|
2257
|
+
*and* separated by blank lines, as then they will be interpreted as
|
2258
|
+
an indented code block:
|
2259
|
+
|
2260
|
+
.
|
2261
|
+
<table>
|
2262
|
+
|
2263
|
+
<tr>
|
2264
|
+
|
2265
|
+
<td>
|
2266
|
+
Hi
|
2267
|
+
</td>
|
2268
|
+
|
2269
|
+
</tr>
|
2270
|
+
|
2271
|
+
</table>
|
2272
|
+
.
|
2273
|
+
<table>
|
2274
|
+
<tr>
|
2275
|
+
<pre><code>&lt;td&gt;
|
2276
|
+
Hi
|
2277
|
+
</td>
|
2278
|
+
</code></pre>
|
2279
|
+
</tr>
|
2280
|
+
</table>
|
2281
|
+
.
|
1875
2282
|
|
1876
|
-
|
2283
|
+
Fortunately, blank lines are usually not necessary and can be
|
2284
|
+
deleted. The exception is inside `<pre>` tags, but as described
|
2285
|
+
above, raw HTML blocks starting with `<pre>` *can* contain blank
|
2286
|
+
lines.
|
1877
2287
|
|
1878
2288
|
## Link reference definitions
|
1879
2289
|
|
@@ -1885,7 +2295,7 @@ optional [whitespace] (including up to one
|
|
1885
2295
|
[line ending]), and an optional [link
|
1886
2296
|
title], which if it is present must be separated
|
1887
2297
|
from the [link destination] by [whitespace].
|
1888
|
-
No further [non-
|
2298
|
+
No further [non-whitespace character]s may occur on the line.
|
1889
2299
|
|
1890
2300
|
A [link reference definition]
|
1891
2301
|
does not correspond to a structural element of a document. Instead, it
|
@@ -2056,7 +2466,7 @@ bar
|
|
2056
2466
|
.
|
2057
2467
|
|
2058
2468
|
This is not a link reference definition, because there are
|
2059
|
-
[non-
|
2469
|
+
[non-whitespace character]s after the title:
|
2060
2470
|
|
2061
2471
|
.
|
2062
2472
|
[foo]: /url "title" ok
|
@@ -2305,7 +2715,7 @@ So, we explain what counts as a block quote or list item by explaining
|
|
2305
2715
|
how these can be *generated* from their contents. This should suffice
|
2306
2716
|
to define the syntax, although it does not give a recipe for *parsing*
|
2307
2717
|
these constructions. (A recipe is provided below in the section entitled
|
2308
|
-
[A parsing strategy](#appendix-a-
|
2718
|
+
[A parsing strategy](#appendix-a-parsing-strategy).)
|
2309
2719
|
|
2310
2720
|
## Block quotes
|
2311
2721
|
|
@@ -2323,7 +2733,7 @@ The following rules define [block quotes]:
|
|
2323
2733
|
2. **Laziness.** If a string of lines *Ls* constitute a [block
|
2324
2734
|
quote](#block-quotes) with contents *Bs*, then the result of deleting
|
2325
2735
|
the initial [block quote marker] from one or
|
2326
|
-
more lines in which the next [non-
|
2736
|
+
more lines in which the next [non-whitespace character] after the [block
|
2327
2737
|
quote marker] is [paragraph continuation
|
2328
2738
|
text] is a block quote with *Bs* as its content.
|
2329
2739
|
[Paragraph continuation text](@paragraph-continuation-text) is text
|
@@ -2694,13 +3104,15 @@ A [bullet list marker](@bullet-list-marker)
|
|
2694
3104
|
is a `-`, `+`, or `*` character.
|
2695
3105
|
|
2696
3106
|
An [ordered list marker](@ordered-list-marker)
|
2697
|
-
is a sequence of
|
2698
|
-
`.` character or a `)` character.
|
3107
|
+
is a sequence of 1--9 arabic digits (`0-9`), followed by either a
|
3108
|
+
`.` character or a `)` character. (The reason for the length
|
3109
|
+
limit is that with 10 digits we start seeing integer overflows
|
3110
|
+
in some browsers.)
|
2699
3111
|
|
2700
3112
|
The following rules define [list items]:
|
2701
3113
|
|
2702
3114
|
1. **Basic case.** If a sequence of lines *Ls* constitute a sequence of
|
2703
|
-
blocks *Bs* starting with a [non-
|
3115
|
+
blocks *Bs* starting with a [non-whitespace character] and not separated
|
2704
3116
|
from each other by more than one blank line, and *M* is a list
|
2705
3117
|
marker of width *W* followed by 0 < *N* < 5 spaces, then the result
|
2706
3118
|
of prepending *M* and the following spaces to the first line of
|
@@ -2758,7 +3170,7 @@ The most important thing to notice is that the position of
|
|
2758
3170
|
the text after the list marker determines how much indentation
|
2759
3171
|
is needed in subsequent blocks in the list item. If the list
|
2760
3172
|
marker takes up two spaces, and there are three spaces between
|
2761
|
-
the list marker and the next [non-
|
3173
|
+
the list marker and the next [non-whitespace character], then blocks
|
2762
3174
|
must be indented five spaces in order to fall under the list
|
2763
3175
|
item.
|
2764
3176
|
|
@@ -2816,7 +3228,7 @@ put under the list item:
|
|
2816
3228
|
|
2817
3229
|
It is tempting to think of this in terms of columns: the continuation
|
2818
3230
|
blocks must be indented at least to the column of the first
|
2819
|
-
[non-
|
3231
|
+
[non-whitespace character] after the list marker. However, that is not quite right.
|
2820
3232
|
The spaces after the list marker determine how much relative indentation
|
2821
3233
|
is needed. Which column this indentation reaches will depend on
|
2822
3234
|
how the list item is embedded in other constructions, as shown by
|
@@ -2964,6 +3376,49 @@ A list item may contain any kind of block:
|
|
2964
3376
|
</ol>
|
2965
3377
|
.
|
2966
3378
|
|
3379
|
+
Note that ordered list start numbers must be nine digits or fewer:
|
3380
|
+
|
3381
|
+
.
|
3382
|
+
123456789. ok
|
3383
|
+
.
|
3384
|
+
<ol start="123456789">
|
3385
|
+
<li>ok</li>
|
3386
|
+
</ol>
|
3387
|
+
.
|
3388
|
+
|
3389
|
+
.
|
3390
|
+
1234567890. not ok
|
3391
|
+
.
|
3392
|
+
<p>1234567890. not ok</p>
|
3393
|
+
.
|
3394
|
+
|
3395
|
+
A start number may begin with 0s:
|
3396
|
+
|
3397
|
+
.
|
3398
|
+
0. ok
|
3399
|
+
.
|
3400
|
+
<ol start="0">
|
3401
|
+
<li>ok</li>
|
3402
|
+
</ol>
|
3403
|
+
.
|
3404
|
+
|
3405
|
+
.
|
3406
|
+
003. ok
|
3407
|
+
.
|
3408
|
+
<ol start="3">
|
3409
|
+
<li>ok</li>
|
3410
|
+
</ol>
|
3411
|
+
.
|
3412
|
+
|
3413
|
+
A start number may not be negative:
|
3414
|
+
|
3415
|
+
.
|
3416
|
+
-1. not ok
|
3417
|
+
.
|
3418
|
+
<p>-1. not ok</p>
|
3419
|
+
.
|
3420
|
+
|
3421
|
+
|
2967
3422
|
2. **Item starting with indented code.** If a sequence of lines *Ls*
|
2968
3423
|
constitute a sequence of blocks *Bs* starting with an indented code
|
2969
3424
|
block and not separated from each other by more than one blank line,
|
@@ -3069,7 +3524,7 @@ inside the code block:
|
|
3069
3524
|
|
3070
3525
|
Note that rules #1 and #2 only apply to two cases: (a) cases
|
3071
3526
|
in which the lines to be included in a list item begin with a
|
3072
|
-
[non-
|
3527
|
+
[non-whitespace character], and (b) cases in which
|
3073
3528
|
they begin with an indented code
|
3074
3529
|
block. In a case like the following, where the first block begins with
|
3075
3530
|
a three-space indent, the rules do not allow us to form a list item by
|
@@ -3301,7 +3756,7 @@ Four spaces indent gives a code block:
|
|
3301
3756
|
5. **Laziness.** If a string of lines *Ls* constitute a [list
|
3302
3757
|
item](#list-items) with contents *Bs*, then the result of deleting
|
3303
3758
|
some or all of the indentation from one or more lines in which the
|
3304
|
-
next [non-
|
3759
|
+
next [non-whitespace character] after the indentation is
|
3305
3760
|
[paragraph continuation text] is a
|
3306
3761
|
list item with the same contents and attributes. The unindented
|
3307
3762
|
lines are called
|
@@ -4360,7 +4815,7 @@ raw HTML:
|
|
4360
4815
|
.
|
4361
4816
|
<a href="/bar\/)">
|
4362
4817
|
.
|
4363
|
-
<
|
4818
|
+
<a href="/bar\/)">
|
4364
4819
|
.
|
4365
4820
|
|
4366
4821
|
But they work in all other contexts, including URLs and link titles,
|
@@ -4474,7 +4929,7 @@ code blocks, including raw HTML, URLs, [link title]s, and
|
|
4474
4929
|
.
|
4475
4930
|
<a href="öö.html">
|
4476
4931
|
.
|
4477
|
-
<
|
4932
|
+
<a href="öö.html">
|
4478
4933
|
.
|
4479
4934
|
|
4480
4935
|
.
|
@@ -6031,6 +6486,20 @@ in Markdown:
|
|
6031
6486
|
<p><a href="foo):">link</a></p>
|
6032
6487
|
.
|
6033
6488
|
|
6489
|
+
A link can contain fragment identifiers and queries:
|
6490
|
+
|
6491
|
+
.
|
6492
|
+
[link](#fragment)
|
6493
|
+
|
6494
|
+
[link](http://example.com#fragment)
|
6495
|
+
|
6496
|
+
[link](http://example.com?foo=bar&baz#fragment)
|
6497
|
+
.
|
6498
|
+
<p><a href="#fragment">link</a></p>
|
6499
|
+
<p><a href="http://example.com#fragment">link</a></p>
|
6500
|
+
<p><a href="http://example.com?foo=bar&amp;baz#fragment">link</a></p>
|
6501
|
+
.
|
6502
|
+
|
6034
6503
|
Note that a backslash before a non-escapable character is
|
6035
6504
|
just a backslash:
|
6036
6505
|
|
@@ -6245,7 +6714,7 @@ that [matches] a [link reference definition] elsewhere in the document.
|
|
6245
6714
|
|
6246
6715
|
A [link label](@link-label) begins with a left bracket (`[`) and ends
|
6247
6716
|
with the first right bracket (`]`) that is not backslash-escaped.
|
6248
|
-
Between these brackets there must be at least one non-
|
6717
|
+
Between these brackets there must be at least one [non-whitespace character].
|
6249
6718
|
Unescaped square bracket characters are not allowed in
|
6250
6719
|
[link label]s. A link label can have at most 999
|
6251
6720
|
characters inside the square brackets.
|
@@ -6492,7 +6961,7 @@ backslash-escaped:
|
|
6492
6961
|
<p><a href="/uri">foo</a></p>
|
6493
6962
|
.
|
6494
6963
|
|
6495
|
-
A [link label] must contain at least one non-
|
6964
|
+
A [link label] must contain at least one [non-whitespace character]:
|
6496
6965
|
|
6497
6966
|
.
|
6498
6967
|
[]
|
@@ -7074,7 +7543,8 @@ so custom tags (and even, say, DocBook tags) may be used.
|
|
7074
7543
|
Here is the grammar for tags:
|
7075
7544
|
|
7076
7545
|
A [tag name](@tag-name) consists of an ASCII letter
|
7077
|
-
followed by zero or more ASCII letters or
|
7546
|
+
followed by zero or more ASCII letters, digits, or
|
7547
|
+
hyphens (`-`).
|
7078
7548
|
|
7079
7549
|
An [attribute](@attribute) consists of [whitespace],
|
7080
7550
|
an [attribute name], and an optional
|
@@ -7107,7 +7577,7 @@ consists of `"`, zero or more
|
|
7107
7577
|
characters not including `"`, and a final `"`.
|
7108
7578
|
|
7109
7579
|
An [open tag](@open-tag) consists of a `<` character, a [tag name],
|
7110
|
-
zero or more [attributes], optional [whitespace], an optional `/`
|
7580
|
+
zero or more [attributes], optional [whitespace], an optional `/`
|
7111
7581
|
character, and a `>` character.
|
7112
7582
|
|
7113
7583
|
A [closing tag](@closing-tag) consists of the string `</`, a
|
@@ -7172,6 +7642,21 @@ _boolean zoop:33=zoop:33 />
|
|
7172
7642
|
_boolean zoop:33=zoop:33 /></p>
|
7173
7643
|
.
|
7174
7644
|
|
7645
|
+
Custom tag names can be used:
|
7646
|
+
|
7647
|
+
.
|
7648
|
+
<responsive-image src="foo.jpg" />
|
7649
|
+
|
7650
|
+
<My-Tag>
|
7651
|
+
foo
|
7652
|
+
</My-Tag>
|
7653
|
+
.
|
7654
|
+
<responsive-image src="foo.jpg" />
|
7655
|
+
<My-Tag>
|
7656
|
+
foo
|
7657
|
+
</My-Tag>
|
7658
|
+
.
|
7659
|
+
|
7175
7660
|
Illegal tag names, not parsed as HTML:
|
7176
7661
|
|
7177
7662
|
.
|
@@ -7220,8 +7705,8 @@ Closing tags:
|
|
7220
7705
|
</a>
|
7221
7706
|
</foo >
|
7222
7707
|
.
|
7223
|
-
|
7224
|
-
</foo
|
7708
|
+
</a>
|
7709
|
+
</foo >
|
7225
7710
|
.
|
7226
7711
|
|
7227
7712
|
Illegal attributes in closing tag:
|
@@ -7288,7 +7773,7 @@ Entities are preserved in HTML attributes:
|
|
7288
7773
|
.
|
7289
7774
|
<a href="ö">
|
7290
7775
|
.
|
7291
|
-
<
|
7776
|
+
<a href="ö">
|
7292
7777
|
.
|
7293
7778
|
|
7294
7779
|
Backslash escapes do not work in HTML attributes:
|
@@ -7296,7 +7781,7 @@ Backslash escapes do not work in HTML attributes:
|
|
7296
7781
|
.
|
7297
7782
|
<a href="\*">
|
7298
7783
|
.
|
7299
|
-
<
|
7784
|
+
<a href="\*">
|
7300
7785
|
.
|
7301
7786
|
|
7302
7787
|
.
|
@@ -7500,7 +7985,10 @@ Multiple spaces
|
|
7500
7985
|
|
7501
7986
|
<!-- END TESTS -->
|
7502
7987
|
|
7503
|
-
# Appendix
|
7988
|
+
# Appendix: A parsing strategy {-}
|
7989
|
+
|
7990
|
+
In this appendix we describe some features of the parsing strategy
|
7991
|
+
used in the CommonMark reference implementations.
|
7504
7992
|
|
7505
7993
|
## Overview {-}
|
7506
7994
|
|
@@ -7517,8 +8005,6 @@ are parsed into sequences of Markdown inline elements (strings,
|
|
7517
8005
|
code spans, links, emphasis, and so on), using the map of link
|
7518
8006
|
references constructed in phase 1.
|
7519
8007
|
|
7520
|
-
## The document tree {-}
|
7521
|
-
|
7522
8008
|
At each point in processing, the document is represented as a tree of
|
7523
8009
|
**blocks**. The root of the tree is a `document` block. The `document`
|
7524
8010
|
may have any number of other blocks as **children**. These children
|
@@ -7542,7 +8028,7 @@ marked by arrows:
|
|
7542
8028
|
"aliquando id"
|
7543
8029
|
```
|
7544
8030
|
|
7545
|
-
##
|
8031
|
+
## Phase 1: block structure {-}
|
7546
8032
|
|
7547
8033
|
Each line that is processed has an effect on this tree. The line is
|
7548
8034
|
analyzed and, depending on its contents, the document may be altered
|
@@ -7557,6 +8043,36 @@ in one or more of the following ways:
|
|
7557
8043
|
Once a line has been incorporated into the tree in this way,
|
7558
8044
|
it can be discarded, so input can be read in a stream.
|
7559
8045
|
|
8046
|
+
For each line, we follow this procedure:
|
8047
|
+
|
8048
|
+
1. First we iterate through the open blocks, starting with the
|
8049
|
+
root document, and descending through last children down to the last
|
8050
|
+
open block. Each block imposes a condition that the line must satisfy
|
8051
|
+
if the block is to remain open. For example, a block quote requires a
|
8052
|
+
`>` character. A paragraph requires a non-blank line.
|
8053
|
+
In this phase we may match all or just some of the open
|
8054
|
+
blocks. But we cannot close unmatched blocks yet, because we may have a
|
8055
|
+
[lazy continuation line].
|
8056
|
+
|
8057
|
+
2. Next, after consuming the continuation markers for existing
|
8058
|
+
blocks, we look for new block starts (e.g. `>` for a block quote).
|
8059
|
+
If we encounter a new block start, we close any blocks unmatched
|
8060
|
+
in step 1 before creating the new block as a child of the last
|
8061
|
+
matched block.
|
8062
|
+
|
8063
|
+
3. Finally, we look at the remainder of the line (after block
|
8064
|
+
markers like `>`, list markers, and indentation have been consumed).
|
8065
|
+
This is text that can be incorporated into the last open
|
8066
|
+
block (a paragraph, code block, header, or raw HTML).
|
8067
|
+
|
8068
|
+
Setext headers are formed when we detect that the second line of
|
8069
|
+
a paragraph is a setext header line.
|
8070
|
+
|
8071
|
+
Reference link definitions are detected when a paragraph is closed;
|
8072
|
+
the accumulated text lines are parsed to see if they begin with
|
8073
|
+
one or more reference link definitions. Any remainder becomes a
|
8074
|
+
normal paragraph.
|
8075
|
+
|
7560
8076
|
We can see how this works by considering how the tree above is
|
7561
8077
|
generated by four lines of Markdown:
|
7562
8078
|
|
@@ -7654,7 +8170,7 @@ We thus obtain the final tree:
|
|
7654
8170
|
"aliquando id"
|
7655
8171
|
```
|
7656
8172
|
|
7657
|
-
##
|
8173
|
+
## Phase 2: inline structure {-}
|
7658
8174
|
|
7659
8175
|
Once all of the input has been parsed, all open blocks are closed.
|
7660
8176
|
|
@@ -7685,5 +8201,123 @@ Notice how the [line ending] in the first paragraph has
|
|
7685
8201
|
been parsed as a `softbreak`, and the asterisks in the first list item
|
7686
8202
|
have become an `emph`.
|
7687
8203
|
|
7688
|
-
|
7689
|
-
|
8204
|
+
### An algorithm for parsing nested emphasis and links {-}
|
8205
|
+
|
8206
|
+
By far the trickiest part of inline parsing is handling emphasis,
|
8207
|
+
strong emphasis, links, and images. This is done using the following
|
8208
|
+
algorithm.
|
8209
|
+
|
8210
|
+
When we're parsing inlines and we hit either
|
8211
|
+
|
8212
|
+
- a run of `*` or `_` characters, or
|
8213
|
+
- a `[` or `![`
|
8214
|
+
|
8215
|
+
we insert a text node with these symbols as its literal content, and we
|
8216
|
+
add a pointer to this text node to the [delimiter stack](@delimiter-stack).
|
8217
|
+
|
8218
|
+
The [delimiter stack] is a doubly linked list. Each
|
8219
|
+
element contains a pointer to a text node, plus information about
|
8220
|
+
|
8221
|
+
- the type of delimiter (`[`, `![`, `*`, `_`)
|
8222
|
+
- the number of delimiters,
|
8223
|
+
- whether the delimiter is "active" (all are active to start), and
|
8224
|
+
- whether the delimiter is a potential opener, a potential closer,
|
8225
|
+
or both (which depends on what sort of characters precede
|
8226
|
+
and follow the delimiters).
|
8227
|
+
|
8228
|
+
When we hit a `]` character, we call the *look for link or image*
|
8229
|
+
procedure (see below).
|
8230
|
+
|
8231
|
+
When we hit the end of the input, we call the *process emphasis*
|
8232
|
+
procedure (see below), with `stack_bottom` = NULL.
|
8233
|
+
|
8234
|
+
#### *look for link or image* {-}
|
8235
|
+
|
8236
|
+
Starting at the top of the delimiter stack, we look backwards
|
8237
|
+
through the stack for an opening `[` or `![` delimiter.
|
8238
|
+
|
8239
|
+
- If we don't find one, we return a literal text node `]`.
|
8240
|
+
|
8241
|
+
- If we do find one, but it's not *active*, we remove the inactive
|
8242
|
+
delimiter from the stack, and return a literal text node `]`.
|
8243
|
+
|
8244
|
+
- If we find one and it's active, then we parse ahead to see if
|
8245
|
+
we have an inline link/image, reference link/image, compact reference
|
8246
|
+
link/image, or shortcut reference link/image.
|
8247
|
+
|
8248
|
+
+ If we don't, then we remove the opening delimiter from the
|
8249
|
+
delimiter stack and return a literal text node `]`.
|
8250
|
+
|
8251
|
+
+ If we do, then
|
8252
|
+
|
8253
|
+
* We return a link or image node whose children are the inlines
|
8254
|
+
after the text node pointed to by the opening delimiter.
|
8255
|
+
|
8256
|
+
* We run *process emphasis* on these inlines, with the `[` opener
|
8257
|
+
as `stack_bottom`.
|
8258
|
+
|
8259
|
+
* We remove the opening delimiter.
|
8260
|
+
|
8261
|
+
* If we have a link (and not an image), we also set all
|
8262
|
+
`[` delimiters before the opening delimiter to *inactive*. (This
|
8263
|
+
will prevent us from getting links within links.)
|
8264
|
+
|
8265
|
+
#### *process emphasis* {-}
|
8266
|
+
|
8267
|
+
Parameter `stack_bottom` sets a lower bound to how far we
|
8268
|
+
descend in the [delimiter stack]. If it is NULL, we can
|
8269
|
+
go all the way to the bottom. Otherwise, we stop before
|
8270
|
+
visiting `stack_bottom`.
|
8271
|
+
|
8272
|
+
Let `current_position` point to the element on the [delimiter stack]
|
8273
|
+
just above `stack_bottom` (or the first element if `stack_bottom`
|
8274
|
+
is NULL).
|
8275
|
+
|
8276
|
+
We keep track of the `openers_bottom` for each delimiter
|
8277
|
+
type (`*`, `_`). Initialize this to `stack_bottom`.
|
8278
|
+
|
8279
|
+
Then we repeat the following until we run out of potential
|
8280
|
+
closers:
|
8281
|
+
|
8282
|
+
- Move `current_position` forward in the delimiter stack (if needed)
|
8283
|
+
until we find the first potential closer with delimiter `*` or `_`.
|
8284
|
+
(This will be the potential closer closest
|
8285
|
+
to the beginning of the input -- the first one in parse order.)
|
8286
|
+
|
8287
|
+
- Now, look back in the stack (staying above `stack_bottom` and
|
8288
|
+
the `openers_bottom` for this delimiter type) for the
|
8289
|
+
first matching potential opener ("matching" means same delimiter).
|
8290
|
+
|
8291
|
+
- If one is found:
|
8292
|
+
|
8293
|
+
+ Figure out whether we have emphasis or strong emphasis:
|
8294
|
+
if both closer and opener spans have length >= 2, we have
|
8295
|
+
strong, otherwise regular.
|
8296
|
+
|
8297
|
+
+ Insert an emph or strong emph node accordingly, after
|
8298
|
+
the text node corresponding to the opener.
|
8299
|
+
|
8300
|
+
+ Remove any delimiters between the opener and closer from
|
8301
|
+
the delimiter stack.
|
8302
|
+
|
8303
|
+
+ Remove 1 (for regular emph) or 2 (for strong emph) delimiters
|
8304
|
+
from the opening and closing text nodes. If they become empty
|
8305
|
+
as a result, remove them and remove the corresponding element
|
8306
|
+
of the delimiter stack. If the closing node is removed, reset
|
8307
|
+
`current_position` to the next element in the stack.
|
8308
|
+
|
8309
|
+
- If none is found:
|
8310
|
+
|
8311
|
+
+ Set `openers_bottom` to the element before `current_position`.
|
8312
|
+
(We know that there are no openers for this kind of closer up to and
|
8313
|
+
including this point, so this puts a lower bound on future searches.)
|
8314
|
+
|
8315
|
+
+ If the closer at `current_position` is not a potential opener,
|
8316
|
+
remove it from the delimiter stack (since we know it can't
|
8317
|
+
be a closer either).
|
8318
|
+
|
8319
|
+
+ Advance `current_position` to the next element in the stack.
|
8320
|
+
|
8321
|
+
After we're done, we remove all delimiters above `stack_bottom` from the
|
8322
|
+
delimiter stack.
|
8323
|
+
|