commonmarker 0.5.1 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of commonmarker might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/README.md +23 -17
- data/ext/commonmarker/cmark/CMakeLists.txt +1 -1
- data/ext/commonmarker/cmark/Makefile +13 -14
- data/ext/commonmarker/cmark/README.md +1 -0
- data/ext/commonmarker/cmark/api_test/cplusplus.h +1 -2
- data/ext/commonmarker/cmark/api_test/harness.c +60 -79
- data/ext/commonmarker/cmark/api_test/harness.h +13 -20
- data/ext/commonmarker/cmark/api_test/main.c +809 -714
- data/ext/commonmarker/cmark/build/CMakeCache.txt +3 -0
- data/ext/commonmarker/cmark/build/CMakeFiles/Makefile.cmake +0 -67
- data/ext/commonmarker/cmark/build/api_test/CMakeFiles/api_test.dir/build.make +1 -1
- data/ext/commonmarker/cmark/build/api_test/CMakeFiles/api_test.dir/link.txt +1 -1
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark.dir/DependInfo.cmake +1 -1
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark.dir/build.make +23 -23
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark.dir/cmake_clean.cmake +1 -1
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark.dir/link.txt +1 -1
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/blocks.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/cmark.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/cmark_ctype.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/commonmark.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/html.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/inlines.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/iterator.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/latex.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/man.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/node.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/render.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/scanners.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/xml.c.o +0 -0
- data/ext/commonmarker/cmark/build/src/cmake_install.cmake +10 -2
- data/ext/commonmarker/cmark/build/src/cmark_version.h +2 -2
- data/ext/commonmarker/cmark/build/src/libcmark.a +0 -0
- data/ext/commonmarker/cmark/build/src/libcmark.pc +1 -1
- data/ext/commonmarker/cmark/build/testdir/CTestTestfile.cmake +1 -0
- data/ext/commonmarker/cmark/changelog.txt +85 -0
- data/ext/commonmarker/cmark/man/man3/cmark.3 +75 -34
- data/ext/commonmarker/cmark/src/CMakeLists.txt +13 -8
- data/ext/commonmarker/cmark/src/blocks.c +78 -70
- data/ext/commonmarker/cmark/src/chunk.h +5 -7
- data/ext/commonmarker/cmark/src/cmark.h +88 -34
- data/ext/commonmarker/cmark/src/cmark_ctype.c +6 -6
- data/ext/commonmarker/cmark/src/commonmark.c +24 -8
- data/ext/commonmarker/cmark/src/houdini_html_u.c +6 -5
- data/ext/commonmarker/cmark/src/html.c +33 -11
- data/ext/commonmarker/cmark/src/inlines.c +9 -10
- data/ext/commonmarker/cmark/src/iterator.c +2 -2
- data/ext/commonmarker/cmark/src/latex.c +54 -28
- data/ext/commonmarker/cmark/src/main.c +0 -9
- data/ext/commonmarker/cmark/src/man.c +17 -5
- data/ext/commonmarker/cmark/src/node.c +123 -44
- data/ext/commonmarker/cmark/src/node.h +8 -2
- data/ext/commonmarker/cmark/src/render.c +8 -1
- data/ext/commonmarker/cmark/src/render.h +1 -0
- data/ext/commonmarker/cmark/src/scanners.c +3755 -4379
- data/ext/commonmarker/cmark/src/scanners.h +7 -6
- data/ext/commonmarker/cmark/src/scanners.re +9 -10
- data/ext/commonmarker/cmark/src/utf8.c +6 -3
- data/ext/commonmarker/cmark/src/utf8.h +4 -2
- data/ext/commonmarker/cmark/src/xml.c +18 -4
- data/ext/commonmarker/cmark/test/CMakeLists.txt +11 -0
- data/ext/commonmarker/cmark/test/normalize.py +5 -1
- data/ext/commonmarker/cmark/test/roundtrip.bat +1 -0
- data/ext/commonmarker/cmark/test/roundtrip.sh +1 -1
- data/ext/commonmarker/cmark/test/spec.txt +257 -157
- data/ext/commonmarker/cmark/why-cmark-and-not-x.md +104 -0
- data/lib/commonmarker/config.rb +6 -6
- data/lib/commonmarker/version.rb +1 -1
- metadata +5 -5
- data/ext/commonmarker/cmark/src/bench.h +0 -27
- data/ext/commonmarker/cmark/wrappers/wrapper.lua +0 -239
@@ -21,9 +21,9 @@ bufsize_t _scan_html_block_end_5(const unsigned char *p);
|
|
21
21
|
bufsize_t _scan_link_url(const unsigned char *p);
|
22
22
|
bufsize_t _scan_link_title(const unsigned char *p);
|
23
23
|
bufsize_t _scan_spacechars(const unsigned char *p);
|
24
|
-
bufsize_t
|
25
|
-
bufsize_t
|
26
|
-
bufsize_t
|
24
|
+
bufsize_t _scan_atx_heading_start(const unsigned char *p);
|
25
|
+
bufsize_t _scan_setext_heading_line(const unsigned char *p);
|
26
|
+
bufsize_t _scan_thematic_break(const unsigned char *p);
|
27
27
|
bufsize_t _scan_open_code_fence(const unsigned char *p);
|
28
28
|
bufsize_t _scan_close_code_fence(const unsigned char *p);
|
29
29
|
bufsize_t _scan_entity(const unsigned char *p);
|
@@ -43,9 +43,10 @@ bufsize_t _scan_dangerous_url(const unsigned char *p);
|
|
43
43
|
#define scan_link_url(c, n) _scan_at(&_scan_link_url, c, n)
|
44
44
|
#define scan_link_title(c, n) _scan_at(&_scan_link_title, c, n)
|
45
45
|
#define scan_spacechars(c, n) _scan_at(&_scan_spacechars, c, n)
|
46
|
-
#define
|
47
|
-
#define
|
48
|
-
|
46
|
+
#define scan_atx_heading_start(c, n) _scan_at(&_scan_atx_heading_start, c, n)
|
47
|
+
#define scan_setext_heading_line(c, n) \
|
48
|
+
_scan_at(&_scan_setext_heading_line, c, n)
|
49
|
+
#define scan_thematic_break(c, n) _scan_at(&_scan_thematic_break, c, n)
|
49
50
|
#define scan_open_code_fence(c, n) _scan_at(&_scan_open_code_fence, c, n)
|
50
51
|
#define scan_close_code_fence(c, n) _scan_at(&_scan_close_code_fence, c, n)
|
51
52
|
#define scan_entity(c, n) _scan_at(&_scan_entity, c, n)
|
@@ -6,10 +6,9 @@ bufsize_t _scan_at(bufsize_t (*scanner)(const unsigned char *), cmark_chunk *c,
|
|
6
6
|
{
|
7
7
|
bufsize_t res;
|
8
8
|
unsigned char *ptr = (unsigned char *)c->data;
|
9
|
-
unsigned char zero = '\0';
|
10
9
|
|
11
|
-
if (ptr == NULL) {
|
12
|
-
|
10
|
+
if (ptr == NULL || offset > c->len) {
|
11
|
+
return 0;
|
13
12
|
} else {
|
14
13
|
unsigned char lim = ptr[c->len];
|
15
14
|
|
@@ -216,7 +215,7 @@ bufsize_t _scan_link_url(const unsigned char *p)
|
|
216
215
|
const unsigned char *start = p;
|
217
216
|
/*!re2c
|
218
217
|
[ \r\n]* [<] ([^<>\r\n\\\x00] | escaped_char | [\\])* [>] { return (bufsize_t)(p - start); }
|
219
|
-
[ \r\n]* (reg_char+ | escaped_char |
|
218
|
+
[ \r\n]* (reg_char+ | escaped_char | [\\] | in_parens_nosp)* { return (bufsize_t)(p - start); }
|
220
219
|
.? { return 0; }
|
221
220
|
*/
|
222
221
|
}
|
@@ -247,8 +246,8 @@ bufsize_t _scan_spacechars(const unsigned char *p)
|
|
247
246
|
*/
|
248
247
|
}
|
249
248
|
|
250
|
-
// Match ATX
|
251
|
-
bufsize_t
|
249
|
+
// Match ATX heading start.
|
250
|
+
bufsize_t _scan_atx_heading_start(const unsigned char *p)
|
252
251
|
{
|
253
252
|
const unsigned char *marker = NULL;
|
254
253
|
const unsigned char *start = p;
|
@@ -258,9 +257,9 @@ bufsize_t _scan_atx_header_start(const unsigned char *p)
|
|
258
257
|
*/
|
259
258
|
}
|
260
259
|
|
261
|
-
// Match setext
|
260
|
+
// Match setext heading line. Return 1 for level-1 heading,
|
262
261
|
// 2 for level-2, 0 for no match.
|
263
|
-
bufsize_t
|
262
|
+
bufsize_t _scan_setext_heading_line(const unsigned char *p)
|
264
263
|
{
|
265
264
|
const unsigned char *marker = NULL;
|
266
265
|
/*!re2c
|
@@ -270,10 +269,10 @@ bufsize_t _scan_setext_header_line(const unsigned char *p)
|
|
270
269
|
*/
|
271
270
|
}
|
272
271
|
|
273
|
-
// Scan a
|
272
|
+
// Scan a thematic break line: "...three or more hyphens, asterisks,
|
274
273
|
// or underscores on a line by themselves. If you wish, you may use
|
275
274
|
// spaces between the hyphens or asterisks."
|
276
|
-
bufsize_t
|
275
|
+
bufsize_t _scan_thematic_break(const unsigned char *p)
|
277
276
|
{
|
278
277
|
const unsigned char *marker = NULL;
|
279
278
|
const unsigned char *start = p;
|
@@ -107,7 +107,8 @@ static int utf8proc_valid(const uint8_t *str, bufsize_t str_len) {
|
|
107
107
|
return length;
|
108
108
|
}
|
109
109
|
|
110
|
-
void cmark_utf8proc_check(cmark_strbuf *ob, const uint8_t *line,
|
110
|
+
void cmark_utf8proc_check(cmark_strbuf *ob, const uint8_t *line,
|
111
|
+
bufsize_t size) {
|
111
112
|
bufsize_t i = 0;
|
112
113
|
|
113
114
|
while (i < size) {
|
@@ -146,7 +147,8 @@ void cmark_utf8proc_check(cmark_strbuf *ob, const uint8_t *line, bufsize_t size)
|
|
146
147
|
}
|
147
148
|
}
|
148
149
|
|
149
|
-
int cmark_utf8proc_iterate(const uint8_t *str, bufsize_t str_len,
|
150
|
+
int cmark_utf8proc_iterate(const uint8_t *str, bufsize_t str_len,
|
151
|
+
int32_t *dst) {
|
150
152
|
int length;
|
151
153
|
int32_t uc = -1;
|
152
154
|
|
@@ -222,7 +224,8 @@ void cmark_utf8proc_encode_char(int32_t uc, cmark_strbuf *buf) {
|
|
222
224
|
cmark_strbuf_put(buf, dst, len);
|
223
225
|
}
|
224
226
|
|
225
|
-
void cmark_utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str,
|
227
|
+
void cmark_utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str,
|
228
|
+
bufsize_t len) {
|
226
229
|
int32_t c;
|
227
230
|
|
228
231
|
#define bufpush(x) cmark_utf8proc_encode_char(x, dest)
|
@@ -8,10 +8,12 @@
|
|
8
8
|
extern "C" {
|
9
9
|
#endif
|
10
10
|
|
11
|
-
void cmark_utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str,
|
11
|
+
void cmark_utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str,
|
12
|
+
bufsize_t len);
|
12
13
|
void cmark_utf8proc_encode_char(int32_t uc, cmark_strbuf *buf);
|
13
14
|
int cmark_utf8proc_iterate(const uint8_t *str, bufsize_t str_len, int32_t *dst);
|
14
|
-
void cmark_utf8proc_check(cmark_strbuf *dest, const uint8_t *line,
|
15
|
+
void cmark_utf8proc_check(cmark_strbuf *dest, const uint8_t *line,
|
16
|
+
bufsize_t size);
|
15
17
|
int cmark_utf8proc_is_space(int32_t uc);
|
16
18
|
int cmark_utf8proc_is_punctuation(int32_t uc);
|
17
19
|
|
@@ -50,10 +50,13 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type,
|
|
50
50
|
literal = false;
|
51
51
|
|
52
52
|
switch (node->type) {
|
53
|
+
case CMARK_NODE_DOCUMENT:
|
54
|
+
cmark_strbuf_puts(xml, " xmlns=\"http://commonmark.org/xml/1.0\"");
|
55
|
+
break;
|
53
56
|
case CMARK_NODE_TEXT:
|
54
57
|
case CMARK_NODE_CODE:
|
55
|
-
case
|
56
|
-
case
|
58
|
+
case CMARK_NODE_HTML_BLOCK:
|
59
|
+
case CMARK_NODE_HTML_INLINE:
|
57
60
|
cmark_strbuf_puts(xml, ">");
|
58
61
|
escape_xml(xml, node->as.literal.data, node->as.literal.len);
|
59
62
|
cmark_strbuf_puts(xml, "</");
|
@@ -83,8 +86,8 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type,
|
|
83
86
|
(cmark_node_get_list_tight(node) ? "true" : "false"));
|
84
87
|
cmark_strbuf_puts(xml, buffer);
|
85
88
|
break;
|
86
|
-
case
|
87
|
-
sprintf(buffer, " level=\"%d\"", node->as.
|
89
|
+
case CMARK_NODE_HEADING:
|
90
|
+
sprintf(buffer, " level=\"%d\"", node->as.heading.level);
|
88
91
|
cmark_strbuf_puts(xml, buffer);
|
89
92
|
break;
|
90
93
|
case CMARK_NODE_CODE_BLOCK:
|
@@ -99,6 +102,17 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type,
|
|
99
102
|
cmark_strbuf_puts(xml, cmark_node_get_type_string(node));
|
100
103
|
literal = true;
|
101
104
|
break;
|
105
|
+
case CMARK_NODE_CUSTOM_BLOCK:
|
106
|
+
case CMARK_NODE_CUSTOM_INLINE:
|
107
|
+
cmark_strbuf_puts(xml, " on_enter=\"");
|
108
|
+
escape_xml(xml, node->as.custom.on_enter.data,
|
109
|
+
node->as.custom.on_enter.len);
|
110
|
+
cmark_strbuf_putc(xml, '"');
|
111
|
+
cmark_strbuf_puts(xml, " on_exit=\"");
|
112
|
+
escape_xml(xml, node->as.custom.on_exit.data,
|
113
|
+
node->as.custom.on_exit.len);
|
114
|
+
cmark_strbuf_putc(xml, '"');
|
115
|
+
break;
|
102
116
|
case CMARK_NODE_LINK:
|
103
117
|
case CMARK_NODE_IMAGE:
|
104
118
|
cmark_strbuf_puts(xml, " destination=\"");
|
@@ -16,6 +16,9 @@ if (WIN32)
|
|
16
16
|
set_tests_properties(api_test PROPERTIES
|
17
17
|
ENVIRONMENT "PATH=${WIN_DLL_DIR};$ENV{PATH}"
|
18
18
|
)
|
19
|
+
set(ROUNDTRIP,"${CMAKE_CURRENT_SOURCE_DIR}/roundtrip.bat")
|
20
|
+
else(WIN32)
|
21
|
+
set(ROUNDTRIP,"${CMAKE_CURRENT_SOURCE_DIR}/roundtrip.sh")
|
19
22
|
endif(WIN32)
|
20
23
|
|
21
24
|
IF (PYTHONINTERP_FOUND)
|
@@ -43,6 +46,14 @@ IF (PYTHONINTERP_FOUND)
|
|
43
46
|
${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py" "--no-normalize" "--spec" "${CMAKE_CURRENT_SOURCE_DIR}/smart_punct.txt" "--program" "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark --smart"
|
44
47
|
)
|
45
48
|
|
49
|
+
add_test(roundtriptest_executable
|
50
|
+
${PYTHON_EXECUTABLE}
|
51
|
+
"${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py" "--no-normalize"
|
52
|
+
"--spec" "${CMAKE_CURRENT_SOURCE_DIR}/spec.txt" "--program"
|
53
|
+
"${ROUNDTRIP} ${CMAKE_CURRENT_BINARY_DIR}/../src/cmark"
|
54
|
+
)
|
55
|
+
|
56
|
+
|
46
57
|
ELSE(PYTHONINTERP_FOUND)
|
47
58
|
|
48
59
|
message("\n*** A python 3 interpreter is required to run the spec tests.\n")
|
@@ -1,5 +1,6 @@
|
|
1
1
|
# -*- coding: utf-8 -*-
|
2
2
|
from html.parser import HTMLParser
|
3
|
+
import urllib
|
3
4
|
|
4
5
|
try:
|
5
6
|
from html.parser import HTMLParseError
|
@@ -61,7 +62,10 @@ class MyHTMLParser(HTMLParser):
|
|
61
62
|
attrs.sort()
|
62
63
|
for (k,v) in attrs:
|
63
64
|
self.output += " " + k
|
64
|
-
if v
|
65
|
+
if v in ['href','src']:
|
66
|
+
self.output += ("=" + '"' +
|
67
|
+
urllib.quote(urllib.unquote(v), safe='/') + '"')
|
68
|
+
elif v != None:
|
65
69
|
self.output += ("=" + '"' + cgi.escape(v,quote=True) + '"')
|
66
70
|
self.output += ">"
|
67
71
|
self.last_tag = tag
|
@@ -0,0 +1 @@
|
|
1
|
+
"%1" -t commonmark | "%1"
|
@@ -1,2 +1,2 @@
|
|
1
1
|
#!/bin/sh
|
2
|
-
|
2
|
+
"$1" -t commonmark | "$1"
|
@@ -36,11 +36,11 @@ questions it does not answer:
|
|
36
36
|
users in real documents. (See [this comment by John
|
37
37
|
Gruber](http://article.gmane.org/gmane.text.markdown.general/1997).)
|
38
38
|
|
39
|
-
2. Is a blank line needed before a block quote or
|
39
|
+
2. Is a blank line needed before a block quote or heading?
|
40
40
|
Most implementations do not require the blank line. However,
|
41
41
|
this can lead to unexpected results in hard-wrapped text, and
|
42
42
|
also to ambiguities in parsing (note that some implementations
|
43
|
-
put the
|
43
|
+
put the heading inside the blockquote, while others do not).
|
44
44
|
(John Gruber has also spoken [in favor of requiring the blank
|
45
45
|
lines](http://article.gmane.org/gmane.text.markdown.general/2146).)
|
46
46
|
|
@@ -85,8 +85,8 @@ questions it does not answer:
|
|
85
85
|
10. item 2a
|
86
86
|
```
|
87
87
|
|
88
|
-
6. Is this one list with a
|
89
|
-
or two lists separated by a
|
88
|
+
6. Is this one list with a thematic break in its second item,
|
89
|
+
or two lists separated by a thematic break?
|
90
90
|
|
91
91
|
``` markdown
|
92
92
|
* a
|
@@ -128,8 +128,8 @@ questions it does not answer:
|
|
128
128
|
- and it can screw things up`
|
129
129
|
```
|
130
130
|
|
131
|
-
11. Can list items include section
|
132
|
-
allow this, but does allow blockquotes to include
|
131
|
+
11. Can list items include section headings? (`Markdown.pl` does not
|
132
|
+
allow this, but does allow blockquotes to include headings.)
|
133
133
|
|
134
134
|
``` markdown
|
135
135
|
- # Heading
|
@@ -325,9 +325,9 @@ with the replacement character (`U+FFFD`).
|
|
325
325
|
|
326
326
|
We can think of a document as a sequence of
|
327
327
|
[blocks](@block)---structural elements like paragraphs, block
|
328
|
-
quotations, lists,
|
328
|
+
quotations, lists, headings, rules, and code blocks. Some blocks (like
|
329
329
|
block quotes and list items) contain other blocks; others (like
|
330
|
-
|
330
|
+
headings and paragraphs) contain [inline](@inline) content---text,
|
331
331
|
links, emphasized text, images, code, and so on.
|
332
332
|
|
333
333
|
## Precedence
|
@@ -348,7 +348,7 @@ two items, not a list with one item containing a code span:
|
|
348
348
|
|
349
349
|
This means that parsing can proceed in two steps: first, the block
|
350
350
|
structure of the document can be discerned; second, text lines inside
|
351
|
-
paragraphs,
|
351
|
+
paragraphs, headings, and other block constructs can be parsed for inline
|
352
352
|
structure. The second step requires information about link reference
|
353
353
|
definitions that will be available only at the end of the first
|
354
354
|
step. Note that the first step requires processing lines in sequence,
|
@@ -367,12 +367,12 @@ which cannot.
|
|
367
367
|
This section describes the different kinds of leaf block that make up a
|
368
368
|
Markdown document.
|
369
369
|
|
370
|
-
##
|
370
|
+
## Thematic breaks
|
371
371
|
|
372
372
|
A line consisting of 0-3 spaces of indentation, followed by a sequence
|
373
373
|
of three or more matching `-`, `_`, or `*` characters, each followed
|
374
374
|
optionally by any number of spaces, forms a
|
375
|
-
[
|
375
|
+
[thematic break](@thematic-break).
|
376
376
|
|
377
377
|
.
|
378
378
|
***
|
@@ -490,7 +490,7 @@ a------
|
|
490
490
|
.
|
491
491
|
|
492
492
|
It is required that all of the [non-whitespace character]s be the same.
|
493
|
-
So, this is not a
|
493
|
+
So, this is not a thematic break:
|
494
494
|
|
495
495
|
.
|
496
496
|
*-*
|
@@ -498,7 +498,7 @@ So, this is not a horizontal rule:
|
|
498
498
|
<p><em>-</em></p>
|
499
499
|
.
|
500
500
|
|
501
|
-
|
501
|
+
Thematic breaks do not need blank lines before or after:
|
502
502
|
|
503
503
|
.
|
504
504
|
- foo
|
@@ -514,7 +514,7 @@ Horizontal rules do not need blank lines before or after:
|
|
514
514
|
</ul>
|
515
515
|
.
|
516
516
|
|
517
|
-
|
517
|
+
Thematic breaks can interrupt a paragraph:
|
518
518
|
|
519
519
|
.
|
520
520
|
Foo
|
@@ -527,10 +527,10 @@ bar
|
|
527
527
|
.
|
528
528
|
|
529
529
|
If a line of dashes that meets the above conditions for being a
|
530
|
-
|
531
|
-
|
532
|
-
[setext
|
533
|
-
this is a setext
|
530
|
+
thematic break could also be interpreted as the underline of a [setext
|
531
|
+
heading], the interpretation as a
|
532
|
+
[setext heading] takes precedence. Thus, for example,
|
533
|
+
this is a setext heading, not a paragraph followed by a thematic break:
|
534
534
|
|
535
535
|
.
|
536
536
|
Foo
|
@@ -541,8 +541,8 @@ bar
|
|
541
541
|
<p>bar</p>
|
542
542
|
.
|
543
543
|
|
544
|
-
When both a
|
545
|
-
interpretations of a line, the
|
544
|
+
When both a thematic break and a list item are possible
|
545
|
+
interpretations of a line, the thematic break takes precedence:
|
546
546
|
|
547
547
|
.
|
548
548
|
* Foo
|
@@ -558,7 +558,7 @@ interpretations of a line, the horizontal rule takes precedence:
|
|
558
558
|
</ul>
|
559
559
|
.
|
560
560
|
|
561
|
-
If you want a
|
561
|
+
If you want a thematic break in a list item, use a different bullet:
|
562
562
|
|
563
563
|
.
|
564
564
|
- Foo
|
@@ -572,21 +572,21 @@ If you want a horizontal rule in a list item, use a different bullet:
|
|
572
572
|
</ul>
|
573
573
|
.
|
574
574
|
|
575
|
-
## ATX
|
575
|
+
## ATX headings
|
576
576
|
|
577
|
-
An [ATX
|
577
|
+
An [ATX heading](@atx-heading)
|
578
578
|
consists of a string of characters, parsed as inline content, between an
|
579
579
|
opening sequence of 1--6 unescaped `#` characters and an optional
|
580
580
|
closing sequence of any number of unescaped `#` characters.
|
581
|
-
The opening sequence of `#` characters
|
582
|
-
[
|
581
|
+
The opening sequence of `#` characters must be followed by a
|
582
|
+
[space] or by the end of line. The optional closing sequence of `#`s must be
|
583
583
|
preceded by a [space] and may be followed by spaces only. The opening
|
584
584
|
`#` character may be indented 0-3 spaces. The raw contents of the
|
585
|
-
|
586
|
-
as inline content. The
|
585
|
+
heading are stripped of leading and trailing spaces before being parsed
|
586
|
+
as inline content. The heading level is equal to the number of `#`
|
587
587
|
characters in the opening sequence.
|
588
588
|
|
589
|
-
Simple
|
589
|
+
Simple headings:
|
590
590
|
|
591
591
|
.
|
592
592
|
# foo
|
@@ -604,7 +604,7 @@ Simple headers:
|
|
604
604
|
<h6>foo</h6>
|
605
605
|
.
|
606
606
|
|
607
|
-
More than six `#` characters is not a
|
607
|
+
More than six `#` characters is not a heading:
|
608
608
|
|
609
609
|
.
|
610
610
|
####### foo
|
@@ -613,23 +613,31 @@ More than six `#` characters is not a header:
|
|
613
613
|
.
|
614
614
|
|
615
615
|
At least one space is required between the `#` characters and the
|
616
|
-
|
616
|
+
heading's contents, unless the heading is empty. Note that many
|
617
617
|
implementations currently do not require the space. However, the
|
618
618
|
space was required by the
|
619
619
|
[original ATX implementation](http://www.aaronsw.com/2002/atx/atx.py),
|
620
620
|
and it helps prevent things like the following from being parsed as
|
621
|
-
|
621
|
+
headings:
|
622
622
|
|
623
623
|
.
|
624
624
|
#5 bolt
|
625
625
|
|
626
|
-
#
|
626
|
+
#hashtag
|
627
627
|
.
|
628
628
|
<p>#5 bolt</p>
|
629
|
-
<p>#
|
629
|
+
<p>#hashtag</p>
|
630
630
|
.
|
631
631
|
|
632
|
-
|
632
|
+
A tab will not work:
|
633
|
+
|
634
|
+
.
|
635
|
+
#→foo
|
636
|
+
.
|
637
|
+
<p>#→foo</p>
|
638
|
+
.
|
639
|
+
|
640
|
+
This is not a heading, because the first `#` is escaped:
|
633
641
|
|
634
642
|
.
|
635
643
|
\## foo
|
@@ -712,7 +720,7 @@ Spaces are allowed after the closing sequence:
|
|
712
720
|
|
713
721
|
A sequence of `#` characters with anything but [space]s following it
|
714
722
|
is not a closing sequence, but counts as part of the contents of the
|
715
|
-
|
723
|
+
heading:
|
716
724
|
|
717
725
|
.
|
718
726
|
### foo ### b
|
@@ -741,7 +749,7 @@ of the closing sequence:
|
|
741
749
|
<h1>foo #</h1>
|
742
750
|
.
|
743
751
|
|
744
|
-
ATX
|
752
|
+
ATX headings need not be separated from surrounding content by blank
|
745
753
|
lines, and they can interrupt paragraphs:
|
746
754
|
|
747
755
|
.
|
@@ -764,7 +772,7 @@ Bar foo
|
|
764
772
|
<p>Bar foo</p>
|
765
773
|
.
|
766
774
|
|
767
|
-
ATX
|
775
|
+
ATX headings can be empty:
|
768
776
|
|
769
777
|
.
|
770
778
|
##
|
@@ -776,33 +784,33 @@ ATX headers can be empty:
|
|
776
784
|
<h3></h3>
|
777
785
|
.
|
778
786
|
|
779
|
-
## Setext
|
787
|
+
## Setext headings
|
780
788
|
|
781
|
-
A [setext
|
789
|
+
A [setext heading](@setext-heading)
|
782
790
|
consists of a line of text, containing at least one [non-whitespace character],
|
783
|
-
with no more than 3 spaces indentation, followed by a [setext
|
791
|
+
with no more than 3 spaces indentation, followed by a [setext heading
|
784
792
|
underline]. The line of text must be
|
785
|
-
one that, were it not followed by the setext
|
793
|
+
one that, were it not followed by the setext heading underline,
|
786
794
|
would be interpreted as part of a paragraph: it cannot be
|
787
|
-
interpretable as a [code fence], [ATX
|
788
|
-
[block quote][block quotes], [
|
795
|
+
interpretable as a [code fence], [ATX heading][ATX headings],
|
796
|
+
[block quote][block quotes], [thematic break][thematic breaks],
|
789
797
|
[list item][list items], or [HTML block][HTML blocks].
|
790
798
|
|
791
|
-
A [setext
|
799
|
+
A [setext heading underline](@setext-heading-underline) is a sequence of
|
792
800
|
`=` characters or a sequence of `-` characters, with no more than 3
|
793
801
|
spaces indentation and any number of trailing spaces. If a line
|
794
802
|
containing a single `-` can be interpreted as an
|
795
803
|
empty [list item][list items], it should be interpreted this way
|
796
|
-
and not as a [setext
|
804
|
+
and not as a [setext heading underline].
|
797
805
|
|
798
|
-
The
|
799
|
-
[setext
|
800
|
-
|
806
|
+
The heading is a level 1 heading if `=` characters are used in the
|
807
|
+
[setext heading underline], and a level 2
|
808
|
+
heading if `-` characters are used. The contents of the heading are the
|
801
809
|
result of parsing the first line as Markdown inline content.
|
802
810
|
|
803
|
-
In general, a setext
|
811
|
+
In general, a setext heading need not be preceded or followed by a
|
804
812
|
blank line. However, it cannot interrupt a paragraph, so when a
|
805
|
-
setext
|
813
|
+
setext heading comes after a paragraph, a blank line is needed between
|
806
814
|
them.
|
807
815
|
|
808
816
|
Simple examples:
|
@@ -831,7 +839,7 @@ Foo
|
|
831
839
|
<h1>Foo</h1>
|
832
840
|
.
|
833
841
|
|
834
|
-
The
|
842
|
+
The heading content can be indented up to three spaces, and need
|
835
843
|
not line up with the underlining:
|
836
844
|
|
837
845
|
.
|
@@ -866,7 +874,7 @@ Foo
|
|
866
874
|
<hr />
|
867
875
|
.
|
868
876
|
|
869
|
-
The setext
|
877
|
+
The setext heading underline can be indented up to three spaces, and
|
870
878
|
may have trailing spaces:
|
871
879
|
|
872
880
|
.
|
@@ -886,7 +894,7 @@ Foo
|
|
886
894
|
---</p>
|
887
895
|
.
|
888
896
|
|
889
|
-
The setext
|
897
|
+
The setext heading underline cannot contain internal spaces:
|
890
898
|
|
891
899
|
.
|
892
900
|
Foo
|
@@ -920,7 +928,7 @@ Foo\
|
|
920
928
|
.
|
921
929
|
|
922
930
|
Since indicators of block structure take precedence over
|
923
|
-
indicators of inline structure, the following are setext
|
931
|
+
indicators of inline structure, the following are setext headings:
|
924
932
|
|
925
933
|
.
|
926
934
|
`Foo
|
@@ -937,7 +945,7 @@ of dashes"/>
|
|
937
945
|
<p>of dashes"/></p>
|
938
946
|
.
|
939
947
|
|
940
|
-
The setext
|
948
|
+
The setext heading underline cannot be a [lazy continuation
|
941
949
|
line] in a list item or block quote:
|
942
950
|
|
943
951
|
.
|
@@ -960,7 +968,7 @@ line] in a list item or block quote:
|
|
960
968
|
<hr />
|
961
969
|
.
|
962
970
|
|
963
|
-
A setext
|
971
|
+
A setext heading cannot interrupt a paragraph:
|
964
972
|
|
965
973
|
.
|
966
974
|
Foo
|
@@ -995,7 +1003,7 @@ Baz
|
|
995
1003
|
<p>Baz</p>
|
996
1004
|
.
|
997
1005
|
|
998
|
-
Setext
|
1006
|
+
Setext headings cannot be empty:
|
999
1007
|
|
1000
1008
|
.
|
1001
1009
|
|
@@ -1004,9 +1012,9 @@ Setext headers cannot be empty:
|
|
1004
1012
|
<p>====</p>
|
1005
1013
|
.
|
1006
1014
|
|
1007
|
-
Setext
|
1015
|
+
Setext heading text lines must not be interpretable as block
|
1008
1016
|
constructs other than paragraphs. So, the line of dashes
|
1009
|
-
in these examples gets interpreted as a
|
1017
|
+
in these examples gets interpreted as a thematic break:
|
1010
1018
|
|
1011
1019
|
.
|
1012
1020
|
---
|
@@ -1045,7 +1053,7 @@ in these examples gets interpreted as a horizontal rule:
|
|
1045
1053
|
<hr />
|
1046
1054
|
.
|
1047
1055
|
|
1048
|
-
If you want a
|
1056
|
+
If you want a heading with `> foo` as its literal text, you can
|
1049
1057
|
use backslash escapes:
|
1050
1058
|
|
1051
1059
|
.
|
@@ -1192,17 +1200,17 @@ And indented code can occur immediately before and after other kinds of
|
|
1192
1200
|
blocks:
|
1193
1201
|
|
1194
1202
|
.
|
1195
|
-
#
|
1203
|
+
# Heading
|
1196
1204
|
foo
|
1197
|
-
|
1205
|
+
Heading
|
1198
1206
|
------
|
1199
1207
|
foo
|
1200
1208
|
----
|
1201
1209
|
.
|
1202
|
-
<h1>
|
1210
|
+
<h1>Heading</h1>
|
1203
1211
|
<pre><code>foo
|
1204
1212
|
</code></pre>
|
1205
|
-
<h2>
|
1213
|
+
<h2>Heading</h2>
|
1206
1214
|
<pre><code>foo
|
1207
1215
|
</code></pre>
|
1208
1216
|
<hr />
|
@@ -1363,7 +1371,7 @@ aaa
|
|
1363
1371
|
.
|
1364
1372
|
|
1365
1373
|
Unclosed code blocks are closed by the end of the document
|
1366
|
-
(or the enclosing [block quote] or [list item]):
|
1374
|
+
(or the enclosing [block quote][block quotes] or [list item][list items]):
|
1367
1375
|
|
1368
1376
|
.
|
1369
1377
|
```
|
@@ -1987,8 +1995,8 @@ p {color:blue;}
|
|
1987
1995
|
.
|
1988
1996
|
|
1989
1997
|
If there is no matching end tag, the block will end at the
|
1990
|
-
end of the document (or the enclosing [block quote]
|
1991
|
-
[list item]):
|
1998
|
+
end of the document (or the enclosing [block quote][block quotes]
|
1999
|
+
or [list item][list items]):
|
1992
2000
|
|
1993
2001
|
.
|
1994
2002
|
<style
|
@@ -2547,8 +2555,8 @@ Foo
|
|
2547
2555
|
<p>[bar]</p>
|
2548
2556
|
.
|
2549
2557
|
|
2550
|
-
However, it can directly follow other block elements, such as
|
2551
|
-
and
|
2558
|
+
However, it can directly follow other block elements, such as headings
|
2559
|
+
and thematic breaks, and it need not be followed by a blank line.
|
2552
2560
|
|
2553
2561
|
.
|
2554
2562
|
# [Foo]
|
@@ -3423,6 +3431,48 @@ A list item may contain any kind of block:
|
|
3423
3431
|
</ol>
|
3424
3432
|
.
|
3425
3433
|
|
3434
|
+
A list item that contains an indented code block will preserve
|
3435
|
+
empty lines within the code block verbatim, unless there are two
|
3436
|
+
or more empty lines in a row (since as described above, two
|
3437
|
+
blank lines end the list):
|
3438
|
+
|
3439
|
+
.
|
3440
|
+
- Foo
|
3441
|
+
|
3442
|
+
bar
|
3443
|
+
|
3444
|
+
baz
|
3445
|
+
.
|
3446
|
+
<ul>
|
3447
|
+
<li>
|
3448
|
+
<p>Foo</p>
|
3449
|
+
<pre><code>bar
|
3450
|
+
|
3451
|
+
baz
|
3452
|
+
</code></pre>
|
3453
|
+
</li>
|
3454
|
+
</ul>
|
3455
|
+
.
|
3456
|
+
|
3457
|
+
.
|
3458
|
+
- Foo
|
3459
|
+
|
3460
|
+
bar
|
3461
|
+
|
3462
|
+
|
3463
|
+
baz
|
3464
|
+
.
|
3465
|
+
<ul>
|
3466
|
+
<li>
|
3467
|
+
<p>Foo</p>
|
3468
|
+
<pre><code>bar
|
3469
|
+
</code></pre>
|
3470
|
+
</li>
|
3471
|
+
</ul>
|
3472
|
+
<pre><code> baz
|
3473
|
+
</code></pre>
|
3474
|
+
.
|
3475
|
+
|
3426
3476
|
Note that ordered list start numbers must be nine digits or less:
|
3427
3477
|
|
3428
3478
|
.
|
@@ -3994,7 +4044,7 @@ A list may be the first block in a list item:
|
|
3994
4044
|
</ol>
|
3995
4045
|
.
|
3996
4046
|
|
3997
|
-
A list item can contain a
|
4047
|
+
A list item can contain a heading:
|
3998
4048
|
|
3999
4049
|
.
|
4000
4050
|
- # Foo
|
@@ -4812,7 +4862,7 @@ not have their usual Markdown meanings:
|
|
4812
4862
|
\`not code`
|
4813
4863
|
1\. not a list
|
4814
4864
|
\* not a list
|
4815
|
-
\# not a
|
4865
|
+
\# not a heading
|
4816
4866
|
\[foo]: /url "not a reference"
|
4817
4867
|
.
|
4818
4868
|
<p>*not emphasized*
|
@@ -4821,7 +4871,7 @@ not have their usual Markdown meanings:
|
|
4821
4871
|
`not code`
|
4822
4872
|
1. not a list
|
4823
4873
|
* not a list
|
4824
|
-
# not a
|
4874
|
+
# not a heading
|
4825
4875
|
[foo]: /url "not a reference"</p>
|
4826
4876
|
.
|
4827
4877
|
|
@@ -4907,21 +4957,21 @@ foo
|
|
4907
4957
|
.
|
4908
4958
|
|
4909
4959
|
|
4910
|
-
##
|
4960
|
+
## Entity and numeric character references
|
4911
4961
|
|
4912
|
-
|
4913
|
-
|
4914
|
-
are recognized as such and
|
4915
|
-
|
4916
|
-
|
4917
|
-
|
4918
|
-
|
4962
|
+
All valid HTML entity references and numeric character
|
4963
|
+
references, except those occurring in code blocks, code spans,
|
4964
|
+
and raw HTML, are recognized as such and treated as equivalent to the
|
4965
|
+
corresponding Unicode characters. Conforming CommonMark parsers
|
4966
|
+
need not store information about whether a particular character
|
4967
|
+
was represented in the source using a Unicode character or
|
4968
|
+
an entity reference.
|
4919
4969
|
|
4920
|
-
[
|
4970
|
+
[Entity references](@entity-references) consist of `&` + any of the valid
|
4921
4971
|
HTML5 entity names + `;`. The
|
4922
|
-
|
4923
|
-
is used as an authoritative source
|
4924
|
-
corresponding code points.
|
4972
|
+
document <https://html.spec.whatwg.org/multipage/entities.json>
|
4973
|
+
is used as an authoritative source for the valid entity
|
4974
|
+
references and their corresponding code points.
|
4925
4975
|
|
4926
4976
|
.
|
4927
4977
|
& © Æ Ď
|
@@ -4933,10 +4983,11 @@ corresponding code points.
|
|
4933
4983
|
∲ ≧̸</p>
|
4934
4984
|
.
|
4935
4985
|
|
4936
|
-
[Decimal
|
4937
|
-
|
4938
|
-
|
4939
|
-
|
4986
|
+
[Decimal numeric character
|
4987
|
+
references](@decimal-numeric-character-references)
|
4988
|
+
consist of `&#` + a string of 1--8 Arabic digits + `;`. A
|
4989
|
+
numeric character reference is parsed as the corresponding
|
4990
|
+
Unicode character. Invalid Unicode code points will be replaced by
|
4940
4991
|
the "unknown code point" character (`U+FFFD`). For security reasons,
|
4941
4992
|
the code point `U+0000` will also be replaced by `U+FFFD`.
|
4942
4993
|
|
@@ -4946,10 +4997,11 @@ the code point `U+0000` will also be replaced by `U+FFFD`.
|
|
4946
4997
|
<p># Ӓ Ϡ � �</p>
|
4947
4998
|
.
|
4948
4999
|
|
4949
|
-
[Hexadecimal
|
4950
|
-
|
4951
|
-
|
4952
|
-
|
5000
|
+
[Hexadecimal numeric character
|
5001
|
+
references](@hexadecimal-numeric-character-references) consist of `&#` +
|
5002
|
+
either `X` or `x` + a string of 1--8 hexadecimal digits + `;`.
|
5003
|
+
They too are parsed as the corresponding Unicode character (this
|
5004
|
+
time specified with a hexadecimal numeral instead of decimal).
|
4953
5005
|
|
4954
5006
|
.
|
4955
5007
|
" ആ ಫ
|
@@ -4960,14 +5012,16 @@ AST.
|
|
4960
5012
|
Here are some nonentities:
|
4961
5013
|
|
4962
5014
|
.
|
4963
|
-
  &x; &#; &#x;
|
5015
|
+
  &x; &#; &#x;
|
5016
|
+
&ThisIsWayTooLongToBeAnEntityIsntIt; &hi?;
|
4964
5017
|
.
|
4965
|
-
<p>&nbsp &x; &#; &#x;
|
5018
|
+
<p>&nbsp &x; &#; &#x;
|
5019
|
+
&ThisIsWayTooLongToBeAnEntityIsntIt; &hi?;</p>
|
4966
5020
|
.
|
4967
5021
|
|
4968
|
-
Although HTML5 does accept some
|
4969
|
-
(such as `©`), these are not
|
4970
|
-
makes the grammar too ambiguous:
|
5022
|
+
Although HTML5 does accept some entity references
|
5023
|
+
without a trailing semicolon (such as `&copy`), these are not
|
5024
|
+
recognized here, because it makes the grammar too ambiguous:
|
4971
5025
|
|
4972
5026
|
.
|
4973
5027
|
&copy
|
@@ -4976,7 +5030,7 @@ makes the grammar too ambiguous:
|
|
4976
5030
|
.
|
4977
5031
|
|
4978
5032
|
Strings that are not on the list of HTML5 named entities are not
|
4979
|
-
recognized as
|
5033
|
+
recognized as entity references either:
|
4980
5034
|
|
4981
5035
|
.
|
4982
5036
|
&MadeUpEntity;
|
@@ -4984,9 +5038,9 @@ recognized as entities either:
|
|
4984
5038
|
<p>&MadeUpEntity;</p>
|
4985
5039
|
.
|
4986
5040
|
|
4987
|
-
|
4988
|
-
code blocks
|
4989
|
-
[fenced code block] [info string]s:
|
5041
|
+
Entity and numeric character references are recognized in any
|
5042
|
+
context besides code spans or code blocks or raw HTML, including
|
5043
|
+
URLs, [link title]s, and [fenced code block][] [info string]s:
|
4990
5044
|
|
4991
5045
|
.
|
4992
5046
|
<a href="öö.html">
|
@@ -5017,7 +5071,8 @@ foo
|
|
5017
5071
|
</code></pre>
|
5018
5072
|
.
|
5019
5073
|
|
5020
|
-
|
5074
|
+
Entity and numeric character references are treated as literal
|
5075
|
+
text in code spans and code blocks, and in raw HTML:
|
5021
5076
|
|
5022
5077
|
.
|
5023
5078
|
`föö`
|
@@ -5032,6 +5087,12 @@ Entities are treated as literal text in code spans and code blocks:
|
|
5032
5087
|
</code></pre>
|
5033
5088
|
.
|
5034
5089
|
|
5090
|
+
.
|
5091
|
+
<a href="föfö"/>
|
5092
|
+
.
|
5093
|
+
<a href="föfö"/>
|
5094
|
+
.
|
5095
|
+
|
5035
5096
|
## Code spans
|
5036
5097
|
|
5037
5098
|
A [backtick string](@backtick-string)
|
@@ -5304,7 +5365,7 @@ The following rules define emphasis and strong emphasis:
|
|
5304
5365
|
2. A single `_` character [can open emphasis] iff
|
5305
5366
|
it is part of a [left-flanking delimiter run]
|
5306
5367
|
and either (a) not part of a [right-flanking delimiter run]
|
5307
|
-
or (b) part of a [right-flanking
|
5368
|
+
or (b) part of a [right-flanking delimiter run]
|
5308
5369
|
preceded by punctuation.
|
5309
5370
|
|
5310
5371
|
3. A single `*` character [can close emphasis](@can-close-emphasis)
|
@@ -5313,7 +5374,7 @@ The following rules define emphasis and strong emphasis:
|
|
5313
5374
|
4. A single `_` character [can close emphasis] iff
|
5314
5375
|
it is part of a [right-flanking delimiter run]
|
5315
5376
|
and either (a) not part of a [left-flanking delimiter run]
|
5316
|
-
or (b) part of a [left-flanking
|
5377
|
+
or (b) part of a [left-flanking delimiter run]
|
5317
5378
|
followed by punctuation.
|
5318
5379
|
|
5319
5380
|
5. A double `**` [can open strong emphasis](@can-open-strong-emphasis)
|
@@ -5322,7 +5383,7 @@ The following rules define emphasis and strong emphasis:
|
|
5322
5383
|
6. A double `__` [can open strong emphasis] iff
|
5323
5384
|
it is part of a [left-flanking delimiter run]
|
5324
5385
|
and either (a) not part of a [right-flanking delimiter run]
|
5325
|
-
or (b) part of a [right-flanking
|
5386
|
+
or (b) part of a [right-flanking delimiter run]
|
5326
5387
|
preceded by punctuation.
|
5327
5388
|
|
5328
5389
|
7. A double `**` [can close strong emphasis](@can-close-strong-emphasis)
|
@@ -5331,7 +5392,7 @@ The following rules define emphasis and strong emphasis:
|
|
5331
5392
|
8. A double `__` [can close strong emphasis] iff
|
5332
5393
|
it is part of a [right-flanking delimiter run]
|
5333
5394
|
and either (a) not part of a [left-flanking delimiter run]
|
5334
|
-
or (b) part of a [left-flanking
|
5395
|
+
or (b) part of a [left-flanking delimiter run]
|
5335
5396
|
followed by punctuation.
|
5336
5397
|
|
5337
5398
|
9. Emphasis begins with a delimiter that [can open emphasis] and ends
|
@@ -6555,11 +6616,11 @@ A link can contain fragment identifiers and queries:
|
|
6555
6616
|
|
6556
6617
|
[link](http://example.com#fragment)
|
6557
6618
|
|
6558
|
-
[link](http://example.com?foo=
|
6619
|
+
[link](http://example.com?foo=3#frag)
|
6559
6620
|
.
|
6560
6621
|
<p><a href="#fragment">link</a></p>
|
6561
6622
|
<p><a href="http://example.com#fragment">link</a></p>
|
6562
|
-
<p><a href="http://example.com?foo=
|
6623
|
+
<p><a href="http://example.com?foo=3#frag">link</a></p>
|
6563
6624
|
.
|
6564
6625
|
|
6565
6626
|
Note that a backslash before a non-escapable character is
|
@@ -6572,9 +6633,13 @@ just a backslash:
|
|
6572
6633
|
.
|
6573
6634
|
|
6574
6635
|
URL-escaping should be left alone inside the destination, as all
|
6575
|
-
URL-escaped characters are also valid URL characters.
|
6576
|
-
the destination will be parsed
|
6577
|
-
code points, as usual
|
6636
|
+
URL-escaped characters are also valid URL characters. Entity and
|
6637
|
+
numeric character references in the destination will be parsed
|
6638
|
+
into the corresponding Unicode code points, as usual. These may
|
6639
|
+
be optionally URL-escaped when written as HTML, but this spec
|
6640
|
+
does not enforce any particular policy for rendering URLs in
|
6641
|
+
HTML or other formats. Renderers may make different decisions
|
6642
|
+
about how to escape or normalize URLs in the output.
|
6578
6643
|
|
6579
6644
|
.
|
6580
6645
|
[link](foo%20bä)
|
@@ -6604,7 +6669,8 @@ Titles may be in single quotes, double quotes, or parentheses:
|
|
6604
6669
|
<a href="/url" title="title">link</a></p>
|
6605
6670
|
.
|
6606
6671
|
|
6607
|
-
Backslash escapes and
|
6672
|
+
Backslash escapes and entity and numeric character references
|
6673
|
+
may be used in titles:
|
6608
6674
|
|
6609
6675
|
.
|
6610
6676
|
[link](/url "title \""")
|
@@ -6632,15 +6698,16 @@ But it is easy to work around this by using a different quote type:
|
|
6632
6698
|
title, and its test suite included a test demonstrating this.
|
6633
6699
|
But it is hard to see a good rationale for the extra complexity this
|
6634
6700
|
brings, since there are already many ways---backslash escaping,
|
6635
|
-
|
6636
|
-
|
6637
|
-
|
6638
|
-
|
6639
|
-
|
6640
|
-
|
6641
|
-
|
6642
|
-
|
6643
|
-
|
6701
|
+
entity and numeric character references, or using a different
|
6702
|
+
quote type for the enclosing title---to write titles containing
|
6703
|
+
double quotes. `Markdown.pl`'s handling of titles has a number
|
6704
|
+
of other strange features. For example, it allows single-quoted
|
6705
|
+
titles in inline links, but not reference links. And, in
|
6706
|
+
reference links but not inline links, it allows a title to begin
|
6707
|
+
with `"` and end with `)`. `Markdown.pl` 1.0.1 even allows
|
6708
|
+
titles with no closing quotation mark, though 1.0.2b8 does not.
|
6709
|
+
It seems preferable to adopt a simple, rational rule that works
|
6710
|
+
the same way in inline links and link reference definitions.)
|
6644
6711
|
|
6645
6712
|
[Whitespace] is allowed around the destination and title:
|
6646
6713
|
|
@@ -6771,7 +6838,7 @@ There are three kinds of [reference link](@reference-link)s:
|
|
6771
6838
|
and [shortcut](#shortcut-reference-link).
|
6772
6839
|
|
6773
6840
|
A [full reference link](@full-reference-link)
|
6774
|
-
consists of a [link text]
|
6841
|
+
consists of a [link text] immediately followed by a [link label]
|
6775
6842
|
that [matches] a [link reference definition] elsewhere in the document.
|
6776
6843
|
|
6777
6844
|
A [link label](@link-label) begins with a left bracket (`[`) and ends
|
@@ -6941,14 +7008,15 @@ purposes of determining matching:
|
|
6941
7008
|
<p><a href="/url">Baz</a></p>
|
6942
7009
|
.
|
6943
7010
|
|
6944
|
-
|
7011
|
+
No [whitespace] is allowed between the [link text] and the
|
7012
|
+
[link label]:
|
6945
7013
|
|
6946
7014
|
.
|
6947
7015
|
[foo] [bar]
|
6948
7016
|
|
6949
7017
|
[bar]: /url "title"
|
6950
7018
|
.
|
6951
|
-
<p
|
7019
|
+
<p>[foo] <a href="/url" title="title">bar</a></p>
|
6952
7020
|
.
|
6953
7021
|
|
6954
7022
|
.
|
@@ -6957,9 +7025,37 @@ There can be [whitespace] between the [link text] and the [link label]:
|
|
6957
7025
|
|
6958
7026
|
[bar]: /url "title"
|
6959
7027
|
.
|
6960
|
-
<p
|
7028
|
+
<p>[foo]
|
7029
|
+
<a href="/url" title="title">bar</a></p>
|
6961
7030
|
.
|
6962
7031
|
|
7032
|
+
This is a departure from John Gruber's original Markdown syntax
|
7033
|
+
description, which explicitly allows whitespace between the link
|
7034
|
+
text and the link label. It brings reference links in line with
|
7035
|
+
[inline link]s, which (according to both original Markdown and
|
7036
|
+
this spec) cannot have whitespace after the link text. More
|
7037
|
+
importantly, it prevents inadvertent capture of consecutive
|
7038
|
+
[shortcut reference link]s. If whitespace is allowed between the
|
7039
|
+
link text and the link label, then in the following we will have
|
7040
|
+
a single reference link, not two shortcut reference links, as
|
7041
|
+
intended:
|
7042
|
+
|
7043
|
+
``` markdown
|
7044
|
+
[foo]
|
7045
|
+
[bar]
|
7046
|
+
|
7047
|
+
[foo]: /url1
|
7048
|
+
[bar]: /url2
|
7049
|
+
```
|
7050
|
+
|
7051
|
+
(Note that [shortcut reference link]s were introduced by Gruber
|
7052
|
+
himself in a beta version of `Markdown.pl`, but never included
|
7053
|
+
in the official syntax description. Without shortcut reference
|
7054
|
+
links, it is harmless to allow space between the link text and
|
7055
|
+
link label; but once shortcut references are introduced, it is
|
7056
|
+
too dangerous to allow this, as it frequently leads to
|
7057
|
+
unintended results.)
|
7058
|
+
|
6963
7059
|
When there are multiple matching [link reference definition]s,
|
6964
7060
|
the first is used:
|
6965
7061
|
|
@@ -7023,6 +7119,16 @@ backslash-escaped:
|
|
7023
7119
|
<p><a href="/uri">foo</a></p>
|
7024
7120
|
.
|
7025
7121
|
|
7122
|
+
Note that in this example `]` is not backslash-escaped:
|
7123
|
+
|
7124
|
+
.
|
7125
|
+
[bar\\]: /uri
|
7126
|
+
|
7127
|
+
[bar\\]
|
7128
|
+
.
|
7129
|
+
<p><a href="/uri">bar\</a></p>
|
7130
|
+
.
|
7131
|
+
|
7026
7132
|
A [link label] must contain at least one [non-whitespace character]:
|
7027
7133
|
|
7028
7134
|
.
|
@@ -7050,7 +7156,7 @@ A [link label] must contain at least one [non-whitespace character]:
|
|
7050
7156
|
A [collapsed reference link](@collapsed-reference-link)
|
7051
7157
|
consists of a [link label] that [matches] a
|
7052
7158
|
[link reference definition] elsewhere in the
|
7053
|
-
document,
|
7159
|
+
document, followed by the string `[]`.
|
7054
7160
|
The contents of the first link label are parsed as inlines,
|
7055
7161
|
which are used as the link's text. The link's URI and title are
|
7056
7162
|
provided by the matching reference link definition. Thus,
|
@@ -7083,8 +7189,8 @@ The link labels are case-insensitive:
|
|
7083
7189
|
.
|
7084
7190
|
|
7085
7191
|
|
7086
|
-
As with full reference links, [whitespace] is
|
7087
|
-
between the two sets of brackets:
|
7192
|
+
As with full reference links, [whitespace] is not
|
7193
|
+
allowed between the two sets of brackets:
|
7088
7194
|
|
7089
7195
|
.
|
7090
7196
|
[foo]
|
@@ -7092,7 +7198,8 @@ between the two sets of brackets:
|
|
7092
7198
|
|
7093
7199
|
[foo]: /url "title"
|
7094
7200
|
.
|
7095
|
-
<p><a href="/url" title="title">foo</a
|
7201
|
+
<p><a href="/url" title="title">foo</a>
|
7202
|
+
[]</p>
|
7096
7203
|
.
|
7097
7204
|
|
7098
7205
|
A [shortcut reference link](@shortcut-reference-link)
|
@@ -7313,7 +7420,7 @@ My ![foo bar](/path/to/train.jpg "title" )
|
|
7313
7420
|
Reference-style:
|
7314
7421
|
|
7315
7422
|
.
|
7316
|
-
![foo]
|
7423
|
+
![foo][bar]
|
7317
7424
|
|
7318
7425
|
[bar]: /url
|
7319
7426
|
.
|
@@ -7321,7 +7428,7 @@ Reference-style:
|
|
7321
7428
|
.
|
7322
7429
|
|
7323
7430
|
.
|
7324
|
-
![foo]
|
7431
|
+
![foo][bar]
|
7325
7432
|
|
7326
7433
|
[BAR]: /url
|
7327
7434
|
.
|
@@ -7356,7 +7463,7 @@ The labels are case-insensitive:
|
|
7356
7463
|
<p><img src="/url" alt="Foo" title="title" /></p>
|
7357
7464
|
.
|
7358
7465
|
|
7359
|
-
As with
|
7466
|
+
As with reference links, [whitespace] is not allowed
|
7360
7467
|
between the two sets of brackets:
|
7361
7468
|
|
7362
7469
|
.
|
@@ -7365,7 +7472,8 @@ between the two sets of brackets:
|
|
7365
7472
|
|
7366
7473
|
[foo]: /url "title"
|
7367
7474
|
.
|
7368
|
-
<p><img src="/url" alt="foo" title="title"
|
7475
|
+
<p><img src="/url" alt="foo" title="title" />
|
7476
|
+
[]</p>
|
7369
7477
|
.
|
7370
7478
|
|
7371
7479
|
Shortcut:
|
@@ -7639,7 +7747,7 @@ consists of `"`, zero or more
|
|
7639
7747
|
characters not including `"`, and a final `"`.
|
7640
7748
|
|
7641
7749
|
An [open tag](@open-tag) consists of a `<` character, a [tag name],
|
7642
|
-
zero or more [
|
7750
|
+
zero or more [attribute]s, optional [whitespace], an optional `/`
|
7643
7751
|
character, and a `>` character.
|
7644
7752
|
|
7645
7753
|
A [closing tag](@closing-tag) consists of the string `</`, a
|
@@ -7707,16 +7815,9 @@ _boolean zoop:33=zoop:33 /></p>
|
|
7707
7815
|
Custom tag names can be used:
|
7708
7816
|
|
7709
7817
|
.
|
7710
|
-
<responsive-image src="foo.jpg" />
|
7711
|
-
|
7712
|
-
<My-Tag>
|
7713
|
-
foo
|
7714
|
-
</My-Tag>
|
7818
|
+
Foo <responsive-image src="foo.jpg" />
|
7715
7819
|
.
|
7716
|
-
<responsive-image src="foo.jpg"
|
7717
|
-
<My-Tag>
|
7718
|
-
foo
|
7719
|
-
</My-Tag>
|
7820
|
+
<p>Foo <responsive-image src="foo.jpg" /></p>
|
7720
7821
|
.
|
7721
7822
|
|
7722
7823
|
Illegal tag names, not parsed as HTML:
|
@@ -7764,11 +7865,9 @@ Missing [whitespace]:
|
|
7764
7865
|
Closing tags:
|
7765
7866
|
|
7766
7867
|
.
|
7767
|
-
</a>
|
7768
|
-
</foo >
|
7868
|
+
</a></foo >
|
7769
7869
|
.
|
7770
|
-
|
7771
|
-
</foo >
|
7870
|
+
<p></a></foo ></p>
|
7772
7871
|
.
|
7773
7872
|
|
7774
7873
|
Illegal attributes in closing tag:
|
@@ -7830,20 +7929,21 @@ foo <![CDATA[>&<]]>
|
|
7830
7929
|
<p>foo <![CDATA[>&<]]></p>
|
7831
7930
|
.
|
7832
7931
|
|
7833
|
-
|
7932
|
+
Entity and numeric character references are preserved in HTML
|
7933
|
+
attributes:
|
7834
7934
|
|
7835
7935
|
.
|
7836
|
-
<a href="ö">
|
7936
|
+
foo <a href="&ouml;">
|
7837
7937
|
.
|
7838
|
-
<a href="ö">
|
7938
|
+
<p>foo <a href="&ouml;"></p>
|
7839
7939
|
.
|
7840
7940
|
|
7841
7941
|
Backslash escapes do not work in HTML attributes:
|
7842
7942
|
|
7843
7943
|
.
|
7844
|
-
<a href="\*">
|
7944
|
+
foo <a href="\*">
|
7845
7945
|
.
|
7846
|
-
<a href="\*">
|
7946
|
+
<p>foo <a href="\*"></p>
|
7847
7947
|
.
|
7848
7948
|
|
7849
7949
|
.
|
@@ -8062,7 +8162,7 @@ list items, and so on---is constructed. Text is assigned to these
|
|
8062
8162
|
blocks but not parsed. Link reference definitions are parsed and a
|
8063
8163
|
map of links is constructed.
|
8064
8164
|
|
8065
|
-
2. In the second phase, the raw text contents of paragraphs and
|
8165
|
+
2. In the second phase, the raw text contents of paragraphs and headings
|
8066
8166
|
are parsed into sequences of Markdown inline elements (strings,
|
8067
8167
|
code spans, links, emphasis, and so on), using the map of link
|
8068
8168
|
references constructed in phase 1.
|
@@ -8125,10 +8225,10 @@ matched block.
|
|
8125
8225
|
3. Finally, we look at the remainder of the line (after block
|
8126
8226
|
markers like `>`, list markers, and indentation have been consumed).
|
8127
8227
|
This is text that can be incorporated into the last open
|
8128
|
-
block (a paragraph, code block,
|
8228
|
+
block (a paragraph, code block, heading, or raw HTML).
|
8129
8229
|
|
8130
|
-
Setext
|
8131
|
-
a paragraph is a setext
|
8230
|
+
Setext headings are formed when we detect that the second line of
|
8231
|
+
a paragraph is a setext heading line.
|
8132
8232
|
|
8133
8233
|
Reference link definitions are detected when a paragraph is closed;
|
8134
8234
|
the accumulated text lines are parsed to see if they begin with
|
@@ -8237,7 +8337,7 @@ We thus obtain the final tree:
|
|
8237
8337
|
Once all of the input has been parsed, all open blocks are closed.
|
8238
8338
|
|
8239
8339
|
We then "walk the tree," visiting every node, and parse raw
|
8240
|
-
string contents of paragraphs and
|
8340
|
+
string contents of paragraphs and headings as inlines. At this
|
8241
8341
|
point we have seen all the link reference definitions, so we can
|
8242
8342
|
resolve reference links as we go.
|
8243
8343
|
|