commonmarker 0.5.1 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of commonmarker might be problematic. Click here for more details.

Files changed (71) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +23 -17
  3. data/ext/commonmarker/cmark/CMakeLists.txt +1 -1
  4. data/ext/commonmarker/cmark/Makefile +13 -14
  5. data/ext/commonmarker/cmark/README.md +1 -0
  6. data/ext/commonmarker/cmark/api_test/cplusplus.h +1 -2
  7. data/ext/commonmarker/cmark/api_test/harness.c +60 -79
  8. data/ext/commonmarker/cmark/api_test/harness.h +13 -20
  9. data/ext/commonmarker/cmark/api_test/main.c +809 -714
  10. data/ext/commonmarker/cmark/build/CMakeCache.txt +3 -0
  11. data/ext/commonmarker/cmark/build/CMakeFiles/Makefile.cmake +0 -67
  12. data/ext/commonmarker/cmark/build/api_test/CMakeFiles/api_test.dir/build.make +1 -1
  13. data/ext/commonmarker/cmark/build/api_test/CMakeFiles/api_test.dir/link.txt +1 -1
  14. data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark.dir/DependInfo.cmake +1 -1
  15. data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark.dir/build.make +23 -23
  16. data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark.dir/cmake_clean.cmake +1 -1
  17. data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark.dir/link.txt +1 -1
  18. data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/blocks.c.o +0 -0
  19. data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/cmark.c.o +0 -0
  20. data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/cmark_ctype.c.o +0 -0
  21. data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/commonmark.c.o +0 -0
  22. data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/html.c.o +0 -0
  23. data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/inlines.c.o +0 -0
  24. data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/iterator.c.o +0 -0
  25. data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/latex.c.o +0 -0
  26. data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/man.c.o +0 -0
  27. data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/node.c.o +0 -0
  28. data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/render.c.o +0 -0
  29. data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/scanners.c.o +0 -0
  30. data/ext/commonmarker/cmark/build/src/CMakeFiles/libcmark_static.dir/xml.c.o +0 -0
  31. data/ext/commonmarker/cmark/build/src/cmake_install.cmake +10 -2
  32. data/ext/commonmarker/cmark/build/src/cmark_version.h +2 -2
  33. data/ext/commonmarker/cmark/build/src/libcmark.a +0 -0
  34. data/ext/commonmarker/cmark/build/src/libcmark.pc +1 -1
  35. data/ext/commonmarker/cmark/build/testdir/CTestTestfile.cmake +1 -0
  36. data/ext/commonmarker/cmark/changelog.txt +85 -0
  37. data/ext/commonmarker/cmark/man/man3/cmark.3 +75 -34
  38. data/ext/commonmarker/cmark/src/CMakeLists.txt +13 -8
  39. data/ext/commonmarker/cmark/src/blocks.c +78 -70
  40. data/ext/commonmarker/cmark/src/chunk.h +5 -7
  41. data/ext/commonmarker/cmark/src/cmark.h +88 -34
  42. data/ext/commonmarker/cmark/src/cmark_ctype.c +6 -6
  43. data/ext/commonmarker/cmark/src/commonmark.c +24 -8
  44. data/ext/commonmarker/cmark/src/houdini_html_u.c +6 -5
  45. data/ext/commonmarker/cmark/src/html.c +33 -11
  46. data/ext/commonmarker/cmark/src/inlines.c +9 -10
  47. data/ext/commonmarker/cmark/src/iterator.c +2 -2
  48. data/ext/commonmarker/cmark/src/latex.c +54 -28
  49. data/ext/commonmarker/cmark/src/main.c +0 -9
  50. data/ext/commonmarker/cmark/src/man.c +17 -5
  51. data/ext/commonmarker/cmark/src/node.c +123 -44
  52. data/ext/commonmarker/cmark/src/node.h +8 -2
  53. data/ext/commonmarker/cmark/src/render.c +8 -1
  54. data/ext/commonmarker/cmark/src/render.h +1 -0
  55. data/ext/commonmarker/cmark/src/scanners.c +3755 -4379
  56. data/ext/commonmarker/cmark/src/scanners.h +7 -6
  57. data/ext/commonmarker/cmark/src/scanners.re +9 -10
  58. data/ext/commonmarker/cmark/src/utf8.c +6 -3
  59. data/ext/commonmarker/cmark/src/utf8.h +4 -2
  60. data/ext/commonmarker/cmark/src/xml.c +18 -4
  61. data/ext/commonmarker/cmark/test/CMakeLists.txt +11 -0
  62. data/ext/commonmarker/cmark/test/normalize.py +5 -1
  63. data/ext/commonmarker/cmark/test/roundtrip.bat +1 -0
  64. data/ext/commonmarker/cmark/test/roundtrip.sh +1 -1
  65. data/ext/commonmarker/cmark/test/spec.txt +257 -157
  66. data/ext/commonmarker/cmark/why-cmark-and-not-x.md +104 -0
  67. data/lib/commonmarker/config.rb +6 -6
  68. data/lib/commonmarker/version.rb +1 -1
  69. metadata +5 -5
  70. data/ext/commonmarker/cmark/src/bench.h +0 -27
  71. data/ext/commonmarker/cmark/wrappers/wrapper.lua +0 -239
@@ -21,9 +21,9 @@ bufsize_t _scan_html_block_end_5(const unsigned char *p);
21
21
  bufsize_t _scan_link_url(const unsigned char *p);
22
22
  bufsize_t _scan_link_title(const unsigned char *p);
23
23
  bufsize_t _scan_spacechars(const unsigned char *p);
24
- bufsize_t _scan_atx_header_start(const unsigned char *p);
25
- bufsize_t _scan_setext_header_line(const unsigned char *p);
26
- bufsize_t _scan_hrule(const unsigned char *p);
24
+ bufsize_t _scan_atx_heading_start(const unsigned char *p);
25
+ bufsize_t _scan_setext_heading_line(const unsigned char *p);
26
+ bufsize_t _scan_thematic_break(const unsigned char *p);
27
27
  bufsize_t _scan_open_code_fence(const unsigned char *p);
28
28
  bufsize_t _scan_close_code_fence(const unsigned char *p);
29
29
  bufsize_t _scan_entity(const unsigned char *p);
@@ -43,9 +43,10 @@ bufsize_t _scan_dangerous_url(const unsigned char *p);
43
43
  #define scan_link_url(c, n) _scan_at(&_scan_link_url, c, n)
44
44
  #define scan_link_title(c, n) _scan_at(&_scan_link_title, c, n)
45
45
  #define scan_spacechars(c, n) _scan_at(&_scan_spacechars, c, n)
46
- #define scan_atx_header_start(c, n) _scan_at(&_scan_atx_header_start, c, n)
47
- #define scan_setext_header_line(c, n) _scan_at(&_scan_setext_header_line, c, n)
48
- #define scan_hrule(c, n) _scan_at(&_scan_hrule, c, n)
46
+ #define scan_atx_heading_start(c, n) _scan_at(&_scan_atx_heading_start, c, n)
47
+ #define scan_setext_heading_line(c, n) \
48
+ _scan_at(&_scan_setext_heading_line, c, n)
49
+ #define scan_thematic_break(c, n) _scan_at(&_scan_thematic_break, c, n)
49
50
  #define scan_open_code_fence(c, n) _scan_at(&_scan_open_code_fence, c, n)
50
51
  #define scan_close_code_fence(c, n) _scan_at(&_scan_close_code_fence, c, n)
51
52
  #define scan_entity(c, n) _scan_at(&_scan_entity, c, n)
@@ -6,10 +6,9 @@ bufsize_t _scan_at(bufsize_t (*scanner)(const unsigned char *), cmark_chunk *c,
6
6
  {
7
7
  bufsize_t res;
8
8
  unsigned char *ptr = (unsigned char *)c->data;
9
- unsigned char zero = '\0';
10
9
 
11
- if (ptr == NULL) {
12
- res = scanner(&zero);
10
+ if (ptr == NULL || offset > c->len) {
11
+ return 0;
13
12
  } else {
14
13
  unsigned char lim = ptr[c->len];
15
14
 
@@ -216,7 +215,7 @@ bufsize_t _scan_link_url(const unsigned char *p)
216
215
  const unsigned char *start = p;
217
216
  /*!re2c
218
217
  [ \r\n]* [<] ([^<>\r\n\\\x00] | escaped_char | [\\])* [>] { return (bufsize_t)(p - start); }
219
- [ \r\n]* (reg_char+ | escaped_char | in_parens_nosp | [\\][^()])* { return (bufsize_t)(p - start); }
218
+ [ \r\n]* (reg_char+ | escaped_char | [\\] | in_parens_nosp)* { return (bufsize_t)(p - start); }
220
219
  .? { return 0; }
221
220
  */
222
221
  }
@@ -247,8 +246,8 @@ bufsize_t _scan_spacechars(const unsigned char *p)
247
246
  */
248
247
  }
249
248
 
250
- // Match ATX header start.
251
- bufsize_t _scan_atx_header_start(const unsigned char *p)
249
+ // Match ATX heading start.
250
+ bufsize_t _scan_atx_heading_start(const unsigned char *p)
252
251
  {
253
252
  const unsigned char *marker = NULL;
254
253
  const unsigned char *start = p;
@@ -258,9 +257,9 @@ bufsize_t _scan_atx_header_start(const unsigned char *p)
258
257
  */
259
258
  }
260
259
 
261
- // Match setext header line. Return 1 for level-1 header,
260
+ // Match setext heading line. Return 1 for level-1 heading,
262
261
  // 2 for level-2, 0 for no match.
263
- bufsize_t _scan_setext_header_line(const unsigned char *p)
262
+ bufsize_t _scan_setext_heading_line(const unsigned char *p)
264
263
  {
265
264
  const unsigned char *marker = NULL;
266
265
  /*!re2c
@@ -270,10 +269,10 @@ bufsize_t _scan_setext_header_line(const unsigned char *p)
270
269
  */
271
270
  }
272
271
 
273
- // Scan a horizontal rule line: "...three or more hyphens, asterisks,
272
+ // Scan a thematic break line: "...three or more hyphens, asterisks,
274
273
  // or underscores on a line by themselves. If you wish, you may use
275
274
  // spaces between the hyphens or asterisks."
276
- bufsize_t _scan_hrule(const unsigned char *p)
275
+ bufsize_t _scan_thematic_break(const unsigned char *p)
277
276
  {
278
277
  const unsigned char *marker = NULL;
279
278
  const unsigned char *start = p;
@@ -107,7 +107,8 @@ static int utf8proc_valid(const uint8_t *str, bufsize_t str_len) {
107
107
  return length;
108
108
  }
109
109
 
110
- void cmark_utf8proc_check(cmark_strbuf *ob, const uint8_t *line, bufsize_t size) {
110
+ void cmark_utf8proc_check(cmark_strbuf *ob, const uint8_t *line,
111
+ bufsize_t size) {
111
112
  bufsize_t i = 0;
112
113
 
113
114
  while (i < size) {
@@ -146,7 +147,8 @@ void cmark_utf8proc_check(cmark_strbuf *ob, const uint8_t *line, bufsize_t size)
146
147
  }
147
148
  }
148
149
 
149
- int cmark_utf8proc_iterate(const uint8_t *str, bufsize_t str_len, int32_t *dst) {
150
+ int cmark_utf8proc_iterate(const uint8_t *str, bufsize_t str_len,
151
+ int32_t *dst) {
150
152
  int length;
151
153
  int32_t uc = -1;
152
154
 
@@ -222,7 +224,8 @@ void cmark_utf8proc_encode_char(int32_t uc, cmark_strbuf *buf) {
222
224
  cmark_strbuf_put(buf, dst, len);
223
225
  }
224
226
 
225
- void cmark_utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str, bufsize_t len) {
227
+ void cmark_utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str,
228
+ bufsize_t len) {
226
229
  int32_t c;
227
230
 
228
231
  #define bufpush(x) cmark_utf8proc_encode_char(x, dest)
@@ -8,10 +8,12 @@
8
8
  extern "C" {
9
9
  #endif
10
10
 
11
- void cmark_utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str, bufsize_t len);
11
+ void cmark_utf8proc_case_fold(cmark_strbuf *dest, const uint8_t *str,
12
+ bufsize_t len);
12
13
  void cmark_utf8proc_encode_char(int32_t uc, cmark_strbuf *buf);
13
14
  int cmark_utf8proc_iterate(const uint8_t *str, bufsize_t str_len, int32_t *dst);
14
- void cmark_utf8proc_check(cmark_strbuf *dest, const uint8_t *line, bufsize_t size);
15
+ void cmark_utf8proc_check(cmark_strbuf *dest, const uint8_t *line,
16
+ bufsize_t size);
15
17
  int cmark_utf8proc_is_space(int32_t uc);
16
18
  int cmark_utf8proc_is_punctuation(int32_t uc);
17
19
 
@@ -50,10 +50,13 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type,
50
50
  literal = false;
51
51
 
52
52
  switch (node->type) {
53
+ case CMARK_NODE_DOCUMENT:
54
+ cmark_strbuf_puts(xml, " xmlns=\"http://commonmark.org/xml/1.0\"");
55
+ break;
53
56
  case CMARK_NODE_TEXT:
54
57
  case CMARK_NODE_CODE:
55
- case CMARK_NODE_HTML:
56
- case CMARK_NODE_INLINE_HTML:
58
+ case CMARK_NODE_HTML_BLOCK:
59
+ case CMARK_NODE_HTML_INLINE:
57
60
  cmark_strbuf_puts(xml, ">");
58
61
  escape_xml(xml, node->as.literal.data, node->as.literal.len);
59
62
  cmark_strbuf_puts(xml, "</");
@@ -83,8 +86,8 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type,
83
86
  (cmark_node_get_list_tight(node) ? "true" : "false"));
84
87
  cmark_strbuf_puts(xml, buffer);
85
88
  break;
86
- case CMARK_NODE_HEADER:
87
- sprintf(buffer, " level=\"%d\"", node->as.header.level);
89
+ case CMARK_NODE_HEADING:
90
+ sprintf(buffer, " level=\"%d\"", node->as.heading.level);
88
91
  cmark_strbuf_puts(xml, buffer);
89
92
  break;
90
93
  case CMARK_NODE_CODE_BLOCK:
@@ -99,6 +102,17 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type,
99
102
  cmark_strbuf_puts(xml, cmark_node_get_type_string(node));
100
103
  literal = true;
101
104
  break;
105
+ case CMARK_NODE_CUSTOM_BLOCK:
106
+ case CMARK_NODE_CUSTOM_INLINE:
107
+ cmark_strbuf_puts(xml, " on_enter=\"");
108
+ escape_xml(xml, node->as.custom.on_enter.data,
109
+ node->as.custom.on_enter.len);
110
+ cmark_strbuf_putc(xml, '"');
111
+ cmark_strbuf_puts(xml, " on_exit=\"");
112
+ escape_xml(xml, node->as.custom.on_exit.data,
113
+ node->as.custom.on_exit.len);
114
+ cmark_strbuf_putc(xml, '"');
115
+ break;
102
116
  case CMARK_NODE_LINK:
103
117
  case CMARK_NODE_IMAGE:
104
118
  cmark_strbuf_puts(xml, " destination=\"");
@@ -16,6 +16,9 @@ if (WIN32)
16
16
  set_tests_properties(api_test PROPERTIES
17
17
  ENVIRONMENT "PATH=${WIN_DLL_DIR};$ENV{PATH}"
18
18
  )
19
+ set(ROUNDTRIP,"${CMAKE_CURRENT_SOURCE_DIR}/roundtrip.bat")
20
+ else(WIN32)
21
+ set(ROUNDTRIP,"${CMAKE_CURRENT_SOURCE_DIR}/roundtrip.sh")
19
22
  endif(WIN32)
20
23
 
21
24
  IF (PYTHONINTERP_FOUND)
@@ -43,6 +46,14 @@ IF (PYTHONINTERP_FOUND)
43
46
  ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py" "--no-normalize" "--spec" "${CMAKE_CURRENT_SOURCE_DIR}/smart_punct.txt" "--program" "${CMAKE_CURRENT_BINARY_DIR}/../src/cmark --smart"
44
47
  )
45
48
 
49
+ add_test(roundtriptest_executable
50
+ ${PYTHON_EXECUTABLE}
51
+ "${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py" "--no-normalize"
52
+ "--spec" "${CMAKE_CURRENT_SOURCE_DIR}/spec.txt" "--program"
53
+ "${ROUNDTRIP} ${CMAKE_CURRENT_BINARY_DIR}/../src/cmark"
54
+ )
55
+
56
+
46
57
  ELSE(PYTHONINTERP_FOUND)
47
58
 
48
59
  message("\n*** A python 3 interpreter is required to run the spec tests.\n")
@@ -1,5 +1,6 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  from html.parser import HTMLParser
3
+ import urllib
3
4
 
4
5
  try:
5
6
  from html.parser import HTMLParseError
@@ -61,7 +62,10 @@ class MyHTMLParser(HTMLParser):
61
62
  attrs.sort()
62
63
  for (k,v) in attrs:
63
64
  self.output += " " + k
64
- if v != None:
65
+ if v in ['href','src']:
66
+ self.output += ("=" + '"' +
67
+ urllib.quote(urllib.unquote(v), safe='/') + '"')
68
+ elif v != None:
65
69
  self.output += ("=" + '"' + cgi.escape(v,quote=True) + '"')
66
70
  self.output += ">"
67
71
  self.last_tag = tag
@@ -0,0 +1 @@
1
+ "%1" -t commonmark | "%1"
@@ -1,2 +1,2 @@
1
1
  #!/bin/sh
2
- ./build/src/cmark -t commonmark | ./build/src/cmark
2
+ "$1" -t commonmark | "$1"
@@ -36,11 +36,11 @@ questions it does not answer:
36
36
  users in real documents. (See [this comment by John
37
37
  Gruber](http://article.gmane.org/gmane.text.markdown.general/1997).)
38
38
 
39
- 2. Is a blank line needed before a block quote or header?
39
+ 2. Is a blank line needed before a block quote or heading?
40
40
  Most implementations do not require the blank line. However,
41
41
  this can lead to unexpected results in hard-wrapped text, and
42
42
  also to ambiguities in parsing (note that some implementations
43
- put the header inside the blockquote, while others do not).
43
+ put the heading inside the blockquote, while others do not).
44
44
  (John Gruber has also spoken [in favor of requiring the blank
45
45
  lines](http://article.gmane.org/gmane.text.markdown.general/2146).)
46
46
 
@@ -85,8 +85,8 @@ questions it does not answer:
85
85
  10. item 2a
86
86
  ```
87
87
 
88
- 6. Is this one list with a horizontal rule in its second item,
89
- or two lists separated by a horizontal rule?
88
+ 6. Is this one list with a thematic break in its second item,
89
+ or two lists separated by a thematic break?
90
90
 
91
91
  ``` markdown
92
92
  * a
@@ -128,8 +128,8 @@ questions it does not answer:
128
128
  - and it can screw things up`
129
129
  ```
130
130
 
131
- 11. Can list items include section headers? (`Markdown.pl` does not
132
- allow this, but does allow blockquotes to include headers.)
131
+ 11. Can list items include section headings? (`Markdown.pl` does not
132
+ allow this, but does allow blockquotes to include headings.)
133
133
 
134
134
  ``` markdown
135
135
  - # Heading
@@ -325,9 +325,9 @@ with the replacement character (`U+FFFD`).
325
325
 
326
326
  We can think of a document as a sequence of
327
327
  [blocks](@block)---structural elements like paragraphs, block
328
- quotations, lists, headers, rules, and code blocks. Some blocks (like
328
+ quotations, lists, headings, rules, and code blocks. Some blocks (like
329
329
  block quotes and list items) contain other blocks; others (like
330
- headers and paragraphs) contain [inline](@inline) content---text,
330
+ headings and paragraphs) contain [inline](@inline) content---text,
331
331
  links, emphasized text, images, code, and so on.
332
332
 
333
333
  ## Precedence
@@ -348,7 +348,7 @@ two items, not a list with one item containing a code span:
348
348
 
349
349
  This means that parsing can proceed in two steps: first, the block
350
350
  structure of the document can be discerned; second, text lines inside
351
- paragraphs, headers, and other block constructs can be parsed for inline
351
+ paragraphs, headings, and other block constructs can be parsed for inline
352
352
  structure. The second step requires information about link reference
353
353
  definitions that will be available only at the end of the first
354
354
  step. Note that the first step requires processing lines in sequence,
@@ -367,12 +367,12 @@ which cannot.
367
367
  This section describes the different kinds of leaf block that make up a
368
368
  Markdown document.
369
369
 
370
- ## Horizontal rules
370
+ ## Thematic breaks
371
371
 
372
372
  A line consisting of 0-3 spaces of indentation, followed by a sequence
373
373
  of three or more matching `-`, `_`, or `*` characters, each followed
374
374
  optionally by any number of spaces, forms a
375
- [horizontal rule](@horizontal-rule).
375
+ [thematic break](@thematic-break).
376
376
 
377
377
  .
378
378
  ***
@@ -490,7 +490,7 @@ a------
490
490
  .
491
491
 
492
492
  It is required that all of the [non-whitespace character]s be the same.
493
- So, this is not a horizontal rule:
493
+ So, this is not a thematic break:
494
494
 
495
495
  .
496
496
  *-*
@@ -498,7 +498,7 @@ So, this is not a horizontal rule:
498
498
  <p><em>-</em></p>
499
499
  .
500
500
 
501
- Horizontal rules do not need blank lines before or after:
501
+ Thematic breaks do not need blank lines before or after:
502
502
 
503
503
  .
504
504
  - foo
@@ -514,7 +514,7 @@ Horizontal rules do not need blank lines before or after:
514
514
  </ul>
515
515
  .
516
516
 
517
- Horizontal rules can interrupt a paragraph:
517
+ Thematic breaks can interrupt a paragraph:
518
518
 
519
519
  .
520
520
  Foo
@@ -527,10 +527,10 @@ bar
527
527
  .
528
528
 
529
529
  If a line of dashes that meets the above conditions for being a
530
- horizontal rule could also be interpreted as the underline of a [setext
531
- header], the interpretation as a
532
- [setext header] takes precedence. Thus, for example,
533
- this is a setext header, not a paragraph followed by a horizontal rule:
530
+ thematic break could also be interpreted as the underline of a [setext
531
+ heading], the interpretation as a
532
+ [setext heading] takes precedence. Thus, for example,
533
+ this is a setext heading, not a paragraph followed by a thematic break:
534
534
 
535
535
  .
536
536
  Foo
@@ -541,8 +541,8 @@ bar
541
541
  <p>bar</p>
542
542
  .
543
543
 
544
- When both a horizontal rule and a list item are possible
545
- interpretations of a line, the horizontal rule takes precedence:
544
+ When both a thematic break and a list item are possible
545
+ interpretations of a line, the thematic break takes precedence:
546
546
 
547
547
  .
548
548
  * Foo
@@ -558,7 +558,7 @@ interpretations of a line, the horizontal rule takes precedence:
558
558
  </ul>
559
559
  .
560
560
 
561
- If you want a horizontal rule in a list item, use a different bullet:
561
+ If you want a thematic break in a list item, use a different bullet:
562
562
 
563
563
  .
564
564
  - Foo
@@ -572,21 +572,21 @@ If you want a horizontal rule in a list item, use a different bullet:
572
572
  </ul>
573
573
  .
574
574
 
575
- ## ATX headers
575
+ ## ATX headings
576
576
 
577
- An [ATX header](@atx-header)
577
+ An [ATX heading](@atx-heading)
578
578
  consists of a string of characters, parsed as inline content, between an
579
579
  opening sequence of 1--6 unescaped `#` characters and an optional
580
580
  closing sequence of any number of unescaped `#` characters.
581
- The opening sequence of `#` characters cannot be followed directly by a
582
- [non-whitespace character]. The optional closing sequence of `#`s must be
581
+ The opening sequence of `#` characters must be followed by a
582
+ [space] or by the end of line. The optional closing sequence of `#`s must be
583
583
  preceded by a [space] and may be followed by spaces only. The opening
584
584
  `#` character may be indented 0-3 spaces. The raw contents of the
585
- header are stripped of leading and trailing spaces before being parsed
586
- as inline content. The header level is equal to the number of `#`
585
+ heading are stripped of leading and trailing spaces before being parsed
586
+ as inline content. The heading level is equal to the number of `#`
587
587
  characters in the opening sequence.
588
588
 
589
- Simple headers:
589
+ Simple headings:
590
590
 
591
591
  .
592
592
  # foo
@@ -604,7 +604,7 @@ Simple headers:
604
604
  <h6>foo</h6>
605
605
  .
606
606
 
607
- More than six `#` characters is not a header:
607
+ More than six `#` characters is not a heading:
608
608
 
609
609
  .
610
610
  ####### foo
@@ -613,23 +613,31 @@ More than six `#` characters is not a header:
613
613
  .
614
614
 
615
615
  At least one space is required between the `#` characters and the
616
- header's contents, unless the header is empty. Note that many
616
+ heading's contents, unless the heading is empty. Note that many
617
617
  implementations currently do not require the space. However, the
618
618
  space was required by the
619
619
  [original ATX implementation](http://www.aaronsw.com/2002/atx/atx.py),
620
620
  and it helps prevent things like the following from being parsed as
621
- headers:
621
+ headings:
622
622
 
623
623
  .
624
624
  #5 bolt
625
625
 
626
- #foobar
626
+ #hashtag
627
627
  .
628
628
  <p>#5 bolt</p>
629
- <p>#foobar</p>
629
+ <p>#hashtag</p>
630
630
  .
631
631
 
632
- This is not a header, because the first `#` is escaped:
632
+ A tab will not work:
633
+
634
+ .
635
+ #→foo
636
+ .
637
+ <p>#→foo</p>
638
+ .
639
+
640
+ This is not a heading, because the first `#` is escaped:
633
641
 
634
642
  .
635
643
  \## foo
@@ -712,7 +720,7 @@ Spaces are allowed after the closing sequence:
712
720
 
713
721
  A sequence of `#` characters with anything but [space]s following it
714
722
  is not a closing sequence, but counts as part of the contents of the
715
- header:
723
+ heading:
716
724
 
717
725
  .
718
726
  ### foo ### b
@@ -741,7 +749,7 @@ of the closing sequence:
741
749
  <h1>foo #</h1>
742
750
  .
743
751
 
744
- ATX headers need not be separated from surrounding content by blank
752
+ ATX headings need not be separated from surrounding content by blank
745
753
  lines, and they can interrupt paragraphs:
746
754
 
747
755
  .
@@ -764,7 +772,7 @@ Bar foo
764
772
  <p>Bar foo</p>
765
773
  .
766
774
 
767
- ATX headers can be empty:
775
+ ATX headings can be empty:
768
776
 
769
777
  .
770
778
  ##
@@ -776,33 +784,33 @@ ATX headers can be empty:
776
784
  <h3></h3>
777
785
  .
778
786
 
779
- ## Setext headers
787
+ ## Setext headings
780
788
 
781
- A [setext header](@setext-header)
789
+ A [setext heading](@setext-heading)
782
790
  consists of a line of text, containing at least one [non-whitespace character],
783
- with no more than 3 spaces indentation, followed by a [setext header
791
+ with no more than 3 spaces indentation, followed by a [setext heading
784
792
  underline]. The line of text must be
785
- one that, were it not followed by the setext header underline,
793
+ one that, were it not followed by the setext heading underline,
786
794
  would be interpreted as part of a paragraph: it cannot be
787
- interpretable as a [code fence], [ATX header][ATX headers],
788
- [block quote][block quotes], [horizontal rule][horizontal rules],
795
+ interpretable as a [code fence], [ATX heading][ATX headings],
796
+ [block quote][block quotes], [thematic break][thematic breaks],
789
797
  [list item][list items], or [HTML block][HTML blocks].
790
798
 
791
- A [setext header underline](@setext-header-underline) is a sequence of
799
+ A [setext heading underline](@setext-heading-underline) is a sequence of
792
800
  `=` characters or a sequence of `-` characters, with no more than 3
793
801
  spaces indentation and any number of trailing spaces. If a line
794
802
  containing a single `-` can be interpreted as an
795
803
  empty [list item][list items], it should be interpreted this way
796
- and not as a [setext header underline].
804
+ and not as a [setext heading underline].
797
805
 
798
- The header is a level 1 header if `=` characters are used in the
799
- [setext header underline], and a level 2
800
- header if `-` characters are used. The contents of the header are the
806
+ The heading is a level 1 heading if `=` characters are used in the
807
+ [setext heading underline], and a level 2
808
+ heading if `-` characters are used. The contents of the heading are the
801
809
  result of parsing the first line as Markdown inline content.
802
810
 
803
- In general, a setext header need not be preceded or followed by a
811
+ In general, a setext heading need not be preceded or followed by a
804
812
  blank line. However, it cannot interrupt a paragraph, so when a
805
- setext header comes after a paragraph, a blank line is needed between
813
+ setext heading comes after a paragraph, a blank line is needed between
806
814
  them.
807
815
 
808
816
  Simple examples:
@@ -831,7 +839,7 @@ Foo
831
839
  <h1>Foo</h1>
832
840
  .
833
841
 
834
- The header content can be indented up to three spaces, and need
842
+ The heading content can be indented up to three spaces, and need
835
843
  not line up with the underlining:
836
844
 
837
845
  .
@@ -866,7 +874,7 @@ Foo
866
874
  <hr />
867
875
  .
868
876
 
869
- The setext header underline can be indented up to three spaces, and
877
+ The setext heading underline can be indented up to three spaces, and
870
878
  may have trailing spaces:
871
879
 
872
880
  .
@@ -886,7 +894,7 @@ Foo
886
894
  ---</p>
887
895
  .
888
896
 
889
- The setext header underline cannot contain internal spaces:
897
+ The setext heading underline cannot contain internal spaces:
890
898
 
891
899
  .
892
900
  Foo
@@ -920,7 +928,7 @@ Foo\
920
928
  .
921
929
 
922
930
  Since indicators of block structure take precedence over
923
- indicators of inline structure, the following are setext headers:
931
+ indicators of inline structure, the following are setext headings:
924
932
 
925
933
  .
926
934
  `Foo
@@ -937,7 +945,7 @@ of dashes"/>
937
945
  <p>of dashes&quot;/&gt;</p>
938
946
  .
939
947
 
940
- The setext header underline cannot be a [lazy continuation
948
+ The setext heading underline cannot be a [lazy continuation
941
949
  line] in a list item or block quote:
942
950
 
943
951
  .
@@ -960,7 +968,7 @@ line] in a list item or block quote:
960
968
  <hr />
961
969
  .
962
970
 
963
- A setext header cannot interrupt a paragraph:
971
+ A setext heading cannot interrupt a paragraph:
964
972
 
965
973
  .
966
974
  Foo
@@ -995,7 +1003,7 @@ Baz
995
1003
  <p>Baz</p>
996
1004
  .
997
1005
 
998
- Setext headers cannot be empty:
1006
+ Setext headings cannot be empty:
999
1007
 
1000
1008
  .
1001
1009
 
@@ -1004,9 +1012,9 @@ Setext headers cannot be empty:
1004
1012
  <p>====</p>
1005
1013
  .
1006
1014
 
1007
- Setext header text lines must not be interpretable as block
1015
+ Setext heading text lines must not be interpretable as block
1008
1016
  constructs other than paragraphs. So, the line of dashes
1009
- in these examples gets interpreted as a horizontal rule:
1017
+ in these examples gets interpreted as a thematic break:
1010
1018
 
1011
1019
  .
1012
1020
  ---
@@ -1045,7 +1053,7 @@ in these examples gets interpreted as a horizontal rule:
1045
1053
  <hr />
1046
1054
  .
1047
1055
 
1048
- If you want a header with `> foo` as its literal text, you can
1056
+ If you want a heading with `> foo` as its literal text, you can
1049
1057
  use backslash escapes:
1050
1058
 
1051
1059
  .
@@ -1192,17 +1200,17 @@ And indented code can occur immediately before and after other kinds of
1192
1200
  blocks:
1193
1201
 
1194
1202
  .
1195
- # Header
1203
+ # Heading
1196
1204
  foo
1197
- Header
1205
+ Heading
1198
1206
  ------
1199
1207
  foo
1200
1208
  ----
1201
1209
  .
1202
- <h1>Header</h1>
1210
+ <h1>Heading</h1>
1203
1211
  <pre><code>foo
1204
1212
  </code></pre>
1205
- <h2>Header</h2>
1213
+ <h2>Heading</h2>
1206
1214
  <pre><code>foo
1207
1215
  </code></pre>
1208
1216
  <hr />
@@ -1363,7 +1371,7 @@ aaa
1363
1371
  .
1364
1372
 
1365
1373
  Unclosed code blocks are closed by the end of the document
1366
- (or the enclosing [block quote] or [list item]):
1374
+ (or the enclosing [block quote][block quotes] or [list item][list items]):
1367
1375
 
1368
1376
  .
1369
1377
  ```
@@ -1987,8 +1995,8 @@ p {color:blue;}
1987
1995
  .
1988
1996
 
1989
1997
  If there is no matching end tag, the block will end at the
1990
- end of the document (or the enclosing [block quote] or
1991
- [list item]):
1998
+ end of the document (or the enclosing [block quote][block quotes]
1999
+ or [list item][list items]):
1992
2000
 
1993
2001
  .
1994
2002
  <style
@@ -2547,8 +2555,8 @@ Foo
2547
2555
  <p>[bar]</p>
2548
2556
  .
2549
2557
 
2550
- However, it can directly follow other block elements, such as headers
2551
- and horizontal rules, and it need not be followed by a blank line.
2558
+ However, it can directly follow other block elements, such as headings
2559
+ and thematic breaks, and it need not be followed by a blank line.
2552
2560
 
2553
2561
  .
2554
2562
  # [Foo]
@@ -3423,6 +3431,48 @@ A list item may contain any kind of block:
3423
3431
  </ol>
3424
3432
  .
3425
3433
 
3434
+ A list item that contains an indented code block will preserve
3435
+ empty lines within the code block verbatim, unless there are two
3436
+ or more empty lines in a row (since as described above, two
3437
+ blank lines end the list):
3438
+
3439
+ .
3440
+ - Foo
3441
+
3442
+ bar
3443
+
3444
+ baz
3445
+ .
3446
+ <ul>
3447
+ <li>
3448
+ <p>Foo</p>
3449
+ <pre><code>bar
3450
+
3451
+ baz
3452
+ </code></pre>
3453
+ </li>
3454
+ </ul>
3455
+ .
3456
+
3457
+ .
3458
+ - Foo
3459
+
3460
+ bar
3461
+
3462
+
3463
+ baz
3464
+ .
3465
+ <ul>
3466
+ <li>
3467
+ <p>Foo</p>
3468
+ <pre><code>bar
3469
+ </code></pre>
3470
+ </li>
3471
+ </ul>
3472
+ <pre><code> baz
3473
+ </code></pre>
3474
+ .
3475
+
3426
3476
  Note that ordered list start numbers must be nine digits or less:
3427
3477
 
3428
3478
  .
@@ -3994,7 +4044,7 @@ A list may be the first block in a list item:
3994
4044
  </ol>
3995
4045
  .
3996
4046
 
3997
- A list item can contain a header:
4047
+ A list item can contain a heading:
3998
4048
 
3999
4049
  .
4000
4050
  - # Foo
@@ -4812,7 +4862,7 @@ not have their usual Markdown meanings:
4812
4862
  \`not code`
4813
4863
  1\. not a list
4814
4864
  \* not a list
4815
- \# not a header
4865
+ \# not a heading
4816
4866
  \[foo]: /url "not a reference"
4817
4867
  .
4818
4868
  <p>*not emphasized*
@@ -4821,7 +4871,7 @@ not have their usual Markdown meanings:
4821
4871
  `not code`
4822
4872
  1. not a list
4823
4873
  * not a list
4824
- # not a header
4874
+ # not a heading
4825
4875
  [foo]: /url &quot;not a reference&quot;</p>
4826
4876
  .
4827
4877
 
@@ -4907,21 +4957,21 @@ foo
4907
4957
  .
4908
4958
 
4909
4959
 
4910
- ## Entities
4960
+ ## Entity and numeric character references
4911
4961
 
4912
- With the goal of making this standard as HTML-agnostic as possible, all
4913
- valid HTML entities (except in code blocks and code spans)
4914
- are recognized as such and converted into Unicode characters before
4915
- they are stored in the AST. This means that renderers to formats other
4916
- than HTML need not be HTML-entity aware. HTML renderers may either escape
4917
- Unicode characters as entities or leave them as they are. (However,
4918
- `"`, `&`, `<`, and `>` must always be rendered as entities.)
4962
+ All valid HTML entity references and numeric character
4963
+ references, except those occurring in code blocks, code spans,
4964
+ and raw HTML, are recognized as such and treated as equivalent to the
4965
+ corresponding Unicode characters. Conforming CommonMark parsers
4966
+ need not store information about whether a particular character
4967
+ was represented in the source using a Unicode character or
4968
+ an entity reference.
4919
4969
 
4920
- [Named entities](@name-entities) consist of `&` + any of the valid
4970
+ [Entity references](@entity-references) consist of `&` + any of the valid
4921
4971
  HTML5 entity names + `;`. The
4922
- [following document](https://html.spec.whatwg.org/multipage/entities.json)
4923
- is used as an authoritative source of the valid entity names and their
4924
- corresponding code points.
4972
+ document <https://html.spec.whatwg.org/multipage/entities.json>
4973
+ is used as an authoritative source for the valid entity
4974
+ references and their corresponding code points.
4925
4975
 
4926
4976
  .
4927
4977
  &nbsp; &amp; &copy; &AElig; &Dcaron;
@@ -4933,10 +4983,11 @@ corresponding code points.
4933
4983
  ∲ ≧̸</p>
4934
4984
  .
4935
4985
 
4936
- [Decimal entities](@decimal-entities)
4937
- consist of `&#` + a string of 1--8 arabic digits + `;`. Again, these
4938
- entities need to be recognised and transformed into their corresponding
4939
- Unicode code points. Invalid Unicode code points will be replaced by
4986
+ [Decimal numeric character
4987
+ references](@decimal-numeric-character-references)
4988
+ consist of `&#` + a string of 1--8 arabic digits + `;`. A
4989
+ numeric character reference is parsed as the corresponding
4990
+ Unicode character. Invalid Unicode code points will be replaced by
4940
4991
  the "unknown code point" character (`U+FFFD`). For security reasons,
4941
4992
  the code point `U+0000` will also be replaced by `U+FFFD`.
4942
4993
 
@@ -4946,10 +4997,11 @@ the code point `U+0000` will also be replaced by `U+FFFD`.
4946
4997
  <p># Ӓ Ϡ � �</p>
4947
4998
  .
4948
4999
 
4949
- [Hexadecimal entities](@hexadecimal-entities) consist of `&#` + either
4950
- `X` or `x` + a string of 1-8 hexadecimal digits + `;`. They will also
4951
- be parsed and turned into the corresponding Unicode code points in the
4952
- AST.
5000
+ [Hexadecimal numeric character
5001
+ references](@hexadecimal-numeric-character-references) consist of `&#` +
5002
+ either `X` or `x` + a string of 1--8 hexadecimal digits + `;`.
5003
+ They too are parsed as the corresponding Unicode character (this
5004
+ time specified with a hexadecimal numeral instead of decimal).
4953
5005
 
4954
5006
  .
4955
5007
  &#X22; &#XD06; &#xcab;
@@ -4960,14 +5012,16 @@ AST.
4960
5012
  Here are some nonentities:
4961
5013
 
4962
5014
  .
4963
- &nbsp &x; &#; &#x; &ThisIsWayTooLongToBeAnEntityIsntIt; &hi?;
5015
+ &nbsp &x; &#; &#x;
5016
+ &ThisIsWayTooLongToBeAnEntityIsntIt; &hi?;
4964
5017
  .
4965
- <p>&amp;nbsp &amp;x; &amp;#; &amp;#x; &amp;ThisIsWayTooLongToBeAnEntityIsntIt; &amp;hi?;</p>
5018
+ <p>&amp;nbsp &amp;x; &amp;#; &amp;#x;
5019
+ &amp;ThisIsWayTooLongToBeAnEntityIsntIt; &amp;hi?;</p>
4966
5020
  .
4967
5021
 
4968
- Although HTML5 does accept some entities without a trailing semicolon
4969
- (such as `&copy`), these are not recognized as entities here, because it
4970
- makes the grammar too ambiguous:
5022
+ Although HTML5 does accept some entity references
5023
+ without a trailing semicolon (such as `&copy`), these are not
5024
+ recognized here, because it makes the grammar too ambiguous:
4971
5025
 
4972
5026
  .
4973
5027
  &copy
@@ -4976,7 +5030,7 @@ makes the grammar too ambiguous:
4976
5030
  .
4977
5031
 
4978
5032
  Strings that are not on the list of HTML5 named entities are not
4979
- recognized as entities either:
5033
+ recognized as entity references either:
4980
5034
 
4981
5035
  .
4982
5036
  &MadeUpEntity;
@@ -4984,9 +5038,9 @@ recognized as entities either:
4984
5038
  <p>&amp;MadeUpEntity;</p>
4985
5039
  .
4986
5040
 
4987
- Entities are recognized in any context besides code spans or
4988
- code blocks, including raw HTML, URLs, [link title]s, and
4989
- [fenced code block] [info string]s:
5041
+ Entity and numeric character references are recognized in any
5042
+ context besides code spans or code blocks or raw HTML, including
5043
+ URLs, [link title]s, and [fenced code block][] [info string]s:
4990
5044
 
4991
5045
  .
4992
5046
  <a href="&ouml;&ouml;.html">
@@ -5017,7 +5071,8 @@ foo
5017
5071
  </code></pre>
5018
5072
  .
5019
5073
 
5020
- Entities are treated as literal text in code spans and code blocks:
5074
+ Entity and numeric character references are treated as literal
5075
+ text in code spans and code blocks, and in raw HTML:
5021
5076
 
5022
5077
  .
5023
5078
  `f&ouml;&ouml;`
@@ -5032,6 +5087,12 @@ Entities are treated as literal text in code spans and code blocks:
5032
5087
  </code></pre>
5033
5088
  .
5034
5089
 
5090
+ .
5091
+ <a href="f&ouml;f&ouml;"/>
5092
+ .
5093
+ <a href="f&ouml;f&ouml;"/>
5094
+ .
5095
+
5035
5096
  ## Code spans
5036
5097
 
5037
5098
  A [backtick string](@backtick-string)
@@ -5304,7 +5365,7 @@ The following rules define emphasis and strong emphasis:
5304
5365
  2. A single `_` character [can open emphasis] iff
5305
5366
  it is part of a [left-flanking delimiter run]
5306
5367
  and either (a) not part of a [right-flanking delimiter run]
5307
- or (b) part of a [right-flanking delimeter run]
5368
+ or (b) part of a [right-flanking delimiter run]
5308
5369
  preceded by punctuation.
5309
5370
 
5310
5371
  3. A single `*` character [can close emphasis](@can-close-emphasis)
@@ -5313,7 +5374,7 @@ The following rules define emphasis and strong emphasis:
5313
5374
  4. A single `_` character [can close emphasis] iff
5314
5375
  it is part of a [right-flanking delimiter run]
5315
5376
  and either (a) not part of a [left-flanking delimiter run]
5316
- or (b) part of a [left-flanking delimeter run]
5377
+ or (b) part of a [left-flanking delimiter run]
5317
5378
  followed by punctuation.
5318
5379
 
5319
5380
  5. A double `**` [can open strong emphasis](@can-open-strong-emphasis)
@@ -5322,7 +5383,7 @@ The following rules define emphasis and strong emphasis:
5322
5383
  6. A double `__` [can open strong emphasis] iff
5323
5384
  it is part of a [left-flanking delimiter run]
5324
5385
  and either (a) not part of a [right-flanking delimiter run]
5325
- or (b) part of a [right-flanking delimeter run]
5386
+ or (b) part of a [right-flanking delimiter run]
5326
5387
  preceded by punctuation.
5327
5388
 
5328
5389
  7. A double `**` [can close strong emphasis](@can-close-strong-emphasis)
@@ -5331,7 +5392,7 @@ The following rules define emphasis and strong emphasis:
5331
5392
  8. A double `__` [can close strong emphasis]
5332
5393
  it is part of a [right-flanking delimiter run]
5333
5394
  and either (a) not part of a [left-flanking delimiter run]
5334
- or (b) part of a [left-flanking delimeter run]
5395
+ or (b) part of a [left-flanking delimiter run]
5335
5396
  followed by punctuation.
5336
5397
 
5337
5398
  9. Emphasis begins with a delimiter that [can open emphasis] and ends
@@ -6555,11 +6616,11 @@ A link can contain fragment identifiers and queries:
6555
6616
 
6556
6617
  [link](http://example.com#fragment)
6557
6618
 
6558
- [link](http://example.com?foo=bar&baz#fragment)
6619
+ [link](http://example.com?foo=3#frag)
6559
6620
  .
6560
6621
  <p><a href="#fragment">link</a></p>
6561
6622
  <p><a href="http://example.com#fragment">link</a></p>
6562
- <p><a href="http://example.com?foo=bar&amp;baz#fragment">link</a></p>
6623
+ <p><a href="http://example.com?foo=3#frag">link</a></p>
6563
6624
  .
6564
6625
 
6565
6626
  Note that a backslash before a non-escapable character is
@@ -6572,9 +6633,13 @@ just a backslash:
6572
6633
  .
6573
6634
 
6574
6635
  URL-escaping should be left alone inside the destination, as all
6575
- URL-escaped characters are also valid URL characters. HTML entities in
6576
- the destination will be parsed into the corresponding Unicode
6577
- code points, as usual, and optionally URL-escaped when written as HTML.
6636
+ URL-escaped characters are also valid URL characters. Entity and
6637
+ numeric character references in the destination will be parsed
6638
+ into the corresponding Unicode code points, as usual. These may
6639
+ be optionally URL-escaped when written as HTML, but this spec
6640
+ does not enforce any particular policy for rendering URLs in
6641
+ HTML or other formats. Renderers may make different decisions
6642
+ about how to escape or normalize URLs in the output.
6578
6643
 
6579
6644
  .
6580
6645
  [link](foo%20b&auml;)
@@ -6604,7 +6669,8 @@ Titles may be in single quotes, double quotes, or parentheses:
6604
6669
  <a href="/url" title="title">link</a></p>
6605
6670
  .
6606
6671
 
6607
- Backslash escapes and entities may be used in titles:
6672
+ Backslash escapes and entity and numeric character references
6673
+ may be used in titles:
6608
6674
 
6609
6675
  .
6610
6676
  [link](/url "title \"&quot;")
@@ -6632,15 +6698,16 @@ But it is easy to work around this by using a different quote type:
6632
6698
  title, and its test suite included a test demonstrating this.
6633
6699
  But it is hard to see a good rationale for the extra complexity this
6634
6700
  brings, since there are already many ways---backslash escaping,
6635
- entities, or using a different quote type for the enclosing title---to
6636
- write titles containing double quotes. `Markdown.pl`'s handling of
6637
- titles has a number of other strange features. For example, it allows
6638
- single-quoted titles in inline links, but not reference links. And, in
6639
- reference links but not inline links, it allows a title to begin with
6640
- `"` and end with `)`. `Markdown.pl` 1.0.1 even allows titles with no closing
6641
- quotation mark, though 1.0.2b8 does not. It seems preferable to adopt
6642
- a simple, rational rule that works the same way in inline links and
6643
- link reference definitions.)
6701
+ entity and numeric character references, or using a different
6702
+ quote type for the enclosing title---to write titles containing
6703
+ double quotes. `Markdown.pl`'s handling of titles has a number
6704
+ of other strange features. For example, it allows single-quoted
6705
+ titles in inline links, but not reference links. And, in
6706
+ reference links but not inline links, it allows a title to begin
6707
+ with `"` and end with `)`. `Markdown.pl` 1.0.1 even allows
6708
+ titles with no closing quotation mark, though 1.0.2b8 does not.
6709
+ It seems preferable to adopt a simple, rational rule that works
6710
+ the same way in inline links and link reference definitions.)
6644
6711
 
6645
6712
  [Whitespace] is allowed around the destination and title:
6646
6713
 
@@ -6771,7 +6838,7 @@ There are three kinds of [reference link](@reference-link)s:
6771
6838
  and [shortcut](#shortcut-reference-link).
6772
6839
 
6773
6840
  A [full reference link](@full-reference-link)
6774
- consists of a [link text], optional [whitespace], and a [link label]
6841
+ consists of a [link text] immediately followed by a [link label]
6775
6842
  that [matches] a [link reference definition] elsewhere in the document.
6776
6843
 
6777
6844
  A [link label](@link-label) begins with a left bracket (`[`) and ends
@@ -6941,14 +7008,15 @@ purposes of determining matching:
6941
7008
  <p><a href="/url">Baz</a></p>
6942
7009
  .
6943
7010
 
6944
- There can be [whitespace] between the [link text] and the [link label]:
7011
+ No [whitespace] is allowed between the [link text] and the
7012
+ [link label]:
6945
7013
 
6946
7014
  .
6947
7015
  [foo] [bar]
6948
7016
 
6949
7017
  [bar]: /url "title"
6950
7018
  .
6951
- <p><a href="/url" title="title">foo</a></p>
7019
+ <p>[foo] <a href="/url" title="title">bar</a></p>
6952
7020
  .
6953
7021
 
6954
7022
  .
@@ -6957,9 +7025,37 @@ There can be [whitespace] between the [link text] and the [link label]:
6957
7025
 
6958
7026
  [bar]: /url "title"
6959
7027
  .
6960
- <p><a href="/url" title="title">foo</a></p>
7028
+ <p>[foo]
7029
+ <a href="/url" title="title">bar</a></p>
6961
7030
  .
6962
7031
 
7032
+ This is a departure from John Gruber's original Markdown syntax
7033
+ description, which explicitly allows whitespace between the link
7034
+ text and the link label. It brings reference links in line with
7035
+ [inline link]s, which (according to both original Markdown and
7036
+ this spec) cannot have whitespace after the link text. More
7037
+ importantly, it prevents inadvertent capture of consecutive
7038
+ [shortcut reference link]s. If whitespace is allowed between the
7039
+ link text and the link label, then in the following we will have
7040
+ a single reference link, not two shortcut reference links, as
7041
+ intended:
7042
+
7043
+ ``` markdown
7044
+ [foo]
7045
+ [bar]
7046
+
7047
+ [foo]: /url1
7048
+ [bar]: /url2
7049
+ ```
7050
+
7051
+ (Note that [shortcut reference link]s were introduced by Gruber
7052
+ himself in a beta version of `Markdown.pl`, but never included
7053
+ in the official syntax description. Without shortcut reference
7054
+ links, it is harmless to allow space between the link text and
7055
+ link label; but once shortcut references are introduced, it is
7056
+ too dangerous to allow this, as it frequently leads to
7057
+ unintended results.)
7058
+
6963
7059
  When there are multiple matching [link reference definition]s,
6964
7060
  the first is used:
6965
7061
 
@@ -7023,6 +7119,16 @@ backslash-escaped:
7023
7119
  <p><a href="/uri">foo</a></p>
7024
7120
  .
7025
7121
 
7122
+ Note that in this example `]` is not backslash-escaped:
7123
+
7124
+ .
7125
+ [bar\\]: /uri
7126
+
7127
+ [bar\\]
7128
+ .
7129
+ <p><a href="/uri">bar\</a></p>
7130
+ .
7131
+
7026
7132
  A [link label] must contain at least one [non-whitespace character]:
7027
7133
 
7028
7134
  .
@@ -7050,7 +7156,7 @@ A [link label] must contain at least one [non-whitespace character]:
7050
7156
  A [collapsed reference link](@collapsed-reference-link)
7051
7157
  consists of a [link label] that [matches] a
7052
7158
  [link reference definition] elsewhere in the
7053
- document, optional [whitespace], and the string `[]`.
7159
+ document, followed by the string `[]`.
7054
7160
  The contents of the first link label are parsed as inlines,
7055
7161
  which are used as the link's text. The link's URI and title are
7056
7162
  provided by the matching reference link definition. Thus,
@@ -7083,8 +7189,8 @@ The link labels are case-insensitive:
7083
7189
  .
7084
7190
 
7085
7191
 
7086
- As with full reference links, [whitespace] is allowed
7087
- between the two sets of brackets:
7192
+ As with full reference links, [whitespace] is not
7193
+ allowed between the two sets of brackets:
7088
7194
 
7089
7195
  .
7090
7196
  [foo]
@@ -7092,7 +7198,8 @@ between the two sets of brackets:
7092
7198
 
7093
7199
  [foo]: /url "title"
7094
7200
  .
7095
- <p><a href="/url" title="title">foo</a></p>
7201
+ <p><a href="/url" title="title">foo</a>
7202
+ []</p>
7096
7203
  .
7097
7204
 
7098
7205
  A [shortcut reference link](@shortcut-reference-link)
@@ -7313,7 +7420,7 @@ My ![foo bar](/path/to/train.jpg "title" )
7313
7420
  Reference-style:
7314
7421
 
7315
7422
  .
7316
- ![foo] [bar]
7423
+ ![foo][bar]
7317
7424
 
7318
7425
  [bar]: /url
7319
7426
  .
@@ -7321,7 +7428,7 @@ Reference-style:
7321
7428
  .
7322
7429
 
7323
7430
  .
7324
- ![foo] [bar]
7431
+ ![foo][bar]
7325
7432
 
7326
7433
  [BAR]: /url
7327
7434
  .
@@ -7356,7 +7463,7 @@ The labels are case-insensitive:
7356
7463
  <p><img src="/url" alt="Foo" title="title" /></p>
7357
7464
  .
7358
7465
 
7359
- As with full reference links, [whitespace] is allowed
7466
+ As with reference links, [whitespace] is not allowed
7360
7467
  between the two sets of brackets:
7361
7468
 
7362
7469
  .
@@ -7365,7 +7472,8 @@ between the two sets of brackets:
7365
7472
 
7366
7473
  [foo]: /url "title"
7367
7474
  .
7368
- <p><img src="/url" alt="foo" title="title" /></p>
7475
+ <p><img src="/url" alt="foo" title="title" />
7476
+ []</p>
7369
7477
  .
7370
7478
 
7371
7479
  Shortcut:
@@ -7639,7 +7747,7 @@ consists of `"`, zero or more
7639
7747
  characters not including `"`, and a final `"`.
7640
7748
 
7641
7749
  An [open tag](@open-tag) consists of a `<` character, a [tag name],
7642
- zero or more [attributes](@attribute], optional [whitespace], an optional `/`
7750
+ zero or more [attribute]s, optional [whitespace], an optional `/`
7643
7751
  character, and a `>` character.
7644
7752
 
7645
7753
  A [closing tag](@closing-tag) consists of the string `</`, a
@@ -7707,16 +7815,9 @@ _boolean zoop:33=zoop:33 /></p>
7707
7815
  Custom tag names can be used:
7708
7816
 
7709
7817
  .
7710
- <responsive-image src="foo.jpg" />
7711
-
7712
- <My-Tag>
7713
- foo
7714
- </My-Tag>
7818
+ Foo <responsive-image src="foo.jpg" />
7715
7819
  .
7716
- <responsive-image src="foo.jpg" />
7717
- <My-Tag>
7718
- foo
7719
- </My-Tag>
7820
+ <p>Foo <responsive-image src="foo.jpg" /></p>
7720
7821
  .
7721
7822
 
7722
7823
  Illegal tag names, not parsed as HTML:
@@ -7764,11 +7865,9 @@ Missing [whitespace]:
7764
7865
  Closing tags:
7765
7866
 
7766
7867
  .
7767
- </a>
7768
- </foo >
7868
+ </a></foo >
7769
7869
  .
7770
- </a>
7771
- </foo >
7870
+ <p></a></foo ></p>
7772
7871
  .
7773
7872
 
7774
7873
  Illegal attributes in closing tag:
@@ -7830,20 +7929,21 @@ foo <![CDATA[>&<]]>
7830
7929
  <p>foo <![CDATA[>&<]]></p>
7831
7930
  .
7832
7931
 
7833
- Entities are preserved in HTML attributes:
7932
+ Entity and numeric character references are preserved in HTML
7933
+ attributes:
7834
7934
 
7835
7935
  .
7836
- <a href="&ouml;">
7936
+ foo <a href="&ouml;">
7837
7937
  .
7838
- <a href="&ouml;">
7938
+ <p>foo <a href="&ouml;"></p>
7839
7939
  .
7840
7940
 
7841
7941
  Backslash escapes do not work in HTML attributes:
7842
7942
 
7843
7943
  .
7844
- <a href="\*">
7944
+ foo <a href="\*">
7845
7945
  .
7846
- <a href="\*">
7946
+ <p>foo <a href="\*"></p>
7847
7947
  .
7848
7948
 
7849
7949
  .
@@ -8062,7 +8162,7 @@ list items, and so on---is constructed. Text is assigned to these
8062
8162
  blocks but not parsed. Link reference definitions are parsed and a
8063
8163
  map of links is constructed.
8064
8164
 
8065
- 2. In the second phase, the raw text contents of paragraphs and headers
8165
+ 2. In the second phase, the raw text contents of paragraphs and headings
8066
8166
  are parsed into sequences of Markdown inline elements (strings,
8067
8167
  code spans, links, emphasis, and so on), using the map of link
8068
8168
  references constructed in phase 1.
@@ -8125,10 +8225,10 @@ matched block.
8125
8225
  3. Finally, we look at the remainder of the line (after block
8126
8226
  markers like `>`, list markers, and indentation have been consumed).
8127
8227
  This is text that can be incorporated into the last open
8128
- block (a paragraph, code block, header, or raw HTML).
8228
+ block (a paragraph, code block, heading, or raw HTML).
8129
8229
 
8130
- Setext headers are formed when we detect that the second line of
8131
- a paragraph is a setext header line.
8230
+ Setext headings are formed when we detect that the second line of
8231
+ a paragraph is a setext heading line.
8132
8232
 
8133
8233
  Reference link definitions are detected when a paragraph is closed;
8134
8234
  the accumulated text lines are parsed to see if they begin with
@@ -8237,7 +8337,7 @@ We thus obtain the final tree:
8237
8337
  Once all of the input has been parsed, all open blocks are closed.
8238
8338
 
8239
8339
  We then "walk the tree," visiting every node, and parse raw
8240
- string contents of paragraphs and headers as inlines. At this
8340
+ string contents of paragraphs and headings as inlines. At this
8241
8341
  point we have seen all the link reference definitions, so we can
8242
8342
  resolve reference links as we go.
8243
8343