commonmarker 0.16.8 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of commonmarker might be problematic. Click here for more details.

Files changed (35) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/ext/commonmarker/cmark/CMakeLists.txt +3 -3
  4. data/ext/commonmarker/cmark/README.md +2 -2
  5. data/ext/commonmarker/cmark/api_test/CMakeLists.txt +2 -1
  6. data/ext/commonmarker/cmark/api_test/harness.c +27 -0
  7. data/ext/commonmarker/cmark/api_test/main.c +179 -3
  8. data/ext/commonmarker/cmark/changelog.txt +148 -0
  9. data/ext/commonmarker/cmark/extensions/autolink.c +8 -0
  10. data/ext/commonmarker/cmark/extensions/core-extensions.c +11 -1
  11. data/ext/commonmarker/cmark/extensions/core-extensions.h +1 -1
  12. data/ext/commonmarker/cmark/extensions/strikethrough.c +5 -0
  13. data/ext/commonmarker/cmark/extensions/table.c +44 -23
  14. data/ext/commonmarker/cmark/src/blocks.c +3 -2
  15. data/ext/commonmarker/cmark/src/cmark_extension_api.h +9 -0
  16. data/ext/commonmarker/cmark/src/inlines.c +208 -93
  17. data/ext/commonmarker/cmark/src/inlines.h +2 -2
  18. data/ext/commonmarker/cmark/src/iterator.c +1 -0
  19. data/ext/commonmarker/cmark/src/latex.c +11 -11
  20. data/ext/commonmarker/cmark/src/main.c +12 -11
  21. data/ext/commonmarker/cmark/src/node.h +1 -0
  22. data/ext/commonmarker/cmark/src/scanners.c +34 -24
  23. data/ext/commonmarker/cmark/src/scanners.re +1 -1
  24. data/ext/commonmarker/cmark/src/syntax_extension.c +5 -0
  25. data/ext/commonmarker/cmark/src/syntax_extension.h +1 -0
  26. data/ext/commonmarker/cmark/test/CMakeLists.txt +3 -2
  27. data/ext/commonmarker/cmark/test/cmark.py +2 -5
  28. data/ext/commonmarker/cmark/test/regression.txt +35 -1
  29. data/ext/commonmarker/cmark/test/smart_punct.txt +9 -0
  30. data/ext/commonmarker/cmark/test/spec.txt +88 -26
  31. data/ext/commonmarker/commonmarker.c +1 -1
  32. data/ext/commonmarker/extconf.rb +1 -1
  33. data/lib/commonmarker/version.rb +1 -1
  34. data/test/test_attributes.rb +1 -80
  35. metadata +2 -2
@@ -19,8 +19,8 @@ void cmark_parse_inlines(cmark_parser *parser,
19
19
  bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_strbuf *input,
20
20
  cmark_reference_map *refmap);
21
21
 
22
- void cmark_inlines_add_special_character(unsigned char c);
23
- void cmark_inlines_remove_special_character(unsigned char c);
22
+ void cmark_inlines_add_special_character(unsigned char c, bool emphasis);
23
+ void cmark_inlines_remove_special_character(unsigned char c, bool emphasis);
24
24
 
25
25
  #ifdef __cplusplus
26
26
  }
@@ -111,6 +111,7 @@ void cmark_consolidate_text_nodes(cmark_node *root) {
111
111
  while (tmp && tmp->type == CMARK_NODE_TEXT) {
112
112
  cmark_iter_next(iter); // advance pointer
113
113
  cmark_strbuf_put(&buf, tmp->as.literal.data, tmp->as.literal.len);
114
+ cur->end_column = tmp->end_column;
114
115
  next = tmp->next;
115
116
  cmark_node_free(tmp);
116
117
  tmp = next;
@@ -256,24 +256,24 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node,
256
256
  CR();
257
257
  list_number = cmark_node_get_list_start(node);
258
258
  if (list_number > 1) {
259
- enumlevel = S_get_enumlevel(node);
260
- // latex normally supports only five levels
261
- if (enumlevel >= 1 && enumlevel <= 5) {
259
+ enumlevel = S_get_enumlevel(node);
260
+ // latex normally supports only five levels
261
+ if (enumlevel >= 1 && enumlevel <= 5) {
262
262
  snprintf(list_number_string, LIST_NUMBER_STRING_SIZE, "%d",
263
263
  list_number);
264
264
  LIT("\\setcounter{enum");
265
- switch(enumlevel) {
266
- case 1: LIT("i"); break;
267
- case 2: LIT("ii"); break;
268
- case 3: LIT("iii"); break;
269
- case 4: LIT("iv"); break;
270
- case 5: LIT("v"); break;
271
- default: LIT("i"); break;
265
+ switch (enumlevel) {
266
+ case 1: LIT("i"); break;
267
+ case 2: LIT("ii"); break;
268
+ case 3: LIT("iii"); break;
269
+ case 4: LIT("iv"); break;
270
+ case 5: LIT("v"); break;
271
+ default: LIT("i"); break;
272
272
  }
273
273
  LIT("}{");
274
274
  OUT(list_number_string, false, NORMAL);
275
275
  LIT("}");
276
- }
276
+ }
277
277
  CR();
278
278
  }
279
279
  } else {
@@ -31,19 +31,20 @@ typedef enum {
31
31
  void print_usage() {
32
32
  printf("Usage: cmark-gfm [FILE*]\n");
33
33
  printf("Options:\n");
34
- printf(" --to, -t FORMAT Specify output format (html, xml, man, "
34
+ printf(" --to, -t FORMAT Specify output format (html, xml, man, "
35
35
  "commonmark, plaintext, latex)\n");
36
- printf(" --width WIDTH Specify wrap width (default 0 = nowrap)\n");
37
- printf(" --sourcepos Include source position attribute\n");
38
- printf(" --hardbreaks Treat newlines as hard line breaks\n");
39
- printf(" --nobreaks Render soft line breaks as spaces\n");
40
- printf(" --safe Suppress raw HTML and dangerous URLs\n");
41
- printf(" --smart Use smart punctuation\n");
36
+ printf(" --width WIDTH Specify wrap width (default 0 = nowrap)\n");
37
+ printf(" --sourcepos Include source position attribute\n");
38
+ printf(" --hardbreaks Treat newlines as hard line breaks\n");
39
+ printf(" --nobreaks Render soft line breaks as spaces\n");
40
+ printf(" --safe Suppress raw HTML and dangerous URLs\n");
41
+ printf(" --smart Use smart punctuation\n");
42
+ printf(" --validate-utf8 Replace UTF-8 invalid sequences with U+FFFD\n");
42
43
  printf(" --github-pre-lang Use GitHub-style <pre lang> for code blocks\n");
43
44
  printf(" --extension, -e EXTENSION_NAME Specify an extension name to use\n");
44
45
  printf(" --list-extensions List available extensions and quit\n");
45
- printf(" --help, -h Print usage information\n");
46
- printf(" --version Print version\n");
46
+ printf(" --help, -h Print usage information\n");
47
+ printf(" --version Print version\n");
47
48
  }
48
49
 
49
50
  static bool print_document(cmark_node *document, writer_format writer,
@@ -110,7 +111,7 @@ int main(int argc, char *argv[]) {
110
111
  int options = CMARK_OPT_DEFAULT;
111
112
  int res = 1;
112
113
 
113
- cmark_register_plugin(core_extensions_registration);
114
+ core_extensions_ensure_registered();
114
115
 
115
116
  #if defined(_WIN32) && !defined(__CYGWIN__)
116
117
  _setmode(_fileno(stdin), _O_BINARY);
@@ -256,7 +257,7 @@ failure:
256
257
 
257
258
  #if DEBUG
258
259
  if (parser)
259
- cmark_parser_free(parser);
260
+ cmark_parser_free(parser);
260
261
 
261
262
  if (document)
262
263
  cmark_node_free(document);
@@ -68,6 +68,7 @@ struct cmark_node {
68
68
  int start_column;
69
69
  int end_line;
70
70
  int end_column;
71
+ int internal_offset;
71
72
  uint16_t type;
72
73
  uint16_t flags;
73
74
 
@@ -752,7 +752,7 @@ bufsize_t _scan_autolink_uri(const unsigned char *p) {
752
752
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
753
753
  0, 0, 0, 0, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128,
754
754
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
755
- 128, 128, 128, 128, 128, 128, 0, 128, 128, 128, 128, 128, 128, 128,
755
+ 128, 128, 128, 128, 0, 128, 0, 128, 128, 128, 128, 128, 128, 128,
756
756
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
757
757
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
758
758
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
@@ -839,7 +839,7 @@ bufsize_t _scan_autolink_uri(const unsigned char *p) {
839
839
  }
840
840
  if (yych <= 0xEC) {
841
841
  if (yych <= 0xC1) {
842
- if (yych <= ' ')
842
+ if (yych <= '<')
843
843
  goto yy45;
844
844
  if (yych <= '>')
845
845
  goto yy85;
@@ -7887,35 +7887,45 @@ bufsize_t _scan_html_tag(const unsigned char *p) {
7887
7887
  unsigned char yych;
7888
7888
  static const unsigned char yybm[] = {
7889
7889
  /* table 1 .. 8: 0 */
7890
- 0, 239, 239, 239, 239, 239, 239, 239, 239, 238, 238, 238, 238, 238, 239,
7890
+ 0, 239, 239, 239, 239, 239, 239, 239, 239, 238, 238, 238, 238, 238,
7891
7891
  239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239,
7892
- 239, 239, 239, 238, 239, 234, 239, 239, 239, 239, 236, 239, 239, 239,
7893
- 239, 239, 207, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239,
7894
- 239, 239, 239, 238, 238, 174, 231, 239, 255, 255, 255, 255, 255, 255,
7892
+ 239, 239, 239, 239, 238, 239, 234, 239, 239, 239, 239, 236, 239, 239,
7893
+ 239, 239, 239, 207, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239,
7894
+ 239, 239, 239, 239, 238, 238, 174, 231, 239, 255, 255, 255, 255, 255,
7895
7895
  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
7896
- 255, 255, 255, 255, 255, 255, 239, 239, 111, 239, 239, 238, 239, 239,
7896
+ 255, 255, 255, 255, 255, 255, 255, 239, 239, 111, 239, 239, 238, 239,
7897
7897
  239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239,
7898
7898
  239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239,
7899
- 239, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
7900
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
7901
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
7902
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
7903
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
7904
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
7899
+ 239, 239, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
7900
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
7901
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
7902
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
7903
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
7904
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
7905
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
7906
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
7907
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
7908
+ 0, 0, 0, 0,
7905
7909
  /* table 9 .. 11: 256 */
7906
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 64, 64, 64, 64, 0, 0, 0, 0, 0, 0, 0, 0,
7907
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
7908
- 160, 128, 0, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 128, 0,
7909
- 0, 0, 0, 0, 0, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160,
7910
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 64, 64, 64, 64,
7911
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
7912
+ 0, 0, 0, 0, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0,
7913
+ 0, 0, 0, 160, 128, 0, 160, 160, 160, 160, 160, 160, 160, 160,
7914
+ 160, 160, 128, 0, 0, 0, 0, 0, 0, 160, 160, 160, 160, 160,
7910
7915
  160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160,
7911
- 160, 0, 0, 0, 0, 128, 0, 160, 160, 160, 160, 160, 160, 160, 160, 160,
7916
+ 160, 160, 160, 160, 160, 160, 160, 0, 0, 0, 0, 128, 0, 160,
7912
7917
  160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160,
7913
- 160, 160, 160, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
7914
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
7915
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
7916
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
7917
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
7918
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
7918
+ 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 0, 0, 0,
7919
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
7920
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
7921
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
7922
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
7923
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
7924
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
7925
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
7926
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
7927
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
7928
+ 0, 0, 0, 0,
7919
7929
  };
7920
7930
  yych = *p;
7921
7931
  if (yych <= '>') {
@@ -91,7 +91,7 @@ bufsize_t _scan_autolink_uri(const unsigned char *p)
91
91
  const unsigned char *marker = NULL;
92
92
  const unsigned char *start = p;
93
93
  /*!re2c
94
- scheme [:][^\x00-\x20>]*[>] { return (bufsize_t)(p - start); }
94
+ scheme [:][^\x00-\x20<>]*[>] { return (bufsize_t)(p - start); }
95
95
  * { return 0; }
96
96
  */
97
97
  }
@@ -36,6 +36,11 @@ cmark_node_type cmark_syntax_extension_add_node(int is_inline) {
36
36
  return *ref = (cmark_node_type) ((int) *ref + 1);
37
37
  }
38
38
 
39
+ void cmark_syntax_extension_set_emphasis(cmark_syntax_extension *extension,
40
+ bool emphasis) {
41
+ extension->emphasis = emphasis;
42
+ }
43
+
39
44
  void cmark_syntax_extension_set_open_block_func(cmark_syntax_extension *extension,
40
45
  cmark_open_block_func func) {
41
46
  extension->try_opening_block = func;
@@ -12,6 +12,7 @@ struct cmark_syntax_extension {
12
12
  cmark_llist * special_inline_chars;
13
13
  char * name;
14
14
  void * priv;
15
+ bool emphasis;
15
16
  cmark_free_func free_function;
16
17
  cmark_get_type_string_func get_type_string_func;
17
18
  cmark_can_contain_func can_contain_func;
@@ -14,8 +14,9 @@ if (CMARK_SHARED)
14
14
  endif()
15
15
 
16
16
  if (WIN32)
17
- file(TO_NATIVE_PATH ${CMAKE_BINARY_DIR}/src WIN_DLL_DIR)
18
- set(NEWPATH "${WIN_DLL_DIR};$ENV{PATH}")
17
+ file(TO_NATIVE_PATH ${CMAKE_BINARY_DIR}/src WIN_SRC_DLL_DIR)
18
+ file(TO_NATIVE_PATH ${CMAKE_BINARY_DIR}/extensions WIN_EXTENSIONS_DLL_DIR)
19
+ set(NEWPATH "${WIN_SRC_DLL_DIR};${WIN_EXTENSIONS_DLL_DIR};$ENV{PATH}")
19
20
  string(REPLACE ";" "\\;" NEWPATH "${NEWPATH}")
20
21
  set_tests_properties(api_test PROPERTIES ENVIRONMENT "PATH=${NEWPATH}")
21
22
  set(ROUNDTRIP "${CMAKE_CURRENT_SOURCE_DIR}/roundtrip.bat")
@@ -12,10 +12,7 @@ def pipe_through_prog(prog, text):
12
12
  return [p1.returncode, result.decode('utf-8'), err]
13
13
 
14
14
  def parse(lib, extlib, text, extensions):
15
- register_plugin = lib.cmark_register_plugin
16
- register_plugin.argtypes = [c_void_p]
17
-
18
- core_extensions_registration = extlib.core_extensions_registration
15
+ core_extensions_ensure_registered = extlib.core_extensions_ensure_registered
19
16
 
20
17
  find_syntax_extension = lib.cmark_find_syntax_extension
21
18
  find_syntax_extension.restype = c_void_p
@@ -35,7 +32,7 @@ def parse(lib, extlib, text, extensions):
35
32
  parser_finish.restype = c_void_p
36
33
  parser_finish.argtypes = [c_void_p]
37
34
 
38
- register_plugin(core_extensions_registration)
35
+ core_extensions_ensure_registered()
39
36
 
40
37
  parser = parser_new(0)
41
38
  for e in set(extensions):
@@ -81,7 +81,7 @@ Issue #193 - unescaped left angle brackets in link destination
81
81
 
82
82
  [a]: <te<st>
83
83
  .
84
- <p><a href="te%3Cst">a</a></p>
84
+ <p><a href="%3Cte%3Cst%3E">a</a></p>
85
85
  ````````````````````````````````
86
86
 
87
87
  Issue #192 - escaped spaces in link destination
@@ -92,3 +92,37 @@ Issue #192 - escaped spaces in link destination
92
92
  .
93
93
  <p>[a](te\ st)</p>
94
94
  ````````````````````````````````
95
+
96
+ Issue github/github#76615: multiple delimiter combinations get sketchy
97
+
98
+
99
+ ```````````````````````````````` example strikethrough
100
+ ~~**_`this`_**~~
101
+ ~~***`this`***~~
102
+ ~~___`this`___~~
103
+
104
+ **_`this`_**
105
+ ***`this`***
106
+ ___`this`___
107
+
108
+ ~~**_this_**~~
109
+ ~~***this***~~
110
+ ~~___this___~~
111
+
112
+ **_this_**
113
+ ***this***
114
+ ___this___
115
+ .
116
+ <p><del><strong><em><code>this</code></em></strong></del><br />
117
+ <del><em><strong><code>this</code></strong></em></del><br />
118
+ <del><em><strong><code>this</code></strong></em></del></p>
119
+ <p><strong><em><code>this</code></em></strong><br />
120
+ <em><strong><code>this</code></strong></em><br />
121
+ <em><strong><code>this</code></strong></em></p>
122
+ <p><del><strong><em>this</em></strong></del><br />
123
+ <del><em><strong>this</strong></em></del><br />
124
+ <del><em><strong>this</strong></em></del></p>
125
+ <p><strong><em>this</em></strong><br />
126
+ <em><strong>this</strong></em><br />
127
+ <em><strong>this</strong></em></p>
128
+ ````````````````````````````````
@@ -78,6 +78,15 @@ left double quote, to facilitate this style:
78
78
  <p>“Second paragraph by same speaker, in fiction.”</p>
79
79
  ````````````````````````````````
80
80
 
81
+ A quote following a `]` or `)` character cannot
82
+ be an open quote:
83
+
84
+ ```````````````````````````````` example
85
+ [a]'s b'
86
+ .
87
+ <p>[a]’s b’</p>
88
+ ````````````````````````````````
89
+
81
90
  Quotes that are escaped come out as literal straight
82
91
  quotes:
83
92
 
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  title: GitHub Flavored Markdown Spec
3
- version: 0.27
4
- date: '2017-2-20'
3
+ version: 0.28
4
+ date: '2017-08-01'
5
5
  license: '[CC-BY-SA 4.0](http://creativecommons.org/licenses/by-sa/4.0/)'
6
6
  ...
7
7
 
@@ -27,10 +27,12 @@ GFM is converted to HTML to ensure security and consistency of the website.
27
27
  ## What is Markdown?
28
28
 
29
29
  Markdown is a plain text format for writing structured documents,
30
- based on conventions used for indicating formatting in email and
31
- usenet posts. It was developed in 2004 by John Gruber, who wrote
32
- the first Markdown-to-HTML converter in Perl, and it soon became
33
- ubiquitous. In the next decade, dozens of implementations were
30
+ based on conventions for indicating formatting in email
31
+ and usenet posts. It was developed by John Gruber (with
32
+ help from Aaron Swartz) and released in 2004 in the form of a
33
+ [syntax description](http://daringfireball.net/projects/markdown/syntax)
34
+ and a Perl script (`Markdown.pl`) for converting Markdown to
35
+ HTML. In the next decade, dozens of implementations were
34
36
  developed in many languages. Some extended the original
35
37
  Markdown syntax with conventions for footnotes, tables, and
36
38
  other document elements. Some allowed Markdown documents to be
@@ -328,7 +330,7 @@ form feed (`U+000C`), or carriage return (`U+000D`).
328
330
  characters].
329
331
 
330
332
  A [Unicode whitespace character](@) is
331
- any code point in the Unicode `Zs` class, or a tab (`U+0009`),
333
+ any code point in the Unicode `Zs` general category, or a tab (`U+0009`),
332
334
  carriage return (`U+000D`), newline (`U+000A`), or form feed
333
335
  (`U+000C`).
334
336
 
@@ -347,7 +349,7 @@ is `!`, `"`, `#`, `$`, `%`, `&`, `'`, `(`, `)`,
347
349
 
348
350
  A [punctuation character](@) is an [ASCII
349
351
  punctuation character] or anything in
350
- the Unicode classes `Pc`, `Pd`, `Pe`, `Pf`, `Pi`, `Po`, or `Ps`.
352
+ the general Unicode categories `Pc`, `Pd`, `Pe`, `Pf`, `Pi`, `Po`, or `Ps`.
351
353
 
352
354
  ## Tabs
353
355
 
@@ -418,7 +420,7 @@ as indentation with four spaces would:
418
420
  Normally the `>` that begins a block quote may be followed
419
421
  optionally by a space, which is not considered part of the
420
422
  content. In the following case `>` is followed by a tab,
421
- which is treated as if it were expanded into spaces.
423
+ which is treated as if it were expanded into three spaces.
422
424
  Since one of these spaces is considered part of the
423
425
  delimiter, `foo` is considered to be indented six spaces
424
426
  inside the block quote context, so we get an indented
@@ -497,7 +499,7 @@ We can think of a document as a sequence of
497
499
  quotations, lists, headings, rules, and code blocks. Some blocks (like
498
500
  block quotes and list items) contain other blocks; others (like
499
501
  headings and paragraphs) contain [inline](@) content---text,
500
- links, emphasized text, images, code, and so on.
502
+ links, emphasized text, images, code spans, and so on.
501
503
 
502
504
  ## Precedence
503
505
 
@@ -1659,6 +1661,15 @@ With tildes:
1659
1661
  </code></pre>
1660
1662
  ````````````````````````````````
1661
1663
 
1664
+ Fewer than three backticks is not enough:
1665
+
1666
+ ```````````````````````````````` example
1667
+ ``
1668
+ foo
1669
+ ``
1670
+ .
1671
+ <p><code>foo</code></p>
1672
+ ````````````````````````````````
1662
1673
 
1663
1674
  The closing code fence must use the same character as the opening
1664
1675
  fence:
@@ -2047,6 +2058,37 @@ or [closing tag] (with any [tag name] other than `script`,
2047
2058
  or the end of the line.\
2048
2059
  **End condition:** line is followed by a [blank line].
2049
2060
 
2061
+ HTML blocks continue until they are closed by their appropriate
2062
+ [end condition], or the last line of the document or other [container block].
2063
+ This means any HTML **within an HTML block** that might otherwise be recognised
2064
+ as a start condition will be ignored by the parser and passed through as-is,
2065
+ without changing the parser's state.
2066
+
2067
+ For instance, `<pre>` within an HTML block started by `<table>` will not affect
2068
+ the parser state; as the HTML block was started by start condition 6, it
2069
+ will end at any blank line. This can be surprising:
2070
+
2071
+ ```````````````````````````````` example
2072
+ <table><tr><td>
2073
+ <pre>
2074
+ **Hello**,
2075
+
2076
+ _world_.
2077
+ </pre>
2078
+ </td></tr></table>
2079
+ .
2080
+ <table><tr><td>
2081
+ <pre>
2082
+ **Hello**,
2083
+ <p><em>world</em>.
2084
+ </pre></p>
2085
+ </td></tr></table>
2086
+ ````````````````````````````````
2087
+
2088
+ In this case, the HTML block is terminated by the blank line — the `**Hello**`
2089
+ text remains verbatim — and regular parsing resumes, with a paragraph,
2090
+ emphasised `world` and inline and block HTML following.
2091
+
2050
2092
  All types of [HTML blocks] except type 7 may interrupt
2051
2093
  a paragraph. Blocks of type 7 may not interrupt a paragraph.
2052
2094
  (This restriction is intended to prevent unwanted interpretation
@@ -3833,11 +3875,15 @@ The following rules define [list items]:
3833
3875
  If the list item is ordered, then it is also assigned a start
3834
3876
  number, based on the ordered list marker.
3835
3877
 
3836
- Exceptions: When the first list item in a [list] interrupts
3878
+ Exceptions:
3879
+
3880
+ 1. When the first list item in a [list] interrupts
3837
3881
  a paragraph---that is, when it starts on a line that would
3838
3882
  otherwise count as [paragraph continuation text]---then (a)
3839
3883
  the lines *Ls* must not begin with a blank line, and (b) if
3840
3884
  the list item is ordered, the start number must be 1.
3885
+ 2. If any line is a [thematic break][thematic breaks] then
3886
+ that line is not a list item.
3841
3887
 
3842
3888
  For example, let *Ls* be the lines
3843
3889
 
@@ -6049,6 +6095,15 @@ we just have literal backticks:
6049
6095
  <p>`foo</p>
6050
6096
  ````````````````````````````````
6051
6097
 
6098
+ The following case also illustrates the need for opening and
6099
+ closing backtick strings to be equal in length:
6100
+
6101
+ ```````````````````````````````` example
6102
+ `foo``bar``
6103
+ .
6104
+ <p>`foo<code>bar</code></p>
6105
+ ````````````````````````````````
6106
+
6052
6107
 
6053
6108
  ## Emphasis and strong emphasis
6054
6109
 
@@ -6098,19 +6153,20 @@ for efficient parsing strategies that do not backtrack.
6098
6153
 
6099
6154
  First, some definitions. A [delimiter run](@) is either
6100
6155
  a sequence of one or more `*` characters that is not preceded or
6101
- followed by a `*` character, or a sequence of one or more `_`
6102
- characters that is not preceded or followed by a `_` character.
6156
+ followed by a non-backslash-escaped `*` character, or a sequence
6157
+ of one or more `_` characters that is not preceded or followed by
6158
+ a non-backslash-escaped `_` character.
6103
6159
 
6104
6160
  A [left-flanking delimiter run](@) is
6105
6161
  a [delimiter run] that is (a) not followed by [Unicode whitespace],
6106
- and (b) either not followed by a [punctuation character], or
6162
+ and (b) not followed by a [punctuation character], or
6107
6163
  preceded by [Unicode whitespace] or a [punctuation character].
6108
6164
  For purposes of this definition, the beginning and the end of
6109
6165
  the line count as Unicode whitespace.
6110
6166
 
6111
6167
  A [right-flanking delimiter run](@) is
6112
6168
  a [delimiter run] that is (a) not preceded by [Unicode whitespace],
6113
- and (b) either not preceded by a [punctuation character], or
6169
+ and (b) not preceded by a [punctuation character], or
6114
6170
  followed by [Unicode whitespace] or a [punctuation character].
6115
6171
  For purposes of this definition, the beginning and the end of
6116
6172
  the line count as Unicode whitespace.
@@ -6189,7 +6245,7 @@ The following rules define emphasis and strong emphasis:
6189
6245
  7. A double `**` [can close strong emphasis](@)
6190
6246
  iff it is part of a [right-flanking delimiter run].
6191
6247
 
6192
- 8. A double `__` [can close strong emphasis]
6248
+ 8. A double `__` [can close strong emphasis] iff
6193
6249
  it is part of a [right-flanking delimiter run]
6194
6250
  and either (a) not part of a [left-flanking delimiter run]
6195
6251
  or (b) part of a [left-flanking delimiter run]
@@ -6230,7 +6286,7 @@ the following principles resolve ambiguity:
6230
6286
  `<em><em>...</em></em>`.
6231
6287
 
6232
6288
  14. An interpretation `<em><strong>...</strong></em>` is always
6233
- preferred to `<strong><em>..</em></strong>`.
6289
+ preferred to `<strong><em>...</em></strong>`.
6234
6290
 
6235
6291
  15. When two potential emphasis or strong emphasis spans overlap,
6236
6292
  so that the second begins before the first ends and ends after
@@ -7438,7 +7494,9 @@ A [link destination](@) consists of either
7438
7494
  - a nonempty sequence of characters that does not include
7439
7495
  ASCII space or control characters, and includes parentheses
7440
7496
  only if (a) they are backslash-escaped or (b) they are part of
7441
- a balanced pair of unescaped parentheses.
7497
+ a balanced pair of unescaped parentheses. (Implementations
7498
+ may impose limits on parentheses nesting to avoid performance
7499
+ issues, but at least three levels of nesting should be supported.)
7442
7500
 
7443
7501
  A [link title](@) consists of either
7444
7502
 
@@ -7544,7 +7602,7 @@ Parentheses inside the link destination may be escaped:
7544
7602
  <p><a href="(foo)">link</a></p>
7545
7603
  ````````````````````````````````
7546
7604
 
7547
- Any number parentheses are allowed without escaping, as long as they are
7605
+ Any number of parentheses are allowed without escaping, as long as they are
7548
7606
  balanced:
7549
7607
 
7550
7608
  ```````````````````````````````` example
@@ -7850,13 +7908,16 @@ that [matches] a [link reference definition] elsewhere in the document.
7850
7908
  A [link label](@) begins with a left bracket (`[`) and ends
7851
7909
  with the first right bracket (`]`) that is not backslash-escaped.
7852
7910
  Between these brackets there must be at least one [non-whitespace character].
7853
- Unescaped square bracket characters are not allowed in
7854
- [link labels]. A link label can have at most 999
7855
- characters inside the square brackets.
7911
+ Unescaped square bracket characters are not allowed inside the
7912
+ opening and closing square brackets of [link labels]. A link
7913
+ label can have at most 999 characters inside the square
7914
+ brackets.
7856
7915
 
7857
7916
  One label [matches](@)
7858
7917
  another just in case their normalized forms are equal. To normalize a
7859
- label, perform the *Unicode case fold* and collapse consecutive internal
7918
+ label, strip off the opening and closing brackets,
7919
+ perform the *Unicode case fold*, strip leading and trailing
7920
+ [whitespace] and collapse consecutive internal
7860
7921
  [whitespace] to a single space. If there are multiple
7861
7922
  matching reference link definitions, the one that comes first in the
7862
7923
  document is used. (It is desirable in such cases to emit a warning.)
@@ -8609,11 +8670,11 @@ The link labels are case-insensitive:
8609
8670
  ````````````````````````````````
8610
8671
 
8611
8672
 
8612
- If you just want bracketed text, you can backslash-escape the
8613
- opening `!` and `[`:
8673
+ If you just want a literal `!` followed by bracketed text, you can
8674
+ backslash-escape the opening `[`:
8614
8675
 
8615
8676
  ```````````````````````````````` example
8616
- \!\[foo]
8677
+ !\[foo]
8617
8678
 
8618
8679
  [foo]: /url "title"
8619
8680
  .
@@ -9840,3 +9901,4 @@ closers:
9840
9901
 
9841
9902
  After we're done, we remove all delimiters above `stack_bottom` from the
9842
9903
  delimiter stack.
9904
+