rmultimarkdown 6.4.0.4 → 6.7.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. checksums.yaml +5 -5
  2. data/Rakefile +7 -13
  3. data/ext/Makefile +67 -55
  4. data/ext/extconf.rb +7 -5
  5. data/ext/mmd/aho-corasick.c +8 -8
  6. data/ext/mmd/aho-corasick.h +3 -3
  7. data/ext/mmd/argtable3.c +6537 -0
  8. data/ext/mmd/argtable3.h +273 -0
  9. data/ext/mmd/beamer.c +12 -1
  10. data/ext/mmd/char.c +120 -27
  11. data/ext/mmd/char.h +23 -23
  12. data/ext/mmd/critic_markup.c +7 -6
  13. data/ext/mmd/d_string.c +88 -32
  14. data/ext/mmd/{include/d_string.h → d_string.h} +50 -38
  15. data/ext/mmd/epub.c +36 -12
  16. data/ext/mmd/epub.h +2 -2
  17. data/ext/mmd/file.c +50 -40
  18. data/ext/mmd/file.h +2 -2
  19. data/ext/mmd/html.c +164 -99
  20. data/ext/mmd/html.h +3 -2
  21. data/ext/mmd/i18n.h +15 -11
  22. data/ext/mmd/itmz-lexer.c +16978 -0
  23. data/ext/mmd/itmz-lexer.h +132 -0
  24. data/ext/mmd/itmz-parser.c +1189 -0
  25. data/ext/mmd/itmz-parser.h +11 -0
  26. data/ext/mmd/itmz-reader.c +388 -0
  27. data/ext/mmd/itmz-reader.h +111 -0
  28. data/ext/mmd/itmz.c +567 -0
  29. data/ext/mmd/itmz.h +117 -0
  30. data/ext/mmd/latex.c +93 -41
  31. data/ext/mmd/lexer.c +3506 -2774
  32. data/ext/mmd/{include/libMultiMarkdown.h → libMultiMarkdown.h} +49 -2
  33. data/ext/mmd/main.c +612 -0
  34. data/ext/mmd/memoir.c +4 -1
  35. data/ext/mmd/miniz.c +6905 -6680
  36. data/ext/mmd/miniz.h +456 -476
  37. data/ext/mmd/mmd.c +399 -94
  38. data/ext/mmd/mmd.h +25 -25
  39. data/ext/mmd/object_pool.h +3 -3
  40. data/ext/mmd/opendocument-content.c +137 -69
  41. data/ext/mmd/opendocument-content.h +2 -2
  42. data/ext/mmd/opendocument.c +35 -14
  43. data/ext/mmd/opendocument.h +2 -2
  44. data/ext/mmd/opml-lexer.c +259 -637
  45. data/ext/mmd/opml-lexer.h +1 -17
  46. data/ext/mmd/opml-parser.c +194 -188
  47. data/ext/mmd/opml-reader.c +72 -142
  48. data/ext/mmd/opml-reader.h +1 -1
  49. data/ext/mmd/opml.c +13 -13
  50. data/ext/mmd/opml.h +1 -1
  51. data/ext/mmd/parser.c +1623 -1244
  52. data/ext/mmd/rng.c +8 -3
  53. data/ext/mmd/scanners.c +66625 -103198
  54. data/ext/mmd/scanners.h +1 -0
  55. data/ext/mmd/stack.c +62 -20
  56. data/ext/mmd/stack.h +10 -21
  57. data/ext/mmd/textbundle.c +23 -7
  58. data/ext/mmd/textbundle.h +2 -2
  59. data/ext/mmd/token.c +42 -16
  60. data/ext/mmd/{include/token.h → token.h} +22 -8
  61. data/ext/mmd/token_pairs.c +0 -16
  62. data/ext/mmd/transclude.c +6 -2
  63. data/ext/mmd/uthash.h +745 -745
  64. data/ext/mmd/version.h +8 -8
  65. data/ext/mmd/writer.c +225 -63
  66. data/ext/mmd/writer.h +50 -36
  67. data/ext/mmd/xml.c +855 -0
  68. data/ext/mmd/xml.h +134 -0
  69. data/ext/mmd/zip.c +71 -4
  70. data/ext/mmd/zip.h +7 -1
  71. data/ext/ruby_multi_markdown.c +9 -18
  72. data/lib/multi_markdown/version.rb +1 -1
  73. data/lib/multi_markdown.bundle +0 -0
  74. data/rmultimarkdown.gemspec +0 -2
  75. metadata +22 -28
  76. data/ext/mmd/char_lookup.c +0 -212
data/ext/mmd/mmd.c CHANGED
@@ -60,6 +60,8 @@
60
60
  #include "d_string.h"
61
61
  #include "epub.h"
62
62
  #include "i18n.h"
63
+ #include "itmz.h"
64
+ #include "itmz-reader.h"
63
65
  #include "lexer.h"
64
66
  #include "libMultiMarkdown.h"
65
67
  #include "mmd.h"
@@ -77,10 +79,10 @@
77
79
 
78
80
 
79
81
  // Basic parser function declarations
80
- void * ParseAlloc();
81
- void Parse();
82
- void ParseFree();
83
- void ParseTrace();
82
+ void * ParseAlloc(void *);
83
+ void Parse(void *, int, void *, void *);
84
+ void ParseFree(void *, void *);
85
+ void ParseTrace(FILE * stream, char * zPrefix);
84
86
 
85
87
  void mmd_pair_tokens_in_block(token * block, token_pair_engine * e, stack * s);
86
88
 
@@ -124,6 +126,7 @@ mmd_engine * mmd_engine_create(DString * d, unsigned long extensions) {
124
126
  e->quotes_lang = ENGLISH;
125
127
 
126
128
  e->abbreviation_stack = stack_new(0);
129
+ e->critic_stack = stack_new(0);
127
130
  e->citation_stack = stack_new(0);
128
131
  e->definition_stack = stack_new(0);
129
132
  e->footnote_stack = stack_new(0);
@@ -307,6 +310,7 @@ void mmd_engine_reset(mmd_engine * e) {
307
310
  }
308
311
 
309
312
  // Reset other stacks
313
+ e->critic_stack->size = 0;
310
314
  e->definition_stack->size = 0;
311
315
  e->header_stack->size = 0;
312
316
  e->table_stack->size = 0;
@@ -337,6 +341,7 @@ void mmd_engine_free(mmd_engine * e, bool freeDString) {
337
341
 
338
342
  // Takedown
339
343
  stack_free(e->abbreviation_stack);
344
+ stack_free(e->critic_stack);
340
345
  stack_free(e->citation_stack);
341
346
  stack_free(e->footnote_stack);
342
347
  stack_free(e->glossary_stack);
@@ -347,6 +352,18 @@ void mmd_engine_free(mmd_engine * e, bool freeDString) {
347
352
  }
348
353
 
349
354
 
355
+ /// Access DString directly
356
+ DString * mmd_engine_d_string(mmd_engine * e) {
357
+ return e->dstr;
358
+ }
359
+
360
+
361
+ /// Return token tree after previous parsing
362
+ token * mmd_engine_root(mmd_engine * e) {
363
+ return e->root;
364
+ }
365
+
366
+
350
367
  bool line_is_empty(token * t) {
351
368
  while (t) {
352
369
  switch (t->type) {
@@ -511,18 +528,34 @@ void mmd_assign_line_type(mmd_engine * e, token * line) {
511
528
  line->type = (first_child->type - HASH1) + LINE_ATX_1;
512
529
  first_child->type = (line->type - LINE_ATX_1) + MARKER_H1;
513
530
 
531
+ t = line->child->tail;
532
+
514
533
  // Strip trailing '#' sequence if present
515
- if (line->child->tail->type == TEXT_NL) {
516
- if ((line->child->tail->prev->type >= HASH1) &&
517
- (line->child->tail->prev->type <= HASH6)) {
518
- line->child->tail->prev->type -= HASH1;
519
- line->child->tail->prev->type += MARKER_H1;
520
- }
521
- } else {
522
- if ((line->child->tail->type >= HASH1) &&
523
- (line->child->tail->type <= HASH6)) {
524
- line->child->tail->type -= HASH1;
525
- line->child->tail->type += MARKER_H1;
534
+ while (t) {
535
+ switch (t->type) {
536
+ case INDENT_TAB:
537
+ case INDENT_SPACE:
538
+ case NON_INDENT_SPACE:
539
+ case TEXT_NL:
540
+ case TEXT_LINEBREAK:
541
+ case TEXT_LINEBREAK_SP:
542
+ t = t->prev;
543
+ break;
544
+
545
+ case HASH1:
546
+ case HASH2:
547
+ case HASH3:
548
+ case HASH4:
549
+ case HASH5:
550
+ case HASH6:
551
+ t->type -= HASH1;
552
+ t->type += MARKER_H1;
553
+ t = NULL;
554
+ break;
555
+
556
+ default:
557
+ // Break out of loop
558
+ t = NULL;
526
559
  }
527
560
  }
528
561
  } else {
@@ -609,6 +642,15 @@ void mmd_assign_line_type(mmd_engine * e, token * line) {
609
642
 
610
643
  case DASH_N:
611
644
  case DASH_M:
645
+
646
+ // This could be a table separator instead of a list
647
+ if (!(e->extensions & EXT_COMPATIBILITY)) {
648
+ if (scan_table_separator(&source[first_child->start])) {
649
+ line->type = LINE_TABLE_SEPARATOR;
650
+ break;
651
+ }
652
+ }
653
+
612
654
  if (scan_setext(&source[first_child->start])) {
613
655
  line->type = LINE_SETEXT_2;
614
656
  break;
@@ -748,6 +790,8 @@ void mmd_assign_line_type(mmd_engine * e, token * line) {
748
790
  line->type = LINE_EMPTY;
749
791
  break;
750
792
 
793
+ case TOC_SINGLE:
794
+ case TOC_RANGE:
751
795
  case TOC:
752
796
  line->type = (e->extensions & EXT_COMPATIBILITY) ? LINE_PLAIN : LINE_TOC;
753
797
  break;
@@ -870,6 +914,7 @@ void deindent_line(token * line) {
870
914
  if (line->child) {
871
915
  line->child->prev = NULL;
872
916
  line->child->tail = t->tail;
917
+ line->start = line->child->start;
873
918
  }
874
919
 
875
920
  token_free(t);
@@ -896,28 +941,47 @@ void deindent_block(mmd_engine * e, token * block) {
896
941
  }
897
942
 
898
943
 
944
+ void prune_first_child_from_line(token * line) {
945
+ token * t = line->child;
946
+
947
+ if (t) {
948
+ line->child = t->next;
949
+ t->next = NULL;
950
+
951
+ if (line->child) {
952
+ line->child->prev = NULL;
953
+ line->child->tail = t->tail;
954
+ }
955
+
956
+ token_free(t);
957
+ }
958
+ }
959
+
960
+
899
961
  /// Strip leading blockquote marker from line
900
962
  void strip_quote_markers_from_line(token * line, const char * source) {
901
963
  if (!line || !line->child) {
902
964
  return;
903
965
  }
904
966
 
905
- token * t;
967
+ token * t = NULL;
906
968
 
907
- switch (line->child->type) {
908
- case MARKER_BLOCKQUOTE:
909
- case NON_INDENT_SPACE:
910
- t = line->child;
911
- line->child = t->next;
912
- t->next = NULL;
969
+ while (line->child && t != line->child) {
970
+ t = line->child;
913
971
 
914
- if (line->child) {
915
- line->child->prev = NULL;
916
- line->child->tail = t->tail;
917
- }
972
+ switch (line->child->type) {
973
+ case TEXT_PLAIN:
974
+ if ((line->child->len == 1) && (source[line->child->start] == ' ')) {
975
+ prune_first_child_from_line(line);
976
+ }
918
977
 
919
- token_free(t);
920
- break;
978
+ break;
979
+
980
+ case MARKER_BLOCKQUOTE:
981
+ case NON_INDENT_SPACE:
982
+ prune_first_child_from_line(line);
983
+ break;
984
+ }
921
985
  }
922
986
 
923
987
  if (line->child && (line->child->type == TEXT_PLAIN)) {
@@ -1062,6 +1126,12 @@ token * mmd_tokenize_string(mmd_engine * e, size_t start, size_t len, bool stop_
1062
1126
  if (e->allow_meta && root->child == line) {
1063
1127
  if (line->type == LINE_SETEXT_2) {
1064
1128
  line->type = LINE_YAML;
1129
+ } else if (
1130
+ (line->type == LINE_META) &&
1131
+ scan_empty_meta_line(&e->dstr->str[line->start])) {
1132
+ // Don't start metadata with empty meta line (e.g. "foo:\n")
1133
+ e->allow_meta = false;
1134
+ line->type = LINE_PLAIN;
1065
1135
  } else if (line->type != LINE_META) {
1066
1136
  e->allow_meta = false;
1067
1137
  }
@@ -1117,13 +1187,12 @@ void mmd_parse_token_chain(mmd_engine * e, token * chain) {
1117
1187
 
1118
1188
  e->recurse_depth++;
1119
1189
 
1120
- void* pParser = ParseAlloc (malloc); // Create a parser (for lemon)
1190
+ void * pParser = ParseAlloc (malloc); // Create a parser (for lemon)
1121
1191
  token * walker = chain->child; // Walk the existing tree
1122
1192
  token * remainder; // Hold unparsed tail of chain
1123
1193
 
1124
- #ifndef NDEBUG
1125
- ParseTrace(stderr, "parser >>");
1126
- #endif
1194
+ // Enable to monitor parsing steps
1195
+ // ParseTrace(stderr, "parser >> ");
1127
1196
 
1128
1197
  // Remove existing token tree
1129
1198
  e->root = NULL;
@@ -1139,19 +1208,12 @@ void mmd_parse_token_chain(mmd_engine * e, token * chain) {
1139
1208
  remainder->prev = NULL;
1140
1209
  }
1141
1210
 
1142
- #ifndef NDEBUG
1143
- fprintf(stderr, "\nNew line\n");
1144
- #endif
1145
-
1146
1211
  Parse(pParser, walker->type, walker, e);
1147
1212
 
1148
1213
  walker = remainder;
1149
1214
  }
1150
1215
 
1151
1216
  // Signal finish to parser
1152
- #ifndef NDEBUG
1153
- fprintf(stderr, "\nFinish parse\n");
1154
- #endif
1155
1217
  Parse(pParser, 0, NULL, e);
1156
1218
 
1157
1219
  // Disconnect of (now empty) root
@@ -1249,6 +1311,7 @@ void mmd_assign_ambidextrous_tokens_in_block(mmd_engine * e, token * block, size
1249
1311
  size_t lead_count, lag_count, pre_count, post_count;
1250
1312
 
1251
1313
  token * t = block->child;
1314
+ token * new;
1252
1315
 
1253
1316
  char * str = e->dstr->str;
1254
1317
 
@@ -1268,6 +1331,10 @@ void mmd_assign_ambidextrous_tokens_in_block(mmd_engine * e, token * block, size
1268
1331
  case DOC_START_TOKEN:
1269
1332
  case BLOCK_BLOCKQUOTE:
1270
1333
  case BLOCK_DEF_ABBREVIATION:
1334
+ case BLOCK_DEF_CITATION:
1335
+ case BLOCK_DEF_FOOTNOTE:
1336
+ case BLOCK_DEF_GLOSSARY:
1337
+ case BLOCK_DEF_LINK:
1271
1338
  case BLOCK_DEFLIST:
1272
1339
  case BLOCK_DEFINITION:
1273
1340
  case BLOCK_H1:
@@ -1298,10 +1365,17 @@ void mmd_assign_ambidextrous_tokens_in_block(mmd_engine * e, token * block, size
1298
1365
 
1299
1366
  case CRITIC_SUB_DIV:
1300
1367
  // Divide this into two tokens
1301
- t->child = token_new(CRITIC_SUB_DIV_B, t->start + 1, 1);
1302
- t->child->next = t->next;
1303
- t->next = t->child;
1304
- t->child = NULL;
1368
+ new = token_new(CRITIC_SUB_DIV_B, t->start + 1, 1);
1369
+
1370
+ new->next = t->next;
1371
+
1372
+ if (new->next) {
1373
+ new->next->prev = new;
1374
+ }
1375
+
1376
+ t->next = new;
1377
+ new->prev = t;
1378
+
1305
1379
  t->len = 1;
1306
1380
  t->type = CRITIC_SUB_DIV_A;
1307
1381
  break;
@@ -1506,8 +1580,12 @@ void mmd_assign_ambidextrous_tokens_in_block(mmd_engine * e, token * block, size
1506
1580
  case QUOTE_DOUBLE:
1507
1581
  offset = t->start;
1508
1582
 
1509
- if ((offset == 0) || (char_is_whitespace_or_line_ending(str[offset - 1]))) {
1583
+ if (offset == 0) {
1584
+ t->can_close = 0;
1585
+ } else if (char_is_whitespace_or_line_ending(str[offset - 1])) {
1510
1586
  t->can_close = 0;
1587
+ } else if (! char_is_whitespace_or_line_ending_or_punctuation(str[offset - 1])) {
1588
+ t->can_open = 0;
1511
1589
  }
1512
1590
 
1513
1591
  if (char_is_whitespace_or_line_ending(str[offset + 1])) {
@@ -1745,8 +1823,11 @@ void recursive_parse_indent(mmd_engine * e, token * block) {
1745
1823
  // Strip tokens?
1746
1824
  switch (block->type) {
1747
1825
  case BLOCK_DEFINITION:
1748
- // Strip leading ':' from definition
1749
- token_remove_first_child(block->child);
1826
+ // Flag leading ':' as markup
1827
+ block->child->child->type = MARKER_DEFLIST_COLON;
1828
+
1829
+ // Strip whitespace between colon and remainder of line
1830
+ strip_leading_whitespace(block->child->child->next, e->dstr->str);
1750
1831
  break;
1751
1832
  }
1752
1833
 
@@ -1763,16 +1844,41 @@ void is_list_loose(token * list) {
1763
1844
  return;
1764
1845
  }
1765
1846
 
1766
- while (walker->next != NULL) {
1847
+ if (walker->next == NULL) {
1848
+ // Single item list
1767
1849
  if (walker->type == BLOCK_LIST_ITEM) {
1768
1850
  if (walker->child->type == BLOCK_PARA) {
1769
- loose = true;
1851
+ walker = walker->child;
1852
+
1853
+ while (walker->next != NULL) {
1854
+ if (walker->type == BLOCK_EMPTY) {
1855
+ if (walker->next->type == BLOCK_PARA) {
1856
+ loose = true;
1857
+ }
1858
+ }
1859
+
1860
+ walker = walker->next;
1861
+ }
1770
1862
  } else {
1771
1863
  walker->type = BLOCK_LIST_ITEM_TIGHT;
1772
1864
  }
1773
1865
  }
1866
+ } else {
1867
+ while (walker->next != NULL) {
1868
+ if (walker->type == BLOCK_LIST_ITEM) {
1869
+ if (walker->child->type == BLOCK_PARA) {
1870
+ loose = true;
1871
+ } else {
1872
+ walker->type = BLOCK_LIST_ITEM_TIGHT;
1873
+ }
1874
+ }
1875
+
1876
+ walker = walker->next;
1877
+ }
1774
1878
 
1775
- walker = walker->next;
1879
+ if (walker->child && walker->child->next && (walker->child->next->next != NULL)) {
1880
+ loose = true;
1881
+ }
1776
1882
  }
1777
1883
 
1778
1884
  if (loose) {
@@ -1852,7 +1958,12 @@ meta:
1852
1958
  len = scan_meta_key(&source[l->start]);
1853
1959
  m = meta_new(source, l->start, len);
1854
1960
  start = l->start + len + 1;
1855
- len = l->start + l->len - start - 1;
1961
+ len = l->start + l->len - start;
1962
+
1963
+ if (char_is_line_ending(source[start + len])) {
1964
+ len--;
1965
+ }
1966
+
1856
1967
  d_string_append_c_array(d, &source[start], len);
1857
1968
  stack_push(e->metadata_stack, m);
1858
1969
  break;
@@ -1874,16 +1985,13 @@ plain:
1874
1985
  case LINE_YAML:
1875
1986
  break;
1876
1987
 
1877
- case LINE_TABLE:
1988
+ default:
1878
1989
  if (scan_meta_line(&source[l->start])) {
1879
1990
  goto meta;
1880
1991
  } else {
1881
1992
  goto plain;
1882
1993
  }
1883
1994
 
1884
- default:
1885
- fprintf(stderr, "ERROR!\n");
1886
- token_describe(l, NULL);
1887
1995
  break;
1888
1996
  }
1889
1997
 
@@ -1908,15 +2016,16 @@ void strip_line_tokens_from_deflist(mmd_engine * e, token * deflist) {
1908
2016
  walker->type = TEXT_EMPTY;
1909
2017
  break;
1910
2018
 
1911
- case LINE_PLAIN:
1912
- walker->type = BLOCK_TERM;
1913
-
1914
2019
  case BLOCK_TERM:
1915
2020
  break;
1916
2021
 
1917
2022
  case BLOCK_DEFINITION:
1918
2023
  strip_line_tokens_from_block(e, walker);
1919
2024
  break;
2025
+
2026
+ default:
2027
+ walker->type = BLOCK_TERM;
2028
+
1920
2029
  }
1921
2030
 
1922
2031
  walker = walker->next;
@@ -2000,11 +2109,6 @@ void strip_line_tokens_from_block(mmd_engine * e, token * block) {
2000
2109
  return;
2001
2110
  }
2002
2111
 
2003
- #ifndef NDEBUG
2004
- fprintf(stderr, "Strip line tokens from %d (%lu:%lu) (child %d)\n", block->type, block->start, block->len, block->child->type);
2005
- token_tree_describe(block, e->dstr->str);
2006
- #endif
2007
-
2008
2112
  token * l = block->child;
2009
2113
 
2010
2114
  // Custom actions
@@ -2046,18 +2150,33 @@ void strip_line_tokens_from_block(mmd_engine * e, token * block) {
2046
2150
  switch (l->type) {
2047
2151
  case LINE_SETEXT_1:
2048
2152
  case LINE_SETEXT_2:
2049
- if ((block->type == BLOCK_SETEXT_1) ||
2050
- (block->type == BLOCK_SETEXT_2)) {
2051
- temp = l->next;
2052
- tokens_prune(l, l);
2053
- l = temp;
2054
- break;
2153
+ temp = token_new_parent(l->child, MARKER_SETEXT_1 + l->type - LINE_SETEXT_1);
2154
+
2155
+ // Add contents of line to parent block
2156
+ token_append_child(block, temp);
2157
+
2158
+ // Disconnect line from its contents
2159
+ l->child = NULL;
2160
+
2161
+ // Need to remember first line we strip
2162
+ if (children == NULL) {
2163
+ children = l;
2055
2164
  }
2056
2165
 
2166
+ // Advance to next line
2167
+ l = l->next;
2168
+ break;
2169
+
2057
2170
  case LINE_DEFINITION:
2058
2171
  if (block->type == BLOCK_DEFINITION) {
2059
- // Remove leading colon
2060
- token_remove_first_child(l);
2172
+ // Flag leading colon as markup
2173
+ if (l->child) {
2174
+ l->child->type = MARKER_DEFLIST_COLON;
2175
+
2176
+ temp = l->child->next;
2177
+
2178
+ strip_leading_whitespace(temp, e->dstr->str);
2179
+ }
2061
2180
  }
2062
2181
 
2063
2182
  case LINE_ATX_1:
@@ -2086,13 +2205,20 @@ handle_line:
2086
2205
  case LINE_INDENTED_SPACE:
2087
2206
 
2088
2207
  // Strip leading indent (Only the first one)
2089
- if (block->type != BLOCK_CODE_FENCED && l->child && ((l->child->type == INDENT_SPACE) || (l->child->type == INDENT_TAB))) {
2208
+ if (
2209
+ (block->type != BLOCK_CODE_FENCED && block->type != BLOCK_HTML) &&
2210
+ l->child &&
2211
+ ((l->child->type == INDENT_SPACE) || (l->child->type == INDENT_TAB))
2212
+ ) {
2090
2213
  token_remove_first_child(l);
2091
2214
  }
2092
2215
 
2093
2216
  // If we're not a code block, strip additional indents
2094
- if ((block->type != BLOCK_CODE_INDENTED) &&
2095
- (block->type != BLOCK_CODE_FENCED)) {
2217
+ if (
2218
+ (block->type != BLOCK_CODE_INDENTED) &&
2219
+ (block->type != BLOCK_CODE_FENCED) &&
2220
+ (block->type != BLOCK_HTML)
2221
+ ) {
2096
2222
  while (l->child && ((l->child->type == INDENT_SPACE) || (l->child->type == INDENT_TAB))) {
2097
2223
  token_remove_first_child(l);
2098
2224
  }
@@ -2126,10 +2252,9 @@ handle_line:
2126
2252
  strip_line_tokens_from_block(e, l);
2127
2253
 
2128
2254
  // Move children to parent
2129
- // Add ':' back
2130
- if (l->child->start > 0 && e->dstr->str[l->child->start - 1] == ':') {
2131
- temp = token_new(COLON, l->child->start - 1, 1);
2132
- token_append_child(block, temp);
2255
+ // Add ':' back?
2256
+ if (l->child && l->child->type == MARKER_DEFLIST_COLON) {
2257
+ l->child->type = COLON;
2133
2258
  }
2134
2259
 
2135
2260
  token_append_child(block, l->child);
@@ -2177,6 +2302,11 @@ handle_line:
2177
2302
 
2178
2303
  /// Parse part of the string into a token tree
2179
2304
  token * mmd_engine_parse_substring(mmd_engine * e, size_t byte_start, size_t byte_len) {
2305
+ // Fix indeterminate length
2306
+ if (byte_len == -1) {
2307
+ byte_len = e->dstr->currentStringLength - byte_start;
2308
+ }
2309
+
2180
2310
  // First, clean up any leftovers from previous parse
2181
2311
 
2182
2312
  mmd_engine_reset(e);
@@ -2191,14 +2321,31 @@ token * mmd_engine_parse_substring(mmd_engine * e, size_t byte_start, size_t byt
2191
2321
  if (e->extensions & EXT_PARSE_OPML) {
2192
2322
  // Convert from OPML first (if not done earlier)
2193
2323
  mmd_convert_opml_string(e, byte_start, byte_len);
2324
+
2325
+ // Fix start/stop
2326
+ byte_start = 0;
2327
+ byte_len = e->dstr->currentStringLength;
2328
+ } else if (e->extensions & EXT_PARSE_ITMZ) {
2329
+ // Convert from ITMZ first (if not done earlier)
2330
+ mmd_convert_itmz_string(e, byte_start, byte_len);
2331
+
2332
+ // Fix start/stop
2333
+ byte_start = 0;
2334
+ byte_len = e->dstr->currentStringLength;
2194
2335
  }
2195
2336
 
2196
2337
  // Tokenize the string
2197
2338
  token * doc = mmd_tokenize_string(e, byte_start, byte_len, false);
2198
2339
 
2340
+ // Describe token chain for debugging purposes
2341
+ // token_describe(doc, NULL);
2342
+
2199
2343
  // Parse tokens into blocks
2200
2344
  mmd_parse_token_chain(e, doc);
2201
2345
 
2346
+ // Describe token blocks for debugging purposes
2347
+ // token_describe(doc, NULL);
2348
+
2202
2349
  if (doc) {
2203
2350
  // Parse blocks for pairs
2204
2351
  mmd_assign_ambidextrous_tokens_in_block(e, doc, 0);
@@ -2217,10 +2364,6 @@ token * mmd_engine_parse_substring(mmd_engine * e, size_t byte_start, size_t byt
2217
2364
  stack_free(pair_stack);
2218
2365
 
2219
2366
  pair_emphasis_tokens(doc);
2220
-
2221
- #ifndef NDEBUG
2222
- token_tree_describe(doc, e->dstr->str);
2223
- #endif
2224
2367
  }
2225
2368
 
2226
2369
  // Return original extensions
@@ -2268,6 +2411,7 @@ bool mmd_d_string_has_metadata(DString * source, size_t * end) {
2268
2411
  bool mmd_engine_has_metadata(mmd_engine * e, size_t * end) {
2269
2412
  bool result = false;
2270
2413
  token * old_root;
2414
+ mmd_engine * temp = NULL;
2271
2415
 
2272
2416
  if (!e) {
2273
2417
  return false;
@@ -2287,11 +2431,35 @@ bool mmd_engine_has_metadata(mmd_engine * e, size_t * end) {
2287
2431
  // Preserve existing parse tree (if any)
2288
2432
  old_root = e->root;
2289
2433
 
2290
- // Tokenize the string (up until first empty line)
2291
- token * doc = mmd_tokenize_string(e, 0, e->dstr->currentStringLength, true);
2434
+ token * doc = NULL;
2292
2435
 
2293
- // Parse tokens into blocks
2294
- mmd_parse_token_chain(e, doc);
2436
+ if (old_root &&
2437
+ (old_root->type == DOC_START_TOKEN) &&
2438
+ (old_root->len == e->dstr->currentStringLength)
2439
+ ) {
2440
+ // Already parsed
2441
+ doc = old_root;
2442
+ } else {
2443
+ // Store stack sizes
2444
+ temp = mmd_engine_create(NULL, 0);
2445
+
2446
+ temp->abbreviation_stack->size = e->abbreviation_stack->size;
2447
+ temp->citation_stack->size = e->citation_stack->size;
2448
+ temp->definition_stack->size = e->definition_stack->size;
2449
+ temp->footnote_stack->size = e->footnote_stack->size;
2450
+ temp->glossary_stack->size = e->glossary_stack->size;
2451
+ temp->header_stack->size = e->header_stack->size;
2452
+ temp->link_stack->size = e->link_stack->size;
2453
+ temp->metadata_stack->size = e->metadata_stack->size;
2454
+ temp->table_stack->size = e->table_stack->size;
2455
+
2456
+
2457
+ // Tokenize the string (up until first empty line)
2458
+ doc = mmd_tokenize_string(e, 0, e->dstr->currentStringLength, true);
2459
+
2460
+ // Parse tokens into blocks
2461
+ mmd_parse_token_chain(e, doc);
2462
+ }
2295
2463
 
2296
2464
  if (doc) {
2297
2465
  if (doc->child && doc->child->type == BLOCK_META) {
@@ -2302,7 +2470,35 @@ bool mmd_engine_has_metadata(mmd_engine * e, size_t * end) {
2302
2470
  }
2303
2471
  }
2304
2472
 
2305
- token_tree_free(doc);
2473
+ if (old_root != doc) {
2474
+ token_tree_free(doc);
2475
+
2476
+ // Reset stack sizes
2477
+ // Except metadata stack, since we will need that for any subsequent requests
2478
+ // TODO: May need a more robust approach for this in the future
2479
+ e->abbreviation_stack->size = temp->abbreviation_stack->size;
2480
+ e->citation_stack->size = temp->citation_stack->size;
2481
+ e->definition_stack->size = temp->definition_stack->size;
2482
+ e->footnote_stack->size = temp->footnote_stack->size;
2483
+ e->glossary_stack->size = temp->glossary_stack->size;
2484
+ e->header_stack->size = temp->header_stack->size;
2485
+ e->link_stack->size = temp->link_stack->size;
2486
+ // e->metadata_stack->size = temp->metadata_stack->size;
2487
+ e->table_stack->size = temp->table_stack->size;
2488
+
2489
+ // And reset temp stack sizes
2490
+ temp->abbreviation_stack->size = 0;
2491
+ temp->citation_stack->size = 0;
2492
+ temp->definition_stack->size = 0;
2493
+ temp->footnote_stack->size = 0;
2494
+ temp->glossary_stack->size = 0;
2495
+ temp->header_stack->size = 0;
2496
+ temp->link_stack->size = 0;
2497
+ temp->metadata_stack->size = 0;
2498
+ temp->table_stack->size = 0;
2499
+
2500
+ mmd_engine_free(temp, true);
2501
+ }
2306
2502
  }
2307
2503
 
2308
2504
  // Restore previous parse tree
@@ -2703,7 +2899,7 @@ void mmd_engine_convert_to_file(mmd_engine * e, short format, const char * direc
2703
2899
 
2704
2900
  switch (format) {
2705
2901
  case FORMAT_EPUB:
2706
- epub_write_wrapper(filepath, output->str, e, directory);
2902
+ epub_write_wrapper(filepath, output, e, directory);
2707
2903
  break;
2708
2904
 
2709
2905
  case FORMAT_TEXTBUNDLE:
@@ -2711,7 +2907,7 @@ void mmd_engine_convert_to_file(mmd_engine * e, short format, const char * direc
2711
2907
  break;
2712
2908
 
2713
2909
  case FORMAT_TEXTBUNDLE_COMPRESSED:
2714
- textbundle_write_wrapper(filepath, output->str, e, directory);
2910
+ textbundle_write_wrapper(filepath, output, e, directory);
2715
2911
  break;
2716
2912
 
2717
2913
  default:
@@ -2767,6 +2963,9 @@ DString * mmd_engine_convert_to_data(mmd_engine * e, short format, const char *
2767
2963
  if (e->extensions & EXT_PARSE_OPML) {
2768
2964
  // Convert from OPML first (if not done earlier)
2769
2965
  mmd_convert_opml_string(e, 0, e->dstr->currentStringLength);
2966
+ } else if (e->extensions & EXT_PARSE_ITMZ) {
2967
+ // Convert from ITMZ first (if not done earlier)
2968
+ mmd_convert_itmz_string(e, 0, e->dstr->currentStringLength);
2770
2969
  }
2771
2970
 
2772
2971
  // Simply return text (transclusion is handled externally)
@@ -2781,26 +2980,32 @@ DString * mmd_engine_convert_to_data(mmd_engine * e, short format, const char *
2781
2980
 
2782
2981
  switch (format) {
2783
2982
  case FORMAT_EPUB:
2784
- result = epub_create(output->str, e, directory);
2983
+ result = epub_create(output, e, directory);
2785
2984
 
2786
2985
  d_string_free(output, true);
2787
2986
  break;
2788
2987
 
2789
2988
  case FORMAT_TEXTBUNDLE:
2790
2989
  case FORMAT_TEXTBUNDLE_COMPRESSED:
2791
- result = textbundle_create(output->str, e, directory);
2990
+ result = textbundle_create(output, e, directory);
2792
2991
 
2793
2992
  d_string_free(output, true);
2794
2993
  break;
2795
2994
 
2796
2995
  case FORMAT_ODT:
2797
- result = opendocument_text_create(output->str, e, directory);
2996
+ result = opendocument_text_create(output, e, directory);
2798
2997
 
2799
2998
  d_string_free(output, true);
2800
2999
  break;
2801
3000
 
2802
3001
  case FORMAT_FODT:
2803
- result = opendocument_flat_text_create(output->str, e, directory);
3002
+ result = opendocument_flat_text_create(output, e, directory);
3003
+
3004
+ d_string_free(output, true);
3005
+ break;
3006
+
3007
+ case FORMAT_ITMZ:
3008
+ result = itmz_create(output, e, directory);
2804
3009
 
2805
3010
  d_string_free(output, true);
2806
3011
  break;
@@ -2816,9 +3021,109 @@ DString * mmd_engine_convert_to_data(mmd_engine * e, short format, const char *
2816
3021
  }
2817
3022
 
2818
3023
 
3024
+ /// Convert OPML string to MMD
3025
+ DString * mmd_string_convert_opml_to_text(const char * source) {
3026
+ mmd_engine * e = mmd_engine_create_with_string(source, 0);
3027
+
3028
+ DString * result = mmd_engine_convert_opml_to_text(e);
3029
+
3030
+ e->root = NULL;
3031
+ mmd_engine_free(e, true);
3032
+
3033
+ return result;
3034
+ }
3035
+
3036
+
3037
+ /// Convert OPML DString to MMD
3038
+ DString * mmd_d_string_convert_opml_to_text(DString * source) {
3039
+ mmd_engine * e = mmd_engine_create_with_dstring(source, 0);
3040
+
3041
+ DString * result = mmd_engine_convert_opml_to_text(e);
3042
+
3043
+ e->root = NULL;
3044
+ mmd_engine_free(e, false);
3045
+
3046
+ return result;
3047
+ }
3048
+
3049
+
3050
+ /// Convert OPML to text without modifying original engine source
3051
+ DString * mmd_engine_convert_opml_to_text(mmd_engine * e) {
3052
+ DString * original = d_string_new("");
3053
+ d_string_append_c_array(original, e->dstr->str, e->dstr->currentStringLength);
3054
+
3055
+ mmd_convert_opml_string(e, 0, e->dstr->currentStringLength);
3056
+
3057
+ // Swap original and engine
3058
+ char * temp = e->dstr->str;
3059
+ size_t size = e->dstr->currentStringLength;
3060
+
3061
+ // Replace engine copy with original OPML text
3062
+ e->dstr->str = original->str;
3063
+ e->dstr->currentStringLength = original->currentStringLength;
3064
+
3065
+ // Original now contains the processed text
3066
+ original->str = temp;
3067
+ original->currentStringLength = size;
3068
+
3069
+ return original;
3070
+ }
3071
+
3072
+
3073
+ /// Convert ITMZ string to MMD
3074
+ DString * mmd_string_convert_itmz_to_text(const char * source) {
3075
+ mmd_engine * e = mmd_engine_create_with_string(source, 0);
3076
+
3077
+ DString * result = mmd_engine_convert_itmz_to_text(e);
3078
+
3079
+ e->root = NULL;
3080
+ mmd_engine_free(e, true);
3081
+
3082
+ return result;
3083
+ }
3084
+
3085
+
3086
+ /// Convert ITMZ DString to MMD
3087
+ DString * mmd_d_string_convert_itmz_to_text(DString * source) {
3088
+ mmd_engine * e = mmd_engine_create_with_dstring(source, 0);
3089
+
3090
+ DString * result = mmd_engine_convert_itmz_to_text(e);
3091
+
3092
+ e->root = NULL;
3093
+ mmd_engine_free(e, false);
3094
+
3095
+ return result;
3096
+ }
3097
+
3098
+
3099
+ /// Convert ITMZ to text without modifying original engine source
3100
+ DString * mmd_engine_convert_itmz_to_text(mmd_engine * e) {
3101
+ DString * original = d_string_new("");
3102
+ d_string_append_c_array(original, e->dstr->str, e->dstr->currentStringLength);
3103
+
3104
+ mmd_convert_itmz_string(e, 0, e->dstr->currentStringLength);
3105
+
3106
+ // Swap original and engine
3107
+ char * temp = e->dstr->str;
3108
+ size_t size = e->dstr->currentStringLength;
3109
+
3110
+ // Replace engine copy with original ITMZ text
3111
+ e->dstr->str = original->str;
3112
+ e->dstr->currentStringLength = original->currentStringLength;
3113
+
3114
+ // Original now contains the processed text
3115
+ original->str = temp;
3116
+ original->currentStringLength = size;
3117
+
3118
+ return original;
3119
+ }
3120
+
3121
+
2819
3122
  /// Return string containing engine version.
2820
3123
  char * mmd_version(void) {
2821
- char * result;
2822
- result = my_strdup(MULTIMARKDOWN_VERSION);
3124
+ char * result = NULL;
3125
+ #ifndef TEST
3126
+ result = my_strdup(LIBMULTIMARKDOWN_VERSION);
3127
+ #endif
2823
3128
  return result;
2824
3129
  }