rmultimarkdown 6.4.0.4 → 6.7.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/Rakefile +7 -13
- data/ext/Makefile +67 -55
- data/ext/extconf.rb +7 -5
- data/ext/mmd/aho-corasick.c +8 -8
- data/ext/mmd/aho-corasick.h +3 -3
- data/ext/mmd/argtable3.c +6537 -0
- data/ext/mmd/argtable3.h +273 -0
- data/ext/mmd/beamer.c +12 -1
- data/ext/mmd/char.c +120 -27
- data/ext/mmd/char.h +23 -23
- data/ext/mmd/critic_markup.c +7 -6
- data/ext/mmd/d_string.c +88 -32
- data/ext/mmd/{include/d_string.h → d_string.h} +50 -38
- data/ext/mmd/epub.c +36 -12
- data/ext/mmd/epub.h +2 -2
- data/ext/mmd/file.c +50 -40
- data/ext/mmd/file.h +2 -2
- data/ext/mmd/html.c +164 -99
- data/ext/mmd/html.h +3 -2
- data/ext/mmd/i18n.h +15 -11
- data/ext/mmd/itmz-lexer.c +16978 -0
- data/ext/mmd/itmz-lexer.h +132 -0
- data/ext/mmd/itmz-parser.c +1189 -0
- data/ext/mmd/itmz-parser.h +11 -0
- data/ext/mmd/itmz-reader.c +388 -0
- data/ext/mmd/itmz-reader.h +111 -0
- data/ext/mmd/itmz.c +567 -0
- data/ext/mmd/itmz.h +117 -0
- data/ext/mmd/latex.c +93 -41
- data/ext/mmd/lexer.c +3506 -2774
- data/ext/mmd/{include/libMultiMarkdown.h → libMultiMarkdown.h} +49 -2
- data/ext/mmd/main.c +612 -0
- data/ext/mmd/memoir.c +4 -1
- data/ext/mmd/miniz.c +6905 -6680
- data/ext/mmd/miniz.h +456 -476
- data/ext/mmd/mmd.c +399 -94
- data/ext/mmd/mmd.h +25 -25
- data/ext/mmd/object_pool.h +3 -3
- data/ext/mmd/opendocument-content.c +137 -69
- data/ext/mmd/opendocument-content.h +2 -2
- data/ext/mmd/opendocument.c +35 -14
- data/ext/mmd/opendocument.h +2 -2
- data/ext/mmd/opml-lexer.c +259 -637
- data/ext/mmd/opml-lexer.h +1 -17
- data/ext/mmd/opml-parser.c +194 -188
- data/ext/mmd/opml-reader.c +72 -142
- data/ext/mmd/opml-reader.h +1 -1
- data/ext/mmd/opml.c +13 -13
- data/ext/mmd/opml.h +1 -1
- data/ext/mmd/parser.c +1623 -1244
- data/ext/mmd/rng.c +8 -3
- data/ext/mmd/scanners.c +66625 -103198
- data/ext/mmd/scanners.h +1 -0
- data/ext/mmd/stack.c +62 -20
- data/ext/mmd/stack.h +10 -21
- data/ext/mmd/textbundle.c +23 -7
- data/ext/mmd/textbundle.h +2 -2
- data/ext/mmd/token.c +42 -16
- data/ext/mmd/{include/token.h → token.h} +22 -8
- data/ext/mmd/token_pairs.c +0 -16
- data/ext/mmd/transclude.c +6 -2
- data/ext/mmd/uthash.h +745 -745
- data/ext/mmd/version.h +8 -8
- data/ext/mmd/writer.c +225 -63
- data/ext/mmd/writer.h +50 -36
- data/ext/mmd/xml.c +855 -0
- data/ext/mmd/xml.h +134 -0
- data/ext/mmd/zip.c +71 -4
- data/ext/mmd/zip.h +7 -1
- data/ext/ruby_multi_markdown.c +9 -18
- data/lib/multi_markdown/version.rb +1 -1
- data/lib/multi_markdown.bundle +0 -0
- data/rmultimarkdown.gemspec +0 -2
- metadata +22 -28
- data/ext/mmd/char_lookup.c +0 -212
data/ext/mmd/mmd.c
CHANGED
|
@@ -60,6 +60,8 @@
|
|
|
60
60
|
#include "d_string.h"
|
|
61
61
|
#include "epub.h"
|
|
62
62
|
#include "i18n.h"
|
|
63
|
+
#include "itmz.h"
|
|
64
|
+
#include "itmz-reader.h"
|
|
63
65
|
#include "lexer.h"
|
|
64
66
|
#include "libMultiMarkdown.h"
|
|
65
67
|
#include "mmd.h"
|
|
@@ -77,10 +79,10 @@
|
|
|
77
79
|
|
|
78
80
|
|
|
79
81
|
// Basic parser function declarations
|
|
80
|
-
void * ParseAlloc();
|
|
81
|
-
void Parse();
|
|
82
|
-
void ParseFree();
|
|
83
|
-
void ParseTrace();
|
|
82
|
+
void * ParseAlloc(void *);
|
|
83
|
+
void Parse(void *, int, void *, void *);
|
|
84
|
+
void ParseFree(void *, void *);
|
|
85
|
+
void ParseTrace(FILE * stream, char * zPrefix);
|
|
84
86
|
|
|
85
87
|
void mmd_pair_tokens_in_block(token * block, token_pair_engine * e, stack * s);
|
|
86
88
|
|
|
@@ -124,6 +126,7 @@ mmd_engine * mmd_engine_create(DString * d, unsigned long extensions) {
|
|
|
124
126
|
e->quotes_lang = ENGLISH;
|
|
125
127
|
|
|
126
128
|
e->abbreviation_stack = stack_new(0);
|
|
129
|
+
e->critic_stack = stack_new(0);
|
|
127
130
|
e->citation_stack = stack_new(0);
|
|
128
131
|
e->definition_stack = stack_new(0);
|
|
129
132
|
e->footnote_stack = stack_new(0);
|
|
@@ -307,6 +310,7 @@ void mmd_engine_reset(mmd_engine * e) {
|
|
|
307
310
|
}
|
|
308
311
|
|
|
309
312
|
// Reset other stacks
|
|
313
|
+
e->critic_stack->size = 0;
|
|
310
314
|
e->definition_stack->size = 0;
|
|
311
315
|
e->header_stack->size = 0;
|
|
312
316
|
e->table_stack->size = 0;
|
|
@@ -337,6 +341,7 @@ void mmd_engine_free(mmd_engine * e, bool freeDString) {
|
|
|
337
341
|
|
|
338
342
|
// Takedown
|
|
339
343
|
stack_free(e->abbreviation_stack);
|
|
344
|
+
stack_free(e->critic_stack);
|
|
340
345
|
stack_free(e->citation_stack);
|
|
341
346
|
stack_free(e->footnote_stack);
|
|
342
347
|
stack_free(e->glossary_stack);
|
|
@@ -347,6 +352,18 @@ void mmd_engine_free(mmd_engine * e, bool freeDString) {
|
|
|
347
352
|
}
|
|
348
353
|
|
|
349
354
|
|
|
355
|
+
/// Access DString directly
|
|
356
|
+
DString * mmd_engine_d_string(mmd_engine * e) {
|
|
357
|
+
return e->dstr;
|
|
358
|
+
}
|
|
359
|
+
|
|
360
|
+
|
|
361
|
+
/// Return token tree after previous parsing
|
|
362
|
+
token * mmd_engine_root(mmd_engine * e) {
|
|
363
|
+
return e->root;
|
|
364
|
+
}
|
|
365
|
+
|
|
366
|
+
|
|
350
367
|
bool line_is_empty(token * t) {
|
|
351
368
|
while (t) {
|
|
352
369
|
switch (t->type) {
|
|
@@ -511,18 +528,34 @@ void mmd_assign_line_type(mmd_engine * e, token * line) {
|
|
|
511
528
|
line->type = (first_child->type - HASH1) + LINE_ATX_1;
|
|
512
529
|
first_child->type = (line->type - LINE_ATX_1) + MARKER_H1;
|
|
513
530
|
|
|
531
|
+
t = line->child->tail;
|
|
532
|
+
|
|
514
533
|
// Strip trailing '#' sequence if present
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
534
|
+
while (t) {
|
|
535
|
+
switch (t->type) {
|
|
536
|
+
case INDENT_TAB:
|
|
537
|
+
case INDENT_SPACE:
|
|
538
|
+
case NON_INDENT_SPACE:
|
|
539
|
+
case TEXT_NL:
|
|
540
|
+
case TEXT_LINEBREAK:
|
|
541
|
+
case TEXT_LINEBREAK_SP:
|
|
542
|
+
t = t->prev;
|
|
543
|
+
break;
|
|
544
|
+
|
|
545
|
+
case HASH1:
|
|
546
|
+
case HASH2:
|
|
547
|
+
case HASH3:
|
|
548
|
+
case HASH4:
|
|
549
|
+
case HASH5:
|
|
550
|
+
case HASH6:
|
|
551
|
+
t->type -= HASH1;
|
|
552
|
+
t->type += MARKER_H1;
|
|
553
|
+
t = NULL;
|
|
554
|
+
break;
|
|
555
|
+
|
|
556
|
+
default:
|
|
557
|
+
// Break out of loop
|
|
558
|
+
t = NULL;
|
|
526
559
|
}
|
|
527
560
|
}
|
|
528
561
|
} else {
|
|
@@ -609,6 +642,15 @@ void mmd_assign_line_type(mmd_engine * e, token * line) {
|
|
|
609
642
|
|
|
610
643
|
case DASH_N:
|
|
611
644
|
case DASH_M:
|
|
645
|
+
|
|
646
|
+
// This could be a table separator instead of a list
|
|
647
|
+
if (!(e->extensions & EXT_COMPATIBILITY)) {
|
|
648
|
+
if (scan_table_separator(&source[first_child->start])) {
|
|
649
|
+
line->type = LINE_TABLE_SEPARATOR;
|
|
650
|
+
break;
|
|
651
|
+
}
|
|
652
|
+
}
|
|
653
|
+
|
|
612
654
|
if (scan_setext(&source[first_child->start])) {
|
|
613
655
|
line->type = LINE_SETEXT_2;
|
|
614
656
|
break;
|
|
@@ -748,6 +790,8 @@ void mmd_assign_line_type(mmd_engine * e, token * line) {
|
|
|
748
790
|
line->type = LINE_EMPTY;
|
|
749
791
|
break;
|
|
750
792
|
|
|
793
|
+
case TOC_SINGLE:
|
|
794
|
+
case TOC_RANGE:
|
|
751
795
|
case TOC:
|
|
752
796
|
line->type = (e->extensions & EXT_COMPATIBILITY) ? LINE_PLAIN : LINE_TOC;
|
|
753
797
|
break;
|
|
@@ -870,6 +914,7 @@ void deindent_line(token * line) {
|
|
|
870
914
|
if (line->child) {
|
|
871
915
|
line->child->prev = NULL;
|
|
872
916
|
line->child->tail = t->tail;
|
|
917
|
+
line->start = line->child->start;
|
|
873
918
|
}
|
|
874
919
|
|
|
875
920
|
token_free(t);
|
|
@@ -896,28 +941,47 @@ void deindent_block(mmd_engine * e, token * block) {
|
|
|
896
941
|
}
|
|
897
942
|
|
|
898
943
|
|
|
944
|
+
void prune_first_child_from_line(token * line) {
|
|
945
|
+
token * t = line->child;
|
|
946
|
+
|
|
947
|
+
if (t) {
|
|
948
|
+
line->child = t->next;
|
|
949
|
+
t->next = NULL;
|
|
950
|
+
|
|
951
|
+
if (line->child) {
|
|
952
|
+
line->child->prev = NULL;
|
|
953
|
+
line->child->tail = t->tail;
|
|
954
|
+
}
|
|
955
|
+
|
|
956
|
+
token_free(t);
|
|
957
|
+
}
|
|
958
|
+
}
|
|
959
|
+
|
|
960
|
+
|
|
899
961
|
/// Strip leading blockquote marker from line
|
|
900
962
|
void strip_quote_markers_from_line(token * line, const char * source) {
|
|
901
963
|
if (!line || !line->child) {
|
|
902
964
|
return;
|
|
903
965
|
}
|
|
904
966
|
|
|
905
|
-
token * t;
|
|
967
|
+
token * t = NULL;
|
|
906
968
|
|
|
907
|
-
|
|
908
|
-
|
|
909
|
-
case NON_INDENT_SPACE:
|
|
910
|
-
t = line->child;
|
|
911
|
-
line->child = t->next;
|
|
912
|
-
t->next = NULL;
|
|
969
|
+
while (line->child && t != line->child) {
|
|
970
|
+
t = line->child;
|
|
913
971
|
|
|
914
|
-
|
|
915
|
-
|
|
916
|
-
line->child->
|
|
917
|
-
|
|
972
|
+
switch (line->child->type) {
|
|
973
|
+
case TEXT_PLAIN:
|
|
974
|
+
if ((line->child->len == 1) && (source[line->child->start] == ' ')) {
|
|
975
|
+
prune_first_child_from_line(line);
|
|
976
|
+
}
|
|
918
977
|
|
|
919
|
-
|
|
920
|
-
|
|
978
|
+
break;
|
|
979
|
+
|
|
980
|
+
case MARKER_BLOCKQUOTE:
|
|
981
|
+
case NON_INDENT_SPACE:
|
|
982
|
+
prune_first_child_from_line(line);
|
|
983
|
+
break;
|
|
984
|
+
}
|
|
921
985
|
}
|
|
922
986
|
|
|
923
987
|
if (line->child && (line->child->type == TEXT_PLAIN)) {
|
|
@@ -1062,6 +1126,12 @@ token * mmd_tokenize_string(mmd_engine * e, size_t start, size_t len, bool stop_
|
|
|
1062
1126
|
if (e->allow_meta && root->child == line) {
|
|
1063
1127
|
if (line->type == LINE_SETEXT_2) {
|
|
1064
1128
|
line->type = LINE_YAML;
|
|
1129
|
+
} else if (
|
|
1130
|
+
(line->type == LINE_META) &&
|
|
1131
|
+
scan_empty_meta_line(&e->dstr->str[line->start])) {
|
|
1132
|
+
// Don't start metadata with empty meta line (e.g. "foo:\n")
|
|
1133
|
+
e->allow_meta = false;
|
|
1134
|
+
line->type = LINE_PLAIN;
|
|
1065
1135
|
} else if (line->type != LINE_META) {
|
|
1066
1136
|
e->allow_meta = false;
|
|
1067
1137
|
}
|
|
@@ -1117,13 +1187,12 @@ void mmd_parse_token_chain(mmd_engine * e, token * chain) {
|
|
|
1117
1187
|
|
|
1118
1188
|
e->recurse_depth++;
|
|
1119
1189
|
|
|
1120
|
-
void* pParser = ParseAlloc (malloc); // Create a parser (for lemon)
|
|
1190
|
+
void * pParser = ParseAlloc (malloc); // Create a parser (for lemon)
|
|
1121
1191
|
token * walker = chain->child; // Walk the existing tree
|
|
1122
1192
|
token * remainder; // Hold unparsed tail of chain
|
|
1123
1193
|
|
|
1124
|
-
|
|
1125
|
-
ParseTrace(stderr, "parser >>");
|
|
1126
|
-
#endif
|
|
1194
|
+
// Enable to monitor parsing steps
|
|
1195
|
+
// ParseTrace(stderr, "parser >> ");
|
|
1127
1196
|
|
|
1128
1197
|
// Remove existing token tree
|
|
1129
1198
|
e->root = NULL;
|
|
@@ -1139,19 +1208,12 @@ void mmd_parse_token_chain(mmd_engine * e, token * chain) {
|
|
|
1139
1208
|
remainder->prev = NULL;
|
|
1140
1209
|
}
|
|
1141
1210
|
|
|
1142
|
-
#ifndef NDEBUG
|
|
1143
|
-
fprintf(stderr, "\nNew line\n");
|
|
1144
|
-
#endif
|
|
1145
|
-
|
|
1146
1211
|
Parse(pParser, walker->type, walker, e);
|
|
1147
1212
|
|
|
1148
1213
|
walker = remainder;
|
|
1149
1214
|
}
|
|
1150
1215
|
|
|
1151
1216
|
// Signal finish to parser
|
|
1152
|
-
#ifndef NDEBUG
|
|
1153
|
-
fprintf(stderr, "\nFinish parse\n");
|
|
1154
|
-
#endif
|
|
1155
1217
|
Parse(pParser, 0, NULL, e);
|
|
1156
1218
|
|
|
1157
1219
|
// Disconnect of (now empty) root
|
|
@@ -1249,6 +1311,7 @@ void mmd_assign_ambidextrous_tokens_in_block(mmd_engine * e, token * block, size
|
|
|
1249
1311
|
size_t lead_count, lag_count, pre_count, post_count;
|
|
1250
1312
|
|
|
1251
1313
|
token * t = block->child;
|
|
1314
|
+
token * new;
|
|
1252
1315
|
|
|
1253
1316
|
char * str = e->dstr->str;
|
|
1254
1317
|
|
|
@@ -1268,6 +1331,10 @@ void mmd_assign_ambidextrous_tokens_in_block(mmd_engine * e, token * block, size
|
|
|
1268
1331
|
case DOC_START_TOKEN:
|
|
1269
1332
|
case BLOCK_BLOCKQUOTE:
|
|
1270
1333
|
case BLOCK_DEF_ABBREVIATION:
|
|
1334
|
+
case BLOCK_DEF_CITATION:
|
|
1335
|
+
case BLOCK_DEF_FOOTNOTE:
|
|
1336
|
+
case BLOCK_DEF_GLOSSARY:
|
|
1337
|
+
case BLOCK_DEF_LINK:
|
|
1271
1338
|
case BLOCK_DEFLIST:
|
|
1272
1339
|
case BLOCK_DEFINITION:
|
|
1273
1340
|
case BLOCK_H1:
|
|
@@ -1298,10 +1365,17 @@ void mmd_assign_ambidextrous_tokens_in_block(mmd_engine * e, token * block, size
|
|
|
1298
1365
|
|
|
1299
1366
|
case CRITIC_SUB_DIV:
|
|
1300
1367
|
// Divide this into two tokens
|
|
1301
|
-
|
|
1302
|
-
|
|
1303
|
-
|
|
1304
|
-
|
|
1368
|
+
new = token_new(CRITIC_SUB_DIV_B, t->start + 1, 1);
|
|
1369
|
+
|
|
1370
|
+
new->next = t->next;
|
|
1371
|
+
|
|
1372
|
+
if (new->next) {
|
|
1373
|
+
new->next->prev = new;
|
|
1374
|
+
}
|
|
1375
|
+
|
|
1376
|
+
t->next = new;
|
|
1377
|
+
new->prev = t;
|
|
1378
|
+
|
|
1305
1379
|
t->len = 1;
|
|
1306
1380
|
t->type = CRITIC_SUB_DIV_A;
|
|
1307
1381
|
break;
|
|
@@ -1506,8 +1580,12 @@ void mmd_assign_ambidextrous_tokens_in_block(mmd_engine * e, token * block, size
|
|
|
1506
1580
|
case QUOTE_DOUBLE:
|
|
1507
1581
|
offset = t->start;
|
|
1508
1582
|
|
|
1509
|
-
if (
|
|
1583
|
+
if (offset == 0) {
|
|
1584
|
+
t->can_close = 0;
|
|
1585
|
+
} else if (char_is_whitespace_or_line_ending(str[offset - 1])) {
|
|
1510
1586
|
t->can_close = 0;
|
|
1587
|
+
} else if (! char_is_whitespace_or_line_ending_or_punctuation(str[offset - 1])) {
|
|
1588
|
+
t->can_open = 0;
|
|
1511
1589
|
}
|
|
1512
1590
|
|
|
1513
1591
|
if (char_is_whitespace_or_line_ending(str[offset + 1])) {
|
|
@@ -1745,8 +1823,11 @@ void recursive_parse_indent(mmd_engine * e, token * block) {
|
|
|
1745
1823
|
// Strip tokens?
|
|
1746
1824
|
switch (block->type) {
|
|
1747
1825
|
case BLOCK_DEFINITION:
|
|
1748
|
-
//
|
|
1749
|
-
|
|
1826
|
+
// Flag leading ':' as markup
|
|
1827
|
+
block->child->child->type = MARKER_DEFLIST_COLON;
|
|
1828
|
+
|
|
1829
|
+
// Strip whitespace between colon and remainder of line
|
|
1830
|
+
strip_leading_whitespace(block->child->child->next, e->dstr->str);
|
|
1750
1831
|
break;
|
|
1751
1832
|
}
|
|
1752
1833
|
|
|
@@ -1763,16 +1844,41 @@ void is_list_loose(token * list) {
|
|
|
1763
1844
|
return;
|
|
1764
1845
|
}
|
|
1765
1846
|
|
|
1766
|
-
|
|
1847
|
+
if (walker->next == NULL) {
|
|
1848
|
+
// Single item list
|
|
1767
1849
|
if (walker->type == BLOCK_LIST_ITEM) {
|
|
1768
1850
|
if (walker->child->type == BLOCK_PARA) {
|
|
1769
|
-
|
|
1851
|
+
walker = walker->child;
|
|
1852
|
+
|
|
1853
|
+
while (walker->next != NULL) {
|
|
1854
|
+
if (walker->type == BLOCK_EMPTY) {
|
|
1855
|
+
if (walker->next->type == BLOCK_PARA) {
|
|
1856
|
+
loose = true;
|
|
1857
|
+
}
|
|
1858
|
+
}
|
|
1859
|
+
|
|
1860
|
+
walker = walker->next;
|
|
1861
|
+
}
|
|
1770
1862
|
} else {
|
|
1771
1863
|
walker->type = BLOCK_LIST_ITEM_TIGHT;
|
|
1772
1864
|
}
|
|
1773
1865
|
}
|
|
1866
|
+
} else {
|
|
1867
|
+
while (walker->next != NULL) {
|
|
1868
|
+
if (walker->type == BLOCK_LIST_ITEM) {
|
|
1869
|
+
if (walker->child->type == BLOCK_PARA) {
|
|
1870
|
+
loose = true;
|
|
1871
|
+
} else {
|
|
1872
|
+
walker->type = BLOCK_LIST_ITEM_TIGHT;
|
|
1873
|
+
}
|
|
1874
|
+
}
|
|
1875
|
+
|
|
1876
|
+
walker = walker->next;
|
|
1877
|
+
}
|
|
1774
1878
|
|
|
1775
|
-
walker
|
|
1879
|
+
if (walker->child && walker->child->next && (walker->child->next->next != NULL)) {
|
|
1880
|
+
loose = true;
|
|
1881
|
+
}
|
|
1776
1882
|
}
|
|
1777
1883
|
|
|
1778
1884
|
if (loose) {
|
|
@@ -1852,7 +1958,12 @@ meta:
|
|
|
1852
1958
|
len = scan_meta_key(&source[l->start]);
|
|
1853
1959
|
m = meta_new(source, l->start, len);
|
|
1854
1960
|
start = l->start + len + 1;
|
|
1855
|
-
len = l->start + l->len - start
|
|
1961
|
+
len = l->start + l->len - start;
|
|
1962
|
+
|
|
1963
|
+
if (char_is_line_ending(source[start + len])) {
|
|
1964
|
+
len--;
|
|
1965
|
+
}
|
|
1966
|
+
|
|
1856
1967
|
d_string_append_c_array(d, &source[start], len);
|
|
1857
1968
|
stack_push(e->metadata_stack, m);
|
|
1858
1969
|
break;
|
|
@@ -1874,16 +1985,13 @@ plain:
|
|
|
1874
1985
|
case LINE_YAML:
|
|
1875
1986
|
break;
|
|
1876
1987
|
|
|
1877
|
-
|
|
1988
|
+
default:
|
|
1878
1989
|
if (scan_meta_line(&source[l->start])) {
|
|
1879
1990
|
goto meta;
|
|
1880
1991
|
} else {
|
|
1881
1992
|
goto plain;
|
|
1882
1993
|
}
|
|
1883
1994
|
|
|
1884
|
-
default:
|
|
1885
|
-
fprintf(stderr, "ERROR!\n");
|
|
1886
|
-
token_describe(l, NULL);
|
|
1887
1995
|
break;
|
|
1888
1996
|
}
|
|
1889
1997
|
|
|
@@ -1908,15 +2016,16 @@ void strip_line_tokens_from_deflist(mmd_engine * e, token * deflist) {
|
|
|
1908
2016
|
walker->type = TEXT_EMPTY;
|
|
1909
2017
|
break;
|
|
1910
2018
|
|
|
1911
|
-
case LINE_PLAIN:
|
|
1912
|
-
walker->type = BLOCK_TERM;
|
|
1913
|
-
|
|
1914
2019
|
case BLOCK_TERM:
|
|
1915
2020
|
break;
|
|
1916
2021
|
|
|
1917
2022
|
case BLOCK_DEFINITION:
|
|
1918
2023
|
strip_line_tokens_from_block(e, walker);
|
|
1919
2024
|
break;
|
|
2025
|
+
|
|
2026
|
+
default:
|
|
2027
|
+
walker->type = BLOCK_TERM;
|
|
2028
|
+
|
|
1920
2029
|
}
|
|
1921
2030
|
|
|
1922
2031
|
walker = walker->next;
|
|
@@ -2000,11 +2109,6 @@ void strip_line_tokens_from_block(mmd_engine * e, token * block) {
|
|
|
2000
2109
|
return;
|
|
2001
2110
|
}
|
|
2002
2111
|
|
|
2003
|
-
#ifndef NDEBUG
|
|
2004
|
-
fprintf(stderr, "Strip line tokens from %d (%lu:%lu) (child %d)\n", block->type, block->start, block->len, block->child->type);
|
|
2005
|
-
token_tree_describe(block, e->dstr->str);
|
|
2006
|
-
#endif
|
|
2007
|
-
|
|
2008
2112
|
token * l = block->child;
|
|
2009
2113
|
|
|
2010
2114
|
// Custom actions
|
|
@@ -2046,18 +2150,33 @@ void strip_line_tokens_from_block(mmd_engine * e, token * block) {
|
|
|
2046
2150
|
switch (l->type) {
|
|
2047
2151
|
case LINE_SETEXT_1:
|
|
2048
2152
|
case LINE_SETEXT_2:
|
|
2049
|
-
|
|
2050
|
-
|
|
2051
|
-
|
|
2052
|
-
|
|
2053
|
-
|
|
2054
|
-
|
|
2153
|
+
temp = token_new_parent(l->child, MARKER_SETEXT_1 + l->type - LINE_SETEXT_1);
|
|
2154
|
+
|
|
2155
|
+
// Add contents of line to parent block
|
|
2156
|
+
token_append_child(block, temp);
|
|
2157
|
+
|
|
2158
|
+
// Disconnect line from its contents
|
|
2159
|
+
l->child = NULL;
|
|
2160
|
+
|
|
2161
|
+
// Need to remember first line we strip
|
|
2162
|
+
if (children == NULL) {
|
|
2163
|
+
children = l;
|
|
2055
2164
|
}
|
|
2056
2165
|
|
|
2166
|
+
// Advance to next line
|
|
2167
|
+
l = l->next;
|
|
2168
|
+
break;
|
|
2169
|
+
|
|
2057
2170
|
case LINE_DEFINITION:
|
|
2058
2171
|
if (block->type == BLOCK_DEFINITION) {
|
|
2059
|
-
//
|
|
2060
|
-
|
|
2172
|
+
// Flag leading colon as markup
|
|
2173
|
+
if (l->child) {
|
|
2174
|
+
l->child->type = MARKER_DEFLIST_COLON;
|
|
2175
|
+
|
|
2176
|
+
temp = l->child->next;
|
|
2177
|
+
|
|
2178
|
+
strip_leading_whitespace(temp, e->dstr->str);
|
|
2179
|
+
}
|
|
2061
2180
|
}
|
|
2062
2181
|
|
|
2063
2182
|
case LINE_ATX_1:
|
|
@@ -2086,13 +2205,20 @@ handle_line:
|
|
|
2086
2205
|
case LINE_INDENTED_SPACE:
|
|
2087
2206
|
|
|
2088
2207
|
// Strip leading indent (Only the first one)
|
|
2089
|
-
if (
|
|
2208
|
+
if (
|
|
2209
|
+
(block->type != BLOCK_CODE_FENCED && block->type != BLOCK_HTML) &&
|
|
2210
|
+
l->child &&
|
|
2211
|
+
((l->child->type == INDENT_SPACE) || (l->child->type == INDENT_TAB))
|
|
2212
|
+
) {
|
|
2090
2213
|
token_remove_first_child(l);
|
|
2091
2214
|
}
|
|
2092
2215
|
|
|
2093
2216
|
// If we're not a code block, strip additional indents
|
|
2094
|
-
if (
|
|
2095
|
-
|
|
2217
|
+
if (
|
|
2218
|
+
(block->type != BLOCK_CODE_INDENTED) &&
|
|
2219
|
+
(block->type != BLOCK_CODE_FENCED) &&
|
|
2220
|
+
(block->type != BLOCK_HTML)
|
|
2221
|
+
) {
|
|
2096
2222
|
while (l->child && ((l->child->type == INDENT_SPACE) || (l->child->type == INDENT_TAB))) {
|
|
2097
2223
|
token_remove_first_child(l);
|
|
2098
2224
|
}
|
|
@@ -2126,10 +2252,9 @@ handle_line:
|
|
|
2126
2252
|
strip_line_tokens_from_block(e, l);
|
|
2127
2253
|
|
|
2128
2254
|
// Move children to parent
|
|
2129
|
-
// Add ':' back
|
|
2130
|
-
if (l->child
|
|
2131
|
-
|
|
2132
|
-
token_append_child(block, temp);
|
|
2255
|
+
// Add ':' back?
|
|
2256
|
+
if (l->child && l->child->type == MARKER_DEFLIST_COLON) {
|
|
2257
|
+
l->child->type = COLON;
|
|
2133
2258
|
}
|
|
2134
2259
|
|
|
2135
2260
|
token_append_child(block, l->child);
|
|
@@ -2177,6 +2302,11 @@ handle_line:
|
|
|
2177
2302
|
|
|
2178
2303
|
/// Parse part of the string into a token tree
|
|
2179
2304
|
token * mmd_engine_parse_substring(mmd_engine * e, size_t byte_start, size_t byte_len) {
|
|
2305
|
+
// Fix indeterminate length
|
|
2306
|
+
if (byte_len == -1) {
|
|
2307
|
+
byte_len = e->dstr->currentStringLength - byte_start;
|
|
2308
|
+
}
|
|
2309
|
+
|
|
2180
2310
|
// First, clean up any leftovers from previous parse
|
|
2181
2311
|
|
|
2182
2312
|
mmd_engine_reset(e);
|
|
@@ -2191,14 +2321,31 @@ token * mmd_engine_parse_substring(mmd_engine * e, size_t byte_start, size_t byt
|
|
|
2191
2321
|
if (e->extensions & EXT_PARSE_OPML) {
|
|
2192
2322
|
// Convert from OPML first (if not done earlier)
|
|
2193
2323
|
mmd_convert_opml_string(e, byte_start, byte_len);
|
|
2324
|
+
|
|
2325
|
+
// Fix start/stop
|
|
2326
|
+
byte_start = 0;
|
|
2327
|
+
byte_len = e->dstr->currentStringLength;
|
|
2328
|
+
} else if (e->extensions & EXT_PARSE_ITMZ) {
|
|
2329
|
+
// Convert from ITMZ first (if not done earlier)
|
|
2330
|
+
mmd_convert_itmz_string(e, byte_start, byte_len);
|
|
2331
|
+
|
|
2332
|
+
// Fix start/stop
|
|
2333
|
+
byte_start = 0;
|
|
2334
|
+
byte_len = e->dstr->currentStringLength;
|
|
2194
2335
|
}
|
|
2195
2336
|
|
|
2196
2337
|
// Tokenize the string
|
|
2197
2338
|
token * doc = mmd_tokenize_string(e, byte_start, byte_len, false);
|
|
2198
2339
|
|
|
2340
|
+
// Describe token chain for debugging purposes
|
|
2341
|
+
// token_describe(doc, NULL);
|
|
2342
|
+
|
|
2199
2343
|
// Parse tokens into blocks
|
|
2200
2344
|
mmd_parse_token_chain(e, doc);
|
|
2201
2345
|
|
|
2346
|
+
// Describe token blocks for debugging purposes
|
|
2347
|
+
// token_describe(doc, NULL);
|
|
2348
|
+
|
|
2202
2349
|
if (doc) {
|
|
2203
2350
|
// Parse blocks for pairs
|
|
2204
2351
|
mmd_assign_ambidextrous_tokens_in_block(e, doc, 0);
|
|
@@ -2217,10 +2364,6 @@ token * mmd_engine_parse_substring(mmd_engine * e, size_t byte_start, size_t byt
|
|
|
2217
2364
|
stack_free(pair_stack);
|
|
2218
2365
|
|
|
2219
2366
|
pair_emphasis_tokens(doc);
|
|
2220
|
-
|
|
2221
|
-
#ifndef NDEBUG
|
|
2222
|
-
token_tree_describe(doc, e->dstr->str);
|
|
2223
|
-
#endif
|
|
2224
2367
|
}
|
|
2225
2368
|
|
|
2226
2369
|
// Return original extensions
|
|
@@ -2268,6 +2411,7 @@ bool mmd_d_string_has_metadata(DString * source, size_t * end) {
|
|
|
2268
2411
|
bool mmd_engine_has_metadata(mmd_engine * e, size_t * end) {
|
|
2269
2412
|
bool result = false;
|
|
2270
2413
|
token * old_root;
|
|
2414
|
+
mmd_engine * temp = NULL;
|
|
2271
2415
|
|
|
2272
2416
|
if (!e) {
|
|
2273
2417
|
return false;
|
|
@@ -2287,11 +2431,35 @@ bool mmd_engine_has_metadata(mmd_engine * e, size_t * end) {
|
|
|
2287
2431
|
// Preserve existing parse tree (if any)
|
|
2288
2432
|
old_root = e->root;
|
|
2289
2433
|
|
|
2290
|
-
|
|
2291
|
-
token * doc = mmd_tokenize_string(e, 0, e->dstr->currentStringLength, true);
|
|
2434
|
+
token * doc = NULL;
|
|
2292
2435
|
|
|
2293
|
-
|
|
2294
|
-
|
|
2436
|
+
if (old_root &&
|
|
2437
|
+
(old_root->type == DOC_START_TOKEN) &&
|
|
2438
|
+
(old_root->len == e->dstr->currentStringLength)
|
|
2439
|
+
) {
|
|
2440
|
+
// Already parsed
|
|
2441
|
+
doc = old_root;
|
|
2442
|
+
} else {
|
|
2443
|
+
// Store stack sizes
|
|
2444
|
+
temp = mmd_engine_create(NULL, 0);
|
|
2445
|
+
|
|
2446
|
+
temp->abbreviation_stack->size = e->abbreviation_stack->size;
|
|
2447
|
+
temp->citation_stack->size = e->citation_stack->size;
|
|
2448
|
+
temp->definition_stack->size = e->definition_stack->size;
|
|
2449
|
+
temp->footnote_stack->size = e->footnote_stack->size;
|
|
2450
|
+
temp->glossary_stack->size = e->glossary_stack->size;
|
|
2451
|
+
temp->header_stack->size = e->header_stack->size;
|
|
2452
|
+
temp->link_stack->size = e->link_stack->size;
|
|
2453
|
+
temp->metadata_stack->size = e->metadata_stack->size;
|
|
2454
|
+
temp->table_stack->size = e->table_stack->size;
|
|
2455
|
+
|
|
2456
|
+
|
|
2457
|
+
// Tokenize the string (up until first empty line)
|
|
2458
|
+
doc = mmd_tokenize_string(e, 0, e->dstr->currentStringLength, true);
|
|
2459
|
+
|
|
2460
|
+
// Parse tokens into blocks
|
|
2461
|
+
mmd_parse_token_chain(e, doc);
|
|
2462
|
+
}
|
|
2295
2463
|
|
|
2296
2464
|
if (doc) {
|
|
2297
2465
|
if (doc->child && doc->child->type == BLOCK_META) {
|
|
@@ -2302,7 +2470,35 @@ bool mmd_engine_has_metadata(mmd_engine * e, size_t * end) {
|
|
|
2302
2470
|
}
|
|
2303
2471
|
}
|
|
2304
2472
|
|
|
2305
|
-
|
|
2473
|
+
if (old_root != doc) {
|
|
2474
|
+
token_tree_free(doc);
|
|
2475
|
+
|
|
2476
|
+
// Reset stack sizes
|
|
2477
|
+
// Except metadata stack, since we will need that for any subsequent requests
|
|
2478
|
+
// TODO: May need a more robust approach for this in the future
|
|
2479
|
+
e->abbreviation_stack->size = temp->abbreviation_stack->size;
|
|
2480
|
+
e->citation_stack->size = temp->citation_stack->size;
|
|
2481
|
+
e->definition_stack->size = temp->definition_stack->size;
|
|
2482
|
+
e->footnote_stack->size = temp->footnote_stack->size;
|
|
2483
|
+
e->glossary_stack->size = temp->glossary_stack->size;
|
|
2484
|
+
e->header_stack->size = temp->header_stack->size;
|
|
2485
|
+
e->link_stack->size = temp->link_stack->size;
|
|
2486
|
+
// e->metadata_stack->size = temp->metadata_stack->size;
|
|
2487
|
+
e->table_stack->size = temp->table_stack->size;
|
|
2488
|
+
|
|
2489
|
+
// And reset temp stack sizes
|
|
2490
|
+
temp->abbreviation_stack->size = 0;
|
|
2491
|
+
temp->citation_stack->size = 0;
|
|
2492
|
+
temp->definition_stack->size = 0;
|
|
2493
|
+
temp->footnote_stack->size = 0;
|
|
2494
|
+
temp->glossary_stack->size = 0;
|
|
2495
|
+
temp->header_stack->size = 0;
|
|
2496
|
+
temp->link_stack->size = 0;
|
|
2497
|
+
temp->metadata_stack->size = 0;
|
|
2498
|
+
temp->table_stack->size = 0;
|
|
2499
|
+
|
|
2500
|
+
mmd_engine_free(temp, true);
|
|
2501
|
+
}
|
|
2306
2502
|
}
|
|
2307
2503
|
|
|
2308
2504
|
// Restore previous parse tree
|
|
@@ -2703,7 +2899,7 @@ void mmd_engine_convert_to_file(mmd_engine * e, short format, const char * direc
|
|
|
2703
2899
|
|
|
2704
2900
|
switch (format) {
|
|
2705
2901
|
case FORMAT_EPUB:
|
|
2706
|
-
epub_write_wrapper(filepath, output
|
|
2902
|
+
epub_write_wrapper(filepath, output, e, directory);
|
|
2707
2903
|
break;
|
|
2708
2904
|
|
|
2709
2905
|
case FORMAT_TEXTBUNDLE:
|
|
@@ -2711,7 +2907,7 @@ void mmd_engine_convert_to_file(mmd_engine * e, short format, const char * direc
|
|
|
2711
2907
|
break;
|
|
2712
2908
|
|
|
2713
2909
|
case FORMAT_TEXTBUNDLE_COMPRESSED:
|
|
2714
|
-
textbundle_write_wrapper(filepath, output
|
|
2910
|
+
textbundle_write_wrapper(filepath, output, e, directory);
|
|
2715
2911
|
break;
|
|
2716
2912
|
|
|
2717
2913
|
default:
|
|
@@ -2767,6 +2963,9 @@ DString * mmd_engine_convert_to_data(mmd_engine * e, short format, const char *
|
|
|
2767
2963
|
if (e->extensions & EXT_PARSE_OPML) {
|
|
2768
2964
|
// Convert from OPML first (if not done earlier)
|
|
2769
2965
|
mmd_convert_opml_string(e, 0, e->dstr->currentStringLength);
|
|
2966
|
+
} else if (e->extensions & EXT_PARSE_ITMZ) {
|
|
2967
|
+
// Convert from ITMZ first (if not done earlier)
|
|
2968
|
+
mmd_convert_itmz_string(e, 0, e->dstr->currentStringLength);
|
|
2770
2969
|
}
|
|
2771
2970
|
|
|
2772
2971
|
// Simply return text (transclusion is handled externally)
|
|
@@ -2781,26 +2980,32 @@ DString * mmd_engine_convert_to_data(mmd_engine * e, short format, const char *
|
|
|
2781
2980
|
|
|
2782
2981
|
switch (format) {
|
|
2783
2982
|
case FORMAT_EPUB:
|
|
2784
|
-
result = epub_create(output
|
|
2983
|
+
result = epub_create(output, e, directory);
|
|
2785
2984
|
|
|
2786
2985
|
d_string_free(output, true);
|
|
2787
2986
|
break;
|
|
2788
2987
|
|
|
2789
2988
|
case FORMAT_TEXTBUNDLE:
|
|
2790
2989
|
case FORMAT_TEXTBUNDLE_COMPRESSED:
|
|
2791
|
-
result = textbundle_create(output
|
|
2990
|
+
result = textbundle_create(output, e, directory);
|
|
2792
2991
|
|
|
2793
2992
|
d_string_free(output, true);
|
|
2794
2993
|
break;
|
|
2795
2994
|
|
|
2796
2995
|
case FORMAT_ODT:
|
|
2797
|
-
result = opendocument_text_create(output
|
|
2996
|
+
result = opendocument_text_create(output, e, directory);
|
|
2798
2997
|
|
|
2799
2998
|
d_string_free(output, true);
|
|
2800
2999
|
break;
|
|
2801
3000
|
|
|
2802
3001
|
case FORMAT_FODT:
|
|
2803
|
-
result = opendocument_flat_text_create(output
|
|
3002
|
+
result = opendocument_flat_text_create(output, e, directory);
|
|
3003
|
+
|
|
3004
|
+
d_string_free(output, true);
|
|
3005
|
+
break;
|
|
3006
|
+
|
|
3007
|
+
case FORMAT_ITMZ:
|
|
3008
|
+
result = itmz_create(output, e, directory);
|
|
2804
3009
|
|
|
2805
3010
|
d_string_free(output, true);
|
|
2806
3011
|
break;
|
|
@@ -2816,9 +3021,109 @@ DString * mmd_engine_convert_to_data(mmd_engine * e, short format, const char *
|
|
|
2816
3021
|
}
|
|
2817
3022
|
|
|
2818
3023
|
|
|
3024
|
+
/// Convert OPML string to MMD
|
|
3025
|
+
DString * mmd_string_convert_opml_to_text(const char * source) {
|
|
3026
|
+
mmd_engine * e = mmd_engine_create_with_string(source, 0);
|
|
3027
|
+
|
|
3028
|
+
DString * result = mmd_engine_convert_opml_to_text(e);
|
|
3029
|
+
|
|
3030
|
+
e->root = NULL;
|
|
3031
|
+
mmd_engine_free(e, true);
|
|
3032
|
+
|
|
3033
|
+
return result;
|
|
3034
|
+
}
|
|
3035
|
+
|
|
3036
|
+
|
|
3037
|
+
/// Convert OPML DString to MMD
|
|
3038
|
+
DString * mmd_d_string_convert_opml_to_text(DString * source) {
|
|
3039
|
+
mmd_engine * e = mmd_engine_create_with_dstring(source, 0);
|
|
3040
|
+
|
|
3041
|
+
DString * result = mmd_engine_convert_opml_to_text(e);
|
|
3042
|
+
|
|
3043
|
+
e->root = NULL;
|
|
3044
|
+
mmd_engine_free(e, false);
|
|
3045
|
+
|
|
3046
|
+
return result;
|
|
3047
|
+
}
|
|
3048
|
+
|
|
3049
|
+
|
|
3050
|
+
/// Convert OPML to text without modifying original engine source
|
|
3051
|
+
DString * mmd_engine_convert_opml_to_text(mmd_engine * e) {
|
|
3052
|
+
DString * original = d_string_new("");
|
|
3053
|
+
d_string_append_c_array(original, e->dstr->str, e->dstr->currentStringLength);
|
|
3054
|
+
|
|
3055
|
+
mmd_convert_opml_string(e, 0, e->dstr->currentStringLength);
|
|
3056
|
+
|
|
3057
|
+
// Swap original and engine
|
|
3058
|
+
char * temp = e->dstr->str;
|
|
3059
|
+
size_t size = e->dstr->currentStringLength;
|
|
3060
|
+
|
|
3061
|
+
// Replace engine copy with original OPML text
|
|
3062
|
+
e->dstr->str = original->str;
|
|
3063
|
+
e->dstr->currentStringLength = original->currentStringLength;
|
|
3064
|
+
|
|
3065
|
+
// Original now contains the processed text
|
|
3066
|
+
original->str = temp;
|
|
3067
|
+
original->currentStringLength = size;
|
|
3068
|
+
|
|
3069
|
+
return original;
|
|
3070
|
+
}
|
|
3071
|
+
|
|
3072
|
+
|
|
3073
|
+
/// Convert ITMZ string to MMD
|
|
3074
|
+
DString * mmd_string_convert_itmz_to_text(const char * source) {
|
|
3075
|
+
mmd_engine * e = mmd_engine_create_with_string(source, 0);
|
|
3076
|
+
|
|
3077
|
+
DString * result = mmd_engine_convert_itmz_to_text(e);
|
|
3078
|
+
|
|
3079
|
+
e->root = NULL;
|
|
3080
|
+
mmd_engine_free(e, true);
|
|
3081
|
+
|
|
3082
|
+
return result;
|
|
3083
|
+
}
|
|
3084
|
+
|
|
3085
|
+
|
|
3086
|
+
/// Convert ITMZ DString to MMD
|
|
3087
|
+
DString * mmd_d_string_convert_itmz_to_text(DString * source) {
|
|
3088
|
+
mmd_engine * e = mmd_engine_create_with_dstring(source, 0);
|
|
3089
|
+
|
|
3090
|
+
DString * result = mmd_engine_convert_itmz_to_text(e);
|
|
3091
|
+
|
|
3092
|
+
e->root = NULL;
|
|
3093
|
+
mmd_engine_free(e, false);
|
|
3094
|
+
|
|
3095
|
+
return result;
|
|
3096
|
+
}
|
|
3097
|
+
|
|
3098
|
+
|
|
3099
|
+
/// Convert ITMZ to text without modifying original engine source
|
|
3100
|
+
DString * mmd_engine_convert_itmz_to_text(mmd_engine * e) {
|
|
3101
|
+
DString * original = d_string_new("");
|
|
3102
|
+
d_string_append_c_array(original, e->dstr->str, e->dstr->currentStringLength);
|
|
3103
|
+
|
|
3104
|
+
mmd_convert_itmz_string(e, 0, e->dstr->currentStringLength);
|
|
3105
|
+
|
|
3106
|
+
// Swap original and engine
|
|
3107
|
+
char * temp = e->dstr->str;
|
|
3108
|
+
size_t size = e->dstr->currentStringLength;
|
|
3109
|
+
|
|
3110
|
+
// Replace engine copy with original ITMZ text
|
|
3111
|
+
e->dstr->str = original->str;
|
|
3112
|
+
e->dstr->currentStringLength = original->currentStringLength;
|
|
3113
|
+
|
|
3114
|
+
// Original now contains the processed text
|
|
3115
|
+
original->str = temp;
|
|
3116
|
+
original->currentStringLength = size;
|
|
3117
|
+
|
|
3118
|
+
return original;
|
|
3119
|
+
}
|
|
3120
|
+
|
|
3121
|
+
|
|
2819
3122
|
/// Return string containing engine version.
|
|
2820
3123
|
char * mmd_version(void) {
|
|
2821
|
-
char * result;
|
|
2822
|
-
|
|
3124
|
+
char * result = NULL;
|
|
3125
|
+
#ifndef TEST
|
|
3126
|
+
result = my_strdup(LIBMULTIMARKDOWN_VERSION);
|
|
3127
|
+
#endif
|
|
2823
3128
|
return result;
|
|
2824
3129
|
}
|