rmultimarkdown 4.7.1.1 → 6.2.2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (110) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE +2 -2
  3. data/README.md +7 -9
  4. data/Rakefile +33 -18
  5. data/ext/Makefile +261 -0
  6. data/ext/extconf.rb +23 -3
  7. data/ext/mmd/aho-corasick.c +596 -0
  8. data/ext/mmd/aho-corasick.h +120 -0
  9. data/ext/mmd/beamer.c +344 -0
  10. data/ext/mmd/beamer.h +72 -0
  11. data/ext/mmd/char.c +156 -0
  12. data/ext/mmd/char.h +111 -0
  13. data/ext/mmd/char_lookup.c +212 -0
  14. data/ext/mmd/critic_markup.c +330 -0
  15. data/ext/mmd/critic_markup.h +94 -0
  16. data/ext/mmd/d_string.c +402 -0
  17. data/ext/mmd/epub.c +563 -0
  18. data/ext/mmd/epub.h +69 -0
  19. data/ext/mmd/fodt.c +2288 -0
  20. data/ext/mmd/fodt.h +81 -0
  21. data/ext/mmd/html.c +2460 -0
  22. data/ext/mmd/html.h +81 -0
  23. data/ext/mmd/i18n.h +170 -0
  24. data/ext/mmd/include/d_string.h +182 -0
  25. data/ext/mmd/include/libMultiMarkdown.h +548 -0
  26. data/ext/mmd/include/token.h +233 -0
  27. data/ext/mmd/latex.c +2435 -0
  28. data/ext/mmd/latex.h +83 -0
  29. data/ext/mmd/lexer.c +3001 -0
  30. data/ext/mmd/lexer.h +75 -0
  31. data/ext/mmd/memoir.c +138 -0
  32. data/ext/mmd/memoir.h +67 -0
  33. data/ext/mmd/miniz.c +7557 -0
  34. data/ext/mmd/miniz.h +1328 -0
  35. data/ext/mmd/mmd.c +2798 -0
  36. data/ext/mmd/mmd.h +120 -0
  37. data/ext/mmd/object_pool.c +141 -0
  38. data/ext/mmd/object_pool.h +101 -0
  39. data/ext/mmd/opendocument-content.c +2071 -0
  40. data/ext/mmd/opendocument-content.h +135 -0
  41. data/ext/mmd/opendocument.c +981 -0
  42. data/ext/mmd/opendocument.h +118 -0
  43. data/ext/mmd/parser.c +1760 -0
  44. data/ext/mmd/parser.h +39 -0
  45. data/{MultiMarkdown-4 → ext/mmd}/rng.c +90 -49
  46. data/ext/mmd/scanners.c +77512 -0
  47. data/ext/mmd/scanners.h +101 -0
  48. data/ext/mmd/stack.c +142 -0
  49. data/ext/mmd/stack.h +113 -0
  50. data/ext/mmd/textbundle.c +455 -0
  51. data/ext/mmd/textbundle.h +115 -0
  52. data/ext/mmd/token.c +773 -0
  53. data/ext/mmd/token_pairs.c +263 -0
  54. data/ext/mmd/token_pairs.h +123 -0
  55. data/ext/mmd/transclude.c +549 -0
  56. data/ext/mmd/transclude.h +87 -0
  57. data/ext/mmd/uthash.h +1074 -0
  58. data/ext/mmd/uuid.c +154 -0
  59. data/ext/mmd/uuid.h +77 -0
  60. data/ext/mmd/version.h +111 -0
  61. data/ext/mmd/writer.c +2652 -0
  62. data/ext/mmd/writer.h +260 -0
  63. data/ext/mmd/zip.c +210 -0
  64. data/ext/mmd/zip.h +120 -0
  65. data/ext/{multi_markdown.c → ruby_multi_markdown.c} +87 -18
  66. data/lib/multi_markdown.bundle +0 -0
  67. data/lib/multi_markdown.rb +5 -8
  68. data/lib/multi_markdown/version.rb +1 -1
  69. data/rmultimarkdown.gemspec +2 -2
  70. data/test/{extensions_test.rb.rb → extensions_test.rb} +10 -54
  71. data/test/multi_markdown_test.rb +13 -0
  72. metadata +67 -47
  73. data/MultiMarkdown-4/GLibFacade.c +0 -310
  74. data/MultiMarkdown-4/GLibFacade.h +0 -100
  75. data/MultiMarkdown-4/beamer.c +0 -182
  76. data/MultiMarkdown-4/beamer.h +0 -11
  77. data/MultiMarkdown-4/critic.c +0 -111
  78. data/MultiMarkdown-4/critic.h +0 -15
  79. data/MultiMarkdown-4/glib.h +0 -11
  80. data/MultiMarkdown-4/html.c +0 -1117
  81. data/MultiMarkdown-4/html.h +0 -14
  82. data/MultiMarkdown-4/latex.c +0 -1217
  83. data/MultiMarkdown-4/latex.h +0 -16
  84. data/MultiMarkdown-4/libMultiMarkdown.h +0 -177
  85. data/MultiMarkdown-4/lyx.c +0 -2265
  86. data/MultiMarkdown-4/lyx.h +0 -37
  87. data/MultiMarkdown-4/lyxbeamer.c +0 -265
  88. data/MultiMarkdown-4/lyxbeamer.h +0 -11
  89. data/MultiMarkdown-4/memoir.c +0 -80
  90. data/MultiMarkdown-4/memoir.h +0 -10
  91. data/MultiMarkdown-4/multimarkdown.c +0 -518
  92. data/MultiMarkdown-4/odf.c +0 -1222
  93. data/MultiMarkdown-4/odf.h +0 -18
  94. data/MultiMarkdown-4/opml.c +0 -189
  95. data/MultiMarkdown-4/opml.h +0 -15
  96. data/MultiMarkdown-4/parse_utilities.c +0 -884
  97. data/MultiMarkdown-4/parser.c +0 -16656
  98. data/MultiMarkdown-4/parser.h +0 -188
  99. data/MultiMarkdown-4/rtf.c +0 -665
  100. data/MultiMarkdown-4/rtf.h +0 -17
  101. data/MultiMarkdown-4/strtok.c +0 -56
  102. data/MultiMarkdown-4/strtok.h +0 -9
  103. data/MultiMarkdown-4/text.c +0 -53
  104. data/MultiMarkdown-4/text.h +0 -11
  105. data/MultiMarkdown-4/toc.c +0 -142
  106. data/MultiMarkdown-4/toc.h +0 -15
  107. data/MultiMarkdown-4/transclude.c +0 -307
  108. data/MultiMarkdown-4/transclude.h +0 -28
  109. data/MultiMarkdown-4/writer.c +0 -731
  110. data/MultiMarkdown-4/writer.h +0 -38
data/ext/mmd/mmd.c ADDED
@@ -0,0 +1,2798 @@
1
+ /**
2
+
3
+ MultiMarkdown 6 -- Lightweight markup processor to produce HTML, LaTeX, and more.
4
+
5
+ @file mmd.c
6
+
7
+ @brief Create MMD parsing engine
8
+
9
+
10
+ @author Fletcher T. Penney
11
+ @bug
12
+
13
+ **/
14
+
15
+ /*
16
+
17
+ Copyright © 2016 - 2017 Fletcher T. Penney.
18
+
19
+
20
+ The `MultiMarkdown 6` project is released under the MIT License.
21
+
22
+ GLibFacade.c and GLibFacade.h are from the MultiMarkdown v4 project:
23
+
24
+ https://github.com/fletcher/MultiMarkdown-4/
25
+
26
+ MMD 4 is released under both the MIT License and GPL.
27
+
28
+
29
+ CuTest is released under the zlib/libpng license. See CuTest.c for the text
30
+ of the license.
31
+
32
+
33
+ ## The MIT License ##
34
+
35
+ Permission is hereby granted, free of charge, to any person obtaining a copy
36
+ of this software and associated documentation files (the "Software"), to deal
37
+ in the Software without restriction, including without limitation the rights
38
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
39
+ copies of the Software, and to permit persons to whom the Software is
40
+ furnished to do so, subject to the following conditions:
41
+
42
+ The above copyright notice and this permission notice shall be included in
43
+ all copies or substantial portions of the Software.
44
+
45
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
46
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
47
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
48
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
49
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
50
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
51
+ THE SOFTWARE.
52
+
53
+ */
54
+
55
+ #include <stdio.h>
56
+ #include <stdlib.h>
57
+ #include <string.h>
58
+
59
+ #include "char.h"
60
+ #include "d_string.h"
61
+ #include "epub.h"
62
+ #include "i18n.h"
63
+ #include "lexer.h"
64
+ #include "libMultiMarkdown.h"
65
+ #include "mmd.h"
66
+ #include "object_pool.h"
67
+ #include "opendocument.h"
68
+ #include "parser.h"
69
+ #include "scanners.h"
70
+ #include "stack.h"
71
+ #include "textbundle.h"
72
+ #include "token.h"
73
+ #include "token_pairs.h"
74
+ #include "writer.h"
75
+ #include "version.h"
76
+
77
+
78
+ // Basic parser function declarations
79
+ void * ParseAlloc();
80
+ void Parse();
81
+ void ParseFree();
82
+ void ParseTrace();
83
+
84
+ void mmd_pair_tokens_in_block(token * block, token_pair_engine * e, stack * s);
85
+
86
+
87
/// Duplicate a C string (strdup() is not available on all platforms).
///
/// Returns a newly malloc'd copy of `source` that the caller must free(),
/// or NULL when `source` is NULL or the allocation fails.
static char * my_strdup(const char * source) {
	if (source == NULL) {
		return NULL;
	}

	// Measure once; the same size (including the terminating NUL) is used
	// for both the allocation and the copy, so the string is not rescanned.
	size_t size = strlen(source) + 1;
	char * result = malloc(size);

	if (result) {
		memcpy(result, source, size);
	}

	return result;
}
101
+
102
+
103
+ /// Build MMD Engine
104
+ mmd_engine * mmd_engine_create(DString * d, unsigned long extensions) {
105
+ mmd_engine * e = malloc(sizeof(mmd_engine));
106
+
107
+ if (e) {
108
+ e->dstr = d;
109
+
110
+ e->root = NULL;
111
+
112
+ e->extensions = extensions;
113
+
114
+ e->recurse_depth = 0;
115
+
116
+ e->allow_meta = (extensions & EXT_COMPATIBILITY) ? false : true;
117
+
118
+ e->language = LC_EN;
119
+ e->quotes_lang = ENGLISH;
120
+
121
+ e->abbreviation_stack = stack_new(0);
122
+ e->citation_stack = stack_new(0);
123
+ e->definition_stack = stack_new(0);
124
+ e->footnote_stack = stack_new(0);
125
+ e->glossary_stack = stack_new(0);
126
+ e->header_stack = stack_new(0);
127
+ e->link_stack = stack_new(0);
128
+ e->metadata_stack = stack_new(0);
129
+ e->table_stack = stack_new(0);
130
+ e->asset_hash = NULL;
131
+
132
+ e->pairings1 = token_pair_engine_new();
133
+ e->pairings2 = token_pair_engine_new();
134
+ e->pairings3 = token_pair_engine_new();
135
+ e->pairings4 = token_pair_engine_new();
136
+
137
+ // CriticMarkup
138
+ if (extensions & EXT_CRITIC) {
139
+ token_pair_engine_add_pairing(e->pairings1, CRITIC_ADD_OPEN, CRITIC_ADD_CLOSE, PAIR_CRITIC_ADD, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
140
+ token_pair_engine_add_pairing(e->pairings1, CRITIC_DEL_OPEN, CRITIC_DEL_CLOSE, PAIR_CRITIC_DEL, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
141
+ token_pair_engine_add_pairing(e->pairings1, CRITIC_COM_OPEN, CRITIC_COM_CLOSE, PAIR_CRITIC_COM, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
142
+ token_pair_engine_add_pairing(e->pairings1, CRITIC_SUB_OPEN, CRITIC_SUB_DIV_A, PAIR_CRITIC_SUB_DEL, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
143
+ token_pair_engine_add_pairing(e->pairings1, CRITIC_SUB_DIV_B, CRITIC_SUB_CLOSE, PAIR_CRITIC_SUB_ADD, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
144
+ token_pair_engine_add_pairing(e->pairings1, CRITIC_HI_OPEN, CRITIC_HI_CLOSE, PAIR_CRITIC_HI, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
145
+ }
146
+
147
+ // HTML Comments
148
+ token_pair_engine_add_pairing(e->pairings2, HTML_COMMENT_START, HTML_COMMENT_STOP, PAIR_HTML_COMMENT, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
149
+
150
+ // Brackets, Parentheses, Angles
151
+ token_pair_engine_add_pairing(e->pairings3, BRACKET_LEFT, BRACKET_RIGHT, PAIR_BRACKET, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
152
+
153
+ if (extensions & EXT_NOTES) {
154
+ token_pair_engine_add_pairing(e->pairings3, BRACKET_CITATION_LEFT, BRACKET_RIGHT, PAIR_BRACKET_CITATION, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
155
+ token_pair_engine_add_pairing(e->pairings3, BRACKET_FOOTNOTE_LEFT, BRACKET_RIGHT, PAIR_BRACKET_FOOTNOTE, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
156
+ token_pair_engine_add_pairing(e->pairings3, BRACKET_GLOSSARY_LEFT, BRACKET_RIGHT, PAIR_BRACKET_GLOSSARY, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
157
+ token_pair_engine_add_pairing(e->pairings3, BRACKET_ABBREVIATION_LEFT, BRACKET_RIGHT, PAIR_BRACKET_ABBREVIATION, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
158
+ } else {
159
+ token_pair_engine_add_pairing(e->pairings3, BRACKET_CITATION_LEFT, BRACKET_RIGHT, PAIR_BRACKET, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
160
+ token_pair_engine_add_pairing(e->pairings3, BRACKET_FOOTNOTE_LEFT, BRACKET_RIGHT, PAIR_BRACKET, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
161
+ token_pair_engine_add_pairing(e->pairings3, BRACKET_GLOSSARY_LEFT, BRACKET_RIGHT, PAIR_BRACKET, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
162
+ token_pair_engine_add_pairing(e->pairings3, BRACKET_ABBREVIATION_LEFT, BRACKET_RIGHT, PAIR_BRACKET, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
163
+ }
164
+
165
+ token_pair_engine_add_pairing(e->pairings3, BRACKET_VARIABLE_LEFT, BRACKET_RIGHT, PAIR_BRACKET_VARIABLE, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
166
+
167
+ token_pair_engine_add_pairing(e->pairings3, BRACKET_IMAGE_LEFT, BRACKET_RIGHT, PAIR_BRACKET_IMAGE, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
168
+ token_pair_engine_add_pairing(e->pairings3, PAREN_LEFT, PAREN_RIGHT, PAIR_PAREN, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
169
+ token_pair_engine_add_pairing(e->pairings3, ANGLE_LEFT, ANGLE_RIGHT, PAIR_ANGLE, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
170
+ token_pair_engine_add_pairing(e->pairings3, BRACE_DOUBLE_LEFT, BRACE_DOUBLE_RIGHT, PAIR_BRACES, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
171
+
172
+ // Strong/Emph
173
+ token_pair_engine_add_pairing(e->pairings4, STAR, STAR, PAIR_STAR, 0);
174
+ token_pair_engine_add_pairing(e->pairings4, UL, UL, PAIR_UL, 0);
175
+
176
+ // Quotes and Backticks
177
+ token_pair_engine_add_pairing(e->pairings3, BACKTICK, BACKTICK, PAIR_BACKTICK, PAIRING_PRUNE_MATCH | PAIRING_MATCH_LENGTH);
178
+
179
+ token_pair_engine_add_pairing(e->pairings4, BACKTICK, QUOTE_RIGHT_ALT, PAIR_QUOTE_ALT, PAIRING_ALLOW_EMPTY | PAIRING_MATCH_LENGTH);
180
+ token_pair_engine_add_pairing(e->pairings4, QUOTE_SINGLE, QUOTE_SINGLE, PAIR_QUOTE_SINGLE, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
181
+ token_pair_engine_add_pairing(e->pairings4, QUOTE_DOUBLE, QUOTE_DOUBLE, PAIR_QUOTE_DOUBLE, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
182
+
183
+ // Math
184
+ if (!(extensions & EXT_COMPATIBILITY)) {
185
+ token_pair_engine_add_pairing(e->pairings3, MATH_PAREN_OPEN, MATH_PAREN_CLOSE, PAIR_MATH, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
186
+ token_pair_engine_add_pairing(e->pairings3, MATH_BRACKET_OPEN, MATH_BRACKET_CLOSE, PAIR_MATH, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
187
+ token_pair_engine_add_pairing(e->pairings3, MATH_DOLLAR_SINGLE, MATH_DOLLAR_SINGLE, PAIR_MATH, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
188
+ token_pair_engine_add_pairing(e->pairings3, MATH_DOLLAR_DOUBLE, MATH_DOLLAR_DOUBLE, PAIR_MATH, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH);
189
+ }
190
+
191
+ // Superscript/Subscript
192
+ if (!(extensions & EXT_COMPATIBILITY)) {
193
+ token_pair_engine_add_pairing(e->pairings4, SUPERSCRIPT, SUPERSCRIPT, PAIR_SUPERSCRIPT, PAIRING_PRUNE_MATCH);
194
+ token_pair_engine_add_pairing(e->pairings4, SUBSCRIPT, SUBSCRIPT, PAIR_SUBSCRIPT, PAIRING_PRUNE_MATCH);
195
+ }
196
+
197
+ // Text Braces -- for raw text syntax
198
+ if (!(extensions & EXT_COMPATIBILITY)) {
199
+ token_pair_engine_add_pairing(e->pairings4, TEXT_BRACE_LEFT, TEXT_BRACE_RIGHT, PAIR_BRACE, PAIRING_PRUNE_MATCH);
200
+ token_pair_engine_add_pairing(e->pairings4, RAW_FILTER_LEFT, TEXT_BRACE_RIGHT, PAIR_RAW_FILTER, PAIRING_PRUNE_MATCH);
201
+ }
202
+ }
203
+
204
+ return e;
205
+ }
206
+
207
+ /// Create MMD Engine using an existing DString (A new copy is *not* made)
208
+ mmd_engine * mmd_engine_create_with_dstring(DString * d, unsigned long extensions) {
209
+ return mmd_engine_create(d, extensions);
210
+ }
211
+
212
+
213
+ /// Create MMD Engine using a C string (A private copy of the string will be
214
+ /// made. The one passed here can be freed by the calling function)
215
+ mmd_engine * mmd_engine_create_with_string(const char * str, unsigned long extensions) {
216
+ DString * d = d_string_new(str);
217
+
218
+ return mmd_engine_create(d, extensions);
219
+ }
220
+
221
+
222
+ /// Set language and smart quotes language
223
+ void mmd_engine_set_language(mmd_engine * e, short language) {
224
+ if (!e) {
225
+ return;
226
+ }
227
+
228
+ e->language = language;
229
+
230
+ switch (language) {
231
+ case LC_DE:
232
+ e->quotes_lang = GERMAN;
233
+ break;
234
+
235
+ case LC_EN:
236
+ e->quotes_lang = ENGLISH;
237
+ break;
238
+
239
+ case LC_ES:
240
+ e->quotes_lang = ENGLISH;
241
+ break;
242
+
243
+ case LC_FR:
244
+ e->quotes_lang = FRENCH;
245
+ break;
246
+
247
+ case LC_NL:
248
+ e->quotes_lang = DUTCH;
249
+ break;
250
+
251
+ case LC_SV:
252
+ e->quotes_lang = SWEDISH;
253
+ break;
254
+
255
+ default:
256
+ e->quotes_lang = ENGLISH;
257
+ }
258
+ }
259
+
260
+
261
+ void mmd_engine_reset(mmd_engine * e) {
262
+ if (e->root) {
263
+ token_tree_free(e->root);
264
+ e->root = NULL;
265
+ }
266
+
267
+ // Abbreviations need to be freed
268
+ while (e->abbreviation_stack->size) {
269
+ footnote_free(stack_pop(e->abbreviation_stack));
270
+ }
271
+
272
+ // Citations need to be freed
273
+ while (e->citation_stack->size) {
274
+ footnote_free(stack_pop(e->citation_stack));
275
+ }
276
+
277
+ // Footnotes need to be freed
278
+ while (e->footnote_stack->size) {
279
+ footnote_free(stack_pop(e->footnote_stack));
280
+ }
281
+
282
+ // Glossaries need to be freed
283
+ while (e->glossary_stack->size) {
284
+ footnote_free(stack_pop(e->glossary_stack));
285
+ }
286
+
287
+ // Links need to be freed
288
+ while (e->link_stack->size) {
289
+ link_free(stack_pop(e->link_stack));
290
+ }
291
+
292
+ // Metadata needs to be freed
293
+ while (e->metadata_stack->size) {
294
+ meta_free(stack_pop(e->metadata_stack));
295
+ }
296
+
297
+ // Free asset hash
298
+ asset * a, * a_tmp;
299
+ HASH_ITER(hh, e->asset_hash, a, a_tmp) {
300
+ HASH_DEL(e->asset_hash, a); // Remove item from hash
301
+ asset_free(a); // Free the asset
302
+ }
303
+
304
+ // Reset other stacks
305
+ e->definition_stack->size = 0;
306
+ e->header_stack->size = 0;
307
+ e->table_stack->size = 0;
308
+ }
309
+
310
+
311
+ /// Free an existing MMD Engine
312
+ void mmd_engine_free(mmd_engine * e, bool freeDString) {
313
+ if (e == NULL) {
314
+ return;
315
+ }
316
+
317
+ mmd_engine_reset(e);
318
+
319
+ if (freeDString) {
320
+ d_string_free(e->dstr, true);
321
+ }
322
+
323
+ token_pair_engine_free(e->pairings1);
324
+ token_pair_engine_free(e->pairings2);
325
+ token_pair_engine_free(e->pairings3);
326
+ token_pair_engine_free(e->pairings4);
327
+
328
+ // Pointers to blocks that are freed elsewhere
329
+ stack_free(e->definition_stack);
330
+ stack_free(e->header_stack);
331
+ stack_free(e->table_stack);
332
+
333
+ // Takedown
334
+ stack_free(e->abbreviation_stack);
335
+ stack_free(e->citation_stack);
336
+ stack_free(e->footnote_stack);
337
+ stack_free(e->glossary_stack);
338
+ stack_free(e->link_stack);
339
+ stack_free(e->metadata_stack);
340
+
341
+ free(e);
342
+ }
343
+
344
+
345
+ bool line_is_empty(token * t) {
346
+ while (t) {
347
+ switch (t->type) {
348
+ case NON_INDENT_SPACE:
349
+ case INDENT_TAB:
350
+ case INDENT_SPACE:
351
+ t = t->next;
352
+ break;
353
+
354
+ case TEXT_LINEBREAK:
355
+ case TEXT_NL:
356
+ return true;
357
+
358
+ default:
359
+ return false;
360
+ }
361
+ }
362
+
363
+ return true;
364
+ }
365
+
366
+
367
+ /// Determine what sort of line this is
368
+ void mmd_assign_line_type(mmd_engine * e, token * line) {
369
+ if (!line) {
370
+ return;
371
+ }
372
+
373
+ if (!line->child) {
374
+ line->type = LINE_EMPTY;
375
+ return;
376
+ }
377
+
378
+ const char * source = e->dstr->str;
379
+
380
+ token * t = NULL;
381
+ token * first_child = line->child;
382
+
383
+ short temp_short;
384
+ size_t scan_len;
385
+
386
+ // Skip non-indenting space
387
+ if (first_child->type == NON_INDENT_SPACE) {
388
+ //token_remove_first_child(line);
389
+ first_child = first_child->next;
390
+ } else if (first_child->type == TEXT_PLAIN && first_child->len == 1) {
391
+ if (source[first_child->start] == ' ') {
392
+ //token_remove_first_child(line);
393
+ first_child = first_child->next;
394
+ }
395
+ }
396
+
397
+ if (first_child == NULL) {
398
+ line->type = LINE_EMPTY;
399
+ return;
400
+ }
401
+
402
+ switch (first_child->type) {
403
+ case INDENT_TAB:
404
+ if (line_is_empty(first_child)) {
405
+ line->type = LINE_EMPTY;
406
+ e->allow_meta = false;
407
+ } else {
408
+ line->type = LINE_INDENTED_TAB;
409
+ }
410
+
411
+ break;
412
+
413
+ case INDENT_SPACE:
414
+ if (line_is_empty(first_child)) {
415
+ line->type = LINE_EMPTY;
416
+ e->allow_meta = false;
417
+ } else {
418
+ line->type = LINE_INDENTED_SPACE;
419
+ }
420
+
421
+ break;
422
+
423
+ case ANGLE_LEFT:
424
+ if (scan_html_block(&source[line->start])) {
425
+ line->type = LINE_HTML;
426
+ } else {
427
+ line->type = LINE_PLAIN;
428
+ }
429
+
430
+ break;
431
+
432
+ case ANGLE_RIGHT:
433
+ line->type = LINE_BLOCKQUOTE;
434
+ first_child->type = MARKER_BLOCKQUOTE;
435
+ break;
436
+
437
+ case BACKTICK:
438
+ if (e->extensions & EXT_COMPATIBILITY) {
439
+ line->type = LINE_PLAIN;
440
+ break;
441
+ }
442
+
443
+ scan_len = scan_fence_end(&source[first_child->start]);
444
+
445
+ if (scan_len) {
446
+ switch (first_child->len) {
447
+ case 3:
448
+ line->type = LINE_FENCE_BACKTICK_3;
449
+ break;
450
+
451
+ case 4:
452
+ line->type = LINE_FENCE_BACKTICK_4;
453
+ break;
454
+
455
+ default:
456
+ line->type = LINE_FENCE_BACKTICK_5;
457
+ break;
458
+ }
459
+
460
+ break;
461
+ } else {
462
+ scan_len = scan_fence_start(&source[first_child->start]);
463
+
464
+ if (scan_len) {
465
+ switch (first_child->len) {
466
+ case 3:
467
+ line->type = LINE_FENCE_BACKTICK_START_3;
468
+ break;
469
+
470
+ case 4:
471
+ line->type = LINE_FENCE_BACKTICK_START_4;
472
+ break;
473
+
474
+ default:
475
+ line->type = LINE_FENCE_BACKTICK_START_5;
476
+ break;
477
+ }
478
+
479
+ break;
480
+ }
481
+ }
482
+
483
+ line->type = LINE_PLAIN;
484
+ break;
485
+
486
+ case COLON:
487
+ line->type = LINE_PLAIN;
488
+
489
+ if (e->extensions & EXT_COMPATIBILITY) {
490
+ break;
491
+ }
492
+
493
+ if (scan_definition(&source[first_child->start])) {
494
+ line->type = LINE_DEFINITION;
495
+ }
496
+
497
+ break;
498
+
499
+ case HASH1:
500
+ case HASH2:
501
+ case HASH3:
502
+ case HASH4:
503
+ case HASH5:
504
+ case HASH6:
505
+ if (scan_atx(&source[first_child->start])) {
506
+ line->type = (first_child->type - HASH1) + LINE_ATX_1;
507
+ first_child->type = (line->type - LINE_ATX_1) + MARKER_H1;
508
+
509
+ // Strip trailing whitespace from '#' sequence
510
+ first_child->len = first_child->type - MARKER_H1 + 1;
511
+
512
+ // Strip trailing '#' sequence if present
513
+ if (line->child->tail->type == TEXT_NL) {
514
+ if ((line->child->tail->prev->type >= HASH1) &&
515
+ (line->child->tail->prev->type <= HASH6)) {
516
+ line->child->tail->prev->type -= HASH1;
517
+ line->child->tail->prev->type += MARKER_H1;
518
+ }
519
+ } else {
520
+ if ((line->child->tail->type >= HASH1) &&
521
+ (line->child->tail->type <= HASH6)) {
522
+ line->child->tail->type -= TEXT_EMPTY;
523
+ line->child->tail->type += MARKER_H1;
524
+ }
525
+ }
526
+ } else {
527
+ line->type = LINE_PLAIN;
528
+ }
529
+
530
+ break;
531
+
532
+ case HTML_COMMENT_START:
533
+ if (!first_child->next || !first_child->next->next) {
534
+ line->type = LINE_START_COMMENT;
535
+ } else {
536
+ line->type = LINE_PLAIN;
537
+ }
538
+
539
+ break;
540
+
541
+ case HTML_COMMENT_STOP:
542
+ if (!first_child->next || !first_child->next->next) {
543
+ line->type = LINE_STOP_COMMENT;
544
+ } else {
545
+ line->type = LINE_PLAIN;
546
+ }
547
+
548
+ break;
549
+
550
+ case TEXT_NUMBER_POSS_LIST:
551
+ if (first_child->next) {
552
+ switch (source[first_child->next->start]) {
553
+ case ' ':
554
+ case '\t':
555
+ line->type = LINE_LIST_ENUMERATED;
556
+ first_child->type = MARKER_LIST_ENUMERATOR;
557
+
558
+ switch (first_child->next->type) {
559
+ case TEXT_PLAIN:
560
+
561
+ // Strip whitespace between bullet and text
562
+ while (char_is_whitespace(source[first_child->next->start])) {
563
+ first_child->next->start++;
564
+ first_child->next->len--;
565
+ }
566
+
567
+ break;
568
+
569
+ case INDENT_SPACE:
570
+ case INDENT_TAB:
571
+ case NON_INDENT_SPACE:
572
+ t = first_child;
573
+
574
+ while (t->next && ((t->next->type == INDENT_SPACE) ||
575
+ (t->next->type == INDENT_TAB) ||
576
+ (t->next->type == NON_INDENT_SPACE))) {
577
+ tokens_prune(t->next, t->next);
578
+ }
579
+
580
+ break;
581
+ }
582
+
583
+ break;
584
+
585
+ default:
586
+ line->type = LINE_PLAIN;
587
+ first_child->type = TEXT_PLAIN;
588
+ break;
589
+ }
590
+ } else {
591
+ line->type = LINE_LIST_ENUMERATED;
592
+ first_child->type = MARKER_LIST_ENUMERATOR;
593
+ }
594
+
595
+ break;
596
+
597
+ case EQUAL:
598
+
599
+ // Could this be a setext heading marker?
600
+ if (scan_setext(&source[first_child->start])) {
601
+ line->type = LINE_SETEXT_1;
602
+ } else {
603
+ line->type = LINE_PLAIN;
604
+ }
605
+
606
+ break;
607
+
608
+ case DASH_N:
609
+ case DASH_M:
610
+ if (scan_setext(&source[first_child->start])) {
611
+ line->type = LINE_SETEXT_2;
612
+ break;
613
+ }
614
+
615
+ case STAR:
616
+ case UL:
617
+ // Could this be a horizontal rule?
618
+ t = first_child->next;
619
+ temp_short = first_child->len;
620
+
621
+ while (t) {
622
+ switch (t->type) {
623
+ case DASH_N:
624
+ case DASH_M:
625
+ if (t->type == first_child->type) {
626
+ t = t->next;
627
+
628
+ if (t) {
629
+ temp_short += t->len;
630
+ }
631
+ } else {
632
+ temp_short = 0;
633
+ t = NULL;
634
+ }
635
+
636
+ break;
637
+
638
+ case STAR:
639
+ case UL:
640
+ if (t->type == first_child->type) {
641
+ t = t->next;
642
+ temp_short++;
643
+ } else {
644
+ temp_short = 0;
645
+ t = NULL;
646
+ }
647
+
648
+ break;
649
+
650
+ case NON_INDENT_SPACE:
651
+ case INDENT_TAB:
652
+ case INDENT_SPACE:
653
+ t = t->next;
654
+ break;
655
+
656
+ case TEXT_PLAIN:
657
+ if ((t->len == 1) && (source[t->start] == ' ')) {
658
+ t = t->next;
659
+ break;
660
+ }
661
+
662
+ temp_short = 0;
663
+ t = NULL;
664
+ break;
665
+
666
+ case TEXT_NL:
667
+ case TEXT_LINEBREAK:
668
+ t = NULL;
669
+ break;
670
+
671
+ default:
672
+ temp_short = 0;
673
+ t = NULL;
674
+ break;
675
+ }
676
+ }
677
+
678
+ if (temp_short > 2) {
679
+ // This is a horizontal rule, not a list item
680
+ line->type = LINE_HR;
681
+ break;
682
+ }
683
+
684
+ if (first_child->type == UL) {
685
+ // Revert to plain for this type
686
+ line->type = LINE_PLAIN;
687
+ break;
688
+ }
689
+
690
+ // If longer than 1 character, then it can't be a list marker, so it's a
691
+ // plain line
692
+ if (first_child->len > 1) {
693
+ line->type = LINE_PLAIN;
694
+ break;
695
+ }
696
+
697
+ case PLUS:
698
+ if (!first_child->next) {
699
+ // TODO: Should this be an empty list item instead??
700
+ line->type = LINE_PLAIN;
701
+ } else {
702
+ switch (source[first_child->next->start]) {
703
+ case ' ':
704
+ case '\t':
705
+ line->type = LINE_LIST_BULLETED;
706
+ first_child->type = MARKER_LIST_BULLET;
707
+
708
+ switch (first_child->next->type) {
709
+ case TEXT_PLAIN:
710
+
711
+ // Strip whitespace between bullet and text
712
+ while (char_is_whitespace(source[first_child->next->start])) {
713
+ first_child->next->start++;
714
+ first_child->next->len--;
715
+ }
716
+
717
+ break;
718
+
719
+ case INDENT_SPACE:
720
+ case INDENT_TAB:
721
+ case NON_INDENT_SPACE:
722
+ t = first_child;
723
+
724
+ while (t->next && ((t->next->type == INDENT_SPACE) ||
725
+ (t->next->type == INDENT_TAB) ||
726
+ (t->next->type == NON_INDENT_SPACE))) {
727
+ tokens_prune(t->next, t->next);
728
+ }
729
+
730
+ break;
731
+ }
732
+
733
+ break;
734
+
735
+ default:
736
+ line->type = LINE_PLAIN;
737
+ break;
738
+ }
739
+ }
740
+
741
+ break;
742
+
743
+ case TEXT_LINEBREAK:
744
+ case TEXT_NL:
745
+ e->allow_meta = false;
746
+ line->type = LINE_EMPTY;
747
+ break;
748
+
749
+ case TOC:
750
+ line->type = (e->extensions & EXT_COMPATIBILITY) ? LINE_PLAIN : LINE_TOC;
751
+ break;
752
+
753
+ case BRACKET_LEFT:
754
+ if (e->extensions & EXT_COMPATIBILITY) {
755
+ scan_len = scan_ref_link_no_attributes(&source[line->start]);
756
+ line->type = (scan_len) ? LINE_DEF_LINK : LINE_PLAIN;
757
+ } else {
758
+ scan_len = scan_ref_link(&source[line->start]);
759
+ line->type = (scan_len) ? LINE_DEF_LINK : LINE_PLAIN;
760
+ }
761
+
762
+ break;
763
+
764
+ case BRACKET_ABBREVIATION_LEFT:
765
+ if (e->extensions & EXT_NOTES) {
766
+ scan_len = scan_ref_abbreviation(&source[line->start]);
767
+ line->type = (scan_len) ? LINE_DEF_ABBREVIATION : LINE_PLAIN;
768
+ } else {
769
+ scan_len = scan_ref_link_no_attributes(&source[line->start]);
770
+ line->type = (scan_len) ? LINE_DEF_LINK : LINE_PLAIN;
771
+ }
772
+
773
+ break;
774
+
775
+ case BRACKET_CITATION_LEFT:
776
+ if (e->extensions & EXT_NOTES) {
777
+ scan_len = scan_ref_citation(&source[line->start]);
778
+ line->type = (scan_len) ? LINE_DEF_CITATION : LINE_PLAIN;
779
+ } else {
780
+ scan_len = scan_ref_link_no_attributes(&source[line->start]);
781
+ line->type = (scan_len) ? LINE_DEF_LINK : LINE_PLAIN;
782
+ }
783
+
784
+ break;
785
+
786
+ case BRACKET_FOOTNOTE_LEFT:
787
+ if (e->extensions & EXT_NOTES) {
788
+ scan_len = scan_ref_foot(&source[line->start]);
789
+ line->type = (scan_len) ? LINE_DEF_FOOTNOTE : LINE_PLAIN;
790
+ } else {
791
+ scan_len = scan_ref_link_no_attributes(&source[line->start]);
792
+ line->type = (scan_len) ? LINE_DEF_LINK : LINE_PLAIN;
793
+ }
794
+
795
+ break;
796
+
797
+ case BRACKET_GLOSSARY_LEFT:
798
+ if (e->extensions & EXT_NOTES) {
799
+ scan_len = scan_ref_glossary(&source[line->start]);
800
+ line->type = (scan_len) ? LINE_DEF_GLOSSARY : LINE_PLAIN;
801
+ } else {
802
+ scan_len = scan_ref_link_no_attributes(&source[line->start]);
803
+ line->type = (scan_len) ? LINE_DEF_LINK : LINE_PLAIN;
804
+ }
805
+
806
+ break;
807
+
808
+ case PIPE:
809
+
810
+ // If PIPE is first, save checking later and assign LINE_TABLE now
811
+ if (!(e->extensions & EXT_COMPATIBILITY)) {
812
+ scan_len = scan_table_separator(&source[line->start]);
813
+ line->type = (scan_len) ? LINE_TABLE_SEPARATOR : LINE_TABLE;
814
+
815
+ break;
816
+ }
817
+
818
+ case TEXT_PLAIN:
819
+ if (e->allow_meta && !(e->extensions & EXT_COMPATIBILITY)) {
820
+ scan_len = scan_url(&source[line->start]);
821
+
822
+ if (scan_len == 0) {
823
+ scan_len = scan_meta_line(&source[line->start]);
824
+ line->type = (scan_len) ? LINE_META : LINE_PLAIN;
825
+ break;
826
+ }
827
+ }
828
+
829
+ default:
830
+ line->type = LINE_PLAIN;
831
+ break;
832
+ }
833
+
834
+ if ((line->type == LINE_PLAIN) &&
835
+ !(e->extensions & EXT_COMPATIBILITY)) {
836
+ // Check if this is a potential table line
837
+ token * walker = first_child;
838
+
839
+ while (walker != NULL) {
840
+ if (walker->type == PIPE) {
841
+ scan_len = scan_table_separator(&source[line->start]);
842
+ line->type = (scan_len) ? LINE_TABLE_SEPARATOR : LINE_TABLE;
843
+
844
+ return;
845
+ }
846
+
847
+ walker = walker->next;
848
+ }
849
+ }
850
+ }
851
+
852
+
853
+ /// Strip leading indenting space from line (if present)
854
+ void deindent_line(token * line) {
855
+ if (!line || !line->child) {
856
+ return;
857
+ }
858
+
859
+ token * t;
860
+
861
+ switch (line->child->type) {
862
+ case INDENT_TAB:
863
+ case INDENT_SPACE:
864
+ t = line->child;
865
+ line->child = t->next;
866
+ t->next = NULL;
867
+
868
+ if (line->child) {
869
+ line->child->prev = NULL;
870
+ line->child->tail = t->tail;
871
+ }
872
+
873
+ token_free(t);
874
+ break;
875
+ }
876
+ }
877
+
878
+
879
+ /// Strip leading indenting space from block
880
+ /// (for recursively parsing nested lists)
881
+ void deindent_block(mmd_engine * e, token * block) {
882
+ if (!block || !block->child) {
883
+ return;
884
+ }
885
+
886
+ token * t = block->child;
887
+
888
+ while (t != NULL) {
889
+ deindent_line(t);
890
+ mmd_assign_line_type(e, t);
891
+
892
+ t = t->next;
893
+ }
894
+ }
895
+
896
+
897
+ /// Strip leading blockquote marker from line
898
+ void strip_quote_markers_from_line(token * line, const char * source) {
899
+ if (!line || !line->child) {
900
+ return;
901
+ }
902
+
903
+ token * t;
904
+
905
+ switch (line->child->type) {
906
+ case MARKER_BLOCKQUOTE:
907
+ case NON_INDENT_SPACE:
908
+ t = line->child;
909
+ line->child = t->next;
910
+ t->next = NULL;
911
+
912
+ if (line->child) {
913
+ line->child->prev = NULL;
914
+ line->child->tail = t->tail;
915
+ }
916
+
917
+ token_free(t);
918
+ break;
919
+ }
920
+
921
+ if (line->child && (line->child->type == TEXT_PLAIN)) {
922
+ // Strip leading whitespace from first text token
923
+ t = line->child;
924
+
925
+ while (t->len && char_is_whitespace(source[t->start])) {
926
+ t->start++;
927
+ t->len--;
928
+ }
929
+
930
+ if (t->len == 0) {
931
+ line->child = t->next;
932
+ t->next = NULL;
933
+
934
+ if (line->child) {
935
+ line->child->prev = NULL;
936
+ line->child->tail = t->tail;
937
+ }
938
+
939
+ token_free(t);
940
+ }
941
+ }
942
+ }
943
+
944
+
945
+ /// Strip leading blockquote markers and non-indent space
946
+ /// (for recursively parsing blockquotes)
947
+ void strip_quote_markers_from_block(mmd_engine * e, token * block) {
948
+ if (!block || !block->child) {
949
+ return;
950
+ }
951
+
952
+ token * t = block->child;
953
+
954
+ while (t != NULL) {
955
+ strip_quote_markers_from_line(t, e->dstr->str);
956
+ mmd_assign_line_type(e, t);
957
+
958
+ t = t->next;
959
+ }
960
+ }
961
+
962
+
963
+ /// Create a token chain from source string
964
+ /// stop_on_empty_line allows us to stop parsing part of the way through
965
+ token * mmd_tokenize_string(mmd_engine * e, size_t start, size_t len, bool stop_on_empty_line) {
966
+ // Reset metadata flag
967
+ e->allow_meta = (e->extensions & EXT_COMPATIBILITY) ? false : true;
968
+
969
+
970
+ // Create a scanner (for re2c)
971
+ Scanner s;
972
+ s.start = &e->dstr->str[start];
973
+ s.cur = s.start;
974
+
975
+ // Strip trailing whitespace
976
+ // while (len && char_is_whitespace_or_line_ending(str[len - 1]))
977
+ // len--;
978
+
979
+ // Where do we stop parsing?
980
+ const char * stop = &e->dstr->str[start] + len;
981
+
982
+ int type; // TOKEN type
983
+ token * t; // Create tokens for incorporation
984
+
985
+ token * root = token_new(0, start, 0); // Store the final parse tree here
986
+ token * line = token_new(0, start, 0); // Store current line here
987
+
988
+ const char * last_stop = &e->dstr->str[start]; // Remember where last token ended
989
+
990
+ do {
991
+ // Scan for next token (type of 0 means there is nothing left);
992
+ type = scan(&s, stop);
993
+
994
+ //if (type && s.start != last_stop) {
995
+ if (s.start != last_stop) {
996
+ // We skipped characters between tokens
997
+
998
+ if (type) {
999
+ // Create a default token type for the skipped characters
1000
+ t = token_new(TEXT_PLAIN, (size_t)(last_stop - e->dstr->str), (size_t)(s.start - last_stop));
1001
+
1002
+ token_append_child(line, t);
1003
+ } else {
1004
+ if (stop > last_stop) {
1005
+ // Source text ends without newline
1006
+ t = token_new(TEXT_PLAIN, (size_t)(last_stop - e->dstr->str), (size_t)(stop - last_stop));
1007
+
1008
+ token_append_child(line, t);
1009
+ }
1010
+ }
1011
+ } else if (type == 0 && stop > last_stop) {
1012
+ // Source text ends without newline
1013
+ t = token_new(TEXT_PLAIN, (size_t)(last_stop - e->dstr->str), (size_t)(stop - last_stop));
1014
+ token_append_child(line, t);
1015
+ }
1016
+
1017
+
1018
+ switch (type) {
1019
+ case 0:
1020
+ // 0 means we finished with input
1021
+ // Add current line to root
1022
+
1023
+ // What sort of line is this?
1024
+ mmd_assign_line_type(e, line);
1025
+
1026
+ token_append_child(root, line);
1027
+ break;
1028
+
1029
+ case TEXT_NL_SP:
1030
+ case TEXT_LINEBREAK_SP:
1031
+ case TEXT_LINEBREAK:
1032
+ case TEXT_NL:
1033
+
1034
+ // We hit the end of a line
1035
+ switch (type) {
1036
+ case TEXT_NL_SP:
1037
+ t = token_new(TEXT_NL, (size_t)(s.start - e->dstr->str), (size_t)(s.cur - s.start) - 1);
1038
+ break;
1039
+
1040
+ case TEXT_LINEBREAK_SP:
1041
+ t = token_new(TEXT_LINEBREAK, (size_t)(s.start - e->dstr->str), (size_t)(s.cur - s.start) - 1);
1042
+ break;
1043
+
1044
+ default:
1045
+ t = token_new(type, (size_t)(s.start - e->dstr->str), (size_t)(s.cur - s.start));
1046
+ break;
1047
+ }
1048
+
1049
+ token_append_child(line, t);
1050
+
1051
+ // What sort of line is this?
1052
+ mmd_assign_line_type(e, line);
1053
+
1054
+ token_append_child(root, line);
1055
+
1056
+ // If this is first line, do we have proper metadata?
1057
+ if (e->allow_meta && root->child == line) {
1058
+ if (line->type == LINE_SETEXT_2) {
1059
+ line->type = LINE_YAML;
1060
+ } else if (line->type != LINE_META) {
1061
+ e->allow_meta = false;
1062
+ }
1063
+ }
1064
+
1065
+ if (stop_on_empty_line) {
1066
+ if (line->type == LINE_EMPTY) {
1067
+ return root;
1068
+ }
1069
+ }
1070
+
1071
+ switch (type) {
1072
+ case TEXT_NL_SP:
1073
+ line = token_new(0, s.cur - e->dstr->str - 1, 0);
1074
+ t = token_new(NON_INDENT_SPACE, (size_t)(s.cur - e->dstr->str - 1), 1);
1075
+ token_append_child(line, t);
1076
+ break;
1077
+
1078
+ case TEXT_LINEBREAK_SP:
1079
+ line = token_new(0, s.cur - e->dstr->str - 1, 0);
1080
+ t = token_new(NON_INDENT_SPACE, (size_t)(s.cur - e->dstr->str - 1), 1);
1081
+ token_append_child(line, t);
1082
+ break;
1083
+
1084
+ default:
1085
+ line = token_new(0, s.cur - e->dstr->str, 0);
1086
+ break;
1087
+ }
1088
+
1089
+ break;
1090
+
1091
+ default:
1092
+ t = token_new(type, (size_t)(s.start - e->dstr->str), (size_t)(s.cur - s.start));
1093
+ token_append_child(line, t);
1094
+ break;
1095
+ }
1096
+
1097
+ // Remember where token ends to detect skipped characters
1098
+ last_stop = s.cur;
1099
+ } while (type != 0);
1100
+
1101
+
1102
+ return root;
1103
+ }
1104
+
1105
+
1106
+ /// Parse token tree
1107
+ void mmd_parse_token_chain(mmd_engine * e, token * chain) {
1108
+
1109
+ if (e->recurse_depth == kMaxParseRecursiveDepth) {
1110
+ return;
1111
+ }
1112
+
1113
+ e->recurse_depth++;
1114
+
1115
+ void* pParser = ParseAlloc (malloc); // Create a parser (for lemon)
1116
+ token * walker = chain->child; // Walk the existing tree
1117
+ token * remainder; // Hold unparsed tail of chain
1118
+
1119
+ #ifndef NDEBUG
1120
+ ParseTrace(stderr, "parser >>");
1121
+ #endif
1122
+
1123
+ // Remove existing token tree
1124
+ e->root = NULL;
1125
+
1126
+ while (walker != NULL) {
1127
+ remainder = walker->next;
1128
+
1129
+ // Snip token from remainder
1130
+ walker->next = NULL;
1131
+ walker->tail = walker;
1132
+
1133
+ if (remainder) {
1134
+ remainder->prev = NULL;
1135
+ }
1136
+
1137
+ #ifndef NDEBUG
1138
+ fprintf(stderr, "\nNew line\n");
1139
+ #endif
1140
+
1141
+ Parse(pParser, walker->type, walker, e);
1142
+
1143
+ walker = remainder;
1144
+ }
1145
+
1146
+ // Signal finish to parser
1147
+ #ifndef NDEBUG
1148
+ fprintf(stderr, "\nFinish parse\n");
1149
+ #endif
1150
+ Parse(pParser, 0, NULL, e);
1151
+
1152
+ // Disconnect of (now empty) root
1153
+ chain->child = NULL;
1154
+ token_append_child(chain, e->root);
1155
+ e->root = NULL;
1156
+
1157
+ ParseFree(pParser, free);
1158
+
1159
+ e->recurse_depth--;
1160
+ }
1161
+
1162
+
1163
+ void mmd_pair_tokens_in_chain(token * head, token_pair_engine * e, stack * s) {
1164
+
1165
+ while (head != NULL) {
1166
+ mmd_pair_tokens_in_block(head, e, s);
1167
+
1168
+ head = head->next;
1169
+ }
1170
+ }
1171
+
1172
+
1173
+ /// Match token pairs inside block
1174
+ void mmd_pair_tokens_in_block(token * block, token_pair_engine * e, stack * s) {
1175
+ if (block == NULL || e == NULL) {
1176
+ return;
1177
+ }
1178
+
1179
+ switch (block->type) {
1180
+ case BLOCK_BLOCKQUOTE:
1181
+ case BLOCK_DEFLIST:
1182
+ case BLOCK_DEFINITION:
1183
+ case BLOCK_DEF_ABBREVIATION:
1184
+ case BLOCK_DEF_CITATION:
1185
+ case BLOCK_DEF_FOOTNOTE:
1186
+ case BLOCK_DEF_GLOSSARY:
1187
+ case BLOCK_DEF_LINK:
1188
+ case BLOCK_H1:
1189
+ case BLOCK_H2:
1190
+ case BLOCK_H3:
1191
+ case BLOCK_H4:
1192
+ case BLOCK_H5:
1193
+ case BLOCK_H6:
1194
+ case BLOCK_PARA:
1195
+ case BLOCK_SETEXT_1:
1196
+ case BLOCK_SETEXT_2:
1197
+ case BLOCK_TERM:
1198
+ token_pairs_match_pairs_inside_token(block, e, s, 0);
1199
+ break;
1200
+
1201
+ case DOC_START_TOKEN:
1202
+ case BLOCK_LIST_BULLETED:
1203
+ case BLOCK_LIST_BULLETED_LOOSE:
1204
+ case BLOCK_LIST_ENUMERATED:
1205
+ case BLOCK_LIST_ENUMERATED_LOOSE:
1206
+ mmd_pair_tokens_in_chain(block->child, e, s);
1207
+ break;
1208
+
1209
+ case BLOCK_LIST_ITEM:
1210
+ case BLOCK_LIST_ITEM_TIGHT:
1211
+ token_pairs_match_pairs_inside_token(block, e, s, 0);
1212
+ mmd_pair_tokens_in_chain(block->child, e, s);
1213
+ break;
1214
+
1215
+ case LINE_TABLE:
1216
+ case BLOCK_TABLE:
1217
+ // TODO: Need to parse into cells first
1218
+ token_pairs_match_pairs_inside_token(block, e, s, 0);
1219
+ mmd_pair_tokens_in_chain(block->child, e, s);
1220
+ break;
1221
+
1222
+ case BLOCK_EMPTY:
1223
+ case BLOCK_CODE_INDENTED:
1224
+ case BLOCK_CODE_FENCED:
1225
+ default:
1226
+ // Nothing to do here
1227
+ return;
1228
+ }
1229
+ }
1230
+
1231
+
1232
+ /// Ambidextrous tokens can open OR close a pair. This routine gives the opportunity
1233
+ /// to change this behavior on case-by-case basis. For example, in `foo **bar** foo`, the
1234
+ /// first set of asterisks can open, but not close a pair. The second set can close, but not
1235
+ /// open a pair. This allows for complex behavior without having to bog down the tokenizer
1236
+ /// with figuring out which type of asterisk we have. Default behavior is that open and close
1237
+ /// are enabled, so we just have to figure out when to turn it off.
1238
+ void mmd_assign_ambidextrous_tokens_in_block(mmd_engine * e, token * block, size_t start_offset) {
1239
+ if (block == NULL || block->child == NULL) {
1240
+ return;
1241
+ }
1242
+
1243
+ size_t offset; // Temp variable for use below
1244
+ size_t lead_count, lag_count, pre_count, post_count;
1245
+
1246
+ token * t = block->child;
1247
+
1248
+ char * str = e->dstr->str;
1249
+
1250
+ while (t != NULL) {
1251
+ switch (t->type) {
1252
+ case BLOCK_META:
1253
+
1254
+ // Do we treat this like metadata?
1255
+ if (!(e->extensions & EXT_COMPATIBILITY) &&
1256
+ !(e->extensions & EXT_NO_METADATA)) {
1257
+ break;
1258
+ }
1259
+
1260
+ // This is not metadata
1261
+ t->type = BLOCK_PARA;
1262
+
1263
+ case DOC_START_TOKEN:
1264
+ case BLOCK_BLOCKQUOTE:
1265
+ case BLOCK_DEF_ABBREVIATION:
1266
+ case BLOCK_DEFLIST:
1267
+ case BLOCK_DEFINITION:
1268
+ case BLOCK_H1:
1269
+ case BLOCK_H2:
1270
+ case BLOCK_H3:
1271
+ case BLOCK_H4:
1272
+ case BLOCK_H5:
1273
+ case BLOCK_H6:
1274
+ case BLOCK_LIST_BULLETED:
1275
+ case BLOCK_LIST_BULLETED_LOOSE:
1276
+ case BLOCK_LIST_ENUMERATED:
1277
+ case BLOCK_LIST_ENUMERATED_LOOSE:
1278
+ case BLOCK_LIST_ITEM:
1279
+ case BLOCK_LIST_ITEM_TIGHT:
1280
+ case BLOCK_PARA:
1281
+ case BLOCK_SETEXT_1:
1282
+ case BLOCK_SETEXT_2:
1283
+ case BLOCK_TABLE:
1284
+ case BLOCK_TERM:
1285
+ case LINE_LIST_BULLETED:
1286
+ case LINE_LIST_ENUMERATED:
1287
+ // Assign child tokens of blocks
1288
+ mmd_assign_ambidextrous_tokens_in_block(e, t, start_offset);
1289
+ break;
1290
+
1291
+ case CRITIC_SUB_DIV:
1292
+ // Divide this into two tokens
1293
+ t->child = token_new(CRITIC_SUB_DIV_B, t->start + 1, 1);
1294
+ t->child->next = t->next;
1295
+ t->next = t->child;
1296
+ t->child = NULL;
1297
+ t->len = 1;
1298
+ t->type = CRITIC_SUB_DIV_A;
1299
+ break;
1300
+
1301
+ case STAR:
1302
+ // Look left and skip over neighboring '*' characters
1303
+ offset = t->start;
1304
+
1305
+ while ((offset != 0) && ((str[offset] == '*') || (str[offset] == '_'))) {
1306
+ offset--;
1307
+ }
1308
+
1309
+ // We can only close if there is something to left besides whitespace
1310
+ if ((offset == 0) || (char_is_whitespace_or_line_ending(str[offset]))) {
1311
+ // Whitespace or punctuation to left, so can't close
1312
+ t->can_close = 0;
1313
+ }
1314
+
1315
+ // Look right and skip over neighboring '*' characters
1316
+ offset = t->start + 1;
1317
+
1318
+ while ((str[offset] == '*') || (str[offset] == '_')) {
1319
+ offset++;
1320
+ }
1321
+
1322
+ // We can only open if there is something to right besides whitespace/punctuation
1323
+ if (char_is_whitespace_or_line_ending(str[offset])) {
1324
+ // Whitespace to right, so can't open
1325
+ t->can_open = 0;
1326
+ }
1327
+
1328
+ // If we're in the middle of a word, then we need to be more precise
1329
+ if (t->can_open && t->can_close) {
1330
+ lead_count = 0; //!< '*' in run before current
1331
+ lag_count = 0; //!< '*' in run after current
1332
+ pre_count = 0; //!< '*' before word
1333
+ post_count = 0; //!< '*' after word
1334
+
1335
+ offset = t->start - 1;
1336
+
1337
+ // How many '*' in this run before current token?
1338
+ while (offset && (str[offset] == '*')) {
1339
+ lead_count++;
1340
+ offset--;
1341
+ }
1342
+
1343
+ // Skip over letters/numbers
1344
+ // TODO: Need to fix this to actually get run at beginning of word, not in middle,
1345
+ // e.g. **foo*bar*foo*bar**
1346
+ while (offset && (!char_is_whitespace_or_line_ending_or_punctuation(str[offset]))) {
1347
+ offset--;
1348
+ }
1349
+
1350
+ // Are there '*' at the beginning of this word?
1351
+ while ((offset != -1) && (str[offset] == '*')) {
1352
+ pre_count++;
1353
+ offset--;
1354
+ }
1355
+
1356
+ offset = t->start + 1;
1357
+
1358
+ // How many '*' in this run after current token?
1359
+ while (str[offset] == '*') {
1360
+ lag_count++;
1361
+ offset++;
1362
+ }
1363
+
1364
+ // Skip over letters/numbers
1365
+ // TODO: Same as above
1366
+ while (!char_is_whitespace_or_line_ending_or_punctuation(str[offset])) {
1367
+ offset++;
1368
+ }
1369
+
1370
+ // Are there '*' at the end of this word?
1371
+ while (offset && (str[offset] == '*')) {
1372
+ post_count++;
1373
+ offset++;
1374
+ }
1375
+
1376
+ // Are there '*' before/after word?
1377
+ if (pre_count + post_count > 0) {
1378
+ if (pre_count + post_count == lead_count + lag_count + 1) {
1379
+ // Same number outside as in the current run
1380
+ // **foo****bar**
1381
+ if (pre_count == post_count) {
1382
+ // **foo****bar**
1383
+ // We want to wrap the word, since
1384
+ // <strong>foo</strong><strong>bar</strong> doesn't make sense
1385
+ t->can_open = 0;
1386
+ t->can_close = 0;
1387
+ } else if (pre_count == 0) {
1388
+ // foo**bar**
1389
+ // Open only so we don't close outside the word
1390
+ t->can_close = 0;
1391
+ } else if (post_count == 0) {
1392
+ // **foo**bar
1393
+ // Close only so we don't close outside the word
1394
+ t->can_open = 0;
1395
+ }
1396
+ } else if (pre_count == lead_count + lag_count + 1 + post_count) {
1397
+ // ***foo**bar*
1398
+ // We want to close what's open
1399
+ t->can_open = 0;
1400
+ } else if (post_count == pre_count + lead_count + lag_count + 1) {
1401
+ // *foo**bar***
1402
+ // We want to open a set to close at the end
1403
+ t->can_close = 0;
1404
+ } else {
1405
+ if (pre_count != lead_count + lag_count + 1) {
1406
+ // **foo**bar -> close, otherwise don't
1407
+ t->can_close = 0;
1408
+ }
1409
+
1410
+ if (post_count != lead_count + lag_count + 1) {
1411
+ // foo**bar** -> open, otherwise don't
1412
+ t->can_open = 0;
1413
+ }
1414
+ }
1415
+ }
1416
+ }
1417
+
1418
+ break;
1419
+
1420
+ case UL:
1421
+ // Look left and skip over neighboring '_' characters
1422
+ offset = t->start;
1423
+
1424
+ while ((offset != 0) && ((str[offset] == '_') || (str[offset] == '*'))) {
1425
+ offset--;
1426
+ }
1427
+
1428
+ if ((offset == 0) || (char_is_whitespace_or_line_ending(str[offset]))) {
1429
+ // Whitespace to left, so can't close
1430
+ t->can_close = 0;
1431
+ }
1432
+
1433
+ // We don't allow intraword underscores (e.g. `foo_bar_foo`)
1434
+ if ((offset > 0) && (char_is_alphanumeric(str[offset]))) {
1435
+ // Letters to left, so can't open
1436
+ t->can_open = 0;
1437
+ }
1438
+
1439
+ // Look right and skip over neighboring '_' characters
1440
+ offset = t->start + 1;
1441
+
1442
+ while ((str[offset] == '*') || (str[offset] == '_')) {
1443
+ offset++;
1444
+ }
1445
+
1446
+ if (char_is_whitespace_or_line_ending(str[offset])) {
1447
+ // Whitespace to right, so can't open
1448
+ t->can_open = 0;
1449
+ }
1450
+
1451
+ if (char_is_alphanumeric(str[offset])) {
1452
+ // Letters to right, so can't close
1453
+ t->can_close = 0;
1454
+ }
1455
+
1456
+ break;
1457
+
1458
+ case BACKTICK:
1459
+ // Backticks are used for code spans, but also for ``foo'' double quote syntax.
1460
+ // We care only about the quote syntax.
1461
+ offset = t->start;
1462
+
1463
+ // TODO: This does potentially prevent ``foo `` from closing due to space before closer?
1464
+ // Bug or feature??
1465
+ if (t->len != 2) {
1466
+ break;
1467
+ }
1468
+
1469
+ if ((offset == 0) || (str[offset] != '`' && char_is_whitespace_or_line_ending_or_punctuation(str[offset - 1]))) {
1470
+ // Whitespace or punctuation to left, so can't close
1471
+ t->can_close = 0;
1472
+ }
1473
+
1474
+ break;
1475
+
1476
+ case QUOTE_SINGLE:
1477
+ // Some of these are actually APOSTROPHE's and should not be paired
1478
+ offset = t->start;
1479
+
1480
+ if (!((offset == 0) ||
1481
+ (char_is_whitespace_or_line_ending_or_punctuation(str[offset - 1])) ||
1482
+ (char_is_whitespace_or_line_ending_or_punctuation(str[offset + 1])))) {
1483
+ t->type = APOSTROPHE;
1484
+ break;
1485
+ }
1486
+
1487
+ if (offset && (char_is_punctuation(str[offset - 1])) &&
1488
+ (char_is_alphanumeric(str[offset + 1]))) {
1489
+ // If possessive apostrophe, e.g. `x`'s
1490
+ if (str[offset + 1] == 's' || str[offset + 1] == 'S') {
1491
+ if (char_is_whitespace_or_line_ending_or_punctuation(str[offset + 2])) {
1492
+ t->type = APOSTROPHE;
1493
+ break;
1494
+ }
1495
+ }
1496
+ }
1497
+
1498
+ case QUOTE_DOUBLE:
1499
+ offset = t->start;
1500
+
1501
+ if ((offset == 0) || (char_is_whitespace_or_line_ending(str[offset - 1]))) {
1502
+ t->can_close = 0;
1503
+ }
1504
+
1505
+ if (char_is_whitespace_or_line_ending(str[offset + 1])) {
1506
+ t->can_open = 0;
1507
+ }
1508
+
1509
+ break;
1510
+
1511
+ case DASH_N:
1512
+ if (!(e->extensions & EXT_SMART)) {
1513
+ break;
1514
+ }
1515
+
1516
+ // We want `1-2` to trigger a DASH_N, but regular hyphen otherwise (`a-b`)
1517
+ // This doesn't apply to `--` or `---`
1518
+ offset = t->start;
1519
+
1520
+ if (t->len == 1) {
1521
+ // Check whether we have '1-2'
1522
+ if ((offset == 0) || (!char_is_digit(str[offset - 1])) ||
1523
+ (!char_is_digit(str[offset + 1]))) {
1524
+ t->type = TEXT_PLAIN;
1525
+ }
1526
+ }
1527
+
1528
+ break;
1529
+
1530
+ case MATH_DOLLAR_SINGLE:
1531
+ case MATH_DOLLAR_DOUBLE:
1532
+ if (e->extensions & EXT_COMPATIBILITY) {
1533
+ break;
1534
+ }
1535
+
1536
+ offset = t->start;
1537
+
1538
+ // Look left
1539
+ if ((offset == 0) || (char_is_whitespace_or_line_ending(str[offset - 1]))) {
1540
+ // Whitespace to left, so can't close
1541
+ t->can_close = 0;
1542
+ } else if ((offset != 0) && (!char_is_whitespace_or_line_ending_or_punctuation(str[offset - 1]))) {
1543
+ // No whitespace or punctuation to left, can't open
1544
+ t->can_open = 0;
1545
+ }
1546
+
1547
+ // Look right
1548
+ offset = t->start + t->len;
1549
+
1550
+ if (char_is_whitespace_or_line_ending(str[offset])) {
1551
+ // Whitespace to right, so can't open
1552
+ t->can_open = 0;
1553
+ } else if (!char_is_whitespace_or_line_ending_or_punctuation(str[offset])) {
1554
+ // No whitespace or punctuation to right, can't close
1555
+ t->can_close = 0;
1556
+ }
1557
+
1558
+ break;
1559
+
1560
+ case SUPERSCRIPT:
1561
+ case SUBSCRIPT:
1562
+ if (e->extensions & EXT_COMPATIBILITY) {
1563
+ t->type = TEXT_PLAIN;
1564
+ break;
1565
+ }
1566
+
1567
+ offset = t->start;
1568
+
1569
+ // Look left -- no whitespace to left
1570
+ if ((offset == 0) || (char_is_whitespace_or_line_ending_or_punctuation(str[offset - 1]))) {
1571
+ t->can_open = 0;
1572
+ }
1573
+
1574
+ if ((offset != 0) && (char_is_whitespace_or_line_ending(str[offset - 1]))) {
1575
+ t->can_close = 0;
1576
+ }
1577
+
1578
+ offset = t->start + t->len;
1579
+
1580
+ if (char_is_whitespace_or_line_ending_or_punctuation(str[offset])) {
1581
+ t->can_open = 0;
1582
+ }
1583
+
1584
+ // We need to be contiguous in order to match
1585
+ if (t->can_close) {
1586
+ offset = t->start;
1587
+ t->can_close = 0;
1588
+
1589
+ while ((offset > 0) && !(char_is_whitespace_or_line_ending(str[offset - 1]))) {
1590
+ if (str[offset - 1] == str[t->start]) {
1591
+ t->can_close = 1;
1592
+ break;
1593
+ }
1594
+
1595
+ offset--;
1596
+ }
1597
+ }
1598
+
1599
+ // We need to be contiguous in order to match
1600
+ if (t->can_open) {
1601
+ offset = t->start + t->len;
1602
+ t->can_open = 0;
1603
+
1604
+ while (!(char_is_whitespace_or_line_ending(str[offset]))) {
1605
+ if (str[offset] == str[t->start]) {
1606
+ t->can_open = 1;
1607
+ break;
1608
+ }
1609
+
1610
+ offset++;
1611
+ }
1612
+
1613
+ // Are we a standalone, e.g x^2
1614
+ if (!t->can_close && !t->can_open) {
1615
+ offset = t->start + t->len;
1616
+
1617
+ while (!char_is_whitespace_or_line_ending_or_punctuation(str[offset])) {
1618
+ offset++;
1619
+ }
1620
+
1621
+ t->len = offset - t->start;
1622
+ t->can_close = 0;
1623
+
1624
+ // Shift next token right and move those characters as child node
1625
+ // It's possible that one (or more?) tokens are entirely subsumed.
1626
+ while (t->next && t->next->start + t->next->len < offset) {
1627
+ tokens_prune(t->next, t->next);
1628
+ }
1629
+
1630
+ if ((t->next != NULL) && ((t->next->type == TEXT_PLAIN) || (t->next->type == TEXT_NUMBER_POSS_LIST))) {
1631
+ t->next->len = t->next->start + t->next->len - offset;
1632
+ t->next->start = offset;
1633
+ }
1634
+
1635
+ t->child = token_new(TEXT_PLAIN, t->start + 1, t->len - 1);
1636
+ }
1637
+ }
1638
+
1639
+ break;
1640
+ }
1641
+
1642
+ t = t->next;
1643
+ }
1644
+
1645
+ }
1646
+
1647
+
1648
+ /// Strong/emph parsing is done using single `*` and `_` characters, which are
1649
+ /// then combined in a separate routine here to determine when
1650
+ /// consecutive characters should be interpreted as STRONG instead of EMPH
1651
+ /// \todo: Perhaps combining this with the routine when they are paired
1652
+ /// would improve performance?
1653
+ void pair_emphasis_tokens(token * t) {
1654
+ token * closer;
1655
+
1656
+ while (t != NULL) {
1657
+ if (t->mate != NULL) {
1658
+ switch (t->type) {
1659
+ case STAR:
1660
+ case UL:
1661
+ closer = t->mate;
1662
+
1663
+ if (t->next &&
1664
+ (t->next->mate == closer->prev) &&
1665
+ (t->type == t->next->type) &&
1666
+ (t->next->mate != t) &&
1667
+ (t->start + t->len == t->next->start) &&
1668
+ (closer->start == closer->prev->start + closer->prev->len)) {
1669
+
1670
+ // We have a strong pair
1671
+ t->type = STRONG_START;
1672
+ t->len = 2;
1673
+ closer->type = STRONG_STOP;
1674
+ closer->len = 2;
1675
+ closer->start--;
1676
+
1677
+ tokens_prune(t->next, t->next);
1678
+ tokens_prune(closer->prev, closer->prev);
1679
+
1680
+ token_prune_graft(t, closer, PAIR_STRONG);
1681
+ } else {
1682
+ t->type = EMPH_START;
1683
+ closer->type = EMPH_STOP;
1684
+ token_prune_graft(t, closer, PAIR_EMPH);
1685
+ }
1686
+
1687
+ break;
1688
+
1689
+ default:
1690
+ break;
1691
+ }
1692
+
1693
+ }
1694
+
1695
+ if (t->child != NULL) {
1696
+ switch (t->type) {
1697
+ case PAIR_BACKTICK:
1698
+ case PAIR_MATH:
1699
+ break;
1700
+
1701
+ default:
1702
+ pair_emphasis_tokens(t->child);
1703
+ break;
1704
+ }
1705
+ }
1706
+
1707
+ t = t->next;
1708
+ }
1709
+ }
1710
+
1711
+
1712
+ void recursive_parse_list_item(mmd_engine * e, token * block) {
1713
+ token * marker = token_copy(block->child->child);
1714
+
1715
+ // Strip list marker from first line
1716
+ token_remove_first_child(block->child);
1717
+
1718
+ // Remove one indent level from all lines to allow recursive parsing
1719
+ deindent_block(e, block);
1720
+
1721
+ mmd_parse_token_chain(e, block);
1722
+
1723
+ // Insert marker back in place
1724
+ marker->next = block->child->child;
1725
+
1726
+ if (block->child->child) {
1727
+ block->child->child->prev = marker;
1728
+ }
1729
+
1730
+ block->child->child = marker;
1731
+ }
1732
+
1733
+
1734
+ void recursive_parse_indent(mmd_engine * e, token * block) {
1735
+ // Remove one indent level from all lines to allow recursive parsing
1736
+ deindent_block(e, block);
1737
+
1738
+ // First line is now plain text
1739
+ block->child->type = LINE_PLAIN;
1740
+
1741
+ // Strip tokens?
1742
+ switch (block->type) {
1743
+ case BLOCK_DEFINITION:
1744
+ // Strip leading ':' from definition
1745
+ token_remove_first_child(block->child);
1746
+ break;
1747
+ }
1748
+
1749
+ mmd_parse_token_chain(e, block);
1750
+ }
1751
+
1752
+
1753
+ void is_list_loose(token * list) {
1754
+ bool loose = false;
1755
+
1756
+ token * walker = list->child;
1757
+
1758
+ if (walker == NULL) {
1759
+ return;
1760
+ }
1761
+
1762
+ while (walker->next != NULL) {
1763
+ if (walker->type == BLOCK_LIST_ITEM) {
1764
+ if (walker->child->type == BLOCK_PARA) {
1765
+ loose = true;
1766
+ } else {
1767
+ walker->type = BLOCK_LIST_ITEM_TIGHT;
1768
+ }
1769
+ }
1770
+
1771
+ walker = walker->next;
1772
+ }
1773
+
1774
+ if (loose) {
1775
+ switch (list->type) {
1776
+ case BLOCK_LIST_BULLETED:
1777
+ list->type = BLOCK_LIST_BULLETED_LOOSE;
1778
+ break;
1779
+
1780
+ case BLOCK_LIST_ENUMERATED:
1781
+ list->type = BLOCK_LIST_ENUMERATED_LOOSE;
1782
+ break;
1783
+ }
1784
+ }
1785
+ }
1786
+
1787
+
1788
+ /// Is this actually an HTML block?
1789
+ void is_para_html(mmd_engine * e, token * block) {
1790
+ if ((block == NULL) ||
1791
+ (block->child == NULL) ||
1792
+ (block->child->type != LINE_PLAIN)) {
1793
+ return;
1794
+ }
1795
+
1796
+ token * t = block->child->child;
1797
+
1798
+ if (t->type == ANGLE_LEFT || t->type == HTML_COMMENT_START) {
1799
+ if (scan_html_block(&(e->dstr->str[t->start]))) {
1800
+ block->type = BLOCK_HTML;
1801
+ return;
1802
+ }
1803
+
1804
+ if (scan_html_line(&(e->dstr->str[t->start]))) {
1805
+ block->type = BLOCK_HTML;
1806
+ return;
1807
+ }
1808
+ }
1809
+ }
1810
+
1811
+
1812
+ void recursive_parse_blockquote(mmd_engine * e, token * block) {
1813
+ // Strip blockquote markers (if present)
1814
+ strip_quote_markers_from_block(e, block);
1815
+
1816
+ mmd_parse_token_chain(e, block);
1817
+ }
1818
+
1819
+
1820
+ void metadata_stack_describe(mmd_engine * e) {
1821
+ meta * m;
1822
+
1823
+ for (int i = 0; i < e->metadata_stack->size; ++i) {
1824
+ m = stack_peek_index(e->metadata_stack, i);
1825
+ fprintf(stderr, "'%s': '%s'\n", m->key, m->value);
1826
+ }
1827
+ }
1828
+
1829
+
1830
+ void strip_line_tokens_from_metadata(mmd_engine * e, token * metadata) {
1831
+ token * l = metadata->child;
1832
+ char * source = e->dstr->str;
1833
+
1834
+ meta * m = NULL;
1835
+ size_t start, len;
1836
+
1837
+ DString * d = d_string_new("");
1838
+
1839
+ while (l) {
1840
+ switch (l->type) {
1841
+ case LINE_META:
1842
+ meta:
1843
+ if (m) {
1844
+ meta_set_value(m, d->str);
1845
+ d_string_erase(d, 0, -1);
1846
+ }
1847
+
1848
+ len = scan_meta_key(&source[l->start]);
1849
+ m = meta_new(source, l->start, len);
1850
+ start = l->start + len + 1;
1851
+ len = l->start + l->len - start - 1;
1852
+ d_string_append_c_array(d, &source[start], len);
1853
+ stack_push(e->metadata_stack, m);
1854
+ break;
1855
+
1856
+ case LINE_INDENTED_TAB:
1857
+ case LINE_INDENTED_SPACE:
1858
+ while (l->len && char_is_whitespace(source[l->start])) {
1859
+ l->start++;
1860
+ l->len--;
1861
+ }
1862
+
1863
+ case LINE_PLAIN:
1864
+ plain:
1865
+ d_string_append_c(d, '\n');
1866
+ d_string_append_c_array(d, &source[l->start], l->len);
1867
+ break;
1868
+
1869
+ case LINE_SETEXT_2:
1870
+ case LINE_YAML:
1871
+ break;
1872
+
1873
+ case LINE_TABLE:
1874
+ if (scan_meta_line(&source[l->start])) {
1875
+ goto meta;
1876
+ } else {
1877
+ goto plain;
1878
+ }
1879
+
1880
+ default:
1881
+ fprintf(stderr, "ERROR!\n");
1882
+ token_describe(l, NULL);
1883
+ break;
1884
+ }
1885
+
1886
+ l = l->next;
1887
+ }
1888
+
1889
+ // Finish last line
1890
+ if (m) {
1891
+ meta_set_value(m, d->str);
1892
+ }
1893
+
1894
+ d_string_free(d, true);
1895
+ }
1896
+
1897
+
1898
+ void strip_line_tokens_from_deflist(mmd_engine * e, token * deflist) {
1899
+ token * walker = deflist->child;
1900
+
1901
+ while (walker) {
1902
+ switch (walker->type) {
1903
+ case LINE_EMPTY:
1904
+ walker->type = TEXT_EMPTY;
1905
+ break;
1906
+
1907
+ case LINE_PLAIN:
1908
+ walker->type = BLOCK_TERM;
1909
+
1910
+ case BLOCK_TERM:
1911
+ break;
1912
+
1913
+ case BLOCK_DEFINITION:
1914
+ strip_line_tokens_from_block(e, walker);
1915
+ break;
1916
+ }
1917
+
1918
+ walker = walker->next;
1919
+ }
1920
+ }
1921
+
1922
+
1923
+ void strip_line_tokens_from_table(mmd_engine * e, token * table) {
1924
+ token * walker = table->child;
1925
+
1926
+ while (walker) {
1927
+ switch (walker->type) {
1928
+ case BLOCK_TABLE_SECTION:
1929
+ strip_line_tokens_from_block(e, walker);
1930
+ break;
1931
+
1932
+ case BLOCK_TABLE_HEADER:
1933
+ strip_line_tokens_from_block(e, walker);
1934
+ break;
1935
+
1936
+ case LINE_EMPTY:
1937
+ walker->type = TEXT_EMPTY;
1938
+ break;
1939
+ }
1940
+
1941
+ walker = walker->next;
1942
+ }
1943
+ }
1944
+
1945
+
1946
+ void parse_table_row_into_cells(token * row) {
1947
+ token * first = NULL;
1948
+ token * last = NULL;
1949
+
1950
+ token * walker = row->child;
1951
+
1952
+ if (walker) {
1953
+ if (walker->type == PIPE) {
1954
+ walker->type = TABLE_DIVIDER;
1955
+ first = walker->next;
1956
+ } else {
1957
+ first = walker;
1958
+ last = first;
1959
+ }
1960
+
1961
+ walker = walker->next;
1962
+ }
1963
+
1964
+ while (walker) {
1965
+ switch (walker->type) {
1966
+ case PIPE:
1967
+ token_prune_graft(first, last, TABLE_CELL);
1968
+ first = NULL;
1969
+ last = NULL;
1970
+ walker->type = TABLE_DIVIDER;
1971
+ break;
1972
+
1973
+ case TEXT_NL:
1974
+ case TEXT_LINEBREAK:
1975
+ break;
1976
+
1977
+ default:
1978
+ if (!first) {
1979
+ first = walker;
1980
+ }
1981
+
1982
+ last = walker;
1983
+ }
1984
+
1985
+ walker = walker->next;
1986
+ }
1987
+
1988
+ if (first) {
1989
+ token_prune_graft(first, last, TABLE_CELL);
1990
+ }
1991
+ }
1992
+
1993
+
1994
+ void strip_line_tokens_from_block(mmd_engine * e, token * block) {
1995
+ if ((block == NULL) || (block->child == NULL)) {
1996
+ return;
1997
+ }
1998
+
1999
+ #ifndef NDEBUG
2000
+ fprintf(stderr, "Strip line tokens from %d (%lu:%lu) (child %d)\n", block->type, block->start, block->len, block->child->type);
2001
+ token_tree_describe(block, e->dstr->str);
2002
+ #endif
2003
+
2004
+ token * l = block->child;
2005
+
2006
+ // Custom actions
2007
+ switch (block->type) {
2008
+ case BLOCK_META:
2009
+ // Handle metadata differently
2010
+ return strip_line_tokens_from_metadata(e, block);
2011
+
2012
+ case BLOCK_CODE_INDENTED:
2013
+
2014
+ // Strip trailing empty lines from indented code blocks
2015
+ while (l->tail->type == LINE_EMPTY) {
2016
+ token_remove_last_child(block);
2017
+ }
2018
+
2019
+ break;
2020
+
2021
+ case BLOCK_DEFLIST:
2022
+ // Handle definition lists
2023
+ return strip_line_tokens_from_deflist(e, block);
2024
+
2025
+ case BLOCK_TABLE:
2026
+ // Handle tables
2027
+ return strip_line_tokens_from_table(e, block);
2028
+ }
2029
+
2030
+ token * children = NULL;
2031
+ block->child = NULL;
2032
+
2033
+ token * temp;
2034
+
2035
+ // Move contents of line directly into the parent block
2036
+ while (l != NULL) {
2037
+ // Remove leading non-indent space from line
2038
+ if (block->type != BLOCK_CODE_FENCED && l->child && l->child->type == NON_INDENT_SPACE) {
2039
+ token_remove_first_child(l);
2040
+ }
2041
+
2042
+ switch (l->type) {
2043
+ case LINE_SETEXT_1:
2044
+ case LINE_SETEXT_2:
2045
+ if ((block->type == BLOCK_SETEXT_1) ||
2046
+ (block->type == BLOCK_SETEXT_2)) {
2047
+ temp = l->next;
2048
+ tokens_prune(l, l);
2049
+ l = temp;
2050
+ break;
2051
+ }
2052
+
2053
+ case LINE_DEFINITION:
2054
+ if (block->type == BLOCK_DEFINITION) {
2055
+ // Remove leading colon
2056
+ token_remove_first_child(l);
2057
+ }
2058
+
2059
+ case LINE_ATX_1:
2060
+ case LINE_ATX_2:
2061
+ case LINE_ATX_3:
2062
+ case LINE_ATX_4:
2063
+ case LINE_ATX_5:
2064
+ case LINE_ATX_6:
2065
+ case LINE_BLOCKQUOTE:
2066
+ case LINE_CONTINUATION:
2067
+ case LINE_DEF_ABBREVIATION:
2068
+ case LINE_DEF_CITATION:
2069
+ case LINE_DEF_FOOTNOTE:
2070
+ case LINE_DEF_GLOSSARY:
2071
+ case LINE_DEF_LINK:
2072
+ case LINE_EMPTY:
2073
+ case LINE_LIST_BULLETED:
2074
+ case LINE_LIST_ENUMERATED:
2075
+ case LINE_META:
2076
+ case LINE_PLAIN:
2077
+ case LINE_START_COMMENT:
2078
+ case LINE_STOP_COMMENT:
2079
+ handle_line:
2080
+
2081
+ case LINE_INDENTED_TAB:
2082
+ case LINE_INDENTED_SPACE:
2083
+
2084
+ // Strip leading indent (Only the first one)
2085
+ if (block->type != BLOCK_CODE_FENCED && l->child && ((l->child->type == INDENT_SPACE) || (l->child->type == INDENT_TAB))) {
2086
+ token_remove_first_child(l);
2087
+ }
2088
+
2089
+ // If we're not a code block, strip additional indents
2090
+ if ((block->type != BLOCK_CODE_INDENTED) &&
2091
+ (block->type != BLOCK_CODE_FENCED)) {
2092
+ while (l->child && ((l->child->type == INDENT_SPACE) || (l->child->type == INDENT_TAB))) {
2093
+ token_remove_first_child(l);
2094
+ }
2095
+ }
2096
+
2097
+ // Add contents of line to parent block
2098
+ token_append_child(block, l->child);
2099
+
2100
+ // Disconnect line from it's contents
2101
+ l->child = NULL;
2102
+
2103
+ // Need to remember first line we strip
2104
+ if (children == NULL) {
2105
+ children = l;
2106
+ }
2107
+
2108
+ // Advance to next line
2109
+ l = l->next;
2110
+ break;
2111
+
2112
+ case BLOCK_DEFINITION:
2113
+
2114
+ // Sometimes these get created unintentionally inside other blocks
2115
+ // Process inside it, then treat it like a line to be stripped
2116
+
2117
+ // Change to plain line
2118
+ if (l->child) {
2119
+ l->child->type = LINE_PLAIN;
2120
+ }
2121
+
2122
+ strip_line_tokens_from_block(e, l);
2123
+
2124
+ // Move children to parent
2125
+ // Add ':' back
2126
+ if (e->dstr->str[l->child->start - 1] == ':') {
2127
+ temp = token_new(COLON, l->child->start - 1, 1);
2128
+ token_append_child(block, temp);
2129
+ }
2130
+
2131
+ token_append_child(block, l->child);
2132
+ l->child = NULL;
2133
+
2134
+ if (children == NULL) {
2135
+ children = l;
2136
+ }
2137
+
2138
+ l = l->next;
2139
+ break;
2140
+
2141
+ case LINE_TABLE_SEPARATOR:
2142
+ case LINE_TABLE:
2143
+ if (block->type == BLOCK_TABLE_HEADER) {
2144
+ l->type = (l->type == LINE_TABLE) ? TABLE_ROW : LINE_TABLE_SEPARATOR;
2145
+ parse_table_row_into_cells(l);
2146
+ } else if (block->type == BLOCK_TABLE_SECTION) {
2147
+ l->type = TABLE_ROW;
2148
+ parse_table_row_into_cells(l);
2149
+ } else {
2150
+ goto handle_line;
2151
+ }
2152
+
2153
+ default:
2154
+ // token_describe(block, e->dstr->str);
2155
+ // fprintf(stderr, "Unspecified line type %d inside block type %d\n", l->type, block->type);
2156
+ // This is a block, need to remove it from chain and
2157
+ // Add to parent
2158
+ temp = l->next;
2159
+
2160
+ token_pop_link_from_chain(l);
2161
+ token_append_child(block, l);
2162
+
2163
+ // Advance to next line
2164
+ l = temp;
2165
+ break;
2166
+ }
2167
+ }
2168
+
2169
+ // Free token chain of line types
2170
+ token_tree_free(children);
2171
+ }
2172
+
2173
+
2174
+ /// Parse part of the string into a token tree
2175
+ token * mmd_engine_parse_substring(mmd_engine * e, size_t byte_start, size_t byte_len) {
2176
+ // First, clean up any leftovers from previous parse
2177
+
2178
+ mmd_engine_reset(e);
2179
+
2180
+ // Disable metadata unless we are starting at the beginnging
2181
+ size_t old_ext = e->extensions;
2182
+
2183
+ if (byte_start != 0) {
2184
+ e->extensions |= EXT_NO_METADATA;
2185
+ }
2186
+
2187
+
2188
+ // Tokenize the string
2189
+ token * doc = mmd_tokenize_string(e, byte_start, byte_len, false);
2190
+
2191
+ // Parse tokens into blocks
2192
+ mmd_parse_token_chain(e, doc);
2193
+
2194
+ if (doc) {
2195
+ // Parse blocks for pairs
2196
+ mmd_assign_ambidextrous_tokens_in_block(e, doc, 0);
2197
+
2198
+ // Prepare stack to be used for token pairing
2199
+ // This avoids allocating/freeing one for each iteration.
2200
+ stack * pair_stack = stack_new(0);
2201
+
2202
+
2203
+ mmd_pair_tokens_in_block(doc, e->pairings1, pair_stack);
2204
+ mmd_pair_tokens_in_block(doc, e->pairings2, pair_stack);
2205
+ mmd_pair_tokens_in_block(doc, e->pairings3, pair_stack);
2206
+ mmd_pair_tokens_in_block(doc, e->pairings4, pair_stack);
2207
+
2208
+ // Free stack
2209
+ stack_free(pair_stack);
2210
+
2211
+ pair_emphasis_tokens(doc);
2212
+
2213
+ #ifndef NDEBUG
2214
+ token_tree_describe(doc, e->dstr->str);
2215
+ #endif
2216
+ }
2217
+
2218
+ // Return original extensions
2219
+ e->extensions = old_ext;
2220
+
2221
+ return doc;
2222
+ }
2223
+
2224
+
2225
+ /// Parse the entire string into a token tree
2226
+ void mmd_engine_parse_string(mmd_engine * e) {
2227
+ if (e) {
2228
+ e->root = mmd_engine_parse_substring(e, 0, e->dstr->currentStringLength);
2229
+ }
2230
+ }
2231
+
2232
+
2233
+ /// Does the text have metadata?
2234
+ bool mmd_string_has_metadata(char * source, size_t * end) {
2235
+ bool result;
2236
+
2237
+ mmd_engine * e = mmd_engine_create_with_string(source, 0);
2238
+ result = mmd_engine_has_metadata(e, end);
2239
+
2240
+ mmd_engine_free(e, true);
2241
+
2242
+ return result;
2243
+ }
2244
+
2245
+
2246
+ /// Does the text have metadata?
2247
+ bool mmd_d_string_has_metadata(DString * source, size_t * end) {
2248
+ bool result;
2249
+
2250
+ mmd_engine * e = mmd_engine_create_with_dstring(source, 0);
2251
+ result = mmd_engine_has_metadata(e, end);
2252
+
2253
+ mmd_engine_free(e, false);
2254
+
2255
+ return result;
2256
+ }
2257
+
2258
+
2259
+ /// Does the text have metadata?
2260
+ bool mmd_engine_has_metadata(mmd_engine * e, size_t * end) {
2261
+ bool result = false;
2262
+
2263
+ if (!e) {
2264
+ return false;
2265
+ }
2266
+
2267
+ if (!(scan_meta_line(&e->dstr->str[0]))) {
2268
+ // First line is not metadata, so can't have metadata
2269
+ // Saves the time of an unnecessary parse
2270
+ // TODO: Need faster confirmation of actual metadata than full tokenizing
2271
+ if (end) {
2272
+ *end = 0;
2273
+ }
2274
+
2275
+ return false;
2276
+ }
2277
+
2278
+ // Free existing parse tree
2279
+ if (e->root) {
2280
+ token_tree_free(e->root);
2281
+ }
2282
+
2283
+ // Tokenize the string (up until first empty line)
2284
+ token * doc = mmd_tokenize_string(e, 0, e->dstr->currentStringLength, true);
2285
+
2286
+ // Parse tokens into blocks
2287
+ mmd_parse_token_chain(e, doc);
2288
+
2289
+ if (doc) {
2290
+ if (doc->child && doc->child->type == BLOCK_META) {
2291
+ result = true;
2292
+
2293
+ if (end) {
2294
+ *end = doc->child->len;
2295
+ }
2296
+ }
2297
+
2298
+ token_tree_free(doc);
2299
+ }
2300
+
2301
+ return result;
2302
+ }
2303
+
2304
+
2305
+ /// Return metadata keys, one per line
2306
+ /// Returned char * must be freed
2307
+ char * mmd_string_metadata_keys(char * source) {
2308
+ char * result;
2309
+
2310
+ mmd_engine * e = mmd_engine_create_with_string(source, 0);
2311
+ result = mmd_engine_metadata_keys(e);
2312
+
2313
+ mmd_engine_free(e, true);
2314
+
2315
+ return result;
2316
+ }
2317
+
2318
+
2319
+ /// Return metadata keys, one per line
2320
+ /// Returned char * must be freed
2321
+ char * mmd_d_string_metadata_keys(DString * source) {
2322
+ char * result;
2323
+
2324
+ mmd_engine * e = mmd_engine_create_with_dstring(source, 0);
2325
+ result = mmd_engine_metadata_keys(e);
2326
+
2327
+ mmd_engine_free(e, false);
2328
+
2329
+ return result;
2330
+ }
2331
+
2332
+
2333
+ /// Return metadata keys, one per line
2334
+ /// Returned char * must be freed
2335
+ char * mmd_engine_metadata_keys(mmd_engine * e) {
2336
+ if (e->metadata_stack->size == 0) {
2337
+ // Ensure we have checked for metadata
2338
+ if (!mmd_engine_has_metadata(e, NULL)) {
2339
+ return NULL;
2340
+ }
2341
+ }
2342
+
2343
+ char * result = NULL;
2344
+ DString * output = d_string_new("");
2345
+
2346
+ meta * m;
2347
+
2348
+ for (int i = 0; i < e->metadata_stack->size; ++i) {
2349
+ m = stack_peek_index(e->metadata_stack, i);
2350
+
2351
+ d_string_append_printf(output, "%s\n", m->key);
2352
+ }
2353
+
2354
+ result = output->str;
2355
+ d_string_free(output, false);
2356
+
2357
+ return result;
2358
+ }
2359
+
2360
+
2361
+ /// Extract desired metadata as string value
2362
+ /// Returned char * must be freed
2363
+ char * mmd_string_metavalue_for_key(char * source, const char * key) {
2364
+ char * result;
2365
+
2366
+ mmd_engine * e = mmd_engine_create_with_string(source, 0);
2367
+ result = mmd_engine_metavalue_for_key(e, key);
2368
+
2369
+ if (result) {
2370
+ // We need to return a copy of the string
2371
+ result = my_strdup(result);
2372
+ }
2373
+
2374
+ mmd_engine_free(e, true);
2375
+
2376
+ return result;
2377
+ }
2378
+
2379
+
2380
+ /// Extract desired metadata as string value
2381
+ /// Returned char * must be freed
2382
+ char * mmd_d_string_metavalue_for_key(DString * source, const char * key) {
2383
+ char * result;
2384
+
2385
+ mmd_engine * e = mmd_engine_create_with_dstring(source, 0);
2386
+ result = mmd_engine_metavalue_for_key(e, key);
2387
+
2388
+ if (result) {
2389
+ // We need to return a copy of the string
2390
+ result = my_strdup(result);
2391
+ }
2392
+
2393
+ mmd_engine_free(e, false);
2394
+
2395
+ return result;
2396
+ }
2397
+
2398
+
2399
+ /// Grab metadata without processing entire document
2400
+ /// Returned char * does not need to be freed
2401
+ char * mmd_engine_metavalue_for_key(mmd_engine * e, const char * key) {
2402
+ if (e->metadata_stack->size == 0) {
2403
+ // Ensure we have checked for metadata
2404
+ if (!mmd_engine_has_metadata(e, NULL)) {
2405
+ return NULL;
2406
+ }
2407
+ }
2408
+
2409
+ char * result = NULL;
2410
+ char * clean = label_from_string(key);
2411
+
2412
+ meta * m;
2413
+
2414
+ for (int i = 0; i < e->metadata_stack->size; ++i) {
2415
+ m = stack_peek_index(e->metadata_stack, i);
2416
+
2417
+ if (strcmp(clean, m->key) == 0) {
2418
+ // We have a match
2419
+ free(clean);
2420
+ return m->value;
2421
+ }
2422
+ }
2423
+
2424
+ free(clean);
2425
+ return result;
2426
+ }
2427
+
2428
+
2429
+ /// Grab list of all transcluded files, but we need to know directory to search,
2430
+ /// as well as the path to the file
2431
+ /// Returned stack needs to be freed
2432
+ stack * mmd_string_transclusion_manifest(const char * source, const char * search_path, const char * source_path) {
2433
+ stack * result;
2434
+
2435
+ mmd_engine * e = mmd_engine_create_with_string(source, 0);
2436
+
2437
+ result = mmd_engine_transclusion_manifest(e, search_path, source_path);
2438
+
2439
+ mmd_engine_free(e, true);
2440
+
2441
+ return result;
2442
+ }
2443
+
2444
+
2445
+ /// Grab list of all transcluded files, but we need to know directory to search,
2446
+ /// as well as the path to the file
2447
+ /// Returned stack needs to be freed
2448
+ stack * mmd_d_string_transclusion_manifest(DString * source, const char * search_path, const char * source_path) {
2449
+ stack * result;
2450
+
2451
+ mmd_engine * e = mmd_engine_create_with_dstring(source, 0);
2452
+
2453
+ result = mmd_engine_transclusion_manifest(e, search_path, source_path);
2454
+
2455
+ mmd_engine_free(e, false);
2456
+
2457
+ return result;
2458
+ }
2459
+
2460
+
2461
+ /// Grab list of all transcluded files, but we need to know directory to search,
2462
+ /// as well as the path to the file
2463
+ /// Returned stack needs to be freed
2464
+ stack * mmd_engine_transclusion_manifest(mmd_engine * e, const char * search_path, const char * source_path) {
2465
+ // Create empty manifest stack
2466
+ stack * manifest = stack_new(0);
2467
+
2468
+ // Copy source text for temporary buffer
2469
+ DString * buffer = d_string_new(e->dstr->str);
2470
+
2471
+ mmd_transclude_source(buffer, search_path, source_path, FORMAT_HTML, NULL, manifest);
2472
+
2473
+ d_string_free(buffer, true);
2474
+
2475
+ return manifest;
2476
+ }
2477
+
2478
+
2479
+ /// Insert/replace metadata in string, returning new string
2480
+ char * mmd_string_update_metavalue_for_key(const char * source, const char * key, const char * value) {
2481
+ mmd_engine * e = mmd_engine_create_with_string(source, 0);
2482
+ mmd_engine_update_metavalue_for_key(e, key, value);
2483
+
2484
+ DString * d = e->dstr;
2485
+
2486
+ mmd_engine_free(e, false);
2487
+
2488
+ char * result = d->str;
2489
+ d_string_free(d, false);
2490
+
2491
+ return result;
2492
+ }
2493
+
2494
+
2495
+ /// Insert/replace metadata value in DString
2496
+ void mmd_d_string_update_metavalue_for_key(DString * source, const char * key, const char * value) {
2497
+ mmd_engine * e = mmd_engine_create_with_dstring(source, 0);
2498
+ mmd_engine_update_metavalue_for_key(e, key, value);
2499
+
2500
+ mmd_engine_free(e, false);
2501
+ }
2502
+
2503
+
2504
+ /// Insert/replace metadata value in mmd_engine
2505
+ void mmd_engine_update_metavalue_for_key(mmd_engine * e, const char * key, const char * value) {
2506
+ bool has_meta = true;
2507
+ size_t meta_end = 0;
2508
+
2509
+ // Check for metadata and character
2510
+ if (!mmd_engine_has_metadata(e, &meta_end)) {
2511
+ has_meta = false;
2512
+ }
2513
+
2514
+ // Get clean metadata key for match
2515
+ char * clean = label_from_string(key);
2516
+
2517
+ // Determine range to excise and replace
2518
+ size_t start = -1;
2519
+ size_t end = -1;
2520
+ size_t len = -1;
2521
+
2522
+ meta * m;
2523
+
2524
+ for (int i = 0; i < e->metadata_stack->size; ++i) {
2525
+ m = stack_peek_index(e->metadata_stack, i);
2526
+
2527
+ if (strcmp(clean, m->key) == 0) {
2528
+ // We have a match
2529
+ start = m->start;
2530
+ } else if (start != -1) {
2531
+ // We have already found a match
2532
+ if (end == -1) {
2533
+ // This is the next metadata key, so determine length
2534
+ end = m->start;
2535
+ }
2536
+ }
2537
+ }
2538
+
2539
+ DString * temp = d_string_new(key);
2540
+ d_string_append(temp, ":\t");
2541
+ d_string_append(temp, value);
2542
+ d_string_append_c(temp, '\n');
2543
+
2544
+ if (start != -1) {
2545
+ // We're replacing existing metadata
2546
+
2547
+ // Figure out where to start
2548
+ char * begin = &(e->dstr->str[start]);
2549
+
2550
+ while (*begin != ':') {
2551
+ begin++;
2552
+ }
2553
+
2554
+ begin++;
2555
+
2556
+ while (char_is_whitespace(*begin)) {
2557
+ begin++;
2558
+ }
2559
+
2560
+ start = begin - e->dstr->str;
2561
+
2562
+ if (end == -1) {
2563
+ // Replace until the end of the metadata (last key)
2564
+ len = meta_end - start;
2565
+ } else {
2566
+ len = end - start;
2567
+ }
2568
+
2569
+ d_string_erase(e->dstr, start, len);
2570
+ d_string_insert(e->dstr, start, "\n");
2571
+ d_string_insert(e->dstr, start, value);
2572
+ } else if (meta_end != 0) {
2573
+ // We're appending metadata at the end
2574
+ d_string_insert(e->dstr, meta_end, temp->str);
2575
+ } else {
2576
+ // There is no metadata, so prepend before document
2577
+ d_string_append_c(temp, '\n');
2578
+ d_string_prepend(e->dstr, temp->str);
2579
+ }
2580
+
2581
+ d_string_free(temp, true);
2582
+ free(clean);
2583
+ }
2584
+
2585
+
2586
+ /// Convert MMD text to specified format, with specified extensions, and language
2587
+ /// Returned char * must be freed
2588
+ char * mmd_string_convert(const char * source, unsigned long extensions, short format, short language) {
2589
+ char * result;
2590
+
2591
+ mmd_engine * e = mmd_engine_create_with_string(source, extensions);
2592
+
2593
+ mmd_engine_set_language(e, language);
2594
+
2595
+ result = mmd_engine_convert(e, format);
2596
+
2597
+ mmd_engine_free(e, true); // The engine has a private copy of source that must be freed
2598
+
2599
+ return result;
2600
+ }
2601
+
2602
+
2603
+ /// Convert MMD text to specified format, with specified extensions, and language
2604
+ /// Returned char * must be freed
2605
+ char * mmd_d_string_convert(DString * source, unsigned long extensions, short format, short language) {
2606
+ char * result;
2607
+
2608
+ mmd_engine * e = mmd_engine_create_with_dstring(source, extensions);
2609
+
2610
+ mmd_engine_set_language(e, language);
2611
+
2612
+ result = mmd_engine_convert(e, format);
2613
+
2614
+ mmd_engine_free(e, false); // The engine doesn't own the DString, so don't free it.
2615
+
2616
+ return result;
2617
+ }
2618
+
2619
+
2620
+ /// Convert MMD text to specified format, with specified extensions, and language
2621
+ /// Returned char * must be freed
2622
+ char * mmd_engine_convert(mmd_engine * e, short format) {
2623
+ char * result;
2624
+
2625
+ mmd_engine_parse_string(e);
2626
+
2627
+ DString * output = d_string_new("");
2628
+
2629
+ mmd_engine_export_token_tree(output, e, format);
2630
+
2631
+ // Add newline to result
2632
+ d_string_append_c(output, '\n');
2633
+
2634
+ result = output->str;
2635
+
2636
+ d_string_free(output, false);
2637
+
2638
+ return result;
2639
+ }
2640
+
2641
+
2642
+ /// Convert MMD text and write results to specified file -- used for "complex" output formats requiring
2643
+ /// multiple documents (e.g. EPUB)
2644
+ void mmd_string_convert_to_file(const char * source, unsigned long extensions, short format, short language, const char * directory, const char * filepath) {
2645
+
2646
+ mmd_engine * e = mmd_engine_create_with_string(source, extensions);
2647
+
2648
+ mmd_engine_set_language(e, language);
2649
+
2650
+ mmd_engine_parse_string(e);
2651
+
2652
+ mmd_engine_free(e, true); // The engine has a private copy of source, so free it.
2653
+ }
2654
+
2655
+
2656
+ /// Convert MMD text and write results to specified file -- used for "complex" output formats requiring
2657
+ /// multiple documents (e.g. EPUB)
2658
+ void mmd_d_string_convert_to_file(DString * source, unsigned long extensions, short format, short language, const char * directory, const char * filepath) {
2659
+
2660
+ mmd_engine * e = mmd_engine_create_with_dstring(source, extensions);
2661
+
2662
+ mmd_engine_set_language(e, language);
2663
+
2664
+ mmd_engine_convert_to_file(e, format, directory, filepath);
2665
+
2666
+ mmd_engine_free(e, false); // The engine doesn't own the DString, so don't free it.
2667
+ }
2668
+
2669
+
2670
+ /// Convert MMD text and write results to specified file -- used for "complex" output formats requiring
2671
+ /// multiple documents (e.g. EPUB)
2672
+ void mmd_engine_convert_to_file(mmd_engine * e, short format, const char * directory, const char * filepath) {
2673
+ FILE * output_stream;
2674
+
2675
+ DString * output = d_string_new("");
2676
+
2677
+ mmd_engine_parse_string(e);
2678
+
2679
+ mmd_engine_export_token_tree(output, e, format);
2680
+
2681
+ // Now we have the input source string, the output string, the (modified) parse tree, and engine stacks
2682
+
2683
+ switch (format) {
2684
+ case FORMAT_EPUB:
2685
+ epub_write_wrapper(filepath, output->str, e, directory);
2686
+ break;
2687
+
2688
+ case FORMAT_TEXTBUNDLE:
2689
+ // TODO: Need to implement this
2690
+ break;
2691
+
2692
+ case FORMAT_TEXTBUNDLE_COMPRESSED:
2693
+ textbundle_write_wrapper(filepath, output->str, e, directory);
2694
+ break;
2695
+
2696
+ default:
2697
+
2698
+ // Basic formats just write to file
2699
+ if (!(output_stream = fopen(filepath, "w"))) {
2700
+ // Failed to open file
2701
+ perror(filepath);
2702
+ } else {
2703
+ fputs(output->str, output_stream);
2704
+ fputc('\n', output_stream);
2705
+ fclose(output_stream);
2706
+ }
2707
+
2708
+ break;
2709
+ }
2710
+
2711
+ d_string_free(output, true);
2712
+ }
2713
+
2714
+
2715
+ DString * mmd_string_convert_to_data(const char * source, unsigned long extensions, short format, short language, const char * directory) {
2716
+ mmd_engine * e = mmd_engine_create_with_string(source, extensions);
2717
+
2718
+ mmd_engine_set_language(e, language);
2719
+
2720
+ DString * result = mmd_engine_convert_to_data(e, format, directory);
2721
+
2722
+ mmd_engine_free(e, true);
2723
+
2724
+ return result;
2725
+ }
2726
+
2727
+
2728
+ DString * mmd_d_string_convert_to_data(DString * source, unsigned long extensions, short format, short language, const char * directory) {
2729
+ mmd_engine * e = mmd_engine_create_with_dstring(source, extensions);
2730
+
2731
+ mmd_engine_set_language(e, language);
2732
+
2733
+ DString * result = mmd_engine_convert_to_data(e, format, directory);
2734
+
2735
+ mmd_engine_free(e, false); // The engine doesn't own the DString, so don't free it.
2736
+
2737
+ return result;
2738
+ }
2739
+
2740
+
2741
+ DString * mmd_engine_convert_to_data(mmd_engine * e, short format, const char * directory) {
2742
+ DString * output = d_string_new("");
2743
+ DString * result = NULL;
2744
+
2745
+ if (format == FORMAT_MMD) {
2746
+ // Simply return text (transclusion is handled externally)
2747
+ d_string_append_c_array(output, e->dstr->str, e->dstr->currentStringLength);
2748
+
2749
+ return output;
2750
+ }
2751
+
2752
+ mmd_engine_parse_string(e);
2753
+
2754
+ mmd_engine_export_token_tree(output, e, format);
2755
+
2756
+ switch (format) {
2757
+ case FORMAT_EPUB:
2758
+ result = epub_create(output->str, e, directory);
2759
+
2760
+ d_string_free(output, true);
2761
+ break;
2762
+
2763
+ case FORMAT_TEXTBUNDLE:
2764
+ case FORMAT_TEXTBUNDLE_COMPRESSED:
2765
+ result = textbundle_create(output->str, e, directory);
2766
+
2767
+ d_string_free(output, true);
2768
+ break;
2769
+
2770
+ case FORMAT_ODT:
2771
+ result = opendocument_text_create(output->str, e, directory);
2772
+
2773
+ d_string_free(output, true);
2774
+ break;
2775
+
2776
+ case FORMAT_FODT:
2777
+ result = opendocument_flat_text_create(output->str, e, directory);
2778
+
2779
+ d_string_free(output, true);
2780
+ break;
2781
+
2782
+ default:
2783
+ result = output;
2784
+ // Add newline to result
2785
+ d_string_append_c(result, '\n');
2786
+ break;
2787
+ }
2788
+
2789
+ return result;
2790
+ }
2791
+
2792
+
2793
+ /// Return string containing engine version.
2794
+ char * mmd_version(void) {
2795
+ char * result;
2796
+ result = my_strdup(MULTIMARKDOWN_VERSION);
2797
+ return result;
2798
+ }