rmultimarkdown 6.4.0.3 → 6.7.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. checksums.yaml +5 -5
  2. data/Rakefile +7 -13
  3. data/ext/Makefile +67 -55
  4. data/ext/extconf.rb +8 -6
  5. data/ext/mmd/aho-corasick.c +8 -8
  6. data/ext/mmd/aho-corasick.h +3 -3
  7. data/ext/mmd/argtable3.c +6537 -0
  8. data/ext/mmd/argtable3.h +273 -0
  9. data/ext/mmd/beamer.c +12 -1
  10. data/ext/mmd/char.c +120 -27
  11. data/ext/mmd/char.h +23 -23
  12. data/ext/mmd/critic_markup.c +7 -6
  13. data/ext/mmd/d_string.c +88 -32
  14. data/ext/mmd/{include/d_string.h → d_string.h} +50 -38
  15. data/ext/mmd/epub.c +36 -12
  16. data/ext/mmd/epub.h +2 -2
  17. data/ext/mmd/file.c +50 -40
  18. data/ext/mmd/file.h +2 -2
  19. data/ext/mmd/html.c +164 -99
  20. data/ext/mmd/html.h +3 -2
  21. data/ext/mmd/i18n.h +15 -11
  22. data/ext/mmd/itmz-lexer.c +16978 -0
  23. data/ext/mmd/itmz-lexer.h +132 -0
  24. data/ext/mmd/itmz-parser.c +1189 -0
  25. data/ext/mmd/itmz-parser.h +11 -0
  26. data/ext/mmd/itmz-reader.c +388 -0
  27. data/ext/mmd/itmz-reader.h +111 -0
  28. data/ext/mmd/itmz.c +567 -0
  29. data/ext/mmd/itmz.h +117 -0
  30. data/ext/mmd/latex.c +93 -41
  31. data/ext/mmd/lexer.c +3506 -2774
  32. data/ext/mmd/{include/libMultiMarkdown.h → libMultiMarkdown.h} +49 -2
  33. data/ext/mmd/main.c +612 -0
  34. data/ext/mmd/memoir.c +4 -1
  35. data/ext/mmd/miniz.c +6905 -6680
  36. data/ext/mmd/miniz.h +456 -476
  37. data/ext/mmd/mmd.c +399 -94
  38. data/ext/mmd/mmd.h +25 -25
  39. data/ext/mmd/object_pool.h +3 -3
  40. data/ext/mmd/opendocument-content.c +137 -69
  41. data/ext/mmd/opendocument-content.h +2 -2
  42. data/ext/mmd/opendocument.c +35 -14
  43. data/ext/mmd/opendocument.h +2 -2
  44. data/ext/mmd/opml-lexer.c +259 -637
  45. data/ext/mmd/opml-lexer.h +1 -17
  46. data/ext/mmd/opml-parser.c +194 -188
  47. data/ext/mmd/opml-reader.c +72 -142
  48. data/ext/mmd/opml-reader.h +1 -1
  49. data/ext/mmd/opml.c +13 -13
  50. data/ext/mmd/opml.h +1 -1
  51. data/ext/mmd/parser.c +1623 -1244
  52. data/ext/mmd/rng.c +8 -3
  53. data/ext/mmd/scanners.c +66625 -103198
  54. data/ext/mmd/scanners.h +1 -0
  55. data/ext/mmd/stack.c +62 -20
  56. data/ext/mmd/stack.h +10 -21
  57. data/ext/mmd/textbundle.c +23 -7
  58. data/ext/mmd/textbundle.h +2 -2
  59. data/ext/mmd/token.c +42 -16
  60. data/ext/mmd/{include/token.h → token.h} +22 -8
  61. data/ext/mmd/token_pairs.c +0 -16
  62. data/ext/mmd/transclude.c +6 -2
  63. data/ext/mmd/uthash.h +745 -745
  64. data/ext/mmd/version.h +8 -8
  65. data/ext/mmd/writer.c +225 -63
  66. data/ext/mmd/writer.h +50 -36
  67. data/ext/mmd/xml.c +855 -0
  68. data/ext/mmd/xml.h +134 -0
  69. data/ext/mmd/zip.c +71 -4
  70. data/ext/mmd/zip.h +7 -1
  71. data/ext/ruby_multi_markdown.c +9 -18
  72. data/lib/multi_markdown/version.rb +1 -1
  73. data/lib/multi_markdown.bundle +0 -0
  74. data/rmultimarkdown.gemspec +0 -2
  75. metadata +22 -28
  76. data/ext/mmd/char_lookup.c +0 -212
@@ -0,0 +1,273 @@
1
+ /*******************************************************************************
2
+ * argtable3: Declares the main interfaces of the library
3
+ *
4
+ * This file is part of the argtable3 library.
5
+ *
6
+ * Copyright (C) 1998-2001,2003-2011,2013 Stewart Heitmann
7
+ * <sheitmann@users.sourceforge.net>
8
+ * All rights reserved.
9
+ *
10
+ * Redistribution and use in source and binary forms, with or without
11
+ * modification, are permitted provided that the following conditions are met:
12
+ * * Redistributions of source code must retain the above copyright
13
+ * notice, this list of conditions and the following disclaimer.
14
+ * * Redistributions in binary form must reproduce the above copyright
15
+ * notice, this list of conditions and the following disclaimer in the
16
+ * documentation and/or other materials provided with the distribution.
17
+ * * Neither the name of STEWART HEITMANN nor the names of its contributors
18
+ * may be used to endorse or promote products derived from this software
19
+ * without specific prior written permission.
20
+ *
21
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24
+ * ARE DISCLAIMED. IN NO EVENT SHALL STEWART HEITMANN BE LIABLE FOR ANY DIRECT,
25
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
26
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
28
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31
+ ******************************************************************************/
32
+
33
+ #ifndef ARGTABLE3
34
+ #define ARGTABLE3
35
+
36
+ #include <stdio.h> /* FILE */
37
+ #include <time.h> /* struct tm */
38
+
39
+ #ifdef __cplusplus
40
+ extern "C" {
41
+ #endif
42
+
43
+ #define ARG_REX_ICASE 1
44
+ #define ARG_DSTR_SIZE 200
45
+ #define ARG_CMD_NAME_LEN 100
46
+ #define ARG_CMD_DESCRIPTION_LEN 256
47
+
48
+ #ifndef ARG_REPLACE_GETOPT
49
+ #define ARG_REPLACE_GETOPT 1 /* use the embedded getopt as the system getopt(3) */
50
+ #endif /* ARG_REPLACE_GETOPT */
51
+
52
+ /* bit masks for arg_hdr.flag */
53
+ enum { ARG_TERMINATOR = 0x1, ARG_HASVALUE = 0x2, ARG_HASOPTVALUE = 0x4 };
54
+
55
+ #if defined(_WIN32)
56
+ #if defined(argtable3_EXPORTS)
57
+ #define ARG_EXTERN __declspec(dllexport)
58
+ #elif defined(argtable3_IMPORTS)
59
+ #define ARG_EXTERN __declspec(dllimport)
60
+ #else
61
+ #define ARG_EXTERN
62
+ #endif
63
+ #else
64
+ #define ARG_EXTERN
65
+ #endif
66
+
67
+ typedef struct _internal_arg_dstr * arg_dstr_t;
68
+ typedef void * arg_cmd_itr_t;
69
+
70
+ typedef void(arg_resetfn)(void * parent);
71
+ typedef int(arg_scanfn)(void * parent, const char * argval);
72
+ typedef int(arg_checkfn)(void * parent);
73
+ typedef void(arg_errorfn)(void * parent, arg_dstr_t ds, int error, const char * argval, const char * progname);
74
+ typedef void(arg_dstr_freefn)(char * buf);
75
+ typedef int(arg_cmdfn)(int argc, char * argv[], arg_dstr_t res);
76
+ typedef int(arg_comparefn)(const void * k1, const void * k2);
77
+
78
+ /*
79
+ * The arg_hdr struct defines properties that are common to all arg_xxx structs.
80
+ * The argtable library requires each arg_xxx struct to have an arg_hdr
81
+ * struct as its first data member.
82
+ * The argtable library functions then use this data to identify the
83
+ * properties of the command line option, such as its option tags,
84
+ * datatype string, and glossary strings, and so on.
85
+ * Moreover, the arg_hdr struct contains pointers to custom functions that
86
+ * are provided by each arg_xxx struct which perform the tasks of parsing
87
+ * that particular arg_xxx arguments, performing post-parse checks, and
88
+ * reporting errors.
89
+ * These functions are private to the individual arg_xxx source code
90
+ * and the pointers to them are initialised by that arg_xxx struct's
91
+ * constructor function. The user could alter them after construction
92
+ * if desired, but the original intention is for them to be set by the
93
+ * constructor and left unaltered.
94
+ */
95
+ typedef struct arg_hdr {
96
+ char flag; /* Modifier flags: ARG_TERMINATOR, ARG_HASVALUE, ARG_HASOPTVALUE. */
97
+ const char * shortopts; /* String defining the short options */
98
+ const char * longopts; /* String defining the long options */
99
+ const char * datatype; /* Description of the argument data type */
100
+ const char * glossary; /* Description of the option as shown by arg_print_glossary function */
101
+ int mincount; /* Minimum number of occurrences of this option accepted */
102
+ int maxcount; /* Maximum number of occurrences of this option accepted */
103
+ void * parent; /* Pointer to parent arg_xxx struct */
104
+ arg_resetfn * resetfn; /* Pointer to parent arg_xxx reset function */
105
+ arg_scanfn * scanfn; /* Pointer to parent arg_xxx scan function */
106
+ arg_checkfn * checkfn; /* Pointer to parent arg_xxx check function */
107
+ arg_errorfn * errorfn; /* Pointer to parent arg_xxx error function */
108
+ void * priv; /* Pointer to private header data for use by arg_xxx functions */
109
+ } arg_hdr_t;
110
+
111
+ typedef struct arg_rem {
112
+ struct arg_hdr hdr; /* The mandatory argtable header struct */
113
+ } arg_rem_t;
114
+
115
+ typedef struct arg_lit {
116
+ struct arg_hdr hdr; /* The mandatory argtable header struct */
117
+ int count; /* Number of matching command line args */
118
+ } arg_lit_t;
119
+
120
+ typedef struct arg_int {
121
+ struct arg_hdr hdr; /* The mandatory argtable header struct */
122
+ int count; /* Number of matching command line args */
123
+ int * ival; /* Array of parsed argument values */
124
+ } arg_int_t;
125
+
126
+ typedef struct arg_dbl {
127
+ struct arg_hdr hdr; /* The mandatory argtable header struct */
128
+ int count; /* Number of matching command line args */
129
+ double * dval; /* Array of parsed argument values */
130
+ } arg_dbl_t;
131
+
132
+ typedef struct arg_str {
133
+ struct arg_hdr hdr; /* The mandatory argtable header struct */
134
+ int count; /* Number of matching command line args */
135
+ const char ** sval; /* Array of parsed argument values */
136
+ } arg_str_t;
137
+
138
+ typedef struct arg_rex {
139
+ struct arg_hdr hdr; /* The mandatory argtable header struct */
140
+ int count; /* Number of matching command line args */
141
+ const char ** sval; /* Array of parsed argument values */
142
+ } arg_rex_t;
143
+
144
+ typedef struct arg_file {
145
+ struct arg_hdr hdr; /* The mandatory argtable header struct */
146
+ int count; /* Number of matching command line args*/
147
+ const char ** filename; /* Array of parsed filenames (eg: /home/foo.bar) */
148
+ const char ** basename; /* Array of parsed basenames (eg: foo.bar) */
149
+ const char ** extension; /* Array of parsed extensions (eg: .bar) */
150
+ } arg_file_t;
151
+
152
+ typedef struct arg_date {
153
+ struct arg_hdr hdr; /* The mandatory argtable header struct */
154
+ const char * format; /* strptime format string used to parse the date */
155
+ int count; /* Number of matching command line args */
156
+ struct tm * tmval; /* Array of parsed time values */
157
+ } arg_date_t;
158
+
159
+ enum { ARG_ELIMIT = 1, ARG_EMALLOC, ARG_ENOMATCH, ARG_ELONGOPT, ARG_EMISSARG };
160
+ typedef struct arg_end {
161
+ struct arg_hdr hdr; /* The mandatory argtable header struct */
162
+ int count; /* Number of errors encountered */
163
+ int * error; /* Array of error codes */
164
+ void ** parent; /* Array of pointers to offending arg_xxx struct */
165
+ const char ** argval; /* Array of pointers to offending argv[] string */
166
+ } arg_end_t;
167
+
168
+ typedef struct arg_cmd_info {
169
+ char name[ARG_CMD_NAME_LEN];
170
+ char description[ARG_CMD_DESCRIPTION_LEN];
171
+ arg_cmdfn * proc;
172
+ } arg_cmd_info_t;
173
+
174
+ /**** arg_xxx constructor functions *********************************/
175
+
176
+ ARG_EXTERN struct arg_rem * arg_rem(const char * datatype, const char * glossary);
177
+
178
+ ARG_EXTERN struct arg_lit * arg_lit0(const char * shortopts, const char * longopts, const char * glossary);
179
+ ARG_EXTERN struct arg_lit * arg_lit1(const char * shortopts, const char * longopts, const char * glossary);
180
+ ARG_EXTERN struct arg_lit * arg_litn(const char * shortopts, const char * longopts, int mincount, int maxcount, const char * glossary);
181
+
182
+ ARG_EXTERN struct arg_int * arg_int0(const char * shortopts, const char * longopts, const char * datatype, const char * glossary);
183
+ ARG_EXTERN struct arg_int * arg_int1(const char * shortopts, const char * longopts, const char * datatype, const char * glossary);
184
+ ARG_EXTERN struct arg_int * arg_intn(const char * shortopts, const char * longopts, const char * datatype, int mincount, int maxcount, const char * glossary);
185
+
186
+ ARG_EXTERN struct arg_dbl * arg_dbl0(const char * shortopts, const char * longopts, const char * datatype, const char * glossary);
187
+ ARG_EXTERN struct arg_dbl * arg_dbl1(const char * shortopts, const char * longopts, const char * datatype, const char * glossary);
188
+ ARG_EXTERN struct arg_dbl * arg_dbln(const char * shortopts, const char * longopts, const char * datatype, int mincount, int maxcount, const char * glossary);
189
+
190
+ ARG_EXTERN struct arg_str * arg_str0(const char * shortopts, const char * longopts, const char * datatype, const char * glossary);
191
+ ARG_EXTERN struct arg_str * arg_str1(const char * shortopts, const char * longopts, const char * datatype, const char * glossary);
192
+ ARG_EXTERN struct arg_str * arg_strn(const char * shortopts, const char * longopts, const char * datatype, int mincount, int maxcount, const char * glossary);
193
+
194
+ ARG_EXTERN struct arg_rex * arg_rex0(const char * shortopts, const char * longopts, const char * pattern, const char * datatype, int flags, const char * glossary);
195
+ ARG_EXTERN struct arg_rex * arg_rex1(const char * shortopts, const char * longopts, const char * pattern, const char * datatype, int flags, const char * glossary);
196
+ ARG_EXTERN struct arg_rex * arg_rexn(const char * shortopts,
197
+ const char * longopts,
198
+ const char * pattern,
199
+ const char * datatype,
200
+ int mincount,
201
+ int maxcount,
202
+ int flags,
203
+ const char * glossary);
204
+
205
+ ARG_EXTERN struct arg_file * arg_file0(const char * shortopts, const char * longopts, const char * datatype, const char * glossary);
206
+ ARG_EXTERN struct arg_file * arg_file1(const char * shortopts, const char * longopts, const char * datatype, const char * glossary);
207
+ ARG_EXTERN struct arg_file * arg_filen(const char * shortopts, const char * longopts, const char * datatype, int mincount, int maxcount, const char * glossary);
208
+
209
+ ARG_EXTERN struct arg_date * arg_date0(const char * shortopts, const char * longopts, const char * format, const char * datatype, const char * glossary);
210
+ ARG_EXTERN struct arg_date * arg_date1(const char * shortopts, const char * longopts, const char * format, const char * datatype, const char * glossary);
211
+ ARG_EXTERN struct arg_date * arg_daten(const char * shortopts, const char * longopts, const char * format, const char * datatype, int mincount, int maxcount, const char * glossary);
212
+
213
+ ARG_EXTERN struct arg_end * arg_end(int maxerrors);
214
+
215
+ #define ARG_DSTR_STATIC ((arg_dstr_freefn*)0)
216
+ #define ARG_DSTR_VOLATILE ((arg_dstr_freefn*)1)
217
+ #define ARG_DSTR_DYNAMIC ((arg_dstr_freefn*)3)
218
+
219
+ /**** other functions *******************************************/
220
+ ARG_EXTERN int arg_nullcheck(void ** argtable);
221
+ ARG_EXTERN int arg_parse(int argc, char ** argv, void ** argtable);
222
+ ARG_EXTERN void arg_print_option(FILE * fp, const char * shortopts, const char * longopts, const char * datatype, const char * suffix);
223
+ ARG_EXTERN void arg_print_syntax(FILE * fp, void ** argtable, const char * suffix);
224
+ ARG_EXTERN void arg_print_syntaxv(FILE * fp, void ** argtable, const char * suffix);
225
+ ARG_EXTERN void arg_print_glossary(FILE * fp, void ** argtable, const char * format);
226
+ ARG_EXTERN void arg_print_glossary_gnu(FILE * fp, void ** argtable);
227
+ ARG_EXTERN void arg_print_errors(FILE * fp, struct arg_end * end, const char * progname);
228
+ ARG_EXTERN void arg_print_option_ds(arg_dstr_t ds, const char * shortopts, const char * longopts, const char * datatype, const char * suffix);
229
+ ARG_EXTERN void arg_print_syntax_ds(arg_dstr_t ds, void ** argtable, const char * suffix);
230
+ ARG_EXTERN void arg_print_syntaxv_ds(arg_dstr_t ds, void ** argtable, const char * suffix);
231
+ ARG_EXTERN void arg_print_glossary_ds(arg_dstr_t ds, void ** argtable, const char * format);
232
+ ARG_EXTERN void arg_print_glossary_gnu_ds(arg_dstr_t ds, void ** argtable);
233
+ ARG_EXTERN void arg_print_errors_ds(arg_dstr_t ds, struct arg_end * end, const char * progname);
234
+ ARG_EXTERN void arg_freetable(void ** argtable, size_t n);
235
+
236
+ ARG_EXTERN arg_dstr_t arg_dstr_create(void);
237
+ ARG_EXTERN void arg_dstr_destroy(arg_dstr_t ds);
238
+ ARG_EXTERN void arg_dstr_reset(arg_dstr_t ds);
239
+ ARG_EXTERN void arg_dstr_free(arg_dstr_t ds);
240
+ ARG_EXTERN void arg_dstr_set(arg_dstr_t ds, char * str, arg_dstr_freefn * free_proc);
241
+ ARG_EXTERN void arg_dstr_cat(arg_dstr_t ds, const char * str);
242
+ ARG_EXTERN void arg_dstr_catc(arg_dstr_t ds, char c);
243
+ ARG_EXTERN void arg_dstr_catf(arg_dstr_t ds, const char * fmt, ...);
244
+ ARG_EXTERN char * arg_dstr_cstr(arg_dstr_t ds);
245
+
246
+ ARG_EXTERN void arg_cmd_init(void);
247
+ ARG_EXTERN void arg_cmd_uninit(void);
248
+ ARG_EXTERN void arg_cmd_register(const char * name, arg_cmdfn * proc, const char * description);
249
+ ARG_EXTERN void arg_cmd_unregister(const char * name);
250
+ ARG_EXTERN int arg_cmd_dispatch(const char * name, int argc, char * argv[], arg_dstr_t res);
251
+ ARG_EXTERN unsigned int arg_cmd_count(void);
252
+ ARG_EXTERN arg_cmd_info_t * arg_cmd_info(const char * name);
253
+ ARG_EXTERN arg_cmd_itr_t arg_cmd_itr_create(void);
254
+ ARG_EXTERN void arg_cmd_itr_destroy(arg_cmd_itr_t itr);
255
+ ARG_EXTERN int arg_cmd_itr_advance(arg_cmd_itr_t itr);
256
+ ARG_EXTERN char * arg_cmd_itr_key(arg_cmd_itr_t itr);
257
+ ARG_EXTERN arg_cmd_info_t * arg_cmd_itr_value(arg_cmd_itr_t itr);
258
+ ARG_EXTERN int arg_cmd_itr_search(arg_cmd_itr_t itr, void * k);
259
+ ARG_EXTERN void arg_mgsort(void * data, int size, int esize, int i, int k, arg_comparefn * comparefn);
260
+ ARG_EXTERN void arg_make_get_help_msg(arg_dstr_t res);
261
+ ARG_EXTERN void arg_make_help_msg(arg_dstr_t ds, char * cmd_name, void ** argtable);
262
+ ARG_EXTERN void arg_make_syntax_err_msg(arg_dstr_t ds, void ** argtable, struct arg_end * end);
263
+ ARG_EXTERN int arg_make_syntax_err_help_msg(arg_dstr_t ds, char * name, int help, int nerrors, void ** argtable, struct arg_end * end, int * exitcode);
264
+ ARG_EXTERN void arg_set_module_name(const char * name);
265
+ ARG_EXTERN void arg_set_module_version(int major, int minor, int patch, const char * tag);
266
+
267
+ /**** deprecated functions, for back-compatibility only ********/
268
+ ARG_EXTERN void arg_free(void ** argtable);
269
+
270
+ #ifdef __cplusplus
271
+ }
272
+ #endif
273
+ #endif
data/ext/mmd/beamer.c CHANGED
@@ -56,6 +56,7 @@
56
56
  #include "latex.h"
57
57
  #include "beamer.h"
58
58
  #include "parser.h"
59
+ #include "stack.h"
59
60
 
60
61
  #define print(x) d_string_append(out, x)
61
62
  #define print_const(x) d_string_append_c_array(out, x, sizeof(x) - 1)
@@ -178,6 +179,7 @@ void mmd_export_token_beamer(DString * out, const char * source, token * t, scra
178
179
  // Raw source
179
180
  if (raw_filter_text_matches(temp_char, FORMAT_BEAMER)) {
180
181
  switch (t->child->tail->type) {
182
+ case CODE_FENCE_LINE:
181
183
  case LINE_FENCE_BACKTICK_3:
182
184
  case LINE_FENCE_BACKTICK_4:
183
185
  case LINE_FENCE_BACKTICK_5:
@@ -192,7 +194,10 @@ void mmd_export_token_beamer(DString * out, const char * source, token * t, scra
192
194
  d_string_append_c_array(out, &source[t->child->next->start], temp_token->start - t->child->next->start);
193
195
  scratch->padded = 1;
194
196
  } else {
195
- d_string_append_c_array(out, &source[t->child->start + t->child->len], t->start + t->len - t->child->next->start);
197
+ if (t->child->next) {
198
+ d_string_append_c_array(out, &source[t->child->start + t->child->len], t->start + t->len - t->child->next->start);
199
+ }
200
+
196
201
  scratch->padded = 0;
197
202
  }
198
203
  }
@@ -365,6 +370,12 @@ void mmd_end_complete_beamer(DString * out, const char * source, scratch_pad * s
365
370
 
366
371
  if (m) {
367
372
  printf("\\input{%s}\n\n", m->value);
373
+ } else {
374
+ m = extract_meta_from_stack(scratch, "latexconfig");
375
+
376
+ if (m) {
377
+ printf("\\input{mmd6-%s-footer}\n", m->value);
378
+ }
368
379
  }
369
380
 
370
381
  print_const("\\end{document}");
data/ext/mmd/char.c CHANGED
@@ -1,6 +1,6 @@
1
1
  /**
2
2
 
3
- MultiMarkdown 6 -- Lightweight markup processor to produce HTML, LaTeX, and more.
3
+ libCoreUtilities -- Reusable component libraries
4
4
 
5
5
  @file char.c
6
6
 
@@ -10,27 +10,16 @@
10
10
  @author Fletcher T. Penney
11
11
  @bug
12
12
 
13
- **/
13
+ **/
14
14
 
15
15
  /*
16
16
 
17
- Copyright © 2016 - 2017 Fletcher T. Penney.
17
+ Copyright © 2016-2020 Fletcher T. Penney.
18
18
 
19
19
 
20
- The `MultiMarkdown 6` project is released under the MIT License..
20
+ MIT License
21
21
 
22
- GLibFacade.c and GLibFacade.h are from the MultiMarkdown v4 project:
23
-
24
- https://github.com/fletcher/MultiMarkdown-4/
25
-
26
- MMD 4 is released under both the MIT License and GPL.
27
-
28
-
29
- CuTest is released under the zlib/libpng license. See CuTest.c for the text
30
- of the license.
31
-
32
-
33
- ## The MIT License ##
22
+ Copyright (c) 2016-2020 Fletcher T. Penney
34
23
 
35
24
  Permission is hereby granted, free of charge, to any person obtaining a copy
36
25
  of this software and associated documentation files (the "Software"), to deal
@@ -39,34 +28,40 @@
39
28
  copies of the Software, and to permit persons to whom the Software is
40
29
  furnished to do so, subject to the following conditions:
41
30
 
42
- The above copyright notice and this permission notice shall be included in
43
- all copies or substantial portions of the Software.
31
+ The above copyright notice and this permission notice shall be included in all
32
+ copies or substantial portions of the Software.
44
33
 
45
34
  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
46
35
  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
47
36
  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
48
37
  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
49
38
  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
50
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
51
- THE SOFTWARE.
39
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
40
+ SOFTWARE.
52
41
 
53
42
  */
54
43
 
44
+
55
45
  #include <stdlib.h>
56
46
 
57
47
  #include "char.h"
58
48
 
49
+ #ifdef TEST
50
+ #include "CuTest.h"
51
+ #endif
52
+
59
53
 
60
54
  /// Create this lookup table using char_lookup.c
61
55
  static unsigned char smart_char_type[256] = {
62
56
  16, 0, 0, 0, 0, 0, 0, 0, 0, 1, 16, 0, 0, 16, 0, 0,
63
57
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
64
- 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
58
+ 1, 2, 2, 2, 2, 2, 2, 34, 2, 2, 2, 2, 2, 34, 2, 2,
65
59
  8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2, 2, 2, 2, 2, 2,
66
- 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
67
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2,
68
- 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
69
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 0,
60
+ 2, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68,
61
+ 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 2, 2, 2, 2, 2,
62
+ 2, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132,
63
+ 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 2, 2, 2, 2, 0,
64
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
70
65
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
71
66
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
72
67
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -74,7 +69,6 @@ static unsigned char smart_char_type[256] = {
74
69
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
75
70
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
76
71
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
77
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
78
72
  };
79
73
 
80
74
 
@@ -86,6 +80,8 @@ static int CHAR_WHITESPACE_OR_LINE_ENDING = CHAR_WHITESPACE | CHAR_LINE_ENDING;
86
80
 
87
81
  static int CHAR_WHITESPACE_OR_LINE_ENDING_OR_PUNCTUATION = CHAR_WHITESPACE | CHAR_LINE_ENDING | CHAR_PUNCTUATION;
88
82
 
83
+ static int CHAR_ALPHA_OR_INTRAWORD = CHAR_ALPHA | CHAR_INTRAWORD;
84
+
89
85
 
90
86
  // Is character whitespace?
91
87
  int char_is_whitespace(char c) {
@@ -111,7 +107,7 @@ int char_is_windows_line_ending(char * c) {
111
107
  }
112
108
 
113
109
  #ifdef TEST
114
- void Test_char_is_windows_line_ending(CuTest* tc) {
110
+ void Test_char_is_windows_line_ending(CuTest * tc) {
115
111
  char * test = "\r\n\n";
116
112
 
117
113
  CuAssertIntEquals(tc, 1, char_is_windows_line_ending(&test[0]));
@@ -140,6 +136,21 @@ int char_is_alphanumeric(char c) {
140
136
  return smart_char_type[(unsigned char) c] & CHAR_ALPHANUMERIC;
141
137
  }
142
138
 
139
+ // Is character lower case?
140
+ int char_is_lower_case(char c) {
141
+ return smart_char_type[(unsigned char) c] & CHAR_LOWER;
142
+ }
143
+
144
+ // Is character upper case?
145
+ int char_is_upper_case(char c) {
146
+ return smart_char_type[(unsigned char) c] & CHAR_UPPER;
147
+ }
148
+
149
+ // Is character a valid intraword character?
150
+ int char_is_intraword(char c) {
151
+ return smart_char_type[(unsigned char) c] & CHAR_ALPHA_OR_INTRAWORD;
152
+ }
153
+
143
154
  // Is character either whitespace or line ending?
144
155
  int char_is_whitespace_or_line_ending(char c) {
145
156
  return smart_char_type[(unsigned char) c] & CHAR_WHITESPACE_OR_LINE_ENDING;
@@ -154,3 +165,85 @@ int char_is_whitespace_or_punctuation(char c) {
154
165
  int char_is_whitespace_or_line_ending_or_punctuation(char c) {
155
166
  return smart_char_type[(unsigned char) c] & CHAR_WHITESPACE_OR_LINE_ENDING_OR_PUNCTUATION;
156
167
  }
168
+
169
+ // From https://www.cl.cam.ac.uk/~mgk25/ucs/utf8_check.c
170
+
171
+ /*
172
+ * The utf8_check() function scans the '\0'-terminated string starting
173
+ * at s. It returns a pointer to the first byte of the first malformed
174
+ * or overlong UTF-8 sequence found, or NULL if the string contains
175
+ * only correct UTF-8. It also spots UTF-8 sequences that could cause
176
+ * trouble if converted to UTF-16, namely surrogate characters
177
+ * (U+D800..U+DFFF) and non-Unicode positions (U+FFFE..U+FFFF). This
178
+ * routine is very likely to find a malformed sequence if the input
179
+ * uses any other encoding than UTF-8. It therefore can be used as a
180
+ * very effective heuristic for distinguishing between UTF-8 and other
181
+ * encodings.
182
+ *
183
+ * I wrote this code mainly as a specification of functionality; there
184
+ * are no doubt performance optimizations possible for certain CPUs.
185
+ *
186
+ * Markus Kuhn <http://www.cl.cam.ac.uk/~mgk25/> -- 2005-03-30
187
+ * License: http://www.cl.cam.ac.uk/~mgk25/short-license.html
188
+ */
189
+
190
+ // Is the string valid UTF-8? (Returns pointer to first malformed sequence, or NULL if valid)
191
+ unsigned char * utf8_check(unsigned char * s) {
192
+ while (*s) {
193
+ if (*s < 0x80)
194
+ /* 0xxxxxxx */
195
+ {
196
+ s++;
197
+ } else if ((s[0] & 0xe0) == 0xc0) {
198
+ /* 110XXXXx 10xxxxxx */
199
+ if ((s[1] & 0xc0) != 0x80 ||
200
+ (s[0] & 0xfe) == 0xc0) { /* overlong? */
201
+ return s;
202
+ } else {
203
+ s += 2;
204
+ }
205
+ } else if ((s[0] & 0xf0) == 0xe0) {
206
+ /* 1110XXXX 10Xxxxxx 10xxxxxx */
207
+ if ((s[1] & 0xc0) != 0x80 ||
208
+ (s[2] & 0xc0) != 0x80 ||
209
+ (s[0] == 0xe0 && (s[1] & 0xe0) == 0x80) || /* overlong? */
210
+ (s[0] == 0xed && (s[1] & 0xe0) == 0xa0) || /* surrogate? */
211
+ (s[0] == 0xef && s[1] == 0xbf &&
212
+ (s[2] & 0xfe) == 0xbe)) { /* U+FFFE or U+FFFF? */
213
+ return s;
214
+ } else {
215
+ s += 3;
216
+ }
217
+ } else if ((s[0] & 0xf8) == 0xf0) {
218
+ /* 11110XXX 10XXxxxx 10xxxxxx 10xxxxxx */
219
+ if ((s[1] & 0xc0) != 0x80 ||
220
+ (s[2] & 0xc0) != 0x80 ||
221
+ (s[3] & 0xc0) != 0x80 ||
222
+ (s[0] == 0xf0 && (s[1] & 0xf0) == 0x80) || /* overlong? */
223
+ (s[0] == 0xf4 && s[1] > 0x8f) || s[0] > 0xf4) { /* > U+10FFFF? */
224
+ return s;
225
+ } else {
226
+ s += 4;
227
+ }
228
+ } else {
229
+ return s;
230
+ }
231
+ }
232
+
233
+ return NULL;
234
+ }
235
+
236
+
237
+ #ifdef TEST
238
+ void Test_utf8_check(CuTest * tc) {
239
+ unsigned char * check;
240
+
241
+ CuAssertPtrEquals(tc, NULL, utf8_check((unsigned char *) "This is plain ASCII"));
242
+ CuAssertPtrEquals(tc, NULL, utf8_check((unsigned char *) "This ü UTF-8"));
243
+ CuAssertPtrEquals(tc, NULL, utf8_check((unsigned char *) "This 👪"));
244
+
245
+ CuAssertPtrEquals(tc, NULL, utf8_check((unsigned char *) "Ḽơᶉëᶆ ȋṕšᶙṁ ḍỡḽǭᵳ ʂǐť ӓṁệẗ, ĉṓɲṩḙċťᶒțûɾ ấɖḯƥĭṩčįɳġ ḝłįʈ, șếᶑ ᶁⱺ ẽḭŭŝḿꝋď ṫĕᶆᶈṓɍ ỉñḉīḑȋᵭṵńť ṷŧ ḹẩḇőꝛế éȶ đꝍꞎôꝛȇ ᵯáꞡᶇā ąⱡîɋṹẵ"));
246
+
247
+ CuAssertPtrNotNull(tc, utf8_check((unsigned char *) "\xe2\x28\xa1"));
248
+ }
249
+ #endif
data/ext/mmd/char.h CHANGED
@@ -1,6 +1,6 @@
1
1
  /**
2
2
 
3
- MultiMarkdown 6 -- Lightweight markup processor to produce HTML, LaTeX, and more.
3
+ libCoreUtilities -- Reusable component libraries
4
4
 
5
5
  @file char.h
6
6
 
@@ -10,27 +10,16 @@
10
10
  @author Fletcher T. Penney
11
11
  @bug
12
12
 
13
- **/
13
+ **/
14
14
 
15
15
  /*
16
16
 
17
- Copyright © 2016 - 2017 Fletcher T. Penney.
17
+ Copyright © 2016-2020 Fletcher T. Penney.
18
18
 
19
19
 
20
- The `MultiMarkdown 6` project is released under the MIT License..
20
+ MIT License
21
21
 
22
- GLibFacade.c and GLibFacade.h are from the MultiMarkdown v4 project:
23
-
24
- https://github.com/fletcher/MultiMarkdown-4/
25
-
26
- MMD 4 is released under both the MIT License and GPL.
27
-
28
-
29
- CuTest is released under the zlib/libpng license. See CuTest.c for the text
30
- of the license.
31
-
32
-
33
- ## The MIT License ##
22
+ Copyright (c) 2016-2020 Fletcher T. Penney
34
23
 
35
24
  Permission is hereby granted, free of charge, to any person obtaining a copy
36
25
  of this software and associated documentation files (the "Software"), to deal
@@ -39,16 +28,16 @@
39
28
  copies of the Software, and to permit persons to whom the Software is
40
29
  furnished to do so, subject to the following conditions:
41
30
 
42
- The above copyright notice and this permission notice shall be included in
43
- all copies or substantial portions of the Software.
31
+ The above copyright notice and this permission notice shall be included in all
32
+ copies or substantial portions of the Software.
44
33
 
45
34
  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
46
35
  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
47
36
  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
48
37
  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
49
38
  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
50
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
51
- THE SOFTWARE.
39
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
40
+ SOFTWARE.
52
41
 
53
42
  */
54
43
 
@@ -56,9 +45,6 @@
56
45
  #ifndef CHAR_SMART_STRING_H
57
46
  #define CHAR_SMART_STRING_H
58
47
 
59
- #ifdef TEST
60
- #include "CuTest.h"
61
- #endif
62
48
 
63
49
  /// Define character types
64
50
  enum char_types {
@@ -67,6 +53,9 @@ enum char_types {
67
53
  CHAR_ALPHA = 1 << 2, //!< a-zA-Z
68
54
  CHAR_DIGIT = 1 << 3, //!< 0-9
69
55
  CHAR_LINE_ENDING = 1 << 4, //!< \n,\r,\0
56
+ CHAR_INTRAWORD = 1 << 5, //!< Punctuation that might be inside a word -'
57
+ CHAR_UPPER = 1 << 6, // ASCII upper case
58
+ CHAR_LOWER = 1 << 7, // ASCII lower case
70
59
  };
71
60
 
72
61
 
@@ -91,6 +80,15 @@ int char_is_digit(char c);
91
80
  // Is character alphanumeric?
92
81
  int char_is_alphanumeric(char c);
93
82
 
83
+ // Is character lower case?
84
+ int char_is_lower_case(char c);
85
+
86
+ // Is character upper case?
87
+ int char_is_upper_case(char c);
88
+
89
+ // Is character a valid intraword character?
90
+ int char_is_intraword(char c);
91
+
94
92
  // Is character either whitespace or line ending?
95
93
  int char_is_whitespace_or_line_ending(char c);
96
94
 
@@ -106,6 +104,8 @@ int char_is_whitespace_or_line_ending_or_punctuation(char c);
106
104
  // Is byte the first byte of a multibyte UTF-8 sequence?
107
105
  #define char_is_lead_multibyte(x) ((x & 0xC0) == 0xC0)
108
106
 
107
+ // Is the string valid UTF-8? (Returns pointer to first malformed sequence, or NULL if valid)
108
+ unsigned char * utf8_check(unsigned char * s);
109
109
 
110
110
  #endif
111
111
 
data/ext/mmd/critic_markup.c CHANGED
@@ -125,9 +125,10 @@ token * mmd_critic_tokenize_string(const char * source, size_t start, size_t len
125
125
  }
126
126
 
127
127
  match_free(m);
128
- trie_free(ac);
129
128
  }
130
129
 
130
+ trie_free(ac);
131
+
131
132
  return root;
132
133
  }
133
134
 
@@ -346,10 +347,10 @@ void mmd_critic_markup_reject(DString * d) {
346
347
 
347
348
 
348
349
  #ifdef TEST
349
- void Test_critic(CuTest* tc) {
350
- #ifdef kUseObjectPool
350
+ void Test_critic(CuTest * tc) {
351
+ #ifdef kUseObjectPool
351
352
  token_pool_init();
352
- #endif
353
+ #endif
353
354
 
354
355
  DString * test = d_string_new("{--foo bar--}");
355
356
  mmd_critic_markup_reject(test);
@@ -416,11 +417,11 @@ void Test_critic(CuTest* tc) {
416
417
  mmd_critic_markup_reject(test);
417
418
  CuAssertStrEquals(tc, "", test->str);
418
419
 
419
- #ifdef kUseObjectPool
420
+ #ifdef kUseObjectPool
420
421
  // Decrement counter and clean up token pool
421
422
  token_pool_drain();
422
423
 
423
424
  token_pool_free();
424
- #endif
425
+ #endif
425
426
  }
426
427
  #endif