github-markdown-jekyll 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,77 @@
1
+ /*
2
+ * Copyright (c) 2011, Vicent Marti
3
+ *
4
+ * Permission to use, copy, modify, and distribute this software for any
5
+ * purpose with or without fee is hereby granted, provided that the above
6
+ * copyright notice and this permission notice appear in all copies.
7
+ *
8
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15
+ */
16
+
17
+ #ifndef UPSKIRT_HTML_H
18
+ #define UPSKIRT_HTML_H
19
+
20
+ #include "markdown.h"
21
+ #include "buffer.h"
22
+ #include <stdlib.h>
23
+
24
+ #ifdef __cplusplus
25
+ extern "C" {
26
+ #endif
27
+
28
+ struct html_renderopt { /* renderer options; passed as options_ptr to sdhtml_renderer() and sdhtml_toc_renderer() */
29
+ struct { /* table-of-contents bookkeeping; NOTE(review): field semantics are defined by the renderer implementation, not visible in this header */
30
+ int header_count;
31
+ int current_level;
32
+ int level_offset;
33
+ } toc_data;
34
+
35
+ unsigned int flags; /* presumably a bitmask of html_render_mode values -- confirm against the renderer */
36
+
37
+ /* extra callbacks */
38
+ void (*link_attributes)(struct buf *ob, const struct buf *url, void *self); /* callback receiving an output buffer, a URL buffer and an opaque self pointer */
39
+ };
40
+
41
+ typedef enum { /* HTML rendering flags; each constant is a distinct single bit, so values can be OR-ed together */
42
+ HTML_SKIP_HTML = (1 << 0),
43
+ HTML_SKIP_STYLE = (1 << 1),
44
+ HTML_SKIP_IMAGES = (1 << 2),
45
+ HTML_SKIP_LINKS = (1 << 3),
46
+ HTML_EXPAND_TABS = (1 << 4),
47
+ HTML_SAFELINK = (1 << 5),
48
+ HTML_TOC = (1 << 6),
49
+ HTML_HARD_WRAP = (1 << 7),
50
+ HTML_USE_XHTML = (1 << 8),
51
+ HTML_ESCAPE = (1 << 9),
52
+ } html_render_mode; /* NOTE(review): presumably the values accepted as render_flags by sdhtml_renderer() -- confirm */
53
+
54
+ typedef enum { /* tag classification: none, opening tag, or closing tag */
55
+ HTML_TAG_NONE = 0,
56
+ HTML_TAG_OPEN, /* implicitly 1 */
57
+ HTML_TAG_CLOSE, /* implicitly 2 */
58
+ } html_tag; /* NOTE(review): presumably the meaning of sdhtml_is_tag()'s int result -- confirm against its implementation */
59
+
60
+ int
61
+ sdhtml_is_tag(const uint8_t *tag_data, size_t tag_size, const char *tagname);
62
+
63
+ extern void
64
+ sdhtml_renderer(struct sd_callbacks *callbacks, struct html_renderopt *options_ptr, unsigned int render_flags);
65
+
66
+ extern void
67
+ sdhtml_toc_renderer(struct sd_callbacks *callbacks, struct html_renderopt *options_ptr);
68
+
69
+ extern void
70
+ sdhtml_smartypants(struct buf *ob, const uint8_t *text, size_t size);
71
+
72
+ #ifdef __cplusplus
73
+ }
74
+ #endif
75
+
76
+ #endif
77
+
@@ -0,0 +1,206 @@
1
+ /* C code produced by gperf version 3.0.3 */
2
+ /* Command-line: gperf -N find_block_tag -H hash_block_tag -C -c -E --ignore-case html_block_names.txt */
3
+ /* Computed positions: -k'1-2' */
4
+
5
+ #if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
6
+ && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \
7
+ && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \
8
+ && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \
9
+ && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \
10
+ && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \
11
+ && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \
12
+ && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \
13
+ && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \
14
+ && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \
15
+ && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \
16
+ && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \
17
+ && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \
18
+ && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \
19
+ && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \
20
+ && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \
21
+ && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \
22
+ && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \
23
+ && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \
24
+ && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \
25
+ && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \
26
+ && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \
27
+ && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126))
28
+ /* The character set is not based on ISO-646. */
29
+ error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gnu-gperf@gnu.org>."
30
+ #endif
31
+
32
+ /* maximum key range = 37, duplicates = 0 */
33
+
34
+ #ifndef GPERF_DOWNCASE
35
+ #define GPERF_DOWNCASE 1
36
+ static unsigned char gperf_downcase[256] = /* byte -> lower-case byte: identity everywhere except 'A'..'Z' (65..90), which map to 'a'..'z' (97..122) */
37
+ {
38
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
39
+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
40
+ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
41
+ 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
42
+ 60, 61, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106,
43
+ 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
44
+ 122, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104,
45
+ 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
46
+ 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,
47
+ 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
48
+ 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
49
+ 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
50
+ 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194,
51
+ 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
52
+ 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224,
53
+ 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
54
+ 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
55
+ 255
56
+ };
57
+ #endif
58
+
59
+ #ifndef GPERF_CASE_STRNCMP
60
+ #define GPERF_CASE_STRNCMP 1
61
+ static int
62
+ gperf_case_strncmp (s1, s2, n)
63
+ register const char *s1;
64
+ register const char *s2;
65
+ register unsigned int n;
66
+ {
67
+ for (; n > 0;)
68
+ {
69
+ unsigned char c1 = gperf_downcase[(unsigned char)*s1++];
70
+ unsigned char c2 = gperf_downcase[(unsigned char)*s2++];
71
+ if (c1 != 0 && c1 == c2)
72
+ {
73
+ n--;
74
+ continue;
75
+ }
76
+ return (int)c1 - (int)c2;
77
+ }
78
+ return 0;
79
+ }
80
+ #endif
81
+
82
/*
 * hash_block_tag • gperf-generated hash for HTML block tag names.
 *
 * The hash is the length plus the association values of the first byte and,
 * unless len is exactly 1, of the second byte offset by one. The table has
 * 257 entries because of that "+1" offset. The caller must make str[1]
 * readable whenever len != 1.
 */
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
static unsigned int
hash_block_tag(const char *str, unsigned int len)
{
	static const unsigned char asso_values[] =
	{
		38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
		38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
		38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
		38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
		38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
		8, 30, 25, 20, 15, 10, 38, 38, 38, 38,
		38, 38, 38, 38, 38, 38, 0, 38, 0, 38,
		5, 5, 5, 15, 0, 38, 38, 0, 15, 10,
		0, 38, 38, 15, 0, 5, 38, 38, 38, 38,
		38, 38, 38, 38, 38, 38, 38, 38, 0, 38,
		0, 38, 5, 5, 5, 15, 0, 38, 38, 0,
		15, 10, 0, 38, 38, 15, 0, 5, 38, 38,
		38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
		38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
		38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
		38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
		38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
		38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
		38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
		38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
		38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
		38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
		38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
		38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
		38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
		38, 38, 38, 38, 38, 38, 38
	};
	unsigned int hval = len;

	/* every length other than 1 also mixes in the second byte */
	if (len != 1)
		hval += asso_values[(unsigned char)str[1] + 1];

	hval += asso_values[(unsigned char)str[0]];
	return hval;
}
136
+
137
/*
 * find_block_tag • gperf-generated, case-insensitive lookup of an HTML
 * block tag name.
 *
 * Returns the matching entry of the internal word list when the first len
 * bytes of str spell one of the known tag names (ignoring case), and 0
 * otherwise.
 */
#ifdef __GNUC__
__inline
#ifdef __GNUC_STDC_INLINE__
__attribute__ ((__gnu_inline__))
#endif
#endif
const char *
find_block_tag(const char *str, unsigned int len)
{
	enum
	{
		TOTAL_KEYWORDS = 24,
		MIN_WORD_LENGTH = 1,
		MAX_WORD_LENGTH = 10,
		MIN_HASH_VALUE = 1,
		MAX_HASH_VALUE = 37
	};

	/* indexed by hash value; empty strings mark unused slots */
	static const char * const wordlist[] =
	{
		"",
		"p",
		"dl",
		"div",
		"math",
		"table",
		"",
		"ul",
		"del",
		"form",
		"blockquote",
		"figure",
		"ol",
		"fieldset",
		"",
		"h1",
		"",
		"h6",
		"pre",
		"", "",
		"script",
		"h5",
		"noscript",
		"",
		"style",
		"iframe",
		"h4",
		"ins",
		"", "", "",
		"h3",
		"", "", "", "",
		"h2"
	};
	const char *candidate;
	int key;

	if (len > MAX_WORD_LENGTH || len < MIN_WORD_LENGTH)
		return 0;

	key = hash_block_tag(str, len);
	if (key > MAX_HASH_VALUE || key < 0)
		return 0;

	candidate = wordlist[key];

	/* cheap first-byte test (ignoring the case bit) before the full compare */
	if ((((unsigned char)*str ^ (unsigned char)*candidate) & ~32) != 0)
		return 0;

	if (gperf_case_strncmp (str, candidate, len) != 0)
		return 0;

	/* the candidate must be exactly len bytes long */
	return (candidate[len] == '\0') ? candidate : 0;
}
@@ -0,0 +1,2605 @@
1
+ /* markdown.c - generic markdown parser */
2
+
3
+ /*
4
+ * Copyright (c) 2009, Natacha Porté
5
+ * Copyright (c) 2011, Vicent Marti
6
+ *
7
+ * Permission to use, copy, modify, and distribute this software for any
8
+ * purpose with or without fee is hereby granted, provided that the above
9
+ * copyright notice and this permission notice appear in all copies.
10
+ *
11
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18
+ */
19
+
20
+ #include "markdown.h"
21
+ #include "stack.h"
22
+
23
+ #include <assert.h>
24
+ #include <string.h>
25
+ #include <ctype.h>
26
+ #include <stdio.h>
27
+
28
+ #if defined(_WIN32)
29
+ #define strncasecmp _strnicmp
30
+ #endif
31
+
32
+ #define REF_TABLE_SIZE 8
33
+
34
+ #define BUFFER_BLOCK 0
35
+ #define BUFFER_SPAN 1
36
+
37
+ #define MKD_LI_END 8 /* internal list flag */
38
+
39
+ #define gperf_case_strncmp(s1, s2, n) strncasecmp(s1, s2, n)
40
+ #define GPERF_DOWNCASE 1
41
+ #define GPERF_CASE_STRNCMP 1
42
+ #include "html_blocks.h"
43
+
44
+ /***************
45
+ * LOCAL TYPES *
46
+ ***************/
47
+
48
+ /* link_ref: reference to a link */
49
+ struct link_ref { /* one entry of the reference hash table (chained buckets) */
50
+ unsigned int id; /* hash_link_ref() of the reference name; also selects the bucket (id % REF_TABLE_SIZE) */
51
+
52
+ struct buf *link; /* released with bufrelease() in free_link_refs() */
53
+ struct buf *title; /* released with bufrelease() in free_link_refs() */
54
+
55
+ struct link_ref *next; /* next entry in the same bucket, or NULL */
56
+ };
57
+
58
+ /* char_trigger: function pointer to render active chars */
59
+ /* returns the number of chars taken care of */
60
+ /* data is the pointer of the beginning of the span */
61
+ /* offset is the number of valid chars before data */
62
+ struct sd_markdown;
63
+ typedef size_t
64
+ (*char_trigger)(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
65
+
66
+ static size_t char_emphasis(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
67
+ static size_t char_linebreak(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
68
+ static size_t char_codespan(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
69
+ static size_t char_escape(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
70
+ static size_t char_entity(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
71
+ static size_t char_langle_tag(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
72
+ static size_t char_autolink_url(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
73
+ static size_t char_autolink_email(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
74
+ static size_t char_autolink_www(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
75
+ static size_t char_link(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
76
+ static size_t char_superscript(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
77
+
78
+ enum markdown_char_t { /* handler ids; the order matches the entries of markdown_char_ptrs[] */
79
+ MD_CHAR_NONE = 0, /* corresponds to the NULL slot of markdown_char_ptrs[] */
80
+ MD_CHAR_EMPHASIS,
81
+ MD_CHAR_CODESPAN,
82
+ MD_CHAR_LINEBREAK,
83
+ MD_CHAR_LINK,
84
+ MD_CHAR_LANGLE,
85
+ MD_CHAR_ESCAPE,
86
+ MD_CHAR_ENTITITY, /* (sic) spelling kept; slot of char_entity */
87
+ MD_CHAR_AUTOLINK_URL,
88
+ MD_CHAR_AUTOLINK_EMAIL,
89
+ MD_CHAR_AUTOLINK_WWW,
90
+ MD_CHAR_SUPERSCRIPT,
91
+ }; /* NOTE(review): presumably the values stored in sd_markdown.active_char[] -- the code filling that table is not visible here */
92
+
93
+ static char_trigger markdown_char_ptrs[] = { /* handler table; parse_inline() indexes it with the non-zero value read from active_char[] */
94
+ NULL, /* index 0: no handler */
95
+ &char_emphasis,
96
+ &char_codespan,
97
+ &char_linebreak,
98
+ &char_link,
99
+ &char_langle_tag,
100
+ &char_escape,
101
+ &char_entity,
102
+ &char_autolink_url,
103
+ &char_autolink_email,
104
+ &char_autolink_www,
105
+ &char_superscript,
106
+ }; /* entries are in the same order as enum markdown_char_t */
107
+
108
+ /* render • structure containing one particular render */
109
+ struct sd_markdown { /* parser state shared by all parsing functions in this file */
110
+ struct sd_callbacks cb; /* renderer callbacks; several are checked for NULL before use */
111
+ void *opaque; /* passed as the last argument to every callback */
112
+
113
+ struct link_ref *refs[REF_TABLE_SIZE]; /* buckets of the link reference hash table */
114
+ uint8_t active_char[256]; /* per-byte handler index into markdown_char_ptrs[]; 0 means plain text */
115
+ struct stack work_bufs[2]; /* scratch buffer pools, indexed by BUFFER_BLOCK / BUFFER_SPAN */
116
+ unsigned int ext_flags; /* extension bits, e.g. MKDEXT_NO_INTRA_EMPHASIS */
117
+ size_t max_nesting; /* parse_inline() stops once the live work buffers exceed this count */
118
+ int in_link_body; /* when non-zero the autolink handlers return without acting */
119
+ };
120
+
121
+ /***************************
122
+ * HELPER FUNCTIONS *
123
+ ***************************/
124
+
125
+ static inline struct buf *
126
+ rndr_newbuf(struct sd_markdown *rndr, int type)
127
+ {
128
+ static const size_t buf_size[2] = {256, 64};
129
+ struct buf *work = NULL;
130
+ struct stack *pool = &rndr->work_bufs[type];
131
+
132
+ if (pool->size < pool->asize &&
133
+ pool->item[pool->size] != NULL) {
134
+ work = pool->item[pool->size++];
135
+ work->size = 0;
136
+ } else {
137
+ work = bufnew(buf_size[type]);
138
+ stack_push(pool, work);
139
+ }
140
+
141
+ return work;
142
+ }
143
+
144
+ static inline void
145
+ rndr_popbuf(struct sd_markdown *rndr, int type)
146
+ {
147
+ rndr->work_bufs[type].size--;
148
+ }
149
+
150
+ static void
151
+ unscape_text(struct buf *ob, struct buf *src)
152
+ {
153
+ size_t i = 0, org;
154
+ while (i < src->size) {
155
+ org = i;
156
+ while (i < src->size && src->data[i] != '\\')
157
+ i++;
158
+
159
+ if (i > org)
160
+ bufput(ob, src->data + org, i - org);
161
+
162
+ if (i + 1 >= src->size)
163
+ break;
164
+
165
+ bufputc(ob, src->data[i + 1]);
166
+ i += 2;
167
+ }
168
+ }
169
+
170
/*
 * hash_link_ref • case-insensitive hash of a link reference name.
 *
 * Each byte is lower-cased with tolower() and folded in as
 * hash = hash * 65599 + byte, which is the multiplicative form of
 * (hash << 6) + (hash << 16) - hash in unsigned arithmetic.
 * An empty name hashes to 0.
 */
static unsigned int
hash_link_ref(const uint8_t *link_ref, size_t length)
{
	const uint8_t *cursor = link_ref;
	const uint8_t *const stop = link_ref + length;
	unsigned int hash = 0;

	while (cursor != stop) {
		hash = hash * 65599u + (unsigned int)tolower(*cursor);
		cursor++;
	}

	return hash;
}
181
+
182
+ static struct link_ref *
183
+ add_link_ref(
184
+ struct link_ref **references,
185
+ const uint8_t *name, size_t name_size)
186
+ {
187
+ struct link_ref *ref = calloc(1, sizeof(struct link_ref));
188
+
189
+ if (!ref)
190
+ return NULL;
191
+
192
+ ref->id = hash_link_ref(name, name_size);
193
+ ref->next = references[ref->id % REF_TABLE_SIZE];
194
+
195
+ references[ref->id % REF_TABLE_SIZE] = ref;
196
+ return ref;
197
+ }
198
+
199
+ static struct link_ref *
200
+ find_link_ref(struct link_ref **references, uint8_t *name, size_t length)
201
+ {
202
+ unsigned int hash = hash_link_ref(name, length);
203
+ struct link_ref *ref = NULL;
204
+
205
+ ref = references[hash % REF_TABLE_SIZE];
206
+
207
+ while (ref != NULL) {
208
+ if (ref->id == hash)
209
+ return ref;
210
+
211
+ ref = ref->next;
212
+ }
213
+
214
+ return NULL;
215
+ }
216
+
217
+ static void
218
+ free_link_refs(struct link_ref **references)
219
+ {
220
+ size_t i;
221
+
222
+ for (i = 0; i < REF_TABLE_SIZE; ++i) {
223
+ struct link_ref *r = references[i];
224
+ struct link_ref *next;
225
+
226
+ while (r) {
227
+ next = r->next;
228
+ bufrelease(r->link);
229
+ bufrelease(r->title);
230
+ free(r);
231
+ r = next;
232
+ }
233
+ }
234
+ }
235
+
236
/*
 * ASCII-only variant of isalnum(): values of 0x7f and above are never
 * alphanumeric, whatever the current locale says. Returns 1 or 0.
 */
static inline int
_isalnum(int c)
{
	if (c >= 0x7f)
		return 0;

	return isalnum(c) ? 1 : 0;
}
244
+
245
/*
 * Tells whether a char counts as Markdown whitespace.
 *
 * Only the plain space and the newline qualify: tabs and carriage
 * returns are filtered out during the preprocessing phase.
 *
 * A truly UTF-8 aware version would decode a Unicode codepoint and
 * test its space property instead.
 */
static inline int
_isspace(int c)
{
	switch (c) {
	case ' ':
	case '\n':
		return 1;

	default:
		return 0;
	}
}
261
+
262
+ /****************************
263
+ * INLINE PARSING FUNCTIONS *
264
+ ****************************/
265
+
266
/* is_mail_autolink • scans the address part of a mail autolink up to '>' */
/* Accepts [-@._a-zA-Z0-9]+ followed by '>', with exactly one '@'. */
/* Returns the length including the '>', or 0 when it is not a mail autolink. */
/* This is less strict than the original markdown e-mail address matching. */
static size_t
is_mail_autolink(uint8_t *data, size_t size)
{
	size_t pos;
	size_t at_signs = 0;

	for (pos = 0; pos < size; pos++) {
		const uint8_t ch = data[pos];

		if (isalnum(ch) || ch == '-' || ch == '.' || ch == '_')
			continue;

		if (ch == '@') {
			at_signs++;
			continue;
		}

		if (ch == '>')
			return (at_signs == 1) ? pos + 1 : 0;

		/* any other byte disqualifies the address */
		return 0;
	}

	/* ran out of input before the closing '>' */
	return 0;
}
297
+
298
+ /* tag_length • returns the length of the given tag, or 0 is it's not valid */
299
+ static size_t
300
+ tag_length(uint8_t *data, size_t size, enum mkd_autolink *autolink)
301
+ {
302
+ size_t i, j;
303
+
304
+ /* a valid tag can't be shorter than 3 chars */
305
+ if (size < 3) return 0;
306
+
307
+ /* begins with a '<' optionally followed by '/', followed by letter or number */
308
+ if (data[0] != '<') return 0;
309
+ i = (data[1] == '/') ? 2 : 1;
310
+
311
+ if (!_isalnum(data[i]))
312
+ return 0;
313
+
314
+ /* scheme test */
315
+ *autolink = MKDA_NOT_AUTOLINK;
316
+
317
+ /* try to find the beginning of an URI */
318
+ while (i < size && (_isalnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-'))
319
+ i++;
320
+
321
+ if (i > 1 && data[i] == '@') {
322
+ if ((j = is_mail_autolink(data + i, size - i)) != 0) {
323
+ *autolink = MKDA_EMAIL;
324
+ return i + j;
325
+ }
326
+ }
327
+
328
+ if (i > 2 && data[i] == ':') {
329
+ *autolink = MKDA_NORMAL;
330
+ i++;
331
+ }
332
+
333
+ /* completing autolink test: no whitespace or ' or " */
334
+ if (i >= size)
335
+ *autolink = MKDA_NOT_AUTOLINK;
336
+
337
+ else if (*autolink) {
338
+ j = i;
339
+
340
+ while (i < size) {
341
+ if (data[i] == '\\') i += 2;
342
+ else if (data[i] == '>' || data[i] == '\'' ||
343
+ data[i] == '"' || data[i] == ' ' || data[i] == '\n')
344
+ break;
345
+ else i++;
346
+ }
347
+
348
+ if (i >= size) return 0;
349
+ if (i > j && data[i] == '>') return i + 1;
350
+ /* one of the forbidden chars has been found */
351
+ *autolink = MKDA_NOT_AUTOLINK;
352
+ }
353
+
354
+ /* looking for sometinhg looking like a tag end */
355
+ while (i < size && data[i] != '>') i++;
356
+ if (i >= size) return 0;
357
+ return i + 1;
358
+ }
359
+
360
+ /* parse_inline • parses inline markdown elements */
361
+ static void
362
+ parse_inline(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
363
+ {
364
+ size_t i = 0, end = 0;
365
+ uint8_t action = 0;
366
+ struct buf work = { 0, 0, 0, 0 };
367
+
368
+ if (rndr->work_bufs[BUFFER_SPAN].size +
369
+ rndr->work_bufs[BUFFER_BLOCK].size > rndr->max_nesting)
370
+ return;
371
+
372
+ while (i < size) {
373
+ /* copying inactive chars into the output */
374
+ while (end < size && (action = rndr->active_char[data[end]]) == 0) {
375
+ end++;
376
+ }
377
+
378
+ if (rndr->cb.normal_text) {
379
+ work.data = data + i;
380
+ work.size = end - i;
381
+ rndr->cb.normal_text(ob, &work, rndr->opaque);
382
+ }
383
+ else
384
+ bufput(ob, data + i, end - i);
385
+
386
+ if (end >= size) break;
387
+ i = end;
388
+
389
+ end = markdown_char_ptrs[(int)action](ob, rndr, data + i, i, size - i);
390
+ if (!end) /* no action from the callback */
391
+ end = i + 1;
392
+ else {
393
+ i += end;
394
+ end = i;
395
+ }
396
+ }
397
+ }
398
+
399
/* find_emph_char • looks for the next emph char, skipping other constructs */
/*
 * Scans data[1..size) for the delimiter c and returns its index, or 0 when
 * none is found. Code spans and links are skipped as a whole. If the input
 * ends inside such a construct, the index of the first c seen inside it is
 * returned instead (0 if there was none).
 */
static size_t
find_emph_char(uint8_t *data, size_t size, uint8_t c)
{
	size_t i = 1;

	while (i < size) {
		/*
		 * Stop on the delimiter, on a backtick or on '['. Without the
		 * backtick test the code-span branch below can never run, and a
		 * delimiter inside a code span would wrongly close the emphasis.
		 */
		while (i < size && data[i] != c && data[i] != '`' && data[i] != '[')
			i++;

		if (i == size)
			return 0;

		if (data[i] == c)
			return i;

		/* not counting escaped chars */
		if (i && data[i - 1] == '\\') {
			i++; continue;
		}

		if (data[i] == '`') {
			size_t span_nb = 0, bt;
			size_t tmp_i = 0;

			/* counting the number of opening backticks */
			while (i < size && data[i] == '`') {
				i++; span_nb++;
			}

			if (i >= size) return 0;

			/* finding the matching closing sequence */
			bt = 0;
			while (i < size && bt < span_nb) {
				if (!tmp_i && data[i] == c) tmp_i = i;
				if (data[i] == '`') bt++;
				else bt = 0;
				i++;
			}

			if (i >= size) return tmp_i;
		}
		/* skipping a link */
		else if (data[i] == '[') {
			size_t tmp_i = 0;
			uint8_t cc;

			i++;
			while (i < size && data[i] != ']') {
				if (!tmp_i && data[i] == c) tmp_i = i;
				i++;
			}

			i++;
			while (i < size && (data[i] == ' ' || data[i] == '\n'))
				i++;

			if (i >= size)
				return tmp_i;

			/* the bracketed text is followed by a reference or an inline target */
			switch (data[i]) {
			case '[':
				cc = ']'; break;

			case '(':
				cc = ')'; break;

			default:
				if (tmp_i)
					return tmp_i;
				else
					continue;
			}

			i++;
			while (i < size && data[i] != cc) {
				if (!tmp_i && data[i] == c) tmp_i = i;
				i++;
			}

			if (i >= size)
				return tmp_i;

			i++;
		}
	}

	return 0;
}
489
+
490
+ /* parse_emph1 • parsing single emphase */
491
+ /* closed by a symbol not preceded by whitespace and not followed by symbol */
492
+ static size_t
493
+ parse_emph1(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c)
494
+ {
495
+ size_t i = 0, len;
496
+ struct buf *work = 0;
497
+ int r;
498
+
499
+ if (!rndr->cb.emphasis) return 0;
500
+
501
+ /* skipping one symbol if coming from emph3 */
502
+ if (size > 1 && data[0] == c && data[1] == c) i = 1;
503
+
504
+ while (i < size) {
505
+ len = find_emph_char(data + i, size - i, c);
506
+ if (!len) return 0;
507
+ i += len;
508
+ if (i >= size) return 0;
509
+
510
+ if (data[i] == c && !_isspace(data[i - 1])) {
511
+
512
+ if (rndr->ext_flags & MKDEXT_NO_INTRA_EMPHASIS) {
513
+ if (i + 1 < size && (_isalnum(data[i + 1]) || data[i + 1] == c))
514
+ continue;
515
+ }
516
+
517
+ work = rndr_newbuf(rndr, BUFFER_SPAN);
518
+ parse_inline(work, rndr, data, i);
519
+ r = rndr->cb.emphasis(ob, work, rndr->opaque);
520
+ rndr_popbuf(rndr, BUFFER_SPAN);
521
+ return r ? i + 1 : 0;
522
+ }
523
+ }
524
+
525
+ return 0;
526
+ }
527
+
528
+ /* parse_emph2 • parsing single emphase */
529
+ static size_t
530
+ parse_emph2(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c)
531
+ {
532
+ int (*render_method)(struct buf *ob, const struct buf *text, void *opaque);
533
+ size_t i = 0, len;
534
+ struct buf *work = 0;
535
+ int r;
536
+
537
+ render_method = (c == '~') ? rndr->cb.strikethrough : rndr->cb.double_emphasis;
538
+
539
+ if (!render_method)
540
+ return 0;
541
+
542
+ while (i < size) {
543
+ len = find_emph_char(data + i, size - i, c);
544
+ if (!len) return 0;
545
+ i += len;
546
+
547
+ if (i + 1 < size && data[i] == c && data[i + 1] == c && i && !_isspace(data[i - 1])) {
548
+
549
+ if (rndr->ext_flags & MKDEXT_NO_INTRA_EMPHASIS) {
550
+ if (i + 2 < size && (_isalnum(data[i + 2]) || data[i + 2] == c))
551
+ continue;
552
+ }
553
+
554
+ work = rndr_newbuf(rndr, BUFFER_SPAN);
555
+ parse_inline(work, rndr, data, i);
556
+ r = render_method(ob, work, rndr->opaque);
557
+ rndr_popbuf(rndr, BUFFER_SPAN);
558
+ return r ? i + 2 : 0;
559
+ }
560
+ i++;
561
+ }
562
+ return 0;
563
+ }
564
+
565
+ /* parse_emph3 • parsing single emphase */
566
+ /* finds the first closing tag, and delegates to the other emph */
567
+ static size_t
568
+ parse_emph3(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c)
569
+ {
570
+ size_t i = 0, len;
571
+ int r;
572
+
573
+ while (i < size) {
574
+ len = find_emph_char(data + i, size - i, c);
575
+ if (!len) return 0;
576
+ i += len;
577
+
578
+ /* skip whitespace preceded symbols */
579
+ if (data[i] != c || _isspace(data[i - 1]))
580
+ continue;
581
+
582
+ if (i + 2 < size && data[i + 1] == c && data[i + 2] == c && rndr->cb.triple_emphasis) {
583
+
584
+ if (rndr->ext_flags & MKDEXT_NO_INTRA_EMPHASIS) {
585
+ if (i + 3 < size && (_isalnum(data[i + 3]) || data[i + 3] == c))
586
+ continue;
587
+ }
588
+
589
+ /* triple symbol found */
590
+ struct buf *work = rndr_newbuf(rndr, BUFFER_SPAN);
591
+
592
+ parse_inline(work, rndr, data, i);
593
+ r = rndr->cb.triple_emphasis(ob, work, rndr->opaque);
594
+ rndr_popbuf(rndr, BUFFER_SPAN);
595
+ return r ? i + 3 : 0;
596
+
597
+ } else if (i + 1 < size && data[i + 1] == c) {
598
+ /* double symbol found, handing over to emph1 */
599
+ len = parse_emph1(ob, rndr, data - 2, size + 2, c);
600
+ if (!len) return 0;
601
+ else return len - 2;
602
+
603
+ } else {
604
+ /* single symbol found, handing over to emph2 */
605
+ len = parse_emph2(ob, rndr, data - 1, size + 1, c);
606
+ if (!len) return 0;
607
+ else return len - 1;
608
+ }
609
+ }
610
+ return 0;
611
+ }
612
+
613
+ /* char_emphasis • single and double emphasis parsing */
614
+ static size_t
615
+ char_emphasis(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
616
+ {
617
+ uint8_t c = data[0];
618
+ size_t ret;
619
+
620
+ if (rndr->ext_flags & MKDEXT_NO_INTRA_EMPHASIS) {
621
+ if (offset > 0 && !_isspace(data[-1]) && (_isalnum(data[-1]) || data[-1] == data[0])) {
622
+ return 0;
623
+ }
624
+ }
625
+
626
+ if (size > 2 && data[1] != c) {
627
+ /* whitespace cannot follow an opening emphasis;
628
+ * strikethrough only takes two characters '~~' */
629
+ if (c == '~' || _isspace(data[1]) || (ret = parse_emph1(ob, rndr, data + 1, size - 1, c)) == 0)
630
+ return 0;
631
+
632
+ return ret + 1;
633
+ }
634
+
635
+ if (size > 3 && data[1] == c && data[2] != c) {
636
+ if (_isspace(data[2]) || (ret = parse_emph2(ob, rndr, data + 2, size - 2, c)) == 0)
637
+ return 0;
638
+
639
+ return ret + 2;
640
+ }
641
+
642
+ if (size > 4 && data[1] == c && data[2] == c && data[3] != c) {
643
+ if (c == '~' || _isspace(data[3]) || (ret = parse_emph3(ob, rndr, data + 3, size - 3, c)) == 0)
644
+ return 0;
645
+
646
+ return ret + 3;
647
+ }
648
+
649
+ return 0;
650
+ }
651
+
652
+
653
+ /* char_linebreak • '\n' preceded by two spaces (assuming linebreak != 0) */
654
+ static size_t
655
+ char_linebreak(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
656
+ {
657
+ if (offset < 2 || data[-1] != ' ' || data[-2] != ' ')
658
+ return 0;
659
+
660
+ /* removing the last space from ob and rendering */
661
+ while (ob->size && ob->data[ob->size - 1] == ' ')
662
+ ob->size--;
663
+
664
+ return rndr->cb.linebreak(ob, rndr->opaque) ? 1 : 0;
665
+ }
666
+
667
+
668
+ /* char_codespan • '`' parsing a code span (assuming codespan != 0) */
669
+ static size_t
670
+ char_codespan(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
671
+ {
672
+ size_t end, nb = 0, i, f_begin, f_end;
673
+
674
+ /* counting the number of backticks in the delimiter */
675
+ while (nb < size && data[nb] == '`')
676
+ nb++;
677
+
678
+ /* finding the next delimiter */
679
+ i = 0;
680
+ for (end = nb; end < size && i < nb; end++) {
681
+ if (data[end] == '`') i++;
682
+ else i = 0;
683
+ }
684
+
685
+ if (i < nb && end >= size)
686
+ return 0; /* no matching delimiter */
687
+
688
+ /* trimming outside whitespaces */
689
+ f_begin = nb;
690
+ while (f_begin < end && data[f_begin] == ' ')
691
+ f_begin++;
692
+
693
+ f_end = end - nb;
694
+ while (f_end > nb && data[f_end-1] == ' ')
695
+ f_end--;
696
+
697
+ /* real code span */
698
+ if (f_begin < f_end) {
699
+ struct buf work = { data + f_begin, f_end - f_begin, 0, 0 };
700
+ if (!rndr->cb.codespan(ob, &work, rndr->opaque))
701
+ end = 0;
702
+ } else {
703
+ if (!rndr->cb.codespan(ob, 0, rndr->opaque))
704
+ end = 0;
705
+ }
706
+
707
+ return end;
708
+ }
709
+
710
+
711
+ /* char_escape • '\\' backslash escape */
712
+ static size_t
713
+ char_escape(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
714
+ {
715
+ static const char *escape_chars = "\\`*_{}[]()#+-.!:|&<>^~";
716
+ struct buf work = { 0, 0, 0, 0 };
717
+
718
+ if (size > 1) {
719
+ if (strchr(escape_chars, data[1]) == NULL)
720
+ return 0;
721
+
722
+ if (rndr->cb.normal_text) {
723
+ work.data = data + 1;
724
+ work.size = 1;
725
+ rndr->cb.normal_text(ob, &work, rndr->opaque);
726
+ }
727
+ else bufputc(ob, data[1]);
728
+ } else if (size == 1) {
729
+ bufputc(ob, data[0]);
730
+ }
731
+
732
+ return 2;
733
+ }
734
+
735
+ /* char_entity • '&' escaped when it doesn't belong to an entity */
736
+ /* valid entities are assumed to be anything matching &#?[A-Za-z0-9]+; */
737
+ static size_t
738
+ char_entity(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
739
+ {
740
+ size_t end = 1;
741
+ struct buf work = { 0, 0, 0, 0 };
742
+
743
+ if (end < size && data[end] == '#')
744
+ end++;
745
+
746
+ while (end < size && _isalnum(data[end]))
747
+ end++;
748
+
749
+ if (end < size && data[end] == ';')
750
+ end++; /* real entity */
751
+ else
752
+ return 0; /* lone '&' */
753
+
754
+ if (rndr->cb.entity) {
755
+ work.data = data;
756
+ work.size = end;
757
+ rndr->cb.entity(ob, &work, rndr->opaque);
758
+ }
759
+ else bufput(ob, data, end);
760
+
761
+ return end;
762
+ }
763
+
764
+ /* char_langle_tag • '<' when tags or autolinks are allowed */
765
+ static size_t
766
+ char_langle_tag(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
767
+ {
768
+ enum mkd_autolink altype = MKDA_NOT_AUTOLINK;
769
+ size_t end = tag_length(data, size, &altype);
770
+ struct buf work = { data, end, 0, 0 };
771
+ int ret = 0;
772
+
773
+ if (end > 2) {
774
+ if (rndr->cb.autolink && altype != MKDA_NOT_AUTOLINK) {
775
+ struct buf *u_link = rndr_newbuf(rndr, BUFFER_SPAN);
776
+ work.data = data + 1;
777
+ work.size = end - 2;
778
+ unscape_text(u_link, &work);
779
+ ret = rndr->cb.autolink(ob, u_link, altype, rndr->opaque);
780
+ rndr_popbuf(rndr, BUFFER_SPAN);
781
+ }
782
+ else if (rndr->cb.raw_html_tag)
783
+ ret = rndr->cb.raw_html_tag(ob, &work, rndr->opaque);
784
+ }
785
+
786
+ if (!ret) return 0;
787
+ else return end;
788
+ }
789
+
790
+ static size_t
791
+ char_autolink_www(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
792
+ {
793
+ struct buf *link, *link_url, *link_text;
794
+ size_t link_len, rewind;
795
+
796
+ if (!rndr->cb.link || rndr->in_link_body)
797
+ return 0;
798
+
799
+ link = rndr_newbuf(rndr, BUFFER_SPAN);
800
+
801
+ if ((link_len = sd_autolink__www(&rewind, link, data, offset, size, 0)) > 0) {
802
+ link_url = rndr_newbuf(rndr, BUFFER_SPAN);
803
+ BUFPUTSL(link_url, "http://");
804
+ bufput(link_url, link->data, link->size);
805
+
806
+ ob->size -= rewind;
807
+ if (rndr->cb.normal_text) {
808
+ link_text = rndr_newbuf(rndr, BUFFER_SPAN);
809
+ rndr->cb.normal_text(link_text, link, rndr->opaque);
810
+ rndr->cb.link(ob, link_url, NULL, link_text, rndr->opaque);
811
+ rndr_popbuf(rndr, BUFFER_SPAN);
812
+ } else {
813
+ rndr->cb.link(ob, link_url, NULL, link, rndr->opaque);
814
+ }
815
+ rndr_popbuf(rndr, BUFFER_SPAN);
816
+ }
817
+
818
+ rndr_popbuf(rndr, BUFFER_SPAN);
819
+ return link_len;
820
+ }
821
+
822
+ static size_t
823
+ char_autolink_email(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
824
+ {
825
+ struct buf *link;
826
+ size_t link_len, rewind;
827
+
828
+ if (!rndr->cb.autolink || rndr->in_link_body)
829
+ return 0;
830
+
831
+ link = rndr_newbuf(rndr, BUFFER_SPAN);
832
+
833
+ if ((link_len = sd_autolink__email(&rewind, link, data, offset, size, 0)) > 0) {
834
+ ob->size -= rewind;
835
+ rndr->cb.autolink(ob, link, MKDA_EMAIL, rndr->opaque);
836
+ }
837
+
838
+ rndr_popbuf(rndr, BUFFER_SPAN);
839
+ return link_len;
840
+ }
841
+
842
+ static size_t
843
+ char_autolink_url(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
844
+ {
845
+ struct buf *link;
846
+ size_t link_len, rewind;
847
+
848
+ if (!rndr->cb.autolink || rndr->in_link_body)
849
+ return 0;
850
+
851
+ link = rndr_newbuf(rndr, BUFFER_SPAN);
852
+
853
+ if ((link_len = sd_autolink__url(&rewind, link, data, offset, size, 0)) > 0) {
854
+ ob->size -= rewind;
855
+ rndr->cb.autolink(ob, link, MKDA_NORMAL, rndr->opaque);
856
+ }
857
+
858
+ rndr_popbuf(rndr, BUFFER_SPAN);
859
+ return link_len;
860
+ }
861
+
862
+ /* char_link • '[': parsing a link or an image */
863
+ static size_t
864
+ char_link(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
865
+ {
866
+ int is_img = (offset && data[-1] == '!'), level;
867
+ size_t i = 1, txt_e, link_b = 0, link_e = 0, title_b = 0, title_e = 0;
868
+ struct buf *content = 0;
869
+ struct buf *link = 0;
870
+ struct buf *title = 0;
871
+ struct buf *u_link = 0;
872
+ size_t org_work_size = rndr->work_bufs[BUFFER_SPAN].size;
873
+ int text_has_nl = 0, ret = 0;
874
+ int in_title = 0, qtype = 0;
875
+
876
+ /* checking whether the correct renderer exists */
877
+ if ((is_img && !rndr->cb.image) || (!is_img && !rndr->cb.link))
878
+ goto cleanup;
879
+
880
+ /* looking for the matching closing bracket */
881
+ for (level = 1; i < size; i++) {
882
+ if (data[i] == '\n')
883
+ text_has_nl = 1;
884
+
885
+ else if (data[i - 1] == '\\')
886
+ continue;
887
+
888
+ else if (data[i] == '[')
889
+ level++;
890
+
891
+ else if (data[i] == ']') {
892
+ level--;
893
+ if (level <= 0)
894
+ break;
895
+ }
896
+ }
897
+
898
+ if (i >= size)
899
+ goto cleanup;
900
+
901
+ txt_e = i;
902
+ i++;
903
+
904
+ /* skip any amount of whitespace or newline */
905
+ /* (this is much more laxist than original markdown syntax) */
906
+ while (i < size && _isspace(data[i]))
907
+ i++;
908
+
909
+ /* inline style link */
910
+ if (i < size && data[i] == '(') {
911
+ /* skipping initial whitespace */
912
+ i++;
913
+
914
+ while (i < size && _isspace(data[i]))
915
+ i++;
916
+
917
+ link_b = i;
918
+
919
+ /* looking for link end: ' " ) */
920
+ /* Count the number of open parenthesis */
921
+ size_t nb_p = 0;
922
+
923
+ while (i < size) {
924
+ if (data[i] == '\\') i += 2;
925
+ else if (data[i] == '(' && i != 0) {
926
+ nb_p++; i++;
927
+ }
928
+ else if (data[i] == ')') {
929
+ if (nb_p == 0) break;
930
+ nb_p--; i++;
931
+ } else if (i >= 1 && _isspace(data[i-1]) && (data[i] == '\'' || data[i] == '"')) break;
932
+ else i++;
933
+ }
934
+
935
+ if (i >= size) goto cleanup;
936
+ link_e = i;
937
+
938
+ /* looking for title end if present */
939
+ if (data[i] == '\'' || data[i] == '"') {
940
+ qtype = data[i];
941
+ in_title = 1;
942
+ i++;
943
+ title_b = i;
944
+
945
+ while (i < size) {
946
+ if (data[i] == '\\') i += 2;
947
+ else if (data[i] == qtype) {in_title = 0; i++;}
948
+ else if ((data[i] == ')') && !in_title) break;
949
+ else i++;
950
+ }
951
+
952
+ if (i >= size) goto cleanup;
953
+
954
+ /* skipping whitespaces after title */
955
+ title_e = i - 1;
956
+ while (title_e > title_b && _isspace(data[title_e]))
957
+ title_e--;
958
+
959
+ /* checking for closing quote presence */
960
+ if (data[title_e] != '\'' && data[title_e] != '"') {
961
+ title_b = title_e = 0;
962
+ link_e = i;
963
+ }
964
+ }
965
+
966
+ /* remove whitespace at the end of the link */
967
+ while (link_e > link_b && _isspace(data[link_e - 1]))
968
+ link_e--;
969
+
970
+ /* remove optional angle brackets around the link */
971
+ if (data[link_b] == '<') link_b++;
972
+ if (data[link_e - 1] == '>') link_e--;
973
+
974
+ /* building escaped link and title */
975
+ if (link_e > link_b) {
976
+ link = rndr_newbuf(rndr, BUFFER_SPAN);
977
+ bufput(link, data + link_b, link_e - link_b);
978
+ }
979
+
980
+ if (title_e > title_b) {
981
+ title = rndr_newbuf(rndr, BUFFER_SPAN);
982
+ bufput(title, data + title_b, title_e - title_b);
983
+ }
984
+
985
+ i++;
986
+ }
987
+
988
+ /* reference style link */
989
+ else if (i < size && data[i] == '[') {
990
+ struct buf id = { 0, 0, 0, 0 };
991
+ struct link_ref *lr;
992
+
993
+ /* looking for the id */
994
+ i++;
995
+ link_b = i;
996
+ while (i < size && data[i] != ']') i++;
997
+ if (i >= size) goto cleanup;
998
+ link_e = i;
999
+
1000
+ /* finding the link_ref */
1001
+ if (link_b == link_e) {
1002
+ if (text_has_nl) {
1003
+ struct buf *b = rndr_newbuf(rndr, BUFFER_SPAN);
1004
+ size_t j;
1005
+
1006
+ for (j = 1; j < txt_e; j++) {
1007
+ if (data[j] != '\n')
1008
+ bufputc(b, data[j]);
1009
+ else if (data[j - 1] != ' ')
1010
+ bufputc(b, ' ');
1011
+ }
1012
+
1013
+ id.data = b->data;
1014
+ id.size = b->size;
1015
+ } else {
1016
+ id.data = data + 1;
1017
+ id.size = txt_e - 1;
1018
+ }
1019
+ } else {
1020
+ id.data = data + link_b;
1021
+ id.size = link_e - link_b;
1022
+ }
1023
+
1024
+ lr = find_link_ref(rndr->refs, id.data, id.size);
1025
+ if (!lr)
1026
+ goto cleanup;
1027
+
1028
+ /* keeping link and title from link_ref */
1029
+ link = lr->link;
1030
+ title = lr->title;
1031
+ i++;
1032
+ }
1033
+
1034
+ /* shortcut reference style link */
1035
+ else {
1036
+ struct buf id = { 0, 0, 0, 0 };
1037
+ struct link_ref *lr;
1038
+
1039
+ /* crafting the id */
1040
+ if (text_has_nl) {
1041
+ struct buf *b = rndr_newbuf(rndr, BUFFER_SPAN);
1042
+ size_t j;
1043
+
1044
+ for (j = 1; j < txt_e; j++) {
1045
+ if (data[j] != '\n')
1046
+ bufputc(b, data[j]);
1047
+ else if (data[j - 1] != ' ')
1048
+ bufputc(b, ' ');
1049
+ }
1050
+
1051
+ id.data = b->data;
1052
+ id.size = b->size;
1053
+ } else {
1054
+ id.data = data + 1;
1055
+ id.size = txt_e - 1;
1056
+ }
1057
+
1058
+ /* finding the link_ref */
1059
+ lr = find_link_ref(rndr->refs, id.data, id.size);
1060
+ if (!lr)
1061
+ goto cleanup;
1062
+
1063
+ /* keeping link and title from link_ref */
1064
+ link = lr->link;
1065
+ title = lr->title;
1066
+
1067
+ /* rewinding the whitespace */
1068
+ i = txt_e + 1;
1069
+ }
1070
+
1071
+ /* building content: img alt is escaped, link content is parsed */
1072
+ if (txt_e > 1) {
1073
+ content = rndr_newbuf(rndr, BUFFER_SPAN);
1074
+ if (is_img) {
1075
+ bufput(content, data + 1, txt_e - 1);
1076
+ } else {
1077
+ /* disable autolinking when parsing inline the
1078
+ * content of a link */
1079
+ rndr->in_link_body = 1;
1080
+ parse_inline(content, rndr, data + 1, txt_e - 1);
1081
+ rndr->in_link_body = 0;
1082
+ }
1083
+ }
1084
+
1085
+ if (link) {
1086
+ u_link = rndr_newbuf(rndr, BUFFER_SPAN);
1087
+ unscape_text(u_link, link);
1088
+ }
1089
+
1090
+ /* calling the relevant rendering function */
1091
+ if (is_img) {
1092
+ if (ob->size && ob->data[ob->size - 1] == '!')
1093
+ ob->size -= 1;
1094
+
1095
+ ret = rndr->cb.image(ob, u_link, title, content, rndr->opaque);
1096
+ } else {
1097
+ ret = rndr->cb.link(ob, u_link, title, content, rndr->opaque);
1098
+ }
1099
+
1100
+ /* cleanup */
1101
+ cleanup:
1102
+ rndr->work_bufs[BUFFER_SPAN].size = (int)org_work_size;
1103
+ return ret ? i : 0;
1104
+ }
1105
+
1106
+ static size_t
1107
+ char_superscript(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
1108
+ {
1109
+ size_t sup_start, sup_len;
1110
+ struct buf *sup;
1111
+
1112
+ if (!rndr->cb.superscript)
1113
+ return 0;
1114
+
1115
+ if (size < 2)
1116
+ return 0;
1117
+
1118
+ if (data[1] == '(') {
1119
+ sup_start = sup_len = 2;
1120
+
1121
+ while (sup_len < size && data[sup_len] != ')' && data[sup_len - 1] != '\\')
1122
+ sup_len++;
1123
+
1124
+ if (sup_len == size)
1125
+ return 0;
1126
+ } else {
1127
+ sup_start = sup_len = 1;
1128
+
1129
+ while (sup_len < size && !_isspace(data[sup_len]))
1130
+ sup_len++;
1131
+ }
1132
+
1133
+ if (sup_len - sup_start == 0)
1134
+ return (sup_start == 2) ? 3 : 0;
1135
+
1136
+ sup = rndr_newbuf(rndr, BUFFER_SPAN);
1137
+ parse_inline(sup, rndr, data + sup_start, sup_len - sup_start);
1138
+ rndr->cb.superscript(ob, sup, rndr->opaque);
1139
+ rndr_popbuf(rndr, BUFFER_SPAN);
1140
+
1141
+ return (sup_start == 2) ? sup_len + 1 : sup_len;
1142
+ }
1143
+
1144
+ /*********************************
1145
+ * BLOCK-LEVEL PARSING FUNCTIONS *
1146
+ *********************************/
1147
+
1148
/* is_empty • returns the line length when it is empty, 0 otherwise */
/* a line is empty when only spaces precede its newline (or the end of the
 * input). The returned length counts the newline; when no newline is found
 * the result is therefore size + 1. */
static size_t
is_empty(uint8_t *data, size_t size)
{
	size_t pos = 0;

	while (pos < size) {
		if (data[pos] == '\n')
			break;

		if (data[pos] != ' ')
			return 0;

		pos++;
	}

	return pos + 1;
}
1160
+
1161
/* is_hrule • returns whether a line is a horizontal rule */
/* up to three leading spaces are allowed; the rest of the line must consist
 * of one marker ('*', '-' or '_') repeated at least three times, optionally
 * interleaved with spaces. */
static int
is_hrule(uint8_t *data, size_t size)
{
	size_t pos = 0, markers = 0;
	uint8_t marker;

	if (size < 3)
		return 0;

	/* size >= 3, so the first three bytes can be inspected freely */
	while (pos < 3 && data[pos] == ' ')
		pos++;

	if (pos + 2 >= size)
		return 0;

	marker = data[pos];
	if (marker != '*' && marker != '-' && marker != '_')
		return 0;

	for (; pos < size && data[pos] != '\n'; pos++) {
		if (data[pos] == marker)
			markers++;
		else if (data[pos] != ' ')
			return 0;
	}

	return markers >= 3;
}
1191
+
1192
/* prefix_codefence • checks whether a line begins with a code fence */
/* a fence is a run of at least three '~' or three '`' after up to three
 * leading spaces. Returns the offset just past the run (indentation
 * included), or 0 when the line does not start with a fence. */
static size_t
prefix_codefence(uint8_t *data, size_t size)
{
	size_t pos = 0, run_start;
	uint8_t fence;

	if (size < 3)
		return 0;

	/* size >= 3, so the first three bytes can be inspected freely */
	while (pos < 3 && data[pos] == ' ')
		pos++;

	if (pos + 2 >= size)
		return 0;

	fence = data[pos];
	if (fence != '~' && fence != '`')
		return 0;

	run_start = pos;
	while (pos < size && data[pos] == fence)
		pos++;

	return (pos - run_start < 3) ? 0 : pos;
}
1222
+
1223
+ /* check if a line is a code fence; return its size if it is */
1224
+ static size_t
1225
+ is_codefence(uint8_t *data, size_t size, struct buf *syntax)
1226
+ {
1227
+ size_t i = 0, syn_len = 0;
1228
+ uint8_t *syn_start;
1229
+
1230
+ i = prefix_codefence(data, size);
1231
+ if (i == 0)
1232
+ return 0;
1233
+
1234
+ while (i < size && data[i] == ' ')
1235
+ i++;
1236
+
1237
+ syn_start = data + i;
1238
+
1239
+ if (i < size && data[i] == '{') {
1240
+ i++; syn_start++;
1241
+
1242
+ while (i < size && data[i] != '}' && data[i] != '\n') {
1243
+ syn_len++; i++;
1244
+ }
1245
+
1246
+ if (i == size || data[i] != '}')
1247
+ return 0;
1248
+
1249
+ /* strip all whitespace at the beginning and the end
1250
+ * of the {} block */
1251
+ while (syn_len > 0 && _isspace(syn_start[0])) {
1252
+ syn_start++; syn_len--;
1253
+ }
1254
+
1255
+ while (syn_len > 0 && _isspace(syn_start[syn_len - 1]))
1256
+ syn_len--;
1257
+
1258
+ i++;
1259
+ } else {
1260
+ while (i < size && !_isspace(data[i])) {
1261
+ syn_len++; i++;
1262
+ }
1263
+ }
1264
+
1265
+ if (syntax) {
1266
+ syntax->data = syn_start;
1267
+ syntax->size = syn_len;
1268
+ }
1269
+
1270
+ while (i < size && data[i] != '\n') {
1271
+ if (!_isspace(data[i]))
1272
+ return 0;
1273
+
1274
+ i++;
1275
+ }
1276
+
1277
+ return i + 1;
1278
+ }
1279
+
1280
+ /* is_atxheader • returns whether the line is a hash-prefixed header */
1281
+ static int
1282
+ is_atxheader(struct sd_markdown *rndr, uint8_t *data, size_t size)
1283
+ {
1284
+ if (data[0] != '#')
1285
+ return 0;
1286
+
1287
+ if (rndr->ext_flags & MKDEXT_SPACE_HEADERS) {
1288
+ size_t level = 0;
1289
+
1290
+ while (level < size && level < 6 && data[level] == '#')
1291
+ level++;
1292
+
1293
+ if (level < size && data[level] != ' ')
1294
+ return 0;
1295
+ }
1296
+
1297
+ return 1;
1298
+ }
1299
+
1300
/* is_headerline • returns whether the line is a setext-style hdr underline */
/* returns 1 for a line made of '=' and 2 for a line made of '-', in both
 * cases optionally followed by spaces up to the newline or the end of the
 * input; returns 0 for anything else. An empty input is never an underline;
 * this is checked first so data[0] is not read past the buffer. */
static int
is_headerline(uint8_t *data, size_t size)
{
	size_t i;
	int level;

	if (size == 0)
		return 0;

	if (data[0] == '=')
		level = 1;
	else if (data[0] == '-')
		level = 2;
	else
		return 0;

	/* the run of underline characters, then trailing spaces */
	for (i = 1; i < size && data[i] == data[0]; i++)
		;

	while (i < size && data[i] == ' ')
		i++;

	return (i >= size || data[i] == '\n') ? level : 0;
}
1320
+
1321
/* is_next_headerline • returns whether the following line is a setext underline */
/* skips the first line of `data` and applies is_headerline() to whatever
 * comes after its newline; returns 0 when there is no following line. */
static int
is_next_headerline(uint8_t *data, size_t size)
{
	size_t next;

	for (next = 0; next < size && data[next] != '\n'; next++)
		;

	next++;	/* step over the newline */
	if (next >= size)
		return 0;

	return is_headerline(data + next, size - next);
}
1334
+
1335
/* prefix_quote • returns blockquote prefix length */
/* the prefix is up to three spaces, a '>' and one optional space after it;
 * returns 0 when the line does not start a blockquote. */
static size_t
prefix_quote(uint8_t *data, size_t size)
{
	size_t pos = 0;

	while (pos < 3 && pos < size && data[pos] == ' ')
		pos++;

	if (pos >= size || data[pos] != '>')
		return 0;

	pos++;
	if (pos < size && data[pos] == ' ')
		pos++;

	return pos;
}
1353
+
1354
/* prefix_code • returns prefix length for block code */
/* an indented code line starts with four spaces; returns 4 in that case and
 * 0 otherwise. */
static size_t
prefix_code(uint8_t *data, size_t size)
{
	size_t k;

	if (size <= 3)
		return 0;

	for (k = 0; k < 4; k++) {
		if (data[k] != ' ')
			return 0;
	}

	return 4;
}
1363
+
1364
/* prefix_oli • returns ordered list item prefix */
/* the prefix is up to three spaces, one or more digits, a '.' and a space.
 * A line directly followed by a setext underline is treated as a header
 * rather than a list item. Returns the prefix length, or 0 when the line
 * does not start an ordered list item. */
static size_t
prefix_oli(uint8_t *data, size_t size)
{
	size_t pos = 0, digits_start;

	while (pos < 3 && pos < size && data[pos] == ' ')
		pos++;

	digits_start = pos;
	while (pos < size && data[pos] >= '0' && data[pos] <= '9')
		pos++;

	/* at least one digit is required */
	if (pos == digits_start)
		return 0;

	if (pos + 1 >= size || data[pos] != '.' || data[pos + 1] != ' ')
		return 0;

	if (is_next_headerline(data + pos, size - pos))
		return 0;

	return pos + 2;
}
1388
+
1389
/* prefix_uli • returns unordered list item prefix */
/* the prefix is up to three spaces, a bullet ('*', '+' or '-') and a space.
 * A line directly followed by a setext underline is treated as a header
 * rather than a list item. Returns the prefix length, or 0 when the line
 * does not start an unordered list item. */
static size_t
prefix_uli(uint8_t *data, size_t size)
{
	size_t pos = 0;

	while (pos < 3 && pos < size && data[pos] == ' ')
		pos++;

	if (pos + 1 >= size)
		return 0;

	if (data[pos] != '*' && data[pos] != '+' && data[pos] != '-')
		return 0;

	if (data[pos + 1] != ' ')
		return 0;

	if (is_next_headerline(data + pos, size - pos))
		return 0;

	return pos + 2;
}
1409
+
1410
+
1411
+ /* parse_block • parsing of one block; declared early because the block parsers below call it */
1412
+ static void parse_block(struct buf *ob, struct sd_markdown *rndr,
1413
+ uint8_t *data, size_t size);
1414
+
1415
+
1416
+ /* parse_blockquote • handles parsing of a blockquote fragment */
1417
+ static size_t
1418
+ parse_blockquote(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
1419
+ {
1420
+ size_t beg, end = 0, pre, work_size = 0;
1421
+ uint8_t *work_data = 0;
1422
+ struct buf *out = 0;
1423
+
1424
+ out = rndr_newbuf(rndr, BUFFER_BLOCK);
1425
+ beg = 0;
1426
+ while (beg < size) {
1427
+ for (end = beg + 1; end < size && data[end - 1] != '\n'; end++);
1428
+
1429
+ pre = prefix_quote(data + beg, end - beg);
1430
+
1431
+ if (pre)
1432
+ beg += pre; /* skipping prefix */
1433
+
1434
+ /* empty line followed by non-quote line */
1435
+ else if (is_empty(data + beg, end - beg) &&
1436
+ (end >= size || (prefix_quote(data + end, size - end) == 0 &&
1437
+ !is_empty(data + end, size - end))))
1438
+ break;
1439
+
1440
+ if (beg < end) { /* copy into the in-place working buffer */
1441
+ /* bufput(work, data + beg, end - beg); */
1442
+ if (!work_data)
1443
+ work_data = data + beg;
1444
+ else if (data + beg != work_data + work_size)
1445
+ memmove(work_data + work_size, data + beg, end - beg);
1446
+ work_size += end - beg;
1447
+ }
1448
+ beg = end;
1449
+ }
1450
+
1451
+ parse_block(out, rndr, work_data, work_size);
1452
+ if (rndr->cb.blockquote)
1453
+ rndr->cb.blockquote(ob, out, rndr->opaque);
1454
+ rndr_popbuf(rndr, BUFFER_BLOCK);
1455
+ return end;
1456
+ }
1457
+
1458
+ static size_t
1459
+ parse_htmlblock(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int do_render);
1460
+
1461
+ /* parse_blockquote • handles parsing of a regular paragraph */
1462
+ static size_t
1463
+ parse_paragraph(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
1464
+ {
1465
+ size_t i = 0, end = 0;
1466
+ int level = 0;
1467
+ struct buf work = { data, 0, 0, 0 };
1468
+
1469
+ while (i < size) {
1470
+ for (end = i + 1; end < size && data[end - 1] != '\n'; end++) /* empty */;
1471
+
1472
+ if (is_empty(data + i, size - i))
1473
+ break;
1474
+
1475
+ if ((level = is_headerline(data + i, size - i)) != 0)
1476
+ break;
1477
+
1478
+ if (is_atxheader(rndr, data + i, size - i) ||
1479
+ is_hrule(data + i, size - i) ||
1480
+ prefix_quote(data + i, size - i)) {
1481
+ end = i;
1482
+ break;
1483
+ }
1484
+
1485
+ /*
1486
+ * Early termination of a paragraph with the same logic
1487
+ * as Markdown 1.0.0. If this logic is applied, the
1488
+ * Markdown 1.0.3 test suite won't pass cleanly
1489
+ *
1490
+ * :: If the first character in a new line is not a letter,
1491
+ * let's check to see if there's some kind of block starting
1492
+ * here
1493
+ */
1494
+ if ((rndr->ext_flags & MKDEXT_LAX_SPACING) && !_isalnum(data[i])) {
1495
+ if (prefix_oli(data + i, size - i) ||
1496
+ prefix_uli(data + i, size - i)) {
1497
+ end = i;
1498
+ break;
1499
+ }
1500
+
1501
+ /* see if an html block starts here */
1502
+ if (data[i] == '<' && rndr->cb.blockhtml &&
1503
+ parse_htmlblock(ob, rndr, data + i, size - i, 0)) {
1504
+ end = i;
1505
+ break;
1506
+ }
1507
+
1508
+ /* see if a code fence starts here */
1509
+ if ((rndr->ext_flags & MKDEXT_FENCED_CODE) != 0 &&
1510
+ is_codefence(data + i, size - i, NULL) != 0) {
1511
+ end = i;
1512
+ break;
1513
+ }
1514
+ }
1515
+
1516
+ i = end;
1517
+ }
1518
+
1519
+ work.size = i;
1520
+ while (work.size && data[work.size - 1] == '\n')
1521
+ work.size--;
1522
+
1523
+ if (!level) {
1524
+ struct buf *tmp = rndr_newbuf(rndr, BUFFER_BLOCK);
1525
+ parse_inline(tmp, rndr, work.data, work.size);
1526
+ if (rndr->cb.paragraph)
1527
+ rndr->cb.paragraph(ob, tmp, rndr->opaque);
1528
+ rndr_popbuf(rndr, BUFFER_BLOCK);
1529
+ } else {
1530
+ struct buf *header_work;
1531
+
1532
+ if (work.size) {
1533
+ size_t beg;
1534
+ i = work.size;
1535
+ work.size -= 1;
1536
+
1537
+ while (work.size && data[work.size] != '\n')
1538
+ work.size -= 1;
1539
+
1540
+ beg = work.size + 1;
1541
+ while (work.size && data[work.size - 1] == '\n')
1542
+ work.size -= 1;
1543
+
1544
+ if (work.size > 0) {
1545
+ struct buf *tmp = rndr_newbuf(rndr, BUFFER_BLOCK);
1546
+ parse_inline(tmp, rndr, work.data, work.size);
1547
+
1548
+ if (rndr->cb.paragraph)
1549
+ rndr->cb.paragraph(ob, tmp, rndr->opaque);
1550
+
1551
+ rndr_popbuf(rndr, BUFFER_BLOCK);
1552
+ work.data += beg;
1553
+ work.size = i - beg;
1554
+ }
1555
+ else work.size = i;
1556
+ }
1557
+
1558
+ header_work = rndr_newbuf(rndr, BUFFER_SPAN);
1559
+ parse_inline(header_work, rndr, work.data, work.size);
1560
+
1561
+ if (rndr->cb.header)
1562
+ rndr->cb.header(ob, header_work, (int)level, rndr->opaque);
1563
+
1564
+ rndr_popbuf(rndr, BUFFER_SPAN);
1565
+ }
1566
+
1567
+ return end;
1568
+ }
1569
+
1570
+ /* parse_fencedcode • handles parsing of a block-level code fragment */
1571
+ static size_t
1572
+ parse_fencedcode(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
1573
+ {
1574
+ size_t beg, end;
1575
+ struct buf *work = 0;
1576
+ struct buf lang = { 0, 0, 0, 0 };
1577
+
1578
+ beg = is_codefence(data, size, &lang);
1579
+ if (beg == 0) return 0;
1580
+
1581
+ work = rndr_newbuf(rndr, BUFFER_BLOCK);
1582
+
1583
+ while (beg < size) {
1584
+ size_t fence_end;
1585
+ struct buf fence_trail = { 0, 0, 0, 0 };
1586
+
1587
+ fence_end = is_codefence(data + beg, size - beg, &fence_trail);
1588
+ if (fence_end != 0 && fence_trail.size == 0) {
1589
+ beg += fence_end;
1590
+ break;
1591
+ }
1592
+
1593
+ for (end = beg + 1; end < size && data[end - 1] != '\n'; end++);
1594
+
1595
+ if (beg < end) {
1596
+ /* verbatim copy to the working buffer,
1597
+ escaping entities */
1598
+ if (is_empty(data + beg, end - beg))
1599
+ bufputc(work, '\n');
1600
+ else bufput(work, data + beg, end - beg);
1601
+ }
1602
+ beg = end;
1603
+ }
1604
+
1605
+ if (work->size && work->data[work->size - 1] != '\n')
1606
+ bufputc(work, '\n');
1607
+
1608
+ if (rndr->cb.blockcode)
1609
+ rndr->cb.blockcode(ob, work, lang.size ? &lang : NULL, rndr->opaque);
1610
+
1611
+ rndr_popbuf(rndr, BUFFER_BLOCK);
1612
+ return beg;
1613
+ }
1614
+
1615
+ static size_t
1616
+ parse_blockcode(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
1617
+ {
1618
+ size_t beg, end, pre;
1619
+ struct buf *work = 0;
1620
+
1621
+ work = rndr_newbuf(rndr, BUFFER_BLOCK);
1622
+
1623
+ beg = 0;
1624
+ while (beg < size) {
1625
+ for (end = beg + 1; end < size && data[end - 1] != '\n'; end++) {};
1626
+ pre = prefix_code(data + beg, end - beg);
1627
+
1628
+ if (pre)
1629
+ beg += pre; /* skipping prefix */
1630
+ else if (!is_empty(data + beg, end - beg))
1631
+ /* non-empty non-prefixed line breaks the pre */
1632
+ break;
1633
+
1634
+ if (beg < end) {
1635
+ /* verbatim copy to the working buffer,
1636
+ escaping entities */
1637
+ if (is_empty(data + beg, end - beg))
1638
+ bufputc(work, '\n');
1639
+ else bufput(work, data + beg, end - beg);
1640
+ }
1641
+ beg = end;
1642
+ }
1643
+
1644
+ while (work->size && work->data[work->size - 1] == '\n')
1645
+ work->size -= 1;
1646
+
1647
+ bufputc(work, '\n');
1648
+
1649
+ if (rndr->cb.blockcode)
1650
+ rndr->cb.blockcode(ob, work, NULL, rndr->opaque);
1651
+
1652
+ rndr_popbuf(rndr, BUFFER_BLOCK);
1653
+ return beg;
1654
+ }
1655
+
1656
+ /* parse_listitem • parsing of a single list item */
1657
+ /* assuming initial prefix is already removed */
1658
+ static size_t
1659
+ parse_listitem(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int *flags)
1660
+ {
1661
+ struct buf *work = 0, *inter = 0;
1662
+ size_t beg = 0, end, pre, sublist = 0, orgpre = 0, previous_indent = 0, i;
1663
+ int in_empty = 0, has_inside_empty = 0, has_trailing_empty = 0, in_fence = 0, previous_indent_diff = 0;
1664
+
1665
+ /* keeping track of the first indentation prefix */
1666
+ while (orgpre < 3 && orgpre < size && data[orgpre] == ' ')
1667
+ orgpre++;
1668
+
1669
+ beg = prefix_uli(data, size);
1670
+ if (!beg)
1671
+ beg = prefix_oli(data, size);
1672
+
1673
+ if (!beg)
1674
+ return 0;
1675
+
1676
+ /* skipping to the beginning of the following line */
1677
+ end = beg;
1678
+ while (end < size && data[end - 1] != '\n')
1679
+ end++;
1680
+
1681
+ /* getting working buffers */
1682
+ work = rndr_newbuf(rndr, BUFFER_SPAN);
1683
+ inter = rndr_newbuf(rndr, BUFFER_SPAN);
1684
+
1685
+ /* putting the first line into the working buffer */
1686
+ bufput(work, data + beg, end - beg);
1687
+ beg = end;
1688
+
1689
+ /* process the following lines */
1690
+ while (beg < size) {
1691
+ size_t has_next_uli = 0, has_next_oli = 0;
1692
+
1693
+ end++;
1694
+
1695
+ while (end < size && data[end - 1] != '\n')
1696
+ end++;
1697
+
1698
+ /* process an empty line */
1699
+ if (is_empty(data + beg, end - beg)) {
1700
+ in_empty = 1;
1701
+ beg = end;
1702
+ continue;
1703
+ }
1704
+
1705
+ /* calculating the indentation */
1706
+ i = 0;
1707
+ while (i < 4 && beg + i < end && data[beg + i] == ' ')
1708
+ i++;
1709
+
1710
+ /* don't bother calculating, this is probably the first item if == 0 */
1711
+ if (previous_indent > 0) {
1712
+ previous_indent_diff = i - previous_indent;
1713
+ /* allow indentations between 2 and 4 spaces to count as a new list */
1714
+ if (previous_indent_diff > 1 && previous_indent_diff < 4)
1715
+ i = 2;
1716
+ }
1717
+
1718
+ pre = i;
1719
+ previous_indent = pre;
1720
+
1721
+ if (rndr->ext_flags & MKDEXT_FENCED_CODE) {
1722
+ if (is_codefence(data + beg + i, end - beg - i, NULL) != 0) {
1723
+ /* If the fenced code isn't indented, then end the list */
1724
+ if (pre == 0 && !in_fence) {
1725
+ *flags |= MKD_LI_END;
1726
+ break;
1727
+ }
1728
+ in_fence = !in_fence;
1729
+ }
1730
+ }
1731
+
1732
+ /* Only check for new list items if we are **not** inside
1733
+ * a fenced code block */
1734
+ if (!in_fence) {
1735
+ has_next_uli = prefix_uli(data + beg + i, end - beg - i);
1736
+ has_next_oli = prefix_oli(data + beg + i, end - beg - i);
1737
+ }
1738
+
1739
+ /* checking for ul/ol switch */
1740
+ if (in_empty && (
1741
+ ((*flags & MKD_LIST_ORDERED) && has_next_uli) ||
1742
+ (!(*flags & MKD_LIST_ORDERED) && has_next_oli))){
1743
+ *flags |= MKD_LI_END;
1744
+ }
1745
+
1746
+ /* checking for a new item */
1747
+ if ((has_next_uli && !is_hrule(data + beg + i, end - beg - i)) || has_next_oli) {
1748
+ /* the following item must have the same indentation */
1749
+ if (pre == orgpre) {
1750
+ if (in_empty)
1751
+ has_trailing_empty = 1;
1752
+ break;
1753
+ }
1754
+
1755
+ if (!sublist)
1756
+ sublist = work->size;
1757
+ }
1758
+
1759
+ /* joining only indented stuff after empty lines;
1760
+ * note that now we only require 1 space of indentation
1761
+ * to continue a list */
1762
+ if (in_empty && pre == 0) {
1763
+ *flags |= MKD_LI_END;
1764
+ break;
1765
+ }
1766
+ else if (in_empty) {
1767
+ bufputc(work, '\n');
1768
+ has_inside_empty = 1;
1769
+ }
1770
+
1771
+ in_empty = 0;
1772
+
1773
+ /* adding the line without prefix into the working buffer */
1774
+ bufput(work, data + beg + i, end - beg - i);
1775
+ beg = end;
1776
+ }
1777
+
1778
+ /* render of li contents */
1779
+ if (has_inside_empty || has_trailing_empty)
1780
+ *flags |= MKD_LI_BLOCK;
1781
+
1782
+ if (*flags & MKD_LI_BLOCK) {
1783
+ /* intermediate render of block li */
1784
+ if (sublist && sublist < work->size) {
1785
+ parse_block(inter, rndr, work->data, sublist);
1786
+ parse_block(inter, rndr, work->data + sublist, work->size - sublist);
1787
+ }
1788
+ else
1789
+ parse_block(inter, rndr, work->data, work->size);
1790
+ } else {
1791
+ /* intermediate render of inline li */
1792
+ if (sublist && sublist < work->size) {
1793
+ parse_inline(inter, rndr, work->data, sublist);
1794
+ parse_block(inter, rndr, work->data + sublist, work->size - sublist);
1795
+ }
1796
+ else
1797
+ parse_inline(inter, rndr, work->data, work->size);
1798
+ }
1799
+
1800
+ if (!has_trailing_empty)
1801
+ *flags &= ~MKD_LI_BLOCK;
1802
+
1803
+ /* render of li itself */
1804
+ if (rndr->cb.listitem)
1805
+ rndr->cb.listitem(ob, inter, *flags, rndr->opaque);
1806
+
1807
+ rndr_popbuf(rndr, BUFFER_SPAN);
1808
+ rndr_popbuf(rndr, BUFFER_SPAN);
1809
+ return beg;
1810
+ }
1811
+
1812
+
1813
+ /* parse_list • parsing ordered or unordered list block */
1814
+ static size_t
1815
+ parse_list(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int flags)
1816
+ {
1817
+ struct buf *work = 0;
1818
+ size_t i = 0, j;
1819
+
1820
+ work = rndr_newbuf(rndr, BUFFER_BLOCK);
1821
+
1822
+ while (i < size) {
1823
+ j = parse_listitem(work, rndr, data + i, size - i, &flags);
1824
+ i += j;
1825
+
1826
+ if (!j || (flags & MKD_LI_END))
1827
+ break;
1828
+ }
1829
+
1830
+ if (rndr->cb.list)
1831
+ rndr->cb.list(ob, work, flags, rndr->opaque);
1832
+ rndr_popbuf(rndr, BUFFER_BLOCK);
1833
+ return i;
1834
+ }
1835
+
1836
+ /* parse_atxheader • parsing of atx-style headers */
1837
+ static size_t
1838
+ parse_atxheader(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
1839
+ {
1840
+ size_t level = 0;
1841
+ size_t i, end, skip;
1842
+
1843
+ while (level < size && level < 6 && data[level] == '#')
1844
+ level++;
1845
+
1846
+ for (i = level; i < size && data[i] == ' '; i++);
1847
+
1848
+ for (end = i; end < size && data[end] != '\n'; end++);
1849
+ skip = end;
1850
+
1851
+ while (end && data[end - 1] == '#')
1852
+ end--;
1853
+
1854
+ while (end && data[end - 1] == ' ')
1855
+ end--;
1856
+
1857
+ if (end > i) {
1858
+ struct buf *work = rndr_newbuf(rndr, BUFFER_SPAN);
1859
+
1860
+ parse_inline(work, rndr, data + i, end - i);
1861
+
1862
+ if (rndr->cb.header)
1863
+ rndr->cb.header(ob, work, (int)level, rndr->opaque);
1864
+
1865
+ rndr_popbuf(rndr, BUFFER_SPAN);
1866
+ }
1867
+
1868
+ return skip;
1869
+ }
1870
+
1871
+
1872
/* htmlblock_end_tag • checking end of HTML block : </tag>[ \t]*\n[ \t*]\n */
/* `data` is expected to point at the "</" of a candidate closing tag; the
 * tag name is compared case-insensitively. The rest of the closing tag's
 * line must be empty; one further empty line, if present, is included.
 * Returns the length on match, 0 otherwise. */
static size_t
htmlblock_end_tag(
	const char *tag,
	size_t tag_len,
	struct sd_markdown *rndr,
	uint8_t *data,
	size_t size)
{
	size_t pos, blank;

	/* "</" + tag + ">" plus at least one more byte must fit */
	if (tag_len + 3 >= size)
		return 0;

	if (strncasecmp((char *)data + 2, tag, tag_len) != 0)
		return 0;

	if (data[tag_len + 2] != '>')
		return 0;

	/* the remainder of the closing tag's line must be blank */
	pos = tag_len + 3;
	blank = 0;
	if (pos < size) {
		blank = is_empty(data + pos, size - pos);
		if (blank == 0)
			return 0;	/* non-blank after tag */
	}
	pos += blank;

	/* swallow one following empty line, if any */
	blank = 0;
	if (pos < size)
		blank = is_empty(data + pos, size - pos);

	return pos + blank;
}
1903
+
1904
/* htmlblock_end • looks for the closing tag that ends an HTML block */
/* scans `data` for "</" sequences and tests each with htmlblock_end_tag().
 * When `start_of_line` is set, a candidate found after the first line only
 * counts if the "</" directly follows a newline; candidates still on the
 * initial line always count. Returns the length of the block up to and
 * including the closing tag and its blank line(s), or 0 when none is found. */
static size_t
htmlblock_end(const char *curtag,
	struct sd_markdown *rndr,
	uint8_t *data,
	size_t size,
	int start_of_line)
{
	const size_t tag_size = strlen(curtag);
	size_t pos = 1, matched;
	int newlines_seen = 0;

	while (pos < size) {
		/* advance until data[pos - 1..pos] is "</" */
		for (pos++; pos < size && (data[pos - 1] != '<' || data[pos] != '/'); pos++) {
			if (data[pos] == '\n')
				newlines_seen++;
		}

		/* past the first line, only tags at the start of a line qualify */
		if (start_of_line && newlines_seen > 0 && data[pos - 2] != '\n')
			continue;

		/* not enough room left for the closing tag */
		if (pos + 2 + tag_size >= size)
			break;

		matched = htmlblock_end_tag(curtag, tag_size, rndr, data + pos - 1, size - pos + 1);
		if (matched)
			return pos + matched - 1;
	}

	return 0;
}
1944
+
1945
+
1946
+ /* parse_htmlblock • parsing of inline HTML block */
1947
+ static size_t
1948
+ parse_htmlblock(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int do_render)
1949
+ {
1950
+ size_t i, j = 0, tag_end;
1951
+ const char *curtag = NULL;
1952
+ struct buf work = { data, 0, 0, 0 };
1953
+
1954
+ /* identification of the opening tag */
1955
+ if (size < 2 || data[0] != '<')
1956
+ return 0;
1957
+
1958
+ i = 1;
1959
+ while (i < size && data[i] != '>' && data[i] != ' ')
1960
+ i++;
1961
+
1962
+ if (i < size)
1963
+ curtag = find_block_tag((char *)data + 1, (int)i - 1);
1964
+
1965
+ /* handling of special cases */
1966
+ if (!curtag) {
1967
+
1968
+ /* HTML comment, laxist form */
1969
+ if (size > 5 && data[1] == '!' && data[2] == '-' && data[3] == '-') {
1970
+ i = 5;
1971
+
1972
+ while (i < size && !(data[i - 2] == '-' && data[i - 1] == '-' && data[i] == '>'))
1973
+ i++;
1974
+
1975
+ i++;
1976
+
1977
+ if (i < size)
1978
+ j = is_empty(data + i, size - i);
1979
+
1980
+ if (j) {
1981
+ work.size = i + j;
1982
+ if (do_render && rndr->cb.blockhtml)
1983
+ rndr->cb.blockhtml(ob, &work, rndr->opaque);
1984
+ return work.size;
1985
+ }
1986
+ }
1987
+
1988
+ /* HR, which is the only self-closing block tag considered */
1989
+ if (size > 4 && (data[1] == 'h' || data[1] == 'H') && (data[2] == 'r' || data[2] == 'R')) {
1990
+ i = 3;
1991
+ while (i < size && data[i] != '>')
1992
+ i++;
1993
+
1994
+ if (i + 1 < size) {
1995
+ i++;
1996
+ j = is_empty(data + i, size - i);
1997
+ if (j) {
1998
+ work.size = i + j;
1999
+ if (do_render && rndr->cb.blockhtml)
2000
+ rndr->cb.blockhtml(ob, &work, rndr->opaque);
2001
+ return work.size;
2002
+ }
2003
+ }
2004
+ }
2005
+
2006
+ /* no special case recognised */
2007
+ return 0;
2008
+ }
2009
+
2010
+ /* looking for an unindented matching closing tag */
2011
+ /* followed by a blank line */
2012
+ tag_end = htmlblock_end(curtag, rndr, data, size, 1);
2013
+
2014
+ /* if not found, trying a second pass looking for indented match */
2015
+ /* but not if tag is "ins" or "del" (following original Markdown.pl) */
2016
+ if (!tag_end && strcmp(curtag, "ins") != 0 && strcmp(curtag, "del") != 0) {
2017
+ tag_end = htmlblock_end(curtag, rndr, data, size, 0);
2018
+ }
2019
+
2020
+ if (!tag_end)
2021
+ return 0;
2022
+
2023
+ /* the end of the block has been found */
2024
+ work.size = tag_end;
2025
+ if (do_render && rndr->cb.blockhtml)
2026
+ rndr->cb.blockhtml(ob, &work, rndr->opaque);
2027
+
2028
+ return tag_end;
2029
+ }
2030
+
2031
+ static void
2032
+ parse_table_row(
2033
+ struct buf *ob,
2034
+ struct sd_markdown *rndr,
2035
+ uint8_t *data,
2036
+ size_t size,
2037
+ size_t columns,
2038
+ int *col_data,
2039
+ int header_flag)
2040
+ {
2041
+ size_t i = 0, col;
2042
+ struct buf *row_work = 0;
2043
+
2044
+ if (!rndr->cb.table_cell || !rndr->cb.table_row)
2045
+ return;
2046
+
2047
+ row_work = rndr_newbuf(rndr, BUFFER_SPAN);
2048
+
2049
+ if (i < size && data[i] == '|')
2050
+ i++;
2051
+
2052
+ for (col = 0; col < columns && i < size; ++col) {
2053
+ size_t cell_start, cell_end;
2054
+ struct buf *cell_work;
2055
+
2056
+ cell_work = rndr_newbuf(rndr, BUFFER_SPAN);
2057
+
2058
+ while (i < size && _isspace(data[i]))
2059
+ i++;
2060
+
2061
+ cell_start = i;
2062
+
2063
+ while (i < size && data[i] != '|')
2064
+ i++;
2065
+
2066
+ cell_end = i - 1;
2067
+
2068
+ while (cell_end > cell_start && _isspace(data[cell_end]))
2069
+ cell_end--;
2070
+
2071
+ parse_inline(cell_work, rndr, data + cell_start, 1 + cell_end - cell_start);
2072
+ rndr->cb.table_cell(row_work, cell_work, col_data[col] | header_flag, rndr->opaque);
2073
+
2074
+ rndr_popbuf(rndr, BUFFER_SPAN);
2075
+ i++;
2076
+ }
2077
+
2078
+ for (; col < columns; ++col) {
2079
+ struct buf empty_cell = { 0, 0, 0, 0 };
2080
+ rndr->cb.table_cell(row_work, &empty_cell, col_data[col] | header_flag, rndr->opaque);
2081
+ }
2082
+
2083
+ rndr->cb.table_row(ob, row_work, rndr->opaque);
2084
+
2085
+ rndr_popbuf(rndr, BUFFER_SPAN);
2086
+ }
2087
+
2088
+ static size_t
2089
+ parse_table_header(
2090
+ struct buf *ob,
2091
+ struct sd_markdown *rndr,
2092
+ uint8_t *data,
2093
+ size_t size,
2094
+ size_t *columns,
2095
+ int **column_data)
2096
+ {
2097
+ int pipes;
2098
+ size_t i = 0, col, header_end, under_end;
2099
+
2100
+ pipes = 0;
2101
+ while (i < size && data[i] != '\n')
2102
+ if (data[i++] == '|')
2103
+ pipes++;
2104
+
2105
+ if (i == size || pipes == 0)
2106
+ return 0;
2107
+
2108
+ header_end = i;
2109
+
2110
+ while (header_end > 0 && _isspace(data[header_end - 1]))
2111
+ header_end--;
2112
+
2113
+ if (data[0] == '|')
2114
+ pipes--;
2115
+
2116
+ if (header_end && data[header_end - 1] == '|')
2117
+ pipes--;
2118
+
2119
+ *columns = pipes + 1;
2120
+ *column_data = calloc(*columns, sizeof(int));
2121
+
2122
+ /* Parse the header underline */
2123
+ i++;
2124
+ if (i < size && data[i] == '|')
2125
+ i++;
2126
+
2127
+ under_end = i;
2128
+ while (under_end < size && data[under_end] != '\n')
2129
+ under_end++;
2130
+
2131
+ for (col = 0; col < *columns && i < under_end; ++col) {
2132
+ size_t dashes = 0;
2133
+
2134
+ while (i < under_end && data[i] == ' ')
2135
+ i++;
2136
+
2137
+ if (data[i] == ':') {
2138
+ i++; (*column_data)[col] |= MKD_TABLE_ALIGN_L;
2139
+ dashes++;
2140
+ }
2141
+
2142
+ while (i < under_end && data[i] == '-') {
2143
+ i++; dashes++;
2144
+ }
2145
+
2146
+ if (i < under_end && data[i] == ':') {
2147
+ i++; (*column_data)[col] |= MKD_TABLE_ALIGN_R;
2148
+ dashes++;
2149
+ }
2150
+
2151
+ while (i < under_end && data[i] == ' ')
2152
+ i++;
2153
+
2154
+ if (i < under_end && data[i] != '|')
2155
+ break;
2156
+
2157
+ if (dashes < 3)
2158
+ break;
2159
+
2160
+ i++;
2161
+ }
2162
+
2163
+ if (col < *columns)
2164
+ return 0;
2165
+
2166
+ parse_table_row(
2167
+ ob, rndr, data,
2168
+ header_end,
2169
+ *columns,
2170
+ *column_data,
2171
+ MKD_TABLE_HEADER
2172
+ );
2173
+
2174
+ return under_end + 1;
2175
+ }
2176
+
2177
+ static size_t
2178
+ parse_table(
2179
+ struct buf *ob,
2180
+ struct sd_markdown *rndr,
2181
+ uint8_t *data,
2182
+ size_t size)
2183
+ {
2184
+ size_t i;
2185
+
2186
+ struct buf *header_work = 0;
2187
+ struct buf *body_work = 0;
2188
+
2189
+ size_t columns;
2190
+ int *col_data = NULL;
2191
+
2192
+ header_work = rndr_newbuf(rndr, BUFFER_SPAN);
2193
+ body_work = rndr_newbuf(rndr, BUFFER_BLOCK);
2194
+
2195
+ i = parse_table_header(header_work, rndr, data, size, &columns, &col_data);
2196
+ if (i > 0) {
2197
+
2198
+ while (i < size) {
2199
+ size_t row_start;
2200
+ int pipes = 0;
2201
+
2202
+ row_start = i;
2203
+
2204
+ while (i < size && data[i] != '\n')
2205
+ if (data[i++] == '|')
2206
+ pipes++;
2207
+
2208
+ if (pipes == 0 || i == size) {
2209
+ i = row_start;
2210
+ break;
2211
+ }
2212
+
2213
+ parse_table_row(
2214
+ body_work,
2215
+ rndr,
2216
+ data + row_start,
2217
+ i - row_start,
2218
+ columns,
2219
+ col_data, 0
2220
+ );
2221
+
2222
+ i++;
2223
+ }
2224
+
2225
+ if (rndr->cb.table)
2226
+ rndr->cb.table(ob, header_work, body_work, rndr->opaque);
2227
+ }
2228
+
2229
+ free(col_data);
2230
+ rndr_popbuf(rndr, BUFFER_SPAN);
2231
+ rndr_popbuf(rndr, BUFFER_BLOCK);
2232
+ return i;
2233
+ }
2234
+
2235
+ /* parse_block • parsing of one block, returning next uint8_t to parse */
2236
+ static void
2237
+ parse_block(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
2238
+ {
2239
+ size_t beg, end, i;
2240
+ uint8_t *txt_data;
2241
+ beg = 0;
2242
+
2243
+ if (rndr->work_bufs[BUFFER_SPAN].size +
2244
+ rndr->work_bufs[BUFFER_BLOCK].size > rndr->max_nesting)
2245
+ return;
2246
+
2247
+ while (beg < size) {
2248
+ txt_data = data + beg;
2249
+ end = size - beg;
2250
+
2251
+ if (is_atxheader(rndr, txt_data, end))
2252
+ beg += parse_atxheader(ob, rndr, txt_data, end);
2253
+
2254
+ else if (data[beg] == '<' && rndr->cb.blockhtml &&
2255
+ (i = parse_htmlblock(ob, rndr, txt_data, end, 1)) != 0)
2256
+ beg += i;
2257
+
2258
+ else if ((i = is_empty(txt_data, end)) != 0)
2259
+ beg += i;
2260
+
2261
+ else if (is_hrule(txt_data, end)) {
2262
+ if (rndr->cb.hrule)
2263
+ rndr->cb.hrule(ob, rndr->opaque);
2264
+
2265
+ while (beg < size && data[beg] != '\n')
2266
+ beg++;
2267
+
2268
+ beg++;
2269
+ }
2270
+
2271
+ else if ((rndr->ext_flags & MKDEXT_FENCED_CODE) != 0 &&
2272
+ (i = parse_fencedcode(ob, rndr, txt_data, end)) != 0)
2273
+ beg += i;
2274
+
2275
+ else if ((rndr->ext_flags & MKDEXT_TABLES) != 0 &&
2276
+ (i = parse_table(ob, rndr, txt_data, end)) != 0)
2277
+ beg += i;
2278
+
2279
+ else if (prefix_quote(txt_data, end))
2280
+ beg += parse_blockquote(ob, rndr, txt_data, end);
2281
+
2282
+ else if (prefix_code(txt_data, end))
2283
+ beg += parse_blockcode(ob, rndr, txt_data, end);
2284
+
2285
+ else if (prefix_uli(txt_data, end))
2286
+ beg += parse_list(ob, rndr, txt_data, end, 0);
2287
+
2288
+ else if (prefix_oli(txt_data, end))
2289
+ beg += parse_list(ob, rndr, txt_data, end, MKD_LIST_ORDERED);
2290
+
2291
+ else
2292
+ beg += parse_paragraph(ob, rndr, txt_data, end);
2293
+ }
2294
+ }
2295
+
2296
+
2297
+
2298
+ /*********************
2299
+ * REFERENCE PARSING *
2300
+ *********************/
2301
+
2302
+ /* is_ref • returns whether a line is a reference or not */
2303
+ static int
2304
+ is_ref(const uint8_t *data, size_t beg, size_t end, size_t *last, struct link_ref **refs)
2305
+ {
2306
+ /* int n; */
2307
+ size_t i = 0;
2308
+ size_t id_offset, id_end;
2309
+ size_t link_offset, link_end;
2310
+ size_t title_offset, title_end;
2311
+ size_t line_end;
2312
+
2313
+ /* up to 3 optional leading spaces */
2314
+ if (beg + 3 >= end) return 0;
2315
+ if (data[beg] == ' ') { i = 1;
2316
+ if (data[beg + 1] == ' ') { i = 2;
2317
+ if (data[beg + 2] == ' ') { i = 3;
2318
+ if (data[beg + 3] == ' ') return 0; } } }
2319
+ i += beg;
2320
+
2321
+ /* id part: anything but a newline between brackets */
2322
+ if (data[i] != '[') return 0;
2323
+ i++;
2324
+ id_offset = i;
2325
+ while (i < end && data[i] != '\n' && data[i] != '\r' && data[i] != ']')
2326
+ i++;
2327
+ if (i >= end || data[i] != ']') return 0;
2328
+ id_end = i;
2329
+
2330
+ /* spacer: colon (space | tab)* newline? (space | tab)* */
2331
+ i++;
2332
+ if (i >= end || data[i] != ':') return 0;
2333
+ i++;
2334
+ while (i < end && data[i] == ' ') i++;
2335
+ if (i < end && (data[i] == '\n' || data[i] == '\r')) {
2336
+ i++;
2337
+ if (i < end && data[i] == '\r' && data[i - 1] == '\n') i++; }
2338
+ while (i < end && data[i] == ' ') i++;
2339
+ if (i >= end) return 0;
2340
+
2341
+ /* link: whitespace-free sequence, optionally between angle brackets */
2342
+ if (data[i] == '<')
2343
+ i++;
2344
+
2345
+ link_offset = i;
2346
+
2347
+ while (i < end && data[i] != ' ' && data[i] != '\n' && data[i] != '\r')
2348
+ i++;
2349
+
2350
+ if (data[i - 1] == '>') link_end = i - 1;
2351
+ else link_end = i;
2352
+
2353
+ /* optional spacer: (space | tab)* (newline | '\'' | '"' | '(' ) */
2354
+ while (i < end && data[i] == ' ') i++;
2355
+ if (i < end && data[i] != '\n' && data[i] != '\r'
2356
+ && data[i] != '\'' && data[i] != '"' && data[i] != '(')
2357
+ return 0;
2358
+ line_end = 0;
2359
+ /* computing end-of-line */
2360
+ if (i >= end || data[i] == '\r' || data[i] == '\n') line_end = i;
2361
+ if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r')
2362
+ line_end = i + 1;
2363
+
2364
+ /* optional (space|tab)* spacer after a newline */
2365
+ if (line_end) {
2366
+ i = line_end + 1;
2367
+ while (i < end && data[i] == ' ') i++; }
2368
+
2369
+ /* optional title: any non-newline sequence enclosed in '"()
2370
+ alone on its line */
2371
+ title_offset = title_end = 0;
2372
+ if (i + 1 < end
2373
+ && (data[i] == '\'' || data[i] == '"' || data[i] == '(')) {
2374
+ i++;
2375
+ title_offset = i;
2376
+ /* looking for EOL */
2377
+ while (i < end && data[i] != '\n' && data[i] != '\r') i++;
2378
+ if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r')
2379
+ title_end = i + 1;
2380
+ else title_end = i;
2381
+ /* stepping back */
2382
+ i -= 1;
2383
+ while (i > title_offset && data[i] == ' ')
2384
+ i -= 1;
2385
+ if (i > title_offset
2386
+ && (data[i] == '\'' || data[i] == '"' || data[i] == ')')) {
2387
+ line_end = title_end;
2388
+ title_end = i; } }
2389
+
2390
+ if (!line_end || link_end == link_offset)
2391
+ return 0; /* garbage after the link empty link */
2392
+
2393
+ /* a valid ref has been found, filling-in return structures */
2394
+ if (last)
2395
+ *last = line_end;
2396
+
2397
+ if (refs) {
2398
+ struct link_ref *ref;
2399
+
2400
+ ref = add_link_ref(refs, data + id_offset, id_end - id_offset);
2401
+ if (!ref)
2402
+ return 0;
2403
+
2404
+ ref->link = bufnew(link_end - link_offset);
2405
+ bufput(ref->link, data + link_offset, link_end - link_offset);
2406
+
2407
+ if (title_end > title_offset) {
2408
+ ref->title = bufnew(title_end - title_offset);
2409
+ bufput(ref->title, data + title_offset, title_end - title_offset);
2410
+ }
2411
+ }
2412
+
2413
+ return 1;
2414
+ }
2415
+
2416
/*
 * expand_tabs • copies `line` into `ob`, replacing every tab with one to
 * four spaces so that the following text starts on a multiple of four
 * columns.
 *
 * Columns are counted in characters rather than bytes: UTF-8 continuation
 * bytes (bit pattern 10xxxxxx) do not advance the column, so multi-byte
 * characters before a tab no longer shift the tab stop.  The input is
 * assumed to be valid UTF-8; combining characters are still counted as one
 * column each.
 */
static void expand_tabs(struct buf *ob, const uint8_t *line, size_t size)
{
	size_t i = 0, tab = 0;

	while (i < size) {
		size_t org = i;

		/* copy the run of non-tab bytes verbatim */
		while (i < size && line[i] != '\t') {
			/* only lead bytes and ASCII advance the column */
			if ((line[i] & 0xC0) != 0x80)
				tab++;
			i++;
		}

		if (i > org)
			bufput(ob, line + org, i - org);

		if (i >= size)
			break;

		/* pad to the next tab stop (always at least one space) */
		do {
			bufputc(ob, ' '); tab++;
		} while (tab % 4);

		i++;
	}
}
2440
+
2441
+ /**********************
2442
+ * EXPORTED FUNCTIONS *
2443
+ **********************/
2444
+
2445
+ struct sd_markdown *
2446
+ sd_markdown_new(
2447
+ unsigned int extensions,
2448
+ size_t max_nesting,
2449
+ const struct sd_callbacks *callbacks,
2450
+ void *opaque)
2451
+ {
2452
+ struct sd_markdown *md = NULL;
2453
+
2454
+ assert(max_nesting > 0 && callbacks);
2455
+
2456
+ md = malloc(sizeof(struct sd_markdown));
2457
+ if (!md)
2458
+ return NULL;
2459
+
2460
+ memcpy(&md->cb, callbacks, sizeof(struct sd_callbacks));
2461
+
2462
+ stack_init(&md->work_bufs[BUFFER_BLOCK], 4);
2463
+ stack_init(&md->work_bufs[BUFFER_SPAN], 8);
2464
+
2465
+ memset(md->active_char, 0x0, 256);
2466
+
2467
+ if (md->cb.emphasis || md->cb.double_emphasis || md->cb.triple_emphasis) {
2468
+ md->active_char['*'] = MD_CHAR_EMPHASIS;
2469
+ md->active_char['_'] = MD_CHAR_EMPHASIS;
2470
+ if (extensions & MKDEXT_STRIKETHROUGH)
2471
+ md->active_char['~'] = MD_CHAR_EMPHASIS;
2472
+ }
2473
+
2474
+ if (md->cb.codespan)
2475
+ md->active_char['`'] = MD_CHAR_CODESPAN;
2476
+
2477
+ if (md->cb.linebreak)
2478
+ md->active_char['\n'] = MD_CHAR_LINEBREAK;
2479
+
2480
+ if (md->cb.image || md->cb.link)
2481
+ md->active_char['['] = MD_CHAR_LINK;
2482
+
2483
+ md->active_char['<'] = MD_CHAR_LANGLE;
2484
+ md->active_char['\\'] = MD_CHAR_ESCAPE;
2485
+ md->active_char['&'] = MD_CHAR_ENTITITY;
2486
+
2487
+ if (extensions & MKDEXT_AUTOLINK) {
2488
+ md->active_char[':'] = MD_CHAR_AUTOLINK_URL;
2489
+ md->active_char['@'] = MD_CHAR_AUTOLINK_EMAIL;
2490
+ md->active_char['w'] = MD_CHAR_AUTOLINK_WWW;
2491
+ }
2492
+
2493
+ if (extensions & MKDEXT_SUPERSCRIPT)
2494
+ md->active_char['^'] = MD_CHAR_SUPERSCRIPT;
2495
+
2496
+ /* Extension data */
2497
+ md->ext_flags = extensions;
2498
+ md->opaque = opaque;
2499
+ md->max_nesting = max_nesting;
2500
+ md->in_link_body = 0;
2501
+
2502
+ return md;
2503
+ }
2504
+
2505
+ void
2506
+ sd_markdown_render(struct buf *ob, const uint8_t *document, size_t doc_size, struct sd_markdown *md)
2507
+ {
2508
+ #define MARKDOWN_GROW(x) ((x) + ((x) >> 1))
2509
+ static const char UTF8_BOM[] = {0xEF, 0xBB, 0xBF};
2510
+
2511
+ struct buf *text;
2512
+ size_t beg, end;
2513
+
2514
+ text = bufnew(64);
2515
+ if (!text)
2516
+ return;
2517
+
2518
+ /* Preallocate enough space for our buffer to avoid expanding while copying */
2519
+ bufgrow(text, doc_size);
2520
+
2521
+ /* reset the references table */
2522
+ memset(&md->refs, 0x0, REF_TABLE_SIZE * sizeof(void *));
2523
+
2524
+ /* first pass: looking for references, copying everything else */
2525
+ beg = 0;
2526
+
2527
+ /* Skip a possible UTF-8 BOM, even though the Unicode standard
2528
+ * discourages having these in UTF-8 documents */
2529
+ if (doc_size >= 3 && memcmp(document, UTF8_BOM, 3) == 0)
2530
+ beg += 3;
2531
+
2532
+ while (beg < doc_size) /* iterating over lines */
2533
+ if (is_ref(document, beg, doc_size, &end, md->refs))
2534
+ beg = end;
2535
+ else { /* skipping to the next line */
2536
+ end = beg;
2537
+ while (end < doc_size && document[end] != '\n' && document[end] != '\r')
2538
+ end++;
2539
+
2540
+ /* adding the line body if present */
2541
+ if (end > beg)
2542
+ expand_tabs(text, document + beg, end - beg);
2543
+
2544
+ while (end < doc_size && (document[end] == '\n' || document[end] == '\r')) {
2545
+ /* add one \n per newline */
2546
+ if (document[end] == '\n' || (end + 1 < doc_size && document[end + 1] != '\n'))
2547
+ bufputc(text, '\n');
2548
+ end++;
2549
+ }
2550
+
2551
+ beg = end;
2552
+ }
2553
+
2554
+ /* pre-grow the output buffer to minimize allocations */
2555
+ bufgrow(ob, MARKDOWN_GROW(text->size));
2556
+
2557
+ /* second pass: actual rendering */
2558
+ if (md->cb.doc_header)
2559
+ md->cb.doc_header(ob, md->opaque);
2560
+
2561
+ if (text->size) {
2562
+ /* adding a final newline if not already present */
2563
+ if (text->data[text->size - 1] != '\n' && text->data[text->size - 1] != '\r')
2564
+ bufputc(text, '\n');
2565
+
2566
+ parse_block(ob, md, text->data, text->size);
2567
+ }
2568
+
2569
+ if (md->cb.doc_footer)
2570
+ md->cb.doc_footer(ob, md->opaque);
2571
+
2572
+ /* clean-up */
2573
+ bufrelease(text);
2574
+ free_link_refs(md->refs);
2575
+
2576
+ assert(md->work_bufs[BUFFER_SPAN].size == 0);
2577
+ assert(md->work_bufs[BUFFER_BLOCK].size == 0);
2578
+ }
2579
+
2580
+ void
2581
+ sd_markdown_free(struct sd_markdown *md)
2582
+ {
2583
+ size_t i;
2584
+
2585
+ for (i = 0; i < (size_t)md->work_bufs[BUFFER_SPAN].asize; ++i)
2586
+ bufrelease(md->work_bufs[BUFFER_SPAN].item[i]);
2587
+
2588
+ for (i = 0; i < (size_t)md->work_bufs[BUFFER_BLOCK].asize; ++i)
2589
+ bufrelease(md->work_bufs[BUFFER_BLOCK].item[i]);
2590
+
2591
+ stack_free(&md->work_bufs[BUFFER_SPAN]);
2592
+ stack_free(&md->work_bufs[BUFFER_BLOCK]);
2593
+
2594
+ free(md);
2595
+ }
2596
+
2597
+ void
2598
+ sd_version(int *ver_major, int *ver_minor, int *ver_revision)
2599
+ {
2600
+ *ver_major = SUNDOWN_VER_MAJOR;
2601
+ *ver_minor = SUNDOWN_VER_MINOR;
2602
+ *ver_revision = SUNDOWN_VER_REVISION;
2603
+ }
2604
+
2605
+ /* vim: set filetype=c: */