github-markdown 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,69 @@
1
+ /*
2
+ * Copyright (c) 2011, Vicent Marti
3
+ *
4
+ * Permission to use, copy, modify, and distribute this software for any
5
+ * purpose with or without fee is hereby granted, provided that the above
6
+ * copyright notice and this permission notice appear in all copies.
7
+ *
8
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15
+ */
16
+
17
+ #ifndef UPSKIRT_HTML_H
18
+ #define UPSKIRT_HTML_H
19
+
20
+ #include "markdown.h"
21
+ #include "buffer.h"
22
+ #include <stdlib.h>
23
+
24
/*
 * html_renderopt: per-renderer state handed to the HTML callbacks.
 *
 * NOTE(review): the semantics of the individual fields are defined by the
 * renderer implementation, which is not part of this header -- confirm there.
 */
struct html_renderopt {
	/* bookkeeping used while emitting a table of contents */
	struct {
		int header_count;
		int current_level;
		int level_offset;
	} toc_data;

	/* presumably a bitmask of html_render_mode values -- TODO confirm */
	unsigned int flags;

	/* extra callbacks */
	void (*link_attributes)(struct buf *ob, const struct buf *url, void *self);
};
36
+
37
/* html_render_mode: independent option bits, one bit per option */
typedef enum {
	HTML_SKIP_HTML   = 0x001,
	HTML_SKIP_STYLE  = 0x002,
	HTML_SKIP_IMAGES = 0x004,
	HTML_SKIP_LINKS  = 0x008,
	HTML_EXPAND_TABS = 0x010,
	HTML_SAFELINK    = 0x020,
	HTML_TOC         = 0x040,
	HTML_HARD_WRAP   = 0x080,
	HTML_USE_XHTML   = 0x100,
	HTML_ESCAPE      = 0x200,
} html_render_mode;
49
+
50
/* html_tag: tag classification; HTML_TAG_NONE is the zero ("no match") value */
typedef enum {
	HTML_TAG_NONE  = 0,
	HTML_TAG_OPEN  = 1,
	HTML_TAG_CLOSE = 2,
} html_tag;
55
+
56
+ int
57
+ sdhtml_is_tag(const uint8_t *tag_data, size_t tag_size, const char *tagname);
58
+
59
+ extern void
60
+ sdhtml_renderer(struct sd_callbacks *callbacks, struct html_renderopt *options_ptr, unsigned int render_flags);
61
+
62
+ extern void
63
+ sdhtml_toc_renderer(struct sd_callbacks *callbacks, struct html_renderopt *options_ptr);
64
+
65
+ extern void
66
+ sdhtml_smartypants(struct buf *ob, const uint8_t *text, size_t size);
67
+
68
+ #endif
69
+
@@ -0,0 +1,206 @@
1
+ /* C code produced by gperf version 3.0.3 */
2
+ /* Command-line: gperf -N find_block_tag -H hash_block_tag -C -c -E --ignore-case html_block_names.txt */
3
+ /* Computed positions: -k'1-2' */
4
+
5
/*
 * The gperf tables below hard-code ISO-646 (ASCII) character values.
 * Refuse to build, with a readable diagnostic, on any other execution
 * character set.
 */
#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
      && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \
      && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \
      && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \
      && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \
      && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \
      && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \
      && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \
      && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \
      && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \
      && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \
      && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \
      && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \
      && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \
      && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \
      && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \
      && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \
      && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \
      && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \
      && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \
      && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \
      && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \
      && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126))
/* The character set is not based on ISO-646.  */
#error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gnu-gperf@gnu.org>."
#endif
31
+
32
+ /* maximum key range = 37, duplicates = 0 */
33
+
34
#ifndef GPERF_DOWNCASE
#define GPERF_DOWNCASE 1
/*
 * gperf_downcase: maps every byte to itself, except 'A'..'Z' (65..90),
 * which map to 'a'..'z' (97..122).  The table is only ever read, so it is
 * declared const.
 */
static const unsigned char gperf_downcase[256] =
  {
      0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
     15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,
     30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,
     45,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,
     60,  61,  62,  63,  64,  97,  98,  99, 100, 101, 102, 103, 104, 105, 106,
    107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
    122,  91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103, 104,
    105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
    120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,
    135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
    150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
    165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
    180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194,
    195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
    210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224,
    225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
    240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
    255
  };
#endif
58
+
59
+ #ifndef GPERF_CASE_STRNCMP
60
+ #define GPERF_CASE_STRNCMP 1
61
+ static int
62
+ gperf_case_strncmp (s1, s2, n)
63
+ register const char *s1;
64
+ register const char *s2;
65
+ register unsigned int n;
66
+ {
67
+ for (; n > 0;)
68
+ {
69
+ unsigned char c1 = gperf_downcase[(unsigned char)*s1++];
70
+ unsigned char c2 = gperf_downcase[(unsigned char)*s2++];
71
+ if (c1 != 0 && c1 == c2)
72
+ {
73
+ n--;
74
+ continue;
75
+ }
76
+ return (int)c1 - (int)c2;
77
+ }
78
+ return 0;
79
+ }
80
+ #endif
81
+
82
/*
 * hash_block_tag: gperf-generated perfect hash for the block tag names.
 *
 * The hash is len plus the association value of str[0], plus -- when len is
 * not 1 -- the association value of (str[1] + 1).  Upper- and lower-case
 * ASCII letters carry the same association values.
 *
 * The caller must guarantee len >= 1 (find_block_tag does); with len == 0
 * the default branch would read str[1].
 *
 * Defined with a prototype (rather than an old-style parameter list) so that
 * calls are type-checked.
 */
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
static unsigned int
hash_block_tag (const char *str, unsigned int len)
{
  /* 257 entries: the largest index used is (unsigned char)255 + 1 */
  static const unsigned char asso_values[] =
    {
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
       8, 30, 25, 20, 15, 10, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38,  0, 38,  0, 38,
       5,  5,  5, 15,  0, 38, 38,  0, 15, 10,
       0, 38, 38, 15,  0,  5, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38,  0, 38,
       0, 38,  5,  5,  5, 15,  0, 38, 38,  0,
      15, 10,  0, 38, 38, 15,  0,  5, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38
    };
  int hval = len;

  switch (hval)
    {
      default:
        hval += asso_values[(unsigned char)str[1]+1];
      /*FALLTHROUGH*/
      case 1:
        hval += asso_values[(unsigned char)str[0]];
        break;
    }
  return hval;
}
136
+
137
/*
 * find_block_tag: case-insensitive lookup of a block-level HTML tag name.
 *
 * str need not be NUL-terminated; only the first len bytes are examined.
 * Returns the canonical (lower-case) entry from the word list when the
 * first len bytes of str match it exactly, ignoring ASCII case; returns 0
 * otherwise.
 *
 * Defined with a prototype (rather than an old-style parameter list) so that
 * calls are type-checked.
 */
#ifdef __GNUC__
__inline
#ifdef __GNUC_STDC_INLINE__
__attribute__ ((__gnu_inline__))
#endif
#endif
const char *
find_block_tag (const char *str, unsigned int len)
{
  enum
    {
      TOTAL_KEYWORDS = 24,
      MIN_WORD_LENGTH = 1,
      MAX_WORD_LENGTH = 10,
      MIN_HASH_VALUE = 1,
      MAX_HASH_VALUE = 37
    };

  /* indexed by hash_block_tag(); empty strings are unused hash slots */
  static const char * const wordlist[] =
    {
      "",
      "p",
      "dl",
      "div",
      "math",
      "table",
      "",
      "ul",
      "del",
      "form",
      "blockquote",
      "figure",
      "ol",
      "fieldset",
      "",
      "h1",
      "",
      "h6",
      "pre",
      "", "",
      "script",
      "h5",
      "noscript",
      "",
      "style",
      "iframe",
      "h4",
      "ins",
      "", "", "",
      "h3",
      "", "", "", "",
      "h2"
    };

  if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
    {
      int key = hash_block_tag (str, len);

      if (key <= MAX_HASH_VALUE && key >= 0)
        {
          const char *s = wordlist[key];

          /* cheap first-byte test (ignoring the ASCII case bit) before the
             full comparison; s[len] == '\0' rejects candidates longer than
             the input */
          if ((((unsigned char)*str ^ (unsigned char)*s) & ~32) == 0 && !gperf_case_strncmp (str, s, len) && s[len] == '\0')
            return s;
        }
    }
  return 0;
}
@@ -0,0 +1,2505 @@
1
+ /* markdown.c - generic markdown parser */
2
+
3
+ /*
4
+ * Copyright (c) 2009, Natacha Porté
5
+ * Copyright (c) 2011, Vicent Marti
6
+ *
7
+ * Permission to use, copy, modify, and distribute this software for any
8
+ * purpose with or without fee is hereby granted, provided that the above
9
+ * copyright notice and this permission notice appear in all copies.
10
+ *
11
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18
+ */
19
+
20
+ #include "markdown.h"
21
+ #include "stack.h"
22
+
23
+ #include <assert.h>
24
+ #include <string.h>
25
+ #include <ctype.h>
26
+ #include <stdio.h>
27
+
28
+ #if defined(_WIN32)
29
+ #define strncasecmp _strnicmp
30
+ #endif
31
+
32
+ #define REF_TABLE_SIZE 8
33
+
34
+ #define BUFFER_BLOCK 0
35
+ #define BUFFER_SPAN 1
36
+
37
+ #define MKD_LI_END 8 /* internal list flag */
38
+
39
+ #define gperf_case_strncmp(s1, s2, n) strncasecmp(s1, s2, n)
40
+ #define GPERF_DOWNCASE 1
41
+ #define GPERF_CASE_STRNCMP 1
42
+ #include "html_blocks.h"
43
+
44
+ /***************
45
+ * LOCAL TYPES *
46
+ ***************/
47
+
48
/*
 * link_ref: one link reference definition.
 * Entries live in singly-linked bucket chains (see add_link_ref) and are
 * identified solely by the hash of their name.
 */
struct link_ref {
	unsigned int id;        /* hash_link_ref() of the reference name */

	struct buf *link;       /* released by free_link_refs */
	struct buf *title;      /* released by free_link_refs */

	struct link_ref *next;  /* next entry in the same bucket */
};
57
+
58
/*
 * char_trigger: handler invoked by parse_inline when it meets an active char.
 *   ob     - output buffer
 *   rndr   - renderer state
 *   data   - points at the active char (the beginning of the span)
 *   offset - number of valid chars before data
 *   size   - number of chars available from data onwards
 * Returns the number of chars taken care of; 0 means "not handled".
 */
struct sd_markdown;
typedef size_t
(*char_trigger)(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
65
+
66
+ static size_t char_emphasis(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
67
+ static size_t char_linebreak(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
68
+ static size_t char_codespan(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
69
+ static size_t char_escape(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
70
+ static size_t char_entity(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
71
+ static size_t char_langle_tag(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
72
+ static size_t char_autolink_url(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
73
+ static size_t char_autolink_email(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
74
+ static size_t char_autolink_www(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
75
+ static size_t char_link(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
76
+ static size_t char_superscript(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
77
+
78
/*
 * markdown_char_t: action codes; each value is the index of the matching
 * handler in markdown_char_ptrs.  MD_CHAR_NONE (0) marks an inactive char.
 */
enum markdown_char_t {
	MD_CHAR_NONE           = 0,
	MD_CHAR_EMPHASIS       = 1,
	MD_CHAR_CODESPAN       = 2,
	MD_CHAR_LINEBREAK      = 3,
	MD_CHAR_LINK           = 4,
	MD_CHAR_LANGLE         = 5,
	MD_CHAR_ESCAPE         = 6,
	MD_CHAR_ENTITITY       = 7,
	MD_CHAR_AUTOLINK_URL   = 8,
	MD_CHAR_AUTOLINK_EMAIL = 9,
	MD_CHAR_AUTOLINK_WWW   = 10,
	MD_CHAR_SUPERSCRIPT    = 11,
};
92
+
93
+ static char_trigger markdown_char_ptrs[] = {
94
+ NULL,
95
+ &char_emphasis,
96
+ &char_codespan,
97
+ &char_linebreak,
98
+ &char_link,
99
+ &char_langle_tag,
100
+ &char_escape,
101
+ &char_entity,
102
+ &char_autolink_url,
103
+ &char_autolink_email,
104
+ &char_autolink_www,
105
+ &char_superscript,
106
+ };
107
+
108
+ /* render • structure containing one particular render */
109
+ struct sd_markdown {
110
+ struct sd_callbacks cb;
111
+ void *opaque;
112
+
113
+ struct link_ref *refs[REF_TABLE_SIZE];
114
+ uint8_t active_char[256];
115
+ struct stack work_bufs[2];
116
+ unsigned int ext_flags;
117
+ size_t max_nesting;
118
+ int in_link_body;
119
+ };
120
+
121
+ /***************************
122
+ * HELPER FUNCTIONS *
123
+ ***************************/
124
+
125
+ static inline struct buf *
126
+ rndr_newbuf(struct sd_markdown *rndr, int type)
127
+ {
128
+ static const size_t buf_size[2] = {256, 64};
129
+ struct buf *work = NULL;
130
+ struct stack *pool = &rndr->work_bufs[type];
131
+
132
+ if (pool->size < pool->asize &&
133
+ pool->item[pool->size] != NULL) {
134
+ work = pool->item[pool->size++];
135
+ work->size = 0;
136
+ } else {
137
+ work = bufnew(buf_size[type]);
138
+ stack_push(pool, work);
139
+ }
140
+
141
+ return work;
142
+ }
143
+
144
+ static inline void
145
+ rndr_popbuf(struct sd_markdown *rndr, int type)
146
+ {
147
+ rndr->work_bufs[type].size--;
148
+ }
149
+
150
+ static void
151
+ unscape_text(struct buf *ob, struct buf *src)
152
+ {
153
+ size_t i = 0, org;
154
+ while (i < src->size) {
155
+ org = i;
156
+ while (i < src->size && src->data[i] != '\\')
157
+ i++;
158
+
159
+ if (i > org)
160
+ bufput(ob, src->data + org, i - org);
161
+
162
+ if (i + 1 >= src->size)
163
+ break;
164
+
165
+ bufputc(ob, src->data[i + 1]);
166
+ i += 2;
167
+ }
168
+ }
169
+
170
/*
 * hash_link_ref • case-insensitive hash of a link reference name.
 *
 * Each byte is lower-cased with tolower() and folded in as
 *   hash = byte + (hash << 6) + (hash << 16) - hash
 * in unsigned arithmetic.  An empty name hashes to 0.
 */
static unsigned int
hash_link_ref(const uint8_t *link_ref, size_t length)
{
	const uint8_t *cursor = link_ref;
	size_t remaining = length;
	unsigned int hash = 0;

	while (remaining > 0) {
		hash = tolower(*cursor) + (hash << 6) + (hash << 16) - hash;
		cursor++;
		remaining--;
	}

	return hash;
}
181
+
182
+ static struct link_ref *
183
+ add_link_ref(
184
+ struct link_ref **references,
185
+ const uint8_t *name, size_t name_size)
186
+ {
187
+ struct link_ref *ref = calloc(1, sizeof(struct link_ref));
188
+
189
+ if (!ref)
190
+ return NULL;
191
+
192
+ ref->id = hash_link_ref(name, name_size);
193
+ ref->next = references[ref->id % REF_TABLE_SIZE];
194
+
195
+ references[ref->id % REF_TABLE_SIZE] = ref;
196
+ return ref;
197
+ }
198
+
199
+ static struct link_ref *
200
+ find_link_ref(struct link_ref **references, uint8_t *name, size_t length)
201
+ {
202
+ unsigned int hash = hash_link_ref(name, length);
203
+ struct link_ref *ref = NULL;
204
+
205
+ ref = references[hash % REF_TABLE_SIZE];
206
+
207
+ while (ref != NULL) {
208
+ if (ref->id == hash)
209
+ return ref;
210
+
211
+ ref = ref->next;
212
+ }
213
+
214
+ return NULL;
215
+ }
216
+
217
+ static void
218
+ free_link_refs(struct link_ref **references)
219
+ {
220
+ size_t i;
221
+
222
+ for (i = 0; i < REF_TABLE_SIZE; ++i) {
223
+ struct link_ref *r = references[i];
224
+ struct link_ref *next;
225
+
226
+ while (r) {
227
+ next = r->next;
228
+ bufrelease(r->link);
229
+ bufrelease(r->title);
230
+ free(r);
231
+ r = next;
232
+ }
233
+ }
234
+ }
235
+
236
/*
 * Check whether a char is a Markdown space.
 *
 * Only the actual space and the newline count as spaces here; according to
 * the original note, tabs and carriage returns are filtered out during the
 * preprocessing phase.
 *
 * To be genuinely UTF-8 compliant this would have to decode a Unicode
 * codepoint and test its space property instead.
 *
 * Returns 1 for ' ' and '\n', 0 for anything else.
 */
static inline int
_isspace(int c)
{
	switch (c) {
	case ' ':
	case '\n':
		return 1;

	default:
		return 0;
	}
}
252
+
253
+ /****************************
254
+ * INLINE PARSING FUNCTIONS *
255
+ ****************************/
256
+
257
/* is_mail_autolink • looks for the address part of a mail autolink and '>' */
/* this is less strict than the original markdown e-mail address matching */
/*
 * The address is assumed to be [-@._a-zA-Z0-9]+ with exactly one '@'.
 * Returns the number of chars up to and including the closing '>', or 0 if
 * an invalid char is met, the '>' is missing, or the '@' count is not 1.
 */
static size_t
is_mail_autolink(uint8_t *data, size_t size)
{
	size_t pos, at_count = 0;

	for (pos = 0; pos < size; pos++) {
		const uint8_t ch = data[pos];

		if (isalnum(ch) || ch == '-' || ch == '.' || ch == '_')
			continue;

		if (ch == '@') {
			at_count++;
			continue;
		}

		if (ch == '>')
			return (at_count == 1) ? pos + 1 : 0;

		/* any other char cannot be part of an address */
		return 0;
	}

	/* ran out of input before the closing '>' */
	return 0;
}
288
+
289
+ /* tag_length • returns the length of the given tag, or 0 is it's not valid */
290
+ static size_t
291
+ tag_length(uint8_t *data, size_t size, enum mkd_autolink *autolink)
292
+ {
293
+ size_t i, j;
294
+
295
+ /* a valid tag can't be shorter than 3 chars */
296
+ if (size < 3) return 0;
297
+
298
+ /* begins with a '<' optionally followed by '/', followed by letter or number */
299
+ if (data[0] != '<') return 0;
300
+ i = (data[1] == '/') ? 2 : 1;
301
+
302
+ if (!isalnum(data[i]))
303
+ return 0;
304
+
305
+ /* scheme test */
306
+ *autolink = MKDA_NOT_AUTOLINK;
307
+
308
+ /* try to find the beginning of an URI */
309
+ while (i < size && (isalnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-'))
310
+ i++;
311
+
312
+ if (i > 1 && data[i] == '@') {
313
+ if ((j = is_mail_autolink(data + i, size - i)) != 0) {
314
+ *autolink = MKDA_EMAIL;
315
+ return i + j;
316
+ }
317
+ }
318
+
319
+ if (i > 2 && data[i] == ':') {
320
+ *autolink = MKDA_NORMAL;
321
+ i++;
322
+ }
323
+
324
+ /* completing autolink test: no whitespace or ' or " */
325
+ if (i >= size)
326
+ *autolink = MKDA_NOT_AUTOLINK;
327
+
328
+ else if (*autolink) {
329
+ j = i;
330
+
331
+ while (i < size) {
332
+ if (data[i] == '\\') i += 2;
333
+ else if (data[i] == '>' || data[i] == '\'' ||
334
+ data[i] == '"' || data[i] == ' ' || data[i] == '\n')
335
+ break;
336
+ else i++;
337
+ }
338
+
339
+ if (i >= size) return 0;
340
+ if (i > j && data[i] == '>') return i + 1;
341
+ /* one of the forbidden chars has been found */
342
+ *autolink = MKDA_NOT_AUTOLINK;
343
+ }
344
+
345
+ /* looking for sometinhg looking like a tag end */
346
+ while (i < size && data[i] != '>') i++;
347
+ if (i >= size) return 0;
348
+ return i + 1;
349
+ }
350
+
351
+ /* parse_inline • parses inline markdown elements */
352
+ static void
353
+ parse_inline(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
354
+ {
355
+ size_t i = 0, end = 0;
356
+ uint8_t action = 0;
357
+ struct buf work = { 0, 0, 0, 0 };
358
+
359
+ if (rndr->work_bufs[BUFFER_SPAN].size +
360
+ rndr->work_bufs[BUFFER_BLOCK].size > rndr->max_nesting)
361
+ return;
362
+
363
+ while (i < size) {
364
+ /* copying inactive chars into the output */
365
+ while (end < size && (action = rndr->active_char[data[end]]) == 0) {
366
+ end++;
367
+ }
368
+
369
+ if (rndr->cb.normal_text) {
370
+ work.data = data + i;
371
+ work.size = end - i;
372
+ rndr->cb.normal_text(ob, &work, rndr->opaque);
373
+ }
374
+ else
375
+ bufput(ob, data + i, end - i);
376
+
377
+ if (end >= size) break;
378
+ i = end;
379
+
380
+ end = markdown_char_ptrs[(int)action](ob, rndr, data + i, i, size - i);
381
+ if (!end) /* no action from the callback */
382
+ end = i + 1;
383
+ else {
384
+ i += end;
385
+ end = i;
386
+ }
387
+ }
388
+ }
389
+
390
/* emph_char_is_escaped • non-zero when data[loc] is preceded by an odd number of backslashes */
static int
emph_char_is_escaped(const uint8_t *data, size_t loc)
{
	size_t i = loc;

	while (i >= 1 && data[i - 1] == '\\')
		i--;

	/* an even run of backslashes escapes itself, leaving data[loc] active */
	return (int)((loc - i) % 2);
}

/* find_emph_char • looks for the next emph char, skipping other constructs */
/*
 * Scanning starts at offset 1.  Returns the offset of the next unescaped
 * occurrence of c, or 0 when there is none.
 *
 * Code spans and links met on the way are skipped as a whole.  If the input
 * ends inside one of them, the first c seen inside it is returned instead
 * (0 if there was none).
 *
 * Escaped chars are tested before c itself, so a backslash-escaped delimiter
 * is never reported as a closing delimiter; an escaped backslash in front of
 * the delimiter does not count as an escape.
 */
static size_t
find_emph_char(uint8_t *data, size_t size, uint8_t c)
{
	size_t i = 1;

	while (i < size) {
		while (i < size && data[i] != c && data[i] != '`' && data[i] != '[')
			i++;

		if (i == size)
			return 0;

		/* not counting escaped chars */
		if (emph_char_is_escaped(data, i)) {
			i++; continue;
		}

		if (data[i] == c)
			return i;

		if (data[i] == '`') {
			size_t span_nb = 0, bt;
			size_t tmp_i = 0;

			/* counting the number of opening backticks */
			while (i < size && data[i] == '`') {
				i++; span_nb++;
			}

			if (i >= size) return 0;

			/* finding the matching closing sequence */
			bt = 0;
			while (i < size && bt < span_nb) {
				if (!tmp_i && data[i] == c) tmp_i = i;
				if (data[i] == '`') bt++;
				else bt = 0;
				i++;
			}

			if (i >= size) return tmp_i;
		}
		/* skipping a link */
		else if (data[i] == '[') {
			size_t tmp_i = 0;
			uint8_t cc;

			i++;
			while (i < size && data[i] != ']') {
				if (!tmp_i && data[i] == c) tmp_i = i;
				i++;
			}

			i++;
			while (i < size && (data[i] == ' ' || data[i] == '\n'))
				i++;

			if (i >= size)
				return tmp_i;

			switch (data[i]) {
			case '[':
				cc = ']'; break;

			case '(':
				cc = ')'; break;

			default:
				/* not a link after all */
				if (tmp_i)
					return tmp_i;
				else
					continue;
			}

			i++;
			while (i < size && data[i] != cc) {
				if (!tmp_i && data[i] == c) tmp_i = i;
				i++;
			}

			if (i >= size)
				return tmp_i;

			i++;
		}
	}

	return 0;
}
480
+
481
+ /* parse_emph1 • parsing single emphase */
482
+ /* closed by a symbol not preceded by whitespace and not followed by symbol */
483
+ static size_t
484
+ parse_emph1(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c)
485
+ {
486
+ size_t i = 0, len;
487
+ struct buf *work = 0;
488
+ int r;
489
+
490
+ if (!rndr->cb.emphasis) return 0;
491
+
492
+ /* skipping one symbol if coming from emph3 */
493
+ if (size > 1 && data[0] == c && data[1] == c) i = 1;
494
+
495
+ while (i < size) {
496
+ len = find_emph_char(data + i, size - i, c);
497
+ if (!len) return 0;
498
+ i += len;
499
+ if (i >= size) return 0;
500
+
501
+ if (data[i] == c && !_isspace(data[i - 1])) {
502
+
503
+ if (rndr->ext_flags & MKDEXT_NO_INTRA_EMPHASIS) {
504
+ if (!(i + 1 == size || _isspace(data[i + 1]) || ispunct(data[i + 1])))
505
+ continue;
506
+ }
507
+
508
+ work = rndr_newbuf(rndr, BUFFER_SPAN);
509
+ parse_inline(work, rndr, data, i);
510
+ r = rndr->cb.emphasis(ob, work, rndr->opaque);
511
+ rndr_popbuf(rndr, BUFFER_SPAN);
512
+ return r ? i + 1 : 0;
513
+ }
514
+ }
515
+
516
+ return 0;
517
+ }
518
+
519
+ /* parse_emph2 • parsing single emphase */
520
+ static size_t
521
+ parse_emph2(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c)
522
+ {
523
+ int (*render_method)(struct buf *ob, const struct buf *text, void *opaque);
524
+ size_t i = 0, len;
525
+ struct buf *work = 0;
526
+ int r;
527
+
528
+ render_method = (c == '~') ? rndr->cb.strikethrough : rndr->cb.double_emphasis;
529
+
530
+ if (!render_method)
531
+ return 0;
532
+
533
+ while (i < size) {
534
+ len = find_emph_char(data + i, size - i, c);
535
+ if (!len) return 0;
536
+ i += len;
537
+
538
+ if (i + 1 < size && data[i] == c && data[i + 1] == c && i && !_isspace(data[i - 1])) {
539
+ work = rndr_newbuf(rndr, BUFFER_SPAN);
540
+ parse_inline(work, rndr, data, i);
541
+ r = render_method(ob, work, rndr->opaque);
542
+ rndr_popbuf(rndr, BUFFER_SPAN);
543
+ return r ? i + 2 : 0;
544
+ }
545
+ i++;
546
+ }
547
+ return 0;
548
+ }
549
+
550
+ /* parse_emph3 • parsing single emphase */
551
+ /* finds the first closing tag, and delegates to the other emph */
552
+ static size_t
553
+ parse_emph3(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c)
554
+ {
555
+ size_t i = 0, len;
556
+ int r;
557
+
558
+ while (i < size) {
559
+ len = find_emph_char(data + i, size - i, c);
560
+ if (!len) return 0;
561
+ i += len;
562
+
563
+ /* skip whitespace preceded symbols */
564
+ if (data[i] != c || _isspace(data[i - 1]))
565
+ continue;
566
+
567
+ if (i + 2 < size && data[i + 1] == c && data[i + 2] == c && rndr->cb.triple_emphasis) {
568
+ /* triple symbol found */
569
+ struct buf *work = rndr_newbuf(rndr, BUFFER_SPAN);
570
+
571
+ parse_inline(work, rndr, data, i);
572
+ r = rndr->cb.triple_emphasis(ob, work, rndr->opaque);
573
+ rndr_popbuf(rndr, BUFFER_SPAN);
574
+ return r ? i + 3 : 0;
575
+
576
+ } else if (i + 1 < size && data[i + 1] == c) {
577
+ /* double symbol found, handing over to emph1 */
578
+ len = parse_emph1(ob, rndr, data - 2, size + 2, c);
579
+ if (!len) return 0;
580
+ else return len - 2;
581
+
582
+ } else {
583
+ /* single symbol found, handing over to emph2 */
584
+ len = parse_emph2(ob, rndr, data - 1, size + 1, c);
585
+ if (!len) return 0;
586
+ else return len - 1;
587
+ }
588
+ }
589
+ return 0;
590
+ }
591
+
592
/* char_emphasis • single, double and triple emphasis parsing */
/*
 * Counts the opening symbols (1, 2 or 3) and delegates to the matching
 * parse_emphN.  Whitespace cannot follow an opening run, and '~' is only
 * accepted as the two-char strikethrough marker.  Returns the number of
 * chars consumed including the opening run, or 0.
 */
static size_t
char_emphasis(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
{
	const uint8_t c = data[0];
	size_t ret;

	/* one opening symbol */
	if (size > 2 && data[1] != c) {
		if (c == '~' || _isspace(data[1]))
			return 0;

		ret = parse_emph1(ob, rndr, data + 1, size - 1, c);
		return ret ? ret + 1 : 0;
	}

	/* two opening symbols */
	if (size > 3 && data[1] == c && data[2] != c) {
		if (_isspace(data[2]))
			return 0;

		ret = parse_emph2(ob, rndr, data + 2, size - 2, c);
		return ret ? ret + 2 : 0;
	}

	/* three opening symbols */
	if (size > 4 && data[1] == c && data[2] == c && data[3] != c) {
		if (c == '~' || _isspace(data[3]))
			return 0;

		ret = parse_emph3(ob, rndr, data + 3, size - 3, c);
		return ret ? ret + 3 : 0;
	}

	return 0;
}
624
+
625
+
626
+ /* char_linebreak • '\n' preceded by two spaces (assuming linebreak != 0) */
627
+ static size_t
628
+ char_linebreak(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
629
+ {
630
+ if (offset < 2 || data[-1] != ' ' || data[-2] != ' ')
631
+ return 0;
632
+
633
+ /* removing the last space from ob and rendering */
634
+ while (ob->size && ob->data[ob->size - 1] == ' ')
635
+ ob->size--;
636
+
637
+ return rndr->cb.linebreak(ob, rndr->opaque) ? 1 : 0;
638
+ }
639
+
640
+
641
+ /* char_codespan • '`' parsing a code span (assuming codespan != 0) */
642
+ static size_t
643
+ char_codespan(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
644
+ {
645
+ size_t end, nb = 0, i, f_begin, f_end;
646
+
647
+ /* counting the number of backticks in the delimiter */
648
+ while (nb < size && data[nb] == '`')
649
+ nb++;
650
+
651
+ /* finding the next delimiter */
652
+ i = 0;
653
+ for (end = nb; end < size && i < nb; end++) {
654
+ if (data[end] == '`') i++;
655
+ else i = 0;
656
+ }
657
+
658
+ if (i < nb && end >= size)
659
+ return 0; /* no matching delimiter */
660
+
661
+ /* trimming outside whitespaces */
662
+ f_begin = nb;
663
+ while (f_begin < end && data[f_begin] == ' ')
664
+ f_begin++;
665
+
666
+ f_end = end - nb;
667
+ while (f_end > nb && data[f_end-1] == ' ')
668
+ f_end--;
669
+
670
+ /* real code span */
671
+ if (f_begin < f_end) {
672
+ struct buf work = { data + f_begin, f_end - f_begin, 0, 0 };
673
+ if (!rndr->cb.codespan(ob, &work, rndr->opaque))
674
+ end = 0;
675
+ } else {
676
+ if (!rndr->cb.codespan(ob, 0, rndr->opaque))
677
+ end = 0;
678
+ }
679
+
680
+ return end;
681
+ }
682
+
683
+
684
+ /* char_escape • '\\' backslash escape */
685
+ static size_t
686
+ char_escape(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
687
+ {
688
+ static const char *escape_chars = "\\`*_{}[]()#+-.!:|&<>^~";
689
+ struct buf work = { 0, 0, 0, 0 };
690
+
691
+ if (size > 1) {
692
+ if (strchr(escape_chars, data[1]) == NULL)
693
+ return 0;
694
+
695
+ if (rndr->cb.normal_text) {
696
+ work.data = data + 1;
697
+ work.size = 1;
698
+ rndr->cb.normal_text(ob, &work, rndr->opaque);
699
+ }
700
+ else bufputc(ob, data[1]);
701
+ } else if (size == 1) {
702
+ bufputc(ob, data[0]);
703
+ }
704
+
705
+ return 2;
706
+ }
707
+
708
+ /* char_entity • '&' escaped when it doesn't belong to an entity */
709
+ /* valid entities are assumed to be anything matching &#?[A-Za-z0-9]+; */
710
+ static size_t
711
+ char_entity(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
712
+ {
713
+ size_t end = 1;
714
+ struct buf work = { 0, 0, 0, 0 };
715
+
716
+ if (end < size && data[end] == '#')
717
+ end++;
718
+
719
+ while (end < size && isalnum(data[end]))
720
+ end++;
721
+
722
+ if (end < size && data[end] == ';')
723
+ end++; /* real entity */
724
+ else
725
+ return 0; /* lone '&' */
726
+
727
+ if (rndr->cb.entity) {
728
+ work.data = data;
729
+ work.size = end;
730
+ rndr->cb.entity(ob, &work, rndr->opaque);
731
+ }
732
+ else bufput(ob, data, end);
733
+
734
+ return end;
735
+ }
736
+
737
+ /* char_langle_tag • '<' when tags or autolinks are allowed */
738
+ static size_t
739
+ char_langle_tag(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
740
+ {
741
+ enum mkd_autolink altype = MKDA_NOT_AUTOLINK;
742
+ size_t end = tag_length(data, size, &altype);
743
+ struct buf work = { data, end, 0, 0 };
744
+ int ret = 0;
745
+
746
+ if (end > 2) {
747
+ if (rndr->cb.autolink && altype != MKDA_NOT_AUTOLINK) {
748
+ struct buf *u_link = rndr_newbuf(rndr, BUFFER_SPAN);
749
+ work.data = data + 1;
750
+ work.size = end - 2;
751
+ unscape_text(u_link, &work);
752
+ ret = rndr->cb.autolink(ob, u_link, altype, rndr->opaque);
753
+ rndr_popbuf(rndr, BUFFER_SPAN);
754
+ }
755
+ else if (rndr->cb.raw_html_tag)
756
+ ret = rndr->cb.raw_html_tag(ob, &work, rndr->opaque);
757
+ }
758
+
759
+ if (!ret) return 0;
760
+ else return end;
761
+ }
762
+
763
+ static size_t
764
+ char_autolink_www(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
765
+ {
766
+ struct buf *link, *link_url, *link_text;
767
+ size_t link_len, rewind;
768
+
769
+ if (!rndr->cb.link || rndr->in_link_body)
770
+ return 0;
771
+
772
+ link = rndr_newbuf(rndr, BUFFER_SPAN);
773
+
774
+ if ((link_len = sd_autolink__www(&rewind, link, data, offset, size)) > 0) {
775
+ link_url = rndr_newbuf(rndr, BUFFER_SPAN);
776
+ BUFPUTSL(link_url, "http://");
777
+ bufput(link_url, link->data, link->size);
778
+
779
+ ob->size -= rewind;
780
+ if (rndr->cb.normal_text) {
781
+ link_text = rndr_newbuf(rndr, BUFFER_SPAN);
782
+ rndr->cb.normal_text(link_text, link, rndr->opaque);
783
+ rndr->cb.link(ob, link_url, NULL, link_text, rndr->opaque);
784
+ rndr_popbuf(rndr, BUFFER_SPAN);
785
+ } else {
786
+ rndr->cb.link(ob, link_url, NULL, link, rndr->opaque);
787
+ }
788
+ rndr_popbuf(rndr, BUFFER_SPAN);
789
+ }
790
+
791
+ rndr_popbuf(rndr, BUFFER_SPAN);
792
+ return link_len;
793
+ }
794
+
795
+ static size_t
796
+ char_autolink_email(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
797
+ {
798
+ struct buf *link;
799
+ size_t link_len, rewind;
800
+
801
+ if (!rndr->cb.autolink || rndr->in_link_body)
802
+ return 0;
803
+
804
+ link = rndr_newbuf(rndr, BUFFER_SPAN);
805
+
806
+ if ((link_len = sd_autolink__email(&rewind, link, data, offset, size)) > 0) {
807
+ ob->size -= rewind;
808
+ rndr->cb.autolink(ob, link, MKDA_EMAIL, rndr->opaque);
809
+ }
810
+
811
+ rndr_popbuf(rndr, BUFFER_SPAN);
812
+ return link_len;
813
+ }
814
+
815
+ static size_t
816
+ char_autolink_url(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
817
+ {
818
+ struct buf *link;
819
+ size_t link_len, rewind;
820
+
821
+ if (!rndr->cb.autolink || rndr->in_link_body)
822
+ return 0;
823
+
824
+ link = rndr_newbuf(rndr, BUFFER_SPAN);
825
+
826
+ if ((link_len = sd_autolink__url(&rewind, link, data, offset, size)) > 0) {
827
+ ob->size -= rewind;
828
+ rndr->cb.autolink(ob, link, MKDA_NORMAL, rndr->opaque);
829
+ }
830
+
831
+ rndr_popbuf(rndr, BUFFER_SPAN);
832
+ return link_len;
833
+ }
834
+
835
+ /* char_link • '[': parsing a link or an image */
836
+ static size_t
837
+ char_link(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
838
+ {
839
+ int is_img = (offset && data[-1] == '!'), level;
840
+ size_t i = 1, txt_e, link_b = 0, link_e = 0, title_b = 0, title_e = 0;
841
+ struct buf *content = 0;
842
+ struct buf *link = 0;
843
+ struct buf *title = 0;
844
+ struct buf *u_link = 0;
845
+ size_t org_work_size = rndr->work_bufs[BUFFER_SPAN].size;
846
+ int text_has_nl = 0, ret = 0;
847
+ int in_title = 0, qtype = 0;
848
+
849
+ /* checking whether the correct renderer exists */
850
+ if ((is_img && !rndr->cb.image) || (!is_img && !rndr->cb.link))
851
+ goto cleanup;
852
+
853
+ /* looking for the matching closing bracket */
854
+ for (level = 1; i < size; i++) {
855
+ if (data[i] == '\n')
856
+ text_has_nl = 1;
857
+
858
+ else if (data[i - 1] == '\\')
859
+ continue;
860
+
861
+ else if (data[i] == '[')
862
+ level++;
863
+
864
+ else if (data[i] == ']') {
865
+ level--;
866
+ if (level <= 0)
867
+ break;
868
+ }
869
+ }
870
+
871
+ if (i >= size)
872
+ goto cleanup;
873
+
874
+ txt_e = i;
875
+ i++;
876
+
877
+ /* skip any amount of whitespace or newline */
878
+ /* (this is much more laxist than original markdown syntax) */
879
+ while (i < size && _isspace(data[i]))
880
+ i++;
881
+
882
+ /* inline style link */
883
+ if (i < size && data[i] == '(') {
884
+ /* skipping initial whitespace */
885
+ i++;
886
+
887
+ while (i < size && _isspace(data[i]))
888
+ i++;
889
+
890
+ link_b = i;
891
+
892
+ /* looking for link end: ' " ) */
893
+ while (i < size) {
894
+ if (data[i] == '\\') i += 2;
895
+ else if (data[i] == ')') break;
896
+ else if (i >= 1 && _isspace(data[i-1]) && (data[i] == '\'' || data[i] == '"')) break;
897
+ else i++;
898
+ }
899
+
900
+ if (i >= size) goto cleanup;
901
+ link_e = i;
902
+
903
+ /* looking for title end if present */
904
+ if (data[i] == '\'' || data[i] == '"') {
905
+ qtype = data[i];
906
+ in_title = 1;
907
+ i++;
908
+ title_b = i;
909
+
910
+ while (i < size) {
911
+ if (data[i] == '\\') i += 2;
912
+ else if (data[i] == qtype) {in_title = 0; i++;}
913
+ else if ((data[i] == ')') && !in_title) break;
914
+ else i++;
915
+ }
916
+
917
+ if (i >= size) goto cleanup;
918
+
919
+ /* skipping whitespaces after title */
920
+ title_e = i - 1;
921
+ while (title_e > title_b && _isspace(data[title_e]))
922
+ title_e--;
923
+
924
+ /* checking for closing quote presence */
925
+ if (data[title_e] != '\'' && data[title_e] != '"') {
926
+ title_b = title_e = 0;
927
+ link_e = i;
928
+ }
929
+ }
930
+
931
+ /* remove whitespace at the end of the link */
932
+ while (link_e > link_b && _isspace(data[link_e - 1]))
933
+ link_e--;
934
+
935
+ /* remove optional angle brackets around the link */
936
+ if (data[link_b] == '<') link_b++;
937
+ if (data[link_e - 1] == '>') link_e--;
938
+
939
+ /* building escaped link and title */
940
+ if (link_e > link_b) {
941
+ link = rndr_newbuf(rndr, BUFFER_SPAN);
942
+ bufput(link, data + link_b, link_e - link_b);
943
+ }
944
+
945
+ if (title_e > title_b) {
946
+ title = rndr_newbuf(rndr, BUFFER_SPAN);
947
+ bufput(title, data + title_b, title_e - title_b);
948
+ }
949
+
950
+ i++;
951
+ }
952
+
953
+ /* reference style link */
954
+ else if (i < size && data[i] == '[') {
955
+ struct buf id = { 0, 0, 0, 0 };
956
+ struct link_ref *lr;
957
+
958
+ /* looking for the id */
959
+ i++;
960
+ link_b = i;
961
+ while (i < size && data[i] != ']') i++;
962
+ if (i >= size) goto cleanup;
963
+ link_e = i;
964
+
965
+ /* finding the link_ref */
966
+ if (link_b == link_e) {
967
+ if (text_has_nl) {
968
+ struct buf *b = rndr_newbuf(rndr, BUFFER_SPAN);
969
+ size_t j;
970
+
971
+ for (j = 1; j < txt_e; j++) {
972
+ if (data[j] != '\n')
973
+ bufputc(b, data[j]);
974
+ else if (data[j - 1] != ' ')
975
+ bufputc(b, ' ');
976
+ }
977
+
978
+ id.data = b->data;
979
+ id.size = b->size;
980
+ } else {
981
+ id.data = data + 1;
982
+ id.size = txt_e - 1;
983
+ }
984
+ } else {
985
+ id.data = data + link_b;
986
+ id.size = link_e - link_b;
987
+ }
988
+
989
+ lr = find_link_ref(rndr->refs, id.data, id.size);
990
+ if (!lr)
991
+ goto cleanup;
992
+
993
+ /* keeping link and title from link_ref */
994
+ link = lr->link;
995
+ title = lr->title;
996
+ i++;
997
+ }
998
+
999
+ /* shortcut reference style link */
1000
+ else {
1001
+ struct buf id = { 0, 0, 0, 0 };
1002
+ struct link_ref *lr;
1003
+
1004
+ /* crafting the id */
1005
+ if (text_has_nl) {
1006
+ struct buf *b = rndr_newbuf(rndr, BUFFER_SPAN);
1007
+ size_t j;
1008
+
1009
+ for (j = 1; j < txt_e; j++) {
1010
+ if (data[j] != '\n')
1011
+ bufputc(b, data[j]);
1012
+ else if (data[j - 1] != ' ')
1013
+ bufputc(b, ' ');
1014
+ }
1015
+
1016
+ id.data = b->data;
1017
+ id.size = b->size;
1018
+ } else {
1019
+ id.data = data + 1;
1020
+ id.size = txt_e - 1;
1021
+ }
1022
+
1023
+ /* finding the link_ref */
1024
+ lr = find_link_ref(rndr->refs, id.data, id.size);
1025
+ if (!lr)
1026
+ goto cleanup;
1027
+
1028
+ /* keeping link and title from link_ref */
1029
+ link = lr->link;
1030
+ title = lr->title;
1031
+
1032
+ /* rewinding the whitespace */
1033
+ i = txt_e + 1;
1034
+ }
1035
+
1036
+ /* building content: img alt is escaped, link content is parsed */
1037
+ if (txt_e > 1) {
1038
+ content = rndr_newbuf(rndr, BUFFER_SPAN);
1039
+ if (is_img) {
1040
+ bufput(content, data + 1, txt_e - 1);
1041
+ } else {
1042
+ /* disable autolinking when parsing inline the
1043
+ * content of a link */
1044
+ rndr->in_link_body = 1;
1045
+ parse_inline(content, rndr, data + 1, txt_e - 1);
1046
+ rndr->in_link_body = 0;
1047
+ }
1048
+ }
1049
+
1050
+ if (link) {
1051
+ u_link = rndr_newbuf(rndr, BUFFER_SPAN);
1052
+ unscape_text(u_link, link);
1053
+ }
1054
+
1055
+ /* calling the relevant rendering function */
1056
+ if (is_img) {
1057
+ if (ob->size && ob->data[ob->size - 1] == '!')
1058
+ ob->size -= 1;
1059
+
1060
+ ret = rndr->cb.image(ob, u_link, title, content, rndr->opaque);
1061
+ } else {
1062
+ ret = rndr->cb.link(ob, u_link, title, content, rndr->opaque);
1063
+ }
1064
+
1065
+ /* cleanup */
1066
+ cleanup:
1067
+ rndr->work_bufs[BUFFER_SPAN].size = (int)org_work_size;
1068
+ return ret ? i : 0;
1069
+ }
1070
+
1071
+ static size_t
1072
+ char_superscript(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
1073
+ {
1074
+ size_t sup_start, sup_len;
1075
+ struct buf *sup;
1076
+
1077
+ if (!rndr->cb.superscript)
1078
+ return 0;
1079
+
1080
+ if (size < 2)
1081
+ return 0;
1082
+
1083
+ if (data[1] == '(') {
1084
+ sup_start = sup_len = 2;
1085
+
1086
+ while (sup_len < size && data[sup_len] != ')' && data[sup_len - 1] != '\\')
1087
+ sup_len++;
1088
+
1089
+ if (sup_len == size)
1090
+ return 0;
1091
+ } else {
1092
+ sup_start = sup_len = 1;
1093
+
1094
+ while (sup_len < size && !_isspace(data[sup_len]))
1095
+ sup_len++;
1096
+ }
1097
+
1098
+ if (sup_len - sup_start == 0)
1099
+ return (sup_start == 2) ? 3 : 0;
1100
+
1101
+ sup = rndr_newbuf(rndr, BUFFER_SPAN);
1102
+ parse_inline(sup, rndr, data + sup_start, sup_len - sup_start);
1103
+ rndr->cb.superscript(ob, sup, rndr->opaque);
1104
+ rndr_popbuf(rndr, BUFFER_SPAN);
1105
+
1106
+ return (sup_start == 2) ? sup_len + 1 : sup_len;
1107
+ }
1108
+
1109
+ /*********************************
1110
+ * BLOCK-LEVEL PARSING FUNCTIONS *
1111
+ *********************************/
1112
+
1113
/* is_empty • returns the line length when it is empty, 0 otherwise */
/* a line is empty when it holds only ' ' bytes up to its '\n' (or up to
 * `size`); the returned length always counts one byte for the terminator,
 * so it is size + 1 when the buffer ends without a newline */
static size_t
is_empty(uint8_t *data, size_t size)
{
	size_t pos = 0;

	while (pos < size && data[pos] != '\n') {
		if (data[pos] != ' ')
			return 0;

		pos++;
	}

	return pos + 1;
}
1125
+
1126
/* is_hrule • returns whether a line is a horizontal rule */
/* a rule is at most three leading spaces followed by a line made only of
 * spaces and at least three copies of one of '*', '-' or '_' */
static int
is_hrule(uint8_t *data, size_t size)
{
	size_t pos = 0, marks = 0;
	uint8_t mark;

	if (size < 3)
		return 0;

	/* up to three spaces of indentation (size >= 3 keeps this in bounds) */
	while (pos < 3 && data[pos] == ' ')
		pos++;

	/* at least three bytes must remain after the indentation */
	if (pos + 2 >= size)
		return 0;

	mark = data[pos];
	if (mark != '*' && mark != '-' && mark != '_')
		return 0;

	/* the rest of the line may only hold the rule character or spaces */
	for (; pos < size && data[pos] != '\n'; pos++) {
		if (data[pos] == mark)
			marks++;
		else if (data[pos] != ' ')
			return 0;
	}

	return marks >= 3;
}
1156
+
1157
+ /* check if a line is a code fence; return its size if it is */
1158
+ static size_t
1159
+ is_codefence(uint8_t *data, size_t size, struct buf *syntax)
1160
+ {
1161
+ size_t i = 0, n = 0;
1162
+ uint8_t c;
1163
+
1164
+ /* skipping initial spaces */
1165
+ if (size < 3) return 0;
1166
+ if (data[0] == ' ') { i++;
1167
+ if (data[1] == ' ') { i++;
1168
+ if (data[2] == ' ') { i++; } } }
1169
+
1170
+ /* looking at the hrule uint8_t */
1171
+ if (i + 2 >= size || !(data[i] == '~' || data[i] == '`'))
1172
+ return 0;
1173
+
1174
+ c = data[i];
1175
+
1176
+ /* the whole line must be the uint8_t or whitespace */
1177
+ while (i < size && data[i] == c) {
1178
+ n++; i++;
1179
+ }
1180
+
1181
+ if (n < 3)
1182
+ return 0;
1183
+
1184
+ if (syntax != NULL) {
1185
+ size_t syn = 0;
1186
+
1187
+ while (i < size && data[i] == ' ')
1188
+ i++;
1189
+
1190
+ syntax->data = data + i;
1191
+
1192
+ if (i < size && data[i] == '{') {
1193
+ i++; syntax->data++;
1194
+
1195
+ while (i < size && data[i] != '}' && data[i] != '\n') {
1196
+ syn++; i++;
1197
+ }
1198
+
1199
+ if (i == size || data[i] != '}')
1200
+ return 0;
1201
+
1202
+ /* strip all whitespace at the beginning and the end
1203
+ * of the {} block */
1204
+ while (syn > 0 && _isspace(syntax->data[0])) {
1205
+ syntax->data++; syn--;
1206
+ }
1207
+
1208
+ while (syn > 0 && _isspace(syntax->data[syn - 1]))
1209
+ syn--;
1210
+
1211
+ i++;
1212
+ } else {
1213
+ while (i < size && !_isspace(data[i])) {
1214
+ syn++; i++;
1215
+ }
1216
+ }
1217
+
1218
+ syntax->size = syn;
1219
+ }
1220
+
1221
+ while (i < size && data[i] != '\n') {
1222
+ if (!_isspace(data[i]))
1223
+ return 0;
1224
+
1225
+ i++;
1226
+ }
1227
+
1228
+ return i + 1;
1229
+ }
1230
+
1231
+ /* is_atxheader • returns whether the line is a hash-prefixed header */
1232
+ static int
1233
+ is_atxheader(struct sd_markdown *rndr, uint8_t *data, size_t size)
1234
+ {
1235
+ if (data[0] != '#')
1236
+ return 0;
1237
+
1238
+ if (rndr->ext_flags & MKDEXT_SPACE_HEADERS) {
1239
+ size_t level = 0;
1240
+
1241
+ while (level < size && level < 6 && data[level] == '#')
1242
+ level++;
1243
+
1244
+ if (level < size && data[level] != ' ')
1245
+ return 0;
1246
+ }
1247
+
1248
+ return 1;
1249
+ }
1250
+
1251
/* is_headerline • returns whether the line is a setext-style hdr underline */
/* returns 1 for a run of '=', 2 for a run of '-', 0 otherwise; only spaces
 * may follow the run before the end of the line */
static int
is_headerline(uint8_t *data, size_t size)
{
	const uint8_t mark = data[0];
	size_t pos = 1;
	int level;

	if (mark == '=')
		level = 1;
	else if (mark == '-')
		level = 2;
	else
		return 0;

	/* the underline run, then optional trailing spaces */
	while (pos < size && data[pos] == mark)
		pos++;

	while (pos < size && data[pos] == ' ')
		pos++;

	/* anything else before the newline disqualifies the line */
	if (pos < size && data[pos] != '\n')
		return 0;

	return level;
}
1271
+
1272
/* is_next_headerline • applies is_headerline() to the line after the
 * current one */
/* returns 0 when no byte follows the first newline */
static int
is_next_headerline(uint8_t *data, size_t size)
{
	size_t next = 0;

	/* locate the end of the current line */
	while (next < size && data[next] != '\n')
		next++;

	/* step over the newline */
	next++;
	if (next >= size)
		return 0;

	return is_headerline(data + next, size - next);
}
1285
+
1286
/* prefix_quote • returns blockquote prefix length */
/* the prefix is up to three spaces, a '>' and one optional space;
 * 0 means the line is not quoted */
static size_t
prefix_quote(uint8_t *data, size_t size)
{
	size_t pos = 0;

	while (pos < 3 && pos < size && data[pos] == ' ')
		pos++;

	if (pos >= size || data[pos] != '>')
		return 0;

	pos++;

	/* a single space after the marker belongs to the prefix */
	if (pos < size && data[pos] == ' ')
		pos++;

	return pos;
}
1304
+
1305
/* prefix_code • returns prefix length for block code */
/* the prefix is exactly four leading spaces: returns 4 or 0 */
static size_t
prefix_code(uint8_t *data, size_t size)
{
	size_t k;

	if (size <= 3)
		return 0;

	for (k = 0; k < 4; k++) {
		if (data[k] != ' ')
			return 0;
	}

	return 4;
}
1314
+
1315
/* prefix_oli • returns ordered list item prefix */
/* the prefix is up to three spaces, one or more digits, a '.' and a space;
 * returns 0 when the line does not match or when the following line is a
 * setext header underline */
static size_t
prefix_oli(uint8_t *data, size_t size)
{
	size_t pos = 0;

	while (pos < 3 && pos < size && data[pos] == ' ')
		pos++;

	/* at least one digit is required */
	if (pos >= size || data[pos] < '0' || data[pos] > '9')
		return 0;

	do {
		pos++;
	} while (pos < size && data[pos] >= '0' && data[pos] <= '9');

	if (pos + 1 >= size || data[pos] != '.' || data[pos + 1] != ' ')
		return 0;

	/* a line underlined as a header is not a list item */
	if (is_next_headerline(data + pos, size - pos))
		return 0;

	return pos + 2;
}
1339
+
1340
/* prefix_uli • returns unordered list item prefix */
/* the prefix is up to three spaces, one of '*', '+' or '-' and a space;
 * returns 0 when the line does not match or when the following line is a
 * setext header underline */
static size_t
prefix_uli(uint8_t *data, size_t size)
{
	size_t pos = 0;

	while (pos < 3 && pos < size && data[pos] == ' ')
		pos++;

	/* both the bullet and the space after it must be present */
	if (pos + 1 >= size)
		return 0;

	if (data[pos] != '*' && data[pos] != '+' && data[pos] != '-')
		return 0;

	if (data[pos + 1] != ' ')
		return 0;

	/* a line underlined as a header is not a list item */
	if (is_next_headerline(data + pos, size - pos))
		return 0;

	return pos + 2;
}
1360
+
1361
+
1362
+ /* parse_block • parsing of one block, returning next uint8_t to parse */
1363
+ static void parse_block(struct buf *ob, struct sd_markdown *rndr,
1364
+ uint8_t *data, size_t size);
1365
+
1366
+
1367
+ /* parse_blockquote • handles parsing of a blockquote fragment */
1368
+ static size_t
1369
+ parse_blockquote(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
1370
+ {
1371
+ size_t beg, end = 0, pre, work_size = 0;
1372
+ uint8_t *work_data = 0;
1373
+ struct buf *out = 0;
1374
+
1375
+ out = rndr_newbuf(rndr, BUFFER_BLOCK);
1376
+ beg = 0;
1377
+ while (beg < size) {
1378
+ for (end = beg + 1; end < size && data[end - 1] != '\n'; end++);
1379
+
1380
+ pre = prefix_quote(data + beg, end - beg);
1381
+
1382
+ if (pre)
1383
+ beg += pre; /* skipping prefix */
1384
+
1385
+ /* empty line followed by non-quote line */
1386
+ else if (is_empty(data + beg, end - beg) &&
1387
+ (end >= size || (prefix_quote(data + end, size - end) == 0 &&
1388
+ !is_empty(data + end, size - end))))
1389
+ break;
1390
+
1391
+ if (beg < end) { /* copy into the in-place working buffer */
1392
+ /* bufput(work, data + beg, end - beg); */
1393
+ if (!work_data)
1394
+ work_data = data + beg;
1395
+ else if (data + beg != work_data + work_size)
1396
+ memmove(work_data + work_size, data + beg, end - beg);
1397
+ work_size += end - beg;
1398
+ }
1399
+ beg = end;
1400
+ }
1401
+
1402
+ parse_block(out, rndr, work_data, work_size);
1403
+ if (rndr->cb.blockquote)
1404
+ rndr->cb.blockquote(ob, out, rndr->opaque);
1405
+ rndr_popbuf(rndr, BUFFER_BLOCK);
1406
+ return end;
1407
+ }
1408
+
1409
+ static size_t
1410
+ parse_htmlblock(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int do_render);
1411
+
1412
+ /* parse_blockquote • handles parsing of a regular paragraph */
1413
+ static size_t
1414
+ parse_paragraph(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
1415
+ {
1416
+ size_t i = 0, end = 0;
1417
+ int level = 0;
1418
+ struct buf work = { data, 0, 0, 0 };
1419
+
1420
+ while (i < size) {
1421
+ for (end = i + 1; end < size && data[end - 1] != '\n'; end++) /* empty */;
1422
+
1423
+ if (is_empty(data + i, size - i))
1424
+ break;
1425
+
1426
+ if ((level = is_headerline(data + i, size - i)) != 0)
1427
+ break;
1428
+
1429
+ if (rndr->ext_flags & MKDEXT_LAX_HTML_BLOCKS) {
1430
+ if (data[i] == '<' && rndr->cb.blockhtml && parse_htmlblock(ob, rndr, data + i, size - i, 0)) {
1431
+ end = i;
1432
+ break;
1433
+ }
1434
+ }
1435
+
1436
+ if (is_atxheader(rndr, data + i, size - i) ||
1437
+ is_hrule(data + i, size - i) ||
1438
+ prefix_quote(data + i, size - i)) {
1439
+ end = i;
1440
+ break;
1441
+ }
1442
+
1443
+ i = end;
1444
+ }
1445
+
1446
+ work.size = i;
1447
+ while (work.size && data[work.size - 1] == '\n')
1448
+ work.size--;
1449
+
1450
+ if (!level) {
1451
+ struct buf *tmp = rndr_newbuf(rndr, BUFFER_BLOCK);
1452
+ parse_inline(tmp, rndr, work.data, work.size);
1453
+ if (rndr->cb.paragraph)
1454
+ rndr->cb.paragraph(ob, tmp, rndr->opaque);
1455
+ rndr_popbuf(rndr, BUFFER_BLOCK);
1456
+ } else {
1457
+ struct buf *header_work;
1458
+
1459
+ if (work.size) {
1460
+ size_t beg;
1461
+ i = work.size;
1462
+ work.size -= 1;
1463
+
1464
+ while (work.size && data[work.size] != '\n')
1465
+ work.size -= 1;
1466
+
1467
+ beg = work.size + 1;
1468
+ while (work.size && data[work.size - 1] == '\n')
1469
+ work.size -= 1;
1470
+
1471
+ if (work.size > 0) {
1472
+ struct buf *tmp = rndr_newbuf(rndr, BUFFER_BLOCK);
1473
+ parse_inline(tmp, rndr, work.data, work.size);
1474
+
1475
+ if (rndr->cb.paragraph)
1476
+ rndr->cb.paragraph(ob, tmp, rndr->opaque);
1477
+
1478
+ rndr_popbuf(rndr, BUFFER_BLOCK);
1479
+ work.data += beg;
1480
+ work.size = i - beg;
1481
+ }
1482
+ else work.size = i;
1483
+ }
1484
+
1485
+ header_work = rndr_newbuf(rndr, BUFFER_SPAN);
1486
+ parse_inline(header_work, rndr, work.data, work.size);
1487
+
1488
+ if (rndr->cb.header)
1489
+ rndr->cb.header(ob, header_work, (int)level, rndr->opaque);
1490
+
1491
+ rndr_popbuf(rndr, BUFFER_SPAN);
1492
+ }
1493
+
1494
+ return end;
1495
+ }
1496
+
1497
+ /* parse_fencedcode • handles parsing of a block-level code fragment */
1498
+ static size_t
1499
+ parse_fencedcode(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
1500
+ {
1501
+ size_t beg, end;
1502
+ struct buf *work = 0;
1503
+ struct buf lang = { 0, 0, 0, 0 };
1504
+
1505
+ beg = is_codefence(data, size, &lang);
1506
+ if (beg == 0) return 0;
1507
+
1508
+ work = rndr_newbuf(rndr, BUFFER_BLOCK);
1509
+
1510
+ while (beg < size) {
1511
+ size_t fence_end;
1512
+
1513
+ fence_end = is_codefence(data + beg, size - beg, NULL);
1514
+ if (fence_end != 0) {
1515
+ beg += fence_end;
1516
+ break;
1517
+ }
1518
+
1519
+ for (end = beg + 1; end < size && data[end - 1] != '\n'; end++);
1520
+
1521
+ if (beg < end) {
1522
+ /* verbatim copy to the working buffer,
1523
+ escaping entities */
1524
+ if (is_empty(data + beg, end - beg))
1525
+ bufputc(work, '\n');
1526
+ else bufput(work, data + beg, end - beg);
1527
+ }
1528
+ beg = end;
1529
+ }
1530
+
1531
+ if (work->size && work->data[work->size - 1] != '\n')
1532
+ bufputc(work, '\n');
1533
+
1534
+ if (rndr->cb.blockcode)
1535
+ rndr->cb.blockcode(ob, work, lang.size ? &lang : NULL, rndr->opaque);
1536
+
1537
+ rndr_popbuf(rndr, BUFFER_BLOCK);
1538
+ return beg;
1539
+ }
1540
+
1541
+ static size_t
1542
+ parse_blockcode(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
1543
+ {
1544
+ size_t beg, end, pre;
1545
+ struct buf *work = 0;
1546
+
1547
+ work = rndr_newbuf(rndr, BUFFER_BLOCK);
1548
+
1549
+ beg = 0;
1550
+ while (beg < size) {
1551
+ for (end = beg + 1; end < size && data[end - 1] != '\n'; end++) {};
1552
+ pre = prefix_code(data + beg, end - beg);
1553
+
1554
+ if (pre)
1555
+ beg += pre; /* skipping prefix */
1556
+ else if (!is_empty(data + beg, end - beg))
1557
+ /* non-empty non-prefixed line breaks the pre */
1558
+ break;
1559
+
1560
+ if (beg < end) {
1561
+ /* verbatim copy to the working buffer,
1562
+ escaping entities */
1563
+ if (is_empty(data + beg, end - beg))
1564
+ bufputc(work, '\n');
1565
+ else bufput(work, data + beg, end - beg);
1566
+ }
1567
+ beg = end;
1568
+ }
1569
+
1570
+ while (work->size && work->data[work->size - 1] == '\n')
1571
+ work->size -= 1;
1572
+
1573
+ bufputc(work, '\n');
1574
+
1575
+ if (rndr->cb.blockcode)
1576
+ rndr->cb.blockcode(ob, work, NULL, rndr->opaque);
1577
+
1578
+ rndr_popbuf(rndr, BUFFER_BLOCK);
1579
+ return beg;
1580
+ }
1581
+
1582
+ /* parse_listitem • parsing of a single list item */
1583
+ /* assuming initial prefix is already removed */
1584
+ static size_t
1585
+ parse_listitem(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int *flags)
1586
+ {
1587
+ struct buf *work = 0, *inter = 0;
1588
+ size_t beg = 0, end, pre, sublist = 0, orgpre = 0, i;
1589
+ int in_empty = 0, has_inside_empty = 0;
1590
+ size_t has_next_uli, has_next_oli;
1591
+
1592
+ /* keeping track of the first indentation prefix */
1593
+ while (orgpre < 3 && orgpre < size && data[orgpre] == ' ')
1594
+ orgpre++;
1595
+
1596
+ beg = prefix_uli(data, size);
1597
+ if (!beg)
1598
+ beg = prefix_oli(data, size);
1599
+
1600
+ if (!beg)
1601
+ return 0;
1602
+
1603
+ /* skipping to the beginning of the following line */
1604
+ end = beg;
1605
+ while (end < size && data[end - 1] != '\n')
1606
+ end++;
1607
+
1608
+ /* getting working buffers */
1609
+ work = rndr_newbuf(rndr, BUFFER_SPAN);
1610
+ inter = rndr_newbuf(rndr, BUFFER_SPAN);
1611
+
1612
+ /* putting the first line into the working buffer */
1613
+ bufput(work, data + beg, end - beg);
1614
+ beg = end;
1615
+
1616
+ /* process the following lines */
1617
+ while (beg < size) {
1618
+ end++;
1619
+
1620
+ while (end < size && data[end - 1] != '\n')
1621
+ end++;
1622
+
1623
+ /* process an empty line */
1624
+ if (is_empty(data + beg, end - beg)) {
1625
+ in_empty = 1;
1626
+ beg = end;
1627
+ continue;
1628
+ }
1629
+
1630
+ /* calculating the indentation */
1631
+ i = 0;
1632
+ while (i < 4 && beg + i < end && data[beg + i] == ' ')
1633
+ i++;
1634
+
1635
+ pre = i;
1636
+
1637
+ has_next_uli = prefix_uli(data + beg + i, end - beg - i);
1638
+ has_next_oli = prefix_oli(data + beg + i, end - beg - i);
1639
+
1640
+ /* checking for ul/ol switch */
1641
+ if (in_empty && (
1642
+ ((*flags & MKD_LIST_ORDERED) && has_next_uli) ||
1643
+ (!(*flags & MKD_LIST_ORDERED) && has_next_oli))){
1644
+ *flags |= MKD_LI_END;
1645
+ break; /* the following item must have same list type */
1646
+ }
1647
+
1648
+ /* checking for a new item */
1649
+ if ((has_next_uli && !is_hrule(data + beg + i, end - beg - i)) || has_next_oli) {
1650
+ if (in_empty)
1651
+ has_inside_empty = 1;
1652
+
1653
+ if (pre == orgpre) /* the following item must have */
1654
+ break; /* the same indentation */
1655
+
1656
+ if (!sublist)
1657
+ sublist = work->size;
1658
+ }
1659
+ /* joining only indented stuff after empty lines */
1660
+ else if (in_empty && i < 4) {
1661
+ *flags |= MKD_LI_END;
1662
+ break;
1663
+ }
1664
+ else if (in_empty) {
1665
+ bufputc(work, '\n');
1666
+ has_inside_empty = 1;
1667
+ }
1668
+
1669
+ in_empty = 0;
1670
+
1671
+ /* adding the line without prefix into the working buffer */
1672
+ bufput(work, data + beg + i, end - beg - i);
1673
+ beg = end;
1674
+ }
1675
+
1676
+ /* render of li contents */
1677
+ if (has_inside_empty)
1678
+ *flags |= MKD_LI_BLOCK;
1679
+
1680
+ if (*flags & MKD_LI_BLOCK) {
1681
+ /* intermediate render of block li */
1682
+ if (sublist && sublist < work->size) {
1683
+ parse_block(inter, rndr, work->data, sublist);
1684
+ parse_block(inter, rndr, work->data + sublist, work->size - sublist);
1685
+ }
1686
+ else
1687
+ parse_block(inter, rndr, work->data, work->size);
1688
+ } else {
1689
+ /* intermediate render of inline li */
1690
+ if (sublist && sublist < work->size) {
1691
+ parse_inline(inter, rndr, work->data, sublist);
1692
+ parse_block(inter, rndr, work->data + sublist, work->size - sublist);
1693
+ }
1694
+ else
1695
+ parse_inline(inter, rndr, work->data, work->size);
1696
+ }
1697
+
1698
+ /* render of li itself */
1699
+ if (rndr->cb.listitem)
1700
+ rndr->cb.listitem(ob, inter, *flags, rndr->opaque);
1701
+
1702
+ rndr_popbuf(rndr, BUFFER_SPAN);
1703
+ rndr_popbuf(rndr, BUFFER_SPAN);
1704
+ return beg;
1705
+ }
1706
+
1707
+
1708
+ /* parse_list • parsing ordered or unordered list block */
1709
+ static size_t
1710
+ parse_list(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int flags)
1711
+ {
1712
+ struct buf *work = 0;
1713
+ size_t i = 0, j;
1714
+
1715
+ work = rndr_newbuf(rndr, BUFFER_BLOCK);
1716
+
1717
+ while (i < size) {
1718
+ j = parse_listitem(work, rndr, data + i, size - i, &flags);
1719
+ i += j;
1720
+
1721
+ if (!j || (flags & MKD_LI_END))
1722
+ break;
1723
+ }
1724
+
1725
+ if (rndr->cb.list)
1726
+ rndr->cb.list(ob, work, flags, rndr->opaque);
1727
+ rndr_popbuf(rndr, BUFFER_BLOCK);
1728
+ return i;
1729
+ }
1730
+
1731
+ /* parse_atxheader • parsing of atx-style headers */
1732
+ static size_t
1733
+ parse_atxheader(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
1734
+ {
1735
+ size_t level = 0;
1736
+ size_t i, end, skip;
1737
+
1738
+ while (level < size && level < 6 && data[level] == '#')
1739
+ level++;
1740
+
1741
+ for (i = level; i < size && data[i] == ' '; i++);
1742
+
1743
+ for (end = i; end < size && data[end] != '\n'; end++);
1744
+ skip = end;
1745
+
1746
+ while (end && data[end - 1] == '#')
1747
+ end--;
1748
+
1749
+ while (end && data[end - 1] == ' ')
1750
+ end--;
1751
+
1752
+ if (end > i) {
1753
+ struct buf *work = rndr_newbuf(rndr, BUFFER_SPAN);
1754
+
1755
+ parse_inline(work, rndr, data + i, end - i);
1756
+
1757
+ if (rndr->cb.header)
1758
+ rndr->cb.header(ob, work, (int)level, rndr->opaque);
1759
+
1760
+ rndr_popbuf(rndr, BUFFER_SPAN);
1761
+ }
1762
+
1763
+ return skip;
1764
+ }
1765
+
1766
+
1767
+ /* htmlblock_end • checking end of HTML block : </tag>[ \t]*\n[ \t*]\n */
1768
+ /* returns the length on match, 0 otherwise */
1769
+ static size_t
1770
+ htmlblock_end_tag(
1771
+ const char *tag,
1772
+ size_t tag_len,
1773
+ struct sd_markdown *rndr,
1774
+ uint8_t *data,
1775
+ size_t size)
1776
+ {
1777
+ size_t i, w;
1778
+
1779
+ /* checking if tag is a match */
1780
+ if (tag_len + 3 >= size ||
1781
+ strncasecmp((char *)data + 2, tag, tag_len) != 0 ||
1782
+ data[tag_len + 2] != '>')
1783
+ return 0;
1784
+
1785
+ /* checking white lines */
1786
+ i = tag_len + 3;
1787
+ w = 0;
1788
+ if (i < size && (w = is_empty(data + i, size - i)) == 0)
1789
+ return 0; /* non-blank after tag */
1790
+ i += w;
1791
+ w = 0;
1792
+
1793
+ if (rndr->ext_flags & MKDEXT_LAX_HTML_BLOCKS) {
1794
+ if (i < size)
1795
+ w = is_empty(data + i, size - i);
1796
+ } else {
1797
+ if (i < size && (w = is_empty(data + i, size - i)) == 0)
1798
+ return 0; /* non-blank line after tag line */
1799
+ }
1800
+
1801
+ return i + w;
1802
+ }
1803
+
1804
/* htmlblock_end • looks for the closing tag that ends an HTML block */
/* scans `data` for "</" sequences and tests each with htmlblock_end_tag();
 * with `start_of_line` set, candidates past the first line must directly
 * follow a newline */
/* returns the length of the block up to the end of the match, 0 if none */
static size_t
htmlblock_end(const char *curtag,
	struct sd_markdown *rndr,
	uint8_t *data,
	size_t size,
	int start_of_line)
{
	const size_t tag_len = strlen(curtag);
	size_t pos = 1;
	int newlines = 0;

	while (pos < size) {
		size_t matched;

		/* advance until data[pos - 1] and data[pos] read "</" */
		pos++;
		for (; pos < size && (data[pos - 1] != '<' || data[pos] != '/'); pos++) {
			if (data[pos] == '\n')
				newlines++;
		}

		/* When only unindented tags are wanted, skip a tag that does
		 * not follow a newline. A tag still on the initial line is the
		 * exception and counts as a closing tag. */
		if (start_of_line && newlines > 0 && data[pos - 2] != '\n')
			continue;

		/* not enough input left to hold the closing tag */
		if (pos + 2 + tag_len >= size)
			break;

		matched = htmlblock_end_tag(curtag, tag_len, rndr, data + pos - 1, size - pos + 1);
		if (matched)
			return pos + matched - 1;
	}

	return 0;
}
1844
+
1845
+
1846
+ /* parse_htmlblock • parsing of inline HTML block */
1847
+ static size_t
1848
+ parse_htmlblock(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int do_render)
1849
+ {
1850
+ size_t i, j = 0, tag_end;
1851
+ const char *curtag = NULL;
1852
+ struct buf work = { data, 0, 0, 0 };
1853
+
1854
+ /* identification of the opening tag */
1855
+ if (size < 2 || data[0] != '<')
1856
+ return 0;
1857
+
1858
+ i = 1;
1859
+ while (i < size && data[i] != '>' && data[i] != ' ')
1860
+ i++;
1861
+
1862
+ if (i < size)
1863
+ curtag = find_block_tag((char *)data + 1, (int)i - 1);
1864
+
1865
+ /* handling of special cases */
1866
+ if (!curtag) {
1867
+
1868
+ /* HTML comment, laxist form */
1869
+ if (size > 5 && data[1] == '!' && data[2] == '-' && data[3] == '-') {
1870
+ i = 5;
1871
+
1872
+ while (i < size && !(data[i - 2] == '-' && data[i - 1] == '-' && data[i] == '>'))
1873
+ i++;
1874
+
1875
+ i++;
1876
+
1877
+ if (i < size)
1878
+ j = is_empty(data + i, size - i);
1879
+
1880
+ if (j) {
1881
+ work.size = i + j;
1882
+ if (do_render && rndr->cb.blockhtml)
1883
+ rndr->cb.blockhtml(ob, &work, rndr->opaque);
1884
+ return work.size;
1885
+ }
1886
+ }
1887
+
1888
+ /* HR, which is the only self-closing block tag considered */
1889
+ if (size > 4 && (data[1] == 'h' || data[1] == 'H') && (data[2] == 'r' || data[2] == 'R')) {
1890
+ i = 3;
1891
+ while (i < size && data[i] != '>')
1892
+ i++;
1893
+
1894
+ if (i + 1 < size) {
1895
+ i++;
1896
+ j = is_empty(data + i, size - i);
1897
+ if (j) {
1898
+ work.size = i + j;
1899
+ if (do_render && rndr->cb.blockhtml)
1900
+ rndr->cb.blockhtml(ob, &work, rndr->opaque);
1901
+ return work.size;
1902
+ }
1903
+ }
1904
+ }
1905
+
1906
+ /* no special case recognised */
1907
+ return 0;
1908
+ }
1909
+
1910
+ /* looking for an unindented matching closing tag */
1911
+ /* followed by a blank line */
1912
+ tag_end = htmlblock_end(curtag, rndr, data, size, 1);
1913
+
1914
+ /* if not found, trying a second pass looking for indented match */
1915
+ /* but not if tag is "ins" or "del" (following original Markdown.pl) */
1916
+ if (!tag_end && strcmp(curtag, "ins") != 0 && strcmp(curtag, "del") != 0) {
1917
+ tag_end = htmlblock_end(curtag, rndr, data, size, 0);
1918
+ }
1919
+
1920
+ if (!tag_end)
1921
+ return 0;
1922
+
1923
+ /* the end of the block has been found */
1924
+ work.size = tag_end;
1925
+ if (do_render && rndr->cb.blockhtml)
1926
+ rndr->cb.blockhtml(ob, &work, rndr->opaque);
1927
+
1928
+ return tag_end;
1929
+ }
1930
+
1931
+ static void
1932
+ parse_table_row(
1933
+ struct buf *ob,
1934
+ struct sd_markdown *rndr,
1935
+ uint8_t *data,
1936
+ size_t size,
1937
+ size_t columns,
1938
+ int *col_data,
1939
+ int header_flag)
1940
+ {
1941
+ size_t i = 0, col;
1942
+ struct buf *row_work = 0;
1943
+
1944
+ if (!rndr->cb.table_cell || !rndr->cb.table_row)
1945
+ return;
1946
+
1947
+ row_work = rndr_newbuf(rndr, BUFFER_SPAN);
1948
+
1949
+ if (i < size && data[i] == '|')
1950
+ i++;
1951
+
1952
+ for (col = 0; col < columns && i < size; ++col) {
1953
+ size_t cell_start, cell_end;
1954
+ struct buf *cell_work;
1955
+
1956
+ cell_work = rndr_newbuf(rndr, BUFFER_SPAN);
1957
+
1958
+ while (i < size && _isspace(data[i]))
1959
+ i++;
1960
+
1961
+ cell_start = i;
1962
+
1963
+ while (i < size && data[i] != '|')
1964
+ i++;
1965
+
1966
+ cell_end = i - 1;
1967
+
1968
+ while (cell_end > cell_start && _isspace(data[cell_end]))
1969
+ cell_end--;
1970
+
1971
+ parse_inline(cell_work, rndr, data + cell_start, 1 + cell_end - cell_start);
1972
+ rndr->cb.table_cell(row_work, cell_work, col_data[col] | header_flag, rndr->opaque);
1973
+
1974
+ rndr_popbuf(rndr, BUFFER_SPAN);
1975
+ i++;
1976
+ }
1977
+
1978
+ for (; col < columns; ++col) {
1979
+ struct buf empty_cell = { 0, 0, 0, 0 };
1980
+ rndr->cb.table_cell(row_work, &empty_cell, col_data[col] | header_flag, rndr->opaque);
1981
+ }
1982
+
1983
+ rndr->cb.table_row(ob, row_work, rndr->opaque);
1984
+
1985
+ rndr_popbuf(rndr, BUFFER_SPAN);
1986
+ }
1987
+
1988
+ static size_t
1989
+ parse_table_header(
1990
+ struct buf *ob,
1991
+ struct sd_markdown *rndr,
1992
+ uint8_t *data,
1993
+ size_t size,
1994
+ size_t *columns,
1995
+ int **column_data)
1996
+ {
1997
+ int pipes;
1998
+ size_t i = 0, col, header_end, under_end;
1999
+
2000
+ pipes = 0;
2001
+ while (i < size && data[i] != '\n')
2002
+ if (data[i++] == '|')
2003
+ pipes++;
2004
+
2005
+ if (i == size || pipes == 0)
2006
+ return 0;
2007
+
2008
+ header_end = i;
2009
+
2010
+ while (header_end > 0 && _isspace(data[header_end - 1]))
2011
+ header_end--;
2012
+
2013
+ if (data[0] == '|')
2014
+ pipes--;
2015
+
2016
+ if (header_end && data[header_end - 1] == '|')
2017
+ pipes--;
2018
+
2019
+ *columns = pipes + 1;
2020
+ *column_data = calloc(*columns, sizeof(int));
2021
+
2022
+ /* Parse the header underline */
2023
+ i++;
2024
+ if (i < size && data[i] == '|')
2025
+ i++;
2026
+
2027
+ under_end = i;
2028
+ while (under_end < size && data[under_end] != '\n')
2029
+ under_end++;
2030
+
2031
+ for (col = 0; col < *columns && i < under_end; ++col) {
2032
+ size_t dashes = 0;
2033
+
2034
+ while (i < under_end && data[i] == ' ')
2035
+ i++;
2036
+
2037
+ if (data[i] == ':') {
2038
+ i++; (*column_data)[col] |= MKD_TABLE_ALIGN_L;
2039
+ dashes++;
2040
+ }
2041
+
2042
+ while (i < under_end && data[i] == '-') {
2043
+ i++; dashes++;
2044
+ }
2045
+
2046
+ if (i < under_end && data[i] == ':') {
2047
+ i++; (*column_data)[col] |= MKD_TABLE_ALIGN_R;
2048
+ dashes++;
2049
+ }
2050
+
2051
+ while (i < under_end && data[i] == ' ')
2052
+ i++;
2053
+
2054
+ if (i < under_end && data[i] != '|')
2055
+ break;
2056
+
2057
+ if (dashes < 3)
2058
+ break;
2059
+
2060
+ i++;
2061
+ }
2062
+
2063
+ if (col < *columns)
2064
+ return 0;
2065
+
2066
+ parse_table_row(
2067
+ ob, rndr, data,
2068
+ header_end,
2069
+ *columns,
2070
+ *column_data,
2071
+ MKD_TABLE_HEADER
2072
+ );
2073
+
2074
+ return under_end + 1;
2075
+ }
2076
+
2077
+ static size_t
2078
+ parse_table(
2079
+ struct buf *ob,
2080
+ struct sd_markdown *rndr,
2081
+ uint8_t *data,
2082
+ size_t size)
2083
+ {
2084
+ size_t i;
2085
+
2086
+ struct buf *header_work = 0;
2087
+ struct buf *body_work = 0;
2088
+
2089
+ size_t columns;
2090
+ int *col_data = NULL;
2091
+
2092
+ header_work = rndr_newbuf(rndr, BUFFER_SPAN);
2093
+ body_work = rndr_newbuf(rndr, BUFFER_BLOCK);
2094
+
2095
+ i = parse_table_header(header_work, rndr, data, size, &columns, &col_data);
2096
+ if (i > 0) {
2097
+
2098
+ while (i < size) {
2099
+ size_t row_start;
2100
+ int pipes = 0;
2101
+
2102
+ row_start = i;
2103
+
2104
+ while (i < size && data[i] != '\n')
2105
+ if (data[i++] == '|')
2106
+ pipes++;
2107
+
2108
+ if (pipes == 0 || i == size) {
2109
+ i = row_start;
2110
+ break;
2111
+ }
2112
+
2113
+ parse_table_row(
2114
+ body_work,
2115
+ rndr,
2116
+ data + row_start,
2117
+ i - row_start,
2118
+ columns,
2119
+ col_data, 0
2120
+ );
2121
+
2122
+ i++;
2123
+ }
2124
+
2125
+ if (rndr->cb.table)
2126
+ rndr->cb.table(ob, header_work, body_work, rndr->opaque);
2127
+ }
2128
+
2129
+ free(col_data);
2130
+ rndr_popbuf(rndr, BUFFER_SPAN);
2131
+ rndr_popbuf(rndr, BUFFER_BLOCK);
2132
+ return i;
2133
+ }
2134
+
2135
+ /* parse_block • parsing of one block, returning next uint8_t to parse */
2136
+ static void
2137
+ parse_block(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
2138
+ {
2139
+ size_t beg, end, i;
2140
+ uint8_t *txt_data;
2141
+ beg = 0;
2142
+
2143
+ if (rndr->work_bufs[BUFFER_SPAN].size +
2144
+ rndr->work_bufs[BUFFER_BLOCK].size > rndr->max_nesting)
2145
+ return;
2146
+
2147
+ while (beg < size) {
2148
+ txt_data = data + beg;
2149
+ end = size - beg;
2150
+
2151
+ if (is_atxheader(rndr, txt_data, end))
2152
+ beg += parse_atxheader(ob, rndr, txt_data, end);
2153
+
2154
+ else if (data[beg] == '<' && rndr->cb.blockhtml &&
2155
+ (i = parse_htmlblock(ob, rndr, txt_data, end, 1)) != 0)
2156
+ beg += i;
2157
+
2158
+ else if ((i = is_empty(txt_data, end)) != 0)
2159
+ beg += i;
2160
+
2161
+ else if (is_hrule(txt_data, end)) {
2162
+ if (rndr->cb.hrule)
2163
+ rndr->cb.hrule(ob, rndr->opaque);
2164
+
2165
+ while (beg < size && data[beg] != '\n')
2166
+ beg++;
2167
+
2168
+ beg++;
2169
+ }
2170
+
2171
+ else if ((rndr->ext_flags & MKDEXT_FENCED_CODE) != 0 &&
2172
+ (i = parse_fencedcode(ob, rndr, txt_data, end)) != 0)
2173
+ beg += i;
2174
+
2175
+ else if ((rndr->ext_flags & MKDEXT_TABLES) != 0 &&
2176
+ (i = parse_table(ob, rndr, txt_data, end)) != 0)
2177
+ beg += i;
2178
+
2179
+ else if (prefix_quote(txt_data, end))
2180
+ beg += parse_blockquote(ob, rndr, txt_data, end);
2181
+
2182
+ else if (prefix_code(txt_data, end))
2183
+ beg += parse_blockcode(ob, rndr, txt_data, end);
2184
+
2185
+ else if (prefix_uli(txt_data, end))
2186
+ beg += parse_list(ob, rndr, txt_data, end, 0);
2187
+
2188
+ else if (prefix_oli(txt_data, end))
2189
+ beg += parse_list(ob, rndr, txt_data, end, MKD_LIST_ORDERED);
2190
+
2191
+ else
2192
+ beg += parse_paragraph(ob, rndr, txt_data, end);
2193
+ }
2194
+ }
2195
+
2196
+
2197
+
2198
+ /*********************
2199
+ * REFERENCE PARSING *
2200
+ *********************/
2201
+
2202
+ /* is_ref • returns whether a line is a reference or not */
2203
+ static int
2204
+ is_ref(const uint8_t *data, size_t beg, size_t end, size_t *last, struct link_ref **refs)
2205
+ {
2206
+ /* int n; */
2207
+ size_t i = 0;
2208
+ size_t id_offset, id_end;
2209
+ size_t link_offset, link_end;
2210
+ size_t title_offset, title_end;
2211
+ size_t line_end;
2212
+
2213
+ /* up to 3 optional leading spaces */
2214
+ if (beg + 3 >= end) return 0;
2215
+ if (data[beg] == ' ') { i = 1;
2216
+ if (data[beg + 1] == ' ') { i = 2;
2217
+ if (data[beg + 2] == ' ') { i = 3;
2218
+ if (data[beg + 3] == ' ') return 0; } } }
2219
+ i += beg;
2220
+
2221
+ /* id part: anything but a newline between brackets */
2222
+ if (data[i] != '[') return 0;
2223
+ i++;
2224
+ id_offset = i;
2225
+ while (i < end && data[i] != '\n' && data[i] != '\r' && data[i] != ']')
2226
+ i++;
2227
+ if (i >= end || data[i] != ']') return 0;
2228
+ id_end = i;
2229
+
2230
+ /* spacer: colon (space | tab)* newline? (space | tab)* */
2231
+ i++;
2232
+ if (i >= end || data[i] != ':') return 0;
2233
+ i++;
2234
+ while (i < end && data[i] == ' ') i++;
2235
+ if (i < end && (data[i] == '\n' || data[i] == '\r')) {
2236
+ i++;
2237
+ if (i < end && data[i] == '\r' && data[i - 1] == '\n') i++; }
2238
+ while (i < end && data[i] == ' ') i++;
2239
+ if (i >= end) return 0;
2240
+
2241
+ /* link: whitespace-free sequence, optionally between angle brackets */
2242
+ if (data[i] == '<')
2243
+ i++;
2244
+
2245
+ link_offset = i;
2246
+
2247
+ while (i < end && data[i] != ' ' && data[i] != '\n' && data[i] != '\r')
2248
+ i++;
2249
+
2250
+ if (data[i - 1] == '>') link_end = i - 1;
2251
+ else link_end = i;
2252
+
2253
+ /* optional spacer: (space | tab)* (newline | '\'' | '"' | '(' ) */
2254
+ while (i < end && data[i] == ' ') i++;
2255
+ if (i < end && data[i] != '\n' && data[i] != '\r'
2256
+ && data[i] != '\'' && data[i] != '"' && data[i] != '(')
2257
+ return 0;
2258
+ line_end = 0;
2259
+ /* computing end-of-line */
2260
+ if (i >= end || data[i] == '\r' || data[i] == '\n') line_end = i;
2261
+ if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r')
2262
+ line_end = i + 1;
2263
+
2264
+ /* optional (space|tab)* spacer after a newline */
2265
+ if (line_end) {
2266
+ i = line_end + 1;
2267
+ while (i < end && data[i] == ' ') i++; }
2268
+
2269
+ /* optional title: any non-newline sequence enclosed in '"()
2270
+ alone on its line */
2271
+ title_offset = title_end = 0;
2272
+ if (i + 1 < end
2273
+ && (data[i] == '\'' || data[i] == '"' || data[i] == '(')) {
2274
+ i++;
2275
+ title_offset = i;
2276
+ /* looking for EOL */
2277
+ while (i < end && data[i] != '\n' && data[i] != '\r') i++;
2278
+ if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r')
2279
+ title_end = i + 1;
2280
+ else title_end = i;
2281
+ /* stepping back */
2282
+ i -= 1;
2283
+ while (i > title_offset && data[i] == ' ')
2284
+ i -= 1;
2285
+ if (i > title_offset
2286
+ && (data[i] == '\'' || data[i] == '"' || data[i] == ')')) {
2287
+ line_end = title_end;
2288
+ title_end = i; } }
2289
+
2290
+ if (!line_end || link_end == link_offset)
2291
+ return 0; /* garbage after the link empty link */
2292
+
2293
+ /* a valid ref has been found, filling-in return structures */
2294
+ if (last)
2295
+ *last = line_end;
2296
+
2297
+ if (refs) {
2298
+ struct link_ref *ref;
2299
+
2300
+ ref = add_link_ref(refs, data + id_offset, id_end - id_offset);
2301
+ if (!ref)
2302
+ return 0;
2303
+
2304
+ ref->link = bufnew(link_end - link_offset);
2305
+ bufput(ref->link, data + link_offset, link_end - link_offset);
2306
+
2307
+ if (title_end > title_offset) {
2308
+ ref->title = bufnew(title_end - title_offset);
2309
+ bufput(ref->title, data + title_offset, title_end - title_offset);
2310
+ }
2311
+ }
2312
+
2313
+ return 1;
2314
+ }
2315
+
2316
/*
 * expand_tabs • appends `line` to `ob`, replacing tabs with spaces
 *
 * Each tab becomes one to four spaces so that the text following it starts
 * on a column that is a multiple of 4, counted from the start of `line`.
 */
static void expand_tabs(struct buf *ob, const uint8_t *line, size_t size)
{
	size_t pos = 0, column = 0;

	while (pos < size) {
		size_t run = 0, pad;

		/* copy the tab-free run verbatim */
		while (pos + run < size && line[pos + run] != '\t')
			run++;

		if (run > 0) {
			bufput(ob, line + pos, run);
			pos += run;
			column += run;
		}

		if (pos >= size)
			break;

		/* at least one space, up to the next multiple of 4 */
		pad = 4 - (column % 4);
		column += pad;

		while (pad-- > 0)
			bufputc(ob, ' ');

		pos++;	/* the tab itself */
	}
}
2340
+
2341
+ /**********************
2342
+ * EXPORTED FUNCTIONS *
2343
+ **********************/
2344
+
2345
+ struct sd_markdown *
2346
+ sd_markdown_new(
2347
+ unsigned int extensions,
2348
+ size_t max_nesting,
2349
+ const struct sd_callbacks *callbacks,
2350
+ void *opaque)
2351
+ {
2352
+ struct sd_markdown *md = NULL;
2353
+
2354
+ assert(max_nesting > 0 && callbacks);
2355
+
2356
+ md = malloc(sizeof(struct sd_markdown));
2357
+ if (!md)
2358
+ return NULL;
2359
+
2360
+ memcpy(&md->cb, callbacks, sizeof(struct sd_callbacks));
2361
+
2362
+ stack_init(&md->work_bufs[BUFFER_BLOCK], 4);
2363
+ stack_init(&md->work_bufs[BUFFER_SPAN], 8);
2364
+
2365
+ memset(md->active_char, 0x0, 256);
2366
+
2367
+ if (md->cb.emphasis || md->cb.double_emphasis || md->cb.triple_emphasis) {
2368
+ md->active_char['*'] = MD_CHAR_EMPHASIS;
2369
+ md->active_char['_'] = MD_CHAR_EMPHASIS;
2370
+ if (extensions & MKDEXT_STRIKETHROUGH)
2371
+ md->active_char['~'] = MD_CHAR_EMPHASIS;
2372
+ }
2373
+
2374
+ if (md->cb.codespan)
2375
+ md->active_char['`'] = MD_CHAR_CODESPAN;
2376
+
2377
+ if (md->cb.linebreak)
2378
+ md->active_char['\n'] = MD_CHAR_LINEBREAK;
2379
+
2380
+ if (md->cb.image || md->cb.link)
2381
+ md->active_char['['] = MD_CHAR_LINK;
2382
+
2383
+ md->active_char['<'] = MD_CHAR_LANGLE;
2384
+ md->active_char['\\'] = MD_CHAR_ESCAPE;
2385
+ md->active_char['&'] = MD_CHAR_ENTITITY;
2386
+
2387
+ if (extensions & MKDEXT_AUTOLINK) {
2388
+ md->active_char[':'] = MD_CHAR_AUTOLINK_URL;
2389
+ md->active_char['@'] = MD_CHAR_AUTOLINK_EMAIL;
2390
+ md->active_char['w'] = MD_CHAR_AUTOLINK_WWW;
2391
+ }
2392
+
2393
+ if (extensions & MKDEXT_SUPERSCRIPT)
2394
+ md->active_char['^'] = MD_CHAR_SUPERSCRIPT;
2395
+
2396
+ /* Extension data */
2397
+ md->ext_flags = extensions;
2398
+ md->opaque = opaque;
2399
+ md->max_nesting = max_nesting;
2400
+ md->in_link_body = 0;
2401
+
2402
+ return md;
2403
+ }
2404
+
2405
+ void
2406
+ sd_markdown_render(struct buf *ob, const uint8_t *document, size_t doc_size, struct sd_markdown *md)
2407
+ {
2408
+ #define MARKDOWN_GROW(x) ((x) + ((x) >> 1))
2409
+ static const char UTF8_BOM[] = {0xEF, 0xBB, 0xBF};
2410
+
2411
+ struct buf *text;
2412
+ size_t beg, end;
2413
+
2414
+ text = bufnew(64);
2415
+ if (!text)
2416
+ return;
2417
+
2418
+ /* Preallocate enough space for our buffer to avoid expanding while copying */
2419
+ bufgrow(text, doc_size);
2420
+
2421
+ /* reset the references table */
2422
+ memset(&md->refs, 0x0, REF_TABLE_SIZE * sizeof(void *));
2423
+
2424
+ /* first pass: looking for references, copying everything else */
2425
+ beg = 0;
2426
+
2427
+ /* Skip a possible UTF-8 BOM, even though the Unicode standard
2428
+ * discourages having these in UTF-8 documents */
2429
+ if (doc_size >= 3 && memcmp(document, UTF8_BOM, 3) == 0)
2430
+ beg += 3;
2431
+
2432
+ while (beg < doc_size) /* iterating over lines */
2433
+ if (is_ref(document, beg, doc_size, &end, md->refs))
2434
+ beg = end;
2435
+ else { /* skipping to the next line */
2436
+ end = beg;
2437
+ while (end < doc_size && document[end] != '\n' && document[end] != '\r')
2438
+ end++;
2439
+
2440
+ /* adding the line body if present */
2441
+ if (end > beg)
2442
+ expand_tabs(text, document + beg, end - beg);
2443
+
2444
+ while (end < doc_size && (document[end] == '\n' || document[end] == '\r')) {
2445
+ /* add one \n per newline */
2446
+ if (document[end] == '\n' || (end + 1 < doc_size && document[end + 1] != '\n'))
2447
+ bufputc(text, '\n');
2448
+ end++;
2449
+ }
2450
+
2451
+ beg = end;
2452
+ }
2453
+
2454
+ /* pre-grow the output buffer to minimize allocations */
2455
+ bufgrow(ob, MARKDOWN_GROW(text->size));
2456
+
2457
+ /* second pass: actual rendering */
2458
+ if (md->cb.doc_header)
2459
+ md->cb.doc_header(ob, md->opaque);
2460
+
2461
+ if (text->size) {
2462
+ /* adding a final newline if not already present */
2463
+ if (text->data[text->size - 1] != '\n' && text->data[text->size - 1] != '\r')
2464
+ bufputc(text, '\n');
2465
+
2466
+ parse_block(ob, md, text->data, text->size);
2467
+ }
2468
+
2469
+ if (md->cb.doc_footer)
2470
+ md->cb.doc_footer(ob, md->opaque);
2471
+
2472
+ /* clean-up */
2473
+ bufrelease(text);
2474
+ free_link_refs(md->refs);
2475
+
2476
+ assert(md->work_bufs[BUFFER_SPAN].size == 0);
2477
+ assert(md->work_bufs[BUFFER_BLOCK].size == 0);
2478
+ }
2479
+
2480
+ void
2481
+ sd_markdown_free(struct sd_markdown *md)
2482
+ {
2483
+ size_t i;
2484
+
2485
+ for (i = 0; i < (size_t)md->work_bufs[BUFFER_SPAN].asize; ++i)
2486
+ bufrelease(md->work_bufs[BUFFER_SPAN].item[i]);
2487
+
2488
+ for (i = 0; i < (size_t)md->work_bufs[BUFFER_BLOCK].asize; ++i)
2489
+ bufrelease(md->work_bufs[BUFFER_BLOCK].item[i]);
2490
+
2491
+ stack_free(&md->work_bufs[BUFFER_SPAN]);
2492
+ stack_free(&md->work_bufs[BUFFER_BLOCK]);
2493
+
2494
+ free(md);
2495
+ }
2496
+
2497
+ void
2498
+ sd_version(int *ver_major, int *ver_minor, int *ver_revision)
2499
+ {
2500
+ *ver_major = UPSKIRT_VER_MAJOR;
2501
+ *ver_minor = UPSKIRT_VER_MINOR;
2502
+ *ver_revision = UPSKIRT_VER_REVISION;
2503
+ }
2504
+
2505
+ /* vim: set filetype=c: */