redcarpet 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of redcarpet might be problematic. Click here for more details.
- data/COPYING +14 -0
- data/README.markdown +38 -0
- data/Rakefile +135 -0
- data/bin/redcarpet +13 -0
- data/ext/array.c +300 -0
- data/ext/array.h +148 -0
- data/ext/buffer.c +318 -0
- data/ext/buffer.h +147 -0
- data/ext/extconf.rb +4 -0
- data/ext/markdown.c +1590 -0
- data/ext/markdown.h +124 -0
- data/ext/redcarpet.c +86 -0
- data/ext/render.c +499 -0
- data/lib/markdown.rb +1 -0
- data/lib/redcarpet.rb +72 -0
- data/redcarpet.gemspec +39 -0
- data/test/benchmark.rb +56 -0
- data/test/benchmark.txt +306 -0
- data/test/markdown_test.rb +176 -0
- data/test/redcarpet_test.rb +106 -0
- metadata +88 -0
data/ext/buffer.h
ADDED
@@ -0,0 +1,147 @@
|
|
1
|
+
/* buffer.h - automatic buffer structure */
|
2
|
+
|
3
|
+
/*
|
4
|
+
* Copyright (c) 2008, Natacha Porté
|
5
|
+
*
|
6
|
+
* Permission to use, copy, modify, and distribute this software for any
|
7
|
+
* purpose with or without fee is hereby granted, provided that the above
|
8
|
+
* copyright notice and this permission notice appear in all copies.
|
9
|
+
*
|
10
|
+
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
11
|
+
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
12
|
+
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
13
|
+
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
14
|
+
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
15
|
+
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
16
|
+
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
17
|
+
*/
|
18
|
+
|
19
|
+
#ifndef LITHIUM_BUFFER_H
|
20
|
+
#define LITHIUM_BUFFER_H
|
21
|
+
|
22
|
+
#include <stddef.h>
|
23
|
+
|
24
|
+
|
25
|
+
/********************
|
26
|
+
* TYPE DEFINITIONS *
|
27
|
+
********************/
|
28
|
+
|
29
|
+
/* struct buf • character array buffer */
/*	members are kept in this order: the positional initializers used */
/*	elsewhere (e.g. { data, size, 0, 0, 0 }) rely on it */
struct buf {
	char	*data;	/* actual character data */
	size_t	size;	/* size of the string */
	size_t	asize;	/* allocated size (0 = volatile buffer) */
	size_t	unit;	/* reallocation unit size (0 = read-only buffer) */
	int	ref;	/* reference count */
};
|
36
|
+
|
37
|
+
|
38
|
+
|
39
|
+
/**********
|
40
|
+
* MACROS *
|
41
|
+
**********/
|
42
|
+
|
43
|
+
/* CONST_BUF • global buffer from a string literal */
/*	size excludes the terminating NUL, asize includes it; */
/*	unit and ref are left to the implicit zero-initialization */
#define CONST_BUF(name, string) \
	static struct buf name = { string, sizeof(string) - 1, sizeof(string) }


/* VOLATILE_BUF • macro for creating a volatile buffer on the stack */
/*	strname is expanded twice and strlen() must be declared by the user */
/*	(this header only includes <stddef.h>); asize, unit and ref are zero */
#define VOLATILE_BUF(name, strname) \
	struct buf name = { strname, strlen(strname) }


/* BUFPUTSL • optimized bufputs of a string literal */
/*	the length is computed at compile time with sizeof, so the second */
/*	argument has to be an actual literal (or array), not a pointer */
#define BUFPUTSL(output, literal) \
	bufput(output, literal, sizeof(literal) - 1)
|
56
|
+
|
57
|
+
|
58
|
+
|
59
|
+
/********************
|
60
|
+
* BUFFER FUNCTIONS *
|
61
|
+
********************/
|
62
|
+
|
63
|
+
/* bufcasecmp • case-insensitive buffer comparison */
|
64
|
+
int
|
65
|
+
bufcasecmp(const struct buf *, const struct buf *);
|
66
|
+
|
67
|
+
/* bufcmp • case-sensitive buffer comparison */
|
68
|
+
int
|
69
|
+
bufcmp(const struct buf *, const struct buf *);
|
70
|
+
|
71
|
+
/* bufcmps • case-sensitive comparison of a string to a buffer */
|
72
|
+
int
|
73
|
+
bufcmps(const struct buf *, const char *);
|
74
|
+
|
75
|
+
/* bufprefix • compare the beginning of a buffer with a string */
|
76
|
+
int
|
77
|
+
bufprefix(const struct buf *buf, const char *prefix);
|
78
|
+
|
79
|
+
/* bufdup • buffer duplication */
|
80
|
+
struct buf *
|
81
|
+
bufdup(const struct buf *, size_t)
|
82
|
+
__attribute__ ((malloc));
|
83
|
+
|
84
|
+
/* bufgrow • increasing the allocated size to the given value */
|
85
|
+
int
|
86
|
+
bufgrow(struct buf *, size_t);
|
87
|
+
|
88
|
+
/* bufnew • allocation of a new buffer */
|
89
|
+
struct buf *
|
90
|
+
bufnew(size_t)
|
91
|
+
__attribute__ ((malloc));
|
92
|
+
|
93
|
+
/* bufnullterm • NUL-termination of the string array (making a C-string) */
|
94
|
+
void
|
95
|
+
bufnullterm(struct buf *);
|
96
|
+
|
97
|
+
/* bufprintf • formatted printing to a buffer */
|
98
|
+
void
|
99
|
+
bufprintf(struct buf *, const char *, ...)
|
100
|
+
__attribute__ ((format (printf, 2, 3)));
|
101
|
+
|
102
|
+
/* bufput • appends raw data to a buffer */
|
103
|
+
void
|
104
|
+
bufput(struct buf *, const void*, size_t);
|
105
|
+
|
106
|
+
/* bufputs • appends a NUL-terminated string to a buffer */
|
107
|
+
void
|
108
|
+
bufputs(struct buf *, const char*);
|
109
|
+
|
110
|
+
/* bufputc • appends a single char to a buffer */
|
111
|
+
void
|
112
|
+
bufputc(struct buf *, char);
|
113
|
+
|
114
|
+
/* bufrelease • decrease the reference count and free the buffer if needed */
|
115
|
+
void
|
116
|
+
bufrelease(struct buf *);
|
117
|
+
|
118
|
+
/* bufreset • frees internal data of the buffer */
|
119
|
+
void
|
120
|
+
bufreset(struct buf *);
|
121
|
+
|
122
|
+
/* bufset • safely assigns a buffer to another */
|
123
|
+
void
|
124
|
+
bufset(struct buf **, struct buf *);
|
125
|
+
|
126
|
+
/* bufslurp • removes a given number of bytes from the head of the array */
|
127
|
+
void
|
128
|
+
bufslurp(struct buf *, size_t);
|
129
|
+
|
130
|
+
/* buftoi • converts the numbers at the beginning of the buf into an int */
|
131
|
+
int
|
132
|
+
buftoi(struct buf *, size_t, size_t *);
|
133
|
+
|
134
|
+
|
135
|
+
|
136
|
+
#ifdef BUFFER_STDARG
|
137
|
+
#include <stdarg.h>
|
138
|
+
|
139
|
+
/* vbufprintf • stdarg variant of formatted printing into a buffer */
|
140
|
+
void
|
141
|
+
vbufprintf(struct buf *, const char*, va_list);
|
142
|
+
|
143
|
+
#endif /* def BUFFER_STDARG */
|
144
|
+
|
145
|
+
#endif /* ndef LITHIUM_BUFFER_H */
|
146
|
+
|
147
|
+
/* vim: set filetype=c: */
|
data/ext/extconf.rb
ADDED
data/ext/markdown.c
ADDED
@@ -0,0 +1,1590 @@
|
|
1
|
+
/* markdown.c - generic markdown parser */
|
2
|
+
|
3
|
+
/*
|
4
|
+
* Copyright (c) 2009, Natacha Porté
|
5
|
+
*
|
6
|
+
* Permission to use, copy, modify, and distribute this software for any
|
7
|
+
* purpose with or without fee is hereby granted, provided that the above
|
8
|
+
* copyright notice and this permission notice appear in all copies.
|
9
|
+
*
|
10
|
+
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
11
|
+
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
12
|
+
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
13
|
+
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
14
|
+
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
15
|
+
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
16
|
+
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
17
|
+
*/
|
18
|
+
|
19
|
+
#include "markdown.h"
|
20
|
+
|
21
|
+
#include "array.h"
|
22
|
+
|
23
|
+
#include <assert.h>
|
24
|
+
#include <string.h>
|
25
|
+
#include <strings.h> /* for strncasecmp */
|
26
|
+
|
27
|
+
#define TEXT_UNIT 64 /* unit for the copy of the input buffer */
|
28
|
+
#define WORK_UNIT 64 /* block-level working buffer */
|
29
|
+
|
30
|
+
#define MKD_LI_END 8 /* internal list flag */
|
31
|
+
|
32
|
+
/***************
|
33
|
+
* LOCAL TYPES *
|
34
|
+
***************/
|
35
|
+
|
36
|
+
/* link_ref • reference to a link */
struct link_ref {
	struct buf	*id;	/* lookup key, compared case-insensitively */
	struct buf	*link;	/* link target handed to the link/image renderer */
	struct buf	*title;	/* title handed to the link/image renderer */
};
|
41
|
+
|
42
|
+
|
43
|
+
/* char_trigger • function pointer to render active chars */
/*	ob      output buffer the callback appends to */
/*	rndr    current render state */
/*	data    pointer to the beginning of the span (the active char) */
/*	offset  number of valid chars before data */
/*	size    number of chars available from data onwards */
/*	returns the number of chars taken care of, 0 meaning "not handled" */
struct render;

typedef size_t (*char_trigger)(
	struct buf *ob,
	struct render *rndr,
	char *data,
	size_t offset,
	size_t size);
|
51
|
+
|
52
|
+
|
53
|
+
/* render • structure containing one particular render */
|
54
|
+
struct render {
|
55
|
+
struct mkd_renderer make;
|
56
|
+
struct array refs;
|
57
|
+
char_trigger active_char[256];
|
58
|
+
struct parray work;
|
59
|
+
};
|
60
|
+
|
61
|
+
|
62
|
+
/* html_tag • structure for quick HTML tag search (inspired from discount) */
struct html_tag {
	const char	*text;	/* tag name, not necessarily NUL-terminated at size */
	size_t		size;	/* number of significant chars in text */
};
|
67
|
+
|
68
|
+
|
69
|
+
|
70
|
+
/********************
|
71
|
+
* GLOBAL VARIABLES *
|
72
|
+
********************/
|
73
|
+
|
74
|
+
/* block_tags • recognised block tags, sorted by cmp_html_tag */
|
75
|
+
static struct html_tag block_tags[] = {
|
76
|
+
/*0*/ { "p", 1 },
|
77
|
+
{ "dl", 2 },
|
78
|
+
{ "h1", 2 },
|
79
|
+
{ "h2", 2 },
|
80
|
+
{ "h3", 2 },
|
81
|
+
{ "h4", 2 },
|
82
|
+
{ "h5", 2 },
|
83
|
+
{ "h6", 2 },
|
84
|
+
{ "ol", 2 },
|
85
|
+
{ "ul", 2 },
|
86
|
+
/*10*/ { "del", 3 },
|
87
|
+
{ "div", 3 },
|
88
|
+
/*12*/ { "ins", 3 },
|
89
|
+
{ "pre", 3 },
|
90
|
+
{ "form", 4 },
|
91
|
+
{ "math", 4 },
|
92
|
+
{ "table", 5 },
|
93
|
+
{ "iframe", 6 },
|
94
|
+
{ "script", 6 },
|
95
|
+
{ "fieldset", 8 },
|
96
|
+
{ "noscript", 8 },
|
97
|
+
{ "blockquote", 10 } };
|
98
|
+
|
99
|
+
#define INS_TAG (block_tags + 12)
|
100
|
+
#define DEL_TAG (block_tags + 10)
|
101
|
+
|
102
|
+
|
103
|
+
|
104
|
+
/***************************
|
105
|
+
* STATIC HELPER FUNCTIONS *
|
106
|
+
***************************/
|
107
|
+
|
108
|
+
/* cmp_link_ref • comparison function for link_ref sorted arrays */
|
109
|
+
static int
|
110
|
+
cmp_link_ref(void *key, void *array_entry) {
|
111
|
+
struct link_ref *lr = array_entry;
|
112
|
+
return bufcasecmp(key, lr->id); }
|
113
|
+
|
114
|
+
|
115
|
+
/* cmp_link_ref_sort • comparison function for link_ref qsort */
|
116
|
+
static int
|
117
|
+
cmp_link_ref_sort(const void *a, const void *b) {
|
118
|
+
const struct link_ref *lra = a;
|
119
|
+
const struct link_ref *lrb = b;
|
120
|
+
return bufcasecmp(lra->id, lrb->id); }
|
121
|
+
|
122
|
+
|
123
|
+
/* cmp_html_tag • comparison function for bsearch() (stolen from discount) */
|
124
|
+
static int
|
125
|
+
cmp_html_tag(const void *a, const void *b) {
|
126
|
+
const struct html_tag *hta = a;
|
127
|
+
const struct html_tag *htb = b;
|
128
|
+
if (hta->size != htb->size) return (int)((ssize_t)hta->size - (ssize_t)htb->size);
|
129
|
+
return strncasecmp(hta->text, htb->text, hta->size); }
|
130
|
+
|
131
|
+
|
132
|
+
/* find_block_tag • returns the current block tag */
|
133
|
+
static struct html_tag *
|
134
|
+
find_block_tag(char *data, size_t size) {
|
135
|
+
size_t i = 0;
|
136
|
+
struct html_tag key;
|
137
|
+
|
138
|
+
/* looking for the word end */
|
139
|
+
while (i < size && ((data[i] >= '0' && data[i] <= '9')
|
140
|
+
|| (data[i] >= 'A' && data[i] <= 'Z')
|
141
|
+
|| (data[i] >= 'a' && data[i] <= 'z')))
|
142
|
+
i += 1;
|
143
|
+
if (i >= size) return 0;
|
144
|
+
|
145
|
+
/* binary search of the tag */
|
146
|
+
key.text = data;
|
147
|
+
key.size = i;
|
148
|
+
return bsearch(&key, block_tags,
|
149
|
+
sizeof block_tags / sizeof block_tags[0],
|
150
|
+
sizeof block_tags[0], cmp_html_tag); }
|
151
|
+
|
152
|
+
|
153
|
+
|
154
|
+
/****************************
|
155
|
+
* INLINE PARSING FUNCTIONS *
|
156
|
+
****************************/
|
157
|
+
|
158
|
+
/* is_mail_autolink • looks for the address part of a mail autolink and '>' */
/* this is less strict than the original markdown e-mail address matching */
/*	returns the number of chars up to and including the closing '>', */
/*	or 0 when the span is not an address followed by '>' */
static size_t
is_mail_autolink(char *data, size_t size) {
	size_t pos;
	size_t at_count = 0;

	/* address is assumed to be: [-@._a-zA-Z0-9]+ with exactly one '@' */
	for (pos = 0; pos < size; pos += 1) {
		char ch = data[pos];

		if (ch == '@') {
			at_count += 1;
			continue;
		}
		if (ch == '-' || ch == '.' || ch == '_')
			continue;
		if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'))
			continue;
		if (ch >= '0' && ch <= '9')
			continue;
		break;
	}

	/* the address must be closed by '>' inside the span */
	if (pos >= size)
		return 0;
	if (data[pos] != '>')
		return 0;
	if (at_count != 1)
		return 0;
	return pos + 1;
}
|
173
|
+
|
174
|
+
|
175
|
+
/* tag_length • returns the length of the given tag, or 0 is it's not valid */
|
176
|
+
static size_t
|
177
|
+
tag_length(char *data, size_t size, enum mkd_autolink *autolink) {
|
178
|
+
size_t i, j;
|
179
|
+
|
180
|
+
/* a valid tag can't be shorter than 3 chars */
|
181
|
+
if (size < 3) return 0;
|
182
|
+
|
183
|
+
/* begins with a '<' optionally followed by '/', followed by letter */
|
184
|
+
if (data[0] != '<') return 0;
|
185
|
+
i = (data[1] == '/') ? 2 : 1;
|
186
|
+
if ((data[i] < 'a' || data[i] > 'z')
|
187
|
+
&& (data[i] < 'A' || data[i] > 'Z')) return 0;
|
188
|
+
|
189
|
+
/* scheme test */
|
190
|
+
*autolink = MKDA_NOT_AUTOLINK;
|
191
|
+
|
192
|
+
/* try to find the beggining of an URI */
|
193
|
+
while (i < size && (isalpha(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-'))
|
194
|
+
i++;
|
195
|
+
|
196
|
+
if (i > 2 && data[i] == ':') {
|
197
|
+
*autolink = MKDA_NORMAL;
|
198
|
+
i++;
|
199
|
+
}
|
200
|
+
|
201
|
+
/*
|
202
|
+
* FIXME: check for double slashes after the URI id?
|
203
|
+
* There are some protocols that don't have them, e.g.
|
204
|
+
* news:resource
|
205
|
+
*/
|
206
|
+
|
207
|
+
/* completing autolink test: no whitespace or ' or " */
|
208
|
+
if (i >= size || i == '>')
|
209
|
+
*autolink = MKDA_NOT_AUTOLINK;
|
210
|
+
else if (*autolink) {
|
211
|
+
j = i;
|
212
|
+
while (i < size && data[i] != '>' && data[i] != '\''
|
213
|
+
&& data[i] != '"' && data[i] != ' ' && data[i] != '\t'
|
214
|
+
&& data[i] != '\t')
|
215
|
+
i += 1;
|
216
|
+
if (i >= size) return 0;
|
217
|
+
if (i > j && data[i] == '>') return i + 1;
|
218
|
+
/* one of the forbidden chars has been found */
|
219
|
+
*autolink = MKDA_NOT_AUTOLINK; }
|
220
|
+
else if ((j = is_mail_autolink(data + i, size - i)) != 0) {
|
221
|
+
*autolink = (i == 8)
|
222
|
+
? MKDA_EXPLICIT_EMAIL : MKDA_IMPLICIT_EMAIL;
|
223
|
+
return i + j; }
|
224
|
+
|
225
|
+
/* looking for sometinhg looking like a tag end */
|
226
|
+
while (i < size && data[i] != '>') i += 1;
|
227
|
+
if (i >= size) return 0;
|
228
|
+
return i + 1; }
|
229
|
+
|
230
|
+
|
231
|
+
/* parse_inline • parses inline markdown elements */
|
232
|
+
static void
|
233
|
+
parse_inline(struct buf *ob, struct render *rndr, char *data, size_t size) {
|
234
|
+
size_t i = 0, end = 0;
|
235
|
+
char_trigger action = 0;
|
236
|
+
struct buf work = { 0, 0, 0, 0, 0 };
|
237
|
+
|
238
|
+
while (i < size) {
|
239
|
+
/* copying inactive chars into the output */
|
240
|
+
while (end < size
|
241
|
+
&& (action = rndr->active_char[(unsigned char)data[end]]) == 0)
|
242
|
+
end += 1;
|
243
|
+
if (rndr->make.normal_text) {
|
244
|
+
work.data = data + i;
|
245
|
+
work.size = end - i;
|
246
|
+
rndr->make.normal_text(ob, &work, &rndr->make.render_options);
|
247
|
+
}
|
248
|
+
else
|
249
|
+
bufput(ob, data + i, end - i);
|
250
|
+
if (end >= size) break;
|
251
|
+
i = end;
|
252
|
+
|
253
|
+
/* calling the trigger */
|
254
|
+
end = action(ob, rndr, data + i, i, size - i);
|
255
|
+
if (!end) /* no action from the callback */
|
256
|
+
end = i + 1;
|
257
|
+
else {
|
258
|
+
i += end;
|
259
|
+
end = i; } } }
|
260
|
+
|
261
|
+
|
262
|
+
/* find_emph_char • looks for the next emph char, skipping other constructs */
/*	data/size  span to search; the char at index 0 is never examined */
/*	c          emphasis char being looked for */
/*	returns the index of the next c (always < size), or 0 when none is */
/*	found; code spans and links are skipped, but a c seen inside an */
/*	unterminated one is returned as a fallback */
static size_t
find_emph_char(char *data, size_t size, char c) {
	size_t i = 1;

	while (i < size) {
		while (i < size && data[i] != c
		&& data[i] != '`' && data[i] != '[')
			i += 1;

		/* nothing of interest before the end of the span: stop here */
		/* instead of reading data[size], which is out of bounds */
		if (i >= size) return 0;

		if (data[i] == c) return i;

		/* not counting escaped chars */
		if (i && data[i - 1] == '\\') { i += 1; continue; }

		/* skipping a code span */
		if (data[i] == '`') {
			size_t tmp_i = 0;
			i += 1;
			while (i < size && data[i] != '`') {
				if (!tmp_i && data[i] == c) tmp_i = i;
				i += 1; }
			if (i >= size) return tmp_i;
			i += 1; }

		/* skipping a link */
		else if (data[i] == '[') {
			size_t tmp_i = 0;
			char cc;
			i += 1;
			while (i < size && data[i] != ']') {
				if (!tmp_i && data[i] == c) tmp_i = i;
				i += 1; }
			i += 1;
			while (i < size && (data[i] == ' '
			|| data[i] == '\t' || data[i] == '\n'))
				i += 1;
			if (i >= size) return tmp_i;
			if (data[i] != '[' && data[i] != '(') { /* not a link */
				if (tmp_i) return tmp_i;
				else continue; }
			cc = data[i];
			i += 1;
			while (i < size && data[i] != cc) {
				if (!tmp_i && data[i] == c) tmp_i = i;
				i += 1; }
			if (i >= size) return tmp_i;
			i += 1; } }
	return 0; }
|
310
|
+
|
311
|
+
|
312
|
+
/* parse_emph1 • parsing single emphase */
|
313
|
+
/* closed by a symbol not preceded by whitespace and not followed by symbol */
|
314
|
+
static size_t
|
315
|
+
parse_emph1(struct buf *ob, struct render *rndr,
|
316
|
+
char *data, size_t size, char c) {
|
317
|
+
size_t i = 0, len;
|
318
|
+
struct buf *work = 0;
|
319
|
+
int r;
|
320
|
+
|
321
|
+
if (!rndr->make.emphasis) return 0;
|
322
|
+
|
323
|
+
/* skipping one symbol if coming from emph3 */
|
324
|
+
if (size > 1 && data[0] == c && data[1] == c) i = 1;
|
325
|
+
|
326
|
+
while (i < size) {
|
327
|
+
len = find_emph_char(data + i, size - i, c);
|
328
|
+
if (!len) return 0;
|
329
|
+
i += len;
|
330
|
+
if (i >= size) return 0;
|
331
|
+
|
332
|
+
if (i + 1 < size && data[i + 1] == c) {
|
333
|
+
i += 1;
|
334
|
+
continue;
|
335
|
+
}
|
336
|
+
|
337
|
+
if (data[i] == c && !isspace(data[i - 1])) {
|
338
|
+
|
339
|
+
if ((rndr->make.parser_options.flags & PARSER_STRICT) == 0) {
|
340
|
+
if (!(i + 1 == size || isspace(data[i + 1]) || ispunct(data[i + 1])))
|
341
|
+
continue;
|
342
|
+
}
|
343
|
+
|
344
|
+
if (rndr->work.size < rndr->work.asize) {
|
345
|
+
work = rndr->work.item[rndr->work.size ++];
|
346
|
+
work->size = 0;
|
347
|
+
} else {
|
348
|
+
work = bufnew(WORK_UNIT);
|
349
|
+
parr_push(&rndr->work, work);
|
350
|
+
}
|
351
|
+
|
352
|
+
parse_inline(work, rndr, data, i);
|
353
|
+
r = rndr->make.emphasis(ob, work, c, &rndr->make.render_options);
|
354
|
+
rndr->work.size -= 1;
|
355
|
+
return r ? i + 1 : 0;
|
356
|
+
}
|
357
|
+
}
|
358
|
+
|
359
|
+
return 0;
|
360
|
+
}
|
361
|
+
|
362
|
+
|
363
|
+
/* parse_emph2 • parsing single emphase */
|
364
|
+
static size_t
|
365
|
+
parse_emph2(struct buf *ob, struct render *rndr,
|
366
|
+
char *data, size_t size, char c) {
|
367
|
+
size_t i = 0, len;
|
368
|
+
struct buf *work = 0;
|
369
|
+
int r;
|
370
|
+
|
371
|
+
if (!rndr->make.double_emphasis) return 0;
|
372
|
+
|
373
|
+
while (i < size) {
|
374
|
+
len = find_emph_char(data + i, size - i, c);
|
375
|
+
if (!len) return 0;
|
376
|
+
i += len;
|
377
|
+
if (i + 1 < size && data[i] == c && data[i + 1] == c
|
378
|
+
&& i && data[i - 1] != ' '
|
379
|
+
&& data[i - 1] != '\t' && data[i - 1] != '\n') {
|
380
|
+
if (rndr->work.size < rndr->work.asize) {
|
381
|
+
work = rndr->work.item[rndr->work.size ++];
|
382
|
+
work->size = 0; }
|
383
|
+
else {
|
384
|
+
work = bufnew(WORK_UNIT);
|
385
|
+
parr_push(&rndr->work, work); }
|
386
|
+
parse_inline(work, rndr, data, i);
|
387
|
+
r = rndr->make.double_emphasis(ob, work, c,
|
388
|
+
&rndr->make.render_options);
|
389
|
+
rndr->work.size -= 1;
|
390
|
+
return r ? i + 2 : 0; }
|
391
|
+
i += 1; }
|
392
|
+
return 0; }
|
393
|
+
|
394
|
+
|
395
|
+
/* parse_emph3 • parsing single emphase */
|
396
|
+
/* finds the first closing tag, and delegates to the other emph */
|
397
|
+
static size_t
|
398
|
+
parse_emph3(struct buf *ob, struct render *rndr,
|
399
|
+
char *data, size_t size, char c) {
|
400
|
+
size_t i = 0, len;
|
401
|
+
int r;
|
402
|
+
|
403
|
+
while (i < size) {
|
404
|
+
len = find_emph_char(data + i, size - i, c);
|
405
|
+
if (!len) return 0;
|
406
|
+
i += len;
|
407
|
+
|
408
|
+
/* skip whitespace preceded symbols */
|
409
|
+
if (data[i] != c || data[i - 1] == ' '
|
410
|
+
|| data[i - 1] == '\t' || data[i - 1] == '\n')
|
411
|
+
continue;
|
412
|
+
|
413
|
+
if (i + 2 < size && data[i + 1] == c && data[i + 2] == c
|
414
|
+
&& rndr->make.triple_emphasis) {
|
415
|
+
/* triple symbol found */
|
416
|
+
struct buf *work = 0;
|
417
|
+
if (rndr->work.size < rndr->work.asize) {
|
418
|
+
work = rndr->work.item[rndr->work.size ++];
|
419
|
+
work->size = 0; }
|
420
|
+
else {
|
421
|
+
work = bufnew(WORK_UNIT);
|
422
|
+
parr_push(&rndr->work, work); }
|
423
|
+
parse_inline(work, rndr, data, i);
|
424
|
+
r = rndr->make.triple_emphasis(ob, work, c,
|
425
|
+
&rndr->make.render_options);
|
426
|
+
rndr->work.size -= 1;
|
427
|
+
return r ? i + 3 : 0; }
|
428
|
+
else if (i + 1 < size && data[i + 1] == c) {
|
429
|
+
/* double symbol found, handing over to emph1 */
|
430
|
+
len = parse_emph1(ob, rndr, data - 2, size + 2, c);
|
431
|
+
if (!len) return 0;
|
432
|
+
else return len - 2; }
|
433
|
+
else {
|
434
|
+
/* single symbol found, handing over to emph2 */
|
435
|
+
len = parse_emph2(ob, rndr, data - 1, size + 1, c);
|
436
|
+
if (!len) return 0;
|
437
|
+
else return len - 1; } }
|
438
|
+
return 0; }
|
439
|
+
|
440
|
+
|
441
|
+
/* char_emphasis • single and double emphasis parsing */
/*	dispatches on the number of leading symbols (1, 2 or 3) and returns */
/*	the number of chars consumed, opening symbols included, or 0; */
/*	offset is not used */
static size_t
char_emphasis(struct buf *ob, struct render *rndr,
				char *data, size_t offset, size_t size) {
	char c = data[0];
	size_t ret;

	/* one symbol */
	if (size > 2 && data[1] != c) {
		/* whitespace cannot follow an opening emphasis */
		if (data[1] == ' ' || data[1] == '\t' || data[1] == '\n')
			return 0;
		ret = parse_emph1(ob, rndr, data + 1, size - 1, c);
		if (ret == 0)
			return 0;
		return ret + 1;
	}

	/* two symbols */
	if (size > 3 && data[1] == c && data[2] != c) {
		if (data[2] == ' ' || data[2] == '\t' || data[2] == '\n')
			return 0;
		ret = parse_emph2(ob, rndr, data + 2, size - 2, c);
		if (ret == 0)
			return 0;
		return ret + 2;
	}

	/* three symbols */
	if (size > 4 && data[1] == c && data[2] == c && data[3] != c) {
		if (data[3] == ' ' || data[3] == '\t' || data[3] == '\n')
			return 0;
		ret = parse_emph3(ob, rndr, data + 3, size - 3, c);
		if (ret == 0)
			return 0;
		return ret + 3;
	}

	return 0;
}
|
464
|
+
|
465
|
+
|
466
|
+
/* char_linebreak • '\n' preceded by two spaces (assuming linebreak != 0) */
|
467
|
+
static size_t
|
468
|
+
char_linebreak(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size) {
|
469
|
+
|
470
|
+
if (offset < 2 || data[-1] != ' ' || data[-2] != ' ')
|
471
|
+
return 0;
|
472
|
+
|
473
|
+
/* removing the last space from ob and rendering */
|
474
|
+
while (ob->size && ob->data[ob->size - 1] == ' ')
|
475
|
+
ob->size--;
|
476
|
+
|
477
|
+
return rndr->make.linebreak(ob, &rndr->make.render_options) ? 1 : 0;
|
478
|
+
}
|
479
|
+
|
480
|
+
|
481
|
+
/* char_codespan • '`' parsing a code span (assuming codespan != 0) */
|
482
|
+
static size_t
|
483
|
+
char_codespan(struct buf *ob, struct render *rndr,
|
484
|
+
char *data, size_t offset, size_t size) {
|
485
|
+
size_t end, nb = 0, i, f_begin, f_end;
|
486
|
+
|
487
|
+
/* counting the number of backticks in the delimiter */
|
488
|
+
while (nb < size && data[nb] == '`') nb += 1;
|
489
|
+
|
490
|
+
/* finding the next delimiter */
|
491
|
+
i = 0;
|
492
|
+
for (end = nb; end < size && i < nb; end += 1)
|
493
|
+
if (data[end] == '`') i += 1;
|
494
|
+
else i = 0;
|
495
|
+
if (i < nb && end >= size) return 0; /* no matching delimiter */
|
496
|
+
|
497
|
+
/* trimming outside whitespaces */
|
498
|
+
f_begin = nb;
|
499
|
+
while (f_begin < end && (data[f_begin] == ' ' || data[f_begin] == '\t'))
|
500
|
+
f_begin += 1;
|
501
|
+
f_end = end - nb;
|
502
|
+
while (f_end > nb && (data[f_end-1] == ' ' || data[f_end-1] == '\t'))
|
503
|
+
f_end -= 1;
|
504
|
+
|
505
|
+
/* real code span */
|
506
|
+
if (f_begin < f_end) {
|
507
|
+
struct buf work = { data + f_begin, f_end - f_begin, 0, 0, 0 };
|
508
|
+
if (!rndr->make.codespan(ob, &work, &rndr->make.render_options))
|
509
|
+
end = 0; }
|
510
|
+
else {
|
511
|
+
if (!rndr->make.codespan(ob, 0, &rndr->make.render_options))
|
512
|
+
end = 0; }
|
513
|
+
return end; }
|
514
|
+
|
515
|
+
|
516
|
+
/* char_escape • '\\' backslash escape */
|
517
|
+
static size_t
|
518
|
+
char_escape(struct buf *ob, struct render *rndr,
|
519
|
+
char *data, size_t offset, size_t size) {
|
520
|
+
struct buf work = { 0, 0, 0, 0, 0 };
|
521
|
+
if (size > 1) {
|
522
|
+
if (rndr->make.normal_text) {
|
523
|
+
work.data = data + 1;
|
524
|
+
work.size = 1;
|
525
|
+
rndr->make.normal_text(ob, &work, &rndr->make.render_options); }
|
526
|
+
else bufputc(ob, data[1]); }
|
527
|
+
return 2; }
|
528
|
+
|
529
|
+
|
530
|
+
/* char_entity • '&' escaped when it doesn't belong to an entity */
|
531
|
+
/* valid entities are assumed to be anything mathing &#?[A-Za-z0-9]+; */
|
532
|
+
static size_t
|
533
|
+
char_entity(struct buf *ob, struct render *rndr,
|
534
|
+
char *data, size_t offset, size_t size) {
|
535
|
+
size_t end = 1;
|
536
|
+
struct buf work;
|
537
|
+
if (end < size && data[end] == '#') end += 1;
|
538
|
+
while (end < size
|
539
|
+
&& ((data[end] >= '0' && data[end] <= '9')
|
540
|
+
|| (data[end] >= 'a' && data[end] <= 'z')
|
541
|
+
|| (data[end] >= 'A' && data[end] <= 'Z')))
|
542
|
+
end += 1;
|
543
|
+
if (end < size && data[end] == ';') {
|
544
|
+
/* real entity */
|
545
|
+
end += 1; }
|
546
|
+
else {
|
547
|
+
/* lone '&' */
|
548
|
+
return 0; }
|
549
|
+
if (rndr->make.entity) {
|
550
|
+
work.data = data;
|
551
|
+
work.size = end;
|
552
|
+
rndr->make.entity(ob, &work, &rndr->make.render_options); }
|
553
|
+
else bufput(ob, data, end);
|
554
|
+
return end; }
|
555
|
+
|
556
|
+
|
557
|
+
/* char_langle_tag • '<' when tags or autolinks are allowed */
|
558
|
+
static size_t
|
559
|
+
char_langle_tag(struct buf *ob, struct render *rndr,
|
560
|
+
char *data, size_t offset, size_t size) {
|
561
|
+
enum mkd_autolink altype = MKDA_NOT_AUTOLINK;
|
562
|
+
size_t end = tag_length(data, size, &altype);
|
563
|
+
struct buf work = { data, end, 0, 0, 0 };
|
564
|
+
int ret = 0;
|
565
|
+
if (end) {
|
566
|
+
if (rndr->make.autolink && altype != MKDA_NOT_AUTOLINK) {
|
567
|
+
work.data = data + 1;
|
568
|
+
work.size = end - 2;
|
569
|
+
ret = rndr->make.autolink(ob, &work, altype,
|
570
|
+
&rndr->make.render_options); }
|
571
|
+
else if (rndr->make.raw_html_tag)
|
572
|
+
ret = rndr->make.raw_html_tag(ob, &work,
|
573
|
+
&rndr->make.render_options); }
|
574
|
+
if (!ret) return 0;
|
575
|
+
else return end; }
|
576
|
+
|
577
|
+
|
578
|
+
/* char_link • '[': parsing a link or an image */
|
579
|
+
static size_t
|
580
|
+
char_link(struct buf *ob, struct render *rndr,
|
581
|
+
char *data, size_t offset, size_t size) {
|
582
|
+
int is_img = (offset && data[-1] == '!'), level;
|
583
|
+
size_t i = 1, txt_e, link_b = 0, link_e = 0, title_b = 0, title_e = 0;
|
584
|
+
struct buf *content = 0;
|
585
|
+
struct buf *link = 0;
|
586
|
+
struct buf *title = 0;
|
587
|
+
size_t org_work_size = rndr->work.size;
|
588
|
+
int text_has_nl = 0, ret;
|
589
|
+
|
590
|
+
/* checking whether the correct renderer exists */
|
591
|
+
if ((is_img && !rndr->make.image) || (!is_img && !rndr->make.link))
|
592
|
+
return 0;
|
593
|
+
|
594
|
+
/* looking for the matching closing bracket */
|
595
|
+
for (level = 1; i < size; i += 1)
|
596
|
+
if (data[i] == '\n') text_has_nl = 1;
|
597
|
+
else if (data[i - 1] == '\\') continue;
|
598
|
+
else if (data[i] == '[') level += 1;
|
599
|
+
else if (data[i] == ']') {
|
600
|
+
level -= 1;
|
601
|
+
if (level <= 0) break; }
|
602
|
+
if (i >= size) return 0;
|
603
|
+
txt_e = i;
|
604
|
+
i += 1;
|
605
|
+
|
606
|
+
/* skip any amount of whitespace or newline */
|
607
|
+
/* (this is much more laxist than original markdown syntax) */
|
608
|
+
while (i < size
|
609
|
+
&& (data[i] == ' ' || data[i] == '\t' || data[i] == '\n'))
|
610
|
+
i += 1;
|
611
|
+
|
612
|
+
/* inline style link */
|
613
|
+
if (i < size && data[i] == '(') {
|
614
|
+
/* skipping initial whitespace */
|
615
|
+
i += 1;
|
616
|
+
while (i < size && (data[i] == ' ' || data[i] == '\t')) i += 1;
|
617
|
+
link_b = i;
|
618
|
+
|
619
|
+
/* looking for link end: ' " ) */
|
620
|
+
while (i < size
|
621
|
+
&& data[i] != '\'' && data[i] != '"' && data[i] != ')')
|
622
|
+
i += 1;
|
623
|
+
if (i >= size) return 0;
|
624
|
+
link_e = i;
|
625
|
+
|
626
|
+
/* looking for title end if present */
|
627
|
+
if (data[i] == '\'' || data[i] == '"') {
|
628
|
+
i += 1;
|
629
|
+
title_b = i;
|
630
|
+
while (i < size && data[i] != ')')
|
631
|
+
i += 1;
|
632
|
+
if (i >= size) return 0;
|
633
|
+
|
634
|
+
/* skipping whitespaces after title */
|
635
|
+
title_e = i - 1;
|
636
|
+
while (title_e > title_b && (data[title_e] == ' '
|
637
|
+
|| data[title_e] == '\t' || data[title_e] == '\n'))
|
638
|
+
title_e -= 1;
|
639
|
+
|
640
|
+
/* checking for closing quote presence */
|
641
|
+
if (data[title_e] != '\'' && data[title_e] != '"') {
|
642
|
+
title_b = title_e = 0;
|
643
|
+
link_e = i; } }
|
644
|
+
|
645
|
+
/* remove whitespace at the end of the link */
|
646
|
+
while (link_e > link_b
|
647
|
+
&& (data[link_e - 1] == ' ' || data[link_e - 1] == '\t'))
|
648
|
+
link_e -= 1;
|
649
|
+
|
650
|
+
/* remove optional angle brackets around the link */
|
651
|
+
if (data[link_b] == '<') link_b += 1;
|
652
|
+
if (data[link_e - 1] == '>') link_e -= 1;
|
653
|
+
|
654
|
+
/* building escaped link and title */
|
655
|
+
if (link_e > link_b) {
|
656
|
+
if (rndr->work.size < rndr->work.asize) {
|
657
|
+
link = rndr->work.item[rndr->work.size ++];
|
658
|
+
link->size = 0; }
|
659
|
+
else {
|
660
|
+
link = bufnew(WORK_UNIT);
|
661
|
+
parr_push(&rndr->work, link); }
|
662
|
+
bufput(link, data + link_b, link_e - link_b); }
|
663
|
+
if (title_e > title_b) {
|
664
|
+
if (rndr->work.size < rndr->work.asize) {
|
665
|
+
title = rndr->work.item[rndr->work.size ++];
|
666
|
+
title->size = 0; }
|
667
|
+
else {
|
668
|
+
title = bufnew(WORK_UNIT);
|
669
|
+
parr_push(&rndr->work, title); }
|
670
|
+
bufput(title, data + title_b, title_e - title_b);}
|
671
|
+
|
672
|
+
i += 1; }
|
673
|
+
|
674
|
+
/* reference style link */
|
675
|
+
else if (i < size && data[i] == '[') {
|
676
|
+
struct buf id = { 0, 0, 0, 0, 0 };
|
677
|
+
struct link_ref *lr;
|
678
|
+
|
679
|
+
/* looking for the id */
|
680
|
+
i += 1;
|
681
|
+
link_b = i;
|
682
|
+
while (i < size && data[i] != ']') i += 1;
|
683
|
+
if (i >= size) return 0;
|
684
|
+
link_e = i;
|
685
|
+
|
686
|
+
/* finding the link_ref */
|
687
|
+
if (link_b == link_e) {
|
688
|
+
if (text_has_nl) {
|
689
|
+
struct buf *b = 0;
|
690
|
+
size_t j;
|
691
|
+
if (rndr->work.size < rndr->work.asize) {
|
692
|
+
b = rndr->work.item[rndr->work.size ++];
|
693
|
+
b->size = 0; }
|
694
|
+
else {
|
695
|
+
b = bufnew(WORK_UNIT);
|
696
|
+
parr_push(&rndr->work, b); }
|
697
|
+
for (j = 1; j < txt_e; j += 1)
|
698
|
+
if (data[j] != '\n')
|
699
|
+
bufputc(b, data[j]);
|
700
|
+
else if (data[j - 1] != ' ')
|
701
|
+
bufputc(b, ' ');
|
702
|
+
id.data = b->data;
|
703
|
+
id.size = b->size; }
|
704
|
+
else {
|
705
|
+
id.data = data + 1;
|
706
|
+
id.size = txt_e - 1; } }
|
707
|
+
else {
|
708
|
+
id.data = data + link_b;
|
709
|
+
id.size = link_e - link_b; }
|
710
|
+
lr = arr_sorted_find(&rndr->refs, &id, cmp_link_ref);
|
711
|
+
if (!lr) return 0;
|
712
|
+
|
713
|
+
/* keeping link and title from link_ref */
|
714
|
+
link = lr->link;
|
715
|
+
title = lr->title;
|
716
|
+
i += 1; }
|
717
|
+
|
718
|
+
/* shortcut reference style link */
|
719
|
+
else {
|
720
|
+
struct buf id = { 0, 0, 0, 0, 0 };
|
721
|
+
struct link_ref *lr;
|
722
|
+
|
723
|
+
/* crafting the id */
|
724
|
+
if (text_has_nl) {
|
725
|
+
struct buf *b = 0;
|
726
|
+
size_t j;
|
727
|
+
if (rndr->work.size < rndr->work.asize) {
|
728
|
+
b = rndr->work.item[rndr->work.size ++];
|
729
|
+
b->size = 0; }
|
730
|
+
else {
|
731
|
+
b = bufnew(WORK_UNIT);
|
732
|
+
parr_push(&rndr->work, b); }
|
733
|
+
for (j = 1; j < txt_e; j += 1)
|
734
|
+
if (data[j] != '\n')
|
735
|
+
bufputc(b, data[j]);
|
736
|
+
else if (data[j - 1] != ' ')
|
737
|
+
bufputc(b, ' ');
|
738
|
+
id.data = b->data;
|
739
|
+
id.size = b->size; }
|
740
|
+
else {
|
741
|
+
id.data = data + 1;
|
742
|
+
id.size = txt_e - 1; }
|
743
|
+
|
744
|
+
/* finding the link_ref */
|
745
|
+
lr = arr_sorted_find(&rndr->refs, &id, cmp_link_ref);
|
746
|
+
if (!lr) return 0;
|
747
|
+
|
748
|
+
/* keeping link and title from link_ref */
|
749
|
+
link = lr->link;
|
750
|
+
title = lr->title;
|
751
|
+
|
752
|
+
/* rewinding the whitespace */
|
753
|
+
i = txt_e + 1; }
|
754
|
+
|
755
|
+
/* building content: img alt is escaped, link content is parsed */
|
756
|
+
if (txt_e > 1) {
|
757
|
+
if (rndr->work.size < rndr->work.asize) {
|
758
|
+
content = rndr->work.item[rndr->work.size ++];
|
759
|
+
content->size = 0; }
|
760
|
+
else {
|
761
|
+
content = bufnew(WORK_UNIT);
|
762
|
+
parr_push(&rndr->work, content); }
|
763
|
+
if (is_img) bufput(content, data + 1, txt_e - 1);
|
764
|
+
else parse_inline(content, rndr, data + 1, txt_e - 1); }
|
765
|
+
|
766
|
+
/* calling the relevant rendering function */
|
767
|
+
ret = 0;
|
768
|
+
if (is_img) {
|
769
|
+
if (ob->size && ob->data[ob->size - 1] == '!') ob->size -= 1;
|
770
|
+
ret = rndr->make.image(ob, link, title, content,
|
771
|
+
&rndr->make.render_options); }
|
772
|
+
else ret = rndr->make.link(ob, link, title, content, &rndr->make.render_options);
|
773
|
+
|
774
|
+
/* cleanup */
|
775
|
+
rndr->work.size = (int)org_work_size;
|
776
|
+
return ret ? i : 0;
|
777
|
+
}
|
778
|
+
|
779
|
+
|
780
|
+
|
781
|
+
/*********************************
|
782
|
+
* BLOCK-LEVEL PARSING FUNCTIONS *
|
783
|
+
*********************************/
|
784
|
+
|
785
|
+
/* is_empty • tells whether the first line of data is blank.
 * Returns 0 as soon as a character other than space or tab is met before
 * the newline. Otherwise returns the offset of the newline (or size when
 * there is none) plus one, which is always non-zero. */
static size_t
is_empty(char *data, size_t size)
{
	size_t pos = 0;

	while (pos < size && data[pos] != '\n') {
		if (data[pos] != ' ' && data[pos] != '\t')
			return 0;
		pos++;
	}
	return pos + 1;
}
|
792
|
+
|
793
|
+
|
794
|
+
/* is_hrule • returns whether the first line of data is a horizontal rule.
 * A rule is up to three leading spaces followed by at least three
 * occurrences of the same marker ('*', '-' or '_'), with nothing else on
 * the line but spaces and tabs. Returns 1 for a rule, 0 otherwise. */
static int
is_hrule(char *data, size_t size)
{
	size_t pos = 0, marks = 0;
	char mark;

	if (size < 3)
		return 0;

	/* up to three leading spaces are tolerated (size >= 3 keeps this in bounds) */
	while (pos < 3 && data[pos] == ' ')
		pos++;

	/* at least three characters must remain, the first being a marker */
	if (pos + 2 >= size)
		return 0;
	mark = data[pos];
	if (mark != '*' && mark != '-' && mark != '_')
		return 0;

	/* the rest of the line may only hold the marker or blanks */
	for (; pos < size && data[pos] != '\n'; pos++) {
		if (data[pos] == mark)
			marks++;
		else if (data[pos] != ' ' && data[pos] != '\t')
			return 0;
	}

	return marks >= 3;
}
|
820
|
+
|
821
|
+
|
822
|
+
/* is_headerline • returns whether the line is a setext-style hdr underline.
 * The line must start at column 0 with a run of '=' (level 1) or '-'
 * (level 2), optionally followed by spaces/tabs, and nothing else before
 * the newline or the end of the data.
 * Returns the header level (1 or 2), or 0 when the line is not an
 * underline. An empty slice (size == 0) is never an underline. */
static int
is_headerline(char *data, size_t size)
{
	size_t i;
	char mark;
	int level;

	/* nothing to look at: do not touch data[0] */
	if (size == 0)
		return 0;

	mark = data[0];
	if (mark == '=')
		level = 1;
	else if (mark == '-')
		level = 2;
	else
		return 0;

	/* run of underline characters, then optional trailing blanks */
	for (i = 1; i < size && data[i] == mark; i += 1);
	while (i < size && (data[i] == ' ' || data[i] == '\t'))
		i += 1;

	return (i >= size || data[i] == '\n') ? level : 0;
}
|
840
|
+
|
841
|
+
|
842
|
+
/* prefix_quote • returns blockquote prefix length.
 * The prefix is up to three spaces, a '>' and one optional space or tab.
 * Returns 0 when the data does not start with such a prefix. */
static size_t
prefix_quote(char *data, size_t size)
{
	size_t pos = 0;

	/* optional indentation, three spaces at most */
	while (pos < 3 && pos < size && data[pos] == ' ')
		pos++;

	if (pos >= size || data[pos] != '>')
		return 0;
	pos++;

	/* a single blank right after the marker belongs to the prefix */
	if (pos < size && (data[pos] == ' ' || data[pos] == '\t'))
		pos++;

	return pos;
}
|
854
|
+
|
855
|
+
|
856
|
+
/* prefix_code • returns prefix length for block code.
 * A code line starts with one tab (length 1) or four spaces (length 4);
 * anything else yields 0. */
static size_t
prefix_code(char *data, size_t size)
{
	size_t k;

	if (size == 0)
		return 0;
	if (data[0] == '\t')
		return 1;
	if (size < 4)
		return 0;

	for (k = 0; k < 4; k++)
		if (data[k] != ' ')
			return 0;
	return 4;
}
|
863
|
+
|
864
|
+
/* prefix_oli • returns ordered list item prefix length.
 * The prefix is up to three spaces, one or more digits, a '.', and one
 * space or tab. Returns 0 when the data does not start with such a
 * prefix. */
static size_t
prefix_oli(char *data, size_t size)
{
	size_t pos = 0;

	/* optional indentation, three spaces at most */
	while (pos < 3 && pos < size && data[pos] == ' ')
		pos++;

	/* at least one digit is mandatory */
	if (pos >= size || data[pos] < '0' || data[pos] > '9')
		return 0;
	do {
		pos++;
	} while (pos < size && data[pos] >= '0' && data[pos] <= '9');

	/* the number must be followed by ". " or ".\t" */
	if (pos + 1 >= size)
		return 0;
	if (data[pos] != '.')
		return 0;
	if (data[pos + 1] != ' ' && data[pos + 1] != '\t')
		return 0;

	return pos + 2;
}
|
876
|
+
|
877
|
+
|
878
|
+
/* prefix_uli • returns unordered list item prefix length.
 * The prefix is up to three spaces, a bullet ('*', '+' or '-'), and one
 * space or tab. Returns 0 when the data does not start with such a
 * prefix. */
static size_t
prefix_uli(char *data, size_t size)
{
	size_t pos = 0;

	/* optional indentation, three spaces at most */
	while (pos < 3 && pos < size && data[pos] == ' ')
		pos++;

	/* both the bullet and the blank after it must be present */
	if (pos + 1 >= size)
		return 0;

	switch (data[pos]) {
	case '*':
	case '+':
	case '-':
		break;
	default:
		return 0;
	}

	if (data[pos + 1] != ' ' && data[pos + 1] != '\t')
		return 0;

	return pos + 2;
}
|
890
|
+
|
891
|
+
|
892
|
+
/* parse_block • forward declaration: the block-level parsers below call
 * parse_block on nested content before its definition appears.
 * It renders every block found in data[0..size) into ob and returns
 * nothing. */
static void
parse_block(struct buf *ob, struct render *rndr, char *data, size_t size, int depth);
|
895
|
+
|
896
|
+
|
897
|
+
/* parse_blockquote • hanldes parsing of a blockquote fragment */
|
898
|
+
static size_t
|
899
|
+
parse_blockquote(struct buf *ob, struct render *rndr, char *data, size_t size, int depth) {
|
900
|
+
size_t beg, end = 0, pre, work_size = 0;
|
901
|
+
char *work_data = 0;
|
902
|
+
struct buf *out = 0;
|
903
|
+
|
904
|
+
if (rndr->work.size < rndr->work.asize) {
|
905
|
+
out = rndr->work.item[rndr->work.size ++];
|
906
|
+
out->size = 0; }
|
907
|
+
else {
|
908
|
+
out = bufnew(WORK_UNIT);
|
909
|
+
parr_push(&rndr->work, out); }
|
910
|
+
|
911
|
+
beg = 0;
|
912
|
+
while (beg < size) {
|
913
|
+
for (end = beg + 1; end < size && data[end - 1] != '\n';
|
914
|
+
end += 1);
|
915
|
+
pre = prefix_quote(data + beg, end - beg);
|
916
|
+
if (pre) beg += pre; /* skipping prefix */
|
917
|
+
else if (is_empty(data + beg, end - beg)
|
918
|
+
&& (end >= size || (prefix_quote(data + end, size - end) == 0
|
919
|
+
&& !is_empty(data + end, size - end))))
|
920
|
+
/* empty line followed by non-quote line */
|
921
|
+
break;
|
922
|
+
if (beg < end) { /* copy into the in-place working buffer */
|
923
|
+
/* bufput(work, data + beg, end - beg); */
|
924
|
+
if (!work_data)
|
925
|
+
work_data = data + beg;
|
926
|
+
else if (data + beg != work_data + work_size)
|
927
|
+
memmove(work_data + work_size, data + beg,
|
928
|
+
end - beg);
|
929
|
+
work_size += end - beg; }
|
930
|
+
beg = end; }
|
931
|
+
|
932
|
+
parse_block(out, rndr, work_data, work_size, depth + 1);
|
933
|
+
if (rndr->make.blockquote)
|
934
|
+
rndr->make.blockquote(ob, out, &rndr->make.render_options);
|
935
|
+
rndr->work.size -= 1;
|
936
|
+
return end;
|
937
|
+
}
|
938
|
+
|
939
|
+
|
940
|
+
/* parse_blockquote • hanldes parsing of a regular paragraph */
|
941
|
+
static size_t
|
942
|
+
parse_paragraph(struct buf *ob, struct render *rndr,
|
943
|
+
char *data, size_t size) {
|
944
|
+
size_t i = 0, end = 0;
|
945
|
+
int level = 0;
|
946
|
+
struct buf work = { data, 0, 0, 0, 0 }; /* volatile working buffer */
|
947
|
+
|
948
|
+
while (i < size) {
|
949
|
+
for (end = i + 1; end < size && data[end - 1] != '\n';
|
950
|
+
end += 1);
|
951
|
+
if (is_empty(data + i, size - i)
|
952
|
+
|| (level = is_headerline(data + i, size - i)) != 0)
|
953
|
+
break;
|
954
|
+
if (data[i] == '#'
|
955
|
+
|| is_hrule(data + i, size - i)) {
|
956
|
+
end = i;
|
957
|
+
break; }
|
958
|
+
i = end; }
|
959
|
+
|
960
|
+
work.size = i;
|
961
|
+
while (work.size && data[work.size - 1] == '\n')
|
962
|
+
work.size -= 1;
|
963
|
+
if (!level) {
|
964
|
+
struct buf *tmp = 0;
|
965
|
+
if (rndr->work.size < rndr->work.asize) {
|
966
|
+
tmp = rndr->work.item[rndr->work.size ++];
|
967
|
+
tmp->size = 0; }
|
968
|
+
else {
|
969
|
+
tmp = bufnew(WORK_UNIT);
|
970
|
+
parr_push(&rndr->work, tmp); }
|
971
|
+
parse_inline(tmp, rndr, work.data, work.size);
|
972
|
+
if (rndr->make.paragraph)
|
973
|
+
rndr->make.paragraph(ob, tmp, &rndr->make.render_options);
|
974
|
+
rndr->work.size -= 1; }
|
975
|
+
else {
|
976
|
+
if (work.size) {
|
977
|
+
size_t beg;
|
978
|
+
i = work.size;
|
979
|
+
work.size -= 1;
|
980
|
+
while (work.size && data[work.size] != '\n')
|
981
|
+
work.size -= 1;
|
982
|
+
beg = work.size + 1;
|
983
|
+
while (work.size && data[work.size - 1] == '\n')
|
984
|
+
work.size -= 1;
|
985
|
+
if (work.size) {
|
986
|
+
struct buf *tmp = 0;
|
987
|
+
if (rndr->work.size < rndr->work.asize) {
|
988
|
+
tmp=rndr->work.item[rndr->work.size++];
|
989
|
+
tmp->size = 0; }
|
990
|
+
else {
|
991
|
+
tmp = bufnew(WORK_UNIT);
|
992
|
+
parr_push(&rndr->work, tmp); }
|
993
|
+
parse_inline(tmp, rndr, work.data, work.size);
|
994
|
+
if (rndr->make.paragraph)
|
995
|
+
rndr->make.paragraph(ob, tmp,
|
996
|
+
&rndr->make.render_options);
|
997
|
+
rndr->work.size -= 1;
|
998
|
+
work.data += beg;
|
999
|
+
work.size = i - beg; }
|
1000
|
+
else work.size = i; }
|
1001
|
+
if (rndr->make.header)
|
1002
|
+
rndr->make.header(ob, &work, level, &rndr->make.render_options);}
|
1003
|
+
return end; }
|
1004
|
+
|
1005
|
+
|
1006
|
+
/* parse_blockquote • hanldes parsing of a block-level code fragment */
|
1007
|
+
static size_t
|
1008
|
+
parse_blockcode(struct buf *ob, struct render *rndr,
|
1009
|
+
char *data, size_t size) {
|
1010
|
+
size_t beg, end, pre;
|
1011
|
+
struct buf *work = 0;
|
1012
|
+
|
1013
|
+
if (rndr->work.size < rndr->work.asize) {
|
1014
|
+
work = rndr->work.item[rndr->work.size ++];
|
1015
|
+
work->size = 0; }
|
1016
|
+
else {
|
1017
|
+
work = bufnew(WORK_UNIT);
|
1018
|
+
parr_push(&rndr->work, work); }
|
1019
|
+
|
1020
|
+
beg = 0;
|
1021
|
+
while (beg < size) {
|
1022
|
+
for (end = beg + 1; end < size && data[end - 1] != '\n';
|
1023
|
+
end += 1);
|
1024
|
+
pre = prefix_code(data + beg, end - beg);
|
1025
|
+
if (pre) beg += pre; /* skipping prefix */
|
1026
|
+
else if (!is_empty(data + beg, end - beg))
|
1027
|
+
/* non-empty non-prefixed line breaks the pre */
|
1028
|
+
break;
|
1029
|
+
if (beg < end) {
|
1030
|
+
/* verbatim copy to the working buffer,
|
1031
|
+
escaping entities */
|
1032
|
+
if (is_empty(data + beg, end - beg))
|
1033
|
+
bufputc(work, '\n');
|
1034
|
+
else bufput(work, data + beg, end - beg); }
|
1035
|
+
beg = end; }
|
1036
|
+
|
1037
|
+
while (work->size && work->data[work->size - 1] == '\n')
|
1038
|
+
work->size -= 1;
|
1039
|
+
bufputc(work, '\n');
|
1040
|
+
if (rndr->make.blockcode)
|
1041
|
+
rndr->make.blockcode(ob, work, &rndr->make.render_options);
|
1042
|
+
rndr->work.size -= 1;
|
1043
|
+
return beg; }
|
1044
|
+
|
1045
|
+
|
1046
|
+
/* parse_listitem • parsing of a single list item */
|
1047
|
+
/* assuming initial prefix is already removed */
|
1048
|
+
static size_t
|
1049
|
+
parse_listitem(struct buf *ob, struct render *rndr, char *data, size_t size, int *flags, int depth) {
|
1050
|
+
struct buf *work = 0, *inter = 0;
|
1051
|
+
size_t beg = 0, end, pre, sublist = 0, orgpre = 0, i;
|
1052
|
+
int in_empty = 0, has_inside_empty = 0;
|
1053
|
+
|
1054
|
+
/* keeping book of the first indentation prefix */
|
1055
|
+
if (size > 1 && data[0] == ' ') { orgpre = 1;
|
1056
|
+
if (size > 2 && data[1] == ' ') { orgpre = 2;
|
1057
|
+
if (size > 3 && data[2] == ' ') { orgpre = 3; } } }
|
1058
|
+
beg = prefix_uli(data, size);
|
1059
|
+
if (!beg) beg = prefix_oli(data, size);
|
1060
|
+
if (!beg) return 0;
|
1061
|
+
/* skipping to the beginning of the following line */
|
1062
|
+
end = beg;
|
1063
|
+
while (end < size && data[end - 1] != '\n') end += 1;
|
1064
|
+
|
1065
|
+
/* getting working buffers */
|
1066
|
+
if (rndr->work.size < rndr->work.asize) {
|
1067
|
+
work = rndr->work.item[rndr->work.size ++];
|
1068
|
+
work->size = 0; }
|
1069
|
+
else {
|
1070
|
+
work = bufnew(WORK_UNIT);
|
1071
|
+
parr_push(&rndr->work, work); }
|
1072
|
+
if (rndr->work.size < rndr->work.asize) {
|
1073
|
+
inter = rndr->work.item[rndr->work.size ++];
|
1074
|
+
inter->size = 0; }
|
1075
|
+
else {
|
1076
|
+
inter = bufnew(WORK_UNIT);
|
1077
|
+
parr_push(&rndr->work, inter); }
|
1078
|
+
|
1079
|
+
/* putting the first line into the working buffer */
|
1080
|
+
bufput(work, data + beg, end - beg);
|
1081
|
+
beg = end;
|
1082
|
+
|
1083
|
+
/* process the following lines */
|
1084
|
+
while (beg < size) {
|
1085
|
+
end += 1;
|
1086
|
+
while (end < size && data[end - 1] != '\n') end += 1;
|
1087
|
+
|
1088
|
+
/* process an empty line */
|
1089
|
+
if (is_empty(data + beg, end - beg)) {
|
1090
|
+
in_empty = 1;
|
1091
|
+
beg = end;
|
1092
|
+
continue; }
|
1093
|
+
|
1094
|
+
/* calculating the indentation */
|
1095
|
+
i = 0;
|
1096
|
+
if (end - beg > 1 && data[beg] == ' ') { i = 1;
|
1097
|
+
if (end - beg > 2 && data[beg + 1] == ' ') { i = 2;
|
1098
|
+
if (end - beg > 3 && data[beg + 2] == ' ') { i = 3;
|
1099
|
+
if (end - beg > 3 && data[beg + 3] == ' ') { i = 4; } } } }
|
1100
|
+
pre = i;
|
1101
|
+
if (data[beg] == '\t') { i = 1; pre = 8; }
|
1102
|
+
|
1103
|
+
/* checking for a new item */
|
1104
|
+
if ((prefix_uli(data + beg + i, end - beg - i)
|
1105
|
+
&& !is_hrule(data + beg + i, end - beg - i))
|
1106
|
+
|| prefix_oli(data + beg + i, end - beg - i)) {
|
1107
|
+
if (in_empty) has_inside_empty = 1;
|
1108
|
+
if (pre == orgpre) /* the following item must have */
|
1109
|
+
break; /* the same indentation */
|
1110
|
+
if (!sublist) sublist = work->size; }
|
1111
|
+
|
1112
|
+
/* joining only indented stuff after empty lines */
|
1113
|
+
else if (in_empty && i < 4 && data[beg] != '\t') {
|
1114
|
+
*flags |= MKD_LI_END;
|
1115
|
+
break; }
|
1116
|
+
else if (in_empty) {
|
1117
|
+
bufputc(work, '\n');
|
1118
|
+
has_inside_empty = 1; }
|
1119
|
+
in_empty = 0;
|
1120
|
+
|
1121
|
+
/* adding the line without prefix into the working buffer */
|
1122
|
+
bufput(work, data + beg + i, end - beg - i);
|
1123
|
+
beg = end; }
|
1124
|
+
|
1125
|
+
/* render of li contents */
|
1126
|
+
if (has_inside_empty) *flags |= MKD_LI_BLOCK;
|
1127
|
+
if (*flags & MKD_LI_BLOCK) {
|
1128
|
+
/* intermediate render of block li */
|
1129
|
+
if (sublist && sublist < work->size) {
|
1130
|
+
parse_block(inter, rndr, work->data, sublist, depth + 1);
|
1131
|
+
parse_block(inter, rndr, work->data + sublist, work->size - sublist, depth + 1);
|
1132
|
+
}
|
1133
|
+
else
|
1134
|
+
parse_block(inter, rndr, work->data, work->size, depth + 1);
|
1135
|
+
} else {
|
1136
|
+
/* intermediate render of inline li */
|
1137
|
+
if (sublist && sublist < work->size) {
|
1138
|
+
parse_inline(inter, rndr, work->data, sublist);
|
1139
|
+
parse_block(inter, rndr, work->data + sublist, work->size - sublist, depth + 1);
|
1140
|
+
}
|
1141
|
+
else
|
1142
|
+
parse_inline(inter, rndr, work->data, work->size);
|
1143
|
+
}
|
1144
|
+
|
1145
|
+
/* render of li itself */
|
1146
|
+
if (rndr->make.listitem)
|
1147
|
+
rndr->make.listitem(ob, inter, *flags, &rndr->make.render_options);
|
1148
|
+
rndr->work.size -= 2;
|
1149
|
+
return beg;
|
1150
|
+
}
|
1151
|
+
|
1152
|
+
|
1153
|
+
/* parse_list • parsing ordered or unordered list block */
|
1154
|
+
static size_t
|
1155
|
+
parse_list(struct buf *ob, struct render *rndr, char *data, size_t size, int flags, int depth) {
|
1156
|
+
struct buf *work = 0;
|
1157
|
+
size_t i = 0, j;
|
1158
|
+
|
1159
|
+
if (rndr->work.size < rndr->work.asize) {
|
1160
|
+
work = rndr->work.item[rndr->work.size ++];
|
1161
|
+
work->size = 0; }
|
1162
|
+
else {
|
1163
|
+
work = bufnew(WORK_UNIT);
|
1164
|
+
parr_push(&rndr->work, work); }
|
1165
|
+
|
1166
|
+
while (i < size) {
|
1167
|
+
j = parse_listitem(work, rndr, data + i, size - i, &flags, depth + 1);
|
1168
|
+
i += j;
|
1169
|
+
|
1170
|
+
if (!j || (flags & MKD_LI_END))
|
1171
|
+
break;
|
1172
|
+
}
|
1173
|
+
|
1174
|
+
if (rndr->make.list)
|
1175
|
+
rndr->make.list(ob, work, flags, &rndr->make.render_options);
|
1176
|
+
rndr->work.size -= 1;
|
1177
|
+
return i;
|
1178
|
+
}
|
1179
|
+
|
1180
|
+
|
1181
|
+
/* parse_atxheader • parsing of atx-style headers */
|
1182
|
+
static size_t
|
1183
|
+
parse_atxheader(struct buf *ob, struct render *rndr, char *data, size_t size) {
|
1184
|
+
size_t level = 0;
|
1185
|
+
size_t i, end, skip;
|
1186
|
+
struct buf work = { data, 0, 0, 0, 0 };
|
1187
|
+
|
1188
|
+
if (!size || data[0] != '#') return 0;
|
1189
|
+
while (level < size && level < 6 && data[level] == '#') level += 1;
|
1190
|
+
for (i = level; i < size && (data[i] == ' ' || data[i] == '\t');
|
1191
|
+
i += 1);
|
1192
|
+
work.data = data + i;
|
1193
|
+
for (end = i; end < size && data[end] != '\n'; end += 1);
|
1194
|
+
skip = end;
|
1195
|
+
while (end && data[end - 1] == '#') end -= 1;
|
1196
|
+
while (end && (data[end - 1] == ' ' || data[end - 1] == '\t')) end -= 1;
|
1197
|
+
work.size = end - i;
|
1198
|
+
if (rndr->make.header)
|
1199
|
+
rndr->make.header(ob, &work, (int)level, &rndr->make.render_options);
|
1200
|
+
return skip;
|
1201
|
+
}
|
1202
|
+
|
1203
|
+
|
1204
|
+
/* htmlblock_end • checking end of HTML block : </tag>[ \t]*\n[ \t*]\n */
|
1205
|
+
/* returns the length on match, 0 otherwise */
|
1206
|
+
static size_t
|
1207
|
+
htmlblock_end(struct html_tag *tag, char *data, size_t size) {
|
1208
|
+
size_t i, w;
|
1209
|
+
|
1210
|
+
/* assuming data[0] == '<' && data[1] == '/' already tested */
|
1211
|
+
|
1212
|
+
/* checking tag is a match */
|
1213
|
+
if (tag->size + 3 >= size
|
1214
|
+
|| strncasecmp(data + 2, tag->text, tag->size)
|
1215
|
+
|| data[tag->size + 2] != '>')
|
1216
|
+
return 0;
|
1217
|
+
|
1218
|
+
/* checking white lines */
|
1219
|
+
i = tag->size + 3;
|
1220
|
+
w = 0;
|
1221
|
+
if (i < size && (w = is_empty(data + i, size - i)) == 0)
|
1222
|
+
return 0; /* non-blank after tag */
|
1223
|
+
i += w;
|
1224
|
+
w = 0;
|
1225
|
+
|
1226
|
+
|
1227
|
+
#ifdef UPSKIRT_NEWLINE_AFTER_TAGS
|
1228
|
+
if (i < size && (w = is_empty(data + i, size - i)) == 0)
|
1229
|
+
return 0; /* non-blank line after tag line */
|
1230
|
+
#else
|
1231
|
+
if (i < size)
|
1232
|
+
w = is_empty(data + i, size - i);
|
1233
|
+
#endif
|
1234
|
+
|
1235
|
+
return i + w;
|
1236
|
+
}
|
1237
|
+
|
1238
|
+
|
1239
|
+
/* parse_htmlblock • parsing of inline HTML block */
|
1240
|
+
static size_t
|
1241
|
+
parse_htmlblock(struct buf *ob, struct render *rndr, char *data, size_t size) {
|
1242
|
+
size_t i, j = 0;
|
1243
|
+
struct html_tag *curtag;
|
1244
|
+
int found;
|
1245
|
+
struct buf work = { data, 0, 0, 0, 0 };
|
1246
|
+
|
1247
|
+
/* identification of the opening tag */
|
1248
|
+
if (size < 2 || data[0] != '<') return 0;
|
1249
|
+
curtag = find_block_tag(data + 1, size - 1);
|
1250
|
+
|
1251
|
+
/* handling of special cases */
|
1252
|
+
if (!curtag) {
|
1253
|
+
/* HTML comment, laxist form */
|
1254
|
+
if (size > 5 && data[1] == '!'
|
1255
|
+
&& data[2] == '-' && data[3] == '-') {
|
1256
|
+
i = 5;
|
1257
|
+
while (i < size
|
1258
|
+
&& !(data[i - 2] == '-' && data[i - 1] == '-'
|
1259
|
+
&& data[i] == '>'))
|
1260
|
+
i += 1;
|
1261
|
+
i += 1;
|
1262
|
+
if (i < size)
|
1263
|
+
j = is_empty(data + i, size - i);
|
1264
|
+
if (j) {
|
1265
|
+
work.size = i + j;
|
1266
|
+
if (rndr->make.blockhtml)
|
1267
|
+
rndr->make.blockhtml(ob, &work,
|
1268
|
+
&rndr->make.render_options);
|
1269
|
+
return work.size; } }
|
1270
|
+
|
1271
|
+
/* HR, which is the only self-closing block tag considered */
|
1272
|
+
if (size > 4
|
1273
|
+
&& (data[1] == 'h' || data[1] == 'H')
|
1274
|
+
&& (data[2] == 'r' || data[2] == 'R')) {
|
1275
|
+
i = 3;
|
1276
|
+
while (i < size && data[i] != '>')
|
1277
|
+
i += 1;
|
1278
|
+
if (i + 1 < size) {
|
1279
|
+
i += 1;
|
1280
|
+
j = is_empty(data + i, size - i);
|
1281
|
+
if (j) {
|
1282
|
+
work.size = i + j;
|
1283
|
+
if (rndr->make.blockhtml)
|
1284
|
+
rndr->make.blockhtml(ob, &work,
|
1285
|
+
&rndr->make.render_options);
|
1286
|
+
return work.size; } } }
|
1287
|
+
|
1288
|
+
/* no special case recognised */
|
1289
|
+
return 0; }
|
1290
|
+
|
1291
|
+
/* looking for an unindented matching closing tag */
|
1292
|
+
/* followed by a blank line */
|
1293
|
+
i = 1;
|
1294
|
+
found = 0;
|
1295
|
+
#if 0
|
1296
|
+
while (i < size) {
|
1297
|
+
i += 1;
|
1298
|
+
while (i < size && !(data[i - 2] == '\n'
|
1299
|
+
&& data[i - 1] == '<' && data[i] == '/'))
|
1300
|
+
i += 1;
|
1301
|
+
if (i + 2 + curtag->size >= size) break;
|
1302
|
+
j = htmlblock_end(curtag, data + i - 1, size - i + 1);
|
1303
|
+
if (j) {
|
1304
|
+
i += j - 1;
|
1305
|
+
found = 1;
|
1306
|
+
break; } }
|
1307
|
+
#endif
|
1308
|
+
|
1309
|
+
/* if not found, trying a second pass looking for indented match */
|
1310
|
+
/* but not if tag is "ins" or "del" (following original Markdown.pl) */
|
1311
|
+
if (!found && curtag != INS_TAG && curtag != DEL_TAG) {
|
1312
|
+
i = 1;
|
1313
|
+
while (i < size) {
|
1314
|
+
i += 1;
|
1315
|
+
while (i < size
|
1316
|
+
&& !(data[i - 1] == '<' && data[i] == '/'))
|
1317
|
+
i += 1;
|
1318
|
+
if (i + 2 + curtag->size >= size) break;
|
1319
|
+
j = htmlblock_end(curtag, data + i - 1, size - i + 1);
|
1320
|
+
if (j) {
|
1321
|
+
i += j - 1;
|
1322
|
+
found = 1;
|
1323
|
+
break; } } }
|
1324
|
+
|
1325
|
+
if (!found) return 0;
|
1326
|
+
|
1327
|
+
/* the end of the block has been found */
|
1328
|
+
work.size = i;
|
1329
|
+
if (rndr->make.blockhtml)
|
1330
|
+
rndr->make.blockhtml(ob, &work, &rndr->make.render_options);
|
1331
|
+
return i; }
|
1332
|
+
|
1333
|
+
|
1334
|
+
/* parse_block • parsing of one block, returning next char to parse */
|
1335
|
+
static void
|
1336
|
+
parse_block(struct buf *ob, struct render *rndr, char *data, size_t size, int depth) {
|
1337
|
+
size_t beg, end, i;
|
1338
|
+
char *txt_data;
|
1339
|
+
beg = 0;
|
1340
|
+
|
1341
|
+
if (depth >= rndr->make.parser_options.recursion_depth)
|
1342
|
+
return;
|
1343
|
+
|
1344
|
+
while (beg < size) {
|
1345
|
+
txt_data = data + beg;
|
1346
|
+
end = size - beg;
|
1347
|
+
if (data[beg] == '#')
|
1348
|
+
beg += parse_atxheader(ob, rndr, txt_data, end);
|
1349
|
+
else if (data[beg] == '<' && rndr->make.blockhtml
|
1350
|
+
&& (i = parse_htmlblock(ob, rndr, txt_data, end)) != 0)
|
1351
|
+
beg += i;
|
1352
|
+
else if ((i = is_empty(txt_data, end)) != 0)
|
1353
|
+
beg += i;
|
1354
|
+
else if (is_hrule(txt_data, end)) {
|
1355
|
+
if (rndr->make.hrule)
|
1356
|
+
rndr->make.hrule(ob, &rndr->make.render_options);
|
1357
|
+
while (beg < size && data[beg] != '\n') beg += 1;
|
1358
|
+
beg += 1; }
|
1359
|
+
else if (prefix_quote(txt_data, end))
|
1360
|
+
beg += parse_blockquote(ob, rndr, txt_data, end, depth + 1);
|
1361
|
+
else if (prefix_code(txt_data, end))
|
1362
|
+
beg += parse_blockcode(ob, rndr, txt_data, end);
|
1363
|
+
else if (prefix_uli(txt_data, end))
|
1364
|
+
beg += parse_list(ob, rndr, txt_data, end, 0, depth + 1);
|
1365
|
+
else if (prefix_oli(txt_data, end))
|
1366
|
+
beg += parse_list(ob, rndr, txt_data, end, MKD_LIST_ORDERED, depth + 1);
|
1367
|
+
else
|
1368
|
+
beg += parse_paragraph(ob, rndr, txt_data, end);
|
1369
|
+
}
|
1370
|
+
}
|
1371
|
+
|
1372
|
+
|
1373
|
+
|
1374
|
+
/*********************
|
1375
|
+
* REFERENCE PARSING *
|
1376
|
+
*********************/
|
1377
|
+
|
1378
|
+
/* is_ref • returns whether a line is a reference or not */
|
1379
|
+
static int
|
1380
|
+
is_ref(char *data, size_t beg, size_t end, size_t *last, struct array *refs) {
|
1381
|
+
/* int n; */
|
1382
|
+
size_t i = 0;
|
1383
|
+
size_t id_offset, id_end;
|
1384
|
+
size_t link_offset, link_end;
|
1385
|
+
size_t title_offset, title_end;
|
1386
|
+
size_t line_end;
|
1387
|
+
struct link_ref *lr;
|
1388
|
+
/* struct buf id = { 0, 0, 0, 0, 0 }; / * volatile buf for id search */
|
1389
|
+
|
1390
|
+
/* up to 3 optional leading spaces */
|
1391
|
+
if (beg + 3 >= end) return 0;
|
1392
|
+
if (data[beg] == ' ') { i = 1;
|
1393
|
+
if (data[beg + 1] == ' ') { i = 2;
|
1394
|
+
if (data[beg + 2] == ' ') { i = 3;
|
1395
|
+
if (data[beg + 3] == ' ') return 0; } } }
|
1396
|
+
i += beg;
|
1397
|
+
|
1398
|
+
/* id part: anything but a newline between brackets */
|
1399
|
+
if (data[i] != '[') return 0;
|
1400
|
+
i += 1;
|
1401
|
+
id_offset = i;
|
1402
|
+
while (i < end && data[i] != '\n' && data[i] != '\r' && data[i] != ']')
|
1403
|
+
i += 1;
|
1404
|
+
if (i >= end || data[i] != ']') return 0;
|
1405
|
+
id_end = i;
|
1406
|
+
|
1407
|
+
/* spacer: colon (space | tab)* newline? (space | tab)* */
|
1408
|
+
i += 1;
|
1409
|
+
if (i >= end || data[i] != ':') return 0;
|
1410
|
+
i += 1;
|
1411
|
+
while (i < end && (data[i] == ' ' || data[i] == '\t')) i += 1;
|
1412
|
+
if (i < end && (data[i] == '\n' || data[i] == '\r')) {
|
1413
|
+
i += 1;
|
1414
|
+
if (i < end && data[i] == '\r' && data[i - 1] == '\n') i += 1; }
|
1415
|
+
while (i < end && (data[i] == ' ' || data[i] == '\t')) i += 1;
|
1416
|
+
if (i >= end) return 0;
|
1417
|
+
|
1418
|
+
/* link: whitespace-free sequence, optionally between angle brackets */
|
1419
|
+
if (data[i] == '<') i += 1;
|
1420
|
+
link_offset = i;
|
1421
|
+
while (i < end && data[i] != ' ' && data[i] != '\t'
|
1422
|
+
&& data[i] != '\n' && data[i] != '\r') i += 1;
|
1423
|
+
if (data[i - 1] == '>') link_end = i - 1;
|
1424
|
+
else link_end = i;
|
1425
|
+
|
1426
|
+
/* optional spacer: (space | tab)* (newline | '\'' | '"' | '(' ) */
|
1427
|
+
while (i < end && (data[i] == ' ' || data[i] == '\t')) i += 1;
|
1428
|
+
if (i < end && data[i] != '\n' && data[i] != '\r'
|
1429
|
+
&& data[i] != '\'' && data[i] != '"' && data[i] != '(')
|
1430
|
+
return 0;
|
1431
|
+
line_end = 0;
|
1432
|
+
/* computing end-of-line */
|
1433
|
+
if (i >= end || data[i] == '\r' || data[i] == '\n') line_end = i;
|
1434
|
+
if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r')
|
1435
|
+
line_end = i + 1;
|
1436
|
+
|
1437
|
+
/* optional (space|tab)* spacer after a newline */
|
1438
|
+
if (line_end) {
|
1439
|
+
i = line_end + 1;
|
1440
|
+
while (i < end && (data[i] == ' ' || data[i] == '\t')) i += 1; }
|
1441
|
+
|
1442
|
+
/* optional title: any non-newline sequence enclosed in '"()
|
1443
|
+
alone on its line */
|
1444
|
+
title_offset = title_end = 0;
|
1445
|
+
if (i + 1 < end
|
1446
|
+
&& (data[i] == '\'' || data[i] == '"' || data[i] == '(')) {
|
1447
|
+
i += 1;
|
1448
|
+
title_offset = i;
|
1449
|
+
/* looking for EOL */
|
1450
|
+
while (i < end && data[i] != '\n' && data[i] != '\r') i += 1;
|
1451
|
+
if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r')
|
1452
|
+
title_end = i + 1;
|
1453
|
+
else title_end = i;
|
1454
|
+
/* stepping back */
|
1455
|
+
i -= 1;
|
1456
|
+
while (i > title_offset && (data[i] == ' ' || data[i] == '\t'))
|
1457
|
+
i -= 1;
|
1458
|
+
if (i > title_offset
|
1459
|
+
&& (data[i] == '\'' || data[i] == '"' || data[i] == ')')) {
|
1460
|
+
line_end = title_end;
|
1461
|
+
title_end = i; } }
|
1462
|
+
if (!line_end) return 0; /* garbage after the link */
|
1463
|
+
|
1464
|
+
/* a valid ref has been found, filling-in return structures */
|
1465
|
+
if (last) *last = line_end;
|
1466
|
+
if (!refs) return 1;
|
1467
|
+
lr = arr_item(refs, arr_newitem(refs));
|
1468
|
+
lr->id = bufnew(id_end - id_offset);
|
1469
|
+
bufput(lr->id, data + id_offset, id_end - id_offset);
|
1470
|
+
lr->link = bufnew(link_end - link_offset);
|
1471
|
+
bufput(lr->link, data + link_offset, link_end - link_offset);
|
1472
|
+
if (title_end > title_offset) {
|
1473
|
+
lr->title = bufnew(title_end - title_offset);
|
1474
|
+
bufput(lr->title, data + title_offset,
|
1475
|
+
title_end - title_offset); }
|
1476
|
+
else lr->title = 0;
|
1477
|
+
return 1; }
|
1478
|
+
|
1479
|
+
|
1480
|
+
|
1481
|
+
/**********************
|
1482
|
+
* EXPORTED FUNCTIONS *
|
1483
|
+
**********************/
|
1484
|
+
/* expand_tabs • appends line[0..size) to ob with tabs turned into spaces.
 * Each tab becomes one to four spaces, so that the following character
 * lands on a column that is a multiple of 4 (columns counted from the
 * start of line). */
static void
expand_tabs(struct buf *ob, const char *line, size_t size)
{
	size_t pos = 0, col = 0;

	while (pos < size) {
		size_t run = pos;

		/* copy the run of ordinary characters in one go */
		while (pos < size && line[pos] != '\t')
			pos++;
		if (pos > run) {
			bufput(ob, line + run, pos - run);
			col += pos - run;
		}

		if (pos >= size)
			break;

		/* at least one space, then pad up to the next tab stop */
		do {
			bufputc(ob, ' ');
			col++;
		} while ((col % 4) != 0);

		pos++;
	}
}
|
1510
|
+
|
1511
|
+
/* markdown • parses the input buffer and renders it into the output buffer */
|
1512
|
+
void
|
1513
|
+
markdown(struct buf *ob, struct buf *ib, const struct mkd_renderer *rndrer) {
|
1514
|
+
struct link_ref *lr;
|
1515
|
+
struct buf *text = bufnew(TEXT_UNIT);
|
1516
|
+
size_t i, beg, end;
|
1517
|
+
struct render rndr;
|
1518
|
+
|
1519
|
+
/* filling the render structure */
|
1520
|
+
if (!rndrer) return;
|
1521
|
+
rndr.make = *rndrer;
|
1522
|
+
arr_init(&rndr.refs, sizeof (struct link_ref));
|
1523
|
+
parr_init(&rndr.work);
|
1524
|
+
for (i = 0; i < 256; i += 1) rndr.active_char[i] = 0;
|
1525
|
+
if ((rndr.make.emphasis || rndr.make.double_emphasis
|
1526
|
+
|| rndr.make.triple_emphasis)
|
1527
|
+
&& rndr.make.emph_chars)
|
1528
|
+
for (i = 0; rndr.make.emph_chars[i]; i += 1)
|
1529
|
+
rndr.active_char[(unsigned char)rndr.make.emph_chars[i]]
|
1530
|
+
= char_emphasis;
|
1531
|
+
if (rndr.make.codespan) rndr.active_char['`'] = char_codespan;
|
1532
|
+
if (rndr.make.linebreak) rndr.active_char['\n'] = char_linebreak;
|
1533
|
+
if (rndr.make.image || rndr.make.link)
|
1534
|
+
rndr.active_char['['] = char_link;
|
1535
|
+
rndr.active_char['<'] = char_langle_tag;
|
1536
|
+
rndr.active_char['\\'] = char_escape;
|
1537
|
+
rndr.active_char['&'] = char_entity;
|
1538
|
+
|
1539
|
+
/* first pass: looking for references, copying everything else */
|
1540
|
+
beg = 0;
|
1541
|
+
while (beg < ib->size) /* iterating over lines */
|
1542
|
+
if (is_ref(ib->data, beg, ib->size, &end, &rndr.refs))
|
1543
|
+
beg = end;
|
1544
|
+
else { /* skipping to the next line */
|
1545
|
+
end = beg;
|
1546
|
+
while (end < ib->size && ib->data[end] != '\n' && ib->data[end] != '\r')
|
1547
|
+
end += 1;
|
1548
|
+
|
1549
|
+
/* adding the line body if present */
|
1550
|
+
if (end > beg)
|
1551
|
+
expand_tabs(text, ib->data + beg, end - beg);
|
1552
|
+
|
1553
|
+
while (end < ib->size && (ib->data[end] == '\n' || ib->data[end] == '\r')) {
|
1554
|
+
/* add one \n per newline */
|
1555
|
+
if (ib->data[end] == '\n' || (end + 1 < ib->size && ib->data[end + 1] != '\n'))
|
1556
|
+
bufputc(text, '\n');
|
1557
|
+
end += 1;
|
1558
|
+
}
|
1559
|
+
|
1560
|
+
beg = end;
|
1561
|
+
}
|
1562
|
+
|
1563
|
+
/* sorting the reference array */
|
1564
|
+
if (rndr.refs.size)
|
1565
|
+
qsort(rndr.refs.base, rndr.refs.size, rndr.refs.unit,
|
1566
|
+
cmp_link_ref_sort);
|
1567
|
+
|
1568
|
+
/* adding a final newline if not already present */
|
1569
|
+
if (!text->size) return;
|
1570
|
+
if (text->data[text->size - 1] != '\n'
|
1571
|
+
&& text->data[text->size - 1] != '\r')
|
1572
|
+
bufputc(text, '\n');
|
1573
|
+
|
1574
|
+
/* second pass: actual rendering */
|
1575
|
+
parse_block(ob, &rndr, text->data, text->size, 0 /* initial depth */);
|
1576
|
+
|
1577
|
+
/* clean-up */
|
1578
|
+
bufrelease(text);
|
1579
|
+
lr = rndr.refs.base;
|
1580
|
+
for (i = 0; i < (size_t)rndr.refs.size; i += 1) {
|
1581
|
+
bufrelease(lr[i].id);
|
1582
|
+
bufrelease(lr[i].link);
|
1583
|
+
bufrelease(lr[i].title); }
|
1584
|
+
arr_free(&rndr.refs);
|
1585
|
+
assert(rndr.work.size == 0);
|
1586
|
+
for (i = 0; i < (size_t)rndr.work.asize; i += 1)
|
1587
|
+
bufrelease(rndr.work.item[i]);
|
1588
|
+
parr_free(&rndr.work); }
|
1589
|
+
|
1590
|
+
/* vim: set filetype=c: */
|