redcarpet 2.0.0b3 → 2.0.0b4
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of redcarpet might be problematic. Click here for more details.
- data/README.markdown +39 -5
- data/Rakefile +2 -2
- data/ext/redcarpet/autolink.c +12 -12
- data/ext/redcarpet/autolink.h +4 -4
- data/ext/redcarpet/buffer.c +123 -218
- data/ext/redcarpet/buffer.h +49 -112
- data/ext/redcarpet/html.c +83 -117
- data/ext/redcarpet/html.h +9 -10
- data/ext/redcarpet/html_blocks.h +205 -0
- data/ext/redcarpet/html_smartypants.c +63 -39
- data/ext/redcarpet/markdown.c +456 -377
- data/ext/redcarpet/markdown.h +43 -29
- data/ext/redcarpet/rc_markdown.c +60 -34
- data/ext/redcarpet/rc_render.c +29 -35
- data/ext/redcarpet/redcarpet.h +4 -3
- data/ext/redcarpet/stack.c +81 -0
- data/ext/redcarpet/stack.h +21 -0
- data/lib/redcarpet.rb +4 -23
- data/redcarpet.gemspec +6 -4
- data/test/redcarpet_test.rb +39 -43
- metadata +56 -33
- data/ext/redcarpet/array.c +0 -300
- data/ext/redcarpet/array.h +0 -147
data/ext/redcarpet/markdown.c
CHANGED
@@ -18,49 +18,58 @@
|
|
18
18
|
*/
|
19
19
|
|
20
20
|
#include "markdown.h"
|
21
|
-
#include "
|
21
|
+
#include "stack.h"
|
22
22
|
|
23
23
|
#include <assert.h>
|
24
24
|
#include <string.h>
|
25
|
-
//#include <strings.h> /* for strncasecmp */
|
26
25
|
#include <ctype.h>
|
27
26
|
#include <stdio.h>
|
28
27
|
|
28
|
+
#define REF_TABLE_SIZE 8
|
29
|
+
|
29
30
|
#define BUFFER_BLOCK 0
|
30
31
|
#define BUFFER_SPAN 1
|
31
32
|
|
32
33
|
#define MKD_LI_END 8 /* internal list flag */
|
33
34
|
|
35
|
+
#define gperf_case_strncmp(s1, s2, n) strncasecmp(s1, s2, n)
|
36
|
+
#define GPERF_DOWNCASE 1
|
37
|
+
#define GPERF_CASE_STRNCMP 1
|
38
|
+
#include "html_blocks.h"
|
39
|
+
|
34
40
|
/***************
|
35
41
|
* LOCAL TYPES *
|
36
42
|
***************/
|
37
43
|
|
38
|
-
/* link_ref
|
44
|
+
/* link_ref: reference to a link */
|
39
45
|
struct link_ref {
|
40
|
-
|
46
|
+
unsigned int id;
|
47
|
+
|
41
48
|
struct buf *link;
|
42
49
|
struct buf *title;
|
50
|
+
|
51
|
+
struct link_ref *next;
|
43
52
|
};
|
44
53
|
|
45
|
-
/* char_trigger
|
54
|
+
/* char_trigger: function pointer to render active chars */
|
46
55
|
/* returns the number of chars taken care of */
|
47
56
|
/* data is the pointer of the beginning of the span */
|
48
57
|
/* offset is the number of valid chars before data */
|
49
|
-
struct
|
58
|
+
struct sd_markdown;
|
50
59
|
typedef size_t
|
51
|
-
(*char_trigger)(struct buf *ob, struct
|
52
|
-
|
53
|
-
static size_t char_emphasis(struct buf *ob, struct
|
54
|
-
static size_t char_linebreak(struct buf *ob, struct
|
55
|
-
static size_t char_codespan(struct buf *ob, struct
|
56
|
-
static size_t char_escape(struct buf *ob, struct
|
57
|
-
static size_t char_entity(struct buf *ob, struct
|
58
|
-
static size_t char_langle_tag(struct buf *ob, struct
|
59
|
-
static size_t char_autolink_url(struct buf *ob, struct
|
60
|
-
static size_t char_autolink_email(struct buf *ob, struct
|
61
|
-
static size_t char_autolink_www(struct buf *ob, struct
|
62
|
-
static size_t char_link(struct buf *ob, struct
|
63
|
-
static size_t char_superscript(struct buf *ob, struct
|
60
|
+
(*char_trigger)(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
|
61
|
+
|
62
|
+
static size_t char_emphasis(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
|
63
|
+
static size_t char_linebreak(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
|
64
|
+
static size_t char_codespan(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
|
65
|
+
static size_t char_escape(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
|
66
|
+
static size_t char_entity(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
|
67
|
+
static size_t char_langle_tag(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
|
68
|
+
static size_t char_autolink_url(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
|
69
|
+
static size_t char_autolink_email(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
|
70
|
+
static size_t char_autolink_www(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
|
71
|
+
static size_t char_link(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
|
72
|
+
static size_t char_superscript(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
|
64
73
|
|
65
74
|
enum markdown_char_t {
|
66
75
|
MD_CHAR_NONE = 0,
|
@@ -93,84 +102,46 @@ static char_trigger markdown_char_ptrs[] = {
|
|
93
102
|
};
|
94
103
|
|
95
104
|
/* render • structure containing one particular render */
|
96
|
-
struct
|
105
|
+
struct sd_markdown {
|
97
106
|
struct sd_callbacks cb;
|
98
107
|
void *opaque;
|
99
108
|
|
100
|
-
struct
|
101
|
-
|
102
|
-
struct
|
109
|
+
struct link_ref *refs[REF_TABLE_SIZE];
|
110
|
+
uint8_t active_char[256];
|
111
|
+
struct stack work_bufs[2];
|
103
112
|
unsigned int ext_flags;
|
104
113
|
size_t max_nesting;
|
105
114
|
};
|
106
115
|
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
size_t size;
|
111
|
-
};
|
116
|
+
/***************************
|
117
|
+
* HELPER FUNCTIONS *
|
118
|
+
***************************/
|
112
119
|
|
113
120
|
static inline struct buf *
|
114
|
-
rndr_newbuf(struct
|
121
|
+
rndr_newbuf(struct sd_markdown *rndr, int type)
|
115
122
|
{
|
116
123
|
static const size_t buf_size[2] = {256, 64};
|
117
124
|
struct buf *work = NULL;
|
118
|
-
struct
|
125
|
+
struct stack *pool = &rndr->work_bufs[type];
|
119
126
|
|
120
|
-
if (
|
121
|
-
|
127
|
+
if (pool->size < pool->asize &&
|
128
|
+
pool->item[pool->size] != NULL) {
|
129
|
+
work = pool->item[pool->size++];
|
122
130
|
work->size = 0;
|
123
131
|
} else {
|
124
132
|
work = bufnew(buf_size[type]);
|
125
|
-
|
133
|
+
stack_push(pool, work);
|
126
134
|
}
|
127
135
|
|
128
136
|
return work;
|
129
137
|
}
|
130
138
|
|
131
139
|
static inline void
|
132
|
-
rndr_popbuf(struct
|
140
|
+
rndr_popbuf(struct sd_markdown *rndr, int type)
|
133
141
|
{
|
134
142
|
rndr->work_bufs[type].size--;
|
135
143
|
}
|
136
144
|
|
137
|
-
/********************
|
138
|
-
* GLOBAL VARIABLES *
|
139
|
-
********************/
|
140
|
-
|
141
|
-
/* block_tags • recognised block tags, sorted by cmp_html_tag */
|
142
|
-
static struct html_tag block_tags[] = {
|
143
|
-
/*0*/ { "p", 1 },
|
144
|
-
{ "dl", 2 },
|
145
|
-
{ "h1", 2 },
|
146
|
-
{ "h2", 2 },
|
147
|
-
{ "h3", 2 },
|
148
|
-
{ "h4", 2 },
|
149
|
-
{ "h5", 2 },
|
150
|
-
{ "h6", 2 },
|
151
|
-
{ "ol", 2 },
|
152
|
-
{ "ul", 2 },
|
153
|
-
{ "del", 3 }, /* 10 */
|
154
|
-
{ "div", 3 },
|
155
|
-
{ "ins", 3 }, /* 12 */
|
156
|
-
{ "pre", 3 },
|
157
|
-
{ "form", 4 },
|
158
|
-
{ "math", 4 },
|
159
|
-
{ "table", 5 },
|
160
|
-
{ "figure", 6 },
|
161
|
-
{ "iframe", 6 },
|
162
|
-
{ "script", 6 },
|
163
|
-
{ "fieldset", 8 },
|
164
|
-
{ "noscript", 8 },
|
165
|
-
{ "blockquote", 10 }
|
166
|
-
};
|
167
|
-
|
168
|
-
#define INS_TAG (block_tags + 12)
|
169
|
-
#define DEL_TAG (block_tags + 10)
|
170
|
-
|
171
|
-
/***************************
|
172
|
-
* HELPER FUNCTIONS *
|
173
|
-
***************************/
|
174
145
|
static void
|
175
146
|
unscape_text(struct buf *ob, struct buf *src)
|
176
147
|
{
|
@@ -191,54 +162,87 @@ unscape_text(struct buf *ob, struct buf *src)
|
|
191
162
|
}
|
192
163
|
}
|
193
164
|
|
194
|
-
|
195
|
-
|
196
|
-
cmp_link_ref(void *key, void *array_entry)
|
165
|
+
static unsigned int
|
166
|
+
hash_link_ref(const uint8_t *link_ref, size_t length)
|
197
167
|
{
|
198
|
-
|
199
|
-
|
168
|
+
size_t i;
|
169
|
+
unsigned int hash = 0;
|
170
|
+
|
171
|
+
for (i = 0; i < length; ++i)
|
172
|
+
hash = tolower(link_ref[i]) + (hash << 6) + (hash << 16) - hash;
|
173
|
+
|
174
|
+
return hash;
|
200
175
|
}
|
201
176
|
|
202
|
-
|
203
|
-
|
204
|
-
|
177
|
+
static struct link_ref *
|
178
|
+
add_link_ref(
|
179
|
+
struct link_ref **references,
|
180
|
+
const uint8_t *name, size_t name_size)
|
205
181
|
{
|
206
|
-
|
207
|
-
|
208
|
-
|
182
|
+
struct link_ref *ref = calloc(1, sizeof(struct link_ref));
|
183
|
+
|
184
|
+
if (!ref)
|
185
|
+
return NULL;
|
186
|
+
|
187
|
+
ref->id = hash_link_ref(name, name_size);
|
188
|
+
ref->next = references[ref->id % REF_TABLE_SIZE];
|
189
|
+
|
190
|
+
references[ref->id % REF_TABLE_SIZE] = ref;
|
191
|
+
return ref;
|
209
192
|
}
|
210
193
|
|
211
|
-
|
212
|
-
|
213
|
-
cmp_html_tag(const void *a, const void *b)
|
194
|
+
static struct link_ref *
|
195
|
+
find_link_ref(struct link_ref **references, uint8_t *name, size_t length)
|
214
196
|
{
|
215
|
-
|
216
|
-
|
217
|
-
if (hta->size != htb->size) return (int)(hta->size - htb->size);
|
218
|
-
return strncasecmp(hta->text, htb->text, hta->size);
|
219
|
-
}
|
197
|
+
unsigned int hash = hash_link_ref(name, length);
|
198
|
+
struct link_ref *ref = NULL;
|
220
199
|
|
200
|
+
ref = references[hash % REF_TABLE_SIZE];
|
201
|
+
|
202
|
+
while (ref != NULL) {
|
203
|
+
if (ref->id == hash)
|
204
|
+
return ref;
|
205
|
+
|
206
|
+
ref = ref->next;
|
207
|
+
}
|
221
208
|
|
222
|
-
|
223
|
-
|
224
|
-
|
209
|
+
return NULL;
|
210
|
+
}
|
211
|
+
|
212
|
+
static void
|
213
|
+
free_link_refs(struct link_ref **references)
|
225
214
|
{
|
226
|
-
size_t i
|
227
|
-
struct html_tag key;
|
215
|
+
size_t i;
|
228
216
|
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|| (data[i] >= 'a' && data[i] <= 'z')))
|
233
|
-
i++;
|
234
|
-
if (i >= size) return 0;
|
217
|
+
for (i = 0; i < REF_TABLE_SIZE; ++i) {
|
218
|
+
struct link_ref *r = references[i];
|
219
|
+
struct link_ref *next;
|
235
220
|
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
221
|
+
while (r) {
|
222
|
+
next = r->next;
|
223
|
+
bufrelease(r->link);
|
224
|
+
bufrelease(r->title);
|
225
|
+
free(r);
|
226
|
+
r = next;
|
227
|
+
}
|
228
|
+
}
|
229
|
+
}
|
230
|
+
|
231
|
+
/*
|
232
|
+
* Check whether a char is a Markdown space.
|
233
|
+
|
234
|
+
* Right now we only consider spaces the actual
|
235
|
+
* space and a newline: tabs and carriage returns
|
236
|
+
* are filtered out during the preprocessing phase.
|
237
|
+
*
|
238
|
+
* If we wanted to actually be UTF-8 compliant, we
|
239
|
+
* should instead extract a Unicode codepoint from
|
240
|
+
* this character and check for space properties.
|
241
|
+
*/
|
242
|
+
static inline int
|
243
|
+
_isspace(int c)
|
244
|
+
{
|
245
|
+
return c == ' ' || c == '\n';
|
242
246
|
}
|
243
247
|
|
244
248
|
/****************************
|
@@ -248,7 +252,7 @@ find_block_tag(char *data, size_t size)
|
|
248
252
|
/* is_mail_autolink • looks for the address part of a mail autolink and '>' */
|
249
253
|
/* this is less strict than the original markdown e-mail address matching */
|
250
254
|
static size_t
|
251
|
-
is_mail_autolink(
|
255
|
+
is_mail_autolink(uint8_t *data, size_t size)
|
252
256
|
{
|
253
257
|
size_t i = 0, nb = 0;
|
254
258
|
|
@@ -279,7 +283,7 @@ is_mail_autolink(char *data, size_t size)
|
|
279
283
|
|
280
284
|
/* tag_length • returns the length of the given tag, or 0 if it's not valid */
|
281
285
|
static size_t
|
282
|
-
tag_length(
|
286
|
+
tag_length(uint8_t *data, size_t size, enum mkd_autolink *autolink)
|
283
287
|
{
|
284
288
|
size_t i, j;
|
285
289
|
|
@@ -322,7 +326,8 @@ tag_length(char *data, size_t size, enum mkd_autolink *autolink)
|
|
322
326
|
while (i < size) {
|
323
327
|
if (data[i] == '\\') i += 2;
|
324
328
|
else if (data[i] == '>' || data[i] == '\'' ||
|
325
|
-
data[i] == '"' ||
|
329
|
+
data[i] == '"' || data[i] == ' ' || data[i] == '\n')
|
330
|
+
break;
|
326
331
|
else i++;
|
327
332
|
}
|
328
333
|
|
@@ -340,19 +345,19 @@ tag_length(char *data, size_t size, enum mkd_autolink *autolink)
|
|
340
345
|
|
341
346
|
/* parse_inline • parses inline markdown elements */
|
342
347
|
static void
|
343
|
-
parse_inline(struct buf *ob, struct
|
348
|
+
parse_inline(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
|
344
349
|
{
|
345
350
|
size_t i = 0, end = 0;
|
346
|
-
|
347
|
-
struct buf work = { 0, 0, 0, 0
|
351
|
+
uint8_t action = 0;
|
352
|
+
struct buf work = { 0, 0, 0, 0 };
|
348
353
|
|
349
354
|
if (rndr->work_bufs[BUFFER_SPAN].size +
|
350
|
-
rndr->work_bufs[BUFFER_BLOCK].size >
|
355
|
+
rndr->work_bufs[BUFFER_BLOCK].size > rndr->max_nesting)
|
351
356
|
return;
|
352
357
|
|
353
358
|
while (i < size) {
|
354
359
|
/* copying inactive chars into the output */
|
355
|
-
while (end < size && (action = rndr->active_char[
|
360
|
+
while (end < size && (action = rndr->active_char[data[end]]) == 0) {
|
356
361
|
end++;
|
357
362
|
}
|
358
363
|
|
@@ -371,16 +376,16 @@ parse_inline(struct buf *ob, struct render *rndr, char *data, size_t size)
|
|
371
376
|
end = markdown_char_ptrs[(int)action](ob, rndr, data + i, i, size - i);
|
372
377
|
if (!end) /* no action from the callback */
|
373
378
|
end = i + 1;
|
374
|
-
else {
|
379
|
+
else {
|
375
380
|
i += end;
|
376
381
|
end = i;
|
377
|
-
}
|
382
|
+
}
|
378
383
|
}
|
379
384
|
}
|
380
385
|
|
381
|
-
/* find_emph_char • looks for the next emph
|
386
|
+
/* find_emph_char • looks for the next emph char, skipping other constructs */
|
382
387
|
static size_t
|
383
|
-
find_emph_char(
|
388
|
+
find_emph_char(uint8_t *data, size_t size, uint8_t c)
|
384
389
|
{
|
385
390
|
size_t i = 1;
|
386
391
|
|
@@ -399,25 +404,33 @@ find_emph_char(char *data, size_t size, char c)
|
|
399
404
|
i++; continue;
|
400
405
|
}
|
401
406
|
|
402
|
-
/* skipping a code span */
|
403
407
|
if (data[i] == '`') {
|
408
|
+
size_t span_nb = 0, bt;
|
404
409
|
size_t tmp_i = 0;
|
405
410
|
|
406
|
-
|
407
|
-
while (i < size && data[i]
|
408
|
-
|
409
|
-
i++;
|
411
|
+
/* counting the number of opening backticks */
|
412
|
+
while (i < size && data[i] == '`') {
|
413
|
+
i++; span_nb++;
|
410
414
|
}
|
411
415
|
|
412
|
-
if (i >= size)
|
413
|
-
return tmp_i;
|
416
|
+
if (i >= size) return 0;
|
414
417
|
|
415
|
-
|
418
|
+
/* finding the matching closing sequence */
|
419
|
+
bt = 0;
|
420
|
+
while (i < size && bt < span_nb) {
|
421
|
+
if (!tmp_i && data[i] == c) tmp_i = i;
|
422
|
+
if (data[i] == '`') bt++;
|
423
|
+
else bt = 0;
|
424
|
+
i++;
|
425
|
+
}
|
426
|
+
|
427
|
+
if (i >= size) return tmp_i;
|
428
|
+
i++;
|
416
429
|
}
|
417
430
|
/* skipping a link */
|
418
431
|
else if (data[i] == '[') {
|
419
432
|
size_t tmp_i = 0;
|
420
|
-
|
433
|
+
uint8_t cc;
|
421
434
|
|
422
435
|
i++;
|
423
436
|
while (i < size && data[i] != ']') {
|
@@ -426,18 +439,26 @@ find_emph_char(char *data, size_t size, char c)
|
|
426
439
|
}
|
427
440
|
|
428
441
|
i++;
|
429
|
-
while (i < size && (data[i] == ' ' || data[i] == '\
|
442
|
+
while (i < size && (data[i] == ' ' || data[i] == '\n'))
|
430
443
|
i++;
|
431
444
|
|
432
445
|
if (i >= size)
|
433
446
|
return tmp_i;
|
434
447
|
|
435
|
-
|
436
|
-
|
437
|
-
|
448
|
+
switch (data[i]) {
|
449
|
+
case '[':
|
450
|
+
cc = ']'; break;
|
451
|
+
|
452
|
+
case '(':
|
453
|
+
cc = ')'; break;
|
454
|
+
|
455
|
+
default:
|
456
|
+
if (tmp_i)
|
457
|
+
return tmp_i;
|
458
|
+
else
|
459
|
+
continue;
|
438
460
|
}
|
439
461
|
|
440
|
-
cc = data[i];
|
441
462
|
i++;
|
442
463
|
while (i < size && data[i] != cc) {
|
443
464
|
if (!tmp_i && data[i] == c) tmp_i = i;
|
@@ -457,7 +478,7 @@ find_emph_char(char *data, size_t size, char c)
|
|
457
478
|
/* parse_emph1 • parsing single emphasis */
|
458
479
|
/* closed by a symbol not preceded by whitespace and not followed by symbol */
|
459
480
|
static size_t
|
460
|
-
parse_emph1(struct buf *ob, struct
|
481
|
+
parse_emph1(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c)
|
461
482
|
{
|
462
483
|
size_t i = 0, len;
|
463
484
|
struct buf *work = 0;
|
@@ -474,15 +495,10 @@ parse_emph1(struct buf *ob, struct render *rndr, char *data, size_t size, char c
|
|
474
495
|
i += len;
|
475
496
|
if (i >= size) return 0;
|
476
497
|
|
477
|
-
if (i
|
478
|
-
i++;
|
479
|
-
continue;
|
480
|
-
}
|
481
|
-
|
482
|
-
if (data[i] == c && !isspace(data[i - 1])) {
|
498
|
+
if (data[i] == c && !_isspace(data[i - 1])) {
|
483
499
|
|
484
500
|
if (rndr->ext_flags & MKDEXT_NO_INTRA_EMPHASIS) {
|
485
|
-
if (!(i + 1 == size ||
|
501
|
+
if (!(i + 1 == size || _isspace(data[i + 1]) || ispunct(data[i + 1])))
|
486
502
|
continue;
|
487
503
|
}
|
488
504
|
|
@@ -499,9 +515,9 @@ parse_emph1(struct buf *ob, struct render *rndr, char *data, size_t size, char c
|
|
499
515
|
|
500
516
|
/* parse_emph2 • parsing double emphasis */
|
501
517
|
static size_t
|
502
|
-
parse_emph2(struct buf *ob, struct
|
518
|
+
parse_emph2(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c)
|
503
519
|
{
|
504
|
-
int (*render_method)(struct buf *ob, struct buf *text, void *opaque);
|
520
|
+
int (*render_method)(struct buf *ob, const struct buf *text, void *opaque);
|
505
521
|
size_t i = 0, len;
|
506
522
|
struct buf *work = 0;
|
507
523
|
int r;
|
@@ -510,13 +526,13 @@ parse_emph2(struct buf *ob, struct render *rndr, char *data, size_t size, char c
|
|
510
526
|
|
511
527
|
if (!render_method)
|
512
528
|
return 0;
|
513
|
-
|
529
|
+
|
514
530
|
while (i < size) {
|
515
531
|
len = find_emph_char(data + i, size - i, c);
|
516
532
|
if (!len) return 0;
|
517
533
|
i += len;
|
518
534
|
|
519
|
-
if (i + 1 < size && data[i] == c && data[i + 1] == c && i && !
|
535
|
+
if (i + 1 < size && data[i] == c && data[i + 1] == c && i && !_isspace(data[i - 1])) {
|
520
536
|
work = rndr_newbuf(rndr, BUFFER_SPAN);
|
521
537
|
parse_inline(work, rndr, data, i);
|
522
538
|
r = render_method(ob, work, rndr->opaque);
|
@@ -531,7 +547,7 @@ parse_emph2(struct buf *ob, struct render *rndr, char *data, size_t size, char c
|
|
531
547
|
/* parse_emph3 • parsing triple emphasis */
|
532
548
|
/* finds the first closing tag, and delegates to the other emph */
|
533
549
|
static size_t
|
534
|
-
parse_emph3(struct buf *ob, struct
|
550
|
+
parse_emph3(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c)
|
535
551
|
{
|
536
552
|
size_t i = 0, len;
|
537
553
|
int r;
|
@@ -542,7 +558,7 @@ parse_emph3(struct buf *ob, struct render *rndr, char *data, size_t size, char c
|
|
542
558
|
i += len;
|
543
559
|
|
544
560
|
/* skip whitespace preceded symbols */
|
545
|
-
if (data[i] != c ||
|
561
|
+
if (data[i] != c || _isspace(data[i - 1]))
|
546
562
|
continue;
|
547
563
|
|
548
564
|
if (i + 2 < size && data[i + 1] == c && data[i + 2] == c && rndr->cb.triple_emphasis) {
|
@@ -567,46 +583,46 @@ parse_emph3(struct buf *ob, struct render *rndr, char *data, size_t size, char c
|
|
567
583
|
else return len - 1;
|
568
584
|
}
|
569
585
|
}
|
570
|
-
return 0;
|
586
|
+
return 0;
|
571
587
|
}
|
572
588
|
|
573
589
|
/* char_emphasis • single and double emphasis parsing */
|
574
590
|
static size_t
|
575
|
-
char_emphasis(struct buf *ob, struct
|
591
|
+
char_emphasis(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
|
576
592
|
{
|
577
|
-
|
593
|
+
uint8_t c = data[0];
|
578
594
|
size_t ret;
|
579
595
|
|
580
596
|
if (size > 2 && data[1] != c) {
|
581
597
|
/* whitespace cannot follow an opening emphasis;
|
582
598
|
* strikethrough only takes two characters '~~' */
|
583
|
-
if (c == '~' ||
|
599
|
+
if (c == '~' || _isspace(data[1]) || (ret = parse_emph1(ob, rndr, data + 1, size - 1, c)) == 0)
|
584
600
|
return 0;
|
585
601
|
|
586
602
|
return ret + 1;
|
587
603
|
}
|
588
604
|
|
589
605
|
if (size > 3 && data[1] == c && data[2] != c) {
|
590
|
-
if (
|
606
|
+
if (_isspace(data[2]) || (ret = parse_emph2(ob, rndr, data + 2, size - 2, c)) == 0)
|
591
607
|
return 0;
|
592
608
|
|
593
609
|
return ret + 2;
|
594
610
|
}
|
595
611
|
|
596
612
|
if (size > 4 && data[1] == c && data[2] == c && data[3] != c) {
|
597
|
-
if (c == '~' ||
|
613
|
+
if (c == '~' || _isspace(data[3]) || (ret = parse_emph3(ob, rndr, data + 3, size - 3, c)) == 0)
|
598
614
|
return 0;
|
599
615
|
|
600
616
|
return ret + 3;
|
601
617
|
}
|
602
618
|
|
603
|
-
return 0;
|
619
|
+
return 0;
|
604
620
|
}
|
605
621
|
|
606
622
|
|
607
623
|
/* char_linebreak • '\n' preceded by two spaces (assuming linebreak != 0) */
|
608
624
|
static size_t
|
609
|
-
char_linebreak(struct buf *ob, struct
|
625
|
+
char_linebreak(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
|
610
626
|
{
|
611
627
|
if (offset < 2 || data[-1] != ' ' || data[-2] != ' ')
|
612
628
|
return 0;
|
@@ -621,7 +637,7 @@ char_linebreak(struct buf *ob, struct render *rndr, char *data, size_t offset, s
|
|
621
637
|
|
622
638
|
/* char_codespan • '`' parsing a code span (assuming codespan != 0) */
|
623
639
|
static size_t
|
624
|
-
char_codespan(struct buf *ob, struct
|
640
|
+
char_codespan(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
|
625
641
|
{
|
626
642
|
size_t end, nb = 0, i, f_begin, f_end;
|
627
643
|
|
@@ -641,16 +657,16 @@ char_codespan(struct buf *ob, struct render *rndr, char *data, size_t offset, si
|
|
641
657
|
|
642
658
|
/* trimming outside whitespaces */
|
643
659
|
f_begin = nb;
|
644
|
-
while (f_begin < end &&
|
660
|
+
while (f_begin < end && data[f_begin] == ' ')
|
645
661
|
f_begin++;
|
646
662
|
|
647
663
|
f_end = end - nb;
|
648
|
-
while (f_end > nb &&
|
664
|
+
while (f_end > nb && data[f_end-1] == ' ')
|
649
665
|
f_end--;
|
650
666
|
|
651
667
|
/* real code span */
|
652
668
|
if (f_begin < f_end) {
|
653
|
-
struct buf work = { data + f_begin, f_end - f_begin, 0, 0
|
669
|
+
struct buf work = { data + f_begin, f_end - f_begin, 0, 0 };
|
654
670
|
if (!rndr->cb.codespan(ob, &work, rndr->opaque))
|
655
671
|
end = 0;
|
656
672
|
} else {
|
@@ -664,10 +680,10 @@ char_codespan(struct buf *ob, struct render *rndr, char *data, size_t offset, si
|
|
664
680
|
|
665
681
|
/* char_escape • '\\' backslash escape */
|
666
682
|
static size_t
|
667
|
-
char_escape(struct buf *ob, struct
|
683
|
+
char_escape(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
|
668
684
|
{
|
669
685
|
static const char *escape_chars = "\\`*_{}[]()#+-.!:|&<>";
|
670
|
-
struct buf work = { 0, 0, 0, 0
|
686
|
+
struct buf work = { 0, 0, 0, 0 };
|
671
687
|
|
672
688
|
if (size > 1) {
|
673
689
|
if (strchr(escape_chars, data[1]) == NULL)
|
@@ -687,10 +703,10 @@ char_escape(struct buf *ob, struct render *rndr, char *data, size_t offset, size
|
|
687
703
|
/* char_entity • '&' escaped when it doesn't belong to an entity */
|
688
704
|
/* valid entities are assumed to be anything matching &#?[A-Za-z0-9]+; */
|
689
705
|
static size_t
|
690
|
-
char_entity(struct buf *ob, struct
|
706
|
+
char_entity(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
|
691
707
|
{
|
692
708
|
size_t end = 1;
|
693
|
-
struct buf work;
|
709
|
+
struct buf work = { 0, 0, 0, 0 };
|
694
710
|
|
695
711
|
if (end < size && data[end] == '#')
|
696
712
|
end++;
|
@@ -715,11 +731,11 @@ char_entity(struct buf *ob, struct render *rndr, char *data, size_t offset, size
|
|
715
731
|
|
716
732
|
/* char_langle_tag • '<' when tags or autolinks are allowed */
|
717
733
|
static size_t
|
718
|
-
char_langle_tag(struct buf *ob, struct
|
734
|
+
char_langle_tag(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
|
719
735
|
{
|
720
736
|
enum mkd_autolink altype = MKDA_NOT_AUTOLINK;
|
721
737
|
size_t end = tag_length(data, size, &altype);
|
722
|
-
struct buf work = { data, end, 0, 0
|
738
|
+
struct buf work = { data, end, 0, 0 };
|
723
739
|
int ret = 0;
|
724
740
|
|
725
741
|
if (end > 2) {
|
@@ -740,7 +756,7 @@ char_langle_tag(struct buf *ob, struct render *rndr, char *data, size_t offset,
|
|
740
756
|
}
|
741
757
|
|
742
758
|
static size_t
|
743
|
-
char_autolink_www(struct buf *ob, struct
|
759
|
+
char_autolink_www(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
|
744
760
|
{
|
745
761
|
struct buf *link, *link_url;
|
746
762
|
size_t link_len, rewind;
|
@@ -765,7 +781,7 @@ char_autolink_www(struct buf *ob, struct render *rndr, char *data, size_t offset
|
|
765
781
|
}
|
766
782
|
|
767
783
|
static size_t
|
768
|
-
char_autolink_email(struct buf *ob, struct
|
784
|
+
char_autolink_email(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
|
769
785
|
{
|
770
786
|
struct buf *link;
|
771
787
|
size_t link_len, rewind;
|
@@ -785,7 +801,7 @@ char_autolink_email(struct buf *ob, struct render *rndr, char *data, size_t offs
|
|
785
801
|
}
|
786
802
|
|
787
803
|
static size_t
|
788
|
-
char_autolink_url(struct buf *ob, struct
|
804
|
+
char_autolink_url(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
|
789
805
|
{
|
790
806
|
struct buf *link;
|
791
807
|
size_t link_len, rewind;
|
@@ -806,7 +822,7 @@ char_autolink_url(struct buf *ob, struct render *rndr, char *data, size_t offset
|
|
806
822
|
|
807
823
|
/* char_link • '[': parsing a link or an image */
|
808
824
|
static size_t
|
809
|
-
char_link(struct buf *ob, struct
|
825
|
+
char_link(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
|
810
826
|
{
|
811
827
|
int is_img = (offset && data[-1] == '!'), level;
|
812
828
|
size_t i = 1, txt_e, link_b = 0, link_e = 0, title_b = 0, title_e = 0;
|
@@ -847,7 +863,7 @@ char_link(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t
|
|
847
863
|
|
848
864
|
/* skip any amount of whitespace or newline */
|
849
865
|
/* (this is much more lax than the original markdown syntax) */
|
850
|
-
while (i < size &&
|
866
|
+
while (i < size && _isspace(data[i]))
|
851
867
|
i++;
|
852
868
|
|
853
869
|
/* inline style link */
|
@@ -855,7 +871,7 @@ char_link(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t
|
|
855
871
|
/* skipping initial whitespace */
|
856
872
|
i++;
|
857
873
|
|
858
|
-
while (i < size &&
|
874
|
+
while (i < size && _isspace(data[i]))
|
859
875
|
i++;
|
860
876
|
|
861
877
|
link_b = i;
|
@@ -885,7 +901,7 @@ char_link(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t
|
|
885
901
|
|
886
902
|
/* skipping whitespaces after title */
|
887
903
|
title_e = i - 1;
|
888
|
-
while (title_e > title_b &&
|
904
|
+
while (title_e > title_b && _isspace(data[title_e]))
|
889
905
|
title_e--;
|
890
906
|
|
891
907
|
/* checking for closing quote presence */
|
@@ -896,7 +912,7 @@ char_link(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t
|
|
896
912
|
}
|
897
913
|
|
898
914
|
/* remove whitespace at the end of the link */
|
899
|
-
while (link_e > link_b &&
|
915
|
+
while (link_e > link_b && _isspace(data[link_e - 1]))
|
900
916
|
link_e--;
|
901
917
|
|
902
918
|
/* remove optional angle brackets around the link */
|
@@ -919,7 +935,7 @@ char_link(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t
|
|
919
935
|
|
920
936
|
/* reference style link */
|
921
937
|
else if (i < size && data[i] == '[') {
|
922
|
-
struct buf id = { 0, 0, 0, 0
|
938
|
+
struct buf id = { 0, 0, 0, 0 };
|
923
939
|
struct link_ref *lr;
|
924
940
|
|
925
941
|
/* looking for the id */
|
@@ -953,8 +969,9 @@ char_link(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t
|
|
953
969
|
id.size = link_e - link_b;
|
954
970
|
}
|
955
971
|
|
956
|
-
lr =
|
957
|
-
if (!lr)
|
972
|
+
lr = find_link_ref(rndr->refs, id.data, id.size);
|
973
|
+
if (!lr)
|
974
|
+
goto cleanup;
|
958
975
|
|
959
976
|
/* keeping link and title from link_ref */
|
960
977
|
link = lr->link;
|
@@ -964,7 +981,7 @@ char_link(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t
|
|
964
981
|
|
965
982
|
/* shortcut reference style link */
|
966
983
|
else {
|
967
|
-
struct buf id = { 0, 0, 0, 0
|
984
|
+
struct buf id = { 0, 0, 0, 0 };
|
968
985
|
struct link_ref *lr;
|
969
986
|
|
970
987
|
/* crafting the id */
|
@@ -987,8 +1004,9 @@ char_link(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t
|
|
987
1004
|
}
|
988
1005
|
|
989
1006
|
/* finding the link_ref */
|
990
|
-
lr =
|
991
|
-
if (!lr)
|
1007
|
+
lr = find_link_ref(rndr->refs, id.data, id.size);
|
1008
|
+
if (!lr)
|
1009
|
+
goto cleanup;
|
992
1010
|
|
993
1011
|
/* keeping link and title from link_ref */
|
994
1012
|
link = lr->link;
|
@@ -1027,7 +1045,7 @@ cleanup:
|
|
1027
1045
|
}
|
1028
1046
|
|
1029
1047
|
static size_t
|
1030
|
-
char_superscript(struct buf *ob, struct
|
1048
|
+
char_superscript(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
|
1031
1049
|
{
|
1032
1050
|
size_t sup_start, sup_len;
|
1033
1051
|
struct buf *sup;
|
@@ -1049,7 +1067,7 @@ char_superscript(struct buf *ob, struct render *rndr, char *data, size_t offset,
|
|
1049
1067
|
} else {
|
1050
1068
|
sup_start = sup_len = 1;
|
1051
1069
|
|
1052
|
-
while (sup_len < size && !
|
1070
|
+
while (sup_len < size && !_isspace(data[sup_len]))
|
1053
1071
|
sup_len++;
|
1054
1072
|
}
|
1055
1073
|
|
@@ -1070,20 +1088,23 @@ char_superscript(struct buf *ob, struct render *rndr, char *data, size_t offset,
|
|
1070
1088
|
|
1071
1089
|
/* is_empty • returns the line length when it is empty, 0 otherwise */
|
1072
1090
|
static size_t
|
1073
|
-
is_empty(
|
1091
|
+
is_empty(uint8_t *data, size_t size)
|
1074
1092
|
{
|
1075
1093
|
size_t i;
|
1094
|
+
|
1076
1095
|
for (i = 0; i < size && data[i] != '\n'; i++)
|
1077
|
-
if (data[i] != ' '
|
1096
|
+
if (data[i] != ' ')
|
1097
|
+
return 0;
|
1098
|
+
|
1078
1099
|
return i + 1;
|
1079
1100
|
}
|
1080
1101
|
|
1081
1102
|
/* is_hrule • returns whether a line is a horizontal rule */
|
1082
1103
|
static int
|
1083
|
-
is_hrule(
|
1104
|
+
is_hrule(uint8_t *data, size_t size)
|
1084
1105
|
{
|
1085
1106
|
size_t i = 0, n = 0;
|
1086
|
-
|
1107
|
+
uint8_t c;
|
1087
1108
|
|
1088
1109
|
/* skipping initial spaces */
|
1089
1110
|
if (size < 3) return 0;
|
@@ -1091,7 +1112,7 @@ is_hrule(char *data, size_t size)
|
|
1091
1112
|
if (data[1] == ' ') { i++;
|
1092
1113
|
if (data[2] == ' ') { i++; } } }
|
1093
1114
|
|
1094
|
-
/* looking at the hrule
|
1115
|
+
/* looking at the hrule char */
|
1095
1116
|
if (i + 2 >= size
|
1096
1117
|
|| (data[i] != '*' && data[i] != '-' && data[i] != '_'))
|
1097
1118
|
return 0;
|
@@ -1100,19 +1121,21 @@ is_hrule(char *data, size_t size)
|
|
1100
1121
|
/* the whole line must be the char or whitespace */
|
1101
1122
|
while (i < size && data[i] != '\n') {
|
1102
1123
|
if (data[i] == c) n++;
|
1103
|
-
else if (data[i] != ' '
|
1124
|
+
else if (data[i] != ' ')
|
1104
1125
|
return 0;
|
1105
|
-
|
1126
|
+
|
1127
|
+
i++;
|
1128
|
+
}
|
1106
1129
|
|
1107
1130
|
return n >= 3;
|
1108
1131
|
}
|
1109
1132
|
|
1110
1133
|
/* check if a line is a code fence; return its size if it is */
|
1111
1134
|
static size_t
|
1112
|
-
is_codefence(
|
1135
|
+
is_codefence(uint8_t *data, size_t size, struct buf *syntax)
|
1113
1136
|
{
|
1114
1137
|
size_t i = 0, n = 0;
|
1115
|
-
|
1138
|
+
uint8_t c;
|
1116
1139
|
|
1117
1140
|
/* skipping initial spaces */
|
1118
1141
|
if (size < 3) return 0;
|
@@ -1120,13 +1143,13 @@ is_codefence(char *data, size_t size, struct buf *syntax)
|
|
1120
1143
|
if (data[1] == ' ') { i++;
|
1121
1144
|
if (data[2] == ' ') { i++; } } }
|
1122
1145
|
|
1123
|
-
/* looking at the hrule
|
1146
|
+
/* looking at the fence char */
|
1124
1147
|
if (i + 2 >= size || !(data[i] == '~' || data[i] == '`'))
|
1125
1148
|
return 0;
|
1126
1149
|
|
1127
1150
|
c = data[i];
|
1128
1151
|
|
1129
|
-
/* the whole line must be the
|
1152
|
+
/* the whole line must be the char or whitespace */
|
1130
1153
|
while (i < size && data[i] == c) {
|
1131
1154
|
n++; i++;
|
1132
1155
|
}
|
@@ -1137,7 +1160,7 @@ is_codefence(char *data, size_t size, struct buf *syntax)
|
|
1137
1160
|
if (syntax != NULL) {
|
1138
1161
|
size_t syn = 0;
|
1139
1162
|
|
1140
|
-
while (i < size &&
|
1163
|
+
while (i < size && data[i] == ' ')
|
1141
1164
|
i++;
|
1142
1165
|
|
1143
1166
|
syntax->data = data + i;
|
@@ -1154,16 +1177,16 @@ is_codefence(char *data, size_t size, struct buf *syntax)
|
|
1154
1177
|
|
1155
1178
|
/* strip all whitespace at the beginning and the end
|
1156
1179
|
* of the {} block */
|
1157
|
-
while (syn > 0 &&
|
1180
|
+
while (syn > 0 && _isspace(syntax->data[0])) {
|
1158
1181
|
syntax->data++; syn--;
|
1159
1182
|
}
|
1160
1183
|
|
1161
|
-
while (syn > 0 &&
|
1184
|
+
while (syn > 0 && _isspace(syntax->data[syn - 1]))
|
1162
1185
|
syn--;
|
1163
1186
|
|
1164
1187
|
i++;
|
1165
1188
|
} else {
|
1166
|
-
while (i < size && !
|
1189
|
+
while (i < size && !_isspace(data[i])) {
|
1167
1190
|
syn++; i++;
|
1168
1191
|
}
|
1169
1192
|
}
|
@@ -1172,7 +1195,7 @@ is_codefence(char *data, size_t size, struct buf *syntax)
|
|
1172
1195
|
}
|
1173
1196
|
|
1174
1197
|
while (i < size && data[i] != '\n') {
|
1175
|
-
if (!
|
1198
|
+
if (!_isspace(data[i]))
|
1176
1199
|
return 0;
|
1177
1200
|
|
1178
1201
|
i++;
|
@@ -1183,7 +1206,7 @@ is_codefence(char *data, size_t size, struct buf *syntax)
|
|
1183
1206
|
|
1184
1207
|
/* is_atxheader • returns whether the line is a hash-prefixed header */
|
1185
1208
|
static int
|
1186
|
-
is_atxheader(struct
|
1209
|
+
is_atxheader(struct sd_markdown *rndr, uint8_t *data, size_t size)
|
1187
1210
|
{
|
1188
1211
|
if (data[0] != '#')
|
1189
1212
|
return 0;
|
@@ -1194,7 +1217,7 @@ is_atxheader(struct render *rndr, char *data, size_t size)
|
|
1194
1217
|
while (level < size && level < 6 && data[level] == '#')
|
1195
1218
|
level++;
|
1196
1219
|
|
1197
|
-
if (level < size && data[level] != ' '
|
1220
|
+
if (level < size && data[level] != ' ')
|
1198
1221
|
return 0;
|
1199
1222
|
}
|
1200
1223
|
|
@@ -1203,92 +1226,126 @@ is_atxheader(struct render *rndr, char *data, size_t size)
|
|
1203
1226
|
|
1204
1227
|
/* is_headerline • returns whether the line is a setext-style hdr underline */
/*
 * data/size: buffer starting at the candidate underline; it may extend past
 * the line's newline.
 * Returns 1 for a run of '=' (level 1 header), 2 for a run of '-' (level 2
 * header) and 0 otherwise. After the run only trailing spaces are accepted
 * before the newline or the end of the buffer.
 */
static int
is_headerline(uint8_t *data, size_t size)
{
	size_t i;
	uint8_t mark;

	/* an empty buffer has no first byte to inspect */
	if (size == 0)
		return 0;

	mark = data[0];
	if (mark != '=' && mark != '-')
		return 0;

	/* consume the underline run, then any trailing spaces */
	for (i = 1; i < size && data[i] == mark; i++)
		;
	while (i < size && data[i] == ' ')
		i++;

	/* anything but end-of-line after the run disqualifies the line */
	if (i < size && data[i] != '\n')
		return 0;

	return (mark == '=') ? 1 : 2;
}
|
1224
1247
|
|
1248
|
+
/* is_next_headerline • returns whether the line after the current one is a
 * setext-style header underline; the result is whatever is_headerline
 * reports for that following line, or 0 when there is no following line */
static int
is_next_headerline(uint8_t *data, size_t size)
{
	size_t next = 0;

	/* locate the newline that ends the current line */
	for (; next < size; next++) {
		if (data[next] == '\n')
			break;
	}

	/* step over the newline; give up when nothing follows it */
	next++;
	if (next >= size)
		return 0;

	return is_headerline(data + next, size - next);
}
|
1261
|
+
|
1225
1262
|
/* prefix_quote • returns blockquote prefix length */
/* Accepts up to three leading spaces, a '>' marker and one optional space
 * after it; yields 0 when the line does not open with such a prefix. */
static size_t
prefix_quote(uint8_t *data, size_t size)
{
	size_t pos = 0;

	/* at most three spaces of indentation before the marker */
	while (pos < 3 && pos < size && data[pos] == ' ')
		pos++;

	if (pos >= size || data[pos] != '>')
		return 0;

	/* step past '>' and swallow a single following space, if any */
	pos++;
	if (pos < size && data[pos] == ' ')
		pos++;

	return pos;
}
|
1239
1280
|
|
1240
1281
|
/* prefix_code • returns prefix length for block code */
/* A code line opens with four spaces; the result is 4 in that case and 0
 * for anything else (including buffers of three bytes or fewer). */
static size_t
prefix_code(uint8_t *data, size_t size)
{
	size_t n;

	/* more than three bytes are required before four can be inspected */
	if (size <= 3)
		return 0;

	for (n = 0; n < 4; n++) {
		if (data[n] != ' ')
			return 0;
	}

	return 4;
}
|
1249
1290
|
|
1250
1291
|
/* prefix_oli • returns ordered list item prefix */
/* The prefix is up to three spaces, one or more digits, a '.' and a space.
 * Returns the offset just past that space, or 0 when the line is not an
 * ordered list item or when the following line is a setext underline. */
static size_t
prefix_oli(uint8_t *data, size_t size)
{
	size_t pos = 0;

	/* at most three spaces of indentation */
	while (pos < 3 && pos < size && data[pos] == ' ')
		pos++;

	/* the marker must start with a digit */
	if (pos >= size || data[pos] < '0' || data[pos] > '9')
		return 0;

	/* the first digit is known to be there; skip it and any that follow */
	do {
		pos++;
	} while (pos < size && data[pos] >= '0' && data[pos] <= '9');

	/* the digits must be followed by ". " */
	if (pos + 1 >= size)
		return 0;
	if (data[pos] != '.' || data[pos + 1] != ' ')
		return 0;

	/* a line underlined as a header is not treated as a list item */
	if (is_next_headerline(data + pos, size - pos))
		return 0;

	return pos + 2;
}
|
1264
1315
|
|
1265
1316
|
/* prefix_uli • returns unordered list item prefix */
/* The prefix is up to three spaces, one of '*', '+' or '-', and a space.
 * Returns the offset just past that space, or 0 when the line is not an
 * unordered list item or when the following line is a setext underline. */
static size_t
prefix_uli(uint8_t *data, size_t size)
{
	size_t pos = 0;

	/* at most three spaces of indentation */
	while (pos < 3 && pos < size && data[pos] == ' ')
		pos++;

	/* the bullet and the space after it must both fit in the buffer */
	if (pos + 1 >= size)
		return 0;

	switch (data[pos]) {
	case '*':
	case '+':
	case '-':
		break;
	default:
		return 0;
	}

	if (data[pos + 1] != ' ')
		return 0;

	/* a line underlined as a header is not treated as a list item */
	if (is_next_headerline(data + pos, size - pos))
		return 0;

	return pos + 2;
}
|
1279
1336
|
|
1280
1337
|
|
1281
|
-
/* parse_block • parsing of one block, returning next
|
1282
|
-
static void parse_block(struct buf *ob, struct
|
1283
|
-
|
1338
|
+
/* parse_block • parsing of one block, returning next char to parse */
|
1339
|
+
static void parse_block(struct buf *ob, struct sd_markdown *rndr,
|
1340
|
+
uint8_t *data, size_t size);
|
1284
1341
|
|
1285
1342
|
|
1286
1343
|
/* parse_blockquote • handles parsing of a blockquote fragment */
|
1287
1344
|
static size_t
|
1288
|
-
parse_blockquote(struct buf *ob, struct
|
1345
|
+
parse_blockquote(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
|
1289
1346
|
{
|
1290
1347
|
size_t beg, end = 0, pre, work_size = 0;
|
1291
|
-
|
1348
|
+
uint8_t *work_data = 0;
|
1292
1349
|
struct buf *out = 0;
|
1293
1350
|
|
1294
1351
|
out = rndr_newbuf(rndr, BUFFER_BLOCK);
|
@@ -1326,15 +1383,15 @@ parse_blockquote(struct buf *ob, struct render *rndr, char *data, size_t size)
|
|
1326
1383
|
}
|
1327
1384
|
|
1328
1385
|
static size_t
|
1329
|
-
parse_htmlblock(struct buf *ob, struct
|
1386
|
+
parse_htmlblock(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int do_render);
|
1330
1387
|
|
1331
1388
|
/* parse_paragraph • handles parsing of a regular paragraph */
|
1332
1389
|
static size_t
|
1333
|
-
parse_paragraph(struct buf *ob, struct
|
1390
|
+
parse_paragraph(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
|
1334
1391
|
{
|
1335
1392
|
size_t i = 0, end = 0;
|
1336
1393
|
int level = 0;
|
1337
|
-
struct buf work = { data, 0, 0, 0
|
1394
|
+
struct buf work = { data, 0, 0, 0 };
|
1338
1395
|
|
1339
1396
|
while (i < size) {
|
1340
1397
|
for (end = i + 1; end < size && data[end - 1] != '\n'; end++) /* empty */;
|
@@ -1410,11 +1467,11 @@ parse_paragraph(struct buf *ob, struct render *rndr, char *data, size_t size)
|
|
1410
1467
|
|
1411
1468
|
/* parse_fencedcode • handles parsing of a block-level code fragment */
|
1412
1469
|
static size_t
|
1413
|
-
parse_fencedcode(struct buf *ob, struct
|
1470
|
+
parse_fencedcode(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
|
1414
1471
|
{
|
1415
1472
|
size_t beg, end;
|
1416
1473
|
struct buf *work = 0;
|
1417
|
-
struct buf lang = { 0, 0, 0, 0
|
1474
|
+
struct buf lang = { 0, 0, 0, 0 };
|
1418
1475
|
|
1419
1476
|
beg = is_codefence(data, size, &lang);
|
1420
1477
|
if (beg == 0) return 0;
|
@@ -1453,7 +1510,7 @@ parse_fencedcode(struct buf *ob, struct render *rndr, char *data, size_t size)
|
|
1453
1510
|
}
|
1454
1511
|
|
1455
1512
|
static size_t
|
1456
|
-
parse_blockcode(struct buf *ob, struct
|
1513
|
+
parse_blockcode(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
|
1457
1514
|
{
|
1458
1515
|
size_t beg, end, pre;
|
1459
1516
|
struct buf *work = 0;
|
@@ -1496,7 +1553,7 @@ parse_blockcode(struct buf *ob, struct render *rndr, char *data, size_t size)
|
|
1496
1553
|
/* parse_listitem • parsing of a single list item */
|
1497
1554
|
/* assuming initial prefix is already removed */
|
1498
1555
|
static size_t
|
1499
|
-
parse_listitem(struct buf *ob, struct
|
1556
|
+
parse_listitem(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int *flags)
|
1500
1557
|
{
|
1501
1558
|
struct buf *work = 0, *inter = 0;
|
1502
1559
|
size_t beg = 0, end, pre, sublist = 0, orgpre = 0, i;
|
@@ -1546,7 +1603,6 @@ parse_listitem(struct buf *ob, struct render *rndr, char *data, size_t size, int
|
|
1546
1603
|
i++;
|
1547
1604
|
|
1548
1605
|
pre = i;
|
1549
|
-
if (data[beg] == '\t') { i = 1; pre = 8; }
|
1550
1606
|
|
1551
1607
|
/* checking for a new item */
|
1552
1608
|
if ((prefix_uli(data + beg + i, end - beg - i) &&
|
@@ -1562,7 +1618,7 @@ parse_listitem(struct buf *ob, struct render *rndr, char *data, size_t size, int
|
|
1562
1618
|
sublist = work->size;
|
1563
1619
|
}
|
1564
1620
|
/* joining only indented stuff after empty lines */
|
1565
|
-
else if (in_empty && i < 4
|
1621
|
+
else if (in_empty && i < 4) {
|
1566
1622
|
*flags |= MKD_LI_END;
|
1567
1623
|
break;
|
1568
1624
|
}
|
@@ -1586,7 +1642,7 @@ parse_listitem(struct buf *ob, struct render *rndr, char *data, size_t size, int
|
|
1586
1642
|
/* intermediate render of block li */
|
1587
1643
|
if (sublist && sublist < work->size) {
|
1588
1644
|
parse_block(inter, rndr, work->data, sublist);
|
1589
|
-
parse_block(inter, rndr, work->data + sublist, work->size - sublist);
|
1645
|
+
parse_block(inter, rndr, work->data + sublist, work->size - sublist);
|
1590
1646
|
}
|
1591
1647
|
else
|
1592
1648
|
parse_block(inter, rndr, work->data, work->size);
|
@@ -1612,7 +1668,7 @@ parse_listitem(struct buf *ob, struct render *rndr, char *data, size_t size, int
|
|
1612
1668
|
|
1613
1669
|
/* parse_list • parsing ordered or unordered list block */
|
1614
1670
|
static size_t
|
1615
|
-
parse_list(struct buf *ob, struct
|
1671
|
+
parse_list(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int flags)
|
1616
1672
|
{
|
1617
1673
|
struct buf *work = 0;
|
1618
1674
|
size_t i = 0, j;
|
@@ -1635,7 +1691,7 @@ parse_list(struct buf *ob, struct render *rndr, char *data, size_t size, int fla
|
|
1635
1691
|
|
1636
1692
|
/* parse_atxheader • parsing of atx-style headers */
|
1637
1693
|
static size_t
|
1638
|
-
parse_atxheader(struct buf *ob, struct
|
1694
|
+
parse_atxheader(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
|
1639
1695
|
{
|
1640
1696
|
size_t level = 0;
|
1641
1697
|
size_t i, end, skip;
|
@@ -1643,7 +1699,7 @@ parse_atxheader(struct buf *ob, struct render *rndr, char *data, size_t size)
|
|
1643
1699
|
while (level < size && level < 6 && data[level] == '#')
|
1644
1700
|
level++;
|
1645
1701
|
|
1646
|
-
for (i = level; i < size &&
|
1702
|
+
for (i = level; i < size && data[i] == ' '; i++);
|
1647
1703
|
|
1648
1704
|
for (end = i; end < size && data[end] != '\n'; end++);
|
1649
1705
|
skip = end;
|
@@ -1651,7 +1707,7 @@ parse_atxheader(struct buf *ob, struct render *rndr, char *data, size_t size)
|
|
1651
1707
|
while (end && data[end - 1] == '#')
|
1652
1708
|
end--;
|
1653
1709
|
|
1654
|
-
while (end &&
|
1710
|
+
while (end && data[end - 1] == ' ')
|
1655
1711
|
end--;
|
1656
1712
|
|
1657
1713
|
if (end > i) {
|
@@ -1672,20 +1728,18 @@ parse_atxheader(struct buf *ob, struct render *rndr, char *data, size_t size)
|
|
1672
1728
|
/* htmlblock_end • checking end of HTML block : </tag>[ \t]*\n[ \t*]\n */
|
1673
1729
|
/* returns the length on match, 0 otherwise */
|
1674
1730
|
static size_t
|
1675
|
-
htmlblock_end(
|
1731
|
+
htmlblock_end(const char *tag, size_t tag_len, struct sd_markdown *rndr, uint8_t *data, size_t size)
|
1676
1732
|
{
|
1677
1733
|
size_t i, w;
|
1678
1734
|
|
1679
|
-
/* assuming data[0] == '<' && data[1] == '/' already tested */
|
1680
|
-
|
1681
1735
|
/* checking if tag is a match */
|
1682
|
-
if (
|
1683
|
-
|
1684
|
-
|
1736
|
+
if (tag_len + 3 >= size ||
|
1737
|
+
strncasecmp((char *)data + 2, tag, tag_len) != 0 ||
|
1738
|
+
data[tag_len + 2] != '>')
|
1685
1739
|
return 0;
|
1686
1740
|
|
1687
1741
|
/* checking white lines */
|
1688
|
-
i =
|
1742
|
+
i = tag_len + 3;
|
1689
1743
|
w = 0;
|
1690
1744
|
if (i < size && (w = is_empty(data + i, size - i)) == 0)
|
1691
1745
|
return 0; /* non-blank after tag */
|
@@ -1706,16 +1760,25 @@ htmlblock_end(struct html_tag *tag, struct render *rndr, char *data, size_t size
|
|
1706
1760
|
|
1707
1761
|
/* parse_htmlblock • parsing of inline HTML block */
|
1708
1762
|
static size_t
|
1709
|
-
parse_htmlblock(struct buf *ob, struct
|
1763
|
+
parse_htmlblock(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int do_render)
|
1710
1764
|
{
|
1711
1765
|
size_t i, j = 0;
|
1712
|
-
|
1766
|
+
const char *curtag = NULL;
|
1713
1767
|
int found;
|
1714
|
-
struct buf work = { data, 0, 0, 0
|
1768
|
+
struct buf work = { data, 0, 0, 0 };
|
1715
1769
|
|
1716
1770
|
/* identification of the opening tag */
|
1717
|
-
if (size < 2 || data[0] != '<')
|
1718
|
-
|
1771
|
+
if (size < 2 || data[0] != '<')
|
1772
|
+
return 0;
|
1773
|
+
|
1774
|
+
i = 1;
|
1775
|
+
|
1776
|
+
/* look for the closing `>` in the opening tag */
|
1777
|
+
while (i < size && data[i] != '>' && data[i] != ' ')
|
1778
|
+
i++;
|
1779
|
+
|
1780
|
+
if (i < size && data[i] == '>')
|
1781
|
+
curtag = find_block_tag((char *)data + 1, i - 1);
|
1719
1782
|
|
1720
1783
|
/* handling of special cases */
|
1721
1784
|
if (!curtag) {
|
@@ -1737,7 +1800,7 @@ parse_htmlblock(struct buf *ob, struct render *rndr, char *data, size_t size, in
|
|
1737
1800
|
if (do_render && rndr->cb.blockhtml)
|
1738
1801
|
rndr->cb.blockhtml(ob, &work, rndr->opaque);
|
1739
1802
|
return work.size;
|
1740
|
-
}
|
1803
|
+
}
|
1741
1804
|
}
|
1742
1805
|
|
1743
1806
|
/* HR, which is the only self-closing block tag considered */
|
@@ -1755,7 +1818,7 @@ parse_htmlblock(struct buf *ob, struct render *rndr, char *data, size_t size, in
|
|
1755
1818
|
rndr->cb.blockhtml(ob, &work, rndr->opaque);
|
1756
1819
|
return work.size;
|
1757
1820
|
}
|
1758
|
-
}
|
1821
|
+
}
|
1759
1822
|
}
|
1760
1823
|
|
1761
1824
|
/* no special case recognised */
|
@@ -1769,24 +1832,25 @@ parse_htmlblock(struct buf *ob, struct render *rndr, char *data, size_t size, in
|
|
1769
1832
|
|
1770
1833
|
/* if not found, trying a second pass looking for indented match */
|
1771
1834
|
/* but not if tag is "ins" or "del" (following original Markdown.pl) */
|
1772
|
-
if (curtag !=
|
1835
|
+
if (strcmp(curtag, "ins") != 0 && strcmp(curtag, "del") != 0) {
|
1836
|
+
size_t tag_size = strlen(curtag);
|
1773
1837
|
i = 1;
|
1774
1838
|
while (i < size) {
|
1775
1839
|
i++;
|
1776
1840
|
while (i < size && !(data[i - 1] == '<' && data[i] == '/'))
|
1777
1841
|
i++;
|
1778
1842
|
|
1779
|
-
if (i + 2 +
|
1843
|
+
if (i + 2 + tag_size >= size)
|
1780
1844
|
break;
|
1781
1845
|
|
1782
|
-
j = htmlblock_end(curtag, rndr, data + i - 1, size - i + 1);
|
1846
|
+
j = htmlblock_end(curtag, tag_size, rndr, data + i - 1, size - i + 1);
|
1783
1847
|
|
1784
1848
|
if (j) {
|
1785
1849
|
i += j - 1;
|
1786
1850
|
found = 1;
|
1787
1851
|
break;
|
1788
1852
|
}
|
1789
|
-
}
|
1853
|
+
}
|
1790
1854
|
}
|
1791
1855
|
|
1792
1856
|
if (!found) return 0;
|
@@ -1800,11 +1864,14 @@ parse_htmlblock(struct buf *ob, struct render *rndr, char *data, size_t size, in
|
|
1800
1864
|
}
|
1801
1865
|
|
1802
1866
|
static void
|
1803
|
-
parse_table_row(struct buf *ob, struct
|
1867
|
+
parse_table_row(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, size_t columns, int *col_data)
|
1804
1868
|
{
|
1805
1869
|
size_t i = 0, col;
|
1806
1870
|
struct buf *row_work = 0;
|
1807
1871
|
|
1872
|
+
if (!rndr->cb.table_cell || !rndr->cb.table_row)
|
1873
|
+
return;
|
1874
|
+
|
1808
1875
|
row_work = rndr_newbuf(rndr, BUFFER_SPAN);
|
1809
1876
|
|
1810
1877
|
if (i < size && data[i] == '|')
|
@@ -1816,7 +1883,7 @@ parse_table_row(struct buf *ob, struct render *rndr, char *data, size_t size, si
|
|
1816
1883
|
|
1817
1884
|
cell_work = rndr_newbuf(rndr, BUFFER_SPAN);
|
1818
1885
|
|
1819
|
-
while (i < size &&
|
1886
|
+
while (i < size && _isspace(data[i]))
|
1820
1887
|
i++;
|
1821
1888
|
|
1822
1889
|
cell_start = i;
|
@@ -1826,31 +1893,28 @@ parse_table_row(struct buf *ob, struct render *rndr, char *data, size_t size, si
|
|
1826
1893
|
|
1827
1894
|
cell_end = i - 1;
|
1828
1895
|
|
1829
|
-
while (cell_end > cell_start &&
|
1896
|
+
while (cell_end > cell_start && _isspace(data[cell_end]))
|
1830
1897
|
cell_end--;
|
1831
1898
|
|
1832
1899
|
parse_inline(cell_work, rndr, data + cell_start, 1 + cell_end - cell_start);
|
1833
|
-
|
1834
|
-
rndr->cb.table_cell(row_work, cell_work, col_data ? col_data[col] : 0, rndr->opaque);
|
1900
|
+
rndr->cb.table_cell(row_work, cell_work, col_data[col], rndr->opaque);
|
1835
1901
|
|
1836
1902
|
rndr_popbuf(rndr, BUFFER_SPAN);
|
1837
1903
|
i++;
|
1838
1904
|
}
|
1839
1905
|
|
1840
1906
|
for (; col < columns; ++col) {
|
1841
|
-
struct buf empty_cell = {0, 0, 0, 0
|
1842
|
-
|
1843
|
-
rndr->cb.table_cell(row_work, &empty_cell, col_data ? col_data[col] : 0, rndr->opaque);
|
1907
|
+
struct buf empty_cell = { 0, 0, 0, 0 };
|
1908
|
+
rndr->cb.table_cell(row_work, &empty_cell, col_data[col], rndr->opaque);
|
1844
1909
|
}
|
1845
1910
|
|
1846
|
-
|
1847
|
-
rndr->cb.table_row(ob, row_work, rndr->opaque);
|
1911
|
+
rndr->cb.table_row(ob, row_work, rndr->opaque);
|
1848
1912
|
|
1849
1913
|
rndr_popbuf(rndr, BUFFER_SPAN);
|
1850
1914
|
}
|
1851
1915
|
|
1852
1916
|
static size_t
|
1853
|
-
parse_table_header(struct buf *ob, struct
|
1917
|
+
parse_table_header(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, size_t *columns, int **column_data)
|
1854
1918
|
{
|
1855
1919
|
int pipes;
|
1856
1920
|
size_t i = 0, col, header_end, under_end;
|
@@ -1886,7 +1950,9 @@ parse_table_header(struct buf *ob, struct render *rndr, char *data, size_t size,
|
|
1886
1950
|
for (col = 0; col < *columns && i < under_end; ++col) {
|
1887
1951
|
size_t dashes = 0;
|
1888
1952
|
|
1889
|
-
|
1953
|
+
(*column_data)[col] |= MKD_TABLE_HEADER;
|
1954
|
+
|
1955
|
+
while (i < under_end && data[i] == ' ')
|
1890
1956
|
i++;
|
1891
1957
|
|
1892
1958
|
if (data[i] == ':') {
|
@@ -1903,7 +1969,7 @@ parse_table_header(struct buf *ob, struct render *rndr, char *data, size_t size,
|
|
1903
1969
|
dashes++;
|
1904
1970
|
}
|
1905
1971
|
|
1906
|
-
while (i < under_end &&
|
1972
|
+
while (i < under_end && data[i] == ' ')
|
1907
1973
|
i++;
|
1908
1974
|
|
1909
1975
|
if (i < under_end && data[i] != '|')
|
@@ -1923,7 +1989,7 @@ parse_table_header(struct buf *ob, struct render *rndr, char *data, size_t size,
|
|
1923
1989
|
}
|
1924
1990
|
|
1925
1991
|
static size_t
|
1926
|
-
parse_table(struct buf *ob, struct
|
1992
|
+
parse_table(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
|
1927
1993
|
{
|
1928
1994
|
size_t i;
|
1929
1995
|
|
@@ -1968,16 +2034,16 @@ parse_table(struct buf *ob, struct render *rndr, char *data, size_t size)
|
|
1968
2034
|
return i;
|
1969
2035
|
}
|
1970
2036
|
|
1971
|
-
/* parse_block • parsing of one block, returning next
|
2037
|
+
/* parse_block • parsing of one block, returning next char to parse */
|
1972
2038
|
static void
|
1973
|
-
parse_block(struct buf *ob, struct
|
2039
|
+
parse_block(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
|
1974
2040
|
{
|
1975
2041
|
size_t beg, end, i;
|
1976
|
-
|
2042
|
+
uint8_t *txt_data;
|
1977
2043
|
beg = 0;
|
1978
2044
|
|
1979
2045
|
if (rndr->work_bufs[BUFFER_SPAN].size +
|
1980
|
-
rndr->work_bufs[BUFFER_BLOCK].size >
|
2046
|
+
rndr->work_bufs[BUFFER_BLOCK].size > rndr->max_nesting)
|
1981
2047
|
return;
|
1982
2048
|
|
1983
2049
|
while (beg < size) {
|
@@ -2037,7 +2103,7 @@ parse_block(struct buf *ob, struct render *rndr, char *data, size_t size)
|
|
2037
2103
|
|
2038
2104
|
/* is_ref • returns whether a line is a reference or not */
|
2039
2105
|
static int
|
2040
|
-
is_ref(
|
2106
|
+
is_ref(const uint8_t *data, size_t beg, size_t end, size_t *last, struct link_ref **refs)
|
2041
2107
|
{
|
2042
2108
|
/* int n; */
|
2043
2109
|
size_t i = 0;
|
@@ -2045,8 +2111,6 @@ is_ref(char *data, size_t beg, size_t end, size_t *last, struct array *refs)
|
|
2045
2111
|
size_t link_offset, link_end;
|
2046
2112
|
size_t title_offset, title_end;
|
2047
2113
|
size_t line_end;
|
2048
|
-
struct link_ref *lr;
|
2049
|
-
/* struct buf id = { 0, 0, 0, 0, 0 }; / * volatile buf for id search */
|
2050
2114
|
|
2051
2115
|
/* up to 3 optional leading spaces */
|
2052
2116
|
if (beg + 3 >= end) return 0;
|
@@ -2069,23 +2133,27 @@ is_ref(char *data, size_t beg, size_t end, size_t *last, struct array *refs)
|
|
2069
2133
|
i++;
|
2070
2134
|
if (i >= end || data[i] != ':') return 0;
|
2071
2135
|
i++;
|
2072
|
-
while (i < end &&
|
2136
|
+
while (i < end && data[i] == ' ') i++;
|
2073
2137
|
if (i < end && (data[i] == '\n' || data[i] == '\r')) {
|
2074
2138
|
i++;
|
2075
2139
|
if (i < end && data[i] == '\r' && data[i - 1] == '\n') i++; }
|
2076
|
-
while (i < end &&
|
2140
|
+
while (i < end && data[i] == ' ') i++;
|
2077
2141
|
if (i >= end) return 0;
|
2078
2142
|
|
2079
2143
|
/* link: whitespace-free sequence, optionally between angle brackets */
|
2080
|
-
if (data[i] == '<')
|
2144
|
+
if (data[i] == '<')
|
2145
|
+
i++;
|
2146
|
+
|
2081
2147
|
link_offset = i;
|
2082
|
-
|
2083
|
-
|
2148
|
+
|
2149
|
+
while (i < end && data[i] != ' ' && data[i] != '\n' && data[i] != '\r')
|
2150
|
+
i++;
|
2151
|
+
|
2084
2152
|
if (data[i - 1] == '>') link_end = i - 1;
|
2085
2153
|
else link_end = i;
|
2086
2154
|
|
2087
2155
|
/* optional spacer: (space | tab)* (newline | '\'' | '"' | '(' ) */
|
2088
|
-
while (i < end &&
|
2156
|
+
while (i < end && data[i] == ' ') i++;
|
2089
2157
|
if (i < end && data[i] != '\n' && data[i] != '\r'
|
2090
2158
|
&& data[i] != '\'' && data[i] != '"' && data[i] != '(')
|
2091
2159
|
return 0;
|
@@ -2098,7 +2166,7 @@ is_ref(char *data, size_t beg, size_t end, size_t *last, struct array *refs)
|
|
2098
2166
|
/* optional (space|tab)* spacer after a newline */
|
2099
2167
|
if (line_end) {
|
2100
2168
|
i = line_end + 1;
|
2101
|
-
while (i < end &&
|
2169
|
+
while (i < end && data[i] == ' ') i++; }
|
2102
2170
|
|
2103
2171
|
/* optional title: any non-newline sequence enclosed in '"()
|
2104
2172
|
alone on its line */
|
@@ -2114,31 +2182,38 @@ is_ref(char *data, size_t beg, size_t end, size_t *last, struct array *refs)
|
|
2114
2182
|
else title_end = i;
|
2115
2183
|
/* stepping back */
|
2116
2184
|
i -= 1;
|
2117
|
-
while (i > title_offset &&
|
2185
|
+
while (i > title_offset && data[i] == ' ')
|
2118
2186
|
i -= 1;
|
2119
2187
|
if (i > title_offset
|
2120
2188
|
&& (data[i] == '\'' || data[i] == '"' || data[i] == ')')) {
|
2121
2189
|
line_end = title_end;
|
2122
2190
|
title_end = i; } }
|
2123
|
-
|
2191
|
+
|
2192
|
+
if (!line_end)
|
2193
|
+
return 0; /* garbage after the link */
|
2124
2194
|
|
2125
2195
|
/* a valid ref has been found, filling-in return structures */
|
2126
|
-
if (last)
|
2127
|
-
|
2128
|
-
|
2129
|
-
|
2130
|
-
|
2131
|
-
|
2132
|
-
|
2133
|
-
|
2134
|
-
|
2135
|
-
bufput(
|
2136
|
-
|
2137
|
-
|
2138
|
-
|
2196
|
+
if (last)
|
2197
|
+
*last = line_end;
|
2198
|
+
|
2199
|
+
if (refs) {
|
2200
|
+
struct link_ref *ref;
|
2201
|
+
|
2202
|
+
ref = add_link_ref(refs, data + id_offset, id_end - id_offset);
|
2203
|
+
|
2204
|
+
ref->link = bufnew(link_end - link_offset);
|
2205
|
+
bufput(ref->link, data + link_offset, link_end - link_offset);
|
2206
|
+
|
2207
|
+
if (title_end > title_offset) {
|
2208
|
+
ref->title = bufnew(title_end - title_offset);
|
2209
|
+
bufput(ref->title, data + title_offset, title_end - title_offset);
|
2210
|
+
}
|
2211
|
+
}
|
2212
|
+
|
2213
|
+
return 1;
|
2139
2214
|
}
|
2140
2215
|
|
2141
|
-
static void expand_tabs(struct buf *ob, const
|
2216
|
+
static void expand_tabs(struct buf *ob, const uint8_t *line, size_t size)
|
2142
2217
|
{
|
2143
2218
|
size_t i = 0, tab = 0;
|
2144
2219
|
|
@@ -2167,93 +2242,100 @@ static void expand_tabs(struct buf *ob, const char *line, size_t size)
|
|
2167
2242
|
* EXPORTED FUNCTIONS *
|
2168
2243
|
**********************/
|
2169
2244
|
|
2170
|
-
|
2171
|
-
|
2172
|
-
sd_markdown(struct buf *ob,
|
2173
|
-
const struct buf *ib,
|
2245
|
+
struct sd_markdown *
|
2246
|
+
sd_markdown_new(
|
2174
2247
|
unsigned int extensions,
|
2248
|
+
size_t max_nesting,
|
2175
2249
|
const struct sd_callbacks *callbacks,
|
2176
|
-
void *opaque)
|
2177
|
-
|
2178
|
-
|
2179
|
-
|
2180
|
-
struct link_ref *lr;
|
2181
|
-
struct buf *text;
|
2182
|
-
size_t i, beg, end;
|
2183
|
-
struct render rndr;
|
2184
|
-
|
2185
|
-
/* filling the render structure */
|
2186
|
-
if (!callbacks)
|
2187
|
-
return;
|
2250
|
+
void *opaque)
|
2251
|
+
{
|
2252
|
+
struct sd_markdown *md = NULL;
|
2188
2253
|
|
2189
|
-
|
2190
|
-
if (!text)
|
2191
|
-
return;
|
2254
|
+
assert(max_nesting > 0 && callbacks);
|
2192
2255
|
|
2193
|
-
|
2194
|
-
|
2256
|
+
md = malloc(sizeof(struct sd_markdown));
|
2257
|
+
if (!md)
|
2258
|
+
return NULL;
|
2195
2259
|
|
2196
|
-
memcpy(&
|
2197
|
-
arr_init(&rndr.refs, sizeof (struct link_ref));
|
2198
|
-
parr_init(&rndr.work_bufs[BUFFER_BLOCK]);
|
2199
|
-
parr_init(&rndr.work_bufs[BUFFER_SPAN]);
|
2260
|
+
memcpy(&md->cb, callbacks, sizeof(struct sd_callbacks));
|
2200
2261
|
|
2201
|
-
|
2202
|
-
|
2262
|
+
stack_init(&md->work_bufs[BUFFER_BLOCK], 4);
|
2263
|
+
stack_init(&md->work_bufs[BUFFER_SPAN], 8);
|
2203
2264
|
|
2204
|
-
memset(
|
2265
|
+
memset(md->active_char, 0x0, 256);
|
2205
2266
|
|
2206
|
-
if (
|
2207
|
-
|
2208
|
-
|
2267
|
+
if (md->cb.emphasis || md->cb.double_emphasis || md->cb.triple_emphasis) {
|
2268
|
+
md->active_char['*'] = MD_CHAR_EMPHASIS;
|
2269
|
+
md->active_char['_'] = MD_CHAR_EMPHASIS;
|
2209
2270
|
if (extensions & MKDEXT_STRIKETHROUGH)
|
2210
|
-
|
2271
|
+
md->active_char['~'] = MD_CHAR_EMPHASIS;
|
2211
2272
|
}
|
2212
2273
|
|
2213
|
-
if (
|
2214
|
-
|
2274
|
+
if (md->cb.codespan)
|
2275
|
+
md->active_char['`'] = MD_CHAR_CODESPAN;
|
2215
2276
|
|
2216
|
-
if (
|
2217
|
-
|
2277
|
+
if (md->cb.linebreak)
|
2278
|
+
md->active_char['\n'] = MD_CHAR_LINEBREAK;
|
2218
2279
|
|
2219
|
-
if (
|
2220
|
-
|
2280
|
+
if (md->cb.image || md->cb.link)
|
2281
|
+
md->active_char['['] = MD_CHAR_LINK;
|
2221
2282
|
|
2222
|
-
|
2223
|
-
|
2224
|
-
|
2283
|
+
md->active_char['<'] = MD_CHAR_LANGLE;
|
2284
|
+
md->active_char['\\'] = MD_CHAR_ESCAPE;
|
2285
|
+
md->active_char['&'] = MD_CHAR_ENTITITY;
|
2225
2286
|
|
2226
2287
|
if (extensions & MKDEXT_AUTOLINK) {
|
2227
|
-
|
2228
|
-
|
2229
|
-
|
2288
|
+
md->active_char[':'] = MD_CHAR_AUTOLINK_URL;
|
2289
|
+
md->active_char['@'] = MD_CHAR_AUTOLINK_EMAIL;
|
2290
|
+
md->active_char['w'] = MD_CHAR_AUTOLINK_WWW;
|
2230
2291
|
}
|
2231
2292
|
|
2232
2293
|
if (extensions & MKDEXT_SUPERSCRIPT)
|
2233
|
-
|
2294
|
+
md->active_char['^'] = MD_CHAR_SUPERSCRIPT;
|
2234
2295
|
|
2235
2296
|
/* Extension data */
|
2236
|
-
|
2237
|
-
|
2238
|
-
|
2297
|
+
md->ext_flags = extensions;
|
2298
|
+
md->opaque = opaque;
|
2299
|
+
md->max_nesting = max_nesting;
|
2300
|
+
|
2301
|
+
return md;
|
2302
|
+
}
|
2303
|
+
|
2304
|
+
void
|
2305
|
+
sd_markdown_render(struct buf *ob, const uint8_t *document, size_t doc_size, struct sd_markdown *md)
|
2306
|
+
{
|
2307
|
+
static const float MARKDOWN_GROW_FACTOR = 1.4f;
|
2308
|
+
|
2309
|
+
struct buf *text;
|
2310
|
+
size_t beg, end;
|
2311
|
+
|
2312
|
+
text = bufnew(64);
|
2313
|
+
if (!text)
|
2314
|
+
return;
|
2315
|
+
|
2316
|
+
/* Preallocate enough space for our buffer to avoid expanding while copying */
|
2317
|
+
bufgrow(text, doc_size);
|
2318
|
+
|
2319
|
+
/* reset the references table */
|
2320
|
+
memset(&md->refs, 0x0, REF_TABLE_SIZE * sizeof(void *));
|
2239
2321
|
|
2240
2322
|
/* first pass: looking for references, copying everything else */
|
2241
2323
|
beg = 0;
|
2242
|
-
while (beg <
|
2243
|
-
if (is_ref(
|
2324
|
+
while (beg < doc_size) /* iterating over lines */
|
2325
|
+
if (is_ref(document, beg, doc_size, &end, md->refs))
|
2244
2326
|
beg = end;
|
2245
2327
|
else { /* skipping to the next line */
|
2246
2328
|
end = beg;
|
2247
|
-
while (end <
|
2329
|
+
while (end < doc_size && document[end] != '\n' && document[end] != '\r')
|
2248
2330
|
end++;
|
2249
2331
|
|
2250
2332
|
/* adding the line body if present */
|
2251
2333
|
if (end > beg)
|
2252
|
-
expand_tabs(text,
|
2334
|
+
expand_tabs(text, document + beg, end - beg);
|
2253
2335
|
|
2254
|
-
while (end <
|
2336
|
+
while (end < doc_size && (document[end] == '\n' || document[end] == '\r')) {
|
2255
2337
|
/* add one \n per newline */
|
2256
|
-
if (
|
2338
|
+
if (document[end] == '\n' || (end + 1 < doc_size && document[end + 1] != '\n'))
|
2257
2339
|
bufputc(text, '\n');
|
2258
2340
|
end++;
|
2259
2341
|
}
|
@@ -2261,50 +2343,47 @@ sd_markdown(struct buf *ob,
|
|
2261
2343
|
beg = end;
|
2262
2344
|
}
|
2263
2345
|
|
2264
|
-
/* sorting the reference array */
|
2265
|
-
if (rndr.refs.size)
|
2266
|
-
qsort(rndr.refs.base, rndr.refs.size, rndr.refs.unit, cmp_link_ref_sort);
|
2267
|
-
|
2268
2346
|
/* pre-grow the output buffer to minimize allocations */
|
2269
2347
|
bufgrow(ob, text->size * MARKDOWN_GROW_FACTOR);
|
2270
2348
|
|
2271
2349
|
/* second pass: actual rendering */
|
2272
|
-
if (
|
2273
|
-
|
2350
|
+
if (md->cb.doc_header)
|
2351
|
+
md->cb.doc_header(ob, md->opaque);
|
2274
2352
|
|
2275
2353
|
if (text->size) {
|
2276
2354
|
/* adding a final newline if not already present */
|
2277
2355
|
if (text->data[text->size - 1] != '\n' && text->data[text->size - 1] != '\r')
|
2278
2356
|
bufputc(text, '\n');
|
2279
2357
|
|
2280
|
-
parse_block(ob,
|
2358
|
+
parse_block(ob, md, text->data, text->size);
|
2281
2359
|
}
|
2282
2360
|
|
2283
|
-
if (
|
2284
|
-
|
2361
|
+
if (md->cb.doc_footer)
|
2362
|
+
md->cb.doc_footer(ob, md->opaque);
|
2285
2363
|
|
2286
2364
|
/* clean-up */
|
2287
2365
|
bufrelease(text);
|
2288
|
-
|
2289
|
-
for (i = 0; i < (size_t)rndr.refs.size; i++) {
|
2290
|
-
bufrelease(lr[i].id);
|
2291
|
-
bufrelease(lr[i].link);
|
2292
|
-
bufrelease(lr[i].title);
|
2293
|
-
}
|
2366
|
+
free_link_refs(md->refs);
|
2294
2367
|
|
2295
|
-
|
2368
|
+
assert(md->work_bufs[BUFFER_SPAN].size == 0);
|
2369
|
+
assert(md->work_bufs[BUFFER_BLOCK].size == 0);
|
2370
|
+
}
|
2371
|
+
|
2372
|
+
void
|
2373
|
+
sd_markdown_free(struct sd_markdown *md)
|
2374
|
+
{
|
2375
|
+
size_t i;
|
2296
2376
|
|
2297
|
-
|
2298
|
-
|
2377
|
+
for (i = 0; i < (size_t)md->work_bufs[BUFFER_SPAN].asize; ++i)
|
2378
|
+
bufrelease(md->work_bufs[BUFFER_SPAN].item[i]);
|
2299
2379
|
|
2300
|
-
for (i = 0; i < (size_t)
|
2301
|
-
bufrelease(
|
2380
|
+
for (i = 0; i < (size_t)md->work_bufs[BUFFER_BLOCK].asize; ++i)
|
2381
|
+
bufrelease(md->work_bufs[BUFFER_BLOCK].item[i]);
|
2302
2382
|
|
2303
|
-
|
2304
|
-
|
2383
|
+
stack_free(&md->work_bufs[BUFFER_SPAN]);
|
2384
|
+
stack_free(&md->work_bufs[BUFFER_BLOCK]);
|
2305
2385
|
|
2306
|
-
|
2307
|
-
parr_free(&rndr.work_bufs[BUFFER_BLOCK]);
|
2386
|
+
free(md);
|
2308
2387
|
}
|
2309
2388
|
|
2310
2389
|
void
|