github-markdown 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +87 -0
- data/bin/gfm +26 -0
- data/ext/markdown/autolink.c +264 -0
- data/ext/markdown/autolink.h +36 -0
- data/ext/markdown/buffer.c +223 -0
- data/ext/markdown/buffer.h +88 -0
- data/ext/markdown/extconf.rb +4 -0
- data/ext/markdown/gh-markdown.c +204 -0
- data/ext/markdown/houdini.h +29 -0
- data/ext/markdown/houdini_href_e.c +108 -0
- data/ext/markdown/houdini_html_e.c +84 -0
- data/ext/markdown/html.c +635 -0
- data/ext/markdown/html.h +69 -0
- data/ext/markdown/html_blocks.h +206 -0
- data/ext/markdown/markdown.c +2505 -0
- data/ext/markdown/markdown.h +130 -0
- data/ext/markdown/stack.c +81 -0
- data/ext/markdown/stack.h +21 -0
- data/github-markdown.gemspec +40 -0
- data/lib/github/markdown.rb +38 -0
- data/test/gfm_test.rb +26 -0
- metadata +98 -0
data/ext/markdown/html.h
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
/*
|
2
|
+
* Copyright (c) 2011, Vicent Marti
|
3
|
+
*
|
4
|
+
* Permission to use, copy, modify, and distribute this software for any
|
5
|
+
* purpose with or without fee is hereby granted, provided that the above
|
6
|
+
* copyright notice and this permission notice appear in all copies.
|
7
|
+
*
|
8
|
+
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
9
|
+
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
10
|
+
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
11
|
+
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
12
|
+
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
13
|
+
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
14
|
+
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
15
|
+
*/
|
16
|
+
|
17
|
+
#ifndef UPSKIRT_HTML_H
|
18
|
+
#define UPSKIRT_HTML_H
|
19
|
+
|
20
|
+
#include "markdown.h"
|
21
|
+
#include "buffer.h"
|
22
|
+
#include <stdlib.h>
|
23
|
+
|
24
|
+
/* html_renderopt: option/state block passed to the HTML renderer setup functions */
struct html_renderopt {
	/* counters grouped under toc_data; presumably table-of-contents
	 * bookkeeping -- confirm against html.c */
	struct {
		int header_count;
		int current_level;
		int level_offset;
	} toc_data;

	/* bit mask; presumably a combination of html_render_mode values -- confirm */
	unsigned int flags;

	/* extra callbacks */
	void (*link_attributes)(struct buf *ob, const struct buf *url, void *self);
};
|
36
|
+
|
37
|
+
/* html_render_mode: single-bit flags, one bit per option (bits 0 through 9) */
typedef enum {
	HTML_SKIP_HTML   = 0x001,
	HTML_SKIP_STYLE  = 0x002,
	HTML_SKIP_IMAGES = 0x004,
	HTML_SKIP_LINKS  = 0x008,
	HTML_EXPAND_TABS = 0x010,
	HTML_SAFELINK    = 0x020,
	HTML_TOC         = 0x040,
	HTML_HARD_WRAP   = 0x080,
	HTML_USE_XHTML   = 0x100,
	HTML_ESCAPE      = 0x200,
} html_render_mode;
|
49
|
+
|
50
|
+
/* html_tag: three-way tag classification; HTML_TAG_NONE is 0 so it tests false */
typedef enum {
	HTML_TAG_NONE  = 0,
	HTML_TAG_OPEN  = 1,
	HTML_TAG_CLOSE = 2,
} html_tag;
|
55
|
+
|
56
|
+
int
|
57
|
+
sdhtml_is_tag(const uint8_t *tag_data, size_t tag_size, const char *tagname);
|
58
|
+
|
59
|
+
extern void
|
60
|
+
sdhtml_renderer(struct sd_callbacks *callbacks, struct html_renderopt *options_ptr, unsigned int render_flags);
|
61
|
+
|
62
|
+
extern void
|
63
|
+
sdhtml_toc_renderer(struct sd_callbacks *callbacks, struct html_renderopt *options_ptr);
|
64
|
+
|
65
|
+
extern void
|
66
|
+
sdhtml_smartypants(struct buf *ob, const uint8_t *text, size_t size);
|
67
|
+
|
68
|
+
#endif
|
69
|
+
|
@@ -0,0 +1,206 @@
|
|
1
|
+
/* C code produced by gperf version 3.0.3 */
|
2
|
+
/* Command-line: gperf -N find_block_tag -H hash_block_tag -C -c -E --ignore-case html_block_names.txt */
|
3
|
+
/* Computed positions: -k'1-2' */
|
4
|
+
|
5
|
+
/*
 * Compile-time guard: the generated hash tables below index by character
 * code, so they are only valid when the execution character set matches
 * ISO-646 (ASCII) for every printable character tested here.
 */
#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
      && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \
      && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \
      && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \
      && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \
      && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \
      && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \
      && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \
      && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \
      && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \
      && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \
      && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \
      && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \
      && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \
      && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \
      && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \
      && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \
      && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \
      && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \
      && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \
      && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \
      && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \
      && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126))
/* The character set is not based on ISO-646. */
#error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gnu-gperf@gnu.org>."
#endif
|
31
|
+
|
32
|
+
/* maximum key range = 37, duplicates = 0 */
|
33
|
+
|
34
|
+
#ifndef GPERF_DOWNCASE
#define GPERF_DOWNCASE 1
/*
 * gperf_downcase: byte -> byte mapping used for case-insensitive compares.
 * Identity for every value except 'A'..'Z' (65..90), which map to
 * 'a'..'z' (97..122). The table is read-only, hence const.
 */
static const unsigned char gperf_downcase[256] =
  {
      0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,
     16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,
     32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,
     48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,
     64,  97,  98,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
    112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122,  91,  92,  93,  94,  95,
     96,  97,  98,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
    112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
    128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
    144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
    160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
    176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
    192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
    208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
    224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
    240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255
  };
#endif
|
58
|
+
|
59
|
+
#ifndef GPERF_CASE_STRNCMP
|
60
|
+
#define GPERF_CASE_STRNCMP 1
|
61
|
+
static int
|
62
|
+
gperf_case_strncmp (s1, s2, n)
|
63
|
+
register const char *s1;
|
64
|
+
register const char *s2;
|
65
|
+
register unsigned int n;
|
66
|
+
{
|
67
|
+
for (; n > 0;)
|
68
|
+
{
|
69
|
+
unsigned char c1 = gperf_downcase[(unsigned char)*s1++];
|
70
|
+
unsigned char c2 = gperf_downcase[(unsigned char)*s2++];
|
71
|
+
if (c1 != 0 && c1 == c2)
|
72
|
+
{
|
73
|
+
n--;
|
74
|
+
continue;
|
75
|
+
}
|
76
|
+
return (int)c1 - (int)c2;
|
77
|
+
}
|
78
|
+
return 0;
|
79
|
+
}
|
80
|
+
#endif
|
81
|
+
|
82
|
+
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
/*
 * hash_block_tag: gperf-generated hash over the first two characters.
 *
 * str: candidate tag name; only str[0] and (when len != 1) str[1] are read.
 * len: length of the candidate; must be at least 1 (the caller,
 *      find_block_tag, rejects len == 0 before calling).
 *
 * Returns len + asso_values[str[0]] (+ asso_values[str[1] + 1] when
 * len != 1). Upper- and lower-case letters share the same table values.
 *
 * Defined with a prototype: old-style (K&R) definitions give callers no
 * argument checking and were removed from the language in C23.
 */
static unsigned int
hash_block_tag (const char *str, unsigned int len)
{
  /* 257 entries: the second character is looked up at index (byte + 1) */
  static const unsigned char asso_values[] =
    {
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
       8, 30, 25, 20, 15, 10, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38,  0, 38,  0, 38,
       5,  5,  5, 15,  0, 38, 38,  0, 15, 10,
       0, 38, 38, 15,  0,  5, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38,  0, 38,
       0, 38,  5,  5,  5, 15,  0, 38, 38,  0,
      15, 10,  0, 38, 38, 15,  0,  5, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
      38, 38, 38, 38, 38, 38, 38
    };
  int hval = len;

  switch (hval)
    {
      default:
        hval += asso_values[(unsigned char)str[1]+1];
      /*FALLTHROUGH*/
      case 1:
        hval += asso_values[(unsigned char)str[0]];
        break;
    }
  return hval;
}
|
136
|
+
|
137
|
+
#ifdef __GNUC__
__inline
#ifdef __GNUC_STDC_INLINE__
__attribute__ ((__gnu_inline__))
#endif
#endif
/*
 * find_block_tag: case-insensitive lookup of a tag name in the generated
 * keyword table.
 *
 * str: candidate name (need not be NUL-terminated at len).
 * len: number of bytes of str to consider.
 *
 * Returns a pointer to the matching entry of the internal word list
 * (a static, NUL-terminated, lower-case string), or 0 when the first len
 * bytes of str are not one of the listed names.
 *
 * Defined with a prototype: old-style (K&R) definitions give callers no
 * argument checking and were removed from the language in C23.
 */
const char *
find_block_tag (const char *str, unsigned int len)
{
  enum
    {
      TOTAL_KEYWORDS = 24,
      MIN_WORD_LENGTH = 1,
      MAX_WORD_LENGTH = 10,
      MIN_HASH_VALUE = 1,
      MAX_HASH_VALUE = 37
    };

  /* indexed by hash value; "" marks an unused slot */
  static const char * const wordlist[] =
    {
      "",
      "p",
      "dl",
      "div",
      "math",
      "table",
      "",
      "ul",
      "del",
      "form",
      "blockquote",
      "figure",
      "ol",
      "fieldset",
      "",
      "h1",
      "",
      "h6",
      "pre",
      "", "",
      "script",
      "h5",
      "noscript",
      "",
      "style",
      "iframe",
      "h4",
      "ins",
      "", "", "",
      "h3",
      "", "", "", "",
      "h2"
    };

  if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
    {
      int key = hash_block_tag (str, len);

      if (key <= MAX_HASH_VALUE && key >= 0)
        {
          const char *s = wordlist[key];

          /* cheap first-byte test (ignoring bit 5), then the full
             case-insensitive compare and an exact-length check */
          if ((((unsigned char)*str ^ (unsigned char)*s) & ~32) == 0 && !gperf_case_strncmp (str, s, len) && s[len] == '\0')
            return s;
        }
    }
  return 0;
}
|
@@ -0,0 +1,2505 @@
|
|
1
|
+
/* markdown.c - generic markdown parser */
|
2
|
+
|
3
|
+
/*
|
4
|
+
* Copyright (c) 2009, Natacha Porté
|
5
|
+
* Copyright (c) 2011, Vicent Marti
|
6
|
+
*
|
7
|
+
* Permission to use, copy, modify, and distribute this software for any
|
8
|
+
* purpose with or without fee is hereby granted, provided that the above
|
9
|
+
* copyright notice and this permission notice appear in all copies.
|
10
|
+
*
|
11
|
+
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
12
|
+
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
13
|
+
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
14
|
+
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
15
|
+
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
16
|
+
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
17
|
+
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
18
|
+
*/
|
19
|
+
|
20
|
+
#include "markdown.h"
|
21
|
+
#include "stack.h"
|
22
|
+
|
23
|
+
#include <assert.h>
|
24
|
+
#include <string.h>
|
25
|
+
#include <ctype.h>
|
26
|
+
#include <stdio.h>
|
27
|
+
|
28
|
+
#if defined(_WIN32)
|
29
|
+
#define strncasecmp _strnicmp
|
30
|
+
#endif
|
31
|
+
|
32
|
+
#define REF_TABLE_SIZE 8
|
33
|
+
|
34
|
+
#define BUFFER_BLOCK 0
|
35
|
+
#define BUFFER_SPAN 1
|
36
|
+
|
37
|
+
#define MKD_LI_END 8 /* internal list flag */
|
38
|
+
|
39
|
+
#define gperf_case_strncmp(s1, s2, n) strncasecmp(s1, s2, n)
|
40
|
+
#define GPERF_DOWNCASE 1
|
41
|
+
#define GPERF_CASE_STRNCMP 1
|
42
|
+
#include "html_blocks.h"
|
43
|
+
|
44
|
+
/***************
|
45
|
+
* LOCAL TYPES *
|
46
|
+
***************/
|
47
|
+
|
48
|
+
/* link_ref: one link reference, stored as a node in a chained hash table */
struct link_ref {
	unsigned int id;	/* hash of the reference name (see hash_link_ref) */

	struct buf *link;	/* released with bufrelease() in free_link_refs */
	struct buf *title;	/* released with bufrelease() in free_link_refs */

	struct link_ref *next;	/* next node in the same bucket, or NULL */
};
|
57
|
+
|
58
|
+
/* char_trigger: function pointer to render active chars */
|
59
|
+
/* returns the number of chars taken care of */
|
60
|
+
/* data is the pointer of the beginning of the span */
|
61
|
+
/* offset is the number of valid chars before data */
|
62
|
+
struct sd_markdown;
|
63
|
+
typedef size_t
|
64
|
+
(*char_trigger)(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
|
65
|
+
|
66
|
+
static size_t char_emphasis(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
|
67
|
+
static size_t char_linebreak(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
|
68
|
+
static size_t char_codespan(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
|
69
|
+
static size_t char_escape(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
|
70
|
+
static size_t char_entity(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
|
71
|
+
static size_t char_langle_tag(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
|
72
|
+
static size_t char_autolink_url(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
|
73
|
+
static size_t char_autolink_email(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
|
74
|
+
static size_t char_autolink_www(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
|
75
|
+
static size_t char_link(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
|
76
|
+
static size_t char_superscript(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size);
|
77
|
+
|
78
|
+
/* markdown_char_t: action codes; each value is the index of its handler
 * in the markdown_char_ptrs table, so the two must stay in step */
enum markdown_char_t {
	MD_CHAR_NONE           = 0,
	MD_CHAR_EMPHASIS       = 1,
	MD_CHAR_CODESPAN       = 2,
	MD_CHAR_LINEBREAK      = 3,
	MD_CHAR_LINK           = 4,
	MD_CHAR_LANGLE         = 5,
	MD_CHAR_ESCAPE         = 6,
	MD_CHAR_ENTITITY       = 7,
	MD_CHAR_AUTOLINK_URL   = 8,
	MD_CHAR_AUTOLINK_EMAIL = 9,
	MD_CHAR_AUTOLINK_WWW   = 10,
	MD_CHAR_SUPERSCRIPT    = 11,
};
|
92
|
+
|
93
|
+
static char_trigger markdown_char_ptrs[] = {
|
94
|
+
NULL,
|
95
|
+
&char_emphasis,
|
96
|
+
&char_codespan,
|
97
|
+
&char_linebreak,
|
98
|
+
&char_link,
|
99
|
+
&char_langle_tag,
|
100
|
+
&char_escape,
|
101
|
+
&char_entity,
|
102
|
+
&char_autolink_url,
|
103
|
+
&char_autolink_email,
|
104
|
+
&char_autolink_www,
|
105
|
+
&char_superscript,
|
106
|
+
};
|
107
|
+
|
108
|
+
/* render • structure containing one particular render */
|
109
|
+
struct sd_markdown {
|
110
|
+
struct sd_callbacks cb;
|
111
|
+
void *opaque;
|
112
|
+
|
113
|
+
struct link_ref *refs[REF_TABLE_SIZE];
|
114
|
+
uint8_t active_char[256];
|
115
|
+
struct stack work_bufs[2];
|
116
|
+
unsigned int ext_flags;
|
117
|
+
size_t max_nesting;
|
118
|
+
int in_link_body;
|
119
|
+
};
|
120
|
+
|
121
|
+
/***************************
|
122
|
+
* HELPER FUNCTIONS *
|
123
|
+
***************************/
|
124
|
+
|
125
|
+
static inline struct buf *
|
126
|
+
rndr_newbuf(struct sd_markdown *rndr, int type)
|
127
|
+
{
|
128
|
+
static const size_t buf_size[2] = {256, 64};
|
129
|
+
struct buf *work = NULL;
|
130
|
+
struct stack *pool = &rndr->work_bufs[type];
|
131
|
+
|
132
|
+
if (pool->size < pool->asize &&
|
133
|
+
pool->item[pool->size] != NULL) {
|
134
|
+
work = pool->item[pool->size++];
|
135
|
+
work->size = 0;
|
136
|
+
} else {
|
137
|
+
work = bufnew(buf_size[type]);
|
138
|
+
stack_push(pool, work);
|
139
|
+
}
|
140
|
+
|
141
|
+
return work;
|
142
|
+
}
|
143
|
+
|
144
|
+
static inline void
|
145
|
+
rndr_popbuf(struct sd_markdown *rndr, int type)
|
146
|
+
{
|
147
|
+
rndr->work_bufs[type].size--;
|
148
|
+
}
|
149
|
+
|
150
|
+
static void
|
151
|
+
unscape_text(struct buf *ob, struct buf *src)
|
152
|
+
{
|
153
|
+
size_t i = 0, org;
|
154
|
+
while (i < src->size) {
|
155
|
+
org = i;
|
156
|
+
while (i < src->size && src->data[i] != '\\')
|
157
|
+
i++;
|
158
|
+
|
159
|
+
if (i > org)
|
160
|
+
bufput(ob, src->data + org, i - org);
|
161
|
+
|
162
|
+
if (i + 1 >= src->size)
|
163
|
+
break;
|
164
|
+
|
165
|
+
bufputc(ob, src->data[i + 1]);
|
166
|
+
i += 2;
|
167
|
+
}
|
168
|
+
}
|
169
|
+
|
170
|
+
/*
 * hash_link_ref: case-insensitive hash of a reference name.
 *
 * Each byte is folded with tolower() and mixed in as
 * hash = hash * 65599 + byte (the same value as
 * byte + (hash << 6) + (hash << 16) - hash in unsigned arithmetic).
 * An empty name hashes to 0.
 */
static unsigned int
hash_link_ref(const uint8_t *link_ref, size_t length)
{
	unsigned int hash = 0;
	size_t pos = 0;

	while (pos < length) {
		hash = hash * 65599u + (unsigned int)tolower(link_ref[pos]);
		pos++;
	}

	return hash;
}
|
181
|
+
|
182
|
+
static struct link_ref *
|
183
|
+
add_link_ref(
|
184
|
+
struct link_ref **references,
|
185
|
+
const uint8_t *name, size_t name_size)
|
186
|
+
{
|
187
|
+
struct link_ref *ref = calloc(1, sizeof(struct link_ref));
|
188
|
+
|
189
|
+
if (!ref)
|
190
|
+
return NULL;
|
191
|
+
|
192
|
+
ref->id = hash_link_ref(name, name_size);
|
193
|
+
ref->next = references[ref->id % REF_TABLE_SIZE];
|
194
|
+
|
195
|
+
references[ref->id % REF_TABLE_SIZE] = ref;
|
196
|
+
return ref;
|
197
|
+
}
|
198
|
+
|
199
|
+
static struct link_ref *
|
200
|
+
find_link_ref(struct link_ref **references, uint8_t *name, size_t length)
|
201
|
+
{
|
202
|
+
unsigned int hash = hash_link_ref(name, length);
|
203
|
+
struct link_ref *ref = NULL;
|
204
|
+
|
205
|
+
ref = references[hash % REF_TABLE_SIZE];
|
206
|
+
|
207
|
+
while (ref != NULL) {
|
208
|
+
if (ref->id == hash)
|
209
|
+
return ref;
|
210
|
+
|
211
|
+
ref = ref->next;
|
212
|
+
}
|
213
|
+
|
214
|
+
return NULL;
|
215
|
+
}
|
216
|
+
|
217
|
+
static void
|
218
|
+
free_link_refs(struct link_ref **references)
|
219
|
+
{
|
220
|
+
size_t i;
|
221
|
+
|
222
|
+
for (i = 0; i < REF_TABLE_SIZE; ++i) {
|
223
|
+
struct link_ref *r = references[i];
|
224
|
+
struct link_ref *next;
|
225
|
+
|
226
|
+
while (r) {
|
227
|
+
next = r->next;
|
228
|
+
bufrelease(r->link);
|
229
|
+
bufrelease(r->title);
|
230
|
+
free(r);
|
231
|
+
r = next;
|
232
|
+
}
|
233
|
+
}
|
234
|
+
}
|
235
|
+
|
236
|
+
/*
 * Check whether a char is a Markdown space.
 *
 * Only the space character and the newline count: tabs and carriage
 * returns are filtered out during the preprocessing phase.
 *
 * A UTF-8 aware version would have to decode a full Unicode codepoint
 * and test its whitespace property instead of looking at a single byte.
 *
 * Returns 1 for ' ' and '\n', 0 for anything else.
 */
static inline int
_isspace(int c)
{
	switch (c) {
	case ' ':
	case '\n':
		return 1;
	default:
		return 0;
	}
}
|
252
|
+
|
253
|
+
/****************************
|
254
|
+
* INLINE PARSING FUNCTIONS *
|
255
|
+
****************************/
|
256
|
+
|
257
|
+
/*
 * is_mail_autolink: scan the address part of a mail autolink up to '>'.
 *
 * Accepted bytes are [-@._a-zA-Z0-9]; this is less strict than the
 * original Markdown e-mail matching. Returns the number of bytes consumed
 * including the closing '>' when exactly one '@' was seen before it, and 0
 * otherwise (other byte found, wrong number of '@', or no '>' within size).
 */
static size_t
is_mail_autolink(uint8_t *data, size_t size)
{
	size_t pos, at_count = 0;

	for (pos = 0; pos < size; ++pos) {
		uint8_t ch = data[pos];

		if (isalnum(ch) || ch == '-' || ch == '.' || ch == '_')
			continue;

		if (ch == '@') {
			at_count++;
			continue;
		}

		/* '>' ends a valid address only after exactly one '@' */
		if (ch == '>' && at_count == 1)
			return pos + 1;

		return 0;
	}

	return 0;
}
|
288
|
+
|
289
|
+
/* tag_length • returns the length of the given tag, or 0 is it's not valid */
|
290
|
+
static size_t
|
291
|
+
tag_length(uint8_t *data, size_t size, enum mkd_autolink *autolink)
|
292
|
+
{
|
293
|
+
size_t i, j;
|
294
|
+
|
295
|
+
/* a valid tag can't be shorter than 3 chars */
|
296
|
+
if (size < 3) return 0;
|
297
|
+
|
298
|
+
/* begins with a '<' optionally followed by '/', followed by letter or number */
|
299
|
+
if (data[0] != '<') return 0;
|
300
|
+
i = (data[1] == '/') ? 2 : 1;
|
301
|
+
|
302
|
+
if (!isalnum(data[i]))
|
303
|
+
return 0;
|
304
|
+
|
305
|
+
/* scheme test */
|
306
|
+
*autolink = MKDA_NOT_AUTOLINK;
|
307
|
+
|
308
|
+
/* try to find the beginning of an URI */
|
309
|
+
while (i < size && (isalnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-'))
|
310
|
+
i++;
|
311
|
+
|
312
|
+
if (i > 1 && data[i] == '@') {
|
313
|
+
if ((j = is_mail_autolink(data + i, size - i)) != 0) {
|
314
|
+
*autolink = MKDA_EMAIL;
|
315
|
+
return i + j;
|
316
|
+
}
|
317
|
+
}
|
318
|
+
|
319
|
+
if (i > 2 && data[i] == ':') {
|
320
|
+
*autolink = MKDA_NORMAL;
|
321
|
+
i++;
|
322
|
+
}
|
323
|
+
|
324
|
+
/* completing autolink test: no whitespace or ' or " */
|
325
|
+
if (i >= size)
|
326
|
+
*autolink = MKDA_NOT_AUTOLINK;
|
327
|
+
|
328
|
+
else if (*autolink) {
|
329
|
+
j = i;
|
330
|
+
|
331
|
+
while (i < size) {
|
332
|
+
if (data[i] == '\\') i += 2;
|
333
|
+
else if (data[i] == '>' || data[i] == '\'' ||
|
334
|
+
data[i] == '"' || data[i] == ' ' || data[i] == '\n')
|
335
|
+
break;
|
336
|
+
else i++;
|
337
|
+
}
|
338
|
+
|
339
|
+
if (i >= size) return 0;
|
340
|
+
if (i > j && data[i] == '>') return i + 1;
|
341
|
+
/* one of the forbidden chars has been found */
|
342
|
+
*autolink = MKDA_NOT_AUTOLINK;
|
343
|
+
}
|
344
|
+
|
345
|
+
/* looking for sometinhg looking like a tag end */
|
346
|
+
while (i < size && data[i] != '>') i++;
|
347
|
+
if (i >= size) return 0;
|
348
|
+
return i + 1;
|
349
|
+
}
|
350
|
+
|
351
|
+
/* parse_inline • parses inline markdown elements */
|
352
|
+
static void
|
353
|
+
parse_inline(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
|
354
|
+
{
|
355
|
+
size_t i = 0, end = 0;
|
356
|
+
uint8_t action = 0;
|
357
|
+
struct buf work = { 0, 0, 0, 0 };
|
358
|
+
|
359
|
+
if (rndr->work_bufs[BUFFER_SPAN].size +
|
360
|
+
rndr->work_bufs[BUFFER_BLOCK].size > rndr->max_nesting)
|
361
|
+
return;
|
362
|
+
|
363
|
+
while (i < size) {
|
364
|
+
/* copying inactive chars into the output */
|
365
|
+
while (end < size && (action = rndr->active_char[data[end]]) == 0) {
|
366
|
+
end++;
|
367
|
+
}
|
368
|
+
|
369
|
+
if (rndr->cb.normal_text) {
|
370
|
+
work.data = data + i;
|
371
|
+
work.size = end - i;
|
372
|
+
rndr->cb.normal_text(ob, &work, rndr->opaque);
|
373
|
+
}
|
374
|
+
else
|
375
|
+
bufput(ob, data + i, end - i);
|
376
|
+
|
377
|
+
if (end >= size) break;
|
378
|
+
i = end;
|
379
|
+
|
380
|
+
end = markdown_char_ptrs[(int)action](ob, rndr, data + i, i, size - i);
|
381
|
+
if (!end) /* no action from the callback */
|
382
|
+
end = i + 1;
|
383
|
+
else {
|
384
|
+
i += end;
|
385
|
+
end = i;
|
386
|
+
}
|
387
|
+
}
|
388
|
+
}
|
389
|
+
|
390
|
+
/*
 * find_emph_char: look for the next occurrence of the emphasis byte `c`,
 * skipping over code spans and links.
 *
 * The search starts at index 1 (data[0] is never examined). Returns the
 * index of the match, or 0 when none is found.
 *
 * While a code span or link is being skipped, the first `c` seen inside it
 * is remembered and returned as a fallback if the construct turns out to
 * be unterminated (or, for a code span, ends exactly at `size`).
 */
static size_t
find_emph_char(uint8_t *data, size_t size, uint8_t c)
{
	size_t pos = 1;

	while (pos < size) {
		/* fast-forward to the next byte that needs a decision */
		while (pos < size && data[pos] != c && data[pos] != '`' && data[pos] != '[')
			pos++;

		if (pos == size)
			return 0;

		if (data[pos] == c)
			return pos;

		/* not counting escaped chars */
		if (pos && data[pos - 1] == '\\') {
			pos++;
			continue;
		}

		if (data[pos] == '`') {
			size_t opening = 0, run = 0, fallback = 0;

			/* counting the number of opening backticks */
			for (; pos < size && data[pos] == '`'; pos++)
				opening++;

			if (pos >= size)
				return 0;

			/* finding the matching closing sequence */
			for (; pos < size && run < opening; pos++) {
				if (!fallback && data[pos] == c)
					fallback = pos;

				run = (data[pos] == '`') ? run + 1 : 0;
			}

			if (pos >= size)
				return fallback;
		}
		/* skipping a link */
		else if (data[pos] == '[') {
			size_t fallback = 0;
			uint8_t closer;

			/* link text, up to ']' */
			pos++;
			for (; pos < size && data[pos] != ']'; pos++) {
				if (!fallback && data[pos] == c)
					fallback = pos;
			}

			/* step past ']' and any spaces/newlines after it */
			pos++;
			while (pos < size && (data[pos] == ' ' || data[pos] == '\n'))
				pos++;

			if (pos >= size)
				return fallback;

			/* second part: [id] or (url) */
			if (data[pos] == '[')
				closer = ']';
			else if (data[pos] == '(')
				closer = ')';
			else if (fallback)
				return fallback;
			else
				continue;

			pos++;
			for (; pos < size && data[pos] != closer; pos++) {
				if (!fallback && data[pos] == c)
					fallback = pos;
			}

			if (pos >= size)
				return fallback;

			pos++;
		}
	}

	return 0;
}
|
480
|
+
|
481
|
+
/* parse_emph1 • parsing single emphase */
|
482
|
+
/* closed by a symbol not preceded by whitespace and not followed by symbol */
|
483
|
+
static size_t
|
484
|
+
parse_emph1(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c)
|
485
|
+
{
|
486
|
+
size_t i = 0, len;
|
487
|
+
struct buf *work = 0;
|
488
|
+
int r;
|
489
|
+
|
490
|
+
if (!rndr->cb.emphasis) return 0;
|
491
|
+
|
492
|
+
/* skipping one symbol if coming from emph3 */
|
493
|
+
if (size > 1 && data[0] == c && data[1] == c) i = 1;
|
494
|
+
|
495
|
+
while (i < size) {
|
496
|
+
len = find_emph_char(data + i, size - i, c);
|
497
|
+
if (!len) return 0;
|
498
|
+
i += len;
|
499
|
+
if (i >= size) return 0;
|
500
|
+
|
501
|
+
if (data[i] == c && !_isspace(data[i - 1])) {
|
502
|
+
|
503
|
+
if (rndr->ext_flags & MKDEXT_NO_INTRA_EMPHASIS) {
|
504
|
+
if (!(i + 1 == size || _isspace(data[i + 1]) || ispunct(data[i + 1])))
|
505
|
+
continue;
|
506
|
+
}
|
507
|
+
|
508
|
+
work = rndr_newbuf(rndr, BUFFER_SPAN);
|
509
|
+
parse_inline(work, rndr, data, i);
|
510
|
+
r = rndr->cb.emphasis(ob, work, rndr->opaque);
|
511
|
+
rndr_popbuf(rndr, BUFFER_SPAN);
|
512
|
+
return r ? i + 1 : 0;
|
513
|
+
}
|
514
|
+
}
|
515
|
+
|
516
|
+
return 0;
|
517
|
+
}
|
518
|
+
|
519
|
+
/* parse_emph2 • parsing single emphase */
|
520
|
+
static size_t
|
521
|
+
parse_emph2(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c)
|
522
|
+
{
|
523
|
+
int (*render_method)(struct buf *ob, const struct buf *text, void *opaque);
|
524
|
+
size_t i = 0, len;
|
525
|
+
struct buf *work = 0;
|
526
|
+
int r;
|
527
|
+
|
528
|
+
render_method = (c == '~') ? rndr->cb.strikethrough : rndr->cb.double_emphasis;
|
529
|
+
|
530
|
+
if (!render_method)
|
531
|
+
return 0;
|
532
|
+
|
533
|
+
while (i < size) {
|
534
|
+
len = find_emph_char(data + i, size - i, c);
|
535
|
+
if (!len) return 0;
|
536
|
+
i += len;
|
537
|
+
|
538
|
+
if (i + 1 < size && data[i] == c && data[i + 1] == c && i && !_isspace(data[i - 1])) {
|
539
|
+
work = rndr_newbuf(rndr, BUFFER_SPAN);
|
540
|
+
parse_inline(work, rndr, data, i);
|
541
|
+
r = render_method(ob, work, rndr->opaque);
|
542
|
+
rndr_popbuf(rndr, BUFFER_SPAN);
|
543
|
+
return r ? i + 2 : 0;
|
544
|
+
}
|
545
|
+
i++;
|
546
|
+
}
|
547
|
+
return 0;
|
548
|
+
}
|
549
|
+
|
550
|
+
/* parse_emph3 • parsing single emphase */
|
551
|
+
/* finds the first closing tag, and delegates to the other emph */
|
552
|
+
static size_t
|
553
|
+
parse_emph3(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c)
|
554
|
+
{
|
555
|
+
size_t i = 0, len;
|
556
|
+
int r;
|
557
|
+
|
558
|
+
while (i < size) {
|
559
|
+
len = find_emph_char(data + i, size - i, c);
|
560
|
+
if (!len) return 0;
|
561
|
+
i += len;
|
562
|
+
|
563
|
+
/* skip whitespace preceded symbols */
|
564
|
+
if (data[i] != c || _isspace(data[i - 1]))
|
565
|
+
continue;
|
566
|
+
|
567
|
+
if (i + 2 < size && data[i + 1] == c && data[i + 2] == c && rndr->cb.triple_emphasis) {
|
568
|
+
/* triple symbol found */
|
569
|
+
struct buf *work = rndr_newbuf(rndr, BUFFER_SPAN);
|
570
|
+
|
571
|
+
parse_inline(work, rndr, data, i);
|
572
|
+
r = rndr->cb.triple_emphasis(ob, work, rndr->opaque);
|
573
|
+
rndr_popbuf(rndr, BUFFER_SPAN);
|
574
|
+
return r ? i + 3 : 0;
|
575
|
+
|
576
|
+
} else if (i + 1 < size && data[i + 1] == c) {
|
577
|
+
/* double symbol found, handing over to emph1 */
|
578
|
+
len = parse_emph1(ob, rndr, data - 2, size + 2, c);
|
579
|
+
if (!len) return 0;
|
580
|
+
else return len - 2;
|
581
|
+
|
582
|
+
} else {
|
583
|
+
/* single symbol found, handing over to emph2 */
|
584
|
+
len = parse_emph2(ob, rndr, data - 1, size + 1, c);
|
585
|
+
if (!len) return 0;
|
586
|
+
else return len - 1;
|
587
|
+
}
|
588
|
+
}
|
589
|
+
return 0;
|
590
|
+
}
|
591
|
+
|
592
|
+
/*
 * char_emphasis: active-char handler for emphasis delimiters.
 *
 * data[0] is the delimiter `c`. Depending on whether it is followed by
 * zero, one or two more `c`, the rest is handed to parse_emph1,
 * parse_emph2 or parse_emph3. Whitespace may not follow the opening
 * delimiter, and '~' is only accepted in its double form (strikethrough).
 *
 * Returns the total number of bytes consumed (opening delimiters
 * included), or 0 when nothing was rendered. `offset` is not used.
 */
static size_t
char_emphasis(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
{
	const uint8_t c = data[0];
	size_t used;

	/* one delimiter */
	if (size > 2 && data[1] != c) {
		if (c == '~' || _isspace(data[1]))
			return 0;

		used = parse_emph1(ob, rndr, data + 1, size - 1, c);
		return used ? used + 1 : 0;
	}

	/* two delimiters */
	if (size > 3 && data[1] == c && data[2] != c) {
		if (_isspace(data[2]))
			return 0;

		used = parse_emph2(ob, rndr, data + 2, size - 2, c);
		return used ? used + 2 : 0;
	}

	/* three delimiters */
	if (size > 4 && data[1] == c && data[2] == c && data[3] != c) {
		if (c == '~' || _isspace(data[3]))
			return 0;

		used = parse_emph3(ob, rndr, data + 3, size - 3, c);
		return used ? used + 3 : 0;
	}

	return 0;
}
|
624
|
+
|
625
|
+
|
626
|
+
/* char_linebreak • '\n' preceded by two spaces (assuming linebreak != 0) */
|
627
|
+
static size_t
|
628
|
+
char_linebreak(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
|
629
|
+
{
|
630
|
+
if (offset < 2 || data[-1] != ' ' || data[-2] != ' ')
|
631
|
+
return 0;
|
632
|
+
|
633
|
+
/* removing the last space from ob and rendering */
|
634
|
+
while (ob->size && ob->data[ob->size - 1] == ' ')
|
635
|
+
ob->size--;
|
636
|
+
|
637
|
+
return rndr->cb.linebreak(ob, rndr->opaque) ? 1 : 0;
|
638
|
+
}
|
639
|
+
|
640
|
+
|
641
|
+
/* char_codespan • '`' parsing a code span (assuming codespan != 0) */
|
642
|
+
static size_t
|
643
|
+
char_codespan(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
|
644
|
+
{
|
645
|
+
size_t end, nb = 0, i, f_begin, f_end;
|
646
|
+
|
647
|
+
/* counting the number of backticks in the delimiter */
|
648
|
+
while (nb < size && data[nb] == '`')
|
649
|
+
nb++;
|
650
|
+
|
651
|
+
/* finding the next delimiter */
|
652
|
+
i = 0;
|
653
|
+
for (end = nb; end < size && i < nb; end++) {
|
654
|
+
if (data[end] == '`') i++;
|
655
|
+
else i = 0;
|
656
|
+
}
|
657
|
+
|
658
|
+
if (i < nb && end >= size)
|
659
|
+
return 0; /* no matching delimiter */
|
660
|
+
|
661
|
+
/* trimming outside whitespaces */
|
662
|
+
f_begin = nb;
|
663
|
+
while (f_begin < end && data[f_begin] == ' ')
|
664
|
+
f_begin++;
|
665
|
+
|
666
|
+
f_end = end - nb;
|
667
|
+
while (f_end > nb && data[f_end-1] == ' ')
|
668
|
+
f_end--;
|
669
|
+
|
670
|
+
/* real code span */
|
671
|
+
if (f_begin < f_end) {
|
672
|
+
struct buf work = { data + f_begin, f_end - f_begin, 0, 0 };
|
673
|
+
if (!rndr->cb.codespan(ob, &work, rndr->opaque))
|
674
|
+
end = 0;
|
675
|
+
} else {
|
676
|
+
if (!rndr->cb.codespan(ob, 0, rndr->opaque))
|
677
|
+
end = 0;
|
678
|
+
}
|
679
|
+
|
680
|
+
return end;
|
681
|
+
}
|
682
|
+
|
683
|
+
|
684
|
+
/* char_escape • '\\' backslash escape */
|
685
|
+
static size_t
|
686
|
+
char_escape(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
|
687
|
+
{
|
688
|
+
static const char *escape_chars = "\\`*_{}[]()#+-.!:|&<>^~";
|
689
|
+
struct buf work = { 0, 0, 0, 0 };
|
690
|
+
|
691
|
+
if (size > 1) {
|
692
|
+
if (strchr(escape_chars, data[1]) == NULL)
|
693
|
+
return 0;
|
694
|
+
|
695
|
+
if (rndr->cb.normal_text) {
|
696
|
+
work.data = data + 1;
|
697
|
+
work.size = 1;
|
698
|
+
rndr->cb.normal_text(ob, &work, rndr->opaque);
|
699
|
+
}
|
700
|
+
else bufputc(ob, data[1]);
|
701
|
+
} else if (size == 1) {
|
702
|
+
bufputc(ob, data[0]);
|
703
|
+
}
|
704
|
+
|
705
|
+
return 2;
|
706
|
+
}
|
707
|
+
|
708
|
+
/* char_entity • '&' escaped when it doesn't belong to an entity */
|
709
|
+
/* valid entities are assumed to be anything matching &#?[A-Za-z0-9]+; */
|
710
|
+
static size_t
|
711
|
+
char_entity(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
|
712
|
+
{
|
713
|
+
size_t end = 1;
|
714
|
+
struct buf work = { 0, 0, 0, 0 };
|
715
|
+
|
716
|
+
if (end < size && data[end] == '#')
|
717
|
+
end++;
|
718
|
+
|
719
|
+
while (end < size && isalnum(data[end]))
|
720
|
+
end++;
|
721
|
+
|
722
|
+
if (end < size && data[end] == ';')
|
723
|
+
end++; /* real entity */
|
724
|
+
else
|
725
|
+
return 0; /* lone '&' */
|
726
|
+
|
727
|
+
if (rndr->cb.entity) {
|
728
|
+
work.data = data;
|
729
|
+
work.size = end;
|
730
|
+
rndr->cb.entity(ob, &work, rndr->opaque);
|
731
|
+
}
|
732
|
+
else bufput(ob, data, end);
|
733
|
+
|
734
|
+
return end;
|
735
|
+
}
|
736
|
+
|
737
|
+
/* char_langle_tag • '<' when tags or autolinks are allowed */
|
738
|
+
static size_t
|
739
|
+
char_langle_tag(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
|
740
|
+
{
|
741
|
+
enum mkd_autolink altype = MKDA_NOT_AUTOLINK;
|
742
|
+
size_t end = tag_length(data, size, &altype);
|
743
|
+
struct buf work = { data, end, 0, 0 };
|
744
|
+
int ret = 0;
|
745
|
+
|
746
|
+
if (end > 2) {
|
747
|
+
if (rndr->cb.autolink && altype != MKDA_NOT_AUTOLINK) {
|
748
|
+
struct buf *u_link = rndr_newbuf(rndr, BUFFER_SPAN);
|
749
|
+
work.data = data + 1;
|
750
|
+
work.size = end - 2;
|
751
|
+
unscape_text(u_link, &work);
|
752
|
+
ret = rndr->cb.autolink(ob, u_link, altype, rndr->opaque);
|
753
|
+
rndr_popbuf(rndr, BUFFER_SPAN);
|
754
|
+
}
|
755
|
+
else if (rndr->cb.raw_html_tag)
|
756
|
+
ret = rndr->cb.raw_html_tag(ob, &work, rndr->opaque);
|
757
|
+
}
|
758
|
+
|
759
|
+
if (!ret) return 0;
|
760
|
+
else return end;
|
761
|
+
}
|
762
|
+
|
763
|
+
static size_t
|
764
|
+
char_autolink_www(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
|
765
|
+
{
|
766
|
+
struct buf *link, *link_url, *link_text;
|
767
|
+
size_t link_len, rewind;
|
768
|
+
|
769
|
+
if (!rndr->cb.link || rndr->in_link_body)
|
770
|
+
return 0;
|
771
|
+
|
772
|
+
link = rndr_newbuf(rndr, BUFFER_SPAN);
|
773
|
+
|
774
|
+
if ((link_len = sd_autolink__www(&rewind, link, data, offset, size)) > 0) {
|
775
|
+
link_url = rndr_newbuf(rndr, BUFFER_SPAN);
|
776
|
+
BUFPUTSL(link_url, "http://");
|
777
|
+
bufput(link_url, link->data, link->size);
|
778
|
+
|
779
|
+
ob->size -= rewind;
|
780
|
+
if (rndr->cb.normal_text) {
|
781
|
+
link_text = rndr_newbuf(rndr, BUFFER_SPAN);
|
782
|
+
rndr->cb.normal_text(link_text, link, rndr->opaque);
|
783
|
+
rndr->cb.link(ob, link_url, NULL, link_text, rndr->opaque);
|
784
|
+
rndr_popbuf(rndr, BUFFER_SPAN);
|
785
|
+
} else {
|
786
|
+
rndr->cb.link(ob, link_url, NULL, link, rndr->opaque);
|
787
|
+
}
|
788
|
+
rndr_popbuf(rndr, BUFFER_SPAN);
|
789
|
+
}
|
790
|
+
|
791
|
+
rndr_popbuf(rndr, BUFFER_SPAN);
|
792
|
+
return link_len;
|
793
|
+
}
|
794
|
+
|
795
|
+
static size_t
|
796
|
+
char_autolink_email(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
|
797
|
+
{
|
798
|
+
struct buf *link;
|
799
|
+
size_t link_len, rewind;
|
800
|
+
|
801
|
+
if (!rndr->cb.autolink || rndr->in_link_body)
|
802
|
+
return 0;
|
803
|
+
|
804
|
+
link = rndr_newbuf(rndr, BUFFER_SPAN);
|
805
|
+
|
806
|
+
if ((link_len = sd_autolink__email(&rewind, link, data, offset, size)) > 0) {
|
807
|
+
ob->size -= rewind;
|
808
|
+
rndr->cb.autolink(ob, link, MKDA_EMAIL, rndr->opaque);
|
809
|
+
}
|
810
|
+
|
811
|
+
rndr_popbuf(rndr, BUFFER_SPAN);
|
812
|
+
return link_len;
|
813
|
+
}
|
814
|
+
|
815
|
+
static size_t
|
816
|
+
char_autolink_url(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
|
817
|
+
{
|
818
|
+
struct buf *link;
|
819
|
+
size_t link_len, rewind;
|
820
|
+
|
821
|
+
if (!rndr->cb.autolink || rndr->in_link_body)
|
822
|
+
return 0;
|
823
|
+
|
824
|
+
link = rndr_newbuf(rndr, BUFFER_SPAN);
|
825
|
+
|
826
|
+
if ((link_len = sd_autolink__url(&rewind, link, data, offset, size)) > 0) {
|
827
|
+
ob->size -= rewind;
|
828
|
+
rndr->cb.autolink(ob, link, MKDA_NORMAL, rndr->opaque);
|
829
|
+
}
|
830
|
+
|
831
|
+
rndr_popbuf(rndr, BUFFER_SPAN);
|
832
|
+
return link_len;
|
833
|
+
}
|
834
|
+
|
835
|
+
/* char_link • '[': parsing a link or an image */
|
836
|
+
static size_t
|
837
|
+
char_link(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
|
838
|
+
{
|
839
|
+
int is_img = (offset && data[-1] == '!'), level;
|
840
|
+
size_t i = 1, txt_e, link_b = 0, link_e = 0, title_b = 0, title_e = 0;
|
841
|
+
struct buf *content = 0;
|
842
|
+
struct buf *link = 0;
|
843
|
+
struct buf *title = 0;
|
844
|
+
struct buf *u_link = 0;
|
845
|
+
size_t org_work_size = rndr->work_bufs[BUFFER_SPAN].size;
|
846
|
+
int text_has_nl = 0, ret = 0;
|
847
|
+
int in_title = 0, qtype = 0;
|
848
|
+
|
849
|
+
/* checking whether the correct renderer exists */
|
850
|
+
if ((is_img && !rndr->cb.image) || (!is_img && !rndr->cb.link))
|
851
|
+
goto cleanup;
|
852
|
+
|
853
|
+
/* looking for the matching closing bracket */
|
854
|
+
for (level = 1; i < size; i++) {
|
855
|
+
if (data[i] == '\n')
|
856
|
+
text_has_nl = 1;
|
857
|
+
|
858
|
+
else if (data[i - 1] == '\\')
|
859
|
+
continue;
|
860
|
+
|
861
|
+
else if (data[i] == '[')
|
862
|
+
level++;
|
863
|
+
|
864
|
+
else if (data[i] == ']') {
|
865
|
+
level--;
|
866
|
+
if (level <= 0)
|
867
|
+
break;
|
868
|
+
}
|
869
|
+
}
|
870
|
+
|
871
|
+
if (i >= size)
|
872
|
+
goto cleanup;
|
873
|
+
|
874
|
+
txt_e = i;
|
875
|
+
i++;
|
876
|
+
|
877
|
+
/* skip any amount of whitespace or newline */
|
878
|
+
/* (this is much more laxist than original markdown syntax) */
|
879
|
+
while (i < size && _isspace(data[i]))
|
880
|
+
i++;
|
881
|
+
|
882
|
+
/* inline style link */
|
883
|
+
if (i < size && data[i] == '(') {
|
884
|
+
/* skipping initial whitespace */
|
885
|
+
i++;
|
886
|
+
|
887
|
+
while (i < size && _isspace(data[i]))
|
888
|
+
i++;
|
889
|
+
|
890
|
+
link_b = i;
|
891
|
+
|
892
|
+
/* looking for link end: ' " ) */
|
893
|
+
while (i < size) {
|
894
|
+
if (data[i] == '\\') i += 2;
|
895
|
+
else if (data[i] == ')') break;
|
896
|
+
else if (i >= 1 && _isspace(data[i-1]) && (data[i] == '\'' || data[i] == '"')) break;
|
897
|
+
else i++;
|
898
|
+
}
|
899
|
+
|
900
|
+
if (i >= size) goto cleanup;
|
901
|
+
link_e = i;
|
902
|
+
|
903
|
+
/* looking for title end if present */
|
904
|
+
if (data[i] == '\'' || data[i] == '"') {
|
905
|
+
qtype = data[i];
|
906
|
+
in_title = 1;
|
907
|
+
i++;
|
908
|
+
title_b = i;
|
909
|
+
|
910
|
+
while (i < size) {
|
911
|
+
if (data[i] == '\\') i += 2;
|
912
|
+
else if (data[i] == qtype) {in_title = 0; i++;}
|
913
|
+
else if ((data[i] == ')') && !in_title) break;
|
914
|
+
else i++;
|
915
|
+
}
|
916
|
+
|
917
|
+
if (i >= size) goto cleanup;
|
918
|
+
|
919
|
+
/* skipping whitespaces after title */
|
920
|
+
title_e = i - 1;
|
921
|
+
while (title_e > title_b && _isspace(data[title_e]))
|
922
|
+
title_e--;
|
923
|
+
|
924
|
+
/* checking for closing quote presence */
|
925
|
+
if (data[title_e] != '\'' && data[title_e] != '"') {
|
926
|
+
title_b = title_e = 0;
|
927
|
+
link_e = i;
|
928
|
+
}
|
929
|
+
}
|
930
|
+
|
931
|
+
/* remove whitespace at the end of the link */
|
932
|
+
while (link_e > link_b && _isspace(data[link_e - 1]))
|
933
|
+
link_e--;
|
934
|
+
|
935
|
+
/* remove optional angle brackets around the link */
|
936
|
+
if (data[link_b] == '<') link_b++;
|
937
|
+
if (data[link_e - 1] == '>') link_e--;
|
938
|
+
|
939
|
+
/* building escaped link and title */
|
940
|
+
if (link_e > link_b) {
|
941
|
+
link = rndr_newbuf(rndr, BUFFER_SPAN);
|
942
|
+
bufput(link, data + link_b, link_e - link_b);
|
943
|
+
}
|
944
|
+
|
945
|
+
if (title_e > title_b) {
|
946
|
+
title = rndr_newbuf(rndr, BUFFER_SPAN);
|
947
|
+
bufput(title, data + title_b, title_e - title_b);
|
948
|
+
}
|
949
|
+
|
950
|
+
i++;
|
951
|
+
}
|
952
|
+
|
953
|
+
/* reference style link */
|
954
|
+
else if (i < size && data[i] == '[') {
|
955
|
+
struct buf id = { 0, 0, 0, 0 };
|
956
|
+
struct link_ref *lr;
|
957
|
+
|
958
|
+
/* looking for the id */
|
959
|
+
i++;
|
960
|
+
link_b = i;
|
961
|
+
while (i < size && data[i] != ']') i++;
|
962
|
+
if (i >= size) goto cleanup;
|
963
|
+
link_e = i;
|
964
|
+
|
965
|
+
/* finding the link_ref */
|
966
|
+
if (link_b == link_e) {
|
967
|
+
if (text_has_nl) {
|
968
|
+
struct buf *b = rndr_newbuf(rndr, BUFFER_SPAN);
|
969
|
+
size_t j;
|
970
|
+
|
971
|
+
for (j = 1; j < txt_e; j++) {
|
972
|
+
if (data[j] != '\n')
|
973
|
+
bufputc(b, data[j]);
|
974
|
+
else if (data[j - 1] != ' ')
|
975
|
+
bufputc(b, ' ');
|
976
|
+
}
|
977
|
+
|
978
|
+
id.data = b->data;
|
979
|
+
id.size = b->size;
|
980
|
+
} else {
|
981
|
+
id.data = data + 1;
|
982
|
+
id.size = txt_e - 1;
|
983
|
+
}
|
984
|
+
} else {
|
985
|
+
id.data = data + link_b;
|
986
|
+
id.size = link_e - link_b;
|
987
|
+
}
|
988
|
+
|
989
|
+
lr = find_link_ref(rndr->refs, id.data, id.size);
|
990
|
+
if (!lr)
|
991
|
+
goto cleanup;
|
992
|
+
|
993
|
+
/* keeping link and title from link_ref */
|
994
|
+
link = lr->link;
|
995
|
+
title = lr->title;
|
996
|
+
i++;
|
997
|
+
}
|
998
|
+
|
999
|
+
/* shortcut reference style link */
|
1000
|
+
else {
|
1001
|
+
struct buf id = { 0, 0, 0, 0 };
|
1002
|
+
struct link_ref *lr;
|
1003
|
+
|
1004
|
+
/* crafting the id */
|
1005
|
+
if (text_has_nl) {
|
1006
|
+
struct buf *b = rndr_newbuf(rndr, BUFFER_SPAN);
|
1007
|
+
size_t j;
|
1008
|
+
|
1009
|
+
for (j = 1; j < txt_e; j++) {
|
1010
|
+
if (data[j] != '\n')
|
1011
|
+
bufputc(b, data[j]);
|
1012
|
+
else if (data[j - 1] != ' ')
|
1013
|
+
bufputc(b, ' ');
|
1014
|
+
}
|
1015
|
+
|
1016
|
+
id.data = b->data;
|
1017
|
+
id.size = b->size;
|
1018
|
+
} else {
|
1019
|
+
id.data = data + 1;
|
1020
|
+
id.size = txt_e - 1;
|
1021
|
+
}
|
1022
|
+
|
1023
|
+
/* finding the link_ref */
|
1024
|
+
lr = find_link_ref(rndr->refs, id.data, id.size);
|
1025
|
+
if (!lr)
|
1026
|
+
goto cleanup;
|
1027
|
+
|
1028
|
+
/* keeping link and title from link_ref */
|
1029
|
+
link = lr->link;
|
1030
|
+
title = lr->title;
|
1031
|
+
|
1032
|
+
/* rewinding the whitespace */
|
1033
|
+
i = txt_e + 1;
|
1034
|
+
}
|
1035
|
+
|
1036
|
+
/* building content: img alt is escaped, link content is parsed */
|
1037
|
+
if (txt_e > 1) {
|
1038
|
+
content = rndr_newbuf(rndr, BUFFER_SPAN);
|
1039
|
+
if (is_img) {
|
1040
|
+
bufput(content, data + 1, txt_e - 1);
|
1041
|
+
} else {
|
1042
|
+
/* disable autolinking when parsing inline the
|
1043
|
+
* content of a link */
|
1044
|
+
rndr->in_link_body = 1;
|
1045
|
+
parse_inline(content, rndr, data + 1, txt_e - 1);
|
1046
|
+
rndr->in_link_body = 0;
|
1047
|
+
}
|
1048
|
+
}
|
1049
|
+
|
1050
|
+
if (link) {
|
1051
|
+
u_link = rndr_newbuf(rndr, BUFFER_SPAN);
|
1052
|
+
unscape_text(u_link, link);
|
1053
|
+
}
|
1054
|
+
|
1055
|
+
/* calling the relevant rendering function */
|
1056
|
+
if (is_img) {
|
1057
|
+
if (ob->size && ob->data[ob->size - 1] == '!')
|
1058
|
+
ob->size -= 1;
|
1059
|
+
|
1060
|
+
ret = rndr->cb.image(ob, u_link, title, content, rndr->opaque);
|
1061
|
+
} else {
|
1062
|
+
ret = rndr->cb.link(ob, u_link, title, content, rndr->opaque);
|
1063
|
+
}
|
1064
|
+
|
1065
|
+
/* cleanup */
|
1066
|
+
cleanup:
|
1067
|
+
rndr->work_bufs[BUFFER_SPAN].size = (int)org_work_size;
|
1068
|
+
return ret ? i : 0;
|
1069
|
+
}
|
1070
|
+
|
1071
|
+
static size_t
|
1072
|
+
char_superscript(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
|
1073
|
+
{
|
1074
|
+
size_t sup_start, sup_len;
|
1075
|
+
struct buf *sup;
|
1076
|
+
|
1077
|
+
if (!rndr->cb.superscript)
|
1078
|
+
return 0;
|
1079
|
+
|
1080
|
+
if (size < 2)
|
1081
|
+
return 0;
|
1082
|
+
|
1083
|
+
if (data[1] == '(') {
|
1084
|
+
sup_start = sup_len = 2;
|
1085
|
+
|
1086
|
+
while (sup_len < size && data[sup_len] != ')' && data[sup_len - 1] != '\\')
|
1087
|
+
sup_len++;
|
1088
|
+
|
1089
|
+
if (sup_len == size)
|
1090
|
+
return 0;
|
1091
|
+
} else {
|
1092
|
+
sup_start = sup_len = 1;
|
1093
|
+
|
1094
|
+
while (sup_len < size && !_isspace(data[sup_len]))
|
1095
|
+
sup_len++;
|
1096
|
+
}
|
1097
|
+
|
1098
|
+
if (sup_len - sup_start == 0)
|
1099
|
+
return (sup_start == 2) ? 3 : 0;
|
1100
|
+
|
1101
|
+
sup = rndr_newbuf(rndr, BUFFER_SPAN);
|
1102
|
+
parse_inline(sup, rndr, data + sup_start, sup_len - sup_start);
|
1103
|
+
rndr->cb.superscript(ob, sup, rndr->opaque);
|
1104
|
+
rndr_popbuf(rndr, BUFFER_SPAN);
|
1105
|
+
|
1106
|
+
return (sup_start == 2) ? sup_len + 1 : sup_len;
|
1107
|
+
}
|
1108
|
+
|
1109
|
+
/*********************************
|
1110
|
+
* BLOCK-LEVEL PARSING FUNCTIONS *
|
1111
|
+
*********************************/
|
1112
|
+
|
1113
|
+
/* is_empty • returns a non-zero length when the line is blank, 0 otherwise
 *
 * A blank line contains only spaces up to its newline (or up to the end of
 * the input). The value returned is the offset of the newline plus one;
 * when the input ends without a newline this is size + 1. */
static size_t
is_empty(uint8_t *data, size_t size)
{
	size_t pos = 0;

	while (pos < size && data[pos] != '\n') {
		if (data[pos] != ' ')
			return 0;
		pos++;
	}

	return pos + 1;
}
|
1125
|
+
|
1126
|
+
/* is_hrule • returns whether a line is a horizontal rule */
|
1127
|
+
static int
|
1128
|
+
is_hrule(uint8_t *data, size_t size)
|
1129
|
+
{
|
1130
|
+
size_t i = 0, n = 0;
|
1131
|
+
uint8_t c;
|
1132
|
+
|
1133
|
+
/* skipping initial spaces */
|
1134
|
+
if (size < 3) return 0;
|
1135
|
+
if (data[0] == ' ') { i++;
|
1136
|
+
if (data[1] == ' ') { i++;
|
1137
|
+
if (data[2] == ' ') { i++; } } }
|
1138
|
+
|
1139
|
+
/* looking at the hrule uint8_t */
|
1140
|
+
if (i + 2 >= size
|
1141
|
+
|| (data[i] != '*' && data[i] != '-' && data[i] != '_'))
|
1142
|
+
return 0;
|
1143
|
+
c = data[i];
|
1144
|
+
|
1145
|
+
/* the whole line must be the char or whitespace */
|
1146
|
+
while (i < size && data[i] != '\n') {
|
1147
|
+
if (data[i] == c) n++;
|
1148
|
+
else if (data[i] != ' ')
|
1149
|
+
return 0;
|
1150
|
+
|
1151
|
+
i++;
|
1152
|
+
}
|
1153
|
+
|
1154
|
+
return n >= 3;
|
1155
|
+
}
|
1156
|
+
|
1157
|
+
/* check if a line is a code fence; return its size if it is */
|
1158
|
+
static size_t
|
1159
|
+
is_codefence(uint8_t *data, size_t size, struct buf *syntax)
|
1160
|
+
{
|
1161
|
+
size_t i = 0, n = 0;
|
1162
|
+
uint8_t c;
|
1163
|
+
|
1164
|
+
/* skipping initial spaces */
|
1165
|
+
if (size < 3) return 0;
|
1166
|
+
if (data[0] == ' ') { i++;
|
1167
|
+
if (data[1] == ' ') { i++;
|
1168
|
+
if (data[2] == ' ') { i++; } } }
|
1169
|
+
|
1170
|
+
/* looking at the hrule uint8_t */
|
1171
|
+
if (i + 2 >= size || !(data[i] == '~' || data[i] == '`'))
|
1172
|
+
return 0;
|
1173
|
+
|
1174
|
+
c = data[i];
|
1175
|
+
|
1176
|
+
/* the whole line must be the uint8_t or whitespace */
|
1177
|
+
while (i < size && data[i] == c) {
|
1178
|
+
n++; i++;
|
1179
|
+
}
|
1180
|
+
|
1181
|
+
if (n < 3)
|
1182
|
+
return 0;
|
1183
|
+
|
1184
|
+
if (syntax != NULL) {
|
1185
|
+
size_t syn = 0;
|
1186
|
+
|
1187
|
+
while (i < size && data[i] == ' ')
|
1188
|
+
i++;
|
1189
|
+
|
1190
|
+
syntax->data = data + i;
|
1191
|
+
|
1192
|
+
if (i < size && data[i] == '{') {
|
1193
|
+
i++; syntax->data++;
|
1194
|
+
|
1195
|
+
while (i < size && data[i] != '}' && data[i] != '\n') {
|
1196
|
+
syn++; i++;
|
1197
|
+
}
|
1198
|
+
|
1199
|
+
if (i == size || data[i] != '}')
|
1200
|
+
return 0;
|
1201
|
+
|
1202
|
+
/* strip all whitespace at the beginning and the end
|
1203
|
+
* of the {} block */
|
1204
|
+
while (syn > 0 && _isspace(syntax->data[0])) {
|
1205
|
+
syntax->data++; syn--;
|
1206
|
+
}
|
1207
|
+
|
1208
|
+
while (syn > 0 && _isspace(syntax->data[syn - 1]))
|
1209
|
+
syn--;
|
1210
|
+
|
1211
|
+
i++;
|
1212
|
+
} else {
|
1213
|
+
while (i < size && !_isspace(data[i])) {
|
1214
|
+
syn++; i++;
|
1215
|
+
}
|
1216
|
+
}
|
1217
|
+
|
1218
|
+
syntax->size = syn;
|
1219
|
+
}
|
1220
|
+
|
1221
|
+
while (i < size && data[i] != '\n') {
|
1222
|
+
if (!_isspace(data[i]))
|
1223
|
+
return 0;
|
1224
|
+
|
1225
|
+
i++;
|
1226
|
+
}
|
1227
|
+
|
1228
|
+
return i + 1;
|
1229
|
+
}
|
1230
|
+
|
1231
|
+
/* is_atxheader • returns whether the line is a hash-prefixed header */
|
1232
|
+
static int
|
1233
|
+
is_atxheader(struct sd_markdown *rndr, uint8_t *data, size_t size)
|
1234
|
+
{
|
1235
|
+
if (data[0] != '#')
|
1236
|
+
return 0;
|
1237
|
+
|
1238
|
+
if (rndr->ext_flags & MKDEXT_SPACE_HEADERS) {
|
1239
|
+
size_t level = 0;
|
1240
|
+
|
1241
|
+
while (level < size && level < 6 && data[level] == '#')
|
1242
|
+
level++;
|
1243
|
+
|
1244
|
+
if (level < size && data[level] != ' ')
|
1245
|
+
return 0;
|
1246
|
+
}
|
1247
|
+
|
1248
|
+
return 1;
|
1249
|
+
}
|
1250
|
+
|
1251
|
+
/* is_headerline • returns whether the line is a setext-style hdr underline
 *
 * An underline is a run of '=' (level 1) or '-' (level 2), optionally
 * followed by spaces, up to the newline or the end of the input.
 * Returns the header level (1 or 2), or 0 when the line is no underline. */
static int
is_headerline(uint8_t *data, size_t size)
{
	const uint8_t mark = data[0];
	size_t pos = 1;
	int level;

	if (mark == '=')
		level = 1;
	else if (mark == '-')
		level = 2;
	else
		return 0;

	/* rest of the underline run, then optional trailing spaces */
	while (pos < size && data[pos] == mark)
		pos++;

	while (pos < size && data[pos] == ' ')
		pos++;

	if (pos < size && data[pos] != '\n')
		return 0;

	return level;
}
|
1271
|
+
|
1272
|
+
/* is_next_headerline • returns whether the line after the current one is a
 * setext-style header underline (same result values as is_headerline) */
static int
is_next_headerline(uint8_t *data, size_t size)
{
	size_t next;

	/* locate the end of the current line */
	for (next = 0; next < size; next++) {
		if (data[next] == '\n')
			break;
	}

	/* step over the newline; nothing to test if no line follows */
	next++;
	if (next >= size)
		return 0;

	return is_headerline(data + next, size - next);
}
|
1285
|
+
|
1286
|
+
/* prefix_quote • returns blockquote prefix length
 *
 * The prefix is up to three spaces, a '>' and one optional space.
 * Returns 0 when the line does not start with such a prefix. */
static size_t
prefix_quote(uint8_t *data, size_t size)
{
	size_t pos = 0;

	/* up to three spaces of indentation are allowed */
	while (pos < 3 && pos < size && data[pos] == ' ')
		pos++;

	if (pos >= size || data[pos] != '>')
		return 0;

	/* one space after the marker belongs to the prefix */
	if (pos + 1 < size && data[pos + 1] == ' ')
		return pos + 2;

	return pos + 1;
}
|
1304
|
+
|
1305
|
+
/* prefix_code • returns prefix length for block code
 *
 * A code line starts with four spaces. Returns 4 in that case, else 0. */
static size_t
prefix_code(uint8_t *data, size_t size)
{
	size_t k;

	if (size <= 3)
		return 0;

	for (k = 0; k < 4; k++) {
		if (data[k] != ' ')
			return 0;
	}

	return 4;
}
|
1314
|
+
|
1315
|
+
/* prefix_oli • returns ordered list item prefix
 *
 * The prefix is up to three spaces, one or more digits, a '.' and a space.
 * A line whose following line is a setext underline is not a list item.
 * Returns the prefix length, or 0 when the line is no ordered list item. */
static size_t
prefix_oli(uint8_t *data, size_t size)
{
	size_t pos = 0;

	/* up to three spaces of indentation are allowed */
	while (pos < 3 && pos < size && data[pos] == ' ')
		pos++;

	/* at least one digit is required */
	if (pos >= size || data[pos] < '0' || data[pos] > '9')
		return 0;

	do {
		pos++;
	} while (pos < size && data[pos] >= '0' && data[pos] <= '9');

	/* the number must be followed by ". " */
	if (pos + 1 >= size || data[pos] != '.' || data[pos + 1] != ' ')
		return 0;

	if (is_next_headerline(data + pos, size - pos))
		return 0;

	return pos + 2;
}
|
1339
|
+
|
1340
|
+
/* prefix_uli • returns unordered list item prefix
 *
 * The prefix is up to three spaces, one of '*', '+' or '-', and a space.
 * A line whose following line is a setext underline is not a list item.
 * Returns the prefix length, or 0 when the line is no unordered list item. */
static size_t
prefix_uli(uint8_t *data, size_t size)
{
	size_t pos = 0;

	/* up to three spaces of indentation are allowed */
	while (pos < 3 && pos < size && data[pos] == ' ')
		pos++;

	/* the bullet and the space after it must both be present */
	if (pos + 1 >= size)
		return 0;

	if (data[pos] != '*' && data[pos] != '+' && data[pos] != '-')
		return 0;

	if (data[pos + 1] != ' ')
		return 0;

	if (is_next_headerline(data + pos, size - pos))
		return 0;

	return pos + 2;
}
|
1360
|
+
|
1361
|
+
|
1362
|
+
/* parse_block • parsing of block-level input into ob (forward declaration; returns nothing) */
|
1363
|
+
static void parse_block(struct buf *ob, struct sd_markdown *rndr,
|
1364
|
+
uint8_t *data, size_t size);
|
1365
|
+
|
1366
|
+
|
1367
|
+
/* parse_blockquote • handles parsing of a blockquote fragment */
|
1368
|
+
static size_t
|
1369
|
+
parse_blockquote(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
|
1370
|
+
{
|
1371
|
+
size_t beg, end = 0, pre, work_size = 0;
|
1372
|
+
uint8_t *work_data = 0;
|
1373
|
+
struct buf *out = 0;
|
1374
|
+
|
1375
|
+
out = rndr_newbuf(rndr, BUFFER_BLOCK);
|
1376
|
+
beg = 0;
|
1377
|
+
while (beg < size) {
|
1378
|
+
for (end = beg + 1; end < size && data[end - 1] != '\n'; end++);
|
1379
|
+
|
1380
|
+
pre = prefix_quote(data + beg, end - beg);
|
1381
|
+
|
1382
|
+
if (pre)
|
1383
|
+
beg += pre; /* skipping prefix */
|
1384
|
+
|
1385
|
+
/* empty line followed by non-quote line */
|
1386
|
+
else if (is_empty(data + beg, end - beg) &&
|
1387
|
+
(end >= size || (prefix_quote(data + end, size - end) == 0 &&
|
1388
|
+
!is_empty(data + end, size - end))))
|
1389
|
+
break;
|
1390
|
+
|
1391
|
+
if (beg < end) { /* copy into the in-place working buffer */
|
1392
|
+
/* bufput(work, data + beg, end - beg); */
|
1393
|
+
if (!work_data)
|
1394
|
+
work_data = data + beg;
|
1395
|
+
else if (data + beg != work_data + work_size)
|
1396
|
+
memmove(work_data + work_size, data + beg, end - beg);
|
1397
|
+
work_size += end - beg;
|
1398
|
+
}
|
1399
|
+
beg = end;
|
1400
|
+
}
|
1401
|
+
|
1402
|
+
parse_block(out, rndr, work_data, work_size);
|
1403
|
+
if (rndr->cb.blockquote)
|
1404
|
+
rndr->cb.blockquote(ob, out, rndr->opaque);
|
1405
|
+
rndr_popbuf(rndr, BUFFER_BLOCK);
|
1406
|
+
return end;
|
1407
|
+
}
|
1408
|
+
|
1409
|
+
static size_t
|
1410
|
+
parse_htmlblock(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int do_render);
|
1411
|
+
|
1412
|
+
/* parse_blockquote • handles parsing of a regular paragraph */
|
1413
|
+
static size_t
|
1414
|
+
parse_paragraph(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
|
1415
|
+
{
|
1416
|
+
size_t i = 0, end = 0;
|
1417
|
+
int level = 0;
|
1418
|
+
struct buf work = { data, 0, 0, 0 };
|
1419
|
+
|
1420
|
+
while (i < size) {
|
1421
|
+
for (end = i + 1; end < size && data[end - 1] != '\n'; end++) /* empty */;
|
1422
|
+
|
1423
|
+
if (is_empty(data + i, size - i))
|
1424
|
+
break;
|
1425
|
+
|
1426
|
+
if ((level = is_headerline(data + i, size - i)) != 0)
|
1427
|
+
break;
|
1428
|
+
|
1429
|
+
if (rndr->ext_flags & MKDEXT_LAX_HTML_BLOCKS) {
|
1430
|
+
if (data[i] == '<' && rndr->cb.blockhtml && parse_htmlblock(ob, rndr, data + i, size - i, 0)) {
|
1431
|
+
end = i;
|
1432
|
+
break;
|
1433
|
+
}
|
1434
|
+
}
|
1435
|
+
|
1436
|
+
if (is_atxheader(rndr, data + i, size - i) ||
|
1437
|
+
is_hrule(data + i, size - i) ||
|
1438
|
+
prefix_quote(data + i, size - i)) {
|
1439
|
+
end = i;
|
1440
|
+
break;
|
1441
|
+
}
|
1442
|
+
|
1443
|
+
i = end;
|
1444
|
+
}
|
1445
|
+
|
1446
|
+
work.size = i;
|
1447
|
+
while (work.size && data[work.size - 1] == '\n')
|
1448
|
+
work.size--;
|
1449
|
+
|
1450
|
+
if (!level) {
|
1451
|
+
struct buf *tmp = rndr_newbuf(rndr, BUFFER_BLOCK);
|
1452
|
+
parse_inline(tmp, rndr, work.data, work.size);
|
1453
|
+
if (rndr->cb.paragraph)
|
1454
|
+
rndr->cb.paragraph(ob, tmp, rndr->opaque);
|
1455
|
+
rndr_popbuf(rndr, BUFFER_BLOCK);
|
1456
|
+
} else {
|
1457
|
+
struct buf *header_work;
|
1458
|
+
|
1459
|
+
if (work.size) {
|
1460
|
+
size_t beg;
|
1461
|
+
i = work.size;
|
1462
|
+
work.size -= 1;
|
1463
|
+
|
1464
|
+
while (work.size && data[work.size] != '\n')
|
1465
|
+
work.size -= 1;
|
1466
|
+
|
1467
|
+
beg = work.size + 1;
|
1468
|
+
while (work.size && data[work.size - 1] == '\n')
|
1469
|
+
work.size -= 1;
|
1470
|
+
|
1471
|
+
if (work.size > 0) {
|
1472
|
+
struct buf *tmp = rndr_newbuf(rndr, BUFFER_BLOCK);
|
1473
|
+
parse_inline(tmp, rndr, work.data, work.size);
|
1474
|
+
|
1475
|
+
if (rndr->cb.paragraph)
|
1476
|
+
rndr->cb.paragraph(ob, tmp, rndr->opaque);
|
1477
|
+
|
1478
|
+
rndr_popbuf(rndr, BUFFER_BLOCK);
|
1479
|
+
work.data += beg;
|
1480
|
+
work.size = i - beg;
|
1481
|
+
}
|
1482
|
+
else work.size = i;
|
1483
|
+
}
|
1484
|
+
|
1485
|
+
header_work = rndr_newbuf(rndr, BUFFER_SPAN);
|
1486
|
+
parse_inline(header_work, rndr, work.data, work.size);
|
1487
|
+
|
1488
|
+
if (rndr->cb.header)
|
1489
|
+
rndr->cb.header(ob, header_work, (int)level, rndr->opaque);
|
1490
|
+
|
1491
|
+
rndr_popbuf(rndr, BUFFER_SPAN);
|
1492
|
+
}
|
1493
|
+
|
1494
|
+
return end;
|
1495
|
+
}
|
1496
|
+
|
1497
|
+
/* parse_fencedcode • handles parsing of a block-level code fragment */
|
1498
|
+
static size_t
|
1499
|
+
parse_fencedcode(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
|
1500
|
+
{
|
1501
|
+
size_t beg, end;
|
1502
|
+
struct buf *work = 0;
|
1503
|
+
struct buf lang = { 0, 0, 0, 0 };
|
1504
|
+
|
1505
|
+
beg = is_codefence(data, size, &lang);
|
1506
|
+
if (beg == 0) return 0;
|
1507
|
+
|
1508
|
+
work = rndr_newbuf(rndr, BUFFER_BLOCK);
|
1509
|
+
|
1510
|
+
while (beg < size) {
|
1511
|
+
size_t fence_end;
|
1512
|
+
|
1513
|
+
fence_end = is_codefence(data + beg, size - beg, NULL);
|
1514
|
+
if (fence_end != 0) {
|
1515
|
+
beg += fence_end;
|
1516
|
+
break;
|
1517
|
+
}
|
1518
|
+
|
1519
|
+
for (end = beg + 1; end < size && data[end - 1] != '\n'; end++);
|
1520
|
+
|
1521
|
+
if (beg < end) {
|
1522
|
+
/* verbatim copy to the working buffer,
|
1523
|
+
escaping entities */
|
1524
|
+
if (is_empty(data + beg, end - beg))
|
1525
|
+
bufputc(work, '\n');
|
1526
|
+
else bufput(work, data + beg, end - beg);
|
1527
|
+
}
|
1528
|
+
beg = end;
|
1529
|
+
}
|
1530
|
+
|
1531
|
+
if (work->size && work->data[work->size - 1] != '\n')
|
1532
|
+
bufputc(work, '\n');
|
1533
|
+
|
1534
|
+
if (rndr->cb.blockcode)
|
1535
|
+
rndr->cb.blockcode(ob, work, lang.size ? &lang : NULL, rndr->opaque);
|
1536
|
+
|
1537
|
+
rndr_popbuf(rndr, BUFFER_BLOCK);
|
1538
|
+
return beg;
|
1539
|
+
}
|
1540
|
+
|
1541
|
+
static size_t
|
1542
|
+
parse_blockcode(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
|
1543
|
+
{
|
1544
|
+
size_t beg, end, pre;
|
1545
|
+
struct buf *work = 0;
|
1546
|
+
|
1547
|
+
work = rndr_newbuf(rndr, BUFFER_BLOCK);
|
1548
|
+
|
1549
|
+
beg = 0;
|
1550
|
+
while (beg < size) {
|
1551
|
+
for (end = beg + 1; end < size && data[end - 1] != '\n'; end++) {};
|
1552
|
+
pre = prefix_code(data + beg, end - beg);
|
1553
|
+
|
1554
|
+
if (pre)
|
1555
|
+
beg += pre; /* skipping prefix */
|
1556
|
+
else if (!is_empty(data + beg, end - beg))
|
1557
|
+
/* non-empty non-prefixed line breaks the pre */
|
1558
|
+
break;
|
1559
|
+
|
1560
|
+
if (beg < end) {
|
1561
|
+
/* verbatim copy to the working buffer,
|
1562
|
+
escaping entities */
|
1563
|
+
if (is_empty(data + beg, end - beg))
|
1564
|
+
bufputc(work, '\n');
|
1565
|
+
else bufput(work, data + beg, end - beg);
|
1566
|
+
}
|
1567
|
+
beg = end;
|
1568
|
+
}
|
1569
|
+
|
1570
|
+
while (work->size && work->data[work->size - 1] == '\n')
|
1571
|
+
work->size -= 1;
|
1572
|
+
|
1573
|
+
bufputc(work, '\n');
|
1574
|
+
|
1575
|
+
if (rndr->cb.blockcode)
|
1576
|
+
rndr->cb.blockcode(ob, work, NULL, rndr->opaque);
|
1577
|
+
|
1578
|
+
rndr_popbuf(rndr, BUFFER_BLOCK);
|
1579
|
+
return beg;
|
1580
|
+
}
|
1581
|
+
|
1582
|
+
/* parse_listitem • parsing of a single list item */
|
1583
|
+
/* assuming initial prefix is already removed */
|
1584
|
+
static size_t
|
1585
|
+
parse_listitem(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int *flags)
|
1586
|
+
{
|
1587
|
+
struct buf *work = 0, *inter = 0;
|
1588
|
+
size_t beg = 0, end, pre, sublist = 0, orgpre = 0, i;
|
1589
|
+
int in_empty = 0, has_inside_empty = 0;
|
1590
|
+
size_t has_next_uli, has_next_oli;
|
1591
|
+
|
1592
|
+
/* keeping track of the first indentation prefix */
|
1593
|
+
while (orgpre < 3 && orgpre < size && data[orgpre] == ' ')
|
1594
|
+
orgpre++;
|
1595
|
+
|
1596
|
+
beg = prefix_uli(data, size);
|
1597
|
+
if (!beg)
|
1598
|
+
beg = prefix_oli(data, size);
|
1599
|
+
|
1600
|
+
if (!beg)
|
1601
|
+
return 0;
|
1602
|
+
|
1603
|
+
/* skipping to the beginning of the following line */
|
1604
|
+
end = beg;
|
1605
|
+
while (end < size && data[end - 1] != '\n')
|
1606
|
+
end++;
|
1607
|
+
|
1608
|
+
/* getting working buffers */
|
1609
|
+
work = rndr_newbuf(rndr, BUFFER_SPAN);
|
1610
|
+
inter = rndr_newbuf(rndr, BUFFER_SPAN);
|
1611
|
+
|
1612
|
+
/* putting the first line into the working buffer */
|
1613
|
+
bufput(work, data + beg, end - beg);
|
1614
|
+
beg = end;
|
1615
|
+
|
1616
|
+
/* process the following lines */
|
1617
|
+
while (beg < size) {
|
1618
|
+
end++;
|
1619
|
+
|
1620
|
+
while (end < size && data[end - 1] != '\n')
|
1621
|
+
end++;
|
1622
|
+
|
1623
|
+
/* process an empty line */
|
1624
|
+
if (is_empty(data + beg, end - beg)) {
|
1625
|
+
in_empty = 1;
|
1626
|
+
beg = end;
|
1627
|
+
continue;
|
1628
|
+
}
|
1629
|
+
|
1630
|
+
/* calculating the indentation */
|
1631
|
+
i = 0;
|
1632
|
+
while (i < 4 && beg + i < end && data[beg + i] == ' ')
|
1633
|
+
i++;
|
1634
|
+
|
1635
|
+
pre = i;
|
1636
|
+
|
1637
|
+
has_next_uli = prefix_uli(data + beg + i, end - beg - i);
|
1638
|
+
has_next_oli = prefix_oli(data + beg + i, end - beg - i);
|
1639
|
+
|
1640
|
+
/* checking for ul/ol switch */
|
1641
|
+
if (in_empty && (
|
1642
|
+
((*flags & MKD_LIST_ORDERED) && has_next_uli) ||
|
1643
|
+
(!(*flags & MKD_LIST_ORDERED) && has_next_oli))){
|
1644
|
+
*flags |= MKD_LI_END;
|
1645
|
+
break; /* the following item must have same list type */
|
1646
|
+
}
|
1647
|
+
|
1648
|
+
/* checking for a new item */
|
1649
|
+
if ((has_next_uli && !is_hrule(data + beg + i, end - beg - i)) || has_next_oli) {
|
1650
|
+
if (in_empty)
|
1651
|
+
has_inside_empty = 1;
|
1652
|
+
|
1653
|
+
if (pre == orgpre) /* the following item must have */
|
1654
|
+
break; /* the same indentation */
|
1655
|
+
|
1656
|
+
if (!sublist)
|
1657
|
+
sublist = work->size;
|
1658
|
+
}
|
1659
|
+
/* joining only indented stuff after empty lines */
|
1660
|
+
else if (in_empty && i < 4) {
|
1661
|
+
*flags |= MKD_LI_END;
|
1662
|
+
break;
|
1663
|
+
}
|
1664
|
+
else if (in_empty) {
|
1665
|
+
bufputc(work, '\n');
|
1666
|
+
has_inside_empty = 1;
|
1667
|
+
}
|
1668
|
+
|
1669
|
+
in_empty = 0;
|
1670
|
+
|
1671
|
+
/* adding the line without prefix into the working buffer */
|
1672
|
+
bufput(work, data + beg + i, end - beg - i);
|
1673
|
+
beg = end;
|
1674
|
+
}
|
1675
|
+
|
1676
|
+
/* render of li contents */
|
1677
|
+
if (has_inside_empty)
|
1678
|
+
*flags |= MKD_LI_BLOCK;
|
1679
|
+
|
1680
|
+
if (*flags & MKD_LI_BLOCK) {
|
1681
|
+
/* intermediate render of block li */
|
1682
|
+
if (sublist && sublist < work->size) {
|
1683
|
+
parse_block(inter, rndr, work->data, sublist);
|
1684
|
+
parse_block(inter, rndr, work->data + sublist, work->size - sublist);
|
1685
|
+
}
|
1686
|
+
else
|
1687
|
+
parse_block(inter, rndr, work->data, work->size);
|
1688
|
+
} else {
|
1689
|
+
/* intermediate render of inline li */
|
1690
|
+
if (sublist && sublist < work->size) {
|
1691
|
+
parse_inline(inter, rndr, work->data, sublist);
|
1692
|
+
parse_block(inter, rndr, work->data + sublist, work->size - sublist);
|
1693
|
+
}
|
1694
|
+
else
|
1695
|
+
parse_inline(inter, rndr, work->data, work->size);
|
1696
|
+
}
|
1697
|
+
|
1698
|
+
/* render of li itself */
|
1699
|
+
if (rndr->cb.listitem)
|
1700
|
+
rndr->cb.listitem(ob, inter, *flags, rndr->opaque);
|
1701
|
+
|
1702
|
+
rndr_popbuf(rndr, BUFFER_SPAN);
|
1703
|
+
rndr_popbuf(rndr, BUFFER_SPAN);
|
1704
|
+
return beg;
|
1705
|
+
}
|
1706
|
+
|
1707
|
+
|
1708
|
+
/* parse_list • parsing ordered or unordered list block */
|
1709
|
+
static size_t
|
1710
|
+
parse_list(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int flags)
|
1711
|
+
{
|
1712
|
+
struct buf *work = 0;
|
1713
|
+
size_t i = 0, j;
|
1714
|
+
|
1715
|
+
work = rndr_newbuf(rndr, BUFFER_BLOCK);
|
1716
|
+
|
1717
|
+
while (i < size) {
|
1718
|
+
j = parse_listitem(work, rndr, data + i, size - i, &flags);
|
1719
|
+
i += j;
|
1720
|
+
|
1721
|
+
if (!j || (flags & MKD_LI_END))
|
1722
|
+
break;
|
1723
|
+
}
|
1724
|
+
|
1725
|
+
if (rndr->cb.list)
|
1726
|
+
rndr->cb.list(ob, work, flags, rndr->opaque);
|
1727
|
+
rndr_popbuf(rndr, BUFFER_BLOCK);
|
1728
|
+
return i;
|
1729
|
+
}
|
1730
|
+
|
1731
|
+
/* parse_atxheader • parsing of atx-style headers */
|
1732
|
+
static size_t
|
1733
|
+
parse_atxheader(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
|
1734
|
+
{
|
1735
|
+
size_t level = 0;
|
1736
|
+
size_t i, end, skip;
|
1737
|
+
|
1738
|
+
while (level < size && level < 6 && data[level] == '#')
|
1739
|
+
level++;
|
1740
|
+
|
1741
|
+
for (i = level; i < size && data[i] == ' '; i++);
|
1742
|
+
|
1743
|
+
for (end = i; end < size && data[end] != '\n'; end++);
|
1744
|
+
skip = end;
|
1745
|
+
|
1746
|
+
while (end && data[end - 1] == '#')
|
1747
|
+
end--;
|
1748
|
+
|
1749
|
+
while (end && data[end - 1] == ' ')
|
1750
|
+
end--;
|
1751
|
+
|
1752
|
+
if (end > i) {
|
1753
|
+
struct buf *work = rndr_newbuf(rndr, BUFFER_SPAN);
|
1754
|
+
|
1755
|
+
parse_inline(work, rndr, data + i, end - i);
|
1756
|
+
|
1757
|
+
if (rndr->cb.header)
|
1758
|
+
rndr->cb.header(ob, work, (int)level, rndr->opaque);
|
1759
|
+
|
1760
|
+
rndr_popbuf(rndr, BUFFER_SPAN);
|
1761
|
+
}
|
1762
|
+
|
1763
|
+
return skip;
|
1764
|
+
}
|
1765
|
+
|
1766
|
+
|
1767
|
+
/* htmlblock_end • checking end of HTML block : </tag>[ \t]*\n[ \t*]\n */
|
1768
|
+
/* returns the length on match, 0 otherwise */
|
1769
|
+
static size_t
|
1770
|
+
htmlblock_end_tag(
|
1771
|
+
const char *tag,
|
1772
|
+
size_t tag_len,
|
1773
|
+
struct sd_markdown *rndr,
|
1774
|
+
uint8_t *data,
|
1775
|
+
size_t size)
|
1776
|
+
{
|
1777
|
+
size_t i, w;
|
1778
|
+
|
1779
|
+
/* checking if tag is a match */
|
1780
|
+
if (tag_len + 3 >= size ||
|
1781
|
+
strncasecmp((char *)data + 2, tag, tag_len) != 0 ||
|
1782
|
+
data[tag_len + 2] != '>')
|
1783
|
+
return 0;
|
1784
|
+
|
1785
|
+
/* checking white lines */
|
1786
|
+
i = tag_len + 3;
|
1787
|
+
w = 0;
|
1788
|
+
if (i < size && (w = is_empty(data + i, size - i)) == 0)
|
1789
|
+
return 0; /* non-blank after tag */
|
1790
|
+
i += w;
|
1791
|
+
w = 0;
|
1792
|
+
|
1793
|
+
if (rndr->ext_flags & MKDEXT_LAX_HTML_BLOCKS) {
|
1794
|
+
if (i < size)
|
1795
|
+
w = is_empty(data + i, size - i);
|
1796
|
+
} else {
|
1797
|
+
if (i < size && (w = is_empty(data + i, size - i)) == 0)
|
1798
|
+
return 0; /* non-blank line after tag line */
|
1799
|
+
}
|
1800
|
+
|
1801
|
+
return i + w;
|
1802
|
+
}
|
1803
|
+
|
1804
|
+
/* htmlblock_end • searching for the closing tag of an HTML block */
/*	Scans data for "</" followed by curtag and validates each candidate
 *	with htmlblock_end_tag. When start_of_line is non-zero, candidates
 *	past the first line are accepted only directly after a newline.
 *	Returns the length of the whole block, or 0 if no closing tag matched. */
static size_t
htmlblock_end(const char *curtag,
	struct sd_markdown *rndr,
	uint8_t *data,
	size_t size,
	int start_of_line)
{
	const size_t tag_len = strlen(curtag);
	size_t pos = 1;
	int newlines = 0;

	while (pos < size) {
		size_t matched;

		/* advance until data[pos - 1], data[pos] spell "</",
		 * counting the newlines crossed on the way */
		for (pos++; pos < size && (data[pos - 1] != '<' || data[pos] != '/'); pos++) {
			if (data[pos] == '\n')
				newlines++;
		}

		/* In unindented mode a closing tag must sit right after a
		 * newline, except while still on the opening line, where
		 * any position counts. */
		if (start_of_line && newlines > 0 && data[pos - 2] != '\n')
			continue;

		/* not enough room left for "</tag>" */
		if (pos + 2 + tag_len >= size)
			break;

		matched = htmlblock_end_tag(curtag, tag_len, rndr, data + pos - 1, size - pos + 1);
		if (matched)
			return pos + matched - 1;
	}

	return 0;
}
|
1844
|
+
|
1845
|
+
|
1846
|
+
/* parse_htmlblock • parsing of inline HTML block */
|
1847
|
+
static size_t
|
1848
|
+
parse_htmlblock(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int do_render)
|
1849
|
+
{
|
1850
|
+
size_t i, j = 0, tag_end;
|
1851
|
+
const char *curtag = NULL;
|
1852
|
+
struct buf work = { data, 0, 0, 0 };
|
1853
|
+
|
1854
|
+
/* identification of the opening tag */
|
1855
|
+
if (size < 2 || data[0] != '<')
|
1856
|
+
return 0;
|
1857
|
+
|
1858
|
+
i = 1;
|
1859
|
+
while (i < size && data[i] != '>' && data[i] != ' ')
|
1860
|
+
i++;
|
1861
|
+
|
1862
|
+
if (i < size)
|
1863
|
+
curtag = find_block_tag((char *)data + 1, (int)i - 1);
|
1864
|
+
|
1865
|
+
/* handling of special cases */
|
1866
|
+
if (!curtag) {
|
1867
|
+
|
1868
|
+
/* HTML comment, laxist form */
|
1869
|
+
if (size > 5 && data[1] == '!' && data[2] == '-' && data[3] == '-') {
|
1870
|
+
i = 5;
|
1871
|
+
|
1872
|
+
while (i < size && !(data[i - 2] == '-' && data[i - 1] == '-' && data[i] == '>'))
|
1873
|
+
i++;
|
1874
|
+
|
1875
|
+
i++;
|
1876
|
+
|
1877
|
+
if (i < size)
|
1878
|
+
j = is_empty(data + i, size - i);
|
1879
|
+
|
1880
|
+
if (j) {
|
1881
|
+
work.size = i + j;
|
1882
|
+
if (do_render && rndr->cb.blockhtml)
|
1883
|
+
rndr->cb.blockhtml(ob, &work, rndr->opaque);
|
1884
|
+
return work.size;
|
1885
|
+
}
|
1886
|
+
}
|
1887
|
+
|
1888
|
+
/* HR, which is the only self-closing block tag considered */
|
1889
|
+
if (size > 4 && (data[1] == 'h' || data[1] == 'H') && (data[2] == 'r' || data[2] == 'R')) {
|
1890
|
+
i = 3;
|
1891
|
+
while (i < size && data[i] != '>')
|
1892
|
+
i++;
|
1893
|
+
|
1894
|
+
if (i + 1 < size) {
|
1895
|
+
i++;
|
1896
|
+
j = is_empty(data + i, size - i);
|
1897
|
+
if (j) {
|
1898
|
+
work.size = i + j;
|
1899
|
+
if (do_render && rndr->cb.blockhtml)
|
1900
|
+
rndr->cb.blockhtml(ob, &work, rndr->opaque);
|
1901
|
+
return work.size;
|
1902
|
+
}
|
1903
|
+
}
|
1904
|
+
}
|
1905
|
+
|
1906
|
+
/* no special case recognised */
|
1907
|
+
return 0;
|
1908
|
+
}
|
1909
|
+
|
1910
|
+
/* looking for an unindented matching closing tag */
|
1911
|
+
/* followed by a blank line */
|
1912
|
+
tag_end = htmlblock_end(curtag, rndr, data, size, 1);
|
1913
|
+
|
1914
|
+
/* if not found, trying a second pass looking for indented match */
|
1915
|
+
/* but not if tag is "ins" or "del" (following original Markdown.pl) */
|
1916
|
+
if (!tag_end && strcmp(curtag, "ins") != 0 && strcmp(curtag, "del") != 0) {
|
1917
|
+
tag_end = htmlblock_end(curtag, rndr, data, size, 0);
|
1918
|
+
}
|
1919
|
+
|
1920
|
+
if (!tag_end)
|
1921
|
+
return 0;
|
1922
|
+
|
1923
|
+
/* the end of the block has been found */
|
1924
|
+
work.size = tag_end;
|
1925
|
+
if (do_render && rndr->cb.blockhtml)
|
1926
|
+
rndr->cb.blockhtml(ob, &work, rndr->opaque);
|
1927
|
+
|
1928
|
+
return tag_end;
|
1929
|
+
}
|
1930
|
+
|
1931
|
+
static void
|
1932
|
+
parse_table_row(
|
1933
|
+
struct buf *ob,
|
1934
|
+
struct sd_markdown *rndr,
|
1935
|
+
uint8_t *data,
|
1936
|
+
size_t size,
|
1937
|
+
size_t columns,
|
1938
|
+
int *col_data,
|
1939
|
+
int header_flag)
|
1940
|
+
{
|
1941
|
+
size_t i = 0, col;
|
1942
|
+
struct buf *row_work = 0;
|
1943
|
+
|
1944
|
+
if (!rndr->cb.table_cell || !rndr->cb.table_row)
|
1945
|
+
return;
|
1946
|
+
|
1947
|
+
row_work = rndr_newbuf(rndr, BUFFER_SPAN);
|
1948
|
+
|
1949
|
+
if (i < size && data[i] == '|')
|
1950
|
+
i++;
|
1951
|
+
|
1952
|
+
for (col = 0; col < columns && i < size; ++col) {
|
1953
|
+
size_t cell_start, cell_end;
|
1954
|
+
struct buf *cell_work;
|
1955
|
+
|
1956
|
+
cell_work = rndr_newbuf(rndr, BUFFER_SPAN);
|
1957
|
+
|
1958
|
+
while (i < size && _isspace(data[i]))
|
1959
|
+
i++;
|
1960
|
+
|
1961
|
+
cell_start = i;
|
1962
|
+
|
1963
|
+
while (i < size && data[i] != '|')
|
1964
|
+
i++;
|
1965
|
+
|
1966
|
+
cell_end = i - 1;
|
1967
|
+
|
1968
|
+
while (cell_end > cell_start && _isspace(data[cell_end]))
|
1969
|
+
cell_end--;
|
1970
|
+
|
1971
|
+
parse_inline(cell_work, rndr, data + cell_start, 1 + cell_end - cell_start);
|
1972
|
+
rndr->cb.table_cell(row_work, cell_work, col_data[col] | header_flag, rndr->opaque);
|
1973
|
+
|
1974
|
+
rndr_popbuf(rndr, BUFFER_SPAN);
|
1975
|
+
i++;
|
1976
|
+
}
|
1977
|
+
|
1978
|
+
for (; col < columns; ++col) {
|
1979
|
+
struct buf empty_cell = { 0, 0, 0, 0 };
|
1980
|
+
rndr->cb.table_cell(row_work, &empty_cell, col_data[col] | header_flag, rndr->opaque);
|
1981
|
+
}
|
1982
|
+
|
1983
|
+
rndr->cb.table_row(ob, row_work, rndr->opaque);
|
1984
|
+
|
1985
|
+
rndr_popbuf(rndr, BUFFER_SPAN);
|
1986
|
+
}
|
1987
|
+
|
1988
|
+
static size_t
|
1989
|
+
parse_table_header(
|
1990
|
+
struct buf *ob,
|
1991
|
+
struct sd_markdown *rndr,
|
1992
|
+
uint8_t *data,
|
1993
|
+
size_t size,
|
1994
|
+
size_t *columns,
|
1995
|
+
int **column_data)
|
1996
|
+
{
|
1997
|
+
int pipes;
|
1998
|
+
size_t i = 0, col, header_end, under_end;
|
1999
|
+
|
2000
|
+
pipes = 0;
|
2001
|
+
while (i < size && data[i] != '\n')
|
2002
|
+
if (data[i++] == '|')
|
2003
|
+
pipes++;
|
2004
|
+
|
2005
|
+
if (i == size || pipes == 0)
|
2006
|
+
return 0;
|
2007
|
+
|
2008
|
+
header_end = i;
|
2009
|
+
|
2010
|
+
while (header_end > 0 && _isspace(data[header_end - 1]))
|
2011
|
+
header_end--;
|
2012
|
+
|
2013
|
+
if (data[0] == '|')
|
2014
|
+
pipes--;
|
2015
|
+
|
2016
|
+
if (header_end && data[header_end - 1] == '|')
|
2017
|
+
pipes--;
|
2018
|
+
|
2019
|
+
*columns = pipes + 1;
|
2020
|
+
*column_data = calloc(*columns, sizeof(int));
|
2021
|
+
|
2022
|
+
/* Parse the header underline */
|
2023
|
+
i++;
|
2024
|
+
if (i < size && data[i] == '|')
|
2025
|
+
i++;
|
2026
|
+
|
2027
|
+
under_end = i;
|
2028
|
+
while (under_end < size && data[under_end] != '\n')
|
2029
|
+
under_end++;
|
2030
|
+
|
2031
|
+
for (col = 0; col < *columns && i < under_end; ++col) {
|
2032
|
+
size_t dashes = 0;
|
2033
|
+
|
2034
|
+
while (i < under_end && data[i] == ' ')
|
2035
|
+
i++;
|
2036
|
+
|
2037
|
+
if (data[i] == ':') {
|
2038
|
+
i++; (*column_data)[col] |= MKD_TABLE_ALIGN_L;
|
2039
|
+
dashes++;
|
2040
|
+
}
|
2041
|
+
|
2042
|
+
while (i < under_end && data[i] == '-') {
|
2043
|
+
i++; dashes++;
|
2044
|
+
}
|
2045
|
+
|
2046
|
+
if (i < under_end && data[i] == ':') {
|
2047
|
+
i++; (*column_data)[col] |= MKD_TABLE_ALIGN_R;
|
2048
|
+
dashes++;
|
2049
|
+
}
|
2050
|
+
|
2051
|
+
while (i < under_end && data[i] == ' ')
|
2052
|
+
i++;
|
2053
|
+
|
2054
|
+
if (i < under_end && data[i] != '|')
|
2055
|
+
break;
|
2056
|
+
|
2057
|
+
if (dashes < 3)
|
2058
|
+
break;
|
2059
|
+
|
2060
|
+
i++;
|
2061
|
+
}
|
2062
|
+
|
2063
|
+
if (col < *columns)
|
2064
|
+
return 0;
|
2065
|
+
|
2066
|
+
parse_table_row(
|
2067
|
+
ob, rndr, data,
|
2068
|
+
header_end,
|
2069
|
+
*columns,
|
2070
|
+
*column_data,
|
2071
|
+
MKD_TABLE_HEADER
|
2072
|
+
);
|
2073
|
+
|
2074
|
+
return under_end + 1;
|
2075
|
+
}
|
2076
|
+
|
2077
|
+
static size_t
|
2078
|
+
parse_table(
|
2079
|
+
struct buf *ob,
|
2080
|
+
struct sd_markdown *rndr,
|
2081
|
+
uint8_t *data,
|
2082
|
+
size_t size)
|
2083
|
+
{
|
2084
|
+
size_t i;
|
2085
|
+
|
2086
|
+
struct buf *header_work = 0;
|
2087
|
+
struct buf *body_work = 0;
|
2088
|
+
|
2089
|
+
size_t columns;
|
2090
|
+
int *col_data = NULL;
|
2091
|
+
|
2092
|
+
header_work = rndr_newbuf(rndr, BUFFER_SPAN);
|
2093
|
+
body_work = rndr_newbuf(rndr, BUFFER_BLOCK);
|
2094
|
+
|
2095
|
+
i = parse_table_header(header_work, rndr, data, size, &columns, &col_data);
|
2096
|
+
if (i > 0) {
|
2097
|
+
|
2098
|
+
while (i < size) {
|
2099
|
+
size_t row_start;
|
2100
|
+
int pipes = 0;
|
2101
|
+
|
2102
|
+
row_start = i;
|
2103
|
+
|
2104
|
+
while (i < size && data[i] != '\n')
|
2105
|
+
if (data[i++] == '|')
|
2106
|
+
pipes++;
|
2107
|
+
|
2108
|
+
if (pipes == 0 || i == size) {
|
2109
|
+
i = row_start;
|
2110
|
+
break;
|
2111
|
+
}
|
2112
|
+
|
2113
|
+
parse_table_row(
|
2114
|
+
body_work,
|
2115
|
+
rndr,
|
2116
|
+
data + row_start,
|
2117
|
+
i - row_start,
|
2118
|
+
columns,
|
2119
|
+
col_data, 0
|
2120
|
+
);
|
2121
|
+
|
2122
|
+
i++;
|
2123
|
+
}
|
2124
|
+
|
2125
|
+
if (rndr->cb.table)
|
2126
|
+
rndr->cb.table(ob, header_work, body_work, rndr->opaque);
|
2127
|
+
}
|
2128
|
+
|
2129
|
+
free(col_data);
|
2130
|
+
rndr_popbuf(rndr, BUFFER_SPAN);
|
2131
|
+
rndr_popbuf(rndr, BUFFER_BLOCK);
|
2132
|
+
return i;
|
2133
|
+
}
|
2134
|
+
|
2135
|
+
/* parse_block • parsing of one block, returning next uint8_t to parse */
|
2136
|
+
static void
|
2137
|
+
parse_block(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size)
|
2138
|
+
{
|
2139
|
+
size_t beg, end, i;
|
2140
|
+
uint8_t *txt_data;
|
2141
|
+
beg = 0;
|
2142
|
+
|
2143
|
+
if (rndr->work_bufs[BUFFER_SPAN].size +
|
2144
|
+
rndr->work_bufs[BUFFER_BLOCK].size > rndr->max_nesting)
|
2145
|
+
return;
|
2146
|
+
|
2147
|
+
while (beg < size) {
|
2148
|
+
txt_data = data + beg;
|
2149
|
+
end = size - beg;
|
2150
|
+
|
2151
|
+
if (is_atxheader(rndr, txt_data, end))
|
2152
|
+
beg += parse_atxheader(ob, rndr, txt_data, end);
|
2153
|
+
|
2154
|
+
else if (data[beg] == '<' && rndr->cb.blockhtml &&
|
2155
|
+
(i = parse_htmlblock(ob, rndr, txt_data, end, 1)) != 0)
|
2156
|
+
beg += i;
|
2157
|
+
|
2158
|
+
else if ((i = is_empty(txt_data, end)) != 0)
|
2159
|
+
beg += i;
|
2160
|
+
|
2161
|
+
else if (is_hrule(txt_data, end)) {
|
2162
|
+
if (rndr->cb.hrule)
|
2163
|
+
rndr->cb.hrule(ob, rndr->opaque);
|
2164
|
+
|
2165
|
+
while (beg < size && data[beg] != '\n')
|
2166
|
+
beg++;
|
2167
|
+
|
2168
|
+
beg++;
|
2169
|
+
}
|
2170
|
+
|
2171
|
+
else if ((rndr->ext_flags & MKDEXT_FENCED_CODE) != 0 &&
|
2172
|
+
(i = parse_fencedcode(ob, rndr, txt_data, end)) != 0)
|
2173
|
+
beg += i;
|
2174
|
+
|
2175
|
+
else if ((rndr->ext_flags & MKDEXT_TABLES) != 0 &&
|
2176
|
+
(i = parse_table(ob, rndr, txt_data, end)) != 0)
|
2177
|
+
beg += i;
|
2178
|
+
|
2179
|
+
else if (prefix_quote(txt_data, end))
|
2180
|
+
beg += parse_blockquote(ob, rndr, txt_data, end);
|
2181
|
+
|
2182
|
+
else if (prefix_code(txt_data, end))
|
2183
|
+
beg += parse_blockcode(ob, rndr, txt_data, end);
|
2184
|
+
|
2185
|
+
else if (prefix_uli(txt_data, end))
|
2186
|
+
beg += parse_list(ob, rndr, txt_data, end, 0);
|
2187
|
+
|
2188
|
+
else if (prefix_oli(txt_data, end))
|
2189
|
+
beg += parse_list(ob, rndr, txt_data, end, MKD_LIST_ORDERED);
|
2190
|
+
|
2191
|
+
else
|
2192
|
+
beg += parse_paragraph(ob, rndr, txt_data, end);
|
2193
|
+
}
|
2194
|
+
}
|
2195
|
+
|
2196
|
+
|
2197
|
+
|
2198
|
+
/*********************
|
2199
|
+
* REFERENCE PARSING *
|
2200
|
+
*********************/
|
2201
|
+
|
2202
|
+
/* is_ref • returns whether a line is a reference or not */
|
2203
|
+
static int
|
2204
|
+
is_ref(const uint8_t *data, size_t beg, size_t end, size_t *last, struct link_ref **refs)
|
2205
|
+
{
|
2206
|
+
/* int n; */
|
2207
|
+
size_t i = 0;
|
2208
|
+
size_t id_offset, id_end;
|
2209
|
+
size_t link_offset, link_end;
|
2210
|
+
size_t title_offset, title_end;
|
2211
|
+
size_t line_end;
|
2212
|
+
|
2213
|
+
/* up to 3 optional leading spaces */
|
2214
|
+
if (beg + 3 >= end) return 0;
|
2215
|
+
if (data[beg] == ' ') { i = 1;
|
2216
|
+
if (data[beg + 1] == ' ') { i = 2;
|
2217
|
+
if (data[beg + 2] == ' ') { i = 3;
|
2218
|
+
if (data[beg + 3] == ' ') return 0; } } }
|
2219
|
+
i += beg;
|
2220
|
+
|
2221
|
+
/* id part: anything but a newline between brackets */
|
2222
|
+
if (data[i] != '[') return 0;
|
2223
|
+
i++;
|
2224
|
+
id_offset = i;
|
2225
|
+
while (i < end && data[i] != '\n' && data[i] != '\r' && data[i] != ']')
|
2226
|
+
i++;
|
2227
|
+
if (i >= end || data[i] != ']') return 0;
|
2228
|
+
id_end = i;
|
2229
|
+
|
2230
|
+
/* spacer: colon (space | tab)* newline? (space | tab)* */
|
2231
|
+
i++;
|
2232
|
+
if (i >= end || data[i] != ':') return 0;
|
2233
|
+
i++;
|
2234
|
+
while (i < end && data[i] == ' ') i++;
|
2235
|
+
if (i < end && (data[i] == '\n' || data[i] == '\r')) {
|
2236
|
+
i++;
|
2237
|
+
if (i < end && data[i] == '\r' && data[i - 1] == '\n') i++; }
|
2238
|
+
while (i < end && data[i] == ' ') i++;
|
2239
|
+
if (i >= end) return 0;
|
2240
|
+
|
2241
|
+
/* link: whitespace-free sequence, optionally between angle brackets */
|
2242
|
+
if (data[i] == '<')
|
2243
|
+
i++;
|
2244
|
+
|
2245
|
+
link_offset = i;
|
2246
|
+
|
2247
|
+
while (i < end && data[i] != ' ' && data[i] != '\n' && data[i] != '\r')
|
2248
|
+
i++;
|
2249
|
+
|
2250
|
+
if (data[i - 1] == '>') link_end = i - 1;
|
2251
|
+
else link_end = i;
|
2252
|
+
|
2253
|
+
/* optional spacer: (space | tab)* (newline | '\'' | '"' | '(' ) */
|
2254
|
+
while (i < end && data[i] == ' ') i++;
|
2255
|
+
if (i < end && data[i] != '\n' && data[i] != '\r'
|
2256
|
+
&& data[i] != '\'' && data[i] != '"' && data[i] != '(')
|
2257
|
+
return 0;
|
2258
|
+
line_end = 0;
|
2259
|
+
/* computing end-of-line */
|
2260
|
+
if (i >= end || data[i] == '\r' || data[i] == '\n') line_end = i;
|
2261
|
+
if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r')
|
2262
|
+
line_end = i + 1;
|
2263
|
+
|
2264
|
+
/* optional (space|tab)* spacer after a newline */
|
2265
|
+
if (line_end) {
|
2266
|
+
i = line_end + 1;
|
2267
|
+
while (i < end && data[i] == ' ') i++; }
|
2268
|
+
|
2269
|
+
/* optional title: any non-newline sequence enclosed in '"()
|
2270
|
+
alone on its line */
|
2271
|
+
title_offset = title_end = 0;
|
2272
|
+
if (i + 1 < end
|
2273
|
+
&& (data[i] == '\'' || data[i] == '"' || data[i] == '(')) {
|
2274
|
+
i++;
|
2275
|
+
title_offset = i;
|
2276
|
+
/* looking for EOL */
|
2277
|
+
while (i < end && data[i] != '\n' && data[i] != '\r') i++;
|
2278
|
+
if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r')
|
2279
|
+
title_end = i + 1;
|
2280
|
+
else title_end = i;
|
2281
|
+
/* stepping back */
|
2282
|
+
i -= 1;
|
2283
|
+
while (i > title_offset && data[i] == ' ')
|
2284
|
+
i -= 1;
|
2285
|
+
if (i > title_offset
|
2286
|
+
&& (data[i] == '\'' || data[i] == '"' || data[i] == ')')) {
|
2287
|
+
line_end = title_end;
|
2288
|
+
title_end = i; } }
|
2289
|
+
|
2290
|
+
if (!line_end || link_end == link_offset)
|
2291
|
+
return 0; /* garbage after the link empty link */
|
2292
|
+
|
2293
|
+
/* a valid ref has been found, filling-in return structures */
|
2294
|
+
if (last)
|
2295
|
+
*last = line_end;
|
2296
|
+
|
2297
|
+
if (refs) {
|
2298
|
+
struct link_ref *ref;
|
2299
|
+
|
2300
|
+
ref = add_link_ref(refs, data + id_offset, id_end - id_offset);
|
2301
|
+
if (!ref)
|
2302
|
+
return 0;
|
2303
|
+
|
2304
|
+
ref->link = bufnew(link_end - link_offset);
|
2305
|
+
bufput(ref->link, data + link_offset, link_end - link_offset);
|
2306
|
+
|
2307
|
+
if (title_end > title_offset) {
|
2308
|
+
ref->title = bufnew(title_end - title_offset);
|
2309
|
+
bufput(ref->title, data + title_offset, title_end - title_offset);
|
2310
|
+
}
|
2311
|
+
}
|
2312
|
+
|
2313
|
+
return 1;
|
2314
|
+
}
|
2315
|
+
|
2316
|
+
/* expand_tabs • copying of one line with tabs replaced by spaces */
/*	Every tab becomes one to four spaces, enough to reach the next column
 *	that is a multiple of 4. Columns are counted in bytes from the start
 *	of the line. */
static void expand_tabs(struct buf *ob, const uint8_t *line, size_t size)
{
	size_t pos = 0, column = 0;

	while (pos < size) {
		size_t run = pos;

		/* copy the stretch of non-tab bytes in one go */
		while (pos < size && line[pos] != '\t')
			pos++;

		if (pos > run) {
			bufput(ob, line + run, pos - run);
			column += pos - run;
		}

		if (pos >= size)
			break;

		/* at least one space, then pad up to the tab stop */
		do {
			bufputc(ob, ' ');
			column++;
		} while (column % 4);

		pos++;
	}
}
|
2340
|
+
|
2341
|
+
/**********************
|
2342
|
+
* EXPORTED FUNCTIONS *
|
2343
|
+
**********************/
|
2344
|
+
|
2345
|
+
struct sd_markdown *
|
2346
|
+
sd_markdown_new(
|
2347
|
+
unsigned int extensions,
|
2348
|
+
size_t max_nesting,
|
2349
|
+
const struct sd_callbacks *callbacks,
|
2350
|
+
void *opaque)
|
2351
|
+
{
|
2352
|
+
struct sd_markdown *md = NULL;
|
2353
|
+
|
2354
|
+
assert(max_nesting > 0 && callbacks);
|
2355
|
+
|
2356
|
+
md = malloc(sizeof(struct sd_markdown));
|
2357
|
+
if (!md)
|
2358
|
+
return NULL;
|
2359
|
+
|
2360
|
+
memcpy(&md->cb, callbacks, sizeof(struct sd_callbacks));
|
2361
|
+
|
2362
|
+
stack_init(&md->work_bufs[BUFFER_BLOCK], 4);
|
2363
|
+
stack_init(&md->work_bufs[BUFFER_SPAN], 8);
|
2364
|
+
|
2365
|
+
memset(md->active_char, 0x0, 256);
|
2366
|
+
|
2367
|
+
if (md->cb.emphasis || md->cb.double_emphasis || md->cb.triple_emphasis) {
|
2368
|
+
md->active_char['*'] = MD_CHAR_EMPHASIS;
|
2369
|
+
md->active_char['_'] = MD_CHAR_EMPHASIS;
|
2370
|
+
if (extensions & MKDEXT_STRIKETHROUGH)
|
2371
|
+
md->active_char['~'] = MD_CHAR_EMPHASIS;
|
2372
|
+
}
|
2373
|
+
|
2374
|
+
if (md->cb.codespan)
|
2375
|
+
md->active_char['`'] = MD_CHAR_CODESPAN;
|
2376
|
+
|
2377
|
+
if (md->cb.linebreak)
|
2378
|
+
md->active_char['\n'] = MD_CHAR_LINEBREAK;
|
2379
|
+
|
2380
|
+
if (md->cb.image || md->cb.link)
|
2381
|
+
md->active_char['['] = MD_CHAR_LINK;
|
2382
|
+
|
2383
|
+
md->active_char['<'] = MD_CHAR_LANGLE;
|
2384
|
+
md->active_char['\\'] = MD_CHAR_ESCAPE;
|
2385
|
+
md->active_char['&'] = MD_CHAR_ENTITITY;
|
2386
|
+
|
2387
|
+
if (extensions & MKDEXT_AUTOLINK) {
|
2388
|
+
md->active_char[':'] = MD_CHAR_AUTOLINK_URL;
|
2389
|
+
md->active_char['@'] = MD_CHAR_AUTOLINK_EMAIL;
|
2390
|
+
md->active_char['w'] = MD_CHAR_AUTOLINK_WWW;
|
2391
|
+
}
|
2392
|
+
|
2393
|
+
if (extensions & MKDEXT_SUPERSCRIPT)
|
2394
|
+
md->active_char['^'] = MD_CHAR_SUPERSCRIPT;
|
2395
|
+
|
2396
|
+
/* Extension data */
|
2397
|
+
md->ext_flags = extensions;
|
2398
|
+
md->opaque = opaque;
|
2399
|
+
md->max_nesting = max_nesting;
|
2400
|
+
md->in_link_body = 0;
|
2401
|
+
|
2402
|
+
return md;
|
2403
|
+
}
|
2404
|
+
|
2405
|
+
void
|
2406
|
+
sd_markdown_render(struct buf *ob, const uint8_t *document, size_t doc_size, struct sd_markdown *md)
|
2407
|
+
{
|
2408
|
+
#define MARKDOWN_GROW(x) ((x) + ((x) >> 1))
|
2409
|
+
static const char UTF8_BOM[] = {0xEF, 0xBB, 0xBF};
|
2410
|
+
|
2411
|
+
struct buf *text;
|
2412
|
+
size_t beg, end;
|
2413
|
+
|
2414
|
+
text = bufnew(64);
|
2415
|
+
if (!text)
|
2416
|
+
return;
|
2417
|
+
|
2418
|
+
/* Preallocate enough space for our buffer to avoid expanding while copying */
|
2419
|
+
bufgrow(text, doc_size);
|
2420
|
+
|
2421
|
+
/* reset the references table */
|
2422
|
+
memset(&md->refs, 0x0, REF_TABLE_SIZE * sizeof(void *));
|
2423
|
+
|
2424
|
+
/* first pass: looking for references, copying everything else */
|
2425
|
+
beg = 0;
|
2426
|
+
|
2427
|
+
/* Skip a possible UTF-8 BOM, even though the Unicode standard
|
2428
|
+
* discourages having these in UTF-8 documents */
|
2429
|
+
if (doc_size >= 3 && memcmp(document, UTF8_BOM, 3) == 0)
|
2430
|
+
beg += 3;
|
2431
|
+
|
2432
|
+
while (beg < doc_size) /* iterating over lines */
|
2433
|
+
if (is_ref(document, beg, doc_size, &end, md->refs))
|
2434
|
+
beg = end;
|
2435
|
+
else { /* skipping to the next line */
|
2436
|
+
end = beg;
|
2437
|
+
while (end < doc_size && document[end] != '\n' && document[end] != '\r')
|
2438
|
+
end++;
|
2439
|
+
|
2440
|
+
/* adding the line body if present */
|
2441
|
+
if (end > beg)
|
2442
|
+
expand_tabs(text, document + beg, end - beg);
|
2443
|
+
|
2444
|
+
while (end < doc_size && (document[end] == '\n' || document[end] == '\r')) {
|
2445
|
+
/* add one \n per newline */
|
2446
|
+
if (document[end] == '\n' || (end + 1 < doc_size && document[end + 1] != '\n'))
|
2447
|
+
bufputc(text, '\n');
|
2448
|
+
end++;
|
2449
|
+
}
|
2450
|
+
|
2451
|
+
beg = end;
|
2452
|
+
}
|
2453
|
+
|
2454
|
+
/* pre-grow the output buffer to minimize allocations */
|
2455
|
+
bufgrow(ob, MARKDOWN_GROW(text->size));
|
2456
|
+
|
2457
|
+
/* second pass: actual rendering */
|
2458
|
+
if (md->cb.doc_header)
|
2459
|
+
md->cb.doc_header(ob, md->opaque);
|
2460
|
+
|
2461
|
+
if (text->size) {
|
2462
|
+
/* adding a final newline if not already present */
|
2463
|
+
if (text->data[text->size - 1] != '\n' && text->data[text->size - 1] != '\r')
|
2464
|
+
bufputc(text, '\n');
|
2465
|
+
|
2466
|
+
parse_block(ob, md, text->data, text->size);
|
2467
|
+
}
|
2468
|
+
|
2469
|
+
if (md->cb.doc_footer)
|
2470
|
+
md->cb.doc_footer(ob, md->opaque);
|
2471
|
+
|
2472
|
+
/* clean-up */
|
2473
|
+
bufrelease(text);
|
2474
|
+
free_link_refs(md->refs);
|
2475
|
+
|
2476
|
+
assert(md->work_bufs[BUFFER_SPAN].size == 0);
|
2477
|
+
assert(md->work_bufs[BUFFER_BLOCK].size == 0);
|
2478
|
+
}
|
2479
|
+
|
2480
|
+
void
|
2481
|
+
sd_markdown_free(struct sd_markdown *md)
|
2482
|
+
{
|
2483
|
+
size_t i;
|
2484
|
+
|
2485
|
+
for (i = 0; i < (size_t)md->work_bufs[BUFFER_SPAN].asize; ++i)
|
2486
|
+
bufrelease(md->work_bufs[BUFFER_SPAN].item[i]);
|
2487
|
+
|
2488
|
+
for (i = 0; i < (size_t)md->work_bufs[BUFFER_BLOCK].asize; ++i)
|
2489
|
+
bufrelease(md->work_bufs[BUFFER_BLOCK].item[i]);
|
2490
|
+
|
2491
|
+
stack_free(&md->work_bufs[BUFFER_SPAN]);
|
2492
|
+
stack_free(&md->work_bufs[BUFFER_BLOCK]);
|
2493
|
+
|
2494
|
+
free(md);
|
2495
|
+
}
|
2496
|
+
|
2497
|
+
void
|
2498
|
+
sd_version(int *ver_major, int *ver_minor, int *ver_revision)
|
2499
|
+
{
|
2500
|
+
*ver_major = UPSKIRT_VER_MAJOR;
|
2501
|
+
*ver_minor = UPSKIRT_VER_MINOR;
|
2502
|
+
*ver_revision = UPSKIRT_VER_REVISION;
|
2503
|
+
}
|
2504
|
+
|
2505
|
+
/* vim: set filetype=c: */
|