github-markdown 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +87 -0
- data/bin/gfm +26 -0
- data/ext/markdown/autolink.c +264 -0
- data/ext/markdown/autolink.h +36 -0
- data/ext/markdown/buffer.c +223 -0
- data/ext/markdown/buffer.h +88 -0
- data/ext/markdown/extconf.rb +4 -0
- data/ext/markdown/gh-markdown.c +204 -0
- data/ext/markdown/houdini.h +29 -0
- data/ext/markdown/houdini_href_e.c +108 -0
- data/ext/markdown/houdini_html_e.c +84 -0
- data/ext/markdown/html.c +635 -0
- data/ext/markdown/html.h +69 -0
- data/ext/markdown/html_blocks.h +206 -0
- data/ext/markdown/markdown.c +2505 -0
- data/ext/markdown/markdown.h +130 -0
- data/ext/markdown/stack.c +81 -0
- data/ext/markdown/stack.h +21 -0
- data/github-markdown.gemspec +40 -0
- data/lib/github/markdown.rb +38 -0
- data/test/gfm_test.rb +26 -0
- metadata +98 -0
@@ -0,0 +1,88 @@
|
|
1
|
+
/*
|
2
|
+
* Copyright (c) 2008, Natacha Porté
|
3
|
+
* Copyright (c) 2011, Vicent Martí
|
4
|
+
*
|
5
|
+
* Permission to use, copy, modify, and distribute this software for any
|
6
|
+
* purpose with or without fee is hereby granted, provided that the above
|
7
|
+
* copyright notice and this permission notice appear in all copies.
|
8
|
+
*
|
9
|
+
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
10
|
+
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
11
|
+
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
12
|
+
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
13
|
+
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
14
|
+
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
15
|
+
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
16
|
+
*/
|
17
|
+
|
18
|
+
#ifndef BUFFER_H__
|
19
|
+
#define BUFFER_H__
|
20
|
+
|
21
|
+
#include <stddef.h>
|
22
|
+
#include <stdarg.h>
|
23
|
+
#include <stdint.h>
|
24
|
+
|
25
|
+
#if defined(_MSC_VER)
|
26
|
+
#define __attribute__(x)
|
27
|
+
#define inline
|
28
|
+
#endif
|
29
|
+
|
30
|
+
/*
 * buferror_t: result codes used by the buffer API.
 * NOTE(review): BUF_ENOMEM presumably signals an allocation failure (the
 * name mirrors ENOMEM) -- confirm against buffer.c.
 */
enum buferror {
	BUF_OK = 0,
	BUF_ENOMEM = -1
};

typedef enum buferror buferror_t;
|
34
|
+
|
35
|
+
/* struct buf: character array buffer */
struct buf {
	uint8_t *data;	/* actual character data */
	size_t size;	/* size of the string */
	size_t asize;	/* allocated size (0 = volatile buffer) */
	size_t unit;	/* reallocation unit size (0 = read-only buffer) */
};

/*
 * BUF_STATIC: initializer for a buffer backed by a string literal.
 *
 * `string` must be a literal (or a char array) so that sizeof yields its
 * storage size: `size` excludes the terminating NUL, `asize` includes it,
 * and `unit` is 0 (read-only). The initializer has exactly one value per
 * member of struct buf.
 */
#define BUF_STATIC(string) \
	{ (uint8_t *)(string), sizeof(string) - 1, sizeof(string), 0 }

/*
 * BUF_VOLATILE: initializer for a volatile buffer wrapping an existing
 * NUL-terminated string; `asize` and `unit` are both 0.
 *
 * Expands to a call to strlen(), so <string.h> must be included at the
 * point of use, and `strname` is evaluated twice.
 */
#define BUF_VOLATILE(strname) \
	{ (uint8_t *)(strname), strlen(strname), 0, 0 }

/*
 * BUFPUTSL: bufput of a string literal whose length is computed at compile
 * time with sizeof (the terminating NUL is not appended).
 */
#define BUFPUTSL(output, literal) \
	bufput((output), (literal), sizeof(literal) - 1)
|
54
|
+
|
55
|
+
/*
 * bufgrow: increasing the allocated size to the given value.
 * NOTE(review): the int result is presumably a buferror_t code -- confirm
 * against buffer.c.
 */
int bufgrow(struct buf *buf, size_t new_size);

/*
 * bufnew: allocation of a new buffer.
 * NOTE(review): the argument is presumably the reallocation unit stored in
 * struct buf -- confirm against buffer.c.
 */
struct buf *bufnew(size_t unit) __attribute__ ((malloc));

/* bufcstr: NUL-termination of the string array (making a C-string) */
const char *bufcstr(struct buf *buf);

/* bufprefix: compare the beginning of a buffer with a string */
int bufprefix(const struct buf *buf, const char *prefix);

/* bufput: appends raw data to a buffer */
void bufput(struct buf *buf, const void *data, size_t len);

/* bufputs: appends a NUL-terminated string to a buffer */
void bufputs(struct buf *buf, const char *str);

/* bufputc: appends a single char to a buffer */
void bufputc(struct buf *buf, int c);

/*
 * bufrelease: decrease the reference count and free the buffer if needed.
 * NOTE(review): struct buf as declared in this header carries no reference
 * count -- confirm the actual release semantics against buffer.c.
 */
void bufrelease(struct buf *buf);

/* bufreset: frees internal data of the buffer */
void bufreset(struct buf *buf);

/* bufslurp: removes a given number of bytes from the head of the array */
void bufslurp(struct buf *buf, size_t len);

/* bufprintf: formatted printing to a buffer (printf-style format string) */
void bufprintf(struct buf *buf, const char *fmt, ...) __attribute__ ((format (printf, 2, 3)));
|
87
|
+
|
88
|
+
#endif
|
@@ -0,0 +1,204 @@
|
|
1
|
+
/*
|
2
|
+
* Copyright (c) 2012, GitHub, Inc
|
3
|
+
*
|
4
|
+
* Permission to use, copy, modify, and distribute this software for any
|
5
|
+
* purpose with or without fee is hereby granted, provided that the above
|
6
|
+
* copyright notice and this permission notice appear in all copies.
|
7
|
+
*
|
8
|
+
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
9
|
+
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
10
|
+
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
11
|
+
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
12
|
+
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
13
|
+
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
14
|
+
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
15
|
+
*/
|
16
|
+
|
17
|
+
#include <ruby.h>
|
18
|
+
|
19
|
+
#ifdef HAVE_RUBY_ENCODING_H
|
20
|
+
# include <ruby/encoding.h>
|
21
|
+
# define geefem_str_new(str, len) rb_enc_str_new(str, len, rb_utf8_encoding())
|
22
|
+
#else
|
23
|
+
# define geefem_str_new(str, len) rb_str_new(str, len)
|
24
|
+
#endif
|
25
|
+
|
26
|
+
#include "markdown.h"
|
27
|
+
#include "html.h"
|
28
|
+
|
29
|
+
static struct {
|
30
|
+
struct sd_markdown *md;
|
31
|
+
struct html_renderopt render_opts;
|
32
|
+
} g_markdown, g_GFM;
|
33
|
+
|
34
|
+
static void
|
35
|
+
rndr_blockcode_github(
|
36
|
+
struct buf *ob,
|
37
|
+
const struct buf *text,
|
38
|
+
const struct buf *lang,
|
39
|
+
void *opaque)
|
40
|
+
{
|
41
|
+
if (ob->size)
|
42
|
+
bufputc(ob, '\n');
|
43
|
+
|
44
|
+
if (!text || !text->size) {
|
45
|
+
BUFPUTSL(ob, "<pre><code></code></pre>");
|
46
|
+
return;
|
47
|
+
}
|
48
|
+
|
49
|
+
if (lang && lang->size) {
|
50
|
+
size_t i = 0, lang_size;
|
51
|
+
const char *lang_name = NULL;
|
52
|
+
|
53
|
+
while (i < lang->size && !isspace(lang->data[i]))
|
54
|
+
i++;
|
55
|
+
|
56
|
+
if (lang->data[0] == '.') {
|
57
|
+
lang_name = lang->data + 1;
|
58
|
+
lang_size = i - 1;
|
59
|
+
} else {
|
60
|
+
lang_name = lang->data;
|
61
|
+
lang_size = i;
|
62
|
+
}
|
63
|
+
|
64
|
+
if (rb_block_given_p()) {
|
65
|
+
VALUE hilight;
|
66
|
+
|
67
|
+
hilight = rb_yield_values(2,
|
68
|
+
geefem_str_new(text->data, text->size),
|
69
|
+
geefem_str_new(lang_name, lang_size));
|
70
|
+
|
71
|
+
if (!NIL_P(hilight)) {
|
72
|
+
Check_Type(hilight, T_STRING);
|
73
|
+
bufput(ob, RSTRING_PTR(hilight), RSTRING_LEN(hilight));
|
74
|
+
return;
|
75
|
+
}
|
76
|
+
}
|
77
|
+
|
78
|
+
BUFPUTSL(ob, "<pre lang=\"");
|
79
|
+
houdini_escape_html0(ob, lang_name, lang_size, 0);
|
80
|
+
BUFPUTSL(ob, "\"><code>");
|
81
|
+
|
82
|
+
} else {
|
83
|
+
BUFPUTSL(ob, "<pre><code>");
|
84
|
+
}
|
85
|
+
|
86
|
+
houdini_escape_html0(ob, text->data, text->size, 0);
|
87
|
+
BUFPUTSL(ob, "</code></pre>\n");
|
88
|
+
}
|
89
|
+
|
90
|
+
/* Arguments handed to ghmd_render_protected() through rb_protect(). */
struct ghmd_render_args {
	struct buf *output;
	VALUE text;
	struct sd_markdown *md;
};

/*
 * Render args->text into args->output and build the resulting Ruby string.
 * Runs under rb_protect() so that any Ruby exception or non-local exit
 * raised while rendering (e.g. by the highlighter block) is caught by the
 * caller instead of unwinding past it.
 */
static VALUE ghmd_render_protected(VALUE arg)
{
	struct ghmd_render_args *args = (struct ghmd_render_args *)arg;

	sd_markdown_render(args->output,
		RSTRING_PTR(args->text), RSTRING_LEN(args->text), args->md);

	return geefem_str_new((const char *)args->output->data, args->output->size);
}

/*
 * GitHub::Markdown.to_html(text, mode)
 *
 *   rb_text - String to render; nil yields nil
 *   rb_mode - Symbol, :markdown or :gfm, selecting the pipeline
 *
 * Returns the rendered HTML as a new String. Raises TypeError when rb_mode
 * is not a Symbol or names an unknown mode, or when rb_text is not a String.
 *
 * The output buffer is always released, including when rendering is
 * interrupted by an exception; the interruption is then re-raised.
 * NOTE(review): the parser's own state after an interrupted render is not
 * visible from here -- confirm sd_markdown_render tolerates being unwound.
 */
static VALUE rb_ghmd_to_html(VALUE self, VALUE rb_text, VALUE rb_mode)
{
	struct ghmd_render_args args;
	VALUE rb_html;
	ID mode;
	int state = 0;

	(void)self;

	if (NIL_P(rb_text))
		return Qnil;

	Check_Type(rb_mode, T_SYMBOL);
	mode = SYM2ID(rb_mode);

	/* check for rendering mode */
	if (mode == rb_intern("markdown")) {
		args.md = g_markdown.md;
	} else if (mode == rb_intern("gfm")) {
		args.md = g_GFM.md;
	} else {
		rb_raise(rb_eTypeError, "Invalid render mode");
	}

	Check_Type(rb_text, T_STRING);
	args.text = rb_text;

	/* initialize buffers; a NULL result is treated as allocation failure */
	args.output = bufnew(128);
	if (!args.output)
		rb_memerror();

	/* render the magic and build the Ruby string */
	rb_html = rb_protect(ghmd_render_protected, (VALUE)&args, &state);

	bufrelease(args.output);

	/* propagate whatever interrupted the render, now that nothing leaks */
	if (state)
		rb_jump_tag(state);

	return rb_html;
}
|
125
|
+
|
126
|
+
|
127
|
+
/* Upper bound on recursive nesting while a Markdown document is parsed */
static const int GITHUB_MD_NESTING = 16;
|
129
|
+
|
130
|
+
/* Default flags for all Markdown pipelines:
|
131
|
+
*
|
132
|
+
* - NO_INTRA_EMPHASIS: disallow emphasis inside of words
|
133
|
+
* - LAX_HTML_BLOCKS: do not require an empty line after
|
134
|
+
* a HTML block, unlike Markdown.pl does
|
135
|
+
* - STRIKETHROUGH: strike out words with `~~`, same semantics
|
136
|
+
* as emphasis
|
137
|
+
* - TABLES: the tables extension from PHP-Markdown extra
|
138
|
+
* - FENCED_CODE: the fenced code blocks extension from
|
139
|
+
* PHP-Markdown extra, but working with ``` besides ~~~.
|
140
|
+
*/
|
141
|
+
static const int GITHUB_MD_FLAGS =
|
142
|
+
MKDEXT_NO_INTRA_EMPHASIS |
|
143
|
+
MKDEXT_LAX_HTML_BLOCKS |
|
144
|
+
MKDEXT_STRIKETHROUGH |
|
145
|
+
MKDEXT_TABLES |
|
146
|
+
MKDEXT_FENCED_CODE;
|
147
|
+
|
148
|
+
/* Init the default pipeline */
|
149
|
+
static void rb_ghmd__init_md(void)
|
150
|
+
{
|
151
|
+
struct sd_callbacks callbacks;
|
152
|
+
|
153
|
+
/* No extra flags to the Markdown renderer */
|
154
|
+
sdhtml_renderer(&callbacks, &g_markdown.render_opts, 0);
|
155
|
+
callbacks.blockcode = &rndr_blockcode_github;
|
156
|
+
|
157
|
+
g_markdown.md = sd_markdown_new(
|
158
|
+
GITHUB_MD_FLAGS,
|
159
|
+
GITHUB_MD_NESTING,
|
160
|
+
&callbacks,
|
161
|
+
&g_markdown.render_opts
|
162
|
+
);
|
163
|
+
}
|
164
|
+
|
165
|
+
/* Init the GFM pipeline */
|
166
|
+
static void rb_ghmd__init_gfm(void)
|
167
|
+
{
|
168
|
+
struct sd_callbacks callbacks;
|
169
|
+
|
170
|
+
/*
|
171
|
+
* The following extensions to the HTML output are enabled:
|
172
|
+
*
|
173
|
+
* - HARD_WRAP: line breaks are replaced with <br>
|
174
|
+
* entities
|
175
|
+
*/
|
176
|
+
sdhtml_renderer(&callbacks, &g_GFM.render_opts, HTML_HARD_WRAP);
|
177
|
+
callbacks.blockcode = &rndr_blockcode_github;
|
178
|
+
|
179
|
+
/* The following extensions to the parser are enabled, on top
|
180
|
+
* of the common ones:
|
181
|
+
*
|
182
|
+
* - SPACE_HEADERS: require a space between the `#` and the
|
183
|
+
* name of a header (prevents collisions with the Issues
|
184
|
+
* filter)
|
185
|
+
*/
|
186
|
+
g_GFM.md = sd_markdown_new(
|
187
|
+
GITHUB_MD_FLAGS | MKDEXT_SPACE_HEADERS,
|
188
|
+
GITHUB_MD_NESTING,
|
189
|
+
&callbacks,
|
190
|
+
&g_GFM.render_opts
|
191
|
+
);
|
192
|
+
}
|
193
|
+
|
194
|
+
void Init_markdown()
|
195
|
+
{
|
196
|
+
VALUE rb_mGitHub = rb_const_get(rb_cObject, rb_intern("GitHub"));
|
197
|
+
VALUE rb_cMarkdown = rb_define_class_under(rb_mGitHub, "Markdown", rb_cObject);
|
198
|
+
|
199
|
+
rb_define_singleton_method(rb_cMarkdown, "to_html", rb_ghmd_to_html, 2);
|
200
|
+
|
201
|
+
rb_ghmd__init_md();
|
202
|
+
rb_ghmd__init_gfm();
|
203
|
+
}
|
204
|
+
|
@@ -0,0 +1,29 @@
|
|
1
|
+
#ifndef HOUDINI_H__
|
2
|
+
#define HOUDINI_H__
|
3
|
+
|
4
|
+
#include "buffer.h"
|
5
|
+
|
6
|
+
/*
 * Digit classification helpers.
 *
 * With HOUDINI_USE_LOCALE the <ctype.h> functions are used (the including
 * file must provide <ctype.h>). Otherwise the current locale is not
 * trusted and plain range checks are used. The range checks evaluate
 * their argument exactly once and never classify NUL as a digit (a
 * strchr()-based lookup would, because strchr matches the terminating NUL
 * of the search string).
 */
#ifdef HOUDINI_USE_LOCALE
#	define _isxdigit(c) isxdigit(c)
#	define _isdigit(c) isdigit(c)
#else
static inline int houdini_ascii_isdigit(int c)
{
	return c >= '0' && c <= '9';
}

static inline int houdini_ascii_isxdigit(int c)
{
	return (c >= '0' && c <= '9') ||
		(c >= 'a' && c <= 'f') ||
		(c >= 'A' && c <= 'F');
}

#	define _isxdigit(c) houdini_ascii_isxdigit(c)
#	define _isdigit(c) houdini_ascii_isdigit(c)
#endif
|
16
|
+
|
17
|
+
/*
 * Houdini escaping API. Every function reads `size` bytes from `src` and
 * takes `ob` as the output buffer.
 */

/* HTML: houdini_escape_html0 additionally takes a `secure` flag */
void houdini_escape_html(struct buf *ob, const uint8_t *src, size_t size);
void houdini_escape_html0(struct buf *ob, const uint8_t *src, size_t size, int secure);
void houdini_unescape_html(struct buf *ob, const uint8_t *src, size_t size);

/* XML */
void houdini_escape_xml(struct buf *ob, const uint8_t *src, size_t size);

/* URI / URL / href */
void houdini_escape_uri(struct buf *ob, const uint8_t *src, size_t size);
void houdini_escape_url(struct buf *ob, const uint8_t *src, size_t size);
void houdini_escape_href(struct buf *ob, const uint8_t *src, size_t size);
void houdini_unescape_uri(struct buf *ob, const uint8_t *src, size_t size);
void houdini_unescape_url(struct buf *ob, const uint8_t *src, size_t size);

/* JavaScript */
void houdini_escape_js(struct buf *ob, const uint8_t *src, size_t size);
void houdini_unescape_js(struct buf *ob, const uint8_t *src, size_t size);
|
28
|
+
|
29
|
+
#endif
|
@@ -0,0 +1,108 @@
|
|
1
|
+
#include <assert.h>
|
2
|
+
#include <stdio.h>
|
3
|
+
#include <string.h>
|
4
|
+
|
5
|
+
#include "houdini.h"
|
6
|
+
|
7
|
+
#define ESCAPE_GROW_FACTOR(x) (((x) * 12) / 10)
|
8
|
+
|
9
|
+
/*
 * HREF_SAFE: one entry per byte value; non-zero marks a byte that is
 * copied into an href unchanged.
 *
 * The following characters will not be escaped:
 *
 *		-_.+!*'(),%#@?=;:/,+&$ alphanum
 *
 * This character set is the union of:
 *
 *	- The characters which are safe to be in a URL
 *	- The characters which are *not* safe to be in a URL because they
 *	  are RESERVED characters.
 *
 * We assume (lazily) that any RESERVED char that appears inside a URL is
 * actually meant to have its native function (i.e. as a URL
 * component/separator) and hence needs no escaping.
 *
 * There are two exceptions: the characters & (amp) and ' (single quote)
 * do not appear in the table. They are meant to appear in the URL as
 * components, yet they require special HTML-entity escaping to generate
 * valid HTML markup.
 *
 * All other characters will be escaped to %XX.
 */
static const char HREF_SAFE[] = {
	/* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x20 */ 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
	/* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
	/* 0x60 */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
	/* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xA0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xB0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xC0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xD0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xE0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xF0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
|
52
|
+
|
53
|
+
void
|
54
|
+
houdini_escape_href(struct buf *ob, const uint8_t *src, size_t size)
|
55
|
+
{
|
56
|
+
static const char hex_chars[] = "0123456789ABCDEF";
|
57
|
+
size_t i = 0, org;
|
58
|
+
char hex_str[3];
|
59
|
+
|
60
|
+
bufgrow(ob, ESCAPE_GROW_FACTOR(size));
|
61
|
+
hex_str[0] = '%';
|
62
|
+
|
63
|
+
while (i < size) {
|
64
|
+
org = i;
|
65
|
+
while (i < size && HREF_SAFE[src[i]] != 0)
|
66
|
+
i++;
|
67
|
+
|
68
|
+
if (i > org)
|
69
|
+
bufput(ob, src + org, i - org);
|
70
|
+
|
71
|
+
/* escaping */
|
72
|
+
if (i >= size)
|
73
|
+
break;
|
74
|
+
|
75
|
+
switch (src[i]) {
|
76
|
+
/* amp appears all the time in URLs, but needs
|
77
|
+
* HTML-entity escaping to be inside an href */
|
78
|
+
case '&':
|
79
|
+
BUFPUTSL(ob, "&");
|
80
|
+
break;
|
81
|
+
|
82
|
+
/* the single quote is a valid URL character
|
83
|
+
* according to the standard; it needs HTML
|
84
|
+
* entity escaping too */
|
85
|
+
case '\'':
|
86
|
+
BUFPUTSL(ob, "'");
|
87
|
+
break;
|
88
|
+
|
89
|
+
/* the space can be escaped to %20 or a plus
|
90
|
+
* sign. we're going with the generic escape
|
91
|
+
* for now. the plus thing is more commonly seen
|
92
|
+
* when building GET strings */
|
93
|
+
#if 0
|
94
|
+
case ' ':
|
95
|
+
bufputc(ob, '+');
|
96
|
+
break;
|
97
|
+
#endif
|
98
|
+
|
99
|
+
/* every other character goes with a %XX escaping */
|
100
|
+
default:
|
101
|
+
hex_str[1] = hex_chars[(src[i] >> 4) & 0xF];
|
102
|
+
hex_str[2] = hex_chars[src[i] & 0xF];
|
103
|
+
bufput(ob, hex_str, 3);
|
104
|
+
}
|
105
|
+
|
106
|
+
i++;
|
107
|
+
}
|
108
|
+
}
|
@@ -0,0 +1,84 @@
|
|
1
|
+
#include <assert.h>
|
2
|
+
#include <stdio.h>
|
3
|
+
#include <string.h>
|
4
|
+
|
5
|
+
#include "houdini.h"
|
6
|
+
|
7
|
+
#define ESCAPE_GROW_FACTOR(x) (((x) * 12) / 10) /* this is very scientific, yes */
|
8
|
+
|
9
|
+
/**
 * HTML_ESCAPE_TABLE: one entry per byte value. Zero means the byte is
 * copied unchanged; a non-zero entry is the index of its replacement in
 * HTML_ESCAPES.
 *
 * According to the OWASP rules:
 *
 *	&  -->  &amp;
 *	<  -->  &lt;
 *	>  -->  &gt;
 *	"  -->  &quot;
 *	'  -->  &#x27;	(&apos; is not recommended)
 *	/  -->  &#x2F;	(forward slash is included as it helps end an HTML entity)
 *
 * Index assignment: '"' = 1, '&' = 2, '\'' = 3, '/' = 4, '<' = 5, '>' = 6.
 */
static const char HTML_ESCAPE_TABLE[] = {
	/* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x20 */ 0, 0, 1, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 4,
	/* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 6, 0,
	/* 0x40 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x60 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xA0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xB0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xC0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xD0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xE0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xF0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
|
38
|
+
|
39
|
+
/*
 * HTML_ESCAPES: replacement text for each escape index (1..6) stored in the
 * byte lookup table; slot 0 belongs to bytes that need no escaping and is
 * never written. The single quote and forward slash use decimal numeric
 * character references.
 */
static const char *HTML_ESCAPES[] = {
	"",		/* 0: not escaped */
	"&quot;",	/* 1: double quote */
	"&amp;",	/* 2: ampersand */
	"&#39;",	/* 3: single quote */
	"&#47;",	/* 4: forward slash */
	"&lt;",		/* 5: less-than */
	"&gt;"		/* 6: greater-than */
};
|
48
|
+
|
49
|
+
void
|
50
|
+
houdini_escape_html0(struct buf *ob, const uint8_t *src, size_t size, int secure)
|
51
|
+
{
|
52
|
+
size_t i = 0, org, esc = 0;
|
53
|
+
|
54
|
+
bufgrow(ob, ESCAPE_GROW_FACTOR(size));
|
55
|
+
|
56
|
+
while (i < size) {
|
57
|
+
org = i;
|
58
|
+
while (i < size && (esc = HTML_ESCAPE_TABLE[src[i]]) == 0)
|
59
|
+
i++;
|
60
|
+
|
61
|
+
if (i > org)
|
62
|
+
bufput(ob, src + org, i - org);
|
63
|
+
|
64
|
+
/* escaping */
|
65
|
+
if (i >= size)
|
66
|
+
break;
|
67
|
+
|
68
|
+
/* The forward slash is only escaped in secure mode */
|
69
|
+
if (src[i] == '/' && !secure) {
|
70
|
+
bufputc(ob, '/');
|
71
|
+
} else {
|
72
|
+
bufputs(ob, HTML_ESCAPES[esc]);
|
73
|
+
}
|
74
|
+
|
75
|
+
i++;
|
76
|
+
}
|
77
|
+
}
|
78
|
+
|
79
|
+
/*
 * houdini_escape_html: HTML-escape `size` bytes of `src` into `ob` in
 * secure mode, i.e. with the forward slash escaped as well.
 */
void
houdini_escape_html(struct buf *ob, const uint8_t *src, size_t size)
{
	const int secure = 1;

	houdini_escape_html0(ob, src, size, secure);
}
|
84
|
+
|