github-markdown 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +87 -0
- data/bin/gfm +26 -0
- data/ext/markdown/autolink.c +264 -0
- data/ext/markdown/autolink.h +36 -0
- data/ext/markdown/buffer.c +223 -0
- data/ext/markdown/buffer.h +88 -0
- data/ext/markdown/extconf.rb +4 -0
- data/ext/markdown/gh-markdown.c +204 -0
- data/ext/markdown/houdini.h +29 -0
- data/ext/markdown/houdini_href_e.c +108 -0
- data/ext/markdown/houdini_html_e.c +84 -0
- data/ext/markdown/html.c +635 -0
- data/ext/markdown/html.h +69 -0
- data/ext/markdown/html_blocks.h +206 -0
- data/ext/markdown/markdown.c +2505 -0
- data/ext/markdown/markdown.h +130 -0
- data/ext/markdown/stack.c +81 -0
- data/ext/markdown/stack.h +21 -0
- data/github-markdown.gemspec +40 -0
- data/lib/github/markdown.rb +38 -0
- data/test/gfm_test.rb +26 -0
- metadata +98 -0
@@ -0,0 +1,88 @@
|
|
1
|
+
/*
|
2
|
+
* Copyright (c) 2008, Natacha Porté
|
3
|
+
* Copyright (c) 2011, Vicent Martí
|
4
|
+
*
|
5
|
+
* Permission to use, copy, modify, and distribute this software for any
|
6
|
+
* purpose with or without fee is hereby granted, provided that the above
|
7
|
+
* copyright notice and this permission notice appear in all copies.
|
8
|
+
*
|
9
|
+
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
10
|
+
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
11
|
+
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
12
|
+
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
13
|
+
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
14
|
+
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
15
|
+
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
16
|
+
*/
|
17
|
+
|
18
|
+
#ifndef BUFFER_H__
|
19
|
+
#define BUFFER_H__
|
20
|
+
|
21
|
+
#include <stddef.h>
|
22
|
+
#include <stdarg.h>
|
23
|
+
#include <stdint.h>
|
24
|
+
|
25
|
+
#if defined(_MSC_VER)
|
26
|
+
#define __attribute__(x)
|
27
|
+
#define inline
|
28
|
+
#endif
|
29
|
+
|
30
|
+
/* buferror_t: status codes used by the buffer API */
typedef enum {
	/* success */
	BUF_OK = 0,

	/* a memory allocation failed */
	BUF_ENOMEM = -1
} buferror_t;
|
34
|
+
|
35
|
+
/*
 * struct buf: character array buffer
 *
 * The field order is significant: the BUF_STATIC and BUF_VOLATILE
 * initializer macros fill the members positionally.
 */
struct buf {
	/* actual character data */
	uint8_t *data;

	/* size of the string */
	size_t size;

	/* allocated size (0 = volatile buffer) */
	size_t asize;

	/* reallocation unit size (0 = read-only buffer) */
	size_t unit;
};
|
42
|
+
|
43
|
+
/*
 * BUF_STATIC: initializer for a global, read-only buffer backed by a
 * string literal.
 *
 * Expands to exactly one value per member of struct buf
 * (data, size, asize, unit): size excludes the terminating NUL, asize
 * includes it, and unit is 0 to mark the buffer as read-only.
 * The argument must be a string literal (or a char array), because its
 * length is taken with sizeof.
 */
#define BUF_STATIC(string) \
	{ (uint8_t *)(string), sizeof(string) - 1, sizeof(string), 0 }
|
46
|
+
|
47
|
+
/*
 * BUF_VOLATILE: initializer for a volatile buffer wrapping an existing
 * NUL-terminated string (typically on the stack).
 *
 * Expands to exactly one value per member of struct buf
 * (data, size, asize, unit); asize is 0, which marks the buffer as
 * volatile. The argument is evaluated twice, so it must not have side
 * effects. Uses strlen(): the including file must provide <string.h>,
 * as this header does not include it.
 */
#define BUF_VOLATILE(strname) \
	{ (uint8_t *)(strname), strlen(strname), 0, 0 }
|
50
|
+
|
51
|
+
/*
 * BUFPUTSL: optimized bufputs of a string literal.
 *
 * The length is computed at compile time with sizeof (minus the
 * terminating NUL), so `literal` must be a string literal or a char
 * array, never a plain pointer.
 */
#define BUFPUTSL(output, literal) \
	bufput((output), (literal), sizeof(literal) - 1)
|
54
|
+
|
55
|
+
/*
 * bufgrow: increasing the allocated size to the given value.
 * Returns an int status -- presumably BUF_OK or BUF_ENOMEM from
 * buferror_t; confirm against the implementation in buffer.c.
 */
int bufgrow(struct buf *buf, size_t new_size);

/* bufnew: allocation of a new buffer; `unit` is the reallocation unit size */
struct buf *bufnew(size_t unit) __attribute__ ((malloc));

/*
 * bufcstr: NUL-termination of the string array (making a C-string).
 * Takes a non-const buffer, so it may modify `buf` to make room for the
 * terminator.
 */
const char *bufcstr(struct buf *buf);

/* bufprefix: compare the beginning of a buffer with a string */
int bufprefix(const struct buf *buf, const char *prefix);

/* bufput: appends `len` bytes of raw data to a buffer */
void bufput(struct buf *buf, const void *data, size_t len);

/* bufputs: appends a NUL-terminated string to a buffer */
void bufputs(struct buf *buf, const char *str);

/* bufputc: appends a single char to a buffer */
void bufputc(struct buf *buf, int c);

/*
 * bufrelease: decrease the reference count and free the buffer if needed.
 * NOTE(review): struct buf as declared in this header has no reference
 * count member -- this description may be stale; confirm against buffer.c.
 */
void bufrelease(struct buf *buf);

/* bufreset: frees internal data of the buffer */
void bufreset(struct buf *buf);

/* bufslurp: removes a given number of bytes from the head of the array */
void bufslurp(struct buf *buf, size_t len);

/* bufprintf: formatted printing to a buffer (printf-style format string) */
void bufprintf(struct buf *buf, const char *fmt, ...) __attribute__ ((format (printf, 2, 3)));
|
87
|
+
|
88
|
+
#endif
|
@@ -0,0 +1,204 @@
|
|
1
|
+
/*
|
2
|
+
* Copyright (c) 2012, GitHub, Inc
|
3
|
+
*
|
4
|
+
* Permission to use, copy, modify, and distribute this software for any
|
5
|
+
* purpose with or without fee is hereby granted, provided that the above
|
6
|
+
* copyright notice and this permission notice appear in all copies.
|
7
|
+
*
|
8
|
+
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
9
|
+
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
10
|
+
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
11
|
+
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
12
|
+
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
13
|
+
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
14
|
+
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
15
|
+
*/
|
16
|
+
|
17
|
+
#include <ruby.h>
|
18
|
+
|
19
|
+
#ifdef HAVE_RUBY_ENCODING_H
|
20
|
+
# include <ruby/encoding.h>
|
21
|
+
# define geefem_str_new(str, len) rb_enc_str_new(str, len, rb_utf8_encoding())
|
22
|
+
#else
|
23
|
+
# define geefem_str_new(str, len) rb_str_new(str, len)
|
24
|
+
#endif
|
25
|
+
|
26
|
+
#include "markdown.h"
|
27
|
+
#include "html.h"
|
28
|
+
|
29
|
+
static struct {
|
30
|
+
struct sd_markdown *md;
|
31
|
+
struct html_renderopt render_opts;
|
32
|
+
} g_markdown, g_GFM;
|
33
|
+
|
34
|
+
static void
|
35
|
+
rndr_blockcode_github(
|
36
|
+
struct buf *ob,
|
37
|
+
const struct buf *text,
|
38
|
+
const struct buf *lang,
|
39
|
+
void *opaque)
|
40
|
+
{
|
41
|
+
if (ob->size)
|
42
|
+
bufputc(ob, '\n');
|
43
|
+
|
44
|
+
if (!text || !text->size) {
|
45
|
+
BUFPUTSL(ob, "<pre><code></code></pre>");
|
46
|
+
return;
|
47
|
+
}
|
48
|
+
|
49
|
+
if (lang && lang->size) {
|
50
|
+
size_t i = 0, lang_size;
|
51
|
+
const char *lang_name = NULL;
|
52
|
+
|
53
|
+
while (i < lang->size && !isspace(lang->data[i]))
|
54
|
+
i++;
|
55
|
+
|
56
|
+
if (lang->data[0] == '.') {
|
57
|
+
lang_name = lang->data + 1;
|
58
|
+
lang_size = i - 1;
|
59
|
+
} else {
|
60
|
+
lang_name = lang->data;
|
61
|
+
lang_size = i;
|
62
|
+
}
|
63
|
+
|
64
|
+
if (rb_block_given_p()) {
|
65
|
+
VALUE hilight;
|
66
|
+
|
67
|
+
hilight = rb_yield_values(2,
|
68
|
+
geefem_str_new(text->data, text->size),
|
69
|
+
geefem_str_new(lang_name, lang_size));
|
70
|
+
|
71
|
+
if (!NIL_P(hilight)) {
|
72
|
+
Check_Type(hilight, T_STRING);
|
73
|
+
bufput(ob, RSTRING_PTR(hilight), RSTRING_LEN(hilight));
|
74
|
+
return;
|
75
|
+
}
|
76
|
+
}
|
77
|
+
|
78
|
+
BUFPUTSL(ob, "<pre lang=\"");
|
79
|
+
houdini_escape_html0(ob, lang_name, lang_size, 0);
|
80
|
+
BUFPUTSL(ob, "\"><code>");
|
81
|
+
|
82
|
+
} else {
|
83
|
+
BUFPUTSL(ob, "<pre><code>");
|
84
|
+
}
|
85
|
+
|
86
|
+
houdini_escape_html0(ob, text->data, text->size, 0);
|
87
|
+
BUFPUTSL(ob, "</code></pre>\n");
|
88
|
+
}
|
89
|
+
|
90
|
+
/* Arguments for ghmd_render_body(), passed through rb_ensure() as a VALUE */
struct ghmd_render_job {
	struct buf *output;
	struct sd_markdown *md;
	VALUE text;
};

/* Renders job->text into job->output and returns the result as a Ruby string */
static VALUE ghmd_render_body(VALUE arg)
{
	struct ghmd_render_job *job = (struct ghmd_render_job *)arg;

	/* render the magic */
	sd_markdown_render(job->output, RSTRING_PTR(job->text), RSTRING_LEN(job->text), job->md);

	/* build the Ruby string */
	return geefem_str_new((const char *)job->output->data, job->output->size);
}

/* rb_ensure() cleanup: releases the output buffer on normal and exceptional exit */
static VALUE ghmd_render_cleanup(VALUE arg)
{
	bufrelease((struct buf *)arg);
	return Qnil;
}

/*
 * GitHub::Markdown.to_html(text, mode)
 *
 * Renders `rb_text` with the pipeline selected by the Symbol `rb_mode`
 * (:markdown or :gfm) and returns the HTML as a new Ruby string.
 * Returns nil when `rb_text` is nil. Raises TypeError when `rb_mode` is
 * not a Symbol, names an unknown mode, or when `rb_text` is not a
 * String; raises NoMemoryError when the output buffer cannot be
 * allocated.
 *
 * Rendering can raise (the code-block callback yields to a Ruby block
 * and type-checks its result), so it runs under rb_ensure() to
 * guarantee that the output buffer is released.
 */
static VALUE rb_ghmd_to_html(VALUE self, VALUE rb_text, VALUE rb_mode)
{
	struct ghmd_render_job job;
	ID mode;

	if (NIL_P(rb_text))
		return Qnil;

	Check_Type(rb_mode, T_SYMBOL);
	mode = SYM2ID(rb_mode);

	/* check for rendering mode */
	job.md = NULL;
	if (mode == rb_intern("markdown")) {
		job.md = g_markdown.md;
	} else if (mode == rb_intern("gfm")) {
		job.md = g_GFM.md;
	} else {
		rb_raise(rb_eTypeError, "Invalid render mode");
	}

	Check_Type(rb_text, T_STRING);

	/* initialize buffers */
	job.text = rb_text;
	job.output = bufnew(128);
	if (!job.output)
		rb_raise(rb_eNoMemError, "Out of memory");

	return rb_ensure(ghmd_render_body, (VALUE)&job,
		ghmd_render_cleanup, (VALUE)job.output);
}
|
125
|
+
|
126
|
+
|
127
|
+
/* Max recursion nesting when parsing Markdown documents */
enum { GITHUB_MD_NESTING = 16 };
|
129
|
+
|
130
|
+
/* Default flags for all Markdown pipelines:
|
131
|
+
*
|
132
|
+
* - NO_INTRA_EMPHASIS: disallow emphasis inside of words
|
133
|
+
* - LAX_HTML_BLOCKS: do not require an empty line after
|
134
|
+
* a HTML block, unlike Markdown.pl does
|
135
|
+
* - STRIKETHROUGH: strike out words with `~~`, same semantics
|
136
|
+
* as emphasis
|
137
|
+
* - TABLES: the tables extension from PHP-Markdown extra
|
138
|
+
* - FENCED_CODE: the fenced code blocks extension from
|
139
|
+
* PHP-Markdown extra, but working with ``` besides ~~~.
|
140
|
+
*/
|
141
|
+
static const int GITHUB_MD_FLAGS =
|
142
|
+
MKDEXT_NO_INTRA_EMPHASIS |
|
143
|
+
MKDEXT_LAX_HTML_BLOCKS |
|
144
|
+
MKDEXT_STRIKETHROUGH |
|
145
|
+
MKDEXT_TABLES |
|
146
|
+
MKDEXT_FENCED_CODE;
|
147
|
+
|
148
|
+
/* Init the default pipeline */
|
149
|
+
static void rb_ghmd__init_md(void)
|
150
|
+
{
|
151
|
+
struct sd_callbacks callbacks;
|
152
|
+
|
153
|
+
/* No extra flags to the Markdown renderer */
|
154
|
+
sdhtml_renderer(&callbacks, &g_markdown.render_opts, 0);
|
155
|
+
callbacks.blockcode = &rndr_blockcode_github;
|
156
|
+
|
157
|
+
g_markdown.md = sd_markdown_new(
|
158
|
+
GITHUB_MD_FLAGS,
|
159
|
+
GITHUB_MD_NESTING,
|
160
|
+
&callbacks,
|
161
|
+
&g_markdown.render_opts
|
162
|
+
);
|
163
|
+
}
|
164
|
+
|
165
|
+
/* Init the GFM pipeline */
|
166
|
+
static void rb_ghmd__init_gfm(void)
|
167
|
+
{
|
168
|
+
struct sd_callbacks callbacks;
|
169
|
+
|
170
|
+
/*
|
171
|
+
* The following extensions to the HTML output are enabled:
|
172
|
+
*
|
173
|
+
* - HARD_WRAP: line breaks are replaced with <br>
|
174
|
+
* entities
|
175
|
+
*/
|
176
|
+
sdhtml_renderer(&callbacks, &g_GFM.render_opts, HTML_HARD_WRAP);
|
177
|
+
callbacks.blockcode = &rndr_blockcode_github;
|
178
|
+
|
179
|
+
/* The following extensions to the parser are enabled, on top
|
180
|
+
* of the common ones:
|
181
|
+
*
|
182
|
+
* - SPACE_HEADERS: require a space between the `#` and the
|
183
|
+
* name of a header (prevents collisions with the Issues
|
184
|
+
* filter)
|
185
|
+
*/
|
186
|
+
g_GFM.md = sd_markdown_new(
|
187
|
+
GITHUB_MD_FLAGS | MKDEXT_SPACE_HEADERS,
|
188
|
+
GITHUB_MD_NESTING,
|
189
|
+
&callbacks,
|
190
|
+
&g_GFM.render_opts
|
191
|
+
);
|
192
|
+
}
|
193
|
+
|
194
|
+
void Init_markdown()
|
195
|
+
{
|
196
|
+
VALUE rb_mGitHub = rb_const_get(rb_cObject, rb_intern("GitHub"));
|
197
|
+
VALUE rb_cMarkdown = rb_define_class_under(rb_mGitHub, "Markdown", rb_cObject);
|
198
|
+
|
199
|
+
rb_define_singleton_method(rb_cMarkdown, "to_html", rb_ghmd_to_html, 2);
|
200
|
+
|
201
|
+
rb_ghmd__init_md();
|
202
|
+
rb_ghmd__init_gfm();
|
203
|
+
}
|
204
|
+
|
@@ -0,0 +1,29 @@
|
|
1
|
+
#ifndef HOUDINI_H__
|
2
|
+
#define HOUDINI_H__
|
3
|
+
|
4
|
+
#include "buffer.h"
|
5
|
+
|
6
|
+
#ifdef HOUDINI_USE_LOCALE
#	define _isxdigit(c) isxdigit(c)
#	define _isdigit(c) isdigit(c)
#else
/*
 * Helper _isdigit methods -- do not trust the current locale
 *
 * _isxdigit searches exactly the 22 hex-digit characters with memchr.
 * A strchr-based search would also match the string's terminating NUL
 * and so report the byte 0 as a hex digit. The argument is still
 * evaluated only once. Requires <string.h> from the including file.
 */
#	define _isxdigit(c) (memchr("0123456789ABCDEFabcdef", (c), 22) != NULL)
#	define _isdigit(c) ((c) >= '0' && (c) <= '9')
#endif
|
16
|
+
|
17
|
+
/*
 * Houdini escaping API. Every function takes an output buffer `ob`
 * plus `size` bytes of input at `src`.
 */

/* HTML escaping; houdini_escape_html0 takes an extra `secure` flag */
void houdini_escape_html(struct buf *ob, const uint8_t *src, size_t size);
void houdini_escape_html0(struct buf *ob, const uint8_t *src, size_t size, int secure);
void houdini_unescape_html(struct buf *ob, const uint8_t *src, size_t size);

/* XML escaping */
void houdini_escape_xml(struct buf *ob, const uint8_t *src, size_t size);

/* URI / URL / href escaping */
void houdini_escape_uri(struct buf *ob, const uint8_t *src, size_t size);
void houdini_escape_url(struct buf *ob, const uint8_t *src, size_t size);
void houdini_escape_href(struct buf *ob, const uint8_t *src, size_t size);
void houdini_unescape_uri(struct buf *ob, const uint8_t *src, size_t size);
void houdini_unescape_url(struct buf *ob, const uint8_t *src, size_t size);

/* JavaScript escaping */
void houdini_escape_js(struct buf *ob, const uint8_t *src, size_t size);
void houdini_unescape_js(struct buf *ob, const uint8_t *src, size_t size);
|
28
|
+
|
29
|
+
#endif
|
@@ -0,0 +1,108 @@
|
|
1
|
+
#include <assert.h>
|
2
|
+
#include <stdio.h>
|
3
|
+
#include <string.h>
|
4
|
+
|
5
|
+
#include "houdini.h"
|
6
|
+
|
7
|
+
/* Output size estimate used to pre-grow the buffer: 12/10 of the input size */
#define ESCAPE_GROW_FACTOR(x) ((12 * (x)) / 10)
|
8
|
+
|
9
|
+
/*
 * HREF_SAFE: lookup table indexed by byte value. A non-zero entry means
 * the byte is copied to the output as-is.
 *
 * The following characters will not be escaped:
 *
 *		-_.+!*'(),%#@?=;:/,+&$ alphanum
 *
 * Note that this character set is the addition of:
 *
 *	- The characters which are safe to be in an URL
 *	- The characters which are *not* safe to be in
 *	an URL because they are RESERVED characters.
 *
 * We assume (lazily) that any RESERVED char that
 * appears inside an URL is actually meant to
 * have its native function (i.e. as an URL
 * component/separator) and hence needs no escaping.
 *
 * There are two exceptions: the characters & (amp)
 * and ' (single quote) do not appear in the table.
 * They are meant to appear in the URL as components,
 * yet they require special HTML-entity escaping
 * to generate valid HTML markup.
 *
 * All other characters will be escaped to %XX.
 */
static const char HREF_SAFE[256] = {
	/* 0x00 - 0x0F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x10 - 0x1F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x20 - 0x2F */ 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x30 - 0x3F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
	/* 0x40 - 0x4F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x50 - 0x5F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
	/* 0x60 - 0x6F */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x70 - 0x7F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
	/* 0x80 - 0x8F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x90 - 0x9F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xA0 - 0xAF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xB0 - 0xBF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xC0 - 0xCF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xD0 - 0xDF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xE0 - 0xEF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xF0 - 0xFF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
|
52
|
+
|
53
|
+
void
|
54
|
+
houdini_escape_href(struct buf *ob, const uint8_t *src, size_t size)
|
55
|
+
{
|
56
|
+
static const char hex_chars[] = "0123456789ABCDEF";
|
57
|
+
size_t i = 0, org;
|
58
|
+
char hex_str[3];
|
59
|
+
|
60
|
+
bufgrow(ob, ESCAPE_GROW_FACTOR(size));
|
61
|
+
hex_str[0] = '%';
|
62
|
+
|
63
|
+
while (i < size) {
|
64
|
+
org = i;
|
65
|
+
while (i < size && HREF_SAFE[src[i]] != 0)
|
66
|
+
i++;
|
67
|
+
|
68
|
+
if (i > org)
|
69
|
+
bufput(ob, src + org, i - org);
|
70
|
+
|
71
|
+
/* escaping */
|
72
|
+
if (i >= size)
|
73
|
+
break;
|
74
|
+
|
75
|
+
switch (src[i]) {
|
76
|
+
/* amp appears all the time in URLs, but needs
|
77
|
+
* HTML-entity escaping to be inside an href */
|
78
|
+
case '&':
|
79
|
+
BUFPUTSL(ob, "&");
|
80
|
+
break;
|
81
|
+
|
82
|
+
/* the single quote is a valid URL character
|
83
|
+
* according to the standard; it needs HTML
|
84
|
+
* entity escaping too */
|
85
|
+
case '\'':
|
86
|
+
BUFPUTSL(ob, "'");
|
87
|
+
break;
|
88
|
+
|
89
|
+
/* the space can be escaped to %20 or a plus
|
90
|
+
* sign. we're going with the generic escape
|
91
|
+
* for now. the plus thing is more commonly seen
|
92
|
+
* when building GET strings */
|
93
|
+
#if 0
|
94
|
+
case ' ':
|
95
|
+
bufputc(ob, '+');
|
96
|
+
break;
|
97
|
+
#endif
|
98
|
+
|
99
|
+
/* every other character goes with a %XX escaping */
|
100
|
+
default:
|
101
|
+
hex_str[1] = hex_chars[(src[i] >> 4) & 0xF];
|
102
|
+
hex_str[2] = hex_chars[src[i] & 0xF];
|
103
|
+
bufput(ob, hex_str, 3);
|
104
|
+
}
|
105
|
+
|
106
|
+
i++;
|
107
|
+
}
|
108
|
+
}
|
@@ -0,0 +1,84 @@
|
|
1
|
+
#include <assert.h>
|
2
|
+
#include <stdio.h>
|
3
|
+
#include <string.h>
|
4
|
+
|
5
|
+
#include "houdini.h"
|
6
|
+
|
7
|
+
/* Output size estimate used to pre-grow the buffer: 12/10 of the input size */
#define ESCAPE_GROW_FACTOR(x) ((12 * (x)) / 10)
|
8
|
+
|
9
|
+
/**
 * According to the OWASP rules:
 *
 * & --> &amp;
 * < --> &lt;
 * > --> &gt;
 * " --> &quot;
 * ' --> &#x27;     &apos; is not recommended
 * / --> &#x2F;     forward slash is included as it helps end an HTML entity
 *
 * HTML_ESCAPE_TABLE is indexed by byte value. A zero entry means the
 * byte needs no escaping; a non-zero entry is the index of its
 * replacement in HTML_ESCAPES:
 * 1 = '"', 2 = '&', 3 = '\'', 4 = '/', 5 = '<', 6 = '>'.
 */
static const char HTML_ESCAPE_TABLE[256] = {
	/* 0x00 - 0x0F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x10 - 0x1F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x20 - 0x2F */ 0, 0, 1, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 4,
	/* 0x30 - 0x3F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 6, 0,
	/* 0x40 - 0x4F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x50 - 0x5F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x60 - 0x6F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x70 - 0x7F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x80 - 0x8F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x90 - 0x9F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xA0 - 0xAF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xB0 - 0xBF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xC0 - 0xCF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xD0 - 0xDF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xE0 - 0xEF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xF0 - 0xFF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
|
38
|
+
|
39
|
+
/*
 * HTML_ESCAPES: replacement strings, indexed by the non-zero codes
 * stored in HTML_ESCAPE_TABLE. Slot 0 is the "no escaping" code and is
 * never emitted.
 */
static const char *HTML_ESCAPES[] = {
	"",		/* 0: no escape */
	"&quot;",	/* 1: '"' */
	"&amp;",	/* 2: '&' */
	"&#39;",	/* 3: '\'' */
	"&#47;",	/* 4: '/' */
	"&lt;",		/* 5: '<' */
	"&gt;"		/* 6: '>' */
};
|
48
|
+
|
49
|
+
void
|
50
|
+
houdini_escape_html0(struct buf *ob, const uint8_t *src, size_t size, int secure)
|
51
|
+
{
|
52
|
+
size_t i = 0, org, esc = 0;
|
53
|
+
|
54
|
+
bufgrow(ob, ESCAPE_GROW_FACTOR(size));
|
55
|
+
|
56
|
+
while (i < size) {
|
57
|
+
org = i;
|
58
|
+
while (i < size && (esc = HTML_ESCAPE_TABLE[src[i]]) == 0)
|
59
|
+
i++;
|
60
|
+
|
61
|
+
if (i > org)
|
62
|
+
bufput(ob, src + org, i - org);
|
63
|
+
|
64
|
+
/* escaping */
|
65
|
+
if (i >= size)
|
66
|
+
break;
|
67
|
+
|
68
|
+
/* The forward slash is only escaped in secure mode */
|
69
|
+
if (src[i] == '/' && !secure) {
|
70
|
+
bufputc(ob, '/');
|
71
|
+
} else {
|
72
|
+
bufputs(ob, HTML_ESCAPES[esc]);
|
73
|
+
}
|
74
|
+
|
75
|
+
i++;
|
76
|
+
}
|
77
|
+
}
|
78
|
+
|
79
|
+
/*
 * houdini_escape_html: HTML-escapes `size` bytes from `src` into `ob`
 * in secure mode, i.e. the forward slash is escaped as well.
 */
void
houdini_escape_html(struct buf *ob, const uint8_t *src, size_t size)
{
	const int secure_mode = 1;

	houdini_escape_html0(ob, src, size, secure_mode);
}
|
84
|
+
|