github-markdown 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,88 @@
1
+ /*
2
+ * Copyright (c) 2008, Natacha Porté
3
+ * Copyright (c) 2011, Vicent Martí
4
+ *
5
+ * Permission to use, copy, modify, and distribute this software for any
6
+ * purpose with or without fee is hereby granted, provided that the above
7
+ * copyright notice and this permission notice appear in all copies.
8
+ *
9
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16
+ */
17
+
18
+ #ifndef BUFFER_H__
19
+ #define BUFFER_H__
20
+
21
+ #include <stddef.h>
22
+ #include <stdarg.h>
23
+ #include <stdint.h>
24
+
25
/* When building with MSVC (_MSC_VER defined), GCC-style __attribute__
 * annotations and the `inline` keyword are defined away to nothing. */
#ifdef _MSC_VER
#	define __attribute__(x)
#	define inline
#endif
29
+
30
/* buferror_t: status codes for buffer operations.
 * NOTE(review): BUF_ENOMEM presumably signals an allocation failure
 * (name only; no user is visible in this header) -- confirm. */
typedef enum {
	BUF_ENOMEM = -1,
	BUF_OK = 0
} buferror_t;
34
+
35
/*
 * struct buf: growable character array.
 *
 * Two field values carry a special meaning:
 *   asize == 0  marks a volatile buffer
 *   unit  == 0  marks a read-only buffer
 */
struct buf {
	/* actual character data */
	uint8_t *data;
	/* size of the string */
	size_t size;
	/* allocated size (0 = volatile buffer) */
	size_t asize;
	/* reallocation unit size (0 = read-only buffer) */
	size_t unit;
};
42
+
43
/*
 * BUF_STATIC: initializer for a read-only `struct buf` wrapping a string
 * literal.
 *
 *   data  = the literal itself
 *   size  = length without the terminating NUL
 *   asize = length including the terminating NUL
 *   unit  = 0 (read-only buffer)
 *
 * The initializer has exactly one value per field of struct buf
 * (data, size, asize, unit). `string` must be a string literal or a char
 * array, since its length is taken with sizeof.
 */
#define BUF_STATIC(string) \
	{ (uint8_t *)string, sizeof(string) - 1, sizeof(string), 0 }
46
+
47
/*
 * BUF_VOLATILE: initializer for a volatile `struct buf` that borrows an
 * existing NUL-terminated string (e.g. for a buffer on the stack).
 *
 *   data  = strname (not copied)
 *   size  = strlen(strname)
 *   asize = 0 (volatile buffer)
 *   unit  = 0 (read-only buffer)
 *
 * The initializer has exactly one value per field of struct buf.
 * The expansion calls strlen(), so <string.h> must be included at the
 * point of use; `strname` is evaluated twice.
 */
#define BUF_VOLATILE(strname) \
	{ (uint8_t *)strname, strlen(strname), 0, 0 }
50
+
51
/* BUFPUTSL: append a string literal to `output` through bufput(), taking
 * the length from sizeof (terminating NUL excluded) instead of strlen. */
#define BUFPUTSL(output, literal) \
	bufput(output, literal, sizeof(literal) - 1)
54
+
55
/* bufgrow: increasing the allocated size to the given value */
int bufgrow(struct buf *buf, size_t size);

/* bufnew: allocation of a new buffer */
struct buf *bufnew(size_t unit) __attribute__ ((malloc));

/* bufcstr: NUL-termination of the string array (making a C-string) */
const char *bufcstr(struct buf *buf);

/* bufprefix: compare the beginning of a buffer with a string */
int bufprefix(const struct buf *buf, const char *prefix);

/* bufput: appends raw data to a buffer */
void bufput(struct buf *buf, const void *data, size_t size);

/* bufputs: appends a NUL-terminated string to a buffer */
void bufputs(struct buf *buf, const char *str);

/* bufputc: appends a single char to a buffer */
void bufputc(struct buf *buf, int c);

/* bufrelease: decrease the reference count and free the buffer if needed
 * NOTE(review): struct buf as declared here has no reference-count
 * field -- confirm this description against the implementation. */
void bufrelease(struct buf *buf);

/* bufreset: frees internal data of the buffer */
void bufreset(struct buf *buf);

/* bufslurp: removes a given number of bytes from the head of the array */
void bufslurp(struct buf *buf, size_t size);

/* bufprintf: formatted printing to a buffer */
void bufprintf(struct buf *buf, const char *fmt, ...) __attribute__ ((format (printf, 2, 3)));
87
+
88
+ #endif
@@ -0,0 +1,4 @@
1
# Build configuration for the native extension: registers the
# directory options for 'github/markdown' and generates its Makefile.
require 'mkmf'

extension = 'github/markdown'

dir_config(extension)
create_makefile(extension)
@@ -0,0 +1,204 @@
1
+ /*
2
+ * Copyright (c) 2012, GitHub, Inc
3
+ *
4
+ * Permission to use, copy, modify, and distribute this software for any
5
+ * purpose with or without fee is hereby granted, provided that the above
6
+ * copyright notice and this permission notice appear in all copies.
7
+ *
8
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15
+ */
16
+
17
+ #include <ruby.h>
18
+
19
/*
 * geefem_str_new(str, len): create a Ruby String from `len` bytes at `str`.
 * When <ruby/encoding.h> is available the string is created with the
 * UTF-8 encoding; otherwise plain rb_str_new() is used.
 */
#if defined(HAVE_RUBY_ENCODING_H)
#	include <ruby/encoding.h>
#	define geefem_str_new(str, len) rb_enc_str_new(str, len, rb_utf8_encoding())
#else
#	define geefem_str_new(str, len) rb_str_new(str, len)
#endif
25
+
26
+ #include "markdown.h"
27
+ #include "html.h"
28
+
29
+ static struct {
30
+ struct sd_markdown *md;
31
+ struct html_renderopt render_opts;
32
+ } g_markdown, g_GFM;
33
+
34
+ static void
35
+ rndr_blockcode_github(
36
+ struct buf *ob,
37
+ const struct buf *text,
38
+ const struct buf *lang,
39
+ void *opaque)
40
+ {
41
+ if (ob->size)
42
+ bufputc(ob, '\n');
43
+
44
+ if (!text || !text->size) {
45
+ BUFPUTSL(ob, "<pre><code></code></pre>");
46
+ return;
47
+ }
48
+
49
+ if (lang && lang->size) {
50
+ size_t i = 0, lang_size;
51
+ const char *lang_name = NULL;
52
+
53
+ while (i < lang->size && !isspace(lang->data[i]))
54
+ i++;
55
+
56
+ if (lang->data[0] == '.') {
57
+ lang_name = lang->data + 1;
58
+ lang_size = i - 1;
59
+ } else {
60
+ lang_name = lang->data;
61
+ lang_size = i;
62
+ }
63
+
64
+ if (rb_block_given_p()) {
65
+ VALUE hilight;
66
+
67
+ hilight = rb_yield_values(2,
68
+ geefem_str_new(text->data, text->size),
69
+ geefem_str_new(lang_name, lang_size));
70
+
71
+ if (!NIL_P(hilight)) {
72
+ Check_Type(hilight, T_STRING);
73
+ bufput(ob, RSTRING_PTR(hilight), RSTRING_LEN(hilight));
74
+ return;
75
+ }
76
+ }
77
+
78
+ BUFPUTSL(ob, "<pre lang=\"");
79
+ houdini_escape_html0(ob, lang_name, lang_size, 0);
80
+ BUFPUTSL(ob, "\"><code>");
81
+
82
+ } else {
83
+ BUFPUTSL(ob, "<pre><code>");
84
+ }
85
+
86
+ houdini_escape_html0(ob, text->data, text->size, 0);
87
+ BUFPUTSL(ob, "</code></pre>\n");
88
+ }
89
+
90
+ static VALUE rb_ghmd_to_html(VALUE self, VALUE rb_text, VALUE rb_mode)
91
+ {
92
+ struct buf *output_buf;
93
+ struct sd_markdown *md = NULL;
94
+ ID mode;
95
+
96
+ if (NIL_P(rb_text))
97
+ return Qnil;
98
+
99
+ Check_Type(rb_mode, T_SYMBOL);
100
+ mode = SYM2ID(rb_mode);
101
+
102
+ /* check for rendering mode */
103
+ if (mode == rb_intern("markdown")) {
104
+ md = g_markdown.md;
105
+ } else if (mode == rb_intern("gfm")) {
106
+ md = g_GFM.md;
107
+ } else {
108
+ rb_raise(rb_eTypeError, "Invalid render mode");
109
+ }
110
+
111
+ Check_Type(rb_text, T_STRING);
112
+
113
+ /* initialize buffers */
114
+ output_buf = bufnew(128);
115
+
116
+ /* render the magic */
117
+ sd_markdown_render(output_buf, RSTRING_PTR(rb_text), RSTRING_LEN(rb_text), md);
118
+
119
+ /* build the Ruby string */
120
+ rb_text = geefem_str_new(output_buf->data, output_buf->size);
121
+
122
+ bufrelease(output_buf);
123
+ return rb_text;
124
+ }
125
+
126
+
127
/* Nesting limit handed to sd_markdown_new(): the maximum recursion depth
 * allowed while parsing a Markdown document */
static const int GITHUB_MD_NESTING = 16;
129
+
130
+ /* Default flags for all Markdown pipelines:
131
+ *
132
+ * - NO_INTRA_EMPHASIS: disallow emphasis inside of words
133
+ * - LAX_HTML_BLOCKS: do not require an empty line after
134
+ * a HTML block, unlike Markdown.pl does
135
+ * - STRIKETHROUGH: strike out words with `~~`, same semantics
136
+ * as emphasis
137
+ * - TABLES: the tables extension from PHP-Markdown extra
138
+ * - FENCED_CODE: the fenced code blocks extension from
139
+ * PHP-Markdown extra, but working with ``` besides ~~~.
140
+ */
141
+ static const int GITHUB_MD_FLAGS =
142
+ MKDEXT_NO_INTRA_EMPHASIS |
143
+ MKDEXT_LAX_HTML_BLOCKS |
144
+ MKDEXT_STRIKETHROUGH |
145
+ MKDEXT_TABLES |
146
+ MKDEXT_FENCED_CODE;
147
+
148
+ /* Init the default pipeline */
149
+ static void rb_ghmd__init_md(void)
150
+ {
151
+ struct sd_callbacks callbacks;
152
+
153
+ /* No extra flags to the Markdown renderer */
154
+ sdhtml_renderer(&callbacks, &g_markdown.render_opts, 0);
155
+ callbacks.blockcode = &rndr_blockcode_github;
156
+
157
+ g_markdown.md = sd_markdown_new(
158
+ GITHUB_MD_FLAGS,
159
+ GITHUB_MD_NESTING,
160
+ &callbacks,
161
+ &g_markdown.render_opts
162
+ );
163
+ }
164
+
165
+ /* Init the GFM pipeline */
166
+ static void rb_ghmd__init_gfm(void)
167
+ {
168
+ struct sd_callbacks callbacks;
169
+
170
+ /*
171
+ * The following extensions to the HTML output are enabled:
172
+ *
173
+ * - HARD_WRAP: line breaks are replaced with <br>
174
+ * entities
175
+ */
176
+ sdhtml_renderer(&callbacks, &g_GFM.render_opts, HTML_HARD_WRAP);
177
+ callbacks.blockcode = &rndr_blockcode_github;
178
+
179
+ /* The following extensions to the parser are enabled, on top
180
+ * of the common ones:
181
+ *
182
+ * - SPACE_HEADERS: require a space between the `#` and the
183
+ * name of a header (prevents collisions with the Issues
184
+ * filter)
185
+ */
186
+ g_GFM.md = sd_markdown_new(
187
+ GITHUB_MD_FLAGS | MKDEXT_SPACE_HEADERS,
188
+ GITHUB_MD_NESTING,
189
+ &callbacks,
190
+ &g_GFM.render_opts
191
+ );
192
+ }
193
+
194
+ void Init_markdown()
195
+ {
196
+ VALUE rb_mGitHub = rb_const_get(rb_cObject, rb_intern("GitHub"));
197
+ VALUE rb_cMarkdown = rb_define_class_under(rb_mGitHub, "Markdown", rb_cObject);
198
+
199
+ rb_define_singleton_method(rb_cMarkdown, "to_html", rb_ghmd_to_html, 2);
200
+
201
+ rb_ghmd__init_md();
202
+ rb_ghmd__init_gfm();
203
+ }
204
+
@@ -0,0 +1,29 @@
1
+ #ifndef HOUDINI_H__
2
+ #define HOUDINI_H__
3
+
4
+ #include "buffer.h"
5
+
6
/*
 * _isxdigit / _isdigit: digit classification helpers.
 *
 * With HOUDINI_USE_LOCALE they forward to the <ctype.h> functions;
 * otherwise fixed ASCII tests are used so the result does not depend on
 * the current locale.
 *
 * The locale-independent _isxdigit searches the digit set with memchr()
 * over exactly the digit characters. (strchr() would also match the
 * string's terminating NUL and report '\0' as a hex digit.)
 * Requires <string.h> at the point of use.
 */
#ifdef HOUDINI_USE_LOCALE
#	define _isxdigit(c) isxdigit(c)
#	define _isdigit(c) isdigit(c)
#else
#	define _isxdigit(c) \
	(memchr("0123456789ABCDEFabcdef", (c), sizeof("0123456789ABCDEFabcdef") - 1) != NULL)
#	define _isdigit(c) ((c) >= '0' && (c) <= '9')
#endif
16
+
17
/*
 * Escaping API. Every function takes an output buffer `ob` plus `size`
 * bytes of input at `src`.
 */

/* HTML */
void houdini_escape_html(struct buf *ob, const uint8_t *src, size_t size);
void houdini_escape_html0(struct buf *ob, const uint8_t *src, size_t size, int secure);
void houdini_unescape_html(struct buf *ob, const uint8_t *src, size_t size);

/* XML */
void houdini_escape_xml(struct buf *ob, const uint8_t *src, size_t size);

/* URI / URL / href */
void houdini_escape_uri(struct buf *ob, const uint8_t *src, size_t size);
void houdini_escape_url(struct buf *ob, const uint8_t *src, size_t size);
void houdini_escape_href(struct buf *ob, const uint8_t *src, size_t size);
void houdini_unescape_uri(struct buf *ob, const uint8_t *src, size_t size);
void houdini_unescape_url(struct buf *ob, const uint8_t *src, size_t size);

/* JavaScript */
void houdini_escape_js(struct buf *ob, const uint8_t *src, size_t size);
void houdini_unescape_js(struct buf *ob, const uint8_t *src, size_t size);
28
+
29
+ #endif
@@ -0,0 +1,108 @@
1
+ #include <assert.h>
2
+ #include <stdio.h>
3
+ #include <string.h>
4
+
5
+ #include "houdini.h"
6
+
7
/* Output size estimate for `x` input bytes: x * 1.2 in integer arithmetic */
#define ESCAPE_GROW_FACTOR(x) ((x) * 12 / 10)
8
+
9
/*
 * HREF_SAFE: one entry per byte value; non-zero means the byte is copied
 * to the output unescaped.
 *
 * The following characters will not be escaped:
 *
 *		-_.+!*'(),%#@?=;:/,+&$ alphanum
 *
 * Note that this character set is the addition of:
 *
 *	- The characters which are safe to be in an URL
 *	- The characters which are *not* safe to be in
 *	an URL because they are RESERVED characters.
 *
 * We assume (lazily) that any RESERVED char that
 * appears inside an URL is actually meant to
 * have its native function (i.e. as an URL
 * component/separator) and hence needs no escaping.
 *
 * There are two exceptions: the characters & (amp)
 * and ' (single quote) do not appear in the table.
 * They are meant to appear in the URL as components,
 * yet they require special HTML-entity escaping
 * to generate valid HTML markup.
 *
 * All other characters will be escaped to %XX.
 */
static const char HREF_SAFE[] = {
	/* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x20 */ 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
	/* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
	/* 0x60 */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
	/* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xA0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xB0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xC0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xD0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xE0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xF0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
52
+
53
+ void
54
+ houdini_escape_href(struct buf *ob, const uint8_t *src, size_t size)
55
+ {
56
+ static const char hex_chars[] = "0123456789ABCDEF";
57
+ size_t i = 0, org;
58
+ char hex_str[3];
59
+
60
+ bufgrow(ob, ESCAPE_GROW_FACTOR(size));
61
+ hex_str[0] = '%';
62
+
63
+ while (i < size) {
64
+ org = i;
65
+ while (i < size && HREF_SAFE[src[i]] != 0)
66
+ i++;
67
+
68
+ if (i > org)
69
+ bufput(ob, src + org, i - org);
70
+
71
+ /* escaping */
72
+ if (i >= size)
73
+ break;
74
+
75
+ switch (src[i]) {
76
+ /* amp appears all the time in URLs, but needs
77
+ * HTML-entity escaping to be inside an href */
78
+ case '&':
79
+ BUFPUTSL(ob, "&amp;");
80
+ break;
81
+
82
+ /* the single quote is a valid URL character
83
+ * according to the standard; it needs HTML
84
+ * entity escaping too */
85
+ case '\'':
86
+ BUFPUTSL(ob, "&#x27;");
87
+ break;
88
+
89
+ /* the space can be escaped to %20 or a plus
90
+ * sign. we're going with the generic escape
91
+ * for now. the plus thing is more commonly seen
92
+ * when building GET strings */
93
+ #if 0
94
+ case ' ':
95
+ bufputc(ob, '+');
96
+ break;
97
+ #endif
98
+
99
+ /* every other character goes with a %XX escaping */
100
+ default:
101
+ hex_str[1] = hex_chars[(src[i] >> 4) & 0xF];
102
+ hex_str[2] = hex_chars[src[i] & 0xF];
103
+ bufput(ob, hex_str, 3);
104
+ }
105
+
106
+ i++;
107
+ }
108
+ }
@@ -0,0 +1,84 @@
1
+ #include <assert.h>
2
+ #include <stdio.h>
3
+ #include <string.h>
4
+
5
+ #include "houdini.h"
6
+
7
/* Heuristic output size for `x` input bytes: x * 1.2 in integer arithmetic */
#define ESCAPE_GROW_FACTOR(x) ((x) * 12 / 10)
8
+
9
/**
 * HTML_ESCAPE_TABLE: one entry per byte value. Zero means the byte is
 * copied as-is; a non-zero entry is the index of its replacement in
 * HTML_ESCAPES.
 *
 * The escaped set follows the OWASP rules:
 *
 * 		& --> &amp;
 * 		< --> &lt;
 * 		> --> &gt;
 * 		" --> &quot;
 * 		' --> &#x27;     &apos; is not recommended
 * 		/ --> &#x2F;     forward slash is included as it helps end an HTML entity
 *
 * Note that HTML_ESCAPES spells the last two as the decimal references
 * &#39; and &#47;.
 */
static const char HTML_ESCAPE_TABLE[] = {
	/* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x20 */ 0, 0, 1, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 4,
	/* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 6, 0,
	/* 0x40 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x60 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xA0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xB0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xC0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xD0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xE0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xF0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
38
+
39
/* Replacement strings, indexed by the codes stored in HTML_ESCAPE_TABLE.
 * Index 0 is the "no escape" code and maps to the empty string. */
static const char *HTML_ESCAPES[] = {
	/* 0 */ "",
	/* 1 */ "&quot;",
	/* 2 */ "&amp;",
	/* 3 */ "&#39;",
	/* 4 */ "&#47;",
	/* 5 */ "&lt;",
	/* 6 */ "&gt;"
};
48
+
49
+ void
50
+ houdini_escape_html0(struct buf *ob, const uint8_t *src, size_t size, int secure)
51
+ {
52
+ size_t i = 0, org, esc = 0;
53
+
54
+ bufgrow(ob, ESCAPE_GROW_FACTOR(size));
55
+
56
+ while (i < size) {
57
+ org = i;
58
+ while (i < size && (esc = HTML_ESCAPE_TABLE[src[i]]) == 0)
59
+ i++;
60
+
61
+ if (i > org)
62
+ bufput(ob, src + org, i - org);
63
+
64
+ /* escaping */
65
+ if (i >= size)
66
+ break;
67
+
68
+ /* The forward slash is only escaped in secure mode */
69
+ if (src[i] == '/' && !secure) {
70
+ bufputc(ob, '/');
71
+ } else {
72
+ bufputs(ob, HTML_ESCAPES[esc]);
73
+ }
74
+
75
+ i++;
76
+ }
77
+ }
78
+
79
/* houdini_escape_html: houdini_escape_html0() with secure mode enabled,
 * i.e. the forward slash is escaped as well. */
void
houdini_escape_html(struct buf *ob, const uint8_t *src, size_t size)
{
	const int secure = 1;

	houdini_escape_html0(ob, src, size, secure);
}
84
+