github-markdown 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,88 @@
1
+ /*
2
+ * Copyright (c) 2008, Natacha Porté
3
+ * Copyright (c) 2011, Vicent Martí
4
+ *
5
+ * Permission to use, copy, modify, and distribute this software for any
6
+ * purpose with or without fee is hereby granted, provided that the above
7
+ * copyright notice and this permission notice appear in all copies.
8
+ *
9
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16
+ */
17
+
18
+ #ifndef BUFFER_H__
19
+ #define BUFFER_H__
20
+
21
+ #include <stddef.h>
22
+ #include <stdarg.h>
23
+ #include <stdint.h>
24
+
25
+ #if defined(_MSC_VER) /* compatibility shims: under MSVC, make __attribute__(x) and inline expand to nothing */
26
+ #define __attribute__(x)
27
+ #define inline
28
+ #endif
29
+
30
/* buferror_t: buffer status codes -- BUF_OK is 0, BUF_ENOMEM is -1 */
typedef enum {
	BUF_ENOMEM = -1,
	BUF_OK = 0
} buferror_t;
34
+
35
/* struct buf: character array buffer */
struct buf {
	uint8_t *data;		/* actual character data */
	size_t size;		/* size of the string */
	size_t asize;		/* allocated size (0 = volatile buffer) */
	size_t unit;		/* reallocation unit size (0 = read-only buffer) */
};

/*
 * BUF_STATIC: initializer for a read-only buffer wrapping a string literal.
 *
 * `string` must be a literal (or a char array): its length is taken with
 * sizeof, so size excludes the terminating NUL and asize includes it.
 * unit is 0, which marks the buffer as read-only.
 *
 * The initializer lists exactly one value per member of struct buf.
 */
#define BUF_STATIC(string) \
	{ (uint8_t *)(string), sizeof(string) - 1, sizeof(string), 0 }

/*
 * BUF_VOLATILE: initializer for a volatile buffer wrapping an existing
 * NUL-terminated string (asize = 0, unit = 0).
 *
 * The length is computed with strlen(), so `strname` is evaluated twice and
 * the file using this macro must include <string.h>.
 */
#define BUF_VOLATILE(strname) \
	{ (uint8_t *)(strname), strlen(strname), 0, 0 }
50
+
51
/*
 * BUFPUTSL: optimized bufputs of a string literal.
 *
 * The length is taken with sizeof (minus the terminating NUL), so `literal`
 * must be a string literal or a char array -- never a char pointer.
 */
#define BUFPUTSL(output, literal) \
	bufput((output), (literal), sizeof(literal) - 1)
54
+
55
+ /* bufgrow: increase the allocated size to the given value; returns an int status (presumably a buferror_t code -- confirm against buffer.c) */
56
+ int bufgrow(struct buf *, size_t);
57
+
58
+ /* bufnew: allocation of a new buffer; the argument is presumably the reallocation unit -- confirm against buffer.c */
59
+ struct buf *bufnew(size_t) __attribute__ ((malloc));
60
+
61
+ /* bufcstr: NUL-termination of the string array (making a C-string) */
62
+ const char *bufcstr(struct buf *);
63
+
64
+ /* bufprefix: compare the beginning of a buffer with a string */
65
+ int bufprefix(const struct buf *buf, const char *prefix);
66
+
67
+ /* bufput: appends raw data (pointer + byte count) to a buffer */
68
+ void bufput(struct buf *, const void *, size_t);
69
+
70
+ /* bufputs: appends a NUL-terminated string to a buffer */
71
+ void bufputs(struct buf *, const char *);
72
+
73
+ /* bufputc: appends a single char to a buffer */
74
+ void bufputc(struct buf *, int);
75
+
76
+ /* bufrelease: release the buffer. NOTE(review): the previous wording mentioned a reference count, but struct buf has no such field -- confirm the semantics against buffer.c */
77
+ void bufrelease(struct buf *);
78
+
79
+ /* bufreset: frees internal data of the buffer */
80
+ void bufreset(struct buf *);
81
+
82
+ /* bufslurp: removes a given number of bytes from the head of the array */
83
+ void bufslurp(struct buf *, size_t);
84
+
85
+ /* bufprintf: formatted printing to a buffer (printf-style format string, checked by the compiler where __attribute__ is supported) */
86
+ void bufprintf(struct buf *, const char *, ...) __attribute__ ((format (printf, 2, 3)));
87
+
88
+ #endif
@@ -0,0 +1,4 @@
1
+ require 'mkmf' # Makefile generator used to build the C extension
2
+
3
+ dir_config('github/markdown')
4
+ create_makefile('github/markdown') # write the Makefile for the extension named github/markdown
@@ -0,0 +1,204 @@
1
+ /*
2
+ * Copyright (c) 2012, GitHub, Inc
3
+ *
4
+ * Permission to use, copy, modify, and distribute this software for any
5
+ * purpose with or without fee is hereby granted, provided that the above
6
+ * copyright notice and this permission notice appear in all copies.
7
+ *
8
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15
+ */
16
+
17
+ #include <ruby.h>
18
+
19
+ #ifdef HAVE_RUBY_ENCODING_H /* geefem_str_new(str, len): build a Ruby string; tagged as UTF-8 when the encoding API is available */
20
+ # include <ruby/encoding.h>
21
+ # define geefem_str_new(str, len) rb_enc_str_new(str, len, rb_utf8_encoding())
22
+ #else
23
+ # define geefem_str_new(str, len) rb_str_new(str, len)
24
+ #endif
25
+
26
+ #include "markdown.h"
27
+ #include "html.h"
28
+
29
+ static struct { /* one parser plus its HTML render options per mode: g_markdown serves :markdown, g_GFM serves :gfm */
30
+ struct sd_markdown *md;
31
+ struct html_renderopt render_opts;
32
+ } g_markdown, g_GFM;
33
+
34
+ static void
35
+ rndr_blockcode_github(
36
+ struct buf *ob,
37
+ const struct buf *text,
38
+ const struct buf *lang,
39
+ void *opaque)
40
+ {
41
+ if (ob->size)
42
+ bufputc(ob, '\n');
43
+
44
+ if (!text || !text->size) {
45
+ BUFPUTSL(ob, "<pre><code></code></pre>");
46
+ return;
47
+ }
48
+
49
+ if (lang && lang->size) {
50
+ size_t i = 0, lang_size;
51
+ const char *lang_name = NULL;
52
+
53
+ while (i < lang->size && !isspace(lang->data[i]))
54
+ i++;
55
+
56
+ if (lang->data[0] == '.') {
57
+ lang_name = lang->data + 1;
58
+ lang_size = i - 1;
59
+ } else {
60
+ lang_name = lang->data;
61
+ lang_size = i;
62
+ }
63
+
64
+ if (rb_block_given_p()) {
65
+ VALUE hilight;
66
+
67
+ hilight = rb_yield_values(2,
68
+ geefem_str_new(text->data, text->size),
69
+ geefem_str_new(lang_name, lang_size));
70
+
71
+ if (!NIL_P(hilight)) {
72
+ Check_Type(hilight, T_STRING);
73
+ bufput(ob, RSTRING_PTR(hilight), RSTRING_LEN(hilight));
74
+ return;
75
+ }
76
+ }
77
+
78
+ BUFPUTSL(ob, "<pre lang=\"");
79
+ houdini_escape_html0(ob, lang_name, lang_size, 0);
80
+ BUFPUTSL(ob, "\"><code>");
81
+
82
+ } else {
83
+ BUFPUTSL(ob, "<pre><code>");
84
+ }
85
+
86
+ houdini_escape_html0(ob, text->data, text->size, 0);
87
+ BUFPUTSL(ob, "</code></pre>\n");
88
+ }
89
+
90
+ static VALUE rb_ghmd_to_html(VALUE self, VALUE rb_text, VALUE rb_mode)
91
+ {
92
+ struct buf *output_buf;
93
+ struct sd_markdown *md = NULL;
94
+ ID mode;
95
+
96
+ if (NIL_P(rb_text))
97
+ return Qnil;
98
+
99
+ Check_Type(rb_mode, T_SYMBOL);
100
+ mode = SYM2ID(rb_mode);
101
+
102
+ /* check for rendering mode */
103
+ if (mode == rb_intern("markdown")) {
104
+ md = g_markdown.md;
105
+ } else if (mode == rb_intern("gfm")) {
106
+ md = g_GFM.md;
107
+ } else {
108
+ rb_raise(rb_eTypeError, "Invalid render mode");
109
+ }
110
+
111
+ Check_Type(rb_text, T_STRING);
112
+
113
+ /* initialize buffers */
114
+ output_buf = bufnew(128);
115
+
116
+ /* render the magic */
117
+ sd_markdown_render(output_buf, RSTRING_PTR(rb_text), RSTRING_LEN(rb_text), md);
118
+
119
+ /* build the Ruby string */
120
+ rb_text = geefem_str_new(output_buf->data, output_buf->size);
121
+
122
+ bufrelease(output_buf);
123
+ return rb_text;
124
+ }
125
+
126
+
127
+ /* Max recursion nesting when parsing Markdown documents; passed to sd_markdown_new() for both pipelines */
128
+ static const int GITHUB_MD_NESTING = 16;
129
+
130
+ /* Default parser flags shared by both pipelines (the GFM pipeline ORs in SPACE_HEADERS on top):
131
+ *
132
+ * - NO_INTRA_EMPHASIS: disallow emphasis inside of words
133
+ * - LAX_HTML_BLOCKS: do not require an empty line after
134
+ * an HTML block, unlike Markdown.pl
135
+ * - STRIKETHROUGH: strike out words with `~~`, same semantics
136
+ * as emphasis
137
+ * - TABLES: the tables extension from PHP-Markdown extra
138
+ * - FENCED_CODE: the fenced code blocks extension from
139
+ * PHP-Markdown extra, but accepting ``` as well as ~~~.
140
+ */
141
+ static const int GITHUB_MD_FLAGS =
142
+ MKDEXT_NO_INTRA_EMPHASIS |
143
+ MKDEXT_LAX_HTML_BLOCKS |
144
+ MKDEXT_STRIKETHROUGH |
145
+ MKDEXT_TABLES |
146
+ MKDEXT_FENCED_CODE;
147
+
148
+ /* Init the default pipeline */
149
+ static void rb_ghmd__init_md(void)
150
+ {
151
+ struct sd_callbacks callbacks;
152
+
153
+ /* No extra flags to the Markdown renderer */
154
+ sdhtml_renderer(&callbacks, &g_markdown.render_opts, 0);
155
+ callbacks.blockcode = &rndr_blockcode_github;
156
+
157
+ g_markdown.md = sd_markdown_new(
158
+ GITHUB_MD_FLAGS,
159
+ GITHUB_MD_NESTING,
160
+ &callbacks,
161
+ &g_markdown.render_opts
162
+ );
163
+ }
164
+
165
+ /* Init the GFM pipeline */
166
+ static void rb_ghmd__init_gfm(void)
167
+ {
168
+ struct sd_callbacks callbacks;
169
+
170
+ /*
171
+ * The following extensions to the HTML output are enabled:
172
+ *
173
+ * - HARD_WRAP: line breaks are replaced with <br>
174
+ * entities
175
+ */
176
+ sdhtml_renderer(&callbacks, &g_GFM.render_opts, HTML_HARD_WRAP);
177
+ callbacks.blockcode = &rndr_blockcode_github;
178
+
179
+ /* The following extensions to the parser are enabled, on top
180
+ * of the common ones:
181
+ *
182
+ * - SPACE_HEADERS: require a space between the `#` and the
183
+ * name of a header (prevents collisions with the Issues
184
+ * filter)
185
+ */
186
+ g_GFM.md = sd_markdown_new(
187
+ GITHUB_MD_FLAGS | MKDEXT_SPACE_HEADERS,
188
+ GITHUB_MD_NESTING,
189
+ &callbacks,
190
+ &g_GFM.render_opts
191
+ );
192
+ }
193
+
194
+ void Init_markdown()
195
+ {
196
+ VALUE rb_mGitHub = rb_const_get(rb_cObject, rb_intern("GitHub"));
197
+ VALUE rb_cMarkdown = rb_define_class_under(rb_mGitHub, "Markdown", rb_cObject);
198
+
199
+ rb_define_singleton_method(rb_cMarkdown, "to_html", rb_ghmd_to_html, 2);
200
+
201
+ rb_ghmd__init_md();
202
+ rb_ghmd__init_gfm();
203
+ }
204
+
@@ -0,0 +1,29 @@
1
+ #ifndef HOUDINI_H__
2
+ #define HOUDINI_H__
3
+
4
+ #include "buffer.h"
5
+
6
#ifdef HOUDINI_USE_LOCALE
# include <ctype.h>
# define _isxdigit(c) isxdigit(c)
# define _isdigit(c) isdigit(c)
#else
/*
 * Helper _isdigit methods -- do not trust the current locale.
 *
 * Both are plain range checks, so they need no library header and
 * evaluate their argument more than once (pass side-effect-free
 * expressions only). _isxdigit rejects '\0': a strchr()-based lookup
 * would accept it, because strchr also matches the string terminator.
 */
# define _isdigit(c) ((c) >= '0' && (c) <= '9')
# define _isxdigit(c) \
	(_isdigit(c) || ((c) >= 'A' && (c) <= 'F') || ((c) >= 'a' && (c) <= 'f'))
#endif
16
+
17
+ extern void houdini_escape_html(struct buf *ob, const uint8_t *src, size_t size); /* same as houdini_escape_html0 with secure = 1 */
18
+ extern void houdini_escape_html0(struct buf *ob, const uint8_t *src, size_t size, int secure); /* secure != 0 additionally escapes '/' */
19
+ extern void houdini_unescape_html(struct buf *ob, const uint8_t *src, size_t size);
20
+ extern void houdini_escape_xml(struct buf *ob, const uint8_t *src, size_t size);
21
+ extern void houdini_escape_uri(struct buf *ob, const uint8_t *src, size_t size);
22
+ extern void houdini_escape_url(struct buf *ob, const uint8_t *src, size_t size);
23
+ extern void houdini_escape_href(struct buf *ob, const uint8_t *src, size_t size); /* %XX-escapes unsafe bytes; '&' and '\'' become HTML entities */
24
+ extern void houdini_unescape_uri(struct buf *ob, const uint8_t *src, size_t size);
25
+ extern void houdini_unescape_url(struct buf *ob, const uint8_t *src, size_t size);
26
+ extern void houdini_escape_js(struct buf *ob, const uint8_t *src, size_t size);
27
+ extern void houdini_unescape_js(struct buf *ob, const uint8_t *src, size_t size);
28
+
29
+ #endif
@@ -0,0 +1,108 @@
1
+ #include <assert.h>
2
+ #include <stdio.h>
3
+ #include <string.h>
4
+
5
+ #include "houdini.h"
6
+
7
+ #define ESCAPE_GROW_FACTOR(x) (((x) * 12) / 10) /* rough guess: ask bufgrow() for 1.2x the input length up front */
8
+
9
+ /*
10
+ * The following characters will not be escaped:
11
+ *
12
+ * -_.+!*'(),%#@?=;:/,+&$ alphanum
13
+ *
14
+ * Note that this character set is the addition of:
15
+ *
16
+ * - The characters which are safe to be in an URL
17
+ * - The characters which are *not* safe to be in
18
+ * an URL because they are RESERVED characters.
19
+ *
20
+ * We assume (lazily) that any RESERVED char that
21
+ * appears inside an URL is actually meant to
22
+ * have its native function (i.e. as an URL
23
+ * component/separator) and hence needs no escaping.
24
+ *
25
+ * There are two exceptions: the characters & (amp)
26
+ * and ' (single quote) do not appear in the table.
27
+ * They are meant to appear in the URL as components,
28
+ * yet they require special HTML-entity escaping
29
+ * to generate valid HTML markup.
30
+ *
31
+ * All other characters will be escaped to %XX.
32
+ *
33
+ */
34
+ static const char HREF_SAFE[] = { /* indexed by byte value: 1 = copied through unchanged by houdini_escape_href, 0 = must be escaped */
35
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
36
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
37
+ 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
38
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
39
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
41
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
43
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
44
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
45
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
46
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
47
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
48
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
49
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
50
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
51
+ };
52
+
53
+ void
54
+ houdini_escape_href(struct buf *ob, const uint8_t *src, size_t size)
55
+ {
56
+ static const char hex_chars[] = "0123456789ABCDEF";
57
+ size_t i = 0, org;
58
+ char hex_str[3];
59
+
60
+ bufgrow(ob, ESCAPE_GROW_FACTOR(size));
61
+ hex_str[0] = '%';
62
+
63
+ while (i < size) {
64
+ org = i;
65
+ while (i < size && HREF_SAFE[src[i]] != 0)
66
+ i++;
67
+
68
+ if (i > org)
69
+ bufput(ob, src + org, i - org);
70
+
71
+ /* escaping */
72
+ if (i >= size)
73
+ break;
74
+
75
+ switch (src[i]) {
76
+ /* amp appears all the time in URLs, but needs
77
+ * HTML-entity escaping to be inside an href */
78
+ case '&':
79
+ BUFPUTSL(ob, "&amp;");
80
+ break;
81
+
82
+ /* the single quote is a valid URL character
83
+ * according to the standard; it needs HTML
84
+ * entity escaping too */
85
+ case '\'':
86
+ BUFPUTSL(ob, "&#x27;");
87
+ break;
88
+
89
+ /* the space can be escaped to %20 or a plus
90
+ * sign. we're going with the generic escape
91
+ * for now. the plus thing is more commonly seen
92
+ * when building GET strings */
93
+ #if 0
94
+ case ' ':
95
+ bufputc(ob, '+');
96
+ break;
97
+ #endif
98
+
99
+ /* every other character goes with a %XX escaping */
100
+ default:
101
+ hex_str[1] = hex_chars[(src[i] >> 4) & 0xF];
102
+ hex_str[2] = hex_chars[src[i] & 0xF];
103
+ bufput(ob, hex_str, 3);
104
+ }
105
+
106
+ i++;
107
+ }
108
+ }
@@ -0,0 +1,84 @@
1
+ #include <assert.h>
2
+ #include <stdio.h>
3
+ #include <string.h>
4
+
5
+ #include "houdini.h"
6
+
7
+ #define ESCAPE_GROW_FACTOR(x) (((x) * 12) / 10) /* rough guess: ask bufgrow() for 1.2x the input length up front */
8
+
9
+ /**
10
+ * According to the OWASP rules:
11
+ *
12
+ * & --> &amp;
13
+ * < --> &lt;
14
+ * > --> &gt;
15
+ * " --> &quot;
16
+ * ' --> &#x27; (emitted here as the equivalent &#39;); &apos; is not recommended
17
+ * / --> &#x2F; (emitted here as the equivalent &#47;, and only in secure mode); forward slash is included as it helps end an HTML entity
18
+ *
19
+ */
20
+ static const char HTML_ESCAPE_TABLE[] = { /* indexed by byte value: 0 = copy as-is, otherwise the index of the replacement in HTML_ESCAPES */
21
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
22
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
23
+ 0, 0, 1, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 4,
24
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 6, 0,
25
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
26
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
27
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
28
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
29
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
30
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
31
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
32
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
33
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
34
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
35
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
36
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
37
+ };
38
+
39
/*
 * HTML_ESCAPES: replacement text for each non-zero code stored in
 * HTML_ESCAPE_TABLE. Entry 0 is an unused placeholder so the codes can
 * index the array directly. Both the strings and the pointers are const.
 */
static const char *const HTML_ESCAPES[] = {
	"",
	"&quot;",
	"&amp;",
	"&#39;",
	"&#47;",
	"&lt;",
	"&gt;"
};
48
+
49
+ void
50
+ houdini_escape_html0(struct buf *ob, const uint8_t *src, size_t size, int secure)
51
+ {
52
+ size_t i = 0, org, esc = 0;
53
+
54
+ bufgrow(ob, ESCAPE_GROW_FACTOR(size));
55
+
56
+ while (i < size) {
57
+ org = i;
58
+ while (i < size && (esc = HTML_ESCAPE_TABLE[src[i]]) == 0)
59
+ i++;
60
+
61
+ if (i > org)
62
+ bufput(ob, src + org, i - org);
63
+
64
+ /* escaping */
65
+ if (i >= size)
66
+ break;
67
+
68
+ /* The forward slash is only escaped in secure mode */
69
+ if (src[i] == '/' && !secure) {
70
+ bufputc(ob, '/');
71
+ } else {
72
+ bufputs(ob, HTML_ESCAPES[esc]);
73
+ }
74
+
75
+ i++;
76
+ }
77
+ }
78
+
79
/*
 * houdini_escape_html: HTML-escape `src` into `ob` in secure mode, i.e.
 * with the forward slash escaped as well.
 */
void
houdini_escape_html(struct buf *ob, const uint8_t *src, size_t size)
{
	const int secure = 1;

	houdini_escape_html0(ob, src, size, secure);
}
84
+