rinku 1.2.2 → 1.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +0 -18
- data/ext/rinku/autolink.c +13 -13
- data/ext/rinku/autolink.h +7 -4
- data/ext/rinku/buffer.c +123 -218
- data/ext/rinku/buffer.h +49 -112
- data/ext/rinku/houdini.h +28 -0
- data/ext/rinku/houdini_href_e.c +108 -0
- data/ext/rinku/houdini_html_e.c +84 -0
- data/ext/rinku/rinku.c +123 -78
- data/lib/rails_rinku.rb +2 -2
- data/rinku.gemspec +4 -1
- data/test/autolink_test.rb +36 -3
- metadata +8 -7
data/ext/rinku/buffer.h
CHANGED
@@ -1,7 +1,6 @@
|
|
1
|
-
/* buffer.h - automatic buffer structure */
|
2
|
-
|
3
1
|
/*
|
4
2
|
* Copyright (c) 2008, Natacha Porté
|
3
|
+
* Copyright (c) 2011, Vicent Martí
|
5
4
|
*
|
6
5
|
* Permission to use, copy, modify, and distribute this software for any
|
7
6
|
* purpose with or without fee is hereby granted, provided that the above
|
@@ -16,139 +15,77 @@
|
|
16
15
|
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
17
16
|
*/
|
18
17
|
|
19
|
-
#ifndef
|
20
|
-
#define
|
18
|
+
#ifndef __GEN_BUFFER_H__
|
19
|
+
#define __GEN_BUFFER_H__
|
21
20
|
|
22
21
|
#include <stddef.h>
|
22
|
+
#include <stdarg.h>
|
23
|
+
#include <stdint.h>
|
23
24
|
|
24
25
|
#if defined(_MSC_VER)
|
25
26
|
#define __attribute__(x)
|
26
27
|
#define inline
|
27
|
-
#define strncasecmp _strnicmp
|
28
|
-
#define snprintf _snprintf
|
29
|
-
#define va_copy(d,s) ((d) = (s))
|
30
28
|
#endif
|
31
29
|
|
32
|
-
|
33
|
-
|
34
|
-
|
30
|
+
typedef enum {
|
31
|
+
BUF_OK = 0,
|
32
|
+
BUF_ENOMEM = -1,
|
33
|
+
} buferror_t;
|
35
34
|
|
36
|
-
/* struct buf
|
35
|
+
/* struct buf: character array buffer */
|
37
36
|
struct buf {
|
38
|
-
|
39
|
-
size_t
|
40
|
-
size_t
|
41
|
-
size_t
|
42
|
-
|
43
|
-
|
44
|
-
/**********
|
45
|
-
* MACROS *
|
46
|
-
**********/
|
47
|
-
|
48
|
-
#define STRLEN(x) (sizeof(x) - 1)
|
49
|
-
|
50
|
-
/* CONST_BUF • global buffer from a string litteral */
|
51
|
-
#define CONST_BUF(name, string) \
|
52
|
-
static struct buf name = { string, sizeof string -1, sizeof string }
|
53
|
-
|
54
|
-
|
55
|
-
/* VOLATILE_BUF • macro for creating a volatile buffer on the stack */
|
56
|
-
#define VOLATILE_BUF(name, strname) \
|
57
|
-
struct buf name = { strname, strlen(strname) }
|
58
|
-
|
59
|
-
|
60
|
-
/* BUFPUTSL • optimized bufputs of a string litteral */
|
61
|
-
#define BUFPUTSL(output, litteral) \
|
62
|
-
bufput(output, litteral, sizeof litteral - 1)
|
63
|
-
|
64
|
-
|
37
|
+
uint8_t *data; /* actual character data */
|
38
|
+
size_t size; /* size of the string */
|
39
|
+
size_t asize; /* allocated size (0 = volatile buffer) */
|
40
|
+
size_t unit; /* reallocation unit size (0 = read-only buffer) */
|
41
|
+
};
|
65
42
|
|
66
|
-
|
67
|
-
|
68
|
-
|
43
|
+
/* BUF_STATIC: global buffer from a string literal */
|
44
|
+
#define BUF_STATIC(string) \
|
45
|
+
{ (uint8_t *)string, sizeof string -1, sizeof string, 0, 0 }
|
69
46
|
|
70
|
-
/*
|
71
|
-
|
72
|
-
|
47
|
+
/* BUF_VOLATILE: macro for creating a volatile buffer on the stack */
|
48
|
+
#define BUF_VOLATILE(strname) \
|
49
|
+
{ (uint8_t *)strname, strlen(strname), 0, 0, 0 }
|
73
50
|
|
74
|
-
/*
|
75
|
-
|
76
|
-
|
51
|
+
/* BUFPUTSL: optimized bufputs of a string literal */
|
52
|
+
#define BUFPUTSL(output, literal) \
|
53
|
+
bufput(output, literal, sizeof literal - 1)
|
77
54
|
|
78
|
-
/*
|
79
|
-
int
|
80
|
-
bufcmps(const struct buf *, const char *);
|
55
|
+
/* bufgrow: increasing the allocated size to the given value */
|
56
|
+
int bufgrow(struct buf *, size_t);
|
81
57
|
|
82
|
-
/*
|
83
|
-
|
84
|
-
bufprefix(const struct buf *buf, const char *prefix);
|
58
|
+
/* bufnew: allocation of a new buffer */
|
59
|
+
struct buf *bufnew(size_t) __attribute__ ((malloc));
|
85
60
|
|
86
|
-
/*
|
87
|
-
struct buf *
|
88
|
-
bufdup(const struct buf *, size_t)
|
89
|
-
__attribute__ ((malloc));
|
61
|
+
/* bufcstr: NUL-termination of the string array (making a C-string) */
|
62
|
+
const char *bufcstr(struct buf *);
|
90
63
|
|
91
|
-
/*
|
92
|
-
int
|
93
|
-
bufgrow(struct buf *, size_t);
|
64
|
+
/* bufprefix: compare the beginning of a buffer with a string */
|
65
|
+
int bufprefix(const struct buf *buf, const char *prefix);
|
94
66
|
|
95
|
-
/*
|
96
|
-
struct buf
|
97
|
-
bufnew(size_t)
|
98
|
-
__attribute__ ((malloc));
|
67
|
+
/* bufput: appends raw data to a buffer */
|
68
|
+
void bufput(struct buf *, const void *, size_t);
|
99
69
|
|
100
|
-
/*
|
101
|
-
void
|
102
|
-
bufnullterm(struct buf *);
|
70
|
+
/* bufputs: appends a NUL-terminated string to a buffer */
|
71
|
+
void bufputs(struct buf *, const char *);
|
103
72
|
|
104
|
-
/*
|
105
|
-
void
|
106
|
-
bufprintf(struct buf *, const char *, ...)
|
107
|
-
__attribute__ ((format (printf, 2, 3)));
|
73
|
+
/* bufputc: appends a single char to a buffer */
|
74
|
+
void bufputc(struct buf *, int);
|
108
75
|
|
109
|
-
/*
|
110
|
-
void
|
111
|
-
bufput(struct buf *, const void*, size_t);
|
76
|
+
/* bufrelease: decrease the reference count and free the buffer if needed */
|
77
|
+
void bufrelease(struct buf *);
|
112
78
|
|
113
|
-
/*
|
114
|
-
void
|
115
|
-
bufputs(struct buf *, const char*);
|
79
|
+
/* bufreset: frees internal data of the buffer */
|
80
|
+
void bufreset(struct buf *);
|
116
81
|
|
117
|
-
/*
|
118
|
-
void
|
119
|
-
bufputc(struct buf *, char);
|
82
|
+
/* bufslurp: removes a given number of bytes from the head of the array */
|
83
|
+
void bufslurp(struct buf *, size_t);
|
120
84
|
|
121
|
-
/*
|
122
|
-
void
|
123
|
-
bufrelease(struct buf *);
|
85
|
+
/* bufprintf: formatted printing to a buffer */
|
86
|
+
void bufprintf(struct buf *, const char *, ...) __attribute__ ((format (printf, 2, 3)));
|
124
87
|
|
125
|
-
/*
|
126
|
-
void
|
127
|
-
bufreset(struct buf *);
|
88
|
+
/* vbufprintf: stdarg variant of formatted printing into a buffer */
|
89
|
+
void vbufprintf(struct buf *, const char * , va_list);
|
128
90
|
|
129
|
-
|
130
|
-
void
|
131
|
-
bufset(struct buf **, struct buf *);
|
132
|
-
|
133
|
-
/* bufslurp • removes a given number of bytes from the head of the array */
|
134
|
-
void
|
135
|
-
bufslurp(struct buf *, size_t);
|
136
|
-
|
137
|
-
/* buftoi • converts the numbers at the beginning of the buf into an int */
|
138
|
-
int
|
139
|
-
buftoi(struct buf *, size_t, size_t *);
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
#ifdef BUFFER_STDARG
|
144
|
-
#include <stdarg.h>
|
145
|
-
|
146
|
-
/* vbufprintf • stdarg variant of formatted printing into a buffer */
|
147
|
-
void
|
148
|
-
vbufprintf(struct buf *, const char*, va_list);
|
149
|
-
|
150
|
-
#endif /* def BUFFER_STDARG */
|
151
|
-
|
152
|
-
#endif /* ndef LITHIUM_BUFFER_H */
|
153
|
-
|
154
|
-
/* vim: set filetype=c: */
|
91
|
+
#endif
|
data/ext/rinku/houdini.h
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
#ifndef __HOUDINI_H__
|
2
|
+
#define __HOUDINI_H__
|
3
|
+
|
4
|
+
#include "buffer.h"
|
5
|
+
|
6
|
+
#ifdef HOUDINI_USE_LOCALE
|
7
|
+
# define _isxdigit(c) isxdigit(c)
|
8
|
+
# define _isdigit(c) isdigit(c)
|
9
|
+
#else
|
10
|
+
/*
|
11
|
+
* Helper _isdigit methods -- do not trust the current locale
|
12
|
+
* */
|
13
|
+
# define _isxdigit(c) (strchr("0123456789ABCDEFabcdef", (c)) != NULL)
|
14
|
+
# define _isdigit(c) ((c) >= '0' && (c) <= '9')
|
15
|
+
#endif
|
16
|
+
|
17
|
+
extern void houdini_escape_html(struct buf *ob, const uint8_t *src, size_t size);
|
18
|
+
extern void houdini_escape_html0(struct buf *ob, const uint8_t *src, size_t size, int secure);
|
19
|
+
extern void houdini_unescape_html(struct buf *ob, const uint8_t *src, size_t size);
|
20
|
+
extern void houdini_escape_uri(struct buf *ob, const uint8_t *src, size_t size);
|
21
|
+
extern void houdini_escape_url(struct buf *ob, const uint8_t *src, size_t size);
|
22
|
+
extern void houdini_escape_href(struct buf *ob, const uint8_t *src, size_t size);
|
23
|
+
extern void houdini_unescape_uri(struct buf *ob, const uint8_t *src, size_t size);
|
24
|
+
extern void houdini_unescape_url(struct buf *ob, const uint8_t *src, size_t size);
|
25
|
+
extern void houdini_escape_js(struct buf *ob, const uint8_t *src, size_t size);
|
26
|
+
extern void houdini_unescape_js(struct buf *ob, const uint8_t *src, size_t size);
|
27
|
+
|
28
|
+
#endif
|
@@ -0,0 +1,108 @@
|
|
1
|
+
#include <assert.h>
|
2
|
+
#include <stdio.h>
|
3
|
+
#include <string.h>
|
4
|
+
|
5
|
+
#include "houdini.h"
|
6
|
+
|
7
|
+
#define ESCAPE_GROW_FACTOR(x) (((x) * 12) / 10)
|
8
|
+
|
9
|
+
/*
|
10
|
+
* The following characters will not be escaped:
|
11
|
+
*
|
12
|
+
* -_.+!*'(),%#@?=;:/,+&$ alphanum
|
13
|
+
*
|
14
|
+
* Note that this character set is the addition of:
|
15
|
+
*
|
16
|
+
* - The characters which are safe to be in an URL
|
17
|
+
* - The characters which are *not* safe to be in
|
18
|
+
* an URL because they are RESERVED characters.
|
19
|
+
*
|
20
|
+
* We assume (lazily) that any RESERVED char that
|
21
|
+
* appears inside an URL is actually meant to
|
22
|
+
* have its native function (i.e. as an URL
|
23
|
+
* component/separator) and hence needs no escaping.
|
24
|
+
*
|
25
|
+
* There are two exceptions: the characters & (amp)
|
26
|
+
* and ' (single quote) do not appear in the table.
|
27
|
+
* They are meant to appear in the URL as components,
|
28
|
+
* yet they require special HTML-entity escaping
|
29
|
+
* to generate valid HTML markup.
|
30
|
+
*
|
31
|
+
* All other characters will be escaped to %XX.
|
32
|
+
*
|
33
|
+
*/
|
34
|
+
static const char HREF_SAFE[] = {
|
35
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
36
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
37
|
+
0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
|
38
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
|
39
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
40
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
|
41
|
+
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
42
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
|
43
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
44
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
45
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
46
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
47
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
48
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
49
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
50
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
51
|
+
};
|
52
|
+
|
53
|
+
void
|
54
|
+
houdini_escape_href(struct buf *ob, const uint8_t *src, size_t size)
|
55
|
+
{
|
56
|
+
static const char hex_chars[] = "0123456789ABCDEF";
|
57
|
+
size_t i = 0, org;
|
58
|
+
char hex_str[3];
|
59
|
+
|
60
|
+
bufgrow(ob, ESCAPE_GROW_FACTOR(size));
|
61
|
+
hex_str[0] = '%';
|
62
|
+
|
63
|
+
while (i < size) {
|
64
|
+
org = i;
|
65
|
+
while (i < size && HREF_SAFE[src[i]] != 0)
|
66
|
+
i++;
|
67
|
+
|
68
|
+
if (i > org)
|
69
|
+
bufput(ob, src + org, i - org);
|
70
|
+
|
71
|
+
/* escaping */
|
72
|
+
if (i >= size)
|
73
|
+
break;
|
74
|
+
|
75
|
+
switch (src[i]) {
|
76
|
+
/* amp appears all the time in URLs, but needs
|
77
|
+
* HTML-entity escaping to be inside an href */
|
78
|
+
case '&':
|
79
|
+
BUFPUTSL(ob, "&amp;");
|
80
|
+
break;
|
81
|
+
|
82
|
+
/* the single quote is a valid URL character
|
83
|
+
* according to the standard; it needs HTML
|
84
|
+
* entity escaping too */
|
85
|
+
case '\'':
|
86
|
+
BUFPUTSL(ob, "&#x27;");
|
87
|
+
break;
|
88
|
+
|
89
|
+
/* the space can be escaped to %20 or a plus
|
90
|
+
* sign. we're going with the generic escape
|
91
|
+
* for now. the plus thing is more commonly seen
|
92
|
+
* when building GET strings */
|
93
|
+
#if 0
|
94
|
+
case ' ':
|
95
|
+
bufputc(ob, '+');
|
96
|
+
break;
|
97
|
+
#endif
|
98
|
+
|
99
|
+
/* every other character goes with a %XX escaping */
|
100
|
+
default:
|
101
|
+
hex_str[1] = hex_chars[(src[i] >> 4) & 0xF];
|
102
|
+
hex_str[2] = hex_chars[src[i] & 0xF];
|
103
|
+
bufput(ob, hex_str, 3);
|
104
|
+
}
|
105
|
+
|
106
|
+
i++;
|
107
|
+
}
|
108
|
+
}
|
@@ -0,0 +1,84 @@
|
|
1
|
+
#include <assert.h>
|
2
|
+
#include <stdio.h>
|
3
|
+
#include <string.h>
|
4
|
+
|
5
|
+
#include "houdini.h"
|
6
|
+
|
7
|
+
#define ESCAPE_GROW_FACTOR(x) (((x) * 12) / 10) /* this is very scientific, yes */
|
8
|
+
|
9
|
+
/**
|
10
|
+
* According to the OWASP rules:
|
11
|
+
*
|
12
|
+
* & --> &amp;
|
13
|
+
* < --> &lt;
|
14
|
+
* > --> &gt;
|
15
|
+
* " --> &quot;
|
16
|
+
* ' --> &#x27; &apos; is not recommended
|
17
|
+
* / --> &#x2F; forward slash is included as it helps end an HTML entity
|
18
|
+
*
|
19
|
+
*/
|
20
|
+
static const char HTML_ESCAPE_TABLE[] = {
|
21
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
22
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
23
|
+
0, 0, 1, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 4,
|
24
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 6, 0,
|
25
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
26
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
27
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
28
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
29
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
30
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
31
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
32
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
33
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
34
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
35
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
36
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
37
|
+
};
|
38
|
+
|
39
|
+
static const char *HTML_ESCAPES[] = {
|
40
|
+
"",
|
41
|
+
"&quot;",
|
42
|
+
"&amp;",
|
43
|
+
"&#39;",
|
44
|
+
"&#47;",
|
45
|
+
"&lt;",
|
46
|
+
"&gt;"
|
47
|
+
};
|
48
|
+
|
49
|
+
void
|
50
|
+
houdini_escape_html0(struct buf *ob, const uint8_t *src, size_t size, int secure)
|
51
|
+
{
|
52
|
+
size_t i = 0, org, esc;
|
53
|
+
|
54
|
+
bufgrow(ob, ESCAPE_GROW_FACTOR(size));
|
55
|
+
|
56
|
+
while (i < size) {
|
57
|
+
org = i;
|
58
|
+
while (i < size && (esc = HTML_ESCAPE_TABLE[src[i]]) == 0)
|
59
|
+
i++;
|
60
|
+
|
61
|
+
if (i > org)
|
62
|
+
bufput(ob, src + org, i - org);
|
63
|
+
|
64
|
+
/* escaping */
|
65
|
+
if (i >= size)
|
66
|
+
break;
|
67
|
+
|
68
|
+
/* The forward slash is only escaped in secure mode */
|
69
|
+
if (src[i] == '/' && !secure) {
|
70
|
+
bufputc(ob, '/');
|
71
|
+
} else {
|
72
|
+
bufputs(ob, HTML_ESCAPES[esc]);
|
73
|
+
}
|
74
|
+
|
75
|
+
i++;
|
76
|
+
}
|
77
|
+
}
|
78
|
+
|
79
|
+
void
|
80
|
+
houdini_escape_html(struct buf *ob, const uint8_t *src, size_t size)
|
81
|
+
{
|
82
|
+
houdini_escape_html0(ob, src, size, 1);
|
83
|
+
}
|
84
|
+
|