rinku 1.2.2 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +0 -18
- data/ext/rinku/autolink.c +13 -13
- data/ext/rinku/autolink.h +7 -4
- data/ext/rinku/buffer.c +123 -218
- data/ext/rinku/buffer.h +49 -112
- data/ext/rinku/houdini.h +28 -0
- data/ext/rinku/houdini_href_e.c +108 -0
- data/ext/rinku/houdini_html_e.c +84 -0
- data/ext/rinku/rinku.c +123 -78
- data/lib/rails_rinku.rb +2 -2
- data/rinku.gemspec +4 -1
- data/test/autolink_test.rb +36 -3
- metadata +8 -7
data/ext/rinku/buffer.h
CHANGED
@@ -1,7 +1,6 @@
|
|
1
|
-
/* buffer.h - automatic buffer structure */
|
2
|
-
|
3
1
|
/*
|
4
2
|
* Copyright (c) 2008, Natacha Porté
|
3
|
+
* Copyright (c) 2011, Vicent Martí
|
5
4
|
*
|
6
5
|
* Permission to use, copy, modify, and distribute this software for any
|
7
6
|
* purpose with or without fee is hereby granted, provided that the above
|
@@ -16,139 +15,77 @@
|
|
16
15
|
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
17
16
|
*/
|
18
17
|
|
19
|
-
#ifndef
|
20
|
-
#define
|
18
|
+
#ifndef __GEN_BUFFER_H__
|
19
|
+
#define __GEN_BUFFER_H__
|
21
20
|
|
22
21
|
#include <stddef.h>
|
22
|
+
#include <stdarg.h>
|
23
|
+
#include <stdint.h>
|
23
24
|
|
24
25
|
#if defined(_MSC_VER)
|
25
26
|
#define __attribute__(x)
|
26
27
|
#define inline
|
27
|
-
#define strncasecmp _strnicmp
|
28
|
-
#define snprintf _snprintf
|
29
|
-
#define va_copy(d,s) ((d) = (s))
|
30
28
|
#endif
|
31
29
|
|
32
|
-
|
33
|
-
|
34
|
-
|
30
|
+
typedef enum {
|
31
|
+
BUF_OK = 0,
|
32
|
+
BUF_ENOMEM = -1,
|
33
|
+
} buferror_t;
|
35
34
|
|
36
|
-
/* struct buf
|
35
|
+
/* struct buf: character array buffer */
|
37
36
|
struct buf {
|
38
|
-
|
39
|
-
size_t
|
40
|
-
size_t
|
41
|
-
size_t
|
42
|
-
|
43
|
-
|
44
|
-
/**********
|
45
|
-
* MACROS *
|
46
|
-
**********/
|
47
|
-
|
48
|
-
#define STRLEN(x) (sizeof(x) - 1)
|
49
|
-
|
50
|
-
/* CONST_BUF • global buffer from a string litteral */
|
51
|
-
#define CONST_BUF(name, string) \
|
52
|
-
static struct buf name = { string, sizeof string -1, sizeof string }
|
53
|
-
|
54
|
-
|
55
|
-
/* VOLATILE_BUF • macro for creating a volatile buffer on the stack */
|
56
|
-
#define VOLATILE_BUF(name, strname) \
|
57
|
-
struct buf name = { strname, strlen(strname) }
|
58
|
-
|
59
|
-
|
60
|
-
/* BUFPUTSL • optimized bufputs of a string litteral */
|
61
|
-
#define BUFPUTSL(output, litteral) \
|
62
|
-
bufput(output, litteral, sizeof litteral - 1)
|
63
|
-
|
64
|
-
|
37
|
+
uint8_t *data; /* actual character data */
|
38
|
+
size_t size; /* size of the string */
|
39
|
+
size_t asize; /* allocated size (0 = volatile buffer) */
|
40
|
+
size_t unit; /* reallocation unit size (0 = read-only buffer) */
|
41
|
+
};
|
65
42
|
|
66
|
-
|
67
|
-
|
68
|
-
|
43
|
+
/* BUF_STATIC: initializer for a global buffer built from a string literal */
|
44
|
+
#define BUF_STATIC(string) \
|
45
|
+
{ (uint8_t *)string, sizeof string -1, sizeof string, 0, 0 }
|
69
46
|
|
70
|
-
/*
|
71
|
-
|
72
|
-
|
47
|
+
/* BUF_VOLATILE: initializer for a volatile buffer (asize = 0) wrapping an existing C string */
|
48
|
+
#define BUF_VOLATILE(strname) \
|
49
|
+
{ (uint8_t *)strname, strlen(strname), 0, 0, 0 }
|
73
50
|
|
74
|
-
/*
|
75
|
-
|
76
|
-
|
51
|
+
/* BUFPUTSL: optimized bufputs of a string literal */
|
52
|
+
#define BUFPUTSL(output, literal) \
|
53
|
+
bufput(output, literal, sizeof literal - 1)
|
77
54
|
|
78
|
-
/*
|
79
|
-
int
|
80
|
-
bufcmps(const struct buf *, const char *);
|
55
|
+
/* bufgrow: increasing the allocated size to the given value */
|
56
|
+
int bufgrow(struct buf *, size_t);
|
81
57
|
|
82
|
-
/*
|
83
|
-
|
84
|
-
bufprefix(const struct buf *buf, const char *prefix);
|
58
|
+
/* bufnew: allocation of a new buffer */
|
59
|
+
struct buf *bufnew(size_t) __attribute__ ((malloc));
|
85
60
|
|
86
|
-
/*
|
87
|
-
struct buf *
|
88
|
-
bufdup(const struct buf *, size_t)
|
89
|
-
__attribute__ ((malloc));
|
61
|
+
/* bufcstr: NUL-termination of the string array (making a C-string) */
|
62
|
+
const char *bufcstr(struct buf *);
|
90
63
|
|
91
|
-
/*
|
92
|
-
int
|
93
|
-
bufgrow(struct buf *, size_t);
|
64
|
+
/* bufprefix: compare the beginning of a buffer with a string */
|
65
|
+
int bufprefix(const struct buf *buf, const char *prefix);
|
94
66
|
|
95
|
-
/*
|
96
|
-
struct buf
|
97
|
-
bufnew(size_t)
|
98
|
-
__attribute__ ((malloc));
|
67
|
+
/* bufput: appends raw data to a buffer */
|
68
|
+
void bufput(struct buf *, const void *, size_t);
|
99
69
|
|
100
|
-
/*
|
101
|
-
void
|
102
|
-
bufnullterm(struct buf *);
|
70
|
+
/* bufputs: appends a NUL-terminated string to a buffer */
|
71
|
+
void bufputs(struct buf *, const char *);
|
103
72
|
|
104
|
-
/*
|
105
|
-
void
|
106
|
-
bufprintf(struct buf *, const char *, ...)
|
107
|
-
__attribute__ ((format (printf, 2, 3)));
|
73
|
+
/* bufputc: appends a single char to a buffer */
|
74
|
+
void bufputc(struct buf *, int);
|
108
75
|
|
109
|
-
/*
|
110
|
-
void
|
111
|
-
bufput(struct buf *, const void*, size_t);
|
76
|
+
/* bufrelease: decrease the reference count and free the buffer if needed */
|
77
|
+
void bufrelease(struct buf *);
|
112
78
|
|
113
|
-
/*
|
114
|
-
void
|
115
|
-
bufputs(struct buf *, const char*);
|
79
|
+
/* bufreset: frees internal data of the buffer */
|
80
|
+
void bufreset(struct buf *);
|
116
81
|
|
117
|
-
/*
|
118
|
-
void
|
119
|
-
bufputc(struct buf *, char);
|
82
|
+
/* bufslurp: removes a given number of bytes from the head of the array */
|
83
|
+
void bufslurp(struct buf *, size_t);
|
120
84
|
|
121
|
-
/*
|
122
|
-
void
|
123
|
-
bufrelease(struct buf *);
|
85
|
+
/* bufprintf: formatted printing to a buffer */
|
86
|
+
void bufprintf(struct buf *, const char *, ...) __attribute__ ((format (printf, 2, 3)));
|
124
87
|
|
125
|
-
/*
|
126
|
-
void
|
127
|
-
bufreset(struct buf *);
|
88
|
+
/* vbufprintf: stdarg variant of formatted printing into a buffer */
|
89
|
+
void vbufprintf(struct buf *, const char * , va_list);
|
128
90
|
|
129
|
-
|
130
|
-
void
|
131
|
-
bufset(struct buf **, struct buf *);
|
132
|
-
|
133
|
-
/* bufslurp • removes a given number of bytes from the head of the array */
|
134
|
-
void
|
135
|
-
bufslurp(struct buf *, size_t);
|
136
|
-
|
137
|
-
/* buftoi • converts the numbers at the beginning of the buf into an int */
|
138
|
-
int
|
139
|
-
buftoi(struct buf *, size_t, size_t *);
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
#ifdef BUFFER_STDARG
|
144
|
-
#include <stdarg.h>
|
145
|
-
|
146
|
-
/* vbufprintf • stdarg variant of formatted printing into a buffer */
|
147
|
-
void
|
148
|
-
vbufprintf(struct buf *, const char*, va_list);
|
149
|
-
|
150
|
-
#endif /* def BUFFER_STDARG */
|
151
|
-
|
152
|
-
#endif /* ndef LITHIUM_BUFFER_H */
|
153
|
-
|
154
|
-
/* vim: set filetype=c: */
|
91
|
+
#endif
|
data/ext/rinku/houdini.h
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
#ifndef __HOUDINI_H__
|
2
|
+
#define __HOUDINI_H__
|
3
|
+
|
4
|
+
#include "buffer.h"
|
5
|
+
|
6
|
+
#ifdef HOUDINI_USE_LOCALE
|
7
|
+
# define _isxdigit(c) isxdigit(c)
|
8
|
+
# define _isdigit(c) isdigit(c)
|
9
|
+
#else
|
10
|
+
/*
|
11
|
+
* Helper _isdigit methods -- do not trust the current locale
|
12
|
+
* */
|
13
|
+
# define _isxdigit(c) (strchr("0123456789ABCDEFabcdef", (c)) != NULL)
|
14
|
+
# define _isdigit(c) ((c) >= '0' && (c) <= '9')
|
15
|
+
#endif
|
16
|
+
|
17
|
+
extern void houdini_escape_html(struct buf *ob, const uint8_t *src, size_t size);
|
18
|
+
extern void houdini_escape_html0(struct buf *ob, const uint8_t *src, size_t size, int secure);
|
19
|
+
extern void houdini_unescape_html(struct buf *ob, const uint8_t *src, size_t size);
|
20
|
+
extern void houdini_escape_uri(struct buf *ob, const uint8_t *src, size_t size);
|
21
|
+
extern void houdini_escape_url(struct buf *ob, const uint8_t *src, size_t size);
|
22
|
+
extern void houdini_escape_href(struct buf *ob, const uint8_t *src, size_t size);
|
23
|
+
extern void houdini_unescape_uri(struct buf *ob, const uint8_t *src, size_t size);
|
24
|
+
extern void houdini_unescape_url(struct buf *ob, const uint8_t *src, size_t size);
|
25
|
+
extern void houdini_escape_js(struct buf *ob, const uint8_t *src, size_t size);
|
26
|
+
extern void houdini_unescape_js(struct buf *ob, const uint8_t *src, size_t size);
|
27
|
+
|
28
|
+
#endif
|
@@ -0,0 +1,108 @@
|
|
1
|
+
#include <assert.h>
|
2
|
+
#include <stdio.h>
|
3
|
+
#include <string.h>
|
4
|
+
|
5
|
+
#include "houdini.h"
|
6
|
+
|
7
|
+
#define ESCAPE_GROW_FACTOR(x) (((x) * 12) / 10)
|
8
|
+
|
9
|
+
/*
|
10
|
+
* The following characters will not be escaped:
|
11
|
+
*
|
12
|
+
* -_.+!*'(),%#@?=;:/,+&$ alphanum
|
13
|
+
*
|
14
|
+
* Note that this character set is the addition of:
|
15
|
+
*
|
16
|
+
* - The characters which are safe to be in an URL
|
17
|
+
* - The characters which are *not* safe to be in
|
18
|
+
* an URL because they are RESERVED characters.
|
19
|
+
*
|
20
|
+
* We assume (lazily) that any RESERVED char that
|
21
|
+
* appears inside an URL is actually meant to
|
22
|
+
* have its native function (i.e. as an URL
|
23
|
+
* component/separator) and hence needs no escaping.
|
24
|
+
*
|
25
|
+
* There are two exceptions: the characters & (amp)
|
26
|
+
* and ' (single quote) do not appear in the table.
|
27
|
+
* They are meant to appear in the URL as components,
|
28
|
+
* yet they require special HTML-entity escaping
|
29
|
+
* to generate valid HTML markup.
|
30
|
+
*
|
31
|
+
* All other characters will be escaped to %XX.
|
32
|
+
*
|
33
|
+
*/
|
34
|
+
static const char HREF_SAFE[] = {
|
35
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
36
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
37
|
+
0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
|
38
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
|
39
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
40
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
|
41
|
+
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
42
|
+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
|
43
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
44
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
45
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
46
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
47
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
48
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
49
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
50
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
51
|
+
};
|
52
|
+
|
53
|
+
void
|
54
|
+
houdini_escape_href(struct buf *ob, const uint8_t *src, size_t size)
|
55
|
+
{
|
56
|
+
static const char hex_chars[] = "0123456789ABCDEF";
|
57
|
+
size_t i = 0, org;
|
58
|
+
char hex_str[3];
|
59
|
+
|
60
|
+
bufgrow(ob, ESCAPE_GROW_FACTOR(size));
|
61
|
+
hex_str[0] = '%';
|
62
|
+
|
63
|
+
while (i < size) {
|
64
|
+
org = i;
|
65
|
+
while (i < size && HREF_SAFE[src[i]] != 0)
|
66
|
+
i++;
|
67
|
+
|
68
|
+
if (i > org)
|
69
|
+
bufput(ob, src + org, i - org);
|
70
|
+
|
71
|
+
/* escaping */
|
72
|
+
if (i >= size)
|
73
|
+
break;
|
74
|
+
|
75
|
+
switch (src[i]) {
|
76
|
+
/* amp appears all the time in URLs, but needs
|
77
|
+
* HTML-entity escaping to be inside an href */
|
78
|
+
case '&':
|
79
|
+
BUFPUTSL(ob, "&amp;");
|
80
|
+
break;
|
81
|
+
|
82
|
+
/* the single quote is a valid URL character
|
83
|
+
* according to the standard; it needs HTML
|
84
|
+
* entity escaping too */
|
85
|
+
case '\'':
|
86
|
+
BUFPUTSL(ob, "&#x27;");
|
87
|
+
break;
|
88
|
+
|
89
|
+
/* the space can be escaped to %20 or a plus
|
90
|
+
* sign. we're going with the generic escape
|
91
|
+
* for now. the plus thing is more commonly seen
|
92
|
+
* when building GET strings */
|
93
|
+
#if 0
|
94
|
+
case ' ':
|
95
|
+
bufputc(ob, '+');
|
96
|
+
break;
|
97
|
+
#endif
|
98
|
+
|
99
|
+
/* every other character goes with a %XX escaping */
|
100
|
+
default:
|
101
|
+
hex_str[1] = hex_chars[(src[i] >> 4) & 0xF];
|
102
|
+
hex_str[2] = hex_chars[src[i] & 0xF];
|
103
|
+
bufput(ob, hex_str, 3);
|
104
|
+
}
|
105
|
+
|
106
|
+
i++;
|
107
|
+
}
|
108
|
+
}
|
@@ -0,0 +1,84 @@
|
|
1
|
+
#include <assert.h>
|
2
|
+
#include <stdio.h>
|
3
|
+
#include <string.h>
|
4
|
+
|
5
|
+
#include "houdini.h"
|
6
|
+
|
7
|
+
#define ESCAPE_GROW_FACTOR(x) (((x) * 12) / 10) /* this is very scientific, yes */
|
8
|
+
|
9
|
+
/**
|
10
|
+
* According to the OWASP rules:
|
11
|
+
*
|
12
|
+
* & --> &amp;
|
13
|
+
* < --> &lt;
|
14
|
+
* > --> &gt;
|
15
|
+
* " --> &quot;
|
16
|
+
* ' --> &#x27; &apos; is not recommended
|
17
|
+
* / --> &#x2F; forward slash is included as it helps end an HTML entity
|
18
|
+
*
|
19
|
+
*/
|
20
|
+
static const char HTML_ESCAPE_TABLE[] = {
|
21
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
22
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
23
|
+
0, 0, 1, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 4,
|
24
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 6, 0,
|
25
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
26
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
27
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
28
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
29
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
30
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
31
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
32
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
33
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
34
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
35
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
36
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
37
|
+
};
|
38
|
+
|
39
|
+
static const char *HTML_ESCAPES[] = {
|
40
|
+
"",
|
41
|
+
"&quot;",
|
42
|
+
"&amp;",
|
43
|
+
"&#39;",
|
44
|
+
"&#47;",
|
45
|
+
"&lt;",
|
46
|
+
"&gt;"
|
47
|
+
};
|
48
|
+
|
49
|
+
void
|
50
|
+
houdini_escape_html0(struct buf *ob, const uint8_t *src, size_t size, int secure)
|
51
|
+
{
|
52
|
+
size_t i = 0, org, esc;
|
53
|
+
|
54
|
+
bufgrow(ob, ESCAPE_GROW_FACTOR(size));
|
55
|
+
|
56
|
+
while (i < size) {
|
57
|
+
org = i;
|
58
|
+
while (i < size && (esc = HTML_ESCAPE_TABLE[src[i]]) == 0)
|
59
|
+
i++;
|
60
|
+
|
61
|
+
if (i > org)
|
62
|
+
bufput(ob, src + org, i - org);
|
63
|
+
|
64
|
+
/* escaping */
|
65
|
+
if (i >= size)
|
66
|
+
break;
|
67
|
+
|
68
|
+
/* The forward slash is only escaped in secure mode */
|
69
|
+
if (src[i] == '/' && !secure) {
|
70
|
+
bufputc(ob, '/');
|
71
|
+
} else {
|
72
|
+
bufputs(ob, HTML_ESCAPES[esc]);
|
73
|
+
}
|
74
|
+
|
75
|
+
i++;
|
76
|
+
}
|
77
|
+
}
|
78
|
+
|
79
|
+
void
|
80
|
+
houdini_escape_html(struct buf *ob, const uint8_t *src, size_t size)
|
81
|
+
{
|
82
|
+
houdini_escape_html0(ob, src, size, 1);
|
83
|
+
}
|
84
|
+
|