rinku 1.2.2 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ext/rinku/buffer.h CHANGED
@@ -1,7 +1,6 @@
1
- /* buffer.h - automatic buffer structure */
2
-
3
1
  /*
4
2
  * Copyright (c) 2008, Natacha Porté
3
+ * Copyright (c) 2011, Vicent Martí
5
4
  *
6
5
  * Permission to use, copy, modify, and distribute this software for any
7
6
  * purpose with or without fee is hereby granted, provided that the above
@@ -16,139 +15,77 @@
16
15
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17
16
  */
18
17
 
19
- #ifndef LITHIUM_BUFFER_H
20
- #define LITHIUM_BUFFER_H
18
+ #ifndef __GEN_BUFFER_H__
19
+ #define __GEN_BUFFER_H__
21
20
 
22
21
  #include <stddef.h>
22
+ #include <stdarg.h>
23
+ #include <stdint.h>
23
24
 
24
25
  #if defined(_MSC_VER)
25
26
  #define __attribute__(x)
26
27
  #define inline
27
- #define strncasecmp _strnicmp
28
- #define snprintf _snprintf
29
- #define va_copy(d,s) ((d) = (s))
30
28
  #endif
31
29
 
32
- /********************
33
- * TYPE DEFINITIONS *
34
- ********************/
30
+ typedef enum {
31
+ BUF_OK = 0,
32
+ BUF_ENOMEM = -1,
33
+ } buferror_t;
35
34
 
36
- /* struct buf character array buffer */
35
+ /* struct buf: character array buffer */
37
36
  struct buf {
38
- char * data; /* actual character data */
39
- size_t size; /* size of the string */
40
- size_t asize; /* allocated size (0 = volatile buffer) */
41
- size_t unit; /* reallocation unit size (0 = read-only buffer) */
42
- int ref; }; /* reference count */
43
-
44
- /**********
45
- * MACROS *
46
- **********/
47
-
48
- #define STRLEN(x) (sizeof(x) - 1)
49
-
50
- /* CONST_BUF • global buffer from a string litteral */
51
- #define CONST_BUF(name, string) \
52
- static struct buf name = { string, sizeof string -1, sizeof string }
53
-
54
-
55
- /* VOLATILE_BUF • macro for creating a volatile buffer on the stack */
56
- #define VOLATILE_BUF(name, strname) \
57
- struct buf name = { strname, strlen(strname) }
58
-
59
-
60
- /* BUFPUTSL • optimized bufputs of a string litteral */
61
- #define BUFPUTSL(output, litteral) \
62
- bufput(output, litteral, sizeof litteral - 1)
63
-
64
-
37
+ uint8_t *data; /* actual character data */
38
+ size_t size; /* size of the string */
39
+ size_t asize; /* allocated size (0 = volatile buffer) */
40
+ size_t unit; /* reallocation unit size (0 = read-only buffer) */
41
+ };
65
42
 
66
- /********************
67
- * BUFFER FUNCTIONS *
68
- ********************/
43
+ /* BUF_STATIC: initializer for a global buffer backed by a string literal */
44
+ #define BUF_STATIC(string) \
45
+ { (uint8_t *)string, sizeof string -1, sizeof string, 0, 0 }
69
46
 
70
- /* bufcasecmp case-insensitive buffer comparison */
71
- int
72
- bufcasecmp(const struct buf *, const struct buf *);
47
+ /* BUF_VOLATILE: initializer macro for creating a volatile buffer on the stack */
48
+ #define BUF_VOLATILE(strname) \
49
+ { (uint8_t *)strname, strlen(strname), 0, 0, 0 }
73
50
 
74
- /* bufcmp case-sensitive buffer comparison */
75
- int
76
- bufcmp(const struct buf *, const struct buf *);
51
+ /* BUFPUTSL: optimized bufputs of a string literal */
52
+ #define BUFPUTSL(output, literal) \
53
+ bufput(output, literal, sizeof literal - 1)
77
54
 
78
- /* bufcmps case-sensitive comparison of a string to a buffer */
79
- int
80
- bufcmps(const struct buf *, const char *);
55
+ /* bufgrow: increasing the allocated size to the given value */
56
+ int bufgrow(struct buf *, size_t);
81
57
 
82
- /* bufprefix * compare the beginning of a buffer with a string */
83
- int
84
- bufprefix(const struct buf *buf, const char *prefix);
58
+ /* bufnew: allocation of a new buffer */
59
+ struct buf *bufnew(size_t) __attribute__ ((malloc));
85
60
 
86
- /* bufdup buffer duplication */
87
- struct buf *
88
- bufdup(const struct buf *, size_t)
89
- __attribute__ ((malloc));
61
+ /* bufcstr: NUL-termination of the string array (making a C-string) */
62
+ const char *bufcstr(struct buf *);
90
63
 
91
- /* bufgrow increasing the allocated size to the given value */
92
- int
93
- bufgrow(struct buf *, size_t);
64
+ /* bufprefix: compare the beginning of a buffer with a string */
65
+ int bufprefix(const struct buf *buf, const char *prefix);
94
66
 
95
- /* bufnew allocation of a new buffer */
96
- struct buf *
97
- bufnew(size_t)
98
- __attribute__ ((malloc));
67
+ /* bufput: appends raw data to a buffer */
68
+ void bufput(struct buf *, const void *, size_t);
99
69
 
100
- /* bufnullterm NUL-termination of the string array (making a C-string) */
101
- void
102
- bufnullterm(struct buf *);
70
+ /* bufputs: appends a NUL-terminated string to a buffer */
71
+ void bufputs(struct buf *, const char *);
103
72
 
104
- /* bufprintf formatted printing to a buffer */
105
- void
106
- bufprintf(struct buf *, const char *, ...)
107
- __attribute__ ((format (printf, 2, 3)));
73
+ /* bufputc: appends a single char to a buffer */
74
+ void bufputc(struct buf *, int);
108
75
 
109
- /* bufput appends raw data to a buffer */
110
- void
111
- bufput(struct buf *, const void*, size_t);
76
+ /* bufrelease: release a buffer (struct buf no longer has a reference count field) */
77
+ void bufrelease(struct buf *);
112
78
 
113
- /* bufputs appends a NUL-terminated string to a buffer */
114
- void
115
- bufputs(struct buf *, const char*);
79
+ /* bufreset: frees internal data of the buffer */
80
+ void bufreset(struct buf *);
116
81
 
117
- /* bufputc appends a single char to a buffer */
118
- void
119
- bufputc(struct buf *, char);
82
+ /* bufslurp: removes a given number of bytes from the head of the array */
83
+ void bufslurp(struct buf *, size_t);
120
84
 
121
- /* bufrelease decrease the reference count and free the buffer if needed */
122
- void
123
- bufrelease(struct buf *);
85
+ /* bufprintf: formatted printing to a buffer */
86
+ void bufprintf(struct buf *, const char *, ...) __attribute__ ((format (printf, 2, 3)));
124
87
 
125
- /* bufreset frees internal data of the buffer */
126
- void
127
- bufreset(struct buf *);
88
+ /* vbufprintf: stdarg variant of formatted printing into a buffer */
89
+ void vbufprintf(struct buf *, const char * , va_list);
128
90
 
129
- /* bufset • safely assigns a buffer to another */
130
- void
131
- bufset(struct buf **, struct buf *);
132
-
133
- /* bufslurp • removes a given number of bytes from the head of the array */
134
- void
135
- bufslurp(struct buf *, size_t);
136
-
137
- /* buftoi • converts the numbers at the beginning of the buf into an int */
138
- int
139
- buftoi(struct buf *, size_t, size_t *);
140
-
141
-
142
-
143
- #ifdef BUFFER_STDARG
144
- #include <stdarg.h>
145
-
146
- /* vbufprintf • stdarg variant of formatted printing into a buffer */
147
- void
148
- vbufprintf(struct buf *, const char*, va_list);
149
-
150
- #endif /* def BUFFER_STDARG */
151
-
152
- #endif /* ndef LITHIUM_BUFFER_H */
153
-
154
- /* vim: set filetype=c: */
91
+ #endif
@@ -0,0 +1,28 @@
1
+ #ifndef __HOUDINI_H__
2
+ #define __HOUDINI_H__
3
+
4
+ #include "buffer.h"
5
+
6
+ #ifdef HOUDINI_USE_LOCALE
7
+ # define _isxdigit(c) isxdigit(c)
8
+ # define _isdigit(c) isdigit(c)
9
+ #else
10
+ /*
11
+ * Helper _isdigit methods -- do not trust the current locale
12
+ * */
13
+ # define _isxdigit(c) (strchr("0123456789ABCDEFabcdef", (c)) != NULL)
14
+ # define _isdigit(c) ((c) >= '0' && (c) <= '9')
15
+ #endif
16
+
17
+ extern void houdini_escape_html(struct buf *ob, const uint8_t *src, size_t size);
18
+ extern void houdini_escape_html0(struct buf *ob, const uint8_t *src, size_t size, int secure);
19
+ extern void houdini_unescape_html(struct buf *ob, const uint8_t *src, size_t size);
20
+ extern void houdini_escape_uri(struct buf *ob, const uint8_t *src, size_t size);
21
+ extern void houdini_escape_url(struct buf *ob, const uint8_t *src, size_t size);
22
+ extern void houdini_escape_href(struct buf *ob, const uint8_t *src, size_t size);
23
+ extern void houdini_unescape_uri(struct buf *ob, const uint8_t *src, size_t size);
24
+ extern void houdini_unescape_url(struct buf *ob, const uint8_t *src, size_t size);
25
+ extern void houdini_escape_js(struct buf *ob, const uint8_t *src, size_t size);
26
+ extern void houdini_unescape_js(struct buf *ob, const uint8_t *src, size_t size);
27
+
28
+ #endif
@@ -0,0 +1,108 @@
1
+ #include <assert.h>
2
+ #include <stdio.h>
3
+ #include <string.h>
4
+
5
+ #include "houdini.h"
6
+
7
+ #define ESCAPE_GROW_FACTOR(x) (((x) * 12) / 10)
8
+
9
+ /*
10
+ * The following characters will not be escaped:
11
+ *
12
+ * -_.+!*'(),%#@?=;:/,+&$ alphanum
13
+ *
14
+ * Note that this character set is the addition of:
15
+ *
16
+ * - The characters which are safe to be in an URL
17
+ * - The characters which are *not* safe to be in
18
+ * an URL because they are RESERVED characters.
19
+ *
20
+ * We assume (lazily) that any RESERVED char that
21
+ * appears inside an URL is actually meant to
22
+ * have its native function (i.e. as an URL
23
+ * component/separator) and hence needs no escaping.
24
+ *
25
+ * There are two exceptions: the characters & (amp)
26
+ * and ' (single quote) do not appear in the table.
27
+ * They are meant to appear in the URL as components,
28
+ * yet they require special HTML-entity escaping
29
+ * to generate valid HTML markup.
30
+ *
31
+ * All other characters will be escaped to %XX.
32
+ *
33
+ */
34
+ static const char HREF_SAFE[] = {
35
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
36
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
37
+ 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
38
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
39
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
41
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
43
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
44
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
45
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
46
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
47
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
48
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
49
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
50
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
51
+ };
52
+
53
+ void
54
+ houdini_escape_href(struct buf *ob, const uint8_t *src, size_t size)
55
+ {
56
+ static const char hex_chars[] = "0123456789ABCDEF";
57
+ size_t i = 0, org;
58
+ char hex_str[3];
59
+
60
+ bufgrow(ob, ESCAPE_GROW_FACTOR(size));
61
+ hex_str[0] = '%';
62
+
63
+ while (i < size) {
64
+ org = i;
65
+ while (i < size && HREF_SAFE[src[i]] != 0)
66
+ i++;
67
+
68
+ if (i > org)
69
+ bufput(ob, src + org, i - org);
70
+
71
+ /* escaping */
72
+ if (i >= size)
73
+ break;
74
+
75
+ switch (src[i]) {
76
+ /* amp appears all the time in URLs, but needs
77
+ * HTML-entity escaping to be inside an href */
78
+ case '&':
79
+ BUFPUTSL(ob, "&amp;");
80
+ break;
81
+
82
+ /* the single quote is a valid URL character
83
+ * according to the standard; it needs HTML
84
+ * entity escaping too */
85
+ case '\'':
86
+ BUFPUTSL(ob, "&#x27;");
87
+ break;
88
+
89
+ /* the space can be escaped to %20 or a plus
90
+ * sign. we're going with the generic escape
91
+ * for now. the plus thing is more commonly seen
92
+ * when building GET strings */
93
+ #if 0
94
+ case ' ':
95
+ bufputc(ob, '+');
96
+ break;
97
+ #endif
98
+
99
+ /* every other character goes with a %XX escaping */
100
+ default:
101
+ hex_str[1] = hex_chars[(src[i] >> 4) & 0xF];
102
+ hex_str[2] = hex_chars[src[i] & 0xF];
103
+ bufput(ob, hex_str, 3);
104
+ }
105
+
106
+ i++;
107
+ }
108
+ }
@@ -0,0 +1,84 @@
1
+ #include <assert.h>
2
+ #include <stdio.h>
3
+ #include <string.h>
4
+
5
+ #include "houdini.h"
6
+
7
+ #define ESCAPE_GROW_FACTOR(x) (((x) * 12) / 10) /* this is very scientific, yes */
8
+
9
+ /**
10
+ * According to the OWASP rules (the table below emits the equivalent decimal forms &#39; and &#47;):
11
+ *
12
+ * & --> &amp;
13
+ * < --> &lt;
14
+ * > --> &gt;
15
+ * " --> &quot;
16
+ * ' --> &#x27; &apos; is not recommended
17
+ * / --> &#x2F; forward slash is included as it helps end an HTML entity
18
+ *
19
+ */
20
+ static const char HTML_ESCAPE_TABLE[] = {
21
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
22
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
23
+ 0, 0, 1, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 4,
24
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 6, 0,
25
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
26
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
27
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
28
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
29
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
30
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
31
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
32
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
33
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
34
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
35
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
36
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
37
+ };
38
+
39
+ static const char *HTML_ESCAPES[] = {
40
+ "",
41
+ "&quot;",
42
+ "&amp;",
43
+ "&#39;",
44
+ "&#47;",
45
+ "&lt;",
46
+ "&gt;"
47
+ };
48
+
49
+ void
50
+ houdini_escape_html0(struct buf *ob, const uint8_t *src, size_t size, int secure)
51
+ {
52
+ size_t i = 0, org, esc;
53
+
54
+ bufgrow(ob, ESCAPE_GROW_FACTOR(size));
55
+
56
+ while (i < size) {
57
+ org = i;
58
+ while (i < size && (esc = HTML_ESCAPE_TABLE[src[i]]) == 0)
59
+ i++;
60
+
61
+ if (i > org)
62
+ bufput(ob, src + org, i - org);
63
+
64
+ /* escaping */
65
+ if (i >= size)
66
+ break;
67
+
68
+ /* The forward slash is only escaped in secure mode */
69
+ if (src[i] == '/' && !secure) {
70
+ bufputc(ob, '/');
71
+ } else {
72
+ bufputs(ob, HTML_ESCAPES[esc]);
73
+ }
74
+
75
+ i++;
76
+ }
77
+ }
78
+
79
+ void
80
+ houdini_escape_html(struct buf *ob, const uint8_t *src, size_t size)
81
+ {
82
+ houdini_escape_html0(ob, src, size, 1);
83
+ }
84
+