immunio 0.15.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE +234 -0
- data/README.md +147 -0
- data/bin/immunio +5 -0
- data/lib/immunio.rb +29 -0
- data/lib/immunio/agent.rb +260 -0
- data/lib/immunio/authentication.rb +96 -0
- data/lib/immunio/blocked_app.rb +38 -0
- data/lib/immunio/channel.rb +432 -0
- data/lib/immunio/cli.rb +39 -0
- data/lib/immunio/context.rb +114 -0
- data/lib/immunio/errors.rb +43 -0
- data/lib/immunio/immunio_ca.crt +45 -0
- data/lib/immunio/logger.rb +87 -0
- data/lib/immunio/plugins/action_dispatch.rb +45 -0
- data/lib/immunio/plugins/action_view.rb +431 -0
- data/lib/immunio/plugins/active_record.rb +707 -0
- data/lib/immunio/plugins/active_record_relation.rb +370 -0
- data/lib/immunio/plugins/authlogic.rb +80 -0
- data/lib/immunio/plugins/csrf.rb +24 -0
- data/lib/immunio/plugins/devise.rb +40 -0
- data/lib/immunio/plugins/environment_reporter.rb +69 -0
- data/lib/immunio/plugins/eval.rb +51 -0
- data/lib/immunio/plugins/exception_handler.rb +55 -0
- data/lib/immunio/plugins/gems_tracker.rb +5 -0
- data/lib/immunio/plugins/haml.rb +36 -0
- data/lib/immunio/plugins/http_finisher.rb +50 -0
- data/lib/immunio/plugins/http_tracker.rb +203 -0
- data/lib/immunio/plugins/io.rb +96 -0
- data/lib/immunio/plugins/redirect.rb +42 -0
- data/lib/immunio/plugins/warden.rb +66 -0
- data/lib/immunio/processor.rb +234 -0
- data/lib/immunio/rails.rb +26 -0
- data/lib/immunio/request.rb +139 -0
- data/lib/immunio/rufus_lua_ext/ref.rb +27 -0
- data/lib/immunio/rufus_lua_ext/state.rb +157 -0
- data/lib/immunio/rufus_lua_ext/table.rb +137 -0
- data/lib/immunio/rufus_lua_ext/utils.rb +13 -0
- data/lib/immunio/version.rb +5 -0
- data/lib/immunio/vm.rb +291 -0
- data/lua-hooks/ext/all.c +78 -0
- data/lua-hooks/ext/bitop/README +22 -0
- data/lua-hooks/ext/bitop/bit.c +189 -0
- data/lua-hooks/ext/extconf.rb +38 -0
- data/lua-hooks/ext/libinjection/COPYING +37 -0
- data/lua-hooks/ext/libinjection/libinjection.h +65 -0
- data/lua-hooks/ext/libinjection/libinjection_html5.c +847 -0
- data/lua-hooks/ext/libinjection/libinjection_html5.h +54 -0
- data/lua-hooks/ext/libinjection/libinjection_sqli.c +2301 -0
- data/lua-hooks/ext/libinjection/libinjection_sqli.h +295 -0
- data/lua-hooks/ext/libinjection/libinjection_sqli_data.h +9349 -0
- data/lua-hooks/ext/libinjection/libinjection_xss.c +531 -0
- data/lua-hooks/ext/libinjection/libinjection_xss.h +21 -0
- data/lua-hooks/ext/libinjection/lualib.c +109 -0
- data/lua-hooks/ext/lpeg/HISTORY +90 -0
- data/lua-hooks/ext/lpeg/lpcap.c +537 -0
- data/lua-hooks/ext/lpeg/lpcap.h +43 -0
- data/lua-hooks/ext/lpeg/lpcode.c +986 -0
- data/lua-hooks/ext/lpeg/lpcode.h +34 -0
- data/lua-hooks/ext/lpeg/lpeg-128.gif +0 -0
- data/lua-hooks/ext/lpeg/lpeg.html +1429 -0
- data/lua-hooks/ext/lpeg/lpprint.c +244 -0
- data/lua-hooks/ext/lpeg/lpprint.h +35 -0
- data/lua-hooks/ext/lpeg/lptree.c +1238 -0
- data/lua-hooks/ext/lpeg/lptree.h +77 -0
- data/lua-hooks/ext/lpeg/lptypes.h +149 -0
- data/lua-hooks/ext/lpeg/lpvm.c +355 -0
- data/lua-hooks/ext/lpeg/lpvm.h +58 -0
- data/lua-hooks/ext/lpeg/makefile +55 -0
- data/lua-hooks/ext/lpeg/re.html +498 -0
- data/lua-hooks/ext/lpeg/test.lua +1409 -0
- data/lua-hooks/ext/lua-cmsgpack/CMakeLists.txt +45 -0
- data/lua-hooks/ext/lua-cmsgpack/README.md +115 -0
- data/lua-hooks/ext/lua-cmsgpack/lua_cmsgpack.c +957 -0
- data/lua-hooks/ext/lua-cmsgpack/test.lua +570 -0
- data/lua-hooks/ext/lua-snapshot/LICENSE +7 -0
- data/lua-hooks/ext/lua-snapshot/Makefile +12 -0
- data/lua-hooks/ext/lua-snapshot/README.md +18 -0
- data/lua-hooks/ext/lua-snapshot/dump.lua +15 -0
- data/lua-hooks/ext/lua-snapshot/snapshot.c +455 -0
- data/lua-hooks/ext/lua/COPYRIGHT +34 -0
- data/lua-hooks/ext/lua/lapi.c +1087 -0
- data/lua-hooks/ext/lua/lapi.h +16 -0
- data/lua-hooks/ext/lua/lauxlib.c +652 -0
- data/lua-hooks/ext/lua/lauxlib.h +174 -0
- data/lua-hooks/ext/lua/lbaselib.c +659 -0
- data/lua-hooks/ext/lua/lcode.c +831 -0
- data/lua-hooks/ext/lua/lcode.h +76 -0
- data/lua-hooks/ext/lua/ldblib.c +398 -0
- data/lua-hooks/ext/lua/ldebug.c +638 -0
- data/lua-hooks/ext/lua/ldebug.h +33 -0
- data/lua-hooks/ext/lua/ldo.c +519 -0
- data/lua-hooks/ext/lua/ldo.h +57 -0
- data/lua-hooks/ext/lua/ldump.c +164 -0
- data/lua-hooks/ext/lua/lfunc.c +174 -0
- data/lua-hooks/ext/lua/lfunc.h +34 -0
- data/lua-hooks/ext/lua/lgc.c +710 -0
- data/lua-hooks/ext/lua/lgc.h +110 -0
- data/lua-hooks/ext/lua/linit.c +38 -0
- data/lua-hooks/ext/lua/liolib.c +556 -0
- data/lua-hooks/ext/lua/llex.c +463 -0
- data/lua-hooks/ext/lua/llex.h +81 -0
- data/lua-hooks/ext/lua/llimits.h +128 -0
- data/lua-hooks/ext/lua/lmathlib.c +263 -0
- data/lua-hooks/ext/lua/lmem.c +86 -0
- data/lua-hooks/ext/lua/lmem.h +49 -0
- data/lua-hooks/ext/lua/loadlib.c +705 -0
- data/lua-hooks/ext/lua/loadlib_rel.c +760 -0
- data/lua-hooks/ext/lua/lobject.c +214 -0
- data/lua-hooks/ext/lua/lobject.h +381 -0
- data/lua-hooks/ext/lua/lopcodes.c +102 -0
- data/lua-hooks/ext/lua/lopcodes.h +268 -0
- data/lua-hooks/ext/lua/loslib.c +243 -0
- data/lua-hooks/ext/lua/lparser.c +1339 -0
- data/lua-hooks/ext/lua/lparser.h +82 -0
- data/lua-hooks/ext/lua/lstate.c +214 -0
- data/lua-hooks/ext/lua/lstate.h +169 -0
- data/lua-hooks/ext/lua/lstring.c +111 -0
- data/lua-hooks/ext/lua/lstring.h +31 -0
- data/lua-hooks/ext/lua/lstrlib.c +871 -0
- data/lua-hooks/ext/lua/ltable.c +588 -0
- data/lua-hooks/ext/lua/ltable.h +40 -0
- data/lua-hooks/ext/lua/ltablib.c +287 -0
- data/lua-hooks/ext/lua/ltm.c +75 -0
- data/lua-hooks/ext/lua/ltm.h +54 -0
- data/lua-hooks/ext/lua/lua.c +392 -0
- data/lua-hooks/ext/lua/lua.def +131 -0
- data/lua-hooks/ext/lua/lua.h +388 -0
- data/lua-hooks/ext/lua/lua.rc +28 -0
- data/lua-hooks/ext/lua/lua_dll.rc +26 -0
- data/lua-hooks/ext/lua/luac.c +200 -0
- data/lua-hooks/ext/lua/luac.rc +1 -0
- data/lua-hooks/ext/lua/luaconf.h +763 -0
- data/lua-hooks/ext/lua/luaconf.h.in +724 -0
- data/lua-hooks/ext/lua/luaconf.h.orig +763 -0
- data/lua-hooks/ext/lua/lualib.h +53 -0
- data/lua-hooks/ext/lua/lundump.c +227 -0
- data/lua-hooks/ext/lua/lundump.h +36 -0
- data/lua-hooks/ext/lua/lvm.c +767 -0
- data/lua-hooks/ext/lua/lvm.h +36 -0
- data/lua-hooks/ext/lua/lzio.c +82 -0
- data/lua-hooks/ext/lua/lzio.h +67 -0
- data/lua-hooks/ext/lua/print.c +227 -0
- data/lua-hooks/ext/luautf8/README.md +152 -0
- data/lua-hooks/ext/luautf8/lutf8lib.c +1274 -0
- data/lua-hooks/ext/luautf8/unidata.h +3064 -0
- data/lua-hooks/lib/boot.lua +254 -0
- data/lua-hooks/lib/encode.lua +4 -0
- data/lua-hooks/lib/lexers/LICENSE +21 -0
- data/lua-hooks/lib/lexers/bash.lua +134 -0
- data/lua-hooks/lib/lexers/bash_dqstr.lua +62 -0
- data/lua-hooks/lib/lexers/css.lua +216 -0
- data/lua-hooks/lib/lexers/html.lua +106 -0
- data/lua-hooks/lib/lexers/javascript.lua +68 -0
- data/lua-hooks/lib/lexers/lexer.lua +1575 -0
- data/lua-hooks/lib/lexers/markers.lua +33 -0
- metadata +308 -0
@@ -0,0 +1,54 @@
|
|
1
|
+
#ifndef LIBINJECTION_HTML5
|
2
|
+
#define LIBINJECTION_HTML5
|
3
|
+
|
4
|
+
#ifdef __cplusplus
|
5
|
+
extern "C" {
|
6
|
+
#endif
|
7
|
+
|
8
|
+
/* pull in size_t */
|
9
|
+
|
10
|
+
#include <stddef.h>
|
11
|
+
|
12
|
+
/* Token kinds stored in h5_state.token_type.
 * Values are the plain 0..9 sequence in declaration order.
 */
enum html5_type {
    DATA_TEXT          = 0,
    TAG_NAME_OPEN      = 1,
    TAG_NAME_CLOSE     = 2,
    TAG_NAME_SELFCLOSE = 3,
    TAG_DATA           = 4,
    TAG_CLOSE          = 5,
    ATTR_NAME          = 6,
    ATTR_VALUE         = 7,
    TAG_COMMENT        = 8,
    DOCTYPE            = 9
};
|
24
|
+
|
25
|
+
/* Flags accepted by libinjection_h5_init().
 * Values are the plain 0..4 sequence in declaration order.
 */
enum html5_flags {
    DATA_STATE         = 0,
    VALUE_NO_QUOTE     = 1,
    VALUE_SINGLE_QUOTE = 2,
    VALUE_DOUBLE_QUOTE = 3,
    VALUE_BACK_QUOTE   = 4
};
|
32
|
+
|
33
|
+
struct h5_state;
|
34
|
+
typedef int (*ptr_html5_state)(struct h5_state*);
|
35
|
+
|
36
|
+
typedef struct h5_state {
|
37
|
+
const char* s;
|
38
|
+
size_t len;
|
39
|
+
size_t pos;
|
40
|
+
int is_close;
|
41
|
+
ptr_html5_state state;
|
42
|
+
const char* token_start;
|
43
|
+
size_t token_len;
|
44
|
+
enum html5_type token_type;
|
45
|
+
} h5_state_t;
|
46
|
+
|
47
|
+
|
48
|
+
/* Prepare `hs` for the buffer `s` of `len` bytes, using `flags`.
 * NOTE(review): the implementation is in libinjection_html5.c, not visible
 * here; presumably `flags` selects the initial tokenizer state -- confirm.
 */
void libinjection_h5_init(h5_state_t* hs, const char* s, size_t len, enum html5_flags flags);

/* Advance the tokenizer held in `hs`.
 * NOTE(review): the meaning of the int result is defined by the
 * implementation, which is not visible here.
 */
int libinjection_h5_next(h5_state_t* hs);
|
50
|
+
|
51
|
+
#ifdef __cplusplus
|
52
|
+
}
|
53
|
+
#endif
|
54
|
+
#endif
|
@@ -0,0 +1,2301 @@
|
|
1
|
+
/**
|
2
|
+
* Copyright 2012,2013 Nick Galbreath
|
3
|
+
* nickg@client9.com
|
4
|
+
* BSD License -- see COPYING.txt for details
|
5
|
+
*
|
6
|
+
* https://libinjection.client9.com/
|
7
|
+
*
|
8
|
+
*/
|
9
|
+
|
10
|
+
#include <string.h>
|
11
|
+
#include <stdlib.h>
|
12
|
+
#include <stdio.h>
|
13
|
+
#include <ctype.h>
|
14
|
+
#include <assert.h>
|
15
|
+
#include <stddef.h>
|
16
|
+
|
17
|
+
#include "libinjection.h"
|
18
|
+
#include "libinjection_sqli.h"
|
19
|
+
#include "libinjection_sqli_data.h"
|
20
|
+
|
21
|
+
/* Library version string */
#define LIBINJECTION_VERSION "3.9.1"

/* Size in bytes of the 'val' member of stoken_t */
#define LIBINJECTION_SQLI_TOKEN_SIZE sizeof(((stoken_t*)(0))->val)
#define LIBINJECTION_SQLI_MAX_TOKENS 5

/* Boolean constants, only supplied when nobody else defined them */
#if !defined(TRUE)
#define TRUE 1
#endif
#if !defined(FALSE)
#define FALSE 0
#endif

/* Characters of interest when handling quoted strings */
#define CHAR_NULL '\0'
#define CHAR_SINGLE '\''
#define CHAR_DOUBLE '"'
#define CHAR_TICK '`'

/* Locale-independent digit test, faster than calling out to libc isdigit.
 * Any previous definition is discarded (#undef of an undefined name is a
 * no-op).
 */
#undef ISDIGIT
#define ISDIGIT(a) ((unsigned)((a) - '0') <= 9)

/* Folding trace hook; flip the condition to 1 to enable the printf. */
#if 0
#define FOLD_DEBUG printf("%d \t more=%d pos=%d left=%d\n", __LINE__, more, (int)pos, (int)left);
#else
#define FOLD_DEBUG
#endif
|
49
|
+
|
50
|
+
/*
 * Token type codes (not public yet).
 *
 * Apart from TYPE_NONE, each code is the value of a single character
 * constant (character constants already have type int in C).
 */
typedef enum {
    TYPE_NONE           = 0,
    TYPE_KEYWORD        = 'k',
    TYPE_UNION          = 'U',
    TYPE_GROUP          = 'B',
    TYPE_EXPRESSION     = 'E',
    TYPE_SQLTYPE        = 't',
    TYPE_FUNCTION       = 'f',
    TYPE_BAREWORD       = 'n',
    TYPE_NUMBER         = '1',
    TYPE_VARIABLE       = 'v',
    TYPE_STRING         = 's',
    TYPE_OPERATOR       = 'o',
    TYPE_LOGIC_OPERATOR = '&',
    TYPE_COMMENT        = 'c',
    TYPE_COLLATE        = 'A',
    TYPE_LEFTPARENS     = '(',
    TYPE_RIGHTPARENS    = ')',  /* not used? */
    TYPE_LEFTBRACE      = '{',
    TYPE_RIGHTBRACE     = '}',
    TYPE_DOT            = '.',
    TYPE_COMMA          = ',',
    TYPE_COLON          = ':',
    TYPE_SEMICOLON      = ';',
    TYPE_TSQL           = 'T',  /* TSQL start */
    TYPE_UNKNOWN        = '?',
    TYPE_EVIL           = 'X',  /* unparsable, abort */
    TYPE_FINGERPRINT    = 'F',  /* not really a token */
    TYPE_BACKSLASH      = '\\'
} sqli_token_types;
|
83
|
+
|
84
|
+
/**
|
85
|
+
* Initializes parsing state
|
86
|
+
*
|
87
|
+
*/
|
88
|
+
static char flag2delim(int flag)
|
89
|
+
{
|
90
|
+
if (flag & FLAG_QUOTE_SINGLE) {
|
91
|
+
return CHAR_SINGLE;
|
92
|
+
} else if (flag & FLAG_QUOTE_DOUBLE) {
|
93
|
+
return CHAR_DOUBLE;
|
94
|
+
} else {
|
95
|
+
return CHAR_NULL;
|
96
|
+
}
|
97
|
+
}
|
98
|
+
|
99
|
+
/* memchr2 finds a string of 2 characters inside another string
 * This a specialized version of "memmem" or "memchr".
 * 'memmem' doesn't exist on all platforms
 *
 * Returns a pointer to the first position where c0 is immediately
 * followed by c1, or NULL when there is no such position (including
 * when fewer than 2 bytes are available).
 *
 * Porting notes: this is just a special version of
 *   astring.find("AB")
 *
 */
static const char *
memchr2(const char *haystack, size_t haystack_len, char c0, char c1)
{
    const char *cur = haystack;
    const char *last;

    if (haystack_len < 2) {
        return NULL;
    }

    /* Computed only after the length check, so that a zero length never
     * forms a pointer before the start of the buffer.
     */
    last = haystack + haystack_len - 1;

    while (cur < last) {
        /* safe since cur < len - 1 always */
        if (cur[0] == c0 && cur[1] == c1) {
            return cur;
        }
        cur += 1;
    }

    return NULL;
}
|
127
|
+
|
128
|
+
/**
 * memmem might not exist on some systems
 *
 * Returns a pointer to the first occurrence of the nlen bytes at 'needle'
 * inside the hlen bytes at 'haystack', or NULL when there is none.
 * 'needle' must be at least 2 bytes long (asserted).
 */
static const char *
my_memmem(const char* haystack, size_t hlen, const char* needle, size_t nlen)
{
    const char* cur;
    const char* last;
    assert(haystack);
    assert(needle);
    assert(nlen > 1);

    /* A needle longer than the haystack cannot match; bail out before
     * 'haystack + hlen - nlen' would point in front of the buffer.
     */
    if (hlen < nlen) {
        return NULL;
    }

    last = haystack + hlen - nlen;
    for (cur = haystack; cur <= last; ++cur) {
        /* cheap first-byte test before the full compare */
        if (cur[0] == needle[0] && memcmp(cur, needle, nlen) == 0) {
            return cur;
        }
    }
    return NULL;
}
|
147
|
+
|
148
|
+
/** Length of the leading run of 's' made only of characters in 'accept'.
 *
 * Unlike the C library 'strspn', the input does not have to be a
 * null-terminated string; at most 'len' bytes are examined.
 *
 * Membership is tested with strchr(), so a 0 byte in 's' matches the
 * terminator of 'accept' and therefore counts as accepted.
 *
 * Porting notes:
 *   if accept is 'ABC', then this function would be similar to
 *   a_regexp.match(a_str, '[ABC]*'),
 */
static size_t
strlenspn(const char *s, size_t len, const char *accept)
{
    size_t n = 0;

    while (n < len && strchr(accept, s[n]) != NULL) {
        ++n;
    }
    return n;
}
|
174
|
+
|
175
|
+
/* Length of the leading run of 's' that contains none of the characters
 * in 'accept'; at most 'len' bytes are examined.
 *
 * Membership is tested with strchr(), so a 0 byte in 's' matches the
 * terminator of 'accept' and ends the run.
 */
static size_t
strlencspn(const char *s, size_t len, const char *accept)
{
    size_t n = 0;

    while (n < len && strchr(accept, s[n]) == NULL) {
        ++n;
    }
    return n;
}
|
189
|
+
/* Returns 1 when 'ch' is one of the bytes treated as SQL whitespace,
 * 0 otherwise:
 *
 *   ' '   0x20 \040  space
 *   '\t'  0x09 \011  horizontal tab
 *   '\n'  0x0a \012  new line
 *   '\v'  0x0b \013  vertical tab
 *   '\f'  0x0c \014  new page
 *   '\r'  0x0d \015  carriage return
 *         0x00 \000  null (oracle)
 *         0xa0 \240  is latin1
 */
static int char_is_white(char ch) {
    switch (ch) {
    case ' ':
    case '\t':
    case '\n':
    case '\v':
    case '\f':
    case '\r':
    case '\240':
    case '\000':
        return 1;
    default:
        return 0;
    }
}
|
201
|
+
|
202
|
+
/* DANGER DANGER
 * Very specialized comparison, not a general strcasecmp.
 *
 *   a : a C string that is already ALL UPPER CASE
 *   b : arbitrary memory, exactly n bytes are examined
 *
 * Only ASCII 'a'..'z' in 'b' is folded to upper case before comparing;
 * 'a' is used as-is.  This avoids the locale-dependent (and slower)
 * libc routine.
 *
 * Result:
 *   difference of the first mismatching pair (a[i] - folded b[i]);
 *   -1 if 'a' ends inside the n bytes on a matching 0 byte;
 *    0 if all n bytes match and 'a' ends exactly there;
 *    1 if all n bytes match but 'a' is longer.
 */
static int cstrcasecmp(const char *a, const char *b, size_t n)
{
    size_t i;
    char folded;

    for (i = 0; i < n; ++i) {
        folded = b[i];
        if (folded >= 'a' && folded <= 'z') {
            folded -= 0x20;
        }
        if (a[i] != folded) {
            return a[i] - folded;
        }
        if (a[i] == '\0') {
            return -1;
        }
    }

    return (a[i] == 0) ? 0 : 1;
}
|
232
|
+
|
233
|
+
/**
 * Readability helper: case sensitive equality of two C strings.
 * Returns 1 when strcmp() reports them equal, 0 otherwise.
 */
static int streq(const char *a, const char *b)
{
    return !strcmp(a, b);
}
|
241
|
+
|
242
|
+
/**
|
243
|
+
*
|
244
|
+
*
|
245
|
+
*
|
246
|
+
* Porting Notes:
|
247
|
+
* given a mapping/hash of string to char
|
248
|
+
* this is just
|
249
|
+
* typecode = mapping[key.upper()]
|
250
|
+
*/
|
251
|
+
|
252
|
+
static char bsearch_keyword_type(const char *key, size_t len,
|
253
|
+
const keyword_t * keywords, size_t numb)
|
254
|
+
{
|
255
|
+
size_t pos;
|
256
|
+
size_t left = 0;
|
257
|
+
size_t right = numb - 1;
|
258
|
+
|
259
|
+
while (left < right) {
|
260
|
+
pos = (left + right) >> 1;
|
261
|
+
|
262
|
+
/* arg0 = upper case only, arg1 = mixed case */
|
263
|
+
if (cstrcasecmp(keywords[pos].word, key, len) < 0) {
|
264
|
+
left = pos + 1;
|
265
|
+
} else {
|
266
|
+
right = pos;
|
267
|
+
}
|
268
|
+
}
|
269
|
+
if ((left == right) && cstrcasecmp(keywords[left].word, key, len) == 0) {
|
270
|
+
return keywords[left].type;
|
271
|
+
} else {
|
272
|
+
return CHAR_NULL;
|
273
|
+
}
|
274
|
+
}
|
275
|
+
|
276
|
+
static char is_keyword(const char* key, size_t len)
|
277
|
+
{
|
278
|
+
return bsearch_keyword_type(key, len, sql_keywords, sql_keywords_sz);
|
279
|
+
}
|
280
|
+
|
281
|
+
/* st_token methods
|
282
|
+
*
|
283
|
+
* The following functions manipulate the stoken_t type
|
284
|
+
*
|
285
|
+
*
|
286
|
+
*/
|
287
|
+
|
288
|
+
/* Resets every byte of the token to zero. */
static void st_clear(stoken_t * st)
{
    memset(st, 0, sizeof *st);
}
|
292
|
+
|
293
|
+
/* Fills 'st' with a one-character token of type 'stype' at offset 'pos'.
 * The stored length is always 1; the 'len' argument is accepted but
 * ignored.
 */
static void st_assign_char(stoken_t * st, const char stype, size_t pos, size_t len,
                           const char value)
{
    /* silence the unused-parameter warning */
    (void)len;

    st->val[0] = value;
    st->val[1] = CHAR_NULL;
    st->len = 1;
    st->pos = pos;
    st->type = (char) stype;
}
|
304
|
+
|
305
|
+
/* Fills 'st' with a token of type 'stype' at offset 'pos' whose text is
 * the first 'len' bytes of 'value'.  Text that does not fit is truncated
 * to LIBINJECTION_SQLI_TOKEN_SIZE - 1 bytes; the stored text is always
 * null terminated and st->len holds the stored (possibly truncated) length.
 */
static void st_assign(stoken_t * st, const char stype,
                      size_t pos, size_t len, const char* value)
{
    const size_t capacity = LIBINJECTION_SQLI_TOKEN_SIZE;
    size_t stored = len;

    if (stored >= capacity) {
        stored = capacity - 1;
    }

    st->type = (char) stype;
    st->pos = pos;
    st->len = stored;
    memcpy(st->val, value, stored);
    st->val[stored] = CHAR_NULL;
}
|
316
|
+
|
317
|
+
/* Byte-wise copy of the whole token 'src' into 'dest'. */
static void st_copy(stoken_t * dest, const stoken_t * src)
{
    memcpy(dest, src, sizeof *dest);
}
|
321
|
+
|
322
|
+
static int st_is_arithmetic_op(const stoken_t* st)
|
323
|
+
{
|
324
|
+
const char ch = st->val[0];
|
325
|
+
return (st->type == TYPE_OPERATOR && st->len == 1 &&
|
326
|
+
(ch == '*' || ch == '/' || ch == '-' || ch == '+' || ch == '%'));
|
327
|
+
}
|
328
|
+
|
329
|
+
static int st_is_unary_op(const stoken_t * st)
|
330
|
+
{
|
331
|
+
const char* str = st->val;
|
332
|
+
const size_t len = st->len;
|
333
|
+
|
334
|
+
if (st->type != TYPE_OPERATOR) {
|
335
|
+
return FALSE;
|
336
|
+
}
|
337
|
+
|
338
|
+
switch (len) {
|
339
|
+
case 1:
|
340
|
+
return *str == '+' || *str == '-' || *str == '!' || *str == '~';
|
341
|
+
case 2:
|
342
|
+
return str[0] == '!' && str[1] == '!';
|
343
|
+
case 3:
|
344
|
+
return cstrcasecmp("NOT", str, 3) == 0;
|
345
|
+
default:
|
346
|
+
return FALSE;
|
347
|
+
}
|
348
|
+
}
|
349
|
+
|
350
|
+
/* Parsers
|
351
|
+
*
|
352
|
+
*
|
353
|
+
*/
|
354
|
+
|
355
|
+
static size_t parse_white(struct libinjection_sqli_state * sf)
|
356
|
+
{
|
357
|
+
return sf->pos + 1;
|
358
|
+
}
|
359
|
+
|
360
|
+
static size_t parse_operator1(struct libinjection_sqli_state * sf)
|
361
|
+
{
|
362
|
+
const char *cs = sf->s;
|
363
|
+
size_t pos = sf->pos;
|
364
|
+
|
365
|
+
st_assign_char(sf->current, TYPE_OPERATOR, pos, 1, cs[pos]);
|
366
|
+
return pos + 1;
|
367
|
+
}
|
368
|
+
|
369
|
+
static size_t parse_other(struct libinjection_sqli_state * sf)
|
370
|
+
{
|
371
|
+
const char *cs = sf->s;
|
372
|
+
size_t pos = sf->pos;
|
373
|
+
|
374
|
+
st_assign_char(sf->current, TYPE_UNKNOWN, pos, 1, cs[pos]);
|
375
|
+
return pos + 1;
|
376
|
+
}
|
377
|
+
|
378
|
+
static size_t parse_char(struct libinjection_sqli_state * sf)
|
379
|
+
{
|
380
|
+
const char *cs = sf->s;
|
381
|
+
size_t pos = sf->pos;
|
382
|
+
|
383
|
+
st_assign_char(sf->current, cs[pos], pos, 1, cs[pos]);
|
384
|
+
return pos + 1;
|
385
|
+
}
|
386
|
+
|
387
|
+
static size_t parse_eol_comment(struct libinjection_sqli_state * sf)
|
388
|
+
{
|
389
|
+
const char *cs = sf->s;
|
390
|
+
const size_t slen = sf->slen;
|
391
|
+
size_t pos = sf->pos;
|
392
|
+
|
393
|
+
const char *endpos =
|
394
|
+
(const char *) memchr((const void *) (cs + pos), '\n', slen - pos);
|
395
|
+
if (endpos == NULL) {
|
396
|
+
st_assign(sf->current, TYPE_COMMENT, pos, slen - pos, cs + pos);
|
397
|
+
return slen;
|
398
|
+
} else {
|
399
|
+
st_assign(sf->current, TYPE_COMMENT, pos, (size_t)(endpos - cs) - pos, cs + pos);
|
400
|
+
return (size_t)((endpos - cs) + 1);
|
401
|
+
}
|
402
|
+
}
|
403
|
+
|
404
|
+
/** In Ansi mode, hash is an operator
|
405
|
+
* In MYSQL mode, it's a EOL comment like '--'
|
406
|
+
*/
|
407
|
+
static size_t parse_hash(struct libinjection_sqli_state * sf)
|
408
|
+
{
|
409
|
+
sf->stats_comment_hash += 1;
|
410
|
+
if (sf->flags & FLAG_SQL_MYSQL) {
|
411
|
+
sf->stats_comment_hash += 1;
|
412
|
+
return parse_eol_comment(sf);
|
413
|
+
} else {
|
414
|
+
st_assign_char(sf->current, TYPE_OPERATOR, sf->pos, 1, '#');
|
415
|
+
return sf->pos + 1;
|
416
|
+
}
|
417
|
+
}
|
418
|
+
|
419
|
+
static size_t parse_dash(struct libinjection_sqli_state * sf)
|
420
|
+
{
|
421
|
+
const char *cs = sf->s;
|
422
|
+
const size_t slen = sf->slen;
|
423
|
+
size_t pos = sf->pos;
|
424
|
+
|
425
|
+
/*
|
426
|
+
* five cases
|
427
|
+
* 1) --[white] this is always a SQL comment
|
428
|
+
* 2) --[EOF] this is a comment
|
429
|
+
* 3) --[notwhite] in MySQL this is NOT a comment but two unary operators
|
430
|
+
* 4) --[notwhite] everyone else thinks this is a comment
|
431
|
+
* 5) -[not dash] '-' is a unary operator
|
432
|
+
*/
|
433
|
+
|
434
|
+
if (pos + 2 < slen && cs[pos + 1] == '-' && char_is_white(cs[pos+2]) ) {
|
435
|
+
return parse_eol_comment(sf);
|
436
|
+
} else if (pos +2 == slen && cs[pos + 1] == '-') {
|
437
|
+
return parse_eol_comment(sf);
|
438
|
+
} else if (pos + 1 < slen && cs[pos + 1] == '-' && (sf->flags & FLAG_SQL_ANSI)) {
|
439
|
+
/* --[not-white] not-white case:
|
440
|
+
*
|
441
|
+
*/
|
442
|
+
sf->stats_comment_ddx += 1;
|
443
|
+
return parse_eol_comment(sf);
|
444
|
+
} else {
|
445
|
+
st_assign_char(sf->current, TYPE_OPERATOR, pos, 1, '-');
|
446
|
+
return pos + 1;
|
447
|
+
}
|
448
|
+
}
|
449
|
+
|
450
|
+
|
451
|
+
/** Detects MySQL "conditional" comments, i.e. comments whose opening
 * slash-star is directly followed by '!'.  These are simply banned now;
 * earlier versions tried to parse their contents.
 *
 * For reference (x stands for '*'):
 *   the form is /x![anything]x/ or /x!12345[anything] x/
 *
 * Mysql 3 (maybe 4), allowed this:
 *    /x!0selectx/ 1;
 * where 0 could be any number.
 *
 * The last version of MySQL 3 was in 2003.
 * It is unclear if the MySQL 3 syntax was allowed
 * in MySQL 4.  The last version of MySQL 4 was in 2008
 *
 * The caller has already established cs[pos] == '/' and
 * cs[pos+1] == '*'.  Returns 1 when a third character exists and is '!',
 * 0 otherwise.
 */
static size_t is_mysql_comment(const char *cs, const size_t len, size_t pos)
{
    const size_t bang = pos + 2;

    return (bang < len && cs[bang] == '!') ? 1 : 0;
}
|
490
|
+
|
491
|
+
static size_t parse_slash(struct libinjection_sqli_state * sf)
|
492
|
+
{
|
493
|
+
const char* ptr;
|
494
|
+
size_t clen;
|
495
|
+
const char *cs = sf->s;
|
496
|
+
const size_t slen = sf->slen;
|
497
|
+
size_t pos = sf->pos;
|
498
|
+
const char* cur = cs + pos;
|
499
|
+
char ctype = TYPE_COMMENT;
|
500
|
+
size_t pos1 = pos + 1;
|
501
|
+
if (pos1 == slen || cs[pos1] != '*') {
|
502
|
+
return parse_operator1(sf);
|
503
|
+
}
|
504
|
+
|
505
|
+
/*
|
506
|
+
* skip over initial '/x'
|
507
|
+
*/
|
508
|
+
ptr = memchr2(cur + 2, slen - (pos + 2), '*', '/');
|
509
|
+
|
510
|
+
/*
|
511
|
+
* (ptr == NULL) causes false positive in cppcheck 1.61
|
512
|
+
* casting to type seems to fix it
|
513
|
+
*/
|
514
|
+
if (ptr == (const char*) NULL) {
|
515
|
+
/* till end of line */
|
516
|
+
clen = slen - pos;
|
517
|
+
} else {
|
518
|
+
clen = (size_t)(ptr + 2 - cur);
|
519
|
+
}
|
520
|
+
|
521
|
+
/*
|
522
|
+
* postgresql allows nested comments which makes
|
523
|
+
* this is incompatible with parsing so
|
524
|
+
* if we find a '/x' inside the coment, then
|
525
|
+
* make a new token.
|
526
|
+
*
|
527
|
+
* Also, Mysql's "conditional" comments for version
|
528
|
+
* are an automatic black ban!
|
529
|
+
*/
|
530
|
+
|
531
|
+
if (memchr2(cur + 2, (size_t)(ptr - (cur + 1)), '/', '*') != NULL) {
|
532
|
+
ctype = TYPE_EVIL;
|
533
|
+
} else if (is_mysql_comment(cs, slen, pos)) {
|
534
|
+
ctype = TYPE_EVIL;
|
535
|
+
}
|
536
|
+
|
537
|
+
st_assign(sf->current, ctype, pos, clen, cs + pos);
|
538
|
+
return pos + clen;
|
539
|
+
}
|
540
|
+
|
541
|
+
|
542
|
+
static size_t parse_backslash(struct libinjection_sqli_state * sf)
|
543
|
+
{
|
544
|
+
const char *cs = sf->s;
|
545
|
+
const size_t slen = sf->slen;
|
546
|
+
size_t pos = sf->pos;
|
547
|
+
|
548
|
+
/*
|
549
|
+
* Weird MySQL alias for NULL, "\N" (capital N only)
|
550
|
+
*/
|
551
|
+
if (pos + 1 < slen && cs[pos +1] == 'N') {
|
552
|
+
st_assign(sf->current, TYPE_NUMBER, pos, 2, cs + pos);
|
553
|
+
return pos + 2;
|
554
|
+
} else {
|
555
|
+
st_assign_char(sf->current, TYPE_BACKSLASH, pos, 1, cs[pos]);
|
556
|
+
return pos + 1;
|
557
|
+
}
|
558
|
+
}
|
559
|
+
|
560
|
+
static size_t parse_operator2(struct libinjection_sqli_state * sf)
|
561
|
+
{
|
562
|
+
char ch;
|
563
|
+
const char *cs = sf->s;
|
564
|
+
const size_t slen = sf->slen;
|
565
|
+
size_t pos = sf->pos;
|
566
|
+
|
567
|
+
if (pos + 1 >= slen) {
|
568
|
+
return parse_operator1(sf);
|
569
|
+
}
|
570
|
+
|
571
|
+
if (pos + 2 < slen &&
|
572
|
+
cs[pos] == '<' &&
|
573
|
+
cs[pos + 1] == '=' &&
|
574
|
+
cs[pos + 2] == '>') {
|
575
|
+
/*
|
576
|
+
* special 3-char operator
|
577
|
+
*/
|
578
|
+
st_assign(sf->current, TYPE_OPERATOR, pos, 3, cs + pos);
|
579
|
+
return pos + 3;
|
580
|
+
}
|
581
|
+
|
582
|
+
ch = sf->lookup(sf, LOOKUP_OPERATOR, cs + pos, 2);
|
583
|
+
if (ch != CHAR_NULL) {
|
584
|
+
st_assign(sf->current, ch, pos, 2, cs+pos);
|
585
|
+
return pos + 2;
|
586
|
+
}
|
587
|
+
|
588
|
+
/*
|
589
|
+
* not an operator.. what to do with the two
|
590
|
+
* characters we got?
|
591
|
+
*/
|
592
|
+
|
593
|
+
if (cs[pos] == ':') {
|
594
|
+
/* ':' is not an operator */
|
595
|
+
st_assign(sf->current, TYPE_COLON, pos, 1, cs+pos);
|
596
|
+
return pos + 1;
|
597
|
+
} else {
|
598
|
+
/*
|
599
|
+
* must be a single char operator
|
600
|
+
*/
|
601
|
+
return parse_operator1(sf);
|
602
|
+
}
|
603
|
+
}
|
604
|
+
|
605
|
+
/*
 * Decides whether the character following 'end' is backslash-escaped by
 * counting the run of consecutive backslashes that finishes at 'end' and
 * does not extend before 'start'.
 *
 * Ok!   " \"   "  one backslash = escaped!
 *       " \\"  "  two backslash = not escaped!
 *       "\\\"  "  three backslash = escaped!
 *
 * Returns 1 when the run length is odd, 0 when it is even (including an
 * empty range, i.e. end < start).
 */
static int is_backslash_escaped(const char* end, const char* start)
{
    const char* ptr = end;
    size_t run = 0;

    while (ptr >= start && *ptr == '\\') {
        run += 1;
        if (ptr == start) {
            /* stop here instead of stepping to start - 1 */
            break;
        }
        ptr -= 1;
    }

    /* if number of backslashes is odd, it is escaped */
    return (int)(run & 1);
}
|
622
|
+
|
623
|
+
/* Returns 1 when the character after 'cur' exists (is before 'end') and
 * equals the character at 'cur', i.e. the delimiter is doubled; 0
 * otherwise.
 */
static size_t is_double_delim_escaped(const char* cur, const char* end)
{
    if (cur + 1 >= end) {
        return 0;
    }
    return cur[1] == cur[0];
}
|
627
|
+
|
628
|
+
/* Look forward for doubling of delimiter
|
629
|
+
*
|
630
|
+
* case 'foo''bar' --> foo''bar
|
631
|
+
*
|
632
|
+
* ending quote isn't duplicated (i.e. escaped)
|
633
|
+
* since it's the wrong char or EOL
|
634
|
+
*
|
635
|
+
*/
|
636
|
+
static size_t parse_string_core(const char *cs, const size_t len, size_t pos,
|
637
|
+
stoken_t * st, char delim, size_t offset)
|
638
|
+
{
|
639
|
+
/*
|
640
|
+
* offset is to skip the perhaps first quote char
|
641
|
+
*/
|
642
|
+
const char *qpos =
|
643
|
+
(const char *) memchr((const void *) (cs + pos + offset), delim,
|
644
|
+
len - pos - offset);
|
645
|
+
|
646
|
+
/*
|
647
|
+
* then keep string open/close info
|
648
|
+
*/
|
649
|
+
if (offset > 0) {
|
650
|
+
/*
|
651
|
+
* this is real quote
|
652
|
+
*/
|
653
|
+
st->str_open = delim;
|
654
|
+
} else {
|
655
|
+
/*
|
656
|
+
* this was a simulated quote
|
657
|
+
*/
|
658
|
+
st->str_open = CHAR_NULL;
|
659
|
+
}
|
660
|
+
|
661
|
+
while (TRUE) {
|
662
|
+
if (qpos == NULL) {
|
663
|
+
/*
|
664
|
+
* string ended with no trailing quote
|
665
|
+
* assign what we have
|
666
|
+
*/
|
667
|
+
st_assign(st, TYPE_STRING, pos + offset, len - pos - offset, cs + pos + offset);
|
668
|
+
st->str_close = CHAR_NULL;
|
669
|
+
return len;
|
670
|
+
} else if ( is_backslash_escaped(qpos - 1, cs + pos + offset)) {
|
671
|
+
/* keep going, move ahead one character */
|
672
|
+
qpos =
|
673
|
+
(const char *) memchr((const void *) (qpos + 1), delim,
|
674
|
+
(size_t)((cs + len) - (qpos + 1)));
|
675
|
+
continue;
|
676
|
+
} else if (is_double_delim_escaped(qpos, cs + len)) {
|
677
|
+
/* keep going, move ahead two characters */
|
678
|
+
qpos =
|
679
|
+
(const char *) memchr((const void *) (qpos + 2), delim,
|
680
|
+
(size_t)((cs + len) - (qpos + 2)));
|
681
|
+
continue;
|
682
|
+
} else {
|
683
|
+
/* hey it's a normal string */
|
684
|
+
st_assign(st, TYPE_STRING, pos + offset,
|
685
|
+
(size_t)(qpos - (cs + pos + offset)), cs + pos + offset);
|
686
|
+
st->str_close = delim;
|
687
|
+
return (size_t)(qpos - cs + 1);
|
688
|
+
}
|
689
|
+
}
|
690
|
+
}
|
691
|
+
|
692
|
+
/**
|
693
|
+
* Used when first char is a ' or "
|
694
|
+
*/
|
695
|
+
static size_t parse_string(struct libinjection_sqli_state * sf)
|
696
|
+
{
|
697
|
+
const char *cs = sf->s;
|
698
|
+
const size_t slen = sf->slen;
|
699
|
+
size_t pos = sf->pos;
|
700
|
+
|
701
|
+
/*
|
702
|
+
* assert cs[pos] == single or double quote
|
703
|
+
*/
|
704
|
+
return parse_string_core(cs, slen, pos, sf->current, cs[pos], 1);
|
705
|
+
}
|
706
|
+
|
707
|
+
/**
|
708
|
+
* Used when first char is:
|
709
|
+
* N or n: mysql "National Character set"
|
710
|
+
* E : psql "Escaped String"
|
711
|
+
*/
|
712
|
+
static size_t parse_estring(struct libinjection_sqli_state * sf)
|
713
|
+
{
|
714
|
+
const char *cs = sf->s;
|
715
|
+
const size_t slen = sf->slen;
|
716
|
+
size_t pos = sf->pos;
|
717
|
+
|
718
|
+
if (pos + 2 >= slen || cs[pos+1] != CHAR_SINGLE) {
|
719
|
+
return parse_word(sf);
|
720
|
+
}
|
721
|
+
return parse_string_core(cs, slen, pos, sf->current, CHAR_SINGLE, 2);
|
722
|
+
}
|
723
|
+
|
724
|
+
static size_t parse_ustring(struct libinjection_sqli_state * sf)
|
725
|
+
{
|
726
|
+
const char *cs = sf->s;
|
727
|
+
size_t slen = sf->slen;
|
728
|
+
size_t pos = sf->pos;
|
729
|
+
|
730
|
+
if (pos + 2 < slen && cs[pos+1] == '&' && cs[pos+2] == '\'') {
|
731
|
+
sf->pos += 2;
|
732
|
+
pos = parse_string(sf);
|
733
|
+
sf->current->str_open = 'u';
|
734
|
+
if (sf->current->str_close == '\'') {
|
735
|
+
sf->current->str_close = 'u';
|
736
|
+
}
|
737
|
+
return pos;
|
738
|
+
} else {
|
739
|
+
return parse_word(sf);
|
740
|
+
}
|
741
|
+
}
|
742
|
+
|
743
|
+
static size_t parse_qstring_core(struct libinjection_sqli_state * sf, size_t offset)
|
744
|
+
{
|
745
|
+
char ch;
|
746
|
+
const char *strend;
|
747
|
+
const char *cs = sf->s;
|
748
|
+
size_t slen = sf->slen;
|
749
|
+
size_t pos = sf->pos + offset;
|
750
|
+
|
751
|
+
/* if we are already at end of string..
|
752
|
+
if current char is not q or Q
|
753
|
+
if we don't have 2 more chars
|
754
|
+
if char2 != a single quote
|
755
|
+
then, just treat as word
|
756
|
+
*/
|
757
|
+
if (pos >= slen ||
|
758
|
+
(cs[pos] != 'q' && cs[pos] != 'Q') ||
|
759
|
+
pos + 2 >= slen ||
|
760
|
+
cs[pos + 1] != '\'') {
|
761
|
+
return parse_word(sf);
|
762
|
+
}
|
763
|
+
|
764
|
+
ch = cs[pos + 2];
|
765
|
+
|
766
|
+
/* the ch > 127 is un-needed since
|
767
|
+
* we assume char is signed
|
768
|
+
*/
|
769
|
+
if (ch < 33 /* || ch > 127 */) {
|
770
|
+
return parse_word(sf);
|
771
|
+
}
|
772
|
+
switch (ch) {
|
773
|
+
case '(' : ch = ')'; break;
|
774
|
+
case '[' : ch = ']'; break;
|
775
|
+
case '{' : ch = '}'; break;
|
776
|
+
case '<' : ch = '>'; break;
|
777
|
+
}
|
778
|
+
|
779
|
+
strend = memchr2(cs + pos + 3, slen - pos - 3, ch, '\'');
|
780
|
+
if (strend == NULL) {
|
781
|
+
st_assign(sf->current, TYPE_STRING, pos + 3, slen - pos - 3, cs + pos + 3);
|
782
|
+
sf->current->str_open = 'q';
|
783
|
+
sf->current->str_close = CHAR_NULL;
|
784
|
+
return slen;
|
785
|
+
} else {
|
786
|
+
st_assign(sf->current, TYPE_STRING, pos + 3, (size_t)(strend - cs) - pos - 3, cs + pos + 3);
|
787
|
+
sf->current->str_open = 'q';
|
788
|
+
sf->current->str_close = 'q';
|
789
|
+
return (size_t)(strend - cs + 2);
|
790
|
+
}
|
791
|
+
}
|
792
|
+
|
793
|
+
/*
 * Oracle's q string: the 'q' is at the current position (offset 0).
 */
static size_t parse_qstring(struct libinjection_sqli_state * sf)
{
    const size_t q_offset = 0;

    return parse_qstring_core(sf, q_offset);
}
|
800
|
+
|
801
|
+
/*
|
802
|
+
* mysql's N'STRING' or
|
803
|
+
* ... Oracle's nq string
|
804
|
+
*/
|
805
|
+
static size_t parse_nqstring(struct libinjection_sqli_state * sf)
|
806
|
+
{
|
807
|
+
size_t slen = sf->slen;
|
808
|
+
size_t pos = sf->pos;
|
809
|
+
if (pos + 2 < slen && sf->s[pos+1] == CHAR_SINGLE) {
|
810
|
+
return parse_estring(sf);
|
811
|
+
}
|
812
|
+
return parse_qstring_core(sf, 1);
|
813
|
+
}
|
814
|
+
|
815
|
+
/*
|
816
|
+
* binary literal string
|
817
|
+
* re: [bB]'[01]*'
|
818
|
+
*/
|
819
|
+
static size_t parse_bstring(struct libinjection_sqli_state *sf)
|
820
|
+
{
|
821
|
+
size_t wlen;
|
822
|
+
const char *cs = sf->s;
|
823
|
+
size_t pos = sf->pos;
|
824
|
+
size_t slen = sf->slen;
|
825
|
+
|
826
|
+
/* need at least 2 more characters
|
827
|
+
* if next char isn't a single quote, then
|
828
|
+
* continue as normal word
|
829
|
+
*/
|
830
|
+
if (pos + 2 >= slen || cs[pos+1] != '\'') {
|
831
|
+
return parse_word(sf);
|
832
|
+
}
|
833
|
+
|
834
|
+
wlen = strlenspn(cs + pos + 2, sf->slen - pos - 2, "01");
|
835
|
+
if (pos + 2 + wlen >= slen || cs[pos + 2 + wlen] != '\'') {
|
836
|
+
return parse_word(sf);
|
837
|
+
}
|
838
|
+
st_assign(sf->current, TYPE_NUMBER, pos, wlen + 3, cs + pos);
|
839
|
+
return pos + 2 + wlen + 1;
|
840
|
+
}
|
841
|
+
|
842
|
+
/*
|
843
|
+
* hex literal string
|
844
|
+
* re: [xX]'[0123456789abcdefABCDEF]*'
|
845
|
+
* mysql has requirement of having EVEN number of chars,
|
846
|
+
* but pgsql does not
|
847
|
+
*/
|
848
|
+
static size_t parse_xstring(struct libinjection_sqli_state *sf)
|
849
|
+
{
|
850
|
+
size_t wlen;
|
851
|
+
const char *cs = sf->s;
|
852
|
+
size_t pos = sf->pos;
|
853
|
+
size_t slen = sf->slen;
|
854
|
+
|
855
|
+
/* need at least 2 more characters
|
856
|
+
* if next char isn't a single quote, then
|
857
|
+
* continue as normal word
|
858
|
+
*/
|
859
|
+
if (pos + 2 >= slen || cs[pos+1] != '\'') {
|
860
|
+
return parse_word(sf);
|
861
|
+
}
|
862
|
+
|
863
|
+
wlen = strlenspn(cs + pos + 2, sf->slen - pos - 2, "0123456789ABCDEFabcdef");
|
864
|
+
if (pos + 2 + wlen >= slen || cs[pos + 2 + wlen] != '\'') {
|
865
|
+
return parse_word(sf);
|
866
|
+
}
|
867
|
+
st_assign(sf->current, TYPE_NUMBER, pos, wlen + 3, cs + pos);
|
868
|
+
return pos + 2 + wlen + 1;
|
869
|
+
}
|
870
|
+
|
871
|
+
/**
|
872
|
+
* This handles MS SQLSERVER bracket words
|
873
|
+
* http://stackoverflow.com/questions/3551284/sql-serverwhat-do-brackets-mean-around-column-name
|
874
|
+
*
|
875
|
+
*/
|
876
|
+
static size_t parse_bword(struct libinjection_sqli_state * sf)
|
877
|
+
{
|
878
|
+
const char *cs = sf->s;
|
879
|
+
size_t pos = sf->pos;
|
880
|
+
const char* endptr = (const char*) memchr(cs + pos, ']', sf->slen - pos);
|
881
|
+
if (endptr == NULL) {
|
882
|
+
st_assign(sf->current, TYPE_BAREWORD, pos, sf->slen - pos, cs + pos);
|
883
|
+
return sf->slen;
|
884
|
+
} else {
|
885
|
+
st_assign(sf->current, TYPE_BAREWORD, pos, (size_t)(endptr - cs) - pos + 1, cs + pos);
|
886
|
+
return (size_t)((endptr - cs) + 1);
|
887
|
+
}
|
888
|
+
}
|
889
|
+
|
890
|
+
static size_t parse_word(struct libinjection_sqli_state * sf)
|
891
|
+
{
|
892
|
+
char ch;
|
893
|
+
char delim;
|
894
|
+
size_t i;
|
895
|
+
const char *cs = sf->s;
|
896
|
+
size_t pos = sf->pos;
|
897
|
+
size_t wlen = strlencspn(cs + pos, sf->slen - pos,
|
898
|
+
" []{}<>:\\?=@!#~+-*/&|^%(),';\t\n\v\f\r\"\240\000");
|
899
|
+
|
900
|
+
st_assign(sf->current, TYPE_BAREWORD, pos, wlen, cs + pos);
|
901
|
+
|
902
|
+
/* now we need to look inside what we good for "." and "`"
|
903
|
+
* and see if what is before is a keyword or not
|
904
|
+
*/
|
905
|
+
for (i =0; i < sf->current->len; ++i) {
|
906
|
+
delim = sf->current->val[i];
|
907
|
+
if (delim == '.' || delim == '`') {
|
908
|
+
ch = sf->lookup(sf, LOOKUP_WORD, sf->current->val, i);
|
909
|
+
if (ch != TYPE_NONE && ch != TYPE_BAREWORD) {
|
910
|
+
/* needed for swig */
|
911
|
+
st_clear(sf->current);
|
912
|
+
/*
|
913
|
+
* we got something like "SELECT.1"
|
914
|
+
* or SELECT`column`
|
915
|
+
*/
|
916
|
+
st_assign(sf->current, ch, pos, i, cs + pos);
|
917
|
+
return pos + i;
|
918
|
+
}
|
919
|
+
}
|
920
|
+
}
|
921
|
+
|
922
|
+
/*
|
923
|
+
* do normal lookup with word including '.'
|
924
|
+
*/
|
925
|
+
if (wlen < LIBINJECTION_SQLI_TOKEN_SIZE) {
|
926
|
+
|
927
|
+
ch = sf->lookup(sf, LOOKUP_WORD, sf->current->val, wlen);
|
928
|
+
if (ch == CHAR_NULL) {
|
929
|
+
ch = TYPE_BAREWORD;
|
930
|
+
}
|
931
|
+
sf->current->type = ch;
|
932
|
+
}
|
933
|
+
return pos + wlen;
|
934
|
+
}
|
935
|
+
|
936
|
+
/* MySQL backticks are a cross between string and
|
937
|
+
* and a bare word.
|
938
|
+
*
|
939
|
+
*/
|
940
|
+
static size_t parse_tick(struct libinjection_sqli_state* sf)
|
941
|
+
{
|
942
|
+
size_t pos = parse_string_core(sf->s, sf->slen, sf->pos, sf->current, CHAR_TICK, 1);
|
943
|
+
|
944
|
+
/* we could check to see if start and end of
|
945
|
+
* of string are both "`", i.e. make sure we have
|
946
|
+
* matching set. `foo` vs. `foo
|
947
|
+
* but I don't think it matters much
|
948
|
+
*/
|
949
|
+
|
950
|
+
/* check value of string to see if it's a keyword,
|
951
|
+
* function, operator, etc
|
952
|
+
*/
|
953
|
+
char ch = sf->lookup(sf, LOOKUP_WORD, sf->current->val, sf->current->len);
|
954
|
+
if (ch == TYPE_FUNCTION) {
|
955
|
+
/* if it's a function, then convert token */
|
956
|
+
sf->current->type = TYPE_FUNCTION;
|
957
|
+
} else {
|
958
|
+
/* otherwise it's a 'n' type -- mysql treats
|
959
|
+
* everything as a bare word
|
960
|
+
*/
|
961
|
+
sf->current->type = TYPE_BAREWORD;
|
962
|
+
}
|
963
|
+
return pos;
|
964
|
+
}
|
965
|
+
|
966
|
+
static size_t parse_var(struct libinjection_sqli_state * sf)
|
967
|
+
{
|
968
|
+
size_t xlen;
|
969
|
+
const char *cs = sf->s;
|
970
|
+
const size_t slen = sf->slen;
|
971
|
+
size_t pos = sf->pos + 1;
|
972
|
+
|
973
|
+
/*
|
974
|
+
* var_count is only used to reconstruct
|
975
|
+
* the input. It counts the number of '@'
|
976
|
+
* seen 0 in the case of NULL, 1 or 2
|
977
|
+
*/
|
978
|
+
|
979
|
+
/*
|
980
|
+
* move past optional other '@'
|
981
|
+
*/
|
982
|
+
if (pos < slen && cs[pos] == '@') {
|
983
|
+
pos += 1;
|
984
|
+
sf->current->count = 2;
|
985
|
+
} else {
|
986
|
+
sf->current->count = 1;
|
987
|
+
}
|
988
|
+
|
989
|
+
/*
|
990
|
+
* MySQL allows @@`version`
|
991
|
+
*/
|
992
|
+
if (pos < slen) {
|
993
|
+
if (cs[pos] == '`') {
|
994
|
+
sf->pos = pos;
|
995
|
+
pos = parse_tick(sf);
|
996
|
+
sf->current->type = TYPE_VARIABLE;
|
997
|
+
return pos;
|
998
|
+
} else if (cs[pos] == CHAR_SINGLE || cs[pos] == CHAR_DOUBLE) {
|
999
|
+
sf->pos = pos;
|
1000
|
+
pos = parse_string(sf);
|
1001
|
+
sf->current->type = TYPE_VARIABLE;
|
1002
|
+
return pos;
|
1003
|
+
}
|
1004
|
+
}
|
1005
|
+
|
1006
|
+
|
1007
|
+
xlen = strlencspn(cs + pos, slen - pos,
|
1008
|
+
" <>:\\?=@!#~+-*/&|^%(),';\t\n\v\f\r'`\"");
|
1009
|
+
if (xlen == 0) {
|
1010
|
+
st_assign(sf->current, TYPE_VARIABLE, pos, 0, cs + pos);
|
1011
|
+
return pos;
|
1012
|
+
} else {
|
1013
|
+
st_assign(sf->current, TYPE_VARIABLE, pos, xlen, cs + pos);
|
1014
|
+
return pos + xlen;
|
1015
|
+
}
|
1016
|
+
}
|
1017
|
+
|
1018
|
+
static size_t parse_money(struct libinjection_sqli_state *sf)
|
1019
|
+
{
|
1020
|
+
size_t xlen;
|
1021
|
+
const char* strend;
|
1022
|
+
const char *cs = sf->s;
|
1023
|
+
const size_t slen = sf->slen;
|
1024
|
+
size_t pos = sf->pos;
|
1025
|
+
|
1026
|
+
if (pos + 1 == slen) {
|
1027
|
+
/* end of line */
|
1028
|
+
st_assign_char(sf->current, TYPE_BAREWORD, pos, 1, '$');
|
1029
|
+
return slen;
|
1030
|
+
}
|
1031
|
+
|
1032
|
+
/*
|
1033
|
+
* $1,000.00 or $1.000,00 ok!
|
1034
|
+
* This also parses $....,,,111 but that's ok
|
1035
|
+
*/
|
1036
|
+
|
1037
|
+
xlen = strlenspn(cs + pos + 1, slen - pos - 1, "0123456789.,");
|
1038
|
+
if (xlen == 0) {
|
1039
|
+
if (cs[pos + 1] == '$') {
|
1040
|
+
/* we have $$ .. find ending $$ and make string */
|
1041
|
+
strend = memchr2(cs + pos + 2, slen - pos -2, '$', '$');
|
1042
|
+
if (strend == NULL) {
|
1043
|
+
/* fell off edge */
|
1044
|
+
st_assign(sf->current, TYPE_STRING, pos + 2, slen - (pos + 2), cs + pos + 2);
|
1045
|
+
sf->current->str_open = '$';
|
1046
|
+
sf->current->str_close = CHAR_NULL;
|
1047
|
+
return slen;
|
1048
|
+
} else {
|
1049
|
+
st_assign(sf->current, TYPE_STRING, pos + 2,
|
1050
|
+
(size_t)(strend - (cs + pos + 2)), cs + pos + 2);
|
1051
|
+
sf->current->str_open = '$';
|
1052
|
+
sf->current->str_close = '$';
|
1053
|
+
return (size_t)(strend - cs + 2);
|
1054
|
+
}
|
1055
|
+
} else {
|
1056
|
+
/* ok it's not a number or '$$', but maybe it's pgsql "$ quoted strings" */
|
1057
|
+
xlen = strlenspn(cs + pos + 1, slen - pos - 1, "abcdefghjiklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");
|
1058
|
+
if (xlen == 0) {
|
1059
|
+
/* hmm it's "$" _something_ .. just add $ and keep going*/
|
1060
|
+
st_assign_char(sf->current, TYPE_BAREWORD, pos, 1, '$');
|
1061
|
+
return pos + 1;
|
1062
|
+
}
|
1063
|
+
/* we have $foobar????? */
|
1064
|
+
/* is it $foobar$ */
|
1065
|
+
if (pos + xlen + 1 == slen || cs[pos+xlen+1] != '$') {
|
1066
|
+
/* not $foobar$, or fell off edge */
|
1067
|
+
st_assign_char(sf->current, TYPE_BAREWORD, pos, 1, '$');
|
1068
|
+
return pos + 1;
|
1069
|
+
}
|
1070
|
+
|
1071
|
+
/* we have $foobar$ ... find it again */
|
1072
|
+
strend = my_memmem(cs+xlen+2, slen - (pos+xlen+2), cs + pos, xlen+2);
|
1073
|
+
|
1074
|
+
if (strend == NULL) {
|
1075
|
+
/* fell off edge */
|
1076
|
+
st_assign(sf->current, TYPE_STRING, pos+xlen+2, slen - pos - xlen - 2, cs+pos+xlen+2);
|
1077
|
+
sf->current->str_open = '$';
|
1078
|
+
sf->current->str_close = CHAR_NULL;
|
1079
|
+
return slen;
|
1080
|
+
} else {
|
1081
|
+
/* got one */
|
1082
|
+
st_assign(sf->current, TYPE_STRING, pos+xlen+2,
|
1083
|
+
(size_t)(strend - (cs + pos + xlen + 2)), cs+pos+xlen+2);
|
1084
|
+
sf->current->str_open = '$';
|
1085
|
+
sf->current->str_close = '$';
|
1086
|
+
return (size_t)((strend + xlen + 2) - cs);
|
1087
|
+
}
|
1088
|
+
}
|
1089
|
+
} else if (xlen == 1 && cs[pos + 1] == '.') {
|
1090
|
+
/* $. should parsed as a word */
|
1091
|
+
return parse_word(sf);
|
1092
|
+
} else {
|
1093
|
+
st_assign(sf->current, TYPE_NUMBER, pos, 1 + xlen, cs + pos);
|
1094
|
+
return pos + 1 + xlen;
|
1095
|
+
}
|
1096
|
+
}
|
1097
|
+
|
1098
|
+
static size_t parse_number(struct libinjection_sqli_state * sf)
|
1099
|
+
{
|
1100
|
+
size_t xlen;
|
1101
|
+
size_t start;
|
1102
|
+
const char* digits = NULL;
|
1103
|
+
const char *cs = sf->s;
|
1104
|
+
const size_t slen = sf->slen;
|
1105
|
+
size_t pos = sf->pos;
|
1106
|
+
int have_e = 0;
|
1107
|
+
int have_exp = 0;
|
1108
|
+
|
1109
|
+
/* cs[pos] == '0' has 1/10 chance of being true,
|
1110
|
+
* while pos+1< slen is almost always true
|
1111
|
+
*/
|
1112
|
+
if (cs[pos] == '0' && pos + 1 < slen) {
|
1113
|
+
if (cs[pos + 1] == 'X' || cs[pos + 1] == 'x') {
|
1114
|
+
digits = "0123456789ABCDEFabcdef";
|
1115
|
+
} else if (cs[pos + 1] == 'B' || cs[pos + 1] == 'b') {
|
1116
|
+
digits = "01";
|
1117
|
+
}
|
1118
|
+
|
1119
|
+
if (digits) {
|
1120
|
+
xlen = strlenspn(cs + pos + 2, slen - pos - 2, digits);
|
1121
|
+
if (xlen == 0) {
|
1122
|
+
st_assign(sf->current, TYPE_BAREWORD, pos, 2, cs + pos);
|
1123
|
+
return pos + 2;
|
1124
|
+
} else {
|
1125
|
+
st_assign(sf->current, TYPE_NUMBER, pos, 2 + xlen, cs + pos);
|
1126
|
+
return pos + 2 + xlen;
|
1127
|
+
}
|
1128
|
+
}
|
1129
|
+
}
|
1130
|
+
|
1131
|
+
start = pos;
|
1132
|
+
while (pos < slen && ISDIGIT(cs[pos])) {
|
1133
|
+
pos += 1;
|
1134
|
+
}
|
1135
|
+
|
1136
|
+
if (pos < slen && cs[pos] == '.') {
|
1137
|
+
pos += 1;
|
1138
|
+
while (pos < slen && ISDIGIT(cs[pos])) {
|
1139
|
+
pos += 1;
|
1140
|
+
}
|
1141
|
+
if (pos - start == 1) {
|
1142
|
+
/* only one character read so far */
|
1143
|
+
st_assign_char(sf->current, TYPE_DOT, start, 1, '.');
|
1144
|
+
return pos;
|
1145
|
+
}
|
1146
|
+
}
|
1147
|
+
|
1148
|
+
if (pos < slen) {
|
1149
|
+
if (cs[pos] == 'E' || cs[pos] == 'e') {
|
1150
|
+
have_e = 1;
|
1151
|
+
pos += 1;
|
1152
|
+
if (pos < slen && (cs[pos] == '+' || cs[pos] == '-')) {
|
1153
|
+
pos += 1;
|
1154
|
+
}
|
1155
|
+
while (pos < slen && ISDIGIT(cs[pos])) {
|
1156
|
+
have_exp = 1;
|
1157
|
+
pos += 1;
|
1158
|
+
}
|
1159
|
+
}
|
1160
|
+
}
|
1161
|
+
|
1162
|
+
/* oracle's ending float or double suffix
|
1163
|
+
* http://docs.oracle.com/cd/B19306_01/server.102/b14200/sql_elements003.htm#i139891
|
1164
|
+
*/
|
1165
|
+
if (pos < slen && (cs[pos] == 'd' || cs[pos] == 'D' || cs[pos] == 'f' || cs[pos] == 'F')) {
|
1166
|
+
if (pos + 1 == slen) {
|
1167
|
+
/* line ends evaluate "... 1.2f$" as '1.2f' */
|
1168
|
+
pos += 1;
|
1169
|
+
} else if ((char_is_white(cs[pos+1]) || cs[pos+1] == ';')) {
|
1170
|
+
/*
|
1171
|
+
* easy case, evaluate "... 1.2f ... as '1.2f'
|
1172
|
+
*/
|
1173
|
+
pos += 1;
|
1174
|
+
} else if (cs[pos+1] == 'u' || cs[pos+1] == 'U') {
|
1175
|
+
/*
|
1176
|
+
* a bit of a hack but makes '1fUNION' parse as '1f UNION'
|
1177
|
+
*/
|
1178
|
+
pos += 1;
|
1179
|
+
} else {
|
1180
|
+
/* it's like "123FROM" */
|
1181
|
+
/* parse as "123" only */
|
1182
|
+
}
|
1183
|
+
}
|
1184
|
+
|
1185
|
+
if (have_e == 1 && have_exp == 0) {
|
1186
|
+
/* very special form of
|
1187
|
+
* "1234.e"
|
1188
|
+
* "10.10E"
|
1189
|
+
* ".E"
|
1190
|
+
* this is a WORD not a number!! */
|
1191
|
+
st_assign(sf->current, TYPE_BAREWORD, start, pos - start, cs + start);
|
1192
|
+
} else {
|
1193
|
+
st_assign(sf->current, TYPE_NUMBER, start, pos - start, cs + start);
|
1194
|
+
}
|
1195
|
+
return pos;
|
1196
|
+
}
|
1197
|
+
|
1198
|
+
/*
|
1199
|
+
* API to return version. This allows us to increment the version
|
1200
|
+
* without having to regenerated the SWIG (or other binding) in minor
|
1201
|
+
* releases.
|
1202
|
+
*/
|
1203
|
+
const char* libinjection_version()
|
1204
|
+
{
|
1205
|
+
return LIBINJECTION_VERSION;
|
1206
|
+
}
|
1207
|
+
|
1208
|
+
int libinjection_sqli_tokenize(struct libinjection_sqli_state * sf)
|
1209
|
+
{
|
1210
|
+
pt2Function fnptr;
|
1211
|
+
size_t *pos = &sf->pos;
|
1212
|
+
stoken_t *current = sf->current;
|
1213
|
+
const char *s = sf->s;
|
1214
|
+
const size_t slen = sf->slen;
|
1215
|
+
|
1216
|
+
if (slen == 0) {
|
1217
|
+
return FALSE;
|
1218
|
+
}
|
1219
|
+
|
1220
|
+
st_clear(current);
|
1221
|
+
sf->current = current;
|
1222
|
+
|
1223
|
+
/*
|
1224
|
+
* if we are at beginning of string
|
1225
|
+
* and in single-quote or double quote mode
|
1226
|
+
* then pretend the input starts with a quote
|
1227
|
+
*/
|
1228
|
+
if (*pos == 0 && (sf->flags & (FLAG_QUOTE_SINGLE | FLAG_QUOTE_DOUBLE))) {
|
1229
|
+
*pos = parse_string_core(s, slen, 0, current, flag2delim(sf->flags), 0);
|
1230
|
+
sf->stats_tokens += 1;
|
1231
|
+
return TRUE;
|
1232
|
+
}
|
1233
|
+
|
1234
|
+
while (*pos < slen) {
|
1235
|
+
|
1236
|
+
/*
|
1237
|
+
* get current character
|
1238
|
+
*/
|
1239
|
+
const unsigned char ch = (unsigned char) (s[*pos]);
|
1240
|
+
|
1241
|
+
/*
|
1242
|
+
* look up the parser, and call it
|
1243
|
+
*
|
1244
|
+
* Porting Note: this is mapping of char to function
|
1245
|
+
* charparsers[ch]()
|
1246
|
+
*/
|
1247
|
+
fnptr = char_parse_map[ch];
|
1248
|
+
|
1249
|
+
*pos = (*fnptr) (sf);
|
1250
|
+
|
1251
|
+
/*
|
1252
|
+
*
|
1253
|
+
*/
|
1254
|
+
if (current->type != CHAR_NULL) {
|
1255
|
+
sf->stats_tokens += 1;
|
1256
|
+
return TRUE;
|
1257
|
+
}
|
1258
|
+
}
|
1259
|
+
return FALSE;
|
1260
|
+
}
|
1261
|
+
|
1262
|
+
void libinjection_sqli_init(struct libinjection_sqli_state * sf, const char *s, size_t len, int flags)
|
1263
|
+
{
|
1264
|
+
if (flags == 0) {
|
1265
|
+
flags = FLAG_QUOTE_NONE | FLAG_SQL_ANSI;
|
1266
|
+
}
|
1267
|
+
|
1268
|
+
memset(sf, 0, sizeof(struct libinjection_sqli_state));
|
1269
|
+
sf->s = s;
|
1270
|
+
sf->slen = len;
|
1271
|
+
sf->lookup = libinjection_sqli_lookup_word;
|
1272
|
+
sf->userdata = 0;
|
1273
|
+
sf->flags = flags;
|
1274
|
+
sf->current = &(sf->tokenvec[0]);
|
1275
|
+
}
|
1276
|
+
|
1277
|
+
void libinjection_sqli_reset(struct libinjection_sqli_state * sf, int flags)
|
1278
|
+
{
|
1279
|
+
void *userdata = sf->userdata;
|
1280
|
+
ptr_lookup_fn lookup = sf->lookup;;
|
1281
|
+
|
1282
|
+
if (flags == 0) {
|
1283
|
+
flags = FLAG_QUOTE_NONE | FLAG_SQL_ANSI;
|
1284
|
+
}
|
1285
|
+
libinjection_sqli_init(sf, sf->s, sf->slen, flags);
|
1286
|
+
sf->lookup = lookup;
|
1287
|
+
sf->userdata = userdata;
|
1288
|
+
}
|
1289
|
+
|
1290
|
+
/*
 * Installs the word-lookup callback used by the tokenizer.
 *
 * sf       - state to modify
 * fn       - callback to install; NULL restores the built-in
 *            libinjection_sqli_lookup_word
 * userdata - pointer stored alongside a non-NULL fn; when fn is NULL the
 *            stored user data is cleared and this argument is ignored
 */
void libinjection_sqli_callback(struct libinjection_sqli_state * sf, ptr_lookup_fn fn, void* userdata)
{
    if (fn != NULL) {
        sf->lookup = fn;
        sf->userdata = userdata;
        return;
    }

    /* no callback given: fall back to the default lookup */
    sf->lookup = libinjection_sqli_lookup_word;
    sf->userdata = (void*)(NULL);
}
|
1300
|
+
|
1301
|
+
/** See if two tokens can be merged since they are compound SQL phrases.
|
1302
|
+
*
|
1303
|
+
* This takes two tokens, and, if they are the right type,
|
1304
|
+
* merges their values together. Then checks to see if the
|
1305
|
+
* new value is special using the PHRASES mapping.
|
1306
|
+
*
|
1307
|
+
* Example: "UNION" + "ALL" ==> "UNION ALL"
|
1308
|
+
*
|
1309
|
+
* C Security Notes: this is safe to use C-strings (null-terminated)
|
1310
|
+
* since the types involved by definition do not have embedded nulls
|
1311
|
+
* (e.g. there is no keyword with embedded null)
|
1312
|
+
*
|
1313
|
+
* Porting Notes: since this is C, it's oddly complicated.
|
1314
|
+
* This is just: multikeywords[token.value + ' ' + token2.value]
|
1315
|
+
*
|
1316
|
+
*/
|
1317
|
+
static int syntax_merge_words(struct libinjection_sqli_state * sf,stoken_t * a, stoken_t * b)
|
1318
|
+
{
|
1319
|
+
size_t sz1;
|
1320
|
+
size_t sz2;
|
1321
|
+
size_t sz3;
|
1322
|
+
char tmp[LIBINJECTION_SQLI_TOKEN_SIZE];
|
1323
|
+
char ch;
|
1324
|
+
|
1325
|
+
/* first token is of right type? */
|
1326
|
+
if (!
|
1327
|
+
(a->type == TYPE_KEYWORD ||
|
1328
|
+
a->type == TYPE_BAREWORD ||
|
1329
|
+
a->type == TYPE_OPERATOR ||
|
1330
|
+
a->type == TYPE_UNION ||
|
1331
|
+
a->type == TYPE_FUNCTION ||
|
1332
|
+
a->type == TYPE_EXPRESSION ||
|
1333
|
+
a->type == TYPE_SQLTYPE)) {
|
1334
|
+
return CHAR_NULL;
|
1335
|
+
}
|
1336
|
+
|
1337
|
+
if (b->type != TYPE_KEYWORD && b->type != TYPE_BAREWORD &&
|
1338
|
+
b->type != TYPE_OPERATOR && b->type != TYPE_SQLTYPE &&
|
1339
|
+
b->type != TYPE_LOGIC_OPERATOR &&
|
1340
|
+
b->type != TYPE_FUNCTION &&
|
1341
|
+
b->type != TYPE_UNION && b->type != TYPE_EXPRESSION) {
|
1342
|
+
return CHAR_NULL;
|
1343
|
+
}
|
1344
|
+
|
1345
|
+
sz1 = a->len;
|
1346
|
+
sz2 = b->len;
|
1347
|
+
sz3 = sz1 + sz2 + 1; /* +1 for space in the middle */
|
1348
|
+
if (sz3 >= LIBINJECTION_SQLI_TOKEN_SIZE) { /* make sure there is room for ending null */
|
1349
|
+
return FALSE;
|
1350
|
+
}
|
1351
|
+
/*
|
1352
|
+
* oddly annoying last.val + ' ' + current.val
|
1353
|
+
*/
|
1354
|
+
memcpy(tmp, a->val, sz1);
|
1355
|
+
tmp[sz1] = ' ';
|
1356
|
+
memcpy(tmp + sz1 + 1, b->val, sz2);
|
1357
|
+
tmp[sz3] = CHAR_NULL;
|
1358
|
+
|
1359
|
+
ch = sf->lookup(sf, LOOKUP_WORD, tmp, sz3);
|
1360
|
+
|
1361
|
+
if (ch != CHAR_NULL) {
|
1362
|
+
st_assign(a, ch, a->pos, sz3, tmp);
|
1363
|
+
return TRUE;
|
1364
|
+
} else {
|
1365
|
+
return FALSE;
|
1366
|
+
}
|
1367
|
+
}
|
1368
|
+
|
1369
|
+
int libinjection_sqli_fold(struct libinjection_sqli_state * sf)
|
1370
|
+
{
|
1371
|
+
stoken_t last_comment;
|
1372
|
+
|
1373
|
+
/* POS is the position of where the NEXT token goes */
|
1374
|
+
size_t pos = 0;
|
1375
|
+
|
1376
|
+
/* LEFT is a count of how many tokens that are already
|
1377
|
+
folded or processed (i.e. part of the fingerprint) */
|
1378
|
+
size_t left = 0;
|
1379
|
+
|
1380
|
+
int more = 1;
|
1381
|
+
|
1382
|
+
st_clear(&last_comment);
|
1383
|
+
|
1384
|
+
/* Skip all initial comments, right-parens ( and unary operators
|
1385
|
+
*
|
1386
|
+
*/
|
1387
|
+
sf->current = &(sf->tokenvec[0]);
|
1388
|
+
while (more) {
|
1389
|
+
more = libinjection_sqli_tokenize(sf);
|
1390
|
+
if ( ! (sf->current->type == TYPE_COMMENT ||
|
1391
|
+
sf->current->type == TYPE_LEFTPARENS ||
|
1392
|
+
sf->current->type == TYPE_SQLTYPE ||
|
1393
|
+
st_is_unary_op(sf->current))) {
|
1394
|
+
break;
|
1395
|
+
}
|
1396
|
+
}
|
1397
|
+
|
1398
|
+
if (! more) {
|
1399
|
+
/* If input was only comments, unary or (, then exit */
|
1400
|
+
return 0;
|
1401
|
+
} else {
|
1402
|
+
/* it's some other token */
|
1403
|
+
pos += 1;
|
1404
|
+
}
|
1405
|
+
|
1406
|
+
while (1) {
|
1407
|
+
FOLD_DEBUG;
|
1408
|
+
|
1409
|
+
/* do we have all the max number of tokens? if so do
|
1410
|
+
* some special cases for 5 tokens
|
1411
|
+
*/
|
1412
|
+
if (pos >= LIBINJECTION_SQLI_MAX_TOKENS) {
|
1413
|
+
if (
|
1414
|
+
(
|
1415
|
+
sf->tokenvec[0].type == TYPE_NUMBER &&
|
1416
|
+
(sf->tokenvec[1].type == TYPE_OPERATOR || sf->tokenvec[1].type == TYPE_COMMA) &&
|
1417
|
+
sf->tokenvec[2].type == TYPE_LEFTPARENS &&
|
1418
|
+
sf->tokenvec[3].type == TYPE_NUMBER &&
|
1419
|
+
sf->tokenvec[4].type == TYPE_RIGHTPARENS
|
1420
|
+
) ||
|
1421
|
+
(
|
1422
|
+
sf->tokenvec[0].type == TYPE_BAREWORD &&
|
1423
|
+
sf->tokenvec[1].type == TYPE_OPERATOR &&
|
1424
|
+
sf->tokenvec[2].type == TYPE_LEFTPARENS &&
|
1425
|
+
(sf->tokenvec[3].type == TYPE_BAREWORD || sf->tokenvec[3].type == TYPE_NUMBER) &&
|
1426
|
+
sf->tokenvec[4].type == TYPE_RIGHTPARENS
|
1427
|
+
) ||
|
1428
|
+
(
|
1429
|
+
sf->tokenvec[0].type == TYPE_NUMBER &&
|
1430
|
+
sf->tokenvec[1].type == TYPE_RIGHTPARENS &&
|
1431
|
+
sf->tokenvec[2].type == TYPE_COMMA &&
|
1432
|
+
sf->tokenvec[3].type == TYPE_LEFTPARENS &&
|
1433
|
+
sf->tokenvec[4].type == TYPE_NUMBER
|
1434
|
+
)
|
1435
|
+
)
|
1436
|
+
{
|
1437
|
+
if (pos > LIBINJECTION_SQLI_MAX_TOKENS) {
|
1438
|
+
st_copy(&(sf->tokenvec[1]), &(sf->tokenvec[LIBINJECTION_SQLI_MAX_TOKENS]));
|
1439
|
+
pos = 2;
|
1440
|
+
left = 0;
|
1441
|
+
} else {
|
1442
|
+
pos = 1;
|
1443
|
+
left = 0;
|
1444
|
+
}
|
1445
|
+
}
|
1446
|
+
}
|
1447
|
+
|
1448
|
+
if (! more || left >= LIBINJECTION_SQLI_MAX_TOKENS) {
|
1449
|
+
left = pos;
|
1450
|
+
break;
|
1451
|
+
}
|
1452
|
+
|
1453
|
+
/* get up to two tokens */
|
1454
|
+
while (more && pos <= LIBINJECTION_SQLI_MAX_TOKENS && (pos - left) < 2) {
|
1455
|
+
sf->current = &(sf->tokenvec[pos]);
|
1456
|
+
more = libinjection_sqli_tokenize(sf);
|
1457
|
+
if (more) {
|
1458
|
+
if (sf->current->type == TYPE_COMMENT) {
|
1459
|
+
st_copy(&last_comment, sf->current);
|
1460
|
+
} else {
|
1461
|
+
last_comment.type = CHAR_NULL;
|
1462
|
+
pos += 1;
|
1463
|
+
}
|
1464
|
+
}
|
1465
|
+
}
|
1466
|
+
FOLD_DEBUG;
|
1467
|
+
/* did we get 2 tokens? if not then we are done */
|
1468
|
+
if (pos - left < 2) {
|
1469
|
+
left = pos;
|
1470
|
+
continue;
|
1471
|
+
}
|
1472
|
+
|
1473
|
+
/* FOLD: "ss" -> "s"
|
1474
|
+
* "foo" "bar" is valid SQL
|
1475
|
+
* just ignore second string
|
1476
|
+
*/
|
1477
|
+
if (sf->tokenvec[left].type == TYPE_STRING && sf->tokenvec[left+1].type == TYPE_STRING) {
|
1478
|
+
pos -= 1;
|
1479
|
+
sf->stats_folds += 1;
|
1480
|
+
continue;
|
1481
|
+
} else if (sf->tokenvec[left].type == TYPE_SEMICOLON && sf->tokenvec[left+1].type == TYPE_SEMICOLON) {
|
1482
|
+
/* not sure how various engines handle
|
1483
|
+
* 'select 1;;drop table foo' or
|
1484
|
+
* 'select 1; /x foo x/; drop table foo'
|
1485
|
+
* to prevent surprises, just fold away repeated semicolons
|
1486
|
+
*/
|
1487
|
+
pos -= 1;
|
1488
|
+
sf->stats_folds += 1;
|
1489
|
+
continue;
|
1490
|
+
} else if (sf->tokenvec[left].type == TYPE_SEMICOLON &&
|
1491
|
+
sf->tokenvec[left+1].type == TYPE_FUNCTION &&
|
1492
|
+
cstrcasecmp("IF", sf->tokenvec[left+1].val, sf->tokenvec[left+1].len) == 0) {
|
1493
|
+
/* IF is normally a function, except in Transact-SQL where it can be used as a
|
1494
|
+
* standalone control flow operator, e.g. ; IF 1=1 ...
|
1495
|
+
* if found after a semicolon, convert from 'f' type to 'T' type
|
1496
|
+
*/
|
1497
|
+
sf->tokenvec[left+1].type = TYPE_TSQL;
|
1498
|
+
left += 2;
|
1499
|
+
continue; /* reparse everything, but we probably can advance left, and pos */
|
1500
|
+
} else if ((sf->tokenvec[left].type == TYPE_OPERATOR ||
|
1501
|
+
sf->tokenvec[left].type == TYPE_LOGIC_OPERATOR) &&
|
1502
|
+
(st_is_unary_op(&sf->tokenvec[left+1]) ||
|
1503
|
+
sf->tokenvec[left+1].type == TYPE_SQLTYPE)) {
|
1504
|
+
pos -= 1;
|
1505
|
+
sf->stats_folds += 1;
|
1506
|
+
left = 0;
|
1507
|
+
continue;
|
1508
|
+
} else if (sf->tokenvec[left].type == TYPE_LEFTPARENS &&
|
1509
|
+
st_is_unary_op(&sf->tokenvec[left+1])) {
|
1510
|
+
pos -= 1;
|
1511
|
+
sf->stats_folds += 1;
|
1512
|
+
if (left > 0) {
|
1513
|
+
left -= 1;
|
1514
|
+
}
|
1515
|
+
continue;
|
1516
|
+
} else if (syntax_merge_words(sf, &sf->tokenvec[left], &sf->tokenvec[left+1])) {
|
1517
|
+
pos -= 1;
|
1518
|
+
sf->stats_folds += 1;
|
1519
|
+
if (left > 0) {
|
1520
|
+
left -= 1;
|
1521
|
+
}
|
1522
|
+
continue;
|
1523
|
+
} else if ((sf->tokenvec[left].type == TYPE_BAREWORD || sf->tokenvec[left].type == TYPE_VARIABLE) &&
|
1524
|
+
sf->tokenvec[left+1].type == TYPE_LEFTPARENS && (
|
1525
|
+
/* TSQL functions but common enough to be collumn names */
|
1526
|
+
cstrcasecmp("USER_ID", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
|
1527
|
+
cstrcasecmp("USER_NAME", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
|
1528
|
+
|
1529
|
+
/* Function in MYSQL */
|
1530
|
+
cstrcasecmp("DATABASE", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
|
1531
|
+
cstrcasecmp("PASSWORD", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
|
1532
|
+
cstrcasecmp("USER", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
|
1533
|
+
|
1534
|
+
/* Mysql words that act as a variable and are a function */
|
1535
|
+
|
1536
|
+
/* TSQL current_users is fake-variable */
|
1537
|
+
/* http://msdn.microsoft.com/en-us/library/ms176050.aspx */
|
1538
|
+
cstrcasecmp("CURRENT_USER", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
|
1539
|
+
cstrcasecmp("CURRENT_DATE", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
|
1540
|
+
cstrcasecmp("CURRENT_TIME", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
|
1541
|
+
cstrcasecmp("CURRENT_TIMESTAMP", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
|
1542
|
+
cstrcasecmp("LOCALTIME", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
|
1543
|
+
cstrcasecmp("LOCALTIMESTAMP", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0
|
1544
|
+
)) {
|
1545
|
+
|
1546
|
+
/* pos is the same
|
1547
|
+
* other conversions need to go here... for instance
|
1548
|
+
* password CAN be a function, coalese CAN be a function
|
1549
|
+
*/
|
1550
|
+
sf->tokenvec[left].type = TYPE_FUNCTION;
|
1551
|
+
continue;
|
1552
|
+
} else if (sf->tokenvec[left].type == TYPE_KEYWORD && (
|
1553
|
+
cstrcasecmp("IN", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
|
1554
|
+
cstrcasecmp("NOT IN", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0
|
1555
|
+
)) {
|
1556
|
+
|
1557
|
+
if (sf->tokenvec[left+1].type == TYPE_LEFTPARENS) {
|
1558
|
+
/* got .... IN ( ... (or 'NOT IN')
|
1559
|
+
* it's an operator
|
1560
|
+
*/
|
1561
|
+
sf->tokenvec[left].type = TYPE_OPERATOR;
|
1562
|
+
} else {
|
1563
|
+
/*
|
1564
|
+
* it's a nothing
|
1565
|
+
*/
|
1566
|
+
sf->tokenvec[left].type = TYPE_BAREWORD;
|
1567
|
+
}
|
1568
|
+
|
1569
|
+
/* "IN" can be used as "IN BOOLEAN MODE" for mysql
|
1570
|
+
* in which case merging of words can be done later
|
1571
|
+
* other wise it acts as an equality operator __ IN (values..)
|
1572
|
+
*
|
1573
|
+
* here we got "IN" "(" so it's an operator.
|
1574
|
+
* also back track to handle "NOT IN"
|
1575
|
+
* might need to do the same with like
|
1576
|
+
* two use cases "foo" LIKE "BAR" (normal operator)
|
1577
|
+
* "foo" = LIKE(1,2)
|
1578
|
+
*/
|
1579
|
+
continue;
|
1580
|
+
} else if ((sf->tokenvec[left].type == TYPE_OPERATOR) && (
|
1581
|
+
cstrcasecmp("LIKE", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
|
1582
|
+
cstrcasecmp("NOT LIKE", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0)) {
|
1583
|
+
if (sf->tokenvec[left+1].type == TYPE_LEFTPARENS) {
|
1584
|
+
/* SELECT LIKE(...
|
1585
|
+
* it's a function
|
1586
|
+
*/
|
1587
|
+
sf->tokenvec[left].type = TYPE_FUNCTION;
|
1588
|
+
}
|
1589
|
+
} else if (sf->tokenvec[left].type == TYPE_SQLTYPE &&
|
1590
|
+
(sf->tokenvec[left+1].type == TYPE_BAREWORD ||
|
1591
|
+
sf->tokenvec[left+1].type == TYPE_NUMBER ||
|
1592
|
+
sf->tokenvec[left+1].type == TYPE_SQLTYPE ||
|
1593
|
+
sf->tokenvec[left+1].type == TYPE_LEFTPARENS ||
|
1594
|
+
sf->tokenvec[left+1].type == TYPE_FUNCTION ||
|
1595
|
+
sf->tokenvec[left+1].type == TYPE_VARIABLE ||
|
1596
|
+
sf->tokenvec[left+1].type == TYPE_STRING)) {
|
1597
|
+
st_copy(&sf->tokenvec[left], &sf->tokenvec[left+1]);
|
1598
|
+
pos -= 1;
|
1599
|
+
sf->stats_folds += 1;
|
1600
|
+
left = 0;
|
1601
|
+
continue;
|
1602
|
+
} else if (sf->tokenvec[left].type == TYPE_COLLATE &&
|
1603
|
+
sf->tokenvec[left+1].type == TYPE_BAREWORD) {
|
1604
|
+
/*
|
1605
|
+
* there are too many collation types.. so if the bareword has a "_"
|
1606
|
+
* then it's TYPE_SQLTYPE
|
1607
|
+
*/
|
1608
|
+
if (strchr(sf->tokenvec[left+1].val, '_') != NULL) {
|
1609
|
+
sf->tokenvec[left+1].type = TYPE_SQLTYPE;
|
1610
|
+
left = 0;
|
1611
|
+
}
|
1612
|
+
} else if (sf->tokenvec[left].type == TYPE_BACKSLASH) {
|
1613
|
+
if (st_is_arithmetic_op(&(sf->tokenvec[left+1]))) {
|
1614
|
+
/* very weird case in TSQL where '\%1' is parsed as '0 % 1', etc */
|
1615
|
+
sf->tokenvec[left].type = TYPE_NUMBER;
|
1616
|
+
} else {
|
1617
|
+
/* just ignore it.. Again T-SQL seems to parse \1 as "1" */
|
1618
|
+
st_copy(&sf->tokenvec[left], &sf->tokenvec[left+1]);
|
1619
|
+
pos -= 1;
|
1620
|
+
sf->stats_folds += 1;
|
1621
|
+
}
|
1622
|
+
left = 0;
|
1623
|
+
continue;
|
1624
|
+
} else if (sf->tokenvec[left].type == TYPE_LEFTPARENS &&
|
1625
|
+
sf->tokenvec[left+1].type == TYPE_LEFTPARENS) {
|
1626
|
+
pos -= 1;
|
1627
|
+
left = 0;
|
1628
|
+
sf->stats_folds += 1;
|
1629
|
+
continue;
|
1630
|
+
} else if (sf->tokenvec[left].type == TYPE_RIGHTPARENS &&
|
1631
|
+
sf->tokenvec[left+1].type == TYPE_RIGHTPARENS) {
|
1632
|
+
pos -= 1;
|
1633
|
+
left = 0;
|
1634
|
+
sf->stats_folds += 1;
|
1635
|
+
continue;
|
1636
|
+
} else if (sf->tokenvec[left].type == TYPE_LEFTBRACE &&
|
1637
|
+
sf->tokenvec[left+1].type == TYPE_BAREWORD) {
|
1638
|
+
|
1639
|
+
/*
|
1640
|
+
* MySQL Degenerate case --
|
1641
|
+
*
|
1642
|
+
* select { ``.``.id }; -- valid !!!
|
1643
|
+
* select { ``.``.``.id }; -- invalid
|
1644
|
+
* select ``.``.id; -- invalid
|
1645
|
+
* select { ``.id }; -- invalid
|
1646
|
+
*
|
1647
|
+
* so it appears {``.``.id} is a magic case
|
1648
|
+
* I suspect this is "current database, current table, field id"
|
1649
|
+
*
|
1650
|
+
* The folding code can't look at more than 3 tokens, and
|
1651
|
+
* I don't want to make two passes.
|
1652
|
+
*
|
1653
|
+
* Since "{ ``" so rare, we are just going to blacklist it.
|
1654
|
+
*
|
1655
|
+
* Highly likely this will need revisiting!
|
1656
|
+
*
|
1657
|
+
* CREDIT @rsalgado 2013-11-25
|
1658
|
+
*/
|
1659
|
+
if (sf->tokenvec[left+1].len == 0) {
|
1660
|
+
sf->tokenvec[left+1].type = TYPE_EVIL;
|
1661
|
+
return (int)(left+2);
|
1662
|
+
}
|
1663
|
+
/* weird ODBC / MYSQL {foo expr} --> expr
|
1664
|
+
* but for this rule we just strip away the "{ foo" part
|
1665
|
+
*/
|
1666
|
+
left = 0;
|
1667
|
+
pos -= 2;
|
1668
|
+
sf->stats_folds += 2;
|
1669
|
+
continue;
|
1670
|
+
} else if (sf->tokenvec[left+1].type == TYPE_RIGHTBRACE) {
|
1671
|
+
pos -= 1;
|
1672
|
+
left = 0;
|
1673
|
+
sf->stats_folds += 1;
|
1674
|
+
continue;
|
1675
|
+
}
|
1676
|
+
|
1677
|
+
/* all cases of handing 2 tokens is done
|
1678
|
+
and nothing matched. Get one more token
|
1679
|
+
*/
|
1680
|
+
FOLD_DEBUG;
|
1681
|
+
while (more && pos <= LIBINJECTION_SQLI_MAX_TOKENS && pos - left < 3) {
|
1682
|
+
sf->current = &(sf->tokenvec[pos]);
|
1683
|
+
more = libinjection_sqli_tokenize(sf);
|
1684
|
+
if (more) {
|
1685
|
+
if (sf->current->type == TYPE_COMMENT) {
|
1686
|
+
st_copy(&last_comment, sf->current);
|
1687
|
+
} else {
|
1688
|
+
last_comment.type = CHAR_NULL;
|
1689
|
+
pos += 1;
|
1690
|
+
}
|
1691
|
+
}
|
1692
|
+
}
|
1693
|
+
|
1694
|
+
/* do we have three tokens? If not then we are done */
|
1695
|
+
if (pos -left < 3) {
|
1696
|
+
left = pos;
|
1697
|
+
continue;
|
1698
|
+
}
|
1699
|
+
|
1700
|
+
/*
|
1701
|
+
* now look for three token folding
|
1702
|
+
*/
|
1703
|
+
if (sf->tokenvec[left].type == TYPE_NUMBER &&
|
1704
|
+
sf->tokenvec[left+1].type == TYPE_OPERATOR &&
|
1705
|
+
sf->tokenvec[left+2].type == TYPE_NUMBER) {
|
1706
|
+
pos -= 2;
|
1707
|
+
left = 0;
|
1708
|
+
continue;
|
1709
|
+
} else if (sf->tokenvec[left].type == TYPE_OPERATOR &&
|
1710
|
+
sf->tokenvec[left+1].type != TYPE_LEFTPARENS &&
|
1711
|
+
sf->tokenvec[left+2].type == TYPE_OPERATOR) {
|
1712
|
+
left = 0;
|
1713
|
+
pos -= 2;
|
1714
|
+
continue;
|
1715
|
+
} else if (sf->tokenvec[left].type == TYPE_LOGIC_OPERATOR &&
|
1716
|
+
sf->tokenvec[left+2].type == TYPE_LOGIC_OPERATOR) {
|
1717
|
+
pos -= 2;
|
1718
|
+
left = 0;
|
1719
|
+
continue;
|
1720
|
+
} else if (sf->tokenvec[left].type == TYPE_VARIABLE &&
|
1721
|
+
sf->tokenvec[left+1].type == TYPE_OPERATOR &&
|
1722
|
+
(sf->tokenvec[left+2].type == TYPE_VARIABLE ||
|
1723
|
+
sf->tokenvec[left+2].type == TYPE_NUMBER ||
|
1724
|
+
sf->tokenvec[left+2].type == TYPE_BAREWORD)) {
|
1725
|
+
pos -= 2;
|
1726
|
+
left = 0;
|
1727
|
+
continue;
|
1728
|
+
} else if ((sf->tokenvec[left].type == TYPE_BAREWORD ||
|
1729
|
+
sf->tokenvec[left].type == TYPE_NUMBER ) &&
|
1730
|
+
sf->tokenvec[left+1].type == TYPE_OPERATOR &&
|
1731
|
+
(sf->tokenvec[left+2].type == TYPE_NUMBER ||
|
1732
|
+
sf->tokenvec[left+2].type == TYPE_BAREWORD)) {
|
1733
|
+
pos -= 2;
|
1734
|
+
left = 0;
|
1735
|
+
continue;
|
1736
|
+
} else if ((sf->tokenvec[left].type == TYPE_BAREWORD ||
|
1737
|
+
sf->tokenvec[left].type == TYPE_NUMBER ||
|
1738
|
+
sf->tokenvec[left].type == TYPE_VARIABLE ||
|
1739
|
+
sf->tokenvec[left].type == TYPE_STRING) &&
|
1740
|
+
sf->tokenvec[left+1].type == TYPE_OPERATOR &&
|
1741
|
+
streq(sf->tokenvec[left+1].val, "::") &&
|
1742
|
+
sf->tokenvec[left+2].type == TYPE_SQLTYPE) {
|
1743
|
+
pos -= 2;
|
1744
|
+
left = 0;
|
1745
|
+
sf->stats_folds += 2;
|
1746
|
+
continue;
|
1747
|
+
} else if ((sf->tokenvec[left].type == TYPE_BAREWORD ||
|
1748
|
+
sf->tokenvec[left].type == TYPE_NUMBER ||
|
1749
|
+
sf->tokenvec[left].type == TYPE_STRING ||
|
1750
|
+
sf->tokenvec[left].type == TYPE_VARIABLE) &&
|
1751
|
+
sf->tokenvec[left+1].type == TYPE_COMMA &&
|
1752
|
+
(sf->tokenvec[left+2].type == TYPE_NUMBER ||
|
1753
|
+
sf->tokenvec[left+2].type == TYPE_BAREWORD ||
|
1754
|
+
sf->tokenvec[left+2].type == TYPE_STRING ||
|
1755
|
+
sf->tokenvec[left+2].type == TYPE_VARIABLE)) {
|
1756
|
+
pos -= 2;
|
1757
|
+
left = 0;
|
1758
|
+
continue;
|
1759
|
+
} else if ((sf->tokenvec[left].type == TYPE_EXPRESSION ||
|
1760
|
+
sf->tokenvec[left].type == TYPE_GROUP ||
|
1761
|
+
sf->tokenvec[left].type == TYPE_COMMA) &&
|
1762
|
+
st_is_unary_op(&sf->tokenvec[left+1]) &&
|
1763
|
+
sf->tokenvec[left+2].type == TYPE_LEFTPARENS) {
|
1764
|
+
/* got something like SELECT + (, LIMIT + (
|
1765
|
+
* remove unary operator
|
1766
|
+
*/
|
1767
|
+
st_copy(&sf->tokenvec[left+1], &sf->tokenvec[left+2]);
|
1768
|
+
pos -= 1;
|
1769
|
+
left = 0;
|
1770
|
+
continue;
|
1771
|
+
} else if ((sf->tokenvec[left].type == TYPE_KEYWORD ||
|
1772
|
+
sf->tokenvec[left].type == TYPE_EXPRESSION ||
|
1773
|
+
sf->tokenvec[left].type == TYPE_GROUP ) &&
|
1774
|
+
st_is_unary_op(&sf->tokenvec[left+1]) &&
|
1775
|
+
(sf->tokenvec[left+2].type == TYPE_NUMBER ||
|
1776
|
+
sf->tokenvec[left+2].type == TYPE_BAREWORD ||
|
1777
|
+
sf->tokenvec[left+2].type == TYPE_VARIABLE ||
|
1778
|
+
sf->tokenvec[left+2].type == TYPE_STRING ||
|
1779
|
+
sf->tokenvec[left+2].type == TYPE_FUNCTION )) {
|
1780
|
+
/* remove unary operators
|
1781
|
+
* select - 1
|
1782
|
+
*/
|
1783
|
+
st_copy(&sf->tokenvec[left+1], &sf->tokenvec[left+2]);
|
1784
|
+
pos -= 1;
|
1785
|
+
left = 0;
|
1786
|
+
continue;
|
1787
|
+
} else if (sf->tokenvec[left].type == TYPE_COMMA &&
|
1788
|
+
st_is_unary_op(&sf->tokenvec[left+1]) &&
|
1789
|
+
(sf->tokenvec[left+2].type == TYPE_NUMBER ||
|
1790
|
+
sf->tokenvec[left+2].type == TYPE_BAREWORD ||
|
1791
|
+
sf->tokenvec[left+2].type == TYPE_VARIABLE ||
|
1792
|
+
sf->tokenvec[left+2].type == TYPE_STRING)) {
|
1793
|
+
/*
|
1794
|
+
* interesting case turn ", -1" ->> ",1" PLUS we need to back up
|
1795
|
+
* one token if possible to see if more folding can be done
|
1796
|
+
* "1,-1" --> "1"
|
1797
|
+
*/
|
1798
|
+
st_copy(&sf->tokenvec[left+1], &sf->tokenvec[left+2]);
|
1799
|
+
left = 0;
|
1800
|
+
/* pos is >= 3 so this is safe */
|
1801
|
+
assert(pos >= 3);
|
1802
|
+
pos -= 3;
|
1803
|
+
continue;
|
1804
|
+
} else if (sf->tokenvec[left].type == TYPE_COMMA &&
|
1805
|
+
st_is_unary_op(&sf->tokenvec[left+1]) &&
|
1806
|
+
sf->tokenvec[left+2].type == TYPE_FUNCTION) {
|
1807
|
+
|
1808
|
+
/* Separate case from above since you end up with
|
1809
|
+
* 1,-sin(1) --> 1 (1)
|
1810
|
+
* Here, just do
|
1811
|
+
* 1,-sin(1) --> 1,sin(1)
|
1812
|
+
* just remove unary operator
|
1813
|
+
*/
|
1814
|
+
st_copy(&sf->tokenvec[left+1], &sf->tokenvec[left+2]);
|
1815
|
+
pos -= 1;
|
1816
|
+
left = 0;
|
1817
|
+
continue;
|
1818
|
+
} else if ((sf->tokenvec[left].type == TYPE_BAREWORD) &&
|
1819
|
+
(sf->tokenvec[left+1].type == TYPE_DOT) &&
|
1820
|
+
(sf->tokenvec[left+2].type == TYPE_BAREWORD)) {
|
1821
|
+
/* ignore the '.n'
|
1822
|
+
* typically is this databasename.table
|
1823
|
+
*/
|
1824
|
+
assert(pos >= 3);
|
1825
|
+
pos -= 2;
|
1826
|
+
left = 0;
|
1827
|
+
continue;
|
1828
|
+
} else if ((sf->tokenvec[left].type == TYPE_EXPRESSION) &&
|
1829
|
+
(sf->tokenvec[left+1].type == TYPE_DOT) &&
|
1830
|
+
(sf->tokenvec[left+2].type == TYPE_BAREWORD)) {
|
1831
|
+
/* select . `foo` --> select `foo` */
|
1832
|
+
st_copy(&sf->tokenvec[left+1], &sf->tokenvec[left+2]);
|
1833
|
+
pos -= 1;
|
1834
|
+
left = 0;
|
1835
|
+
continue;
|
1836
|
+
}
|
1837
|
+
|
1838
|
+
|
1839
|
+
/* no folding -- assume left-most token is
|
1840
|
+
good, now use the existing 2 tokens --
|
1841
|
+
do not get another
|
1842
|
+
*/
|
1843
|
+
|
1844
|
+
left += 1;
|
1845
|
+
|
1846
|
+
} /* while(1) */
|
1847
|
+
|
1848
|
+
/* if we have 4 or less tokens, and we had a comment token
|
1849
|
+
* at the end, add it back
|
1850
|
+
*/
|
1851
|
+
|
1852
|
+
if (left < LIBINJECTION_SQLI_MAX_TOKENS && last_comment.type == TYPE_COMMENT) {
|
1853
|
+
st_copy(&sf->tokenvec[left], &last_comment);
|
1854
|
+
left += 1;
|
1855
|
+
}
|
1856
|
+
|
1857
|
+
/* sometimes we grab a 6th token to help
|
1858
|
+
determine the type of token 5.
|
1859
|
+
*/
|
1860
|
+
if (left > LIBINJECTION_SQLI_MAX_TOKENS) {
|
1861
|
+
left = LIBINJECTION_SQLI_MAX_TOKENS;
|
1862
|
+
}
|
1863
|
+
|
1864
|
+
return (int)left;
|
1865
|
+
}
|
1866
|
+
|
1867
|
+
/* secondary api: detects SQLi in a string, GIVEN a context.
|
1868
|
+
*
|
1869
|
+
* A context can be:
|
1870
|
+
* * CHAR_NULL (\0), process as is
|
1871
|
+
* * CHAR_SINGLE ('), process pretending input started with a
|
1872
|
+
* single quote.
|
1873
|
+
* * CHAR_DOUBLE ("), process pretending input started with a
|
1874
|
+
* double quote.
|
1875
|
+
*
|
1876
|
+
*/
|
1877
|
+
const char* libinjection_sqli_fingerprint(struct libinjection_sqli_state * sql_state, int flags)
|
1878
|
+
{
|
1879
|
+
int i;
|
1880
|
+
int tlen = 0;
|
1881
|
+
|
1882
|
+
libinjection_sqli_reset(sql_state, flags);
|
1883
|
+
|
1884
|
+
tlen = libinjection_sqli_fold(sql_state);
|
1885
|
+
|
1886
|
+
/* Check for magic PHP backquote comment
|
1887
|
+
* If:
|
1888
|
+
* * last token is of type "bareword"
|
1889
|
+
* * And is quoted in a backtick
|
1890
|
+
* * And isn't closed
|
1891
|
+
* * And it's empty?
|
1892
|
+
* Then convert it to comment
|
1893
|
+
*/
|
1894
|
+
if (tlen > 2 &&
|
1895
|
+
sql_state->tokenvec[tlen-1].type == TYPE_BAREWORD &&
|
1896
|
+
sql_state->tokenvec[tlen-1].str_open == CHAR_TICK &&
|
1897
|
+
sql_state->tokenvec[tlen-1].len == 0 &&
|
1898
|
+
sql_state->tokenvec[tlen-1].str_close == CHAR_NULL) {
|
1899
|
+
sql_state->tokenvec[tlen-1].type = TYPE_COMMENT;
|
1900
|
+
}
|
1901
|
+
|
1902
|
+
for (i = 0; i < tlen; ++i) {
|
1903
|
+
sql_state->fingerprint[i] = sql_state->tokenvec[i].type;
|
1904
|
+
}
|
1905
|
+
|
1906
|
+
/*
|
1907
|
+
* make the fingerprint pattern a c-string (null delimited)
|
1908
|
+
*/
|
1909
|
+
sql_state->fingerprint[tlen] = CHAR_NULL;
|
1910
|
+
|
1911
|
+
/*
|
1912
|
+
* check for 'X' in pattern, and then
|
1913
|
+
* clear out all tokens
|
1914
|
+
*
|
1915
|
+
* this means parsing could not be done
|
1916
|
+
* accurately due to pgsql's double comments
|
1917
|
+
* or other syntax that isn't consistent.
|
1918
|
+
* Should be very rare false positive
|
1919
|
+
*/
|
1920
|
+
if (strchr(sql_state->fingerprint, TYPE_EVIL)) {
|
1921
|
+
/* needed for SWIG */
|
1922
|
+
memset((void*)sql_state->fingerprint, 0, LIBINJECTION_SQLI_MAX_TOKENS + 1);
|
1923
|
+
memset((void*)sql_state->tokenvec[0].val, 0, LIBINJECTION_SQLI_TOKEN_SIZE);
|
1924
|
+
|
1925
|
+
sql_state->fingerprint[0] = TYPE_EVIL;
|
1926
|
+
|
1927
|
+
sql_state->tokenvec[0].type = TYPE_EVIL;
|
1928
|
+
sql_state->tokenvec[0].val[0] = TYPE_EVIL;
|
1929
|
+
sql_state->tokenvec[1].type = CHAR_NULL;
|
1930
|
+
}
|
1931
|
+
|
1932
|
+
|
1933
|
+
return sql_state->fingerprint;
|
1934
|
+
}
|
1935
|
+
|
1936
|
+
/*
 * Returns non-zero when the current fingerprint is on the blacklist and is
 * not excused by the whitelist rules. The whitelist check only runs when
 * the blacklist check succeeded.
 */
int libinjection_sqli_check_fingerprint(struct libinjection_sqli_state* sql_state)
{
    if (!libinjection_sqli_blacklist(sql_state)) {
        return 0;
    }
    return libinjection_sqli_not_whitelist(sql_state) != 0;
}
|
1941
|
+
|
1942
|
+
char libinjection_sqli_lookup_word(struct libinjection_sqli_state *sql_state, int lookup_type,
|
1943
|
+
const char* str, size_t len)
|
1944
|
+
{
|
1945
|
+
if (lookup_type == LOOKUP_FINGERPRINT) {
|
1946
|
+
return libinjection_sqli_check_fingerprint(sql_state) ? 'X' : '\0';
|
1947
|
+
} else {
|
1948
|
+
return bsearch_keyword_type(str, len, sql_keywords, sql_keywords_sz);
|
1949
|
+
}
|
1950
|
+
}
|
1951
|
+
|
1952
|
+
int libinjection_sqli_blacklist(struct libinjection_sqli_state* sql_state)
|
1953
|
+
{
|
1954
|
+
/*
|
1955
|
+
* use minimum of 8 bytes to make sure gcc -fstack-protector
|
1956
|
+
* works correctly
|
1957
|
+
*/
|
1958
|
+
char fp2[8];
|
1959
|
+
char ch;
|
1960
|
+
size_t i;
|
1961
|
+
size_t len = strlen(sql_state->fingerprint);
|
1962
|
+
int patmatch;
|
1963
|
+
|
1964
|
+
if (len < 1) {
|
1965
|
+
sql_state->reason = __LINE__;
|
1966
|
+
return FALSE;
|
1967
|
+
}
|
1968
|
+
|
1969
|
+
/*
|
1970
|
+
to keep everything compatible, convert the
|
1971
|
+
v0 fingerprint pattern to v1
|
1972
|
+
v0: up to 5 chars, mixed case
|
1973
|
+
v1: 1 char is '0', up to 5 more chars, upper case
|
1974
|
+
*/
|
1975
|
+
|
1976
|
+
fp2[0] = '0';
|
1977
|
+
for (i = 0; i < len; ++i) {
|
1978
|
+
ch = sql_state->fingerprint[i];
|
1979
|
+
if (ch >= 'a' && ch <= 'z') {
|
1980
|
+
ch -= 0x20;
|
1981
|
+
}
|
1982
|
+
fp2[i+1] = ch;
|
1983
|
+
}
|
1984
|
+
fp2[i+1] = '\0';
|
1985
|
+
|
1986
|
+
patmatch = is_keyword(fp2, len + 1) == TYPE_FINGERPRINT;
|
1987
|
+
|
1988
|
+
/*
|
1989
|
+
* No match.
|
1990
|
+
*
|
1991
|
+
* Set sql_state->reason to current line number
|
1992
|
+
* only for debugging purposes.
|
1993
|
+
*/
|
1994
|
+
if (!patmatch) {
|
1995
|
+
sql_state->reason = __LINE__;
|
1996
|
+
return FALSE;
|
1997
|
+
}
|
1998
|
+
|
1999
|
+
return TRUE;
|
2000
|
+
}
|
2001
|
+
|
2002
|
+
/*
|
2003
|
+
* return TRUE if sqli, false is benign
|
2004
|
+
*/
|
2005
|
+
int libinjection_sqli_not_whitelist(struct libinjection_sqli_state* sql_state)
|
2006
|
+
{
|
2007
|
+
/*
|
2008
|
+
* We assume we got a SQLi match
|
2009
|
+
* This next part just helps reduce false positives.
|
2010
|
+
*
|
2011
|
+
*/
|
2012
|
+
char ch;
|
2013
|
+
size_t tlen = strlen(sql_state->fingerprint);
|
2014
|
+
|
2015
|
+
if (tlen > 1 && sql_state->fingerprint[tlen-1] == TYPE_COMMENT) {
|
2016
|
+
/*
|
2017
|
+
* if ending comment is contains 'sp_password' then it's sqli!
|
2018
|
+
* MS Audit log apparently ignores anything with
|
2019
|
+
* 'sp_password' in it. Unable to find primary refernece to
|
2020
|
+
* this "feature" of SQL Server but seems to be known sqli
|
2021
|
+
* technique
|
2022
|
+
*/
|
2023
|
+
if (my_memmem(sql_state->s, sql_state->slen,
|
2024
|
+
"sp_password", strlen("sp_password"))) {
|
2025
|
+
sql_state->reason = __LINE__;
|
2026
|
+
return TRUE;
|
2027
|
+
}
|
2028
|
+
}
|
2029
|
+
|
2030
|
+
switch (tlen) {
|
2031
|
+
case 2:{
|
2032
|
+
/*
|
2033
|
+
* case 2 are "very small SQLi" which make them
|
2034
|
+
* hard to tell from normal input...
|
2035
|
+
*/
|
2036
|
+
|
2037
|
+
if (sql_state->fingerprint[1] == TYPE_UNION) {
|
2038
|
+
if (sql_state->stats_tokens == 2) {
|
2039
|
+
/* not sure why but 1U comes up in Sqli attack
|
2040
|
+
* likely part of parameter splitting/etc.
|
2041
|
+
* lots of reasons why "1 union" might be normal
|
2042
|
+
* input, so beep only if other SQLi things are present
|
2043
|
+
*/
|
2044
|
+
/* it really is a number and 'union'
|
2045
|
+
* other wise it has folding or comments
|
2046
|
+
*/
|
2047
|
+
sql_state->reason = __LINE__;
|
2048
|
+
return FALSE;
|
2049
|
+
} else {
|
2050
|
+
sql_state->reason = __LINE__;
|
2051
|
+
return TRUE;
|
2052
|
+
}
|
2053
|
+
}
|
2054
|
+
/*
|
2055
|
+
* if 'comment' is '#' ignore.. too many FP
|
2056
|
+
*/
|
2057
|
+
if (sql_state->tokenvec[1].val[0] == '#') {
|
2058
|
+
sql_state->reason = __LINE__;
|
2059
|
+
return FALSE;
|
2060
|
+
}
|
2061
|
+
|
2062
|
+
/*
|
2063
|
+
* for fingerprint like 'nc', only comments of /x are treated
|
2064
|
+
* as SQL... ending comments of "--" and "#" are not sqli
|
2065
|
+
*/
|
2066
|
+
if (sql_state->tokenvec[0].type == TYPE_BAREWORD &&
|
2067
|
+
sql_state->tokenvec[1].type == TYPE_COMMENT &&
|
2068
|
+
sql_state->tokenvec[1].val[0] != '/') {
|
2069
|
+
sql_state->reason = __LINE__;
|
2070
|
+
return FALSE;
|
2071
|
+
}
|
2072
|
+
|
2073
|
+
/*
|
2074
|
+
* if '1c' ends with '/x' then it's sqli
|
2075
|
+
*/
|
2076
|
+
if (sql_state->tokenvec[0].type == TYPE_NUMBER &&
|
2077
|
+
sql_state->tokenvec[1].type == TYPE_COMMENT &&
|
2078
|
+
sql_state->tokenvec[1].val[0] == '/') {
|
2079
|
+
return TRUE;
|
2080
|
+
}
|
2081
|
+
|
2082
|
+
/**
|
2083
|
+
* there are some odd base64-looking query string values
|
2084
|
+
* 1234-ABCDEFEhfhihwuefi--
|
2085
|
+
* which evaluate to "1c"... these are not SQLi
|
2086
|
+
* but 1234-- probably is.
|
2087
|
+
* Make sure the "1" in "1c" is actually a true decimal number
|
2088
|
+
*
|
2089
|
+
* Need to check -original- string since the folding step
|
2090
|
+
* may have merged tokens, e.g. "1+FOO" is folded into "1"
|
2091
|
+
*
|
2092
|
+
* Note: evasion: 1*1--
|
2093
|
+
*/
|
2094
|
+
if (sql_state->tokenvec[0].type == TYPE_NUMBER &&
|
2095
|
+
sql_state->tokenvec[1].type == TYPE_COMMENT) {
|
2096
|
+
if (sql_state->stats_tokens > 2) {
|
2097
|
+
/* we have some folding going on, highly likely sqli */
|
2098
|
+
sql_state->reason = __LINE__;
|
2099
|
+
return TRUE;
|
2100
|
+
}
|
2101
|
+
/*
|
2102
|
+
* we check that next character after the number is either whitespace,
|
2103
|
+
* or '/' or a '-' ==> sqli.
|
2104
|
+
*/
|
2105
|
+
ch = sql_state->s[sql_state->tokenvec[0].len];
|
2106
|
+
if ( ch <= 32 ) {
|
2107
|
+
/* next char was whitespace,e.g. "1234 --"
|
2108
|
+
* this isn't exactly correct.. ideally we should skip over all whitespace
|
2109
|
+
* but this seems to be ok for now
|
2110
|
+
*/
|
2111
|
+
return TRUE;
|
2112
|
+
}
|
2113
|
+
if (ch == '/' && sql_state->s[sql_state->tokenvec[0].len + 1] == '*') {
|
2114
|
+
return TRUE;
|
2115
|
+
}
|
2116
|
+
if (ch == '-' && sql_state->s[sql_state->tokenvec[0].len + 1] == '-') {
|
2117
|
+
return TRUE;
|
2118
|
+
}
|
2119
|
+
|
2120
|
+
sql_state->reason = __LINE__;
|
2121
|
+
return FALSE;
|
2122
|
+
}
|
2123
|
+
|
2124
|
+
/*
|
2125
|
+
* detect obvious sqli scans.. many people put '--' in plain text
|
2126
|
+
* so only detect if input ends with '--', e.g. 1-- but not 1-- foo
|
2127
|
+
*/
|
2128
|
+
if ((sql_state->tokenvec[1].len > 2)
|
2129
|
+
&& sql_state->tokenvec[1].val[0] == '-') {
|
2130
|
+
sql_state->reason = __LINE__;
|
2131
|
+
return FALSE;
|
2132
|
+
}
|
2133
|
+
|
2134
|
+
break;
|
2135
|
+
} /* case 2 */
|
2136
|
+
case 3:{
|
2137
|
+
/*
|
2138
|
+
* ...foo' + 'bar...
|
2139
|
+
* no opening quote, no closing quote
|
2140
|
+
* and each string has data
|
2141
|
+
*/
|
2142
|
+
|
2143
|
+
if (streq(sql_state->fingerprint, "sos")
|
2144
|
+
|| streq(sql_state->fingerprint, "s&s")) {
|
2145
|
+
|
2146
|
+
if ((sql_state->tokenvec[0].str_open == CHAR_NULL)
|
2147
|
+
&& (sql_state->tokenvec[2].str_close == CHAR_NULL)
|
2148
|
+
&& (sql_state->tokenvec[0].str_close == sql_state->tokenvec[2].str_open)) {
|
2149
|
+
/*
|
2150
|
+
* if ....foo" + "bar....
|
2151
|
+
*/
|
2152
|
+
sql_state->reason = __LINE__;
|
2153
|
+
return TRUE;
|
2154
|
+
}
|
2155
|
+
if (sql_state->stats_tokens == 3) {
|
2156
|
+
sql_state->reason = __LINE__;
|
2157
|
+
return FALSE;
|
2158
|
+
}
|
2159
|
+
|
2160
|
+
/*
|
2161
|
+
* not sqli
|
2162
|
+
*/
|
2163
|
+
sql_state->reason = __LINE__;
|
2164
|
+
return FALSE;
|
2165
|
+
} else if (streq(sql_state->fingerprint, "s&n") ||
|
2166
|
+
streq(sql_state->fingerprint, "n&1") ||
|
2167
|
+
streq(sql_state->fingerprint, "1&1") ||
|
2168
|
+
streq(sql_state->fingerprint, "1&v") ||
|
2169
|
+
streq(sql_state->fingerprint, "1&s")) {
|
2170
|
+
/* 'sexy and 17' not sqli
|
2171
|
+
* 'sexy and 17<18' sqli
|
2172
|
+
*/
|
2173
|
+
if (sql_state->stats_tokens == 3) {
|
2174
|
+
sql_state->reason = __LINE__;
|
2175
|
+
return FALSE;
|
2176
|
+
}
|
2177
|
+
} else if (sql_state->tokenvec[1].type == TYPE_KEYWORD) {
|
2178
|
+
if ((sql_state->tokenvec[1].len < 5) ||
|
2179
|
+
cstrcasecmp("INTO", sql_state->tokenvec[1].val, 4)) {
|
2180
|
+
/* if it's not "INTO OUTFILE", or "INTO DUMPFILE" (MySQL)
|
2181
|
+
* then treat as safe
|
2182
|
+
*/
|
2183
|
+
sql_state->reason = __LINE__;
|
2184
|
+
return FALSE;
|
2185
|
+
}
|
2186
|
+
}
|
2187
|
+
break;
|
2188
|
+
} /* case 3 */
|
2189
|
+
case 4:
|
2190
|
+
case 5: {
|
2191
|
+
/* nothing right now */
|
2192
|
+
break;
|
2193
|
+
} /* case 5 */
|
2194
|
+
} /* end switch */
|
2195
|
+
|
2196
|
+
return TRUE;
|
2197
|
+
}
|
2198
|
+
|
2199
|
+
/** Main API, detects SQLi in an input.
|
2200
|
+
*
|
2201
|
+
*
|
2202
|
+
*/
|
2203
|
+
static int reparse_as_mysql(struct libinjection_sqli_state * sql_state)
|
2204
|
+
{
|
2205
|
+
return sql_state->stats_comment_ddx ||
|
2206
|
+
sql_state->stats_comment_hash;
|
2207
|
+
}
|
2208
|
+
|
2209
|
+
/*
|
2210
|
+
* This function is mostly use with SWIG
|
2211
|
+
*/
|
2212
|
+
struct libinjection_sqli_token*
|
2213
|
+
libinjection_sqli_get_token(struct libinjection_sqli_state * sql_state, int i)
|
2214
|
+
{
|
2215
|
+
if (i < 0 || i > (int)LIBINJECTION_SQLI_MAX_TOKENS) {
|
2216
|
+
return NULL;
|
2217
|
+
}
|
2218
|
+
return &(sql_state->tokenvec[i]);
|
2219
|
+
}
|
2220
|
+
|
2221
|
+
int libinjection_is_sqli(struct libinjection_sqli_state * sql_state)
|
2222
|
+
{
|
2223
|
+
const char *s = sql_state->s;
|
2224
|
+
size_t slen = sql_state->slen;
|
2225
|
+
|
2226
|
+
/*
|
2227
|
+
* no input? not sqli
|
2228
|
+
*/
|
2229
|
+
if (slen == 0) {
|
2230
|
+
return FALSE;
|
2231
|
+
}
|
2232
|
+
|
2233
|
+
/*
|
2234
|
+
* test input "as-is"
|
2235
|
+
*/
|
2236
|
+
libinjection_sqli_fingerprint(sql_state, FLAG_QUOTE_NONE | FLAG_SQL_ANSI);
|
2237
|
+
if (sql_state->lookup(sql_state, LOOKUP_FINGERPRINT,
|
2238
|
+
sql_state->fingerprint, strlen(sql_state->fingerprint))) {
|
2239
|
+
return TRUE;
|
2240
|
+
} else if (reparse_as_mysql(sql_state)) {
|
2241
|
+
libinjection_sqli_fingerprint(sql_state, FLAG_QUOTE_NONE | FLAG_SQL_MYSQL);
|
2242
|
+
if (sql_state->lookup(sql_state, LOOKUP_FINGERPRINT,
|
2243
|
+
sql_state->fingerprint, strlen(sql_state->fingerprint))) {
|
2244
|
+
return TRUE;
|
2245
|
+
}
|
2246
|
+
}
|
2247
|
+
|
2248
|
+
/*
|
2249
|
+
* if input has a single_quote, then
|
2250
|
+
* test as if input was actually '
|
2251
|
+
* example: if input if "1' = 1", then pretend it's
|
2252
|
+
* "'1' = 1"
|
2253
|
+
* Porting Notes: example the same as doing
|
2254
|
+
* is_string_sqli(sql_state, "'" + s, slen+1, NULL, fn, arg)
|
2255
|
+
*
|
2256
|
+
*/
|
2257
|
+
if (memchr(s, CHAR_SINGLE, slen)) {
|
2258
|
+
libinjection_sqli_fingerprint(sql_state, FLAG_QUOTE_SINGLE | FLAG_SQL_ANSI);
|
2259
|
+
if (sql_state->lookup(sql_state, LOOKUP_FINGERPRINT,
|
2260
|
+
sql_state->fingerprint, strlen(sql_state->fingerprint))) {
|
2261
|
+
return TRUE;
|
2262
|
+
} else if (reparse_as_mysql(sql_state)) {
|
2263
|
+
libinjection_sqli_fingerprint(sql_state, FLAG_QUOTE_SINGLE | FLAG_SQL_MYSQL);
|
2264
|
+
if (sql_state->lookup(sql_state, LOOKUP_FINGERPRINT,
|
2265
|
+
sql_state->fingerprint, strlen(sql_state->fingerprint))) {
|
2266
|
+
return TRUE;
|
2267
|
+
}
|
2268
|
+
}
|
2269
|
+
}
|
2270
|
+
|
2271
|
+
/*
|
2272
|
+
* same as above but with a double-quote "
|
2273
|
+
*/
|
2274
|
+
if (memchr(s, CHAR_DOUBLE, slen)) {
|
2275
|
+
libinjection_sqli_fingerprint(sql_state, FLAG_QUOTE_DOUBLE | FLAG_SQL_MYSQL);
|
2276
|
+
if (sql_state->lookup(sql_state, LOOKUP_FINGERPRINT,
|
2277
|
+
sql_state->fingerprint, strlen(sql_state->fingerprint))) {
|
2278
|
+
return TRUE;
|
2279
|
+
}
|
2280
|
+
}
|
2281
|
+
|
2282
|
+
/*
|
2283
|
+
* Hurray, input is not SQLi
|
2284
|
+
*/
|
2285
|
+
return FALSE;
|
2286
|
+
}
|
2287
|
+
|
2288
|
+
int libinjection_sqli(const char* input, size_t slen, char fingerprint[])
|
2289
|
+
{
|
2290
|
+
int issqli;
|
2291
|
+
struct libinjection_sqli_state state;
|
2292
|
+
|
2293
|
+
libinjection_sqli_init(&state, input, slen, 0);
|
2294
|
+
issqli = libinjection_is_sqli(&state);
|
2295
|
+
if (issqli) {
|
2296
|
+
strcpy(fingerprint, state.fingerprint);
|
2297
|
+
} else {
|
2298
|
+
fingerprint[0] = '\0';
|
2299
|
+
}
|
2300
|
+
return issqli;
|
2301
|
+
}
|