immunio 0.15.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (157) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +234 -0
  3. data/README.md +147 -0
  4. data/bin/immunio +5 -0
  5. data/lib/immunio.rb +29 -0
  6. data/lib/immunio/agent.rb +260 -0
  7. data/lib/immunio/authentication.rb +96 -0
  8. data/lib/immunio/blocked_app.rb +38 -0
  9. data/lib/immunio/channel.rb +432 -0
  10. data/lib/immunio/cli.rb +39 -0
  11. data/lib/immunio/context.rb +114 -0
  12. data/lib/immunio/errors.rb +43 -0
  13. data/lib/immunio/immunio_ca.crt +45 -0
  14. data/lib/immunio/logger.rb +87 -0
  15. data/lib/immunio/plugins/action_dispatch.rb +45 -0
  16. data/lib/immunio/plugins/action_view.rb +431 -0
  17. data/lib/immunio/plugins/active_record.rb +707 -0
  18. data/lib/immunio/plugins/active_record_relation.rb +370 -0
  19. data/lib/immunio/plugins/authlogic.rb +80 -0
  20. data/lib/immunio/plugins/csrf.rb +24 -0
  21. data/lib/immunio/plugins/devise.rb +40 -0
  22. data/lib/immunio/plugins/environment_reporter.rb +69 -0
  23. data/lib/immunio/plugins/eval.rb +51 -0
  24. data/lib/immunio/plugins/exception_handler.rb +55 -0
  25. data/lib/immunio/plugins/gems_tracker.rb +5 -0
  26. data/lib/immunio/plugins/haml.rb +36 -0
  27. data/lib/immunio/plugins/http_finisher.rb +50 -0
  28. data/lib/immunio/plugins/http_tracker.rb +203 -0
  29. data/lib/immunio/plugins/io.rb +96 -0
  30. data/lib/immunio/plugins/redirect.rb +42 -0
  31. data/lib/immunio/plugins/warden.rb +66 -0
  32. data/lib/immunio/processor.rb +234 -0
  33. data/lib/immunio/rails.rb +26 -0
  34. data/lib/immunio/request.rb +139 -0
  35. data/lib/immunio/rufus_lua_ext/ref.rb +27 -0
  36. data/lib/immunio/rufus_lua_ext/state.rb +157 -0
  37. data/lib/immunio/rufus_lua_ext/table.rb +137 -0
  38. data/lib/immunio/rufus_lua_ext/utils.rb +13 -0
  39. data/lib/immunio/version.rb +5 -0
  40. data/lib/immunio/vm.rb +291 -0
  41. data/lua-hooks/ext/all.c +78 -0
  42. data/lua-hooks/ext/bitop/README +22 -0
  43. data/lua-hooks/ext/bitop/bit.c +189 -0
  44. data/lua-hooks/ext/extconf.rb +38 -0
  45. data/lua-hooks/ext/libinjection/COPYING +37 -0
  46. data/lua-hooks/ext/libinjection/libinjection.h +65 -0
  47. data/lua-hooks/ext/libinjection/libinjection_html5.c +847 -0
  48. data/lua-hooks/ext/libinjection/libinjection_html5.h +54 -0
  49. data/lua-hooks/ext/libinjection/libinjection_sqli.c +2301 -0
  50. data/lua-hooks/ext/libinjection/libinjection_sqli.h +295 -0
  51. data/lua-hooks/ext/libinjection/libinjection_sqli_data.h +9349 -0
  52. data/lua-hooks/ext/libinjection/libinjection_xss.c +531 -0
  53. data/lua-hooks/ext/libinjection/libinjection_xss.h +21 -0
  54. data/lua-hooks/ext/libinjection/lualib.c +109 -0
  55. data/lua-hooks/ext/lpeg/HISTORY +90 -0
  56. data/lua-hooks/ext/lpeg/lpcap.c +537 -0
  57. data/lua-hooks/ext/lpeg/lpcap.h +43 -0
  58. data/lua-hooks/ext/lpeg/lpcode.c +986 -0
  59. data/lua-hooks/ext/lpeg/lpcode.h +34 -0
  60. data/lua-hooks/ext/lpeg/lpeg-128.gif +0 -0
  61. data/lua-hooks/ext/lpeg/lpeg.html +1429 -0
  62. data/lua-hooks/ext/lpeg/lpprint.c +244 -0
  63. data/lua-hooks/ext/lpeg/lpprint.h +35 -0
  64. data/lua-hooks/ext/lpeg/lptree.c +1238 -0
  65. data/lua-hooks/ext/lpeg/lptree.h +77 -0
  66. data/lua-hooks/ext/lpeg/lptypes.h +149 -0
  67. data/lua-hooks/ext/lpeg/lpvm.c +355 -0
  68. data/lua-hooks/ext/lpeg/lpvm.h +58 -0
  69. data/lua-hooks/ext/lpeg/makefile +55 -0
  70. data/lua-hooks/ext/lpeg/re.html +498 -0
  71. data/lua-hooks/ext/lpeg/test.lua +1409 -0
  72. data/lua-hooks/ext/lua-cmsgpack/CMakeLists.txt +45 -0
  73. data/lua-hooks/ext/lua-cmsgpack/README.md +115 -0
  74. data/lua-hooks/ext/lua-cmsgpack/lua_cmsgpack.c +957 -0
  75. data/lua-hooks/ext/lua-cmsgpack/test.lua +570 -0
  76. data/lua-hooks/ext/lua-snapshot/LICENSE +7 -0
  77. data/lua-hooks/ext/lua-snapshot/Makefile +12 -0
  78. data/lua-hooks/ext/lua-snapshot/README.md +18 -0
  79. data/lua-hooks/ext/lua-snapshot/dump.lua +15 -0
  80. data/lua-hooks/ext/lua-snapshot/snapshot.c +455 -0
  81. data/lua-hooks/ext/lua/COPYRIGHT +34 -0
  82. data/lua-hooks/ext/lua/lapi.c +1087 -0
  83. data/lua-hooks/ext/lua/lapi.h +16 -0
  84. data/lua-hooks/ext/lua/lauxlib.c +652 -0
  85. data/lua-hooks/ext/lua/lauxlib.h +174 -0
  86. data/lua-hooks/ext/lua/lbaselib.c +659 -0
  87. data/lua-hooks/ext/lua/lcode.c +831 -0
  88. data/lua-hooks/ext/lua/lcode.h +76 -0
  89. data/lua-hooks/ext/lua/ldblib.c +398 -0
  90. data/lua-hooks/ext/lua/ldebug.c +638 -0
  91. data/lua-hooks/ext/lua/ldebug.h +33 -0
  92. data/lua-hooks/ext/lua/ldo.c +519 -0
  93. data/lua-hooks/ext/lua/ldo.h +57 -0
  94. data/lua-hooks/ext/lua/ldump.c +164 -0
  95. data/lua-hooks/ext/lua/lfunc.c +174 -0
  96. data/lua-hooks/ext/lua/lfunc.h +34 -0
  97. data/lua-hooks/ext/lua/lgc.c +710 -0
  98. data/lua-hooks/ext/lua/lgc.h +110 -0
  99. data/lua-hooks/ext/lua/linit.c +38 -0
  100. data/lua-hooks/ext/lua/liolib.c +556 -0
  101. data/lua-hooks/ext/lua/llex.c +463 -0
  102. data/lua-hooks/ext/lua/llex.h +81 -0
  103. data/lua-hooks/ext/lua/llimits.h +128 -0
  104. data/lua-hooks/ext/lua/lmathlib.c +263 -0
  105. data/lua-hooks/ext/lua/lmem.c +86 -0
  106. data/lua-hooks/ext/lua/lmem.h +49 -0
  107. data/lua-hooks/ext/lua/loadlib.c +705 -0
  108. data/lua-hooks/ext/lua/loadlib_rel.c +760 -0
  109. data/lua-hooks/ext/lua/lobject.c +214 -0
  110. data/lua-hooks/ext/lua/lobject.h +381 -0
  111. data/lua-hooks/ext/lua/lopcodes.c +102 -0
  112. data/lua-hooks/ext/lua/lopcodes.h +268 -0
  113. data/lua-hooks/ext/lua/loslib.c +243 -0
  114. data/lua-hooks/ext/lua/lparser.c +1339 -0
  115. data/lua-hooks/ext/lua/lparser.h +82 -0
  116. data/lua-hooks/ext/lua/lstate.c +214 -0
  117. data/lua-hooks/ext/lua/lstate.h +169 -0
  118. data/lua-hooks/ext/lua/lstring.c +111 -0
  119. data/lua-hooks/ext/lua/lstring.h +31 -0
  120. data/lua-hooks/ext/lua/lstrlib.c +871 -0
  121. data/lua-hooks/ext/lua/ltable.c +588 -0
  122. data/lua-hooks/ext/lua/ltable.h +40 -0
  123. data/lua-hooks/ext/lua/ltablib.c +287 -0
  124. data/lua-hooks/ext/lua/ltm.c +75 -0
  125. data/lua-hooks/ext/lua/ltm.h +54 -0
  126. data/lua-hooks/ext/lua/lua.c +392 -0
  127. data/lua-hooks/ext/lua/lua.def +131 -0
  128. data/lua-hooks/ext/lua/lua.h +388 -0
  129. data/lua-hooks/ext/lua/lua.rc +28 -0
  130. data/lua-hooks/ext/lua/lua_dll.rc +26 -0
  131. data/lua-hooks/ext/lua/luac.c +200 -0
  132. data/lua-hooks/ext/lua/luac.rc +1 -0
  133. data/lua-hooks/ext/lua/luaconf.h +763 -0
  134. data/lua-hooks/ext/lua/luaconf.h.in +724 -0
  135. data/lua-hooks/ext/lua/luaconf.h.orig +763 -0
  136. data/lua-hooks/ext/lua/lualib.h +53 -0
  137. data/lua-hooks/ext/lua/lundump.c +227 -0
  138. data/lua-hooks/ext/lua/lundump.h +36 -0
  139. data/lua-hooks/ext/lua/lvm.c +767 -0
  140. data/lua-hooks/ext/lua/lvm.h +36 -0
  141. data/lua-hooks/ext/lua/lzio.c +82 -0
  142. data/lua-hooks/ext/lua/lzio.h +67 -0
  143. data/lua-hooks/ext/lua/print.c +227 -0
  144. data/lua-hooks/ext/luautf8/README.md +152 -0
  145. data/lua-hooks/ext/luautf8/lutf8lib.c +1274 -0
  146. data/lua-hooks/ext/luautf8/unidata.h +3064 -0
  147. data/lua-hooks/lib/boot.lua +254 -0
  148. data/lua-hooks/lib/encode.lua +4 -0
  149. data/lua-hooks/lib/lexers/LICENSE +21 -0
  150. data/lua-hooks/lib/lexers/bash.lua +134 -0
  151. data/lua-hooks/lib/lexers/bash_dqstr.lua +62 -0
  152. data/lua-hooks/lib/lexers/css.lua +216 -0
  153. data/lua-hooks/lib/lexers/html.lua +106 -0
  154. data/lua-hooks/lib/lexers/javascript.lua +68 -0
  155. data/lua-hooks/lib/lexers/lexer.lua +1575 -0
  156. data/lua-hooks/lib/lexers/markers.lua +33 -0
  157. metadata +308 -0
@@ -0,0 +1,54 @@
1
+ #ifndef LIBINJECTION_HTML5
2
+ #define LIBINJECTION_HTML5
3
+
4
+ #ifdef __cplusplus
5
+ extern "C" {
6
+ #endif
7
+
8
+ /* pull in size_t */
9
+
10
+ #include <stddef.h>
11
+
12
/*
 * Kind of token recorded in h5_state.token_type.
 * Enumerators are implicitly numbered from 0 in declaration order.
 */
enum html5_type {
    DATA_TEXT,
    TAG_NAME_OPEN,
    TAG_NAME_CLOSE,
    TAG_NAME_SELFCLOSE,
    TAG_DATA,
    TAG_CLOSE,
    ATTR_NAME,
    ATTR_VALUE,
    TAG_COMMENT,
    DOCTYPE
};
24
+
25
/*
 * Mode selector accepted by libinjection_h5_init().
 * Enumerators are implicitly numbered from 0 in declaration order.
 * NOTE(review): presumably selects the context in which parsing starts
 * (plain data vs. inside an attribute value with a given quoting) -- confirm
 * against the implementation.
 */
enum html5_flags {
    DATA_STATE,
    VALUE_NO_QUOTE,
    VALUE_SINGLE_QUOTE,
    VALUE_DOUBLE_QUOTE,
    VALUE_BACK_QUOTE
};
32
+
33
+ struct h5_state;
34
+ typedef int (*ptr_html5_state)(struct h5_state*);
35
+
36
+ typedef struct h5_state {
37
+ const char* s;
38
+ size_t len;
39
+ size_t pos;
40
+ int is_close;
41
+ ptr_html5_state state;
42
+ const char* token_start;
43
+ size_t token_len;
44
+ enum html5_type token_type;
45
+ } h5_state_t;
46
+
47
+
48
+ void libinjection_h5_init(h5_state_t* hs, const char* s, size_t len, enum html5_flags);
49
+ int libinjection_h5_next(h5_state_t* hs);
50
+
51
+ #ifdef __cplusplus
52
+ }
53
+ #endif
54
+ #endif
@@ -0,0 +1,2301 @@
1
+ /**
2
+ * Copyright 2012,2013 Nick Galbreath
3
+ * nickg@client9.com
4
+ * BSD License -- see COPYING.txt for details
5
+ *
6
+ * https://libinjection.client9.com/
7
+ *
8
+ */
9
+
10
+ #include <string.h>
11
+ #include <stdlib.h>
12
+ #include <stdio.h>
13
+ #include <ctype.h>
14
+ #include <assert.h>
15
+ #include <stddef.h>
16
+
17
+ #include "libinjection.h"
18
+ #include "libinjection_sqli.h"
19
+ #include "libinjection_sqli_data.h"
20
+
21
+ #define LIBINJECTION_VERSION "3.9.1"
22
+
23
+ #define LIBINJECTION_SQLI_TOKEN_SIZE sizeof(((stoken_t*)(0))->val)
24
+ #define LIBINJECTION_SQLI_MAX_TOKENS 5
25
+
26
+ #ifndef TRUE
27
+ #define TRUE 1
28
+ #endif
29
+ #ifndef FALSE
30
+ #define FALSE 0
31
+ #endif
32
+
33
+ #define CHAR_NULL '\0'
34
+ #define CHAR_SINGLE '\''
35
+ #define CHAR_DOUBLE '"'
36
+ #define CHAR_TICK '`'
37
+
38
+ /* faster than calling out to libc isdigit */
39
+ #ifdef ISDIGIT
40
+ #undef ISDIGIT
41
+ #endif
42
+ #define ISDIGIT(a) ((unsigned)((a) - '0') <= 9)
43
+
44
+ #if 0
45
+ #define FOLD_DEBUG printf("%d \t more=%d pos=%d left=%d\n", __LINE__, more, (int)pos, (int)left);
46
+ #else
47
+ #define FOLD_DEBUG
48
+ #endif
49
+
50
/*
 * Token type codes.  Each code (other than TYPE_NONE) is the character
 * constant listed next to it.
 *
 * Not making public just yet.
 */
typedef enum {
    TYPE_NONE           = 0,
    TYPE_KEYWORD        = 'k',
    TYPE_UNION          = 'U',
    TYPE_GROUP          = 'B',
    TYPE_EXPRESSION     = 'E',
    TYPE_SQLTYPE        = 't',
    TYPE_FUNCTION       = 'f',
    TYPE_BAREWORD       = 'n',
    TYPE_NUMBER         = '1',
    TYPE_VARIABLE       = 'v',
    TYPE_STRING         = 's',
    TYPE_OPERATOR       = 'o',
    TYPE_LOGIC_OPERATOR = '&',
    TYPE_COMMENT        = 'c',
    TYPE_COLLATE        = 'A',
    TYPE_LEFTPARENS     = '(',
    TYPE_RIGHTPARENS    = ')',  /* not used? */
    TYPE_LEFTBRACE      = '{',
    TYPE_RIGHTBRACE     = '}',
    TYPE_DOT            = '.',
    TYPE_COMMA          = ',',
    TYPE_COLON          = ':',
    TYPE_SEMICOLON      = ';',
    TYPE_TSQL           = 'T',  /* TSQL start */
    TYPE_UNKNOWN        = '?',
    TYPE_EVIL           = 'X',  /* unparsable, abort */
    TYPE_FINGERPRINT    = 'F',  /* not really a token */
    TYPE_BACKSLASH      = '\\'
} sqli_token_types;
83
+
84
+ /**
85
+ * Initializes parsing state
86
+ *
87
+ */
88
+ static char flag2delim(int flag)
89
+ {
90
+ if (flag & FLAG_QUOTE_SINGLE) {
91
+ return CHAR_SINGLE;
92
+ } else if (flag & FLAG_QUOTE_DOUBLE) {
93
+ return CHAR_DOUBLE;
94
+ } else {
95
+ return CHAR_NULL;
96
+ }
97
+ }
98
+
99
/* memchr2 finds the two-character sequence (c0, c1) inside a buffer.
 * This is a specialized version of "memmem" or "memchr";
 * 'memmem' doesn't exist on all platforms.
 *
 * haystack     - buffer to search (need not be NUL terminated)
 * haystack_len - number of bytes in haystack
 * c0, c1       - the two consecutive bytes to look for
 *
 * Returns a pointer to the first occurrence of c0 immediately followed by
 * c1, or NULL when there is none (including when the buffer is shorter
 * than two bytes).
 *
 * Porting notes: this is just a special version of
 *    astring.find("AB")
 */
static const char *
memchr2(const char *haystack, size_t haystack_len, char c0, char c1)
{
    const char *cur;
    const char *last;

    if (haystack_len < 2) {
        return NULL;
    }

    /* Computed only after the length check so that an empty buffer never
     * produces the out-of-range pointer 'haystack - 1'.
     */
    last = haystack + haystack_len - 1;

    for (cur = haystack; cur < last; ++cur) {
        /* cur[1] is in range because cur < last */
        if (cur[0] == c0 && cur[1] == c1) {
            return cur;
        }
    }

    return NULL;
}
127
+
128
/**
 * Finds the first occurrence of needle (nlen bytes) in haystack (hlen bytes).
 * Provided because memmem might not exist on some systems.
 *
 * Both pointers must be non-NULL and nlen must be greater than 1 (asserted).
 * Returns a pointer to the match inside haystack, or NULL when there is no
 * match, including when the haystack is shorter than the needle.
 */
static const char *
my_memmem(const char* haystack, size_t hlen, const char* needle, size_t nlen)
{
    const char* cur;
    const char* last;
    assert(haystack);
    assert(needle);
    assert(nlen > 1);

    /* Without this guard 'haystack + hlen - nlen' would point before the
     * start of the buffer.
     */
    if (hlen < nlen) {
        return NULL;
    }

    /* last position at which a full needle still fits */
    last = haystack + hlen - nlen;
    for (cur = haystack; cur <= last; ++cur) {
        /* cheap first-byte test before the full comparison */
        if (cur[0] == needle[0] && memcmp(cur, needle, nlen) == 0) {
            return cur;
        }
    }
    return NULL;
}
147
+
148
/** Length of the longest prefix of s made up only of characters in accept.
 *
 * C Standard library 'strspn' only works for 'c-strings' (null terminated).
 * This works on an arbitrary buffer of len bytes.
 *
 * s      - buffer to scan (need not be NUL terminated)
 * len    - number of bytes in s
 * accept - NUL-terminated set of allowed characters
 *
 * Returns the index of the first byte of s that is not in accept, or len
 * when every byte is accepted.
 *
 * A NUL byte inside s is NOT accepted.  The set is searched with
 * memchr over strlen(accept) bytes rather than with strchr, because
 * strchr(accept, '\0') matches the terminator and would silently treat an
 * embedded NUL as a member of every set.
 *
 * Porting notes:
 *   if accept is 'ABC', then this function would be similar to
 *   a_regexp.match(a_str, '[ABC]*'),
 */
static size_t
strlenspn(const char *s, size_t len, const char *accept)
{
    const size_t accept_len = strlen(accept);
    size_t i;

    for (i = 0; i < len; ++i) {
        if (memchr(accept, s[i], accept_len) == NULL) {
            return i;
        }
    }
    return len;
}
174
+
175
/*
 * Length of the longest prefix of s (len bytes, not necessarily NUL
 * terminated) that contains none of the characters in accept.
 *
 * Because the lookup uses strchr, a NUL byte in s always ends the prefix:
 * strchr matches the terminator of accept.
 */
static size_t
strlencspn(const char *s, size_t len, const char *accept)
{
    size_t n = 0;

    while (n < len && strchr(accept, s[n]) == NULL) {
        ++n;
    }
    return n;
}
189
/*
 * Returns non-zero when ch counts as SQL whitespace:
 *
 *   ' '   0x20 \040 space
 *   '\t'  0x09 \011 horizontal tab
 *   '\n'  0x0a \012 new line
 *   '\v'  0x0b \013 vertical tab
 *   '\f'  0x0c \014 new page
 *   '\r'  0x0d \015 carriage return
 *         0x00 \000 null (oracle)
 *         0xa0 \240 is latin1
 *
 * The NUL case is matched through strchr finding the string terminator.
 */
static int char_is_white(char ch) {
    static const char whitespace[] = " \t\n\v\f\r\240\000";

    return strchr(whitespace, ch) != NULL;
}
201
+
202
/* DANGER DANGER
 * This is a -very specialized function-
 *
 * It compares an ALL UPPER CASE, NUL-terminated C string (a) against
 * *arbitrary memory* (b) of n bytes, folding only ASCII 'a'..'z' in b
 * to upper case.
 *
 * Returns 0 only when the first n bytes match and a ends exactly there.
 * Returns a[i] - folded(b[i]) at the first mismatch, -1 when a ends
 * inside the compared range on a matching NUL, and 1 when a is longer
 * than n.
 *
 * Sane people would just make a copy, up-case and use a hash table.
 *
 * Required since the libc version uses the current locale
 * and is much slower.
 */
static int cstrcasecmp(const char *a, const char *b, size_t n)
{
    size_t i;

    for (i = 0; i < n; ++i) {
        char folded = b[i];

        if (folded >= 'a' && folded <= 'z') {
            folded -= 0x20;
        }
        if (a[i] != folded) {
            return a[i] - folded;
        }
        if (a[i] == '\0') {
            /* a ended before n bytes were consumed */
            return -1;
        }
    }

    /* equal so far: a must also end here to be a full match */
    return (a[n] == 0) ? 0 : 1;
}
232
+
233
/**
 * Case-sensitive equality of two C strings.
 * Returns 1 when equal and 0 otherwise; exists purely for readability.
 */
static int streq(const char *a, const char *b)
{
    return !strcmp(a, b);
}
241
+
242
+ /**
243
+ *
244
+ *
245
+ *
246
+ * Porting Notes:
247
+ * given a mapping/hash of string to char
248
+ * this is just
249
+ * typecode = mapping[key.upper()]
250
+ */
251
+
252
+ static char bsearch_keyword_type(const char *key, size_t len,
253
+ const keyword_t * keywords, size_t numb)
254
+ {
255
+ size_t pos;
256
+ size_t left = 0;
257
+ size_t right = numb - 1;
258
+
259
+ while (left < right) {
260
+ pos = (left + right) >> 1;
261
+
262
+ /* arg0 = upper case only, arg1 = mixed case */
263
+ if (cstrcasecmp(keywords[pos].word, key, len) < 0) {
264
+ left = pos + 1;
265
+ } else {
266
+ right = pos;
267
+ }
268
+ }
269
+ if ((left == right) && cstrcasecmp(keywords[left].word, key, len) == 0) {
270
+ return keywords[left].type;
271
+ } else {
272
+ return CHAR_NULL;
273
+ }
274
+ }
275
+
276
+ static char is_keyword(const char* key, size_t len)
277
+ {
278
+ return bsearch_keyword_type(key, len, sql_keywords, sql_keywords_sz);
279
+ }
280
+
281
+ /* st_token methods
282
+ *
283
+ * The following functions manipulate the stoken_t type
284
+ *
285
+ *
286
+ */
287
+
288
+ static void st_clear(stoken_t * st)
289
+ {
290
+ memset(st, 0, sizeof(stoken_t));
291
+ }
292
+
293
+ static void st_assign_char(stoken_t * st, const char stype, size_t pos, size_t len,
294
+ const char value)
295
+ {
296
+ /* done to elimiate unused warning */
297
+ (void)len;
298
+ st->type = (char) stype;
299
+ st->pos = pos;
300
+ st->len = 1;
301
+ st->val[0] = value;
302
+ st->val[1] = CHAR_NULL;
303
+ }
304
+
305
+ static void st_assign(stoken_t * st, const char stype,
306
+ size_t pos, size_t len, const char* value)
307
+ {
308
+ const size_t MSIZE = LIBINJECTION_SQLI_TOKEN_SIZE;
309
+ size_t last = len < MSIZE ? len : (MSIZE - 1);
310
+ st->type = (char) stype;
311
+ st->pos = pos;
312
+ st->len = last;
313
+ memcpy(st->val, value, last);
314
+ st->val[last] = CHAR_NULL;
315
+ }
316
+
317
+ static void st_copy(stoken_t * dest, const stoken_t * src)
318
+ {
319
+ memcpy(dest, src, sizeof(stoken_t));
320
+ }
321
+
322
+ static int st_is_arithmetic_op(const stoken_t* st)
323
+ {
324
+ const char ch = st->val[0];
325
+ return (st->type == TYPE_OPERATOR && st->len == 1 &&
326
+ (ch == '*' || ch == '/' || ch == '-' || ch == '+' || ch == '%'));
327
+ }
328
+
329
+ static int st_is_unary_op(const stoken_t * st)
330
+ {
331
+ const char* str = st->val;
332
+ const size_t len = st->len;
333
+
334
+ if (st->type != TYPE_OPERATOR) {
335
+ return FALSE;
336
+ }
337
+
338
+ switch (len) {
339
+ case 1:
340
+ return *str == '+' || *str == '-' || *str == '!' || *str == '~';
341
+ case 2:
342
+ return str[0] == '!' && str[1] == '!';
343
+ case 3:
344
+ return cstrcasecmp("NOT", str, 3) == 0;
345
+ default:
346
+ return FALSE;
347
+ }
348
+ }
349
+
350
+ /* Parsers
351
+ *
352
+ *
353
+ */
354
+
355
+ static size_t parse_white(struct libinjection_sqli_state * sf)
356
+ {
357
+ return sf->pos + 1;
358
+ }
359
+
360
+ static size_t parse_operator1(struct libinjection_sqli_state * sf)
361
+ {
362
+ const char *cs = sf->s;
363
+ size_t pos = sf->pos;
364
+
365
+ st_assign_char(sf->current, TYPE_OPERATOR, pos, 1, cs[pos]);
366
+ return pos + 1;
367
+ }
368
+
369
+ static size_t parse_other(struct libinjection_sqli_state * sf)
370
+ {
371
+ const char *cs = sf->s;
372
+ size_t pos = sf->pos;
373
+
374
+ st_assign_char(sf->current, TYPE_UNKNOWN, pos, 1, cs[pos]);
375
+ return pos + 1;
376
+ }
377
+
378
+ static size_t parse_char(struct libinjection_sqli_state * sf)
379
+ {
380
+ const char *cs = sf->s;
381
+ size_t pos = sf->pos;
382
+
383
+ st_assign_char(sf->current, cs[pos], pos, 1, cs[pos]);
384
+ return pos + 1;
385
+ }
386
+
387
+ static size_t parse_eol_comment(struct libinjection_sqli_state * sf)
388
+ {
389
+ const char *cs = sf->s;
390
+ const size_t slen = sf->slen;
391
+ size_t pos = sf->pos;
392
+
393
+ const char *endpos =
394
+ (const char *) memchr((const void *) (cs + pos), '\n', slen - pos);
395
+ if (endpos == NULL) {
396
+ st_assign(sf->current, TYPE_COMMENT, pos, slen - pos, cs + pos);
397
+ return slen;
398
+ } else {
399
+ st_assign(sf->current, TYPE_COMMENT, pos, (size_t)(endpos - cs) - pos, cs + pos);
400
+ return (size_t)((endpos - cs) + 1);
401
+ }
402
+ }
403
+
404
+ /** In Ansi mode, hash is an operator
405
+ * In MYSQL mode, it's a EOL comment like '--'
406
+ */
407
+ static size_t parse_hash(struct libinjection_sqli_state * sf)
408
+ {
409
+ sf->stats_comment_hash += 1;
410
+ if (sf->flags & FLAG_SQL_MYSQL) {
411
+ sf->stats_comment_hash += 1;
412
+ return parse_eol_comment(sf);
413
+ } else {
414
+ st_assign_char(sf->current, TYPE_OPERATOR, sf->pos, 1, '#');
415
+ return sf->pos + 1;
416
+ }
417
+ }
418
+
419
+ static size_t parse_dash(struct libinjection_sqli_state * sf)
420
+ {
421
+ const char *cs = sf->s;
422
+ const size_t slen = sf->slen;
423
+ size_t pos = sf->pos;
424
+
425
+ /*
426
+ * five cases
427
+ * 1) --[white] this is always a SQL comment
428
+ * 2) --[EOF] this is a comment
429
+ * 3) --[notwhite] in MySQL this is NOT a comment but two unary operators
430
+ * 4) --[notwhite] everyone else thinks this is a comment
431
+ * 5) -[not dash] '-' is a unary operator
432
+ */
433
+
434
+ if (pos + 2 < slen && cs[pos + 1] == '-' && char_is_white(cs[pos+2]) ) {
435
+ return parse_eol_comment(sf);
436
+ } else if (pos +2 == slen && cs[pos + 1] == '-') {
437
+ return parse_eol_comment(sf);
438
+ } else if (pos + 1 < slen && cs[pos + 1] == '-' && (sf->flags & FLAG_SQL_ANSI)) {
439
+ /* --[not-white] not-white case:
440
+ *
441
+ */
442
+ sf->stats_comment_ddx += 1;
443
+ return parse_eol_comment(sf);
444
+ } else {
445
+ st_assign_char(sf->current, TYPE_OPERATOR, pos, 1, '-');
446
+ return pos + 1;
447
+ }
448
+ }
449
+
450
+
451
/** Detects MySQL conditional comments, i.e. comments that start with
 * slash-star-bang.  These are simply banned now; previously an attempt
 * was made to parse the inside.
 *
 * For reference:
 * the form is /x![anything]x/ or /x!12345[anything] x/
 * (x stands for '*').
 *
 * MySQL 3 (maybe 4) allowed this:
 *    /x!0selectx/ 1;
 * where 0 could be any number.
 *
 * The last version of MySQL 3 was in 2003.
 * It is unclear if the MySQL 3 syntax was allowed in MySQL 4.
 * The last version of MySQL 4 was in 2008.
 *
 * The caller has already established that cs[pos] == '/' and
 * cs[pos+1] == '*'.  Returns 1 when a third character exists and is '!',
 * otherwise 0.
 */
static size_t is_mysql_comment(const char *cs, const size_t len, size_t pos)
{
    const size_t bang = pos + 2;

    return (bang < len && cs[bang] == '!') ? 1 : 0;
}
490
+
491
+ static size_t parse_slash(struct libinjection_sqli_state * sf)
492
+ {
493
+ const char* ptr;
494
+ size_t clen;
495
+ const char *cs = sf->s;
496
+ const size_t slen = sf->slen;
497
+ size_t pos = sf->pos;
498
+ const char* cur = cs + pos;
499
+ char ctype = TYPE_COMMENT;
500
+ size_t pos1 = pos + 1;
501
+ if (pos1 == slen || cs[pos1] != '*') {
502
+ return parse_operator1(sf);
503
+ }
504
+
505
+ /*
506
+ * skip over initial '/x'
507
+ */
508
+ ptr = memchr2(cur + 2, slen - (pos + 2), '*', '/');
509
+
510
+ /*
511
+ * (ptr == NULL) causes false positive in cppcheck 1.61
512
+ * casting to type seems to fix it
513
+ */
514
+ if (ptr == (const char*) NULL) {
515
+ /* till end of line */
516
+ clen = slen - pos;
517
+ } else {
518
+ clen = (size_t)(ptr + 2 - cur);
519
+ }
520
+
521
+ /*
522
+ * postgresql allows nested comments which makes
523
+ * this is incompatible with parsing so
524
+ * if we find a '/x' inside the coment, then
525
+ * make a new token.
526
+ *
527
+ * Also, Mysql's "conditional" comments for version
528
+ * are an automatic black ban!
529
+ */
530
+
531
+ if (memchr2(cur + 2, (size_t)(ptr - (cur + 1)), '/', '*') != NULL) {
532
+ ctype = TYPE_EVIL;
533
+ } else if (is_mysql_comment(cs, slen, pos)) {
534
+ ctype = TYPE_EVIL;
535
+ }
536
+
537
+ st_assign(sf->current, ctype, pos, clen, cs + pos);
538
+ return pos + clen;
539
+ }
540
+
541
+
542
+ static size_t parse_backslash(struct libinjection_sqli_state * sf)
543
+ {
544
+ const char *cs = sf->s;
545
+ const size_t slen = sf->slen;
546
+ size_t pos = sf->pos;
547
+
548
+ /*
549
+ * Weird MySQL alias for NULL, "\N" (capital N only)
550
+ */
551
+ if (pos + 1 < slen && cs[pos +1] == 'N') {
552
+ st_assign(sf->current, TYPE_NUMBER, pos, 2, cs + pos);
553
+ return pos + 2;
554
+ } else {
555
+ st_assign_char(sf->current, TYPE_BACKSLASH, pos, 1, cs[pos]);
556
+ return pos + 1;
557
+ }
558
+ }
559
+
560
+ static size_t parse_operator2(struct libinjection_sqli_state * sf)
561
+ {
562
+ char ch;
563
+ const char *cs = sf->s;
564
+ const size_t slen = sf->slen;
565
+ size_t pos = sf->pos;
566
+
567
+ if (pos + 1 >= slen) {
568
+ return parse_operator1(sf);
569
+ }
570
+
571
+ if (pos + 2 < slen &&
572
+ cs[pos] == '<' &&
573
+ cs[pos + 1] == '=' &&
574
+ cs[pos + 2] == '>') {
575
+ /*
576
+ * special 3-char operator
577
+ */
578
+ st_assign(sf->current, TYPE_OPERATOR, pos, 3, cs + pos);
579
+ return pos + 3;
580
+ }
581
+
582
+ ch = sf->lookup(sf, LOOKUP_OPERATOR, cs + pos, 2);
583
+ if (ch != CHAR_NULL) {
584
+ st_assign(sf->current, ch, pos, 2, cs+pos);
585
+ return pos + 2;
586
+ }
587
+
588
+ /*
589
+ * not an operator.. what to do with the two
590
+ * characters we got?
591
+ */
592
+
593
+ if (cs[pos] == ':') {
594
+ /* ':' is not an operator */
595
+ st_assign(sf->current, TYPE_COLON, pos, 1, cs+pos);
596
+ return pos + 1;
597
+ } else {
598
+ /*
599
+ * must be a single char operator
600
+ */
601
+ return parse_operator1(sf);
602
+ }
603
+ }
604
+
605
/*
 * Decides whether the character following 'end' is backslash-escaped by
 * counting the run of backslashes that ends at 'end' and does not extend
 * before 'start'.
 *
 * Ok!   " \"   "  one backslash = escaped!
 *       " \\"  "  two backslash = not escaped!
 *       "\\\"  "  three backslash = escaped!
 *
 * end   - last byte to examine (the byte just before the delimiter)
 * start - first byte that may be examined
 *
 * Returns 1 when the number of backslashes is odd, otherwise 0.  An empty
 * range (end == start - 1) yields 0.
 */
static int is_backslash_escaped(const char* end, const char* start)
{
    /* 'ptr' is kept one past the byte being examined so that it never
     * moves below 'start', even when the whole range is backslashes.
     */
    const char* ptr = end + 1;

    while (ptr > start && ptr[-1] == '\\') {
        --ptr;
    }

    /* if number of backslashes is odd, it is escaped */
    return (int)(((end + 1) - ptr) & 1);
}
622
+
623
/*
 * Returns 1 when the byte after cur lies before end and equals *cur
 * (a doubled delimiter such as ''), otherwise 0.
 */
static size_t is_double_delim_escaped(const char* cur, const char* end)
{
    const char* next = cur + 1;

    if (next >= end) {
        return 0;
    }
    return *next == *cur;
}
627
+
628
+ /* Look forward for doubling of delimiter
629
+ *
630
+ * case 'foo''bar' --> foo''bar
631
+ *
632
+ * ending quote isn't duplicated (i.e. escaped)
633
+ * since it's the wrong char or EOL
634
+ *
635
+ */
636
+ static size_t parse_string_core(const char *cs, const size_t len, size_t pos,
637
+ stoken_t * st, char delim, size_t offset)
638
+ {
639
+ /*
640
+ * offset is to skip the perhaps first quote char
641
+ */
642
+ const char *qpos =
643
+ (const char *) memchr((const void *) (cs + pos + offset), delim,
644
+ len - pos - offset);
645
+
646
+ /*
647
+ * then keep string open/close info
648
+ */
649
+ if (offset > 0) {
650
+ /*
651
+ * this is real quote
652
+ */
653
+ st->str_open = delim;
654
+ } else {
655
+ /*
656
+ * this was a simulated quote
657
+ */
658
+ st->str_open = CHAR_NULL;
659
+ }
660
+
661
+ while (TRUE) {
662
+ if (qpos == NULL) {
663
+ /*
664
+ * string ended with no trailing quote
665
+ * assign what we have
666
+ */
667
+ st_assign(st, TYPE_STRING, pos + offset, len - pos - offset, cs + pos + offset);
668
+ st->str_close = CHAR_NULL;
669
+ return len;
670
+ } else if ( is_backslash_escaped(qpos - 1, cs + pos + offset)) {
671
+ /* keep going, move ahead one character */
672
+ qpos =
673
+ (const char *) memchr((const void *) (qpos + 1), delim,
674
+ (size_t)((cs + len) - (qpos + 1)));
675
+ continue;
676
+ } else if (is_double_delim_escaped(qpos, cs + len)) {
677
+ /* keep going, move ahead two characters */
678
+ qpos =
679
+ (const char *) memchr((const void *) (qpos + 2), delim,
680
+ (size_t)((cs + len) - (qpos + 2)));
681
+ continue;
682
+ } else {
683
+ /* hey it's a normal string */
684
+ st_assign(st, TYPE_STRING, pos + offset,
685
+ (size_t)(qpos - (cs + pos + offset)), cs + pos + offset);
686
+ st->str_close = delim;
687
+ return (size_t)(qpos - cs + 1);
688
+ }
689
+ }
690
+ }
691
+
692
+ /**
693
+ * Used when first char is a ' or "
694
+ */
695
+ static size_t parse_string(struct libinjection_sqli_state * sf)
696
+ {
697
+ const char *cs = sf->s;
698
+ const size_t slen = sf->slen;
699
+ size_t pos = sf->pos;
700
+
701
+ /*
702
+ * assert cs[pos] == single or double quote
703
+ */
704
+ return parse_string_core(cs, slen, pos, sf->current, cs[pos], 1);
705
+ }
706
+
707
+ /**
708
+ * Used when first char is:
709
+ * N or n: mysql "National Character set"
710
+ * E : psql "Escaped String"
711
+ */
712
+ static size_t parse_estring(struct libinjection_sqli_state * sf)
713
+ {
714
+ const char *cs = sf->s;
715
+ const size_t slen = sf->slen;
716
+ size_t pos = sf->pos;
717
+
718
+ if (pos + 2 >= slen || cs[pos+1] != CHAR_SINGLE) {
719
+ return parse_word(sf);
720
+ }
721
+ return parse_string_core(cs, slen, pos, sf->current, CHAR_SINGLE, 2);
722
+ }
723
+
724
+ static size_t parse_ustring(struct libinjection_sqli_state * sf)
725
+ {
726
+ const char *cs = sf->s;
727
+ size_t slen = sf->slen;
728
+ size_t pos = sf->pos;
729
+
730
+ if (pos + 2 < slen && cs[pos+1] == '&' && cs[pos+2] == '\'') {
731
+ sf->pos += 2;
732
+ pos = parse_string(sf);
733
+ sf->current->str_open = 'u';
734
+ if (sf->current->str_close == '\'') {
735
+ sf->current->str_close = 'u';
736
+ }
737
+ return pos;
738
+ } else {
739
+ return parse_word(sf);
740
+ }
741
+ }
742
+
743
+ static size_t parse_qstring_core(struct libinjection_sqli_state * sf, size_t offset)
744
+ {
745
+ char ch;
746
+ const char *strend;
747
+ const char *cs = sf->s;
748
+ size_t slen = sf->slen;
749
+ size_t pos = sf->pos + offset;
750
+
751
+ /* if we are already at end of string..
752
+ if current char is not q or Q
753
+ if we don't have 2 more chars
754
+ if char2 != a single quote
755
+ then, just treat as word
756
+ */
757
+ if (pos >= slen ||
758
+ (cs[pos] != 'q' && cs[pos] != 'Q') ||
759
+ pos + 2 >= slen ||
760
+ cs[pos + 1] != '\'') {
761
+ return parse_word(sf);
762
+ }
763
+
764
+ ch = cs[pos + 2];
765
+
766
+ /* the ch > 127 is un-needed since
767
+ * we assume char is signed
768
+ */
769
+ if (ch < 33 /* || ch > 127 */) {
770
+ return parse_word(sf);
771
+ }
772
+ switch (ch) {
773
+ case '(' : ch = ')'; break;
774
+ case '[' : ch = ']'; break;
775
+ case '{' : ch = '}'; break;
776
+ case '<' : ch = '>'; break;
777
+ }
778
+
779
+ strend = memchr2(cs + pos + 3, slen - pos - 3, ch, '\'');
780
+ if (strend == NULL) {
781
+ st_assign(sf->current, TYPE_STRING, pos + 3, slen - pos - 3, cs + pos + 3);
782
+ sf->current->str_open = 'q';
783
+ sf->current->str_close = CHAR_NULL;
784
+ return slen;
785
+ } else {
786
+ st_assign(sf->current, TYPE_STRING, pos + 3, (size_t)(strend - cs) - pos - 3, cs + pos + 3);
787
+ sf->current->str_open = 'q';
788
+ sf->current->str_close = 'q';
789
+ return (size_t)(strend - cs + 2);
790
+ }
791
+ }
792
+
793
/*
 * Oracle's q string: the 'q' is expected at the current position.
 */
static size_t parse_qstring(struct libinjection_sqli_state * sf)
{
    const size_t no_prefix = 0;

    return parse_qstring_core(sf, no_prefix);
}
800
+
801
+ /*
802
+ * mysql's N'STRING' or
803
+ * ... Oracle's nq string
804
+ */
805
+ static size_t parse_nqstring(struct libinjection_sqli_state * sf)
806
+ {
807
+ size_t slen = sf->slen;
808
+ size_t pos = sf->pos;
809
+ if (pos + 2 < slen && sf->s[pos+1] == CHAR_SINGLE) {
810
+ return parse_estring(sf);
811
+ }
812
+ return parse_qstring_core(sf, 1);
813
+ }
814
+
815
+ /*
816
+ * binary literal string
817
+ * re: [bB]'[01]*'
818
+ */
819
+ static size_t parse_bstring(struct libinjection_sqli_state *sf)
820
+ {
821
+ size_t wlen;
822
+ const char *cs = sf->s;
823
+ size_t pos = sf->pos;
824
+ size_t slen = sf->slen;
825
+
826
+ /* need at least 2 more characters
827
+ * if next char isn't a single quote, then
828
+ * continue as normal word
829
+ */
830
+ if (pos + 2 >= slen || cs[pos+1] != '\'') {
831
+ return parse_word(sf);
832
+ }
833
+
834
+ wlen = strlenspn(cs + pos + 2, sf->slen - pos - 2, "01");
835
+ if (pos + 2 + wlen >= slen || cs[pos + 2 + wlen] != '\'') {
836
+ return parse_word(sf);
837
+ }
838
+ st_assign(sf->current, TYPE_NUMBER, pos, wlen + 3, cs + pos);
839
+ return pos + 2 + wlen + 1;
840
+ }
841
+
842
+ /*
843
+ * hex literal string
844
+ * re: [xX]'[0123456789abcdefABCDEF]*'
845
+ * mysql has requirement of having EVEN number of chars,
846
+ * but pgsql does not
847
+ */
848
+ static size_t parse_xstring(struct libinjection_sqli_state *sf)
849
+ {
850
+ size_t wlen;
851
+ const char *cs = sf->s;
852
+ size_t pos = sf->pos;
853
+ size_t slen = sf->slen;
854
+
855
+ /* need at least 2 more characters
856
+ * if next char isn't a single quote, then
857
+ * continue as normal word
858
+ */
859
+ if (pos + 2 >= slen || cs[pos+1] != '\'') {
860
+ return parse_word(sf);
861
+ }
862
+
863
+ wlen = strlenspn(cs + pos + 2, sf->slen - pos - 2, "0123456789ABCDEFabcdef");
864
+ if (pos + 2 + wlen >= slen || cs[pos + 2 + wlen] != '\'') {
865
+ return parse_word(sf);
866
+ }
867
+ st_assign(sf->current, TYPE_NUMBER, pos, wlen + 3, cs + pos);
868
+ return pos + 2 + wlen + 1;
869
+ }
870
+
871
+ /**
872
+ * This handles MS SQLSERVER bracket words
873
+ * http://stackoverflow.com/questions/3551284/sql-serverwhat-do-brackets-mean-around-column-name
874
+ *
875
+ */
876
+ static size_t parse_bword(struct libinjection_sqli_state * sf)
877
+ {
878
+ const char *cs = sf->s;
879
+ size_t pos = sf->pos;
880
+ const char* endptr = (const char*) memchr(cs + pos, ']', sf->slen - pos);
881
+ if (endptr == NULL) {
882
+ st_assign(sf->current, TYPE_BAREWORD, pos, sf->slen - pos, cs + pos);
883
+ return sf->slen;
884
+ } else {
885
+ st_assign(sf->current, TYPE_BAREWORD, pos, (size_t)(endptr - cs) - pos + 1, cs + pos);
886
+ return (size_t)((endptr - cs) + 1);
887
+ }
888
+ }
889
+
890
+ static size_t parse_word(struct libinjection_sqli_state * sf)
891
+ {
892
+ char ch;
893
+ char delim;
894
+ size_t i;
895
+ const char *cs = sf->s;
896
+ size_t pos = sf->pos;
897
+ size_t wlen = strlencspn(cs + pos, sf->slen - pos,
898
+ " []{}<>:\\?=@!#~+-*/&|^%(),';\t\n\v\f\r\"\240\000");
899
+
900
+ st_assign(sf->current, TYPE_BAREWORD, pos, wlen, cs + pos);
901
+
902
+ /* now we need to look inside what we good for "." and "`"
903
+ * and see if what is before is a keyword or not
904
+ */
905
+ for (i =0; i < sf->current->len; ++i) {
906
+ delim = sf->current->val[i];
907
+ if (delim == '.' || delim == '`') {
908
+ ch = sf->lookup(sf, LOOKUP_WORD, sf->current->val, i);
909
+ if (ch != TYPE_NONE && ch != TYPE_BAREWORD) {
910
+ /* needed for swig */
911
+ st_clear(sf->current);
912
+ /*
913
+ * we got something like "SELECT.1"
914
+ * or SELECT`column`
915
+ */
916
+ st_assign(sf->current, ch, pos, i, cs + pos);
917
+ return pos + i;
918
+ }
919
+ }
920
+ }
921
+
922
+ /*
923
+ * do normal lookup with word including '.'
924
+ */
925
+ if (wlen < LIBINJECTION_SQLI_TOKEN_SIZE) {
926
+
927
+ ch = sf->lookup(sf, LOOKUP_WORD, sf->current->val, wlen);
928
+ if (ch == CHAR_NULL) {
929
+ ch = TYPE_BAREWORD;
930
+ }
931
+ sf->current->type = ch;
932
+ }
933
+ return pos + wlen;
934
+ }
935
+
936
+ /* MySQL backticks are a cross between string and
937
+ * and a bare word.
938
+ *
939
+ */
940
+ static size_t parse_tick(struct libinjection_sqli_state* sf)
941
+ {
942
+ size_t pos = parse_string_core(sf->s, sf->slen, sf->pos, sf->current, CHAR_TICK, 1);
943
+
944
+ /* we could check to see if start and end of
945
+ * of string are both "`", i.e. make sure we have
946
+ * matching set. `foo` vs. `foo
947
+ * but I don't think it matters much
948
+ */
949
+
950
+ /* check value of string to see if it's a keyword,
951
+ * function, operator, etc
952
+ */
953
+ char ch = sf->lookup(sf, LOOKUP_WORD, sf->current->val, sf->current->len);
954
+ if (ch == TYPE_FUNCTION) {
955
+ /* if it's a function, then convert token */
956
+ sf->current->type = TYPE_FUNCTION;
957
+ } else {
958
+ /* otherwise it's a 'n' type -- mysql treats
959
+ * everything as a bare word
960
+ */
961
+ sf->current->type = TYPE_BAREWORD;
962
+ }
963
+ return pos;
964
+ }
965
+
966
+ static size_t parse_var(struct libinjection_sqli_state * sf)
967
+ {
968
+ size_t xlen;
969
+ const char *cs = sf->s;
970
+ const size_t slen = sf->slen;
971
+ size_t pos = sf->pos + 1;
972
+
973
+ /*
974
+ * var_count is only used to reconstruct
975
+ * the input. It counts the number of '@'
976
+ * seen 0 in the case of NULL, 1 or 2
977
+ */
978
+
979
+ /*
980
+ * move past optional other '@'
981
+ */
982
+ if (pos < slen && cs[pos] == '@') {
983
+ pos += 1;
984
+ sf->current->count = 2;
985
+ } else {
986
+ sf->current->count = 1;
987
+ }
988
+
989
+ /*
990
+ * MySQL allows @@`version`
991
+ */
992
+ if (pos < slen) {
993
+ if (cs[pos] == '`') {
994
+ sf->pos = pos;
995
+ pos = parse_tick(sf);
996
+ sf->current->type = TYPE_VARIABLE;
997
+ return pos;
998
+ } else if (cs[pos] == CHAR_SINGLE || cs[pos] == CHAR_DOUBLE) {
999
+ sf->pos = pos;
1000
+ pos = parse_string(sf);
1001
+ sf->current->type = TYPE_VARIABLE;
1002
+ return pos;
1003
+ }
1004
+ }
1005
+
1006
+
1007
+ xlen = strlencspn(cs + pos, slen - pos,
1008
+ " <>:\\?=@!#~+-*/&|^%(),';\t\n\v\f\r'`\"");
1009
+ if (xlen == 0) {
1010
+ st_assign(sf->current, TYPE_VARIABLE, pos, 0, cs + pos);
1011
+ return pos;
1012
+ } else {
1013
+ st_assign(sf->current, TYPE_VARIABLE, pos, xlen, cs + pos);
1014
+ return pos + xlen;
1015
+ }
1016
+ }
1017
+
1018
+ static size_t parse_money(struct libinjection_sqli_state *sf)
1019
+ {
1020
+ size_t xlen;
1021
+ const char* strend;
1022
+ const char *cs = sf->s;
1023
+ const size_t slen = sf->slen;
1024
+ size_t pos = sf->pos;
1025
+
1026
+ if (pos + 1 == slen) {
1027
+ /* end of line */
1028
+ st_assign_char(sf->current, TYPE_BAREWORD, pos, 1, '$');
1029
+ return slen;
1030
+ }
1031
+
1032
+ /*
1033
+ * $1,000.00 or $1.000,00 ok!
1034
+ * This also parses $....,,,111 but that's ok
1035
+ */
1036
+
1037
+ xlen = strlenspn(cs + pos + 1, slen - pos - 1, "0123456789.,");
1038
+ if (xlen == 0) {
1039
+ if (cs[pos + 1] == '$') {
1040
+ /* we have $$ .. find ending $$ and make string */
1041
+ strend = memchr2(cs + pos + 2, slen - pos -2, '$', '$');
1042
+ if (strend == NULL) {
1043
+ /* fell off edge */
1044
+ st_assign(sf->current, TYPE_STRING, pos + 2, slen - (pos + 2), cs + pos + 2);
1045
+ sf->current->str_open = '$';
1046
+ sf->current->str_close = CHAR_NULL;
1047
+ return slen;
1048
+ } else {
1049
+ st_assign(sf->current, TYPE_STRING, pos + 2,
1050
+ (size_t)(strend - (cs + pos + 2)), cs + pos + 2);
1051
+ sf->current->str_open = '$';
1052
+ sf->current->str_close = '$';
1053
+ return (size_t)(strend - cs + 2);
1054
+ }
1055
+ } else {
1056
+ /* ok it's not a number or '$$', but maybe it's pgsql "$ quoted strings" */
1057
+ xlen = strlenspn(cs + pos + 1, slen - pos - 1, "abcdefghjiklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");
1058
+ if (xlen == 0) {
1059
+ /* hmm it's "$" _something_ .. just add $ and keep going*/
1060
+ st_assign_char(sf->current, TYPE_BAREWORD, pos, 1, '$');
1061
+ return pos + 1;
1062
+ }
1063
+ /* we have $foobar????? */
1064
+ /* is it $foobar$ */
1065
+ if (pos + xlen + 1 == slen || cs[pos+xlen+1] != '$') {
1066
+ /* not $foobar$, or fell off edge */
1067
+ st_assign_char(sf->current, TYPE_BAREWORD, pos, 1, '$');
1068
+ return pos + 1;
1069
+ }
1070
+
1071
+ /* we have $foobar$ ... find it again */
1072
+ strend = my_memmem(cs+xlen+2, slen - (pos+xlen+2), cs + pos, xlen+2);
1073
+
1074
+ if (strend == NULL) {
1075
+ /* fell off edge */
1076
+ st_assign(sf->current, TYPE_STRING, pos+xlen+2, slen - pos - xlen - 2, cs+pos+xlen+2);
1077
+ sf->current->str_open = '$';
1078
+ sf->current->str_close = CHAR_NULL;
1079
+ return slen;
1080
+ } else {
1081
+ /* got one */
1082
+ st_assign(sf->current, TYPE_STRING, pos+xlen+2,
1083
+ (size_t)(strend - (cs + pos + xlen + 2)), cs+pos+xlen+2);
1084
+ sf->current->str_open = '$';
1085
+ sf->current->str_close = '$';
1086
+ return (size_t)((strend + xlen + 2) - cs);
1087
+ }
1088
+ }
1089
+ } else if (xlen == 1 && cs[pos + 1] == '.') {
1090
+ /* $. should parsed as a word */
1091
+ return parse_word(sf);
1092
+ } else {
1093
+ st_assign(sf->current, TYPE_NUMBER, pos, 1 + xlen, cs + pos);
1094
+ return pos + 1 + xlen;
1095
+ }
1096
+ }
1097
+
1098
+ static size_t parse_number(struct libinjection_sqli_state * sf)
1099
+ {
1100
+ size_t xlen;
1101
+ size_t start;
1102
+ const char* digits = NULL;
1103
+ const char *cs = sf->s;
1104
+ const size_t slen = sf->slen;
1105
+ size_t pos = sf->pos;
1106
+ int have_e = 0;
1107
+ int have_exp = 0;
1108
+
1109
+ /* cs[pos] == '0' has 1/10 chance of being true,
1110
+ * while pos+1< slen is almost always true
1111
+ */
1112
+ if (cs[pos] == '0' && pos + 1 < slen) {
1113
+ if (cs[pos + 1] == 'X' || cs[pos + 1] == 'x') {
1114
+ digits = "0123456789ABCDEFabcdef";
1115
+ } else if (cs[pos + 1] == 'B' || cs[pos + 1] == 'b') {
1116
+ digits = "01";
1117
+ }
1118
+
1119
+ if (digits) {
1120
+ xlen = strlenspn(cs + pos + 2, slen - pos - 2, digits);
1121
+ if (xlen == 0) {
1122
+ st_assign(sf->current, TYPE_BAREWORD, pos, 2, cs + pos);
1123
+ return pos + 2;
1124
+ } else {
1125
+ st_assign(sf->current, TYPE_NUMBER, pos, 2 + xlen, cs + pos);
1126
+ return pos + 2 + xlen;
1127
+ }
1128
+ }
1129
+ }
1130
+
1131
+ start = pos;
1132
+ while (pos < slen && ISDIGIT(cs[pos])) {
1133
+ pos += 1;
1134
+ }
1135
+
1136
+ if (pos < slen && cs[pos] == '.') {
1137
+ pos += 1;
1138
+ while (pos < slen && ISDIGIT(cs[pos])) {
1139
+ pos += 1;
1140
+ }
1141
+ if (pos - start == 1) {
1142
+ /* only one character read so far */
1143
+ st_assign_char(sf->current, TYPE_DOT, start, 1, '.');
1144
+ return pos;
1145
+ }
1146
+ }
1147
+
1148
+ if (pos < slen) {
1149
+ if (cs[pos] == 'E' || cs[pos] == 'e') {
1150
+ have_e = 1;
1151
+ pos += 1;
1152
+ if (pos < slen && (cs[pos] == '+' || cs[pos] == '-')) {
1153
+ pos += 1;
1154
+ }
1155
+ while (pos < slen && ISDIGIT(cs[pos])) {
1156
+ have_exp = 1;
1157
+ pos += 1;
1158
+ }
1159
+ }
1160
+ }
1161
+
1162
+ /* oracle's ending float or double suffix
1163
+ * http://docs.oracle.com/cd/B19306_01/server.102/b14200/sql_elements003.htm#i139891
1164
+ */
1165
+ if (pos < slen && (cs[pos] == 'd' || cs[pos] == 'D' || cs[pos] == 'f' || cs[pos] == 'F')) {
1166
+ if (pos + 1 == slen) {
1167
+ /* line ends evaluate "... 1.2f$" as '1.2f' */
1168
+ pos += 1;
1169
+ } else if ((char_is_white(cs[pos+1]) || cs[pos+1] == ';')) {
1170
+ /*
1171
+ * easy case, evaluate "... 1.2f ... as '1.2f'
1172
+ */
1173
+ pos += 1;
1174
+ } else if (cs[pos+1] == 'u' || cs[pos+1] == 'U') {
1175
+ /*
1176
+ * a bit of a hack but makes '1fUNION' parse as '1f UNION'
1177
+ */
1178
+ pos += 1;
1179
+ } else {
1180
+ /* it's like "123FROM" */
1181
+ /* parse as "123" only */
1182
+ }
1183
+ }
1184
+
1185
+ if (have_e == 1 && have_exp == 0) {
1186
+ /* very special form of
1187
+ * "1234.e"
1188
+ * "10.10E"
1189
+ * ".E"
1190
+ * this is a WORD not a number!! */
1191
+ st_assign(sf->current, TYPE_BAREWORD, start, pos - start, cs + start);
1192
+ } else {
1193
+ st_assign(sf->current, TYPE_NUMBER, start, pos - start, cs + start);
1194
+ }
1195
+ return pos;
1196
+ }
1197
+
1198
+ /*
1199
+ * API to return version. This allows us to increment the version
1200
+ * without having to regenerated the SWIG (or other binding) in minor
1201
+ * releases.
1202
+ */
1203
+ const char* libinjection_version()
1204
+ {
1205
+ return LIBINJECTION_VERSION;
1206
+ }
1207
+
1208
+ int libinjection_sqli_tokenize(struct libinjection_sqli_state * sf)
1209
+ {
1210
+ pt2Function fnptr;
1211
+ size_t *pos = &sf->pos;
1212
+ stoken_t *current = sf->current;
1213
+ const char *s = sf->s;
1214
+ const size_t slen = sf->slen;
1215
+
1216
+ if (slen == 0) {
1217
+ return FALSE;
1218
+ }
1219
+
1220
+ st_clear(current);
1221
+ sf->current = current;
1222
+
1223
+ /*
1224
+ * if we are at beginning of string
1225
+ * and in single-quote or double quote mode
1226
+ * then pretend the input starts with a quote
1227
+ */
1228
+ if (*pos == 0 && (sf->flags & (FLAG_QUOTE_SINGLE | FLAG_QUOTE_DOUBLE))) {
1229
+ *pos = parse_string_core(s, slen, 0, current, flag2delim(sf->flags), 0);
1230
+ sf->stats_tokens += 1;
1231
+ return TRUE;
1232
+ }
1233
+
1234
+ while (*pos < slen) {
1235
+
1236
+ /*
1237
+ * get current character
1238
+ */
1239
+ const unsigned char ch = (unsigned char) (s[*pos]);
1240
+
1241
+ /*
1242
+ * look up the parser, and call it
1243
+ *
1244
+ * Porting Note: this is mapping of char to function
1245
+ * charparsers[ch]()
1246
+ */
1247
+ fnptr = char_parse_map[ch];
1248
+
1249
+ *pos = (*fnptr) (sf);
1250
+
1251
+ /*
1252
+ *
1253
+ */
1254
+ if (current->type != CHAR_NULL) {
1255
+ sf->stats_tokens += 1;
1256
+ return TRUE;
1257
+ }
1258
+ }
1259
+ return FALSE;
1260
+ }
1261
+
1262
+ void libinjection_sqli_init(struct libinjection_sqli_state * sf, const char *s, size_t len, int flags)
1263
+ {
1264
+ if (flags == 0) {
1265
+ flags = FLAG_QUOTE_NONE | FLAG_SQL_ANSI;
1266
+ }
1267
+
1268
+ memset(sf, 0, sizeof(struct libinjection_sqli_state));
1269
+ sf->s = s;
1270
+ sf->slen = len;
1271
+ sf->lookup = libinjection_sqli_lookup_word;
1272
+ sf->userdata = 0;
1273
+ sf->flags = flags;
1274
+ sf->current = &(sf->tokenvec[0]);
1275
+ }
1276
+
1277
+ void libinjection_sqli_reset(struct libinjection_sqli_state * sf, int flags)
1278
+ {
1279
+ void *userdata = sf->userdata;
1280
+ ptr_lookup_fn lookup = sf->lookup;;
1281
+
1282
+ if (flags == 0) {
1283
+ flags = FLAG_QUOTE_NONE | FLAG_SQL_ANSI;
1284
+ }
1285
+ libinjection_sqli_init(sf, sf->s, sf->slen, flags);
1286
+ sf->lookup = lookup;
1287
+ sf->userdata = userdata;
1288
+ }
1289
+
1290
+ void libinjection_sqli_callback(struct libinjection_sqli_state * sf, ptr_lookup_fn fn, void* userdata)
1291
+ {
1292
+ if (fn == NULL) {
1293
+ sf->lookup = libinjection_sqli_lookup_word;
1294
+ sf->userdata = (void*)(NULL);
1295
+ } else {
1296
+ sf->lookup = fn;
1297
+ sf->userdata = userdata;
1298
+ }
1299
+ }
1300
+
1301
+ /** See if two tokens can be merged since they are compound SQL phrases.
1302
+ *
1303
+ * This takes two tokens, and, if they are the right type,
1304
+ * merges their values together. Then checks to see if the
1305
+ * new value is special using the PHRASES mapping.
1306
+ *
1307
+ * Example: "UNION" + "ALL" ==> "UNION ALL"
1308
+ *
1309
+ * C Security Notes: this is safe to use C-strings (null-terminated)
1310
+ * since the types involved by definition do not have embedded nulls
1311
+ * (e.g. there is no keyword with embedded null)
1312
+ *
1313
+ * Porting Notes: since this is C, it's oddly complicated.
1314
+ * This is just: multikeywords[token.value + ' ' + token2.value]
1315
+ *
1316
+ */
1317
+ static int syntax_merge_words(struct libinjection_sqli_state * sf,stoken_t * a, stoken_t * b)
1318
+ {
1319
+ size_t sz1;
1320
+ size_t sz2;
1321
+ size_t sz3;
1322
+ char tmp[LIBINJECTION_SQLI_TOKEN_SIZE];
1323
+ char ch;
1324
+
1325
+ /* first token is of right type? */
1326
+ if (!
1327
+ (a->type == TYPE_KEYWORD ||
1328
+ a->type == TYPE_BAREWORD ||
1329
+ a->type == TYPE_OPERATOR ||
1330
+ a->type == TYPE_UNION ||
1331
+ a->type == TYPE_FUNCTION ||
1332
+ a->type == TYPE_EXPRESSION ||
1333
+ a->type == TYPE_SQLTYPE)) {
1334
+ return CHAR_NULL;
1335
+ }
1336
+
1337
+ if (b->type != TYPE_KEYWORD && b->type != TYPE_BAREWORD &&
1338
+ b->type != TYPE_OPERATOR && b->type != TYPE_SQLTYPE &&
1339
+ b->type != TYPE_LOGIC_OPERATOR &&
1340
+ b->type != TYPE_FUNCTION &&
1341
+ b->type != TYPE_UNION && b->type != TYPE_EXPRESSION) {
1342
+ return CHAR_NULL;
1343
+ }
1344
+
1345
+ sz1 = a->len;
1346
+ sz2 = b->len;
1347
+ sz3 = sz1 + sz2 + 1; /* +1 for space in the middle */
1348
+ if (sz3 >= LIBINJECTION_SQLI_TOKEN_SIZE) { /* make sure there is room for ending null */
1349
+ return FALSE;
1350
+ }
1351
+ /*
1352
+ * oddly annoying last.val + ' ' + current.val
1353
+ */
1354
+ memcpy(tmp, a->val, sz1);
1355
+ tmp[sz1] = ' ';
1356
+ memcpy(tmp + sz1 + 1, b->val, sz2);
1357
+ tmp[sz3] = CHAR_NULL;
1358
+
1359
+ ch = sf->lookup(sf, LOOKUP_WORD, tmp, sz3);
1360
+
1361
+ if (ch != CHAR_NULL) {
1362
+ st_assign(a, ch, a->pos, sz3, tmp);
1363
+ return TRUE;
1364
+ } else {
1365
+ return FALSE;
1366
+ }
1367
+ }
1368
+
1369
+ int libinjection_sqli_fold(struct libinjection_sqli_state * sf)
1370
+ {
1371
+ stoken_t last_comment;
1372
+
1373
+ /* POS is the position of where the NEXT token goes */
1374
+ size_t pos = 0;
1375
+
1376
+ /* LEFT is a count of how many tokens that are already
1377
+ folded or processed (i.e. part of the fingerprint) */
1378
+ size_t left = 0;
1379
+
1380
+ int more = 1;
1381
+
1382
+ st_clear(&last_comment);
1383
+
1384
+ /* Skip all initial comments, right-parens ( and unary operators
1385
+ *
1386
+ */
1387
+ sf->current = &(sf->tokenvec[0]);
1388
+ while (more) {
1389
+ more = libinjection_sqli_tokenize(sf);
1390
+ if ( ! (sf->current->type == TYPE_COMMENT ||
1391
+ sf->current->type == TYPE_LEFTPARENS ||
1392
+ sf->current->type == TYPE_SQLTYPE ||
1393
+ st_is_unary_op(sf->current))) {
1394
+ break;
1395
+ }
1396
+ }
1397
+
1398
+ if (! more) {
1399
+ /* If input was only comments, unary or (, then exit */
1400
+ return 0;
1401
+ } else {
1402
+ /* it's some other token */
1403
+ pos += 1;
1404
+ }
1405
+
1406
+ while (1) {
1407
+ FOLD_DEBUG;
1408
+
1409
+ /* do we have all the max number of tokens? if so do
1410
+ * some special cases for 5 tokens
1411
+ */
1412
+ if (pos >= LIBINJECTION_SQLI_MAX_TOKENS) {
1413
+ if (
1414
+ (
1415
+ sf->tokenvec[0].type == TYPE_NUMBER &&
1416
+ (sf->tokenvec[1].type == TYPE_OPERATOR || sf->tokenvec[1].type == TYPE_COMMA) &&
1417
+ sf->tokenvec[2].type == TYPE_LEFTPARENS &&
1418
+ sf->tokenvec[3].type == TYPE_NUMBER &&
1419
+ sf->tokenvec[4].type == TYPE_RIGHTPARENS
1420
+ ) ||
1421
+ (
1422
+ sf->tokenvec[0].type == TYPE_BAREWORD &&
1423
+ sf->tokenvec[1].type == TYPE_OPERATOR &&
1424
+ sf->tokenvec[2].type == TYPE_LEFTPARENS &&
1425
+ (sf->tokenvec[3].type == TYPE_BAREWORD || sf->tokenvec[3].type == TYPE_NUMBER) &&
1426
+ sf->tokenvec[4].type == TYPE_RIGHTPARENS
1427
+ ) ||
1428
+ (
1429
+ sf->tokenvec[0].type == TYPE_NUMBER &&
1430
+ sf->tokenvec[1].type == TYPE_RIGHTPARENS &&
1431
+ sf->tokenvec[2].type == TYPE_COMMA &&
1432
+ sf->tokenvec[3].type == TYPE_LEFTPARENS &&
1433
+ sf->tokenvec[4].type == TYPE_NUMBER
1434
+ )
1435
+ )
1436
+ {
1437
+ if (pos > LIBINJECTION_SQLI_MAX_TOKENS) {
1438
+ st_copy(&(sf->tokenvec[1]), &(sf->tokenvec[LIBINJECTION_SQLI_MAX_TOKENS]));
1439
+ pos = 2;
1440
+ left = 0;
1441
+ } else {
1442
+ pos = 1;
1443
+ left = 0;
1444
+ }
1445
+ }
1446
+ }
1447
+
1448
+ if (! more || left >= LIBINJECTION_SQLI_MAX_TOKENS) {
1449
+ left = pos;
1450
+ break;
1451
+ }
1452
+
1453
+ /* get up to two tokens */
1454
+ while (more && pos <= LIBINJECTION_SQLI_MAX_TOKENS && (pos - left) < 2) {
1455
+ sf->current = &(sf->tokenvec[pos]);
1456
+ more = libinjection_sqli_tokenize(sf);
1457
+ if (more) {
1458
+ if (sf->current->type == TYPE_COMMENT) {
1459
+ st_copy(&last_comment, sf->current);
1460
+ } else {
1461
+ last_comment.type = CHAR_NULL;
1462
+ pos += 1;
1463
+ }
1464
+ }
1465
+ }
1466
+ FOLD_DEBUG;
1467
+ /* did we get 2 tokens? if not then we are done */
1468
+ if (pos - left < 2) {
1469
+ left = pos;
1470
+ continue;
1471
+ }
1472
+
1473
+ /* FOLD: "ss" -> "s"
1474
+ * "foo" "bar" is valid SQL
1475
+ * just ignore second string
1476
+ */
1477
+ if (sf->tokenvec[left].type == TYPE_STRING && sf->tokenvec[left+1].type == TYPE_STRING) {
1478
+ pos -= 1;
1479
+ sf->stats_folds += 1;
1480
+ continue;
1481
+ } else if (sf->tokenvec[left].type == TYPE_SEMICOLON && sf->tokenvec[left+1].type == TYPE_SEMICOLON) {
1482
+ /* not sure how various engines handle
1483
+ * 'select 1;;drop table foo' or
1484
+ * 'select 1; /x foo x/; drop table foo'
1485
+ * to prevent surprises, just fold away repeated semicolons
1486
+ */
1487
+ pos -= 1;
1488
+ sf->stats_folds += 1;
1489
+ continue;
1490
+ } else if (sf->tokenvec[left].type == TYPE_SEMICOLON &&
1491
+ sf->tokenvec[left+1].type == TYPE_FUNCTION &&
1492
+ cstrcasecmp("IF", sf->tokenvec[left+1].val, sf->tokenvec[left+1].len) == 0) {
1493
+ /* IF is normally a function, except in Transact-SQL where it can be used as a
1494
+ * standalone control flow operator, e.g. ; IF 1=1 ...
1495
+ * if found after a semicolon, convert from 'f' type to 'T' type
1496
+ */
1497
+ sf->tokenvec[left+1].type = TYPE_TSQL;
1498
+ left += 2;
1499
+ continue; /* reparse everything, but we probably can advance left, and pos */
1500
+ } else if ((sf->tokenvec[left].type == TYPE_OPERATOR ||
1501
+ sf->tokenvec[left].type == TYPE_LOGIC_OPERATOR) &&
1502
+ (st_is_unary_op(&sf->tokenvec[left+1]) ||
1503
+ sf->tokenvec[left+1].type == TYPE_SQLTYPE)) {
1504
+ pos -= 1;
1505
+ sf->stats_folds += 1;
1506
+ left = 0;
1507
+ continue;
1508
+ } else if (sf->tokenvec[left].type == TYPE_LEFTPARENS &&
1509
+ st_is_unary_op(&sf->tokenvec[left+1])) {
1510
+ pos -= 1;
1511
+ sf->stats_folds += 1;
1512
+ if (left > 0) {
1513
+ left -= 1;
1514
+ }
1515
+ continue;
1516
+ } else if (syntax_merge_words(sf, &sf->tokenvec[left], &sf->tokenvec[left+1])) {
1517
+ pos -= 1;
1518
+ sf->stats_folds += 1;
1519
+ if (left > 0) {
1520
+ left -= 1;
1521
+ }
1522
+ continue;
1523
+ } else if ((sf->tokenvec[left].type == TYPE_BAREWORD || sf->tokenvec[left].type == TYPE_VARIABLE) &&
1524
+ sf->tokenvec[left+1].type == TYPE_LEFTPARENS && (
1525
+ /* TSQL functions but common enough to be collumn names */
1526
+ cstrcasecmp("USER_ID", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
1527
+ cstrcasecmp("USER_NAME", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
1528
+
1529
+ /* Function in MYSQL */
1530
+ cstrcasecmp("DATABASE", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
1531
+ cstrcasecmp("PASSWORD", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
1532
+ cstrcasecmp("USER", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
1533
+
1534
+ /* Mysql words that act as a variable and are a function */
1535
+
1536
+ /* TSQL current_users is fake-variable */
1537
+ /* http://msdn.microsoft.com/en-us/library/ms176050.aspx */
1538
+ cstrcasecmp("CURRENT_USER", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
1539
+ cstrcasecmp("CURRENT_DATE", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
1540
+ cstrcasecmp("CURRENT_TIME", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
1541
+ cstrcasecmp("CURRENT_TIMESTAMP", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
1542
+ cstrcasecmp("LOCALTIME", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
1543
+ cstrcasecmp("LOCALTIMESTAMP", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0
1544
+ )) {
1545
+
1546
+ /* pos is the same
1547
+ * other conversions need to go here... for instance
1548
+ * password CAN be a function, coalese CAN be a function
1549
+ */
1550
+ sf->tokenvec[left].type = TYPE_FUNCTION;
1551
+ continue;
1552
+ } else if (sf->tokenvec[left].type == TYPE_KEYWORD && (
1553
+ cstrcasecmp("IN", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
1554
+ cstrcasecmp("NOT IN", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0
1555
+ )) {
1556
+
1557
+ if (sf->tokenvec[left+1].type == TYPE_LEFTPARENS) {
1558
+ /* got .... IN ( ... (or 'NOT IN')
1559
+ * it's an operator
1560
+ */
1561
+ sf->tokenvec[left].type = TYPE_OPERATOR;
1562
+ } else {
1563
+ /*
1564
+ * it's a nothing
1565
+ */
1566
+ sf->tokenvec[left].type = TYPE_BAREWORD;
1567
+ }
1568
+
1569
+ /* "IN" can be used as "IN BOOLEAN MODE" for mysql
1570
+ * in which case merging of words can be done later
1571
+ * other wise it acts as an equality operator __ IN (values..)
1572
+ *
1573
+ * here we got "IN" "(" so it's an operator.
1574
+ * also back track to handle "NOT IN"
1575
+ * might need to do the same with like
1576
+ * two use cases "foo" LIKE "BAR" (normal operator)
1577
+ * "foo" = LIKE(1,2)
1578
+ */
1579
+ continue;
1580
+ } else if ((sf->tokenvec[left].type == TYPE_OPERATOR) && (
1581
+ cstrcasecmp("LIKE", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
1582
+ cstrcasecmp("NOT LIKE", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0)) {
1583
+ if (sf->tokenvec[left+1].type == TYPE_LEFTPARENS) {
1584
+ /* SELECT LIKE(...
1585
+ * it's a function
1586
+ */
1587
+ sf->tokenvec[left].type = TYPE_FUNCTION;
1588
+ }
1589
+ } else if (sf->tokenvec[left].type == TYPE_SQLTYPE &&
1590
+ (sf->tokenvec[left+1].type == TYPE_BAREWORD ||
1591
+ sf->tokenvec[left+1].type == TYPE_NUMBER ||
1592
+ sf->tokenvec[left+1].type == TYPE_SQLTYPE ||
1593
+ sf->tokenvec[left+1].type == TYPE_LEFTPARENS ||
1594
+ sf->tokenvec[left+1].type == TYPE_FUNCTION ||
1595
+ sf->tokenvec[left+1].type == TYPE_VARIABLE ||
1596
+ sf->tokenvec[left+1].type == TYPE_STRING)) {
1597
+ st_copy(&sf->tokenvec[left], &sf->tokenvec[left+1]);
1598
+ pos -= 1;
1599
+ sf->stats_folds += 1;
1600
+ left = 0;
1601
+ continue;
1602
+ } else if (sf->tokenvec[left].type == TYPE_COLLATE &&
1603
+ sf->tokenvec[left+1].type == TYPE_BAREWORD) {
1604
+ /*
1605
+ * there are too many collation types.. so if the bareword has a "_"
1606
+ * then it's TYPE_SQLTYPE
1607
+ */
1608
+ if (strchr(sf->tokenvec[left+1].val, '_') != NULL) {
1609
+ sf->tokenvec[left+1].type = TYPE_SQLTYPE;
1610
+ left = 0;
1611
+ }
1612
+ } else if (sf->tokenvec[left].type == TYPE_BACKSLASH) {
1613
+ if (st_is_arithmetic_op(&(sf->tokenvec[left+1]))) {
1614
+ /* very weird case in TSQL where '\%1' is parsed as '0 % 1', etc */
1615
+ sf->tokenvec[left].type = TYPE_NUMBER;
1616
+ } else {
1617
+ /* just ignore it.. Again T-SQL seems to parse \1 as "1" */
1618
+ st_copy(&sf->tokenvec[left], &sf->tokenvec[left+1]);
1619
+ pos -= 1;
1620
+ sf->stats_folds += 1;
1621
+ }
1622
+ left = 0;
1623
+ continue;
1624
+ } else if (sf->tokenvec[left].type == TYPE_LEFTPARENS &&
1625
+ sf->tokenvec[left+1].type == TYPE_LEFTPARENS) {
1626
+ pos -= 1;
1627
+ left = 0;
1628
+ sf->stats_folds += 1;
1629
+ continue;
1630
+ } else if (sf->tokenvec[left].type == TYPE_RIGHTPARENS &&
1631
+ sf->tokenvec[left+1].type == TYPE_RIGHTPARENS) {
1632
+ pos -= 1;
1633
+ left = 0;
1634
+ sf->stats_folds += 1;
1635
+ continue;
1636
+ } else if (sf->tokenvec[left].type == TYPE_LEFTBRACE &&
1637
+ sf->tokenvec[left+1].type == TYPE_BAREWORD) {
1638
+
1639
+ /*
1640
+ * MySQL Degenerate case --
1641
+ *
1642
+ * select { ``.``.id }; -- valid !!!
1643
+ * select { ``.``.``.id }; -- invalid
1644
+ * select ``.``.id; -- invalid
1645
+ * select { ``.id }; -- invalid
1646
+ *
1647
+ * so it appears {``.``.id} is a magic case
1648
+ * I suspect this is "current database, current table, field id"
1649
+ *
1650
+ * The folding code can't look at more than 3 tokens, and
1651
+ * I don't want to make two passes.
1652
+ *
1653
+ * Since "{ ``" so rare, we are just going to blacklist it.
1654
+ *
1655
+ * Highly likely this will need revisiting!
1656
+ *
1657
+ * CREDIT @rsalgado 2013-11-25
1658
+ */
1659
+ if (sf->tokenvec[left+1].len == 0) {
1660
+ sf->tokenvec[left+1].type = TYPE_EVIL;
1661
+ return (int)(left+2);
1662
+ }
1663
+ /* weird ODBC / MYSQL {foo expr} --> expr
1664
+ * but for this rule we just strip away the "{ foo" part
1665
+ */
1666
+ left = 0;
1667
+ pos -= 2;
1668
+ sf->stats_folds += 2;
1669
+ continue;
1670
+ } else if (sf->tokenvec[left+1].type == TYPE_RIGHTBRACE) {
1671
+ pos -= 1;
1672
+ left = 0;
1673
+ sf->stats_folds += 1;
1674
+ continue;
1675
+ }
1676
+
1677
+ /* all cases of handing 2 tokens is done
1678
+ and nothing matched. Get one more token
1679
+ */
1680
+ FOLD_DEBUG;
1681
+ while (more && pos <= LIBINJECTION_SQLI_MAX_TOKENS && pos - left < 3) {
1682
+ sf->current = &(sf->tokenvec[pos]);
1683
+ more = libinjection_sqli_tokenize(sf);
1684
+ if (more) {
1685
+ if (sf->current->type == TYPE_COMMENT) {
1686
+ st_copy(&last_comment, sf->current);
1687
+ } else {
1688
+ last_comment.type = CHAR_NULL;
1689
+ pos += 1;
1690
+ }
1691
+ }
1692
+ }
1693
+
1694
+ /* do we have three tokens? If not then we are done */
1695
+ if (pos -left < 3) {
1696
+ left = pos;
1697
+ continue;
1698
+ }
1699
+
1700
+ /*
1701
+ * now look for three token folding
1702
+ */
1703
+ if (sf->tokenvec[left].type == TYPE_NUMBER &&
1704
+ sf->tokenvec[left+1].type == TYPE_OPERATOR &&
1705
+ sf->tokenvec[left+2].type == TYPE_NUMBER) {
1706
+ pos -= 2;
1707
+ left = 0;
1708
+ continue;
1709
+ } else if (sf->tokenvec[left].type == TYPE_OPERATOR &&
1710
+ sf->tokenvec[left+1].type != TYPE_LEFTPARENS &&
1711
+ sf->tokenvec[left+2].type == TYPE_OPERATOR) {
1712
+ left = 0;
1713
+ pos -= 2;
1714
+ continue;
1715
+ } else if (sf->tokenvec[left].type == TYPE_LOGIC_OPERATOR &&
1716
+ sf->tokenvec[left+2].type == TYPE_LOGIC_OPERATOR) {
1717
+ pos -= 2;
1718
+ left = 0;
1719
+ continue;
1720
+ } else if (sf->tokenvec[left].type == TYPE_VARIABLE &&
1721
+ sf->tokenvec[left+1].type == TYPE_OPERATOR &&
1722
+ (sf->tokenvec[left+2].type == TYPE_VARIABLE ||
1723
+ sf->tokenvec[left+2].type == TYPE_NUMBER ||
1724
+ sf->tokenvec[left+2].type == TYPE_BAREWORD)) {
1725
+ pos -= 2;
1726
+ left = 0;
1727
+ continue;
1728
+ } else if ((sf->tokenvec[left].type == TYPE_BAREWORD ||
1729
+ sf->tokenvec[left].type == TYPE_NUMBER ) &&
1730
+ sf->tokenvec[left+1].type == TYPE_OPERATOR &&
1731
+ (sf->tokenvec[left+2].type == TYPE_NUMBER ||
1732
+ sf->tokenvec[left+2].type == TYPE_BAREWORD)) {
1733
+ pos -= 2;
1734
+ left = 0;
1735
+ continue;
1736
+ } else if ((sf->tokenvec[left].type == TYPE_BAREWORD ||
1737
+ sf->tokenvec[left].type == TYPE_NUMBER ||
1738
+ sf->tokenvec[left].type == TYPE_VARIABLE ||
1739
+ sf->tokenvec[left].type == TYPE_STRING) &&
1740
+ sf->tokenvec[left+1].type == TYPE_OPERATOR &&
1741
+ streq(sf->tokenvec[left+1].val, "::") &&
1742
+ sf->tokenvec[left+2].type == TYPE_SQLTYPE) {
1743
+ pos -= 2;
1744
+ left = 0;
1745
+ sf->stats_folds += 2;
1746
+ continue;
1747
+ } else if ((sf->tokenvec[left].type == TYPE_BAREWORD ||
1748
+ sf->tokenvec[left].type == TYPE_NUMBER ||
1749
+ sf->tokenvec[left].type == TYPE_STRING ||
1750
+ sf->tokenvec[left].type == TYPE_VARIABLE) &&
1751
+ sf->tokenvec[left+1].type == TYPE_COMMA &&
1752
+ (sf->tokenvec[left+2].type == TYPE_NUMBER ||
1753
+ sf->tokenvec[left+2].type == TYPE_BAREWORD ||
1754
+ sf->tokenvec[left+2].type == TYPE_STRING ||
1755
+ sf->tokenvec[left+2].type == TYPE_VARIABLE)) {
1756
+ pos -= 2;
1757
+ left = 0;
1758
+ continue;
1759
+ } else if ((sf->tokenvec[left].type == TYPE_EXPRESSION ||
1760
+ sf->tokenvec[left].type == TYPE_GROUP ||
1761
+ sf->tokenvec[left].type == TYPE_COMMA) &&
1762
+ st_is_unary_op(&sf->tokenvec[left+1]) &&
1763
+ sf->tokenvec[left+2].type == TYPE_LEFTPARENS) {
1764
+ /* got something like SELECT + (, LIMIT + (
1765
+ * remove unary operator
1766
+ */
1767
+ st_copy(&sf->tokenvec[left+1], &sf->tokenvec[left+2]);
1768
+ pos -= 1;
1769
+ left = 0;
1770
+ continue;
1771
+ } else if ((sf->tokenvec[left].type == TYPE_KEYWORD ||
1772
+ sf->tokenvec[left].type == TYPE_EXPRESSION ||
1773
+ sf->tokenvec[left].type == TYPE_GROUP ) &&
1774
+ st_is_unary_op(&sf->tokenvec[left+1]) &&
1775
+ (sf->tokenvec[left+2].type == TYPE_NUMBER ||
1776
+ sf->tokenvec[left+2].type == TYPE_BAREWORD ||
1777
+ sf->tokenvec[left+2].type == TYPE_VARIABLE ||
1778
+ sf->tokenvec[left+2].type == TYPE_STRING ||
1779
+ sf->tokenvec[left+2].type == TYPE_FUNCTION )) {
1780
+ /* remove unary operators
1781
+ * select - 1
1782
+ */
1783
+ st_copy(&sf->tokenvec[left+1], &sf->tokenvec[left+2]);
1784
+ pos -= 1;
1785
+ left = 0;
1786
+ continue;
1787
+ } else if (sf->tokenvec[left].type == TYPE_COMMA &&
1788
+ st_is_unary_op(&sf->tokenvec[left+1]) &&
1789
+ (sf->tokenvec[left+2].type == TYPE_NUMBER ||
1790
+ sf->tokenvec[left+2].type == TYPE_BAREWORD ||
1791
+ sf->tokenvec[left+2].type == TYPE_VARIABLE ||
1792
+ sf->tokenvec[left+2].type == TYPE_STRING)) {
1793
+ /*
1794
+ * interesting case turn ", -1" ->> ",1" PLUS we need to back up
1795
+ * one token if possible to see if more folding can be done
1796
+ * "1,-1" --> "1"
1797
+ */
1798
+ st_copy(&sf->tokenvec[left+1], &sf->tokenvec[left+2]);
1799
+ left = 0;
1800
+ /* pos is >= 3 so this is safe */
1801
+ assert(pos >= 3);
1802
+ pos -= 3;
1803
+ continue;
1804
+ } else if (sf->tokenvec[left].type == TYPE_COMMA &&
1805
+ st_is_unary_op(&sf->tokenvec[left+1]) &&
1806
+ sf->tokenvec[left+2].type == TYPE_FUNCTION) {
1807
+
1808
+ /* Separate case from above since you end up with
1809
+ * 1,-sin(1) --> 1 (1)
1810
+ * Here, just do
1811
+ * 1,-sin(1) --> 1,sin(1)
1812
+ * just remove unary opartor
1813
+ */
1814
+ st_copy(&sf->tokenvec[left+1], &sf->tokenvec[left+2]);
1815
+ pos -= 1;
1816
+ left = 0;
1817
+ continue;
1818
+ } else if ((sf->tokenvec[left].type == TYPE_BAREWORD) &&
1819
+ (sf->tokenvec[left+1].type == TYPE_DOT) &&
1820
+ (sf->tokenvec[left+2].type == TYPE_BAREWORD)) {
1821
+ /* ignore the '.n'
1822
+ * typically is this databasename.table
1823
+ */
1824
+ assert(pos >= 3);
1825
+ pos -= 2;
1826
+ left = 0;
1827
+ continue;
1828
+ } else if ((sf->tokenvec[left].type == TYPE_EXPRESSION) &&
1829
+ (sf->tokenvec[left+1].type == TYPE_DOT) &&
1830
+ (sf->tokenvec[left+2].type == TYPE_BAREWORD)) {
1831
+ /* select . `foo` --> select `foo` */
1832
+ st_copy(&sf->tokenvec[left+1], &sf->tokenvec[left+2]);
1833
+ pos -= 1;
1834
+ left = 0;
1835
+ continue;
1836
+ }
1837
+
1838
+
1839
+ /* no folding -- assume left-most token is
1840
+ good, now use the existing 2 tokens --
1841
+ do not get another
1842
+ */
1843
+
1844
+ left += 1;
1845
+
1846
+ } /* while(1) */
1847
+
1848
+ /* if we have 4 or less tokens, and we had a comment token
1849
+ * at the end, add it back
1850
+ */
1851
+
1852
+ if (left < LIBINJECTION_SQLI_MAX_TOKENS && last_comment.type == TYPE_COMMENT) {
1853
+ st_copy(&sf->tokenvec[left], &last_comment);
1854
+ left += 1;
1855
+ }
1856
+
1857
+ /* sometimes we grab a 6th token to help
1858
+ determine the type of token 5.
1859
+ */
1860
+ if (left > LIBINJECTION_SQLI_MAX_TOKENS) {
1861
+ left = LIBINJECTION_SQLI_MAX_TOKENS;
1862
+ }
1863
+
1864
+ return (int)left;
1865
+ }
1866
+
1867
+ /* secondary api: detects SQLi in a string, GIVEN a context.
1868
+ *
1869
+ * A context can be:
1870
+ * * CHAR_NULL (\0), process as is
1871
+ * * CHAR_SINGLE ('), process pretending input started with a
1872
+ * single quote.
1873
+ * * CHAR_DOUBLE ("), process pretending input started with a
1874
+ * double quote.
1875
+ *
1876
+ */
1877
+ const char* libinjection_sqli_fingerprint(struct libinjection_sqli_state * sql_state, int flags)
1878
+ {
1879
+ int i;
1880
+ int tlen = 0;
1881
+
1882
+ libinjection_sqli_reset(sql_state, flags);
1883
+
1884
+ tlen = libinjection_sqli_fold(sql_state);
1885
+
1886
+ /* Check for magic PHP backquote comment
1887
+ * If:
1888
+ * * last token is of type "bareword"
1889
+ * * And is quoted in a backtick
1890
+ * * And isn't closed
1891
+ * * And it's empty?
1892
+ * Then convert it to comment
1893
+ */
1894
+ if (tlen > 2 &&
1895
+ sql_state->tokenvec[tlen-1].type == TYPE_BAREWORD &&
1896
+ sql_state->tokenvec[tlen-1].str_open == CHAR_TICK &&
1897
+ sql_state->tokenvec[tlen-1].len == 0 &&
1898
+ sql_state->tokenvec[tlen-1].str_close == CHAR_NULL) {
1899
+ sql_state->tokenvec[tlen-1].type = TYPE_COMMENT;
1900
+ }
1901
+
1902
+ for (i = 0; i < tlen; ++i) {
1903
+ sql_state->fingerprint[i] = sql_state->tokenvec[i].type;
1904
+ }
1905
+
1906
+ /*
1907
+ * make the fingerprint pattern a c-string (null delimited)
1908
+ */
1909
+ sql_state->fingerprint[tlen] = CHAR_NULL;
1910
+
1911
+ /*
1912
+ * check for 'X' in pattern, and then
1913
+ * clear out all tokens
1914
+ *
1915
+ * this means parsing could not be done
1916
+ * accurately due to pgsql's double comments
1917
+ * or other syntax that isn't consistent.
1918
+ * Should be very rare false positive
1919
+ */
1920
+ if (strchr(sql_state->fingerprint, TYPE_EVIL)) {
1921
+ /* needed for SWIG */
1922
+ memset((void*)sql_state->fingerprint, 0, LIBINJECTION_SQLI_MAX_TOKENS + 1);
1923
+ memset((void*)sql_state->tokenvec[0].val, 0, LIBINJECTION_SQLI_TOKEN_SIZE);
1924
+
1925
+ sql_state->fingerprint[0] = TYPE_EVIL;
1926
+
1927
+ sql_state->tokenvec[0].type = TYPE_EVIL;
1928
+ sql_state->tokenvec[0].val[0] = TYPE_EVIL;
1929
+ sql_state->tokenvec[1].type = CHAR_NULL;
1930
+ }
1931
+
1932
+
1933
+ return sql_state->fingerprint;
1934
+ }
1935
+
1936
/*
 * Decide whether the fingerprint currently stored in sql_state is SQLi.
 *
 * The fingerprint must pass libinjection_sqli_blacklist() and then
 * libinjection_sqli_not_whitelist(); the second check is skipped when
 * the first one fails.
 *
 * Returns 1 when both checks pass, 0 otherwise.
 */
int libinjection_sqli_check_fingerprint(struct libinjection_sqli_state* sql_state)
{
    if (!libinjection_sqli_blacklist(sql_state)) {
        return 0;
    }
    return libinjection_sqli_not_whitelist(sql_state) != 0;
}
1941
+
1942
+ char libinjection_sqli_lookup_word(struct libinjection_sqli_state *sql_state, int lookup_type,
1943
+ const char* str, size_t len)
1944
+ {
1945
+ if (lookup_type == LOOKUP_FINGERPRINT) {
1946
+ return libinjection_sqli_check_fingerprint(sql_state) ? 'X' : '\0';
1947
+ } else {
1948
+ return bsearch_keyword_type(str, len, sql_keywords, sql_keywords_sz);
1949
+ }
1950
+ }
1951
+
1952
+ int libinjection_sqli_blacklist(struct libinjection_sqli_state* sql_state)
1953
+ {
1954
+ /*
1955
+ * use minimum of 8 bytes to make sure gcc -fstack-protector
1956
+ * works correctly
1957
+ */
1958
+ char fp2[8];
1959
+ char ch;
1960
+ size_t i;
1961
+ size_t len = strlen(sql_state->fingerprint);
1962
+ int patmatch;
1963
+
1964
+ if (len < 1) {
1965
+ sql_state->reason = __LINE__;
1966
+ return FALSE;
1967
+ }
1968
+
1969
+ /*
1970
+ to keep everything compatible, convert the
1971
+ v0 fingerprint pattern to v1
1972
+ v0: up to 5 chars, mixed case
1973
+ v1: 1 char is '0', up to 5 more chars, upper case
1974
+ */
1975
+
1976
+ fp2[0] = '0';
1977
+ for (i = 0; i < len; ++i) {
1978
+ ch = sql_state->fingerprint[i];
1979
+ if (ch >= 'a' && ch <= 'z') {
1980
+ ch -= 0x20;
1981
+ }
1982
+ fp2[i+1] = ch;
1983
+ }
1984
+ fp2[i+1] = '\0';
1985
+
1986
+ patmatch = is_keyword(fp2, len + 1) == TYPE_FINGERPRINT;
1987
+
1988
+ /*
1989
+ * No match.
1990
+ *
1991
+ * Set sql_state->reason to current line number
1992
+ * only for debugging purposes.
1993
+ */
1994
+ if (!patmatch) {
1995
+ sql_state->reason = __LINE__;
1996
+ return FALSE;
1997
+ }
1998
+
1999
+ return TRUE;
2000
+ }
2001
+
2002
+ /*
2003
+ * return TRUE if sqli, false is benign
2004
+ */
2005
+ int libinjection_sqli_not_whitelist(struct libinjection_sqli_state* sql_state)
2006
+ {
2007
+ /*
2008
+ * We assume we got a SQLi match
2009
+ * This next part just helps reduce false positives.
2010
+ *
2011
+ */
2012
+ char ch;
2013
+ size_t tlen = strlen(sql_state->fingerprint);
2014
+
2015
+ if (tlen > 1 && sql_state->fingerprint[tlen-1] == TYPE_COMMENT) {
2016
+ /*
2017
+ * if ending comment is contains 'sp_password' then it's sqli!
2018
+ * MS Audit log apparently ignores anything with
2019
+ * 'sp_password' in it. Unable to find primary refernece to
2020
+ * this "feature" of SQL Server but seems to be known sqli
2021
+ * technique
2022
+ */
2023
+ if (my_memmem(sql_state->s, sql_state->slen,
2024
+ "sp_password", strlen("sp_password"))) {
2025
+ sql_state->reason = __LINE__;
2026
+ return TRUE;
2027
+ }
2028
+ }
2029
+
2030
+ switch (tlen) {
2031
+ case 2:{
2032
+ /*
2033
+ * case 2 are "very small SQLi" which make them
2034
+ * hard to tell from normal input...
2035
+ */
2036
+
2037
+ if (sql_state->fingerprint[1] == TYPE_UNION) {
2038
+ if (sql_state->stats_tokens == 2) {
2039
+ /* not sure why but 1U comes up in Sqli attack
2040
+ * likely part of parameter splitting/etc.
2041
+ * lots of reasons why "1 union" might be normal
2042
+ * input, so beep only if other SQLi things are present
2043
+ */
2044
+ /* it really is a number and 'union'
2045
+ * other wise it has folding or comments
2046
+ */
2047
+ sql_state->reason = __LINE__;
2048
+ return FALSE;
2049
+ } else {
2050
+ sql_state->reason = __LINE__;
2051
+ return TRUE;
2052
+ }
2053
+ }
2054
+ /*
2055
+ * if 'comment' is '#' ignore.. too many FP
2056
+ */
2057
+ if (sql_state->tokenvec[1].val[0] == '#') {
2058
+ sql_state->reason = __LINE__;
2059
+ return FALSE;
2060
+ }
2061
+
2062
+ /*
2063
+ * for fingerprint like 'nc', only comments of /x are treated
2064
+ * as SQL... ending comments of "--" and "#" are not sqli
2065
+ */
2066
+ if (sql_state->tokenvec[0].type == TYPE_BAREWORD &&
2067
+ sql_state->tokenvec[1].type == TYPE_COMMENT &&
2068
+ sql_state->tokenvec[1].val[0] != '/') {
2069
+ sql_state->reason = __LINE__;
2070
+ return FALSE;
2071
+ }
2072
+
2073
+ /*
2074
+ * if '1c' ends with '/x' then it's sqli
2075
+ */
2076
+ if (sql_state->tokenvec[0].type == TYPE_NUMBER &&
2077
+ sql_state->tokenvec[1].type == TYPE_COMMENT &&
2078
+ sql_state->tokenvec[1].val[0] == '/') {
2079
+ return TRUE;
2080
+ }
2081
+
2082
+ /**
2083
+ * there are some odd base64-looking query string values
2084
+ * 1234-ABCDEFEhfhihwuefi--
2085
+ * which evaluate to "1c"... these are not SQLi
2086
+ * but 1234-- probably is.
2087
+ * Make sure the "1" in "1c" is actually a true decimal number
2088
+ *
2089
+ * Need to check -original- string since the folding step
2090
+ * may have merged tokens, e.g. "1+FOO" is folded into "1"
2091
+ *
2092
+ * Note: evasion: 1*1--
2093
+ */
2094
+ if (sql_state->tokenvec[0].type == TYPE_NUMBER &&
2095
+ sql_state->tokenvec[1].type == TYPE_COMMENT) {
2096
+ if (sql_state->stats_tokens > 2) {
2097
+ /* we have some folding going on, highly likely sqli */
2098
+ sql_state->reason = __LINE__;
2099
+ return TRUE;
2100
+ }
2101
+ /*
2102
+ * we check that next character after the number is either whitespace,
2103
+ * or '/' or a '-' ==> sqli.
2104
+ */
2105
+ ch = sql_state->s[sql_state->tokenvec[0].len];
2106
+ if ( ch <= 32 ) {
2107
+ /* next char was whitespace,e.g. "1234 --"
2108
+ * this isn't exactly correct.. ideally we should skip over all whitespace
2109
+ * but this seems to be ok for now
2110
+ */
2111
+ return TRUE;
2112
+ }
2113
+ if (ch == '/' && sql_state->s[sql_state->tokenvec[0].len + 1] == '*') {
2114
+ return TRUE;
2115
+ }
2116
+ if (ch == '-' && sql_state->s[sql_state->tokenvec[0].len + 1] == '-') {
2117
+ return TRUE;
2118
+ }
2119
+
2120
+ sql_state->reason = __LINE__;
2121
+ return FALSE;
2122
+ }
2123
+
2124
+ /*
2125
+ * detect obvious sqli scans.. many people put '--' in plain text
2126
+ * so only detect if input ends with '--', e.g. 1-- but not 1-- foo
2127
+ */
2128
+ if ((sql_state->tokenvec[1].len > 2)
2129
+ && sql_state->tokenvec[1].val[0] == '-') {
2130
+ sql_state->reason = __LINE__;
2131
+ return FALSE;
2132
+ }
2133
+
2134
+ break;
2135
+ } /* case 2 */
2136
+ case 3:{
2137
+ /*
2138
+ * ...foo' + 'bar...
2139
+ * no opening quote, no closing quote
2140
+ * and each string has data
2141
+ */
2142
+
2143
+ if (streq(sql_state->fingerprint, "sos")
2144
+ || streq(sql_state->fingerprint, "s&s")) {
2145
+
2146
+ if ((sql_state->tokenvec[0].str_open == CHAR_NULL)
2147
+ && (sql_state->tokenvec[2].str_close == CHAR_NULL)
2148
+ && (sql_state->tokenvec[0].str_close == sql_state->tokenvec[2].str_open)) {
2149
+ /*
2150
+ * if ....foo" + "bar....
2151
+ */
2152
+ sql_state->reason = __LINE__;
2153
+ return TRUE;
2154
+ }
2155
+ if (sql_state->stats_tokens == 3) {
2156
+ sql_state->reason = __LINE__;
2157
+ return FALSE;
2158
+ }
2159
+
2160
+ /*
2161
+ * not sqli
2162
+ */
2163
+ sql_state->reason = __LINE__;
2164
+ return FALSE;
2165
+ } else if (streq(sql_state->fingerprint, "s&n") ||
2166
+ streq(sql_state->fingerprint, "n&1") ||
2167
+ streq(sql_state->fingerprint, "1&1") ||
2168
+ streq(sql_state->fingerprint, "1&v") ||
2169
+ streq(sql_state->fingerprint, "1&s")) {
2170
+ /* 'sexy and 17' not sqli
2171
+ * 'sexy and 17<18' sqli
2172
+ */
2173
+ if (sql_state->stats_tokens == 3) {
2174
+ sql_state->reason = __LINE__;
2175
+ return FALSE;
2176
+ }
2177
+ } else if (sql_state->tokenvec[1].type == TYPE_KEYWORD) {
2178
+ if ((sql_state->tokenvec[1].len < 5) ||
2179
+ cstrcasecmp("INTO", sql_state->tokenvec[1].val, 4)) {
2180
+ /* if it's not "INTO OUTFILE", or "INTO DUMPFILE" (MySQL)
2181
+ * then treat as safe
2182
+ */
2183
+ sql_state->reason = __LINE__;
2184
+ return FALSE;
2185
+ }
2186
+ }
2187
+ break;
2188
+ } /* case 3 */
2189
+ case 4:
2190
+ case 5: {
2191
+ /* nothing right now */
2192
+ break;
2193
+ } /* case 5 */
2194
+ } /* end switch */
2195
+
2196
+ return TRUE;
2197
+ }
2198
+
2199
+ /** Main API, detects SQLi in an input.
2200
+ *
2201
+ *
2202
+ */
2203
+ static int reparse_as_mysql(struct libinjection_sqli_state * sql_state)
2204
+ {
2205
+ return sql_state->stats_comment_ddx ||
2206
+ sql_state->stats_comment_hash;
2207
+ }
2208
+
2209
+ /*
2210
+ * This function is mostly use with SWIG
2211
+ */
2212
+ struct libinjection_sqli_token*
2213
+ libinjection_sqli_get_token(struct libinjection_sqli_state * sql_state, int i)
2214
+ {
2215
+ if (i < 0 || i > (int)LIBINJECTION_SQLI_MAX_TOKENS) {
2216
+ return NULL;
2217
+ }
2218
+ return &(sql_state->tokenvec[i]);
2219
+ }
2220
+
2221
+ int libinjection_is_sqli(struct libinjection_sqli_state * sql_state)
2222
+ {
2223
+ const char *s = sql_state->s;
2224
+ size_t slen = sql_state->slen;
2225
+
2226
+ /*
2227
+ * no input? not sqli
2228
+ */
2229
+ if (slen == 0) {
2230
+ return FALSE;
2231
+ }
2232
+
2233
+ /*
2234
+ * test input "as-is"
2235
+ */
2236
+ libinjection_sqli_fingerprint(sql_state, FLAG_QUOTE_NONE | FLAG_SQL_ANSI);
2237
+ if (sql_state->lookup(sql_state, LOOKUP_FINGERPRINT,
2238
+ sql_state->fingerprint, strlen(sql_state->fingerprint))) {
2239
+ return TRUE;
2240
+ } else if (reparse_as_mysql(sql_state)) {
2241
+ libinjection_sqli_fingerprint(sql_state, FLAG_QUOTE_NONE | FLAG_SQL_MYSQL);
2242
+ if (sql_state->lookup(sql_state, LOOKUP_FINGERPRINT,
2243
+ sql_state->fingerprint, strlen(sql_state->fingerprint))) {
2244
+ return TRUE;
2245
+ }
2246
+ }
2247
+
2248
+ /*
2249
+ * if input has a single_quote, then
2250
+ * test as if input was actually '
2251
+ * example: if input if "1' = 1", then pretend it's
2252
+ * "'1' = 1"
2253
+ * Porting Notes: example the same as doing
2254
+ * is_string_sqli(sql_state, "'" + s, slen+1, NULL, fn, arg)
2255
+ *
2256
+ */
2257
+ if (memchr(s, CHAR_SINGLE, slen)) {
2258
+ libinjection_sqli_fingerprint(sql_state, FLAG_QUOTE_SINGLE | FLAG_SQL_ANSI);
2259
+ if (sql_state->lookup(sql_state, LOOKUP_FINGERPRINT,
2260
+ sql_state->fingerprint, strlen(sql_state->fingerprint))) {
2261
+ return TRUE;
2262
+ } else if (reparse_as_mysql(sql_state)) {
2263
+ libinjection_sqli_fingerprint(sql_state, FLAG_QUOTE_SINGLE | FLAG_SQL_MYSQL);
2264
+ if (sql_state->lookup(sql_state, LOOKUP_FINGERPRINT,
2265
+ sql_state->fingerprint, strlen(sql_state->fingerprint))) {
2266
+ return TRUE;
2267
+ }
2268
+ }
2269
+ }
2270
+
2271
+ /*
2272
+ * same as above but with a double-quote "
2273
+ */
2274
+ if (memchr(s, CHAR_DOUBLE, slen)) {
2275
+ libinjection_sqli_fingerprint(sql_state, FLAG_QUOTE_DOUBLE | FLAG_SQL_MYSQL);
2276
+ if (sql_state->lookup(sql_state, LOOKUP_FINGERPRINT,
2277
+ sql_state->fingerprint, strlen(sql_state->fingerprint))) {
2278
+ return TRUE;
2279
+ }
2280
+ }
2281
+
2282
+ /*
2283
+ * Hurray, input is not SQLi
2284
+ */
2285
+ return FALSE;
2286
+ }
2287
+
2288
+ int libinjection_sqli(const char* input, size_t slen, char fingerprint[])
2289
+ {
2290
+ int issqli;
2291
+ struct libinjection_sqli_state state;
2292
+
2293
+ libinjection_sqli_init(&state, input, slen, 0);
2294
+ issqli = libinjection_is_sqli(&state);
2295
+ if (issqli) {
2296
+ strcpy(fingerprint, state.fingerprint);
2297
+ } else {
2298
+ fingerprint[0] = '\0';
2299
+ }
2300
+ return issqli;
2301
+ }