tcell_agent 0.2.19 → 0.2.21

Sign up to get free protection for your applications and to get access to all the features.
Files changed (60) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE_libinjection +32 -0
  3. data/Rakefile +14 -1
  4. data/ext/libinjection/extconf.rb +3 -0
  5. data/ext/libinjection/libinjection.h +65 -0
  6. data/ext/libinjection/libinjection_html5.c +847 -0
  7. data/ext/libinjection/libinjection_html5.h +54 -0
  8. data/ext/libinjection/libinjection_sqli.c +2317 -0
  9. data/ext/libinjection/libinjection_sqli.h +295 -0
  10. data/ext/libinjection/libinjection_sqli_data.h +9004 -0
  11. data/ext/libinjection/libinjection_wrap.c +3525 -0
  12. data/ext/libinjection/libinjection_xss.c +531 -0
  13. data/ext/libinjection/libinjection_xss.h +21 -0
  14. data/lib/tcell_agent/configuration.rb +0 -48
  15. data/lib/tcell_agent/logger.rb +1 -0
  16. data/lib/tcell_agent/policies/appsensor/database_sensor.rb +8 -20
  17. data/lib/tcell_agent/policies/appsensor/injection_sensor.rb +30 -46
  18. data/lib/tcell_agent/policies/appsensor/login_sensor.rb +1 -4
  19. data/lib/tcell_agent/policies/appsensor/misc_sensor.rb +8 -22
  20. data/lib/tcell_agent/policies/appsensor/payloads_policy.rb +143 -0
  21. data/lib/tcell_agent/policies/appsensor/response_codes_sensor.rb +3 -1
  22. data/lib/tcell_agent/policies/appsensor/sensor.rb +21 -2
  23. data/lib/tcell_agent/policies/appsensor/size_sensor.rb +3 -1
  24. data/lib/tcell_agent/policies/appsensor/sqli_sensor.rb +9 -0
  25. data/lib/tcell_agent/policies/appsensor/user_agent_sensor.rb +1 -5
  26. data/lib/tcell_agent/policies/appsensor/xss_sensor.rb +9 -1
  27. data/lib/tcell_agent/policies/appsensor_policy.rb +40 -19
  28. data/lib/tcell_agent/policies/http_redirect_policy.rb +12 -2
  29. data/lib/tcell_agent/rails/csrf_exception.rb +1 -1
  30. data/lib/tcell_agent/rails/dlp.rb +98 -76
  31. data/lib/tcell_agent/rails/middleware/global_middleware.rb +1 -2
  32. data/lib/tcell_agent/rails/middleware/headers_middleware.rb +2 -2
  33. data/lib/tcell_agent/rails/on_start.rb +53 -20
  34. data/lib/tcell_agent/sensor_events/appsensor_event.rb +12 -19
  35. data/lib/tcell_agent/sensor_events/appsensor_meta_event.rb +7 -2
  36. data/lib/tcell_agent/sensor_events/sensor.rb +10 -11
  37. data/lib/tcell_agent/sensor_events/server_agent.rb +17 -12
  38. data/lib/tcell_agent/sensor_events/util/sanitizer_utilities.rb +148 -139
  39. data/lib/tcell_agent/utils/params.rb +24 -21
  40. data/lib/tcell_agent/version.rb +1 -1
  41. data/spec/lib/tcell_agent/configuration_spec.rb +0 -179
  42. data/spec/lib/tcell_agent/policies/appsensor/database_sensor_spec.rb +6 -4
  43. data/spec/lib/tcell_agent/policies/appsensor/misc_sensor_spec.rb +31 -22
  44. data/spec/lib/tcell_agent/policies/appsensor/payloads_policy_apply_spec.rb +466 -0
  45. data/spec/lib/tcell_agent/policies/appsensor/payloads_policy_from_json_spec.rb +890 -0
  46. data/spec/lib/tcell_agent/policies/appsensor/payloads_policy_log_spec.rb +484 -0
  47. data/spec/lib/tcell_agent/policies/appsensor/request_size_sensor_spec.rb +4 -3
  48. data/spec/lib/tcell_agent/policies/appsensor/response_codes_sensor_spec.rb +4 -4
  49. data/spec/lib/tcell_agent/policies/appsensor/response_size_sensor_spec.rb +1 -1
  50. data/spec/lib/tcell_agent/policies/appsensor/sqli_sensor_spec.rb +85 -0
  51. data/spec/lib/tcell_agent/policies/appsensor/user_agent_sensor_spec.rb +36 -16
  52. data/spec/lib/tcell_agent/policies/appsensor/xss_sensor_spec.rb +188 -312
  53. data/spec/lib/tcell_agent/policies/appsensor_policy_spec.rb +61 -0
  54. data/spec/lib/tcell_agent/rails/middleware/appsensor_middleware_spec.rb +18 -11
  55. data/spec/lib/tcell_agent/rails/middleware/redirect_middleware_spec.rb +14 -15
  56. data/spec/lib/tcell_agent/sensor_events/appsensor_meta_event_spec.rb +1 -1
  57. data/spec/lib/tcell_agent/sensor_events/util/sanitizer_utilities_spec.rb +6 -5
  58. data/spec/lib/tcell_agent/utils/params_spec.rb +28 -108
  59. data/tcell_agent.gemspec +21 -1
  60. metadata +37 -4
@@ -0,0 +1,2317 @@
1
+ /**
2
+ * Copyright 2012,2016 Nick Galbreath
3
+ * nickg@client9.com
4
+ * BSD License -- see COPYING.txt for details
5
+ *
6
+ * https://libinjection.client9.com/
7
+ *
8
+ */
9
+
10
+ #include <string.h>
11
+ #include <stdlib.h>
12
+ #include <stdio.h>
13
+ #include <ctype.h>
14
+ #include <assert.h>
15
+ #include <stddef.h>
16
+
17
+ #include "libinjection.h"
18
+ #include "libinjection_sqli.h"
19
+ #include "libinjection_sqli_data.h"
20
+
21
/* version string of this libinjection snapshot */
#define LIBINJECTION_VERSION "3.9.1"

/* size in bytes of the 'val' buffer embedded in a stoken_t */
#define LIBINJECTION_SQLI_TOKEN_SIZE sizeof(((stoken_t*)(0))->val)
#define LIBINJECTION_SQLI_MAX_TOKENS 5

/* boolean constants, unless some other header already supplied them */
#ifndef TRUE
#define TRUE 1
#endif
#ifndef FALSE
#define FALSE 0
#endif

/* string delimiters, plus "no delimiter" */
#define CHAR_NULL '\0'
#define CHAR_SINGLE '\''
#define CHAR_DOUBLE '"'
#define CHAR_TICK '`'

/*
 * faster than calling out to libc isdigit:
 * a single unsigned comparison covers both ends of '0'..'9'
 */
#define ISDIGIT(a) ((unsigned)((a) - '0') <= 9)

/* flip to '#if 1' to trace the folding code */
#if 0
#define FOLD_DEBUG printf("%d \t more=%d pos=%d left=%d\n", __LINE__, more, (int)pos, (int)left);
#else
#define FOLD_DEBUG
#endif
46
+
47
/*
 * Token type codes (not making public just yet).
 *
 * Apart from TYPE_NONE, each code is the value of a printable character.
 */
typedef enum {
    TYPE_NONE           = 0,
    TYPE_KEYWORD        = 'k',
    TYPE_UNION          = 'U',
    TYPE_GROUP          = 'B',
    TYPE_EXPRESSION     = 'E',
    TYPE_SQLTYPE        = 't',
    TYPE_FUNCTION       = 'f',
    TYPE_BAREWORD       = 'n',
    TYPE_NUMBER         = '1',
    TYPE_VARIABLE       = 'v',
    TYPE_STRING         = 's',
    TYPE_OPERATOR       = 'o',
    TYPE_LOGIC_OPERATOR = '&',
    TYPE_COMMENT        = 'c',
    TYPE_COLLATE        = 'A',
    TYPE_LEFTPARENS     = '(',
    TYPE_RIGHTPARENS    = ')',  /* not used? */
    TYPE_LEFTBRACE      = '{',
    TYPE_RIGHTBRACE     = '}',
    TYPE_DOT            = '.',
    TYPE_COMMA          = ',',
    TYPE_COLON          = ':',
    TYPE_SEMICOLON      = ';',
    TYPE_TSQL           = 'T',  /* TSQL start */
    TYPE_UNKNOWN        = '?',
    TYPE_EVIL           = 'X',  /* unparsable, abort */
    TYPE_FINGERPRINT    = 'F',  /* not really a token */
    TYPE_BACKSLASH      = '\\'
} sqli_token_types;
80
+
81
+ /**
82
+ * Initializes parsing state
83
+ *
84
+ */
85
+ static char flag2delim(int flag)
86
+ {
87
+ if (flag & FLAG_QUOTE_SINGLE) {
88
+ return CHAR_SINGLE;
89
+ } else if (flag & FLAG_QUOTE_DOUBLE) {
90
+ return CHAR_DOUBLE;
91
+ } else {
92
+ return CHAR_NULL;
93
+ }
94
+ }
95
+
96
/* memchr2 finds a pair of adjacent characters inside a length-delimited
 * buffer.  This is a specialized version of "memmem" or "memchr";
 * 'memmem' doesn't exist on all platforms.
 *
 * Porting notes: this is just a special version of
 *    astring.find("AB")
 *
 * haystack      start of the buffer (need not be NUL terminated)
 * haystack_len  number of readable bytes in haystack
 * c0, c1        the two characters that must appear back to back
 *
 * Returns a pointer to the first occurrence of c0 immediately followed
 * by c1, or NULL if there is none or the buffer is shorter than 2 bytes.
 */
static const char *
memchr2(const char *haystack, size_t haystack_len, char c0, char c1)
{
    const char *cur;
    const char *last;

    if (haystack_len < 2) {
        return NULL;
    }

    /*
     * Only form the end pointer once the length is known to be >= 2:
     * with a zero length, 'haystack + haystack_len - 1' would point
     * before the buffer, which is undefined behaviour.
     */
    last = haystack + haystack_len - 1;

    for (cur = haystack; cur < last; ++cur) {
        /* cur[1] is readable because cur < last */
        if (cur[0] == c0 && cur[1] == c1) {
            return cur;
        }
    }

    return NULL;
}
124
+
125
/**
 * Finds the first occurrence of a byte sequence inside a buffer.
 * Local replacement for memmem, which might not exist on some systems.
 *
 * haystack  buffer to search, hlen bytes long (must not be NULL)
 * needle    sequence to look for, nlen bytes long (must not be NULL,
 *           and nlen must be > 1; both are enforced with assert)
 *
 * Returns a pointer to the first match inside haystack, or NULL when
 * there is no match or the needle is longer than the haystack.
 */
static const char *
my_memmem(const char* haystack, size_t hlen, const char* needle, size_t nlen)
{
    const char* cur;
    const char* last;
    assert(haystack);
    assert(needle);
    assert(nlen > 1);

    /*
     * A needle longer than the haystack cannot match.  Without this
     * guard 'hlen - nlen' wraps around and 'last' is an invalid pointer.
     */
    if (hlen < nlen) {
        return NULL;
    }

    /* last position at which a full needle still fits */
    last = haystack + (hlen - nlen);
    for (cur = haystack; cur <= last; ++cur) {
        /* cheap first-byte test before the full compare */
        if (cur[0] == needle[0] && memcmp(cur, needle, nlen) == 0) {
            return cur;
        }
    }
    return NULL;
}
144
+
145
/** Length of the leading run of bytes that all belong to 'accept'.
 *
 * C Standard library 'strspn' only works for 'c-strings' (null terminated).
 * This works on a buffer of arbitrary length.
 *
 * Note: membership is tested with strchr, which also matches the
 * terminator of 'accept', so a NUL byte inside 's' counts as accepted.
 *
 * Performance notes:
 *   not critical
 *
 * Porting notes:
 *   if accept is 'ABC', then this function would be similar to
 *   a_regexp.match(a_str, '[ABC]*'),
 *
 * Returns the number of leading accepted bytes (len if all are accepted).
 */
static size_t
strlenspn(const char *s, size_t len, const char *accept)
{
    size_t run = 0;

    while (run < len && strchr(accept, s[run]) != NULL) {
        ++run;
    }
    return run;
}
171
+
172
/*
 * Length of the leading run of bytes that are NOT in 'accept'
 * (a length-delimited counterpart of strcspn).
 *
 * Membership is tested with strchr, which also matches the terminator
 * of 'accept', so a NUL byte inside 's' always ends the run.
 *
 * Returns the index of the first byte found in 'accept', or len.
 */
static size_t
strlencspn(const char *s, size_t len, const char *accept)
{
    size_t run = 0;

    while (run < len && strchr(accept, s[run]) == NULL) {
        ++run;
    }
    return run;
}
186
/*
 * Returns 1 if ch is one of the bytes treated as whitespace, else 0.
 */
static int char_is_white(char ch) {
    switch ((unsigned char) ch) {
    case 0x20:  /* ' '  space                      */
    case 0x09:  /* '\t' \011 horizontal tab        */
    case 0x0a:  /* '\n' \012 new line              */
    case 0x0b:  /* '\v' \013 vertical tab          */
    case 0x0c:  /* '\f' \014 new page              */
    case 0x0d:  /* '\r' \015 carriage return       */
    case 0x00:  /*      \000 null (oracle)         */
    case 0xa0:  /*      \240 is Latin-1            */
        return 1;
    default:
        return 0;
    }
}
198
+
199
/* DANGER DANGER
 * This is a -very specialized function-
 *
 * It compares an ALL_UPPER CASE C STRING 'a'
 * with *arbitrary memory* 'b' of length 'n'.
 * Only the bytes 'a'..'z' of 'b' are folded to upper case.
 *
 * Sane people would just make a copy, up-case
 * and use a hash table.
 *
 * Required since libc version uses the current locale
 * and is much slower.
 *
 * Returns:
 *   0            the first n bytes match and 'a' ends right there
 *   1            the first n bytes match but 'a' is longer than n
 *   -1           both sides hold a NUL at the same index before n
 *   a[i] - b[i]  (b folded) at the first index where they differ
 */
static int cstrcasecmp(const char *a, const char *b, size_t n)
{
    size_t i;

    for (i = 0; i < n; ++i) {
        const char ca = a[i];
        char cb = b[i];

        if (cb >= 'a' && cb <= 'z') {
            cb -= 0x20;
        }
        if (ca != cb) {
            return ca - cb;
        }
        if (ca == '\0') {
            return -1;
        }
    }

    return (a[n] == 0) ? 0 : 1;
}
229
+
230
/**
 * Case sensitive equality test for two C strings.
 * Exists only to make call sites more readable.
 *
 * Returns 1 when the strings are identical, 0 otherwise.
 */
static int streq(const char *a, const char *b)
{
    return !strcmp(a, b);
}
238
+
239
+ /**
240
+ *
241
+ *
242
+ *
243
+ * Porting Notes:
244
+ * given a mapping/hash of string to char
245
+ * this is just
246
+ * typecode = mapping[key.upper()]
247
+ */
248
+
249
+ static char bsearch_keyword_type(const char *key, size_t len,
250
+ const keyword_t * keywords, size_t numb)
251
+ {
252
+ size_t pos;
253
+ size_t left = 0;
254
+ size_t right = numb - 1;
255
+
256
+ while (left < right) {
257
+ pos = (left + right) >> 1;
258
+
259
+ /* arg0 = upper case only, arg1 = mixed case */
260
+ if (cstrcasecmp(keywords[pos].word, key, len) < 0) {
261
+ left = pos + 1;
262
+ } else {
263
+ right = pos;
264
+ }
265
+ }
266
+ if ((left == right) && cstrcasecmp(keywords[left].word, key, len) == 0) {
267
+ return keywords[left].type;
268
+ } else {
269
+ return CHAR_NULL;
270
+ }
271
+ }
272
+
273
+ static char is_keyword(const char* key, size_t len)
274
+ {
275
+ return bsearch_keyword_type(key, len, sql_keywords, sql_keywords_sz);
276
+ }
277
+
278
+ /* st_token methods
279
+ *
280
+ * The following functions manipulates the stoken_t type
281
+ *
282
+ *
283
+ */
284
+
285
+ static void st_clear(stoken_t * st)
286
+ {
287
+ memset(st, 0, sizeof(stoken_t));
288
+ }
289
+
290
+ static void st_assign_char(stoken_t * st, const char stype, size_t pos, size_t len,
291
+ const char value)
292
+ {
293
+ /* done to eliminate unused warning */
294
+ (void)len;
295
+ st->type = (char) stype;
296
+ st->pos = pos;
297
+ st->len = 1;
298
+ st->val[0] = value;
299
+ st->val[1] = CHAR_NULL;
300
+ }
301
+
302
+ static void st_assign(stoken_t * st, const char stype,
303
+ size_t pos, size_t len, const char* value)
304
+ {
305
+ const size_t MSIZE = LIBINJECTION_SQLI_TOKEN_SIZE;
306
+ size_t last = len < MSIZE ? len : (MSIZE - 1);
307
+ st->type = (char) stype;
308
+ st->pos = pos;
309
+ st->len = last;
310
+ memcpy(st->val, value, last);
311
+ st->val[last] = CHAR_NULL;
312
+ }
313
+
314
+ static void st_copy(stoken_t * dest, const stoken_t * src)
315
+ {
316
+ memcpy(dest, src, sizeof(stoken_t));
317
+ }
318
+
319
+ static int st_is_arithmetic_op(const stoken_t* st)
320
+ {
321
+ const char ch = st->val[0];
322
+ return (st->type == TYPE_OPERATOR && st->len == 1 &&
323
+ (ch == '*' || ch == '/' || ch == '-' || ch == '+' || ch == '%'));
324
+ }
325
+
326
+ static int st_is_unary_op(const stoken_t * st)
327
+ {
328
+ const char* str = st->val;
329
+ const size_t len = st->len;
330
+
331
+ if (st->type != TYPE_OPERATOR) {
332
+ return FALSE;
333
+ }
334
+
335
+ switch (len) {
336
+ case 1:
337
+ return *str == '+' || *str == '-' || *str == '!' || *str == '~';
338
+ case 2:
339
+ return str[0] == '!' && str[1] == '!';
340
+ case 3:
341
+ return cstrcasecmp("NOT", str, 3) == 0;
342
+ default:
343
+ return FALSE;
344
+ }
345
+ }
346
+
347
+ /* Parsers
348
+ *
349
+ *
350
+ */
351
+
352
+ static size_t parse_white(struct libinjection_sqli_state * sf)
353
+ {
354
+ return sf->pos + 1;
355
+ }
356
+
357
+ static size_t parse_operator1(struct libinjection_sqli_state * sf)
358
+ {
359
+ const char *cs = sf->s;
360
+ size_t pos = sf->pos;
361
+
362
+ st_assign_char(sf->current, TYPE_OPERATOR, pos, 1, cs[pos]);
363
+ return pos + 1;
364
+ }
365
+
366
+ static size_t parse_other(struct libinjection_sqli_state * sf)
367
+ {
368
+ const char *cs = sf->s;
369
+ size_t pos = sf->pos;
370
+
371
+ st_assign_char(sf->current, TYPE_UNKNOWN, pos, 1, cs[pos]);
372
+ return pos + 1;
373
+ }
374
+
375
+ static size_t parse_char(struct libinjection_sqli_state * sf)
376
+ {
377
+ const char *cs = sf->s;
378
+ size_t pos = sf->pos;
379
+
380
+ st_assign_char(sf->current, cs[pos], pos, 1, cs[pos]);
381
+ return pos + 1;
382
+ }
383
+
384
+ static size_t parse_eol_comment(struct libinjection_sqli_state * sf)
385
+ {
386
+ const char *cs = sf->s;
387
+ const size_t slen = sf->slen;
388
+ size_t pos = sf->pos;
389
+
390
+ const char *endpos =
391
+ (const char *) memchr((const void *) (cs + pos), '\n', slen - pos);
392
+ if (endpos == NULL) {
393
+ st_assign(sf->current, TYPE_COMMENT, pos, slen - pos, cs + pos);
394
+ return slen;
395
+ } else {
396
+ st_assign(sf->current, TYPE_COMMENT, pos, (size_t)(endpos - cs) - pos, cs + pos);
397
+ return (size_t)((endpos - cs) + 1);
398
+ }
399
+ }
400
+
401
+ /** In ANSI mode, hash is an operator
402
+ * In MYSQL mode, it's a EOL comment like '--'
403
+ */
404
+ static size_t parse_hash(struct libinjection_sqli_state * sf)
405
+ {
406
+ sf->stats_comment_hash += 1;
407
+ if (sf->flags & FLAG_SQL_MYSQL) {
408
+ sf->stats_comment_hash += 1;
409
+ return parse_eol_comment(sf);
410
+ } else {
411
+ st_assign_char(sf->current, TYPE_OPERATOR, sf->pos, 1, '#');
412
+ return sf->pos + 1;
413
+ }
414
+ }
415
+
416
+ static size_t parse_dash(struct libinjection_sqli_state * sf)
417
+ {
418
+ const char *cs = sf->s;
419
+ const size_t slen = sf->slen;
420
+ size_t pos = sf->pos;
421
+
422
+ /*
423
+ * five cases
424
+ * 1) --[white] this is always a SQL comment
425
+ * 2) --[EOF] this is a comment
426
+ * 3) --[notwhite] in MySQL this is NOT a comment but two unary operators
427
+ * 4) --[notwhite] everyone else thinks this is a comment
428
+ * 5) -[not dash] '-' is a unary operator
429
+ */
430
+
431
+ if (pos + 2 < slen && cs[pos + 1] == '-' && char_is_white(cs[pos+2]) ) {
432
+ return parse_eol_comment(sf);
433
+ } else if (pos +2 == slen && cs[pos + 1] == '-') {
434
+ return parse_eol_comment(sf);
435
+ } else if (pos + 1 < slen && cs[pos + 1] == '-' && (sf->flags & FLAG_SQL_ANSI)) {
436
+ /* --[not-white] not-white case:
437
+ *
438
+ */
439
+ sf->stats_comment_ddx += 1;
440
+ return parse_eol_comment(sf);
441
+ } else {
442
+ st_assign_char(sf->current, TYPE_OPERATOR, pos, 1, '-');
443
+ return pos + 1;
444
+ }
445
+ }
446
+
447
+
448
+ /** This detects MySQL comments, comments that
449
+ * start with /x! We just ban these now but
450
+ * previously we attempted to parse the inside
451
+ *
452
+ * For reference:
453
+ * the form of /x![anything]x/ or /x!12345[anything] x/
454
+ *
455
+ * Mysql 3 (maybe 4), allowed this:
456
+ * /x!0selectx/ 1;
457
+ * where 0 could be any number.
458
+ *
459
+ * The last version of MySQL 3 was in 2003.
460
+
461
+ * It is unclear if the MySQL 3 syntax was allowed
462
+ * in MySQL 4. The last version of MySQL 4 was in 2008
463
+ *
464
+ */
465
static size_t is_mysql_comment(const char *cs, const size_t len, size_t pos)
{
    /*
     * The caller has already established
     *   cs[pos] == '/' && cs[pos+1] == '*'
     * so only the third character is examined here: a '!' at pos + 2
     * makes it a MySQL comment.  Returns 1 in that case, 0 when the
     * character is something else or lies beyond the end of the input.
     */
    const size_t bang = pos + 2;

    return (bang < len && cs[bang] == '!') ? 1 : 0;
}
487
+
488
+ static size_t parse_slash(struct libinjection_sqli_state * sf)
489
+ {
490
+ const char* ptr;
491
+ size_t clen;
492
+ const char *cs = sf->s;
493
+ const size_t slen = sf->slen;
494
+ size_t pos = sf->pos;
495
+ const char* cur = cs + pos;
496
+ char ctype = TYPE_COMMENT;
497
+ size_t pos1 = pos + 1;
498
+ if (pos1 == slen || cs[pos1] != '*') {
499
+ return parse_operator1(sf);
500
+ }
501
+
502
+ /*
503
+ * skip over initial '/x'
504
+ */
505
+ ptr = memchr2(cur + 2, slen - (pos + 2), '*', '/');
506
+
507
+ /*
508
+ * (ptr == NULL) causes false positive in cppcheck 1.61
509
+ * casting to type seems to fix it
510
+ */
511
+ if (ptr == (const char*) NULL) {
512
+ /* till end of line */
513
+ clen = slen - pos;
514
+ } else {
515
+ clen = (size_t)(ptr + 2 - cur);
516
+ }
517
+
518
+ /*
519
+ * postgresql allows nested comments which makes
520
+ * this is incompatible with parsing so
521
+ * if we find a '/x' inside the coment, then
522
+ * make a new token.
523
+ *
524
+ * Also, Mysql's "conditional" comments for version
525
+ * are an automatic black ban!
526
+ */
527
+
528
+ if (memchr2(cur + 2, (size_t)(ptr - (cur + 1)), '/', '*') != NULL) {
529
+ ctype = TYPE_EVIL;
530
+ } else if (is_mysql_comment(cs, slen, pos)) {
531
+ ctype = TYPE_EVIL;
532
+ }
533
+
534
+ st_assign(sf->current, ctype, pos, clen, cs + pos);
535
+ return pos + clen;
536
+ }
537
+
538
+
539
+ static size_t parse_backslash(struct libinjection_sqli_state * sf)
540
+ {
541
+ const char *cs = sf->s;
542
+ const size_t slen = sf->slen;
543
+ size_t pos = sf->pos;
544
+
545
+ /*
546
+ * Weird MySQL alias for NULL, "\N" (capital N only)
547
+ */
548
+ if (pos + 1 < slen && cs[pos +1] == 'N') {
549
+ st_assign(sf->current, TYPE_NUMBER, pos, 2, cs + pos);
550
+ return pos + 2;
551
+ } else {
552
+ st_assign_char(sf->current, TYPE_BACKSLASH, pos, 1, cs[pos]);
553
+ return pos + 1;
554
+ }
555
+ }
556
+
557
+ static size_t parse_operator2(struct libinjection_sqli_state * sf)
558
+ {
559
+ char ch;
560
+ const char *cs = sf->s;
561
+ const size_t slen = sf->slen;
562
+ size_t pos = sf->pos;
563
+
564
+ if (pos + 1 >= slen) {
565
+ return parse_operator1(sf);
566
+ }
567
+
568
+ if (pos + 2 < slen &&
569
+ cs[pos] == '<' &&
570
+ cs[pos + 1] == '=' &&
571
+ cs[pos + 2] == '>') {
572
+ /*
573
+ * special 3-char operator
574
+ */
575
+ st_assign(sf->current, TYPE_OPERATOR, pos, 3, cs + pos);
576
+ return pos + 3;
577
+ }
578
+
579
+ ch = sf->lookup(sf, LOOKUP_OPERATOR, cs + pos, 2);
580
+ if (ch != CHAR_NULL) {
581
+ st_assign(sf->current, ch, pos, 2, cs+pos);
582
+ return pos + 2;
583
+ }
584
+
585
+ /*
586
+ * not an operator.. what to do with the two
587
+ * characters we got?
588
+ */
589
+
590
+ if (cs[pos] == ':') {
591
+ /* ':' is not an operator */
592
+ st_assign(sf->current, TYPE_COLON, pos, 1, cs+pos);
593
+ return pos + 1;
594
+ } else {
595
+ /*
596
+ * must be a single char operator
597
+ */
598
+ return parse_operator1(sf);
599
+ }
600
+ }
601
+
602
/*
 * Counts the run of backslashes that ends at 'end', looking backwards
 * but never before 'start', and reports whether that count is odd.
 *
 *   one backslash    = escaped
 *   two backslashes  = not escaped
 *   three            = escaped
 *
 * Returns 1 for an odd count (escaped), 0 for an even count or none.
 */
static int is_backslash_escaped(const char* end, const char* start)
{
    const char* p = end;
    int backslashes = 0;

    while (p >= start && *p == '\\') {
        ++backslashes;
        --p;
    }

    return backslashes & 1;
}
619
+
620
/*
 * Returns 1 when the character after 'cur' lies before 'end' and is
 * the same character as *cur (a doubled delimiter), 0 otherwise.
 */
static size_t is_double_delim_escaped(const char* cur, const char* end)
{
    const char* next = cur + 1;

    if (next >= end) {
        return 0;
    }
    return *next == *cur;
}
624
+
625
+ /* Look forward for doubling of delimiter
626
+ *
627
+ * case 'foo''bar' --> foo''bar
628
+ *
629
+ * ending quote isn't duplicated (i.e. escaped)
630
+ * since it's the wrong char or EOL
631
+ *
632
+ */
633
+ static size_t parse_string_core(const char *cs, const size_t len, size_t pos,
634
+ stoken_t * st, char delim, size_t offset)
635
+ {
636
+ /*
637
+ * offset is to skip the perhaps first quote char
638
+ */
639
+ const char *qpos =
640
+ (const char *) memchr((const void *) (cs + pos + offset), delim,
641
+ len - pos - offset);
642
+
643
+ /*
644
+ * then keep string open/close info
645
+ */
646
+ if (offset > 0) {
647
+ /*
648
+ * this is real quote
649
+ */
650
+ st->str_open = delim;
651
+ } else {
652
+ /*
653
+ * this was a simulated quote
654
+ */
655
+ st->str_open = CHAR_NULL;
656
+ }
657
+
658
+ while (TRUE) {
659
+ if (qpos == NULL) {
660
+ /*
661
+ * string ended with no trailing quote
662
+ * assign what we have
663
+ */
664
+ st_assign(st, TYPE_STRING, pos + offset, len - pos - offset, cs + pos + offset);
665
+ st->str_close = CHAR_NULL;
666
+ return len;
667
+ } else if ( is_backslash_escaped(qpos - 1, cs + pos + offset)) {
668
+ /* keep going, move ahead one character */
669
+ qpos =
670
+ (const char *) memchr((const void *) (qpos + 1), delim,
671
+ (size_t)((cs + len) - (qpos + 1)));
672
+ continue;
673
+ } else if (is_double_delim_escaped(qpos, cs + len)) {
674
+ /* keep going, move ahead two characters */
675
+ qpos =
676
+ (const char *) memchr((const void *) (qpos + 2), delim,
677
+ (size_t)((cs + len) - (qpos + 2)));
678
+ continue;
679
+ } else {
680
+ /* hey it's a normal string */
681
+ st_assign(st, TYPE_STRING, pos + offset,
682
+ (size_t)(qpos - (cs + pos + offset)), cs + pos + offset);
683
+ st->str_close = delim;
684
+ return (size_t)(qpos - cs + 1);
685
+ }
686
+ }
687
+ }
688
+
689
+ /**
690
+ * Used when first char is a ' or "
691
+ */
692
+ static size_t parse_string(struct libinjection_sqli_state * sf)
693
+ {
694
+ const char *cs = sf->s;
695
+ const size_t slen = sf->slen;
696
+ size_t pos = sf->pos;
697
+
698
+ /*
699
+ * assert cs[pos] == single or double quote
700
+ */
701
+ return parse_string_core(cs, slen, pos, sf->current, cs[pos], 1);
702
+ }
703
+
704
+ /**
705
+ * Used when first char is:
706
+ * N or n: mysql "National Character set"
707
+ * E : psql "Escaped String"
708
+ */
709
+ static size_t parse_estring(struct libinjection_sqli_state * sf)
710
+ {
711
+ const char *cs = sf->s;
712
+ const size_t slen = sf->slen;
713
+ size_t pos = sf->pos;
714
+
715
+ if (pos + 2 >= slen || cs[pos+1] != CHAR_SINGLE) {
716
+ return parse_word(sf);
717
+ }
718
+ return parse_string_core(cs, slen, pos, sf->current, CHAR_SINGLE, 2);
719
+ }
720
+
721
+ static size_t parse_ustring(struct libinjection_sqli_state * sf)
722
+ {
723
+ const char *cs = sf->s;
724
+ size_t slen = sf->slen;
725
+ size_t pos = sf->pos;
726
+
727
+ if (pos + 2 < slen && cs[pos+1] == '&' && cs[pos+2] == '\'') {
728
+ sf->pos += 2;
729
+ pos = parse_string(sf);
730
+ sf->current->str_open = 'u';
731
+ if (sf->current->str_close == '\'') {
732
+ sf->current->str_close = 'u';
733
+ }
734
+ return pos;
735
+ } else {
736
+ return parse_word(sf);
737
+ }
738
+ }
739
+
740
+ static size_t parse_qstring_core(struct libinjection_sqli_state * sf, size_t offset)
741
+ {
742
+ char ch;
743
+ const char *strend;
744
+ const char *cs = sf->s;
745
+ size_t slen = sf->slen;
746
+ size_t pos = sf->pos + offset;
747
+
748
+ /* if we are already at end of string..
749
+ if current char is not q or Q
750
+ if we don't have 2 more chars
751
+ if char2 != a single quote
752
+ then, just treat as word
753
+ */
754
+ if (pos >= slen ||
755
+ (cs[pos] != 'q' && cs[pos] != 'Q') ||
756
+ pos + 2 >= slen ||
757
+ cs[pos + 1] != '\'') {
758
+ return parse_word(sf);
759
+ }
760
+
761
+ ch = cs[pos + 2];
762
+
763
+ /* the ch > 127 is un-needed since
764
+ * we assume char is signed
765
+ */
766
+ if (ch < 33 /* || ch > 127 */) {
767
+ return parse_word(sf);
768
+ }
769
+ switch (ch) {
770
+ case '(' : ch = ')'; break;
771
+ case '[' : ch = ']'; break;
772
+ case '{' : ch = '}'; break;
773
+ case '<' : ch = '>'; break;
774
+ }
775
+
776
+ strend = memchr2(cs + pos + 3, slen - pos - 3, ch, '\'');
777
+ if (strend == NULL) {
778
+ st_assign(sf->current, TYPE_STRING, pos + 3, slen - pos - 3, cs + pos + 3);
779
+ sf->current->str_open = 'q';
780
+ sf->current->str_close = CHAR_NULL;
781
+ return slen;
782
+ } else {
783
+ st_assign(sf->current, TYPE_STRING, pos + 3, (size_t)(strend - cs) - pos - 3, cs + pos + 3);
784
+ sf->current->str_open = 'q';
785
+ sf->current->str_close = 'q';
786
+ return (size_t)(strend - cs + 2);
787
+ }
788
+ }
789
+
790
/*
 * Oracle's q string: the q is expected at the current position.
 */
static size_t parse_qstring(struct libinjection_sqli_state * sf)
{
    const size_t no_prefix = 0;

    return parse_qstring_core(sf, no_prefix);
}
797
+
798
+ /*
799
+ * mysql's N'STRING' or
800
+ * ... Oracle's nq string
801
+ */
802
+ static size_t parse_nqstring(struct libinjection_sqli_state * sf)
803
+ {
804
+ size_t slen = sf->slen;
805
+ size_t pos = sf->pos;
806
+ if (pos + 2 < slen && sf->s[pos+1] == CHAR_SINGLE) {
807
+ return parse_estring(sf);
808
+ }
809
+ return parse_qstring_core(sf, 1);
810
+ }
811
+
812
+ /*
813
+ * binary literal string
814
+ * re: [bB]'[01]*'
815
+ */
816
+ static size_t parse_bstring(struct libinjection_sqli_state *sf)
817
+ {
818
+ size_t wlen;
819
+ const char *cs = sf->s;
820
+ size_t pos = sf->pos;
821
+ size_t slen = sf->slen;
822
+
823
+ /* need at least 2 more characters
824
+ * if next char isn't a single quote, then
825
+ * continue as normal word
826
+ */
827
+ if (pos + 2 >= slen || cs[pos+1] != '\'') {
828
+ return parse_word(sf);
829
+ }
830
+
831
+ wlen = strlenspn(cs + pos + 2, sf->slen - pos - 2, "01");
832
+ if (pos + 2 + wlen >= slen || cs[pos + 2 + wlen] != '\'') {
833
+ return parse_word(sf);
834
+ }
835
+ st_assign(sf->current, TYPE_NUMBER, pos, wlen + 3, cs + pos);
836
+ return pos + 2 + wlen + 1;
837
+ }
838
+
839
+ /*
840
+ * hex literal string
841
+ * re: [xX]'[0123456789abcdefABCDEF]*'
842
+ * mysql has requirement of having EVEN number of chars,
843
+ * but pgsql does not
844
+ */
845
+ static size_t parse_xstring(struct libinjection_sqli_state *sf)
846
+ {
847
+ size_t wlen;
848
+ const char *cs = sf->s;
849
+ size_t pos = sf->pos;
850
+ size_t slen = sf->slen;
851
+
852
+ /* need at least 2 more characters
853
+ * if next char isn't a single quote, then
854
+ * continue as normal word
855
+ */
856
+ if (pos + 2 >= slen || cs[pos+1] != '\'') {
857
+ return parse_word(sf);
858
+ }
859
+
860
+ wlen = strlenspn(cs + pos + 2, sf->slen - pos - 2, "0123456789ABCDEFabcdef");
861
+ if (pos + 2 + wlen >= slen || cs[pos + 2 + wlen] != '\'') {
862
+ return parse_word(sf);
863
+ }
864
+ st_assign(sf->current, TYPE_NUMBER, pos, wlen + 3, cs + pos);
865
+ return pos + 2 + wlen + 1;
866
+ }
867
+
868
+ /**
869
+ * This handles MS SQLSERVER bracket words
870
+ * http://stackoverflow.com/questions/3551284/sql-serverwhat-do-brackets-mean-around-column-name
871
+ *
872
+ */
873
+ static size_t parse_bword(struct libinjection_sqli_state * sf)
874
+ {
875
+ const char *cs = sf->s;
876
+ size_t pos = sf->pos;
877
+ const char* endptr = (const char*) memchr(cs + pos, ']', sf->slen - pos);
878
+ if (endptr == NULL) {
879
+ st_assign(sf->current, TYPE_BAREWORD, pos, sf->slen - pos, cs + pos);
880
+ return sf->slen;
881
+ } else {
882
+ st_assign(sf->current, TYPE_BAREWORD, pos, (size_t)(endptr - cs) - pos + 1, cs + pos);
883
+ return (size_t)((endptr - cs) + 1);
884
+ }
885
+ }
886
+
887
+ static size_t parse_word(struct libinjection_sqli_state * sf)
888
+ {
889
+ char ch;
890
+ char delim;
891
+ size_t i;
892
+ const char *cs = sf->s;
893
+ size_t pos = sf->pos;
894
+ size_t wlen = strlencspn(cs + pos, sf->slen - pos,
895
+ " []{}<>:\\?=@!#~+-*/&|^%(),';\t\n\v\f\r\"\240\000");
896
+
897
+ st_assign(sf->current, TYPE_BAREWORD, pos, wlen, cs + pos);
898
+
899
+ /* now we need to look inside what we good for "." and "`"
900
+ * and see if what is before is a keyword or not
901
+ */
902
+ for (i =0; i < sf->current->len; ++i) {
903
+ delim = sf->current->val[i];
904
+ if (delim == '.' || delim == '`') {
905
+ ch = sf->lookup(sf, LOOKUP_WORD, sf->current->val, i);
906
+ if (ch != TYPE_NONE && ch != TYPE_BAREWORD) {
907
+ /* needed for swig */
908
+ st_clear(sf->current);
909
+ /*
910
+ * we got something like "SELECT.1"
911
+ * or SELECT`column`
912
+ */
913
+ st_assign(sf->current, ch, pos, i, cs + pos);
914
+ return pos + i;
915
+ }
916
+ }
917
+ }
918
+
919
+ /*
920
+ * do normal lookup with word including '.'
921
+ */
922
+ if (wlen < LIBINJECTION_SQLI_TOKEN_SIZE) {
923
+
924
+ ch = sf->lookup(sf, LOOKUP_WORD, sf->current->val, wlen);
925
+ if (ch == CHAR_NULL) {
926
+ ch = TYPE_BAREWORD;
927
+ }
928
+ sf->current->type = ch;
929
+ }
930
+ return pos + wlen;
931
+ }
932
+
933
+ /* MySQL backticks are a cross between string and
934
+ * and a bare word.
935
+ *
936
+ */
937
+ static size_t parse_tick(struct libinjection_sqli_state* sf)
938
+ {
939
+ size_t pos = parse_string_core(sf->s, sf->slen, sf->pos, sf->current, CHAR_TICK, 1);
940
+
941
+ /* we could check to see if start and end of
942
+ * of string are both "`", i.e. make sure we have
943
+ * matching set. `foo` vs. `foo
944
+ * but I don't think it matters much
945
+ */
946
+
947
+ /* check value of string to see if it's a keyword,
948
+ * function, operator, etc
949
+ */
950
+ char ch = sf->lookup(sf, LOOKUP_WORD, sf->current->val, sf->current->len);
951
+ if (ch == TYPE_FUNCTION) {
952
+ /* if it's a function, then convert token */
953
+ sf->current->type = TYPE_FUNCTION;
954
+ } else {
955
+ /* otherwise it's a 'n' type -- mysql treats
956
+ * everything as a bare word
957
+ */
958
+ sf->current->type = TYPE_BAREWORD;
959
+ }
960
+ return pos;
961
+ }
962
+
963
+ static size_t parse_var(struct libinjection_sqli_state * sf)
964
+ {
965
+ size_t xlen;
966
+ const char *cs = sf->s;
967
+ const size_t slen = sf->slen;
968
+ size_t pos = sf->pos + 1;
969
+
970
+ /*
971
+ * var_count is only used to reconstruct
972
+ * the input. It counts the number of '@'
973
+ * seen 0 in the case of NULL, 1 or 2
974
+ */
975
+
976
+ /*
977
+ * move past optional other '@'
978
+ */
979
+ if (pos < slen && cs[pos] == '@') {
980
+ pos += 1;
981
+ sf->current->count = 2;
982
+ } else {
983
+ sf->current->count = 1;
984
+ }
985
+
986
+ /*
987
+ * MySQL allows @@`version`
988
+ */
989
+ if (pos < slen) {
990
+ if (cs[pos] == '`') {
991
+ sf->pos = pos;
992
+ pos = parse_tick(sf);
993
+ sf->current->type = TYPE_VARIABLE;
994
+ return pos;
995
+ } else if (cs[pos] == CHAR_SINGLE || cs[pos] == CHAR_DOUBLE) {
996
+ sf->pos = pos;
997
+ pos = parse_string(sf);
998
+ sf->current->type = TYPE_VARIABLE;
999
+ return pos;
1000
+ }
1001
+ }
1002
+
1003
+
1004
+ xlen = strlencspn(cs + pos, slen - pos,
1005
+ " <>:\\?=@!#~+-*/&|^%(),';\t\n\v\f\r'`\"");
1006
+ if (xlen == 0) {
1007
+ st_assign(sf->current, TYPE_VARIABLE, pos, 0, cs + pos);
1008
+ return pos;
1009
+ } else {
1010
+ st_assign(sf->current, TYPE_VARIABLE, pos, xlen, cs + pos);
1011
+ return pos + xlen;
1012
+ }
1013
+ }
1014
+
1015
+ static size_t parse_money(struct libinjection_sqli_state *sf)
1016
+ {
1017
+ size_t xlen;
1018
+ const char* strend;
1019
+ const char *cs = sf->s;
1020
+ const size_t slen = sf->slen;
1021
+ size_t pos = sf->pos;
1022
+
1023
+ if (pos + 1 == slen) {
1024
+ /* end of line */
1025
+ st_assign_char(sf->current, TYPE_BAREWORD, pos, 1, '$');
1026
+ return slen;
1027
+ }
1028
+
1029
+ /*
1030
+ * $1,000.00 or $1.000,00 ok!
1031
+ * This also parses $....,,,111 but that's ok
1032
+ */
1033
+
1034
+ xlen = strlenspn(cs + pos + 1, slen - pos - 1, "0123456789.,");
1035
+ if (xlen == 0) {
1036
+ if (cs[pos + 1] == '$') {
1037
+ /* we have $$ .. find ending $$ and make string */
1038
+ strend = memchr2(cs + pos + 2, slen - pos -2, '$', '$');
1039
+ if (strend == NULL) {
1040
+ /* fell off edge */
1041
+ st_assign(sf->current, TYPE_STRING, pos + 2, slen - (pos + 2), cs + pos + 2);
1042
+ sf->current->str_open = '$';
1043
+ sf->current->str_close = CHAR_NULL;
1044
+ return slen;
1045
+ } else {
1046
+ st_assign(sf->current, TYPE_STRING, pos + 2,
1047
+ (size_t)(strend - (cs + pos + 2)), cs + pos + 2);
1048
+ sf->current->str_open = '$';
1049
+ sf->current->str_close = '$';
1050
+ return (size_t)(strend - cs + 2);
1051
+ }
1052
+ } else {
1053
+ /* ok it's not a number or '$$', but maybe it's pgsql "$ quoted strings" */
1054
+ xlen = strlenspn(cs + pos + 1, slen - pos - 1, "abcdefghjiklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");
1055
+ if (xlen == 0) {
1056
+ /* hmm it's "$" _something_ .. just add $ and keep going*/
1057
+ st_assign_char(sf->current, TYPE_BAREWORD, pos, 1, '$');
1058
+ return pos + 1;
1059
+ }
1060
+ /* we have $foobar????? */
1061
+ /* is it $foobar$ */
1062
+ if (pos + xlen + 1 == slen || cs[pos+xlen+1] != '$') {
1063
+ /* not $foobar$, or fell off edge */
1064
+ st_assign_char(sf->current, TYPE_BAREWORD, pos, 1, '$');
1065
+ return pos + 1;
1066
+ }
1067
+
1068
+ /* we have $foobar$ ... find it again */
1069
+ strend = my_memmem(cs+xlen+2, slen - (pos+xlen+2), cs + pos, xlen+2);
1070
+
1071
+ if (strend == NULL || ((size_t)(strend - cs) < (pos+xlen+2))) {
1072
+ /* fell off edge */
1073
+ st_assign(sf->current, TYPE_STRING, pos+xlen+2, slen - pos - xlen - 2, cs+pos+xlen+2);
1074
+ sf->current->str_open = '$';
1075
+ sf->current->str_close = CHAR_NULL;
1076
+ return slen;
1077
+ } else {
1078
+ /* got one */
1079
+ st_assign(sf->current, TYPE_STRING, pos+xlen+2,
1080
+ (size_t)(strend - (cs + pos + xlen + 2)), cs+pos+xlen+2);
1081
+ sf->current->str_open = '$';
1082
+ sf->current->str_close = '$';
1083
+ return (size_t)((strend + xlen + 2) - cs);
1084
+ }
1085
+ }
1086
+ } else if (xlen == 1 && cs[pos + 1] == '.') {
1087
+ /* $. should parsed as a word */
1088
+ return parse_word(sf);
1089
+ } else {
1090
+ st_assign(sf->current, TYPE_NUMBER, pos, 1 + xlen, cs + pos);
1091
+ return pos + 1 + xlen;
1092
+ }
1093
+ }
1094
+
1095
+ static size_t parse_number(struct libinjection_sqli_state * sf)
1096
+ {
1097
+ size_t xlen;
1098
+ size_t start;
1099
+ const char* digits = NULL;
1100
+ const char *cs = sf->s;
1101
+ const size_t slen = sf->slen;
1102
+ size_t pos = sf->pos;
1103
+ int have_e = 0;
1104
+ int have_exp = 0;
1105
+
1106
+ /* cs[pos] == '0' has 1/10 chance of being true,
1107
+ * while pos+1< slen is almost always true
1108
+ */
1109
+ if (cs[pos] == '0' && pos + 1 < slen) {
1110
+ if (cs[pos + 1] == 'X' || cs[pos + 1] == 'x') {
1111
+ digits = "0123456789ABCDEFabcdef";
1112
+ } else if (cs[pos + 1] == 'B' || cs[pos + 1] == 'b') {
1113
+ digits = "01";
1114
+ }
1115
+
1116
+ if (digits) {
1117
+ xlen = strlenspn(cs + pos + 2, slen - pos - 2, digits);
1118
+ if (xlen == 0) {
1119
+ st_assign(sf->current, TYPE_BAREWORD, pos, 2, cs + pos);
1120
+ return pos + 2;
1121
+ } else {
1122
+ st_assign(sf->current, TYPE_NUMBER, pos, 2 + xlen, cs + pos);
1123
+ return pos + 2 + xlen;
1124
+ }
1125
+ }
1126
+ }
1127
+
1128
+ start = pos;
1129
+ while (pos < slen && ISDIGIT(cs[pos])) {
1130
+ pos += 1;
1131
+ }
1132
+
1133
+ if (pos < slen && cs[pos] == '.') {
1134
+ pos += 1;
1135
+ while (pos < slen && ISDIGIT(cs[pos])) {
1136
+ pos += 1;
1137
+ }
1138
+ if (pos - start == 1) {
1139
+ /* only one character read so far */
1140
+ st_assign_char(sf->current, TYPE_DOT, start, 1, '.');
1141
+ return pos;
1142
+ }
1143
+ }
1144
+
1145
+ if (pos < slen) {
1146
+ if (cs[pos] == 'E' || cs[pos] == 'e') {
1147
+ have_e = 1;
1148
+ pos += 1;
1149
+ if (pos < slen && (cs[pos] == '+' || cs[pos] == '-')) {
1150
+ pos += 1;
1151
+ }
1152
+ while (pos < slen && ISDIGIT(cs[pos])) {
1153
+ have_exp = 1;
1154
+ pos += 1;
1155
+ }
1156
+ }
1157
+ }
1158
+
1159
+ /* oracle's ending float or double suffix
1160
+ * http://docs.oracle.com/cd/B19306_01/server.102/b14200/sql_elements003.htm#i139891
1161
+ */
1162
+ if (pos < slen && (cs[pos] == 'd' || cs[pos] == 'D' || cs[pos] == 'f' || cs[pos] == 'F')) {
1163
+ if (pos + 1 == slen) {
1164
+ /* line ends evaluate "... 1.2f$" as '1.2f' */
1165
+ pos += 1;
1166
+ } else if ((char_is_white(cs[pos+1]) || cs[pos+1] == ';')) {
1167
+ /*
1168
+ * easy case, evaluate "... 1.2f ... as '1.2f'
1169
+ */
1170
+ pos += 1;
1171
+ } else if (cs[pos+1] == 'u' || cs[pos+1] == 'U') {
1172
+ /*
1173
+ * a bit of a hack but makes '1fUNION' parse as '1f UNION'
1174
+ */
1175
+ pos += 1;
1176
+ } else {
1177
+ /* it's like "123FROM" */
1178
+ /* parse as "123" only */
1179
+ }
1180
+ }
1181
+
1182
+ if (have_e == 1 && have_exp == 0) {
1183
+ /* very special form of
1184
+ * "1234.e"
1185
+ * "10.10E"
1186
+ * ".E"
1187
+ * this is a WORD not a number!! */
1188
+ st_assign(sf->current, TYPE_BAREWORD, start, pos - start, cs + start);
1189
+ } else {
1190
+ st_assign(sf->current, TYPE_NUMBER, start, pos - start, cs + start);
1191
+ }
1192
+ return pos;
1193
+ }
1194
+
1195
+ /*
1196
+ * API to return version. This allows us to increment the version
1197
+ * without having to regenerated the SWIG (or other binding) in minor
1198
+ * releases.
1199
+ */
1200
+ const char* libinjection_version()
1201
+ {
1202
+ return LIBINJECTION_VERSION;
1203
+ }
1204
+
1205
+ int libinjection_sqli_tokenize(struct libinjection_sqli_state * sf)
1206
+ {
1207
+ pt2Function fnptr;
1208
+ size_t *pos = &sf->pos;
1209
+ stoken_t *current = sf->current;
1210
+ const char *s = sf->s;
1211
+ const size_t slen = sf->slen;
1212
+
1213
+ if (slen == 0) {
1214
+ return FALSE;
1215
+ }
1216
+
1217
+ st_clear(current);
1218
+ sf->current = current;
1219
+
1220
+ /*
1221
+ * if we are at beginning of string
1222
+ * and in single-quote or double quote mode
1223
+ * then pretend the input starts with a quote
1224
+ */
1225
+ if (*pos == 0 && (sf->flags & (FLAG_QUOTE_SINGLE | FLAG_QUOTE_DOUBLE))) {
1226
+ *pos = parse_string_core(s, slen, 0, current, flag2delim(sf->flags), 0);
1227
+ sf->stats_tokens += 1;
1228
+ return TRUE;
1229
+ }
1230
+
1231
+ while (*pos < slen) {
1232
+
1233
+ /*
1234
+ * get current character
1235
+ */
1236
+ const unsigned char ch = (unsigned char) (s[*pos]);
1237
+
1238
+ /*
1239
+ * look up the parser, and call it
1240
+ *
1241
+ * Porting Note: this is mapping of char to function
1242
+ * charparsers[ch]()
1243
+ */
1244
+ fnptr = char_parse_map[ch];
1245
+
1246
+ *pos = (*fnptr) (sf);
1247
+
1248
+ /*
1249
+ *
1250
+ */
1251
+ if (current->type != CHAR_NULL) {
1252
+ sf->stats_tokens += 1;
1253
+ return TRUE;
1254
+ }
1255
+ }
1256
+ return FALSE;
1257
+ }
1258
+
1259
+ void libinjection_sqli_init(struct libinjection_sqli_state * sf, const char *s, size_t len, int flags)
1260
+ {
1261
+ if (flags == 0) {
1262
+ flags = FLAG_QUOTE_NONE | FLAG_SQL_ANSI;
1263
+ }
1264
+
1265
+ memset(sf, 0, sizeof(struct libinjection_sqli_state));
1266
+ sf->s = s;
1267
+ sf->slen = len;
1268
+ sf->lookup = libinjection_sqli_lookup_word;
1269
+ sf->userdata = 0;
1270
+ sf->flags = flags;
1271
+ sf->current = &(sf->tokenvec[0]);
1272
+ }
1273
+
1274
+ void libinjection_sqli_reset(struct libinjection_sqli_state * sf, int flags)
1275
+ {
1276
+ void *userdata = sf->userdata;
1277
+ ptr_lookup_fn lookup = sf->lookup;;
1278
+
1279
+ if (flags == 0) {
1280
+ flags = FLAG_QUOTE_NONE | FLAG_SQL_ANSI;
1281
+ }
1282
+ libinjection_sqli_init(sf, sf->s, sf->slen, flags);
1283
+ sf->lookup = lookup;
1284
+ sf->userdata = userdata;
1285
+ }
1286
+
1287
+ void libinjection_sqli_callback(struct libinjection_sqli_state * sf, ptr_lookup_fn fn, void* userdata)
1288
+ {
1289
+ if (fn == NULL) {
1290
+ sf->lookup = libinjection_sqli_lookup_word;
1291
+ sf->userdata = (void*)(NULL);
1292
+ } else {
1293
+ sf->lookup = fn;
1294
+ sf->userdata = userdata;
1295
+ }
1296
+ }
1297
+
1298
+ /** See if two tokens can be merged since they are compound SQL phrases.
1299
+ *
1300
+ * This takes two tokens, and, if they are the right type,
1301
+ * merges their values together. Then checks to see if the
1302
+ * new value is special using the PHRASES mapping.
1303
+ *
1304
+ * Example: "UNION" + "ALL" ==> "UNION ALL"
1305
+ *
1306
+ * C Security Notes: this is safe to use C-strings (null-terminated)
1307
+ * since the types involved by definition do not have embedded nulls
1308
+ * (e.g. there is no keyword with embedded null)
1309
+ *
1310
+ * Porting Notes: since this is C, it's oddly complicated.
1311
+ * This is just: multikeywords[token.value + ' ' + token2.value]
1312
+ *
1313
+ */
1314
+ static int syntax_merge_words(struct libinjection_sqli_state * sf,stoken_t * a, stoken_t * b)
1315
+ {
1316
+ size_t sz1;
1317
+ size_t sz2;
1318
+ size_t sz3;
1319
+ char tmp[LIBINJECTION_SQLI_TOKEN_SIZE];
1320
+ char ch;
1321
+
1322
+ /* first token is of right type? */
1323
+ if (!
1324
+ (a->type == TYPE_KEYWORD ||
1325
+ a->type == TYPE_BAREWORD ||
1326
+ a->type == TYPE_OPERATOR ||
1327
+ a->type == TYPE_UNION ||
1328
+ a->type == TYPE_FUNCTION ||
1329
+ a->type == TYPE_EXPRESSION ||
1330
+ a->type == TYPE_SQLTYPE)) {
1331
+ return CHAR_NULL;
1332
+ }
1333
+
1334
+ if (b->type != TYPE_KEYWORD && b->type != TYPE_BAREWORD &&
1335
+ b->type != TYPE_OPERATOR && b->type != TYPE_SQLTYPE &&
1336
+ b->type != TYPE_LOGIC_OPERATOR &&
1337
+ b->type != TYPE_FUNCTION &&
1338
+ b->type != TYPE_UNION && b->type != TYPE_EXPRESSION) {
1339
+ return CHAR_NULL;
1340
+ }
1341
+
1342
+ sz1 = a->len;
1343
+ sz2 = b->len;
1344
+ sz3 = sz1 + sz2 + 1; /* +1 for space in the middle */
1345
+ if (sz3 >= LIBINJECTION_SQLI_TOKEN_SIZE) { /* make sure there is room for ending null */
1346
+ return FALSE;
1347
+ }
1348
+ /*
1349
+ * oddly annoying last.val + ' ' + current.val
1350
+ */
1351
+ memcpy(tmp, a->val, sz1);
1352
+ tmp[sz1] = ' ';
1353
+ memcpy(tmp + sz1 + 1, b->val, sz2);
1354
+ tmp[sz3] = CHAR_NULL;
1355
+
1356
+ ch = sf->lookup(sf, LOOKUP_WORD, tmp, sz3);
1357
+
1358
+ if (ch != CHAR_NULL) {
1359
+ st_assign(a, ch, a->pos, sz3, tmp);
1360
+ return TRUE;
1361
+ } else {
1362
+ return FALSE;
1363
+ }
1364
+ }
1365
+
1366
+ int libinjection_sqli_fold(struct libinjection_sqli_state * sf)
1367
+ { /* Pulls tokens from libinjection_sqli_tokenize() into sf->tokenvec and
     * collapses ("folds") adjacent two- and three-token patterns in place.
     * Comment tokens are not kept in the vector: the most recent one is
     * remembered in last_comment, forgotten again as soon as a non-comment
     * token follows, and re-appended at the end if there is room.
     *
     * Returns the number of tokens left in sf->tokenvec, capped at
     * LIBINJECTION_SQLI_MAX_TOKENS.  Returns 0 when the tokenizer runs out
     * while the leading comments, left parens, SQL types and unary
     * operators are still being skipped.  The "{ ``" case returns early
     * with the offending token marked TYPE_EVIL.
     */
1368
+ stoken_t last_comment;
1369
+
1370
+ /* POS is the position of where the NEXT token goes */
1371
+ size_t pos = 0;
1372
+
1373
+ /* LEFT is a count of how many tokens that are already
1374
+ folded or processed (i.e. part of the fingerprint) */
1375
+ size_t left = 0;
1376
+
1377
+ int more = 1; /* result of the last libinjection_sqli_tokenize() call */
1378
+
1379
+ st_clear(&last_comment);
1380
+
1381
+ /* Skip all initial comments, left-parens (, SQL types and unary operators
1382
+ *
1383
+ */
1384
+ sf->current = &(sf->tokenvec[0]);
1385
+ while (more) {
1386
+ more = libinjection_sqli_tokenize(sf);
1387
+ if ( ! (sf->current->type == TYPE_COMMENT ||
1388
+ sf->current->type == TYPE_LEFTPARENS ||
1389
+ sf->current->type == TYPE_SQLTYPE ||
1390
+ st_is_unary_op(sf->current))) {
1391
+ break;
1392
+ }
1393
+ }
1394
+
1395
+ if (! more) {
1396
+ /* If input was only comments, unary or (, then exit */
1397
+ return 0;
1398
+ } else {
1399
+ /* it's some other token */
1400
+ pos += 1;
1401
+ }
1402
+
1403
+ while (1) {
1404
+ FOLD_DEBUG;
1405
+
1406
+ /* do we have all the max number of tokens? if so do
1407
+ * some special cases for 5 tokens
1408
+ */
1409
+ if (pos >= LIBINJECTION_SQLI_MAX_TOKENS) {
1410
+ if (
1411
+ (
1412
+ sf->tokenvec[0].type == TYPE_NUMBER &&
1413
+ (sf->tokenvec[1].type == TYPE_OPERATOR || sf->tokenvec[1].type == TYPE_COMMA) &&
1414
+ sf->tokenvec[2].type == TYPE_LEFTPARENS &&
1415
+ sf->tokenvec[3].type == TYPE_NUMBER &&
1416
+ sf->tokenvec[4].type == TYPE_RIGHTPARENS
1417
+ ) ||
1418
+ (
1419
+ sf->tokenvec[0].type == TYPE_BAREWORD &&
1420
+ sf->tokenvec[1].type == TYPE_OPERATOR &&
1421
+ sf->tokenvec[2].type == TYPE_LEFTPARENS &&
1422
+ (sf->tokenvec[3].type == TYPE_BAREWORD || sf->tokenvec[3].type == TYPE_NUMBER) &&
1423
+ sf->tokenvec[4].type == TYPE_RIGHTPARENS
1424
+ ) ||
1425
+ (
1426
+ sf->tokenvec[0].type == TYPE_NUMBER &&
1427
+ sf->tokenvec[1].type == TYPE_RIGHTPARENS &&
1428
+ sf->tokenvec[2].type == TYPE_COMMA &&
1429
+ sf->tokenvec[3].type == TYPE_LEFTPARENS &&
1430
+ sf->tokenvec[4].type == TYPE_NUMBER
1431
+ ) ||
1432
+ (
1433
+ sf->tokenvec[0].type == TYPE_BAREWORD &&
1434
+ sf->tokenvec[1].type == TYPE_RIGHTPARENS &&
1435
+ sf->tokenvec[2].type == TYPE_OPERATOR &&
1436
+ sf->tokenvec[3].type == TYPE_LEFTPARENS &&
1437
+ sf->tokenvec[4].type == TYPE_BAREWORD
1438
+ )
1439
+ )
1440
+ {
1441
+ if (pos > LIBINJECTION_SQLI_MAX_TOKENS) {
1442
+ st_copy(&(sf->tokenvec[1]), &(sf->tokenvec[LIBINJECTION_SQLI_MAX_TOKENS])); /* keep the extra look-ahead token as the new second token */
1443
+ pos = 2;
1444
+ left = 0;
1445
+ } else {
1446
+ pos = 1;
1447
+ left = 0;
1448
+ }
1449
+ }
1450
+ }
1451
+
1452
+ if (! more || left >= LIBINJECTION_SQLI_MAX_TOKENS) {
1453
+ left = pos;
1454
+ break;
1455
+ }
1456
+
1457
+ /* get up to two tokens */
1458
+ while (more && pos <= LIBINJECTION_SQLI_MAX_TOKENS && (pos - left) < 2) {
1459
+ sf->current = &(sf->tokenvec[pos]);
1460
+ more = libinjection_sqli_tokenize(sf);
1461
+ if (more) {
1462
+ if (sf->current->type == TYPE_COMMENT) {
1463
+ st_copy(&last_comment, sf->current); /* pos is not advanced, so the comment's slot is reused */
1464
+ } else {
1465
+ last_comment.type = CHAR_NULL;
1466
+ pos += 1;
1467
+ }
1468
+ }
1469
+ }
1470
+ FOLD_DEBUG;
1471
+ /* did we get 2 tokens? if not, mark everything as processed and
     * go around again; the check at the top of the loop ends it */
1472
+ if (pos - left < 2) {
1473
+ left = pos;
1474
+ continue;
1475
+ }
1476
+
1477
+ /* FOLD: "ss" -> "s"
1478
+ * "foo" "bar" is valid SQL
1479
+ * just ignore second string
1480
+ */
1481
+ if (sf->tokenvec[left].type == TYPE_STRING && sf->tokenvec[left+1].type == TYPE_STRING) {
1482
+ pos -= 1;
1483
+ sf->stats_folds += 1;
1484
+ continue;
1485
+ } else if (sf->tokenvec[left].type == TYPE_SEMICOLON && sf->tokenvec[left+1].type == TYPE_SEMICOLON) {
1486
+ /* not sure how various engines handle
1487
+ * 'select 1;;drop table foo' or
1488
+ * 'select 1; /x foo x/; drop table foo'
1489
+ * to prevent surprises, just fold away repeated semicolons
1490
+ */
1491
+ pos -= 1;
1492
+ sf->stats_folds += 1;
1493
+ continue;
1494
+ } else if (sf->tokenvec[left].type == TYPE_SEMICOLON &&
1495
+ sf->tokenvec[left+1].type == TYPE_FUNCTION &&
1496
+ cstrcasecmp("IF", sf->tokenvec[left+1].val, sf->tokenvec[left+1].len) == 0) {
1497
+ /* IF is normally a function, except in Transact-SQL where it can be used as a
1498
+ * standalone control flow operator, e.g. ; IF 1=1 ...
1499
+ * if found after a semicolon, convert from 'f' type to 'T' type
1500
+ */
1501
+ sf->tokenvec[left+1].type = TYPE_TSQL;
1502
+ left += 2;
1503
+ continue; /* reparse everything, but we probably can advance left, and pos */
1504
+ } else if ((sf->tokenvec[left].type == TYPE_OPERATOR ||
1505
+ sf->tokenvec[left].type == TYPE_LOGIC_OPERATOR) &&
1506
+ (st_is_unary_op(&sf->tokenvec[left+1]) ||
1507
+ sf->tokenvec[left+1].type == TYPE_SQLTYPE)) {
1508
+ pos -= 1;
1509
+ sf->stats_folds += 1;
1510
+ left = 0;
1511
+ continue;
1512
+ } else if (sf->tokenvec[left].type == TYPE_LEFTPARENS &&
1513
+ st_is_unary_op(&sf->tokenvec[left+1])) {
1514
+ pos -= 1;
1515
+ sf->stats_folds += 1;
1516
+ if (left > 0) {
1517
+ left -= 1;
1518
+ }
1519
+ continue;
1520
+ } else if (syntax_merge_words(sf, &sf->tokenvec[left], &sf->tokenvec[left+1])) {
1521
+ pos -= 1;
1522
+ sf->stats_folds += 1;
1523
+ if (left > 0) {
1524
+ left -= 1;
1525
+ }
1526
+ continue;
1527
+ } else if ((sf->tokenvec[left].type == TYPE_BAREWORD || sf->tokenvec[left].type == TYPE_VARIABLE) &&
1528
+ sf->tokenvec[left+1].type == TYPE_LEFTPARENS && (
1529
+ /* TSQL functions but common enough to be column names */
1530
+ cstrcasecmp("USER_ID", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
1531
+ cstrcasecmp("USER_NAME", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
1532
+
1533
+ /* Function in MYSQL */
1534
+ cstrcasecmp("DATABASE", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
1535
+ cstrcasecmp("PASSWORD", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
1536
+ cstrcasecmp("USER", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
1537
+
1538
+ /* Mysql words that act as a variable and are a function */
1539
+
1540
+ /* TSQL current_users is fake-variable */
1541
+ /* http://msdn.microsoft.com/en-us/library/ms176050.aspx */
1542
+ cstrcasecmp("CURRENT_USER", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
1543
+ cstrcasecmp("CURRENT_DATE", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
1544
+ cstrcasecmp("CURRENT_TIME", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
1545
+ cstrcasecmp("CURRENT_TIMESTAMP", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
1546
+ cstrcasecmp("LOCALTIME", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
1547
+ cstrcasecmp("LOCALTIMESTAMP", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0
1548
+ )) {
1549
+
1550
+ /* pos is the same
1551
+ * other conversions need to go here... for instance
1552
+ * password CAN be a function, coalesce CAN be a function
1553
+ */
1554
+ sf->tokenvec[left].type = TYPE_FUNCTION;
1555
+ continue;
1556
+ } else if (sf->tokenvec[left].type == TYPE_KEYWORD && (
1557
+ cstrcasecmp("IN", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
1558
+ cstrcasecmp("NOT IN", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0
1559
+ )) {
1560
+
1561
+ if (sf->tokenvec[left+1].type == TYPE_LEFTPARENS) {
1562
+ /* got .... IN ( ... (or 'NOT IN')
1563
+ * it's an operator
1564
+ */
1565
+ sf->tokenvec[left].type = TYPE_OPERATOR;
1566
+ } else {
1567
+ /*
1568
+ * it's a nothing
1569
+ */
1570
+ sf->tokenvec[left].type = TYPE_BAREWORD;
1571
+ }
1572
+
1573
+ /* "IN" can be used as "IN BOOLEAN MODE" for mysql
1574
+ * in which case merging of words can be done later
1575
+ * other wise it acts as an equality operator __ IN (values..)
1576
+ *
1577
+ * here we got "IN" "(" so it's an operator.
1578
+ * also back track to handle "NOT IN"
1579
+ * might need to do the same with like
1580
+ * two use cases "foo" LIKE "BAR" (normal operator)
1581
+ * "foo" = LIKE(1,2)
1582
+ */
1583
+ continue;
1584
+ } else if ((sf->tokenvec[left].type == TYPE_OPERATOR) && (
1585
+ cstrcasecmp("LIKE", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
1586
+ cstrcasecmp("NOT LIKE", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0)) {
1587
+ if (sf->tokenvec[left+1].type == TYPE_LEFTPARENS) {
1588
+ /* SELECT LIKE(...
1589
+ * it's a function
1590
+ */
1591
+ sf->tokenvec[left].type = TYPE_FUNCTION;
1592
+ } /* no continue here: control falls through to the three-token stage */
1593
+ } else if (sf->tokenvec[left].type == TYPE_SQLTYPE &&
1594
+ (sf->tokenvec[left+1].type == TYPE_BAREWORD ||
1595
+ sf->tokenvec[left+1].type == TYPE_NUMBER ||
1596
+ sf->tokenvec[left+1].type == TYPE_SQLTYPE ||
1597
+ sf->tokenvec[left+1].type == TYPE_LEFTPARENS ||
1598
+ sf->tokenvec[left+1].type == TYPE_FUNCTION ||
1599
+ sf->tokenvec[left+1].type == TYPE_VARIABLE ||
1600
+ sf->tokenvec[left+1].type == TYPE_STRING)) {
1601
+ st_copy(&sf->tokenvec[left], &sf->tokenvec[left+1]);
1602
+ pos -= 1;
1603
+ sf->stats_folds += 1;
1604
+ left = 0;
1605
+ continue;
1606
+ } else if (sf->tokenvec[left].type == TYPE_COLLATE &&
1607
+ sf->tokenvec[left+1].type == TYPE_BAREWORD) {
1608
+ /*
1609
+ * there are too many collation types.. so if the bareword has a "_"
1610
+ * then it's TYPE_SQLTYPE
1611
+ */
1612
+ if (strchr(sf->tokenvec[left+1].val, '_') != NULL) {
1613
+ sf->tokenvec[left+1].type = TYPE_SQLTYPE;
1614
+ left = 0;
1615
+ }
1616
+ } else if (sf->tokenvec[left].type == TYPE_BACKSLASH) {
1617
+ if (st_is_arithmetic_op(&(sf->tokenvec[left+1]))) {
1618
+ /* very weird case in TSQL where '\%1' is parsed as '0 % 1', etc */
1619
+ sf->tokenvec[left].type = TYPE_NUMBER;
1620
+ } else {
1621
+ /* just ignore it.. Again T-SQL seems to parse \1 as "1" */
1622
+ st_copy(&sf->tokenvec[left], &sf->tokenvec[left+1]);
1623
+ pos -= 1;
1624
+ sf->stats_folds += 1;
1625
+ }
1626
+ left = 0;
1627
+ continue;
1628
+ } else if (sf->tokenvec[left].type == TYPE_LEFTPARENS &&
1629
+ sf->tokenvec[left+1].type == TYPE_LEFTPARENS) {
1630
+ pos -= 1;
1631
+ left = 0;
1632
+ sf->stats_folds += 1;
1633
+ continue;
1634
+ } else if (sf->tokenvec[left].type == TYPE_RIGHTPARENS &&
1635
+ sf->tokenvec[left+1].type == TYPE_RIGHTPARENS) {
1636
+ pos -= 1;
1637
+ left = 0;
1638
+ sf->stats_folds += 1;
1639
+ continue;
1640
+ } else if (sf->tokenvec[left].type == TYPE_LEFTBRACE &&
1641
+ sf->tokenvec[left+1].type == TYPE_BAREWORD) {
1642
+
1643
+ /*
1644
+ * MySQL Degenerate case --
1645
+ *
1646
+ * select { ``.``.id }; -- valid !!!
1647
+ * select { ``.``.``.id }; -- invalid
1648
+ * select ``.``.id; -- invalid
1649
+ * select { ``.id }; -- invalid
1650
+ *
1651
+ * so it appears {``.``.id} is a magic case
1652
+ * I suspect this is "current database, current table, field id"
1653
+ *
1654
+ * The folding code can't look at more than 3 tokens, and
1655
+ * I don't want to make two passes.
1656
+ *
1657
+ * Since "{ ``" is so rare, we are just going to blacklist it.
1658
+ *
1659
+ * Highly likely this will need revisiting!
1660
+ *
1661
+ * CREDIT @rsalgado 2013-11-25
1662
+ */
1663
+ if (sf->tokenvec[left+1].len == 0) {
1664
+ sf->tokenvec[left+1].type = TYPE_EVIL;
1665
+ return (int)(left+2);
1666
+ }
1667
+ /* weird ODBC / MYSQL {foo expr} --> expr
1668
+ * but for this rule we just strip away the "{ foo" part
1669
+ */
1670
+ left = 0;
1671
+ pos -= 2;
1672
+ sf->stats_folds += 2;
1673
+ continue;
1674
+ } else if (sf->tokenvec[left+1].type == TYPE_RIGHTBRACE) {
1675
+ pos -= 1;
1676
+ left = 0;
1677
+ sf->stats_folds += 1;
1678
+ continue;
1679
+ }
1680
+
1681
+ /* all cases of handling 2 tokens are done
1682
+ and nothing matched. Get one more token
1683
+ */
1684
+ FOLD_DEBUG;
1685
+ while (more && pos <= LIBINJECTION_SQLI_MAX_TOKENS && pos - left < 3) {
1686
+ sf->current = &(sf->tokenvec[pos]);
1687
+ more = libinjection_sqli_tokenize(sf);
1688
+ if (more) {
1689
+ if (sf->current->type == TYPE_COMMENT) {
1690
+ st_copy(&last_comment, sf->current);
1691
+ } else {
1692
+ last_comment.type = CHAR_NULL;
1693
+ pos += 1;
1694
+ }
1695
+ }
1696
+ }
1697
+
1698
+ /* do we have three tokens? If not, mark everything as processed
     * and go around again */
1699
+ if (pos -left < 3) {
1700
+ left = pos;
1701
+ continue;
1702
+ }
1703
+
1704
+ /*
1705
+ * now look for three token folding
1706
+ */
1707
+ if (sf->tokenvec[left].type == TYPE_NUMBER &&
1708
+ sf->tokenvec[left+1].type == TYPE_OPERATOR &&
1709
+ sf->tokenvec[left+2].type == TYPE_NUMBER) {
1710
+ pos -= 2;
1711
+ left = 0;
1712
+ continue;
1713
+ } else if (sf->tokenvec[left].type == TYPE_OPERATOR &&
1714
+ sf->tokenvec[left+1].type != TYPE_LEFTPARENS &&
1715
+ sf->tokenvec[left+2].type == TYPE_OPERATOR) {
1716
+ left = 0;
1717
+ pos -= 2;
1718
+ continue;
1719
+ } else if (sf->tokenvec[left].type == TYPE_LOGIC_OPERATOR &&
1720
+ sf->tokenvec[left+2].type == TYPE_LOGIC_OPERATOR) {
1721
+ pos -= 2;
1722
+ left = 0;
1723
+ continue;
1724
+ } else if (sf->tokenvec[left].type == TYPE_VARIABLE &&
1725
+ sf->tokenvec[left+1].type == TYPE_OPERATOR &&
1726
+ (sf->tokenvec[left+2].type == TYPE_VARIABLE ||
1727
+ sf->tokenvec[left+2].type == TYPE_NUMBER ||
1728
+ sf->tokenvec[left+2].type == TYPE_BAREWORD)) {
1729
+ pos -= 2;
1730
+ left = 0;
1731
+ continue;
1732
+ } else if ((sf->tokenvec[left].type == TYPE_BAREWORD ||
1733
+ sf->tokenvec[left].type == TYPE_NUMBER ) &&
1734
+ sf->tokenvec[left+1].type == TYPE_OPERATOR &&
1735
+ (sf->tokenvec[left+2].type == TYPE_NUMBER ||
1736
+ sf->tokenvec[left+2].type == TYPE_BAREWORD)) {
1737
+ pos -= 2;
1738
+ left = 0;
1739
+ continue;
1740
+ } else if ((sf->tokenvec[left].type == TYPE_BAREWORD ||
1741
+ sf->tokenvec[left].type == TYPE_NUMBER ||
1742
+ sf->tokenvec[left].type == TYPE_VARIABLE ||
1743
+ sf->tokenvec[left].type == TYPE_STRING) &&
1744
+ sf->tokenvec[left+1].type == TYPE_OPERATOR &&
1745
+ streq(sf->tokenvec[left+1].val, "::") &&
1746
+ sf->tokenvec[left+2].type == TYPE_SQLTYPE) {
1747
+ pos -= 2;
1748
+ left = 0;
1749
+ sf->stats_folds += 2;
1750
+ continue;
1751
+ } else if ((sf->tokenvec[left].type == TYPE_BAREWORD ||
1752
+ sf->tokenvec[left].type == TYPE_NUMBER ||
1753
+ sf->tokenvec[left].type == TYPE_STRING ||
1754
+ sf->tokenvec[left].type == TYPE_VARIABLE) &&
1755
+ sf->tokenvec[left+1].type == TYPE_COMMA &&
1756
+ (sf->tokenvec[left+2].type == TYPE_NUMBER ||
1757
+ sf->tokenvec[left+2].type == TYPE_BAREWORD ||
1758
+ sf->tokenvec[left+2].type == TYPE_STRING ||
1759
+ sf->tokenvec[left+2].type == TYPE_VARIABLE)) {
1760
+ pos -= 2;
1761
+ left = 0;
1762
+ continue;
1763
+ } else if ((sf->tokenvec[left].type == TYPE_EXPRESSION ||
1764
+ sf->tokenvec[left].type == TYPE_GROUP ||
1765
+ sf->tokenvec[left].type == TYPE_COMMA) &&
1766
+ st_is_unary_op(&sf->tokenvec[left+1]) &&
1767
+ sf->tokenvec[left+2].type == TYPE_LEFTPARENS) {
1768
+ /* got something like SELECT + (, LIMIT + (
1769
+ * remove unary operator
1770
+ */
1771
+ st_copy(&sf->tokenvec[left+1], &sf->tokenvec[left+2]);
1772
+ pos -= 1;
1773
+ left = 0;
1774
+ continue;
1775
+ } else if ((sf->tokenvec[left].type == TYPE_KEYWORD ||
1776
+ sf->tokenvec[left].type == TYPE_EXPRESSION ||
1777
+ sf->tokenvec[left].type == TYPE_GROUP ) &&
1778
+ st_is_unary_op(&sf->tokenvec[left+1]) &&
1779
+ (sf->tokenvec[left+2].type == TYPE_NUMBER ||
1780
+ sf->tokenvec[left+2].type == TYPE_BAREWORD ||
1781
+ sf->tokenvec[left+2].type == TYPE_VARIABLE ||
1782
+ sf->tokenvec[left+2].type == TYPE_STRING ||
1783
+ sf->tokenvec[left+2].type == TYPE_FUNCTION )) {
1784
+ /* remove unary operators
1785
+ * select - 1
1786
+ */
1787
+ st_copy(&sf->tokenvec[left+1], &sf->tokenvec[left+2]);
1788
+ pos -= 1;
1789
+ left = 0;
1790
+ continue;
1791
+ } else if (sf->tokenvec[left].type == TYPE_COMMA &&
1792
+ st_is_unary_op(&sf->tokenvec[left+1]) &&
1793
+ (sf->tokenvec[left+2].type == TYPE_NUMBER ||
1794
+ sf->tokenvec[left+2].type == TYPE_BAREWORD ||
1795
+ sf->tokenvec[left+2].type == TYPE_VARIABLE ||
1796
+ sf->tokenvec[left+2].type == TYPE_STRING)) {
1797
+ /*
1798
+ * interesting case turn ", -1" ->> ",1" PLUS we need to back up
1799
+ * one token if possible to see if more folding can be done
1800
+ * "1,-1" --> "1"
1801
+ */
1802
+ st_copy(&sf->tokenvec[left+1], &sf->tokenvec[left+2]);
1803
+ left = 0;
1804
+ /* pos is >= 3 so this is safe */
1805
+ assert(pos >= 3);
1806
+ pos -= 3;
1807
+ continue;
1808
+ } else if (sf->tokenvec[left].type == TYPE_COMMA &&
1809
+ st_is_unary_op(&sf->tokenvec[left+1]) &&
1810
+ sf->tokenvec[left+2].type == TYPE_FUNCTION) {
1811
+
1812
+ /* Separate case from above since you end up with
1813
+ * 1,-sin(1) --> 1 (1)
1814
+ * Here, just do
1815
+ * 1,-sin(1) --> 1,sin(1)
1816
+ * just remove unary operator
1817
+ */
1818
+ st_copy(&sf->tokenvec[left+1], &sf->tokenvec[left+2]);
1819
+ pos -= 1;
1820
+ left = 0;
1821
+ continue;
1822
+ } else if ((sf->tokenvec[left].type == TYPE_BAREWORD) &&
1823
+ (sf->tokenvec[left+1].type == TYPE_DOT) &&
1824
+ (sf->tokenvec[left+2].type == TYPE_BAREWORD)) {
1825
+ /* ignore the '.n'
1826
+ * typically this is databasename.table
1827
+ */
1828
+ assert(pos >= 3);
1829
+ pos -= 2;
1830
+ left = 0;
1831
+ continue;
1832
+ } else if ((sf->tokenvec[left].type == TYPE_EXPRESSION) &&
1833
+ (sf->tokenvec[left+1].type == TYPE_DOT) &&
1834
+ (sf->tokenvec[left+2].type == TYPE_BAREWORD)) {
1835
+ /* select . `foo` --> select `foo` */
1836
+ st_copy(&sf->tokenvec[left+1], &sf->tokenvec[left+2]);
1837
+ pos -= 1;
1838
+ left = 0;
1839
+ continue;
1840
+ } else if ((sf->tokenvec[left].type == TYPE_FUNCTION) &&
1841
+ (sf->tokenvec[left+1].type == TYPE_LEFTPARENS) &&
1842
+ (sf->tokenvec[left+2].type != TYPE_RIGHTPARENS)) {
1843
+ /*
1844
+ * what's going on here:
1845
+ * Some SQL functions like USER() have 0 args
1846
+ * if we get User(foo), then User is not a function
1847
+ * This should be expanded since it eliminates a lot of false
1848
+ * positives.
1849
+ */
1850
+ if (cstrcasecmp("USER", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0) {
1851
+ sf->tokenvec[left].type = TYPE_BAREWORD;
1852
+ }
1853
+ }
1854
+
1855
+ /* no folding -- assume left-most token
1856
+ is good, now use the existing 2 tokens --
1857
+ do not get another
1858
+ */
1859
+
1860
+ left += 1;
1861
+
1862
+ } /* while(1) */
1863
+
1864
+ /* if we have 4 or less tokens, and we had a comment token
1865
+ * at the end, add it back
1866
+ */
1867
+
1868
+ if (left < LIBINJECTION_SQLI_MAX_TOKENS && last_comment.type == TYPE_COMMENT) {
1869
+ st_copy(&sf->tokenvec[left], &last_comment);
1870
+ left += 1;
1871
+ }
1872
+
1873
+ /* sometimes we grab a 6th token to help
1874
+ determine the type of token 5.
1875
+ */
1876
+ if (left > LIBINJECTION_SQLI_MAX_TOKENS) {
1877
+ left = LIBINJECTION_SQLI_MAX_TOKENS;
1878
+ }
1879
+
1880
+ return (int)left;
1881
+ }
1882
+
1883
+ /* secondary api: detects SQLi in a string, GIVEN a context.
1884
+ *
1885
+ * A context can be:
1886
+ * * CHAR_NULL (\0), process as is
1887
+ * * CHAR_SINGLE ('), process pretending input started with a
1888
+ * single quote.
1889
+ * * CHAR_DOUBLE ("), process pretending input started with a
1890
+ * double quote.
1891
+ *
1892
+ */
1893
+ const char* libinjection_sqli_fingerprint(struct libinjection_sqli_state * sql_state, int flags)
1894
+ {
1895
+ int i;
1896
+ int tlen = 0;
1897
+
1898
+ libinjection_sqli_reset(sql_state, flags);
1899
+
1900
+ tlen = libinjection_sqli_fold(sql_state);
1901
+
1902
+ /* Check for magic PHP backquote comment
1903
+ * If:
1904
+ * * last token is of type "bareword"
1905
+ * * And is quoted in a backtick
1906
+ * * And isn't closed
1907
+ * * And it's empty?
1908
+ * Then convert it to comment
1909
+ */
1910
+ if (tlen > 2 &&
1911
+ sql_state->tokenvec[tlen-1].type == TYPE_BAREWORD &&
1912
+ sql_state->tokenvec[tlen-1].str_open == CHAR_TICK &&
1913
+ sql_state->tokenvec[tlen-1].len == 0 &&
1914
+ sql_state->tokenvec[tlen-1].str_close == CHAR_NULL) {
1915
+ sql_state->tokenvec[tlen-1].type = TYPE_COMMENT;
1916
+ }
1917
+
1918
+ for (i = 0; i < tlen; ++i) {
1919
+ sql_state->fingerprint[i] = sql_state->tokenvec[i].type;
1920
+ }
1921
+
1922
+ /*
1923
+ * make the fingerprint pattern a c-string (null delimited)
1924
+ */
1925
+ sql_state->fingerprint[tlen] = CHAR_NULL;
1926
+
1927
+ /*
1928
+ * check for 'X' in pattern, and then
1929
+ * clear out all tokens
1930
+ *
1931
+ * this means parsing could not be done
1932
+ * accurately due to pgsql's double comments
1933
+ * or other syntax that isn't consistent.
1934
+ * Should be very rare false positive
1935
+ */
1936
+ if (strchr(sql_state->fingerprint, TYPE_EVIL)) {
1937
+ /* needed for SWIG */
1938
+ memset((void*)sql_state->fingerprint, 0, LIBINJECTION_SQLI_MAX_TOKENS + 1);
1939
+ memset((void*)sql_state->tokenvec[0].val, 0, LIBINJECTION_SQLI_TOKEN_SIZE);
1940
+
1941
+ sql_state->fingerprint[0] = TYPE_EVIL;
1942
+
1943
+ sql_state->tokenvec[0].type = TYPE_EVIL;
1944
+ sql_state->tokenvec[0].val[0] = TYPE_EVIL;
1945
+ sql_state->tokenvec[1].type = CHAR_NULL;
1946
+ }
1947
+
1948
+
1949
+ return sql_state->fingerprint;
1950
+ }
1951
+
1952
/*
 * Returns 1 when the current fingerprint is on the blacklist and is not
 * excused by the whitelist check, 0 otherwise.  The whitelist check is
 * only run when the blacklist check succeeds.
 */
int libinjection_sqli_check_fingerprint(struct libinjection_sqli_state* sql_state)
{
    if (!libinjection_sqli_blacklist(sql_state)) {
        return 0;
    }
    return libinjection_sqli_not_whitelist(sql_state) ? 1 : 0;
}
1957
+
1958
+ char libinjection_sqli_lookup_word(struct libinjection_sqli_state *sql_state, int lookup_type,
1959
+ const char* str, size_t len)
1960
+ {
1961
+ if (lookup_type == LOOKUP_FINGERPRINT) {
1962
+ return libinjection_sqli_check_fingerprint(sql_state) ? 'X' : '\0';
1963
+ } else {
1964
+ return bsearch_keyword_type(str, len, sql_keywords, sql_keywords_sz);
1965
+ }
1966
+ }
1967
+
1968
+ int libinjection_sqli_blacklist(struct libinjection_sqli_state* sql_state)
1969
+ {
1970
+ /*
1971
+ * use minimum of 8 bytes to make sure gcc -fstack-protector
1972
+ * works correctly
1973
+ */
1974
+ char fp2[8];
1975
+ char ch;
1976
+ size_t i;
1977
+ size_t len = strlen(sql_state->fingerprint);
1978
+ int patmatch;
1979
+
1980
+ if (len < 1) {
1981
+ sql_state->reason = __LINE__;
1982
+ return FALSE;
1983
+ }
1984
+
1985
+ /*
1986
+ to keep everything compatible, convert the
1987
+ v0 fingerprint pattern to v1
1988
+ v0: up to 5 chars, mixed case
1989
+ v1: 1 char is '0', up to 5 more chars, upper case
1990
+ */
1991
+
1992
+ fp2[0] = '0';
1993
+ for (i = 0; i < len; ++i) {
1994
+ ch = sql_state->fingerprint[i];
1995
+ if (ch >= 'a' && ch <= 'z') {
1996
+ ch -= 0x20;
1997
+ }
1998
+ fp2[i+1] = ch;
1999
+ }
2000
+ fp2[i+1] = '\0';
2001
+
2002
+ patmatch = is_keyword(fp2, len + 1) == TYPE_FINGERPRINT;
2003
+
2004
+ /*
2005
+ * No match.
2006
+ *
2007
+ * Set sql_state->reason to current line number
2008
+ * only for debugging purposes.
2009
+ */
2010
+ if (!patmatch) {
2011
+ sql_state->reason = __LINE__;
2012
+ return FALSE;
2013
+ }
2014
+
2015
+ return TRUE;
2016
+ }
2017
+
2018
+ /*
2019
+ * return TRUE if SQLi, false is benign
2020
+ */
2021
+ int libinjection_sqli_not_whitelist(struct libinjection_sqli_state* sql_state)
2022
+ {
2023
+ /*
2024
+ * We assume we got a SQLi match
2025
+ * This next part just helps reduce false positives.
2026
+ *
2027
+ */
2028
+ char ch;
2029
+ size_t tlen = strlen(sql_state->fingerprint);
2030
+
2031
+ if (tlen > 1 && sql_state->fingerprint[tlen-1] == TYPE_COMMENT) {
2032
+ /*
2033
+ * if ending comment is contains 'sp_password' then it's SQLi!
2034
+ * MS Audit log apparently ignores anything with
2035
+ * 'sp_password' in it. Unable to find primary reference to
2036
+ * this "feature" of SQL Server but seems to be known SQLi
2037
+ * technique
2038
+ */
2039
+ if (my_memmem(sql_state->s, sql_state->slen,
2040
+ "sp_password", strlen("sp_password"))) {
2041
+ sql_state->reason = __LINE__;
2042
+ return TRUE;
2043
+ }
2044
+ }
2045
+
2046
+ switch (tlen) {
2047
+ case 2:{
2048
+ /*
2049
+ * case 2 are "very small SQLi" which make them
2050
+ * hard to tell from normal input...
2051
+ */
2052
+
2053
+ if (sql_state->fingerprint[1] == TYPE_UNION) {
2054
+ if (sql_state->stats_tokens == 2) {
2055
+ /* not sure why but 1U comes up in SQLi attack
2056
+ * likely part of parameter splitting/etc.
2057
+ * lots of reasons why "1 union" might be normal
2058
+ * input, so beep only if other SQLi things are present
2059
+ */
2060
+ /* it really is a number and 'union'
2061
+ * other wise it has folding or comments
2062
+ */
2063
+ sql_state->reason = __LINE__;
2064
+ return FALSE;
2065
+ } else {
2066
+ sql_state->reason = __LINE__;
2067
+ return TRUE;
2068
+ }
2069
+ }
2070
+ /*
2071
+ * if 'comment' is '#' ignore.. too many FP
2072
+ */
2073
+ if (sql_state->tokenvec[1].val[0] == '#') {
2074
+ sql_state->reason = __LINE__;
2075
+ return FALSE;
2076
+ }
2077
+
2078
+ /*
2079
+ * for fingerprint like 'nc', only comments of /x are treated
2080
+ * as SQL... ending comments of "--" and "#" are not SQLi
2081
+ */
2082
+ if (sql_state->tokenvec[0].type == TYPE_BAREWORD &&
2083
+ sql_state->tokenvec[1].type == TYPE_COMMENT &&
2084
+ sql_state->tokenvec[1].val[0] != '/') {
2085
+ sql_state->reason = __LINE__;
2086
+ return FALSE;
2087
+ }
2088
+
2089
+ /*
2090
+ * if '1c' ends with '/x' then it's SQLi
2091
+ */
2092
+ if (sql_state->tokenvec[0].type == TYPE_NUMBER &&
2093
+ sql_state->tokenvec[1].type == TYPE_COMMENT &&
2094
+ sql_state->tokenvec[1].val[0] == '/') {
2095
+ return TRUE;
2096
+ }
2097
+
2098
+ /**
2099
+ * there are some odd base64-looking query string values
2100
+ * 1234-ABCDEFEhfhihwuefi--
2101
+ * which evaluate to "1c"... these are not SQLi
2102
+ * but 1234-- probably is.
2103
+ * Make sure the "1" in "1c" is actually a true decimal number
2104
+ *
2105
+ * Need to check -original- string since the folding step
2106
+ * may have merged tokens, e.g. "1+FOO" is folded into "1"
2107
+ *
2108
+ * Note: evasion: 1*1--
2109
+ */
2110
+ if (sql_state->tokenvec[0].type == TYPE_NUMBER &&
2111
+ sql_state->tokenvec[1].type == TYPE_COMMENT) {
2112
+ if (sql_state->stats_tokens > 2) {
2113
+ /* we have some folding going on, highly likely SQLi */
2114
+ sql_state->reason = __LINE__;
2115
+ return TRUE;
2116
+ }
2117
+ /*
2118
+ * we check that next character after the number is either whitespace,
2119
+ * or '/' or a '-' ==> SQLi.
2120
+ */
2121
+ ch = sql_state->s[sql_state->tokenvec[0].len];
2122
+ if ( ch <= 32 ) {
2123
+ /* next char was whitespace,e.g. "1234 --"
2124
+ * this isn't exactly correct.. ideally we should skip over all whitespace
2125
+ * but this seems to be ok for now
2126
+ */
2127
+ return TRUE;
2128
+ }
2129
+ if (ch == '/' && sql_state->s[sql_state->tokenvec[0].len + 1] == '*') {
2130
+ return TRUE;
2131
+ }
2132
+ if (ch == '-' && sql_state->s[sql_state->tokenvec[0].len + 1] == '-') {
2133
+ return TRUE;
2134
+ }
2135
+
2136
+ sql_state->reason = __LINE__;
2137
+ return FALSE;
2138
+ }
2139
+
2140
+ /*
2141
+ * detect obvious SQLi scans.. many people put '--' in plain text
2142
+ * so only detect if input ends with '--', e.g. 1-- but not 1-- foo
2143
+ */
2144
+ if ((sql_state->tokenvec[1].len > 2)
2145
+ && sql_state->tokenvec[1].val[0] == '-') {
2146
+ sql_state->reason = __LINE__;
2147
+ return FALSE;
2148
+ }
2149
+
2150
+ break;
2151
+ } /* case 2 */
2152
+ case 3:{
2153
+ /*
2154
+ * ...foo' + 'bar...
2155
+ * no opening quote, no closing quote
2156
+ * and each string has data
2157
+ */
2158
+
2159
+ if (streq(sql_state->fingerprint, "sos")
2160
+ || streq(sql_state->fingerprint, "s&s")) {
2161
+
2162
+ if ((sql_state->tokenvec[0].str_open == CHAR_NULL)
2163
+ && (sql_state->tokenvec[2].str_close == CHAR_NULL)
2164
+ && (sql_state->tokenvec[0].str_close == sql_state->tokenvec[2].str_open)) {
2165
+ /*
2166
+ * if ....foo" + "bar....
2167
+ */
2168
+ sql_state->reason = __LINE__;
2169
+ return TRUE;
2170
+ }
2171
+ if (sql_state->stats_tokens == 3) {
2172
+ sql_state->reason = __LINE__;
2173
+ return FALSE;
2174
+ }
2175
+
2176
+ /*
2177
+ * not SQLi
2178
+ */
2179
+ sql_state->reason = __LINE__;
2180
+ return FALSE;
2181
+ } else if (streq(sql_state->fingerprint, "s&n") ||
2182
+ streq(sql_state->fingerprint, "n&1") ||
2183
+ streq(sql_state->fingerprint, "1&1") ||
2184
+ streq(sql_state->fingerprint, "1&v") ||
2185
+ streq(sql_state->fingerprint, "1&s")) {
2186
+ /* 'sexy and 17' not SQLi
2187
+ * 'sexy and 17<18' SQLi
2188
+ */
2189
+ if (sql_state->stats_tokens == 3) {
2190
+ sql_state->reason = __LINE__;
2191
+ return FALSE;
2192
+ }
2193
+ } else if (sql_state->tokenvec[1].type == TYPE_KEYWORD) {
2194
+ if ((sql_state->tokenvec[1].len < 5) ||
2195
+ cstrcasecmp("INTO", sql_state->tokenvec[1].val, 4)) {
2196
+ /* if it's not "INTO OUTFILE", or "INTO DUMPFILE" (MySQL)
2197
+ * then treat as safe
2198
+ */
2199
+ sql_state->reason = __LINE__;
2200
+ return FALSE;
2201
+ }
2202
+ }
2203
+ break;
2204
+ } /* case 3 */
2205
+ case 4:
2206
+ case 5: {
2207
+ /* nothing right now */
2208
+ break;
2209
+ } /* case 5 */
2210
+ } /* end switch */
2211
+
2212
+ return TRUE;
2213
+ }
2214
+
2215
+ /** Main API, detects SQLi in an input.
2216
+ *
2217
+ *
2218
+ */
2219
+ static int reparse_as_mysql(struct libinjection_sqli_state * sql_state)
2220
+ {
2221
+ return sql_state->stats_comment_ddx ||
2222
+ sql_state->stats_comment_hash;
2223
+ }
2224
+
2225
+ /*
2226
+ * This function is mostly use with SWIG
2227
+ */
2228
+ struct libinjection_sqli_token*
2229
+ libinjection_sqli_get_token(struct libinjection_sqli_state * sql_state, int i)
2230
+ {
2231
+ if (i < 0 || i > (int)LIBINJECTION_SQLI_MAX_TOKENS) {
2232
+ return NULL;
2233
+ }
2234
+ return &(sql_state->tokenvec[i]);
2235
+ }
2236
+
2237
+ int libinjection_is_sqli(struct libinjection_sqli_state * sql_state)
2238
+ {
2239
+ const char *s = sql_state->s;
2240
+ size_t slen = sql_state->slen;
2241
+
2242
+ /*
2243
+ * no input? not SQLi
2244
+ */
2245
+ if (slen == 0) {
2246
+ return FALSE;
2247
+ }
2248
+
2249
+ /*
2250
+ * test input "as-is"
2251
+ */
2252
+ libinjection_sqli_fingerprint(sql_state, FLAG_QUOTE_NONE | FLAG_SQL_ANSI);
2253
+ if (sql_state->lookup(sql_state, LOOKUP_FINGERPRINT,
2254
+ sql_state->fingerprint, strlen(sql_state->fingerprint))) {
2255
+ return TRUE;
2256
+ } else if (reparse_as_mysql(sql_state)) {
2257
+ libinjection_sqli_fingerprint(sql_state, FLAG_QUOTE_NONE | FLAG_SQL_MYSQL);
2258
+ if (sql_state->lookup(sql_state, LOOKUP_FINGERPRINT,
2259
+ sql_state->fingerprint, strlen(sql_state->fingerprint))) {
2260
+ return TRUE;
2261
+ }
2262
+ }
2263
+
2264
+ /*
2265
+ * if input has a single_quote, then
2266
+ * test as if input was actually '
2267
+ * example: if input if "1' = 1", then pretend it's
2268
+ * "'1' = 1"
2269
+ * Porting Notes: example the same as doing
2270
+ * is_string_sqli(sql_state, "'" + s, slen+1, NULL, fn, arg)
2271
+ *
2272
+ */
2273
+ if (memchr(s, CHAR_SINGLE, slen)) {
2274
+ libinjection_sqli_fingerprint(sql_state, FLAG_QUOTE_SINGLE | FLAG_SQL_ANSI);
2275
+ if (sql_state->lookup(sql_state, LOOKUP_FINGERPRINT,
2276
+ sql_state->fingerprint, strlen(sql_state->fingerprint))) {
2277
+ return TRUE;
2278
+ } else if (reparse_as_mysql(sql_state)) {
2279
+ libinjection_sqli_fingerprint(sql_state, FLAG_QUOTE_SINGLE | FLAG_SQL_MYSQL);
2280
+ if (sql_state->lookup(sql_state, LOOKUP_FINGERPRINT,
2281
+ sql_state->fingerprint, strlen(sql_state->fingerprint))) {
2282
+ return TRUE;
2283
+ }
2284
+ }
2285
+ }
2286
+
2287
+ /*
2288
+ * same as above but with a double-quote "
2289
+ */
2290
+ if (memchr(s, CHAR_DOUBLE, slen)) {
2291
+ libinjection_sqli_fingerprint(sql_state, FLAG_QUOTE_DOUBLE | FLAG_SQL_MYSQL);
2292
+ if (sql_state->lookup(sql_state, LOOKUP_FINGERPRINT,
2293
+ sql_state->fingerprint, strlen(sql_state->fingerprint))) {
2294
+ return TRUE;
2295
+ }
2296
+ }
2297
+
2298
+ /*
2299
+ * Hurray, input is not SQLi
2300
+ */
2301
+ return FALSE;
2302
+ }
2303
+
2304
+ int libinjection_sqli(const char* input, size_t slen, char fingerprint[])
2305
+ {
2306
+ int issqli;
2307
+ struct libinjection_sqli_state state;
2308
+
2309
+ libinjection_sqli_init(&state, input, slen, 0);
2310
+ issqli = libinjection_is_sqli(&state);
2311
+ if (issqli) {
2312
+ strcpy(fingerprint, state.fingerprint);
2313
+ } else {
2314
+ fingerprint[0] = '\0';
2315
+ }
2316
+ return issqli;
2317
+ }