threatstack-agent-ruby 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (37) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +3 -0
  3. data/LICENSE +6 -0
  4. data/ext/libinjection/extconf.rb +4 -0
  5. data/ext/libinjection/libinjection.h +65 -0
  6. data/ext/libinjection/libinjection.i +13 -0
  7. data/ext/libinjection/libinjection_html5.c +850 -0
  8. data/ext/libinjection/libinjection_html5.h +54 -0
  9. data/ext/libinjection/libinjection_sqli.c +2325 -0
  10. data/ext/libinjection/libinjection_sqli.h +298 -0
  11. data/ext/libinjection/libinjection_sqli_data.h +9654 -0
  12. data/ext/libinjection/libinjection_wrap.c +2393 -0
  13. data/ext/libinjection/libinjection_xss.c +532 -0
  14. data/ext/libinjection/libinjection_xss.h +21 -0
  15. data/lib/constants.rb +110 -0
  16. data/lib/control.rb +61 -0
  17. data/lib/events/event_accumulator.rb +36 -0
  18. data/lib/events/models/attack_event.rb +58 -0
  19. data/lib/events/models/base_event.rb +41 -0
  20. data/lib/events/models/dependency_event.rb +93 -0
  21. data/lib/events/models/environment_event.rb +93 -0
  22. data/lib/events/models/instrumentation_event.rb +46 -0
  23. data/lib/exceptions/request_blocked_error.rb +11 -0
  24. data/lib/instrumentation/common.rb +172 -0
  25. data/lib/instrumentation/instrumenter.rb +144 -0
  26. data/lib/instrumentation/kernel.rb +45 -0
  27. data/lib/instrumentation/rails.rb +61 -0
  28. data/lib/jobs/delayed_job.rb +26 -0
  29. data/lib/jobs/event_submitter.rb +101 -0
  30. data/lib/jobs/job_queue.rb +38 -0
  31. data/lib/jobs/recurrent_job.rb +61 -0
  32. data/lib/threatstack-agent-ruby.rb +7 -0
  33. data/lib/utils/aws_utils.rb +46 -0
  34. data/lib/utils/formatter.rb +47 -0
  35. data/lib/utils/logger.rb +43 -0
  36. data/threatstack-agent-ruby.gemspec +35 -0
  37. metadata +221 -0
@@ -0,0 +1,54 @@
1
+ #ifndef LIBINJECTION_HTML5
2
+ #define LIBINJECTION_HTML5
3
+
4
+ #ifdef __cplusplus
5
+ extern "C" {
6
+ #endif
7
+
8
+ /* pull in size_t */
9
+
10
+ #include <stddef.h>
11
+
12
/*
 * Token kinds; h5_state.token_type holds one of these.
 * Order is significant: the implicit values run from
 * DATA_TEXT == 0 up to DOCTYPE == 9.
 */
enum html5_type {
    DATA_TEXT,
    TAG_NAME_OPEN,
    TAG_NAME_CLOSE,
    TAG_NAME_SELFCLOSE,
    TAG_DATA,
    TAG_CLOSE,
    ATTR_NAME,
    ATTR_VALUE,
    TAG_COMMENT,
    DOCTYPE
};
24
+
25
/*
 * Flag values accepted as the last argument of libinjection_h5_init().
 * Implicit values run from DATA_STATE == 0 up to VALUE_BACK_QUOTE == 4.
 * NOTE(review): the names suggest they pick the tokenizer's starting
 * context (plain data, or inside an attribute value with the given
 * quoting) -- confirm against the implementation.
 */
enum html5_flags {
    DATA_STATE,
    VALUE_NO_QUOTE,
    VALUE_SINGLE_QUOTE,
    VALUE_DOUBLE_QUOTE,
    VALUE_BACK_QUOTE
};
32
+
33
+ struct h5_state;
34
+ typedef int (*ptr_html5_state)(struct h5_state*);
35
+
36
+ typedef struct h5_state {
37
+ const char* s;
38
+ size_t len;
39
+ size_t pos;
40
+ int is_close;
41
+ ptr_html5_state state;
42
+ const char* token_start;
43
+ size_t token_len;
44
+ enum html5_type token_type;
45
+ } h5_state_t;
46
+
47
+
48
+ void libinjection_h5_init(h5_state_t* hs, const char* s, size_t len, enum html5_flags);
49
+ int libinjection_h5_next(h5_state_t* hs);
50
+
51
+ #ifdef __cplusplus
52
+ }
53
+ #endif
54
+ #endif
@@ -0,0 +1,2325 @@
1
+ /**
2
+ * Copyright 2012,2016 Nick Galbreath
3
+ * nickg@client9.com
4
+ * BSD License -- see COPYING.txt for details
5
+ *
6
+ * https://libinjection.client9.com/
7
+ *
8
+ */
9
+
10
+ #include <string.h>
11
+ #include <stdlib.h>
12
+ #include <stdio.h>
13
+ #include <ctype.h>
14
+ #include <assert.h>
15
+ #include <stddef.h>
16
+
17
+ #include "libinjection.h"
18
+ #include "libinjection_sqli.h"
19
+ #include "libinjection_sqli_data.h"
20
+
21
/* Version string of these libinjection sources. */
#define LIBINJECTION_VERSION "3.9.2"

/* Size in bytes of the stoken_t.val buffer, taken without needing an instance. */
#define LIBINJECTION_SQLI_TOKEN_SIZE sizeof(((stoken_t*)(0))->val)
#define LIBINJECTION_SQLI_MAX_TOKENS 5

/* Boolean constants, unless the environment already provides them. */
#if !defined(TRUE)
#define TRUE 1
#endif
#if !defined(FALSE)
#define FALSE 0
#endif

/* Frequently used character constants. */
#define CHAR_NULL    '\0'
#define CHAR_SINGLE  '\''
#define CHAR_DOUBLE  '"'
#define CHAR_TICK    '`'

/*
 * Locale-independent digit test, faster than calling libc isdigit().
 * The unsigned cast makes values below '0' wrap to large numbers,
 * so one comparison covers both ends of the range.
 */
#define ISDIGIT(a) ((unsigned)((a) - '0') <= 9)

/* Flip to "#if 1" to trace execution; FOLD_DEBUG expands to nothing otherwise. */
#if 0
#define FOLD_DEBUG printf("%d \t more=%d  pos=%d left=%d\n", __LINE__, more, (int)pos, (int)left);
#else
#define FOLD_DEBUG
#endif
46
+
47
/*
 * Token type codes (not making public just yet).
 *
 * Apart from TYPE_NONE, each code is the value of a printable
 * character.
 */
typedef enum {
    TYPE_NONE           = 0,
    TYPE_KEYWORD        = 'k',
    TYPE_UNION          = 'U',
    TYPE_GROUP          = 'B',
    TYPE_EXPRESSION     = 'E',
    TYPE_SQLTYPE        = 't',
    TYPE_FUNCTION       = 'f',
    TYPE_BAREWORD       = 'n',
    TYPE_NUMBER         = '1',
    TYPE_VARIABLE       = 'v',
    TYPE_STRING         = 's',
    TYPE_OPERATOR       = 'o',
    TYPE_LOGIC_OPERATOR = '&',
    TYPE_COMMENT        = 'c',
    TYPE_COLLATE        = 'A',
    TYPE_LEFTPARENS     = '(',
    TYPE_RIGHTPARENS    = ')',   /* not used? */
    TYPE_LEFTBRACE      = '{',
    TYPE_RIGHTBRACE     = '}',
    TYPE_DOT            = '.',
    TYPE_COMMA          = ',',
    TYPE_COLON          = ':',
    TYPE_SEMICOLON      = ';',
    TYPE_TSQL           = 'T',   /* TSQL start */
    TYPE_UNKNOWN        = '?',
    TYPE_EVIL           = 'X',   /* unparsable, abort */
    TYPE_FINGERPRINT    = 'F',   /* not really a token */
    TYPE_BACKSLASH      = '\\'
} sqli_token_types;
80
+
81
+ /**
82
+ * Initializes parsing state
83
+ *
84
+ */
85
+ static char flag2delim(int flag)
86
+ {
87
+ if (flag & FLAG_QUOTE_SINGLE) {
88
+ return CHAR_SINGLE;
89
+ } else if (flag & FLAG_QUOTE_DOUBLE) {
90
+ return CHAR_DOUBLE;
91
+ } else {
92
+ return CHAR_NULL;
93
+ }
94
+ }
95
+
96
/* memchr2 finds a string of 2 characters inside another string.
 * This is a specialized version of "memmem" or "memchr";
 * 'memmem' doesn't exist on all platforms.
 *
 * Porting notes: this is just a special version of
 *    astring.find("AB")
 *
 * @param haystack      buffer to search (need not be NUL-terminated)
 * @param haystack_len  number of valid bytes in haystack
 * @param c0, c1        the two adjacent bytes to look for
 * @return pointer to the first occurrence of c0 followed by c1,
 *         or NULL if there is none or the buffer is shorter than 2 bytes
 */
static const char *
memchr2(const char *haystack, size_t haystack_len, char c0, char c1)
{
    const char *cur;
    const char *last;

    if (haystack_len < 2) {
        return NULL;
    }

    /*
     * Only compute the end marker once the length is known to be >= 2;
     * for an empty buffer "haystack + 0 - 1" would point before the
     * object, which is undefined behaviour.
     */
    last = haystack + haystack_len - 1;

    for (cur = haystack; cur < last; ++cur) {
        /* safe: cur < last, so cur[1] is still inside the buffer */
        if (cur[0] == c0 && cur[1] == c1) {
            return cur;
        }
    }

    return NULL;
}
124
+
125
/**
 * Find the first occurrence of needle in haystack (both given as
 * pointer + length). Stand-in for memmem(), which might not exist on
 * some systems.
 *
 * @param haystack  buffer to search, must not be NULL
 * @param hlen      number of valid bytes in haystack
 * @param needle    bytes to look for, must not be NULL
 * @param nlen      length of needle, must be > 1
 * @return pointer to the first match, or NULL when there is none
 *         (including when the haystack is shorter than the needle)
 */
static const char *
my_memmem(const char* haystack, size_t hlen, const char* needle, size_t nlen)
{
    const char* cur;
    const char* last;
    assert(haystack);
    assert(needle);
    assert(nlen > 1);

    /*
     * A needle longer than the haystack can never match; returning
     * early also avoids forming "haystack + hlen - nlen", which would
     * point before the start of the buffer.
     */
    if (hlen < nlen) {
        return NULL;
    }

    last = haystack + (hlen - nlen);
    for (cur = haystack; cur <= last; ++cur) {
        /* cheap first-byte test before the full comparison */
        if (cur[0] == needle[0] && memcmp(cur, needle, nlen) == 0) {
            return cur;
        }
    }
    return NULL;
}
144
+
145
/** Length of the leading run of s made up only of bytes from accept.
 *
 * C Standard library 'strspn' only works for 'c-strings' (null terminated)
 * This works on arbitrary length.
 *
 * A NUL byte in the input always ends the run: strchr() treats the
 * terminator of `accept` as part of the string, so without the explicit
 * check every NUL byte in `s` would count as accepted.
 *
 * Performance notes:
 *   not critical
 *
 * Porting notes:
 *   if accept is 'ABC', then this function would be similar to
 *   a_regexp.match(a_str, '[ABC]*'),
 *
 * @param s       buffer to scan (need not be NUL-terminated)
 * @param len     number of valid bytes in s
 * @param accept  NUL-terminated set of accepted characters
 * @return number of leading bytes of s that are in accept (0..len)
 */
static size_t
strlenspn(const char *s, size_t len, const char *accept)
{
    size_t i;
    for (i = 0; i < len; ++i) {
        /* likely we can do better by inlining this function
         * but this works for now
         */
        if (s[i] == '\0' || strchr(accept, s[i]) == NULL) {
            return i;
        }
    }
    return len;
}
171
+
172
/**
 * Length of the leading run of s that contains none of the bytes in
 * accept (a length-bounded counterpart of strcspn).
 *
 * Because strchr() also matches the terminator of `accept`, a NUL byte
 * in the input always ends the run, whether or not it is listed.
 *
 * @param s       buffer to scan (need not be NUL-terminated)
 * @param len     number of valid bytes in s
 * @param accept  NUL-terminated set of characters that stop the scan
 * @return offset of the first stopping byte, or len if there is none
 */
static size_t
strlencspn(const char *s, size_t len, const char *accept)
{
    size_t run = 0;

    while (run < len && strchr(accept, s[run]) == NULL) {
        run += 1;
    }
    return run;
}
186
/*
 * Returns 1 when ch counts as SQL whitespace, 0 otherwise.
 *
 *   ' '   0x20  \040  space
 *   '\t'  0x09  \011  horizontal tab
 *   '\n'  0x0a  \012  new line
 *   '\v'  0x0b  \013  vertical tab
 *   '\f'  0x0c  \014  new page
 *   '\r'  0x0d  \015  carriage return
 *         0x00  \000  null (oracle)
 *         0xa0  \240  is Latin-1
 */
static int char_is_white(char ch) {
    switch (ch) {
    case ' ':
    case '\t':
    case '\n':
    case '\v':
    case '\f':
    case '\r':
    case '\240':
    case '\000':
        return 1;
    default:
        return 0;
    }
}
198
+
199
/* DANGER DANGER
 * This is a -very specialized function-
 *
 * It compares an ALL-UPPER-CASE, NUL-terminated C string `a`
 * against *arbitrary memory* `b` of length `n`, folding only the
 * ASCII letters a-z of `b` to upper case.
 *
 * Sane people would just make a copy, up-case
 * and use a hash table.
 *
 * Required since the libc version uses the current locale
 * and is much slower.
 *
 * Returns:
 *   0              `a` is exactly n characters long and matches `b`
 *   a[i] - B[i]    at the first mismatch (B = up-cased b)
 *   -1             `a` ends before n bytes while `b` holds a NUL
 *                  at that same position
 *   1              the first n characters match but `a` is longer
 */
static int cstrcasecmp(const char *a, const char *b, size_t n)
{
    size_t i = 0;

    while (i < n) {
        char folded = b[i];

        if (folded >= 'a' && folded <= 'z') {
            folded -= 0x20;
        }
        if (a[i] != folded) {
            return a[i] - folded;
        }
        if (a[i] == '\0') {
            return -1;
        }
        i += 1;
    }

    return (a[n] == 0) ? 0 : 1;
}
229
+
230
/**
 * Case-sensitive equality test for two NUL-terminated strings.
 * Exists purely so call sites read better than "strcmp(...) == 0".
 *
 * @return 1 when the strings are identical, 0 otherwise
 */
static int streq(const char *a, const char *b)
{
    const int order = strcmp(a, b);

    return order == 0;
}
238
+
239
+ /**
240
+ *
241
+ *
242
+ *
243
+ * Porting Notes:
244
+ * given a mapping/hash of string to char
245
+ * this is just
246
+ * typecode = mapping[key.upper()]
247
+ */
248
+
249
+ static char bsearch_keyword_type(const char *key, size_t len,
250
+ const keyword_t * keywords, size_t numb)
251
+ {
252
+ size_t pos;
253
+ size_t left = 0;
254
+ size_t right = numb - 1;
255
+
256
+ while (left < right) {
257
+ pos = (left + right) >> 1;
258
+
259
+ /* arg0 = upper case only, arg1 = mixed case */
260
+ if (cstrcasecmp(keywords[pos].word, key, len) < 0) {
261
+ left = pos + 1;
262
+ } else {
263
+ right = pos;
264
+ }
265
+ }
266
+ if ((left == right) && cstrcasecmp(keywords[left].word, key, len) == 0) {
267
+ return keywords[left].type;
268
+ } else {
269
+ return CHAR_NULL;
270
+ }
271
+ }
272
+
273
+ static char is_keyword(const char* key, size_t len)
274
+ {
275
+ return bsearch_keyword_type(key, len, sql_keywords, sql_keywords_sz);
276
+ }
277
+
278
+ /* st_token methods
279
+ *
280
+ * The following functions manipulate the stoken_t type
281
+ *
282
+ *
283
+ */
284
+
285
+ static void st_clear(stoken_t * st)
286
+ {
287
+ memset(st, 0, sizeof(stoken_t));
288
+ }
289
+
290
+ static void st_assign_char(stoken_t * st, const char stype, size_t pos, size_t len,
291
+ const char value)
292
+ {
293
+ /* done to eliminate unused warning */
294
+ (void)len;
295
+ st->type = (char) stype;
296
+ st->pos = pos;
297
+ st->len = 1;
298
+ st->val[0] = value;
299
+ st->val[1] = CHAR_NULL;
300
+ }
301
+
302
+ static void st_assign(stoken_t * st, const char stype,
303
+ size_t pos, size_t len, const char* value)
304
+ {
305
+ const size_t MSIZE = LIBINJECTION_SQLI_TOKEN_SIZE;
306
+ size_t last = len < MSIZE ? len : (MSIZE - 1);
307
+ st->type = (char) stype;
308
+ st->pos = pos;
309
+ st->len = last;
310
+ memcpy(st->val, value, last);
311
+ st->val[last] = CHAR_NULL;
312
+ }
313
+
314
+ static void st_copy(stoken_t * dest, const stoken_t * src)
315
+ {
316
+ memcpy(dest, src, sizeof(stoken_t));
317
+ }
318
+
319
+ static int st_is_arithmetic_op(const stoken_t* st)
320
+ {
321
+ const char ch = st->val[0];
322
+ return (st->type == TYPE_OPERATOR && st->len == 1 &&
323
+ (ch == '*' || ch == '/' || ch == '-' || ch == '+' || ch == '%'));
324
+ }
325
+
326
+ static int st_is_unary_op(const stoken_t * st)
327
+ {
328
+ const char* str = st->val;
329
+ const size_t len = st->len;
330
+
331
+ if (st->type != TYPE_OPERATOR) {
332
+ return FALSE;
333
+ }
334
+
335
+ switch (len) {
336
+ case 1:
337
+ return *str == '+' || *str == '-' || *str == '!' || *str == '~';
338
+ case 2:
339
+ return str[0] == '!' && str[1] == '!';
340
+ case 3:
341
+ return cstrcasecmp("NOT", str, 3) == 0;
342
+ default:
343
+ return FALSE;
344
+ }
345
+ }
346
+
347
+ /* Parsers
348
+ *
349
+ *
350
+ */
351
+
352
+ static size_t parse_white(struct libinjection_sqli_state * sf)
353
+ {
354
+ return sf->pos + 1;
355
+ }
356
+
357
+ static size_t parse_operator1(struct libinjection_sqli_state * sf)
358
+ {
359
+ const char *cs = sf->s;
360
+ size_t pos = sf->pos;
361
+
362
+ st_assign_char(sf->current, TYPE_OPERATOR, pos, 1, cs[pos]);
363
+ return pos + 1;
364
+ }
365
+
366
+ static size_t parse_other(struct libinjection_sqli_state * sf)
367
+ {
368
+ const char *cs = sf->s;
369
+ size_t pos = sf->pos;
370
+
371
+ st_assign_char(sf->current, TYPE_UNKNOWN, pos, 1, cs[pos]);
372
+ return pos + 1;
373
+ }
374
+
375
+ static size_t parse_char(struct libinjection_sqli_state * sf)
376
+ {
377
+ const char *cs = sf->s;
378
+ size_t pos = sf->pos;
379
+
380
+ st_assign_char(sf->current, cs[pos], pos, 1, cs[pos]);
381
+ return pos + 1;
382
+ }
383
+
384
+ static size_t parse_eol_comment(struct libinjection_sqli_state * sf)
385
+ {
386
+ const char *cs = sf->s;
387
+ const size_t slen = sf->slen;
388
+ size_t pos = sf->pos;
389
+
390
+ const char *endpos =
391
+ (const char *) memchr((const void *) (cs + pos), '\n', slen - pos);
392
+ if (endpos == NULL) {
393
+ st_assign(sf->current, TYPE_COMMENT, pos, slen - pos, cs + pos);
394
+ return slen;
395
+ } else {
396
+ st_assign(sf->current, TYPE_COMMENT, pos, (size_t)(endpos - cs) - pos, cs + pos);
397
+ return (size_t)((endpos - cs) + 1);
398
+ }
399
+ }
400
+
401
+ /** In ANSI mode, hash is an operator
402
+ * In MYSQL mode, it's a EOL comment like '--'
403
+ */
404
+ static size_t parse_hash(struct libinjection_sqli_state * sf)
405
+ {
406
+ sf->stats_comment_hash += 1;
407
+ if (sf->flags & FLAG_SQL_MYSQL) {
408
+ sf->stats_comment_hash += 1;
409
+ return parse_eol_comment(sf);
410
+ } else {
411
+ st_assign_char(sf->current, TYPE_OPERATOR, sf->pos, 1, '#');
412
+ return sf->pos + 1;
413
+ }
414
+ }
415
+
416
+ static size_t parse_dash(struct libinjection_sqli_state * sf)
417
+ {
418
+ const char *cs = sf->s;
419
+ const size_t slen = sf->slen;
420
+ size_t pos = sf->pos;
421
+
422
+ /*
423
+ * five cases
424
+ * 1) --[white] this is always a SQL comment
425
+ * 2) --[EOF] this is a comment
426
+ * 3) --[notwhite] in MySQL this is NOT a comment but two unary operators
427
+ * 4) --[notwhite] everyone else thinks this is a comment
428
+ * 5) -[not dash] '-' is a unary operator
429
+ */
430
+
431
+ if (pos + 2 < slen && cs[pos + 1] == '-' && char_is_white(cs[pos+2]) ) {
432
+ return parse_eol_comment(sf);
433
+ } else if (pos +2 == slen && cs[pos + 1] == '-') {
434
+ return parse_eol_comment(sf);
435
+ } else if (pos + 1 < slen && cs[pos + 1] == '-' && (sf->flags & FLAG_SQL_ANSI)) {
436
+ /* --[not-white] not-white case:
437
+ *
438
+ */
439
+ sf->stats_comment_ddx += 1;
440
+ return parse_eol_comment(sf);
441
+ } else {
442
+ st_assign_char(sf->current, TYPE_OPERATOR, pos, 1, '-');
443
+ return pos + 1;
444
+ }
445
+ }
446
+
447
+
448
/** Detects MySQL "conditional" comments, i.e. comments that start
 * with slash-star-bang. These are simply banned now; earlier versions
 * attempted to parse the inside.
 *
 * For reference:
 * the form of /x![anything]x/ or /x!12345[anything] x/
 *
 * Mysql 3 (maybe 4), allowed this:
 *    /x!0selectx/ 1;
 * where 0 could be any number.
 *
 * The last version of MySQL 3 was in 2003.
 * It is unclear if the MySQL 3 syntax was allowed
 * in MySQL 4.  The last version of MySQL 4 was in 2008
 *
 * Precondition: cs[pos] and cs[pos+1] are the comment opener.
 *
 * @return 1 when a '!' directly follows the opener, 0 otherwise
 *         (including when the input ends right after the opener)
 */
static size_t is_mysql_comment(const char *cs, const size_t len, size_t pos)
{
    const size_t bang_at = pos + 2;

    if (bang_at < len && cs[bang_at] == '!') {
        return 1;
    }
    return 0;
}
487
+
488
+ static size_t parse_slash(struct libinjection_sqli_state * sf)
489
+ {
490
+ const char* ptr;
491
+ size_t clen;
492
+ const char *cs = sf->s;
493
+ const size_t slen = sf->slen;
494
+ size_t pos = sf->pos;
495
+ const char* cur = cs + pos;
496
+ char ctype = TYPE_COMMENT;
497
+ size_t pos1 = pos + 1;
498
+ if (pos1 == slen || cs[pos1] != '*') {
499
+ return parse_operator1(sf);
500
+ }
501
+
502
+ /*
503
+ * skip over initial '/x'
504
+ */
505
+ ptr = memchr2(cur + 2, slen - (pos + 2), '*', '/');
506
+
507
+ /*
508
+ * (ptr == NULL) causes false positive in cppcheck 1.61
509
+ * casting to type seems to fix it
510
+ */
511
+ if (ptr == (const char*) NULL) {
512
+ /* till end of line */
513
+ clen = slen - pos;
514
+ } else {
515
+ clen = (size_t)(ptr + 2 - cur);
516
+ }
517
+
518
+ /*
519
+ * postgresql allows nested comments which makes
520
+ * this is incompatible with parsing so
521
+ * if we find a '/x' inside the coment, then
522
+ * make a new token.
523
+ *
524
+ * Also, Mysql's "conditional" comments for version
525
+ * are an automatic black ban!
526
+ */
527
+
528
+ if (memchr2(cur + 2, (size_t)(ptr - (cur + 1)), '/', '*') != NULL) {
529
+ ctype = TYPE_EVIL;
530
+ } else if (is_mysql_comment(cs, slen, pos)) {
531
+ ctype = TYPE_EVIL;
532
+ }
533
+
534
+ st_assign(sf->current, ctype, pos, clen, cs + pos);
535
+ return pos + clen;
536
+ }
537
+
538
+
539
+ static size_t parse_backslash(struct libinjection_sqli_state * sf)
540
+ {
541
+ const char *cs = sf->s;
542
+ const size_t slen = sf->slen;
543
+ size_t pos = sf->pos;
544
+
545
+ /*
546
+ * Weird MySQL alias for NULL, "\N" (capital N only)
547
+ */
548
+ if (pos + 1 < slen && cs[pos +1] == 'N') {
549
+ st_assign(sf->current, TYPE_NUMBER, pos, 2, cs + pos);
550
+ return pos + 2;
551
+ } else {
552
+ st_assign_char(sf->current, TYPE_BACKSLASH, pos, 1, cs[pos]);
553
+ return pos + 1;
554
+ }
555
+ }
556
+
557
+ static size_t parse_operator2(struct libinjection_sqli_state * sf)
558
+ {
559
+ char ch;
560
+ const char *cs = sf->s;
561
+ const size_t slen = sf->slen;
562
+ size_t pos = sf->pos;
563
+
564
+ if (pos + 1 >= slen) {
565
+ return parse_operator1(sf);
566
+ }
567
+
568
+ if (pos + 2 < slen &&
569
+ cs[pos] == '<' &&
570
+ cs[pos + 1] == '=' &&
571
+ cs[pos + 2] == '>') {
572
+ /*
573
+ * special 3-char operator
574
+ */
575
+ st_assign(sf->current, TYPE_OPERATOR, pos, 3, cs + pos);
576
+ return pos + 3;
577
+ }
578
+
579
+ ch = sf->lookup(sf, LOOKUP_OPERATOR, cs + pos, 2);
580
+ if (ch != CHAR_NULL) {
581
+ st_assign(sf->current, ch, pos, 2, cs+pos);
582
+ return pos + 2;
583
+ }
584
+
585
+ /*
586
+ * not an operator.. what to do with the two
587
+ * characters we got?
588
+ */
589
+
590
+ if (cs[pos] == ':') {
591
+ /* ':' is not an operator */
592
+ st_assign(sf->current, TYPE_COLON, pos, 1, cs+pos);
593
+ return pos + 1;
594
+ } else {
595
+ /*
596
+ * must be a single char operator
597
+ */
598
+ return parse_operator1(sf);
599
+ }
600
+ }
601
+
602
/*
 * Decide whether the character following `end` is backslash-escaped
 * by counting the unbroken run of backslashes that ends at `end`,
 * looking no further back than `start`.
 *
 * Ok!   " \"   "  one backslash = escaped!
 *       " \\"  "  two backslash = not escaped!
 *       "\\\"  "  three backslash = escaped!
 *
 * Returns 1 for an odd number of backslashes (escaped), 0 for an even
 * number (including none).
 */
static int is_backslash_escaped(const char* end, const char* start)
{
    const char* scan = end;

    while (scan >= start && *scan == '\\') {
        scan -= 1;
    }

    /* end - scan is the number of backslashes stepped over */
    return (end - scan) & 1;
}
619
+
620
/*
 * Returns 1 when the character at `cur` is immediately followed,
 * before `end`, by an identical character (a doubled delimiter such
 * as '' inside a string); returns 0 otherwise.
 */
static size_t is_double_delim_escaped(const char* cur, const char* end)
{
    const char* next = cur + 1;

    if (next >= end) {
        return 0;
    }
    return next[0] == cur[0];
}
624
+
625
+ /* Look forward for doubling of delimiter
626
+ *
627
+ * case 'foo''bar' --> foo''bar
628
+ *
629
+ * ending quote isn't duplicated (i.e. escaped)
630
+ * since it's the wrong char or EOL
631
+ *
632
+ */
633
+ static size_t parse_string_core(const char *cs, const size_t len, size_t pos,
634
+ stoken_t * st, char delim, size_t offset)
635
+ {
636
+ /*
637
+ * offset is to skip the perhaps first quote char
638
+ */
639
+ const char *qpos =
640
+ (const char *) memchr((const void *) (cs + pos + offset), delim,
641
+ len - pos - offset);
642
+
643
+ /*
644
+ * then keep string open/close info
645
+ */
646
+ if (offset > 0) {
647
+ /*
648
+ * this is real quote
649
+ */
650
+ st->str_open = delim;
651
+ } else {
652
+ /*
653
+ * this was a simulated quote
654
+ */
655
+ st->str_open = CHAR_NULL;
656
+ }
657
+
658
+ while (TRUE) {
659
+ if (qpos == NULL) {
660
+ /*
661
+ * string ended with no trailing quote
662
+ * assign what we have
663
+ */
664
+ st_assign(st, TYPE_STRING, pos + offset, len - pos - offset, cs + pos + offset);
665
+ st->str_close = CHAR_NULL;
666
+ return len;
667
+ } else if ( is_backslash_escaped(qpos - 1, cs + pos + offset)) {
668
+ /* keep going, move ahead one character */
669
+ qpos =
670
+ (const char *) memchr((const void *) (qpos + 1), delim,
671
+ (size_t)((cs + len) - (qpos + 1)));
672
+ continue;
673
+ } else if (is_double_delim_escaped(qpos, cs + len)) {
674
+ /* keep going, move ahead two characters */
675
+ qpos =
676
+ (const char *) memchr((const void *) (qpos + 2), delim,
677
+ (size_t)((cs + len) - (qpos + 2)));
678
+ continue;
679
+ } else {
680
+ /* hey it's a normal string */
681
+ st_assign(st, TYPE_STRING, pos + offset,
682
+ (size_t)(qpos - (cs + pos + offset)), cs + pos + offset);
683
+ st->str_close = delim;
684
+ return (size_t)(qpos - cs + 1);
685
+ }
686
+ }
687
+ }
688
+
689
+ /**
690
+ * Used when first char is a ' or "
691
+ */
692
+ static size_t parse_string(struct libinjection_sqli_state * sf)
693
+ {
694
+ const char *cs = sf->s;
695
+ const size_t slen = sf->slen;
696
+ size_t pos = sf->pos;
697
+
698
+ /*
699
+ * assert cs[pos] == single or double quote
700
+ */
701
+ return parse_string_core(cs, slen, pos, sf->current, cs[pos], 1);
702
+ }
703
+
704
+ /**
705
+ * Used when first char is:
706
+ * N or n: mysql "National Character set"
707
+ * E : psql "Escaped String"
708
+ */
709
+ static size_t parse_estring(struct libinjection_sqli_state * sf)
710
+ {
711
+ const char *cs = sf->s;
712
+ const size_t slen = sf->slen;
713
+ size_t pos = sf->pos;
714
+
715
+ if (pos + 2 >= slen || cs[pos+1] != CHAR_SINGLE) {
716
+ return parse_word(sf);
717
+ }
718
+ return parse_string_core(cs, slen, pos, sf->current, CHAR_SINGLE, 2);
719
+ }
720
+
721
+ static size_t parse_ustring(struct libinjection_sqli_state * sf)
722
+ {
723
+ const char *cs = sf->s;
724
+ size_t slen = sf->slen;
725
+ size_t pos = sf->pos;
726
+
727
+ if (pos + 2 < slen && cs[pos+1] == '&' && cs[pos+2] == '\'') {
728
+ sf->pos += 2;
729
+ pos = parse_string(sf);
730
+ sf->current->str_open = 'u';
731
+ if (sf->current->str_close == '\'') {
732
+ sf->current->str_close = 'u';
733
+ }
734
+ return pos;
735
+ } else {
736
+ return parse_word(sf);
737
+ }
738
+ }
739
+
740
+ static size_t parse_qstring_core(struct libinjection_sqli_state * sf, size_t offset)
741
+ {
742
+ char ch;
743
+ const char *strend;
744
+ const char *cs = sf->s;
745
+ size_t slen = sf->slen;
746
+ size_t pos = sf->pos + offset;
747
+
748
+ /* if we are already at end of string..
749
+ if current char is not q or Q
750
+ if we don't have 2 more chars
751
+ if char2 != a single quote
752
+ then, just treat as word
753
+ */
754
+ if (pos >= slen ||
755
+ (cs[pos] != 'q' && cs[pos] != 'Q') ||
756
+ pos + 2 >= slen ||
757
+ cs[pos + 1] != '\'') {
758
+ return parse_word(sf);
759
+ }
760
+
761
+ ch = cs[pos + 2];
762
+
763
+ /* the ch > 127 is un-needed since
764
+ * we assume char is signed
765
+ */
766
+ if (ch < 33 /* || ch > 127 */) {
767
+ return parse_word(sf);
768
+ }
769
+ switch (ch) {
770
+ case '(' : ch = ')'; break;
771
+ case '[' : ch = ']'; break;
772
+ case '{' : ch = '}'; break;
773
+ case '<' : ch = '>'; break;
774
+ }
775
+
776
+ strend = memchr2(cs + pos + 3, slen - pos - 3, ch, '\'');
777
+ if (strend == NULL) {
778
+ st_assign(sf->current, TYPE_STRING, pos + 3, slen - pos - 3, cs + pos + 3);
779
+ sf->current->str_open = 'q';
780
+ sf->current->str_close = CHAR_NULL;
781
+ return slen;
782
+ } else {
783
+ st_assign(sf->current, TYPE_STRING, pos + 3, (size_t)(strend - cs) - pos - 3, cs + pos + 3);
784
+ sf->current->str_open = 'q';
785
+ sf->current->str_close = 'q';
786
+ return (size_t)(strend - cs + 2);
787
+ }
788
+ }
789
+
790
/*
 * Oracle's q string: the 'q' is at the current position, so there are
 * no prefix characters to skip.
 */
static size_t parse_qstring(struct libinjection_sqli_state * sf)
{
    const size_t prefix_len = 0;

    return parse_qstring_core(sf, prefix_len);
}
797
+
798
+ /*
799
+ * mysql's N'STRING' or
800
+ * ... Oracle's nq string
801
+ */
802
+ static size_t parse_nqstring(struct libinjection_sqli_state * sf)
803
+ {
804
+ size_t slen = sf->slen;
805
+ size_t pos = sf->pos;
806
+ if (pos + 2 < slen && sf->s[pos+1] == CHAR_SINGLE) {
807
+ return parse_estring(sf);
808
+ }
809
+ return parse_qstring_core(sf, 1);
810
+ }
811
+
812
+ /*
813
+ * binary literal string
814
+ * re: [bB]'[01]*'
815
+ */
816
+ static size_t parse_bstring(struct libinjection_sqli_state *sf)
817
+ {
818
+ size_t wlen;
819
+ const char *cs = sf->s;
820
+ size_t pos = sf->pos;
821
+ size_t slen = sf->slen;
822
+
823
+ /* need at least 2 more characters
824
+ * if next char isn't a single quote, then
825
+ * continue as normal word
826
+ */
827
+ if (pos + 2 >= slen || cs[pos+1] != '\'') {
828
+ return parse_word(sf);
829
+ }
830
+
831
+ wlen = strlenspn(cs + pos + 2, sf->slen - pos - 2, "01");
832
+ if (pos + 2 + wlen >= slen || cs[pos + 2 + wlen] != '\'') {
833
+ return parse_word(sf);
834
+ }
835
+ st_assign(sf->current, TYPE_NUMBER, pos, wlen + 3, cs + pos);
836
+ return pos + 2 + wlen + 1;
837
+ }
838
+
839
+ /*
840
+ * hex literal string
841
+ * re: [xX]'[0123456789abcdefABCDEF]*'
842
+ * mysql has requirement of having EVEN number of chars,
843
+ * but pgsql does not
844
+ */
845
+ static size_t parse_xstring(struct libinjection_sqli_state *sf)
846
+ {
847
+ size_t wlen;
848
+ const char *cs = sf->s;
849
+ size_t pos = sf->pos;
850
+ size_t slen = sf->slen;
851
+
852
+ /* need at least 2 more characters
853
+ * if next char isn't a single quote, then
854
+ * continue as normal word
855
+ */
856
+ if (pos + 2 >= slen || cs[pos+1] != '\'') {
857
+ return parse_word(sf);
858
+ }
859
+
860
+ wlen = strlenspn(cs + pos + 2, sf->slen - pos - 2, "0123456789ABCDEFabcdef");
861
+ if (pos + 2 + wlen >= slen || cs[pos + 2 + wlen] != '\'') {
862
+ return parse_word(sf);
863
+ }
864
+ st_assign(sf->current, TYPE_NUMBER, pos, wlen + 3, cs + pos);
865
+ return pos + 2 + wlen + 1;
866
+ }
867
+
868
+ /**
869
+ * This handles MS SQLSERVER bracket words
870
+ * http://stackoverflow.com/questions/3551284/sql-serverwhat-do-brackets-mean-around-column-name
871
+ *
872
+ */
873
+ static size_t parse_bword(struct libinjection_sqli_state * sf)
874
+ {
875
+ const char *cs = sf->s;
876
+ size_t pos = sf->pos;
877
+ const char* endptr = (const char*) memchr(cs + pos, ']', sf->slen - pos);
878
+ if (endptr == NULL) {
879
+ st_assign(sf->current, TYPE_BAREWORD, pos, sf->slen - pos, cs + pos);
880
+ return sf->slen;
881
+ } else {
882
+ st_assign(sf->current, TYPE_BAREWORD, pos, (size_t)(endptr - cs) - pos + 1, cs + pos);
883
+ return (size_t)((endptr - cs) + 1);
884
+ }
885
+ }
886
+
887
+ static size_t parse_word(struct libinjection_sqli_state * sf)
888
+ {
889
+ char ch;
890
+ char delim;
891
+ size_t i;
892
+ const char *cs = sf->s;
893
+ size_t pos = sf->pos;
894
+ size_t wlen = strlencspn(cs + pos, sf->slen - pos,
895
+ " []{}<>:\\?=@!#~+-*/&|^%(),';\t\n\v\f\r\"\240\000");
896
+
897
+ st_assign(sf->current, TYPE_BAREWORD, pos, wlen, cs + pos);
898
+
899
+ /* now we need to look inside what we good for "." and "`"
900
+ * and see if what is before is a keyword or not
901
+ */
902
+ for (i =0; i < sf->current->len; ++i) {
903
+ delim = sf->current->val[i];
904
+ if (delim == '.' || delim == '`') {
905
+ ch = sf->lookup(sf, LOOKUP_WORD, sf->current->val, i);
906
+ if (ch != TYPE_NONE && ch != TYPE_BAREWORD) {
907
+ /* needed for swig */
908
+ st_clear(sf->current);
909
+ /*
910
+ * we got something like "SELECT.1"
911
+ * or SELECT`column`
912
+ */
913
+ st_assign(sf->current, ch, pos, i, cs + pos);
914
+ return pos + i;
915
+ }
916
+ }
917
+ }
918
+
919
+ /*
920
+ * do normal lookup with word including '.'
921
+ */
922
+ if (wlen < LIBINJECTION_SQLI_TOKEN_SIZE) {
923
+
924
+ ch = sf->lookup(sf, LOOKUP_WORD, sf->current->val, wlen);
925
+ if (ch == CHAR_NULL) {
926
+ ch = TYPE_BAREWORD;
927
+ }
928
+ sf->current->type = ch;
929
+ }
930
+ return pos + wlen;
931
+ }
932
+
933
+ /* MySQL backticks are a cross between string and
934
+ * and a bare word.
935
+ *
936
+ */
937
+ static size_t parse_tick(struct libinjection_sqli_state* sf)
938
+ {
939
+ size_t pos = parse_string_core(sf->s, sf->slen, sf->pos, sf->current, CHAR_TICK, 1);
940
+
941
+ /* we could check to see if start and end of
942
+ * of string are both "`", i.e. make sure we have
943
+ * matching set. `foo` vs. `foo
944
+ * but I don't think it matters much
945
+ */
946
+
947
+ /* check value of string to see if it's a keyword,
948
+ * function, operator, etc
949
+ */
950
+ char ch = sf->lookup(sf, LOOKUP_WORD, sf->current->val, sf->current->len);
951
+ if (ch == TYPE_FUNCTION) {
952
+ /* if it's a function, then convert token */
953
+ sf->current->type = TYPE_FUNCTION;
954
+ } else {
955
+ /* otherwise it's a 'n' type -- mysql treats
956
+ * everything as a bare word
957
+ */
958
+ sf->current->type = TYPE_BAREWORD;
959
+ }
960
+ return pos;
961
+ }
962
+
963
+ static size_t parse_var(struct libinjection_sqli_state * sf)
964
+ {
965
+ size_t xlen;
966
+ const char *cs = sf->s;
967
+ const size_t slen = sf->slen;
968
+ size_t pos = sf->pos + 1;
969
+
970
+ /*
971
+ * var_count is only used to reconstruct
972
+ * the input. It counts the number of '@'
973
+ * seen 0 in the case of NULL, 1 or 2
974
+ */
975
+
976
+ /*
977
+ * move past optional other '@'
978
+ */
979
+ if (pos < slen && cs[pos] == '@') {
980
+ pos += 1;
981
+ sf->current->count = 2;
982
+ } else {
983
+ sf->current->count = 1;
984
+ }
985
+
986
+ /*
987
+ * MySQL allows @@`version`
988
+ */
989
+ if (pos < slen) {
990
+ if (cs[pos] == '`') {
991
+ sf->pos = pos;
992
+ pos = parse_tick(sf);
993
+ sf->current->type = TYPE_VARIABLE;
994
+ return pos;
995
+ } else if (cs[pos] == CHAR_SINGLE || cs[pos] == CHAR_DOUBLE) {
996
+ sf->pos = pos;
997
+ pos = parse_string(sf);
998
+ sf->current->type = TYPE_VARIABLE;
999
+ return pos;
1000
+ }
1001
+ }
1002
+
1003
+
1004
+ xlen = strlencspn(cs + pos, slen - pos,
1005
+ " <>:\\?=@!#~+-*/&|^%(),';\t\n\v\f\r'`\"");
1006
+ if (xlen == 0) {
1007
+ st_assign(sf->current, TYPE_VARIABLE, pos, 0, cs + pos);
1008
+ return pos;
1009
+ } else {
1010
+ st_assign(sf->current, TYPE_VARIABLE, pos, xlen, cs + pos);
1011
+ return pos + xlen;
1012
+ }
1013
+ }
1014
+
1015
+ static size_t parse_money(struct libinjection_sqli_state *sf)
1016
+ {
1017
+ size_t xlen;
1018
+ const char* strend;
1019
+ const char *cs = sf->s;
1020
+ const size_t slen = sf->slen;
1021
+ size_t pos = sf->pos;
1022
+
1023
+ if (pos + 1 == slen) {
1024
+ /* end of line */
1025
+ st_assign_char(sf->current, TYPE_BAREWORD, pos, 1, '$');
1026
+ return slen;
1027
+ }
1028
+
1029
+ /*
1030
+ * $1,000.00 or $1.000,00 ok!
1031
+ * This also parses $....,,,111 but that's ok
1032
+ */
1033
+
1034
+ xlen = strlenspn(cs + pos + 1, slen - pos - 1, "0123456789.,");
1035
+ if (xlen == 0) {
1036
+ if (cs[pos + 1] == '$') {
1037
+ /* we have $$ .. find ending $$ and make string */
1038
+ strend = memchr2(cs + pos + 2, slen - pos -2, '$', '$');
1039
+ if (strend == NULL) {
1040
+ /* fell off edge */
1041
+ st_assign(sf->current, TYPE_STRING, pos + 2, slen - (pos + 2), cs + pos + 2);
1042
+ sf->current->str_open = '$';
1043
+ sf->current->str_close = CHAR_NULL;
1044
+ return slen;
1045
+ } else {
1046
+ st_assign(sf->current, TYPE_STRING, pos + 2,
1047
+ (size_t)(strend - (cs + pos + 2)), cs + pos + 2);
1048
+ sf->current->str_open = '$';
1049
+ sf->current->str_close = '$';
1050
+ return (size_t)(strend - cs + 2);
1051
+ }
1052
+ } else {
1053
+ /* ok it's not a number or '$$', but maybe it's pgsql "$ quoted strings" */
1054
+ xlen = strlenspn(cs + pos + 1, slen - pos - 1, "abcdefghjiklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");
1055
+ if (xlen == 0) {
1056
+ /* hmm it's "$" _something_ .. just add $ and keep going*/
1057
+ st_assign_char(sf->current, TYPE_BAREWORD, pos, 1, '$');
1058
+ return pos + 1;
1059
+ }
1060
+ /* we have $foobar????? */
1061
+ /* is it $foobar$ */
1062
+ if (pos + xlen + 1 == slen || cs[pos+xlen+1] != '$') {
1063
+ /* not $foobar$, or fell off edge */
1064
+ st_assign_char(sf->current, TYPE_BAREWORD, pos, 1, '$');
1065
+ return pos + 1;
1066
+ }
1067
+
1068
+ /* we have $foobar$ ... find it again */
1069
+ strend = my_memmem(cs+xlen+2, slen - (pos+xlen+2), cs + pos, xlen+2);
1070
+
1071
+ if (strend == NULL || ((size_t)(strend - cs) < (pos+xlen+2))) {
1072
+ /* fell off edge */
1073
+ st_assign(sf->current, TYPE_STRING, pos+xlen+2, slen - pos - xlen - 2, cs+pos+xlen+2);
1074
+ sf->current->str_open = '$';
1075
+ sf->current->str_close = CHAR_NULL;
1076
+ return slen;
1077
+ } else {
1078
+ /* got one */
1079
+ st_assign(sf->current, TYPE_STRING, pos+xlen+2,
1080
+ (size_t)(strend - (cs + pos + xlen + 2)), cs+pos+xlen+2);
1081
+ sf->current->str_open = '$';
1082
+ sf->current->str_close = '$';
1083
+ return (size_t)((strend + xlen + 2) - cs);
1084
+ }
1085
+ }
1086
+ } else if (xlen == 1 && cs[pos + 1] == '.') {
1087
+ /* $. should parsed as a word */
1088
+ return parse_word(sf);
1089
+ } else {
1090
+ st_assign(sf->current, TYPE_NUMBER, pos, 1 + xlen, cs + pos);
1091
+ return pos + 1 + xlen;
1092
+ }
1093
+ }
1094
+
1095
+ static size_t parse_number(struct libinjection_sqli_state * sf)
1096
+ {
1097
+ size_t xlen;
1098
+ size_t start;
1099
+ const char* digits = NULL;
1100
+ const char *cs = sf->s;
1101
+ const size_t slen = sf->slen;
1102
+ size_t pos = sf->pos;
1103
+ int have_e = 0;
1104
+ int have_exp = 0;
1105
+
1106
+ /* cs[pos] == '0' has 1/10 chance of being true,
1107
+ * while pos+1< slen is almost always true
1108
+ */
1109
+ if (cs[pos] == '0' && pos + 1 < slen) {
1110
+ if (cs[pos + 1] == 'X' || cs[pos + 1] == 'x') {
1111
+ digits = "0123456789ABCDEFabcdef";
1112
+ } else if (cs[pos + 1] == 'B' || cs[pos + 1] == 'b') {
1113
+ digits = "01";
1114
+ }
1115
+
1116
+ if (digits) {
1117
+ xlen = strlenspn(cs + pos + 2, slen - pos - 2, digits);
1118
+ if (xlen == 0) {
1119
+ st_assign(sf->current, TYPE_BAREWORD, pos, 2, cs + pos);
1120
+ return pos + 2;
1121
+ } else {
1122
+ st_assign(sf->current, TYPE_NUMBER, pos, 2 + xlen, cs + pos);
1123
+ return pos + 2 + xlen;
1124
+ }
1125
+ }
1126
+ }
1127
+
1128
+ start = pos;
1129
+ while (pos < slen && ISDIGIT(cs[pos])) {
1130
+ pos += 1;
1131
+ }
1132
+
1133
+ if (pos < slen && cs[pos] == '.') {
1134
+ pos += 1;
1135
+ while (pos < slen && ISDIGIT(cs[pos])) {
1136
+ pos += 1;
1137
+ }
1138
+ if (pos - start == 1) {
1139
+ /* only one character read so far */
1140
+ st_assign_char(sf->current, TYPE_DOT, start, 1, '.');
1141
+ return pos;
1142
+ }
1143
+ }
1144
+
1145
+ if (pos < slen) {
1146
+ if (cs[pos] == 'E' || cs[pos] == 'e') {
1147
+ have_e = 1;
1148
+ pos += 1;
1149
+ if (pos < slen && (cs[pos] == '+' || cs[pos] == '-')) {
1150
+ pos += 1;
1151
+ }
1152
+ while (pos < slen && ISDIGIT(cs[pos])) {
1153
+ have_exp = 1;
1154
+ pos += 1;
1155
+ }
1156
+ }
1157
+ }
1158
+
1159
+ /* oracle's ending float or double suffix
1160
+ * http://docs.oracle.com/cd/B19306_01/server.102/b14200/sql_elements003.htm#i139891
1161
+ */
1162
+ if (pos < slen && (cs[pos] == 'd' || cs[pos] == 'D' || cs[pos] == 'f' || cs[pos] == 'F')) {
1163
+ if (pos + 1 == slen) {
1164
+ /* line ends evaluate "... 1.2f$" as '1.2f' */
1165
+ pos += 1;
1166
+ } else if ((char_is_white(cs[pos+1]) || cs[pos+1] == ';')) {
1167
+ /*
1168
+ * easy case, evaluate "... 1.2f ... as '1.2f'
1169
+ */
1170
+ pos += 1;
1171
+ } else if (cs[pos+1] == 'u' || cs[pos+1] == 'U') {
1172
+ /*
1173
+ * a bit of a hack but makes '1fUNION' parse as '1f UNION'
1174
+ */
1175
+ pos += 1;
1176
+ } else {
1177
+ /* it's like "123FROM" */
1178
+ /* parse as "123" only */
1179
+ }
1180
+ }
1181
+
1182
+ if (have_e == 1 && have_exp == 0) {
1183
+ /* very special form of
1184
+ * "1234.e"
1185
+ * "10.10E"
1186
+ * ".E"
1187
+ * this is a WORD not a number!! */
1188
+ st_assign(sf->current, TYPE_BAREWORD, start, pos - start, cs + start);
1189
+ } else {
1190
+ st_assign(sf->current, TYPE_NUMBER, start, pos - start, cs + start);
1191
+ }
1192
+ return pos;
1193
+ }
1194
+
1195
+ /*
1196
+ * API to return version. This allows us to increment the version
1197
+ * without having to regenerated the SWIG (or other binding) in minor
1198
+ * releases.
1199
+ */
1200
+ const char* libinjection_version()
1201
+ {
1202
+ return LIBINJECTION_VERSION;
1203
+ }
1204
+
1205
+ int libinjection_sqli_tokenize(struct libinjection_sqli_state * sf)
1206
+ {
1207
+ pt2Function fnptr;
1208
+ size_t *pos = &sf->pos;
1209
+ stoken_t *current = sf->current;
1210
+ const char *s = sf->s;
1211
+ const size_t slen = sf->slen;
1212
+
1213
+ if (slen == 0) {
1214
+ return FALSE;
1215
+ }
1216
+
1217
+ st_clear(current);
1218
+ sf->current = current;
1219
+
1220
+ /*
1221
+ * if we are at beginning of string
1222
+ * and in single-quote or double quote mode
1223
+ * then pretend the input starts with a quote
1224
+ */
1225
+ if (*pos == 0 && (sf->flags & (FLAG_QUOTE_SINGLE | FLAG_QUOTE_DOUBLE))) {
1226
+ *pos = parse_string_core(s, slen, 0, current, flag2delim(sf->flags), 0);
1227
+ sf->stats_tokens += 1;
1228
+ return TRUE;
1229
+ }
1230
+
1231
+ while (*pos < slen) {
1232
+
1233
+ /*
1234
+ * get current character
1235
+ */
1236
+ const unsigned char ch = (unsigned char) (s[*pos]);
1237
+
1238
+ /*
1239
+ * look up the parser, and call it
1240
+ *
1241
+ * Porting Note: this is mapping of char to function
1242
+ * charparsers[ch]()
1243
+ */
1244
+ fnptr = char_parse_map[ch];
1245
+
1246
+ *pos = (*fnptr) (sf);
1247
+
1248
+ /*
1249
+ *
1250
+ */
1251
+ if (current->type != CHAR_NULL) {
1252
+ sf->stats_tokens += 1;
1253
+ return TRUE;
1254
+ }
1255
+ }
1256
+ return FALSE;
1257
+ }
1258
+
1259
+ void libinjection_sqli_init(struct libinjection_sqli_state * sf, const char *s, size_t len, int flags)
1260
+ {
1261
+ if (flags == 0) {
1262
+ flags = FLAG_QUOTE_NONE | FLAG_SQL_ANSI;
1263
+ }
1264
+
1265
+ memset(sf, 0, sizeof(struct libinjection_sqli_state));
1266
+ sf->s = s;
1267
+ sf->slen = len;
1268
+ sf->lookup = libinjection_sqli_lookup_word;
1269
+ sf->userdata = 0;
1270
+ sf->flags = flags;
1271
+ sf->current = &(sf->tokenvec[0]);
1272
+ }
1273
+
1274
+ void libinjection_sqli_reset(struct libinjection_sqli_state * sf, int flags)
1275
+ {
1276
+ void *userdata = sf->userdata;
1277
+ ptr_lookup_fn lookup = sf->lookup;;
1278
+
1279
+ if (flags == 0) {
1280
+ flags = FLAG_QUOTE_NONE | FLAG_SQL_ANSI;
1281
+ }
1282
+ libinjection_sqli_init(sf, sf->s, sf->slen, flags);
1283
+ sf->lookup = lookup;
1284
+ sf->userdata = userdata;
1285
+ }
1286
+
1287
+ void libinjection_sqli_callback(struct libinjection_sqli_state * sf, ptr_lookup_fn fn, void* userdata)
1288
+ {
1289
+ if (fn == NULL) {
1290
+ sf->lookup = libinjection_sqli_lookup_word;
1291
+ sf->userdata = (void*)(NULL);
1292
+ } else {
1293
+ sf->lookup = fn;
1294
+ sf->userdata = userdata;
1295
+ }
1296
+ }
1297
+
1298
+ /** See if two tokens can be merged since they are compound SQL phrases.
1299
+ *
1300
+ * This takes two tokens, and, if they are the right type,
1301
+ * merges their values together. Then checks to see if the
1302
+ * new value is special using the PHRASES mapping.
1303
+ *
1304
+ * Example: "UNION" + "ALL" ==> "UNION ALL"
1305
+ *
1306
+ * C Security Notes: this is safe to use C-strings (null-terminated)
1307
+ * since the types involved by definition do not have embedded nulls
1308
+ * (e.g. there is no keyword with embedded null)
1309
+ *
1310
+ * Porting Notes: since this is C, it's oddly complicated.
1311
+ * This is just: multikeywords[token.value + ' ' + token2.value]
1312
+ *
1313
+ */
1314
+ static int syntax_merge_words(struct libinjection_sqli_state * sf,stoken_t * a, stoken_t * b)
1315
+ {
1316
+ size_t sz1;
1317
+ size_t sz2;
1318
+ size_t sz3;
1319
+ char tmp[LIBINJECTION_SQLI_TOKEN_SIZE];
1320
+ char ch;
1321
+
1322
+ /* first token is of right type? */
1323
+ if (!
1324
+ (a->type == TYPE_KEYWORD ||
1325
+ a->type == TYPE_BAREWORD ||
1326
+ a->type == TYPE_OPERATOR ||
1327
+ a->type == TYPE_UNION ||
1328
+ a->type == TYPE_FUNCTION ||
1329
+ a->type == TYPE_EXPRESSION ||
1330
+ a->type == TYPE_TSQL ||
1331
+ a->type == TYPE_SQLTYPE)) {
1332
+ return FALSE;
1333
+ }
1334
+
1335
+ if (!
1336
+ (b->type == TYPE_KEYWORD ||
1337
+ b->type == TYPE_BAREWORD ||
1338
+ b->type == TYPE_OPERATOR ||
1339
+ b->type == TYPE_UNION ||
1340
+ b->type == TYPE_FUNCTION ||
1341
+ b->type == TYPE_EXPRESSION ||
1342
+ b->type == TYPE_TSQL ||
1343
+ b->type == TYPE_SQLTYPE ||
1344
+ b->type == TYPE_LOGIC_OPERATOR)) {
1345
+ return FALSE;
1346
+ }
1347
+
1348
+ sz1 = a->len;
1349
+ sz2 = b->len;
1350
+ sz3 = sz1 + sz2 + 1; /* +1 for space in the middle */
1351
+ if (sz3 >= LIBINJECTION_SQLI_TOKEN_SIZE) { /* make sure there is room for ending null */
1352
+ return FALSE;
1353
+ }
1354
+ /*
1355
+ * oddly annoying last.val + ' ' + current.val
1356
+ */
1357
+ memcpy(tmp, a->val, sz1);
1358
+ tmp[sz1] = ' ';
1359
+ memcpy(tmp + sz1 + 1, b->val, sz2);
1360
+ tmp[sz3] = CHAR_NULL;
1361
+ ch = sf->lookup(sf, LOOKUP_WORD, tmp, sz3);
1362
+
1363
+ if (ch != CHAR_NULL) {
1364
+ st_assign(a, ch, a->pos, sz3, tmp);
1365
+ return TRUE;
1366
+ } else {
1367
+ return FALSE;
1368
+ }
1369
+ }
1370
+
1371
+ int libinjection_sqli_fold(struct libinjection_sqli_state * sf)
1372
+ { /* Tokenizes the input of sf and folds adjacent tokens in sf->tokenvec; returns the number of tokens kept (0 if no significant token was found, never more than LIBINJECTION_SQLI_MAX_TOKENS). */
1373
+ stoken_t last_comment; /* most recent comment token seen while fetching; re-appended after the loop if there is room */
1374
+
1375
+ /* POS is the position of where the NEXT token goes */
1376
+ size_t pos = 0;
1377
+
1378
+ /* LEFT is a count of how many tokens that are already
1379
+ folded or processed (i.e. part of the fingerprint) */
1380
+ size_t left = 0;
1381
+
1382
+ int more = 1; /* result of the last libinjection_sqli_tokenize call; false once no further token was produced */
1383
+
1384
+ st_clear(&last_comment);
1385
+
1386
+ /* Skip all initial comments, left-parens (, SQL types and unary operators
1387
+ *
1388
+ */
1389
+ sf->current = &(sf->tokenvec[0]);
1390
+ while (more) {
1391
+ more = libinjection_sqli_tokenize(sf);
1392
+ if ( ! (sf->current->type == TYPE_COMMENT ||
1393
+ sf->current->type == TYPE_LEFTPARENS ||
1394
+ sf->current->type == TYPE_SQLTYPE ||
1395
+ st_is_unary_op(sf->current))) {
1396
+ break;
1397
+ }
1398
+ }
1399
+
1400
+ if (! more) {
1401
+ /* If input was only comments, unary or (, then exit */
1402
+ return 0;
1403
+ } else {
1404
+ /* it's some other token */
1405
+ pos += 1;
1406
+ }
1407
+
1408
+ while (1) {
1409
+ FOLD_DEBUG;
1410
+
1411
+ /* do we have all the max number of tokens? if so do
1412
+ * some special cases for 5 tokens
1413
+ */
1414
+ if (pos >= LIBINJECTION_SQLI_MAX_TOKENS) {
1415
+ if (
1416
+ (
1417
+ sf->tokenvec[0].type == TYPE_NUMBER &&
1418
+ (sf->tokenvec[1].type == TYPE_OPERATOR || sf->tokenvec[1].type == TYPE_COMMA) &&
1419
+ sf->tokenvec[2].type == TYPE_LEFTPARENS &&
1420
+ sf->tokenvec[3].type == TYPE_NUMBER &&
1421
+ sf->tokenvec[4].type == TYPE_RIGHTPARENS
1422
+ ) ||
1423
+ (
1424
+ sf->tokenvec[0].type == TYPE_BAREWORD &&
1425
+ sf->tokenvec[1].type == TYPE_OPERATOR &&
1426
+ sf->tokenvec[2].type == TYPE_LEFTPARENS &&
1427
+ (sf->tokenvec[3].type == TYPE_BAREWORD || sf->tokenvec[3].type == TYPE_NUMBER) &&
1428
+ sf->tokenvec[4].type == TYPE_RIGHTPARENS
1429
+ ) ||
1430
+ (
1431
+ sf->tokenvec[0].type == TYPE_NUMBER &&
1432
+ sf->tokenvec[1].type == TYPE_RIGHTPARENS &&
1433
+ sf->tokenvec[2].type == TYPE_COMMA &&
1434
+ sf->tokenvec[3].type == TYPE_LEFTPARENS &&
1435
+ sf->tokenvec[4].type == TYPE_NUMBER
1436
+ ) ||
1437
+ (
1438
+ sf->tokenvec[0].type == TYPE_BAREWORD &&
1439
+ sf->tokenvec[1].type == TYPE_RIGHTPARENS &&
1440
+ sf->tokenvec[2].type == TYPE_OPERATOR &&
1441
+ sf->tokenvec[3].type == TYPE_LEFTPARENS &&
1442
+ sf->tokenvec[4].type == TYPE_BAREWORD
1443
+ )
1444
+ )
1445
+ {
1446
+ if (pos > LIBINJECTION_SQLI_MAX_TOKENS) {
1447
+ st_copy(&(sf->tokenvec[1]), &(sf->tokenvec[LIBINJECTION_SQLI_MAX_TOKENS]));
1448
+ pos = 2;
1449
+ left = 0;
1450
+ } else {
1451
+ pos = 1;
1452
+ left = 0;
1453
+ }
1454
+ }
1455
+ }
1456
+
1457
+ if (! more || left >= LIBINJECTION_SQLI_MAX_TOKENS) {
1458
+ left = pos;
1459
+ break;
1460
+ }
1461
+
1462
+ /* get up to two tokens */
1463
+ while (more && pos <= LIBINJECTION_SQLI_MAX_TOKENS && (pos - left) < 2) {
1464
+ sf->current = &(sf->tokenvec[pos]);
1465
+ more = libinjection_sqli_tokenize(sf);
1466
+ if (more) {
1467
+ if (sf->current->type == TYPE_COMMENT) {
1468
+ st_copy(&last_comment, sf->current);
1469
+ } else {
1470
+ last_comment.type = CHAR_NULL;
1471
+ pos += 1;
1472
+ }
1473
+ }
1474
+ }
1475
+ FOLD_DEBUG;
1476
+ /* did we get 2 tokens? if not then we are done */
1477
+ if (pos - left < 2) {
1478
+ left = pos;
1479
+ continue;
1480
+ }
1481
+
1482
+ /* FOLD: "ss" -> "s"
1483
+ * "foo" "bar" is valid SQL
1484
+ * just ignore second string
1485
+ */
1486
+ if (sf->tokenvec[left].type == TYPE_STRING && sf->tokenvec[left+1].type == TYPE_STRING) {
1487
+ pos -= 1;
1488
+ sf->stats_folds += 1;
1489
+ continue;
1490
+ } else if (sf->tokenvec[left].type == TYPE_SEMICOLON && sf->tokenvec[left+1].type == TYPE_SEMICOLON) {
1491
+ /* not sure how various engines handle
1492
+ * 'select 1;;drop table foo' or
1493
+ * 'select 1; /x foo x/; drop table foo'
1494
+ * to prevent surprises, just fold away repeated semicolons
1495
+ */
1496
+ pos -= 1;
1497
+ sf->stats_folds += 1;
1498
+ continue;
1499
+ } else if ((sf->tokenvec[left].type == TYPE_OPERATOR ||
1500
+ sf->tokenvec[left].type == TYPE_LOGIC_OPERATOR) &&
1501
+ (st_is_unary_op(&sf->tokenvec[left+1]) ||
1502
+ sf->tokenvec[left+1].type == TYPE_SQLTYPE)) {
1503
+ pos -= 1;
1504
+ sf->stats_folds += 1;
1505
+ left = 0;
1506
+ continue;
1507
+ } else if (sf->tokenvec[left].type == TYPE_LEFTPARENS &&
1508
+ st_is_unary_op(&sf->tokenvec[left+1])) {
1509
+ pos -= 1;
1510
+ sf->stats_folds += 1;
1511
+ if (left > 0) {
1512
+ left -= 1;
1513
+ }
1514
+ continue;
1515
+ } else if (syntax_merge_words(sf, &sf->tokenvec[left], &sf->tokenvec[left+1])) {
1516
+ pos -= 1;
1517
+ sf->stats_folds += 1;
1518
+ if (left > 0) {
1519
+ left -= 1;
1520
+ }
1521
+ continue;
1522
+ } else if (sf->tokenvec[left].type == TYPE_SEMICOLON &&
1523
+ sf->tokenvec[left+1].type == TYPE_FUNCTION &&
1524
+ (sf->tokenvec[left+1].val[0] == 'I' ||
1525
+ sf->tokenvec[left+1].val[0] == 'i' ) &&
1526
+ (sf->tokenvec[left+1].val[1] == 'F' ||
1527
+ sf->tokenvec[left+1].val[1] == 'f' )) {
1528
+ /* IF is normally a function, except in Transact-SQL where it can be used as a
1529
+ * standalone control flow operator, e.g. ; IF 1=1 ...
1530
+ * if found after a semicolon, convert from 'f' type to 'T' type
1531
+ */
1532
+ sf->tokenvec[left+1].type = TYPE_TSQL;
1533
+ /* left += 2; */
1534
+ continue; /* reparse everything, but we probably can advance left, and pos */
1535
+ } else if ((sf->tokenvec[left].type == TYPE_BAREWORD || sf->tokenvec[left].type == TYPE_VARIABLE) &&
1536
+ sf->tokenvec[left+1].type == TYPE_LEFTPARENS && (
1537
+ /* TSQL functions but common enough to be column names */
1538
+ cstrcasecmp("USER_ID", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
1539
+ cstrcasecmp("USER_NAME", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
1540
+
1541
+ /* Function in MYSQL */
1542
+ cstrcasecmp("DATABASE", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
1543
+ cstrcasecmp("PASSWORD", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
1544
+ cstrcasecmp("USER", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
1545
+
1546
+ /* Mysql words that act as a variable and are a function */
1547
+
1548
+ /* TSQL current_users is fake-variable */
1549
+ /* http://msdn.microsoft.com/en-us/library/ms176050.aspx */
1550
+ cstrcasecmp("CURRENT_USER", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
1551
+ cstrcasecmp("CURRENT_DATE", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
1552
+ cstrcasecmp("CURRENT_TIME", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
1553
+ cstrcasecmp("CURRENT_TIMESTAMP", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
1554
+ cstrcasecmp("LOCALTIME", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
1555
+ cstrcasecmp("LOCALTIMESTAMP", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0
1556
+ )) {
1557
+
1558
+ /* pos is the same
1559
+ * other conversions need to go here... for instance
1560
+ * password CAN be a function, coalesce CAN be a function
1561
+ */
1562
+ sf->tokenvec[left].type = TYPE_FUNCTION;
1563
+ continue;
1564
+ } else if (sf->tokenvec[left].type == TYPE_KEYWORD && (
1565
+ cstrcasecmp("IN", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
1566
+ cstrcasecmp("NOT IN", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0
1567
+ )) {
1568
+
1569
+ if (sf->tokenvec[left+1].type == TYPE_LEFTPARENS) {
1570
+ /* got .... IN ( ... (or 'NOT IN')
1571
+ * it's an operator
1572
+ */
1573
+ sf->tokenvec[left].type = TYPE_OPERATOR;
1574
+ } else {
1575
+ /*
1576
+ * it's a nothing
1577
+ */
1578
+ sf->tokenvec[left].type = TYPE_BAREWORD;
1579
+ }
1580
+
1581
+ /* "IN" can be used as "IN BOOLEAN MODE" for mysql
1582
+ * in which case merging of words can be done later
1583
+ * otherwise it acts as an equality operator __ IN (values..)
1584
+ *
1585
+ * here we got "IN" "(" so it's an operator.
1586
+ * also back track to handle "NOT IN"
1587
+ * might need to do the same with like
1588
+ * two use cases "foo" LIKE "BAR" (normal operator)
1589
+ * "foo" = LIKE(1,2)
1590
+ */
1591
+ continue;
1592
+ } else if ((sf->tokenvec[left].type == TYPE_OPERATOR) && (
1593
+ cstrcasecmp("LIKE", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
1594
+ cstrcasecmp("NOT LIKE", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0)) {
1595
+ if (sf->tokenvec[left+1].type == TYPE_LEFTPARENS) {
1596
+ /* SELECT LIKE(...
1597
+ * it's a function
1598
+ */
1599
+ sf->tokenvec[left].type = TYPE_FUNCTION;
1600
+ }
1601
+ } else if (sf->tokenvec[left].type == TYPE_SQLTYPE &&
1602
+ (sf->tokenvec[left+1].type == TYPE_BAREWORD ||
1603
+ sf->tokenvec[left+1].type == TYPE_NUMBER ||
1604
+ sf->tokenvec[left+1].type == TYPE_SQLTYPE ||
1605
+ sf->tokenvec[left+1].type == TYPE_LEFTPARENS ||
1606
+ sf->tokenvec[left+1].type == TYPE_FUNCTION ||
1607
+ sf->tokenvec[left+1].type == TYPE_VARIABLE ||
1608
+ sf->tokenvec[left+1].type == TYPE_STRING)) {
1609
+ st_copy(&sf->tokenvec[left], &sf->tokenvec[left+1]);
1610
+ pos -= 1;
1611
+ sf->stats_folds += 1;
1612
+ left = 0;
1613
+ continue;
1614
+ } else if (sf->tokenvec[left].type == TYPE_COLLATE &&
1615
+ sf->tokenvec[left+1].type == TYPE_BAREWORD) {
1616
+ /*
1617
+ * there are too many collation types.. so if the bareword has a "_"
1618
+ * then it's TYPE_SQLTYPE
1619
+ */
1620
+ if (strchr(sf->tokenvec[left+1].val, '_') != NULL) {
1621
+ sf->tokenvec[left+1].type = TYPE_SQLTYPE;
1622
+ left = 0;
1623
+ }
1624
+ } else if (sf->tokenvec[left].type == TYPE_BACKSLASH) {
1625
+ if (st_is_arithmetic_op(&(sf->tokenvec[left+1]))) {
1626
+ /* very weird case in TSQL where '\%1' is parsed as '0 % 1', etc */
1627
+ sf->tokenvec[left].type = TYPE_NUMBER;
1628
+ } else {
1629
+ /* just ignore it.. Again T-SQL seems to parse \1 as "1" */
1630
+ st_copy(&sf->tokenvec[left], &sf->tokenvec[left+1]);
1631
+ pos -= 1;
1632
+ sf->stats_folds += 1;
1633
+ }
1634
+ left = 0;
1635
+ continue;
1636
+ } else if (sf->tokenvec[left].type == TYPE_LEFTPARENS &&
1637
+ sf->tokenvec[left+1].type == TYPE_LEFTPARENS) {
1638
+ pos -= 1;
1639
+ left = 0;
1640
+ sf->stats_folds += 1;
1641
+ continue;
1642
+ } else if (sf->tokenvec[left].type == TYPE_RIGHTPARENS &&
1643
+ sf->tokenvec[left+1].type == TYPE_RIGHTPARENS) {
1644
+ pos -= 1;
1645
+ left = 0;
1646
+ sf->stats_folds += 1;
1647
+ continue;
1648
+ } else if (sf->tokenvec[left].type == TYPE_LEFTBRACE &&
1649
+ sf->tokenvec[left+1].type == TYPE_BAREWORD) {
1650
+
1651
+ /*
1652
+ * MySQL Degenerate case --
1653
+ *
1654
+ * select { ``.``.id }; -- valid !!!
1655
+ * select { ``.``.``.id }; -- invalid
1656
+ * select ``.``.id; -- invalid
1657
+ * select { ``.id }; -- invalid
1658
+ *
1659
+ * so it appears {``.``.id} is a magic case
1660
+ * I suspect this is "current database, current table, field id"
1661
+ *
1662
+ * The folding code can't look at more than 3 tokens, and
1663
+ * I don't want to make two passes.
1664
+ *
1665
+ * Since "{ ``" so rare, we are just going to blacklist it.
1666
+ *
1667
+ * Highly likely this will need revisiting!
1668
+ *
1669
+ * CREDIT @rsalgado 2013-11-25
1670
+ */
1671
+ if (sf->tokenvec[left+1].len == 0) {
1672
+ sf->tokenvec[left+1].type = TYPE_EVIL;
1673
+ return (int)(left+2);
1674
+ }
1675
+ /* weird ODBC / MYSQL {foo expr} --> expr
1676
+ * but for this rule we just strip away the "{ foo" part
1677
+ */
1678
+ left = 0;
1679
+ pos -= 2;
1680
+ sf->stats_folds += 2;
1681
+ continue;
1682
+ } else if (sf->tokenvec[left+1].type == TYPE_RIGHTBRACE) {
1683
+ pos -= 1;
1684
+ left = 0;
1685
+ sf->stats_folds += 1;
1686
+ continue;
1687
+ }
1688
+
1689
+ /* all cases of handling 2 tokens are done
1690
+ and nothing matched. Get one more token
1691
+ */
1692
+ FOLD_DEBUG;
1693
+ while (more && pos <= LIBINJECTION_SQLI_MAX_TOKENS && pos - left < 3) {
1694
+ sf->current = &(sf->tokenvec[pos]);
1695
+ more = libinjection_sqli_tokenize(sf);
1696
+ if (more) {
1697
+ if (sf->current->type == TYPE_COMMENT) {
1698
+ st_copy(&last_comment, sf->current);
1699
+ } else {
1700
+ last_comment.type = CHAR_NULL;
1701
+ pos += 1;
1702
+ }
1703
+ }
1704
+ }
1705
+
1706
+ /* do we have three tokens? If not then we are done */
1707
+ if (pos -left < 3) {
1708
+ left = pos;
1709
+ continue;
1710
+ }
1711
+
1712
+ /*
1713
+ * now look for three token folding
1714
+ */
1715
+ if (sf->tokenvec[left].type == TYPE_NUMBER &&
1716
+ sf->tokenvec[left+1].type == TYPE_OPERATOR &&
1717
+ sf->tokenvec[left+2].type == TYPE_NUMBER) {
1718
+ pos -= 2;
1719
+ left = 0;
1720
+ continue;
1721
+ } else if (sf->tokenvec[left].type == TYPE_OPERATOR &&
1722
+ sf->tokenvec[left+1].type != TYPE_LEFTPARENS &&
1723
+ sf->tokenvec[left+2].type == TYPE_OPERATOR) {
1724
+ left = 0;
1725
+ pos -= 2;
1726
+ continue;
1727
+ } else if (sf->tokenvec[left].type == TYPE_LOGIC_OPERATOR &&
1728
+ sf->tokenvec[left+2].type == TYPE_LOGIC_OPERATOR) {
1729
+ pos -= 2;
1730
+ left = 0;
1731
+ continue;
1732
+ } else if (sf->tokenvec[left].type == TYPE_VARIABLE &&
1733
+ sf->tokenvec[left+1].type == TYPE_OPERATOR &&
1734
+ (sf->tokenvec[left+2].type == TYPE_VARIABLE ||
1735
+ sf->tokenvec[left+2].type == TYPE_NUMBER ||
1736
+ sf->tokenvec[left+2].type == TYPE_BAREWORD)) {
1737
+ pos -= 2;
1738
+ left = 0;
1739
+ continue;
1740
+ } else if ((sf->tokenvec[left].type == TYPE_BAREWORD ||
1741
+ sf->tokenvec[left].type == TYPE_NUMBER ) &&
1742
+ sf->tokenvec[left+1].type == TYPE_OPERATOR &&
1743
+ (sf->tokenvec[left+2].type == TYPE_NUMBER ||
1744
+ sf->tokenvec[left+2].type == TYPE_BAREWORD)) {
1745
+ pos -= 2;
1746
+ left = 0;
1747
+ continue;
1748
+ } else if ((sf->tokenvec[left].type == TYPE_BAREWORD ||
1749
+ sf->tokenvec[left].type == TYPE_NUMBER ||
1750
+ sf->tokenvec[left].type == TYPE_VARIABLE ||
1751
+ sf->tokenvec[left].type == TYPE_STRING) &&
1752
+ sf->tokenvec[left+1].type == TYPE_OPERATOR &&
1753
+ streq(sf->tokenvec[left+1].val, "::") &&
1754
+ sf->tokenvec[left+2].type == TYPE_SQLTYPE) {
1755
+ pos -= 2;
1756
+ left = 0;
1757
+ sf->stats_folds += 2;
1758
+ continue;
1759
+ } else if ((sf->tokenvec[left].type == TYPE_BAREWORD ||
1760
+ sf->tokenvec[left].type == TYPE_NUMBER ||
1761
+ sf->tokenvec[left].type == TYPE_STRING ||
1762
+ sf->tokenvec[left].type == TYPE_VARIABLE) &&
1763
+ sf->tokenvec[left+1].type == TYPE_COMMA &&
1764
+ (sf->tokenvec[left+2].type == TYPE_NUMBER ||
1765
+ sf->tokenvec[left+2].type == TYPE_BAREWORD ||
1766
+ sf->tokenvec[left+2].type == TYPE_STRING ||
1767
+ sf->tokenvec[left+2].type == TYPE_VARIABLE)) {
1768
+ pos -= 2;
1769
+ left = 0;
1770
+ continue;
1771
+ } else if ((sf->tokenvec[left].type == TYPE_EXPRESSION ||
1772
+ sf->tokenvec[left].type == TYPE_GROUP ||
1773
+ sf->tokenvec[left].type == TYPE_COMMA) &&
1774
+ st_is_unary_op(&sf->tokenvec[left+1]) &&
1775
+ sf->tokenvec[left+2].type == TYPE_LEFTPARENS) {
1776
+ /* got something like SELECT + (, LIMIT + (
1777
+ * remove unary operator
1778
+ */
1779
+ st_copy(&sf->tokenvec[left+1], &sf->tokenvec[left+2]);
1780
+ pos -= 1;
1781
+ left = 0;
1782
+ continue;
1783
+ } else if ((sf->tokenvec[left].type == TYPE_KEYWORD ||
1784
+ sf->tokenvec[left].type == TYPE_EXPRESSION ||
1785
+ sf->tokenvec[left].type == TYPE_GROUP ) &&
1786
+ st_is_unary_op(&sf->tokenvec[left+1]) &&
1787
+ (sf->tokenvec[left+2].type == TYPE_NUMBER ||
1788
+ sf->tokenvec[left+2].type == TYPE_BAREWORD ||
1789
+ sf->tokenvec[left+2].type == TYPE_VARIABLE ||
1790
+ sf->tokenvec[left+2].type == TYPE_STRING ||
1791
+ sf->tokenvec[left+2].type == TYPE_FUNCTION )) {
1792
+ /* remove unary operators
1793
+ * select - 1
1794
+ */
1795
+ st_copy(&sf->tokenvec[left+1], &sf->tokenvec[left+2]);
1796
+ pos -= 1;
1797
+ left = 0;
1798
+ continue;
1799
+ } else if (sf->tokenvec[left].type == TYPE_COMMA &&
1800
+ st_is_unary_op(&sf->tokenvec[left+1]) &&
1801
+ (sf->tokenvec[left+2].type == TYPE_NUMBER ||
1802
+ sf->tokenvec[left+2].type == TYPE_BAREWORD ||
1803
+ sf->tokenvec[left+2].type == TYPE_VARIABLE ||
1804
+ sf->tokenvec[left+2].type == TYPE_STRING)) {
1805
+ /*
1806
+ * interesting case turn ", -1" ->> ",1" PLUS we need to back up
1807
+ * one token if possible to see if more folding can be done
1808
+ * "1,-1" --> "1"
1809
+ */
1810
+ st_copy(&sf->tokenvec[left+1], &sf->tokenvec[left+2]);
1811
+ left = 0;
1812
+ /* pos is >= 3 so this is safe */
1813
+ assert(pos >= 3);
1814
+ pos -= 3;
1815
+ continue;
1816
+ } else if (sf->tokenvec[left].type == TYPE_COMMA &&
1817
+ st_is_unary_op(&sf->tokenvec[left+1]) &&
1818
+ sf->tokenvec[left+2].type == TYPE_FUNCTION) {
1819
+
1820
+ /* Separate case from above since you end up with
1821
+ * 1,-sin(1) --> 1 (1)
1822
+ * Here, just do
1823
+ * 1,-sin(1) --> 1,sin(1)
1824
+ * just remove unary operator
1825
+ */
1826
+ st_copy(&sf->tokenvec[left+1], &sf->tokenvec[left+2]);
1827
+ pos -= 1;
1828
+ left = 0;
1829
+ continue;
1830
+ } else if ((sf->tokenvec[left].type == TYPE_BAREWORD) &&
1831
+ (sf->tokenvec[left+1].type == TYPE_DOT) &&
1832
+ (sf->tokenvec[left+2].type == TYPE_BAREWORD)) {
1833
+ /* ignore the '.n'
1834
+ * typically is this databasename.table
1835
+ */
1836
+ assert(pos >= 3);
1837
+ pos -= 2;
1838
+ left = 0;
1839
+ continue;
1840
+ } else if ((sf->tokenvec[left].type == TYPE_EXPRESSION) &&
1841
+ (sf->tokenvec[left+1].type == TYPE_DOT) &&
1842
+ (sf->tokenvec[left+2].type == TYPE_BAREWORD)) {
1843
+ /* select . `foo` --> select `foo` */
1844
+ st_copy(&sf->tokenvec[left+1], &sf->tokenvec[left+2]);
1845
+ pos -= 1;
1846
+ left = 0;
1847
+ continue;
1848
+ } else if ((sf->tokenvec[left].type == TYPE_FUNCTION) &&
1849
+ (sf->tokenvec[left+1].type == TYPE_LEFTPARENS) &&
1850
+ (sf->tokenvec[left+2].type != TYPE_RIGHTPARENS)) {
1851
+ /*
1852
+ * what's going on here
1853
+ * Some SQL functions like USER() have 0 args
1854
+ * if we get User(foo), then User is not a function
1855
+ * This should be expanded since it eliminated a lot of false
1856
+ * positives.
1857
+ */
1858
+ if (cstrcasecmp("USER", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0) {
1859
+ sf->tokenvec[left].type = TYPE_BAREWORD;
1860
+ }
1861
+ }
1862
+
1863
+ /* no folding -- assume left-most token
1864
+ is good, now use the existing 2 tokens --
1865
+ do not get another
1866
+ */
1867
+
1868
+ left += 1;
1869
+
1870
+ } /* while(1) */
1871
+
1872
+ /* if we have 4 or fewer tokens, and we had a comment token
1873
+ * at the end, add it back
1874
+ */
1875
+
1876
+ if (left < LIBINJECTION_SQLI_MAX_TOKENS && last_comment.type == TYPE_COMMENT) {
1877
+ st_copy(&sf->tokenvec[left], &last_comment);
1878
+ left += 1;
1879
+ }
1880
+
1881
+ /* sometimes we grab a 6th token to help
1882
+ determine the type of token 5.
1883
+ */
1884
+ if (left > LIBINJECTION_SQLI_MAX_TOKENS) {
1885
+ left = LIBINJECTION_SQLI_MAX_TOKENS;
1886
+ }
1887
+
1888
+ return (int)left;
1889
+ }
1890
+
1891
+ /* secondary api: detects SQLi in a string, GIVEN a context.
1892
+ *
1893
+ * A context can be:
1894
+ * * CHAR_NULL (\0), process as is
1895
+ * * CHAR_SINGLE ('), process pretending input started with a
1896
+ * single quote.
1897
+ * * CHAR_DOUBLE ("), process pretending input started with a
1898
+ * double quote.
1899
+ *
1900
+ */
1901
+ const char* libinjection_sqli_fingerprint(struct libinjection_sqli_state * sql_state, int flags)
1902
+ {
1903
+ int i;
1904
+ int tlen = 0;
1905
+
1906
+ libinjection_sqli_reset(sql_state, flags);
1907
+
1908
+ tlen = libinjection_sqli_fold(sql_state);
1909
+
1910
+ /* Check for magic PHP backquote comment
1911
+ * If:
1912
+ * * last token is of type "bareword"
1913
+ * * And is quoted in a backtick
1914
+ * * And isn't closed
1915
+ * * And it's empty?
1916
+ * Then convert it to comment
1917
+ */
1918
+ if (tlen > 2 &&
1919
+ sql_state->tokenvec[tlen-1].type == TYPE_BAREWORD &&
1920
+ sql_state->tokenvec[tlen-1].str_open == CHAR_TICK &&
1921
+ sql_state->tokenvec[tlen-1].len == 0 &&
1922
+ sql_state->tokenvec[tlen-1].str_close == CHAR_NULL) {
1923
+ sql_state->tokenvec[tlen-1].type = TYPE_COMMENT;
1924
+ }
1925
+
1926
+ for (i = 0; i < tlen; ++i) {
1927
+ sql_state->fingerprint[i] = sql_state->tokenvec[i].type;
1928
+ }
1929
+
1930
+ /*
1931
+ * make the fingerprint pattern a c-string (null delimited)
1932
+ */
1933
+ sql_state->fingerprint[tlen] = CHAR_NULL;
1934
+
1935
+ /*
1936
+ * check for 'X' in pattern, and then
1937
+ * clear out all tokens
1938
+ *
1939
+ * this means parsing could not be done
1940
+ * accurately due to pgsql's double comments
1941
+ * or other syntax that isn't consistent.
1942
+ * Should be very rare false positive
1943
+ */
1944
+ if (strchr(sql_state->fingerprint, TYPE_EVIL)) {
1945
+ /* needed for SWIG */
1946
+ memset((void*)sql_state->fingerprint, 0, LIBINJECTION_SQLI_MAX_TOKENS + 1);
1947
+ memset((void*)sql_state->tokenvec[0].val, 0, LIBINJECTION_SQLI_TOKEN_SIZE);
1948
+
1949
+ sql_state->fingerprint[0] = TYPE_EVIL;
1950
+
1951
+ sql_state->tokenvec[0].type = TYPE_EVIL;
1952
+ sql_state->tokenvec[0].val[0] = TYPE_EVIL;
1953
+ sql_state->tokenvec[1].type = CHAR_NULL;
1954
+ }
1955
+
1956
+
1957
+ return sql_state->fingerprint;
1958
+ }
1959
+
1960
/*
 * Returns 1 when the current fingerprint is blacklisted and is not
 * whitelisted; 0 otherwise.  The whitelist check only runs when the
 * blacklist check matched.
 */
int libinjection_sqli_check_fingerprint(struct libinjection_sqli_state* sql_state)
{
    if (!libinjection_sqli_blacklist(sql_state)) {
        return 0;
    }
    return libinjection_sqli_not_whitelist(sql_state) != 0;
}
1965
+
1966
+ char libinjection_sqli_lookup_word(struct libinjection_sqli_state *sql_state, int lookup_type,
1967
+ const char* str, size_t len)
1968
+ {
1969
+ if (lookup_type == LOOKUP_FINGERPRINT) {
1970
+ return libinjection_sqli_check_fingerprint(sql_state) ? 'X' : '\0';
1971
+ } else {
1972
+ return bsearch_keyword_type(str, len, sql_keywords, sql_keywords_sz);
1973
+ }
1974
+ }
1975
+
1976
+ int libinjection_sqli_blacklist(struct libinjection_sqli_state* sql_state)
1977
+ {
1978
+ /*
1979
+ * use minimum of 8 bytes to make sure gcc -fstack-protector
1980
+ * works correctly
1981
+ */
1982
+ char fp2[8];
1983
+ char ch;
1984
+ size_t i;
1985
+ size_t len = strlen(sql_state->fingerprint);
1986
+ int patmatch;
1987
+
1988
+ if (len < 1) {
1989
+ sql_state->reason = __LINE__;
1990
+ return FALSE;
1991
+ }
1992
+
1993
+ /*
1994
+ to keep everything compatible, convert the
1995
+ v0 fingerprint pattern to v1
1996
+ v0: up to 5 chars, mixed case
1997
+ v1: 1 char is '0', up to 5 more chars, upper case
1998
+ */
1999
+
2000
+ fp2[0] = '0';
2001
+ for (i = 0; i < len; ++i) {
2002
+ ch = sql_state->fingerprint[i];
2003
+ if (ch >= 'a' && ch <= 'z') {
2004
+ ch -= 0x20;
2005
+ }
2006
+ fp2[i+1] = ch;
2007
+ }
2008
+ fp2[i+1] = '\0';
2009
+
2010
+ patmatch = is_keyword(fp2, len + 1) == TYPE_FINGERPRINT;
2011
+
2012
+ /*
2013
+ * No match.
2014
+ *
2015
+ * Set sql_state->reason to current line number
2016
+ * only for debugging purposes.
2017
+ */
2018
+ if (!patmatch) {
2019
+ sql_state->reason = __LINE__;
2020
+ return FALSE;
2021
+ }
2022
+
2023
+ return TRUE;
2024
+ }
2025
+
2026
+ /*
2027
+ * return TRUE if SQLi, false is benign
2028
+ */
2029
+ int libinjection_sqli_not_whitelist(struct libinjection_sqli_state* sql_state)
2030
+ {
2031
+ /*
2032
+ * We assume we got a SQLi match
2033
+ * This next part just helps reduce false positives.
2034
+ *
2035
+ */
2036
+ char ch;
2037
+ size_t tlen = strlen(sql_state->fingerprint);
2038
+
2039
+ if (tlen > 1 && sql_state->fingerprint[tlen-1] == TYPE_COMMENT) {
2040
+ /*
2041
+ * if ending comment is contains 'sp_password' then it's SQLi!
2042
+ * MS Audit log apparently ignores anything with
2043
+ * 'sp_password' in it. Unable to find primary reference to
2044
+ * this "feature" of SQL Server but seems to be known SQLi
2045
+ * technique
2046
+ */
2047
+ if (my_memmem(sql_state->s, sql_state->slen,
2048
+ "sp_password", strlen("sp_password"))) {
2049
+ sql_state->reason = __LINE__;
2050
+ return TRUE;
2051
+ }
2052
+ }
2053
+
2054
+ switch (tlen) {
2055
+ case 2:{
2056
+ /*
2057
+ * case 2 are "very small SQLi" which make them
2058
+ * hard to tell from normal input...
2059
+ */
2060
+
2061
+ if (sql_state->fingerprint[1] == TYPE_UNION) {
2062
+ if (sql_state->stats_tokens == 2) {
2063
+ /* not sure why but 1U comes up in SQLi attack
2064
+ * likely part of parameter splitting/etc.
2065
+ * lots of reasons why "1 union" might be normal
2066
+ * input, so beep only if other SQLi things are present
2067
+ */
2068
+ /* it really is a number and 'union'
2069
+ * other wise it has folding or comments
2070
+ */
2071
+ sql_state->reason = __LINE__;
2072
+ return FALSE;
2073
+ } else {
2074
+ sql_state->reason = __LINE__;
2075
+ return TRUE;
2076
+ }
2077
+ }
2078
+ /*
2079
+ * if 'comment' is '#' ignore.. too many FP
2080
+ */
2081
+ if (sql_state->tokenvec[1].val[0] == '#') {
2082
+ sql_state->reason = __LINE__;
2083
+ return FALSE;
2084
+ }
2085
+
2086
+ /*
2087
+ * for fingerprint like 'nc', only comments of /x are treated
2088
+ * as SQL... ending comments of "--" and "#" are not SQLi
2089
+ */
2090
+ if (sql_state->tokenvec[0].type == TYPE_BAREWORD &&
2091
+ sql_state->tokenvec[1].type == TYPE_COMMENT &&
2092
+ sql_state->tokenvec[1].val[0] != '/') {
2093
+ sql_state->reason = __LINE__;
2094
+ return FALSE;
2095
+ }
2096
+
2097
+ /*
2098
+ * if '1c' ends with '/x' then it's SQLi
2099
+ */
2100
+ if (sql_state->tokenvec[0].type == TYPE_NUMBER &&
2101
+ sql_state->tokenvec[1].type == TYPE_COMMENT &&
2102
+ sql_state->tokenvec[1].val[0] == '/') {
2103
+ return TRUE;
2104
+ }
2105
+
2106
+ /**
2107
+ * there are some odd base64-looking query string values
2108
+ * 1234-ABCDEFEhfhihwuefi--
2109
+ * which evaluate to "1c"... these are not SQLi
2110
+ * but 1234-- probably is.
2111
+ * Make sure the "1" in "1c" is actually a true decimal number
2112
+ *
2113
+ * Need to check -original- string since the folding step
2114
+ * may have merged tokens, e.g. "1+FOO" is folded into "1"
2115
+ *
2116
+ * Note: evasion: 1*1--
2117
+ */
2118
+ if (sql_state->tokenvec[0].type == TYPE_NUMBER &&
2119
+ sql_state->tokenvec[1].type == TYPE_COMMENT) {
2120
+ if (sql_state->stats_tokens > 2) {
2121
+ /* we have some folding going on, highly likely SQLi */
2122
+ sql_state->reason = __LINE__;
2123
+ return TRUE;
2124
+ }
2125
+ /*
2126
+ * we check that next character after the number is either whitespace,
2127
+ * or '/' or a '-' ==> SQLi.
2128
+ */
2129
+ ch = sql_state->s[sql_state->tokenvec[0].len];
2130
+ if ( ch <= 32 ) {
2131
+ /* next char was whitespace,e.g. "1234 --"
2132
+ * this isn't exactly correct.. ideally we should skip over all whitespace
2133
+ * but this seems to be ok for now
2134
+ */
2135
+ return TRUE;
2136
+ }
2137
+ if (ch == '/' && sql_state->s[sql_state->tokenvec[0].len + 1] == '*') {
2138
+ return TRUE;
2139
+ }
2140
+ if (ch == '-' && sql_state->s[sql_state->tokenvec[0].len + 1] == '-') {
2141
+ return TRUE;
2142
+ }
2143
+
2144
+ sql_state->reason = __LINE__;
2145
+ return FALSE;
2146
+ }
2147
+
2148
+ /*
2149
+ * detect obvious SQLi scans.. many people put '--' in plain text
2150
+ * so only detect if input ends with '--', e.g. 1-- but not 1-- foo
2151
+ */
2152
+ if ((sql_state->tokenvec[1].len > 2)
2153
+ && sql_state->tokenvec[1].val[0] == '-') {
2154
+ sql_state->reason = __LINE__;
2155
+ return FALSE;
2156
+ }
2157
+
2158
+ break;
2159
+ } /* case 2 */
2160
+ case 3:{
2161
+ /*
2162
+ * ...foo' + 'bar...
2163
+ * no opening quote, no closing quote
2164
+ * and each string has data
2165
+ */
2166
+
2167
+ if (streq(sql_state->fingerprint, "sos")
2168
+ || streq(sql_state->fingerprint, "s&s")) {
2169
+
2170
+ if ((sql_state->tokenvec[0].str_open == CHAR_NULL)
2171
+ && (sql_state->tokenvec[2].str_close == CHAR_NULL)
2172
+ && (sql_state->tokenvec[0].str_close == sql_state->tokenvec[2].str_open)) {
2173
+ /*
2174
+ * if ....foo" + "bar....
2175
+ */
2176
+ sql_state->reason = __LINE__;
2177
+ return TRUE;
2178
+ }
2179
+ if (sql_state->stats_tokens == 3) {
2180
+ sql_state->reason = __LINE__;
2181
+ return FALSE;
2182
+ }
2183
+
2184
+ /*
2185
+ * not SQLi
2186
+ */
2187
+ sql_state->reason = __LINE__;
2188
+ return FALSE;
2189
+ } else if (streq(sql_state->fingerprint, "s&n") ||
2190
+ streq(sql_state->fingerprint, "n&1") ||
2191
+ streq(sql_state->fingerprint, "1&1") ||
2192
+ streq(sql_state->fingerprint, "1&v") ||
2193
+ streq(sql_state->fingerprint, "1&s")) {
2194
+ /* 'sexy and 17' not SQLi
2195
+ * 'sexy and 17<18' SQLi
2196
+ */
2197
+ if (sql_state->stats_tokens == 3) {
2198
+ sql_state->reason = __LINE__;
2199
+ return FALSE;
2200
+ }
2201
+ } else if (sql_state->tokenvec[1].type == TYPE_KEYWORD) {
2202
+ if ((sql_state->tokenvec[1].len < 5) ||
2203
+ cstrcasecmp("INTO", sql_state->tokenvec[1].val, 4)) {
2204
+ /* if it's not "INTO OUTFILE", or "INTO DUMPFILE" (MySQL)
2205
+ * then treat as safe
2206
+ */
2207
+ sql_state->reason = __LINE__;
2208
+ return FALSE;
2209
+ }
2210
+ }
2211
+ break;
2212
+ } /* case 3 */
2213
+ case 4:
2214
+ case 5: {
2215
+ /* nothing right now */
2216
+ break;
2217
+ } /* case 5 */
2218
+ } /* end switch */
2219
+
2220
+ return TRUE;
2221
+ }
2222
+
2223
+ /** Main API, detects SQLi in an input.
2224
+ *
2225
+ *
2226
+ */
2227
+ static int reparse_as_mysql(struct libinjection_sqli_state * sql_state)
2228
+ {
2229
+ return sql_state->stats_comment_ddx ||
2230
+ sql_state->stats_comment_hash;
2231
+ }
2232
+
2233
+ /*
2234
+ * This function is mostly use with SWIG
2235
+ */
2236
+ struct libinjection_sqli_token*
2237
+ libinjection_sqli_get_token(struct libinjection_sqli_state * sql_state, int i)
2238
+ {
2239
+ if (i < 0 || i > (int)LIBINJECTION_SQLI_MAX_TOKENS) {
2240
+ return NULL;
2241
+ }
2242
+ return &(sql_state->tokenvec[i]);
2243
+ }
2244
+
2245
+ int libinjection_is_sqli(struct libinjection_sqli_state * sql_state)
2246
+ {
2247
+ const char *s = sql_state->s;
2248
+ size_t slen = sql_state->slen;
2249
+
2250
+ /*
2251
+ * no input? not SQLi
2252
+ */
2253
+ if (slen == 0) {
2254
+ return FALSE;
2255
+ }
2256
+
2257
+ /*
2258
+ * test input "as-is"
2259
+ */
2260
+ libinjection_sqli_fingerprint(sql_state, FLAG_QUOTE_NONE | FLAG_SQL_ANSI);
2261
+ if (sql_state->lookup(sql_state, LOOKUP_FINGERPRINT,
2262
+ sql_state->fingerprint, strlen(sql_state->fingerprint))) {
2263
+ return TRUE;
2264
+ } else if (reparse_as_mysql(sql_state)) {
2265
+ libinjection_sqli_fingerprint(sql_state, FLAG_QUOTE_NONE | FLAG_SQL_MYSQL);
2266
+ if (sql_state->lookup(sql_state, LOOKUP_FINGERPRINT,
2267
+ sql_state->fingerprint, strlen(sql_state->fingerprint))) {
2268
+ return TRUE;
2269
+ }
2270
+ }
2271
+
2272
+ /*
2273
+ * if input has a single_quote, then
2274
+ * test as if input was actually '
2275
+ * example: if input if "1' = 1", then pretend it's
2276
+ * "'1' = 1"
2277
+ * Porting Notes: example the same as doing
2278
+ * is_string_sqli(sql_state, "'" + s, slen+1, NULL, fn, arg)
2279
+ *
2280
+ */
2281
+ if (memchr(s, CHAR_SINGLE, slen)) {
2282
+ libinjection_sqli_fingerprint(sql_state, FLAG_QUOTE_SINGLE | FLAG_SQL_ANSI);
2283
+ if (sql_state->lookup(sql_state, LOOKUP_FINGERPRINT,
2284
+ sql_state->fingerprint, strlen(sql_state->fingerprint))) {
2285
+ return TRUE;
2286
+ } else if (reparse_as_mysql(sql_state)) {
2287
+ libinjection_sqli_fingerprint(sql_state, FLAG_QUOTE_SINGLE | FLAG_SQL_MYSQL);
2288
+ if (sql_state->lookup(sql_state, LOOKUP_FINGERPRINT,
2289
+ sql_state->fingerprint, strlen(sql_state->fingerprint))) {
2290
+ return TRUE;
2291
+ }
2292
+ }
2293
+ }
2294
+
2295
+ /*
2296
+ * same as above but with a double-quote "
2297
+ */
2298
+ if (memchr(s, CHAR_DOUBLE, slen)) {
2299
+ libinjection_sqli_fingerprint(sql_state, FLAG_QUOTE_DOUBLE | FLAG_SQL_MYSQL);
2300
+ if (sql_state->lookup(sql_state, LOOKUP_FINGERPRINT,
2301
+ sql_state->fingerprint, strlen(sql_state->fingerprint))) {
2302
+ return TRUE;
2303
+ }
2304
+ }
2305
+
2306
+ /*
2307
+ * Hurray, input is not SQLi
2308
+ */
2309
+ return FALSE;
2310
+ }
2311
+
2312
+ int libinjection_sqli(const char* input, size_t slen, char fingerprint[])
2313
+ {
2314
+ int issqli;
2315
+ struct libinjection_sqli_state state;
2316
+
2317
+ libinjection_sqli_init(&state, input, slen, 0);
2318
+ issqli = libinjection_is_sqli(&state);
2319
+ if (issqli) {
2320
+ strcpy(fingerprint, state.fingerprint);
2321
+ } else {
2322
+ fingerprint[0] = '\0';
2323
+ }
2324
+ return issqli;
2325
+ }