threatstack-agent-ruby 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +3 -0
  3. data/LICENSE +6 -0
  4. data/ext/libinjection/extconf.rb +4 -0
  5. data/ext/libinjection/libinjection.h +65 -0
  6. data/ext/libinjection/libinjection.i +13 -0
  7. data/ext/libinjection/libinjection_html5.c +850 -0
  8. data/ext/libinjection/libinjection_html5.h +54 -0
  9. data/ext/libinjection/libinjection_sqli.c +2325 -0
  10. data/ext/libinjection/libinjection_sqli.h +298 -0
  11. data/ext/libinjection/libinjection_sqli_data.h +9654 -0
  12. data/ext/libinjection/libinjection_wrap.c +2393 -0
  13. data/ext/libinjection/libinjection_xss.c +532 -0
  14. data/ext/libinjection/libinjection_xss.h +21 -0
  15. data/lib/constants.rb +110 -0
  16. data/lib/control.rb +61 -0
  17. data/lib/events/event_accumulator.rb +36 -0
  18. data/lib/events/models/attack_event.rb +58 -0
  19. data/lib/events/models/base_event.rb +41 -0
  20. data/lib/events/models/dependency_event.rb +93 -0
  21. data/lib/events/models/environment_event.rb +93 -0
  22. data/lib/events/models/instrumentation_event.rb +46 -0
  23. data/lib/exceptions/request_blocked_error.rb +11 -0
  24. data/lib/instrumentation/common.rb +172 -0
  25. data/lib/instrumentation/instrumenter.rb +144 -0
  26. data/lib/instrumentation/kernel.rb +45 -0
  27. data/lib/instrumentation/rails.rb +61 -0
  28. data/lib/jobs/delayed_job.rb +26 -0
  29. data/lib/jobs/event_submitter.rb +101 -0
  30. data/lib/jobs/job_queue.rb +38 -0
  31. data/lib/jobs/recurrent_job.rb +61 -0
  32. data/lib/threatstack-agent-ruby.rb +7 -0
  33. data/lib/utils/aws_utils.rb +46 -0
  34. data/lib/utils/formatter.rb +47 -0
  35. data/lib/utils/logger.rb +43 -0
  36. data/threatstack-agent-ruby.gemspec +35 -0
  37. metadata +221 -0
@@ -0,0 +1,54 @@
1
#ifndef LIBINJECTION_HTML5
#define LIBINJECTION_HTML5

#ifdef __cplusplus
extern "C" {
#endif

/* pull in size_t */

#include <stddef.h>

/* Token categories; one of these is stored in h5_state.token_type. */
enum html5_type {
    DATA_TEXT
    , TAG_NAME_OPEN
    , TAG_NAME_CLOSE
    , TAG_NAME_SELFCLOSE
    , TAG_DATA
    , TAG_CLOSE
    , ATTR_NAME
    , ATTR_VALUE
    , TAG_COMMENT
    , DOCTYPE
};

/* Selector passed as the last argument of libinjection_h5_init(). */
enum html5_flags {
    DATA_STATE
    , VALUE_NO_QUOTE
    , VALUE_SINGLE_QUOTE
    , VALUE_DOUBLE_QUOTE
    , VALUE_BACK_QUOTE
};

struct h5_state;

/* Function-pointer type stored in h5_state.state. */
typedef int (*ptr_html5_state)(struct h5_state*);

/*
 * Tokenizer state object.
 *
 * NOTE(review): the per-field notes are inferred from the field names
 * only; the implementation is not part of this header -- confirm against
 * libinjection_html5.c.
 */
typedef struct h5_state {
    const char* s;              /* presumably the input buffer */
    size_t len;                 /* presumably the length of s in bytes */
    size_t pos;                 /* presumably the current offset into s */
    int is_close;
    ptr_html5_state state;      /* handler of type ptr_html5_state */
    const char* token_start;    /* presumably start of the current token */
    size_t token_len;           /* presumably length of the current token */
    enum html5_type token_type; /* category of the current token */
} h5_state_t;


/* The last parameter is named so the prototype documents itself. */
void libinjection_h5_init(h5_state_t* hs, const char* s, size_t len, enum html5_flags flags);
int libinjection_h5_next(h5_state_t* hs);

#ifdef __cplusplus
}
#endif
#endif
@@ -0,0 +1,2325 @@
1
+ /**
2
+ * Copyright 2012,2016 Nick Galbreath
3
+ * nickg@client9.com
4
+ * BSD License -- see COPYING.txt for details
5
+ *
6
+ * https://libinjection.client9.com/
7
+ *
8
+ */
9
+
10
+ #include <string.h>
11
+ #include <stdlib.h>
12
+ #include <stdio.h>
13
+ #include <ctype.h>
14
+ #include <assert.h>
15
+ #include <stddef.h>
16
+
17
+ #include "libinjection.h"
18
+ #include "libinjection_sqli.h"
19
+ #include "libinjection_sqli_data.h"
20
+
21
+ #define LIBINJECTION_VERSION "3.9.2"
22
+
23
+ #define LIBINJECTION_SQLI_TOKEN_SIZE sizeof(((stoken_t*)(0))->val)
24
+ #define LIBINJECTION_SQLI_MAX_TOKENS 5
25
+
26
+ #ifndef TRUE
27
+ #define TRUE 1
28
+ #endif
29
+ #ifndef FALSE
30
+ #define FALSE 0
31
+ #endif
32
+
33
+ #define CHAR_NULL '\0'
34
+ #define CHAR_SINGLE '\''
35
+ #define CHAR_DOUBLE '"'
36
+ #define CHAR_TICK '`'
37
+
38
+ /* faster than calling out to libc isdigit */
39
+ #define ISDIGIT(a) ((unsigned)((a) - '0') <= 9)
40
+
41
+ #if 0
42
+ #define FOLD_DEBUG printf("%d \t more=%d pos=%d left=%d\n", __LINE__, more, (int)pos, (int)left);
43
+ #else
44
+ #define FOLD_DEBUG
45
+ #endif
46
+
47
+ /*
48
+ * not making public just yet
49
+ */
50
/*
 * Token type codes.  Apart from TYPE_NONE, every enumerator has the
 * value of the single character shown on its line.
 */
typedef enum {
    TYPE_NONE           = 0,
    TYPE_KEYWORD        = 'k',
    TYPE_UNION          = 'U',
    TYPE_GROUP          = 'B',
    TYPE_EXPRESSION     = 'E',
    TYPE_SQLTYPE        = 't',
    TYPE_FUNCTION       = 'f',
    TYPE_BAREWORD       = 'n',
    TYPE_NUMBER         = '1',
    TYPE_VARIABLE       = 'v',
    TYPE_STRING         = 's',
    TYPE_OPERATOR       = 'o',
    TYPE_LOGIC_OPERATOR = '&',
    TYPE_COMMENT        = 'c',
    TYPE_COLLATE        = 'A',
    TYPE_LEFTPARENS     = '(',
    TYPE_RIGHTPARENS    = ')',  /* not used? */
    TYPE_LEFTBRACE      = '{',
    TYPE_RIGHTBRACE     = '}',
    TYPE_DOT            = '.',
    TYPE_COMMA          = ',',
    TYPE_COLON          = ':',
    TYPE_SEMICOLON      = ';',
    TYPE_TSQL           = 'T',  /* TSQL start */
    TYPE_UNKNOWN        = '?',
    TYPE_EVIL           = 'X',  /* unparsable, abort */
    TYPE_FINGERPRINT    = 'F',  /* not really a token */
    TYPE_BACKSLASH      = '\\'
} sqli_token_types;
80
+
81
+ /**
82
+ * Initializes parsing state
83
+ *
84
+ */
85
+ static char flag2delim(int flag)
86
+ {
87
+ if (flag & FLAG_QUOTE_SINGLE) {
88
+ return CHAR_SINGLE;
89
+ } else if (flag & FLAG_QUOTE_DOUBLE) {
90
+ return CHAR_DOUBLE;
91
+ } else {
92
+ return CHAR_NULL;
93
+ }
94
+ }
95
+
96
/* memchr2 finds a string of 2 characters inside another string.
 * This is a specialized version of "memmem" or "memchr".
 * 'memmem' doesn't exist on all platforms.
 *
 * haystack      start of the bytes to search (need not be NUL-terminated)
 * haystack_len  number of bytes available at haystack
 * c0, c1        the two consecutive bytes to look for
 *
 * Returns a pointer to the first occurrence of c0 immediately followed
 * by c1, or NULL when there is none or fewer than 2 bytes are given.
 *
 * Porting notes: this is just a special version of
 *    astring.find("AB")
 *
 */
static const char *
memchr2(const char *haystack, size_t haystack_len, char c0, char c1)
{
    const char *cur;
    const char *last;

    if (haystack_len < 2) {
        return NULL;
    }

    /* computed only after the length check so that an empty range never
     * forms the out-of-bounds pointer haystack - 1
     */
    cur = haystack;
    last = haystack + (haystack_len - 1);

    while (cur < last) {
        /* safe since cur < last, so cur[1] is still inside the range */
        if (cur[0] == c0 && cur[1] == c1) {
            return cur;
        }
        cur += 1;
    }

    return NULL;
}
124
+
125
/**
 * memmem might not exist on some systems.
 *
 * Finds the first occurrence of the nlen bytes at needle inside the
 * hlen bytes at haystack.  Both pointers must be non-NULL and nlen must
 * be greater than 1 (checked with assert).
 *
 * Returns a pointer to the match, or NULL when there is none, including
 * when the haystack is shorter than the needle.
 */
static const char *
my_memmem(const char* haystack, size_t hlen, const char* needle, size_t nlen)
{
    const char* cur;
    const char* last;
    assert(haystack);
    assert(needle);
    assert(nlen > 1);

    /* without this guard "haystack + hlen - nlen" would point before
     * the start of the haystack
     */
    if (hlen < nlen) {
        return NULL;
    }

    last = haystack + (hlen - nlen);
    for (cur = haystack; cur <= last; ++cur) {
        /* cheap first-byte test before the full comparison */
        if (cur[0] == needle[0] && memcmp(cur, needle, nlen) == 0) {
            return cur;
        }
    }
    return NULL;
}
144
+
145
/** Length of the leading run of bytes that all belong to 'accept'.
 *
 * C Standard library 'strspn' only works for 'c-strings' (null terminated).
 * This works on an arbitrary (pointer, length) range.
 *
 * Note: membership is tested with strchr(), which also matches the
 * terminating NUL of 'accept', so a NUL byte in the input counts as
 * accepted.
 *
 * Performance notes:
 *   not critical
 *
 * Porting notes:
 *   if accept is 'ABC', then this function would be similar to
 *   a_regexp.match(a_str, '[ABC]*'),
 */
static size_t
strlenspn(const char *s, size_t len, const char *accept)
{
    size_t run = 0;

    /* stop at the first byte that is not in the accept set */
    while (run < len && strchr(accept, s[run]) != NULL) {
        ++run;
    }
    return run;
}
171
+
172
/* Length of the leading run of bytes that are NOT in 'accept'
 * (the complement of strlenspn), for an arbitrary (pointer, length)
 * range.
 *
 * Note: membership is tested with strchr(), which also matches the
 * terminating NUL of 'accept', so a NUL byte in the input always ends
 * the run.
 */
static size_t
strlencspn(const char *s, size_t len, const char *accept)
{
    size_t run = 0;

    /* stop at the first byte that is in the set */
    while (run < len && strchr(accept, s[run]) == NULL) {
        ++run;
    }
    return run;
}
186
/* Returns 1 when ch is one of the bytes treated as whitespace,
 * 0 otherwise.
 */
static int char_is_white(char ch) {
    switch (ch) {
    case ' ':     /* 0x20 \040 space */
    case '\t':    /* 0x09 \011 horizontal tab */
    case '\n':    /* 0x0a \012 new line */
    case '\v':    /* 0x0b \013 vertical tab */
    case '\f':    /* 0x0c \014 new page */
    case '\r':    /* 0x0d \015 carriage return */
    case '\240':  /* 0xa0 \240 is Latin-1 */
    case '\000':  /* 0x00 \000 null (oracle) */
        return 1;
    default:
        return 0;
    }
}
198
+
199
/* DANGER DANGER
 * This is a -very specialized function-
 *
 * It compares an ALL_UPPER CASE C STRING (a)
 * with *arbitrary memory* + length (b, n).
 *
 * Only ASCII 'a'..'z' in b are folded to upper case before comparing.
 *
 * Returns 0 when the first n bytes match and a ends right there,
 * 1 when they match but a is longer, -1 when a ends (matching a NUL
 * byte in b) before n bytes were compared, and otherwise the difference
 * of the first mismatching bytes.
 *
 * Sane people would just make a copy, up-case
 * and use a hash table.
 *
 * Required since libc version uses the current locale
 * and is much slower.
 */
static int cstrcasecmp(const char *a, const char *b, size_t n)
{
    size_t i;

    for (i = 0; i < n; ++i) {
        char folded = b[i];

        if ('a' <= folded && folded <= 'z') {
            folded = (char)(folded - 0x20);
        }
        if (a[i] != folded) {
            return a[i] - folded;
        }
        if (a[i] == '\0') {
            return -1;
        }
    }

    return (a[n] == 0) ? 0 : 1;
}
229
+
230
/**
 * Case-sensitive equality of two C strings.
 * Returns 1 when equal, 0 otherwise; exists purely for readability.
 */
static int streq(const char *a, const char *b)
{
    return !strcmp(a, b);
}
238
+
239
+ /**
240
+ *
241
+ *
242
+ *
243
+ * Porting Notes:
244
+ * given a mapping/hash of string to char
245
+ * this is just
246
+ * typecode = mapping[key.upper()]
247
+ */
248
+
249
+ static char bsearch_keyword_type(const char *key, size_t len,
250
+ const keyword_t * keywords, size_t numb)
251
+ {
252
+ size_t pos;
253
+ size_t left = 0;
254
+ size_t right = numb - 1;
255
+
256
+ while (left < right) {
257
+ pos = (left + right) >> 1;
258
+
259
+ /* arg0 = upper case only, arg1 = mixed case */
260
+ if (cstrcasecmp(keywords[pos].word, key, len) < 0) {
261
+ left = pos + 1;
262
+ } else {
263
+ right = pos;
264
+ }
265
+ }
266
+ if ((left == right) && cstrcasecmp(keywords[left].word, key, len) == 0) {
267
+ return keywords[left].type;
268
+ } else {
269
+ return CHAR_NULL;
270
+ }
271
+ }
272
+
273
+ static char is_keyword(const char* key, size_t len)
274
+ {
275
+ return bsearch_keyword_type(key, len, sql_keywords, sql_keywords_sz);
276
+ }
277
+
278
+ /* st_token methods
279
+ *
280
+ * The following functions manipulates the stoken_t type
281
+ *
282
+ *
283
+ */
284
+
285
+ static void st_clear(stoken_t * st)
286
+ {
287
+ memset(st, 0, sizeof(stoken_t));
288
+ }
289
+
290
+ static void st_assign_char(stoken_t * st, const char stype, size_t pos, size_t len,
291
+ const char value)
292
+ {
293
+ /* done to eliminate unused warning */
294
+ (void)len;
295
+ st->type = (char) stype;
296
+ st->pos = pos;
297
+ st->len = 1;
298
+ st->val[0] = value;
299
+ st->val[1] = CHAR_NULL;
300
+ }
301
+
302
+ static void st_assign(stoken_t * st, const char stype,
303
+ size_t pos, size_t len, const char* value)
304
+ {
305
+ const size_t MSIZE = LIBINJECTION_SQLI_TOKEN_SIZE;
306
+ size_t last = len < MSIZE ? len : (MSIZE - 1);
307
+ st->type = (char) stype;
308
+ st->pos = pos;
309
+ st->len = last;
310
+ memcpy(st->val, value, last);
311
+ st->val[last] = CHAR_NULL;
312
+ }
313
+
314
+ static void st_copy(stoken_t * dest, const stoken_t * src)
315
+ {
316
+ memcpy(dest, src, sizeof(stoken_t));
317
+ }
318
+
319
+ static int st_is_arithmetic_op(const stoken_t* st)
320
+ {
321
+ const char ch = st->val[0];
322
+ return (st->type == TYPE_OPERATOR && st->len == 1 &&
323
+ (ch == '*' || ch == '/' || ch == '-' || ch == '+' || ch == '%'));
324
+ }
325
+
326
+ static int st_is_unary_op(const stoken_t * st)
327
+ {
328
+ const char* str = st->val;
329
+ const size_t len = st->len;
330
+
331
+ if (st->type != TYPE_OPERATOR) {
332
+ return FALSE;
333
+ }
334
+
335
+ switch (len) {
336
+ case 1:
337
+ return *str == '+' || *str == '-' || *str == '!' || *str == '~';
338
+ case 2:
339
+ return str[0] == '!' && str[1] == '!';
340
+ case 3:
341
+ return cstrcasecmp("NOT", str, 3) == 0;
342
+ default:
343
+ return FALSE;
344
+ }
345
+ }
346
+
347
+ /* Parsers
348
+ *
349
+ *
350
+ */
351
+
352
+ static size_t parse_white(struct libinjection_sqli_state * sf)
353
+ {
354
+ return sf->pos + 1;
355
+ }
356
+
357
+ static size_t parse_operator1(struct libinjection_sqli_state * sf)
358
+ {
359
+ const char *cs = sf->s;
360
+ size_t pos = sf->pos;
361
+
362
+ st_assign_char(sf->current, TYPE_OPERATOR, pos, 1, cs[pos]);
363
+ return pos + 1;
364
+ }
365
+
366
+ static size_t parse_other(struct libinjection_sqli_state * sf)
367
+ {
368
+ const char *cs = sf->s;
369
+ size_t pos = sf->pos;
370
+
371
+ st_assign_char(sf->current, TYPE_UNKNOWN, pos, 1, cs[pos]);
372
+ return pos + 1;
373
+ }
374
+
375
+ static size_t parse_char(struct libinjection_sqli_state * sf)
376
+ {
377
+ const char *cs = sf->s;
378
+ size_t pos = sf->pos;
379
+
380
+ st_assign_char(sf->current, cs[pos], pos, 1, cs[pos]);
381
+ return pos + 1;
382
+ }
383
+
384
+ static size_t parse_eol_comment(struct libinjection_sqli_state * sf)
385
+ {
386
+ const char *cs = sf->s;
387
+ const size_t slen = sf->slen;
388
+ size_t pos = sf->pos;
389
+
390
+ const char *endpos =
391
+ (const char *) memchr((const void *) (cs + pos), '\n', slen - pos);
392
+ if (endpos == NULL) {
393
+ st_assign(sf->current, TYPE_COMMENT, pos, slen - pos, cs + pos);
394
+ return slen;
395
+ } else {
396
+ st_assign(sf->current, TYPE_COMMENT, pos, (size_t)(endpos - cs) - pos, cs + pos);
397
+ return (size_t)((endpos - cs) + 1);
398
+ }
399
+ }
400
+
401
+ /** In ANSI mode, hash is an operator
402
+ * In MYSQL mode, it's a EOL comment like '--'
403
+ */
404
+ static size_t parse_hash(struct libinjection_sqli_state * sf)
405
+ {
406
+ sf->stats_comment_hash += 1;
407
+ if (sf->flags & FLAG_SQL_MYSQL) {
408
+ sf->stats_comment_hash += 1;
409
+ return parse_eol_comment(sf);
410
+ } else {
411
+ st_assign_char(sf->current, TYPE_OPERATOR, sf->pos, 1, '#');
412
+ return sf->pos + 1;
413
+ }
414
+ }
415
+
416
+ static size_t parse_dash(struct libinjection_sqli_state * sf)
417
+ {
418
+ const char *cs = sf->s;
419
+ const size_t slen = sf->slen;
420
+ size_t pos = sf->pos;
421
+
422
+ /*
423
+ * five cases
424
+ * 1) --[white] this is always a SQL comment
425
+ * 2) --[EOF] this is a comment
426
+ * 3) --[notwhite] in MySQL this is NOT a comment but two unary operators
427
+ * 4) --[notwhite] everyone else thinks this is a comment
428
+ * 5) -[not dash] '-' is a unary operator
429
+ */
430
+
431
+ if (pos + 2 < slen && cs[pos + 1] == '-' && char_is_white(cs[pos+2]) ) {
432
+ return parse_eol_comment(sf);
433
+ } else if (pos +2 == slen && cs[pos + 1] == '-') {
434
+ return parse_eol_comment(sf);
435
+ } else if (pos + 1 < slen && cs[pos + 1] == '-' && (sf->flags & FLAG_SQL_ANSI)) {
436
+ /* --[not-white] not-white case:
437
+ *
438
+ */
439
+ sf->stats_comment_ddx += 1;
440
+ return parse_eol_comment(sf);
441
+ } else {
442
+ st_assign_char(sf->current, TYPE_OPERATOR, pos, 1, '-');
443
+ return pos + 1;
444
+ }
445
+ }
446
+
447
+
448
+ /** This detects MySQL comments, comments that
449
+ * start with /x! We just ban these now but
450
+ * previously we attempted to parse the inside
451
+ *
452
+ * For reference:
453
+ * the form of /x![anything]x/ or /x!12345[anything] x/
454
+ *
455
+ * Mysql 3 (maybe 4), allowed this:
456
+ * /x!0selectx/ 1;
457
+ * where 0 could be any number.
458
+ *
459
+ * The last version of MySQL 3 was in 2003.
460
+
461
+ * It is unclear if the MySQL 3 syntax was allowed
462
+ * in MySQL 4. The last version of MySQL 4 was in 2008
463
+ *
464
+ */
465
/* Given that cs[pos] == '/' and cs[pos+1] == '*', report whether the
 * comment opens with "/x!" (x standing for '*'), i.e. is a MySQL
 * conditional comment.
 *
 * Returns 1 when a third character exists and is '!', 0 otherwise.
 */
static size_t is_mysql_comment(const char *cs, const size_t len, size_t pos)
{
    const size_t bang = pos + 2;

    /* need a character after the opening two, and it must be '!' */
    return (bang < len && cs[bang] == '!') ? 1 : 0;
}
487
+
488
+ static size_t parse_slash(struct libinjection_sqli_state * sf)
489
+ {
490
+ const char* ptr;
491
+ size_t clen;
492
+ const char *cs = sf->s;
493
+ const size_t slen = sf->slen;
494
+ size_t pos = sf->pos;
495
+ const char* cur = cs + pos;
496
+ char ctype = TYPE_COMMENT;
497
+ size_t pos1 = pos + 1;
498
+ if (pos1 == slen || cs[pos1] != '*') {
499
+ return parse_operator1(sf);
500
+ }
501
+
502
+ /*
503
+ * skip over initial '/x'
504
+ */
505
+ ptr = memchr2(cur + 2, slen - (pos + 2), '*', '/');
506
+
507
+ /*
508
+ * (ptr == NULL) causes false positive in cppcheck 1.61
509
+ * casting to type seems to fix it
510
+ */
511
+ if (ptr == (const char*) NULL) {
512
+ /* till end of line */
513
+ clen = slen - pos;
514
+ } else {
515
+ clen = (size_t)(ptr + 2 - cur);
516
+ }
517
+
518
+ /*
519
+ * postgresql allows nested comments which makes
520
+ * this is incompatible with parsing so
521
+ * if we find a '/x' inside the coment, then
522
+ * make a new token.
523
+ *
524
+ * Also, Mysql's "conditional" comments for version
525
+ * are an automatic black ban!
526
+ */
527
+
528
+ if (memchr2(cur + 2, (size_t)(ptr - (cur + 1)), '/', '*') != NULL) {
529
+ ctype = TYPE_EVIL;
530
+ } else if (is_mysql_comment(cs, slen, pos)) {
531
+ ctype = TYPE_EVIL;
532
+ }
533
+
534
+ st_assign(sf->current, ctype, pos, clen, cs + pos);
535
+ return pos + clen;
536
+ }
537
+
538
+
539
+ static size_t parse_backslash(struct libinjection_sqli_state * sf)
540
+ {
541
+ const char *cs = sf->s;
542
+ const size_t slen = sf->slen;
543
+ size_t pos = sf->pos;
544
+
545
+ /*
546
+ * Weird MySQL alias for NULL, "\N" (capital N only)
547
+ */
548
+ if (pos + 1 < slen && cs[pos +1] == 'N') {
549
+ st_assign(sf->current, TYPE_NUMBER, pos, 2, cs + pos);
550
+ return pos + 2;
551
+ } else {
552
+ st_assign_char(sf->current, TYPE_BACKSLASH, pos, 1, cs[pos]);
553
+ return pos + 1;
554
+ }
555
+ }
556
+
557
+ static size_t parse_operator2(struct libinjection_sqli_state * sf)
558
+ {
559
+ char ch;
560
+ const char *cs = sf->s;
561
+ const size_t slen = sf->slen;
562
+ size_t pos = sf->pos;
563
+
564
+ if (pos + 1 >= slen) {
565
+ return parse_operator1(sf);
566
+ }
567
+
568
+ if (pos + 2 < slen &&
569
+ cs[pos] == '<' &&
570
+ cs[pos + 1] == '=' &&
571
+ cs[pos + 2] == '>') {
572
+ /*
573
+ * special 3-char operator
574
+ */
575
+ st_assign(sf->current, TYPE_OPERATOR, pos, 3, cs + pos);
576
+ return pos + 3;
577
+ }
578
+
579
+ ch = sf->lookup(sf, LOOKUP_OPERATOR, cs + pos, 2);
580
+ if (ch != CHAR_NULL) {
581
+ st_assign(sf->current, ch, pos, 2, cs+pos);
582
+ return pos + 2;
583
+ }
584
+
585
+ /*
586
+ * not an operator.. what to do with the two
587
+ * characters we got?
588
+ */
589
+
590
+ if (cs[pos] == ':') {
591
+ /* ':' is not an operator */
592
+ st_assign(sf->current, TYPE_COLON, pos, 1, cs+pos);
593
+ return pos + 1;
594
+ } else {
595
+ /*
596
+ * must be a single char operator
597
+ */
598
+ return parse_operator1(sf);
599
+ }
600
+ }
601
+
602
/*
 * Ok!   "  \"   "  one backslash = escaped!
 *       " \\"   "  two backslash = not escaped!
 *       "\\\"   "  three backslash = escaped!
 *
 * Counts the run of consecutive backslashes that ends at 'end',
 * scanning backwards but never before 'start'.
 *
 * Returns 1 when the run length is odd (the following character is
 * escaped), 0 when it is even or empty.
 */
static int is_backslash_escaped(const char* end, const char* start)
{
    size_t run = 0;
    const char* ptr = end;

    while (ptr >= start && *ptr == '\\') {
        ++run;
        /* stop on the first byte instead of stepping to start - 1,
         * which would be a pointer before the scanned range
         */
        if (ptr == start) {
            break;
        }
        --ptr;
    }

    /* if number of backslashes is odd, it is escaped */
    return (int)(run & 1);
}
619
+
620
/* Returns 1 when the character after 'cur' lies before 'end' and
 * equals *cur (a doubled delimiter such as ''), 0 otherwise.
 */
static size_t is_double_delim_escaped(const char* cur, const char* end)
{
    const char* next = cur + 1;

    if (next >= end) {
        return 0;
    }
    return (size_t)(*next == *cur);
}
624
+
625
+ /* Look forward for doubling of delimiter
626
+ *
627
+ * case 'foo''bar' --> foo''bar
628
+ *
629
+ * ending quote isn't duplicated (i.e. escaped)
630
+ * since it's the wrong char or EOL
631
+ *
632
+ */
633
+ static size_t parse_string_core(const char *cs, const size_t len, size_t pos,
634
+ stoken_t * st, char delim, size_t offset)
635
+ {
636
+ /*
637
+ * offset is to skip the perhaps first quote char
638
+ */
639
+ const char *qpos =
640
+ (const char *) memchr((const void *) (cs + pos + offset), delim,
641
+ len - pos - offset);
642
+
643
+ /*
644
+ * then keep string open/close info
645
+ */
646
+ if (offset > 0) {
647
+ /*
648
+ * this is real quote
649
+ */
650
+ st->str_open = delim;
651
+ } else {
652
+ /*
653
+ * this was a simulated quote
654
+ */
655
+ st->str_open = CHAR_NULL;
656
+ }
657
+
658
+ while (TRUE) {
659
+ if (qpos == NULL) {
660
+ /*
661
+ * string ended with no trailing quote
662
+ * assign what we have
663
+ */
664
+ st_assign(st, TYPE_STRING, pos + offset, len - pos - offset, cs + pos + offset);
665
+ st->str_close = CHAR_NULL;
666
+ return len;
667
+ } else if ( is_backslash_escaped(qpos - 1, cs + pos + offset)) {
668
+ /* keep going, move ahead one character */
669
+ qpos =
670
+ (const char *) memchr((const void *) (qpos + 1), delim,
671
+ (size_t)((cs + len) - (qpos + 1)));
672
+ continue;
673
+ } else if (is_double_delim_escaped(qpos, cs + len)) {
674
+ /* keep going, move ahead two characters */
675
+ qpos =
676
+ (const char *) memchr((const void *) (qpos + 2), delim,
677
+ (size_t)((cs + len) - (qpos + 2)));
678
+ continue;
679
+ } else {
680
+ /* hey it's a normal string */
681
+ st_assign(st, TYPE_STRING, pos + offset,
682
+ (size_t)(qpos - (cs + pos + offset)), cs + pos + offset);
683
+ st->str_close = delim;
684
+ return (size_t)(qpos - cs + 1);
685
+ }
686
+ }
687
+ }
688
+
689
+ /**
690
+ * Used when first char is a ' or "
691
+ */
692
+ static size_t parse_string(struct libinjection_sqli_state * sf)
693
+ {
694
+ const char *cs = sf->s;
695
+ const size_t slen = sf->slen;
696
+ size_t pos = sf->pos;
697
+
698
+ /*
699
+ * assert cs[pos] == single or double quote
700
+ */
701
+ return parse_string_core(cs, slen, pos, sf->current, cs[pos], 1);
702
+ }
703
+
704
+ /**
705
+ * Used when first char is:
706
+ * N or n: mysql "National Character set"
707
+ * E : psql "Escaped String"
708
+ */
709
+ static size_t parse_estring(struct libinjection_sqli_state * sf)
710
+ {
711
+ const char *cs = sf->s;
712
+ const size_t slen = sf->slen;
713
+ size_t pos = sf->pos;
714
+
715
+ if (pos + 2 >= slen || cs[pos+1] != CHAR_SINGLE) {
716
+ return parse_word(sf);
717
+ }
718
+ return parse_string_core(cs, slen, pos, sf->current, CHAR_SINGLE, 2);
719
+ }
720
+
721
+ static size_t parse_ustring(struct libinjection_sqli_state * sf)
722
+ {
723
+ const char *cs = sf->s;
724
+ size_t slen = sf->slen;
725
+ size_t pos = sf->pos;
726
+
727
+ if (pos + 2 < slen && cs[pos+1] == '&' && cs[pos+2] == '\'') {
728
+ sf->pos += 2;
729
+ pos = parse_string(sf);
730
+ sf->current->str_open = 'u';
731
+ if (sf->current->str_close == '\'') {
732
+ sf->current->str_close = 'u';
733
+ }
734
+ return pos;
735
+ } else {
736
+ return parse_word(sf);
737
+ }
738
+ }
739
+
740
+ static size_t parse_qstring_core(struct libinjection_sqli_state * sf, size_t offset)
741
+ {
742
+ char ch;
743
+ const char *strend;
744
+ const char *cs = sf->s;
745
+ size_t slen = sf->slen;
746
+ size_t pos = sf->pos + offset;
747
+
748
+ /* if we are already at end of string..
749
+ if current char is not q or Q
750
+ if we don't have 2 more chars
751
+ if char2 != a single quote
752
+ then, just treat as word
753
+ */
754
+ if (pos >= slen ||
755
+ (cs[pos] != 'q' && cs[pos] != 'Q') ||
756
+ pos + 2 >= slen ||
757
+ cs[pos + 1] != '\'') {
758
+ return parse_word(sf);
759
+ }
760
+
761
+ ch = cs[pos + 2];
762
+
763
+ /* the ch > 127 is un-needed since
764
+ * we assume char is signed
765
+ */
766
+ if (ch < 33 /* || ch > 127 */) {
767
+ return parse_word(sf);
768
+ }
769
+ switch (ch) {
770
+ case '(' : ch = ')'; break;
771
+ case '[' : ch = ']'; break;
772
+ case '{' : ch = '}'; break;
773
+ case '<' : ch = '>'; break;
774
+ }
775
+
776
+ strend = memchr2(cs + pos + 3, slen - pos - 3, ch, '\'');
777
+ if (strend == NULL) {
778
+ st_assign(sf->current, TYPE_STRING, pos + 3, slen - pos - 3, cs + pos + 3);
779
+ sf->current->str_open = 'q';
780
+ sf->current->str_close = CHAR_NULL;
781
+ return slen;
782
+ } else {
783
+ st_assign(sf->current, TYPE_STRING, pos + 3, (size_t)(strend - cs) - pos - 3, cs + pos + 3);
784
+ sf->current->str_open = 'q';
785
+ sf->current->str_close = 'q';
786
+ return (size_t)(strend - cs + 2);
787
+ }
788
+ }
789
+
790
/*
 * Oracle's q string: the 'q' is at the current position,
 * so no prefix characters are skipped.
 */
static size_t parse_qstring(struct libinjection_sqli_state * sf)
{
    const size_t no_prefix = 0;

    return parse_qstring_core(sf, no_prefix);
}
797
+
798
+ /*
799
+ * mysql's N'STRING' or
800
+ * ... Oracle's nq string
801
+ */
802
+ static size_t parse_nqstring(struct libinjection_sqli_state * sf)
803
+ {
804
+ size_t slen = sf->slen;
805
+ size_t pos = sf->pos;
806
+ if (pos + 2 < slen && sf->s[pos+1] == CHAR_SINGLE) {
807
+ return parse_estring(sf);
808
+ }
809
+ return parse_qstring_core(sf, 1);
810
+ }
811
+
812
+ /*
813
+ * binary literal string
814
+ * re: [bB]'[01]*'
815
+ */
816
+ static size_t parse_bstring(struct libinjection_sqli_state *sf)
817
+ {
818
+ size_t wlen;
819
+ const char *cs = sf->s;
820
+ size_t pos = sf->pos;
821
+ size_t slen = sf->slen;
822
+
823
+ /* need at least 2 more characters
824
+ * if next char isn't a single quote, then
825
+ * continue as normal word
826
+ */
827
+ if (pos + 2 >= slen || cs[pos+1] != '\'') {
828
+ return parse_word(sf);
829
+ }
830
+
831
+ wlen = strlenspn(cs + pos + 2, sf->slen - pos - 2, "01");
832
+ if (pos + 2 + wlen >= slen || cs[pos + 2 + wlen] != '\'') {
833
+ return parse_word(sf);
834
+ }
835
+ st_assign(sf->current, TYPE_NUMBER, pos, wlen + 3, cs + pos);
836
+ return pos + 2 + wlen + 1;
837
+ }
838
+
839
+ /*
840
+ * hex literal string
841
+ * re: [xX]'[0123456789abcdefABCDEF]*'
842
+ * mysql has requirement of having EVEN number of chars,
843
+ * but pgsql does not
844
+ */
845
+ static size_t parse_xstring(struct libinjection_sqli_state *sf)
846
+ {
847
+ size_t wlen;
848
+ const char *cs = sf->s;
849
+ size_t pos = sf->pos;
850
+ size_t slen = sf->slen;
851
+
852
+ /* need at least 2 more characters
853
+ * if next char isn't a single quote, then
854
+ * continue as normal word
855
+ */
856
+ if (pos + 2 >= slen || cs[pos+1] != '\'') {
857
+ return parse_word(sf);
858
+ }
859
+
860
+ wlen = strlenspn(cs + pos + 2, sf->slen - pos - 2, "0123456789ABCDEFabcdef");
861
+ if (pos + 2 + wlen >= slen || cs[pos + 2 + wlen] != '\'') {
862
+ return parse_word(sf);
863
+ }
864
+ st_assign(sf->current, TYPE_NUMBER, pos, wlen + 3, cs + pos);
865
+ return pos + 2 + wlen + 1;
866
+ }
867
+
868
+ /**
869
+ * This handles MS SQLSERVER bracket words
870
+ * http://stackoverflow.com/questions/3551284/sql-serverwhat-do-brackets-mean-around-column-name
871
+ *
872
+ */
873
+ static size_t parse_bword(struct libinjection_sqli_state * sf)
874
+ {
875
+ const char *cs = sf->s;
876
+ size_t pos = sf->pos;
877
+ const char* endptr = (const char*) memchr(cs + pos, ']', sf->slen - pos);
878
+ if (endptr == NULL) {
879
+ st_assign(sf->current, TYPE_BAREWORD, pos, sf->slen - pos, cs + pos);
880
+ return sf->slen;
881
+ } else {
882
+ st_assign(sf->current, TYPE_BAREWORD, pos, (size_t)(endptr - cs) - pos + 1, cs + pos);
883
+ return (size_t)((endptr - cs) + 1);
884
+ }
885
+ }
886
+
887
+ static size_t parse_word(struct libinjection_sqli_state * sf)
888
+ {
889
+ char ch;
890
+ char delim;
891
+ size_t i;
892
+ const char *cs = sf->s;
893
+ size_t pos = sf->pos;
894
+ size_t wlen = strlencspn(cs + pos, sf->slen - pos,
895
+ " []{}<>:\\?=@!#~+-*/&|^%(),';\t\n\v\f\r\"\240\000");
896
+
897
+ st_assign(sf->current, TYPE_BAREWORD, pos, wlen, cs + pos);
898
+
899
+ /* now we need to look inside what we good for "." and "`"
900
+ * and see if what is before is a keyword or not
901
+ */
902
+ for (i =0; i < sf->current->len; ++i) {
903
+ delim = sf->current->val[i];
904
+ if (delim == '.' || delim == '`') {
905
+ ch = sf->lookup(sf, LOOKUP_WORD, sf->current->val, i);
906
+ if (ch != TYPE_NONE && ch != TYPE_BAREWORD) {
907
+ /* needed for swig */
908
+ st_clear(sf->current);
909
+ /*
910
+ * we got something like "SELECT.1"
911
+ * or SELECT`column`
912
+ */
913
+ st_assign(sf->current, ch, pos, i, cs + pos);
914
+ return pos + i;
915
+ }
916
+ }
917
+ }
918
+
919
+ /*
920
+ * do normal lookup with word including '.'
921
+ */
922
+ if (wlen < LIBINJECTION_SQLI_TOKEN_SIZE) {
923
+
924
+ ch = sf->lookup(sf, LOOKUP_WORD, sf->current->val, wlen);
925
+ if (ch == CHAR_NULL) {
926
+ ch = TYPE_BAREWORD;
927
+ }
928
+ sf->current->type = ch;
929
+ }
930
+ return pos + wlen;
931
+ }
932
+
933
+ /* MySQL backticks are a cross between string and
934
+ * and a bare word.
935
+ *
936
+ */
937
+ static size_t parse_tick(struct libinjection_sqli_state* sf)
938
+ {
939
+ size_t pos = parse_string_core(sf->s, sf->slen, sf->pos, sf->current, CHAR_TICK, 1);
940
+
941
+ /* we could check to see if start and end of
942
+ * of string are both "`", i.e. make sure we have
943
+ * matching set. `foo` vs. `foo
944
+ * but I don't think it matters much
945
+ */
946
+
947
+ /* check value of string to see if it's a keyword,
948
+ * function, operator, etc
949
+ */
950
+ char ch = sf->lookup(sf, LOOKUP_WORD, sf->current->val, sf->current->len);
951
+ if (ch == TYPE_FUNCTION) {
952
+ /* if it's a function, then convert token */
953
+ sf->current->type = TYPE_FUNCTION;
954
+ } else {
955
+ /* otherwise it's a 'n' type -- mysql treats
956
+ * everything as a bare word
957
+ */
958
+ sf->current->type = TYPE_BAREWORD;
959
+ }
960
+ return pos;
961
+ }
962
+
963
+ static size_t parse_var(struct libinjection_sqli_state * sf)
964
+ {
965
+ size_t xlen;
966
+ const char *cs = sf->s;
967
+ const size_t slen = sf->slen;
968
+ size_t pos = sf->pos + 1;
969
+
970
+ /*
971
+ * var_count is only used to reconstruct
972
+ * the input. It counts the number of '@'
973
+ * seen 0 in the case of NULL, 1 or 2
974
+ */
975
+
976
+ /*
977
+ * move past optional other '@'
978
+ */
979
+ if (pos < slen && cs[pos] == '@') {
980
+ pos += 1;
981
+ sf->current->count = 2;
982
+ } else {
983
+ sf->current->count = 1;
984
+ }
985
+
986
+ /*
987
+ * MySQL allows @@`version`
988
+ */
989
+ if (pos < slen) {
990
+ if (cs[pos] == '`') {
991
+ sf->pos = pos;
992
+ pos = parse_tick(sf);
993
+ sf->current->type = TYPE_VARIABLE;
994
+ return pos;
995
+ } else if (cs[pos] == CHAR_SINGLE || cs[pos] == CHAR_DOUBLE) {
996
+ sf->pos = pos;
997
+ pos = parse_string(sf);
998
+ sf->current->type = TYPE_VARIABLE;
999
+ return pos;
1000
+ }
1001
+ }
1002
+
1003
+
1004
+ xlen = strlencspn(cs + pos, slen - pos,
1005
+ " <>:\\?=@!#~+-*/&|^%(),';\t\n\v\f\r'`\"");
1006
+ if (xlen == 0) {
1007
+ st_assign(sf->current, TYPE_VARIABLE, pos, 0, cs + pos);
1008
+ return pos;
1009
+ } else {
1010
+ st_assign(sf->current, TYPE_VARIABLE, pos, xlen, cs + pos);
1011
+ return pos + xlen;
1012
+ }
1013
+ }
1014
+
1015
+ static size_t parse_money(struct libinjection_sqli_state *sf)
1016
+ {
1017
+ size_t xlen;
1018
+ const char* strend;
1019
+ const char *cs = sf->s;
1020
+ const size_t slen = sf->slen;
1021
+ size_t pos = sf->pos;
1022
+
1023
+ if (pos + 1 == slen) {
1024
+ /* end of line */
1025
+ st_assign_char(sf->current, TYPE_BAREWORD, pos, 1, '$');
1026
+ return slen;
1027
+ }
1028
+
1029
+ /*
1030
+ * $1,000.00 or $1.000,00 ok!
1031
+ * This also parses $....,,,111 but that's ok
1032
+ */
1033
+
1034
+ xlen = strlenspn(cs + pos + 1, slen - pos - 1, "0123456789.,");
1035
+ if (xlen == 0) {
1036
+ if (cs[pos + 1] == '$') {
1037
+ /* we have $$ .. find ending $$ and make string */
1038
+ strend = memchr2(cs + pos + 2, slen - pos -2, '$', '$');
1039
+ if (strend == NULL) {
1040
+ /* fell off edge */
1041
+ st_assign(sf->current, TYPE_STRING, pos + 2, slen - (pos + 2), cs + pos + 2);
1042
+ sf->current->str_open = '$';
1043
+ sf->current->str_close = CHAR_NULL;
1044
+ return slen;
1045
+ } else {
1046
+ st_assign(sf->current, TYPE_STRING, pos + 2,
1047
+ (size_t)(strend - (cs + pos + 2)), cs + pos + 2);
1048
+ sf->current->str_open = '$';
1049
+ sf->current->str_close = '$';
1050
+ return (size_t)(strend - cs + 2);
1051
+ }
1052
+ } else {
1053
+ /* ok it's not a number or '$$', but maybe it's pgsql "$ quoted strings" */
1054
+ xlen = strlenspn(cs + pos + 1, slen - pos - 1, "abcdefghjiklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");
1055
+ if (xlen == 0) {
1056
+ /* hmm it's "$" _something_ .. just add $ and keep going*/
1057
+ st_assign_char(sf->current, TYPE_BAREWORD, pos, 1, '$');
1058
+ return pos + 1;
1059
+ }
1060
+ /* we have $foobar????? */
1061
+ /* is it $foobar$ */
1062
+ if (pos + xlen + 1 == slen || cs[pos+xlen+1] != '$') {
1063
+ /* not $foobar$, or fell off edge */
1064
+ st_assign_char(sf->current, TYPE_BAREWORD, pos, 1, '$');
1065
+ return pos + 1;
1066
+ }
1067
+
1068
+ /* we have $foobar$ ... find it again */
1069
+ strend = my_memmem(cs+xlen+2, slen - (pos+xlen+2), cs + pos, xlen+2);
1070
+
1071
+ if (strend == NULL || ((size_t)(strend - cs) < (pos+xlen+2))) {
1072
+ /* fell off edge */
1073
+ st_assign(sf->current, TYPE_STRING, pos+xlen+2, slen - pos - xlen - 2, cs+pos+xlen+2);
1074
+ sf->current->str_open = '$';
1075
+ sf->current->str_close = CHAR_NULL;
1076
+ return slen;
1077
+ } else {
1078
+ /* got one */
1079
+ st_assign(sf->current, TYPE_STRING, pos+xlen+2,
1080
+ (size_t)(strend - (cs + pos + xlen + 2)), cs+pos+xlen+2);
1081
+ sf->current->str_open = '$';
1082
+ sf->current->str_close = '$';
1083
+ return (size_t)((strend + xlen + 2) - cs);
1084
+ }
1085
+ }
1086
+ } else if (xlen == 1 && cs[pos + 1] == '.') {
1087
+ /* $. should parsed as a word */
1088
+ return parse_word(sf);
1089
+ } else {
1090
+ st_assign(sf->current, TYPE_NUMBER, pos, 1 + xlen, cs + pos);
1091
+ return pos + 1 + xlen;
1092
+ }
1093
+ }
1094
+
1095
+ static size_t parse_number(struct libinjection_sqli_state * sf)
1096
+ {
1097
+ size_t xlen;
1098
+ size_t start;
1099
+ const char* digits = NULL;
1100
+ const char *cs = sf->s;
1101
+ const size_t slen = sf->slen;
1102
+ size_t pos = sf->pos;
1103
+ int have_e = 0;
1104
+ int have_exp = 0;
1105
+
1106
+ /* cs[pos] == '0' has 1/10 chance of being true,
1107
+ * while pos+1< slen is almost always true
1108
+ */
1109
+ if (cs[pos] == '0' && pos + 1 < slen) {
1110
+ if (cs[pos + 1] == 'X' || cs[pos + 1] == 'x') {
1111
+ digits = "0123456789ABCDEFabcdef";
1112
+ } else if (cs[pos + 1] == 'B' || cs[pos + 1] == 'b') {
1113
+ digits = "01";
1114
+ }
1115
+
1116
+ if (digits) {
1117
+ xlen = strlenspn(cs + pos + 2, slen - pos - 2, digits);
1118
+ if (xlen == 0) {
1119
+ st_assign(sf->current, TYPE_BAREWORD, pos, 2, cs + pos);
1120
+ return pos + 2;
1121
+ } else {
1122
+ st_assign(sf->current, TYPE_NUMBER, pos, 2 + xlen, cs + pos);
1123
+ return pos + 2 + xlen;
1124
+ }
1125
+ }
1126
+ }
1127
+
1128
+ start = pos;
1129
+ while (pos < slen && ISDIGIT(cs[pos])) {
1130
+ pos += 1;
1131
+ }
1132
+
1133
+ if (pos < slen && cs[pos] == '.') {
1134
+ pos += 1;
1135
+ while (pos < slen && ISDIGIT(cs[pos])) {
1136
+ pos += 1;
1137
+ }
1138
+ if (pos - start == 1) {
1139
+ /* only one character read so far */
1140
+ st_assign_char(sf->current, TYPE_DOT, start, 1, '.');
1141
+ return pos;
1142
+ }
1143
+ }
1144
+
1145
+ if (pos < slen) {
1146
+ if (cs[pos] == 'E' || cs[pos] == 'e') {
1147
+ have_e = 1;
1148
+ pos += 1;
1149
+ if (pos < slen && (cs[pos] == '+' || cs[pos] == '-')) {
1150
+ pos += 1;
1151
+ }
1152
+ while (pos < slen && ISDIGIT(cs[pos])) {
1153
+ have_exp = 1;
1154
+ pos += 1;
1155
+ }
1156
+ }
1157
+ }
1158
+
1159
+ /* oracle's ending float or double suffix
1160
+ * http://docs.oracle.com/cd/B19306_01/server.102/b14200/sql_elements003.htm#i139891
1161
+ */
1162
+ if (pos < slen && (cs[pos] == 'd' || cs[pos] == 'D' || cs[pos] == 'f' || cs[pos] == 'F')) {
1163
+ if (pos + 1 == slen) {
1164
+ /* line ends evaluate "... 1.2f$" as '1.2f' */
1165
+ pos += 1;
1166
+ } else if ((char_is_white(cs[pos+1]) || cs[pos+1] == ';')) {
1167
+ /*
1168
+ * easy case, evaluate "... 1.2f ... as '1.2f'
1169
+ */
1170
+ pos += 1;
1171
+ } else if (cs[pos+1] == 'u' || cs[pos+1] == 'U') {
1172
+ /*
1173
+ * a bit of a hack but makes '1fUNION' parse as '1f UNION'
1174
+ */
1175
+ pos += 1;
1176
+ } else {
1177
+ /* it's like "123FROM" */
1178
+ /* parse as "123" only */
1179
+ }
1180
+ }
1181
+
1182
+ if (have_e == 1 && have_exp == 0) {
1183
+ /* very special form of
1184
+ * "1234.e"
1185
+ * "10.10E"
1186
+ * ".E"
1187
+ * this is a WORD not a number!! */
1188
+ st_assign(sf->current, TYPE_BAREWORD, start, pos - start, cs + start);
1189
+ } else {
1190
+ st_assign(sf->current, TYPE_NUMBER, start, pos - start, cs + start);
1191
+ }
1192
+ return pos;
1193
+ }
1194
+
1195
+ /*
1196
+ * API to return version. This allows us to increment the version
1197
+ * without having to regenerated the SWIG (or other binding) in minor
1198
+ * releases.
1199
+ */
1200
+ const char* libinjection_version()
1201
+ {
1202
+ return LIBINJECTION_VERSION;
1203
+ }
1204
+
1205
+ int libinjection_sqli_tokenize(struct libinjection_sqli_state * sf)
1206
+ {
1207
+ pt2Function fnptr;
1208
+ size_t *pos = &sf->pos;
1209
+ stoken_t *current = sf->current;
1210
+ const char *s = sf->s;
1211
+ const size_t slen = sf->slen;
1212
+
1213
+ if (slen == 0) {
1214
+ return FALSE;
1215
+ }
1216
+
1217
+ st_clear(current);
1218
+ sf->current = current;
1219
+
1220
+ /*
1221
+ * if we are at beginning of string
1222
+ * and in single-quote or double quote mode
1223
+ * then pretend the input starts with a quote
1224
+ */
1225
+ if (*pos == 0 && (sf->flags & (FLAG_QUOTE_SINGLE | FLAG_QUOTE_DOUBLE))) {
1226
+ *pos = parse_string_core(s, slen, 0, current, flag2delim(sf->flags), 0);
1227
+ sf->stats_tokens += 1;
1228
+ return TRUE;
1229
+ }
1230
+
1231
+ while (*pos < slen) {
1232
+
1233
+ /*
1234
+ * get current character
1235
+ */
1236
+ const unsigned char ch = (unsigned char) (s[*pos]);
1237
+
1238
+ /*
1239
+ * look up the parser, and call it
1240
+ *
1241
+ * Porting Note: this is mapping of char to function
1242
+ * charparsers[ch]()
1243
+ */
1244
+ fnptr = char_parse_map[ch];
1245
+
1246
+ *pos = (*fnptr) (sf);
1247
+
1248
+ /*
1249
+ *
1250
+ */
1251
+ if (current->type != CHAR_NULL) {
1252
+ sf->stats_tokens += 1;
1253
+ return TRUE;
1254
+ }
1255
+ }
1256
+ return FALSE;
1257
+ }
1258
+
1259
+ void libinjection_sqli_init(struct libinjection_sqli_state * sf, const char *s, size_t len, int flags)
1260
+ {
1261
+ if (flags == 0) {
1262
+ flags = FLAG_QUOTE_NONE | FLAG_SQL_ANSI;
1263
+ }
1264
+
1265
+ memset(sf, 0, sizeof(struct libinjection_sqli_state));
1266
+ sf->s = s;
1267
+ sf->slen = len;
1268
+ sf->lookup = libinjection_sqli_lookup_word;
1269
+ sf->userdata = 0;
1270
+ sf->flags = flags;
1271
+ sf->current = &(sf->tokenvec[0]);
1272
+ }
1273
+
1274
+ void libinjection_sqli_reset(struct libinjection_sqli_state * sf, int flags)
1275
+ {
1276
+ void *userdata = sf->userdata;
1277
+ ptr_lookup_fn lookup = sf->lookup;;
1278
+
1279
+ if (flags == 0) {
1280
+ flags = FLAG_QUOTE_NONE | FLAG_SQL_ANSI;
1281
+ }
1282
+ libinjection_sqli_init(sf, sf->s, sf->slen, flags);
1283
+ sf->lookup = lookup;
1284
+ sf->userdata = userdata;
1285
+ }
1286
+
1287
+ void libinjection_sqli_callback(struct libinjection_sqli_state * sf, ptr_lookup_fn fn, void* userdata)
1288
+ {
1289
+ if (fn == NULL) {
1290
+ sf->lookup = libinjection_sqli_lookup_word;
1291
+ sf->userdata = (void*)(NULL);
1292
+ } else {
1293
+ sf->lookup = fn;
1294
+ sf->userdata = userdata;
1295
+ }
1296
+ }
1297
+
1298
+ /** See if two tokens can be merged since they are compound SQL phrases.
1299
+ *
1300
+ * This takes two tokens, and, if they are the right type,
1301
+ * merges their values together. Then checks to see if the
1302
+ * new value is special using the PHRASES mapping.
1303
+ *
1304
+ * Example: "UNION" + "ALL" ==> "UNION ALL"
1305
+ *
1306
+ * C Security Notes: this is safe to use C-strings (null-terminated)
1307
+ * since the types involved by definition do not have embedded nulls
1308
+ * (e.g. there is no keyword with embedded null)
1309
+ *
1310
+ * Porting Notes: since this is C, it's oddly complicated.
1311
+ * This is just: multikeywords[token.value + ' ' + token2.value]
1312
+ *
1313
+ */
1314
+ static int syntax_merge_words(struct libinjection_sqli_state * sf,stoken_t * a, stoken_t * b)
1315
+ {
1316
+ size_t sz1;
1317
+ size_t sz2;
1318
+ size_t sz3;
1319
+ char tmp[LIBINJECTION_SQLI_TOKEN_SIZE];
1320
+ char ch;
1321
+
1322
+ /* first token is of right type? */
1323
+ if (!
1324
+ (a->type == TYPE_KEYWORD ||
1325
+ a->type == TYPE_BAREWORD ||
1326
+ a->type == TYPE_OPERATOR ||
1327
+ a->type == TYPE_UNION ||
1328
+ a->type == TYPE_FUNCTION ||
1329
+ a->type == TYPE_EXPRESSION ||
1330
+ a->type == TYPE_TSQL ||
1331
+ a->type == TYPE_SQLTYPE)) {
1332
+ return FALSE;
1333
+ }
1334
+
1335
+ if (!
1336
+ (b->type == TYPE_KEYWORD ||
1337
+ b->type == TYPE_BAREWORD ||
1338
+ b->type == TYPE_OPERATOR ||
1339
+ b->type == TYPE_UNION ||
1340
+ b->type == TYPE_FUNCTION ||
1341
+ b->type == TYPE_EXPRESSION ||
1342
+ b->type == TYPE_TSQL ||
1343
+ b->type == TYPE_SQLTYPE ||
1344
+ b->type == TYPE_LOGIC_OPERATOR)) {
1345
+ return FALSE;
1346
+ }
1347
+
1348
+ sz1 = a->len;
1349
+ sz2 = b->len;
1350
+ sz3 = sz1 + sz2 + 1; /* +1 for space in the middle */
1351
+ if (sz3 >= LIBINJECTION_SQLI_TOKEN_SIZE) { /* make sure there is room for ending null */
1352
+ return FALSE;
1353
+ }
1354
+ /*
1355
+ * oddly annoying last.val + ' ' + current.val
1356
+ */
1357
+ memcpy(tmp, a->val, sz1);
1358
+ tmp[sz1] = ' ';
1359
+ memcpy(tmp + sz1 + 1, b->val, sz2);
1360
+ tmp[sz3] = CHAR_NULL;
1361
+ ch = sf->lookup(sf, LOOKUP_WORD, tmp, sz3);
1362
+
1363
+ if (ch != CHAR_NULL) {
1364
+ st_assign(a, ch, a->pos, sz3, tmp);
1365
+ return TRUE;
1366
+ } else {
1367
+ return FALSE;
1368
+ }
1369
+ }
1370
+
1371
/**
 * Tokenize the input held in sf and fold the token stream into
 * sf->tokenvec, collapsing pairs and triples of tokens according to the
 * rules below (repeated strings, repeated semicolons, unary operators,
 * "number operator number", multi-word phrases, ...).
 *
 * Bookkeeping:
 *   pos          - index in sf->tokenvec where the next token is written
 *   left         - number of leading tokens treated as final; the rules
 *                  look at tokenvec[left], [left+1] and [left+2]
 *   more         - result of the last tokenize call
 *   last_comment - copy of the latest comment token seen by the main loop;
 *                  comments are not kept in tokenvec while folding and the
 *                  saved one is appended after the loop if there is room
 *
 * A rule that fires usually adjusts pos/left and restarts the loop so the
 * shortened vector is examined again.
 *
 * Returns the number of tokens in sf->tokenvec, capped at
 * LIBINJECTION_SQLI_MAX_TOKENS.  Returns 0 when the input ran out while
 * only comments, left parentheses, SQL types and unary operators had been
 * seen.
 */
+ int libinjection_sqli_fold(struct libinjection_sqli_state * sf)
1372
+ {
1373
+ stoken_t last_comment;
1374
+
1375
+ /* POS is the position of where the NEXT token goes */
1376
+ size_t pos = 0;
1377
+
1378
+ /* LEFT is a count of how many tokens that are already
1379
+ folded or processed (i.e. part of the fingerprint) */
1380
+ size_t left = 0;
1381
+
1382
+ int more = 1; /* result of the last tokenize call; loops stop fetching once it is 0 */
1383
+
1384
+ st_clear(&last_comment);
1385
+
1386
+ /* Skip all initial comments, left-parens ( and unary operators
1387
+ *
1388
+ */
1389
+ sf->current = &(sf->tokenvec[0]); /* skipped tokens are all written to slot 0 and overwritten */
1390
+ while (more) {
1391
+ more = libinjection_sqli_tokenize(sf);
1392
+ if ( ! (sf->current->type == TYPE_COMMENT ||
1393
+ sf->current->type == TYPE_LEFTPARENS ||
1394
+ sf->current->type == TYPE_SQLTYPE ||
1395
+ st_is_unary_op(sf->current))) {
1396
+ break;
1397
+ }
1398
+ }
1399
+
1400
+ if (! more) {
1401
+ /* If input was only comments, unary or (, then exit */
1402
+ return 0;
1403
+ } else {
1404
+ /* it's some other token */
1405
+ pos += 1;
1406
+ }
1407
+
1408
+ while (1) {
1409
+ FOLD_DEBUG;
1410
+
1411
+ /* do we have all the max number of tokens? if so do
1412
+ * some special cases for 5 tokens
1413
+ */
1414
+ if (pos >= LIBINJECTION_SQLI_MAX_TOKENS) {
1415
+ if (
1416
+ (
1417
+ sf->tokenvec[0].type == TYPE_NUMBER &&
1418
+ (sf->tokenvec[1].type == TYPE_OPERATOR || sf->tokenvec[1].type == TYPE_COMMA) &&
1419
+ sf->tokenvec[2].type == TYPE_LEFTPARENS &&
1420
+ sf->tokenvec[3].type == TYPE_NUMBER &&
1421
+ sf->tokenvec[4].type == TYPE_RIGHTPARENS
1422
+ ) ||
1423
+ (
1424
+ sf->tokenvec[0].type == TYPE_BAREWORD &&
1425
+ sf->tokenvec[1].type == TYPE_OPERATOR &&
1426
+ sf->tokenvec[2].type == TYPE_LEFTPARENS &&
1427
+ (sf->tokenvec[3].type == TYPE_BAREWORD || sf->tokenvec[3].type == TYPE_NUMBER) &&
1428
+ sf->tokenvec[4].type == TYPE_RIGHTPARENS
1429
+ ) ||
1430
+ (
1431
+ sf->tokenvec[0].type == TYPE_NUMBER &&
1432
+ sf->tokenvec[1].type == TYPE_RIGHTPARENS &&
1433
+ sf->tokenvec[2].type == TYPE_COMMA &&
1434
+ sf->tokenvec[3].type == TYPE_LEFTPARENS &&
1435
+ sf->tokenvec[4].type == TYPE_NUMBER
1436
+ ) ||
1437
+ (
1438
+ sf->tokenvec[0].type == TYPE_BAREWORD &&
1439
+ sf->tokenvec[1].type == TYPE_RIGHTPARENS &&
1440
+ sf->tokenvec[2].type == TYPE_OPERATOR &&
1441
+ sf->tokenvec[3].type == TYPE_LEFTPARENS &&
1442
+ sf->tokenvec[4].type == TYPE_BAREWORD
1443
+ )
1444
+ )
1445
+ {
1446
+ if (pos > LIBINJECTION_SQLI_MAX_TOKENS) {
1447
+ st_copy(&(sf->tokenvec[1]), &(sf->tokenvec[LIBINJECTION_SQLI_MAX_TOKENS]));
1448
+ pos = 2;
1449
+ left = 0;
1450
+ } else {
1451
+ pos = 1;
1452
+ left = 0;
1453
+ }
1454
+ }
1455
+ }
1456
+
1457
+ if (! more || left >= LIBINJECTION_SQLI_MAX_TOKENS) { /* input exhausted or enough tokens finalized */
1458
+ left = pos;
1459
+ break;
1460
+ }
1461
+
1462
+ /* get up to two tokens */
1463
+ while (more && pos <= LIBINJECTION_SQLI_MAX_TOKENS && (pos - left) < 2) {
1464
+ sf->current = &(sf->tokenvec[pos]);
1465
+ more = libinjection_sqli_tokenize(sf);
1466
+ if (more) {
1467
+ if (sf->current->type == TYPE_COMMENT) {
1468
+ st_copy(&last_comment, sf->current);
1469
+ } else {
1470
+ last_comment.type = CHAR_NULL;
1471
+ pos += 1;
1472
+ }
1473
+ }
1474
+ }
1475
+ FOLD_DEBUG;
1476
+ /* did we get 2 tokens? if not then we are done */
1477
+ if (pos - left < 2) {
1478
+ left = pos;
1479
+ continue;
1480
+ }
1481
+
1482
+ /* FOLD: "ss" -> "s"
1483
+ * "foo" "bar" is valid SQL
1484
+ * just ignore second string
1485
+ */
1486
+ if (sf->tokenvec[left].type == TYPE_STRING && sf->tokenvec[left+1].type == TYPE_STRING) {
1487
+ pos -= 1;
1488
+ sf->stats_folds += 1;
1489
+ continue;
1490
+ } else if (sf->tokenvec[left].type == TYPE_SEMICOLON && sf->tokenvec[left+1].type == TYPE_SEMICOLON) {
1491
+ /* not sure how various engines handle
1492
+ * 'select 1;;drop table foo' or
1493
+ * 'select 1; /x foo x/; drop table foo'
1494
+ * to prevent surprises, just fold away repeated semicolons
1495
+ */
1496
+ pos -= 1;
1497
+ sf->stats_folds += 1;
1498
+ continue;
1499
+ } else if ((sf->tokenvec[left].type == TYPE_OPERATOR ||
1500
+ sf->tokenvec[left].type == TYPE_LOGIC_OPERATOR) &&
1501
+ (st_is_unary_op(&sf->tokenvec[left+1]) ||
1502
+ sf->tokenvec[left+1].type == TYPE_SQLTYPE)) {
1503
+ pos -= 1;
1504
+ sf->stats_folds += 1;
1505
+ left = 0;
1506
+ continue;
1507
+ } else if (sf->tokenvec[left].type == TYPE_LEFTPARENS &&
1508
+ st_is_unary_op(&sf->tokenvec[left+1])) {
1509
+ pos -= 1;
1510
+ sf->stats_folds += 1;
1511
+ if (left > 0) {
1512
+ left -= 1;
1513
+ }
1514
+ continue;
1515
+ } else if (syntax_merge_words(sf, &sf->tokenvec[left], &sf->tokenvec[left+1])) {
1516
+ pos -= 1;
1517
+ sf->stats_folds += 1;
1518
+ if (left > 0) {
1519
+ left -= 1;
1520
+ }
1521
+ continue;
1522
+ } else if (sf->tokenvec[left].type == TYPE_SEMICOLON &&
1523
+ sf->tokenvec[left+1].type == TYPE_FUNCTION &&
1524
+ (sf->tokenvec[left+1].val[0] == 'I' ||
1525
+ sf->tokenvec[left+1].val[0] == 'i' ) &&
1526
+ (sf->tokenvec[left+1].val[1] == 'F' ||
1527
+ sf->tokenvec[left+1].val[1] == 'f' )) {
1528
+ /* IF is normally a function, except in Transact-SQL where it can be used as a
1529
+ * standalone control flow operator, e.g. ; IF 1=1 ...
1530
+ * if found after a semicolon, convert from 'f' type to 'T' type
1531
+ */
1532
+ sf->tokenvec[left+1].type = TYPE_TSQL;
1533
+ /* left += 2; */
1534
+ continue; /* reparse everything, but we probably can advance left, and pos */
1535
+ } else if ((sf->tokenvec[left].type == TYPE_BAREWORD || sf->tokenvec[left].type == TYPE_VARIABLE) &&
1536
+ sf->tokenvec[left+1].type == TYPE_LEFTPARENS && (
1537
+ /* TSQL functions but common enough to be column names */
1538
+ cstrcasecmp("USER_ID", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
1539
+ cstrcasecmp("USER_NAME", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
1540
+
1541
+ /* Function in MYSQL */
1542
+ cstrcasecmp("DATABASE", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
1543
+ cstrcasecmp("PASSWORD", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
1544
+ cstrcasecmp("USER", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
1545
+
1546
+ /* Mysql words that act as a variable and are a function */
1547
+
1548
+ /* TSQL current_users is fake-variable */
1549
+ /* http://msdn.microsoft.com/en-us/library/ms176050.aspx */
1550
+ cstrcasecmp("CURRENT_USER", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
1551
+ cstrcasecmp("CURRENT_DATE", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
1552
+ cstrcasecmp("CURRENT_TIME", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
1553
+ cstrcasecmp("CURRENT_TIMESTAMP", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
1554
+ cstrcasecmp("LOCALTIME", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
1555
+ cstrcasecmp("LOCALTIMESTAMP", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0
1556
+ )) {
1557
+
1558
+ /* pos is the same
1559
+ * other conversions need to go here... for instance
1560
+ * password CAN be a function, coalesce CAN be a function
1561
+ */
1562
+ sf->tokenvec[left].type = TYPE_FUNCTION;
1563
+ continue;
1564
+ } else if (sf->tokenvec[left].type == TYPE_KEYWORD && (
1565
+ cstrcasecmp("IN", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
1566
+ cstrcasecmp("NOT IN", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0
1567
+ )) {
1568
+
1569
+ if (sf->tokenvec[left+1].type == TYPE_LEFTPARENS) {
1570
+ /* got .... IN ( ... (or 'NOT IN')
1571
+ * it's an operator
1572
+ */
1573
+ sf->tokenvec[left].type = TYPE_OPERATOR;
1574
+ } else {
1575
+ /*
1576
+ * it's a nothing
1577
+ */
1578
+ sf->tokenvec[left].type = TYPE_BAREWORD;
1579
+ }
1580
+
1581
+ /* "IN" can be used as "IN BOOLEAN MODE" for mysql
1582
+ * in which case merging of words can be done later
1583
+ * otherwise it acts as an equality operator __ IN (values..)
1584
+ *
1585
+ * here we got "IN" "(" so it's an operator.
1586
+ * also back track to handle "NOT IN"
1587
+ * might need to do the same with like
1588
+ * two use cases "foo" LIKE "BAR" (normal operator)
1589
+ * "foo" = LIKE(1,2)
1590
+ */
1591
+ continue;
1592
+ } else if ((sf->tokenvec[left].type == TYPE_OPERATOR) && (
1593
+ cstrcasecmp("LIKE", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
1594
+ cstrcasecmp("NOT LIKE", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0)) {
1595
+ if (sf->tokenvec[left+1].type == TYPE_LEFTPARENS) {
1596
+ /* SELECT LIKE(...
1597
+ * it's a function
1598
+ */
1599
+ sf->tokenvec[left].type = TYPE_FUNCTION;
1600
+ }
1601
+ } else if (sf->tokenvec[left].type == TYPE_SQLTYPE &&
1602
+ (sf->tokenvec[left+1].type == TYPE_BAREWORD ||
1603
+ sf->tokenvec[left+1].type == TYPE_NUMBER ||
1604
+ sf->tokenvec[left+1].type == TYPE_SQLTYPE ||
1605
+ sf->tokenvec[left+1].type == TYPE_LEFTPARENS ||
1606
+ sf->tokenvec[left+1].type == TYPE_FUNCTION ||
1607
+ sf->tokenvec[left+1].type == TYPE_VARIABLE ||
1608
+ sf->tokenvec[left+1].type == TYPE_STRING)) {
1609
+ st_copy(&sf->tokenvec[left], &sf->tokenvec[left+1]);
1610
+ pos -= 1;
1611
+ sf->stats_folds += 1;
1612
+ left = 0;
1613
+ continue;
1614
+ } else if (sf->tokenvec[left].type == TYPE_COLLATE &&
1615
+ sf->tokenvec[left+1].type == TYPE_BAREWORD) {
1616
+ /*
1617
+ * there are too many collation types.. so if the bareword has a "_"
1618
+ * then it's TYPE_SQLTYPE
1619
+ */
1620
+ if (strchr(sf->tokenvec[left+1].val, '_') != NULL) {
1621
+ sf->tokenvec[left+1].type = TYPE_SQLTYPE;
1622
+ left = 0;
1623
+ }
1624
+ } else if (sf->tokenvec[left].type == TYPE_BACKSLASH) {
1625
+ if (st_is_arithmetic_op(&(sf->tokenvec[left+1]))) {
1626
+ /* very weird case in TSQL where '\%1' is parsed as '0 % 1', etc */
1627
+ sf->tokenvec[left].type = TYPE_NUMBER;
1628
+ } else {
1629
+ /* just ignore it.. Again T-SQL seems to parse \1 as "1" */
1630
+ st_copy(&sf->tokenvec[left], &sf->tokenvec[left+1]);
1631
+ pos -= 1;
1632
+ sf->stats_folds += 1;
1633
+ }
1634
+ left = 0;
1635
+ continue;
1636
+ } else if (sf->tokenvec[left].type == TYPE_LEFTPARENS &&
1637
+ sf->tokenvec[left+1].type == TYPE_LEFTPARENS) {
1638
+ pos -= 1;
1639
+ left = 0;
1640
+ sf->stats_folds += 1;
1641
+ continue;
1642
+ } else if (sf->tokenvec[left].type == TYPE_RIGHTPARENS &&
1643
+ sf->tokenvec[left+1].type == TYPE_RIGHTPARENS) {
1644
+ pos -= 1;
1645
+ left = 0;
1646
+ sf->stats_folds += 1;
1647
+ continue;
1648
+ } else if (sf->tokenvec[left].type == TYPE_LEFTBRACE &&
1649
+ sf->tokenvec[left+1].type == TYPE_BAREWORD) {
1650
+
1651
+ /*
1652
+ * MySQL Degenerate case --
1653
+ *
1654
+ * select { ``.``.id }; -- valid !!!
1655
+ * select { ``.``.``.id }; -- invalid
1656
+ * select ``.``.id; -- invalid
1657
+ * select { ``.id }; -- invalid
1658
+ *
1659
+ * so it appears {``.``.id} is a magic case
1660
+ * I suspect this is "current database, current table, field id"
1661
+ *
1662
+ * The folding code can't look at more than 3 tokens, and
1663
+ * I don't want to make two passes.
1664
+ *
1665
+ * Since "{ ``" so rare, we are just going to blacklist it.
1666
+ *
1667
+ * Highly likely this will need revisiting!
1668
+ *
1669
+ * CREDIT @rsalgado 2013-11-25
1670
+ */
1671
+ if (sf->tokenvec[left+1].len == 0) {
1672
+ sf->tokenvec[left+1].type = TYPE_EVIL;
1673
+ return (int)(left+2); /* early exit: count covers the '{' and the token just marked EVIL */
1674
+ }
1675
+ /* weird ODBC / MYSQL {foo expr} --> expr
1676
+ * but for this rule we just strip away the "{ foo" part
1677
+ */
1678
+ left = 0;
1679
+ pos -= 2;
1680
+ sf->stats_folds += 2;
1681
+ continue;
1682
+ } else if (sf->tokenvec[left+1].type == TYPE_RIGHTBRACE) {
1683
+ pos -= 1;
1684
+ left = 0;
1685
+ sf->stats_folds += 1;
1686
+ continue;
1687
+ }
1688
+
1689
+ /* all cases of handling 2 tokens are done
1690
+ and nothing matched. Get one more token
1691
+ */
1692
+ FOLD_DEBUG;
1693
+ while (more && pos <= LIBINJECTION_SQLI_MAX_TOKENS && pos - left < 3) {
1694
+ sf->current = &(sf->tokenvec[pos]);
1695
+ more = libinjection_sqli_tokenize(sf);
1696
+ if (more) {
1697
+ if (sf->current->type == TYPE_COMMENT) {
1698
+ st_copy(&last_comment, sf->current);
1699
+ } else {
1700
+ last_comment.type = CHAR_NULL;
1701
+ pos += 1;
1702
+ }
1703
+ }
1704
+ }
1705
+
1706
+ /* do we have three tokens? If not then we are done */
1707
+ if (pos -left < 3) {
1708
+ left = pos;
1709
+ continue;
1710
+ }
1711
+
1712
+ /*
1713
+ * now look for three token folding
1714
+ */
1715
+ if (sf->tokenvec[left].type == TYPE_NUMBER &&
1716
+ sf->tokenvec[left+1].type == TYPE_OPERATOR &&
1717
+ sf->tokenvec[left+2].type == TYPE_NUMBER) {
1718
+ pos -= 2;
1719
+ left = 0;
1720
+ continue;
1721
+ } else if (sf->tokenvec[left].type == TYPE_OPERATOR &&
1722
+ sf->tokenvec[left+1].type != TYPE_LEFTPARENS &&
1723
+ sf->tokenvec[left+2].type == TYPE_OPERATOR) {
1724
+ left = 0;
1725
+ pos -= 2;
1726
+ continue;
1727
+ } else if (sf->tokenvec[left].type == TYPE_LOGIC_OPERATOR &&
1728
+ sf->tokenvec[left+2].type == TYPE_LOGIC_OPERATOR) {
1729
+ pos -= 2;
1730
+ left = 0;
1731
+ continue;
1732
+ } else if (sf->tokenvec[left].type == TYPE_VARIABLE &&
1733
+ sf->tokenvec[left+1].type == TYPE_OPERATOR &&
1734
+ (sf->tokenvec[left+2].type == TYPE_VARIABLE ||
1735
+ sf->tokenvec[left+2].type == TYPE_NUMBER ||
1736
+ sf->tokenvec[left+2].type == TYPE_BAREWORD)) {
1737
+ pos -= 2;
1738
+ left = 0;
1739
+ continue;
1740
+ } else if ((sf->tokenvec[left].type == TYPE_BAREWORD ||
1741
+ sf->tokenvec[left].type == TYPE_NUMBER ) &&
1742
+ sf->tokenvec[left+1].type == TYPE_OPERATOR &&
1743
+ (sf->tokenvec[left+2].type == TYPE_NUMBER ||
1744
+ sf->tokenvec[left+2].type == TYPE_BAREWORD)) {
1745
+ pos -= 2;
1746
+ left = 0;
1747
+ continue;
1748
+ } else if ((sf->tokenvec[left].type == TYPE_BAREWORD ||
1749
+ sf->tokenvec[left].type == TYPE_NUMBER ||
1750
+ sf->tokenvec[left].type == TYPE_VARIABLE ||
1751
+ sf->tokenvec[left].type == TYPE_STRING) &&
1752
+ sf->tokenvec[left+1].type == TYPE_OPERATOR &&
1753
+ streq(sf->tokenvec[left+1].val, "::") &&
1754
+ sf->tokenvec[left+2].type == TYPE_SQLTYPE) {
1755
+ pos -= 2;
1756
+ left = 0;
1757
+ sf->stats_folds += 2;
1758
+ continue;
1759
+ } else if ((sf->tokenvec[left].type == TYPE_BAREWORD ||
1760
+ sf->tokenvec[left].type == TYPE_NUMBER ||
1761
+ sf->tokenvec[left].type == TYPE_STRING ||
1762
+ sf->tokenvec[left].type == TYPE_VARIABLE) &&
1763
+ sf->tokenvec[left+1].type == TYPE_COMMA &&
1764
+ (sf->tokenvec[left+2].type == TYPE_NUMBER ||
1765
+ sf->tokenvec[left+2].type == TYPE_BAREWORD ||
1766
+ sf->tokenvec[left+2].type == TYPE_STRING ||
1767
+ sf->tokenvec[left+2].type == TYPE_VARIABLE)) {
1768
+ pos -= 2;
1769
+ left = 0;
1770
+ continue;
1771
+ } else if ((sf->tokenvec[left].type == TYPE_EXPRESSION ||
1772
+ sf->tokenvec[left].type == TYPE_GROUP ||
1773
+ sf->tokenvec[left].type == TYPE_COMMA) &&
1774
+ st_is_unary_op(&sf->tokenvec[left+1]) &&
1775
+ sf->tokenvec[left+2].type == TYPE_LEFTPARENS) {
1776
+ /* got something like SELECT + (, LIMIT + (
1777
+ * remove unary operator
1778
+ */
1779
+ st_copy(&sf->tokenvec[left+1], &sf->tokenvec[left+2]);
1780
+ pos -= 1;
1781
+ left = 0;
1782
+ continue;
1783
+ } else if ((sf->tokenvec[left].type == TYPE_KEYWORD ||
1784
+ sf->tokenvec[left].type == TYPE_EXPRESSION ||
1785
+ sf->tokenvec[left].type == TYPE_GROUP ) &&
1786
+ st_is_unary_op(&sf->tokenvec[left+1]) &&
1787
+ (sf->tokenvec[left+2].type == TYPE_NUMBER ||
1788
+ sf->tokenvec[left+2].type == TYPE_BAREWORD ||
1789
+ sf->tokenvec[left+2].type == TYPE_VARIABLE ||
1790
+ sf->tokenvec[left+2].type == TYPE_STRING ||
1791
+ sf->tokenvec[left+2].type == TYPE_FUNCTION )) {
1792
+ /* remove unary operators
1793
+ * select - 1
1794
+ */
1795
+ st_copy(&sf->tokenvec[left+1], &sf->tokenvec[left+2]);
1796
+ pos -= 1;
1797
+ left = 0;
1798
+ continue;
1799
+ } else if (sf->tokenvec[left].type == TYPE_COMMA &&
1800
+ st_is_unary_op(&sf->tokenvec[left+1]) &&
1801
+ (sf->tokenvec[left+2].type == TYPE_NUMBER ||
1802
+ sf->tokenvec[left+2].type == TYPE_BAREWORD ||
1803
+ sf->tokenvec[left+2].type == TYPE_VARIABLE ||
1804
+ sf->tokenvec[left+2].type == TYPE_STRING)) {
1805
+ /*
1806
+ * interesting case turn ", -1" ->> ",1" PLUS we need to back up
1807
+ * one token if possible to see if more folding can be done
1808
+ * "1,-1" --> "1"
1809
+ */
1810
+ st_copy(&sf->tokenvec[left+1], &sf->tokenvec[left+2]);
1811
+ left = 0;
1812
+ /* pos is >= 3 so this is safe */
1813
+ assert(pos >= 3);
1814
+ pos -= 3;
1815
+ continue;
1816
+ } else if (sf->tokenvec[left].type == TYPE_COMMA &&
1817
+ st_is_unary_op(&sf->tokenvec[left+1]) &&
1818
+ sf->tokenvec[left+2].type == TYPE_FUNCTION) {
1819
+
1820
+ /* Separate case from above since you end up with
1821
+ * 1,-sin(1) --> 1 (1)
1822
+ * Here, just do
1823
+ * 1,-sin(1) --> 1,sin(1)
1824
+ * just remove unary operator
1825
+ */
1826
+ st_copy(&sf->tokenvec[left+1], &sf->tokenvec[left+2]);
1827
+ pos -= 1;
1828
+ left = 0;
1829
+ continue;
1830
+ } else if ((sf->tokenvec[left].type == TYPE_BAREWORD) &&
1831
+ (sf->tokenvec[left+1].type == TYPE_DOT) &&
1832
+ (sf->tokenvec[left+2].type == TYPE_BAREWORD)) {
1833
+ /* ignore the '.n'
1834
+ * typically is this databasename.table
1835
+ */
1836
+ assert(pos >= 3);
1837
+ pos -= 2;
1838
+ left = 0;
1839
+ continue;
1840
+ } else if ((sf->tokenvec[left].type == TYPE_EXPRESSION) &&
1841
+ (sf->tokenvec[left+1].type == TYPE_DOT) &&
1842
+ (sf->tokenvec[left+2].type == TYPE_BAREWORD)) {
1843
+ /* select . `foo` --> select `foo` */
1844
+ st_copy(&sf->tokenvec[left+1], &sf->tokenvec[left+2]);
1845
+ pos -= 1;
1846
+ left = 0;
1847
+ continue;
1848
+ } else if ((sf->tokenvec[left].type == TYPE_FUNCTION) &&
1849
+ (sf->tokenvec[left+1].type == TYPE_LEFTPARENS) &&
1850
+ (sf->tokenvec[left+2].type != TYPE_RIGHTPARENS)) {
1851
+ /*
1852
+ * what's going on here
1853
+ * Some SQL functions like USER() have 0 args
1854
+ * if we get User(foo), then User is not a function
1855
+ * This should be expanded since it eliminated a lot of false
1856
+ * positives.
1857
+ */
1858
+ if (cstrcasecmp("USER", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0) {
1859
+ sf->tokenvec[left].type = TYPE_BAREWORD;
1860
+ }
1861
+ }
1862
+
1863
+ /* no folding -- assume left-most token is
1864
+ is good, now use the existing 2 tokens --
1865
+ do not get another
1866
+ */
1867
+
1868
+ left += 1;
1869
+
1870
+ } /* while(1) */
1871
+
1872
+ /* if we have 4 or less tokens, and we had a comment token
1873
+ * at the end, add it back
1874
+ */
1875
+
1876
+ if (left < LIBINJECTION_SQLI_MAX_TOKENS && last_comment.type == TYPE_COMMENT) {
1877
+ st_copy(&sf->tokenvec[left], &last_comment);
1878
+ left += 1;
1879
+ }
1880
+
1881
+ /* sometimes we grab a 6th token to help
1882
+ determine the type of token 5.
1883
+ */
1884
+ if (left > LIBINJECTION_SQLI_MAX_TOKENS) {
1885
+ left = LIBINJECTION_SQLI_MAX_TOKENS;
1886
+ }
1887
+
1888
+ return (int)left;
1889
+ }
1890
+
1891
+ /* secondary api: detects SQLi in a string, GIVEN a context.
1892
+ *
1893
+ * A context can be:
1894
+ * * CHAR_NULL (\0), process as is
1895
+ * * CHAR_SINGLE ('), process pretending input started with a
1896
+ * single quote.
1897
+ * * CHAR_DOUBLE ("), process pretending input started with a
1898
+ * double quote.
1899
+ *
1900
+ */
1901
+ const char* libinjection_sqli_fingerprint(struct libinjection_sqli_state * sql_state, int flags)
1902
+ {
1903
+ int i;
1904
+ int tlen = 0;
1905
+
1906
+ libinjection_sqli_reset(sql_state, flags);
1907
+
1908
+ tlen = libinjection_sqli_fold(sql_state);
1909
+
1910
+ /* Check for magic PHP backquote comment
1911
+ * If:
1912
+ * * last token is of type "bareword"
1913
+ * * And is quoted in a backtick
1914
+ * * And isn't closed
1915
+ * * And it's empty?
1916
+ * Then convert it to comment
1917
+ */
1918
+ if (tlen > 2 &&
1919
+ sql_state->tokenvec[tlen-1].type == TYPE_BAREWORD &&
1920
+ sql_state->tokenvec[tlen-1].str_open == CHAR_TICK &&
1921
+ sql_state->tokenvec[tlen-1].len == 0 &&
1922
+ sql_state->tokenvec[tlen-1].str_close == CHAR_NULL) {
1923
+ sql_state->tokenvec[tlen-1].type = TYPE_COMMENT;
1924
+ }
1925
+
1926
+ for (i = 0; i < tlen; ++i) {
1927
+ sql_state->fingerprint[i] = sql_state->tokenvec[i].type;
1928
+ }
1929
+
1930
+ /*
1931
+ * make the fingerprint pattern a c-string (null delimited)
1932
+ */
1933
+ sql_state->fingerprint[tlen] = CHAR_NULL;
1934
+
1935
+ /*
1936
+ * check for 'X' in pattern, and then
1937
+ * clear out all tokens
1938
+ *
1939
+ * this means parsing could not be done
1940
+ * accurately due to pgsql's double comments
1941
+ * or other syntax that isn't consistent.
1942
+ * Should be very rare false positive
1943
+ */
1944
+ if (strchr(sql_state->fingerprint, TYPE_EVIL)) {
1945
+ /* needed for SWIG */
1946
+ memset((void*)sql_state->fingerprint, 0, LIBINJECTION_SQLI_MAX_TOKENS + 1);
1947
+ memset((void*)sql_state->tokenvec[0].val, 0, LIBINJECTION_SQLI_TOKEN_SIZE);
1948
+
1949
+ sql_state->fingerprint[0] = TYPE_EVIL;
1950
+
1951
+ sql_state->tokenvec[0].type = TYPE_EVIL;
1952
+ sql_state->tokenvec[0].val[0] = TYPE_EVIL;
1953
+ sql_state->tokenvec[1].type = CHAR_NULL;
1954
+ }
1955
+
1956
+
1957
+ return sql_state->fingerprint;
1958
+ }
1959
+
1960
/**
 * Decide whether the fingerprint currently held in sql_state is SQLi.
 *
 * Returns 1 when libinjection_sqli_blacklist() and
 * libinjection_sqli_not_whitelist() both report a match, 0 otherwise.
 * The second check is only run when the first one matched.
 */
int libinjection_sqli_check_fingerprint(struct libinjection_sqli_state* sql_state)
{
    if (!libinjection_sqli_blacklist(sql_state)) {
        return 0;
    }
    return libinjection_sqli_not_whitelist(sql_state) != 0;
}
1965
+
1966
+ char libinjection_sqli_lookup_word(struct libinjection_sqli_state *sql_state, int lookup_type,
1967
+ const char* str, size_t len)
1968
+ {
1969
+ if (lookup_type == LOOKUP_FINGERPRINT) {
1970
+ return libinjection_sqli_check_fingerprint(sql_state) ? 'X' : '\0';
1971
+ } else {
1972
+ return bsearch_keyword_type(str, len, sql_keywords, sql_keywords_sz);
1973
+ }
1974
+ }
1975
+
1976
+ int libinjection_sqli_blacklist(struct libinjection_sqli_state* sql_state)
1977
+ {
1978
+ /*
1979
+ * use minimum of 8 bytes to make sure gcc -fstack-protector
1980
+ * works correctly
1981
+ */
1982
+ char fp2[8];
1983
+ char ch;
1984
+ size_t i;
1985
+ size_t len = strlen(sql_state->fingerprint);
1986
+ int patmatch;
1987
+
1988
+ if (len < 1) {
1989
+ sql_state->reason = __LINE__;
1990
+ return FALSE;
1991
+ }
1992
+
1993
+ /*
1994
+ to keep everything compatible, convert the
1995
+ v0 fingerprint pattern to v1
1996
+ v0: up to 5 chars, mixed case
1997
+ v1: 1 char is '0', up to 5 more chars, upper case
1998
+ */
1999
+
2000
+ fp2[0] = '0';
2001
+ for (i = 0; i < len; ++i) {
2002
+ ch = sql_state->fingerprint[i];
2003
+ if (ch >= 'a' && ch <= 'z') {
2004
+ ch -= 0x20;
2005
+ }
2006
+ fp2[i+1] = ch;
2007
+ }
2008
+ fp2[i+1] = '\0';
2009
+
2010
+ patmatch = is_keyword(fp2, len + 1) == TYPE_FINGERPRINT;
2011
+
2012
+ /*
2013
+ * No match.
2014
+ *
2015
+ * Set sql_state->reason to current line number
2016
+ * only for debugging purposes.
2017
+ */
2018
+ if (!patmatch) {
2019
+ sql_state->reason = __LINE__;
2020
+ return FALSE;
2021
+ }
2022
+
2023
+ return TRUE;
2024
+ }
2025
+
2026
+ /*
2027
+ * return TRUE if SQLi, false is benign
2028
+ */
2029
+ int libinjection_sqli_not_whitelist(struct libinjection_sqli_state* sql_state)
2030
+ {
2031
+ /*
2032
+ * We assume we got a SQLi match
2033
+ * This next part just helps reduce false positives.
2034
+ *
2035
+ */
2036
+ char ch;
2037
+ size_t tlen = strlen(sql_state->fingerprint);
2038
+
2039
+ if (tlen > 1 && sql_state->fingerprint[tlen-1] == TYPE_COMMENT) {
2040
+ /*
2041
+ * if ending comment is contains 'sp_password' then it's SQLi!
2042
+ * MS Audit log apparently ignores anything with
2043
+ * 'sp_password' in it. Unable to find primary reference to
2044
+ * this "feature" of SQL Server but seems to be known SQLi
2045
+ * technique
2046
+ */
2047
+ if (my_memmem(sql_state->s, sql_state->slen,
2048
+ "sp_password", strlen("sp_password"))) {
2049
+ sql_state->reason = __LINE__;
2050
+ return TRUE;
2051
+ }
2052
+ }
2053
+
2054
+ switch (tlen) {
2055
+ case 2:{
2056
+ /*
2057
+ * case 2 are "very small SQLi" which make them
2058
+ * hard to tell from normal input...
2059
+ */
2060
+
2061
+ if (sql_state->fingerprint[1] == TYPE_UNION) {
2062
+ if (sql_state->stats_tokens == 2) {
2063
+ /* not sure why but 1U comes up in SQLi attack
2064
+ * likely part of parameter splitting/etc.
2065
+ * lots of reasons why "1 union" might be normal
2066
+ * input, so beep only if other SQLi things are present
2067
+ */
2068
+ /* it really is a number and 'union'
2069
+ * other wise it has folding or comments
2070
+ */
2071
+ sql_state->reason = __LINE__;
2072
+ return FALSE;
2073
+ } else {
2074
+ sql_state->reason = __LINE__;
2075
+ return TRUE;
2076
+ }
2077
+ }
2078
+ /*
2079
+ * if 'comment' is '#' ignore.. too many FP
2080
+ */
2081
+ if (sql_state->tokenvec[1].val[0] == '#') {
2082
+ sql_state->reason = __LINE__;
2083
+ return FALSE;
2084
+ }
2085
+
2086
+ /*
2087
+ * for fingerprint like 'nc', only comments of /x are treated
2088
+ * as SQL... ending comments of "--" and "#" are not SQLi
2089
+ */
2090
+ if (sql_state->tokenvec[0].type == TYPE_BAREWORD &&
2091
+ sql_state->tokenvec[1].type == TYPE_COMMENT &&
2092
+ sql_state->tokenvec[1].val[0] != '/') {
2093
+ sql_state->reason = __LINE__;
2094
+ return FALSE;
2095
+ }
2096
+
2097
+ /*
2098
+ * if '1c' ends with '/x' then it's SQLi
2099
+ */
2100
+ if (sql_state->tokenvec[0].type == TYPE_NUMBER &&
2101
+ sql_state->tokenvec[1].type == TYPE_COMMENT &&
2102
+ sql_state->tokenvec[1].val[0] == '/') {
2103
+ return TRUE;
2104
+ }
2105
+
2106
+ /**
2107
+ * there are some odd base64-looking query string values
2108
+ * 1234-ABCDEFEhfhihwuefi--
2109
+ * which evaluate to "1c"... these are not SQLi
2110
+ * but 1234-- probably is.
2111
+ * Make sure the "1" in "1c" is actually a true decimal number
2112
+ *
2113
+ * Need to check -original- string since the folding step
2114
+ * may have merged tokens, e.g. "1+FOO" is folded into "1"
2115
+ *
2116
+ * Note: evasion: 1*1--
2117
+ */
2118
+ if (sql_state->tokenvec[0].type == TYPE_NUMBER &&
2119
+ sql_state->tokenvec[1].type == TYPE_COMMENT) {
2120
+ if (sql_state->stats_tokens > 2) {
2121
+ /* we have some folding going on, highly likely SQLi */
2122
+ sql_state->reason = __LINE__;
2123
+ return TRUE;
2124
+ }
2125
+ /*
2126
+ * we check that next character after the number is either whitespace,
2127
+ * or '/' or a '-' ==> SQLi.
2128
+ */
2129
+ ch = sql_state->s[sql_state->tokenvec[0].len];
2130
+ if ( ch <= 32 ) {
2131
+ /* next char was whitespace,e.g. "1234 --"
2132
+ * this isn't exactly correct.. ideally we should skip over all whitespace
2133
+ * but this seems to be ok for now
2134
+ */
2135
+ return TRUE;
2136
+ }
2137
+ if (ch == '/' && sql_state->s[sql_state->tokenvec[0].len + 1] == '*') {
2138
+ return TRUE;
2139
+ }
2140
+ if (ch == '-' && sql_state->s[sql_state->tokenvec[0].len + 1] == '-') {
2141
+ return TRUE;
2142
+ }
2143
+
2144
+ sql_state->reason = __LINE__;
2145
+ return FALSE;
2146
+ }
2147
+
2148
+ /*
2149
+ * detect obvious SQLi scans.. many people put '--' in plain text
2150
+ * so only detect if input ends with '--', e.g. 1-- but not 1-- foo
2151
+ */
2152
+ if ((sql_state->tokenvec[1].len > 2)
2153
+ && sql_state->tokenvec[1].val[0] == '-') {
2154
+ sql_state->reason = __LINE__;
2155
+ return FALSE;
2156
+ }
2157
+
2158
+ break;
2159
+ } /* case 2 */
2160
+ case 3:{
2161
+ /*
2162
+ * ...foo' + 'bar...
2163
+ * no opening quote, no closing quote
2164
+ * and each string has data
2165
+ */
2166
+
2167
+ if (streq(sql_state->fingerprint, "sos")
2168
+ || streq(sql_state->fingerprint, "s&s")) {
2169
+
2170
+ if ((sql_state->tokenvec[0].str_open == CHAR_NULL)
2171
+ && (sql_state->tokenvec[2].str_close == CHAR_NULL)
2172
+ && (sql_state->tokenvec[0].str_close == sql_state->tokenvec[2].str_open)) {
2173
+ /*
2174
+ * if ....foo" + "bar....
2175
+ */
2176
+ sql_state->reason = __LINE__;
2177
+ return TRUE;
2178
+ }
2179
+ if (sql_state->stats_tokens == 3) {
2180
+ sql_state->reason = __LINE__;
2181
+ return FALSE;
2182
+ }
2183
+
2184
+ /*
2185
+ * not SQLi
2186
+ */
2187
+ sql_state->reason = __LINE__;
2188
+ return FALSE;
2189
+ } else if (streq(sql_state->fingerprint, "s&n") ||
2190
+ streq(sql_state->fingerprint, "n&1") ||
2191
+ streq(sql_state->fingerprint, "1&1") ||
2192
+ streq(sql_state->fingerprint, "1&v") ||
2193
+ streq(sql_state->fingerprint, "1&s")) {
2194
+ /* 'sexy and 17' not SQLi
2195
+ * 'sexy and 17<18' SQLi
2196
+ */
2197
+ if (sql_state->stats_tokens == 3) {
2198
+ sql_state->reason = __LINE__;
2199
+ return FALSE;
2200
+ }
2201
+ } else if (sql_state->tokenvec[1].type == TYPE_KEYWORD) {
2202
+ if ((sql_state->tokenvec[1].len < 5) ||
2203
+ cstrcasecmp("INTO", sql_state->tokenvec[1].val, 4)) {
2204
+ /* if it's not "INTO OUTFILE", or "INTO DUMPFILE" (MySQL)
2205
+ * then treat as safe
2206
+ */
2207
+ sql_state->reason = __LINE__;
2208
+ return FALSE;
2209
+ }
2210
+ }
2211
+ break;
2212
+ } /* case 3 */
2213
+ case 4:
2214
+ case 5: {
2215
+ /* nothing right now */
2216
+ break;
2217
+ } /* case 5 */
2218
+ } /* end switch */
2219
+
2220
+ return TRUE;
2221
+ }
2222
+
2223
+ /** Main API, detects SQLi in an input.
2224
+ *
2225
+ *
2226
+ */
2227
+ static int reparse_as_mysql(struct libinjection_sqli_state * sql_state)
2228
+ {
2229
+ return sql_state->stats_comment_ddx ||
2230
+ sql_state->stats_comment_hash;
2231
+ }
2232
+
2233
+ /*
2234
+ * This function is mostly use with SWIG
2235
+ */
2236
+ struct libinjection_sqli_token*
2237
+ libinjection_sqli_get_token(struct libinjection_sqli_state * sql_state, int i)
2238
+ {
2239
+ if (i < 0 || i > (int)LIBINJECTION_SQLI_MAX_TOKENS) {
2240
+ return NULL;
2241
+ }
2242
+ return &(sql_state->tokenvec[i]);
2243
+ }
2244
+
2245
+ int libinjection_is_sqli(struct libinjection_sqli_state * sql_state)
2246
+ {
2247
+ const char *s = sql_state->s;
2248
+ size_t slen = sql_state->slen;
2249
+
2250
+ /*
2251
+ * no input? not SQLi
2252
+ */
2253
+ if (slen == 0) {
2254
+ return FALSE;
2255
+ }
2256
+
2257
+ /*
2258
+ * test input "as-is"
2259
+ */
2260
+ libinjection_sqli_fingerprint(sql_state, FLAG_QUOTE_NONE | FLAG_SQL_ANSI);
2261
+ if (sql_state->lookup(sql_state, LOOKUP_FINGERPRINT,
2262
+ sql_state->fingerprint, strlen(sql_state->fingerprint))) {
2263
+ return TRUE;
2264
+ } else if (reparse_as_mysql(sql_state)) {
2265
+ libinjection_sqli_fingerprint(sql_state, FLAG_QUOTE_NONE | FLAG_SQL_MYSQL);
2266
+ if (sql_state->lookup(sql_state, LOOKUP_FINGERPRINT,
2267
+ sql_state->fingerprint, strlen(sql_state->fingerprint))) {
2268
+ return TRUE;
2269
+ }
2270
+ }
2271
+
2272
+ /*
2273
+ * if input has a single_quote, then
2274
+ * test as if input was actually '
2275
+ * example: if input if "1' = 1", then pretend it's
2276
+ * "'1' = 1"
2277
+ * Porting Notes: example the same as doing
2278
+ * is_string_sqli(sql_state, "'" + s, slen+1, NULL, fn, arg)
2279
+ *
2280
+ */
2281
+ if (memchr(s, CHAR_SINGLE, slen)) {
2282
+ libinjection_sqli_fingerprint(sql_state, FLAG_QUOTE_SINGLE | FLAG_SQL_ANSI);
2283
+ if (sql_state->lookup(sql_state, LOOKUP_FINGERPRINT,
2284
+ sql_state->fingerprint, strlen(sql_state->fingerprint))) {
2285
+ return TRUE;
2286
+ } else if (reparse_as_mysql(sql_state)) {
2287
+ libinjection_sqli_fingerprint(sql_state, FLAG_QUOTE_SINGLE | FLAG_SQL_MYSQL);
2288
+ if (sql_state->lookup(sql_state, LOOKUP_FINGERPRINT,
2289
+ sql_state->fingerprint, strlen(sql_state->fingerprint))) {
2290
+ return TRUE;
2291
+ }
2292
+ }
2293
+ }
2294
+
2295
+ /*
2296
+ * same as above but with a double-quote "
2297
+ */
2298
+ if (memchr(s, CHAR_DOUBLE, slen)) {
2299
+ libinjection_sqli_fingerprint(sql_state, FLAG_QUOTE_DOUBLE | FLAG_SQL_MYSQL);
2300
+ if (sql_state->lookup(sql_state, LOOKUP_FINGERPRINT,
2301
+ sql_state->fingerprint, strlen(sql_state->fingerprint))) {
2302
+ return TRUE;
2303
+ }
2304
+ }
2305
+
2306
+ /*
2307
+ * Hurray, input is not SQLi
2308
+ */
2309
+ return FALSE;
2310
+ }
2311
+
2312
+ int libinjection_sqli(const char* input, size_t slen, char fingerprint[])
2313
+ {
2314
+ int issqli;
2315
+ struct libinjection_sqli_state state;
2316
+
2317
+ libinjection_sqli_init(&state, input, slen, 0);
2318
+ issqli = libinjection_is_sqli(&state);
2319
+ if (issqli) {
2320
+ strcpy(fingerprint, state.fingerprint);
2321
+ } else {
2322
+ fingerprint[0] = '\0';
2323
+ }
2324
+ return issqli;
2325
+ }