rack-libinjection 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. checksums.yaml +7 -0
  2. data/.github/workflows/ci.yml +55 -0
  3. data/CHANGELOG.md +112 -0
  4. data/GET_STARTED.md +418 -0
  5. data/LICENSE-libinjection.txt +33 -0
  6. data/LICENSE.txt +21 -0
  7. data/README.md +68 -0
  8. data/SECURITY.md +65 -0
  9. data/ext/libinjection/extconf.rb +113 -0
  10. data/ext/libinjection/libinjection_ext.c +1132 -0
  11. data/ext/libinjection/vendor/libinjection/.vendored +5 -0
  12. data/ext/libinjection/vendor/libinjection/COPYING +33 -0
  13. data/ext/libinjection/vendor/libinjection/MIGRATION.md +393 -0
  14. data/ext/libinjection/vendor/libinjection/README.md +251 -0
  15. data/ext/libinjection/vendor/libinjection/src/libinjection.h +70 -0
  16. data/ext/libinjection/vendor/libinjection/src/libinjection_error.h +26 -0
  17. data/ext/libinjection/vendor/libinjection/src/libinjection_html5.c +830 -0
  18. data/ext/libinjection/vendor/libinjection/src/libinjection_html5.h +56 -0
  19. data/ext/libinjection/vendor/libinjection/src/libinjection_sqli.c +2342 -0
  20. data/ext/libinjection/vendor/libinjection/src/libinjection_sqli.h +297 -0
  21. data/ext/libinjection/vendor/libinjection/src/libinjection_sqli_data.h +9651 -0
  22. data/ext/libinjection/vendor/libinjection/src/libinjection_xss.c +1203 -0
  23. data/ext/libinjection/vendor/libinjection/src/libinjection_xss.h +23 -0
  24. data/lib/libinjection/version.rb +6 -0
  25. data/lib/libinjection.rb +31 -0
  26. data/lib/rack/libinjection.rb +586 -0
  27. data/lib/rack-libinjection.rb +3 -0
  28. data/samples/README.md +67 -0
  29. data/samples/libinjection_detect_raw_hot_path.rb +161 -0
  30. data/samples/rack_all_surfaces_hot_path.rb +198 -0
  31. data/samples/rack_params_hot_path.rb +166 -0
  32. data/samples/rack_query_hot_path.rb +176 -0
  33. data/samples/results/.gitkeep +0 -0
  34. data/script/fuzz_smoke.rb +39 -0
  35. data/script/vendor_libs.rb +227 -0
  36. data/test/test_helper.rb +7 -0
  37. data/test/test_libinjection.rb +223 -0
  38. data/test/test_middleware.rb +404 -0
  39. metadata +148 -0
@@ -0,0 +1,2342 @@
1
+ /**
2
+ * Copyright 2012,2016 Nick Galbreath
3
+ * nickg@client9.com
4
+ *
5
+ * Copyright 2021 LibInjection
6
+ *
7
+ * BSD License -- see COPYING.txt for details
8
+ *
9
+ * https://github.com/libinjection/libinjection
10
+ *
11
+ */
12
+
13
+ #include <assert.h>
14
+ #include <ctype.h>
15
+ #include <stddef.h>
16
+ #include <stdio.h>
17
+ #include <stdlib.h>
18
+ #include <string.h>
19
+
20
+ #include "libinjection.h"
21
+ #include "libinjection_sqli.h"
22
+ #include "libinjection_sqli_data.h"
23
+
24
+ #ifndef LIBINJECTION_VERSION
25
+ /* Default version for embedding without autotools.
26
+ * Autotools and custom builds can override via -DLIBINJECTION_VERSION=\"...\"
27
+ */
28
+ #define LIBINJECTION_VERSION "4.0.0"
29
+ #endif
30
+
31
+ #define LIBINJECTION_SQLI_TOKEN_SIZE sizeof(((stoken_t *)(0))->val)
32
+ #define LIBINJECTION_SQLI_MAX_TOKENS 5
33
+
34
+ #ifndef TRUE
35
+ #define TRUE 1
36
+ #endif
37
+ #ifndef FALSE
38
+ #define FALSE 0
39
+ #endif
40
+
41
+ #define CHAR_NULL '\0'
42
+ #define CHAR_SINGLE '\''
43
+ #define CHAR_DOUBLE '"'
44
+ #define CHAR_TICK '`'
45
+
46
+ /* faster than calling out to libc isdigit */
47
+ #define ISDIGIT(a) ((unsigned)((a) - '0') <= 9)
48
+
49
+ #ifdef DEBUG
50
+ #define FOLD_DEBUG \
51
+ printf("%d \t more=%d pos=%d left=%d\n", __LINE__, more, (int)pos, \
52
+ (int)left);
53
+ #else
54
+ #define FOLD_DEBUG
55
+ #endif
56
+
57
/*
 * Token type codes. Kept private to this file for now.
 *
 * Apart from TYPE_NONE, each code is the value of a single printable
 * character, so a token type can be stored directly in a char.
 */
typedef enum {
    TYPE_NONE = 0,
    TYPE_KEYWORD = 'k',
    TYPE_UNION = 'U',
    TYPE_GROUP = 'B',
    TYPE_EXPRESSION = 'E',
    TYPE_SQLTYPE = 't',
    TYPE_FUNCTION = 'f',
    TYPE_BAREWORD = 'n',
    TYPE_NUMBER = '1',
    TYPE_VARIABLE = 'v',
    TYPE_STRING = 's',
    TYPE_OPERATOR = 'o',
    TYPE_LOGIC_OPERATOR = '&',
    TYPE_COMMENT = 'c',
    TYPE_COLLATE = 'A',
    TYPE_LEFTPARENS = '(',
    TYPE_RIGHTPARENS = ')', /* not used? */
    TYPE_LEFTBRACE = '{',
    TYPE_RIGHTBRACE = '}',
    TYPE_DOT = '.',
    TYPE_COMMA = ',',
    TYPE_COLON = ':',
    TYPE_SEMICOLON = ';',
    TYPE_TSQL = 'T', /* TSQL start */
    TYPE_UNKNOWN = '?',
    TYPE_EVIL = 'X',        /* unparsable, abort */
    TYPE_FINGERPRINT = 'F', /* not really a token */
    TYPE_BACKSLASH = '\\'
} sqli_token_types;
94
+
95
+ // prototype for is_backslash_escaped()
96
+ static int is_backslash_escaped(const char *end, const char *start);
97
+
98
+ /**
99
+ * Initializes parsing state
100
+ *
101
+ */
102
+ static char flag2delim(int flag) {
103
+ if (flag & FLAG_QUOTE_SINGLE) {
104
+ return CHAR_SINGLE;
105
+ } else if (flag & FLAG_QUOTE_DOUBLE) {
106
+ return CHAR_DOUBLE;
107
+ } else {
108
+ return CHAR_NULL;
109
+ }
110
+ }
111
+
112
/* memchr2 finds a sequence of 2 characters inside a length-delimited buffer.
 * This is a specialized version of "memmem" or "memchr";
 * 'memmem' doesn't exist on all platforms.
 *
 * Porting notes: this is just a special version of
 *    astring.find("AB")
 *
 * @param haystack      buffer to search (need not be NUL terminated)
 * @param haystack_len  number of bytes in haystack
 * @param c0            first byte of the pair
 * @param c1            second byte of the pair
 * @return pointer to the first occurrence of c0 followed by c1, or NULL
 *         when there is none or the buffer is shorter than 2 bytes
 */
static const char *memchr2(const char *haystack, size_t haystack_len, char c0,
                           char c1) {
    const char *cur;
    const char *last;

    if (haystack_len < 2) {
        return NULL;
    }

    /* Computed only after the length check so that we never form a
     * pointer before the start of the buffer when haystack_len is 0. */
    last = haystack + haystack_len - 1;

    for (cur = haystack; cur < last; ++cur) {
        /* safe: cur < last, so cur[1] is still inside the buffer */
        if (cur[0] == c0 && cur[1] == c1) {
            return cur;
        }
    }

    return NULL;
}
139
+
140
/**
 * Length-delimited substring search; memmem might not exist on some systems.
 *
 * @param haystack  buffer to search (must not be NULL)
 * @param hlen      number of bytes in haystack
 * @param needle    bytes to look for (must not be NULL)
 * @param nlen      number of bytes in needle; asserted to be greater than 1
 * @return pointer to the first occurrence of needle inside haystack, or NULL
 *         when it does not occur or the haystack is shorter than the needle
 */
static const char *my_memmem(const char *haystack, size_t hlen,
                             const char *needle, size_t nlen) {
    const char *cur;
    const char *last;

    assert(haystack);
    assert(needle);
    assert(nlen > 1);

    /* Without this guard, hlen - nlen wraps around and 'last' would be an
     * out-of-range pointer. */
    if (hlen < nlen) {
        return NULL;
    }

    last = haystack + (hlen - nlen);
    for (cur = haystack; cur <= last; ++cur) {
        /* cheap first-byte test before the full compare */
        if (cur[0] == needle[0] && memcmp(cur, needle, nlen) == 0) {
            return cur;
        }
    }
    return NULL;
}
158
+
159
/** Length of the leading run of bytes that all belong to 'accept'.
 *
 * The C library 'strspn' only works on NUL-terminated strings; this variant
 * works on a buffer with an explicit length.
 *
 * Note: membership is tested with strchr(), which also matches the
 * terminating NUL of 'accept', so a NUL byte in 's' counts as accepted.
 *
 * Performance notes:
 *   not critical
 *
 * Porting notes:
 *   if accept is 'ABC', then this function would be similar to
 *   a_regexp.match(a_str, '[ABC]*'),
 *
 * @param s       buffer to scan
 * @param len     number of bytes in s
 * @param accept  NUL-terminated set of accepted characters
 * @return index of the first byte not in accept, or len if every byte matches
 */
static size_t strlenspn(const char *s, size_t len, const char *accept) {
    size_t n = 0;

    while (n < len && strchr(accept, s[n]) != NULL) {
        ++n;
    }
    return n;
}
183
+
184
/* Length of the leading run of bytes that do NOT belong to 'accept'.
 *
 * Counterpart of strlenspn() for length-delimited buffers. Membership is
 * tested with strchr(), which also matches the terminating NUL of 'accept',
 * so a NUL byte in 's' always ends the run.
 *
 * Returns the index of the first byte found in accept, or len if none is.
 */
static size_t strlencspn(const char *s, size_t len, const char *accept) {
    size_t n = 0;

    while (n < len && strchr(accept, s[n]) == NULL) {
        ++n;
    }
    return n;
}
196
/* Returns 1 when ch is treated as whitespace by the tokenizer, else 0.
 *
 *   ' '   0x20  \040  space
 *   '\t'  0x09  \011  horizontal tab
 *   '\n'  0x0a  \012  new line
 *   '\v'  0x0b  \013  vertical tab
 *   '\f'  0x0c  \014  new page
 *   '\r'  0x0d  \015  carriage return
 *         0x00  \000  null (oracle)
 *         0xa0  \240  is Latin-1
 */
static int char_is_white(char ch) {
    switch (ch) {
    case ' ':
    case '\t':
    case '\n':
    case '\v':
    case '\f':
    case '\r':
    case '\240':
    case '\0':
        return 1;
    default:
        return 0;
    }
}
208
+
209
/* DANGER DANGER
 * This is a -very specialized function-
 *
 * It compares an ALL UPPER CASE, NUL-terminated C string 'a'
 * with *arbitrary memory* 'b' of length 'n'. Only bytes 'a'..'z' of b are
 * folded to upper case; a is used as-is.
 *
 * Sane people would just make a copy, up-case
 * and use a hash table.
 *
 * Required since the libc version uses the current locale
 * and is much slower.
 *
 * Returns 0 only when the first n bytes match and a ends exactly there.
 * On the first mismatch the byte difference (a - folded b) is returned.
 * Returns -1 when both sides hold a NUL inside the first n bytes, and 1
 * when all n bytes match but a is longer.
 */
static int cstrcasecmp(const char *a, const char *b, size_t n) {
    size_t i;

    for (i = 0; i < n; ++i) {
        char folded = b[i];

        if (folded >= 'a' && folded <= 'z') {
            folded = (char)(folded - 0x20);
        }
        if (a[i] != folded) {
            return a[i] - folded;
        }
        if (a[i] == '\0') {
            return -1;
        }
    }

    return (a[i] == '\0') ? 0 : 1;
}
238
+
239
/**
 * Case-sensitive equality test for two NUL-terminated strings.
 * Exists only to make call sites easier to read.
 *
 * @return non-zero when a and b are identical, 0 otherwise
 */
static int streq(const char *a, const char *b) {
    return !strcmp(a, b);
}
244
+
245
+ /**
246
+ *
247
+ *
248
+ *
249
+ * Porting Notes:
250
+ * given a mapping/hash of string to char
251
+ * this is just
252
+ * typecode = mapping[key.upper()]
253
+ */
254
+
255
+ static char bsearch_keyword_type(const char *key, size_t len,
256
+ const keyword_t *keywords, size_t numb) {
257
+ size_t pos;
258
+ size_t left = 0;
259
+ size_t right = numb - 1;
260
+
261
+ while (left < right) {
262
+ pos = (left + right) >> 1;
263
+
264
+ /* arg0 = upper case only, arg1 = mixed case */
265
+ if (cstrcasecmp(keywords[pos].word, key, len) < 0) {
266
+ left = pos + 1;
267
+ } else {
268
+ right = pos;
269
+ }
270
+ }
271
+ if ((left == right) && cstrcasecmp(keywords[left].word, key, len) == 0) {
272
+ return keywords[left].type;
273
+ } else {
274
+ return CHAR_NULL;
275
+ }
276
+ }
277
+
278
+ static char is_keyword(const char *key, size_t len) {
279
+ return bsearch_keyword_type(key, len, sql_keywords, sql_keywords_sz);
280
+ }
281
+
282
+ /* st_token methods
283
+ *
284
+ * The following functions manipulates the stoken_t type
285
+ *
286
+ *
287
+ */
288
+
289
+ static void st_clear(stoken_t *st) { memset(st, 0, sizeof(stoken_t)); }
290
+
291
+ static void st_assign_char(stoken_t *st, const char stype, size_t pos,
292
+ size_t len, const char value) {
293
+ /* done to eliminate unused warning */
294
+ (void)len;
295
+ st->type = (char)stype;
296
+ st->pos = pos;
297
+ st->len = 1;
298
+ st->val[0] = value;
299
+ st->val[1] = CHAR_NULL;
300
+ }
301
+
302
+ static void st_assign(stoken_t *st, const char stype, size_t pos, size_t len,
303
+ const char *value) {
304
+ const size_t MSIZE = LIBINJECTION_SQLI_TOKEN_SIZE;
305
+ size_t last = len < MSIZE ? len : (MSIZE - 1);
306
+ st->type = (char)stype;
307
+ st->pos = pos;
308
+ st->len = last;
309
+ memcpy(st->val, value, last);
310
+ st->val[last] = CHAR_NULL;
311
+ }
312
+
313
+ static void st_copy(stoken_t *dest, const stoken_t *src) {
314
+ memcpy(dest, src, sizeof(stoken_t));
315
+ }
316
+
317
+ static int st_is_arithmetic_op(const stoken_t *st) {
318
+ const char ch = st->val[0];
319
+ return (st->type == TYPE_OPERATOR && st->len == 1 &&
320
+ (ch == '*' || ch == '/' || ch == '-' || ch == '+' || ch == '%'));
321
+ }
322
+
323
+ static int st_is_unary_op(const stoken_t *st) {
324
+ const char *str = st->val;
325
+ const size_t len = st->len;
326
+
327
+ if (st->type != TYPE_OPERATOR) {
328
+ return FALSE;
329
+ }
330
+
331
+ switch (len) {
332
+ case 1:
333
+ return *str == '+' || *str == '-' || *str == '!' || *str == '~';
334
+ case 2:
335
+ return str[0] == '!' && str[1] == '!';
336
+ case 3:
337
+ return cstrcasecmp("NOT", str, 3) == 0;
338
+ default:
339
+ return FALSE;
340
+ }
341
+ }
342
+
343
+ /* Parsers
344
+ *
345
+ *
346
+ */
347
+
348
+ static size_t parse_white(struct libinjection_sqli_state
349
+ *sf) { // cppcheck-suppress constParameterCallback
350
+ return sf->pos + 1;
351
+ }
352
+
353
+ static size_t parse_operator1(struct libinjection_sqli_state *sf) {
354
+ const char *cs = sf->s;
355
+ size_t pos = sf->pos;
356
+
357
+ st_assign_char(sf->current, TYPE_OPERATOR, pos, 1, cs[pos]);
358
+ return pos + 1;
359
+ }
360
+
361
+ static size_t parse_other(struct libinjection_sqli_state *sf) {
362
+ const char *cs = sf->s;
363
+ size_t pos = sf->pos;
364
+
365
+ st_assign_char(sf->current, TYPE_UNKNOWN, pos, 1, cs[pos]);
366
+ return pos + 1;
367
+ }
368
+
369
+ static size_t parse_char(struct libinjection_sqli_state *sf) {
370
+ const char *cs = sf->s;
371
+ size_t pos = sf->pos;
372
+
373
+ st_assign_char(sf->current, cs[pos], pos, 1, cs[pos]);
374
+ return pos + 1;
375
+ }
376
+
377
+ static size_t parse_eol_comment(struct libinjection_sqli_state *sf) {
378
+ const char *cs = sf->s;
379
+ const size_t slen = sf->slen;
380
+ size_t pos = sf->pos;
381
+
382
+ const char *endpos =
383
+ (const char *)memchr((const void *)(cs + pos), '\n', slen - pos);
384
+ if (endpos == NULL) {
385
+ st_assign(sf->current, TYPE_COMMENT, pos, slen - pos, cs + pos);
386
+ return slen;
387
+ } else {
388
+ st_assign(sf->current, TYPE_COMMENT, pos, (size_t)(endpos - cs) - pos,
389
+ cs + pos);
390
+ return (size_t)((endpos - cs) + 1);
391
+ }
392
+ }
393
+
394
+ /** In ANSI mode, hash is an operator
395
+ * In MYSQL mode, it's a EOL comment like '--'
396
+ */
397
+ static size_t parse_hash(struct libinjection_sqli_state *sf) {
398
+ sf->stats_comment_hash += 1;
399
+ if (sf->flags & FLAG_SQL_MYSQL) {
400
+ sf->stats_comment_hash += 1;
401
+ return parse_eol_comment(sf);
402
+ } else {
403
+ st_assign_char(sf->current, TYPE_OPERATOR, sf->pos, 1, '#');
404
+ return sf->pos + 1;
405
+ }
406
+ }
407
+
408
+ static size_t parse_dash(struct libinjection_sqli_state *sf) {
409
+ const char *cs = sf->s;
410
+ const size_t slen = sf->slen;
411
+ size_t pos = sf->pos;
412
+
413
+ /*
414
+ * five cases
415
+ * 1) --[white] this is always a SQL comment
416
+ * 2) --[EOF] this is a comment
417
+ * 3) --[notwhite] in MySQL this is NOT a comment but two unary operators
418
+ * 4) --[notwhite] everyone else thinks this is a comment
419
+ * 5) -[not dash] '-' is a unary operator
420
+ */
421
+
422
+ if (pos + 2 < slen && cs[pos + 1] == '-' && char_is_white(cs[pos + 2])) {
423
+ return parse_eol_comment(sf);
424
+ } else if (pos + 2 == slen && cs[pos + 1] == '-') {
425
+ return parse_eol_comment(sf);
426
+ } else if (pos + 1 < slen && cs[pos + 1] == '-' &&
427
+ (sf->flags & FLAG_SQL_ANSI)) {
428
+ /* --[not-white] not-white case:
429
+ *
430
+ */
431
+ sf->stats_comment_ddx += 1;
432
+ return parse_eol_comment(sf);
433
+ } else {
434
+ st_assign_char(sf->current, TYPE_OPERATOR, pos, 1, '-');
435
+ return pos + 1;
436
+ }
437
+ }
438
+
439
+ /** This detects MySQL comments, comments that
440
+ * start with /x! We just ban these now but
441
+ * previously we attempted to parse the inside
442
+ *
443
+ * For reference:
444
+ * the form of /x![anything]x/ or /x!12345[anything] x/
445
+ *
446
+ * Mysql 3 (maybe 4), allowed this:
447
+ * /x!0selectx/ 1;
448
+ * where 0 could be any number.
449
+ *
450
+ * The last version of MySQL 3 was in 2003.
451
+
452
+ * It is unclear if the MySQL 3 syntax was allowed
453
+ * in MySQL 4. The last version of MySQL 4 was in 2008
454
+ *
455
+ */
456
/* Precondition established by the caller:
 *   cs[pos] == '/' && cs[pos+1] == '*'
 *
 * Returns 1 when a third byte exists and it is '!', i.e. the input
 * starts with the three bytes slash, star, bang; returns 0 otherwise.
 */
static size_t is_mysql_comment(const char *cs, const size_t len, size_t pos) {
    const size_t bang = pos + 2;

    return (bang < len && cs[bang] == '!') ? 1 : 0;
}
477
+
478
+ static size_t parse_slash(struct libinjection_sqli_state *sf) {
479
+ const char *ptr;
480
+ size_t clen;
481
+ const char *cs = sf->s;
482
+ const size_t slen = sf->slen;
483
+ size_t pos = sf->pos;
484
+ const char *cur = cs + pos;
485
+ char ctype = TYPE_COMMENT;
486
+ size_t pos1 = pos + 1;
487
+ if (pos1 == slen || cs[pos1] != '*') {
488
+ return parse_operator1(sf);
489
+ }
490
+
491
+ /*
492
+ * skip over initial '/x'
493
+ */
494
+ ptr = memchr2(cur + 2, slen - (pos + 2), '*', '/');
495
+ if (ptr == NULL) {
496
+ /* till end of line */
497
+ clen = slen - pos;
498
+ } else {
499
+ clen = (size_t)(ptr + 2 - cur);
500
+ }
501
+
502
+ /*
503
+ * postgresql allows nested comments which makes
504
+ * this is incompatible with parsing so
505
+ * if we find a '/x' inside the coment, then
506
+ * make a new token.
507
+ *
508
+ * Also, Mysql's "conditional" comments for version
509
+ * are an automatic black ban!
510
+ */
511
+
512
+ if (ptr != NULL &&
513
+ memchr2(cur + 2, (size_t)(ptr - (cur + 1)), '/', '*') != NULL) {
514
+ ctype = TYPE_EVIL;
515
+ } else if (is_mysql_comment(cs, slen, pos)) {
516
+ ctype = TYPE_EVIL;
517
+ }
518
+
519
+ st_assign(sf->current, ctype, pos, clen, cs + pos);
520
+ return pos + clen;
521
+ }
522
+
523
+ static size_t parse_backslash(struct libinjection_sqli_state *sf) {
524
+ const char *cs = sf->s;
525
+ const size_t slen = sf->slen;
526
+ size_t pos = sf->pos;
527
+
528
+ /*
529
+ * Weird MySQL alias for NULL, "\N" (capital N only)
530
+ */
531
+ if (pos + 1 < slen && cs[pos + 1] == 'N') {
532
+ st_assign(sf->current, TYPE_NUMBER, pos, 2, cs + pos);
533
+ return pos + 2;
534
+ } else {
535
+ st_assign_char(sf->current, TYPE_BACKSLASH, pos, 1, cs[pos]);
536
+ return pos + 1;
537
+ }
538
+ }
539
+
540
+ static size_t parse_operator2(struct libinjection_sqli_state *sf) {
541
+ char ch;
542
+ const char *cs = sf->s;
543
+ const size_t slen = sf->slen;
544
+ size_t pos = sf->pos;
545
+
546
+ if (pos + 1 >= slen) {
547
+ return parse_operator1(sf);
548
+ }
549
+
550
+ if (pos + 2 < slen && cs[pos] == '<' && cs[pos + 1] == '=' &&
551
+ cs[pos + 2] == '>') {
552
+ /*
553
+ * special 3-char operator
554
+ */
555
+ st_assign(sf->current, TYPE_OPERATOR, pos, 3, cs + pos);
556
+ return pos + 3;
557
+ }
558
+
559
+ ch = sf->lookup(sf, LOOKUP_OPERATOR, cs + pos, 2);
560
+ if (ch != CHAR_NULL) {
561
+ st_assign(sf->current, ch, pos, 2, cs + pos);
562
+ return pos + 2;
563
+ }
564
+
565
+ /*
566
+ * not an operator.. what to do with the two
567
+ * characters we got?
568
+ */
569
+
570
+ if (cs[pos] == ':') {
571
+ /* ':' is not an operator */
572
+ st_assign(sf->current, TYPE_COLON, pos, 1, cs + pos);
573
+ return pos + 1;
574
+ } else {
575
+ /*
576
+ * must be a single char operator
577
+ */
578
+ return parse_operator1(sf);
579
+ }
580
+ }
581
+
582
+ /*
583
+ * Ok! " \" " one backslash = escaped!
584
+ * " \\" " two backslash = not escaped!
585
+ * "\\\" " three backslash = escaped!
586
+ */
587
#ifndef __clang_analyzer__
/* Walks backwards from 'end' towards 'start' over a run of backslashes and
 * reports whether the run has odd length (1 = escaped, 0 = not escaped).
 * When end < start nothing is examined and the result is 0.
 *
 * This function is hidden from the clang static analyzer because of a
 * false positive:
 *   warning: Out of bound memory access (access exceeds upper limit of
 *   memory block) [alpha.security.ArrayBoundV2]
 * It deals with non-null terminated char arrays; the bounds come from the
 * two pointers, and the loop below never reads outside [start, end].
 */
static int is_backslash_escaped(const char *end, const char *start) {
    const char *ptr = end;

    while (ptr >= start && *ptr == '\\') {
        ptr--;
    }
    /* if number of backslashes is odd, it is escaped */
    return (end - ptr) & 1;
}
#endif
611
+
612
/* Returns 1 when the byte after 'cur' lies before 'end' and equals *cur
 * (a doubled delimiter), 0 otherwise. */
static size_t is_double_delim_escaped(const char *cur, const char *end) {
    if (cur + 1 >= end) {
        return 0;
    }
    return cur[1] == cur[0];
}
615
+
616
+ /* Look forward for doubling of delimiter
617
+ *
618
+ * case 'foo''bar' --> foo''bar
619
+ *
620
+ * ending quote isn't duplicated (i.e. escaped)
621
+ * since it's the wrong char or EOL
622
+ *
623
+ */
624
+ static size_t parse_string_core(const char *cs, const size_t len, size_t pos,
625
+ stoken_t *st, char delim, size_t offset) {
626
+ /*
627
+ * offset is to skip the perhaps first quote char
628
+ */
629
+ const char *qpos = (const char *)memchr((const void *)(cs + pos + offset),
630
+ delim, len - pos - offset);
631
+
632
+ /*
633
+ * then keep string open/close info
634
+ */
635
+ if (offset > 0) {
636
+ /*
637
+ * this is real quote
638
+ */
639
+ st->str_open = delim;
640
+ } else {
641
+ /*
642
+ * this was a simulated quote
643
+ */
644
+ st->str_open = CHAR_NULL;
645
+ }
646
+
647
+ while (TRUE) {
648
+ if (qpos == NULL) {
649
+ /*
650
+ * string ended with no trailing quote
651
+ * assign what we have
652
+ */
653
+ st_assign(st, TYPE_STRING, pos + offset, len - pos - offset,
654
+ cs + pos + offset);
655
+ st->str_close = CHAR_NULL;
656
+ return len;
657
+ } else if (is_backslash_escaped(qpos - 1, cs + pos + offset)) {
658
+ /* keep going, move ahead one character */
659
+ qpos = (const char *)memchr((const void *)(qpos + 1), delim,
660
+ (size_t)((cs + len) - (qpos + 1)));
661
+ continue;
662
+ } else if (is_double_delim_escaped(qpos, cs + len)) {
663
+ /* keep going, move ahead two characters */
664
+ qpos = (const char *)memchr((const void *)(qpos + 2), delim,
665
+ (size_t)((cs + len) - (qpos + 2)));
666
+ continue;
667
+ } else {
668
+ /* hey it's a normal string */
669
+ st_assign(st, TYPE_STRING, pos + offset,
670
+ (size_t)(qpos - (cs + pos + offset)), cs + pos + offset);
671
+ st->str_close = delim;
672
+ return (size_t)(qpos - cs + 1);
673
+ }
674
+ }
675
+ }
676
+
677
+ /**
678
+ * Used when first char is a ' or "
679
+ */
680
+ static size_t parse_string(struct libinjection_sqli_state *sf) {
681
+ const char *cs = sf->s;
682
+ const size_t slen = sf->slen;
683
+ size_t pos = sf->pos;
684
+
685
+ /*
686
+ * assert cs[pos] == single or double quote
687
+ */
688
+ return parse_string_core(cs, slen, pos, sf->current, cs[pos], 1);
689
+ }
690
+
691
+ /**
692
+ * Used when first char is:
693
+ * N or n: mysql "National Character set"
694
+ * E : psql "Escaped String"
695
+ */
696
+ static size_t parse_estring(struct libinjection_sqli_state *sf) {
697
+ const char *cs = sf->s;
698
+ const size_t slen = sf->slen;
699
+ size_t pos = sf->pos;
700
+
701
+ if (pos + 2 >= slen || cs[pos + 1] != CHAR_SINGLE) {
702
+ return parse_word(sf);
703
+ }
704
+ return parse_string_core(cs, slen, pos, sf->current, CHAR_SINGLE, 2);
705
+ }
706
+
707
+ static size_t parse_ustring(struct libinjection_sqli_state *sf) {
708
+ const char *cs = sf->s;
709
+ size_t slen = sf->slen;
710
+ size_t pos = sf->pos;
711
+
712
+ if (pos + 2 < slen && cs[pos + 1] == '&' && cs[pos + 2] == '\'') {
713
+ sf->pos += 2;
714
+ pos = parse_string(sf);
715
+ sf->current->str_open = 'u';
716
+ if (sf->current->str_close == '\'') {
717
+ sf->current->str_close = 'u';
718
+ }
719
+ return pos;
720
+ } else {
721
+ return parse_word(sf);
722
+ }
723
+ }
724
+
725
+ static size_t parse_qstring_core(struct libinjection_sqli_state *sf,
726
+ size_t offset) {
727
+ char ch;
728
+ const char *strend;
729
+ const char *cs = sf->s;
730
+ size_t slen = sf->slen;
731
+ size_t pos = sf->pos + offset;
732
+
733
+ /* if we are already at end of string..
734
+ if current char is not q or Q
735
+ if we don't have 2 more chars
736
+ if char2 != a single quote
737
+ then, just treat as word
738
+ */
739
+ if (pos >= slen || (cs[pos] != 'q' && cs[pos] != 'Q') || pos + 2 >= slen ||
740
+ cs[pos + 1] != '\'') {
741
+ return parse_word(sf);
742
+ }
743
+
744
+ ch = cs[pos + 2];
745
+
746
+ /* the ch > 127 is un-needed since
747
+ * we assume char is signed
748
+ */
749
+ if (ch < 33 /* || ch > 127 */) {
750
+ return parse_word(sf);
751
+ }
752
+ switch (ch) {
753
+ case '(':
754
+ ch = ')';
755
+ break;
756
+ case '[':
757
+ ch = ']';
758
+ break;
759
+ case '{':
760
+ ch = '}';
761
+ break;
762
+ case '<':
763
+ ch = '>';
764
+ break;
765
+ }
766
+
767
+ strend = memchr2(cs + pos + 3, slen - pos - 3, ch, '\'');
768
+ if (strend == NULL) {
769
+ st_assign(sf->current, TYPE_STRING, pos + 3, slen - pos - 3,
770
+ cs + pos + 3);
771
+ sf->current->str_open = 'q';
772
+ sf->current->str_close = CHAR_NULL;
773
+ return slen;
774
+ } else {
775
+ st_assign(sf->current, TYPE_STRING, pos + 3,
776
+ (size_t)(strend - cs) - pos - 3, cs + pos + 3);
777
+ sf->current->str_open = 'q';
778
+ sf->current->str_close = 'q';
779
+ return (size_t)(strend - cs + 2);
780
+ }
781
+ }
782
+
783
+ /*
784
+ * Oracle's q string
785
+ */
786
/* q-string whose 'q' / 'Q' letter sits at the current position. */
static size_t parse_qstring(struct libinjection_sqli_state *sf) {
    const size_t no_prefix = 0;

    return parse_qstring_core(sf, no_prefix);
}
789
+
790
+ /*
791
+ * mysql's N'STRING' or
792
+ * ... Oracle's nq string
793
+ */
794
+ static size_t parse_nqstring(struct libinjection_sqli_state *sf) {
795
+ size_t slen = sf->slen;
796
+ size_t pos = sf->pos;
797
+ if (pos + 2 < slen && sf->s[pos + 1] == CHAR_SINGLE) {
798
+ return parse_estring(sf);
799
+ }
800
+ return parse_qstring_core(sf, 1);
801
+ }
802
+
803
+ /*
804
+ * binary literal string
805
+ * re: [bB]'[01]*'
806
+ */
807
+ static size_t parse_bstring(struct libinjection_sqli_state *sf) {
808
+ size_t wlen;
809
+ const char *cs = sf->s;
810
+ size_t pos = sf->pos;
811
+ size_t slen = sf->slen;
812
+
813
+ /* need at least 2 more characters
814
+ * if next char isn't a single quote, then
815
+ * continue as normal word
816
+ */
817
+ if (pos + 2 >= slen || cs[pos + 1] != '\'') {
818
+ return parse_word(sf);
819
+ }
820
+
821
+ wlen = strlenspn(cs + pos + 2, sf->slen - pos - 2, "01");
822
+ if (pos + 2 + wlen >= slen || cs[pos + 2 + wlen] != '\'') {
823
+ return parse_word(sf);
824
+ }
825
+ st_assign(sf->current, TYPE_NUMBER, pos, wlen + 3, cs + pos);
826
+ return pos + 2 + wlen + 1;
827
+ }
828
+
829
+ /*
830
+ * hex literal string
831
+ * re: [xX]'[0123456789abcdefABCDEF]*'
832
+ * mysql has requirement of having EVEN number of chars,
833
+ * but pgsql does not
834
+ */
835
+ static size_t parse_xstring(struct libinjection_sqli_state *sf) {
836
+ size_t wlen;
837
+ const char *cs = sf->s;
838
+ size_t pos = sf->pos;
839
+ size_t slen = sf->slen;
840
+
841
+ /* need at least 2 more characters
842
+ * if next char isn't a single quote, then
843
+ * continue as normal word
844
+ */
845
+ if (pos + 2 >= slen || cs[pos + 1] != '\'') {
846
+ return parse_word(sf);
847
+ }
848
+
849
+ wlen =
850
+ strlenspn(cs + pos + 2, sf->slen - pos - 2, "0123456789ABCDEFabcdef");
851
+ if (pos + 2 + wlen >= slen || cs[pos + 2 + wlen] != '\'') {
852
+ return parse_word(sf);
853
+ }
854
+ st_assign(sf->current, TYPE_NUMBER, pos, wlen + 3, cs + pos);
855
+ return pos + 2 + wlen + 1;
856
+ }
857
+
858
+ /**
859
+ * This handles MS SQLSERVER bracket words
860
+ * http://stackoverflow.com/questions/3551284/sql-serverwhat-do-brackets-mean-around-column-name
861
+ *
862
+ */
863
+ static size_t parse_bword(struct libinjection_sqli_state *sf) {
864
+ const char *cs = sf->s;
865
+ size_t pos = sf->pos;
866
+ const char *endptr = (const char *)memchr(cs + pos, ']', sf->slen - pos);
867
+ if (endptr == NULL) {
868
+ st_assign(sf->current, TYPE_BAREWORD, pos, sf->slen - pos, cs + pos);
869
+ return sf->slen;
870
+ } else {
871
+ st_assign(sf->current, TYPE_BAREWORD, pos,
872
+ (size_t)(endptr - cs) - pos + 1, cs + pos);
873
+ return (size_t)((endptr - cs) + 1);
874
+ }
875
+ }
876
+
877
+ static size_t parse_word(struct libinjection_sqli_state *sf) {
878
+ char ch;
879
+ char delim;
880
+ size_t i;
881
+ const char *cs = sf->s;
882
+ size_t pos = sf->pos;
883
+ size_t wlen =
884
+ strlencspn(cs + pos, sf->slen - pos,
885
+ " []{}<>:\\?=@!#~+-*/&|^%(),';\t\n\v\f\r\"\240\000");
886
+
887
+ st_assign(sf->current, TYPE_BAREWORD, pos, wlen, cs + pos);
888
+
889
+ /* now we need to look inside what we good for "." and "`"
890
+ * and see if what is before is a keyword or not
891
+ */
892
+ for (i = 0; i < sf->current->len; ++i) {
893
+ delim = sf->current->val[i];
894
+ if (delim == '.' || delim == '`') {
895
+ ch = sf->lookup(sf, LOOKUP_WORD, sf->current->val, i);
896
+ if (ch != TYPE_NONE && ch != TYPE_BAREWORD) {
897
+ /* needed for swig */
898
+ st_clear(sf->current);
899
+ /*
900
+ * we got something like "SELECT.1"
901
+ * or SELECT`column`
902
+ */
903
+ st_assign(sf->current, ch, pos, i, cs + pos);
904
+ return pos + i;
905
+ }
906
+ }
907
+ }
908
+
909
+ /*
910
+ * do normal lookup with word including '.'
911
+ */
912
+ if (wlen < LIBINJECTION_SQLI_TOKEN_SIZE) {
913
+
914
+ ch = sf->lookup(sf, LOOKUP_WORD, sf->current->val, wlen);
915
+ if (ch == CHAR_NULL) {
916
+ ch = TYPE_BAREWORD;
917
+ }
918
+ sf->current->type = ch;
919
+ }
920
+ return pos + wlen;
921
+ }
922
+
923
+ /* MySQL backticks are a cross between string and
924
+ * and a bare word.
925
+ *
926
+ */
927
+ static size_t parse_tick(struct libinjection_sqli_state *sf) {
928
+ size_t pos =
929
+ parse_string_core(sf->s, sf->slen, sf->pos, sf->current, CHAR_TICK, 1);
930
+
931
+ /* we could check to see if start and end of
932
+ * of string are both "`", i.e. make sure we have
933
+ * matching set. `foo` vs. `foo
934
+ * but I don't think it matters much
935
+ */
936
+
937
+ /* check value of string to see if it's a keyword,
938
+ * function, operator, etc
939
+ */
940
+ char ch = sf->lookup(sf, LOOKUP_WORD, sf->current->val, sf->current->len);
941
+ if (ch == TYPE_FUNCTION) {
942
+ /* if it's a function, then convert token */
943
+ sf->current->type = TYPE_FUNCTION;
944
+ } else {
945
+ /* otherwise it's a 'n' type -- mysql treats
946
+ * everything as a bare word
947
+ */
948
+ sf->current->type = TYPE_BAREWORD;
949
+ }
950
+ return pos;
951
+ }
952
+
953
+ static size_t parse_var(struct libinjection_sqli_state *sf) {
954
+ size_t xlen;
955
+ const char *cs = sf->s;
956
+ const size_t slen = sf->slen;
957
+ size_t pos = sf->pos + 1;
958
+
959
+ /*
960
+ * var_count is only used to reconstruct
961
+ * the input. It counts the number of '@'
962
+ * seen 0 in the case of NULL, 1 or 2
963
+ */
964
+
965
+ /*
966
+ * move past optional other '@'
967
+ */
968
+ if (pos < slen && cs[pos] == '@') {
969
+ pos += 1;
970
+ sf->current->count = 2;
971
+ } else {
972
+ sf->current->count = 1;
973
+ }
974
+
975
+ /*
976
+ * MySQL allows @@`version`
977
+ */
978
+ if (pos < slen) {
979
+ if (cs[pos] == '`') {
980
+ sf->pos = pos;
981
+ pos = parse_tick(sf);
982
+ sf->current->type = TYPE_VARIABLE;
983
+ return pos;
984
+ } else if (cs[pos] == CHAR_SINGLE || cs[pos] == CHAR_DOUBLE) {
985
+ sf->pos = pos;
986
+ pos = parse_string(sf);
987
+ sf->current->type = TYPE_VARIABLE;
988
+ return pos;
989
+ }
990
+ }
991
+
992
+ xlen = strlencspn(cs + pos, slen - pos,
993
+ " <>:\\?=@!#~+-*/&|^%(),';\t\n\v\f\r'`\"");
994
+ if (xlen == 0) {
995
+ st_assign(sf->current, TYPE_VARIABLE, pos, 0, cs + pos);
996
+ return pos;
997
+ } else {
998
+ st_assign(sf->current, TYPE_VARIABLE, pos, xlen, cs + pos);
999
+ return pos + xlen;
1000
+ }
1001
+ }
1002
+
1003
+ static size_t parse_money(struct libinjection_sqli_state *sf) {
1004
+ size_t xlen;
1005
+ const char *strend;
1006
+ const char *cs = sf->s;
1007
+ const size_t slen = sf->slen;
1008
+ size_t pos = sf->pos;
1009
+
1010
+ if (pos + 1 == slen) {
1011
+ /* end of line */
1012
+ st_assign_char(sf->current, TYPE_BAREWORD, pos, 1, '$');
1013
+ return slen;
1014
+ }
1015
+
1016
+ /*
1017
+ * $1,000.00 or $1.000,00 ok!
1018
+ * This also parses $....,,,111 but that's ok
1019
+ */
1020
+
1021
+ xlen = strlenspn(cs + pos + 1, slen - pos - 1, "0123456789.,");
1022
+ if (xlen == 0) {
1023
+ if (cs[pos + 1] == '$') {
1024
+ /* we have $$ .. find ending $$ and make string */
1025
+ strend = memchr2(cs + pos + 2, slen - pos - 2, '$', '$');
1026
+ if (strend == NULL) {
1027
+ /* fell off edge */
1028
+ st_assign(sf->current, TYPE_STRING, pos + 2, slen - (pos + 2),
1029
+ cs + pos + 2);
1030
+ sf->current->str_open = '$';
1031
+ sf->current->str_close = CHAR_NULL;
1032
+ return slen;
1033
+ } else {
1034
+ st_assign(sf->current, TYPE_STRING, pos + 2,
1035
+ (size_t)(strend - (cs + pos + 2)), cs + pos + 2);
1036
+ sf->current->str_open = '$';
1037
+ sf->current->str_close = '$';
1038
+ return (size_t)(strend - cs + 2);
1039
+ }
1040
+ } else {
1041
+ /* ok it's not a number or '$$', but maybe it's pgsql "$ quoted
1042
+ * strings"
1043
+ */
1044
+ xlen = strlenspn(
1045
+ cs + pos + 1, slen - pos - 1,
1046
+ "abcdefghjiklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");
1047
+ if (xlen == 0) {
1048
+ /* hmm it's "$" _something_ .. just add $ and keep going*/
1049
+ st_assign_char(sf->current, TYPE_BAREWORD, pos, 1, '$');
1050
+ return pos + 1;
1051
+ }
1052
+ /* we have $foobar????? */
1053
+ /* is it $foobar$ */
1054
+ if (pos + xlen + 1 == slen || cs[pos + xlen + 1] != '$') {
1055
+ /* not $foobar$, or fell off edge */
1056
+ st_assign_char(sf->current, TYPE_BAREWORD, pos, 1, '$');
1057
+ return pos + 1;
1058
+ }
1059
+
1060
+ /* we have $foobar$ ... find it again */
1061
+ strend = my_memmem(cs + pos + xlen + 2, slen - (pos + xlen + 2),
1062
+ cs + pos, xlen + 2);
1063
+
1064
+ if (strend == NULL) {
1065
+ /* fell off edge */
1066
+ st_assign(sf->current, TYPE_STRING, pos + xlen + 2,
1067
+ slen - pos - xlen - 2, cs + pos + xlen + 2);
1068
+ sf->current->str_open = '$';
1069
+ sf->current->str_close = CHAR_NULL;
1070
+ return slen;
1071
+ } else {
1072
+ /* got one */
1073
+ st_assign(sf->current, TYPE_STRING, pos + xlen + 2,
1074
+ (size_t)(strend - (cs + pos + xlen + 2)),
1075
+ cs + pos + xlen + 2);
1076
+ sf->current->str_open = '$';
1077
+ sf->current->str_close = '$';
1078
+ return (size_t)((strend + xlen + 2) - cs);
1079
+ }
1080
+ }
1081
+ } else if (xlen == 1 && cs[pos + 1] == '.') {
1082
+ /* $. should parsed as a word */
1083
+ return parse_word(sf);
1084
+ } else {
1085
+ st_assign(sf->current, TYPE_NUMBER, pos, 1 + xlen, cs + pos);
1086
+ return pos + 1 + xlen;
1087
+ }
1088
+ }
1089
+
1090
+ static size_t parse_number(struct libinjection_sqli_state *sf) {
1091
+ size_t xlen;
1092
+ size_t start;
1093
+ const char *digits = NULL;
1094
+ const char *cs = sf->s;
1095
+ const size_t slen = sf->slen;
1096
+ size_t pos = sf->pos;
1097
+ int have_e = 0;
1098
+ int have_exp = 0;
1099
+
1100
+ /* cs[pos] == '0' has 1/10 chance of being true,
1101
+ * while pos+1< slen is almost always true
1102
+ */
1103
+ if (cs[pos] == '0' && pos + 1 < slen) {
1104
+ if (cs[pos + 1] == 'X' || cs[pos + 1] == 'x') {
1105
+ digits = "0123456789ABCDEFabcdef";
1106
+ } else if (cs[pos + 1] == 'B' || cs[pos + 1] == 'b') {
1107
+ digits = "01";
1108
+ }
1109
+
1110
+ if (digits) {
1111
+ xlen = strlenspn(cs + pos + 2, slen - pos - 2, digits);
1112
+ if (xlen == 0) {
1113
+ st_assign(sf->current, TYPE_BAREWORD, pos, 2, cs + pos);
1114
+ return pos + 2;
1115
+ } else {
1116
+ st_assign(sf->current, TYPE_NUMBER, pos, 2 + xlen, cs + pos);
1117
+ return pos + 2 + xlen;
1118
+ }
1119
+ }
1120
+ }
1121
+
1122
+ start = pos;
1123
+ while (pos < slen && ISDIGIT(cs[pos])) {
1124
+ pos += 1;
1125
+ }
1126
+
1127
+ if (pos < slen && cs[pos] == '.') {
1128
+ pos += 1;
1129
+ while (pos < slen && ISDIGIT(cs[pos])) {
1130
+ pos += 1;
1131
+ }
1132
+ if (pos - start == 1) {
1133
+ /* only one character read so far */
1134
+ st_assign_char(sf->current, TYPE_DOT, start, 1, '.');
1135
+ return pos;
1136
+ }
1137
+ }
1138
+
1139
+ if (pos < slen) {
1140
+ if (cs[pos] == 'E' || cs[pos] == 'e') {
1141
+ have_e = 1;
1142
+ pos += 1;
1143
+ if (pos < slen && (cs[pos] == '+' || cs[pos] == '-')) {
1144
+ pos += 1;
1145
+ }
1146
+ while (pos < slen && ISDIGIT(cs[pos])) {
1147
+ have_exp = 1;
1148
+ pos += 1;
1149
+ }
1150
+ }
1151
+ }
1152
+
1153
+ /* oracle's ending float or double suffix
1154
+ * http://docs.oracle.com/cd/B19306_01/server.102/b14200/sql_elements003.htm#i139891
1155
+ */
1156
+ if (pos < slen && (cs[pos] == 'd' || cs[pos] == 'D' || cs[pos] == 'f' ||
1157
+ cs[pos] == 'F')) {
1158
+ if (pos + 1 == slen) {
1159
+ /* line ends evaluate "... 1.2f$" as '1.2f' */
1160
+ pos += 1;
1161
+ } else if ((char_is_white(cs[pos + 1]) || cs[pos + 1] == ';')) {
1162
+ /*
1163
+ * easy case, evaluate "... 1.2f ... as '1.2f'
1164
+ */
1165
+ pos += 1;
1166
+ } else if (cs[pos + 1] == 'u' || cs[pos + 1] == 'U') {
1167
+ /*
1168
+ * a bit of a hack but makes '1fUNION' parse as '1f UNION'
1169
+ */
1170
+ pos += 1;
1171
+ } else {
1172
+ /* it's like "123FROM" */
1173
+ /* parse as "123" only */
1174
+ }
1175
+ }
1176
+
1177
+ /* very special form of
1178
+ * "1234.e"
1179
+ * "10.10E"
1180
+ * ".E"
1181
+ *
1182
+ * https://gosecure.ai/blog/2021/10/19/a-scientific-notation-bug-in-mysql-left-aws-waf-clients-vulnerable-to-sql-injection/
1183
+ * In this blog post, we can see that 1.e or 1.E is a risky SQLI. The SQL
1184
+ * parser ignores it during parsing. For example, "1.e(1)" => (1), 1 1.e/1
1185
+ * => 1/1, etc. So, if a payload like "1' or 1.e(1)" bypasses SQLI
1186
+ * detection, which is really risky, then we should detect such SQLI
1187
+ * injection in case of WAF bypass.
1188
+ */
1189
+ if (!(have_e == 1 && have_exp == 0)) {
1190
+ st_assign(sf->current, TYPE_NUMBER, start, pos - start, cs + start);
1191
+ }
1192
+
1193
+ return pos;
1194
+ }
1195
+
1196
+ /*
1197
+ * API to return version. This allows us to increment the version
1198
+ * without having to regenerated the SWIG (or other binding) in minor
1199
+ * releases.
1200
+ */
1201
+ const char *libinjection_version(void) { return LIBINJECTION_VERSION; }
1202
+
1203
+ int libinjection_sqli_tokenize(struct libinjection_sqli_state *sf) {
1204
+ pt2Function fnptr;
1205
+ size_t *pos = &sf->pos;
1206
+ stoken_t *current = sf->current;
1207
+ const char *s = sf->s;
1208
+ const size_t slen = sf->slen;
1209
+
1210
+ if (slen == 0) {
1211
+ return FALSE;
1212
+ }
1213
+
1214
+ st_clear(current);
1215
+ sf->current = // cppcheck-suppress[redundantAssignment,unmatchedSuppression]
1216
+ current;
1217
+
1218
+ /*
1219
+ * if we are at beginning of string
1220
+ * and in single-quote or double quote mode
1221
+ * then pretend the input starts with a quote
1222
+ */
1223
+ if (*pos == 0 && (sf->flags & (FLAG_QUOTE_SINGLE | FLAG_QUOTE_DOUBLE))) {
1224
+ *pos = parse_string_core(s, slen, 0, current, flag2delim(sf->flags), 0);
1225
+ sf->stats_tokens += 1;
1226
+ return TRUE;
1227
+ }
1228
+
1229
+ while (*pos < slen) {
1230
+
1231
+ /*
1232
+ * get current character
1233
+ */
1234
+ const unsigned char ch = (unsigned char)(s[*pos]);
1235
+
1236
+ /*
1237
+ * look up the parser, and call it
1238
+ *
1239
+ * Porting Note: this is mapping of char to function
1240
+ * charparsers[ch]()
1241
+ */
1242
+ fnptr = char_parse_map[ch];
1243
+
1244
+ *pos = (*fnptr)(sf);
1245
+
1246
+ /*
1247
+ *
1248
+ */
1249
+ if (current->type != CHAR_NULL) {
1250
+ sf->stats_tokens += 1;
1251
+ return TRUE;
1252
+ }
1253
+ }
1254
+ return FALSE;
1255
+ }
1256
+
1257
+ void libinjection_sqli_init(struct libinjection_sqli_state *sf, const char *s,
1258
+ size_t len, int flags) {
1259
+ if (flags == 0) {
1260
+ flags = FLAG_QUOTE_NONE | FLAG_SQL_ANSI;
1261
+ }
1262
+
1263
+ memset(sf, 0, sizeof(struct libinjection_sqli_state));
1264
+ sf->s = s;
1265
+ sf->slen = len;
1266
+ sf->lookup = libinjection_sqli_lookup_word;
1267
+ sf->userdata = 0;
1268
+ sf->flags = flags;
1269
+ sf->current = &(sf->tokenvec[0]);
1270
+ }
1271
+
1272
+ static void libinjection_sqli_reset(struct libinjection_sqli_state *sf,
1273
+ int flags) {
1274
+ void *userdata = sf->userdata;
1275
+ ptr_lookup_fn lookup = sf->lookup;
1276
+
1277
+ if (flags == 0) {
1278
+ flags = FLAG_QUOTE_NONE | FLAG_SQL_ANSI;
1279
+ }
1280
+ libinjection_sqli_init(sf, sf->s, sf->slen, flags);
1281
+ sf->lookup = lookup;
1282
+ sf->userdata = userdata;
1283
+ }
1284
+
1285
+ void libinjection_sqli_callback(struct libinjection_sqli_state *sf,
1286
+ ptr_lookup_fn fn, void *userdata) {
1287
+ if (fn == NULL) {
1288
+ sf->lookup = libinjection_sqli_lookup_word;
1289
+ sf->userdata = (void *)(NULL);
1290
+ } else {
1291
+ sf->lookup = fn;
1292
+ sf->userdata = userdata;
1293
+ }
1294
+ }
1295
+
1296
+ /** See if two tokens can be merged since they are compound SQL phrases.
1297
+ *
1298
+ * This takes two tokens, and, if they are the right type,
1299
+ * merges their values together. Then checks to see if the
1300
+ * new value is special using the PHRASES mapping.
1301
+ *
1302
+ * Example: "UNION" + "ALL" ==> "UNION ALL"
1303
+ *
1304
+ * C Security Notes: this is safe to use C-strings (null-terminated)
1305
+ * since the types involved by definition do not have embedded nulls
1306
+ * (e.g. there is no keyword with embedded null)
1307
+ *
1308
+ * Porting Notes: since this is C, it's oddly complicated.
1309
+ * This is just: multikeywords[token.value + ' ' + token2.value]
1310
+ *
1311
+ */
1312
+ static int
1313
+ syntax_merge_words(struct libinjection_sqli_state *sf, stoken_t *a,
1314
+ stoken_t *b) { // cppcheck-suppress constParameterPointer
1315
+ size_t sz1;
1316
+ size_t sz2;
1317
+ size_t sz3;
1318
+ char tmp[LIBINJECTION_SQLI_TOKEN_SIZE];
1319
+ char ch;
1320
+
1321
+ /* first token is of right type? */
1322
+ if (!(a->type == TYPE_KEYWORD || a->type == TYPE_BAREWORD ||
1323
+ a->type == TYPE_OPERATOR || a->type == TYPE_UNION ||
1324
+ a->type == TYPE_FUNCTION || a->type == TYPE_EXPRESSION ||
1325
+ a->type == TYPE_TSQL || a->type == TYPE_SQLTYPE)) {
1326
+ return FALSE;
1327
+ }
1328
+
1329
+ if (!(b->type == TYPE_KEYWORD || b->type == TYPE_BAREWORD ||
1330
+ b->type == TYPE_OPERATOR || b->type == TYPE_UNION ||
1331
+ b->type == TYPE_FUNCTION || b->type == TYPE_EXPRESSION ||
1332
+ b->type == TYPE_TSQL || b->type == TYPE_SQLTYPE ||
1333
+ b->type == TYPE_LOGIC_OPERATOR)) {
1334
+ return FALSE;
1335
+ }
1336
+
1337
+ sz1 = a->len;
1338
+ sz2 = b->len;
1339
+ sz3 = sz1 + sz2 + 1; /* +1 for space in the middle */
1340
+ if (sz3 >= LIBINJECTION_SQLI_TOKEN_SIZE) { /* make sure there is room for
1341
+ ending null */
1342
+ return FALSE;
1343
+ }
1344
+ /*
1345
+ * oddly annoying last.val + ' ' + current.val
1346
+ */
1347
+ memcpy(tmp, a->val, sz1);
1348
+ tmp[sz1] = ' ';
1349
+ memcpy(tmp + sz1 + 1, b->val, sz2);
1350
+ tmp[sz3] = CHAR_NULL;
1351
+ ch = sf->lookup(sf, LOOKUP_WORD, tmp, sz3);
1352
+
1353
+ if (ch != CHAR_NULL) {
1354
+ st_assign(a, ch, a->pos, sz3, tmp);
1355
+ return TRUE;
1356
+ } else {
1357
+ return FALSE;
1358
+ }
1359
+ }
1360
+
1361
/*
 * Tokenize the input held in sf and fold the tokens into sf->tokenvec.
 *
 * Tokens are pulled with libinjection_sqli_tokenize() and examined through
 * a small window that starts at index `left`: first two tokens, then three.
 * Whenever a rule matches, tokens are dropped or retyped and the loop
 * restarts; when nothing matches, `left` advances by one.
 *
 * Returns the number of tokens kept. The normal exit clamps the result to
 * LIBINJECTION_SQLI_MAX_TOKENS; the TYPE_EVIL early exit returns left + 2
 * directly. Returns 0 when the tokenizer runs out while only comments,
 * left parentheses, SQL types or unary operators have been seen.
 */
+ int libinjection_sqli_fold(struct libinjection_sqli_state *sf) {
1362
+ stoken_t last_comment;
1363
+
1364
+ /* POS is the position of where the NEXT token goes */
1365
+ size_t pos = 0;
1366
+
1367
+ /* LEFT is a count of how many tokens that are already
1368
+ folded or processed (i.e. part of the fingerprint) */
1369
+ size_t left = 0;
1370
+
1371
+ int more = 1;
1372
+
/* last_comment holds the most recent comment token read while refilling
 * the window; its type is reset as soon as a non-comment token follows. */
1373
+ st_clear(&last_comment);
1374
+
1375
+ /* Skip all initial comments, left-parens (, SQL types and unary operators
1376
+ *
1377
+ */
1378
+ sf->current = &(sf->tokenvec[0]);
1379
+ while (more) {
1380
+ more = libinjection_sqli_tokenize(sf);
1381
+ if (!(sf->current->type == TYPE_COMMENT ||
1382
+ sf->current->type == TYPE_LEFTPARENS ||
1383
+ sf->current->type == TYPE_SQLTYPE ||
1384
+ st_is_unary_op(sf->current))) {
1385
+ break;
1386
+ }
1387
+ }
1388
+
1389
+ if (!more) {
1390
+ /* If input was only comments, unary or (, then exit */
1391
+ return 0;
1392
+ } else {
1393
+ /* it's some other token */
1394
+ pos += 1;
1395
+ }
1396
+
1397
+ while (1) {
1398
+ FOLD_DEBUG;
1399
+
1400
+ /* do we have all the max number of tokens? if so do
1401
+ * some special cases for 5 tokens
1402
+ */
1403
+ if (pos >= LIBINJECTION_SQLI_MAX_TOKENS) {
1404
+ if ((sf->tokenvec[0].type == TYPE_NUMBER &&
1405
+ (sf->tokenvec[1].type == TYPE_OPERATOR ||
1406
+ sf->tokenvec[1].type == TYPE_COMMA) &&
1407
+ sf->tokenvec[2].type == TYPE_LEFTPARENS &&
1408
+ sf->tokenvec[3].type == TYPE_NUMBER &&
1409
+ sf->tokenvec[4].type == TYPE_RIGHTPARENS) ||
1410
+ (sf->tokenvec[0].type == TYPE_BAREWORD &&
1411
+ sf->tokenvec[1].type == TYPE_OPERATOR &&
1412
+ sf->tokenvec[2].type == TYPE_LEFTPARENS &&
1413
+ (sf->tokenvec[3].type == TYPE_BAREWORD ||
1414
+ sf->tokenvec[3].type == TYPE_NUMBER) &&
1415
+ sf->tokenvec[4].type == TYPE_RIGHTPARENS) ||
1416
+ (sf->tokenvec[0].type == TYPE_NUMBER &&
1417
+ sf->tokenvec[1].type == TYPE_RIGHTPARENS &&
1418
+ sf->tokenvec[2].type == TYPE_COMMA &&
1419
+ sf->tokenvec[3].type == TYPE_LEFTPARENS &&
1420
+ sf->tokenvec[4].type == TYPE_NUMBER) ||
1421
+ (sf->tokenvec[0].type == TYPE_BAREWORD &&
1422
+ sf->tokenvec[1].type == TYPE_RIGHTPARENS &&
1423
+ sf->tokenvec[2].type == TYPE_OPERATOR &&
1424
+ sf->tokenvec[3].type == TYPE_LEFTPARENS &&
1425
+ sf->tokenvec[4].type == TYPE_BAREWORD)) {
/* One of the five-token shapes matched: keep only token 0 (plus the
 * look-ahead token, if one was read) and restart folding. */
1426
+ if (pos > LIBINJECTION_SQLI_MAX_TOKENS) {
1427
+ st_copy(&(sf->tokenvec[1]),
1428
+ &(sf->tokenvec[LIBINJECTION_SQLI_MAX_TOKENS]));
1429
+ pos = 2;
1430
+ left = 0;
1431
+ } else {
1432
+ pos = 1;
1433
+ left = 0;
1434
+ }
1435
+ }
1436
+ }
1437
+
/* Exit: the tokenizer is exhausted or the processed prefix is full. */
1438
+ if (!more || left >= LIBINJECTION_SQLI_MAX_TOKENS) {
1439
+ left = pos;
1440
+ break;
1441
+ }
1442
+
1443
+ /* get up to two tokens */
1444
+ while (more && pos <= LIBINJECTION_SQLI_MAX_TOKENS &&
1445
+ (pos - left) < 2) {
1446
+ sf->current = &(sf->tokenvec[pos]);
1447
+ more = libinjection_sqli_tokenize(sf);
1448
+ if (more) {
1449
+ if (sf->current->type == TYPE_COMMENT) {
1450
+ st_copy(&last_comment, sf->current);
1451
+ } else {
1452
+ last_comment.type = CHAR_NULL;
1453
+ pos += 1;
1454
+ }
1455
+ }
1456
+ }
1457
+ FOLD_DEBUG;
1458
+ /* did we get 2 tokens? if not then we are done */
1459
+ if (pos - left < 2) {
1460
+ left = pos;
1461
+ continue;
1462
+ }
1463
+
1464
+ /* FOLD: "ss" -> "s"
1465
+ * "foo" "bar" is valid SQL
1466
+ * just ignore second string
1467
+ */
1468
+ if (sf->tokenvec[left].type == TYPE_STRING &&
1469
+ sf->tokenvec[left + 1].type == TYPE_STRING) {
1470
+ pos -= 1;
1471
+ sf->stats_folds += 1;
1472
+ continue;
1473
+ } else if (sf->tokenvec[left].type == TYPE_SEMICOLON &&
1474
+ sf->tokenvec[left + 1].type == TYPE_SEMICOLON) {
1475
+ /* not sure how various engines handle
1476
+ * 'select 1;;drop table foo' or
1477
+ * 'select 1; /x foo x/; drop table foo'
1478
+ * to prevent surprises, just fold away repeated semicolons
1479
+ */
1480
+ pos -= 1;
1481
+ sf->stats_folds += 1;
1482
+ continue;
1483
+ } else if ((sf->tokenvec[left].type == TYPE_OPERATOR ||
1484
+ sf->tokenvec[left].type == TYPE_LOGIC_OPERATOR) &&
1485
+ (st_is_unary_op(&sf->tokenvec[left + 1]) ||
1486
+ sf->tokenvec[left + 1].type == TYPE_SQLTYPE)) {
1487
+ pos -= 1;
1488
+ sf->stats_folds += 1;
1489
+ left = 0;
1490
+ continue;
1491
+ } else if (sf->tokenvec[left].type == TYPE_LEFTPARENS &&
1492
+ st_is_unary_op(&sf->tokenvec[left + 1])) {
1493
+ pos -= 1;
1494
+ sf->stats_folds += 1;
1495
+ if (left > 0) {
1496
+ left -= 1;
1497
+ }
1498
+ continue;
1499
+ } else if (syntax_merge_words(sf, &sf->tokenvec[left],
1500
+ &sf->tokenvec[left + 1])) {
1501
+ pos -= 1;
1502
+ sf->stats_folds += 1;
1503
+ if (left > 0) {
1504
+ left -= 1;
1505
+ }
1506
+ continue;
1507
+ } else if (sf->tokenvec[left].type == TYPE_SEMICOLON &&
1508
+ sf->tokenvec[left + 1].type == TYPE_FUNCTION &&
1509
+ (sf->tokenvec[left + 1].val[0] == 'I' ||
1510
+ sf->tokenvec[left + 1].val[0] == 'i') &&
1511
+ (sf->tokenvec[left + 1].val[1] == 'F' ||
1512
+ sf->tokenvec[left + 1].val[1] == 'f')) {
1513
+ /* IF is normally a function, except in Transact-SQL where it can be
1514
+ * used as a standalone control flow operator, e.g. ; IF 1=1 ... if
1515
+ * found after a semicolon, convert from 'f' type to 'T' type
1516
+ */
1517
+ sf->tokenvec[left + 1].type = TYPE_TSQL;
1518
+ /* left += 2; */
/* pos and left are unchanged; the retyped token no longer matches this
 * branch on the next pass. */
1519
+ continue; /* reparse everything, but we probably can advance left,
1520
+ * and pos
1521
+ */
1522
+ } else if ((sf->tokenvec[left].type == TYPE_BAREWORD ||
1523
+ sf->tokenvec[left].type == TYPE_VARIABLE) &&
1524
+ sf->tokenvec[left + 1].type == TYPE_LEFTPARENS &&
1525
+ (
1526
+ /* TSQL functions but common enough to be column names */
1527
+ cstrcasecmp("USER_ID", sf->tokenvec[left].val,
1528
+ sf->tokenvec[left].len) == 0 ||
1529
+ cstrcasecmp("USER_NAME", sf->tokenvec[left].val,
1530
+ sf->tokenvec[left].len) == 0 ||
1531
+
1532
+ /* Function in MYSQL */
1533
+ cstrcasecmp("DATABASE", sf->tokenvec[left].val,
1534
+ sf->tokenvec[left].len) == 0 ||
1535
+ cstrcasecmp("PASSWORD", sf->tokenvec[left].val,
1536
+ sf->tokenvec[left].len) == 0 ||
1537
+ cstrcasecmp("USER", sf->tokenvec[left].val,
1538
+ sf->tokenvec[left].len) == 0 ||
1539
+
1540
+ /* Mysql words that act as a variable and are a function
1541
+ */
1542
+
1543
+ /* TSQL current_users is fake-variable */
1544
+ /* http://msdn.microsoft.com/en-us/library/ms176050.aspx
1545
+ */
1546
+ cstrcasecmp("CURRENT_USER", sf->tokenvec[left].val,
1547
+ sf->tokenvec[left].len) == 0 ||
1548
+ cstrcasecmp("CURRENT_DATE", sf->tokenvec[left].val,
1549
+ sf->tokenvec[left].len) == 0 ||
1550
+ cstrcasecmp("CURRENT_TIME", sf->tokenvec[left].val,
1551
+ sf->tokenvec[left].len) == 0 ||
1552
+ cstrcasecmp("CURRENT_TIMESTAMP", sf->tokenvec[left].val,
1553
+ sf->tokenvec[left].len) == 0 ||
1554
+ cstrcasecmp("LOCALTIME", sf->tokenvec[left].val,
1555
+ sf->tokenvec[left].len) == 0 ||
1556
+ cstrcasecmp("LOCALTIMESTAMP", sf->tokenvec[left].val,
1557
+ sf->tokenvec[left].len) == 0)) {
1558
+
1559
+ /* pos is the same
1560
+ * other conversions need to go here... for instance
1561
+ * password CAN be a function, coalesce CAN be a function
1562
+ */
1563
+ sf->tokenvec[left].type = TYPE_FUNCTION;
1564
+ continue;
1565
+ } else if (sf->tokenvec[left].type == TYPE_KEYWORD &&
1566
+ (cstrcasecmp("IN", sf->tokenvec[left].val,
1567
+ sf->tokenvec[left].len) == 0 ||
1568
+ cstrcasecmp("NOT IN", sf->tokenvec[left].val,
1569
+ sf->tokenvec[left].len) == 0)) {
1570
+
1571
+ if (sf->tokenvec[left + 1].type == TYPE_LEFTPARENS) {
1572
+ /* got .... IN ( ... (or 'NOT IN')
1573
+ * it's an operator
1574
+ */
1575
+ sf->tokenvec[left].type = TYPE_OPERATOR;
1576
+ } else {
1577
+ /*
1578
+ * it's a nothing
1579
+ */
1580
+ sf->tokenvec[left].type = TYPE_BAREWORD;
1581
+ }
1582
+
1583
+ /* "IN" can be used as "IN BOOLEAN MODE" for mysql
1584
+ * in which case merging of words can be done later
1585
+ * other wise it acts as an equality operator __ IN (values..)
1586
+ *
1587
+ * here we got "IN" "(" so it's an operator.
1588
+ * also back track to handle "NOT IN"
1589
+ * might need to do the same with like
1590
+ * two use cases "foo" LIKE "BAR" (normal operator)
1591
+ * "foo" = LIKE(1,2)
1592
+ */
1593
+ continue;
1594
+ } else if ((sf->tokenvec[left].type == TYPE_OPERATOR) &&
1595
+ (cstrcasecmp("LIKE", sf->tokenvec[left].val,
1596
+ sf->tokenvec[left].len) == 0 ||
1597
+ cstrcasecmp("NOT LIKE", sf->tokenvec[left].val,
1598
+ sf->tokenvec[left].len) == 0)) {
1599
+ if (sf->tokenvec[left + 1].type == TYPE_LEFTPARENS) {
1600
+ /* SELECT LIKE(...
1601
+ * it's a function
1602
+ */
1603
+ sf->tokenvec[left].type = TYPE_FUNCTION;
1604
+ }
/* no continue here: control falls through to the three-token stage */
1605
+ } else if (sf->tokenvec[left].type == TYPE_SQLTYPE &&
1606
+ (sf->tokenvec[left + 1].type == TYPE_BAREWORD ||
1607
+ sf->tokenvec[left + 1].type == TYPE_NUMBER ||
1608
+ sf->tokenvec[left + 1].type == TYPE_SQLTYPE ||
1609
+ sf->tokenvec[left + 1].type == TYPE_LEFTPARENS ||
1610
+ sf->tokenvec[left + 1].type == TYPE_FUNCTION ||
1611
+ sf->tokenvec[left + 1].type == TYPE_VARIABLE ||
1612
+ sf->tokenvec[left + 1].type == TYPE_STRING)) {
1613
+ st_copy(&sf->tokenvec[left], &sf->tokenvec[left + 1]);
1614
+ pos -= 1;
1615
+ sf->stats_folds += 1;
1616
+ left = 0;
1617
+ continue;
1618
+ } else if (sf->tokenvec[left].type == TYPE_COLLATE &&
1619
+ sf->tokenvec[left + 1].type == TYPE_BAREWORD) {
1620
+ /*
1621
+ * there are too many collation types.. so if the bareword has a "_"
1622
+ * then it's TYPE_SQLTYPE
1623
+ */
1624
+ if (strchr(sf->tokenvec[left + 1].val, '_') != NULL) {
1625
+ sf->tokenvec[left + 1].type = TYPE_SQLTYPE;
1626
+ left = 0;
1627
+ }
/* no continue here either: falls through to the three-token stage */
1628
+ } else if (sf->tokenvec[left].type == TYPE_BACKSLASH) {
1629
+ if (st_is_arithmetic_op(&(sf->tokenvec[left + 1]))) {
1630
+ /* very weird case in TSQL where '\%1' is parsed as '0 % 1', etc
1631
+ */
1632
+ sf->tokenvec[left].type = TYPE_NUMBER;
1633
+ } else {
1634
+ /* just ignore it.. Again T-SQL seems to parse \1 as "1" */
1635
+ st_copy(&sf->tokenvec[left], &sf->tokenvec[left + 1]);
1636
+ pos -= 1;
1637
+ sf->stats_folds += 1;
1638
+ }
1639
+ left = 0;
1640
+ continue;
1641
+ } else if (sf->tokenvec[left].type == TYPE_LEFTPARENS &&
1642
+ sf->tokenvec[left + 1].type == TYPE_LEFTPARENS) {
1643
+ pos -= 1;
1644
+ left = 0;
1645
+ sf->stats_folds += 1;
1646
+ continue;
1647
+ } else if (sf->tokenvec[left].type == TYPE_RIGHTPARENS &&
1648
+ sf->tokenvec[left + 1].type == TYPE_RIGHTPARENS) {
1649
+ pos -= 1;
1650
+ left = 0;
1651
+ sf->stats_folds += 1;
1652
+ continue;
1653
+ } else if (sf->tokenvec[left].type == TYPE_LEFTBRACE &&
1654
+ sf->tokenvec[left + 1].type == TYPE_BAREWORD) {
1655
+
1656
+ /*
1657
+ * MySQL Degenerate case --
1658
+ *
1659
+ * select { ``.``.id }; -- valid !!!
1660
+ * select { ``.``.``.id }; -- invalid
1661
+ * select ``.``.id; -- invalid
1662
+ * select { ``.id }; -- invalid
1663
+ *
1664
+ * so it appears {``.``.id} is a magic case
1665
+ * I suspect this is "current database, current table, field id"
1666
+ *
1667
+ * The folding code can't look at more than 3 tokens, and
1668
+ * I don't want to make two passes.
1669
+ *
1670
+ * Since "{ ``" so rare, we are just going to blacklist it.
1671
+ *
1672
+ * Highly likely this will need revisiting!
1673
+ *
1674
+ * CREDIT @rsalgado 2013-11-25
1675
+ */
1676
+ if (sf->tokenvec[left + 1].len == 0) {
1677
+ sf->tokenvec[left + 1].type = TYPE_EVIL;
1678
+ return (int)(left + 2);
1679
+ }
1680
+ /* weird ODBC / MYSQL {foo expr} --> expr
1681
+ * but for this rule we just strip away the "{ foo" part
1682
+ */
1683
+ left = 0;
1684
+ pos -= 2;
1685
+ sf->stats_folds += 2;
1686
+ continue;
1687
+ } else if (sf->tokenvec[left + 1].type == TYPE_RIGHTBRACE) {
1688
+ pos -= 1;
1689
+ left = 0;
1690
+ sf->stats_folds += 1;
1691
+ continue;
1692
+ }
1693
+
1694
+ /* all cases of handing 2 tokens is done
1695
+ and nothing matched. Get one more token
1696
+ */
1697
+ FOLD_DEBUG;
1698
+ while (more && pos <= LIBINJECTION_SQLI_MAX_TOKENS && pos - left < 3) {
1699
+ sf->current = &(sf->tokenvec[pos]);
1700
+ more = libinjection_sqli_tokenize(sf);
1701
+ if (more) {
1702
+ if (sf->current->type == TYPE_COMMENT) {
1703
+ st_copy(&last_comment, sf->current);
1704
+ } else {
1705
+ last_comment.type = CHAR_NULL;
1706
+ pos += 1;
1707
+ }
1708
+ }
1709
+ }
1710
+
1711
+ /* do we have three tokens? If not then we are done */
1712
+ if (pos - left < 3) {
1713
+ left = pos;
1714
+ continue;
1715
+ }
1716
+
1717
+ /*
1718
+ * now look for three token folding
1719
+ */
1720
+ if (sf->tokenvec[left].type == TYPE_NUMBER &&
1721
+ sf->tokenvec[left + 1].type == TYPE_OPERATOR &&
1722
+ sf->tokenvec[left + 2].type == TYPE_NUMBER) {
1723
+ pos -= 2;
1724
+ left = 0;
1725
+ continue;
1726
+ } else if (sf->tokenvec[left].type == TYPE_OPERATOR &&
1727
+ sf->tokenvec[left + 1].type != TYPE_LEFTPARENS &&
1728
+ sf->tokenvec[left + 2].type == TYPE_OPERATOR) {
1729
+ left = 0;
1730
+ pos -= 2;
1731
+ continue;
1732
+ } else if (sf->tokenvec[left].type == TYPE_LOGIC_OPERATOR &&
1733
+ sf->tokenvec[left + 2].type == TYPE_LOGIC_OPERATOR) {
1734
+ pos -= 2;
1735
+ left = 0;
1736
+ continue;
1737
+ } else if (sf->tokenvec[left].type == TYPE_VARIABLE &&
1738
+ sf->tokenvec[left + 1].type == TYPE_OPERATOR &&
1739
+ (sf->tokenvec[left + 2].type == TYPE_VARIABLE ||
1740
+ sf->tokenvec[left + 2].type == TYPE_NUMBER ||
1741
+ sf->tokenvec[left + 2].type == TYPE_BAREWORD)) {
1742
+ pos -= 2;
1743
+ left = 0;
1744
+ continue;
1745
+ } else if ((sf->tokenvec[left].type == TYPE_BAREWORD ||
1746
+ sf->tokenvec[left].type == TYPE_NUMBER) &&
1747
+ sf->tokenvec[left + 1].type == TYPE_OPERATOR &&
1748
+ (sf->tokenvec[left + 2].type == TYPE_NUMBER ||
1749
+ sf->tokenvec[left + 2].type == TYPE_BAREWORD)) {
1750
+ pos -= 2;
1751
+ left = 0;
1752
+ continue;
1753
+ } else if ((sf->tokenvec[left].type == TYPE_BAREWORD ||
1754
+ sf->tokenvec[left].type == TYPE_NUMBER ||
1755
+ sf->tokenvec[left].type == TYPE_VARIABLE ||
1756
+ sf->tokenvec[left].type == TYPE_STRING) &&
1757
+ sf->tokenvec[left + 1].type == TYPE_OPERATOR &&
1758
+ streq(sf->tokenvec[left + 1].val, "::") &&
1759
+ sf->tokenvec[left + 2].type == TYPE_SQLTYPE) {
1760
+ pos -= 2;
1761
+ left = 0;
1762
+ sf->stats_folds += 2;
1763
+ continue;
1764
+ } else if ((sf->tokenvec[left].type == TYPE_BAREWORD ||
1765
+ sf->tokenvec[left].type == TYPE_NUMBER ||
1766
+ sf->tokenvec[left].type == TYPE_STRING ||
1767
+ sf->tokenvec[left].type == TYPE_VARIABLE) &&
1768
+ sf->tokenvec[left + 1].type == TYPE_COMMA &&
1769
+ (sf->tokenvec[left + 2].type == TYPE_NUMBER ||
1770
+ sf->tokenvec[left + 2].type == TYPE_BAREWORD ||
1771
+ sf->tokenvec[left + 2].type == TYPE_STRING ||
1772
+ sf->tokenvec[left + 2].type == TYPE_VARIABLE)) {
1773
+ pos -= 2;
1774
+ left = 0;
1775
+ continue;
1776
+ } else if ((sf->tokenvec[left].type == TYPE_EXPRESSION ||
1777
+ sf->tokenvec[left].type == TYPE_GROUP ||
1778
+ sf->tokenvec[left].type == TYPE_COMMA) &&
1779
+ st_is_unary_op(&sf->tokenvec[left + 1]) &&
1780
+ sf->tokenvec[left + 2].type == TYPE_LEFTPARENS) {
1781
+ /* got something like SELECT + (, LIMIT + (
1782
+ * remove unary operator
1783
+ */
1784
+ st_copy(&sf->tokenvec[left + 1], &sf->tokenvec[left + 2]);
1785
+ pos -= 1;
1786
+ left = 0;
1787
+ continue;
1788
+ } else if ((sf->tokenvec[left].type == TYPE_KEYWORD ||
1789
+ sf->tokenvec[left].type == TYPE_EXPRESSION ||
1790
+ sf->tokenvec[left].type == TYPE_GROUP) &&
1791
+ st_is_unary_op(&sf->tokenvec[left + 1]) &&
1792
+ (sf->tokenvec[left + 2].type == TYPE_NUMBER ||
1793
+ sf->tokenvec[left + 2].type == TYPE_BAREWORD ||
1794
+ sf->tokenvec[left + 2].type == TYPE_VARIABLE ||
1795
+ sf->tokenvec[left + 2].type == TYPE_STRING ||
1796
+ sf->tokenvec[left + 2].type == TYPE_FUNCTION)) {
1797
+ /* remove unary operators
1798
+ * select - 1
1799
+ */
1800
+ st_copy(&sf->tokenvec[left + 1], &sf->tokenvec[left + 2]);
1801
+ pos -= 1;
1802
+ left = 0;
1803
+ continue;
1804
+ } else if (sf->tokenvec[left].type == TYPE_COMMA &&
1805
+ st_is_unary_op(&sf->tokenvec[left + 1]) &&
1806
+ (sf->tokenvec[left + 2].type == TYPE_NUMBER ||
1807
+ sf->tokenvec[left + 2].type == TYPE_BAREWORD ||
1808
+ sf->tokenvec[left + 2].type == TYPE_VARIABLE ||
1809
+ sf->tokenvec[left + 2].type == TYPE_STRING)) {
1810
+ /*
1811
+ * interesting case turn ", -1" ->> ",1" PLUS we need to back up
1812
+ * one token if possible to see if more folding can be done
1813
+ * "1,-1" --> "1"
1814
+ */
1815
+ st_copy(&sf->tokenvec[left + 1], &sf->tokenvec[left + 2]);
1816
+ left = 0;
1817
+ /* pos is >= 3 so this is safe */
1818
+ assert(pos >= 3);
1819
+ pos -= 3;
1820
+ continue;
1821
+ } else if (sf->tokenvec[left].type == TYPE_COMMA &&
1822
+ st_is_unary_op(&sf->tokenvec[left + 1]) &&
1823
+ sf->tokenvec[left + 2].type == TYPE_FUNCTION) {
1824
+
1825
+ /* Separate case from above since you end up with
1826
+ * 1,-sin(1) --> 1 (1)
1827
+ * Here, just do
1828
+ * 1,-sin(1) --> 1,sin(1)
1829
+ * just remove unary operator
1830
+ */
1831
+ st_copy(&sf->tokenvec[left + 1], &sf->tokenvec[left + 2]);
1832
+ pos -= 1;
1833
+ left = 0;
1834
+ continue;
1835
+ } else if ((sf->tokenvec[left].type == TYPE_BAREWORD) &&
1836
+ (sf->tokenvec[left + 1].type == TYPE_DOT) &&
1837
+ (sf->tokenvec[left + 2].type == TYPE_BAREWORD)) {
1838
+ /* ignore the '.n'
1839
+ * typically is this databasename.table
1840
+ */
1841
+ assert(pos >= 3);
1842
+ pos -= 2;
1843
+ left = 0;
1844
+ continue;
1845
+ } else if ((sf->tokenvec[left].type == TYPE_EXPRESSION) &&
1846
+ (sf->tokenvec[left + 1].type == TYPE_DOT) &&
1847
+ (sf->tokenvec[left + 2].type == TYPE_BAREWORD)) {
1848
+ /* select . `foo` --> select `foo` */
1849
+ st_copy(&sf->tokenvec[left + 1], &sf->tokenvec[left + 2]);
1850
+ pos -= 1;
1851
+ left = 0;
1852
+ continue;
1853
+ } else if ((sf->tokenvec[left].type == TYPE_FUNCTION) &&
1854
+ (sf->tokenvec[left + 1].type == TYPE_LEFTPARENS) &&
1855
+ (sf->tokenvec[left + 2].type != TYPE_RIGHTPARENS)) {
1856
+ /*
1857
+ * whats going on here
1858
+ * Some SQL functions like USER() have 0 args
1859
+ * if we get User(foo), then User is not a function
1860
+ * This should be expanded since it eliminated a lot of false
1861
+ * positives.
1862
+ */
1863
+ if (cstrcasecmp("USER", sf->tokenvec[left].val,
1864
+ sf->tokenvec[left].len) == 0) {
1865
+ sf->tokenvec[left].type = TYPE_BAREWORD;
1866
+ }
1867
+ }
1868
+
1869
+ /* no folding -- assume left-most token is
1870
+ is good, now use the existing 2 tokens --
1871
+ do not get another
1872
+ */
1873
+
1874
+ left += 1;
1875
+
1876
+ } /* while(1) */
1877
+
1878
+ /* if we have 4 or less tokens, and we had a comment token
1879
+ * at the end, add it back
1880
+ */
1881
+
1882
+ if (left < LIBINJECTION_SQLI_MAX_TOKENS &&
1883
+ last_comment.type == TYPE_COMMENT) {
1884
+ st_copy(&sf->tokenvec[left], &last_comment);
1885
+ left += 1;
1886
+ }
1887
+
1888
+ /* sometimes we grab a 6th token to help
1889
+ determine the type of token 5.
1890
+ */
1891
+ if (left > LIBINJECTION_SQLI_MAX_TOKENS) {
1892
+ left = LIBINJECTION_SQLI_MAX_TOKENS;
1893
+ }
1894
+
1895
+ return (int)left;
1896
+ }
1897
+
1898
+ /* secondary api: detects SQLi in a string, GIVEN a context.
1899
+ *
1900
+ * A context can be:
1901
+ * * CHAR_NULL (\0), process as is
1902
+ * * CHAR_SINGLE ('), process pretending input started with a
1903
+ * single quote.
1904
+ * * CHAR_DOUBLE ("), process pretending input started with a
1905
+ * double quote.
1906
+ *
1907
+ */
1908
+ const char *
1909
+ libinjection_sqli_fingerprint(struct libinjection_sqli_state *sql_state,
1910
+ int flags) {
1911
+ int i;
1912
+ int tlen = 0;
1913
+
1914
+ libinjection_sqli_reset(sql_state, flags);
1915
+
1916
+ tlen = libinjection_sqli_fold(sql_state);
1917
+
1918
+ /* Check for magic PHP backquote comment
1919
+ * If:
1920
+ * * last token is of type "bareword"
1921
+ * * And is quoted in a backtick
1922
+ * * And isn't closed
1923
+ * * And it's empty?
1924
+ * Then convert it to comment
1925
+ */
1926
+ if (tlen > 2 && sql_state->tokenvec[tlen - 1].type == TYPE_BAREWORD &&
1927
+ sql_state->tokenvec[tlen - 1].str_open == CHAR_TICK &&
1928
+ sql_state->tokenvec[tlen - 1].len == 0 &&
1929
+ sql_state->tokenvec[tlen - 1].str_close == CHAR_NULL) {
1930
+ sql_state->tokenvec[tlen - 1].type = TYPE_COMMENT;
1931
+ }
1932
+
1933
+ for (i = 0; i < tlen; ++i) {
1934
+ sql_state->fingerprint[i] = sql_state->tokenvec[i].type;
1935
+ }
1936
+
1937
+ /*
1938
+ * make the fingerprint pattern a c-string (null delimited)
1939
+ */
1940
+ sql_state->fingerprint[tlen] = CHAR_NULL;
1941
+
1942
+ /*
1943
+ * check for 'X' in pattern, and then
1944
+ * clear out all tokens
1945
+ *
1946
+ * this means parsing could not be done
1947
+ * accurately due to pgsql's double comments
1948
+ * or other syntax that isn't consistent.
1949
+ * Should be very rare false positive
1950
+ */
1951
+ if (strchr(sql_state->fingerprint, TYPE_EVIL)) {
1952
+ /* needed for SWIG */
1953
+ memset((void *)sql_state->fingerprint, 0,
1954
+ LIBINJECTION_SQLI_MAX_TOKENS + 1);
1955
+ memset((void *)sql_state->tokenvec[0].val, 0,
1956
+ LIBINJECTION_SQLI_TOKEN_SIZE);
1957
+
1958
+ sql_state->fingerprint[0] = TYPE_EVIL;
1959
+
1960
+ sql_state->tokenvec[0].type = TYPE_EVIL;
1961
+ sql_state->tokenvec[0].val[0] = TYPE_EVIL;
1962
+ sql_state->tokenvec[1].type = CHAR_NULL;
1963
+ }
1964
+
1965
+ return sql_state->fingerprint;
1966
+ }
1967
+
1968
/*
 * Return non-zero when the current fingerprint is on the blacklist and is
 * not excused by the whitelist checks. The whitelist step only runs when
 * the blacklist step matched.
 */
int libinjection_sqli_check_fingerprint(
    struct libinjection_sqli_state *sql_state) {
    if (!libinjection_sqli_blacklist(sql_state)) {
        return 0;
    }

    return libinjection_sqli_not_whitelist(sql_state) != 0;
}
1973
+
1974
+ static char
1975
+ libinjection_sqli_lookup_word(struct libinjection_sqli_state *sql_state,
1976
+ int lookup_type, const char *str, size_t len) {
1977
+ if (lookup_type == LOOKUP_FINGERPRINT) {
1978
+ return libinjection_sqli_check_fingerprint(sql_state) ? 'X' : '\0';
1979
+ } else {
1980
+ return bsearch_keyword_type(str, len, sql_keywords, sql_keywords_sz);
1981
+ }
1982
+ }
1983
+
1984
+ static int
1985
+ libinjection_sqli_blacklist(struct libinjection_sqli_state *sql_state) {
1986
+ /*
1987
+ * use minimum of 8 bytes to make sure gcc -fstack-protector
1988
+ * works correctly
1989
+ */
1990
+ char fp2[8];
1991
+ char ch;
1992
+ size_t i;
1993
+ size_t len = strlen(sql_state->fingerprint);
1994
+ int patmatch;
1995
+
1996
+ if (len < 1) {
1997
+ sql_state->reason = __LINE__;
1998
+ return FALSE;
1999
+ }
2000
+
2001
+ /*
2002
+ to keep everything compatible, convert the
2003
+ v0 fingerprint pattern to v1
2004
+ v0: up to 5 chars, mixed case
2005
+ v1: 1 char is '0', up to 5 more chars, upper case
2006
+ */
2007
+
2008
+ fp2[0] = '0';
2009
+ for (i = 0; i < len; ++i) {
2010
+ ch = sql_state->fingerprint[i];
2011
+ if (ch >= 'a' && ch <= 'z') {
2012
+ ch -= 0x20;
2013
+ }
2014
+ fp2[i + 1] = ch;
2015
+ }
2016
+ fp2[i + 1] = '\0';
2017
+
2018
+ patmatch = is_keyword(fp2, len + 1) == TYPE_FINGERPRINT;
2019
+
2020
+ /*
2021
+ * No match.
2022
+ *
2023
+ * Set sql_state->reason to current line number
2024
+ * only for debugging purposes.
2025
+ */
2026
+ if (!patmatch) {
2027
+ sql_state->reason = __LINE__;
2028
+ return FALSE;
2029
+ }
2030
+
2031
+ return TRUE;
2032
+ }
2033
+
2034
+ /*
2035
+ * return TRUE if SQLi, false is benign
2036
+ */
2037
+ static int
2038
+ libinjection_sqli_not_whitelist(struct libinjection_sqli_state *sql_state) {
2039
+ /*
2040
+ * We assume we got a SQLi match
2041
+ * This next part just helps reduce false positives.
2042
+ *
2043
+ */
2044
+ char ch;
2045
+ size_t tlen = strlen(sql_state->fingerprint);
2046
+
2047
+ if (tlen > 1 && sql_state->fingerprint[tlen - 1] == TYPE_COMMENT) {
2048
+ /*
2049
+ * if ending comment is contains 'sp_password' then it's SQLi!
2050
+ * MS Audit log apparently ignores anything with
2051
+ * 'sp_password' in it. Unable to find primary reference to
2052
+ * this "feature" of SQL Server but seems to be known SQLi
2053
+ * technique
2054
+ */
2055
+ if (my_memmem(sql_state->s, sql_state->slen, "sp_password",
2056
+ strlen("sp_password"))) {
2057
+ sql_state->reason = __LINE__;
2058
+ return TRUE;
2059
+ }
2060
+ }
2061
+
2062
+ switch (tlen) {
2063
+ case 2: {
2064
+ /*
2065
+ * case 2 are "very small SQLi" which make them
2066
+ * hard to tell from normal input...
2067
+ */
2068
+
2069
+ if (sql_state->fingerprint[1] == TYPE_UNION) {
2070
+ if (sql_state->stats_tokens == 2) {
2071
+ /* not sure why but 1U comes up in SQLi attack
2072
+ * likely part of parameter splitting/etc.
2073
+ * lots of reasons why "1 union" might be normal
2074
+ * input, so beep only if other SQLi things are present
2075
+ */
2076
+ /* it really is a number and 'union'
2077
+ * other wise it has folding or comments
2078
+ */
2079
+ sql_state->reason = __LINE__;
2080
+ return FALSE;
2081
+ } else {
2082
+ sql_state->reason = __LINE__;
2083
+ return TRUE;
2084
+ }
2085
+ }
2086
+ /*
2087
+ * if 'comment' is '#' ignore.. too many FP
2088
+ */
2089
+ if (sql_state->tokenvec[1].val[0] == '#') {
2090
+ sql_state->reason = __LINE__;
2091
+ return FALSE;
2092
+ }
2093
+
2094
+ /*
2095
+ * for fingerprint like 'nc', only comments of /x are treated
2096
+ * as SQL... ending comments of "--" and "#" are not SQLi
2097
+ */
2098
+ if (sql_state->tokenvec[0].type == TYPE_BAREWORD &&
2099
+ sql_state->tokenvec[1].type == TYPE_COMMENT &&
2100
+ sql_state->tokenvec[1].val[0] != '/') {
2101
+ sql_state->reason = __LINE__;
2102
+ return FALSE;
2103
+ }
2104
+
2105
+ /*
2106
+ * if '1c' ends with '/x' then it's SQLi
2107
+ */
2108
+ if (sql_state->tokenvec[0].type == TYPE_NUMBER &&
2109
+ sql_state->tokenvec[1].type == TYPE_COMMENT &&
2110
+ sql_state->tokenvec[1].val[0] == '/') {
2111
+ return TRUE;
2112
+ }
2113
+
2114
+ /**
2115
+ * there are some odd base64-looking query string values
2116
+ * 1234-ABCDEFEhfhihwuefi--
2117
+ * which evaluate to "1c"... these are not SQLi
2118
+ * but 1234-- probably is.
2119
+ * Make sure the "1" in "1c" is actually a true decimal number
2120
+ *
2121
+ * Need to check -original- string since the folding step
2122
+ * may have merged tokens, e.g. "1+FOO" is folded into "1"
2123
+ *
2124
+ * Note: evasion: 1*1--
2125
+ */
2126
+ if (sql_state->tokenvec[0].type == TYPE_NUMBER &&
2127
+ sql_state->tokenvec[1].type == TYPE_COMMENT) {
2128
+ if (sql_state->stats_tokens > 2) {
2129
+ /* we have some folding going on, highly likely SQLi */
2130
+ sql_state->reason = __LINE__;
2131
+ return TRUE;
2132
+ }
2133
+ /*
2134
+ * we check that next character after the number is either
2135
+ * whitespace, or '/' or a '-' ==> SQLi.
2136
+ */
2137
+ ch = sql_state->s[sql_state->tokenvec[0].len];
2138
+ if (ch <= 32) {
2139
+ /* next char was whitespace,e.g. "1234 --"
2140
+ * this isn't exactly correct.. ideally we should skip over all
2141
+ * whitespace but this seems to be ok for now
2142
+ */
2143
+ return TRUE;
2144
+ }
2145
+ if (ch == '/' &&
2146
+ sql_state->s[sql_state->tokenvec[0].len + 1] == '*') {
2147
+ return TRUE;
2148
+ }
2149
+ if (ch == '-' &&
2150
+ sql_state->s[sql_state->tokenvec[0].len + 1] == '-') {
2151
+ return TRUE;
2152
+ }
2153
+
2154
+ sql_state->reason = __LINE__;
2155
+ return FALSE;
2156
+ }
2157
+
2158
+ /*
2159
+ * detect obvious SQLi scans.. many people put '--' in plain text
2160
+ * so only detect if input ends with '--', e.g. 1-- but not 1-- foo
2161
+ */
2162
+ if ((sql_state->tokenvec[1].len > 2) &&
2163
+ sql_state->tokenvec[1].val[0] == '-') {
2164
+ sql_state->reason = __LINE__;
2165
+ return FALSE;
2166
+ }
2167
+
2168
+ break;
2169
+ } /* case 2 */
2170
+ case 3: {
2171
+ /*
2172
+ * ...foo' + 'bar...
2173
+ * no opening quote, no closing quote
2174
+ * and each string has data
2175
+ */
2176
+
2177
+ if (streq(sql_state->fingerprint, "sos") ||
2178
+ streq(sql_state->fingerprint, "s&s")) {
2179
+
2180
+ if ((sql_state->tokenvec[0].str_open == CHAR_NULL) &&
2181
+ (sql_state->tokenvec[2].str_close == CHAR_NULL) &&
2182
+ (sql_state->tokenvec[0].str_close ==
2183
+ sql_state->tokenvec[2].str_open)) {
2184
+ /*
2185
+ * if ....foo" + "bar....
2186
+ */
2187
+ sql_state->reason = __LINE__;
2188
+ return TRUE;
2189
+ }
2190
+ if (sql_state->stats_tokens == 3) {
2191
+ sql_state->reason = __LINE__;
2192
+ return FALSE;
2193
+ }
2194
+
2195
+ /*
2196
+ * not SQLi
2197
+ */
2198
+ sql_state->reason = __LINE__;
2199
+ return FALSE;
2200
+ } else if (streq(sql_state->fingerprint, "s&n") ||
2201
+ streq(sql_state->fingerprint, "n&1") ||
2202
+ streq(sql_state->fingerprint, "1&1") ||
2203
+ streq(sql_state->fingerprint, "1&v") ||
2204
+ streq(sql_state->fingerprint, "1&s")) {
2205
+ /* 'sexy and 17' not SQLi
2206
+ * 'sexy and 17<18' SQLi
2207
+ */
2208
+ if (sql_state->stats_tokens == 3) {
2209
+ sql_state->reason = __LINE__;
2210
+ return FALSE;
2211
+ }
2212
+ } else if (sql_state->tokenvec[1].type == TYPE_KEYWORD) {
2213
+ if ((sql_state->tokenvec[1].len < 5) ||
2214
+ cstrcasecmp("INTO", sql_state->tokenvec[1].val, 4)) {
2215
+ /* if it's not "INTO OUTFILE", or "INTO DUMPFILE" (MySQL)
2216
+ * then treat as safe
2217
+ */
2218
+ sql_state->reason = __LINE__;
2219
+ return FALSE;
2220
+ }
2221
+ }
2222
+ break;
2223
+ } /* case 3 */
2224
+ case 4:
2225
+ case 5: {
2226
+ /* nothing right now */
2227
+ break;
2228
+ } /* case 5 */
2229
+ } /* end switch */
2230
+
2231
+ return TRUE;
2232
+ }
2233
+
2234
+ /** Main API, detects SQLi in an input.
2235
+ *
2236
+ *
2237
+ */
2238
+ static int
2239
+ reparse_as_mysql(struct libinjection_sqli_state
2240
+ *sql_state) { // cppcheck-suppress constParameterPointer
2241
+ return sql_state->stats_comment_ddx || sql_state->stats_comment_hash;
2242
+ }
2243
+
2244
+ /*
2245
+ * This function is mostly use with SWIG
2246
+ */
2247
+ struct libinjection_sqli_token *
2248
+ libinjection_sqli_get_token(struct libinjection_sqli_state *sql_state, int i) {
2249
+ if (i < 0 || i > (int)LIBINJECTION_SQLI_MAX_TOKENS) {
2250
+ return NULL;
2251
+ }
2252
+ return &(sql_state->tokenvec[i]);
2253
+ }
2254
+
2255
+ int libinjection_is_sqli(struct libinjection_sqli_state *sql_state) {
2256
+ const char *s = sql_state->s;
2257
+ size_t slen = sql_state->slen;
2258
+
2259
+ /*
2260
+ * no input? not SQLi
2261
+ */
2262
+ if (slen == 0) {
2263
+ return FALSE;
2264
+ }
2265
+
2266
+ /*
2267
+ * test input "as-is"
2268
+ */
2269
+ libinjection_sqli_fingerprint(sql_state, FLAG_QUOTE_NONE | FLAG_SQL_ANSI);
2270
+ if (sql_state->lookup(sql_state, LOOKUP_FINGERPRINT, sql_state->fingerprint,
2271
+ strlen(sql_state->fingerprint))) {
2272
+ return TRUE;
2273
+ } else if (reparse_as_mysql(sql_state)) {
2274
+ libinjection_sqli_fingerprint(sql_state,
2275
+ FLAG_QUOTE_NONE | FLAG_SQL_MYSQL);
2276
+ if (sql_state->lookup(sql_state, LOOKUP_FINGERPRINT,
2277
+ sql_state->fingerprint,
2278
+ strlen(sql_state->fingerprint))) {
2279
+ return TRUE;
2280
+ }
2281
+ }
2282
+
2283
+ /*
2284
+ * if input has a single_quote, then
2285
+ * test as if input was actually '
2286
+ * example: if input if "1' = 1", then pretend it's
2287
+ * "'1' = 1"
2288
+ * Porting Notes: example the same as doing
2289
+ * is_string_sqli(sql_state, "'" + s, slen+1, NULL, fn, arg)
2290
+ *
2291
+ */
2292
+ if (memchr(s, CHAR_SINGLE, slen)) {
2293
+ libinjection_sqli_fingerprint(sql_state,
2294
+ FLAG_QUOTE_SINGLE | FLAG_SQL_ANSI);
2295
+ if (sql_state->lookup(sql_state, LOOKUP_FINGERPRINT,
2296
+ sql_state->fingerprint,
2297
+ strlen(sql_state->fingerprint))) {
2298
+ return TRUE;
2299
+ } else if (reparse_as_mysql(sql_state)) {
2300
+ libinjection_sqli_fingerprint(sql_state,
2301
+ FLAG_QUOTE_SINGLE | FLAG_SQL_MYSQL);
2302
+ if (sql_state->lookup(sql_state, LOOKUP_FINGERPRINT,
2303
+ sql_state->fingerprint,
2304
+ strlen(sql_state->fingerprint))) {
2305
+ return TRUE;
2306
+ }
2307
+ }
2308
+ }
2309
+
2310
+ /*
2311
+ * same as above but with a double-quote "
2312
+ */
2313
+ if (memchr(s, CHAR_DOUBLE, slen)) {
2314
+ libinjection_sqli_fingerprint(sql_state,
2315
+ FLAG_QUOTE_DOUBLE | FLAG_SQL_MYSQL);
2316
+ if (sql_state->lookup(sql_state, LOOKUP_FINGERPRINT,
2317
+ sql_state->fingerprint,
2318
+ strlen(sql_state->fingerprint))) {
2319
+ return TRUE;
2320
+ }
2321
+ }
2322
+
2323
+ /*
2324
+ * Hurray, input is not SQLi
2325
+ */
2326
+ return FALSE;
2327
+ }
2328
+
2329
+ injection_result_t libinjection_sqli(const char *s, size_t slen,
2330
+ char fingerprint[]) {
2331
+ int issqli;
2332
+ struct libinjection_sqli_state state;
2333
+
2334
+ libinjection_sqli_init(&state, s, slen, 0);
2335
+ issqli = libinjection_is_sqli(&state);
2336
+ if (issqli) {
2337
+ strcpy(fingerprint, state.fingerprint);
2338
+ } else {
2339
+ fingerprint[0] = '\0';
2340
+ }
2341
+ return issqli;
2342
+ }