threatstack-agent-ruby 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (37) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +3 -0
  3. data/LICENSE +6 -0
  4. data/ext/libinjection/extconf.rb +4 -0
  5. data/ext/libinjection/libinjection.h +65 -0
  6. data/ext/libinjection/libinjection.i +13 -0
  7. data/ext/libinjection/libinjection_html5.c +850 -0
  8. data/ext/libinjection/libinjection_html5.h +54 -0
  9. data/ext/libinjection/libinjection_sqli.c +2325 -0
  10. data/ext/libinjection/libinjection_sqli.h +298 -0
  11. data/ext/libinjection/libinjection_sqli_data.h +9654 -0
  12. data/ext/libinjection/libinjection_wrap.c +2393 -0
  13. data/ext/libinjection/libinjection_xss.c +532 -0
  14. data/ext/libinjection/libinjection_xss.h +21 -0
  15. data/lib/constants.rb +110 -0
  16. data/lib/control.rb +61 -0
  17. data/lib/events/event_accumulator.rb +36 -0
  18. data/lib/events/models/attack_event.rb +58 -0
  19. data/lib/events/models/base_event.rb +41 -0
  20. data/lib/events/models/dependency_event.rb +93 -0
  21. data/lib/events/models/environment_event.rb +93 -0
  22. data/lib/events/models/instrumentation_event.rb +46 -0
  23. data/lib/exceptions/request_blocked_error.rb +11 -0
  24. data/lib/instrumentation/common.rb +172 -0
  25. data/lib/instrumentation/instrumenter.rb +144 -0
  26. data/lib/instrumentation/kernel.rb +45 -0
  27. data/lib/instrumentation/rails.rb +61 -0
  28. data/lib/jobs/delayed_job.rb +26 -0
  29. data/lib/jobs/event_submitter.rb +101 -0
  30. data/lib/jobs/job_queue.rb +38 -0
  31. data/lib/jobs/recurrent_job.rb +61 -0
  32. data/lib/threatstack-agent-ruby.rb +7 -0
  33. data/lib/utils/aws_utils.rb +46 -0
  34. data/lib/utils/formatter.rb +47 -0
  35. data/lib/utils/logger.rb +43 -0
  36. data/threatstack-agent-ruby.gemspec +35 -0
  37. metadata +221 -0
@@ -0,0 +1,532 @@
1
+
2
+ #include "libinjection.h"
3
+ #include "libinjection_xss.h"
4
+ #include "libinjection_html5.h"
5
+
6
+ #include <assert.h>
7
+ #include <stdio.h>
8
+
9
/*
 * Classification of an HTML attribute name, as produced by is_black_attr().
 * libinjection_is_xss() uses it to decide how the following attribute
 * *value* token is handled.
 */
typedef enum attribute {
    TYPE_NONE,           /* not blacklisted; the value is not inspected */
    TYPE_BLACK,          /* ban always */
    TYPE_ATTR_URL,       /* attribute value takes a URL-like object */
    TYPE_STYLE,          /* inline style; any value is reported */
    TYPE_ATTR_INDIRECT   /* attribute *name* is given in *value* */
} attribute_t;
16
+
17
+
18
+ static attribute_t is_black_attr(const char* s, size_t len);
19
+ static int is_black_tag(const char* s, size_t len);
20
+ static int is_black_url(const char* s, size_t len);
21
+ static int cstrcasecmp_with_null(const char *a, const char *b, size_t n);
22
+ static int html_decode_char_at(const char* src, size_t len, size_t* consumed);
23
+ static int htmlencode_startswith(const char* prefix, const char *src, size_t n);
24
+
25
+
26
+ typedef struct stringtype {
27
+ const char* name;
28
+ attribute_t atype;
29
+ } stringtype_t;
30
+
31
+
32
/*
 * Hex digit lookup indexed by unsigned byte value: '0'-'9', 'A'-'F' and
 * 'a'-'f' map to 0..15; every other byte maps to the sentinel 256.
 */
static const int gsHexDecodeMap[256] = {
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 256, 256,
    256, 256, 256, 256, 256, 10, 11, 12, 13, 14, 15, 256,
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    256, 10, 11, 12, 13, 14, 15, 256, 256, 256, 256, 256,
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    256, 256, 256, 256
};

/*
 * Decode the single character found at the start of an HTML-encoded buffer.
 *
 * src      - input bytes; need not be NUL terminated
 * len      - number of readable bytes at src
 * consumed - out: how many input bytes the returned character occupied;
 *            never larger than len
 *
 * Returns -1 (with *consumed == 0) when src is NULL or len is 0.
 * Otherwise returns:
 *   - the byte itself (as unsigned char) when it is not '&', or is the
 *     last byte of the input;
 *   - the code point of a numeric reference "&#DDD" or "&#xHHH", with an
 *     optional terminating ';' included in *consumed;
 *   - '&' with *consumed == 1 for anything else: named entities, a
 *     reference without digits, a reference cut off by the end of the
 *     input, or a value above 0x1000FF.
 */
static int html_decode_char_at(const char* src, size_t len, size_t* consumed)
{
    int val = 0;
    size_t i;
    int ch;

    if (len == 0 || src == NULL) {
        *consumed = 0;
        return -1;
    }

    *consumed = 1;
    if (*src != '&' || len < 2) {
        return (unsigned char)(*src);
    }

    if (src[1] != '#') {
        /* normally this would be for named entities
         * but for this case we don't actually care
         */
        return '&';
    }

    if (len < 3) {
        /* input ends right after "&#": src[2] is not ours to read */
        return '&';
    }

    if (src[2] == 'x' || src[2] == 'X') {
        if (len < 4) {
            /* input ends right after "&#x": src[3] is not ours to read */
            return '&';
        }
        ch = gsHexDecodeMap[(unsigned char) src[3]];
        if (ch == 256) {
            /* degenerate case '&#[?]' */
            return '&';
        }
        val = ch;
        i = 4;
        while (i < len) {
            ch = (unsigned char) src[i];
            if (ch == ';') {
                *consumed = i + 1;
                return val;
            }
            ch = gsHexDecodeMap[ch];
            if (ch == 256) {
                /* first non-hex byte ends the reference and is not consumed */
                *consumed = i;
                return val;
            }
            val = (val * 16) + ch;
            if (val > 0x1000FF) {
                /* out of range: treat the '&' as a literal */
                return '&';
            }
            ++i;
        }
        *consumed = i;
        return val;
    } else {
        i = 2;
        ch = (unsigned char) src[i];
        if (ch < '0' || ch > '9') {
            return '&';
        }
        val = ch - '0';
        i += 1;
        while (i < len) {
            ch = (unsigned char) src[i];
            if (ch == ';') {
                *consumed = i + 1;
                return val;
            }
            if (ch < '0' || ch > '9') {
                /* first non-digit ends the reference and is not consumed */
                *consumed = i;
                return val;
            }
            val = (val * 10) + (ch - '0');
            if (val > 0x1000FF) {
                /* out of range: treat the '&' as a literal */
                return '&';
            }
            ++i;
        }
        *consumed = i;
        return val;
    }
}
137
+
138
+
139
+ /*
140
+ * view-source:
141
+ * data:
142
+ * javascript:
143
+ */
144
+ static stringtype_t BLACKATTR[] = {
145
+ { "ACTION", TYPE_ATTR_URL } /* form */
146
+ , { "ATTRIBUTENAME", TYPE_ATTR_INDIRECT } /* SVG allow indirection of attribute names */
147
+ , { "BY", TYPE_ATTR_URL } /* SVG */
148
+ , { "BACKGROUND", TYPE_ATTR_URL } /* IE6, O11 */
149
+ , { "DATAFORMATAS", TYPE_BLACK } /* IE */
150
+ , { "DATASRC", TYPE_BLACK } /* IE */
151
+ , { "DYNSRC", TYPE_ATTR_URL } /* Obsolete img attribute */
152
+ , { "FILTER", TYPE_STYLE } /* Opera, SVG inline style */
153
+ , { "FORMACTION", TYPE_ATTR_URL } /* HTML 5 */
154
+ , { "FOLDER", TYPE_ATTR_URL } /* Only on A tags, IE-only */
155
+ , { "FROM", TYPE_ATTR_URL } /* SVG */
156
+ , { "HANDLER", TYPE_ATTR_URL } /* SVG Tiny, Opera */
157
+ , { "HREF", TYPE_ATTR_URL }
158
+ , { "LOWSRC", TYPE_ATTR_URL } /* Obsolete img attribute */
159
+ , { "POSTER", TYPE_ATTR_URL } /* Opera 10,11 */
160
+ , { "SRC", TYPE_ATTR_URL }
161
+ , { "STYLE", TYPE_STYLE }
162
+ , { "TO", TYPE_ATTR_URL } /* SVG */
163
+ , { "VALUES", TYPE_ATTR_URL } /* SVG */
164
+ , { "XLINK:HREF", TYPE_ATTR_URL }
165
+ , { NULL, TYPE_NONE }
166
+ };
167
+
168
+ /* xmlns */
169
+ /* `xml-stylesheet` > <eval>, <if expr=> */
170
+
171
+ /*
172
+ static const char* BLACKATTR[] = {
173
+ "ATTRIBUTENAME",
174
+ "BACKGROUND",
175
+ "DATAFORMATAS",
176
+ "HREF",
177
+ "SCROLL",
178
+ "SRC",
179
+ "STYLE",
180
+ "SRCDOC",
181
+ NULL
182
+ };
183
+ */
184
+
185
/*
 * Tag names that are always reported, upper case, NULL terminated.
 * AUDIO, FORM, MARQUEE and VIDEO are deliberately left out (kept below as
 * comments so the omission stays visible).
 */
static const char* BLACKTAG[] = {
    "APPLET",
    /* "AUDIO", */
    "BASE",
    "COMMENT",   /* IE http://html5sec.org/#38 */
    "EMBED",
    /* "FORM", */
    "FRAME",
    "FRAMESET",
    "HANDLER",   /* Opera SVG, effectively a script tag */
    "IFRAME",
    "IMPORT",
    "ISINDEX",
    "LINK",
    "LISTENER",
    /* "MARQUEE", */
    "META",
    "NOSCRIPT",
    "OBJECT",
    "SCRIPT",
    "STYLE",
    /* "VIDEO", */
    "VMLFRAME",
    "XML",
    "XSS",
    NULL
};
212
+
213
+
214
/*
 * Compare an upper-case C string against a length-delimited buffer,
 * ignoring ASCII case and skipping any NUL bytes embedded in the buffer.
 *
 * a - NUL-terminated reference string; expected to be upper case already,
 *     since only the bytes of b are upper-cased
 * b - user input, not NUL terminated
 * n - number of bytes in b
 *
 * Returns 0 when b (minus its NUL bytes) equals a exactly, 1 otherwise.
 */
static int cstrcasecmp_with_null(const char *a, const char *b, size_t n)
{
    char ca;
    char cb;

    while (n-- > 0) {
        cb = *b++;
        if (cb == '\0') {
            /* embedded NULs in the input are ignored, a is not advanced */
            continue;
        }

        ca = *a++;

        if (cb >= 'a' && cb <= 'z') {
            cb -= 0x20;
        }
        if (ca != cb) {
            /* also taken when a has run out: its terminator never equals cb */
            return 1;
        }
    }

    /* the input is used up; it is a match only if a is used up as well */
    return (*a == 0) ? 0 : 1;
}
241
+
242
/*
 * Does an HTML-encoded binary string (b, n) start with the all-upper-case
 * C string a?  The comparison is case insensitive on the input side.
 *
 * Each input character is first decoded with html_decode_char_at(), so
 * numeric character references are compared by their decoded value.
 * Ignored while matching:
 *   - every leading character whose decoded value is <= 32;
 *   - NUL (0) and line feed (10) anywhere after that.
 *
 * return 1 if match / starts with
 * return 0 if not
 */
static int htmlencode_startswith(const char *a, const char *b, size_t n)
{
    size_t consumed;
    int cb;
    int first = 1;

    while (n > 0) {
        if (*a == 0) {
            /* whole prefix seen */
            return 1;
        }
        cb = html_decode_char_at(b, n, &consumed);
        if (consumed == 0) {
            /* decoder made no progress (b is NULL): stop instead of spinning */
            return 0;
        }
        if (consumed > n) {
            /* never let the remaining length wrap around */
            consumed = n;
        }
        b += consumed;
        n -= consumed;

        if (first && cb <= 32) {
            /* ignore all leading whitespace and control characters */
            continue;
        }
        first = 0;

        if (cb == 0) {
            /* always ignore null characters in user input */
            continue;
        }

        if (cb == 10) {
            /* always ignore line feed characters in user input */
            continue;
        }

        if (cb >= 'a' && cb <= 'z') {
            /* upcase */
            cb -= 0x20;
        }

        if (*a != (char) cb) {
            /* mismatch */
            return 0;
        }
        a++;
    }

    /* input exhausted: a match only if the prefix is exhausted too */
    return (*a == 0) ? 1 : 0;
}
298
+
299
+ static int is_black_tag(const char* s, size_t len)
300
+ {
301
+ const char** black;
302
+
303
+ if (len < 3) {
304
+ return 0;
305
+ }
306
+
307
+ black = BLACKTAG;
308
+ while (*black != NULL) {
309
+ if (cstrcasecmp_with_null(*black, s, len) == 0) {
310
+ /* printf("Got black tag %s\n", *black); */
311
+ return 1;
312
+ }
313
+ black += 1;
314
+ }
315
+
316
+ /* anything SVG related */
317
+ if ((s[0] == 's' || s[0] == 'S') &&
318
+ (s[1] == 'v' || s[1] == 'V') &&
319
+ (s[2] == 'g' || s[2] == 'G')) {
320
+ /* printf("Got SVG tag \n"); */
321
+ return 1;
322
+ }
323
+
324
+ /* Anything XSL(t) related */
325
+ if ((s[0] == 'x' || s[0] == 'X') &&
326
+ (s[1] == 's' || s[1] == 'S') &&
327
+ (s[2] == 'l' || s[2] == 'L')) {
328
+ /* printf("Got XSL tag\n"); */
329
+ return 1;
330
+ }
331
+
332
+ return 0;
333
+ }
334
+
335
+ static attribute_t is_black_attr(const char* s, size_t len)
336
+ {
337
+ stringtype_t* black;
338
+
339
+ if (len < 2) {
340
+ return TYPE_NONE;
341
+ }
342
+
343
+ if (len >= 5) {
344
+ /* JavaScript on.* */
345
+ if ((s[0] == 'o' || s[0] == 'O') && (s[1] == 'n' || s[1] == 'N')) {
346
+ /* printf("Got JavaScript on- attribute name\n"); */
347
+ return TYPE_BLACK;
348
+ }
349
+
350
+
351
+
352
+ /* XMLNS can be used to create arbitrary tags */
353
+ if (cstrcasecmp_with_null("XMLNS", s, 5) == 0 || cstrcasecmp_with_null("XLINK", s, 5) == 0) {
354
+ /* printf("Got XMLNS and XLINK tags\n"); */
355
+ return TYPE_BLACK;
356
+ }
357
+ }
358
+
359
+ black = BLACKATTR;
360
+ while (black->name != NULL) {
361
+ if (cstrcasecmp_with_null(black->name, s, len) == 0) {
362
+ /* printf("Got banned attribute name %s\n", black->name); */
363
+ return black->atype;
364
+ }
365
+ black += 1;
366
+ }
367
+
368
+ return TYPE_NONE;
369
+ }
370
+
371
/*
 * Does the attribute value (s, len) start with a dangerous URL scheme?
 *
 * Leading bytes that are <= 32 or >= 127 are skipped first, then the rest
 * is compared, HTML-decoded and case-insensitively, against each scheme
 * prefix below.  Returns 1 on the first match, 0 when none match.
 */
static int is_black_url(const char* s, size_t len)
{
    static const char* const schemes[] = {
        "DATA",
        "VIEW-SOURCE",
        "JAVA",        /* covers JAVA, JAVASCRIPT, + colon */
        "VBSCRIPT"     /* obsolete but interesting signal */
    };
    size_t i;

    /* skip whitespace */
    while (len > 0 && (*s <= 32 || *s >= 127)) {
        /*
         * HEY: this is a signed character.
         * We are intentionally skipping high-bit characters too
         * since they are not ASCII, and Opera sometimes uses UTF-8 whitespace.
         *
         * Also in EUC-JP some of the high bytes are just ignored.
         */
        ++s;
        --len;
    }

    for (i = 0; i < sizeof(schemes) / sizeof(schemes[0]); ++i) {
        if (htmlencode_startswith(schemes[i], s, len)) {
            return 1;
        }
    }

    return 0;
}
413
+
414
+ int libinjection_is_xss(const char* s, size_t len, int flags)
415
+ {
416
+ h5_state_t h5;
417
+ attribute_t attr = TYPE_NONE;
418
+
419
+ libinjection_h5_init(&h5, s, len, (enum html5_flags) flags);
420
+ while (libinjection_h5_next(&h5)) {
421
+ if (h5.token_type != ATTR_VALUE) {
422
+ attr = TYPE_NONE;
423
+ }
424
+
425
+ if (h5.token_type == DOCTYPE) {
426
+ return 1;
427
+ } else if (h5.token_type == TAG_NAME_OPEN) {
428
+ if (is_black_tag(h5.token_start, h5.token_len)) {
429
+ return 1;
430
+ }
431
+ } else if (h5.token_type == ATTR_NAME) {
432
+ attr = is_black_attr(h5.token_start, h5.token_len);
433
+ } else if (h5.token_type == ATTR_VALUE) {
434
+ /*
435
+ * IE6,7,8 parsing works a bit differently so
436
+ * a whole <script> or other black tag might be hiding
437
+ * inside an attribute value under HTML 5 parsing
438
+ * See http://html5sec.org/#102
439
+ * to avoid doing a full reparse of the value, just
440
+ * look for "<". This probably need adjusting to
441
+ * handle escaped characters
442
+ */
443
+ /*
444
+ if (memchr(h5.token_start, '<', h5.token_len) != NULL) {
445
+ return 1;
446
+ }
447
+ */
448
+
449
+ switch (attr) {
450
+ case TYPE_NONE:
451
+ break;
452
+ case TYPE_BLACK:
453
+ return 1;
454
+ case TYPE_ATTR_URL:
455
+ if (is_black_url(h5.token_start, h5.token_len)) {
456
+ return 1;
457
+ }
458
+ break;
459
+ case TYPE_STYLE:
460
+ return 1;
461
+ case TYPE_ATTR_INDIRECT:
462
+ /* an attribute name is specified in a _value_ */
463
+ if (is_black_attr(h5.token_start, h5.token_len)) {
464
+ return 1;
465
+ }
466
+ break;
467
+ /*
468
+ default:
469
+ assert(0);
470
+ */
471
+ }
472
+ attr = TYPE_NONE;
473
+ } else if (h5.token_type == TAG_COMMENT) {
474
+ /* IE uses a "`" as a tag ending char */
475
+ if (memchr(h5.token_start, '`', h5.token_len) != NULL) {
476
+ return 1;
477
+ }
478
+
479
+ /* IE conditional comment */
480
+ if (h5.token_len > 3) {
481
+ if (h5.token_start[0] == '[' &&
482
+ (h5.token_start[1] == 'i' || h5.token_start[1] == 'I') &&
483
+ (h5.token_start[2] == 'f' || h5.token_start[2] == 'F')) {
484
+ return 1;
485
+ }
486
+ if ((h5.token_start[0] == 'x' || h5.token_start[0] == 'X') &&
487
+ (h5.token_start[1] == 'm' || h5.token_start[1] == 'M') &&
488
+ (h5.token_start[2] == 'l' || h5.token_start[2] == 'L')) {
489
+ return 1;
490
+ }
491
+ }
492
+
493
+ if (h5.token_len > 5) {
494
+ /* IE <?import pseudo-tag */
495
+ if (cstrcasecmp_with_null("IMPORT", h5.token_start, 6) == 0) {
496
+ return 1;
497
+ }
498
+
499
+ /* XML Entity definition */
500
+ if (cstrcasecmp_with_null("ENTITY", h5.token_start, 6) == 0) {
501
+ return 1;
502
+ }
503
+ }
504
+ }
505
+ }
506
+ return 0;
507
+ }
508
+
509
+
510
+ /*
511
+ * wrapper
512
+ */
513
+ int libinjection_xss(const char* s, size_t len)
514
+ {
515
+ if (libinjection_is_xss(s, len, DATA_STATE)) {
516
+ return 1;
517
+ }
518
+ if (libinjection_is_xss(s, len, VALUE_NO_QUOTE)) {
519
+ return 1;
520
+ }
521
+ if (libinjection_is_xss(s, len, VALUE_SINGLE_QUOTE)) {
522
+ return 1;
523
+ }
524
+ if (libinjection_is_xss(s, len, VALUE_DOUBLE_QUOTE)) {
525
+ return 1;
526
+ }
527
+ if (libinjection_is_xss(s, len, VALUE_BACK_QUOTE)) {
528
+ return 1;
529
+ }
530
+
531
+ return 0;
532
+ }