threatstack-agent-ruby 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +3 -0
  3. data/LICENSE +6 -0
  4. data/ext/libinjection/extconf.rb +4 -0
  5. data/ext/libinjection/libinjection.h +65 -0
  6. data/ext/libinjection/libinjection.i +13 -0
  7. data/ext/libinjection/libinjection_html5.c +850 -0
  8. data/ext/libinjection/libinjection_html5.h +54 -0
  9. data/ext/libinjection/libinjection_sqli.c +2325 -0
  10. data/ext/libinjection/libinjection_sqli.h +298 -0
  11. data/ext/libinjection/libinjection_sqli_data.h +9654 -0
  12. data/ext/libinjection/libinjection_wrap.c +2393 -0
  13. data/ext/libinjection/libinjection_xss.c +532 -0
  14. data/ext/libinjection/libinjection_xss.h +21 -0
  15. data/lib/constants.rb +110 -0
  16. data/lib/control.rb +61 -0
  17. data/lib/events/event_accumulator.rb +36 -0
  18. data/lib/events/models/attack_event.rb +58 -0
  19. data/lib/events/models/base_event.rb +41 -0
  20. data/lib/events/models/dependency_event.rb +93 -0
  21. data/lib/events/models/environment_event.rb +93 -0
  22. data/lib/events/models/instrumentation_event.rb +46 -0
  23. data/lib/exceptions/request_blocked_error.rb +11 -0
  24. data/lib/instrumentation/common.rb +172 -0
  25. data/lib/instrumentation/instrumenter.rb +144 -0
  26. data/lib/instrumentation/kernel.rb +45 -0
  27. data/lib/instrumentation/rails.rb +61 -0
  28. data/lib/jobs/delayed_job.rb +26 -0
  29. data/lib/jobs/event_submitter.rb +101 -0
  30. data/lib/jobs/job_queue.rb +38 -0
  31. data/lib/jobs/recurrent_job.rb +61 -0
  32. data/lib/threatstack-agent-ruby.rb +7 -0
  33. data/lib/utils/aws_utils.rb +46 -0
  34. data/lib/utils/formatter.rb +47 -0
  35. data/lib/utils/logger.rb +43 -0
  36. data/threatstack-agent-ruby.gemspec +35 -0
  37. metadata +221 -0
@@ -0,0 +1,532 @@
1
+
2
+ #include "libinjection.h"
3
+ #include "libinjection_xss.h"
4
+ #include "libinjection_html5.h"
5
+
6
+ #include <assert.h>
7
+ #include <stdio.h>
8
+
9
/*
 * Classification assigned to an HTML attribute name.
 * The order of the enumerators fixes their numeric values (TYPE_NONE is 0).
 */
typedef enum attribute {
    TYPE_NONE,           /* attribute name is not flagged */
    TYPE_BLACK,          /* ban always */
    TYPE_ATTR_URL,       /* attribute value takes a URL-like object */
    TYPE_STYLE,          /* style-carrying attribute */
    TYPE_ATTR_INDIRECT   /* attribute *name* is given in *value* */
} attribute_t;
16
+
17
+
18
+ static attribute_t is_black_attr(const char* s, size_t len);
19
+ static int is_black_tag(const char* s, size_t len);
20
+ static int is_black_url(const char* s, size_t len);
21
+ static int cstrcasecmp_with_null(const char *a, const char *b, size_t n);
22
+ static int html_decode_char_at(const char* src, size_t len, size_t* consumed);
23
+ static int htmlencode_startswith(const char* prefix, const char *src, size_t n);
24
+
25
+
26
+ typedef struct stringtype {
27
+ const char* name;
28
+ attribute_t atype;
29
+ } stringtype_t;
30
+
31
+
32
/*
 * Byte value -> hexadecimal digit value.
 * '0'-'9' map to 0-9, 'A'-'F' and 'a'-'f' map to 10-15.
 * Every other byte maps to the sentinel 256 ("not a hex digit").
 */
static const int gsHexDecodeMap[256] = {
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
      0,   1,   2,   3,   4,   5,   6,   7,   8,   9, 256, 256,
    256, 256, 256, 256, 256,  10,  11,  12,  13,  14,  15, 256,
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    256,  10,  11,  12,  13,  14,  15, 256, 256, 256, 256, 256,
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
    256, 256, 256, 256
};

/*
 * Decode the single (possibly HTML-entity-encoded) character that starts
 * at src.
 *
 *   src       input bytes; need not be NUL-terminated
 *   len       number of readable bytes at src
 *   consumed  out: number of input bytes used for the returned character
 *
 * Returns:
 *   -1 with *consumed == 0 when len is 0 or src is NULL.
 *   The byte itself (as unsigned char) with *consumed == 1 when the input
 *   does not start with '&' or is only one byte long.
 *   '&' with *consumed == 1 when the '&' does not begin a decodable
 *   numeric reference: a named entity, a truncated "&#" / "&#x", no digit
 *   after the prefix, or a value that grows past 0x1000FF.
 *   Otherwise the numeric value of a "&#NNN" / "&#xHHH" reference.
 *   A terminating ';' is consumed; any other terminating byte is not.
 *
 * *consumed never exceeds len. Callers subtract it from an unsigned
 * remaining-length counter, so an over-report would wrap that counter.
 */
static int html_decode_char_at(const char* src, size_t len, size_t* consumed)
{
    int val = 0;
    size_t i;
    int ch;

    if (len == 0 || src == NULL) {
        *consumed = 0;
        return -1;
    }

    *consumed = 1;
    if (*src != '&' || len < 2) {
        return (unsigned char)(*src);
    }

    if (*(src+1) != '#') {
        /* normally this would be for named entities
         * but for this case we don't actually care
         */
        return '&';
    }

    if (len < 3) {
        /* input ends right after "&#": src[2] is not readable */
        return '&';
    }

    if (*(src+2) == 'x' || *(src+2) == 'X') {
        if (len < 4) {
            /* input ends right after "&#x": src[3] is not readable */
            return '&';
        }
        ch = (unsigned char) (*(src+3));
        ch = gsHexDecodeMap[ch];
        if (ch == 256) {
            /* degenerate case  '&#[?]' */
            return '&';
        }
        val = ch;
        i = 4;
        while (i < len) {
            ch = (unsigned char) src[i];
            if (ch == ';') {
                *consumed = i + 1;
                return val;
            }
            ch = gsHexDecodeMap[ch];
            if (ch == 256) {
                *consumed = i;
                return val;
            }
            val = (val * 16) + ch;
            /* NOTE(review): limit is 0x1000FF, not the Unicode maximum
             * 0x10FFFF -- kept as-is, confirm whether this is intended. */
            if (val > 0x1000FF) {
                return '&';
            }
            ++i;
        }
        *consumed = i;
        return val;
    } else {
        i = 2;
        ch = (unsigned char) src[i];
        if (ch < '0' || ch > '9') {
            return '&';
        }
        val = ch - '0';
        i += 1;
        while (i < len) {
            ch = (unsigned char) src[i];
            if (ch == ';') {
                *consumed = i + 1;
                return val;
            }
            if (ch < '0' || ch > '9') {
                *consumed = i;
                return val;
            }
            val = (val * 10) + (ch - '0');
            if (val > 0x1000FF) {
                return '&';
            }
            ++i;
        }
        *consumed = i;
        return val;
    }
}
137
+
138
+
139
+ /*
140
+ * view-source:
141
+ * data:
142
+ * javascript:
143
+ */
144
+ static stringtype_t BLACKATTR[] = {
145
+ { "ACTION", TYPE_ATTR_URL } /* form */
146
+ , { "ATTRIBUTENAME", TYPE_ATTR_INDIRECT } /* SVG allow indirection of attribute names */
147
+ , { "BY", TYPE_ATTR_URL } /* SVG */
148
+ , { "BACKGROUND", TYPE_ATTR_URL } /* IE6, O11 */
149
+ , { "DATAFORMATAS", TYPE_BLACK } /* IE */
150
+ , { "DATASRC", TYPE_BLACK } /* IE */
151
+ , { "DYNSRC", TYPE_ATTR_URL } /* Obsolete img attribute */
152
+ , { "FILTER", TYPE_STYLE } /* Opera, SVG inline style */
153
+ , { "FORMACTION", TYPE_ATTR_URL } /* HTML 5 */
154
+ , { "FOLDER", TYPE_ATTR_URL } /* Only on A tags, IE-only */
155
+ , { "FROM", TYPE_ATTR_URL } /* SVG */
156
+ , { "HANDLER", TYPE_ATTR_URL } /* SVG Tiny, Opera */
157
+ , { "HREF", TYPE_ATTR_URL }
158
+ , { "LOWSRC", TYPE_ATTR_URL } /* Obsolete img attribute */
159
+ , { "POSTER", TYPE_ATTR_URL } /* Opera 10,11 */
160
+ , { "SRC", TYPE_ATTR_URL }
161
+ , { "STYLE", TYPE_STYLE }
162
+ , { "TO", TYPE_ATTR_URL } /* SVG */
163
+ , { "VALUES", TYPE_ATTR_URL } /* SVG */
164
+ , { "XLINK:HREF", TYPE_ATTR_URL }
165
+ , { NULL, TYPE_NONE }
166
+ };
167
+
168
+ /* xmlns */
169
+ /* `xml-stylesheet` > <eval>, <if expr=> */
170
+
171
+ /*
172
+ static const char* BLACKATTR[] = {
173
+ "ATTRIBUTENAME",
174
+ "BACKGROUND",
175
+ "DATAFORMATAS",
176
+ "HREF",
177
+ "SCROLL",
178
+ "SRC",
179
+ "STYLE",
180
+ "SRCDOC",
181
+ NULL
182
+ };
183
+ */
184
+
185
/*
 * Tag names that are always flagged. Names are upper case and the list
 * ends with NULL.
 * AUDIO, FORM, MARQUEE and VIDEO are intentionally left disabled.
 */
static const char* BLACKTAG[] = {
    "APPLET",
    /* "AUDIO", */
    "BASE",
    "COMMENT",    /* IE http://html5sec.org/#38 */
    "EMBED",
    /* "FORM", */
    "FRAME",
    "FRAMESET",
    "HANDLER",    /* Opera SVG, effectively a script tag */
    "IFRAME",
    "IMPORT",
    "ISINDEX",
    "LINK",
    "LISTENER",
    /* "MARQUEE", */
    "META",
    "NOSCRIPT",
    "OBJECT",
    "SCRIPT",
    "STYLE",
    /* "VIDEO", */
    "VMLFRAME",
    "XML",
    "XSS",
    NULL
};
212
+
213
+
214
/*
 * Compare the NUL-terminated string a against the first n bytes of b.
 *
 * Bytes of b in 'a'..'z' are upper-cased before comparison; a is compared
 * as-is, so it is expected to be upper case already. NUL bytes inside b
 * are skipped and do not take part in the comparison.
 *
 * Returns 0 when all non-NUL bytes of b match a and a is fully used up,
 * 1 otherwise.
 */
static int cstrcasecmp_with_null(const char *a, const char *b, size_t n)
{
    size_t pos;

    for (pos = 0; pos < n; ++pos) {
        char input = b[pos];

        if (input == '\0') {
            /* embedded NUL in the input: ignore it */
            continue;
        }
        if (input >= 'a' && input <= 'z') {
            input -= 0x20;
        }
        if (*a != input) {
            /* also taken when a has ended but b still has data */
            return 1;
        }
        ++a;
    }

    /* match only if nothing of a is left over */
    return (*a == 0) ? 0 : 1;
}
241
+
242
/*
 * Does an HTML-encoded binary string (b, n bytes) start with the
 * all-upper-case C string a? The comparison is case insensitive for
 * 'a'..'z' in the input.
 *
 * Each input character is decoded with html_decode_char_at(), so numeric
 * character references are compared by their decoded value.
 * Ignored in the input:
 *   - any leading characters with value <= 32 (whitespace / control)
 *   - NUL (0) and line feed (10) characters anywhere
 *
 * return 1 if b starts with a
 * return 0 if not
 */
static int htmlencode_startswith(const char *a, const char *b, size_t n)
{
    size_t consumed;
    int cb;
    int first = 1;

    while (n > 0) {
        if (*a == 0) {
            /* whole prefix matched */
            return 1;
        }
        cb = html_decode_char_at(b, n, &consumed);
        if (consumed == 0) {
            /* decoder made no progress (it reports this for a NULL input);
             * retrying would loop forever, so treat it as "no match" */
            return 0;
        }
        if (consumed > n) {
            /* never step past the end: n is unsigned and would wrap */
            consumed = n;
        }
        b += consumed;
        n -= consumed;

        if (first && cb <= 32) {
            /* ignore all leading whitespace and control characters */
            continue;
        }
        first = 0;

        if (cb == 0) {
            /* always ignore null characters in user input */
            continue;
        }

        if (cb == 10) {
            /* always ignore line feed (value 10) characters in user input */
            continue;
        }

        if (cb >= 'a' && cb <= 'z') {
            /* upcase */
            cb -= 0x20;
        }

        if (*a != (char) cb) {
            /* mismatch */
            return 0;
        }
        a++;
    }

    /* input exhausted: a match only if the prefix is exhausted too */
    return (*a == 0) ? 1 : 0;
}
298
+
299
+ static int is_black_tag(const char* s, size_t len)
300
+ {
301
+ const char** black;
302
+
303
+ if (len < 3) {
304
+ return 0;
305
+ }
306
+
307
+ black = BLACKTAG;
308
+ while (*black != NULL) {
309
+ if (cstrcasecmp_with_null(*black, s, len) == 0) {
310
+ /* printf("Got black tag %s\n", *black); */
311
+ return 1;
312
+ }
313
+ black += 1;
314
+ }
315
+
316
+ /* anything SVG related */
317
+ if ((s[0] == 's' || s[0] == 'S') &&
318
+ (s[1] == 'v' || s[1] == 'V') &&
319
+ (s[2] == 'g' || s[2] == 'G')) {
320
+ /* printf("Got SVG tag \n"); */
321
+ return 1;
322
+ }
323
+
324
+ /* Anything XSL(t) related */
325
+ if ((s[0] == 'x' || s[0] == 'X') &&
326
+ (s[1] == 's' || s[1] == 'S') &&
327
+ (s[2] == 'l' || s[2] == 'L')) {
328
+ /* printf("Got XSL tag\n"); */
329
+ return 1;
330
+ }
331
+
332
+ return 0;
333
+ }
334
+
335
+ static attribute_t is_black_attr(const char* s, size_t len)
336
+ {
337
+ stringtype_t* black;
338
+
339
+ if (len < 2) {
340
+ return TYPE_NONE;
341
+ }
342
+
343
+ if (len >= 5) {
344
+ /* JavaScript on.* */
345
+ if ((s[0] == 'o' || s[0] == 'O') && (s[1] == 'n' || s[1] == 'N')) {
346
+ /* printf("Got JavaScript on- attribute name\n"); */
347
+ return TYPE_BLACK;
348
+ }
349
+
350
+
351
+
352
+ /* XMLNS can be used to create arbitrary tags */
353
+ if (cstrcasecmp_with_null("XMLNS", s, 5) == 0 || cstrcasecmp_with_null("XLINK", s, 5) == 0) {
354
+ /* printf("Got XMLNS and XLINK tags\n"); */
355
+ return TYPE_BLACK;
356
+ }
357
+ }
358
+
359
+ black = BLACKATTR;
360
+ while (black->name != NULL) {
361
+ if (cstrcasecmp_with_null(black->name, s, len) == 0) {
362
+ /* printf("Got banned attribute name %s\n", black->name); */
363
+ return black->atype;
364
+ }
365
+ black += 1;
366
+ }
367
+
368
+ return TYPE_NONE;
369
+ }
370
+
371
/*
 * Decide whether an attribute value (s, len bytes) begins with a flagged
 * URL scheme prefix after leading junk has been skipped.
 *
 * Returns 1 if the value starts with one of the prefixes, 0 otherwise.
 */
static int is_black_url(const char* s, size_t len)
{
    static const char* const prefixes[] = {
        "DATA",
        "VIEW-SOURCE",
        "JAVA",        /* covers JAVA, JAVASCRIPT, + colon */
        "VBSCRIPT"     /* obsolete but interesting signal */
    };
    size_t k;

    /*
     * Skip leading whitespace/control bytes and high-bit bytes.
     * The comparison works on plain char on purpose: where char is signed,
     * high-bit bytes are negative and caught by "<= 32"; where it is
     * unsigned they are caught by ">= 127". Non-ASCII bytes are skipped
     * because Opera sometimes uses UTF-8 whitespace and in EUC-JP some of
     * the high bytes are just ignored.
     */
    for (; len > 0 && (*s <= 32 || *s >= 127); ++s, --len) {
        /* nothing else to do */
    }

    for (k = 0; k < sizeof(prefixes) / sizeof(prefixes[0]); ++k) {
        if (htmlencode_startswith(prefixes[k], s, len)) {
            return 1;
        }
    }
    return 0;
}
413
+
414
+ int libinjection_is_xss(const char* s, size_t len, int flags)
415
+ {
416
+ h5_state_t h5;
417
+ attribute_t attr = TYPE_NONE;
418
+
419
+ libinjection_h5_init(&h5, s, len, (enum html5_flags) flags);
420
+ while (libinjection_h5_next(&h5)) {
421
+ if (h5.token_type != ATTR_VALUE) {
422
+ attr = TYPE_NONE;
423
+ }
424
+
425
+ if (h5.token_type == DOCTYPE) {
426
+ return 1;
427
+ } else if (h5.token_type == TAG_NAME_OPEN) {
428
+ if (is_black_tag(h5.token_start, h5.token_len)) {
429
+ return 1;
430
+ }
431
+ } else if (h5.token_type == ATTR_NAME) {
432
+ attr = is_black_attr(h5.token_start, h5.token_len);
433
+ } else if (h5.token_type == ATTR_VALUE) {
434
+ /*
435
+ * IE6,7,8 parsing works a bit differently so
436
+ * a whole <script> or other black tag might be hiding
437
+ * inside an attribute value under HTML 5 parsing
438
+ * See http://html5sec.org/#102
439
+ * to avoid doing a full reparse of the value, just
440
+ * look for "<". This probably need adjusting to
441
+ * handle escaped characters
442
+ */
443
+ /*
444
+ if (memchr(h5.token_start, '<', h5.token_len) != NULL) {
445
+ return 1;
446
+ }
447
+ */
448
+
449
+ switch (attr) {
450
+ case TYPE_NONE:
451
+ break;
452
+ case TYPE_BLACK:
453
+ return 1;
454
+ case TYPE_ATTR_URL:
455
+ if (is_black_url(h5.token_start, h5.token_len)) {
456
+ return 1;
457
+ }
458
+ break;
459
+ case TYPE_STYLE:
460
+ return 1;
461
+ case TYPE_ATTR_INDIRECT:
462
+ /* an attribute name is specified in a _value_ */
463
+ if (is_black_attr(h5.token_start, h5.token_len)) {
464
+ return 1;
465
+ }
466
+ break;
467
+ /*
468
+ default:
469
+ assert(0);
470
+ */
471
+ }
472
+ attr = TYPE_NONE;
473
+ } else if (h5.token_type == TAG_COMMENT) {
474
+ /* IE uses a "`" as a tag ending char */
475
+ if (memchr(h5.token_start, '`', h5.token_len) != NULL) {
476
+ return 1;
477
+ }
478
+
479
+ /* IE conditional comment */
480
+ if (h5.token_len > 3) {
481
+ if (h5.token_start[0] == '[' &&
482
+ (h5.token_start[1] == 'i' || h5.token_start[1] == 'I') &&
483
+ (h5.token_start[2] == 'f' || h5.token_start[2] == 'F')) {
484
+ return 1;
485
+ }
486
+ if ((h5.token_start[0] == 'x' || h5.token_start[0] == 'X') &&
487
+ (h5.token_start[1] == 'm' || h5.token_start[1] == 'M') &&
488
+ (h5.token_start[2] == 'l' || h5.token_start[2] == 'L')) {
489
+ return 1;
490
+ }
491
+ }
492
+
493
+ if (h5.token_len > 5) {
494
+ /* IE <?import pseudo-tag */
495
+ if (cstrcasecmp_with_null("IMPORT", h5.token_start, 6) == 0) {
496
+ return 1;
497
+ }
498
+
499
+ /* XML Entity definition */
500
+ if (cstrcasecmp_with_null("ENTITY", h5.token_start, 6) == 0) {
501
+ return 1;
502
+ }
503
+ }
504
+ }
505
+ }
506
+ return 0;
507
+ }
508
+
509
+
510
+ /*
511
+ * wrapper
512
+ */
513
+ int libinjection_xss(const char* s, size_t len)
514
+ {
515
+ if (libinjection_is_xss(s, len, DATA_STATE)) {
516
+ return 1;
517
+ }
518
+ if (libinjection_is_xss(s, len, VALUE_NO_QUOTE)) {
519
+ return 1;
520
+ }
521
+ if (libinjection_is_xss(s, len, VALUE_SINGLE_QUOTE)) {
522
+ return 1;
523
+ }
524
+ if (libinjection_is_xss(s, len, VALUE_DOUBLE_QUOTE)) {
525
+ return 1;
526
+ }
527
+ if (libinjection_is_xss(s, len, VALUE_BACK_QUOTE)) {
528
+ return 1;
529
+ }
530
+
531
+ return 0;
532
+ }