immunio 0.15.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (157) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +234 -0
  3. data/README.md +147 -0
  4. data/bin/immunio +5 -0
  5. data/lib/immunio.rb +29 -0
  6. data/lib/immunio/agent.rb +260 -0
  7. data/lib/immunio/authentication.rb +96 -0
  8. data/lib/immunio/blocked_app.rb +38 -0
  9. data/lib/immunio/channel.rb +432 -0
  10. data/lib/immunio/cli.rb +39 -0
  11. data/lib/immunio/context.rb +114 -0
  12. data/lib/immunio/errors.rb +43 -0
  13. data/lib/immunio/immunio_ca.crt +45 -0
  14. data/lib/immunio/logger.rb +87 -0
  15. data/lib/immunio/plugins/action_dispatch.rb +45 -0
  16. data/lib/immunio/plugins/action_view.rb +431 -0
  17. data/lib/immunio/plugins/active_record.rb +707 -0
  18. data/lib/immunio/plugins/active_record_relation.rb +370 -0
  19. data/lib/immunio/plugins/authlogic.rb +80 -0
  20. data/lib/immunio/plugins/csrf.rb +24 -0
  21. data/lib/immunio/plugins/devise.rb +40 -0
  22. data/lib/immunio/plugins/environment_reporter.rb +69 -0
  23. data/lib/immunio/plugins/eval.rb +51 -0
  24. data/lib/immunio/plugins/exception_handler.rb +55 -0
  25. data/lib/immunio/plugins/gems_tracker.rb +5 -0
  26. data/lib/immunio/plugins/haml.rb +36 -0
  27. data/lib/immunio/plugins/http_finisher.rb +50 -0
  28. data/lib/immunio/plugins/http_tracker.rb +203 -0
  29. data/lib/immunio/plugins/io.rb +96 -0
  30. data/lib/immunio/plugins/redirect.rb +42 -0
  31. data/lib/immunio/plugins/warden.rb +66 -0
  32. data/lib/immunio/processor.rb +234 -0
  33. data/lib/immunio/rails.rb +26 -0
  34. data/lib/immunio/request.rb +139 -0
  35. data/lib/immunio/rufus_lua_ext/ref.rb +27 -0
  36. data/lib/immunio/rufus_lua_ext/state.rb +157 -0
  37. data/lib/immunio/rufus_lua_ext/table.rb +137 -0
  38. data/lib/immunio/rufus_lua_ext/utils.rb +13 -0
  39. data/lib/immunio/version.rb +5 -0
  40. data/lib/immunio/vm.rb +291 -0
  41. data/lua-hooks/ext/all.c +78 -0
  42. data/lua-hooks/ext/bitop/README +22 -0
  43. data/lua-hooks/ext/bitop/bit.c +189 -0
  44. data/lua-hooks/ext/extconf.rb +38 -0
  45. data/lua-hooks/ext/libinjection/COPYING +37 -0
  46. data/lua-hooks/ext/libinjection/libinjection.h +65 -0
  47. data/lua-hooks/ext/libinjection/libinjection_html5.c +847 -0
  48. data/lua-hooks/ext/libinjection/libinjection_html5.h +54 -0
  49. data/lua-hooks/ext/libinjection/libinjection_sqli.c +2301 -0
  50. data/lua-hooks/ext/libinjection/libinjection_sqli.h +295 -0
  51. data/lua-hooks/ext/libinjection/libinjection_sqli_data.h +9349 -0
  52. data/lua-hooks/ext/libinjection/libinjection_xss.c +531 -0
  53. data/lua-hooks/ext/libinjection/libinjection_xss.h +21 -0
  54. data/lua-hooks/ext/libinjection/lualib.c +109 -0
  55. data/lua-hooks/ext/lpeg/HISTORY +90 -0
  56. data/lua-hooks/ext/lpeg/lpcap.c +537 -0
  57. data/lua-hooks/ext/lpeg/lpcap.h +43 -0
  58. data/lua-hooks/ext/lpeg/lpcode.c +986 -0
  59. data/lua-hooks/ext/lpeg/lpcode.h +34 -0
  60. data/lua-hooks/ext/lpeg/lpeg-128.gif +0 -0
  61. data/lua-hooks/ext/lpeg/lpeg.html +1429 -0
  62. data/lua-hooks/ext/lpeg/lpprint.c +244 -0
  63. data/lua-hooks/ext/lpeg/lpprint.h +35 -0
  64. data/lua-hooks/ext/lpeg/lptree.c +1238 -0
  65. data/lua-hooks/ext/lpeg/lptree.h +77 -0
  66. data/lua-hooks/ext/lpeg/lptypes.h +149 -0
  67. data/lua-hooks/ext/lpeg/lpvm.c +355 -0
  68. data/lua-hooks/ext/lpeg/lpvm.h +58 -0
  69. data/lua-hooks/ext/lpeg/makefile +55 -0
  70. data/lua-hooks/ext/lpeg/re.html +498 -0
  71. data/lua-hooks/ext/lpeg/test.lua +1409 -0
  72. data/lua-hooks/ext/lua-cmsgpack/CMakeLists.txt +45 -0
  73. data/lua-hooks/ext/lua-cmsgpack/README.md +115 -0
  74. data/lua-hooks/ext/lua-cmsgpack/lua_cmsgpack.c +957 -0
  75. data/lua-hooks/ext/lua-cmsgpack/test.lua +570 -0
  76. data/lua-hooks/ext/lua-snapshot/LICENSE +7 -0
  77. data/lua-hooks/ext/lua-snapshot/Makefile +12 -0
  78. data/lua-hooks/ext/lua-snapshot/README.md +18 -0
  79. data/lua-hooks/ext/lua-snapshot/dump.lua +15 -0
  80. data/lua-hooks/ext/lua-snapshot/snapshot.c +455 -0
  81. data/lua-hooks/ext/lua/COPYRIGHT +34 -0
  82. data/lua-hooks/ext/lua/lapi.c +1087 -0
  83. data/lua-hooks/ext/lua/lapi.h +16 -0
  84. data/lua-hooks/ext/lua/lauxlib.c +652 -0
  85. data/lua-hooks/ext/lua/lauxlib.h +174 -0
  86. data/lua-hooks/ext/lua/lbaselib.c +659 -0
  87. data/lua-hooks/ext/lua/lcode.c +831 -0
  88. data/lua-hooks/ext/lua/lcode.h +76 -0
  89. data/lua-hooks/ext/lua/ldblib.c +398 -0
  90. data/lua-hooks/ext/lua/ldebug.c +638 -0
  91. data/lua-hooks/ext/lua/ldebug.h +33 -0
  92. data/lua-hooks/ext/lua/ldo.c +519 -0
  93. data/lua-hooks/ext/lua/ldo.h +57 -0
  94. data/lua-hooks/ext/lua/ldump.c +164 -0
  95. data/lua-hooks/ext/lua/lfunc.c +174 -0
  96. data/lua-hooks/ext/lua/lfunc.h +34 -0
  97. data/lua-hooks/ext/lua/lgc.c +710 -0
  98. data/lua-hooks/ext/lua/lgc.h +110 -0
  99. data/lua-hooks/ext/lua/linit.c +38 -0
  100. data/lua-hooks/ext/lua/liolib.c +556 -0
  101. data/lua-hooks/ext/lua/llex.c +463 -0
  102. data/lua-hooks/ext/lua/llex.h +81 -0
  103. data/lua-hooks/ext/lua/llimits.h +128 -0
  104. data/lua-hooks/ext/lua/lmathlib.c +263 -0
  105. data/lua-hooks/ext/lua/lmem.c +86 -0
  106. data/lua-hooks/ext/lua/lmem.h +49 -0
  107. data/lua-hooks/ext/lua/loadlib.c +705 -0
  108. data/lua-hooks/ext/lua/loadlib_rel.c +760 -0
  109. data/lua-hooks/ext/lua/lobject.c +214 -0
  110. data/lua-hooks/ext/lua/lobject.h +381 -0
  111. data/lua-hooks/ext/lua/lopcodes.c +102 -0
  112. data/lua-hooks/ext/lua/lopcodes.h +268 -0
  113. data/lua-hooks/ext/lua/loslib.c +243 -0
  114. data/lua-hooks/ext/lua/lparser.c +1339 -0
  115. data/lua-hooks/ext/lua/lparser.h +82 -0
  116. data/lua-hooks/ext/lua/lstate.c +214 -0
  117. data/lua-hooks/ext/lua/lstate.h +169 -0
  118. data/lua-hooks/ext/lua/lstring.c +111 -0
  119. data/lua-hooks/ext/lua/lstring.h +31 -0
  120. data/lua-hooks/ext/lua/lstrlib.c +871 -0
  121. data/lua-hooks/ext/lua/ltable.c +588 -0
  122. data/lua-hooks/ext/lua/ltable.h +40 -0
  123. data/lua-hooks/ext/lua/ltablib.c +287 -0
  124. data/lua-hooks/ext/lua/ltm.c +75 -0
  125. data/lua-hooks/ext/lua/ltm.h +54 -0
  126. data/lua-hooks/ext/lua/lua.c +392 -0
  127. data/lua-hooks/ext/lua/lua.def +131 -0
  128. data/lua-hooks/ext/lua/lua.h +388 -0
  129. data/lua-hooks/ext/lua/lua.rc +28 -0
  130. data/lua-hooks/ext/lua/lua_dll.rc +26 -0
  131. data/lua-hooks/ext/lua/luac.c +200 -0
  132. data/lua-hooks/ext/lua/luac.rc +1 -0
  133. data/lua-hooks/ext/lua/luaconf.h +763 -0
  134. data/lua-hooks/ext/lua/luaconf.h.in +724 -0
  135. data/lua-hooks/ext/lua/luaconf.h.orig +763 -0
  136. data/lua-hooks/ext/lua/lualib.h +53 -0
  137. data/lua-hooks/ext/lua/lundump.c +227 -0
  138. data/lua-hooks/ext/lua/lundump.h +36 -0
  139. data/lua-hooks/ext/lua/lvm.c +767 -0
  140. data/lua-hooks/ext/lua/lvm.h +36 -0
  141. data/lua-hooks/ext/lua/lzio.c +82 -0
  142. data/lua-hooks/ext/lua/lzio.h +67 -0
  143. data/lua-hooks/ext/lua/print.c +227 -0
  144. data/lua-hooks/ext/luautf8/README.md +152 -0
  145. data/lua-hooks/ext/luautf8/lutf8lib.c +1274 -0
  146. data/lua-hooks/ext/luautf8/unidata.h +3064 -0
  147. data/lua-hooks/lib/boot.lua +254 -0
  148. data/lua-hooks/lib/encode.lua +4 -0
  149. data/lua-hooks/lib/lexers/LICENSE +21 -0
  150. data/lua-hooks/lib/lexers/bash.lua +134 -0
  151. data/lua-hooks/lib/lexers/bash_dqstr.lua +62 -0
  152. data/lua-hooks/lib/lexers/css.lua +216 -0
  153. data/lua-hooks/lib/lexers/html.lua +106 -0
  154. data/lua-hooks/lib/lexers/javascript.lua +68 -0
  155. data/lua-hooks/lib/lexers/lexer.lua +1575 -0
  156. data/lua-hooks/lib/lexers/markers.lua +33 -0
  157. metadata +308 -0
@@ -0,0 +1,38 @@
1
+ # Used by Ruby (mkmf) to generate the Makefile that compiles the extension.
2
+ require 'mkmf'
3
+
4
+
5
+ # libinjection doesn't support `#include`ing all the .c files directly
6
+ # in the source, since it has symbols which conflict. Instead the `$objs`
7
+ # list below compiles each file separately then links them in the final
8
+ # step.
9
+ $objs = [
10
+ "all.o",
11
+ "libinjection/libinjection_html5.o",
12
+ "libinjection/libinjection_xss.o",
13
+ "libinjection/libinjection_sqli.o",
14
+ # Compile in LPEG
15
+ "lpeg/lpcap.o",
16
+ "lpeg/lpcode.o",
17
+ "lpeg/lpprint.o",
18
+ "lpeg/lpvm.o",
19
+ # "lpeg/lptree.o",
20
+ ]
21
+
22
+ # The created Makefile puts the compiled .o files into the `libinjection`
23
+ # and `lpeg` subdirectories, but it doesn't create them. Make sure they exist.
24
+ xsystem "mkdir -p libinjection"
25
+ xsystem "mkdir -p lpeg"
26
+
27
+ # Build init hook, only used when running agent in dev mode
28
+ STDERR.puts `make -C ../../../../lua-hooks hooks/__init__.lua`
29
+
30
+ #!!! PLEASE ALWAYS make sure the flags here match the Lua Makefile so our tests are valid
31
+ # Enable safety assertions
32
+ $CFLAGS << " -DLUA_USE_APICHECK -Dlua_assert=assert "
33
+ # Enable optimisation
34
+ $CFLAGS << " -O3 "
35
+ # Without this flag, I get this error when trying to compile in agent-java:
36
+ # relocation R_X86_64_32S against `.rodata' can not be used when making a shared object; recompile with -fPIC
37
+ $CFLAGS << " -fPIC "
38
+ create_makefile 'immunio/lua-hooks'
@@ -0,0 +1,37 @@
1
+ /*
2
+ * Copyright 2012, 2013, 2014
3
+ * Nick Galbreath -- nickg [at] client9 [dot] com
4
+ * http://www.client9.com/projects/libinjection/
5
+ *
6
+ * All rights reserved.
7
+ *
8
+ * Redistribution and use in source and binary forms, with or without
9
+ * modification, are permitted provided that the following conditions are
10
+ * met:
11
+ *
12
+ * Redistributions of source code must retain the above copyright
13
+ * notice, this list of conditions and the following disclaimer.
14
+ *
15
+ * Redistributions in binary form must reproduce the above copyright
16
+ * notice, this list of conditions and the following disclaimer in the
17
+ * documentation and/or other materials provided with the distribution.
18
+ *
19
+ * Neither the name of libinjection nor the names of its
20
+ * contributors may be used to endorse or promote products derived from
21
+ * this software without specific prior written permission.
22
+ *
23
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
29
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34
+ *
35
+ * This is the standard "new" BSD license:
36
+ * http://www.opensource.org/licenses/bsd-license.php
37
+ */
@@ -0,0 +1,65 @@
1
+ /**
2
+ * Copyright 2012, 2013 Nick Galbreath
3
+ * nickg@client9.com
4
+ * BSD License -- see COPYING.txt for details
5
+ *
6
+ * https://libinjection.client9.com/
7
+ *
8
+ */
9
+
10
+ #ifndef _LIBINJECTION_H
11
+ #define _LIBINJECTION_H
12
+
13
+ #ifdef __cplusplus
14
+ # define LIBINJECTION_BEGIN_DECLS extern "C" {
15
+ # define LIBINJECTION_END_DECLS }
16
+ #else
17
+ # define LIBINJECTION_BEGIN_DECLS
18
+ # define LIBINJECTION_END_DECLS
19
+ #endif
20
+
21
+ LIBINJECTION_BEGIN_DECLS
22
+
23
+ /*
24
+ * Pull in size_t
25
+ */
26
+ #include <string.h>
27
+
28
+ /*
29
+ * Version info.
30
+ *
31
+ * This is moved into a function so that SWIG and other auto-generated
32
+ * bindings need not be modified during minor release changes. We
33
+ * change the version number in the C source file and do not regenerate
34
+ * the binding.
35
+ *
36
+ * See python's normalized version
37
+ * http://www.python.org/dev/peps/pep-0386/#normalizedversion
38
+ */
39
+ const char* libinjection_version(void);
40
+
41
+ /**
42
+ * Simple API for SQLi detection - reports whether the input is SQLi and
43
+ * writes its fingerprint (or an empty string if benign) to the caller's buffer
44
+ *
45
+ * \param[in] s input string, may contain nulls, does not need to be null-terminated
46
+ * \param[in] slen input string length
47
+ * \param[out] fingerprint buffer of 8+ characters. c-string,
48
+ * \return 1 if SQLi, 0 if benign. fingerprint will be set or set to empty string.
49
+ */
50
+ int libinjection_sqli(const char* s, size_t slen, char fingerprint[]);
51
+
52
+ /** ALPHA version of xss detector.
53
+ *
54
+ * NOT DONE.
55
+ *
56
+ * \param[in] s input string, may contain nulls, does not need to be null-terminated
57
+ * \param[in] slen input string length
58
+ * \return 1 if XSS found, 0 if benign
59
+ *
60
+ */
61
+ int libinjection_xss(const char* s, size_t slen);
62
+
63
+ LIBINJECTION_END_DECLS
64
+
65
+ #endif /* _LIBINJECTION_H */
@@ -0,0 +1,847 @@
1
+ #include "libinjection_html5.h"
2
+
3
+ #include <string.h>
4
+ #include <assert.h>
5
+
6
+ #ifdef DEBUG
7
+ #include <stdio.h>
8
+ #define TRACE() printf("%s:%d\n", __FUNCTION__, __LINE__)
9
+ #else
10
+ #define TRACE()
11
+ #endif
12
+
13
+
14
+ #define CHAR_EOF -1
15
+ #define CHAR_NULL 0
16
+ #define CHAR_BANG 33
17
+ #define CHAR_DOUBLE 34
18
+ #define CHAR_PERCENT 37
19
+ #define CHAR_SINGLE 39
20
+ #define CHAR_DASH 45
21
+ #define CHAR_SLASH 47
22
+ #define CHAR_LT 60
23
+ #define CHAR_EQUALS 61
24
+ #define CHAR_GT 62
25
+ #define CHAR_QUESTION 63
26
+ #define CHAR_RIGHTB 93
27
+ #define CHAR_TICK 96
28
+
29
+ /* prototypes */
30
+
31
+ static int h5_skip_white(h5_state_t* hs);
32
+ static int h5_is_white(char c);
33
+ static int h5_state_eof(h5_state_t* hs);
34
+ static int h5_state_data(h5_state_t* hs);
35
+ static int h5_state_tag_open(h5_state_t* hs);
36
+ static int h5_state_tag_name(h5_state_t* hs);
37
+ static int h5_state_tag_name_close(h5_state_t* hs);
38
+ static int h5_state_end_tag_open(h5_state_t* hs);
39
+ static int h5_state_self_closing_start_tag(h5_state_t* hs);
40
+ static int h5_state_attribute_name(h5_state_t* hs);
41
+ static int h5_state_after_attribute_name(h5_state_t* hs);
42
+ static int h5_state_before_attribute_name(h5_state_t* hs);
43
+ static int h5_state_before_attribute_value(h5_state_t* hs);
44
+ static int h5_state_attribute_value_double_quote(h5_state_t* hs);
45
+ static int h5_state_attribute_value_single_quote(h5_state_t* hs);
46
+ static int h5_state_attribute_value_back_quote(h5_state_t* hs);
47
+ static int h5_state_attribute_value_no_quote(h5_state_t* hs);
48
+ static int h5_state_after_attribute_value_quoted_state(h5_state_t* hs);
49
+ static int h5_state_comment(h5_state_t* hs);
50
+ static int h5_state_cdata(h5_state_t* hs);
51
+
52
+
53
+ /* 12.2.4.44 */
54
+ static int h5_state_bogus_comment(h5_state_t* hs);
55
+ static int h5_state_bogus_comment2(h5_state_t* hs);
56
+
57
+ /* 12.2.4.45 */
58
+ static int h5_state_markup_declaration_open(h5_state_t* hs);
59
+
60
+ /* 8.2.4.52 */
61
+ static int h5_state_doctype(h5_state_t* hs);
62
+
63
+ /**
64
+ * public function
65
+ */
66
+ void libinjection_h5_init(h5_state_t* hs, const char* s, size_t len, enum html5_flags flags)
67
+ {
68
+ memset(hs, 0, sizeof(h5_state_t));
69
+ hs->s = s;
70
+ hs->len = len;
71
+
72
+ switch (flags) {
73
+ case DATA_STATE:
74
+ hs->state = h5_state_data;
75
+ break;
76
+ case VALUE_NO_QUOTE:
77
+ hs->state = h5_state_before_attribute_name;
78
+ break;
79
+ case VALUE_SINGLE_QUOTE:
80
+ hs->state = h5_state_attribute_value_single_quote;
81
+ break;
82
+ case VALUE_DOUBLE_QUOTE:
83
+ hs->state = h5_state_attribute_value_double_quote;
84
+ break;
85
+ case VALUE_BACK_QUOTE:
86
+ hs->state = h5_state_attribute_value_back_quote;
87
+ break;
88
+ }
89
+ }
90
+
91
+ /**
92
+ * public function
93
+ */
94
+ int libinjection_h5_next(h5_state_t* hs)
95
+ {
96
+ assert(hs->state != NULL);
97
+ return (*hs->state)(hs);
98
+ }
99
+
100
+ /**
101
+ * Everything below here is private
102
+ *
103
+ */
104
+
105
+
106
/**
 * Tell whether ch is one of the characters treated as whitespace.
 *
 * \return 1 for space, \t, \n, \v, \f, \r and also for NUL; 0 otherwise
 */
static int h5_is_white(char ch)
{
    switch (ch) {
    case ' ':   /* space           = 0x20 */
    case '\t':  /* horizontal tab  = 0x09 */
    case '\n':  /* newline         = 0x0A */
    case '\v':  /* vertical tab    = 0x0B */
    case '\f':  /* form feed       = 0x0C */
    case '\r':  /* carriage return = 0x0D */
    case '\0':  /* NUL is accepted as well */
        return 1;
    default:
        return 0;
    }
}
117
+
118
+ static int h5_skip_white(h5_state_t* hs)
119
+ {
120
+ char ch;
121
+ while (hs->pos < hs->len) {
122
+ ch = hs->s[hs->pos];
123
+ switch (ch) {
124
+ case 0x00: /* IE only */
125
+ case 0x20:
126
+ case 0x09:
127
+ case 0x0A:
128
+ case 0x0B: /* IE only */
129
+ case 0x0C:
130
+ case 0x0D: /* IE only */
131
+ hs->pos += 1;
132
+ break;
133
+ default:
134
+ return ch;
135
+ }
136
+ }
137
+ return CHAR_EOF;
138
+ }
139
+
140
+ static int h5_state_eof(h5_state_t* hs)
141
+ {
142
+ /* eliminate unused function argument warning */
143
+ (void)hs;
144
+ return 0;
145
+ }
146
+
147
+ static int h5_state_data(h5_state_t* hs)
148
+ {
149
+ const char* idx;
150
+
151
+ TRACE();
152
+ assert(hs->len >= hs->pos);
153
+ idx = (const char*) memchr(hs->s + hs->pos, CHAR_LT, hs->len - hs->pos);
154
+ if (idx == NULL) {
155
+ hs->token_start = hs->s + hs->pos;
156
+ hs->token_len = hs->len - hs->pos;
157
+ hs->token_type = DATA_TEXT;
158
+ hs->state = h5_state_eof;
159
+ if (hs->token_len == 0) {
160
+ return 0;
161
+ }
162
+ } else {
163
+ hs->token_start = hs->s + hs->pos;
164
+ hs->token_type = DATA_TEXT;
165
+ hs->token_len = (size_t)(idx - hs->s) - hs->pos;
166
+ hs->pos = (size_t)(idx - hs->s) + 1;
167
+ hs->state = h5_state_tag_open;
168
+ if (hs->token_len == 0) {
169
+ return h5_state_tag_open(hs);
170
+ }
171
+ }
172
+ return 1;
173
+ }
174
+
175
+ /**
176
+ * 12 2.4.8
177
+ */
178
+ static int h5_state_tag_open(h5_state_t* hs)
179
+ {
180
+ char ch;
181
+
182
+ TRACE();
183
+ ch = hs->s[hs->pos];
184
+ if (ch == CHAR_BANG) {
185
+ hs->pos += 1;
186
+ return h5_state_markup_declaration_open(hs);
187
+ } else if (ch == CHAR_SLASH) {
188
+ hs->pos += 1;
189
+ hs->is_close = 1;
190
+ return h5_state_end_tag_open(hs);
191
+ } else if (ch == CHAR_QUESTION) {
192
+ hs->pos += 1;
193
+ return h5_state_bogus_comment(hs);
194
+ } else if (ch == CHAR_PERCENT) {
195
+ /* this is not in spec.. alternative comment format used
196
+ by IE <= 9 and Safari < 4.0.3 */
197
+ hs->pos += 1;
198
+ return h5_state_bogus_comment2(hs);
199
+ } else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
200
+ return h5_state_tag_name(hs);
201
+ } else if (ch == CHAR_NULL) {
202
+ /* IE-ism NULL characters are ignored */
203
+ return h5_state_tag_name(hs);
204
+ } else {
205
+ /* user input mistake in configuring state */
206
+ if (hs->pos == 0) {
207
+ return h5_state_data(hs);
208
+ }
209
+ hs->token_start = hs->s + hs->pos - 1;
210
+ hs->token_len = 1;
211
+ hs->token_type = DATA_TEXT;
212
+ hs->state = h5_state_data;
213
+ return 1;
214
+ }
215
+ }
216
+ /**
217
+ * 12.2.4.9
218
+ */
219
+ static int h5_state_end_tag_open(h5_state_t* hs)
220
+ {
221
+ char ch;
222
+
223
+ TRACE();
224
+
225
+ if (hs->pos >= hs->len) {
226
+ return 0;
227
+ }
228
+ ch = hs->s[hs->pos];
229
+ if (ch == CHAR_GT) {
230
+ return h5_state_data(hs);
231
+ } else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
232
+ return h5_state_tag_name(hs);
233
+ }
234
+
235
+ hs->is_close = 0;
236
+ return h5_state_bogus_comment(hs);
237
+ }
238
+ /*
239
+ *
240
+ */
241
+ static int h5_state_tag_name_close(h5_state_t* hs)
242
+ {
243
+ TRACE();
244
+ hs->is_close = 0;
245
+ hs->token_start = hs->s + hs->pos;
246
+ hs->token_len = 1;
247
+ hs->token_type = TAG_NAME_CLOSE;
248
+ hs->pos += 1;
249
+ if (hs->pos < hs->len) {
250
+ hs->state = h5_state_data;
251
+ } else {
252
+ hs->state = h5_state_eof;
253
+ }
254
+
255
+ return 1;
256
+ }
257
+
258
+ /**
259
+ * 12.2.4.10
260
+ */
261
+ static int h5_state_tag_name(h5_state_t* hs)
262
+ {
263
+ char ch;
264
+ size_t pos;
265
+
266
+ TRACE();
267
+ pos = hs->pos;
268
+ while (pos < hs->len) {
269
+ ch = hs->s[pos];
270
+ if (ch == 0) {
271
+ /* special non-standard case */
272
+ /* allow nulls in tag name */
273
+ /* some old browsers apparently allow and ignore them */
274
+ pos += 1;
275
+ } else if (h5_is_white(ch)) {
276
+ hs->token_start = hs->s + hs->pos;
277
+ hs->token_len = pos - hs->pos;
278
+ hs->token_type = TAG_NAME_OPEN;
279
+ hs->pos = pos + 1;
280
+ hs->state = h5_state_before_attribute_name;
281
+ return 1;
282
+ } else if (ch == CHAR_SLASH) {
283
+ hs->token_start = hs->s + hs->pos;
284
+ hs->token_len = pos - hs->pos;
285
+ hs->token_type = TAG_NAME_OPEN;
286
+ hs->pos = pos + 1;
287
+ hs->state = h5_state_self_closing_start_tag;
288
+ return 1;
289
+ } else if (ch == CHAR_GT) {
290
+ hs->token_start = hs->s + hs->pos;
291
+ hs->token_len = pos - hs->pos;
292
+ if (hs->is_close) {
293
+ hs->pos = pos + 1;
294
+ hs->is_close = 0;
295
+ hs->token_type = TAG_CLOSE;
296
+ hs->state = h5_state_data;
297
+ } else {
298
+ hs->pos = pos;
299
+ hs->token_type = TAG_NAME_OPEN;
300
+ hs->state = h5_state_tag_name_close;
301
+ }
302
+ return 1;
303
+ } else {
304
+ pos += 1;
305
+ }
306
+ }
307
+
308
+ hs->token_start = hs->s + hs->pos;
309
+ hs->token_len = hs->len - hs->pos;
310
+ hs->token_type = TAG_NAME_OPEN;
311
+ hs->state = h5_state_eof;
312
+ return 1;
313
+ }
314
+
315
+ /**
316
+ * 12.2.4.34
317
+ */
318
+ static int h5_state_before_attribute_name(h5_state_t* hs)
319
+ {
320
+ int ch;
321
+
322
+ TRACE();
323
+ ch = h5_skip_white(hs);
324
+ switch (ch) {
325
+ case CHAR_EOF: {
326
+ return 0;
327
+ }
328
+ case CHAR_SLASH: {
329
+ hs->pos += 1;
330
+ return h5_state_self_closing_start_tag(hs);
331
+ }
332
+ case CHAR_GT: {
333
+ hs->state = h5_state_data;
334
+ hs->token_start = hs->s + hs->pos;
335
+ hs->token_len = 1;
336
+ hs->token_type = TAG_NAME_CLOSE;
337
+ hs->pos += 1;
338
+ return 1;
339
+ }
340
+ default: {
341
+ return h5_state_attribute_name(hs);
342
+ }
343
+ }
344
+ }
345
+
346
+ static int h5_state_attribute_name(h5_state_t* hs)
347
+ {
348
+ char ch;
349
+ size_t pos;
350
+
351
+ TRACE();
352
+ pos = hs->pos + 1;
353
+ while (pos < hs->len) {
354
+ ch = hs->s[pos];
355
+ if (h5_is_white(ch)) {
356
+ hs->token_start = hs->s + hs->pos;
357
+ hs->token_len = pos - hs->pos;
358
+ hs->token_type = ATTR_NAME;
359
+ hs->state = h5_state_after_attribute_name;
360
+ hs->pos = pos + 1;
361
+ return 1;
362
+ } else if (ch == CHAR_SLASH) {
363
+ hs->token_start = hs->s + hs->pos;
364
+ hs->token_len = pos - hs->pos;
365
+ hs->token_type = ATTR_NAME;
366
+ hs->state = h5_state_self_closing_start_tag;
367
+ hs->pos = pos + 1;
368
+ return 1;
369
+ } else if (ch == CHAR_EQUALS) {
370
+ hs->token_start = hs->s + hs->pos;
371
+ hs->token_len = pos - hs->pos;
372
+ hs->token_type = ATTR_NAME;
373
+ hs->state = h5_state_before_attribute_value;
374
+ hs->pos = pos + 1;
375
+ return 1;
376
+ } else if (ch == CHAR_GT) {
377
+ hs->token_start = hs->s + hs->pos;
378
+ hs->token_len = pos - hs->pos;
379
+ hs->token_type = ATTR_NAME;
380
+ hs->state = h5_state_tag_name_close;
381
+ hs->pos = pos;
382
+ return 1;
383
+ } else {
384
+ pos += 1;
385
+ }
386
+ }
387
+ /* EOF */
388
+ hs->token_start = hs->s + hs->pos;
389
+ hs->token_len = hs->len - hs->pos;
390
+ hs->token_type = ATTR_NAME;
391
+ hs->state = h5_state_eof;
392
+ hs->pos = hs->len;
393
+ return 1;
394
+ }
395
+
396
+ /**
397
+ * 12.2.4.36
398
+ */
399
+ static int h5_state_after_attribute_name(h5_state_t* hs)
400
+ {
401
+ int c;
402
+
403
+ TRACE();
404
+ c = h5_skip_white(hs);
405
+ switch (c) {
406
+ case CHAR_EOF: {
407
+ return 0;
408
+ }
409
+ case CHAR_SLASH: {
410
+ hs->pos += 1;
411
+ return h5_state_self_closing_start_tag(hs);
412
+ }
413
+ case CHAR_EQUALS: {
414
+ hs->pos += 1;
415
+ return h5_state_before_attribute_value(hs);
416
+ }
417
+ case CHAR_GT: {
418
+ return h5_state_tag_name_close(hs);
419
+ }
420
+ default: {
421
+ return h5_state_attribute_name(hs);
422
+ }
423
+ }
424
+ }
425
+
426
+ /**
427
+ * 12.2.4.37
428
+ */
429
+ static int h5_state_before_attribute_value(h5_state_t* hs)
430
+ {
431
+ int c;
432
+ TRACE();
433
+
434
+ c = h5_skip_white(hs);
435
+
436
+ if (c == CHAR_EOF) {
437
+ hs->state = h5_state_eof;
438
+ return 0;
439
+ }
440
+
441
+ if (c == CHAR_DOUBLE) {
442
+ return h5_state_attribute_value_double_quote(hs);
443
+ } else if (c == CHAR_SINGLE) {
444
+ return h5_state_attribute_value_single_quote(hs);
445
+ } else if (c == CHAR_TICK) {
446
+ /* NON STANDARD IE */
447
+ return h5_state_attribute_value_back_quote(hs);
448
+ } else {
449
+ return h5_state_attribute_value_no_quote(hs);
450
+ }
451
+ }
452
+
453
+
454
+ static int h5_state_attribute_value_quote(h5_state_t* hs, char qchar)
455
+ {
456
+ const char* idx;
457
+
458
+ TRACE();
459
+
460
+ /* skip initial quote in normal case.
461
+ * dont do this is pos == 0 since it means we have started
462
+ * in a non-data state. given an input of '><foo
463
+ * we want to make 0-length attribute name
464
+ */
465
+ if (hs->pos > 0) {
466
+ hs->pos += 1;
467
+ }
468
+
469
+
470
+ idx = (const char*) memchr(hs->s + hs->pos, qchar, hs->len - hs->pos);
471
+ if (idx == NULL) {
472
+ hs->token_start = hs->s + hs->pos;
473
+ hs->token_len = hs->len - hs->pos;
474
+ hs->token_type = ATTR_VALUE;
475
+ hs->state = h5_state_eof;
476
+ } else {
477
+ hs->token_start = hs->s + hs->pos;
478
+ hs->token_len = (size_t)(idx - hs->s) - hs->pos;
479
+ hs->token_type = ATTR_VALUE;
480
+ hs->state = h5_state_after_attribute_value_quoted_state;
481
+ hs->pos += hs->token_len + 1;
482
+ }
483
+ return 1;
484
+ }
485
+
486
+ static
487
+ int h5_state_attribute_value_double_quote(h5_state_t* hs)
488
+ {
489
+ TRACE();
490
+ return h5_state_attribute_value_quote(hs, CHAR_DOUBLE);
491
+ }
492
+
493
+ static
494
+ int h5_state_attribute_value_single_quote(h5_state_t* hs)
495
+ {
496
+ TRACE();
497
+ return h5_state_attribute_value_quote(hs, CHAR_SINGLE);
498
+ }
499
+
500
+ static
501
+ int h5_state_attribute_value_back_quote(h5_state_t* hs)
502
+ {
503
+ TRACE();
504
+ return h5_state_attribute_value_quote(hs, CHAR_TICK);
505
+ }
506
+
507
+ static int h5_state_attribute_value_no_quote(h5_state_t* hs)
508
+ {
509
+ char ch;
510
+ size_t pos;
511
+
512
+ TRACE();
513
+ pos = hs->pos;
514
+ while (pos < hs->len) {
515
+ ch = hs->s[pos];
516
+ if (h5_is_white(ch)) {
517
+ hs->token_type = ATTR_VALUE;
518
+ hs->token_start = hs->s + hs->pos;
519
+ hs->token_len = pos - hs->pos;
520
+ hs->pos = pos + 1;
521
+ hs->state = h5_state_before_attribute_name;
522
+ return 1;
523
+ } else if (ch == CHAR_GT) {
524
+ hs->token_type = ATTR_VALUE;
525
+ hs->token_start = hs->s + hs->pos;
526
+ hs->token_len = pos - hs->pos;
527
+ hs->pos = pos;
528
+ hs->state = h5_state_tag_name_close;
529
+ return 1;
530
+ }
531
+ pos += 1;
532
+ }
533
+ TRACE();
534
+ /* EOF */
535
+ hs->state = h5_state_eof;
536
+ hs->token_start = hs->s + hs->pos;
537
+ hs->token_len = hs->len - hs->pos;
538
+ hs->token_type = ATTR_VALUE;
539
+ return 1;
540
+ }
541
+
542
+ /**
543
+ * 12.2.4.41
544
+ */
545
+ static int h5_state_after_attribute_value_quoted_state(h5_state_t* hs)
546
+ {
547
+ char ch;
548
+
549
+ TRACE();
550
+ if (hs->pos >= hs->len) {
551
+ return 0;
552
+ }
553
+ ch = hs->s[hs->pos];
554
+ if (h5_is_white(ch)) {
555
+ hs->pos += 1;
556
+ return h5_state_before_attribute_name(hs);
557
+ } else if (ch == CHAR_SLASH) {
558
+ hs->pos += 1;
559
+ return h5_state_self_closing_start_tag(hs);
560
+ } else if (ch == CHAR_GT) {
561
+ hs->token_start = hs->s + hs->pos;
562
+ hs->token_len = 1;
563
+ hs->token_type = TAG_NAME_CLOSE;
564
+ hs->pos += 1;
565
+ hs->state = h5_state_data;
566
+ return 1;
567
+ } else {
568
+ return h5_state_before_attribute_name(hs);
569
+ }
570
+ }
571
+
572
+ /**
573
+ * 12.2.4.43
574
+ */
575
+ static int h5_state_self_closing_start_tag(h5_state_t* hs)
576
+ {
577
+ char ch;
578
+
579
+ TRACE();
580
+ if (hs->pos >= hs->len) {
581
+ return 0;
582
+ }
583
+ ch = hs->s[hs->pos];
584
+ if (ch == CHAR_GT) {
585
+ assert(hs->pos > 0);
586
+ hs->token_start = hs->s + hs->pos -1;
587
+ hs->token_len = 2;
588
+ hs->token_type = TAG_NAME_SELFCLOSE;
589
+ hs->state = h5_state_data;
590
+ hs->pos += 1;
591
+ return 1;
592
+ } else {
593
+ return h5_state_before_attribute_name(hs);
594
+ }
595
+ }
596
+
597
+ /**
598
+ * 12.2.4.44
599
+ */
600
+ static int h5_state_bogus_comment(h5_state_t* hs)
601
+ {
602
+ const char* idx;
603
+
604
+ TRACE();
605
+ idx = (const char*) memchr(hs->s + hs->pos, CHAR_GT, hs->len - hs->pos);
606
+ if (idx == NULL) {
607
+ hs->token_start = hs->s + hs->pos;
608
+ hs->token_len = hs->len - hs->pos;
609
+ hs->pos = hs->len;
610
+ hs->state = h5_state_eof;
611
+ } else {
612
+ hs->token_start = hs->s + hs->pos;
613
+ hs->token_len = (size_t)(idx - hs->s) - hs->pos;
614
+ hs->pos = (size_t)(idx - hs->s) + 1;
615
+ hs->state = h5_state_data;
616
+ }
617
+
618
+ hs->token_type = TAG_COMMENT;
619
+ return 1;
620
+ }
621
+
622
+ /**
623
+ * 12.2.4.44 ALT
624
+ */
625
+ static int h5_state_bogus_comment2(h5_state_t* hs)
626
+ {
627
+ const char* idx;
628
+ size_t pos;
629
+
630
+ TRACE();
631
+ pos = hs->pos;
632
+ while (1) {
633
+ idx = (const char*) memchr(hs->s + pos, CHAR_PERCENT, hs->len - pos);
634
+ if (idx == NULL || (idx + 1 >= hs->s + hs->len)) {
635
+ hs->token_start = hs->s + hs->pos;
636
+ hs->token_len = hs->len - hs->pos;
637
+ hs->pos = hs->len;
638
+ hs->token_type = TAG_COMMENT;
639
+ hs->state = h5_state_eof;
640
+ return 1;
641
+ }
642
+
643
+ if (*(idx +1) != CHAR_GT) {
644
+ pos = (size_t)(idx - hs->s) + 1;
645
+ continue;
646
+ }
647
+
648
+ /* ends in %> */
649
+ hs->token_start = hs->s + hs->pos;
650
+ hs->token_len = (size_t)(idx - hs->s) - hs->pos;
651
+ hs->pos = (size_t)(idx - hs->s) + 2;
652
+ hs->state = h5_state_data;
653
+ hs->token_type = TAG_COMMENT;
654
+ return 1;
655
+ }
656
+ }
657
+
658
+ /**
659
+ * 8.2.4.45
660
+ */
661
+ static int h5_state_markup_declaration_open(h5_state_t* hs)
662
+ {
663
+ size_t remaining;
664
+
665
+ TRACE();
666
+ remaining = hs->len - hs->pos;
667
+ if (remaining >= 7 &&
668
+ /* case insensitive */
669
+ (hs->s[hs->pos + 0] == 'D' || hs->s[hs->pos + 0] == 'd') &&
670
+ (hs->s[hs->pos + 1] == 'O' || hs->s[hs->pos + 1] == 'o') &&
671
+ (hs->s[hs->pos + 2] == 'C' || hs->s[hs->pos + 2] == 'c') &&
672
+ (hs->s[hs->pos + 3] == 'T' || hs->s[hs->pos + 3] == 't') &&
673
+ (hs->s[hs->pos + 4] == 'Y' || hs->s[hs->pos + 4] == 'y') &&
674
+ (hs->s[hs->pos + 5] == 'P' || hs->s[hs->pos + 5] == 'p') &&
675
+ (hs->s[hs->pos + 6] == 'E' || hs->s[hs->pos + 6] == 'e')
676
+ ) {
677
+ return h5_state_doctype(hs);
678
+ } else if (remaining >= 7 &&
679
+ /* upper case required */
680
+ hs->s[hs->pos + 0] == '[' &&
681
+ hs->s[hs->pos + 1] == 'C' &&
682
+ hs->s[hs->pos + 2] == 'D' &&
683
+ hs->s[hs->pos + 3] == 'A' &&
684
+ hs->s[hs->pos + 4] == 'T' &&
685
+ hs->s[hs->pos + 5] == 'A' &&
686
+ hs->s[hs->pos + 6] == '['
687
+ ) {
688
+ hs->pos += 7;
689
+ return h5_state_cdata(hs);
690
+ } else if (remaining >= 2 &&
691
+ hs->s[hs->pos + 0] == '-' &&
692
+ hs->s[hs->pos + 1] == '-') {
693
+ hs->pos += 2;
694
+ return h5_state_comment(hs);
695
+ }
696
+
697
+ return h5_state_bogus_comment(hs);
698
+ }
699
+
700
+ /**
701
+ * 12.2.4.48
702
+ * 12.2.4.49
703
+ * 12.2.4.50
704
+ * 12.2.4.51
705
+ * state machine spec is confusing since it can only look
706
+ * at one character at a time but simply it's comments end by:
707
+ * 1) EOF
708
+ * 2) ending in -->
709
+ * 3) ending in -!>
710
+ */
711
+ static int h5_state_comment(h5_state_t* hs)
712
+ {
713
+ char ch;
714
+ const char* idx;
715
+ size_t pos;
716
+ size_t offset;
717
+ const char* end = hs->s + hs->len;
718
+
719
+ TRACE();
720
+ pos = hs->pos;
721
+ while (1) {
722
+
723
+ idx = (const char*) memchr(hs->s + pos, CHAR_DASH, hs->len - pos);
724
+
725
+ /* did not find anything or has less than 3 chars left */
726
+ if (idx == NULL || idx > hs->s + hs->len - 3) {
727
+ hs->state = h5_state_eof;
728
+ hs->token_start = hs->s + hs->pos;
729
+ hs->token_len = hs->len - hs->pos;
730
+ hs->token_type = TAG_COMMENT;
731
+ return 1;
732
+ }
733
+ offset = 1;
734
+
735
+ /* skip all nulls */
736
+ while (idx + offset < end && *(idx + offset) == 0) {
737
+ offset += 1;
738
+ }
739
+ if (idx + offset == end) {
740
+ hs->state = h5_state_eof;
741
+ hs->token_start = hs->s + hs->pos;
742
+ hs->token_len = hs->len - hs->pos;
743
+ hs->token_type = TAG_COMMENT;
744
+ return 1;
745
+ }
746
+
747
+ ch = *(idx + offset);
748
+ if (ch != CHAR_DASH && ch != CHAR_BANG) {
749
+ pos = (size_t)(idx - hs->s) + 1;
750
+ continue;
751
+ }
752
+
753
+ /* need to test */
754
+ #if 0
755
+ /* skip all nulls */
756
+ while (idx + offset < end && *(idx + offset) == 0) {
757
+ offset += 1;
758
+ }
759
+ if (idx + offset == end) {
760
+ hs->state = h5_state_eof;
761
+ hs->token_start = hs->s + hs->pos;
762
+ hs->token_len = hs->len - hs->pos;
763
+ hs->token_type = TAG_COMMENT;
764
+ return 1;
765
+ }
766
+ #endif
767
+
768
+ offset += 1;
769
+ if (idx + offset == end) {
770
+ hs->state = h5_state_eof;
771
+ hs->token_start = hs->s + hs->pos;
772
+ hs->token_len = hs->len - hs->pos;
773
+ hs->token_type = TAG_COMMENT;
774
+ return 1;
775
+ }
776
+
777
+
778
+ ch = *(idx + offset);
779
+ if (ch != CHAR_GT) {
780
+ pos = (size_t)(idx - hs->s) + 1;
781
+ continue;
782
+ }
783
+ offset += 1;
784
+
785
+ /* ends in --> or -!> */
786
+ hs->token_start = hs->s + hs->pos;
787
+ hs->token_len = (size_t)(idx - hs->s) - hs->pos;
788
+ hs->pos = (size_t)(idx + offset - hs->s);
789
+ hs->state = h5_state_data;
790
+ hs->token_type = TAG_COMMENT;
791
+ return 1;
792
+ }
793
+ }
794
+
795
+ static int h5_state_cdata(h5_state_t* hs)
796
+ {
797
+ const char* idx;
798
+ size_t pos;
799
+
800
+ TRACE();
801
+ pos = hs->pos;
802
+ while (1) {
803
+ idx = (const char*) memchr(hs->s + pos, CHAR_RIGHTB, hs->len - pos);
804
+
805
+ /* did not find anything or has less than 3 chars left */
806
+ if (idx == NULL || idx > hs->s + hs->len - 3) {
807
+ hs->state = h5_state_eof;
808
+ hs->token_start = hs->s + hs->pos;
809
+ hs->token_len = hs->len - hs->pos;
810
+ hs->token_type = DATA_TEXT;
811
+ return 1;
812
+ } else if ( *(idx+1) == CHAR_RIGHTB && *(idx+2) == CHAR_GT) {
813
+ hs->state = h5_state_data;
814
+ hs->token_start = hs->s + hs->pos;
815
+ hs->token_len = (size_t)(idx - hs->s) - hs->pos;
816
+ hs->pos = (size_t)(idx - hs->s) + 3;
817
+ hs->token_type = DATA_TEXT;
818
+ return 1;
819
+ } else {
820
+ pos = (size_t)(idx - hs->s) + 1;
821
+ }
822
+ }
823
+ }
824
+
825
+ /**
826
+ * 8.2.4.52
827
+ * http://www.w3.org/html/wg/drafts/html/master/syntax.html#doctype-state
828
+ */
829
+ static int h5_state_doctype(h5_state_t* hs)
830
+ {
831
+ const char* idx;
832
+
833
+ TRACE();
834
+ hs->token_start = hs->s + hs->pos;
835
+ hs->token_type = DOCTYPE;
836
+
837
+ idx = (const char*) memchr(hs->s + hs->pos, CHAR_GT, hs->len - hs->pos);
838
+ if (idx == NULL) {
839
+ hs->state = h5_state_eof;
840
+ hs->token_len = hs->len - hs->pos;
841
+ } else {
842
+ hs->state = h5_state_data;
843
+ hs->token_len = (size_t)(idx - hs->s) - hs->pos;
844
+ hs->pos = (size_t)(idx - hs->s) + 1;
845
+ }
846
+ return 1;
847
+ }