immunio 0.15.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (157) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +234 -0
  3. data/README.md +147 -0
  4. data/bin/immunio +5 -0
  5. data/lib/immunio.rb +29 -0
  6. data/lib/immunio/agent.rb +260 -0
  7. data/lib/immunio/authentication.rb +96 -0
  8. data/lib/immunio/blocked_app.rb +38 -0
  9. data/lib/immunio/channel.rb +432 -0
  10. data/lib/immunio/cli.rb +39 -0
  11. data/lib/immunio/context.rb +114 -0
  12. data/lib/immunio/errors.rb +43 -0
  13. data/lib/immunio/immunio_ca.crt +45 -0
  14. data/lib/immunio/logger.rb +87 -0
  15. data/lib/immunio/plugins/action_dispatch.rb +45 -0
  16. data/lib/immunio/plugins/action_view.rb +431 -0
  17. data/lib/immunio/plugins/active_record.rb +707 -0
  18. data/lib/immunio/plugins/active_record_relation.rb +370 -0
  19. data/lib/immunio/plugins/authlogic.rb +80 -0
  20. data/lib/immunio/plugins/csrf.rb +24 -0
  21. data/lib/immunio/plugins/devise.rb +40 -0
  22. data/lib/immunio/plugins/environment_reporter.rb +69 -0
  23. data/lib/immunio/plugins/eval.rb +51 -0
  24. data/lib/immunio/plugins/exception_handler.rb +55 -0
  25. data/lib/immunio/plugins/gems_tracker.rb +5 -0
  26. data/lib/immunio/plugins/haml.rb +36 -0
  27. data/lib/immunio/plugins/http_finisher.rb +50 -0
  28. data/lib/immunio/plugins/http_tracker.rb +203 -0
  29. data/lib/immunio/plugins/io.rb +96 -0
  30. data/lib/immunio/plugins/redirect.rb +42 -0
  31. data/lib/immunio/plugins/warden.rb +66 -0
  32. data/lib/immunio/processor.rb +234 -0
  33. data/lib/immunio/rails.rb +26 -0
  34. data/lib/immunio/request.rb +139 -0
  35. data/lib/immunio/rufus_lua_ext/ref.rb +27 -0
  36. data/lib/immunio/rufus_lua_ext/state.rb +157 -0
  37. data/lib/immunio/rufus_lua_ext/table.rb +137 -0
  38. data/lib/immunio/rufus_lua_ext/utils.rb +13 -0
  39. data/lib/immunio/version.rb +5 -0
  40. data/lib/immunio/vm.rb +291 -0
  41. data/lua-hooks/ext/all.c +78 -0
  42. data/lua-hooks/ext/bitop/README +22 -0
  43. data/lua-hooks/ext/bitop/bit.c +189 -0
  44. data/lua-hooks/ext/extconf.rb +38 -0
  45. data/lua-hooks/ext/libinjection/COPYING +37 -0
  46. data/lua-hooks/ext/libinjection/libinjection.h +65 -0
  47. data/lua-hooks/ext/libinjection/libinjection_html5.c +847 -0
  48. data/lua-hooks/ext/libinjection/libinjection_html5.h +54 -0
  49. data/lua-hooks/ext/libinjection/libinjection_sqli.c +2301 -0
  50. data/lua-hooks/ext/libinjection/libinjection_sqli.h +295 -0
  51. data/lua-hooks/ext/libinjection/libinjection_sqli_data.h +9349 -0
  52. data/lua-hooks/ext/libinjection/libinjection_xss.c +531 -0
  53. data/lua-hooks/ext/libinjection/libinjection_xss.h +21 -0
  54. data/lua-hooks/ext/libinjection/lualib.c +109 -0
  55. data/lua-hooks/ext/lpeg/HISTORY +90 -0
  56. data/lua-hooks/ext/lpeg/lpcap.c +537 -0
  57. data/lua-hooks/ext/lpeg/lpcap.h +43 -0
  58. data/lua-hooks/ext/lpeg/lpcode.c +986 -0
  59. data/lua-hooks/ext/lpeg/lpcode.h +34 -0
  60. data/lua-hooks/ext/lpeg/lpeg-128.gif +0 -0
  61. data/lua-hooks/ext/lpeg/lpeg.html +1429 -0
  62. data/lua-hooks/ext/lpeg/lpprint.c +244 -0
  63. data/lua-hooks/ext/lpeg/lpprint.h +35 -0
  64. data/lua-hooks/ext/lpeg/lptree.c +1238 -0
  65. data/lua-hooks/ext/lpeg/lptree.h +77 -0
  66. data/lua-hooks/ext/lpeg/lptypes.h +149 -0
  67. data/lua-hooks/ext/lpeg/lpvm.c +355 -0
  68. data/lua-hooks/ext/lpeg/lpvm.h +58 -0
  69. data/lua-hooks/ext/lpeg/makefile +55 -0
  70. data/lua-hooks/ext/lpeg/re.html +498 -0
  71. data/lua-hooks/ext/lpeg/test.lua +1409 -0
  72. data/lua-hooks/ext/lua-cmsgpack/CMakeLists.txt +45 -0
  73. data/lua-hooks/ext/lua-cmsgpack/README.md +115 -0
  74. data/lua-hooks/ext/lua-cmsgpack/lua_cmsgpack.c +957 -0
  75. data/lua-hooks/ext/lua-cmsgpack/test.lua +570 -0
  76. data/lua-hooks/ext/lua-snapshot/LICENSE +7 -0
  77. data/lua-hooks/ext/lua-snapshot/Makefile +12 -0
  78. data/lua-hooks/ext/lua-snapshot/README.md +18 -0
  79. data/lua-hooks/ext/lua-snapshot/dump.lua +15 -0
  80. data/lua-hooks/ext/lua-snapshot/snapshot.c +455 -0
  81. data/lua-hooks/ext/lua/COPYRIGHT +34 -0
  82. data/lua-hooks/ext/lua/lapi.c +1087 -0
  83. data/lua-hooks/ext/lua/lapi.h +16 -0
  84. data/lua-hooks/ext/lua/lauxlib.c +652 -0
  85. data/lua-hooks/ext/lua/lauxlib.h +174 -0
  86. data/lua-hooks/ext/lua/lbaselib.c +659 -0
  87. data/lua-hooks/ext/lua/lcode.c +831 -0
  88. data/lua-hooks/ext/lua/lcode.h +76 -0
  89. data/lua-hooks/ext/lua/ldblib.c +398 -0
  90. data/lua-hooks/ext/lua/ldebug.c +638 -0
  91. data/lua-hooks/ext/lua/ldebug.h +33 -0
  92. data/lua-hooks/ext/lua/ldo.c +519 -0
  93. data/lua-hooks/ext/lua/ldo.h +57 -0
  94. data/lua-hooks/ext/lua/ldump.c +164 -0
  95. data/lua-hooks/ext/lua/lfunc.c +174 -0
  96. data/lua-hooks/ext/lua/lfunc.h +34 -0
  97. data/lua-hooks/ext/lua/lgc.c +710 -0
  98. data/lua-hooks/ext/lua/lgc.h +110 -0
  99. data/lua-hooks/ext/lua/linit.c +38 -0
  100. data/lua-hooks/ext/lua/liolib.c +556 -0
  101. data/lua-hooks/ext/lua/llex.c +463 -0
  102. data/lua-hooks/ext/lua/llex.h +81 -0
  103. data/lua-hooks/ext/lua/llimits.h +128 -0
  104. data/lua-hooks/ext/lua/lmathlib.c +263 -0
  105. data/lua-hooks/ext/lua/lmem.c +86 -0
  106. data/lua-hooks/ext/lua/lmem.h +49 -0
  107. data/lua-hooks/ext/lua/loadlib.c +705 -0
  108. data/lua-hooks/ext/lua/loadlib_rel.c +760 -0
  109. data/lua-hooks/ext/lua/lobject.c +214 -0
  110. data/lua-hooks/ext/lua/lobject.h +381 -0
  111. data/lua-hooks/ext/lua/lopcodes.c +102 -0
  112. data/lua-hooks/ext/lua/lopcodes.h +268 -0
  113. data/lua-hooks/ext/lua/loslib.c +243 -0
  114. data/lua-hooks/ext/lua/lparser.c +1339 -0
  115. data/lua-hooks/ext/lua/lparser.h +82 -0
  116. data/lua-hooks/ext/lua/lstate.c +214 -0
  117. data/lua-hooks/ext/lua/lstate.h +169 -0
  118. data/lua-hooks/ext/lua/lstring.c +111 -0
  119. data/lua-hooks/ext/lua/lstring.h +31 -0
  120. data/lua-hooks/ext/lua/lstrlib.c +871 -0
  121. data/lua-hooks/ext/lua/ltable.c +588 -0
  122. data/lua-hooks/ext/lua/ltable.h +40 -0
  123. data/lua-hooks/ext/lua/ltablib.c +287 -0
  124. data/lua-hooks/ext/lua/ltm.c +75 -0
  125. data/lua-hooks/ext/lua/ltm.h +54 -0
  126. data/lua-hooks/ext/lua/lua.c +392 -0
  127. data/lua-hooks/ext/lua/lua.def +131 -0
  128. data/lua-hooks/ext/lua/lua.h +388 -0
  129. data/lua-hooks/ext/lua/lua.rc +28 -0
  130. data/lua-hooks/ext/lua/lua_dll.rc +26 -0
  131. data/lua-hooks/ext/lua/luac.c +200 -0
  132. data/lua-hooks/ext/lua/luac.rc +1 -0
  133. data/lua-hooks/ext/lua/luaconf.h +763 -0
  134. data/lua-hooks/ext/lua/luaconf.h.in +724 -0
  135. data/lua-hooks/ext/lua/luaconf.h.orig +763 -0
  136. data/lua-hooks/ext/lua/lualib.h +53 -0
  137. data/lua-hooks/ext/lua/lundump.c +227 -0
  138. data/lua-hooks/ext/lua/lundump.h +36 -0
  139. data/lua-hooks/ext/lua/lvm.c +767 -0
  140. data/lua-hooks/ext/lua/lvm.h +36 -0
  141. data/lua-hooks/ext/lua/lzio.c +82 -0
  142. data/lua-hooks/ext/lua/lzio.h +67 -0
  143. data/lua-hooks/ext/lua/print.c +227 -0
  144. data/lua-hooks/ext/luautf8/README.md +152 -0
  145. data/lua-hooks/ext/luautf8/lutf8lib.c +1274 -0
  146. data/lua-hooks/ext/luautf8/unidata.h +3064 -0
  147. data/lua-hooks/lib/boot.lua +254 -0
  148. data/lua-hooks/lib/encode.lua +4 -0
  149. data/lua-hooks/lib/lexers/LICENSE +21 -0
  150. data/lua-hooks/lib/lexers/bash.lua +134 -0
  151. data/lua-hooks/lib/lexers/bash_dqstr.lua +62 -0
  152. data/lua-hooks/lib/lexers/css.lua +216 -0
  153. data/lua-hooks/lib/lexers/html.lua +106 -0
  154. data/lua-hooks/lib/lexers/javascript.lua +68 -0
  155. data/lua-hooks/lib/lexers/lexer.lua +1575 -0
  156. data/lua-hooks/lib/lexers/markers.lua +33 -0
  157. metadata +308 -0
@@ -0,0 +1,38 @@
1
# Used by Ruby to compile the extension.
require 'mkmf'
require 'fileutils'

# libinjection doesn't support `#include`ing all the .c files directly
# in the source, since it has symbols which conflict. Instead the `$objs`
# list below compiles each file separately then links them in the final
# step.
$objs = [
  "all.o",
  "libinjection/libinjection_html5.o",
  "libinjection/libinjection_xss.o",
  "libinjection/libinjection_sqli.o",
  # Compile in LPEG
  "lpeg/lpcap.o",
  "lpeg/lpcode.o",
  "lpeg/lpprint.o",
  "lpeg/lpvm.o",
  # "lpeg/lptree.o",
]

# The created Makefile puts the compiled .o files into the `libinjection`
# and `lpeg` subdirectories, but it doesn't create them. Make sure they
# exist. FileUtils is used instead of shelling out to `mkdir -p` so that a
# failure raises here rather than surfacing later as a confusing compile
# error, and so the step does not depend on a POSIX shell.
%w[libinjection lpeg].each { |dir| FileUtils.mkdir_p(dir) }

# Build init hook, only used when running agent in dev mode.
# Best effort: the output is echoed and the exit status is deliberately
# not checked.
STDERR.puts `make -C ../../../../lua-hooks hooks/__init__.lua`

#!!! PLEASE ALWAYS make sure the flags here match the Lua Makefile so our tests are valid
# Enable safety assertions
$CFLAGS << " -DLUA_USE_APICHECK -Dlua_assert=assert "
# Enable optimisation
$CFLAGS << " -O3 "
# Without this flag, I get this error when trying to compile in agent-java:
# relocation R_X86_64_32S against `.rodata' can not be used when making a shared object; recompile with -fPIC
$CFLAGS << " -fPIC "
create_makefile 'immunio/lua-hooks'
@@ -0,0 +1,37 @@
1
+ /*
2
+ * Copyright 2012, 2013, 2014
3
+ * Nick Galbreath -- nickg [at] client9 [dot] com
4
+ * http://www.client9.com/projects/libinjection/
5
+ *
6
+ * All rights reserved.
7
+ *
8
+ * Redistribution and use in source and binary forms, with or without
9
+ * modification, are permitted provided that the following conditions are
10
+ * met:
11
+ *
12
+ * Redistributions of source code must retain the above copyright
13
+ * notice, this list of conditions and the following disclaimer.
14
+ *
15
+ * Redistributions in binary form must reproduce the above copyright
16
+ * notice, this list of conditions and the following disclaimer in the
17
+ * documentation and/or other materials provided with the distribution.
18
+ *
19
+ * Neither the name of libinjection nor the names of its
20
+ * contributors may be used to endorse or promote products derived from
21
+ * this software without specific prior written permission.
22
+ *
23
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
29
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34
+ *
35
+ * This is the standard "new" BSD license:
36
+ * http://www.opensource.org/licenses/bsd-license.php
37
+ */
@@ -0,0 +1,65 @@
1
+ /**
2
+ * Copyright 2012, 2013 Nick Galbreath
3
+ * nickg@client9.com
4
+ * BSD License -- see COPYING.txt for details
5
+ *
6
+ * https://libinjection.client9.com/
7
+ *
8
+ */
9
+
10
+ #ifndef _LIBINJECTION_H
11
+ #define _LIBINJECTION_H
12
+
13
+ #ifdef __cplusplus
14
+ # define LIBINJECTION_BEGIN_DECLS extern "C" {
15
+ # define LIBINJECTION_END_DECLS }
16
+ #else
17
+ # define LIBINJECTION_BEGIN_DECLS
18
+ # define LIBINJECTION_END_DECLS
19
+ #endif
20
+
21
+ LIBINJECTION_BEGIN_DECLS
22
+
23
+ /*
24
+ * Pull in size_t
25
+ */
26
+ #include <string.h>
27
+
28
+ /*
29
+ * Version info.
30
+ *
31
+ * This is moved into a function to allow SWIG and other auto-generated
32
+ * binding to not be modified during minor release changes. We change
33
+ * the version number in the C source file and do not regenerate
34
+ * the binding
35
+ *
36
+ * See python's normalized version
37
+ * http://www.python.org/dev/peps/pep-0386/#normalizedversion
38
+ */
39
+ const char* libinjection_version(void);
40
+
41
+ /**
42
+ * Simple API for SQLi detection - writes a SQLi fingerprint to the caller's
43
+ * buffer, or an empty string if the input is benign
44
+ *
45
+ * \param[in] s input string, may contain nulls, does not need to be null-terminated
46
+ * \param[in] slen input string length
47
+ * \param[out] fingerprint buffer of 8+ characters. c-string,
48
+ * \return 1 if SQLi, 0 if benign. fingerprint will be set or set to empty string.
49
+ */
50
+ int libinjection_sqli(const char* s, size_t slen, char fingerprint[]);
51
+
52
+ /** ALPHA version of xss detector.
53
+ *
54
+ * NOT DONE.
55
+ *
56
+ * \param[in] s input string, may contain nulls, does not need to be null-terminated
57
+ * \param[in] slen input string length
58
+ * \return 1 if XSS found, 0 if benign
59
+ *
60
+ */
61
+ int libinjection_xss(const char* s, size_t slen);
62
+
63
+ LIBINJECTION_END_DECLS
64
+
65
+ #endif /* _LIBINJECTION_H */
@@ -0,0 +1,847 @@
1
+ #include "libinjection_html5.h"
2
+
3
+ #include <string.h>
4
+ #include <assert.h>
5
+
6
+ #ifdef DEBUG
7
+ #include <stdio.h>
8
+ #define TRACE() printf("%s:%d\n", __FUNCTION__, __LINE__)
9
+ #else
10
+ #define TRACE()
11
+ #endif
12
+
13
+
14
+ #define CHAR_EOF -1
15
+ #define CHAR_NULL 0
16
+ #define CHAR_BANG 33
17
+ #define CHAR_DOUBLE 34
18
+ #define CHAR_PERCENT 37
19
+ #define CHAR_SINGLE 39
20
+ #define CHAR_DASH 45
21
+ #define CHAR_SLASH 47
22
+ #define CHAR_LT 60
23
+ #define CHAR_EQUALS 61
24
+ #define CHAR_GT 62
25
+ #define CHAR_QUESTION 63
26
+ #define CHAR_RIGHTB 93
27
+ #define CHAR_TICK 96
28
+
29
+ /* prototypes */
30
+
31
+ static int h5_skip_white(h5_state_t* hs);
32
+ static int h5_is_white(char c);
33
+ static int h5_state_eof(h5_state_t* hs);
34
+ static int h5_state_data(h5_state_t* hs);
35
+ static int h5_state_tag_open(h5_state_t* hs);
36
+ static int h5_state_tag_name(h5_state_t* hs);
37
+ static int h5_state_tag_name_close(h5_state_t* hs);
38
+ static int h5_state_end_tag_open(h5_state_t* hs);
39
+ static int h5_state_self_closing_start_tag(h5_state_t* hs);
40
+ static int h5_state_attribute_name(h5_state_t* hs);
41
+ static int h5_state_after_attribute_name(h5_state_t* hs);
42
+ static int h5_state_before_attribute_name(h5_state_t* hs);
43
+ static int h5_state_before_attribute_value(h5_state_t* hs);
44
+ static int h5_state_attribute_value_double_quote(h5_state_t* hs);
45
+ static int h5_state_attribute_value_single_quote(h5_state_t* hs);
46
+ static int h5_state_attribute_value_back_quote(h5_state_t* hs);
47
+ static int h5_state_attribute_value_no_quote(h5_state_t* hs);
48
+ static int h5_state_after_attribute_value_quoted_state(h5_state_t* hs);
49
+ static int h5_state_comment(h5_state_t* hs);
50
+ static int h5_state_cdata(h5_state_t* hs);
51
+
52
+
53
+ /* 12.2.4.44 */
54
+ static int h5_state_bogus_comment(h5_state_t* hs);
55
+ static int h5_state_bogus_comment2(h5_state_t* hs);
56
+
57
+ /* 12.2.4.45 */
58
+ static int h5_state_markup_declaration_open(h5_state_t* hs);
59
+
60
+ /* 8.2.4.52 */
61
+ static int h5_state_doctype(h5_state_t* hs);
62
+
63
+ /**
64
+ * public function
65
+ */
66
+ void libinjection_h5_init(h5_state_t* hs, const char* s, size_t len, enum html5_flags flags)
67
+ {
68
+ memset(hs, 0, sizeof(h5_state_t));
69
+ hs->s = s;
70
+ hs->len = len;
71
+
72
+ switch (flags) {
73
+ case DATA_STATE:
74
+ hs->state = h5_state_data;
75
+ break;
76
+ case VALUE_NO_QUOTE:
77
+ hs->state = h5_state_before_attribute_name;
78
+ break;
79
+ case VALUE_SINGLE_QUOTE:
80
+ hs->state = h5_state_attribute_value_single_quote;
81
+ break;
82
+ case VALUE_DOUBLE_QUOTE:
83
+ hs->state = h5_state_attribute_value_double_quote;
84
+ break;
85
+ case VALUE_BACK_QUOTE:
86
+ hs->state = h5_state_attribute_value_back_quote;
87
+ break;
88
+ }
89
+ }
90
+
91
+ /**
92
+ * public function
93
+ */
94
+ int libinjection_h5_next(h5_state_t* hs)
95
+ {
96
+ assert(hs->state != NULL);
97
+ return (*hs->state)(hs);
98
+ }
99
+
100
+ /**
101
+ * Everything below here is private
102
+ *
103
+ */
104
+
105
+
106
/**
 * Returns 1 if ch is treated as whitespace by the tokenizer, else 0.
 *
 * Besides space, \t, \n, \v, \f and \r, the NUL byte also counts as
 * whitespace here.
 */
static int h5_is_white(char ch)
{
    switch (ch) {
    case '\0':
    case ' ':
    case '\t': /* htab      = 0x09 */
    case '\n': /* newline   = 0x0A */
    case '\v': /* vtab      = 0x0B */
    case '\f': /* form feed = 0x0C */
    case '\r': /* cr        = 0x0D */
        return 1;
    default:
        return 0;
    }
}
117
+
118
+ static int h5_skip_white(h5_state_t* hs)
119
+ {
120
+ char ch;
121
+ while (hs->pos < hs->len) {
122
+ ch = hs->s[hs->pos];
123
+ switch (ch) {
124
+ case 0x00: /* IE only */
125
+ case 0x20:
126
+ case 0x09:
127
+ case 0x0A:
128
+ case 0x0B: /* IE only */
129
+ case 0x0C:
130
+ case 0x0D: /* IE only */
131
+ hs->pos += 1;
132
+ break;
133
+ default:
134
+ return ch;
135
+ }
136
+ }
137
+ return CHAR_EOF;
138
+ }
139
+
140
+ static int h5_state_eof(h5_state_t* hs)
141
+ {
142
+ /* eliminate unused function argument warning */
143
+ (void)hs;
144
+ return 0;
145
+ }
146
+
147
+ static int h5_state_data(h5_state_t* hs)
148
+ {
149
+ const char* idx;
150
+
151
+ TRACE();
152
+ assert(hs->len >= hs->pos);
153
+ idx = (const char*) memchr(hs->s + hs->pos, CHAR_LT, hs->len - hs->pos);
154
+ if (idx == NULL) {
155
+ hs->token_start = hs->s + hs->pos;
156
+ hs->token_len = hs->len - hs->pos;
157
+ hs->token_type = DATA_TEXT;
158
+ hs->state = h5_state_eof;
159
+ if (hs->token_len == 0) {
160
+ return 0;
161
+ }
162
+ } else {
163
+ hs->token_start = hs->s + hs->pos;
164
+ hs->token_type = DATA_TEXT;
165
+ hs->token_len = (size_t)(idx - hs->s) - hs->pos;
166
+ hs->pos = (size_t)(idx - hs->s) + 1;
167
+ hs->state = h5_state_tag_open;
168
+ if (hs->token_len == 0) {
169
+ return h5_state_tag_open(hs);
170
+ }
171
+ }
172
+ return 1;
173
+ }
174
+
175
+ /**
176
+ * 12 2.4.8
177
+ */
178
+ static int h5_state_tag_open(h5_state_t* hs)
179
+ {
180
+ char ch;
181
+
182
+ TRACE();
183
+ ch = hs->s[hs->pos];
184
+ if (ch == CHAR_BANG) {
185
+ hs->pos += 1;
186
+ return h5_state_markup_declaration_open(hs);
187
+ } else if (ch == CHAR_SLASH) {
188
+ hs->pos += 1;
189
+ hs->is_close = 1;
190
+ return h5_state_end_tag_open(hs);
191
+ } else if (ch == CHAR_QUESTION) {
192
+ hs->pos += 1;
193
+ return h5_state_bogus_comment(hs);
194
+ } else if (ch == CHAR_PERCENT) {
195
+ /* this is not in spec.. alternative comment format used
196
+ by IE <= 9 and Safari < 4.0.3 */
197
+ hs->pos += 1;
198
+ return h5_state_bogus_comment2(hs);
199
+ } else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
200
+ return h5_state_tag_name(hs);
201
+ } else if (ch == CHAR_NULL) {
202
+ /* IE-ism NULL characters are ignored */
203
+ return h5_state_tag_name(hs);
204
+ } else {
205
+ /* user input mistake in configuring state */
206
+ if (hs->pos == 0) {
207
+ return h5_state_data(hs);
208
+ }
209
+ hs->token_start = hs->s + hs->pos - 1;
210
+ hs->token_len = 1;
211
+ hs->token_type = DATA_TEXT;
212
+ hs->state = h5_state_data;
213
+ return 1;
214
+ }
215
+ }
216
+ /**
217
+ * 12.2.4.9
218
+ */
219
+ static int h5_state_end_tag_open(h5_state_t* hs)
220
+ {
221
+ char ch;
222
+
223
+ TRACE();
224
+
225
+ if (hs->pos >= hs->len) {
226
+ return 0;
227
+ }
228
+ ch = hs->s[hs->pos];
229
+ if (ch == CHAR_GT) {
230
+ return h5_state_data(hs);
231
+ } else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
232
+ return h5_state_tag_name(hs);
233
+ }
234
+
235
+ hs->is_close = 0;
236
+ return h5_state_bogus_comment(hs);
237
+ }
238
+ /*
239
+ *
240
+ */
241
+ static int h5_state_tag_name_close(h5_state_t* hs)
242
+ {
243
+ TRACE();
244
+ hs->is_close = 0;
245
+ hs->token_start = hs->s + hs->pos;
246
+ hs->token_len = 1;
247
+ hs->token_type = TAG_NAME_CLOSE;
248
+ hs->pos += 1;
249
+ if (hs->pos < hs->len) {
250
+ hs->state = h5_state_data;
251
+ } else {
252
+ hs->state = h5_state_eof;
253
+ }
254
+
255
+ return 1;
256
+ }
257
+
258
+ /**
259
+ * 12.2.4.10
260
+ */
261
+ static int h5_state_tag_name(h5_state_t* hs)
262
+ {
263
+ char ch;
264
+ size_t pos;
265
+
266
+ TRACE();
267
+ pos = hs->pos;
268
+ while (pos < hs->len) {
269
+ ch = hs->s[pos];
270
+ if (ch == 0) {
271
+ /* special non-standard case */
272
+ /* allow nulls in tag name */
273
+ /* some old browsers apparently allow and ignore them */
274
+ pos += 1;
275
+ } else if (h5_is_white(ch)) {
276
+ hs->token_start = hs->s + hs->pos;
277
+ hs->token_len = pos - hs->pos;
278
+ hs->token_type = TAG_NAME_OPEN;
279
+ hs->pos = pos + 1;
280
+ hs->state = h5_state_before_attribute_name;
281
+ return 1;
282
+ } else if (ch == CHAR_SLASH) {
283
+ hs->token_start = hs->s + hs->pos;
284
+ hs->token_len = pos - hs->pos;
285
+ hs->token_type = TAG_NAME_OPEN;
286
+ hs->pos = pos + 1;
287
+ hs->state = h5_state_self_closing_start_tag;
288
+ return 1;
289
+ } else if (ch == CHAR_GT) {
290
+ hs->token_start = hs->s + hs->pos;
291
+ hs->token_len = pos - hs->pos;
292
+ if (hs->is_close) {
293
+ hs->pos = pos + 1;
294
+ hs->is_close = 0;
295
+ hs->token_type = TAG_CLOSE;
296
+ hs->state = h5_state_data;
297
+ } else {
298
+ hs->pos = pos;
299
+ hs->token_type = TAG_NAME_OPEN;
300
+ hs->state = h5_state_tag_name_close;
301
+ }
302
+ return 1;
303
+ } else {
304
+ pos += 1;
305
+ }
306
+ }
307
+
308
+ hs->token_start = hs->s + hs->pos;
309
+ hs->token_len = hs->len - hs->pos;
310
+ hs->token_type = TAG_NAME_OPEN;
311
+ hs->state = h5_state_eof;
312
+ return 1;
313
+ }
314
+
315
+ /**
316
+ * 12.2.4.34
317
+ */
318
+ static int h5_state_before_attribute_name(h5_state_t* hs)
319
+ {
320
+ int ch;
321
+
322
+ TRACE();
323
+ ch = h5_skip_white(hs);
324
+ switch (ch) {
325
+ case CHAR_EOF: {
326
+ return 0;
327
+ }
328
+ case CHAR_SLASH: {
329
+ hs->pos += 1;
330
+ return h5_state_self_closing_start_tag(hs);
331
+ }
332
+ case CHAR_GT: {
333
+ hs->state = h5_state_data;
334
+ hs->token_start = hs->s + hs->pos;
335
+ hs->token_len = 1;
336
+ hs->token_type = TAG_NAME_CLOSE;
337
+ hs->pos += 1;
338
+ return 1;
339
+ }
340
+ default: {
341
+ return h5_state_attribute_name(hs);
342
+ }
343
+ }
344
+ }
345
+
346
+ static int h5_state_attribute_name(h5_state_t* hs)
347
+ {
348
+ char ch;
349
+ size_t pos;
350
+
351
+ TRACE();
352
+ pos = hs->pos + 1;
353
+ while (pos < hs->len) {
354
+ ch = hs->s[pos];
355
+ if (h5_is_white(ch)) {
356
+ hs->token_start = hs->s + hs->pos;
357
+ hs->token_len = pos - hs->pos;
358
+ hs->token_type = ATTR_NAME;
359
+ hs->state = h5_state_after_attribute_name;
360
+ hs->pos = pos + 1;
361
+ return 1;
362
+ } else if (ch == CHAR_SLASH) {
363
+ hs->token_start = hs->s + hs->pos;
364
+ hs->token_len = pos - hs->pos;
365
+ hs->token_type = ATTR_NAME;
366
+ hs->state = h5_state_self_closing_start_tag;
367
+ hs->pos = pos + 1;
368
+ return 1;
369
+ } else if (ch == CHAR_EQUALS) {
370
+ hs->token_start = hs->s + hs->pos;
371
+ hs->token_len = pos - hs->pos;
372
+ hs->token_type = ATTR_NAME;
373
+ hs->state = h5_state_before_attribute_value;
374
+ hs->pos = pos + 1;
375
+ return 1;
376
+ } else if (ch == CHAR_GT) {
377
+ hs->token_start = hs->s + hs->pos;
378
+ hs->token_len = pos - hs->pos;
379
+ hs->token_type = ATTR_NAME;
380
+ hs->state = h5_state_tag_name_close;
381
+ hs->pos = pos;
382
+ return 1;
383
+ } else {
384
+ pos += 1;
385
+ }
386
+ }
387
+ /* EOF */
388
+ hs->token_start = hs->s + hs->pos;
389
+ hs->token_len = hs->len - hs->pos;
390
+ hs->token_type = ATTR_NAME;
391
+ hs->state = h5_state_eof;
392
+ hs->pos = hs->len;
393
+ return 1;
394
+ }
395
+
396
+ /**
397
+ * 12.2.4.36
398
+ */
399
+ static int h5_state_after_attribute_name(h5_state_t* hs)
400
+ {
401
+ int c;
402
+
403
+ TRACE();
404
+ c = h5_skip_white(hs);
405
+ switch (c) {
406
+ case CHAR_EOF: {
407
+ return 0;
408
+ }
409
+ case CHAR_SLASH: {
410
+ hs->pos += 1;
411
+ return h5_state_self_closing_start_tag(hs);
412
+ }
413
+ case CHAR_EQUALS: {
414
+ hs->pos += 1;
415
+ return h5_state_before_attribute_value(hs);
416
+ }
417
+ case CHAR_GT: {
418
+ return h5_state_tag_name_close(hs);
419
+ }
420
+ default: {
421
+ return h5_state_attribute_name(hs);
422
+ }
423
+ }
424
+ }
425
+
426
+ /**
427
+ * 12.2.4.37
428
+ */
429
+ static int h5_state_before_attribute_value(h5_state_t* hs)
430
+ {
431
+ int c;
432
+ TRACE();
433
+
434
+ c = h5_skip_white(hs);
435
+
436
+ if (c == CHAR_EOF) {
437
+ hs->state = h5_state_eof;
438
+ return 0;
439
+ }
440
+
441
+ if (c == CHAR_DOUBLE) {
442
+ return h5_state_attribute_value_double_quote(hs);
443
+ } else if (c == CHAR_SINGLE) {
444
+ return h5_state_attribute_value_single_quote(hs);
445
+ } else if (c == CHAR_TICK) {
446
+ /* NON STANDARD IE */
447
+ return h5_state_attribute_value_back_quote(hs);
448
+ } else {
449
+ return h5_state_attribute_value_no_quote(hs);
450
+ }
451
+ }
452
+
453
+
454
+ static int h5_state_attribute_value_quote(h5_state_t* hs, char qchar)
455
+ {
456
+ const char* idx;
457
+
458
+ TRACE();
459
+
460
+ /* skip initial quote in normal case.
461
+ * dont do this is pos == 0 since it means we have started
462
+ * in a non-data state. given an input of '><foo
463
+ * we want to make 0-length attribute name
464
+ */
465
+ if (hs->pos > 0) {
466
+ hs->pos += 1;
467
+ }
468
+
469
+
470
+ idx = (const char*) memchr(hs->s + hs->pos, qchar, hs->len - hs->pos);
471
+ if (idx == NULL) {
472
+ hs->token_start = hs->s + hs->pos;
473
+ hs->token_len = hs->len - hs->pos;
474
+ hs->token_type = ATTR_VALUE;
475
+ hs->state = h5_state_eof;
476
+ } else {
477
+ hs->token_start = hs->s + hs->pos;
478
+ hs->token_len = (size_t)(idx - hs->s) - hs->pos;
479
+ hs->token_type = ATTR_VALUE;
480
+ hs->state = h5_state_after_attribute_value_quoted_state;
481
+ hs->pos += hs->token_len + 1;
482
+ }
483
+ return 1;
484
+ }
485
+
486
+ static
487
+ int h5_state_attribute_value_double_quote(h5_state_t* hs)
488
+ {
489
+ TRACE();
490
+ return h5_state_attribute_value_quote(hs, CHAR_DOUBLE);
491
+ }
492
+
493
+ static
494
+ int h5_state_attribute_value_single_quote(h5_state_t* hs)
495
+ {
496
+ TRACE();
497
+ return h5_state_attribute_value_quote(hs, CHAR_SINGLE);
498
+ }
499
+
500
+ static
501
+ int h5_state_attribute_value_back_quote(h5_state_t* hs)
502
+ {
503
+ TRACE();
504
+ return h5_state_attribute_value_quote(hs, CHAR_TICK);
505
+ }
506
+
507
+ static int h5_state_attribute_value_no_quote(h5_state_t* hs)
508
+ {
509
+ char ch;
510
+ size_t pos;
511
+
512
+ TRACE();
513
+ pos = hs->pos;
514
+ while (pos < hs->len) {
515
+ ch = hs->s[pos];
516
+ if (h5_is_white(ch)) {
517
+ hs->token_type = ATTR_VALUE;
518
+ hs->token_start = hs->s + hs->pos;
519
+ hs->token_len = pos - hs->pos;
520
+ hs->pos = pos + 1;
521
+ hs->state = h5_state_before_attribute_name;
522
+ return 1;
523
+ } else if (ch == CHAR_GT) {
524
+ hs->token_type = ATTR_VALUE;
525
+ hs->token_start = hs->s + hs->pos;
526
+ hs->token_len = pos - hs->pos;
527
+ hs->pos = pos;
528
+ hs->state = h5_state_tag_name_close;
529
+ return 1;
530
+ }
531
+ pos += 1;
532
+ }
533
+ TRACE();
534
+ /* EOF */
535
+ hs->state = h5_state_eof;
536
+ hs->token_start = hs->s + hs->pos;
537
+ hs->token_len = hs->len - hs->pos;
538
+ hs->token_type = ATTR_VALUE;
539
+ return 1;
540
+ }
541
+
542
+ /**
543
+ * 12.2.4.41
544
+ */
545
+ static int h5_state_after_attribute_value_quoted_state(h5_state_t* hs)
546
+ {
547
+ char ch;
548
+
549
+ TRACE();
550
+ if (hs->pos >= hs->len) {
551
+ return 0;
552
+ }
553
+ ch = hs->s[hs->pos];
554
+ if (h5_is_white(ch)) {
555
+ hs->pos += 1;
556
+ return h5_state_before_attribute_name(hs);
557
+ } else if (ch == CHAR_SLASH) {
558
+ hs->pos += 1;
559
+ return h5_state_self_closing_start_tag(hs);
560
+ } else if (ch == CHAR_GT) {
561
+ hs->token_start = hs->s + hs->pos;
562
+ hs->token_len = 1;
563
+ hs->token_type = TAG_NAME_CLOSE;
564
+ hs->pos += 1;
565
+ hs->state = h5_state_data;
566
+ return 1;
567
+ } else {
568
+ return h5_state_before_attribute_name(hs);
569
+ }
570
+ }
571
+
572
+ /**
573
+ * 12.2.4.43
574
+ */
575
+ static int h5_state_self_closing_start_tag(h5_state_t* hs)
576
+ {
577
+ char ch;
578
+
579
+ TRACE();
580
+ if (hs->pos >= hs->len) {
581
+ return 0;
582
+ }
583
+ ch = hs->s[hs->pos];
584
+ if (ch == CHAR_GT) {
585
+ assert(hs->pos > 0);
586
+ hs->token_start = hs->s + hs->pos -1;
587
+ hs->token_len = 2;
588
+ hs->token_type = TAG_NAME_SELFCLOSE;
589
+ hs->state = h5_state_data;
590
+ hs->pos += 1;
591
+ return 1;
592
+ } else {
593
+ return h5_state_before_attribute_name(hs);
594
+ }
595
+ }
596
+
597
+ /**
598
+ * 12.2.4.44
599
+ */
600
+ static int h5_state_bogus_comment(h5_state_t* hs)
601
+ {
602
+ const char* idx;
603
+
604
+ TRACE();
605
+ idx = (const char*) memchr(hs->s + hs->pos, CHAR_GT, hs->len - hs->pos);
606
+ if (idx == NULL) {
607
+ hs->token_start = hs->s + hs->pos;
608
+ hs->token_len = hs->len - hs->pos;
609
+ hs->pos = hs->len;
610
+ hs->state = h5_state_eof;
611
+ } else {
612
+ hs->token_start = hs->s + hs->pos;
613
+ hs->token_len = (size_t)(idx - hs->s) - hs->pos;
614
+ hs->pos = (size_t)(idx - hs->s) + 1;
615
+ hs->state = h5_state_data;
616
+ }
617
+
618
+ hs->token_type = TAG_COMMENT;
619
+ return 1;
620
+ }
621
+
622
+ /**
623
+ * 12.2.4.44 ALT
624
+ */
625
+ static int h5_state_bogus_comment2(h5_state_t* hs)
626
+ {
627
+ const char* idx;
628
+ size_t pos;
629
+
630
+ TRACE();
631
+ pos = hs->pos;
632
+ while (1) {
633
+ idx = (const char*) memchr(hs->s + pos, CHAR_PERCENT, hs->len - pos);
634
+ if (idx == NULL || (idx + 1 >= hs->s + hs->len)) {
635
+ hs->token_start = hs->s + hs->pos;
636
+ hs->token_len = hs->len - hs->pos;
637
+ hs->pos = hs->len;
638
+ hs->token_type = TAG_COMMENT;
639
+ hs->state = h5_state_eof;
640
+ return 1;
641
+ }
642
+
643
+ if (*(idx +1) != CHAR_GT) {
644
+ pos = (size_t)(idx - hs->s) + 1;
645
+ continue;
646
+ }
647
+
648
+ /* ends in %> */
649
+ hs->token_start = hs->s + hs->pos;
650
+ hs->token_len = (size_t)(idx - hs->s) - hs->pos;
651
+ hs->pos = (size_t)(idx - hs->s) + 2;
652
+ hs->state = h5_state_data;
653
+ hs->token_type = TAG_COMMENT;
654
+ return 1;
655
+ }
656
+ }
657
+
658
+ /**
659
+ * 8.2.4.45
660
+ */
661
+ static int h5_state_markup_declaration_open(h5_state_t* hs)
662
+ {
663
+ size_t remaining;
664
+
665
+ TRACE();
666
+ remaining = hs->len - hs->pos;
667
+ if (remaining >= 7 &&
668
+ /* case insensitive */
669
+ (hs->s[hs->pos + 0] == 'D' || hs->s[hs->pos + 0] == 'd') &&
670
+ (hs->s[hs->pos + 1] == 'O' || hs->s[hs->pos + 1] == 'o') &&
671
+ (hs->s[hs->pos + 2] == 'C' || hs->s[hs->pos + 2] == 'c') &&
672
+ (hs->s[hs->pos + 3] == 'T' || hs->s[hs->pos + 3] == 't') &&
673
+ (hs->s[hs->pos + 4] == 'Y' || hs->s[hs->pos + 4] == 'y') &&
674
+ (hs->s[hs->pos + 5] == 'P' || hs->s[hs->pos + 5] == 'p') &&
675
+ (hs->s[hs->pos + 6] == 'E' || hs->s[hs->pos + 6] == 'e')
676
+ ) {
677
+ return h5_state_doctype(hs);
678
+ } else if (remaining >= 7 &&
679
+ /* upper case required */
680
+ hs->s[hs->pos + 0] == '[' &&
681
+ hs->s[hs->pos + 1] == 'C' &&
682
+ hs->s[hs->pos + 2] == 'D' &&
683
+ hs->s[hs->pos + 3] == 'A' &&
684
+ hs->s[hs->pos + 4] == 'T' &&
685
+ hs->s[hs->pos + 5] == 'A' &&
686
+ hs->s[hs->pos + 6] == '['
687
+ ) {
688
+ hs->pos += 7;
689
+ return h5_state_cdata(hs);
690
+ } else if (remaining >= 2 &&
691
+ hs->s[hs->pos + 0] == '-' &&
692
+ hs->s[hs->pos + 1] == '-') {
693
+ hs->pos += 2;
694
+ return h5_state_comment(hs);
695
+ }
696
+
697
+ return h5_state_bogus_comment(hs);
698
+ }
699
+
700
+ /**
701
+ * 12.2.4.48
702
+ * 12.2.4.49
703
+ * 12.2.4.50
704
+ * 12.2.4.51
705
+ * state machine spec is confusing since it can only look
706
+ * at one character at a time but simply it's comments end by:
707
+ * 1) EOF
708
+ * 2) ending in -->
709
+ * 3) ending in -!>
710
+ */
711
+ static int h5_state_comment(h5_state_t* hs)
712
+ {
713
+ char ch;
714
+ const char* idx;
715
+ size_t pos;
716
+ size_t offset;
717
+ const char* end = hs->s + hs->len;
718
+
719
+ TRACE();
720
+ pos = hs->pos;
721
+ while (1) {
722
+
723
+ idx = (const char*) memchr(hs->s + pos, CHAR_DASH, hs->len - pos);
724
+
725
+ /* did not find anything or has less than 3 chars left */
726
+ if (idx == NULL || idx > hs->s + hs->len - 3) {
727
+ hs->state = h5_state_eof;
728
+ hs->token_start = hs->s + hs->pos;
729
+ hs->token_len = hs->len - hs->pos;
730
+ hs->token_type = TAG_COMMENT;
731
+ return 1;
732
+ }
733
+ offset = 1;
734
+
735
+ /* skip all nulls */
736
+ while (idx + offset < end && *(idx + offset) == 0) {
737
+ offset += 1;
738
+ }
739
+ if (idx + offset == end) {
740
+ hs->state = h5_state_eof;
741
+ hs->token_start = hs->s + hs->pos;
742
+ hs->token_len = hs->len - hs->pos;
743
+ hs->token_type = TAG_COMMENT;
744
+ return 1;
745
+ }
746
+
747
+ ch = *(idx + offset);
748
+ if (ch != CHAR_DASH && ch != CHAR_BANG) {
749
+ pos = (size_t)(idx - hs->s) + 1;
750
+ continue;
751
+ }
752
+
753
+ /* need to test */
754
+ #if 0
755
+ /* skip all nulls */
756
+ while (idx + offset < end && *(idx + offset) == 0) {
757
+ offset += 1;
758
+ }
759
+ if (idx + offset == end) {
760
+ hs->state = h5_state_eof;
761
+ hs->token_start = hs->s + hs->pos;
762
+ hs->token_len = hs->len - hs->pos;
763
+ hs->token_type = TAG_COMMENT;
764
+ return 1;
765
+ }
766
+ #endif
767
+
768
+ offset += 1;
769
+ if (idx + offset == end) {
770
+ hs->state = h5_state_eof;
771
+ hs->token_start = hs->s + hs->pos;
772
+ hs->token_len = hs->len - hs->pos;
773
+ hs->token_type = TAG_COMMENT;
774
+ return 1;
775
+ }
776
+
777
+
778
+ ch = *(idx + offset);
779
+ if (ch != CHAR_GT) {
780
+ pos = (size_t)(idx - hs->s) + 1;
781
+ continue;
782
+ }
783
+ offset += 1;
784
+
785
+ /* ends in --> or -!> */
786
+ hs->token_start = hs->s + hs->pos;
787
+ hs->token_len = (size_t)(idx - hs->s) - hs->pos;
788
+ hs->pos = (size_t)(idx + offset - hs->s);
789
+ hs->state = h5_state_data;
790
+ hs->token_type = TAG_COMMENT;
791
+ return 1;
792
+ }
793
+ }
794
+
795
+ static int h5_state_cdata(h5_state_t* hs)
796
+ {
797
+ const char* idx;
798
+ size_t pos;
799
+
800
+ TRACE();
801
+ pos = hs->pos;
802
+ while (1) {
803
+ idx = (const char*) memchr(hs->s + pos, CHAR_RIGHTB, hs->len - pos);
804
+
805
+ /* did not find anything or has less than 3 chars left */
806
+ if (idx == NULL || idx > hs->s + hs->len - 3) {
807
+ hs->state = h5_state_eof;
808
+ hs->token_start = hs->s + hs->pos;
809
+ hs->token_len = hs->len - hs->pos;
810
+ hs->token_type = DATA_TEXT;
811
+ return 1;
812
+ } else if ( *(idx+1) == CHAR_RIGHTB && *(idx+2) == CHAR_GT) {
813
+ hs->state = h5_state_data;
814
+ hs->token_start = hs->s + hs->pos;
815
+ hs->token_len = (size_t)(idx - hs->s) - hs->pos;
816
+ hs->pos = (size_t)(idx - hs->s) + 3;
817
+ hs->token_type = DATA_TEXT;
818
+ return 1;
819
+ } else {
820
+ pos = (size_t)(idx - hs->s) + 1;
821
+ }
822
+ }
823
+ }
824
+
825
+ /**
826
+ * 8.2.4.52
827
+ * http://www.w3.org/html/wg/drafts/html/master/syntax.html#doctype-state
828
+ */
829
+ static int h5_state_doctype(h5_state_t* hs)
830
+ {
831
+ const char* idx;
832
+
833
+ TRACE();
834
+ hs->token_start = hs->s + hs->pos;
835
+ hs->token_type = DOCTYPE;
836
+
837
+ idx = (const char*) memchr(hs->s + hs->pos, CHAR_GT, hs->len - hs->pos);
838
+ if (idx == NULL) {
839
+ hs->state = h5_state_eof;
840
+ hs->token_len = hs->len - hs->pos;
841
+ } else {
842
+ hs->state = h5_state_data;
843
+ hs->token_len = (size_t)(idx - hs->s) - hs->pos;
844
+ hs->pos = (size_t)(idx - hs->s) + 1;
845
+ }
846
+ return 1;
847
+ }