immunio 0.15.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +234 -0
- data/README.md +147 -0
- data/bin/immunio +5 -0
- data/lib/immunio.rb +29 -0
- data/lib/immunio/agent.rb +260 -0
- data/lib/immunio/authentication.rb +96 -0
- data/lib/immunio/blocked_app.rb +38 -0
- data/lib/immunio/channel.rb +432 -0
- data/lib/immunio/cli.rb +39 -0
- data/lib/immunio/context.rb +114 -0
- data/lib/immunio/errors.rb +43 -0
- data/lib/immunio/immunio_ca.crt +45 -0
- data/lib/immunio/logger.rb +87 -0
- data/lib/immunio/plugins/action_dispatch.rb +45 -0
- data/lib/immunio/plugins/action_view.rb +431 -0
- data/lib/immunio/plugins/active_record.rb +707 -0
- data/lib/immunio/plugins/active_record_relation.rb +370 -0
- data/lib/immunio/plugins/authlogic.rb +80 -0
- data/lib/immunio/plugins/csrf.rb +24 -0
- data/lib/immunio/plugins/devise.rb +40 -0
- data/lib/immunio/plugins/environment_reporter.rb +69 -0
- data/lib/immunio/plugins/eval.rb +51 -0
- data/lib/immunio/plugins/exception_handler.rb +55 -0
- data/lib/immunio/plugins/gems_tracker.rb +5 -0
- data/lib/immunio/plugins/haml.rb +36 -0
- data/lib/immunio/plugins/http_finisher.rb +50 -0
- data/lib/immunio/plugins/http_tracker.rb +203 -0
- data/lib/immunio/plugins/io.rb +96 -0
- data/lib/immunio/plugins/redirect.rb +42 -0
- data/lib/immunio/plugins/warden.rb +66 -0
- data/lib/immunio/processor.rb +234 -0
- data/lib/immunio/rails.rb +26 -0
- data/lib/immunio/request.rb +139 -0
- data/lib/immunio/rufus_lua_ext/ref.rb +27 -0
- data/lib/immunio/rufus_lua_ext/state.rb +157 -0
- data/lib/immunio/rufus_lua_ext/table.rb +137 -0
- data/lib/immunio/rufus_lua_ext/utils.rb +13 -0
- data/lib/immunio/version.rb +5 -0
- data/lib/immunio/vm.rb +291 -0
- data/lua-hooks/ext/all.c +78 -0
- data/lua-hooks/ext/bitop/README +22 -0
- data/lua-hooks/ext/bitop/bit.c +189 -0
- data/lua-hooks/ext/extconf.rb +38 -0
- data/lua-hooks/ext/libinjection/COPYING +37 -0
- data/lua-hooks/ext/libinjection/libinjection.h +65 -0
- data/lua-hooks/ext/libinjection/libinjection_html5.c +847 -0
- data/lua-hooks/ext/libinjection/libinjection_html5.h +54 -0
- data/lua-hooks/ext/libinjection/libinjection_sqli.c +2301 -0
- data/lua-hooks/ext/libinjection/libinjection_sqli.h +295 -0
- data/lua-hooks/ext/libinjection/libinjection_sqli_data.h +9349 -0
- data/lua-hooks/ext/libinjection/libinjection_xss.c +531 -0
- data/lua-hooks/ext/libinjection/libinjection_xss.h +21 -0
- data/lua-hooks/ext/libinjection/lualib.c +109 -0
- data/lua-hooks/ext/lpeg/HISTORY +90 -0
- data/lua-hooks/ext/lpeg/lpcap.c +537 -0
- data/lua-hooks/ext/lpeg/lpcap.h +43 -0
- data/lua-hooks/ext/lpeg/lpcode.c +986 -0
- data/lua-hooks/ext/lpeg/lpcode.h +34 -0
- data/lua-hooks/ext/lpeg/lpeg-128.gif +0 -0
- data/lua-hooks/ext/lpeg/lpeg.html +1429 -0
- data/lua-hooks/ext/lpeg/lpprint.c +244 -0
- data/lua-hooks/ext/lpeg/lpprint.h +35 -0
- data/lua-hooks/ext/lpeg/lptree.c +1238 -0
- data/lua-hooks/ext/lpeg/lptree.h +77 -0
- data/lua-hooks/ext/lpeg/lptypes.h +149 -0
- data/lua-hooks/ext/lpeg/lpvm.c +355 -0
- data/lua-hooks/ext/lpeg/lpvm.h +58 -0
- data/lua-hooks/ext/lpeg/makefile +55 -0
- data/lua-hooks/ext/lpeg/re.html +498 -0
- data/lua-hooks/ext/lpeg/test.lua +1409 -0
- data/lua-hooks/ext/lua-cmsgpack/CMakeLists.txt +45 -0
- data/lua-hooks/ext/lua-cmsgpack/README.md +115 -0
- data/lua-hooks/ext/lua-cmsgpack/lua_cmsgpack.c +957 -0
- data/lua-hooks/ext/lua-cmsgpack/test.lua +570 -0
- data/lua-hooks/ext/lua-snapshot/LICENSE +7 -0
- data/lua-hooks/ext/lua-snapshot/Makefile +12 -0
- data/lua-hooks/ext/lua-snapshot/README.md +18 -0
- data/lua-hooks/ext/lua-snapshot/dump.lua +15 -0
- data/lua-hooks/ext/lua-snapshot/snapshot.c +455 -0
- data/lua-hooks/ext/lua/COPYRIGHT +34 -0
- data/lua-hooks/ext/lua/lapi.c +1087 -0
- data/lua-hooks/ext/lua/lapi.h +16 -0
- data/lua-hooks/ext/lua/lauxlib.c +652 -0
- data/lua-hooks/ext/lua/lauxlib.h +174 -0
- data/lua-hooks/ext/lua/lbaselib.c +659 -0
- data/lua-hooks/ext/lua/lcode.c +831 -0
- data/lua-hooks/ext/lua/lcode.h +76 -0
- data/lua-hooks/ext/lua/ldblib.c +398 -0
- data/lua-hooks/ext/lua/ldebug.c +638 -0
- data/lua-hooks/ext/lua/ldebug.h +33 -0
- data/lua-hooks/ext/lua/ldo.c +519 -0
- data/lua-hooks/ext/lua/ldo.h +57 -0
- data/lua-hooks/ext/lua/ldump.c +164 -0
- data/lua-hooks/ext/lua/lfunc.c +174 -0
- data/lua-hooks/ext/lua/lfunc.h +34 -0
- data/lua-hooks/ext/lua/lgc.c +710 -0
- data/lua-hooks/ext/lua/lgc.h +110 -0
- data/lua-hooks/ext/lua/linit.c +38 -0
- data/lua-hooks/ext/lua/liolib.c +556 -0
- data/lua-hooks/ext/lua/llex.c +463 -0
- data/lua-hooks/ext/lua/llex.h +81 -0
- data/lua-hooks/ext/lua/llimits.h +128 -0
- data/lua-hooks/ext/lua/lmathlib.c +263 -0
- data/lua-hooks/ext/lua/lmem.c +86 -0
- data/lua-hooks/ext/lua/lmem.h +49 -0
- data/lua-hooks/ext/lua/loadlib.c +705 -0
- data/lua-hooks/ext/lua/loadlib_rel.c +760 -0
- data/lua-hooks/ext/lua/lobject.c +214 -0
- data/lua-hooks/ext/lua/lobject.h +381 -0
- data/lua-hooks/ext/lua/lopcodes.c +102 -0
- data/lua-hooks/ext/lua/lopcodes.h +268 -0
- data/lua-hooks/ext/lua/loslib.c +243 -0
- data/lua-hooks/ext/lua/lparser.c +1339 -0
- data/lua-hooks/ext/lua/lparser.h +82 -0
- data/lua-hooks/ext/lua/lstate.c +214 -0
- data/lua-hooks/ext/lua/lstate.h +169 -0
- data/lua-hooks/ext/lua/lstring.c +111 -0
- data/lua-hooks/ext/lua/lstring.h +31 -0
- data/lua-hooks/ext/lua/lstrlib.c +871 -0
- data/lua-hooks/ext/lua/ltable.c +588 -0
- data/lua-hooks/ext/lua/ltable.h +40 -0
- data/lua-hooks/ext/lua/ltablib.c +287 -0
- data/lua-hooks/ext/lua/ltm.c +75 -0
- data/lua-hooks/ext/lua/ltm.h +54 -0
- data/lua-hooks/ext/lua/lua.c +392 -0
- data/lua-hooks/ext/lua/lua.def +131 -0
- data/lua-hooks/ext/lua/lua.h +388 -0
- data/lua-hooks/ext/lua/lua.rc +28 -0
- data/lua-hooks/ext/lua/lua_dll.rc +26 -0
- data/lua-hooks/ext/lua/luac.c +200 -0
- data/lua-hooks/ext/lua/luac.rc +1 -0
- data/lua-hooks/ext/lua/luaconf.h +763 -0
- data/lua-hooks/ext/lua/luaconf.h.in +724 -0
- data/lua-hooks/ext/lua/luaconf.h.orig +763 -0
- data/lua-hooks/ext/lua/lualib.h +53 -0
- data/lua-hooks/ext/lua/lundump.c +227 -0
- data/lua-hooks/ext/lua/lundump.h +36 -0
- data/lua-hooks/ext/lua/lvm.c +767 -0
- data/lua-hooks/ext/lua/lvm.h +36 -0
- data/lua-hooks/ext/lua/lzio.c +82 -0
- data/lua-hooks/ext/lua/lzio.h +67 -0
- data/lua-hooks/ext/lua/print.c +227 -0
- data/lua-hooks/ext/luautf8/README.md +152 -0
- data/lua-hooks/ext/luautf8/lutf8lib.c +1274 -0
- data/lua-hooks/ext/luautf8/unidata.h +3064 -0
- data/lua-hooks/lib/boot.lua +254 -0
- data/lua-hooks/lib/encode.lua +4 -0
- data/lua-hooks/lib/lexers/LICENSE +21 -0
- data/lua-hooks/lib/lexers/bash.lua +134 -0
- data/lua-hooks/lib/lexers/bash_dqstr.lua +62 -0
- data/lua-hooks/lib/lexers/css.lua +216 -0
- data/lua-hooks/lib/lexers/html.lua +106 -0
- data/lua-hooks/lib/lexers/javascript.lua +68 -0
- data/lua-hooks/lib/lexers/lexer.lua +1575 -0
- data/lua-hooks/lib/lexers/markers.lua +33 -0
- metadata +308 -0
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# Used by Ruby to compile the extension.
|
|
2
|
+
require 'mkmf'
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
# libinjection doesn't support `#include`ing all the .c files directly
|
|
6
|
+
# in the source, since it has symbols which conflict. Instead the `$objs`
|
|
7
|
+
# list below compiles each file separately then links them in the final
|
|
8
|
+
# step.
|
|
9
|
+
$objs = [
|
|
10
|
+
"all.o",
|
|
11
|
+
"libinjection/libinjection_html5.o",
|
|
12
|
+
"libinjection/libinjection_xss.o",
|
|
13
|
+
"libinjection/libinjection_sqli.o",
|
|
14
|
+
#Compile in LPEG
|
|
15
|
+
"lpeg/lpcap.o",
|
|
16
|
+
"lpeg/lpcode.o",
|
|
17
|
+
"lpeg/lpprint.o",
|
|
18
|
+
"lpeg/lpvm.o",
|
|
19
|
+
# "lpeg/lptree.o",
|
|
20
|
+
]
|
|
21
|
+
|
|
22
|
+
# The created Makefile puts the compiled .o files into the `libinjection`
|
|
23
|
+
# subdirectory, but it doesn't create it. Make sure it exists.
|
|
24
|
+
xsystem "mkdir -p libinjection"
|
|
25
|
+
xsystem "mkdir -p lpeg"
|
|
26
|
+
|
|
27
|
+
# Build init hook, only used when running agent in dev mode
|
|
28
|
+
STDERR.puts `make -C ../../../../lua-hooks hooks/__init__.lua`
|
|
29
|
+
|
|
30
|
+
#!!! PLEASE ALWAYS make sure the flags here match the Lua Makefile so our tests are valid
|
|
31
|
+
# Enable safety assertions
|
|
32
|
+
$CFLAGS << " -DLUA_USE_APICHECK -Dlua_assert=assert "
|
|
33
|
+
# Enable optimisation
|
|
34
|
+
$CFLAGS << " -O3 "
|
|
35
|
+
# Without this flag, I get this error when trying to compile in agent-java:
|
|
36
|
+
# relocation R_X86_64_32S against `.rodata' can not be used when making a shared object; recompile with -fPIC
|
|
37
|
+
$CFLAGS << " -fPIC "
|
|
38
|
+
create_makefile 'immunio/lua-hooks'
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright 2012, 2013, 2014
|
|
3
|
+
* Nick Galbreath -- nickg [at] client9 [dot] com
|
|
4
|
+
* http://www.client9.com/projects/libinjection/
|
|
5
|
+
*
|
|
6
|
+
* All rights reserved.
|
|
7
|
+
*
|
|
8
|
+
* Redistribution and use in source and binary forms, with or without
|
|
9
|
+
* modification, are permitted provided that the following conditions are
|
|
10
|
+
* met:
|
|
11
|
+
*
|
|
12
|
+
* Redistributions of source code must retain the above copyright
|
|
13
|
+
* notice, this list of conditions and the following disclaimer.
|
|
14
|
+
*
|
|
15
|
+
* Redistributions in binary form must reproduce the above copyright
|
|
16
|
+
* notice, this list of conditions and the following disclaimer in the
|
|
17
|
+
* documentation and/or other materials provided with the distribution.
|
|
18
|
+
*
|
|
19
|
+
* Neither the name of libinjection nor the names of its
|
|
20
|
+
* contributors may be used to endorse or promote products derived from
|
|
21
|
+
* this software without specific prior written permission.
|
|
22
|
+
*
|
|
23
|
+
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
24
|
+
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
25
|
+
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
26
|
+
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
27
|
+
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
28
|
+
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
29
|
+
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
30
|
+
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
31
|
+
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
32
|
+
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
33
|
+
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
34
|
+
*
|
|
35
|
+
* This is the standard "new" BSD license:
|
|
36
|
+
* http://www.opensource.org/licenses/bsd-license.php
|
|
37
|
+
*/
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright 2012, 2013 Nick Galbreath
|
|
3
|
+
* nickg@client9.com
|
|
4
|
+
* BSD License -- see COPYING.txt for details
|
|
5
|
+
*
|
|
6
|
+
* https://libinjection.client9.com/
|
|
7
|
+
*
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
#ifndef _LIBINJECTION_H
|
|
11
|
+
#define _LIBINJECTION_H
|
|
12
|
+
|
|
13
|
+
#ifdef __cplusplus
|
|
14
|
+
# define LIBINJECTION_BEGIN_DECLS extern "C" {
|
|
15
|
+
# define LIBINJECTION_END_DECLS }
|
|
16
|
+
#else
|
|
17
|
+
# define LIBINJECTION_BEGIN_DECLS
|
|
18
|
+
# define LIBINJECTION_END_DECLS
|
|
19
|
+
#endif
|
|
20
|
+
|
|
21
|
+
LIBINJECTION_BEGIN_DECLS
|
|
22
|
+
|
|
23
|
+
/*
|
|
24
|
+
* Pull in size_t
|
|
25
|
+
*/
|
|
26
|
+
#include <string.h>
|
|
27
|
+
|
|
28
|
+
/*
|
|
29
|
+
* Version info.
|
|
30
|
+
*
|
|
31
|
+
* This is moved into a function to allow SWIG and other auto-generated
|
|
32
|
+
* binding to not be modified during minor release changes. We change
|
|
33
|
+
* the version number in the c source file, and do not regenerate
|
|
34
|
+
* the binding
|
|
35
|
+
*
|
|
36
|
+
* See python's normalized version
|
|
37
|
+
* http://www.python.org/dev/peps/pep-0386/#normalizedversion
|
|
38
|
+
*/
|
|
39
|
+
const char* libinjection_version(void);
|
|
40
|
+
|
|
41
|
+
/**
|
|
42
|
+
* Simple API for SQLi detection - returns a SQLi fingerprint or NULL
|
|
43
|
+
* if benign input
|
|
44
|
+
*
|
|
45
|
+
* \param[in] s input string, may contain nulls, does not need to be null-terminated
|
|
46
|
+
* \param[in] slen input string length
|
|
47
|
+
* \param[out] fingerprint buffer of 8+ characters, written as a c-string
|
|
48
|
+
* \return 1 if SQLi, 0 if benign. fingerprint will be set or set to empty string.
|
|
49
|
+
*/
|
|
50
|
+
int libinjection_sqli(const char* s, size_t slen, char fingerprint[]);
|
|
51
|
+
|
|
52
|
+
/** ALPHA version of xss detector.
|
|
53
|
+
*
|
|
54
|
+
* NOT DONE.
|
|
55
|
+
*
|
|
56
|
+
* \param[in] s input string, may contain nulls, does not need to be null-terminated
|
|
57
|
+
* \param[in] slen input string length
|
|
58
|
+
* \return 1 if XSS found, 0 if benign
|
|
59
|
+
*
|
|
60
|
+
*/
|
|
61
|
+
int libinjection_xss(const char* s, size_t slen);
|
|
62
|
+
|
|
63
|
+
LIBINJECTION_END_DECLS
|
|
64
|
+
|
|
65
|
+
#endif /* _LIBINJECTION_H */
|
|
@@ -0,0 +1,847 @@
|
|
|
1
|
+
#include "libinjection_html5.h"
|
|
2
|
+
|
|
3
|
+
#include <string.h>
|
|
4
|
+
#include <assert.h>
|
|
5
|
+
|
|
6
|
+
#ifdef DEBUG
|
|
7
|
+
#include <stdio.h>
|
|
8
|
+
#define TRACE() printf("%s:%d\n", __FUNCTION__, __LINE__)
|
|
9
|
+
#else
|
|
10
|
+
#define TRACE()
|
|
11
|
+
#endif
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
#define CHAR_EOF -1
|
|
15
|
+
#define CHAR_NULL 0
|
|
16
|
+
#define CHAR_BANG 33
|
|
17
|
+
#define CHAR_DOUBLE 34
|
|
18
|
+
#define CHAR_PERCENT 37
|
|
19
|
+
#define CHAR_SINGLE 39
|
|
20
|
+
#define CHAR_DASH 45
|
|
21
|
+
#define CHAR_SLASH 47
|
|
22
|
+
#define CHAR_LT 60
|
|
23
|
+
#define CHAR_EQUALS 61
|
|
24
|
+
#define CHAR_GT 62
|
|
25
|
+
#define CHAR_QUESTION 63
|
|
26
|
+
#define CHAR_RIGHTB 93
|
|
27
|
+
#define CHAR_TICK 96
|
|
28
|
+
|
|
29
|
+
/* prototypes */
|
|
30
|
+
|
|
31
|
+
static int h5_skip_white(h5_state_t* hs);
|
|
32
|
+
static int h5_is_white(char c);
|
|
33
|
+
static int h5_state_eof(h5_state_t* hs);
|
|
34
|
+
static int h5_state_data(h5_state_t* hs);
|
|
35
|
+
static int h5_state_tag_open(h5_state_t* hs);
|
|
36
|
+
static int h5_state_tag_name(h5_state_t* hs);
|
|
37
|
+
static int h5_state_tag_name_close(h5_state_t* hs);
|
|
38
|
+
static int h5_state_end_tag_open(h5_state_t* hs);
|
|
39
|
+
static int h5_state_self_closing_start_tag(h5_state_t* hs);
|
|
40
|
+
static int h5_state_attribute_name(h5_state_t* hs);
|
|
41
|
+
static int h5_state_after_attribute_name(h5_state_t* hs);
|
|
42
|
+
static int h5_state_before_attribute_name(h5_state_t* hs);
|
|
43
|
+
static int h5_state_before_attribute_value(h5_state_t* hs);
|
|
44
|
+
static int h5_state_attribute_value_double_quote(h5_state_t* hs);
|
|
45
|
+
static int h5_state_attribute_value_single_quote(h5_state_t* hs);
|
|
46
|
+
static int h5_state_attribute_value_back_quote(h5_state_t* hs);
|
|
47
|
+
static int h5_state_attribute_value_no_quote(h5_state_t* hs);
|
|
48
|
+
static int h5_state_after_attribute_value_quoted_state(h5_state_t* hs);
|
|
49
|
+
static int h5_state_comment(h5_state_t* hs);
|
|
50
|
+
static int h5_state_cdata(h5_state_t* hs);
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
/* 12.2.4.44 */
|
|
54
|
+
static int h5_state_bogus_comment(h5_state_t* hs);
|
|
55
|
+
static int h5_state_bogus_comment2(h5_state_t* hs);
|
|
56
|
+
|
|
57
|
+
/* 12.2.4.45 */
|
|
58
|
+
static int h5_state_markup_declaration_open(h5_state_t* hs);
|
|
59
|
+
|
|
60
|
+
/* 8.2.4.52 */
|
|
61
|
+
static int h5_state_doctype(h5_state_t* hs);
|
|
62
|
+
|
|
63
|
+
/**
|
|
64
|
+
* public function
|
|
65
|
+
*/
|
|
66
|
+
void libinjection_h5_init(h5_state_t* hs, const char* s, size_t len, enum html5_flags flags)
|
|
67
|
+
{
|
|
68
|
+
memset(hs, 0, sizeof(h5_state_t));
|
|
69
|
+
hs->s = s;
|
|
70
|
+
hs->len = len;
|
|
71
|
+
|
|
72
|
+
switch (flags) {
|
|
73
|
+
case DATA_STATE:
|
|
74
|
+
hs->state = h5_state_data;
|
|
75
|
+
break;
|
|
76
|
+
case VALUE_NO_QUOTE:
|
|
77
|
+
hs->state = h5_state_before_attribute_name;
|
|
78
|
+
break;
|
|
79
|
+
case VALUE_SINGLE_QUOTE:
|
|
80
|
+
hs->state = h5_state_attribute_value_single_quote;
|
|
81
|
+
break;
|
|
82
|
+
case VALUE_DOUBLE_QUOTE:
|
|
83
|
+
hs->state = h5_state_attribute_value_double_quote;
|
|
84
|
+
break;
|
|
85
|
+
case VALUE_BACK_QUOTE:
|
|
86
|
+
hs->state = h5_state_attribute_value_back_quote;
|
|
87
|
+
break;
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
/**
|
|
92
|
+
* public function
|
|
93
|
+
*/
|
|
94
|
+
int libinjection_h5_next(h5_state_t* hs)
|
|
95
|
+
{
|
|
96
|
+
assert(hs->state != NULL);
|
|
97
|
+
return (*hs->state)(hs);
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
/**
|
|
101
|
+
* Everything below here is private
|
|
102
|
+
*
|
|
103
|
+
*/
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
static int h5_is_white(char ch)
|
|
107
|
+
{
|
|
108
|
+
/*
|
|
109
|
+
* \t = htab = 0x09
|
|
110
|
+
* \n = newline = 0x0A
|
|
111
|
+
* \v = vtab = 0x0B
|
|
112
|
+
* \f = form feed = 0x0C
|
|
113
|
+
* \r = cr = 0x0D
|
|
114
|
+
*/
|
|
115
|
+
return strchr(" \t\n\v\f\r", ch) != NULL;
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
static int h5_skip_white(h5_state_t* hs)
|
|
119
|
+
{
|
|
120
|
+
char ch;
|
|
121
|
+
while (hs->pos < hs->len) {
|
|
122
|
+
ch = hs->s[hs->pos];
|
|
123
|
+
switch (ch) {
|
|
124
|
+
case 0x00: /* IE only */
|
|
125
|
+
case 0x20:
|
|
126
|
+
case 0x09:
|
|
127
|
+
case 0x0A:
|
|
128
|
+
case 0x0B: /* IE only */
|
|
129
|
+
case 0x0C:
|
|
130
|
+
case 0x0D: /* IE only */
|
|
131
|
+
hs->pos += 1;
|
|
132
|
+
break;
|
|
133
|
+
default:
|
|
134
|
+
return ch;
|
|
135
|
+
}
|
|
136
|
+
}
|
|
137
|
+
return CHAR_EOF;
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
static int h5_state_eof(h5_state_t* hs)
|
|
141
|
+
{
|
|
142
|
+
/* eliminate unused function argument warning */
|
|
143
|
+
(void)hs;
|
|
144
|
+
return 0;
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
static int h5_state_data(h5_state_t* hs)
|
|
148
|
+
{
|
|
149
|
+
const char* idx;
|
|
150
|
+
|
|
151
|
+
TRACE();
|
|
152
|
+
assert(hs->len >= hs->pos);
|
|
153
|
+
idx = (const char*) memchr(hs->s + hs->pos, CHAR_LT, hs->len - hs->pos);
|
|
154
|
+
if (idx == NULL) {
|
|
155
|
+
hs->token_start = hs->s + hs->pos;
|
|
156
|
+
hs->token_len = hs->len - hs->pos;
|
|
157
|
+
hs->token_type = DATA_TEXT;
|
|
158
|
+
hs->state = h5_state_eof;
|
|
159
|
+
if (hs->token_len == 0) {
|
|
160
|
+
return 0;
|
|
161
|
+
}
|
|
162
|
+
} else {
|
|
163
|
+
hs->token_start = hs->s + hs->pos;
|
|
164
|
+
hs->token_type = DATA_TEXT;
|
|
165
|
+
hs->token_len = (size_t)(idx - hs->s) - hs->pos;
|
|
166
|
+
hs->pos = (size_t)(idx - hs->s) + 1;
|
|
167
|
+
hs->state = h5_state_tag_open;
|
|
168
|
+
if (hs->token_len == 0) {
|
|
169
|
+
return h5_state_tag_open(hs);
|
|
170
|
+
}
|
|
171
|
+
}
|
|
172
|
+
return 1;
|
|
173
|
+
}
|
|
174
|
+
|
|
175
|
+
/**
|
|
176
|
+
* 12.2.4.8
|
|
177
|
+
*/
|
|
178
|
+
static int h5_state_tag_open(h5_state_t* hs)
|
|
179
|
+
{
|
|
180
|
+
char ch;
|
|
181
|
+
|
|
182
|
+
TRACE();
|
|
183
|
+
ch = hs->s[hs->pos];
|
|
184
|
+
if (ch == CHAR_BANG) {
|
|
185
|
+
hs->pos += 1;
|
|
186
|
+
return h5_state_markup_declaration_open(hs);
|
|
187
|
+
} else if (ch == CHAR_SLASH) {
|
|
188
|
+
hs->pos += 1;
|
|
189
|
+
hs->is_close = 1;
|
|
190
|
+
return h5_state_end_tag_open(hs);
|
|
191
|
+
} else if (ch == CHAR_QUESTION) {
|
|
192
|
+
hs->pos += 1;
|
|
193
|
+
return h5_state_bogus_comment(hs);
|
|
194
|
+
} else if (ch == CHAR_PERCENT) {
|
|
195
|
+
/* this is not in spec.. alternative comment format used
|
|
196
|
+
by IE <= 9 and Safari < 4.0.3 */
|
|
197
|
+
hs->pos += 1;
|
|
198
|
+
return h5_state_bogus_comment2(hs);
|
|
199
|
+
} else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
|
|
200
|
+
return h5_state_tag_name(hs);
|
|
201
|
+
} else if (ch == CHAR_NULL) {
|
|
202
|
+
/* IE-ism NULL characters are ignored */
|
|
203
|
+
return h5_state_tag_name(hs);
|
|
204
|
+
} else {
|
|
205
|
+
/* user input mistake in configuring state */
|
|
206
|
+
if (hs->pos == 0) {
|
|
207
|
+
return h5_state_data(hs);
|
|
208
|
+
}
|
|
209
|
+
hs->token_start = hs->s + hs->pos - 1;
|
|
210
|
+
hs->token_len = 1;
|
|
211
|
+
hs->token_type = DATA_TEXT;
|
|
212
|
+
hs->state = h5_state_data;
|
|
213
|
+
return 1;
|
|
214
|
+
}
|
|
215
|
+
}
|
|
216
|
+
/**
|
|
217
|
+
* 12.2.4.9
|
|
218
|
+
*/
|
|
219
|
+
static int h5_state_end_tag_open(h5_state_t* hs)
|
|
220
|
+
{
|
|
221
|
+
char ch;
|
|
222
|
+
|
|
223
|
+
TRACE();
|
|
224
|
+
|
|
225
|
+
if (hs->pos >= hs->len) {
|
|
226
|
+
return 0;
|
|
227
|
+
}
|
|
228
|
+
ch = hs->s[hs->pos];
|
|
229
|
+
if (ch == CHAR_GT) {
|
|
230
|
+
return h5_state_data(hs);
|
|
231
|
+
} else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
|
|
232
|
+
return h5_state_tag_name(hs);
|
|
233
|
+
}
|
|
234
|
+
|
|
235
|
+
hs->is_close = 0;
|
|
236
|
+
return h5_state_bogus_comment(hs);
|
|
237
|
+
}
|
|
238
|
+
/*
|
|
239
|
+
*
|
|
240
|
+
*/
|
|
241
|
+
static int h5_state_tag_name_close(h5_state_t* hs)
|
|
242
|
+
{
|
|
243
|
+
TRACE();
|
|
244
|
+
hs->is_close = 0;
|
|
245
|
+
hs->token_start = hs->s + hs->pos;
|
|
246
|
+
hs->token_len = 1;
|
|
247
|
+
hs->token_type = TAG_NAME_CLOSE;
|
|
248
|
+
hs->pos += 1;
|
|
249
|
+
if (hs->pos < hs->len) {
|
|
250
|
+
hs->state = h5_state_data;
|
|
251
|
+
} else {
|
|
252
|
+
hs->state = h5_state_eof;
|
|
253
|
+
}
|
|
254
|
+
|
|
255
|
+
return 1;
|
|
256
|
+
}
|
|
257
|
+
|
|
258
|
+
/**
|
|
259
|
+
* 12.2.4.10
|
|
260
|
+
*/
|
|
261
|
+
static int h5_state_tag_name(h5_state_t* hs)
|
|
262
|
+
{
|
|
263
|
+
char ch;
|
|
264
|
+
size_t pos;
|
|
265
|
+
|
|
266
|
+
TRACE();
|
|
267
|
+
pos = hs->pos;
|
|
268
|
+
while (pos < hs->len) {
|
|
269
|
+
ch = hs->s[pos];
|
|
270
|
+
if (ch == 0) {
|
|
271
|
+
/* special non-standard case */
|
|
272
|
+
/* allow nulls in tag name */
|
|
273
|
+
/* some old browsers apparently allow and ignore them */
|
|
274
|
+
pos += 1;
|
|
275
|
+
} else if (h5_is_white(ch)) {
|
|
276
|
+
hs->token_start = hs->s + hs->pos;
|
|
277
|
+
hs->token_len = pos - hs->pos;
|
|
278
|
+
hs->token_type = TAG_NAME_OPEN;
|
|
279
|
+
hs->pos = pos + 1;
|
|
280
|
+
hs->state = h5_state_before_attribute_name;
|
|
281
|
+
return 1;
|
|
282
|
+
} else if (ch == CHAR_SLASH) {
|
|
283
|
+
hs->token_start = hs->s + hs->pos;
|
|
284
|
+
hs->token_len = pos - hs->pos;
|
|
285
|
+
hs->token_type = TAG_NAME_OPEN;
|
|
286
|
+
hs->pos = pos + 1;
|
|
287
|
+
hs->state = h5_state_self_closing_start_tag;
|
|
288
|
+
return 1;
|
|
289
|
+
} else if (ch == CHAR_GT) {
|
|
290
|
+
hs->token_start = hs->s + hs->pos;
|
|
291
|
+
hs->token_len = pos - hs->pos;
|
|
292
|
+
if (hs->is_close) {
|
|
293
|
+
hs->pos = pos + 1;
|
|
294
|
+
hs->is_close = 0;
|
|
295
|
+
hs->token_type = TAG_CLOSE;
|
|
296
|
+
hs->state = h5_state_data;
|
|
297
|
+
} else {
|
|
298
|
+
hs->pos = pos;
|
|
299
|
+
hs->token_type = TAG_NAME_OPEN;
|
|
300
|
+
hs->state = h5_state_tag_name_close;
|
|
301
|
+
}
|
|
302
|
+
return 1;
|
|
303
|
+
} else {
|
|
304
|
+
pos += 1;
|
|
305
|
+
}
|
|
306
|
+
}
|
|
307
|
+
|
|
308
|
+
hs->token_start = hs->s + hs->pos;
|
|
309
|
+
hs->token_len = hs->len - hs->pos;
|
|
310
|
+
hs->token_type = TAG_NAME_OPEN;
|
|
311
|
+
hs->state = h5_state_eof;
|
|
312
|
+
return 1;
|
|
313
|
+
}
|
|
314
|
+
|
|
315
|
+
/**
|
|
316
|
+
* 12.2.4.34
|
|
317
|
+
*/
|
|
318
|
+
static int h5_state_before_attribute_name(h5_state_t* hs)
|
|
319
|
+
{
|
|
320
|
+
int ch;
|
|
321
|
+
|
|
322
|
+
TRACE();
|
|
323
|
+
ch = h5_skip_white(hs);
|
|
324
|
+
switch (ch) {
|
|
325
|
+
case CHAR_EOF: {
|
|
326
|
+
return 0;
|
|
327
|
+
}
|
|
328
|
+
case CHAR_SLASH: {
|
|
329
|
+
hs->pos += 1;
|
|
330
|
+
return h5_state_self_closing_start_tag(hs);
|
|
331
|
+
}
|
|
332
|
+
case CHAR_GT: {
|
|
333
|
+
hs->state = h5_state_data;
|
|
334
|
+
hs->token_start = hs->s + hs->pos;
|
|
335
|
+
hs->token_len = 1;
|
|
336
|
+
hs->token_type = TAG_NAME_CLOSE;
|
|
337
|
+
hs->pos += 1;
|
|
338
|
+
return 1;
|
|
339
|
+
}
|
|
340
|
+
default: {
|
|
341
|
+
return h5_state_attribute_name(hs);
|
|
342
|
+
}
|
|
343
|
+
}
|
|
344
|
+
}
|
|
345
|
+
|
|
346
|
+
static int h5_state_attribute_name(h5_state_t* hs)
|
|
347
|
+
{
|
|
348
|
+
char ch;
|
|
349
|
+
size_t pos;
|
|
350
|
+
|
|
351
|
+
TRACE();
|
|
352
|
+
pos = hs->pos + 1;
|
|
353
|
+
while (pos < hs->len) {
|
|
354
|
+
ch = hs->s[pos];
|
|
355
|
+
if (h5_is_white(ch)) {
|
|
356
|
+
hs->token_start = hs->s + hs->pos;
|
|
357
|
+
hs->token_len = pos - hs->pos;
|
|
358
|
+
hs->token_type = ATTR_NAME;
|
|
359
|
+
hs->state = h5_state_after_attribute_name;
|
|
360
|
+
hs->pos = pos + 1;
|
|
361
|
+
return 1;
|
|
362
|
+
} else if (ch == CHAR_SLASH) {
|
|
363
|
+
hs->token_start = hs->s + hs->pos;
|
|
364
|
+
hs->token_len = pos - hs->pos;
|
|
365
|
+
hs->token_type = ATTR_NAME;
|
|
366
|
+
hs->state = h5_state_self_closing_start_tag;
|
|
367
|
+
hs->pos = pos + 1;
|
|
368
|
+
return 1;
|
|
369
|
+
} else if (ch == CHAR_EQUALS) {
|
|
370
|
+
hs->token_start = hs->s + hs->pos;
|
|
371
|
+
hs->token_len = pos - hs->pos;
|
|
372
|
+
hs->token_type = ATTR_NAME;
|
|
373
|
+
hs->state = h5_state_before_attribute_value;
|
|
374
|
+
hs->pos = pos + 1;
|
|
375
|
+
return 1;
|
|
376
|
+
} else if (ch == CHAR_GT) {
|
|
377
|
+
hs->token_start = hs->s + hs->pos;
|
|
378
|
+
hs->token_len = pos - hs->pos;
|
|
379
|
+
hs->token_type = ATTR_NAME;
|
|
380
|
+
hs->state = h5_state_tag_name_close;
|
|
381
|
+
hs->pos = pos;
|
|
382
|
+
return 1;
|
|
383
|
+
} else {
|
|
384
|
+
pos += 1;
|
|
385
|
+
}
|
|
386
|
+
}
|
|
387
|
+
/* EOF */
|
|
388
|
+
hs->token_start = hs->s + hs->pos;
|
|
389
|
+
hs->token_len = hs->len - hs->pos;
|
|
390
|
+
hs->token_type = ATTR_NAME;
|
|
391
|
+
hs->state = h5_state_eof;
|
|
392
|
+
hs->pos = hs->len;
|
|
393
|
+
return 1;
|
|
394
|
+
}
|
|
395
|
+
|
|
396
|
+
/**
|
|
397
|
+
* 12.2.4.36
|
|
398
|
+
*/
|
|
399
|
+
static int h5_state_after_attribute_name(h5_state_t* hs)
|
|
400
|
+
{
|
|
401
|
+
int c;
|
|
402
|
+
|
|
403
|
+
TRACE();
|
|
404
|
+
c = h5_skip_white(hs);
|
|
405
|
+
switch (c) {
|
|
406
|
+
case CHAR_EOF: {
|
|
407
|
+
return 0;
|
|
408
|
+
}
|
|
409
|
+
case CHAR_SLASH: {
|
|
410
|
+
hs->pos += 1;
|
|
411
|
+
return h5_state_self_closing_start_tag(hs);
|
|
412
|
+
}
|
|
413
|
+
case CHAR_EQUALS: {
|
|
414
|
+
hs->pos += 1;
|
|
415
|
+
return h5_state_before_attribute_value(hs);
|
|
416
|
+
}
|
|
417
|
+
case CHAR_GT: {
|
|
418
|
+
return h5_state_tag_name_close(hs);
|
|
419
|
+
}
|
|
420
|
+
default: {
|
|
421
|
+
return h5_state_attribute_name(hs);
|
|
422
|
+
}
|
|
423
|
+
}
|
|
424
|
+
}
|
|
425
|
+
|
|
426
|
+
/**
|
|
427
|
+
* 12.2.4.37
|
|
428
|
+
*/
|
|
429
|
+
static int h5_state_before_attribute_value(h5_state_t* hs)
|
|
430
|
+
{
|
|
431
|
+
int c;
|
|
432
|
+
TRACE();
|
|
433
|
+
|
|
434
|
+
c = h5_skip_white(hs);
|
|
435
|
+
|
|
436
|
+
if (c == CHAR_EOF) {
|
|
437
|
+
hs->state = h5_state_eof;
|
|
438
|
+
return 0;
|
|
439
|
+
}
|
|
440
|
+
|
|
441
|
+
if (c == CHAR_DOUBLE) {
|
|
442
|
+
return h5_state_attribute_value_double_quote(hs);
|
|
443
|
+
} else if (c == CHAR_SINGLE) {
|
|
444
|
+
return h5_state_attribute_value_single_quote(hs);
|
|
445
|
+
} else if (c == CHAR_TICK) {
|
|
446
|
+
/* NON STANDARD IE */
|
|
447
|
+
return h5_state_attribute_value_back_quote(hs);
|
|
448
|
+
} else {
|
|
449
|
+
return h5_state_attribute_value_no_quote(hs);
|
|
450
|
+
}
|
|
451
|
+
}
|
|
452
|
+
|
|
453
|
+
|
|
454
|
+
static int h5_state_attribute_value_quote(h5_state_t* hs, char qchar)
|
|
455
|
+
{
|
|
456
|
+
const char* idx;
|
|
457
|
+
|
|
458
|
+
TRACE();
|
|
459
|
+
|
|
460
|
+
/* skip initial quote in normal case.
|
|
461
|
+
* don't do this if pos == 0 since it means we have started
|
|
462
|
+
* in a non-data state. given an input of '><foo
|
|
463
|
+
* we want to make 0-length attribute name
|
|
464
|
+
*/
|
|
465
|
+
if (hs->pos > 0) {
|
|
466
|
+
hs->pos += 1;
|
|
467
|
+
}
|
|
468
|
+
|
|
469
|
+
|
|
470
|
+
idx = (const char*) memchr(hs->s + hs->pos, qchar, hs->len - hs->pos);
|
|
471
|
+
if (idx == NULL) {
|
|
472
|
+
hs->token_start = hs->s + hs->pos;
|
|
473
|
+
hs->token_len = hs->len - hs->pos;
|
|
474
|
+
hs->token_type = ATTR_VALUE;
|
|
475
|
+
hs->state = h5_state_eof;
|
|
476
|
+
} else {
|
|
477
|
+
hs->token_start = hs->s + hs->pos;
|
|
478
|
+
hs->token_len = (size_t)(idx - hs->s) - hs->pos;
|
|
479
|
+
hs->token_type = ATTR_VALUE;
|
|
480
|
+
hs->state = h5_state_after_attribute_value_quoted_state;
|
|
481
|
+
hs->pos += hs->token_len + 1;
|
|
482
|
+
}
|
|
483
|
+
return 1;
|
|
484
|
+
}
|
|
485
|
+
|
|
486
|
+
static
|
|
487
|
+
int h5_state_attribute_value_double_quote(h5_state_t* hs)
|
|
488
|
+
{
|
|
489
|
+
TRACE();
|
|
490
|
+
return h5_state_attribute_value_quote(hs, CHAR_DOUBLE);
|
|
491
|
+
}
|
|
492
|
+
|
|
493
|
+
static
|
|
494
|
+
int h5_state_attribute_value_single_quote(h5_state_t* hs)
|
|
495
|
+
{
|
|
496
|
+
TRACE();
|
|
497
|
+
return h5_state_attribute_value_quote(hs, CHAR_SINGLE);
|
|
498
|
+
}
|
|
499
|
+
|
|
500
|
+
static
|
|
501
|
+
int h5_state_attribute_value_back_quote(h5_state_t* hs)
|
|
502
|
+
{
|
|
503
|
+
TRACE();
|
|
504
|
+
return h5_state_attribute_value_quote(hs, CHAR_TICK);
|
|
505
|
+
}
|
|
506
|
+
|
|
507
|
+
static int h5_state_attribute_value_no_quote(h5_state_t* hs)
|
|
508
|
+
{
|
|
509
|
+
char ch;
|
|
510
|
+
size_t pos;
|
|
511
|
+
|
|
512
|
+
TRACE();
|
|
513
|
+
pos = hs->pos;
|
|
514
|
+
while (pos < hs->len) {
|
|
515
|
+
ch = hs->s[pos];
|
|
516
|
+
if (h5_is_white(ch)) {
|
|
517
|
+
hs->token_type = ATTR_VALUE;
|
|
518
|
+
hs->token_start = hs->s + hs->pos;
|
|
519
|
+
hs->token_len = pos - hs->pos;
|
|
520
|
+
hs->pos = pos + 1;
|
|
521
|
+
hs->state = h5_state_before_attribute_name;
|
|
522
|
+
return 1;
|
|
523
|
+
} else if (ch == CHAR_GT) {
|
|
524
|
+
hs->token_type = ATTR_VALUE;
|
|
525
|
+
hs->token_start = hs->s + hs->pos;
|
|
526
|
+
hs->token_len = pos - hs->pos;
|
|
527
|
+
hs->pos = pos;
|
|
528
|
+
hs->state = h5_state_tag_name_close;
|
|
529
|
+
return 1;
|
|
530
|
+
}
|
|
531
|
+
pos += 1;
|
|
532
|
+
}
|
|
533
|
+
TRACE();
|
|
534
|
+
/* EOF */
|
|
535
|
+
hs->state = h5_state_eof;
|
|
536
|
+
hs->token_start = hs->s + hs->pos;
|
|
537
|
+
hs->token_len = hs->len - hs->pos;
|
|
538
|
+
hs->token_type = ATTR_VALUE;
|
|
539
|
+
return 1;
|
|
540
|
+
}
|
|
541
|
+
|
|
542
|
+
/**
|
|
543
|
+
* 12.2.4.41
|
|
544
|
+
*/
|
|
545
|
+
static int h5_state_after_attribute_value_quoted_state(h5_state_t* hs)
|
|
546
|
+
{
|
|
547
|
+
char ch;
|
|
548
|
+
|
|
549
|
+
TRACE();
|
|
550
|
+
if (hs->pos >= hs->len) {
|
|
551
|
+
return 0;
|
|
552
|
+
}
|
|
553
|
+
ch = hs->s[hs->pos];
|
|
554
|
+
if (h5_is_white(ch)) {
|
|
555
|
+
hs->pos += 1;
|
|
556
|
+
return h5_state_before_attribute_name(hs);
|
|
557
|
+
} else if (ch == CHAR_SLASH) {
|
|
558
|
+
hs->pos += 1;
|
|
559
|
+
return h5_state_self_closing_start_tag(hs);
|
|
560
|
+
} else if (ch == CHAR_GT) {
|
|
561
|
+
hs->token_start = hs->s + hs->pos;
|
|
562
|
+
hs->token_len = 1;
|
|
563
|
+
hs->token_type = TAG_NAME_CLOSE;
|
|
564
|
+
hs->pos += 1;
|
|
565
|
+
hs->state = h5_state_data;
|
|
566
|
+
return 1;
|
|
567
|
+
} else {
|
|
568
|
+
return h5_state_before_attribute_name(hs);
|
|
569
|
+
}
|
|
570
|
+
}
|
|
571
|
+
|
|
572
|
+
/**
|
|
573
|
+
* 12.2.4.43
|
|
574
|
+
*/
|
|
575
|
+
static int h5_state_self_closing_start_tag(h5_state_t* hs)
|
|
576
|
+
{
|
|
577
|
+
char ch;
|
|
578
|
+
|
|
579
|
+
TRACE();
|
|
580
|
+
if (hs->pos >= hs->len) {
|
|
581
|
+
return 0;
|
|
582
|
+
}
|
|
583
|
+
ch = hs->s[hs->pos];
|
|
584
|
+
if (ch == CHAR_GT) {
|
|
585
|
+
assert(hs->pos > 0);
|
|
586
|
+
hs->token_start = hs->s + hs->pos -1;
|
|
587
|
+
hs->token_len = 2;
|
|
588
|
+
hs->token_type = TAG_NAME_SELFCLOSE;
|
|
589
|
+
hs->state = h5_state_data;
|
|
590
|
+
hs->pos += 1;
|
|
591
|
+
return 1;
|
|
592
|
+
} else {
|
|
593
|
+
return h5_state_before_attribute_name(hs);
|
|
594
|
+
}
|
|
595
|
+
}
|
|
596
|
+
|
|
597
|
+
/**
|
|
598
|
+
* 12.2.4.44
|
|
599
|
+
*/
|
|
600
|
+
static int h5_state_bogus_comment(h5_state_t* hs)
|
|
601
|
+
{
|
|
602
|
+
const char* idx;
|
|
603
|
+
|
|
604
|
+
TRACE();
|
|
605
|
+
idx = (const char*) memchr(hs->s + hs->pos, CHAR_GT, hs->len - hs->pos);
|
|
606
|
+
if (idx == NULL) {
|
|
607
|
+
hs->token_start = hs->s + hs->pos;
|
|
608
|
+
hs->token_len = hs->len - hs->pos;
|
|
609
|
+
hs->pos = hs->len;
|
|
610
|
+
hs->state = h5_state_eof;
|
|
611
|
+
} else {
|
|
612
|
+
hs->token_start = hs->s + hs->pos;
|
|
613
|
+
hs->token_len = (size_t)(idx - hs->s) - hs->pos;
|
|
614
|
+
hs->pos = (size_t)(idx - hs->s) + 1;
|
|
615
|
+
hs->state = h5_state_data;
|
|
616
|
+
}
|
|
617
|
+
|
|
618
|
+
hs->token_type = TAG_COMMENT;
|
|
619
|
+
return 1;
|
|
620
|
+
}
|
|
621
|
+
|
|
622
|
+
/**
|
|
623
|
+
* 12.2.4.44 ALT
|
|
624
|
+
*/
|
|
625
|
+
static int h5_state_bogus_comment2(h5_state_t* hs)
|
|
626
|
+
{
|
|
627
|
+
const char* idx;
|
|
628
|
+
size_t pos;
|
|
629
|
+
|
|
630
|
+
TRACE();
|
|
631
|
+
pos = hs->pos;
|
|
632
|
+
while (1) {
|
|
633
|
+
idx = (const char*) memchr(hs->s + pos, CHAR_PERCENT, hs->len - pos);
|
|
634
|
+
if (idx == NULL || (idx + 1 >= hs->s + hs->len)) {
|
|
635
|
+
hs->token_start = hs->s + hs->pos;
|
|
636
|
+
hs->token_len = hs->len - hs->pos;
|
|
637
|
+
hs->pos = hs->len;
|
|
638
|
+
hs->token_type = TAG_COMMENT;
|
|
639
|
+
hs->state = h5_state_eof;
|
|
640
|
+
return 1;
|
|
641
|
+
}
|
|
642
|
+
|
|
643
|
+
if (*(idx +1) != CHAR_GT) {
|
|
644
|
+
pos = (size_t)(idx - hs->s) + 1;
|
|
645
|
+
continue;
|
|
646
|
+
}
|
|
647
|
+
|
|
648
|
+
/* ends in %> */
|
|
649
|
+
hs->token_start = hs->s + hs->pos;
|
|
650
|
+
hs->token_len = (size_t)(idx - hs->s) - hs->pos;
|
|
651
|
+
hs->pos = (size_t)(idx - hs->s) + 2;
|
|
652
|
+
hs->state = h5_state_data;
|
|
653
|
+
hs->token_type = TAG_COMMENT;
|
|
654
|
+
return 1;
|
|
655
|
+
}
|
|
656
|
+
}
|
|
657
|
+
|
|
658
|
+
/**
|
|
659
|
+
* 8.2.4.45
|
|
660
|
+
*/
|
|
661
|
+
static int h5_state_markup_declaration_open(h5_state_t* hs)
|
|
662
|
+
{
|
|
663
|
+
size_t remaining;
|
|
664
|
+
|
|
665
|
+
TRACE();
|
|
666
|
+
remaining = hs->len - hs->pos;
|
|
667
|
+
if (remaining >= 7 &&
|
|
668
|
+
/* case insensitive */
|
|
669
|
+
(hs->s[hs->pos + 0] == 'D' || hs->s[hs->pos + 0] == 'd') &&
|
|
670
|
+
(hs->s[hs->pos + 1] == 'O' || hs->s[hs->pos + 1] == 'o') &&
|
|
671
|
+
(hs->s[hs->pos + 2] == 'C' || hs->s[hs->pos + 2] == 'c') &&
|
|
672
|
+
(hs->s[hs->pos + 3] == 'T' || hs->s[hs->pos + 3] == 't') &&
|
|
673
|
+
(hs->s[hs->pos + 4] == 'Y' || hs->s[hs->pos + 4] == 'y') &&
|
|
674
|
+
(hs->s[hs->pos + 5] == 'P' || hs->s[hs->pos + 5] == 'p') &&
|
|
675
|
+
(hs->s[hs->pos + 6] == 'E' || hs->s[hs->pos + 6] == 'e')
|
|
676
|
+
) {
|
|
677
|
+
return h5_state_doctype(hs);
|
|
678
|
+
} else if (remaining >= 7 &&
|
|
679
|
+
/* upper case required */
|
|
680
|
+
hs->s[hs->pos + 0] == '[' &&
|
|
681
|
+
hs->s[hs->pos + 1] == 'C' &&
|
|
682
|
+
hs->s[hs->pos + 2] == 'D' &&
|
|
683
|
+
hs->s[hs->pos + 3] == 'A' &&
|
|
684
|
+
hs->s[hs->pos + 4] == 'T' &&
|
|
685
|
+
hs->s[hs->pos + 5] == 'A' &&
|
|
686
|
+
hs->s[hs->pos + 6] == '['
|
|
687
|
+
) {
|
|
688
|
+
hs->pos += 7;
|
|
689
|
+
return h5_state_cdata(hs);
|
|
690
|
+
} else if (remaining >= 2 &&
|
|
691
|
+
hs->s[hs->pos + 0] == '-' &&
|
|
692
|
+
hs->s[hs->pos + 1] == '-') {
|
|
693
|
+
hs->pos += 2;
|
|
694
|
+
return h5_state_comment(hs);
|
|
695
|
+
}
|
|
696
|
+
|
|
697
|
+
return h5_state_bogus_comment(hs);
|
|
698
|
+
}
|
|
699
|
+
|
|
700
|
+
/**
|
|
701
|
+
* 12.2.4.48
|
|
702
|
+
* 12.2.4.49
|
|
703
|
+
* 12.2.4.50
|
|
704
|
+
* 12.2.4.51
|
|
705
|
+
* state machine spec is confusing since it can only look
|
|
706
|
+
* at one character at a time but simply it's comments end by:
|
|
707
|
+
* 1) EOF
|
|
708
|
+
* 2) ending in -->
|
|
709
|
+
* 3) ending in -!>
|
|
710
|
+
*/
|
|
711
|
+
static int h5_state_comment(h5_state_t* hs)
|
|
712
|
+
{
|
|
713
|
+
char ch;
|
|
714
|
+
const char* idx;
|
|
715
|
+
size_t pos;
|
|
716
|
+
size_t offset;
|
|
717
|
+
const char* end = hs->s + hs->len;
|
|
718
|
+
|
|
719
|
+
TRACE();
|
|
720
|
+
pos = hs->pos;
|
|
721
|
+
while (1) {
|
|
722
|
+
|
|
723
|
+
idx = (const char*) memchr(hs->s + pos, CHAR_DASH, hs->len - pos);
|
|
724
|
+
|
|
725
|
+
/* did not find anything or has less than 3 chars left */
|
|
726
|
+
if (idx == NULL || idx > hs->s + hs->len - 3) {
|
|
727
|
+
hs->state = h5_state_eof;
|
|
728
|
+
hs->token_start = hs->s + hs->pos;
|
|
729
|
+
hs->token_len = hs->len - hs->pos;
|
|
730
|
+
hs->token_type = TAG_COMMENT;
|
|
731
|
+
return 1;
|
|
732
|
+
}
|
|
733
|
+
offset = 1;
|
|
734
|
+
|
|
735
|
+
/* skip all nulls */
|
|
736
|
+
while (idx + offset < end && *(idx + offset) == 0) {
|
|
737
|
+
offset += 1;
|
|
738
|
+
}
|
|
739
|
+
if (idx + offset == end) {
|
|
740
|
+
hs->state = h5_state_eof;
|
|
741
|
+
hs->token_start = hs->s + hs->pos;
|
|
742
|
+
hs->token_len = hs->len - hs->pos;
|
|
743
|
+
hs->token_type = TAG_COMMENT;
|
|
744
|
+
return 1;
|
|
745
|
+
}
|
|
746
|
+
|
|
747
|
+
ch = *(idx + offset);
|
|
748
|
+
if (ch != CHAR_DASH && ch != CHAR_BANG) {
|
|
749
|
+
pos = (size_t)(idx - hs->s) + 1;
|
|
750
|
+
continue;
|
|
751
|
+
}
|
|
752
|
+
|
|
753
|
+
/* need to test */
|
|
754
|
+
#if 0
|
|
755
|
+
/* skip all nulls */
|
|
756
|
+
while (idx + offset < end && *(idx + offset) == 0) {
|
|
757
|
+
offset += 1;
|
|
758
|
+
}
|
|
759
|
+
if (idx + offset == end) {
|
|
760
|
+
hs->state = h5_state_eof;
|
|
761
|
+
hs->token_start = hs->s + hs->pos;
|
|
762
|
+
hs->token_len = hs->len - hs->pos;
|
|
763
|
+
hs->token_type = TAG_COMMENT;
|
|
764
|
+
return 1;
|
|
765
|
+
}
|
|
766
|
+
#endif
|
|
767
|
+
|
|
768
|
+
offset += 1;
|
|
769
|
+
if (idx + offset == end) {
|
|
770
|
+
hs->state = h5_state_eof;
|
|
771
|
+
hs->token_start = hs->s + hs->pos;
|
|
772
|
+
hs->token_len = hs->len - hs->pos;
|
|
773
|
+
hs->token_type = TAG_COMMENT;
|
|
774
|
+
return 1;
|
|
775
|
+
}
|
|
776
|
+
|
|
777
|
+
|
|
778
|
+
ch = *(idx + offset);
|
|
779
|
+
if (ch != CHAR_GT) {
|
|
780
|
+
pos = (size_t)(idx - hs->s) + 1;
|
|
781
|
+
continue;
|
|
782
|
+
}
|
|
783
|
+
offset += 1;
|
|
784
|
+
|
|
785
|
+
/* ends in --> or -!> */
|
|
786
|
+
hs->token_start = hs->s + hs->pos;
|
|
787
|
+
hs->token_len = (size_t)(idx - hs->s) - hs->pos;
|
|
788
|
+
hs->pos = (size_t)(idx + offset - hs->s);
|
|
789
|
+
hs->state = h5_state_data;
|
|
790
|
+
hs->token_type = TAG_COMMENT;
|
|
791
|
+
return 1;
|
|
792
|
+
}
|
|
793
|
+
}
|
|
794
|
+
|
|
795
|
+
/**
 * Scans from hs->pos for the sequence "]]>".
 *
 * The token (type DATA_TEXT) starts at hs->pos.  When "]]>" is found the
 * token stops just before it, hs->pos moves past all three bytes and the
 * next state is h5_state_data.  Otherwise the token covers the rest of the
 * input and the next state is h5_state_eof; hs->pos is left unchanged in
 * that case.
 *
 * Always returns 1.
 */
static int h5_state_cdata(h5_state_t* hs)
{
    const char* rb;
    size_t scan;

    TRACE();

    /* identical for both outcomes; hs->pos is only modified afterwards */
    hs->token_start = hs->s + hs->pos;
    hs->token_type = DATA_TEXT;

    scan = hs->pos;
    for (;;) {
        rb = (const char*) memchr(hs->s + scan, CHAR_RIGHTB, hs->len - scan);

        /* did not find anything or has less than 3 chars left */
        if (rb == NULL || rb > hs->s + hs->len - 3) {
            hs->state = h5_state_eof;
            hs->token_len = hs->len - hs->pos;
            return 1;
        }

        if (rb[1] == CHAR_RIGHTB && rb[2] == CHAR_GT) {
            hs->state = h5_state_data;
            hs->token_len = (size_t)(rb - hs->s) - hs->pos;
            hs->pos = (size_t)(rb - hs->s) + 3;
            return 1;
        }

        /* a ']' that does not start the terminator: search on after it */
        scan = (size_t)(rb - hs->s) + 1;
    }
}
|
|
824
|
+
|
|
825
|
+
/**
|
|
826
|
+
* 8.2.4.52
|
|
827
|
+
* http://www.w3.org/html/wg/drafts/html/master/syntax.html#doctype-state
|
|
828
|
+
*/
|
|
829
|
+
static int h5_state_doctype(h5_state_t* hs)
|
|
830
|
+
{
|
|
831
|
+
const char* idx;
|
|
832
|
+
|
|
833
|
+
TRACE();
|
|
834
|
+
hs->token_start = hs->s + hs->pos;
|
|
835
|
+
hs->token_type = DOCTYPE;
|
|
836
|
+
|
|
837
|
+
idx = (const char*) memchr(hs->s + hs->pos, CHAR_GT, hs->len - hs->pos);
|
|
838
|
+
if (idx == NULL) {
|
|
839
|
+
hs->state = h5_state_eof;
|
|
840
|
+
hs->token_len = hs->len - hs->pos;
|
|
841
|
+
} else {
|
|
842
|
+
hs->state = h5_state_data;
|
|
843
|
+
hs->token_len = (size_t)(idx - hs->s) - hs->pos;
|
|
844
|
+
hs->pos = (size_t)(idx - hs->s) + 1;
|
|
845
|
+
}
|
|
846
|
+
return 1;
|
|
847
|
+
}
|