immunio 1.0.4 → 1.0.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/immunio/agent.rb +9 -9
- data/lib/immunio/authentication.rb +1 -1
- data/lib/immunio/channel.rb +15 -15
- data/lib/immunio/plugins/active_record.rb +3 -3
- data/lib/immunio/plugins/authlogic.rb +3 -3
- data/lib/immunio/plugins/csrf.rb +1 -1
- data/lib/immunio/plugins/devise.rb +1 -1
- data/lib/immunio/plugins/eval.rb +1 -1
- data/lib/immunio/plugins/http_finisher.rb +2 -2
- data/lib/immunio/plugins/http_tracker.rb +1 -1
- data/lib/immunio/plugins/io.rb +7 -7
- data/lib/immunio/plugins/redirect.rb +2 -2
- data/lib/immunio/plugins/warden.rb +5 -6
- data/lib/immunio/processor.rb +7 -7
- data/lib/immunio/request.rb +3 -3
- data/lib/immunio/version.rb +1 -1
- data/lib/immunio/vm.rb +6 -6
- data/lua-hooks/Makefile +49 -6
- data/lua-hooks/lib/boot.lua +49 -277
- metadata +2 -11
- data/lua-hooks/lib/encode.lua +0 -4
- data/lua-hooks/lib/lexers/LICENSE +0 -21
- data/lua-hooks/lib/lexers/bash.lua +0 -134
- data/lua-hooks/lib/lexers/bash_dqstr.lua +0 -59
- data/lua-hooks/lib/lexers/css.lua +0 -101
- data/lua-hooks/lib/lexers/css_attr.lua +0 -13
- data/lua-hooks/lib/lexers/html.lua +0 -113
- data/lua-hooks/lib/lexers/javascript.lua +0 -68
- data/lua-hooks/lib/lexers/lexer.lua +0 -1575
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: immunio
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.4
|
4
|
+
version: 1.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Immunio
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-10
|
11
|
+
date: 2015-11-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rails
|
@@ -417,15 +417,6 @@ files:
|
|
417
417
|
- lua-hooks/ext/sha1/luasha1.c
|
418
418
|
- lua-hooks/ext/sha1/sha1.c
|
419
419
|
- lua-hooks/lib/boot.lua
|
420
|
-
- lua-hooks/lib/encode.lua
|
421
|
-
- lua-hooks/lib/lexers/LICENSE
|
422
|
-
- lua-hooks/lib/lexers/bash.lua
|
423
|
-
- lua-hooks/lib/lexers/bash_dqstr.lua
|
424
|
-
- lua-hooks/lib/lexers/css.lua
|
425
|
-
- lua-hooks/lib/lexers/css_attr.lua
|
426
|
-
- lua-hooks/lib/lexers/html.lua
|
427
|
-
- lua-hooks/lib/lexers/javascript.lua
|
428
|
-
- lua-hooks/lib/lexers/lexer.lua
|
429
420
|
homepage: http://immun.io/
|
430
421
|
licenses:
|
431
422
|
- Immunio
|
data/lua-hooks/lib/lexers/LICENSE
DELETED
@@ -1,21 +0,0 @@
|
|
1
|
-
The MIT License
|
2
|
-
|
3
|
-
Copyright (c) 2007-2015 Mitchell
|
4
|
-
|
5
|
-
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
-
of this software and associated documentation files (the "Software"), to deal
|
7
|
-
in the Software without restriction, including without limitation the rights
|
8
|
-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
-
copies of the Software, and to permit persons to whom the Software is
|
10
|
-
furnished to do so, subject to the following conditions:
|
11
|
-
|
12
|
-
The above copyright notice and this permission notice shall be included in
|
13
|
-
all copies or substantial portions of the Software.
|
14
|
-
|
15
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
-
THE SOFTWARE.
|
data/lua-hooks/lib/lexers/bash.lua
DELETED
@@ -1,134 +0,0 @@
|
|
1
|
-
-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com.
|
2
|
-
-- Copyright 2015 Immunio, Inc.
|
3
|
-
|
4
|
-
-- Shell LPeg lexer.
|
5
|
-
|
6
|
-
-- This is based on the lexer from the Scintillua package, with a lot of extensions
|
7
|
-
-- The goal isn't a complete parser for bash, but a lexer that can extract a useful
|
8
|
-
-- amount of structure to detect tampering. The emphasis is more on common injection
|
9
|
-
-- techniques and lexical structure than actually extracting properly formed bash
|
10
|
-
-- statements. Down the road we may need to go as far as to parse statements, and that
|
11
|
-
-- should be possible at the cost of a lot more complexity.
|
12
|
-
|
13
|
-
local l = require('lexer')
|
14
|
-
local token = l.token
|
15
|
-
local P, R, S = lpeg.P, lpeg.R, lpeg.S
|
16
|
-
|
17
|
-
local M = {_NAME = 'bash'}
|
18
|
-
|
19
|
-
-- Whitespace.
|
20
|
-
local ws = token(l.WHITESPACE, l.space^1)
|
21
|
-
|
22
|
-
local bash_word = (l.alpha + '_') * (l.alnum + '_' + '\\ ' + '.')^0
|
23
|
-
|
24
|
-
|
25
|
-
-- Comments.
|
26
|
-
local comment = token(l.COMMENT, '#' * l.nonnewline^0)
|
27
|
-
|
28
|
-
-- Strings.
|
29
|
-
local sq_str = token('sq_str', l.delimited_range("'", false, true))
|
30
|
-
local dq_str = token('dq_str', l.delimited_range('"'))
|
31
|
-
local ex_str = token('ex_str', l.delimited_range('`'))
|
32
|
-
local heredoc = token('heredoc', '<<' * P(function(input, index)
|
33
|
-
local s, e, _, delimiter =
|
34
|
-
input:find('%-?(["\']?)([%a_][%w_]*)%1[\n\r\f;]+', index)
|
35
|
-
if s == index and delimiter then
|
36
|
-
local _, e = input:find('[\n\r\f]+'..delimiter, e)
|
37
|
-
return e and e + 1 or #input + 1
|
38
|
-
end
|
39
|
-
end))
|
40
|
-
local bash_string = sq_str + dq_str + ex_str + heredoc
|
41
|
-
|
42
|
-
-- Numbers.
|
43
|
-
local number = token(l.NUMBER, l.float + l.integer)
|
44
|
-
|
45
|
-
-- Keywords.
|
46
|
-
local keyword = token(l.KEYWORD, l.word_match({
|
47
|
-
'if', 'then', 'elif', 'else', 'fi', 'case', 'in', 'esac', 'while', 'for',
|
48
|
-
'do', 'done', 'continue', 'local', 'return', 'select',
|
49
|
-
-- Operators. These could be split into individual tokens...
|
50
|
-
'-a', '-b', '-c', '-d', '-e', '-f', '-g', '-h', '-k', '-p', '-r', '-s', '-t',
|
51
|
-
'-u', '-w', '-x', '-O', '-G', '-L', '-S', '-N', '-nt', '-ot', '-ef', '-o',
|
52
|
-
'-z', '-n', '-eq', '-ne', '-lt', '-le', '-gt', '-ge'
|
53
|
-
}, '-'))
|
54
|
-
|
55
|
-
-- Common commands ... this is not exhaustive nor does it need to be.
|
56
|
-
local command = token("command", l.word_match({
|
57
|
-
'awk', 'cat', 'cmp', 'cp', 'curl', 'cut', 'date', 'find', 'grep', 'gunzip', 'gvim',
|
58
|
-
'gzip', 'kill', 'lua', 'make', 'mkdir', 'mv', 'php', 'pkill', 'python', 'rm',
|
59
|
-
'rmdir', 'rsync', 'ruby', 'scp', 'sed', 'sleep', 'ssh', 'sudo', 'tar', 'unlink',
|
60
|
-
'wget', 'zip'
|
61
|
-
}, '-'))
|
62
|
-
|
63
|
-
-- Builtins
|
64
|
-
local builtin = token("builtin", l.word_match({
|
65
|
-
'alias', 'bind', 'builtin', 'caller', 'command', 'declare', 'echo', 'enable',
|
66
|
-
'help', 'let', 'local', 'logout', 'mapfile', 'printf', 'read', 'readarray',
|
67
|
-
'source', 'type', 'typeset', 'ulimit', 'unalias',
|
68
|
-
}, '-'))
|
69
|
-
|
70
|
-
-- Filenames. This is a bit sloppy, but tries to discern filenames from other identifiers
|
71
|
-
-- Very much a case of R&D 'suck it and see'
|
72
|
-
local filename = token("filename", P('/')^0 * (bash_word + '.') * (
|
73
|
-
'/' + bash_word + '.' )^0 * ('.' * bash_word )^0 )
|
74
|
-
|
75
|
-
local ip = (l.integer * P('.') * l.integer * P('.') * l.integer * P('.') * l.integer)
|
76
|
-
|
77
|
-
local protocol = ((P('https') + 'http' + 'ftp' + 'irc') * '://') + 'mailto:'
|
78
|
-
local remainder = ((1-S'\r\n\f\t\v ,."}])') + (S',."}])' * (1-S'\r\n\f\t\v ')))^0
|
79
|
-
local url = protocol * remainder
|
80
|
-
|
81
|
-
-- Identifiers.
|
82
|
-
local identifier = token(l.IDENTIFIER, url + ip + bash_word)
|
83
|
-
|
84
|
-
-- Variables.
|
85
|
-
local ex_variable = token("ex_variable",
|
86
|
-
'$' * l.delimited_range('()', true, true))
|
87
|
-
|
88
|
-
local variable = token(l.VARIABLE,
|
89
|
-
'$' * (S('!#?*@$') + l.digit^1 + bash_word +
|
90
|
-
l.delimited_range('{}', true, true)))
|
91
|
-
|
92
|
-
local var = ex_variable + variable
|
93
|
-
|
94
|
-
-- Operators. These could be split into individual tokens...
|
95
|
-
local operator = token(l.OPERATOR, S('=!<>+-/*^&|~.,:;?()[]{}'))
|
96
|
-
|
97
|
-
M._rules = {
|
98
|
-
{'whitespace', ws},
|
99
|
-
{'keyword', keyword},
|
100
|
-
{'builtin', builtin},
|
101
|
-
{'command', command},
|
102
|
-
{'identifier', identifier},
|
103
|
-
{'filename', filename},
|
104
|
-
{'string', bash_string},
|
105
|
-
{'comment', comment},
|
106
|
-
{'number', number},
|
107
|
-
{'variable', var},
|
108
|
-
{'operator', operator},
|
109
|
-
}
|
110
|
-
|
111
|
-
-- This is the main function for lexing bash data. It recurses and uses
|
112
|
-
-- the dqstr sub-lexer instance provided (we don't instantiate it directly
|
113
|
-
-- to allow the caller to cache the instance and avoid recompiling the grammar)
|
114
|
-
function M.lex_recursive( self, str, bash_dqstr_lexer )
|
115
|
-
local tokens = self:lex(str)
|
116
|
-
for i = 1, #tokens do
|
117
|
-
if tokens[i]['token'] == "ex_str" then
|
118
|
-
tokens[i]['val'] = self:lex_recursive(string.sub(tokens[i]['val'], 2, -2), bash_dqstr_lexer)
|
119
|
-
elseif tokens[i]['token'] == "ex_variable" then
|
120
|
-
tokens[i]['val'] = self:lex_recursive(string.sub(tokens[i]['val'], 3, -2), bash_dqstr_lexer)
|
121
|
-
elseif tokens[i]['token'] == "dq_str" then
|
122
|
-
tokens[i]['val'] =
|
123
|
-
bash_dqstr_lexer:lex_recursive(string.sub(tokens[i]['val'], 2, -2), self)
|
124
|
-
elseif tokens[i]['token'] == "heredoc" then
|
125
|
-
tokens[i]['val'] =
|
126
|
-
bash_dqstr_lexer:lex_recursive(tokens[i]['val'], self)
|
127
|
-
end
|
128
|
-
end
|
129
|
-
return tokens
|
130
|
-
end
|
131
|
-
|
132
|
-
return M
|
133
|
-
|
134
|
-
|
data/lua-hooks/lib/lexers/bash_dqstr.lua
DELETED
@@ -1,59 +0,0 @@
|
|
1
|
-
-- Copyright (C) 2015 Immunio, Inc.
|
2
|
-
|
3
|
-
-- Lexer for bash magic double quotes
|
4
|
-
|
5
|
-
-- NOTE: not covered by Scintillua MIT license in this directory.
|
6
|
-
|
7
|
-
-- While our lexer has the ability to embed this sort of thing as a child of another lexer
|
8
|
-
-- I didn't bother here due to the recursion; we need to lex the parent (bash) language
|
9
|
-
-- for some tokens which would be very complex at best. It's cleaner to use two lexers
|
10
|
-
-- and handle the recursion in higher level lua at a minute performance cost.
|
11
|
-
|
12
|
-
local l = require('lexer')
|
13
|
-
local token = l.token
|
14
|
-
local P, R, S = lpeg.P, lpeg.R, lpeg.S
|
15
|
-
|
16
|
-
local M = {_NAME = 'bash_dqstr'}
|
17
|
-
|
18
|
-
-- Generic token.
|
19
|
-
local bash_word = (l.alpha + '_') * (l.alnum + '_' + '\\ ')^0
|
20
|
-
|
21
|
-
-- Strings.
|
22
|
-
-- Shell substitution.
|
23
|
-
local ex_str = token('ex_str', l.delimited_range('`'))
|
24
|
-
|
25
|
-
-- Other string data
|
26
|
-
local bash_string = token('str_data', (l.any - '$' - '`')^1)
|
27
|
-
|
28
|
-
-- Variables.
|
29
|
-
-- Shell Substitution.
|
30
|
-
local ex_variable = token("ex_variable",
|
31
|
-
'$' * l.delimited_range('()', true, true))
|
32
|
-
-- Other variables
|
33
|
-
local variable = token(l.VARIABLE,
|
34
|
-
'$' * (S('!#?*@$') + l.digit^1 + bash_word +
|
35
|
-
l.delimited_range('{}', true, true)))
|
36
|
-
|
37
|
-
local var = ex_variable + variable
|
38
|
-
|
39
|
-
M._rules = {
|
40
|
-
{'variable', var},
|
41
|
-
{'ex_str', ex_str},
|
42
|
-
{'string', bash_string},
|
43
|
-
}
|
44
|
-
|
45
|
-
function M.lex_recursive( self, str, bash_lexer )
|
46
|
-
local tokens = self:lex(str)
|
47
|
-
for i = 1, #tokens do
|
48
|
-
if tokens[i]['token'] == "ex_str" then
|
49
|
-
tokens[i]['val'] = bash_lexer:lex_recursive(string.sub(tokens[i]['val'], 2, -2), self)
|
50
|
-
elseif tokens[i]['token'] == "ex_variable" then
|
51
|
-
tokens[i]['val'] = bash_lexer:lex_recursive(string.sub(tokens[i]['val'], 3, -2), self)
|
52
|
-
end
|
53
|
-
end
|
54
|
-
return tokens
|
55
|
-
end
|
56
|
-
|
57
|
-
return M
|
58
|
-
|
59
|
-
|
data/lua-hooks/lib/lexers/css.lua
DELETED
@@ -1,101 +0,0 @@
|
|
1
|
-
-- Copyright 2006-2010 Mitchell Foral mitchell<att>caladbolg.net. See LICENSE.
|
2
|
-
-- CSS LPeg lexer
|
3
|
-
local M = {_NAME = 'css'}
|
4
|
-
|
5
|
-
local l = require('lexer')
|
6
|
-
local token, parent_token, word_match, delimited_range =
|
7
|
-
l.token, l.parent_token, l.word_match, l.delimited_range
|
8
|
-
|
9
|
-
local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V
|
10
|
-
|
11
|
-
local ws = token('whitespace', l.space^1)
|
12
|
-
|
13
|
-
-- comments
|
14
|
-
local comment = token('comment', '/*' * (l.any - '*/')^0 * P('*/')^-1)
|
15
|
-
|
16
|
-
local word_char = l.alnum + S('_-')
|
17
|
-
local identifier = (l.alpha + '-')^1 * word_char^0
|
18
|
-
|
19
|
-
-- strings
|
20
|
-
local sq_str = delimited_range("'", '\\', true)
|
21
|
-
local dq_str = delimited_range('"', '\\', true)
|
22
|
-
local string = token('string', sq_str + dq_str)
|
23
|
-
|
24
|
-
local colon = token('operator', ':')
|
25
|
-
local semicolon = token('operator', ';')
|
26
|
-
local comma = token('operator', ',')
|
27
|
-
local obrace = token('operator', '{')
|
28
|
-
local cbrace = token('operator', '}')
|
29
|
-
local bang = token('operator', '!')
|
30
|
-
|
31
|
-
-- selectors
|
32
|
-
local attribute = '[' * word_char^1 * (S('|~')^-1 * '=' * (identifier + sq_str + dq_str))^-1 * ']'
|
33
|
-
local class_id_selector = identifier^-1 * S('.#') * identifier
|
34
|
-
local pseudoclass = word_match({
|
35
|
-
'first-letter', 'first-line', 'link', 'active', 'visited',
|
36
|
-
'first-child', 'focus', 'hover', 'lang', 'before', 'after',
|
37
|
-
'left', 'right', 'first'
|
38
|
-
}, '-', true)
|
39
|
-
local selector = P('*') * ws + (class_id_selector + identifier + '*') * attribute^-1
|
40
|
-
selector = token('selector', selector * (ws * selector)^0) *
|
41
|
-
(token('selector', ':' * pseudoclass) + token('default_selector', ':' * word_char^1))^-1
|
42
|
-
selector = selector * (ws^0 * (comma + token('selector', S('>+*'))) * ws^0 * selector)^0
|
43
|
-
|
44
|
-
-- css properties and values
|
45
|
-
local property_name = token('property_name', word_char^1)
|
46
|
-
local value = token('value', bang^0 * word_char^1)
|
47
|
-
|
48
|
-
-- colors, units, numbers, and urls
|
49
|
-
local hexcolor = token('color', '#' * l.xdigit * l.xdigit * l.xdigit * (l.xdigit * l.xdigit * l.xdigit)^-1)
|
50
|
-
local rgbunit = (l.digit^1 * P('%')^-1)
|
51
|
-
local rgbcolor = token('color', word_match({'rgb'}, nil, true) * '(' * rgbunit * ',' * rgbunit * ',' * rgbunit * ')')
|
52
|
-
local color = hexcolor + rgbcolor
|
53
|
-
local unit = word_match({
|
54
|
-
'pt', 'mm', 'cm', 'pc', 'in', 'px', 'em', 'ex', 'deg',
|
55
|
-
'rad', 'grad', 'ms', 's', 'Hz', 'kHz'
|
56
|
-
}, nil, true)
|
57
|
-
unit = token('unit', unit + '%')
|
58
|
-
local css_float = l.digit^0 * '.' * l.digit^1 + l.digit^1 * '.' * l.digit^0 + l.digit^1
|
59
|
-
local number = token('number', S('+-')^-1 * css_float) * unit^-1
|
60
|
-
local func = parent_token('function', token('function_name', identifier) * token('function_param', delimited_range('()', true, false, true)))
|
61
|
-
-- declaration block
|
62
|
-
local block_default_char = token('default_block_char', (l.any - '}')^1)
|
63
|
-
local property_value = parent_token('property_value', string + number + color + func + value)
|
64
|
-
local property_values = { property_value * (ws * property_value)^0 * (ws^0 * comma * ws^0 * V(1))^0 }
|
65
|
-
local declaration_value = colon * ws^0 * property_values * ws^0 * semicolon^0
|
66
|
-
local declaration_property = property_name * ws^0
|
67
|
-
local declaration = parent_token('declaration', (declaration_property * (declaration_value + block_default_char)) + comment + block_default_char)
|
68
|
-
local declaration_block = parent_token('declaration_block', obrace * ws^0 * declaration * (ws * declaration)^0 * ws^0 * cbrace^-1)
|
69
|
-
|
70
|
-
local css_element = selector * ws^0 * declaration_block^-1
|
71
|
-
|
72
|
-
-- at rules
|
73
|
-
local at_rule_name = token('at_rule_name', '@' * word_match({
|
74
|
-
'import', 'media', 'page', 'font-face', 'charset'
|
75
|
-
}, '-', true))
|
76
|
-
local at_rule_arg = token('at_rule_arg', word_match({
|
77
|
-
'all', 'aural', 'braille', 'embossed', 'handheld', 'print',
|
78
|
-
'projection', 'screen', 'tty', 'tv'
|
79
|
-
}, nil, true))
|
80
|
-
local at_rule = parent_token('at_rule', at_rule_name * (ws * (at_rule_arg + func + string) )^-1)
|
81
|
-
|
82
|
-
-- Immunio marker
|
83
|
-
local marker = l.token('marker', P('{immunio-var:') * l.integer * ':' * l.xdigit^1 * '}')
|
84
|
-
|
85
|
-
M._rules = {
|
86
|
-
{'whitespace', ws},
|
87
|
-
{'comment', comment},
|
88
|
-
{'marker', marker},
|
89
|
-
{'at_rule', at_rule},
|
90
|
-
{'string', string},
|
91
|
-
{'css_element', css_element},
|
92
|
-
}
|
93
|
-
M.declaration = declaration -- so we can access it in sub-lexer for attrs
|
94
|
-
|
95
|
-
M._tokenstyles = {
|
96
|
-
}
|
97
|
-
|
98
|
-
M._foldsymbols = {
|
99
|
-
}
|
100
|
-
|
101
|
-
return M
|
data/lua-hooks/lib/lexers/css_attr.lua
DELETED
@@ -1,13 +0,0 @@
|
|
1
|
-
-- Lexer for CSS style attributes. These are slightly different as we need to
|
2
|
-
-- start lexing inside a declaration rather than at the selector level...
|
3
|
-
M = require('css')
|
4
|
-
-- For attributes, remove the css_element rule which includes
|
5
|
-
-- selector and delaration block tokens
|
6
|
-
for k,v in ipairs(M._rules) do
|
7
|
-
if v[1] == 'css_element' then
|
8
|
-
M._rules[k] = nil
|
9
|
-
end
|
10
|
-
end
|
11
|
-
-- Instead insert a top level token for declarations.
|
12
|
-
table.insert(M._rules, {'declaration', M.declaration})
|
13
|
-
return M
|
data/lua-hooks/lib/lexers/html.lua
DELETED
@@ -1,113 +0,0 @@
|
|
1
|
-
-- Copyright (C) 2015 Immunio, Inc.
|
2
|
-
|
3
|
-
-- HTML: Simple h5 like HTML lexer for Immun.io.
|
4
|
-
|
5
|
-
-- NOTE: not covered by Scintillua MIT license in this directory.
|
6
|
-
|
7
|
-
local l = require('lexer')
|
8
|
-
local token, parent_token, word_match = l.token, l.parent_token, l.word_match
|
9
|
-
local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V
|
10
|
-
|
11
|
-
local M = {_NAME = 'html'}
|
12
|
-
|
13
|
-
local case_insensitive_tags = true
|
14
|
-
|
15
|
-
-- Whitespace.
|
16
|
-
local ws = l.space^1
|
17
|
-
-- This is broad to both accept our placeholders and be very liberal about what may be
|
18
|
-
-- interpreted as an attribute to ensure we escape attributes fairly aggressively.
|
19
|
-
local element_chars = (l.any - '<' - '>' - '=' - '"' - "'" - ws)^1
|
20
|
-
|
21
|
-
-- Comments.
|
22
|
-
local comment = token(l.COMMENT, '<!--' * (l.any - '-->')^0 * P('-->'))
|
23
|
-
|
24
|
-
-- IE Conditional Comments.
|
25
|
-
local ie_condcomment_hidden_open = token(l.COMMENT, P('<!--[') * (l.any - ']>')^0 * P(']>'))
|
26
|
-
local ie_condcomment_hidden_close = token(l.COMMENT, P('<![') * (l.any - ']-->')^0 * P(']-->'))
|
27
|
-
local ie_condcomment_revealed = token(l.COMMENT, P('<![') * (l.any - '>')^0 * P('>'))
|
28
|
-
local condcomment = token('condcomment', ie_condcomment_hidden_open + ie_condcomment_hidden_close + ie_condcomment_revealed)
|
29
|
-
|
30
|
-
-- Strings.
|
31
|
-
local sq_str = l.delimited_range("'")
|
32
|
-
local dq_str = l.delimited_range('"')
|
33
|
-
local string = sq_str + dq_str
|
34
|
-
|
35
|
-
-- Attributes. Individual recognition is handled in our XSS processing code.
|
36
|
-
local attr_name = token('attr_name', element_chars - '=')
|
37
|
-
local attr_value = token('attr_value', string + element_chars)
|
38
|
-
local attribute = parent_token('attribute', attr_name * '=' * attr_value)
|
39
|
-
|
40
|
-
-- Tags.
|
41
|
-
local tag_name = token('tag_name', element_chars - '/')
|
42
|
-
local tag_data = token('tag_data', (l.any - l.space - '>')^1 ) -- crap in a tag
|
43
|
-
|
44
|
-
-- XXX how should we handle void tags... right now they are an unmatched tag_open
|
45
|
-
local tag_open = parent_token('tag_open', P('<') * tag_name * ( (ws * attribute) + ( tag_data ) + ws )^0 * (P('>') + '/>') )
|
46
|
-
local tag_close = parent_token('tag_close', P('</') * tag_name * ( ( tag_data ) + ws )^0 * '>')
|
47
|
-
|
48
|
-
-- Special case for script and style tags.
|
49
|
-
local style_tag_name = token("tag_name", word_match({'style'}, nil, case_insensitive_tags))
|
50
|
-
local style_tag_open = parent_token("tag_open", P('<') * style_tag_name * ((ws * attribute) + tag_data)^0 * P('>'))
|
51
|
-
local style_tag_close = parent_token("tag_close", P('</') * style_tag_name * tag_data^0 * '>')
|
52
|
-
local style_data = token("style_data", (l.any - style_tag_close)^0)
|
53
|
-
local style_tag = parent_token('style_tag', style_tag_open * style_data * style_tag_close)
|
54
|
-
|
55
|
-
local script_tag_name = token("tag_name", word_match({'script'}, nil, case_insensitive_tags))
|
56
|
-
local script_tag_open = parent_token("tag_open", P('<') * script_tag_name * ((ws * attribute) + tag_data)^0 * P('>'))
|
57
|
-
local script_tag_close = parent_token("tag_close", P('</') * script_tag_name * tag_data^0 * '>')
|
58
|
-
local script_data = token("script_data", (l.any - script_tag_close)^0)
|
59
|
-
local script_tag = parent_token('script_tag', script_tag_open * script_data * script_tag_close)
|
60
|
-
|
61
|
-
-- Top level rules
|
62
|
-
|
63
|
-
-- Note: the ordering is important here as <script> and <style> have to supercede tag_open...
|
64
|
-
local tag = style_tag + script_tag + tag_open + tag_close
|
65
|
-
|
66
|
-
-- Entities.
|
67
|
-
local entity = token('entity', '&' * (l.any - l.space - ';' - '<' - '>' - "'" - '"' - "/" )^1 * ';')
|
68
|
-
|
69
|
-
-- Doctype.
|
70
|
-
local doctype = token('doctype', '<!' *
|
71
|
-
word_match({'doctype'}, nil, case_insensitive_tags) *
|
72
|
-
(l.any - '>')^1 * '>')
|
73
|
-
|
74
|
-
-- Data between tags
|
75
|
-
local data = token('data', (l.any - '<')^1)
|
76
|
-
|
77
|
-
M._rules = {
|
78
|
-
{'condcomment', condcomment}, -- must precede comment
|
79
|
-
{'comment', comment},
|
80
|
-
{'doctype', doctype},
|
81
|
-
{'tag', tag},
|
82
|
-
{'entity', entity},
|
83
|
-
{'data', data},
|
84
|
-
}
|
85
|
-
|
86
|
-
M._tokenstyles = {
|
87
|
-
}
|
88
|
-
|
89
|
-
M._foldsymbols = {
|
90
|
-
}
|
91
|
-
|
92
|
-
M.unlex_rules = {
|
93
|
-
["tag_open"] = {
|
94
|
-
["prefix"] = "<",
|
95
|
-
["suffix"] = ">",
|
96
|
-
},
|
97
|
-
["tag_close"] = {
|
98
|
-
["prefix"] = "</",
|
99
|
-
["suffix"] = ">",
|
100
|
-
},
|
101
|
-
["attribute"] = {
|
102
|
-
["prefix"] = " ",
|
103
|
-
},
|
104
|
-
["tag_data"] = {
|
105
|
-
["prefix"] = " ",
|
106
|
-
},
|
107
|
-
["attr_name"] = {
|
108
|
-
["suffix"] = "=",
|
109
|
-
},
|
110
|
-
}
|
111
|
-
|
112
|
-
|
113
|
-
return M
|