immunio 1.0.4 → 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/immunio/agent.rb +9 -9
- data/lib/immunio/authentication.rb +1 -1
- data/lib/immunio/channel.rb +15 -15
- data/lib/immunio/plugins/active_record.rb +3 -3
- data/lib/immunio/plugins/authlogic.rb +3 -3
- data/lib/immunio/plugins/csrf.rb +1 -1
- data/lib/immunio/plugins/devise.rb +1 -1
- data/lib/immunio/plugins/eval.rb +1 -1
- data/lib/immunio/plugins/http_finisher.rb +2 -2
- data/lib/immunio/plugins/http_tracker.rb +1 -1
- data/lib/immunio/plugins/io.rb +7 -7
- data/lib/immunio/plugins/redirect.rb +2 -2
- data/lib/immunio/plugins/warden.rb +5 -6
- data/lib/immunio/processor.rb +7 -7
- data/lib/immunio/request.rb +3 -3
- data/lib/immunio/version.rb +1 -1
- data/lib/immunio/vm.rb +6 -6
- data/lua-hooks/Makefile +49 -6
- data/lua-hooks/lib/boot.lua +49 -277
- metadata +2 -11
- data/lua-hooks/lib/encode.lua +0 -4
- data/lua-hooks/lib/lexers/LICENSE +0 -21
- data/lua-hooks/lib/lexers/bash.lua +0 -134
- data/lua-hooks/lib/lexers/bash_dqstr.lua +0 -59
- data/lua-hooks/lib/lexers/css.lua +0 -101
- data/lua-hooks/lib/lexers/css_attr.lua +0 -13
- data/lua-hooks/lib/lexers/html.lua +0 -113
- data/lua-hooks/lib/lexers/javascript.lua +0 -68
- data/lua-hooks/lib/lexers/lexer.lua +0 -1575
metadata CHANGED

```diff
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: immunio
 version: !ruby/object:Gem::Version
-  version: 1.0.4
+  version: 1.0.5
 platform: ruby
 authors:
 - Immunio
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-10
+date: 2015-11-10 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rails
@@ -417,15 +417,6 @@ files:
 - lua-hooks/ext/sha1/luasha1.c
 - lua-hooks/ext/sha1/sha1.c
 - lua-hooks/lib/boot.lua
-- lua-hooks/lib/encode.lua
-- lua-hooks/lib/lexers/LICENSE
-- lua-hooks/lib/lexers/bash.lua
-- lua-hooks/lib/lexers/bash_dqstr.lua
-- lua-hooks/lib/lexers/css.lua
-- lua-hooks/lib/lexers/css_attr.lua
-- lua-hooks/lib/lexers/html.lua
-- lua-hooks/lib/lexers/javascript.lua
-- lua-hooks/lib/lexers/lexer.lua
 homepage: http://immun.io/
 licenses:
 - Immunio
```
data/lua-hooks/lib/lexers/LICENSE DELETED

```diff
@@ -1,21 +0,0 @@
-The MIT License
-
-Copyright (c) 2007-2015 Mitchell
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
```
data/lua-hooks/lib/lexers/bash.lua DELETED

```diff
@@ -1,134 +0,0 @@
--- Copyright 2006-2015 Mitchell mitchell.att.foicica.com.
--- Copyright 2015 Immunio, Inc.
-
--- Shell LPeg lexer.
-
--- This is based on the lexer from the Scintillua package, with a lot of extensions.
--- The goal isn't a complete parser for bash, but a lexer that can extract a useful
--- amount of structure to detect tampering. The emphasis is more on common injection
--- techniques and lexical structure than actually extracting properly formed bash
--- statements. Down the road we may need to go as far as to parse statements, and that
--- should be possible at the cost of a lot more complexity.
-
-local l = require('lexer')
-local token = l.token
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
-
-local M = {_NAME = 'bash'}
-
--- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
-local bash_word = (l.alpha + '_') * (l.alnum + '_' + '\\ ' + '.')^0
-
--- Comments.
-local comment = token(l.COMMENT, '#' * l.nonnewline^0)
-
--- Strings.
-local sq_str = token('sq_str', l.delimited_range("'", false, true))
-local dq_str = token('dq_str', l.delimited_range('"'))
-local ex_str = token('ex_str', l.delimited_range('`'))
-local heredoc = token('heredoc', '<<' * P(function(input, index)
-  local s, e, _, delimiter =
-    input:find('%-?(["\']?)([%a_][%w_]*)%1[\n\r\f;]+', index)
-  if s == index and delimiter then
-    local _, e = input:find('[\n\r\f]+'..delimiter, e)
-    return e and e + 1 or #input + 1
-  end
-end))
-local bash_string = sq_str + dq_str + ex_str + heredoc
-
--- Numbers.
-local number = token(l.NUMBER, l.float + l.integer)
-
--- Keywords.
-local keyword = token(l.KEYWORD, l.word_match({
-  'if', 'then', 'elif', 'else', 'fi', 'case', 'in', 'esac', 'while', 'for',
-  'do', 'done', 'continue', 'local', 'return', 'select',
-  -- Operators. These could be split into individual tokens...
-  '-a', '-b', '-c', '-d', '-e', '-f', '-g', '-h', '-k', '-p', '-r', '-s', '-t',
-  '-u', '-w', '-x', '-O', '-G', '-L', '-S', '-N', '-nt', '-ot', '-ef', '-o',
-  '-z', '-n', '-eq', '-ne', '-lt', '-le', '-gt', '-ge'
-}, '-'))
-
--- Common commands ... this is not exhaustive nor does it need to be.
-local command = token("command", l.word_match({
-  'awk', 'cat', 'cmp', 'cp', 'curl', 'cut', 'date', 'find', 'grep', 'gunzip', 'gvim',
-  'gzip', 'kill', 'lua', 'make', 'mkdir', 'mv', 'php', 'pkill', 'python', 'rm',
-  'rmdir', 'rsync', 'ruby', 'scp', 'sed', 'sleep', 'ssh', 'sudo', 'tar', 'unlink',
-  'wget', 'zip'
-}, '-'))
-
--- Builtins
-local builtin = token("builtin", l.word_match({
-  'alias', 'bind', 'builtin', 'caller', 'command', 'declare', 'echo', 'enable',
-  'help', 'let', 'local', 'logout', 'mapfile', 'printf', 'read', 'readarray',
-  'source', 'type', 'typeset', 'ulimit', 'unalias',
-}, '-'))
-
--- Filenames. This is a bit sloppy, but tries to discern filenames from other identifiers.
--- Very much a case of R&D 'suck it and see'.
-local filename = token("filename", P('/')^0 * (bash_word + '.') * (
-  '/' + bash_word + '.' )^0 * ('.' * bash_word )^0 )
-
-local ip = (l.integer * P('.') * l.integer * P('.') * l.integer * P('.') * l.integer)
-
-local protocol = ((P('https') + 'http' + 'ftp' + 'irc') * '://') + 'mailto:'
-local remainder = ((1-S'\r\n\f\t\v ,."}])') + (S',."}])' * (1-S'\r\n\f\t\v ')))^0
-local url = protocol * remainder
-
--- Identifiers.
-local identifier = token(l.IDENTIFIER, url + ip + bash_word)
-
--- Variables.
-local ex_variable = token("ex_variable",
-  '$' * l.delimited_range('()', true, true))
-
-local variable = token(l.VARIABLE,
-  '$' * (S('!#?*@$') + l.digit^1 + bash_word +
-    l.delimited_range('{}', true, true)))
-
-local var = ex_variable + variable
-
--- Operators. These could be split into individual tokens...
-local operator = token(l.OPERATOR, S('=!<>+-/*^&|~.,:;?()[]{}'))
-
-M._rules = {
-  {'whitespace', ws},
-  {'keyword', keyword},
-  {'builtin', builtin},
-  {'command', command},
-  {'identifier', identifier},
-  {'filename', filename},
-  {'string', bash_string},
-  {'comment', comment},
-  {'number', number},
-  {'variable', var},
-  {'operator', operator},
-}
-
--- This is the main function for lexing bash data. It recurses and uses
--- the dqstr sub-lexer instance provided (we don't instantiate it directly
--- to allow the caller to cache the instance and avoid recompiling the grammar).
-function M.lex_recursive( self, str, bash_dqstr_lexer )
-  local tokens = self:lex(str)
-  for i = 1, #tokens do
-    if tokens[i]['token'] == "ex_str" then
-      tokens[i]['val'] = self:lex_recursive(string.sub(tokens[i]['val'], 2, -2), bash_dqstr_lexer)
-    elseif tokens[i]['token'] == "ex_variable" then
-      tokens[i]['val'] = self:lex_recursive(string.sub(tokens[i]['val'], 3, -2), bash_dqstr_lexer)
-    elseif tokens[i]['token'] == "dq_str" then
-      tokens[i]['val'] =
-        bash_dqstr_lexer:lex_recursive(string.sub(tokens[i]['val'], 2, -2), self)
-    elseif tokens[i]['token'] == "heredoc" then
-      tokens[i]['val'] =
-        bash_dqstr_lexer:lex_recursive(tokens[i]['val'], self)
-    end
-  end
-  return tokens
-end
-
-return M
```
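The deleted lexer's entry point is `M.lex_recursive`, which re-lexes backtick substitutions, `$(...)` substitutions, double-quoted strings, and heredocs. A minimal sketch of how it might be driven, assuming the bundled `lexer` module exposes a Scintillua-style `load()` and that `lex()` returns the `{token = ..., val = ...}` records the function above manipulates (both are assumptions; the loader itself is not shown in this diff):

```lua
-- Hypothetical driver for the deleted bash lexer. `lexer.load` and the
-- token-record shape are assumptions based on the Scintillua lineage;
-- only the behavior of M.lex_recursive comes from the source above.
local l = require('lexer')
local bash = l.load('bash')              -- grammar from bash.lua
local bash_dqstr = l.load('bash_dqstr')  -- companion double-quote lexer

-- Substitutions come back as nested token lists: the ex_str value for
-- `id -u` is itself re-lexed as bash, per the recursion above.
local tokens = bash:lex_recursive('echo "uid: `id -u`" > /tmp/out', bash_dqstr)
for i = 1, #tokens do
  local t = tokens[i]
  print(t.token, type(t.val) == 'table' and '<nested tokens>' or t.val)
end
```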
data/lua-hooks/lib/lexers/bash_dqstr.lua DELETED

```diff
@@ -1,59 +0,0 @@
--- Copyright (C) 2015 Immunio, Inc.
-
--- Lexer for bash magic double quotes
-
--- NOTE: not covered by Scintillua MIT license in this directory.
-
--- While our lexer has the ability to embed this sort of thing as a child of another lexer
--- I didn't bother here due to the recursion; we need to lex the parent (bash) language
--- for some tokens which would be very complex at best. It's cleaner to use two lexers
--- and handle the recursion in higher level lua at a minute performance cost.
-
-local l = require('lexer')
-local token = l.token
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
-
-local M = {_NAME = 'bash_dqstr'}
-
--- Generic token.
-local bash_word = (l.alpha + '_') * (l.alnum + '_' + '\\ ')^0
-
--- Strings.
--- Shell substitution.
-local ex_str = token('ex_str', l.delimited_range('`'))
-
--- Other string data
-local bash_string = token('str_data', (l.any - '$' - '`')^1)
-
--- Variables.
--- Shell Substitution.
-local ex_variable = token("ex_variable",
-  '$' * l.delimited_range('()', true, true))
--- Other variables
-local variable = token(l.VARIABLE,
-  '$' * (S('!#?*@$') + l.digit^1 + bash_word +
-    l.delimited_range('{}', true, true)))
-
-local var = ex_variable + variable
-
-M._rules = {
-  {'variable', var},
-  {'ex_str', ex_str},
-  {'string', bash_string},
-}
-
-function M.lex_recursive( self, str, bash_lexer )
-  local tokens = self:lex(str)
-  for i = 1, #tokens do
-    if tokens[i]['token'] == "ex_str" then
-      tokens[i]['val'] = bash_lexer:lex_recursive(string.sub(tokens[i]['val'], 2, -2), self)
-    elseif tokens[i]['token'] == "ex_variable" then
-      tokens[i]['val'] = bash_lexer:lex_recursive(string.sub(tokens[i]['val'], 3, -2), self)
-    end
-  end
-  return tokens
-end
-
-return M
```
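Taken together, the two lexers make the tampering check concrete: command substitution stays live inside double quotes, so injected data surfaces as `ex_str` or `ex_variable` tokens rather than inert `str_data`. A sketch under the same assumed `load()` API:

```lua
-- Same assumed load() as in the bash sketch above; the token names
-- (str_data, ex_variable) come straight from bash_dqstr.lua's rules.
local l = require('lexer')
local bash = l.load('bash')
local dq = l.load('bash_dqstr')

-- Benign value: a single inert str_data token.
local benign = dq:lex_recursive('hello world', bash)

-- Tampered value: str_data plus a nested ex_variable token, because
-- $(...) is still expanded by the shell inside double quotes. That
-- structural difference is what a tampering check can key on.
local tainted = dq:lex_recursive('hello $(rm -rf /tmp/x)', bash)
```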
data/lua-hooks/lib/lexers/css.lua DELETED

```diff
@@ -1,101 +0,0 @@
--- Copyright 2006-2010 Mitchell Foral mitchell<att>caladbolg.net. See LICENSE.
--- CSS LPeg lexer
-local M = {_NAME = 'css'}
-
-local l = require('lexer')
-local token, parent_token, word_match, delimited_range =
-  l.token, l.parent_token, l.word_match, l.delimited_range
-
-local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V
-
-local ws = token('whitespace', l.space^1)
-
--- comments
-local comment = token('comment', '/*' * (l.any - '*/')^0 * P('*/')^-1)
-
-local word_char = l.alnum + S('_-')
-local identifier = (l.alpha + '-')^1 * word_char^0
-
--- strings
-local sq_str = delimited_range("'", '\\', true)
-local dq_str = delimited_range('"', '\\', true)
-local string = token('string', sq_str + dq_str)
-
-local colon = token('operator', ':')
-local semicolon = token('operator', ';')
-local comma = token('operator', ',')
-local obrace = token('operator', '{')
-local cbrace = token('operator', '}')
-local bang = token('operator', '!')
-
--- selectors
-local attribute = '[' * word_char^1 * (S('|~')^-1 * '=' * (identifier + sq_str + dq_str))^-1 * ']'
-local class_id_selector = identifier^-1 * S('.#') * identifier
-local pseudoclass = word_match({
-  'first-letter', 'first-line', 'link', 'active', 'visited',
-  'first-child', 'focus', 'hover', 'lang', 'before', 'after',
-  'left', 'right', 'first'
-}, '-', true)
-local selector = P('*') * ws + (class_id_selector + identifier + '*') * attribute^-1
-selector = token('selector', selector * (ws * selector)^0) *
-  (token('selector', ':' * pseudoclass) + token('default_selector', ':' * word_char^1))^-1
-selector = selector * (ws^0 * (comma + token('selector', S('>+*'))) * ws^0 * selector)^0
-
--- css properties and values
-local property_name = token('property_name', word_char^1)
-local value = token('value', bang^0 * word_char^1)
-
--- colors, units, numbers, and urls
-local hexcolor = token('color', '#' * l.xdigit * l.xdigit * l.xdigit * (l.xdigit * l.xdigit * l.xdigit)^-1)
-local rgbunit = (l.digit^1 * P('%')^-1)
-local rgbcolor = token('color', word_match({'rgb'}, nil, true) * '(' * rgbunit * ',' * rgbunit * ',' * rgbunit * ')')
-local color = hexcolor + rgbcolor
-local unit = word_match({
-  'pt', 'mm', 'cm', 'pc', 'in', 'px', 'em', 'ex', 'deg',
-  'rad', 'grad', 'ms', 's', 'Hz', 'kHz'
-}, nil, true)
-unit = token('unit', unit + '%')
-local css_float = l.digit^0 * '.' * l.digit^1 + l.digit^1 * '.' * l.digit^0 + l.digit^1
-local number = token('number', S('+-')^-1 * css_float) * unit^-1
-local func = parent_token('function', token('function_name', identifier) * token('function_param', delimited_range('()', true, false, true)))
--- declaration block
-local block_default_char = token('default_block_char', (l.any - '}')^1)
-local property_value = parent_token('property_value', string + number + color + func + value)
-local property_values = { property_value * (ws * property_value)^0 * (ws^0 * comma * ws^0 * V(1))^0 }
-local declaration_value = colon * ws^0 * property_values * ws^0 * semicolon^0
-local declaration_property = property_name * ws^0
-local declaration = parent_token('declaration', (declaration_property * (declaration_value + block_default_char)) + comment + block_default_char)
-local declaration_block = parent_token('declaration_block', obrace * ws^0 * declaration * (ws * declaration)^0 * ws^0 * cbrace^-1)
-
-local css_element = selector * ws^0 * declaration_block^-1
-
--- at rules
-local at_rule_name = token('at_rule_name', '@' * word_match({
-  'import', 'media', 'page', 'font-face', 'charset'
-}, '-', true))
-local at_rule_arg = token('at_rule_arg', word_match({
-  'all', 'aural', 'braille', 'embossed', 'handheld', 'print',
-  'projection', 'screen', 'tty', 'tv'
-}, nil, true))
-local at_rule = parent_token('at_rule', at_rule_name * (ws * (at_rule_arg + func + string) )^-1)
-
--- Immunio marker
-local marker = l.token('marker', P('{immunio-var:') * l.integer * ':' * l.xdigit^1 * '}')
-
-M._rules = {
-  {'whitespace', ws},
-  {'comment', comment},
-  {'marker', marker},
-  {'at_rule', at_rule},
-  {'string', string},
-  {'css_element', css_element},
-}
-M.declaration = declaration -- so we can access it in sub-lexer for attrs
-
-M._tokenstyles = {
-}
-
-M._foldsymbols = {
-}
-
-return M
```
data/lua-hooks/lib/lexers/css_attr.lua DELETED

```diff
@@ -1,13 +0,0 @@
--- Lexer for CSS style attributes. These are slightly different as we need to
--- start lexing inside a declaration rather than at the selector level...
-M = require('css')
--- For attributes, remove the css_element rule which includes
--- selector and declaration block tokens
-for k,v in ipairs(M._rules) do
-  if v[1] == 'css_element' then
-    M._rules[k] = nil
-  end
-end
--- Instead insert a top level token for declarations.
-table.insert(M._rules, {'declaration', M.declaration})
-return M
```
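Since a `style` attribute carries bare declarations with no selector or braces, the rule swap above turns the CSS grammar into one that starts at the declaration level; presumably this is what the HTML side uses for `style="..."` values. A sketch under the same assumed loader:

```lua
-- Hypothetical use of the deleted css_attr lexer; load() is assumed.
local l = require('lexer')
local css_attr = l.load('css_attr')

-- No selector, no braces: bare declarations, matching the top-level
-- 'declaration' rule that css_attr.lua inserts.
local tokens = css_attr:lex('color: red; background: #fafafa')
```

Note that css_attr.lua appears to mutate the module table returned by `require('css')` rather than copying it, so within a single Lua state the plain `css` lexer and `css_attr` share rule storage.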
data/lua-hooks/lib/lexers/html.lua DELETED

```diff
@@ -1,113 +0,0 @@
--- Copyright (C) 2015 Immunio, Inc.
-
--- HTML: Simple h5 like HTML lexer for Immun.io.
-
--- NOTE: not covered by Scintillua MIT license in this directory.
-
-local l = require('lexer')
-local token, parent_token, word_match = l.token, l.parent_token, l.word_match
-local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V
-
-local M = {_NAME = 'html'}
-
-local case_insensitive_tags = true
-
--- Whitespace.
-local ws = l.space^1
--- This is broad to both accept our placeholders and be very liberal about what may be
--- interpreted as an attribute to ensure we escape attributes fairly aggressively.
-local element_chars = (l.any - '<' - '>' - '=' - '"' - "'" - ws)^1
-
--- Comments.
-local comment = token(l.COMMENT, '<!--' * (l.any - '-->')^0 * P('-->'))
-
--- IE Conditional Comments.
-local ie_condcomment_hidden_open = token(l.COMMENT, P('<!--[') * (l.any - ']>')^0 * P(']>'))
-local ie_condcomment_hidden_close = token(l.COMMENT, P('<![') * (l.any - ']-->')^0 * P(']-->'))
-local ie_condcomment_revealed = token(l.COMMENT, P('<![') * (l.any - '>')^0 * P('>'))
-local condcomment = token('condcomment', ie_condcomment_hidden_open + ie_condcomment_hidden_close + ie_condcomment_revealed)
-
--- Strings.
-local sq_str = l.delimited_range("'")
-local dq_str = l.delimited_range('"')
-local string = sq_str + dq_str
-
--- Attributes. Individual recognition is handled in our XSS processing code.
-local attr_name = token('attr_name', element_chars - '=')
-local attr_value = token('attr_value', string + element_chars)
-local attribute = parent_token('attribute', attr_name * '=' * attr_value)
-
--- Tags.
-local tag_name = token('tag_name', element_chars - '/')
-local tag_data = token('tag_data', (l.any - l.space - '>')^1 ) -- crap in a tag
-
--- XXX how should we handle void tags... right now they are an unmatched tag_open
-local tag_open = parent_token('tag_open', P('<') * tag_name * ( (ws * attribute) + ( tag_data ) + ws )^0 * (P('>') + '/>') )
-local tag_close = parent_token('tag_close', P('</') * tag_name * ( ( tag_data ) + ws )^0 * '>')
-
--- Special case for script and style tags.
-local style_tag_name = token("tag_name", word_match({'style'}, nil, case_insensitive_tags))
-local style_tag_open = parent_token("tag_open", P('<') * style_tag_name * ((ws * attribute) + tag_data)^0 * P('>'))
-local style_tag_close = parent_token("tag_close", P('</') * style_tag_name * tag_data^0 * '>')
-local style_data = token("style_data", (l.any - style_tag_close)^0)
-local style_tag = parent_token('style_tag', style_tag_open * style_data * style_tag_close)
-
-local script_tag_name = token("tag_name", word_match({'script'}, nil, case_insensitive_tags))
-local script_tag_open = parent_token("tag_open", P('<') * script_tag_name * ((ws * attribute) + tag_data)^0 * P('>'))
-local script_tag_close = parent_token("tag_close", P('</') * script_tag_name * tag_data^0 * '>')
-local script_data = token("script_data", (l.any - script_tag_close)^0)
-local script_tag = parent_token('script_tag', script_tag_open * script_data * script_tag_close)
-
--- Top level rules
-
--- Note: the ordering is important here as <script> and <style> have to supersede tag_open...
-local tag = style_tag + script_tag + tag_open + tag_close
-
--- Entities.
-local entity = token('entity', '&' * (l.any - l.space - ';' - '<' - '>' - "'" - '"' - "/" )^1 * ';')
-
--- Doctype.
-local doctype = token('doctype', '<!' *
-  word_match({'doctype'}, nil, case_insensitive_tags) *
-  (l.any - '>')^1 * '>')
-
--- Data between tags
-local data = token('data', (l.any - '<')^1)
-
-M._rules = {
-  {'condcomment', condcomment}, -- must precede comment
-  {'comment', comment},
-  {'doctype', doctype},
-  {'tag', tag},
-  {'entity', entity},
-  {'data', data},
-}
-
-M._tokenstyles = {
-}
-
-M._foldsymbols = {
-}
-
-M.unlex_rules = {
-  ["tag_open"] = {
-    ["prefix"] = "<",
-    ["suffix"] = ">",
-  },
-  ["tag_close"] = {
-    ["prefix"] = "</",
-    ["suffix"] = ">",
-  },
-  ["attribute"] = {
-    ["prefix"] = " ",
-  },
-  ["tag_data"] = {
-    ["prefix"] = " ",
-  },
-  ["attr_name"] = {
-    ["suffix"] = "=",
-  },
-}
-
-return M
```
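The `unlex_rules` table at the bottom records the literal glue (`<`, `</`, `>`, `=`, spaces) that tokenizing strips, which suggests the token stream is re-serialized back into markup after inspection or escaping. A final sketch, same assumed loader:

```lua
-- Hypothetical use of the deleted HTML lexer; load() is assumed.
local l = require('lexer')
local html = l.load('html')

-- <script> bodies arrive as one script_data token (and <style> bodies
-- as style_data), so the XSS logic can treat them separately from the
-- ordinary data tokens between tags.
local tokens = html:lex('<a href="/x">hi</a><script>go()</script>')
```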