immunio 0.15.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +234 -0
- data/README.md +147 -0
- data/bin/immunio +5 -0
- data/lib/immunio.rb +29 -0
- data/lib/immunio/agent.rb +260 -0
- data/lib/immunio/authentication.rb +96 -0
- data/lib/immunio/blocked_app.rb +38 -0
- data/lib/immunio/channel.rb +432 -0
- data/lib/immunio/cli.rb +39 -0
- data/lib/immunio/context.rb +114 -0
- data/lib/immunio/errors.rb +43 -0
- data/lib/immunio/immunio_ca.crt +45 -0
- data/lib/immunio/logger.rb +87 -0
- data/lib/immunio/plugins/action_dispatch.rb +45 -0
- data/lib/immunio/plugins/action_view.rb +431 -0
- data/lib/immunio/plugins/active_record.rb +707 -0
- data/lib/immunio/plugins/active_record_relation.rb +370 -0
- data/lib/immunio/plugins/authlogic.rb +80 -0
- data/lib/immunio/plugins/csrf.rb +24 -0
- data/lib/immunio/plugins/devise.rb +40 -0
- data/lib/immunio/plugins/environment_reporter.rb +69 -0
- data/lib/immunio/plugins/eval.rb +51 -0
- data/lib/immunio/plugins/exception_handler.rb +55 -0
- data/lib/immunio/plugins/gems_tracker.rb +5 -0
- data/lib/immunio/plugins/haml.rb +36 -0
- data/lib/immunio/plugins/http_finisher.rb +50 -0
- data/lib/immunio/plugins/http_tracker.rb +203 -0
- data/lib/immunio/plugins/io.rb +96 -0
- data/lib/immunio/plugins/redirect.rb +42 -0
- data/lib/immunio/plugins/warden.rb +66 -0
- data/lib/immunio/processor.rb +234 -0
- data/lib/immunio/rails.rb +26 -0
- data/lib/immunio/request.rb +139 -0
- data/lib/immunio/rufus_lua_ext/ref.rb +27 -0
- data/lib/immunio/rufus_lua_ext/state.rb +157 -0
- data/lib/immunio/rufus_lua_ext/table.rb +137 -0
- data/lib/immunio/rufus_lua_ext/utils.rb +13 -0
- data/lib/immunio/version.rb +5 -0
- data/lib/immunio/vm.rb +291 -0
- data/lua-hooks/ext/all.c +78 -0
- data/lua-hooks/ext/bitop/README +22 -0
- data/lua-hooks/ext/bitop/bit.c +189 -0
- data/lua-hooks/ext/extconf.rb +38 -0
- data/lua-hooks/ext/libinjection/COPYING +37 -0
- data/lua-hooks/ext/libinjection/libinjection.h +65 -0
- data/lua-hooks/ext/libinjection/libinjection_html5.c +847 -0
- data/lua-hooks/ext/libinjection/libinjection_html5.h +54 -0
- data/lua-hooks/ext/libinjection/libinjection_sqli.c +2301 -0
- data/lua-hooks/ext/libinjection/libinjection_sqli.h +295 -0
- data/lua-hooks/ext/libinjection/libinjection_sqli_data.h +9349 -0
- data/lua-hooks/ext/libinjection/libinjection_xss.c +531 -0
- data/lua-hooks/ext/libinjection/libinjection_xss.h +21 -0
- data/lua-hooks/ext/libinjection/lualib.c +109 -0
- data/lua-hooks/ext/lpeg/HISTORY +90 -0
- data/lua-hooks/ext/lpeg/lpcap.c +537 -0
- data/lua-hooks/ext/lpeg/lpcap.h +43 -0
- data/lua-hooks/ext/lpeg/lpcode.c +986 -0
- data/lua-hooks/ext/lpeg/lpcode.h +34 -0
- data/lua-hooks/ext/lpeg/lpeg-128.gif +0 -0
- data/lua-hooks/ext/lpeg/lpeg.html +1429 -0
- data/lua-hooks/ext/lpeg/lpprint.c +244 -0
- data/lua-hooks/ext/lpeg/lpprint.h +35 -0
- data/lua-hooks/ext/lpeg/lptree.c +1238 -0
- data/lua-hooks/ext/lpeg/lptree.h +77 -0
- data/lua-hooks/ext/lpeg/lptypes.h +149 -0
- data/lua-hooks/ext/lpeg/lpvm.c +355 -0
- data/lua-hooks/ext/lpeg/lpvm.h +58 -0
- data/lua-hooks/ext/lpeg/makefile +55 -0
- data/lua-hooks/ext/lpeg/re.html +498 -0
- data/lua-hooks/ext/lpeg/test.lua +1409 -0
- data/lua-hooks/ext/lua-cmsgpack/CMakeLists.txt +45 -0
- data/lua-hooks/ext/lua-cmsgpack/README.md +115 -0
- data/lua-hooks/ext/lua-cmsgpack/lua_cmsgpack.c +957 -0
- data/lua-hooks/ext/lua-cmsgpack/test.lua +570 -0
- data/lua-hooks/ext/lua-snapshot/LICENSE +7 -0
- data/lua-hooks/ext/lua-snapshot/Makefile +12 -0
- data/lua-hooks/ext/lua-snapshot/README.md +18 -0
- data/lua-hooks/ext/lua-snapshot/dump.lua +15 -0
- data/lua-hooks/ext/lua-snapshot/snapshot.c +455 -0
- data/lua-hooks/ext/lua/COPYRIGHT +34 -0
- data/lua-hooks/ext/lua/lapi.c +1087 -0
- data/lua-hooks/ext/lua/lapi.h +16 -0
- data/lua-hooks/ext/lua/lauxlib.c +652 -0
- data/lua-hooks/ext/lua/lauxlib.h +174 -0
- data/lua-hooks/ext/lua/lbaselib.c +659 -0
- data/lua-hooks/ext/lua/lcode.c +831 -0
- data/lua-hooks/ext/lua/lcode.h +76 -0
- data/lua-hooks/ext/lua/ldblib.c +398 -0
- data/lua-hooks/ext/lua/ldebug.c +638 -0
- data/lua-hooks/ext/lua/ldebug.h +33 -0
- data/lua-hooks/ext/lua/ldo.c +519 -0
- data/lua-hooks/ext/lua/ldo.h +57 -0
- data/lua-hooks/ext/lua/ldump.c +164 -0
- data/lua-hooks/ext/lua/lfunc.c +174 -0
- data/lua-hooks/ext/lua/lfunc.h +34 -0
- data/lua-hooks/ext/lua/lgc.c +710 -0
- data/lua-hooks/ext/lua/lgc.h +110 -0
- data/lua-hooks/ext/lua/linit.c +38 -0
- data/lua-hooks/ext/lua/liolib.c +556 -0
- data/lua-hooks/ext/lua/llex.c +463 -0
- data/lua-hooks/ext/lua/llex.h +81 -0
- data/lua-hooks/ext/lua/llimits.h +128 -0
- data/lua-hooks/ext/lua/lmathlib.c +263 -0
- data/lua-hooks/ext/lua/lmem.c +86 -0
- data/lua-hooks/ext/lua/lmem.h +49 -0
- data/lua-hooks/ext/lua/loadlib.c +705 -0
- data/lua-hooks/ext/lua/loadlib_rel.c +760 -0
- data/lua-hooks/ext/lua/lobject.c +214 -0
- data/lua-hooks/ext/lua/lobject.h +381 -0
- data/lua-hooks/ext/lua/lopcodes.c +102 -0
- data/lua-hooks/ext/lua/lopcodes.h +268 -0
- data/lua-hooks/ext/lua/loslib.c +243 -0
- data/lua-hooks/ext/lua/lparser.c +1339 -0
- data/lua-hooks/ext/lua/lparser.h +82 -0
- data/lua-hooks/ext/lua/lstate.c +214 -0
- data/lua-hooks/ext/lua/lstate.h +169 -0
- data/lua-hooks/ext/lua/lstring.c +111 -0
- data/lua-hooks/ext/lua/lstring.h +31 -0
- data/lua-hooks/ext/lua/lstrlib.c +871 -0
- data/lua-hooks/ext/lua/ltable.c +588 -0
- data/lua-hooks/ext/lua/ltable.h +40 -0
- data/lua-hooks/ext/lua/ltablib.c +287 -0
- data/lua-hooks/ext/lua/ltm.c +75 -0
- data/lua-hooks/ext/lua/ltm.h +54 -0
- data/lua-hooks/ext/lua/lua.c +392 -0
- data/lua-hooks/ext/lua/lua.def +131 -0
- data/lua-hooks/ext/lua/lua.h +388 -0
- data/lua-hooks/ext/lua/lua.rc +28 -0
- data/lua-hooks/ext/lua/lua_dll.rc +26 -0
- data/lua-hooks/ext/lua/luac.c +200 -0
- data/lua-hooks/ext/lua/luac.rc +1 -0
- data/lua-hooks/ext/lua/luaconf.h +763 -0
- data/lua-hooks/ext/lua/luaconf.h.in +724 -0
- data/lua-hooks/ext/lua/luaconf.h.orig +763 -0
- data/lua-hooks/ext/lua/lualib.h +53 -0
- data/lua-hooks/ext/lua/lundump.c +227 -0
- data/lua-hooks/ext/lua/lundump.h +36 -0
- data/lua-hooks/ext/lua/lvm.c +767 -0
- data/lua-hooks/ext/lua/lvm.h +36 -0
- data/lua-hooks/ext/lua/lzio.c +82 -0
- data/lua-hooks/ext/lua/lzio.h +67 -0
- data/lua-hooks/ext/lua/print.c +227 -0
- data/lua-hooks/ext/luautf8/README.md +152 -0
- data/lua-hooks/ext/luautf8/lutf8lib.c +1274 -0
- data/lua-hooks/ext/luautf8/unidata.h +3064 -0
- data/lua-hooks/lib/boot.lua +254 -0
- data/lua-hooks/lib/encode.lua +4 -0
- data/lua-hooks/lib/lexers/LICENSE +21 -0
- data/lua-hooks/lib/lexers/bash.lua +134 -0
- data/lua-hooks/lib/lexers/bash_dqstr.lua +62 -0
- data/lua-hooks/lib/lexers/css.lua +216 -0
- data/lua-hooks/lib/lexers/html.lua +106 -0
- data/lua-hooks/lib/lexers/javascript.lua +68 -0
- data/lua-hooks/lib/lexers/lexer.lua +1575 -0
- data/lua-hooks/lib/lexers/markers.lua +33 -0
- metadata +308 -0
|
@@ -0,0 +1,254 @@
|
|
|
1
|
+
-- This file is executed when the Lua VM boots.
|
|
2
|
+
require 'encode'
|
|
3
|
+
|
|
4
|
+
-- Extend the module search path so the lexers can be loaded when this file is
-- run from the test harness. Inside the VM the path is handled by vm.rb. --ol
lexer_path = 'lib/lexers/?.lua'
package.path = table.concat({ package.path, lexer_path }, ';')
|
|
8
|
+
|
|
9
|
+
-- The environment made available to code executing in the VM.
-- Sandboxed code can only reach what is declared in this table, so make sure
-- a function is safe before adding it here.
-- See http://lua-users.org/wiki/SandBoxes
do
  -- Return a new table containing only the listed entries of `lib`.
  -- Names that `lib` does not define are simply absent from the result.
  local function expose(lib, names)
    local subset = {}
    for i = 1, #names do
      local name = names[i]
      subset[name] = lib[name]
    end
    return subset
  end

  -- Pre-built lexer library. Each load() call below both loads the lexer
  -- code and compiles its LPeg grammar.
  local lexer_lib = require('lexers/lexer')

  SANDBOX_ENV = {
    -- Lua libs
    ipairs = ipairs,
    next = next,
    pairs = pairs,
    pcall = pcall,
    tonumber = tonumber,
    tostring = tostring,
    type = type,
    unpack = unpack,
    assert = assert,
    error = error,
    getmetatable = getmetatable,
    setmetatable = setmetatable,
    rawget = rawget,
    rawset = rawset,
    collectgarbage = collectgarbage,
    math = math,
    string = string,
    bit = expose(bit, {
      'band', 'extract', 'bor', 'bnot', 'arshift', 'rshift', 'rrotate',
      'replace', 'lshift', 'lrotate', 'btest', 'bxor',
    }),
    coroutine = expose(coroutine, {
      'create', 'resume', 'running', 'status', 'wrap', 'yield',
    }),
    -- Block most of debug in the sandbox, but allow tracebacks.
    debug = expose(debug, { 'traceback' }),
    select = select,
    utf8 = expose(utf8, {
      'byte', 'char', 'find', 'format', 'gmatch', 'gsub', 'len', 'lower',
      'match', 'rep', 'reverse', 'sub', 'upper', 'split', 'escape',
      'charpos', 'insert', 'remove', 'next', 'ncasecmp',
    }),
    table = expose(table, {
      'insert', 'maxn', 'remove', 'sort', 'map', 'reduce', 'length', 'concat',
    }),
    libinjection = expose(libinjection, {
      'sqli', 'fingerprint', 'xss', 'sqli_tokenize',
    }),
    -- LPeg Library
    lpeg = expose(lpeg, {
      'ptree', 'pcode', 'match', 'B', 'V', 'C', 'Cc', 'Cmt', 'Cb', 'Carg',
      'Cp', 'Cs', 'Ct', 'Cf', 'Cg', 'P', 'S', 'R', 'locale', 'version',
      'setmaxstack', 'type',
    }),
    lexers = {
      lexer = lexer_lib,
      bash = lexer_lib.load('bash'),             -- bash
      bash_dqstr = lexer_lib.load('bash_dqstr'), -- bash strings
      markers = lexer_lib.load('markers'),
      html = lexer_lib.load('html'),
      javascript = lexer_lib.load('javascript'),
      css = lexer_lib.load('css'),
    },
    -- Immunio vars
    serverdata = {}, -- Default empty serverdata
    agentdata = {},
    utils = {}, -- Used to store utility functions declared in the sandbox.
    -- Pass mode flags into the VM.
    DEV_MODE = DEV_MODE,
    DEBUG_MODE = DEBUG_MODE,
    LUA_PLATFORM = LUA_PLATFORM or 'unix',
    IMMUNIO_KEY = IMMUNIO_KEY,
    IMMUNIO_SECRET = IMMUNIO_SECRET,
  }
end
|
|
145
|
+
|
|
146
|
+
-- Debugging aids. The real `print` and the `snapshot` library are exposed to
-- sandboxed code only in debug or dev mode; otherwise `print` is a no-op.
if not (DEBUG_MODE or DEV_MODE) then
  SANDBOX_ENV.print = function(...) end
else
  SANDBOX_ENV.print = print
  SANDBOX_ENV.snapshot = snapshot
end
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
-- Call `method` inside the sandbox, optionally as a method of a Lua
-- pseudo-object.
--
-- For the duration of the call the function's environment is a fresh table
-- holding a shallow copy of SANDBOX_ENV overlaid with `vars`. Afterwards the
-- environment is reset to _G, whether the call returned or raised.
--
-- @param method  function to run in the sandbox
-- @param object  optional receiver; when truthy it is passed to `method` as
--                its only argument, otherwise `method` is called with none
-- @param vars    optional table of extra globals; its entries override the
--                sandbox defaults of the same name
-- @return the first value returned by `method`
-- An error raised by `method` is re-raised with its original error value
-- once the cleanup has run.
function sandboxed_method_call(method, object, vars)
  if DEBUG_MODE then
    SANDBOX_ENV.utils.debug_prefix = "UNKNOWN"
    -- Change the values here to toggle debugging per module.
    SANDBOX_ENV.utils.debug_module_prefixes = {
      UNKNOWN = true,
      IO = true,
      SQLi = true,
      ExceptionHandler = true,
      Redirect = true,
      XSS = true,
      Eval = true,
    }
  end

  -- Merge the default sandbox env and the vars; vars win on conflict.
  -- A copy is used so that globals written by the function do not leak into
  -- SANDBOX_ENV or into later calls.
  local merged_vars = {}
  merged_vars._G = merged_vars
  for k, v in pairs(SANDBOX_ENV) do
    merged_vars[k] = v
  end
  if vars then
    for k, v in pairs(vars) do
      merged_vars[k] = v
    end
  end

  -- XXX Open sandbox in DEBUG_MODE
  if DEBUG_MODE then
    merged_vars['__REAL_G'] = _G
  end

  -- Set the environment of the function and call it. The call is protected
  -- so that the cleanup below also runs when `method` raises.
  -- NOTE(review): in Lua 5.1 a coroutine cannot yield across pcall; this
  -- assumes `method` does not yield to a coroutine that encloses this call.
  setfenv(method, merged_vars)
  local ok, rval
  if object then
    ok, rval = pcall(method, object)
  else
    ok, rval = pcall(method)
  end

  -- Mark the values held by merged_vars as weak so the GC may reclaim them.
  -- Without this the function environment is held onto and we leak the
  -- universe... --ol
  setmetatable(merged_vars, { __mode = "v" })
  -- Remove merged_vars from the function environment so it can be collected
  -- sooner.
  setfenv(method, _G)

  if not ok then
    -- Level 0: pass the original error value through untouched.
    error(rval, 0)
  end
  return rval
end
|
|
207
|
+
|
|
208
|
+
-- Entry point called by the VM to run a plain function in the sandbox.
-- Delegates to sandboxed_method_call with no receiver object; `vars`, when
-- given, supplies extra globals for the call.
function sandboxed_call(func, vars)
  local receiver = nil
  return sandboxed_method_call(func, receiver, vars)
end
|
|
212
|
+
|
|
213
|
+
if DEBUG_MODE then
  -- Memory Snapshot Debugger

  -- Most recently stored snapshot; the baseline used by diff_snapshot().
  local saved_snapshot = {}

  -- Run two full garbage collections, then capture a snapshot.
  -- NOTE(review): the second collection presumably reclaims objects that
  -- were only finalized by the first -- confirm.
  local function take_snapshot()
    collectgarbage()
    collectgarbage()
    return snapshot.snapshot()
  end

  -- Format one snapshot entry as a single report line (no trailing newline).
  local function describe(key, value)
    return "ALLOCATION:" .. tostring(key):gsub("userdata:", "") .. " " .. value
  end

  -- Take a snapshot, store it as the new baseline and print every entry.
  -- @param label  optional text printed under the report header
  function dump_snapshot( label )
    saved_snapshot = take_snapshot()
    print("------------------------\nSNAPSHOT:\n")
    if label then print(label) end
    for k, v in pairs(saved_snapshot) do
      print(describe(k, v))
    end
  end

  -- Take a snapshot and store it as the new baseline without printing it.
  function update_snapshot()
    saved_snapshot = take_snapshot()
  end

  -- Report the entries present now that are absent from the stored baseline.
  -- @param update  when truthy, the new snapshot becomes the baseline
  -- @return the report as a single string
  function diff_snapshot( update )
    local current = take_snapshot()
    -- Collect the report lines and join once, rather than growing a string
    -- (and a global) inside the loop.
    local report = { "------------------------\nDIFF SNAPSHOT:\n" }
    for k, v in pairs(current) do
      if saved_snapshot[k] == nil then
        report[#report + 1] = describe(k, v) .. "\n"
      end
    end
    if update then saved_snapshot = current end
    return table.concat(report)
  end

  -- Uncomment for snapshot tracing
  --snapshot.tron()
  -- Uncomment to generate a snapshot at boot.
  --dump_snapshot('BOOT')
  --snapshot.troff()
end
|
|
254
|
+
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
The MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2007-2015 Mitchell
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
|
13
|
+
all copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
21
|
+
THE SOFTWARE.
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com.
|
|
2
|
+
-- Copyright 2015 Immunio, Inc.
|
|
3
|
+
|
|
4
|
+
-- Shell LPeg lexer.
|
|
5
|
+
|
|
6
|
+
-- This is based on the lexer from the Scintillua package, with a lot of extensions.
|
|
7
|
+
-- The goal isn't a complete parser for bash, but a lexer that can extract a useful
|
|
8
|
+
-- amount of structure to detect tampering. The emphasis is more on common injection
|
|
9
|
+
-- techniques and lexical structure than actually extracting properly formed bash
|
|
10
|
+
-- statements. Down the road we may need to go as far as to parse statements, and that
|
|
11
|
+
-- should be possible at the cost of a lot more complexity.
|
|
12
|
+
|
|
13
|
+
local l = require('lexer')
|
|
14
|
+
local token = l.token
|
|
15
|
+
local P, R, S = lpeg.P, lpeg.R, lpeg.S
|
|
16
|
+
|
|
17
|
+
local M = {_NAME = 'bash'}

-- Whitespace.
local ws = token(l.WHITESPACE, l.space^1)

-- A bare word: a letter or underscore, followed by any run of alphanumerics,
-- underscores, backslash-escaped spaces and dots.
local bash_word = (l.alpha + '_') * (l.alnum + '_' + '\\ ' + '.')^0

-- Comments.
local comment = token(l.COMMENT, P('#') * l.nonnewline^0)

-- Strings.
local sq_str = token('sq_str', l.delimited_range("'", false, true))
local dq_str = token('dq_str', l.delimited_range('"'))
local ex_str = token('ex_str', l.delimited_range('`'))

-- Match-time function for the text that follows a '<<'.
-- Expects an optional '-', an optionally quoted delimiter word and at least
-- one newline/';' at `index`. Returns the position just past the next
-- occurrence of the delimiter after a line break, or the end of the input
-- when the heredoc is never closed. Returns nothing (match fails) when no
-- heredoc opener starts at `index`.
local function match_heredoc(input, index)
  local first, last, _, delimiter =
    input:find('%-?(["\']?)([%a_][%w_]*)%1[\n\r\f;]+', index)
  if first ~= index or not delimiter then
    return
  end
  local _, close = input:find('[\n\r\f]+'..delimiter, last)
  if close then
    return close + 1
  end
  return #input + 1
end

local heredoc = token('heredoc', P('<<') * P(match_heredoc))
local bash_string = sq_str + dq_str + ex_str + heredoc

-- Numbers.
local number = token(l.NUMBER, l.float + l.integer)

-- Keywords.
local keyword_words = {
  'if', 'then', 'elif', 'else', 'fi', 'case', 'in', 'esac', 'while', 'for',
  'do', 'done', 'continue', 'local', 'return', 'select',
  -- Operators. These could be split into individual tokens...
  '-a', '-b', '-c', '-d', '-e', '-f', '-g', '-h', '-k', '-p', '-r', '-s', '-t',
  '-u', '-w', '-x', '-O', '-G', '-L', '-S', '-N', '-nt', '-ot', '-ef', '-o',
  '-z', '-n', '-eq', '-ne', '-lt', '-le', '-gt', '-ge'
}
local keyword = token(l.KEYWORD, l.word_match(keyword_words, '-'))

-- Common commands ... this is not exhaustive nor does it need to be.
local command_words = {
  'awk', 'cat', 'cmp', 'cp', 'curl', 'cut', 'date', 'find', 'grep', 'gunzip', 'gvim',
  'gzip', 'kill', 'lua', 'make', 'mkdir', 'mv', 'php', 'pkill', 'python', 'rm',
  'rmdir', 'rsync', 'ruby', 'scp', 'sed', 'sleep', 'ssh', 'sudo', 'tar', 'unlink',
  'wget', 'zip'
}
local command = token("command", l.word_match(command_words, '-'))

-- Builtins
local builtin_words = {
  'alias', 'bind', 'builtin', 'caller', 'command', 'declare', 'echo', 'enable',
  'help', 'let', 'local', 'logout', 'mapfile', 'printf', 'read', 'readarray',
  'source', 'type', 'typeset', 'ulimit', 'unalias',
}
local builtin = token("builtin", l.word_match(builtin_words, '-'))

-- Filenames. This is a bit sloppy, but tries to discern filenames from other
-- identifiers. Very much a case of R&D 'suck it and see'.
local leading_slashes = P('/')^0
local first_segment = bash_word + '.'
local path_rest = ('/' + bash_word + '.')^0
local extensions = ('.' * bash_word)^0
local filename = token("filename",
  leading_slashes * first_segment * path_rest * extensions)

-- Dotted quad: four integers separated by dots.
local dot = P('.')
local octet = l.integer
local ip = (octet * dot * octet * dot * octet * dot * octet)

-- URLs: a known scheme followed by everything up to whitespace. Trailing
-- punctuation is only consumed when more non-blank text follows it.
local protocol = ((P('https') + 'http' + 'ftp' + 'irc') * '://') + 'mailto:'
local blank = S'\r\n\f\t\v '
local punct = S',."}])'
local remainder = ((1-S'\r\n\f\t\v ,."}])') + (punct * (1-blank)))^0
local url = protocol * remainder

-- Identifiers.
local identifier = token(l.IDENTIFIER, url + ip + bash_word)

-- Variables.
-- '$(' ... ')' substitution.
local ex_variable = token("ex_variable",
  P('$') * l.delimited_range('()', true, true))

-- '$' followed by a special parameter, digits, a word or a '{...}' group.
local variable_name = S('!#?*@$') + l.digit^1 + bash_word +
  l.delimited_range('{}', true, true)
local variable = token(l.VARIABLE, P('$') * variable_name)

local var = ex_variable + variable

-- Operators. These could be split into individual tokens...
local operator = token(l.OPERATOR, S('=!<>+-/*^&|~.,:;?()[]{}'))

-- Rule list for the lexer, in the order declared here.
M._rules = {
  {'whitespace', ws},
  {'keyword', keyword},
  {'builtin', builtin},
  {'command', command},
  {'identifier', identifier},
  {'filename', filename},
  {'string', bash_string},
  {'comment', comment},
  {'number', number},
  {'variable', var},
  {'operator', operator},
}
|
|
110
|
+
|
|
111
|
+
-- Main entry point for lexing bash data.
-- Lexes `str` with this lexer, then replaces the value of every token that
-- can contain nested shell text with the token list produced by lexing that
-- text:
--   * backtick strings and $( ) substitutions are re-lexed as bash, with
--     their delimiters stripped;
--   * double-quoted strings (quotes stripped) and heredocs (whole value) are
--     handed to the dqstr sub-lexer.
-- The dqstr lexer instance is supplied by the caller rather than created
-- here, so the caller can cache it and avoid recompiling the grammar.
function M.lex_recursive( self, str, bash_dqstr_lexer )
  local tokens = self:lex(str)
  for i = 1, #tokens do
    local entry = tokens[i]
    local kind = entry.token
    if kind == "ex_str" then
      local inner = string.sub(entry.val, 2, -2)
      entry.val = self:lex_recursive(inner, bash_dqstr_lexer)
    elseif kind == "ex_variable" then
      local inner = string.sub(entry.val, 3, -2)
      entry.val = self:lex_recursive(inner, bash_dqstr_lexer)
    elseif kind == "dq_str" then
      local inner = string.sub(entry.val, 2, -2)
      entry.val = bash_dqstr_lexer:lex_recursive(inner, self)
    elseif kind == "heredoc" then
      entry.val = bash_dqstr_lexer:lex_recursive(entry.val, self)
    end
  end
  return tokens
end
|
|
131
|
+
|
|
132
|
+
return M
|
|
133
|
+
|
|
134
|
+
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
-- Copyright (C) 2015 Immunio, Inc.
|
|
2
|
+
|
|
3
|
+
-- Lexer for bash magic double quotes
|
|
4
|
+
|
|
5
|
+
-- NOTE: not covered by Scintillua MIT license in this directory.
|
|
6
|
+
|
|
7
|
+
-- While our lexer has the ability to embed this sort of thing as a child of another lexer
|
|
8
|
+
-- I didn't bother here due to the recursion; we need to lex the parent (bash) language
|
|
9
|
+
-- for some tokens which would be very complex at best. It's cleaner to use two lexers
|
|
10
|
+
-- and handle the recursion in higher level lua at a minute performance cost.
|
|
11
|
+
|
|
12
|
+
local l = require('lexer')
|
|
13
|
+
-- Only `token` is needed from the lexer module in this file.
local token = l.token
local P, R, S = lpeg.P, lpeg.R, lpeg.S

local M = {_NAME = 'bash_dqstr'}

-- Whitespace.
local ws = token(l.WHITESPACE, l.space^1)

-- Generic token: a letter or underscore, followed by any run of
-- alphanumerics, underscores and backslash-escaped spaces.
local bash_word = (l.alpha + '_') * (l.alnum + '_' + '\\ ')^0

-- Strings.
-- Shell substitution.
local ex_str = token('ex_str', l.delimited_range('`'))

-- Other string data: any run of characters containing neither '$' nor '`'.
local bash_string = token('str_data', (l.any - '$' - '`')^1)

-- Variables.
-- Shell Substitution.
local ex_variable = token("ex_variable",
  '$' * l.delimited_range('()', true, true))
-- Other variables: '$' followed by a special parameter, digits, a word or a
-- '{...}' group.
local variable = token(l.VARIABLE,
  '$' * (S('!#?*@$') + l.digit^1 + bash_word +
  l.delimited_range('{}', true, true)))

local var = ex_variable + variable

-- Rule list for the lexer, in the order declared here.
M._rules = {
  {'variable', var},
  {'ex_str', ex_str},
  {'string', bash_string},
}
|
|
47
|
+
|
|
48
|
+
-- Lex the contents of a double-quoted bash string.
-- Tokens that hold nested shell code have their value replaced by the token
-- list obtained from re-lexing that code with `bash_lexer`; this lexer is
-- passed along so the bash lexer can recurse back into quoted strings.
function M.lex_recursive( self, str, bash_lexer )
  -- Index of the first character of the nested code, per token type:
  -- skips the opening "`" or "$(". The closing delimiter is always one
  -- character and is dropped as well.
  local code_start = { ex_str = 2, ex_variable = 3 }

  local tokens = self:lex(str)
  for i = 1, #tokens do
    local entry = tokens[i]
    local from = code_start[entry.token]
    if from then
      entry.val = bash_lexer:lex_recursive(string.sub(entry.val, from, -2), self)
    end
  end
  return tokens
end
|
|
59
|
+
|
|
60
|
+
return M
|
|
61
|
+
|
|
62
|
+
|