immunio 0.15.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE +234 -0
- data/README.md +147 -0
- data/bin/immunio +5 -0
- data/lib/immunio.rb +29 -0
- data/lib/immunio/agent.rb +260 -0
- data/lib/immunio/authentication.rb +96 -0
- data/lib/immunio/blocked_app.rb +38 -0
- data/lib/immunio/channel.rb +432 -0
- data/lib/immunio/cli.rb +39 -0
- data/lib/immunio/context.rb +114 -0
- data/lib/immunio/errors.rb +43 -0
- data/lib/immunio/immunio_ca.crt +45 -0
- data/lib/immunio/logger.rb +87 -0
- data/lib/immunio/plugins/action_dispatch.rb +45 -0
- data/lib/immunio/plugins/action_view.rb +431 -0
- data/lib/immunio/plugins/active_record.rb +707 -0
- data/lib/immunio/plugins/active_record_relation.rb +370 -0
- data/lib/immunio/plugins/authlogic.rb +80 -0
- data/lib/immunio/plugins/csrf.rb +24 -0
- data/lib/immunio/plugins/devise.rb +40 -0
- data/lib/immunio/plugins/environment_reporter.rb +69 -0
- data/lib/immunio/plugins/eval.rb +51 -0
- data/lib/immunio/plugins/exception_handler.rb +55 -0
- data/lib/immunio/plugins/gems_tracker.rb +5 -0
- data/lib/immunio/plugins/haml.rb +36 -0
- data/lib/immunio/plugins/http_finisher.rb +50 -0
- data/lib/immunio/plugins/http_tracker.rb +203 -0
- data/lib/immunio/plugins/io.rb +96 -0
- data/lib/immunio/plugins/redirect.rb +42 -0
- data/lib/immunio/plugins/warden.rb +66 -0
- data/lib/immunio/processor.rb +234 -0
- data/lib/immunio/rails.rb +26 -0
- data/lib/immunio/request.rb +139 -0
- data/lib/immunio/rufus_lua_ext/ref.rb +27 -0
- data/lib/immunio/rufus_lua_ext/state.rb +157 -0
- data/lib/immunio/rufus_lua_ext/table.rb +137 -0
- data/lib/immunio/rufus_lua_ext/utils.rb +13 -0
- data/lib/immunio/version.rb +5 -0
- data/lib/immunio/vm.rb +291 -0
- data/lua-hooks/ext/all.c +78 -0
- data/lua-hooks/ext/bitop/README +22 -0
- data/lua-hooks/ext/bitop/bit.c +189 -0
- data/lua-hooks/ext/extconf.rb +38 -0
- data/lua-hooks/ext/libinjection/COPYING +37 -0
- data/lua-hooks/ext/libinjection/libinjection.h +65 -0
- data/lua-hooks/ext/libinjection/libinjection_html5.c +847 -0
- data/lua-hooks/ext/libinjection/libinjection_html5.h +54 -0
- data/lua-hooks/ext/libinjection/libinjection_sqli.c +2301 -0
- data/lua-hooks/ext/libinjection/libinjection_sqli.h +295 -0
- data/lua-hooks/ext/libinjection/libinjection_sqli_data.h +9349 -0
- data/lua-hooks/ext/libinjection/libinjection_xss.c +531 -0
- data/lua-hooks/ext/libinjection/libinjection_xss.h +21 -0
- data/lua-hooks/ext/libinjection/lualib.c +109 -0
- data/lua-hooks/ext/lpeg/HISTORY +90 -0
- data/lua-hooks/ext/lpeg/lpcap.c +537 -0
- data/lua-hooks/ext/lpeg/lpcap.h +43 -0
- data/lua-hooks/ext/lpeg/lpcode.c +986 -0
- data/lua-hooks/ext/lpeg/lpcode.h +34 -0
- data/lua-hooks/ext/lpeg/lpeg-128.gif +0 -0
- data/lua-hooks/ext/lpeg/lpeg.html +1429 -0
- data/lua-hooks/ext/lpeg/lpprint.c +244 -0
- data/lua-hooks/ext/lpeg/lpprint.h +35 -0
- data/lua-hooks/ext/lpeg/lptree.c +1238 -0
- data/lua-hooks/ext/lpeg/lptree.h +77 -0
- data/lua-hooks/ext/lpeg/lptypes.h +149 -0
- data/lua-hooks/ext/lpeg/lpvm.c +355 -0
- data/lua-hooks/ext/lpeg/lpvm.h +58 -0
- data/lua-hooks/ext/lpeg/makefile +55 -0
- data/lua-hooks/ext/lpeg/re.html +498 -0
- data/lua-hooks/ext/lpeg/test.lua +1409 -0
- data/lua-hooks/ext/lua-cmsgpack/CMakeLists.txt +45 -0
- data/lua-hooks/ext/lua-cmsgpack/README.md +115 -0
- data/lua-hooks/ext/lua-cmsgpack/lua_cmsgpack.c +957 -0
- data/lua-hooks/ext/lua-cmsgpack/test.lua +570 -0
- data/lua-hooks/ext/lua-snapshot/LICENSE +7 -0
- data/lua-hooks/ext/lua-snapshot/Makefile +12 -0
- data/lua-hooks/ext/lua-snapshot/README.md +18 -0
- data/lua-hooks/ext/lua-snapshot/dump.lua +15 -0
- data/lua-hooks/ext/lua-snapshot/snapshot.c +455 -0
- data/lua-hooks/ext/lua/COPYRIGHT +34 -0
- data/lua-hooks/ext/lua/lapi.c +1087 -0
- data/lua-hooks/ext/lua/lapi.h +16 -0
- data/lua-hooks/ext/lua/lauxlib.c +652 -0
- data/lua-hooks/ext/lua/lauxlib.h +174 -0
- data/lua-hooks/ext/lua/lbaselib.c +659 -0
- data/lua-hooks/ext/lua/lcode.c +831 -0
- data/lua-hooks/ext/lua/lcode.h +76 -0
- data/lua-hooks/ext/lua/ldblib.c +398 -0
- data/lua-hooks/ext/lua/ldebug.c +638 -0
- data/lua-hooks/ext/lua/ldebug.h +33 -0
- data/lua-hooks/ext/lua/ldo.c +519 -0
- data/lua-hooks/ext/lua/ldo.h +57 -0
- data/lua-hooks/ext/lua/ldump.c +164 -0
- data/lua-hooks/ext/lua/lfunc.c +174 -0
- data/lua-hooks/ext/lua/lfunc.h +34 -0
- data/lua-hooks/ext/lua/lgc.c +710 -0
- data/lua-hooks/ext/lua/lgc.h +110 -0
- data/lua-hooks/ext/lua/linit.c +38 -0
- data/lua-hooks/ext/lua/liolib.c +556 -0
- data/lua-hooks/ext/lua/llex.c +463 -0
- data/lua-hooks/ext/lua/llex.h +81 -0
- data/lua-hooks/ext/lua/llimits.h +128 -0
- data/lua-hooks/ext/lua/lmathlib.c +263 -0
- data/lua-hooks/ext/lua/lmem.c +86 -0
- data/lua-hooks/ext/lua/lmem.h +49 -0
- data/lua-hooks/ext/lua/loadlib.c +705 -0
- data/lua-hooks/ext/lua/loadlib_rel.c +760 -0
- data/lua-hooks/ext/lua/lobject.c +214 -0
- data/lua-hooks/ext/lua/lobject.h +381 -0
- data/lua-hooks/ext/lua/lopcodes.c +102 -0
- data/lua-hooks/ext/lua/lopcodes.h +268 -0
- data/lua-hooks/ext/lua/loslib.c +243 -0
- data/lua-hooks/ext/lua/lparser.c +1339 -0
- data/lua-hooks/ext/lua/lparser.h +82 -0
- data/lua-hooks/ext/lua/lstate.c +214 -0
- data/lua-hooks/ext/lua/lstate.h +169 -0
- data/lua-hooks/ext/lua/lstring.c +111 -0
- data/lua-hooks/ext/lua/lstring.h +31 -0
- data/lua-hooks/ext/lua/lstrlib.c +871 -0
- data/lua-hooks/ext/lua/ltable.c +588 -0
- data/lua-hooks/ext/lua/ltable.h +40 -0
- data/lua-hooks/ext/lua/ltablib.c +287 -0
- data/lua-hooks/ext/lua/ltm.c +75 -0
- data/lua-hooks/ext/lua/ltm.h +54 -0
- data/lua-hooks/ext/lua/lua.c +392 -0
- data/lua-hooks/ext/lua/lua.def +131 -0
- data/lua-hooks/ext/lua/lua.h +388 -0
- data/lua-hooks/ext/lua/lua.rc +28 -0
- data/lua-hooks/ext/lua/lua_dll.rc +26 -0
- data/lua-hooks/ext/lua/luac.c +200 -0
- data/lua-hooks/ext/lua/luac.rc +1 -0
- data/lua-hooks/ext/lua/luaconf.h +763 -0
- data/lua-hooks/ext/lua/luaconf.h.in +724 -0
- data/lua-hooks/ext/lua/luaconf.h.orig +763 -0
- data/lua-hooks/ext/lua/lualib.h +53 -0
- data/lua-hooks/ext/lua/lundump.c +227 -0
- data/lua-hooks/ext/lua/lundump.h +36 -0
- data/lua-hooks/ext/lua/lvm.c +767 -0
- data/lua-hooks/ext/lua/lvm.h +36 -0
- data/lua-hooks/ext/lua/lzio.c +82 -0
- data/lua-hooks/ext/lua/lzio.h +67 -0
- data/lua-hooks/ext/lua/print.c +227 -0
- data/lua-hooks/ext/luautf8/README.md +152 -0
- data/lua-hooks/ext/luautf8/lutf8lib.c +1274 -0
- data/lua-hooks/ext/luautf8/unidata.h +3064 -0
- data/lua-hooks/lib/boot.lua +254 -0
- data/lua-hooks/lib/encode.lua +4 -0
- data/lua-hooks/lib/lexers/LICENSE +21 -0
- data/lua-hooks/lib/lexers/bash.lua +134 -0
- data/lua-hooks/lib/lexers/bash_dqstr.lua +62 -0
- data/lua-hooks/lib/lexers/css.lua +216 -0
- data/lua-hooks/lib/lexers/html.lua +106 -0
- data/lua-hooks/lib/lexers/javascript.lua +68 -0
- data/lua-hooks/lib/lexers/lexer.lua +1575 -0
- data/lua-hooks/lib/lexers/markers.lua +33 -0
- metadata +308 -0
@@ -0,0 +1,254 @@
|
|
1
|
+
-- This file is executed when the Lua VM boots.
|
2
|
+
require 'encode'
|
3
|
+
|
4
|
+
-- This is required to make lexers load from test harness.
|
5
|
+
-- In VM the path is handled for us by vm.rb --ol
|
6
|
+
lexer_path='lib/lexers/?.lua'
|
7
|
+
package.path = package.path..';'..lexer_path
|
8
|
+
|
9
|
+
-- Define the environment available to code executing in the VM.
|
10
|
+
-- All available functions must be declared here.
|
11
|
+
-- Make sure the function is safe before adding it here.
|
12
|
+
-- See http://lua-users.org/wiki/SandBoxes
|
13
|
+
SANDBOX_ENV = {
|
14
|
+
-- Lua libs
|
15
|
+
ipairs = ipairs,
|
16
|
+
next = next,
|
17
|
+
pairs = pairs,
|
18
|
+
pcall = pcall,
|
19
|
+
tonumber = tonumber,
|
20
|
+
tostring = tostring,
|
21
|
+
type = type,
|
22
|
+
unpack = unpack,
|
23
|
+
assert = assert,
|
24
|
+
error = error,
|
25
|
+
getmetatable = getmetatable,
|
26
|
+
setmetatable = setmetatable,
|
27
|
+
rawget = rawget,
|
28
|
+
rawset = rawset,
|
29
|
+
collectgarbage = collectgarbage,
|
30
|
+
math = math,
|
31
|
+
string = string,
|
32
|
+
bit = {
|
33
|
+
band = bit.band,
|
34
|
+
extract = bit.extract,
|
35
|
+
bor = bit.bor,
|
36
|
+
bnot = bit.bnot,
|
37
|
+
arshift = bit.arshift,
|
38
|
+
rshift = bit.rshift,
|
39
|
+
rrotate = bit.rrotate,
|
40
|
+
replace = bit.replace,
|
41
|
+
lshift = bit.lshift,
|
42
|
+
lrotate = bit.lrotate,
|
43
|
+
btest = bit.btest,
|
44
|
+
bxor = bit.bxor
|
45
|
+
},
|
46
|
+
coroutine = {
|
47
|
+
create = coroutine.create,
|
48
|
+
resume = coroutine.resume,
|
49
|
+
running = coroutine.running,
|
50
|
+
status = coroutine.status,
|
51
|
+
wrap = coroutine.wrap,
|
52
|
+
yield = coroutine.yield,
|
53
|
+
},
|
54
|
+
debug = {
|
55
|
+
-- Block most debug in sandbox, but allow tracebacks
|
56
|
+
traceback = debug.traceback
|
57
|
+
},
|
58
|
+
select = select,
|
59
|
+
utf8 = {
|
60
|
+
byte = utf8.byte,
|
61
|
+
char = utf8.char,
|
62
|
+
find = utf8.find,
|
63
|
+
format = utf8.format,
|
64
|
+
gmatch = utf8.gmatch,
|
65
|
+
gsub = utf8.gsub,
|
66
|
+
len = utf8.len,
|
67
|
+
lower = utf8.lower,
|
68
|
+
match = utf8.match,
|
69
|
+
rep = utf8.rep,
|
70
|
+
reverse = utf8.reverse,
|
71
|
+
sub = utf8.sub,
|
72
|
+
upper = utf8.upper,
|
73
|
+
split = utf8.split,
|
74
|
+
escape = utf8.escape,
|
75
|
+
charpos = utf8.charpos,
|
76
|
+
insert = utf8.insert,
|
77
|
+
remove = utf8.remove,
|
78
|
+
next = utf8.next,
|
79
|
+
ncasecmp = utf8.ncasecmp,
|
80
|
+
},
|
81
|
+
table = {
|
82
|
+
insert = table.insert,
|
83
|
+
maxn = table.maxn,
|
84
|
+
remove = table.remove,
|
85
|
+
sort = table.sort,
|
86
|
+
map = table.map,
|
87
|
+
reduce = table.reduce,
|
88
|
+
length = table.length,
|
89
|
+
concat = table.concat,
|
90
|
+
},
|
91
|
+
libinjection = {
|
92
|
+
sqli = libinjection.sqli,
|
93
|
+
fingerprint = libinjection.fingerprint,
|
94
|
+
xss = libinjection.xss,
|
95
|
+
sqli_tokenize = libinjection.sqli_tokenize
|
96
|
+
},
|
97
|
+
-- LPeg Library
|
98
|
+
lpeg = {
|
99
|
+
ptree = lpeg.ptree,
|
100
|
+
pcode = lpeg.pcode,
|
101
|
+
match = lpeg.match,
|
102
|
+
B = lpeg.B,
|
103
|
+
V = lpeg.V,
|
104
|
+
C = lpeg.C,
|
105
|
+
Cc = lpeg.Cc,
|
106
|
+
Cmt = lpeg.Cmt,
|
107
|
+
Cb = lpeg.Cb,
|
108
|
+
Carg = lpeg.Carg,
|
109
|
+
Cp = lpeg.Cp,
|
110
|
+
Cs = lpeg.Cs,
|
111
|
+
Ct = lpeg.Ct,
|
112
|
+
Cf = lpeg.Cf,
|
113
|
+
Cg = lpeg.Cg,
|
114
|
+
P = lpeg.P,
|
115
|
+
S = lpeg.S,
|
116
|
+
R = lpeg.R,
|
117
|
+
locale = lpeg.locale,
|
118
|
+
version = lpeg.version,
|
119
|
+
setmaxstack = lpeg.setmaxstack,
|
120
|
+
type = lpeg.type,
|
121
|
+
},
|
122
|
+
-- pre built lexer library
|
123
|
+
-- the call to load here will both load the code
|
124
|
+
-- and compile the LPeg grammar
|
125
|
+
lexers = {
|
126
|
+
lexer = require('lexers/lexer'),
|
127
|
+
bash = require('lexers/lexer').load('bash'), -- bash
|
128
|
+
bash_dqstr = require('lexers/lexer').load('bash_dqstr'), -- bash strings
|
129
|
+
markers = require('lexers/lexer').load('markers'),
|
130
|
+
html = require('lexers/lexer').load('html'),
|
131
|
+
javascript = require('lexers/lexer').load('javascript'),
|
132
|
+
css = require('lexers/lexer').load('css'),
|
133
|
+
},
|
134
|
+
-- Immunio vars
|
135
|
+
serverdata = {}, -- Default empty serverdata
|
136
|
+
agentdata = {},
|
137
|
+
utils = {}, -- Used to store utility functions declared in the sandbox.
|
138
|
+
-- pass mode flags into the VM
|
139
|
+
DEV_MODE = DEV_MODE,
|
140
|
+
DEBUG_MODE = DEBUG_MODE,
|
141
|
+
LUA_PLATFORM = LUA_PLATFORM or 'unix',
|
142
|
+
IMMUNIO_KEY = IMMUNIO_KEY,
|
143
|
+
IMMUNIO_SECRET = IMMUNIO_SECRET
|
144
|
+
}
|
145
|
+
|
146
|
+
-- Enable a few more things in dev mode. For debugging.
|
147
|
+
if DEBUG_MODE or DEV_MODE then
|
148
|
+
SANDBOX_ENV.print = print
|
149
|
+
SANDBOX_ENV.snapshot = snapshot
|
150
|
+
else
|
151
|
+
SANDBOX_ENV.print = function(...) end
|
152
|
+
end
|
153
|
+
|
154
|
+
|
155
|
+
-- Call `method` inside the sandbox, optionally as a method of a Lua
-- pseudo-object.
--
-- method: function to run; its environment is replaced for the duration of
--         the call and reset to the real _G afterwards.
-- object: optional receiver. When truthy it is passed as the only argument,
--         otherwise `method` is called with no arguments at all.
-- vars:   optional table of extra names; entries override SANDBOX_ENV.
--
-- Returns the first value returned by `method`.
function sandboxed_method_call(method, object, vars)
  if DEBUG_MODE then
    local utils = SANDBOX_ENV.utils
    utils.debug_prefix = "UNKNOWN"
    -- Change the values here to toggle debugging per module.
    utils.debug_module_prefixes = {
      UNKNOWN = true,
      IO = true,
      SQLi = true,
      ExceptionHandler = true,
      Redirect = true,
      XSS = true,
      Eval = true,
    }
  end

  -- Build a fresh environment for every call so that globals written by the
  -- sandboxed code do not leak out of it. Precedence, lowest to highest:
  -- the self-referencing _G, then SANDBOX_ENV, then the caller's vars.
  local env = {}
  env._G = env
  for name, value in pairs(SANDBOX_ENV) do
    env[name] = value
  end
  for name, value in pairs(vars or {}) do
    env[name] = value
  end

  -- XXX Open sandbox in DEBUG_MODE: expose the real global table.
  if DEBUG_MODE then
    env['__REAL_G'] = _G
  end

  -- Run the function against the sandbox environment.
  setfenv(method, env)
  local result
  if not object then
    result = method()
  else
    result = method(object)
  end

  -- Make the environment's values weak so the GC is free to drop them; the
  -- original author notes that without this the environment is retained and
  -- "we leak the universe".
  setmetatable(env, {__mode = "v"})
  -- Detach the sandbox environment from the function so it can be collected
  -- sooner.
  setfenv(method, _G)
  return result
end
|
207
|
+
|
208
|
+
-- Entry point used by the VM to run a plain function in the sandbox.
-- Equivalent to sandboxed_method_call with no receiver object.
--
-- func: function to run sandboxed.
-- vars: optional table of extra names made visible to `func`.
function sandboxed_call(func, vars)
  return sandboxed_method_call(func, nil, vars)
end
|
212
|
+
|
213
|
+
if DEBUG_MODE then
|
214
|
+
-- Memory Snapshot Debugger
|
215
|
+
local saved_snapshot = {}
|
216
|
+
-- Take a new memory snapshot, store it in `saved_snapshot` and print every
-- entry in it.
--
-- label: optional value printed after the header line.
function dump_snapshot( label )
  -- Two collection passes are requested before the snapshot is taken.
  for _ = 1, 2 do
    collectgarbage()
  end
  saved_snapshot = snapshot.snapshot()

  print("------------------------\nSNAPSHOT:\n")
  if label then
    print(label)
  end

  for key, description in pairs(saved_snapshot) do
    -- Strip the "userdata:" prefix from the key's string form.
    local address = tostring(key):gsub("userdata:", "")
    print("ALLOCATION:" .. address .. " " .. description)
  end
end
|
226
|
+
|
227
|
+
-- Replace `saved_snapshot` with a fresh snapshot without printing anything.
function update_snapshot()
  -- Two collection passes are requested before the snapshot is taken.
  for _ = 1, 2 do
    collectgarbage()
  end
  saved_snapshot = snapshot.snapshot()
end
|
232
|
+
|
233
|
+
-- Report what is new in memory compared with `saved_snapshot`.
--
-- Requests two garbage collections, takes a fresh snapshot and lists every
-- entry whose key is not present in `saved_snapshot`.
--
-- update: when truthy, the fresh snapshot becomes the new `saved_snapshot`.
--
-- Returns a string: a header followed by one "ALLOCATION:..." line per new
-- entry (line order follows pairs() and is therefore unspecified).
function diff_snapshot( update )
  collectgarbage()
  collectgarbage()
  local current = snapshot.snapshot()

  -- The report is accumulated in a local buffer. Previously it was built in
  -- an undeclared variable `output`, which leaked into the global table.
  local report = { "------------------------\nDIFF SNAPSHOT:\n" }
  for key, description in pairs(current) do
    if saved_snapshot[key] == nil then
      -- Strip the "userdata:" prefix from the key's string form.
      local address = tostring(key):gsub("userdata:", "")
      report[#report + 1] = "ALLOCATION:" .. address .. " " .. description .. "\n"
    end
  end

  if update then
    saved_snapshot = current
  end
  return table.concat(report)
end
|
247
|
+
|
248
|
+
-- Uncomment for snapshot tracing
|
249
|
+
--snapshot.tron()
|
250
|
+
-- Uncomment to generate a snapshot at boot.
|
251
|
+
--dump_snapshot('BOOT')
|
252
|
+
--snapshot.troff()
|
253
|
+
end
|
254
|
+
|
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2007-2015 Mitchell
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
@@ -0,0 +1,134 @@
|
|
1
|
+
-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com.
|
2
|
+
-- Copyright 2015 Immunio, Inc.
|
3
|
+
|
4
|
+
-- Shell LPeg lexer.
|
5
|
+
|
6
|
+
-- This is based on the lexer from the Scintillua package, with a lot of extensions.
|
7
|
+
-- The goal isn't a complete parser for bash, but a lexer that can extract a useful
|
8
|
+
-- amount of structure to detect tampering. The emphasis is more on common injection
|
9
|
+
-- techniques and lexical structure than actually extracting properly formed bash
|
10
|
+
-- statements. Down the road we may need to go as far as to parse statements, and that
|
11
|
+
-- should be possible at the cost of a lot more complexity.
|
12
|
+
|
13
|
+
local l = require('lexer')
|
14
|
+
local token = l.token
|
15
|
+
local P, R, S = lpeg.P, lpeg.R, lpeg.S
|
16
|
+
|
17
|
+
local M = {_NAME = 'bash'}
|
18
|
+
|
19
|
+
-- Whitespace.
|
20
|
+
local ws = token(l.WHITESPACE, l.space^1)
|
21
|
+
|
22
|
+
local bash_word = (l.alpha + '_') * (l.alnum + '_' + '\\ ' + '.')^0
|
23
|
+
|
24
|
+
|
25
|
+
-- Comments.
|
26
|
+
local comment = token(l.COMMENT, '#' * l.nonnewline^0)
|
27
|
+
|
28
|
+
-- Strings.
|
29
|
+
local sq_str = token('sq_str', l.delimited_range("'", false, true))
|
30
|
+
local dq_str = token('dq_str', l.delimited_range('"'))
|
31
|
+
local ex_str = token('ex_str', l.delimited_range('`'))
|
32
|
+
-- Heredoc token: '<<' followed by a match-time function that locates the
-- end of the heredoc body.
local heredoc = token('heredoc', '<<' * P(function(input, index)
  -- Opening tag: optional '-', an optionally quoted identifier, then one or
  -- more newline / form-feed / ';' characters.
  local first, last, _, delimiter =
    input:find('%-?(["\']?)([%a_][%w_]*)%1[\n\r\f;]+', index)
  -- The tag must start exactly at the current position, otherwise no match.
  if first ~= index or not delimiter then
    return
  end
  -- Look for the delimiter again after a line break; if it never appears the
  -- heredoc runs to the end of the input.
  local _, closing_end = input:find('[\n\r\f]+'..delimiter, last)
  if closing_end then
    return closing_end + 1
  end
  return #input + 1
end))
|
40
|
+
local bash_string = sq_str + dq_str + ex_str + heredoc
|
41
|
+
|
42
|
+
-- Numbers.
|
43
|
+
local number = token(l.NUMBER, l.float + l.integer)
|
44
|
+
|
45
|
+
-- Keywords.
|
46
|
+
local keyword = token(l.KEYWORD, l.word_match({
|
47
|
+
'if', 'then', 'elif', 'else', 'fi', 'case', 'in', 'esac', 'while', 'for',
|
48
|
+
'do', 'done', 'continue', 'local', 'return', 'select',
|
49
|
+
-- Operators. These could be split into individual tokens...
|
50
|
+
'-a', '-b', '-c', '-d', '-e', '-f', '-g', '-h', '-k', '-p', '-r', '-s', '-t',
|
51
|
+
'-u', '-w', '-x', '-O', '-G', '-L', '-S', '-N', '-nt', '-ot', '-ef', '-o',
|
52
|
+
'-z', '-n', '-eq', '-ne', '-lt', '-le', '-gt', '-ge'
|
53
|
+
}, '-'))
|
54
|
+
|
55
|
+
-- Common commands ... this is not exhaustive nor does it need to be.
|
56
|
+
local command = token("command", l.word_match({
|
57
|
+
'awk', 'cat', 'cmp', 'cp', 'curl', 'cut', 'date', 'find', 'grep', 'gunzip', 'gvim',
|
58
|
+
'gzip', 'kill', 'lua', 'make', 'mkdir', 'mv', 'php', 'pkill', 'python', 'rm',
|
59
|
+
'rmdir', 'rsync', 'ruby', 'scp', 'sed', 'sleep', 'ssh', 'sudo', 'tar', 'unlink',
|
60
|
+
'wget', 'zip'
|
61
|
+
}, '-'))
|
62
|
+
|
63
|
+
-- Builtins
|
64
|
+
local builtin = token("builtin", l.word_match({
|
65
|
+
'alias', 'bind', 'builtin', 'caller', 'command', 'declare', 'echo', 'enable',
|
66
|
+
'help', 'let', 'local', 'logout', 'mapfile', 'printf', 'read', 'readarray',
|
67
|
+
'source', 'type', 'typeset', 'ulimit', 'unalias',
|
68
|
+
}, '-'))
|
69
|
+
|
70
|
+
-- Filenames. This is a bit sloppy, but tries to discern filenames from other identifiers
|
71
|
+
-- Very much a case of R&D 'suck it and see'
|
72
|
+
local filename = token("filename", P('/')^0 * (bash_word + '.') * (
|
73
|
+
'/' + bash_word + '.' )^0 * ('.' * bash_word )^0 )
|
74
|
+
|
75
|
+
local ip = (l.integer * P('.') * l.integer * P('.') * l.integer * P('.') * l.integer)
|
76
|
+
|
77
|
+
local protocol = ((P('https') + 'http' + 'ftp' + 'irc') * '://') + 'mailto:'
|
78
|
+
local remainder = ((1-S'\r\n\f\t\v ,."}])') + (S',."}])' * (1-S'\r\n\f\t\v ')))^0
|
79
|
+
local url = protocol * remainder
|
80
|
+
|
81
|
+
-- Identifiers.
|
82
|
+
local identifier = token(l.IDENTIFIER, url + ip + bash_word)
|
83
|
+
|
84
|
+
-- Variables.
|
85
|
+
local ex_variable = token("ex_variable",
|
86
|
+
'$' * l.delimited_range('()', true, true))
|
87
|
+
|
88
|
+
local variable = token(l.VARIABLE,
|
89
|
+
'$' * (S('!#?*@$') + l.digit^1 + bash_word +
|
90
|
+
l.delimited_range('{}', true, true)))
|
91
|
+
|
92
|
+
local var = ex_variable + variable
|
93
|
+
|
94
|
+
-- Operators. These could be split into individual tokens...
|
95
|
+
local operator = token(l.OPERATOR, S('=!<>+-/*^&|~.,:;?()[]{}'))
|
96
|
+
|
97
|
+
M._rules = {
|
98
|
+
{'whitespace', ws},
|
99
|
+
{'keyword', keyword},
|
100
|
+
{'builtin', builtin},
|
101
|
+
{'command', command},
|
102
|
+
{'identifier', identifier},
|
103
|
+
{'filename', filename},
|
104
|
+
{'string', bash_string},
|
105
|
+
{'comment', comment},
|
106
|
+
{'number', number},
|
107
|
+
{'variable', var},
|
108
|
+
{'operator', operator},
|
109
|
+
}
|
110
|
+
|
111
|
+
-- Main entry point for lexing bash data.
--
-- Lexes `str` with this lexer, then replaces the 'val' of every token that
-- contains nested shell text with the token list produced by lexing that
-- text recursively. The double-quoted-string sub-lexer is passed in rather
-- than created here so the caller can cache the instance and avoid
-- recompiling its grammar.
--
-- self:             the bash lexer.
-- str:              text to lex.
-- bash_dqstr_lexer: lexer used for double-quoted strings and heredocs.
--
-- Returns the token list from self:lex(str), modified in place.
function M.lex_recursive( self, str, bash_dqstr_lexer )
  local tokens = self:lex(str)
  for idx = 1, #tokens do
    local tok = tokens[idx]
    local kind, text = tok['token'], tok['val']
    if kind == "ex_str" then
      -- `...`: drop the surrounding backticks, lex the inside as bash.
      tok['val'] = self:lex_recursive(string.sub(text, 2, -2), bash_dqstr_lexer)
    elseif kind == "ex_variable" then
      -- $(...): drop the leading '$(' and trailing ')', lex the inside as bash.
      tok['val'] = self:lex_recursive(string.sub(text, 3, -2), bash_dqstr_lexer)
    elseif kind == "dq_str" then
      -- "...": drop the quotes and hand the inside to the dq-string lexer.
      tok['val'] = bash_dqstr_lexer:lex_recursive(string.sub(text, 2, -2), self)
    elseif kind == "heredoc" then
      -- Heredocs go to the dq-string lexer unmodified.
      tok['val'] = bash_dqstr_lexer:lex_recursive(text, self)
    end
  end
  return tokens
end
|
131
|
+
|
132
|
+
return M
|
133
|
+
|
134
|
+
|
@@ -0,0 +1,62 @@
|
|
1
|
+
-- Copyright (C) 2015 Immunio, Inc.
|
2
|
+
|
3
|
+
-- Lexer for bash magic double quotes
|
4
|
+
|
5
|
+
-- NOTE: not covered by Scintillua MIT license in this directory.
|
6
|
+
|
7
|
+
-- While our lexer has the ability to embed this sort of thing as a child of another lexer
|
8
|
+
-- I didn't bother here due to the recursion; we need to lex the parent (bash) language
|
9
|
+
-- for some tokens which would be very complex at best. It's cleaner to use two lexers
|
10
|
+
-- and handle the recursion in higher level lua at a minute performance cost.
|
11
|
+
|
12
|
+
-- Shared lexer helpers and LPeg constructors used by the rules below.
local l = require('lexer')
-- word_match comes from the lexer module, as in bash.lua. It was previously
-- read from an undefined global `bash_word_match`, so it was always nil.
local token, word_match = l.token, l.word_match
local P, R, S = lpeg.P, lpeg.R, lpeg.S
|
15
|
+
|
16
|
+
local M = {_NAME = 'bash_dqstr'}
|
17
|
+
|
18
|
+
-- Whitespace.
|
19
|
+
local ws = token(l.WHITESPACE, l.space^1)
|
20
|
+
|
21
|
+
-- Generic token.
|
22
|
+
local bash_word = (l.alpha + '_') * (l.alnum + '_' + '\\ ')^0
|
23
|
+
|
24
|
+
-- Strings.
|
25
|
+
-- Shell substitution.
|
26
|
+
local ex_str = token('ex_str', l.delimited_range('`'))
|
27
|
+
|
28
|
+
-- Other string data
|
29
|
+
local bash_string = token('str_data', (l.any - '$' - '`')^1)
|
30
|
+
|
31
|
+
-- Variables.
|
32
|
+
-- Shell Substitution.
|
33
|
+
local ex_variable = token("ex_variable",
|
34
|
+
'$' * l.delimited_range('()', true, true))
|
35
|
+
-- Other variables
|
36
|
+
local variable = token(l.VARIABLE,
|
37
|
+
'$' * (S('!#?*@$') + l.digit^1 + bash_word +
|
38
|
+
l.delimited_range('{}', true, true)))
|
39
|
+
|
40
|
+
local var = ex_variable + variable
|
41
|
+
|
42
|
+
M._rules = {
|
43
|
+
{'variable', var},
|
44
|
+
{'ex_str', ex_str},
|
45
|
+
{'string', bash_string},
|
46
|
+
}
|
47
|
+
|
48
|
+
-- Lex the contents of a bash double-quoted string.
--
-- Lexes `str` with this lexer, then replaces the 'val' of each shell
-- substitution token with the token list obtained by lexing its contents
-- with the parent bash lexer.
--
-- self:       the bash_dqstr lexer.
-- str:        text to lex.
-- bash_lexer: lexer used for the contents of `...` and $(...).
--
-- Returns the token list from self:lex(str), modified in place.
function M.lex_recursive( self, str, bash_lexer )
  local tokens = self:lex(str)
  for idx = 1, #tokens do
    local tok = tokens[idx]
    local kind = tok['token']
    if kind == "ex_str" then
      -- `...`: drop the surrounding backticks.
      tok['val'] = bash_lexer:lex_recursive(string.sub(tok['val'], 2, -2), self)
    elseif kind == "ex_variable" then
      -- $(...): drop the leading '$(' and trailing ')'.
      tok['val'] = bash_lexer:lex_recursive(string.sub(tok['val'], 3, -2), self)
    end
  end
  return tokens
end
|
59
|
+
|
60
|
+
return M
|
61
|
+
|
62
|
+
|