immunio 0.15.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (157) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +234 -0
  3. data/README.md +147 -0
  4. data/bin/immunio +5 -0
  5. data/lib/immunio.rb +29 -0
  6. data/lib/immunio/agent.rb +260 -0
  7. data/lib/immunio/authentication.rb +96 -0
  8. data/lib/immunio/blocked_app.rb +38 -0
  9. data/lib/immunio/channel.rb +432 -0
  10. data/lib/immunio/cli.rb +39 -0
  11. data/lib/immunio/context.rb +114 -0
  12. data/lib/immunio/errors.rb +43 -0
  13. data/lib/immunio/immunio_ca.crt +45 -0
  14. data/lib/immunio/logger.rb +87 -0
  15. data/lib/immunio/plugins/action_dispatch.rb +45 -0
  16. data/lib/immunio/plugins/action_view.rb +431 -0
  17. data/lib/immunio/plugins/active_record.rb +707 -0
  18. data/lib/immunio/plugins/active_record_relation.rb +370 -0
  19. data/lib/immunio/plugins/authlogic.rb +80 -0
  20. data/lib/immunio/plugins/csrf.rb +24 -0
  21. data/lib/immunio/plugins/devise.rb +40 -0
  22. data/lib/immunio/plugins/environment_reporter.rb +69 -0
  23. data/lib/immunio/plugins/eval.rb +51 -0
  24. data/lib/immunio/plugins/exception_handler.rb +55 -0
  25. data/lib/immunio/plugins/gems_tracker.rb +5 -0
  26. data/lib/immunio/plugins/haml.rb +36 -0
  27. data/lib/immunio/plugins/http_finisher.rb +50 -0
  28. data/lib/immunio/plugins/http_tracker.rb +203 -0
  29. data/lib/immunio/plugins/io.rb +96 -0
  30. data/lib/immunio/plugins/redirect.rb +42 -0
  31. data/lib/immunio/plugins/warden.rb +66 -0
  32. data/lib/immunio/processor.rb +234 -0
  33. data/lib/immunio/rails.rb +26 -0
  34. data/lib/immunio/request.rb +139 -0
  35. data/lib/immunio/rufus_lua_ext/ref.rb +27 -0
  36. data/lib/immunio/rufus_lua_ext/state.rb +157 -0
  37. data/lib/immunio/rufus_lua_ext/table.rb +137 -0
  38. data/lib/immunio/rufus_lua_ext/utils.rb +13 -0
  39. data/lib/immunio/version.rb +5 -0
  40. data/lib/immunio/vm.rb +291 -0
  41. data/lua-hooks/ext/all.c +78 -0
  42. data/lua-hooks/ext/bitop/README +22 -0
  43. data/lua-hooks/ext/bitop/bit.c +189 -0
  44. data/lua-hooks/ext/extconf.rb +38 -0
  45. data/lua-hooks/ext/libinjection/COPYING +37 -0
  46. data/lua-hooks/ext/libinjection/libinjection.h +65 -0
  47. data/lua-hooks/ext/libinjection/libinjection_html5.c +847 -0
  48. data/lua-hooks/ext/libinjection/libinjection_html5.h +54 -0
  49. data/lua-hooks/ext/libinjection/libinjection_sqli.c +2301 -0
  50. data/lua-hooks/ext/libinjection/libinjection_sqli.h +295 -0
  51. data/lua-hooks/ext/libinjection/libinjection_sqli_data.h +9349 -0
  52. data/lua-hooks/ext/libinjection/libinjection_xss.c +531 -0
  53. data/lua-hooks/ext/libinjection/libinjection_xss.h +21 -0
  54. data/lua-hooks/ext/libinjection/lualib.c +109 -0
  55. data/lua-hooks/ext/lpeg/HISTORY +90 -0
  56. data/lua-hooks/ext/lpeg/lpcap.c +537 -0
  57. data/lua-hooks/ext/lpeg/lpcap.h +43 -0
  58. data/lua-hooks/ext/lpeg/lpcode.c +986 -0
  59. data/lua-hooks/ext/lpeg/lpcode.h +34 -0
  60. data/lua-hooks/ext/lpeg/lpeg-128.gif +0 -0
  61. data/lua-hooks/ext/lpeg/lpeg.html +1429 -0
  62. data/lua-hooks/ext/lpeg/lpprint.c +244 -0
  63. data/lua-hooks/ext/lpeg/lpprint.h +35 -0
  64. data/lua-hooks/ext/lpeg/lptree.c +1238 -0
  65. data/lua-hooks/ext/lpeg/lptree.h +77 -0
  66. data/lua-hooks/ext/lpeg/lptypes.h +149 -0
  67. data/lua-hooks/ext/lpeg/lpvm.c +355 -0
  68. data/lua-hooks/ext/lpeg/lpvm.h +58 -0
  69. data/lua-hooks/ext/lpeg/makefile +55 -0
  70. data/lua-hooks/ext/lpeg/re.html +498 -0
  71. data/lua-hooks/ext/lpeg/test.lua +1409 -0
  72. data/lua-hooks/ext/lua-cmsgpack/CMakeLists.txt +45 -0
  73. data/lua-hooks/ext/lua-cmsgpack/README.md +115 -0
  74. data/lua-hooks/ext/lua-cmsgpack/lua_cmsgpack.c +957 -0
  75. data/lua-hooks/ext/lua-cmsgpack/test.lua +570 -0
  76. data/lua-hooks/ext/lua-snapshot/LICENSE +7 -0
  77. data/lua-hooks/ext/lua-snapshot/Makefile +12 -0
  78. data/lua-hooks/ext/lua-snapshot/README.md +18 -0
  79. data/lua-hooks/ext/lua-snapshot/dump.lua +15 -0
  80. data/lua-hooks/ext/lua-snapshot/snapshot.c +455 -0
  81. data/lua-hooks/ext/lua/COPYRIGHT +34 -0
  82. data/lua-hooks/ext/lua/lapi.c +1087 -0
  83. data/lua-hooks/ext/lua/lapi.h +16 -0
  84. data/lua-hooks/ext/lua/lauxlib.c +652 -0
  85. data/lua-hooks/ext/lua/lauxlib.h +174 -0
  86. data/lua-hooks/ext/lua/lbaselib.c +659 -0
  87. data/lua-hooks/ext/lua/lcode.c +831 -0
  88. data/lua-hooks/ext/lua/lcode.h +76 -0
  89. data/lua-hooks/ext/lua/ldblib.c +398 -0
  90. data/lua-hooks/ext/lua/ldebug.c +638 -0
  91. data/lua-hooks/ext/lua/ldebug.h +33 -0
  92. data/lua-hooks/ext/lua/ldo.c +519 -0
  93. data/lua-hooks/ext/lua/ldo.h +57 -0
  94. data/lua-hooks/ext/lua/ldump.c +164 -0
  95. data/lua-hooks/ext/lua/lfunc.c +174 -0
  96. data/lua-hooks/ext/lua/lfunc.h +34 -0
  97. data/lua-hooks/ext/lua/lgc.c +710 -0
  98. data/lua-hooks/ext/lua/lgc.h +110 -0
  99. data/lua-hooks/ext/lua/linit.c +38 -0
  100. data/lua-hooks/ext/lua/liolib.c +556 -0
  101. data/lua-hooks/ext/lua/llex.c +463 -0
  102. data/lua-hooks/ext/lua/llex.h +81 -0
  103. data/lua-hooks/ext/lua/llimits.h +128 -0
  104. data/lua-hooks/ext/lua/lmathlib.c +263 -0
  105. data/lua-hooks/ext/lua/lmem.c +86 -0
  106. data/lua-hooks/ext/lua/lmem.h +49 -0
  107. data/lua-hooks/ext/lua/loadlib.c +705 -0
  108. data/lua-hooks/ext/lua/loadlib_rel.c +760 -0
  109. data/lua-hooks/ext/lua/lobject.c +214 -0
  110. data/lua-hooks/ext/lua/lobject.h +381 -0
  111. data/lua-hooks/ext/lua/lopcodes.c +102 -0
  112. data/lua-hooks/ext/lua/lopcodes.h +268 -0
  113. data/lua-hooks/ext/lua/loslib.c +243 -0
  114. data/lua-hooks/ext/lua/lparser.c +1339 -0
  115. data/lua-hooks/ext/lua/lparser.h +82 -0
  116. data/lua-hooks/ext/lua/lstate.c +214 -0
  117. data/lua-hooks/ext/lua/lstate.h +169 -0
  118. data/lua-hooks/ext/lua/lstring.c +111 -0
  119. data/lua-hooks/ext/lua/lstring.h +31 -0
  120. data/lua-hooks/ext/lua/lstrlib.c +871 -0
  121. data/lua-hooks/ext/lua/ltable.c +588 -0
  122. data/lua-hooks/ext/lua/ltable.h +40 -0
  123. data/lua-hooks/ext/lua/ltablib.c +287 -0
  124. data/lua-hooks/ext/lua/ltm.c +75 -0
  125. data/lua-hooks/ext/lua/ltm.h +54 -0
  126. data/lua-hooks/ext/lua/lua.c +392 -0
  127. data/lua-hooks/ext/lua/lua.def +131 -0
  128. data/lua-hooks/ext/lua/lua.h +388 -0
  129. data/lua-hooks/ext/lua/lua.rc +28 -0
  130. data/lua-hooks/ext/lua/lua_dll.rc +26 -0
  131. data/lua-hooks/ext/lua/luac.c +200 -0
  132. data/lua-hooks/ext/lua/luac.rc +1 -0
  133. data/lua-hooks/ext/lua/luaconf.h +763 -0
  134. data/lua-hooks/ext/lua/luaconf.h.in +724 -0
  135. data/lua-hooks/ext/lua/luaconf.h.orig +763 -0
  136. data/lua-hooks/ext/lua/lualib.h +53 -0
  137. data/lua-hooks/ext/lua/lundump.c +227 -0
  138. data/lua-hooks/ext/lua/lundump.h +36 -0
  139. data/lua-hooks/ext/lua/lvm.c +767 -0
  140. data/lua-hooks/ext/lua/lvm.h +36 -0
  141. data/lua-hooks/ext/lua/lzio.c +82 -0
  142. data/lua-hooks/ext/lua/lzio.h +67 -0
  143. data/lua-hooks/ext/lua/print.c +227 -0
  144. data/lua-hooks/ext/luautf8/README.md +152 -0
  145. data/lua-hooks/ext/luautf8/lutf8lib.c +1274 -0
  146. data/lua-hooks/ext/luautf8/unidata.h +3064 -0
  147. data/lua-hooks/lib/boot.lua +254 -0
  148. data/lua-hooks/lib/encode.lua +4 -0
  149. data/lua-hooks/lib/lexers/LICENSE +21 -0
  150. data/lua-hooks/lib/lexers/bash.lua +134 -0
  151. data/lua-hooks/lib/lexers/bash_dqstr.lua +62 -0
  152. data/lua-hooks/lib/lexers/css.lua +216 -0
  153. data/lua-hooks/lib/lexers/html.lua +106 -0
  154. data/lua-hooks/lib/lexers/javascript.lua +68 -0
  155. data/lua-hooks/lib/lexers/lexer.lua +1575 -0
  156. data/lua-hooks/lib/lexers/markers.lua +33 -0
  157. metadata +308 -0
@@ -0,0 +1,254 @@
1
+ -- This file is executed when the Lua VM boots.
2
+ require 'encode'
3
+
4
+ -- This is required to make lexers load from test harness.
5
+ -- In VM the path is handled for us by vm.rb --ol
6
+ lexer_path='lib/lexers/?.lua'
7
+ package.path = package.path..';'..lexer_path
8
+
9
+ -- Define the environment available to code executing in the VM.
10
+ -- All available functions must be declared here.
11
+ -- Make sure the function is safe before adding it here.
12
+ -- See http://lua-users.org/wiki/SandBoxes
13
+ SANDBOX_ENV = {
14
+ -- Lua libs
15
+ ipairs = ipairs,
16
+ next = next,
17
+ pairs = pairs,
18
+ pcall = pcall,
19
+ tonumber = tonumber,
20
+ tostring = tostring,
21
+ type = type,
22
+ unpack = unpack,
23
+ assert = assert,
24
+ error = error,
25
+ getmetatable = getmetatable,
26
+ setmetatable = setmetatable,
27
+ rawget = rawget,
28
+ rawset = rawset,
29
+ collectgarbage = collectgarbage,
30
+ math = math,
31
+ string = string,
32
+ bit = {
33
+ band = bit.band,
34
+ extract = bit.extract,
35
+ bor = bit.bor,
36
+ bnot = bit.bnot,
37
+ arshift = bit.arshift,
38
+ rshift = bit.rshift,
39
+ rrotate = bit.rrotate,
40
+ replace = bit.replace,
41
+ lshift = bit.lshift,
42
+ lrotate = bit.lrotate,
43
+ btest = bit.btest,
44
+ bxor = bit.bxor
45
+ },
46
+ coroutine = {
47
+ create = coroutine.create,
48
+ resume = coroutine.resume,
49
+ running = coroutine.running,
50
+ status = coroutine.status,
51
+ wrap = coroutine.wrap,
52
+ yield = coroutine.yield,
53
+ },
54
+ debug = {
55
+ -- Block most debug in sandbox, but allow tracebacks
56
+ traceback = debug.traceback
57
+ },
58
+ select = select,
59
+ utf8 = {
60
+ byte = utf8.byte,
61
+ char = utf8.char,
62
+ find = utf8.find,
63
+ format = utf8.format,
64
+ gmatch = utf8.gmatch,
65
+ gsub = utf8.gsub,
66
+ len = utf8.len,
67
+ lower = utf8.lower,
68
+ match = utf8.match,
69
+ rep = utf8.rep,
70
+ reverse = utf8.reverse,
71
+ sub = utf8.sub,
72
+ upper = utf8.upper,
73
+ split = utf8.split,
74
+ escape = utf8.escape,
75
+ charpos = utf8.charpos,
76
+ insert = utf8.insert,
77
+ remove = utf8.remove,
78
+ next = utf8.next,
79
+ ncasecmp = utf8.ncasecmp,
80
+ },
81
+ table = {
82
+ insert = table.insert,
83
+ maxn = table.maxn,
84
+ remove = table.remove,
85
+ sort = table.sort,
86
+ map = table.map,
87
+ reduce = table.reduce,
88
+ length = table.length,
89
+ concat = table.concat,
90
+ },
91
+ libinjection = {
92
+ sqli = libinjection.sqli,
93
+ fingerprint = libinjection.fingerprint,
94
+ xss = libinjection.xss,
95
+ sqli_tokenize = libinjection.sqli_tokenize
96
+ },
97
+ -- LPeg Library
98
+ lpeg = {
99
+ ptree = lpeg.ptree,
100
+ pcode = lpeg.pcode,
101
+ match = lpeg.match,
102
+ B = lpeg.B,
103
+ V = lpeg.V,
104
+ C = lpeg.C,
105
+ Cc = lpeg.Cc,
106
+ Cmt = lpeg.Cmt,
107
+ Cb = lpeg.Cb,
108
+ Carg = lpeg.Carg,
109
+ Cp = lpeg.Cp,
110
+ Cs = lpeg.Cs,
111
+ Ct = lpeg.Ct,
112
+ Cf = lpeg.Cf,
113
+ Cg = lpeg.Cg,
114
+ P = lpeg.P,
115
+ S = lpeg.S,
116
+ R = lpeg.R,
117
+ locale = lpeg.locale,
118
+ version = lpeg.version,
119
+ setmaxstack = lpeg.setmaxstack,
120
+ type = lpeg.type,
121
+ },
122
+ -- pre built lexer library
123
+ -- the call to load here will both load the code
124
+ -- and compile the LPeg grammar
125
+ lexers = {
126
+ lexer = require('lexers/lexer'),
127
+ bash = require('lexers/lexer').load('bash'), -- bash
128
+ bash_dqstr = require('lexers/lexer').load('bash_dqstr'), -- bash strings
129
+ markers = require('lexers/lexer').load('markers'),
130
+ html = require('lexers/lexer').load('html'),
131
+ javascript = require('lexers/lexer').load('javascript'),
132
+ css = require('lexers/lexer').load('css'),
133
+ },
134
+ -- Immunio vars
135
+ serverdata = {}, -- Default empty serverdata
136
+ agentdata = {},
137
+ utils = {}, -- Used to store utility functions declared in the sandbox.
138
+ -- pass mode flags into the VM
139
+ DEV_MODE = DEV_MODE,
140
+ DEBUG_MODE = DEBUG_MODE,
141
+ LUA_PLATFORM = LUA_PLATFORM or 'unix',
142
+ IMMUNIO_KEY = IMMUNIO_KEY,
143
+ IMMUNIO_SECRET = IMMUNIO_SECRET
144
+ }
145
+
146
+ -- Enable a few more things in dev mode. For debugging.
147
+ if DEBUG_MODE or DEV_MODE then
148
+ SANDBOX_ENV.print = print
149
+ SANDBOX_ENV.snapshot = snapshot
150
+ else
151
+ SANDBOX_ENV.print = function(...) end
152
+ end
153
+
154
+
155
+ -- Perform a VM call of a method on a Lua pseudo-object.
156
--- Run `method` inside a freshly built sandbox environment.
-- The environment is a per-call copy of SANDBOX_ENV with `vars` layered on
-- top, so entries in `vars` win over the defaults.
-- @param method function to execute; its environment is swapped for the call
-- @param object optional receiver; when truthy it is passed as the only argument
-- @param vars optional table of extra names to expose to `method`
-- @return the first value returned by `method` (further values are dropped)
function sandboxed_method_call(method, object, vars)
  if DEBUG_MODE then
    local utils = SANDBOX_ENV.utils
    utils.debug_prefix = "UNKNOWN"
    -- Flip individual entries here to toggle debugging per module.
    utils.debug_module_prefixes = {
      UNKNOWN = true,
      IO = true,
      SQLi = true,
      ExceptionHandler = true,
      Redirect = true,
      XSS = true,
      Eval = true,
    }
  end

  -- Build the environment as a copy so that nothing written by `method`
  -- leaks back into SANDBOX_ENV or into the caller's `vars` table.
  local env = {}
  env._G = env
  for name, value in pairs(SANDBOX_ENV) do
    env[name] = value
  end
  if vars then
    -- Caller-supplied names override the sandbox defaults.
    for name, value in pairs(vars) do
      env[name] = value
    end
  end

  -- XXX DEBUG_MODE opens the sandbox by exposing the real globals table.
  if DEBUG_MODE then
    env['__REAL_G'] = _G
  end

  setfenv(method, env)

  local result
  if object then
    result = method(object)
  else
    result = method()
  end

  -- Mark the values held by the environment as weak; without this the
  -- function environment is kept alive by the GC and everything it
  -- references leaks.
  setmetatable(env, {__mode = "v"})
  -- Detach the environment from the function so it can be collected sooner.
  setfenv(method, _G)
  return result
end
207
+
208
+ -- Function called by the VM to call and sandbox a function.
209
--- Sandbox and call a plain function (no receiver object).
-- Delegates to sandboxed_method_call with a nil object.
-- @param func function to execute in the sandbox
-- @param vars optional table of extra names exposed to `func`
-- @return whatever sandboxed_method_call returns
function sandboxed_call(func, vars)
  local no_object = nil
  return sandboxed_method_call(func, no_object, vars)
end
212
+
213
+ if DEBUG_MODE then
214
+ -- Memory Snapshot Debugger
215
+ local saved_snapshot = {}
216
--- Take a new memory snapshot, remember it, and print every entry.
-- @param label optional text printed right after the header
function dump_snapshot( label )
  -- Two collection passes are requested before sampling.
  for _ = 1, 2 do
    collectgarbage()
  end
  saved_snapshot = snapshot.snapshot()

  print("------------------------\nSNAPSHOT:\n")
  if label then
    print(label)
  end

  for address, description in pairs(saved_snapshot) do
    -- Only the first gsub result (the rewritten string) is used.
    local name = tostring(address):gsub("userdata:", "")
    print("ALLOCATION:" .. name .. " " .. description)
  end
end
226
+
227
--- Replace the remembered snapshot with a fresh one, without printing.
function update_snapshot()
  -- Two collection passes are requested before sampling.
  for _ = 1, 2 do
    collectgarbage()
  end
  saved_snapshot = snapshot.snapshot()
end
232
+
233
--- Report allocations present now that were absent from the saved snapshot.
-- @param update when truthy, the new snapshot becomes the saved baseline
-- @return a string: a header line followed by one "ALLOCATION:" line per
--         entry whose key is not in the saved snapshot
function diff_snapshot( update )
  collectgarbage()
  collectgarbage()
  local current = snapshot.snapshot()

  -- Collect the pieces in a local buffer: the report must not be written to
  -- a global, and repeated `..` in the loop re-copies the whole string.
  local pieces = { "------------------------\nDIFF SNAPSHOT:\n" }
  for key, description in pairs(current) do
    if saved_snapshot[key] == nil then
      -- Only the first gsub result (the rewritten string) is used.
      local name = tostring(key):gsub("userdata:", "")
      pieces[#pieces + 1] = "ALLOCATION:" .. name .. " " .. description .. "\n"
    end
  end

  if update then
    saved_snapshot = current
  end
  return table.concat(pieces)
end
247
+
248
+ -- Uncomment for snapshot tracing
249
+ --snapshot.tron()
250
+ -- Uncomment to generate a snapshot at boot.
251
+ --dump_snapshot('BOOT')
252
+ --snapshot.troff()
253
+ end
254
+
@@ -0,0 +1,4 @@
1
+ -- Encode a Lua object to be sent to the server.
2
--- Serialise a Lua value with cmsgpack so it can be sent to the server.
-- @param object value to pack
-- @return the result of cmsgpack.pack for `object`
function encode(object)
  local pack = cmsgpack.pack
  return pack(object)
end
@@ -0,0 +1,21 @@
1
+ The MIT License
2
+
3
+ Copyright (c) 2007-2015 Mitchell
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
@@ -0,0 +1,134 @@
1
+ -- Copyright 2006-2015 Mitchell mitchell.att.foicica.com.
2
+ -- Copyright 2015 Immunio, Inc.
3
+
4
+ -- Shell LPeg lexer.
5
+
6
+ -- This is based on the lexer from the Scintillua package, with a lot of extensions.
7
+ -- The goal isn't a complete parser for bash, but a lexer that can extract a useful
8
+ -- amount of structure to detect tampering. The emphasis is more on common injection
9
+ -- techniques and lexical structure than actually extracting properly formed bash
10
+ -- statements. Down the road we may need to go as far as to parse statements, and that
11
+ -- should be possible at the cost of a lot more complexity.
12
+
13
+ local l = require('lexer')
14
+ local token = l.token
15
+ local P, R, S = lpeg.P, lpeg.R, lpeg.S
16
+
17
+ local M = {_NAME = 'bash'}
18
+
19
+ -- Whitespace.
20
+ local ws = token(l.WHITESPACE, l.space^1)
21
+
22
+ local bash_word = (l.alpha + '_') * (l.alnum + '_' + '\\ ' + '.')^0
23
+
24
+
25
+ -- Comments.
26
+ local comment = token(l.COMMENT, '#' * l.nonnewline^0)
27
+
28
+ -- Strings.
29
+ local sq_str = token('sq_str', l.delimited_range("'", false, true))
30
+ local dq_str = token('dq_str', l.delimited_range('"'))
31
+ local ex_str = token('ex_str', l.delimited_range('`'))
32
+ local heredoc = token('heredoc', '<<' * P(function(input, index)
33
+ local s, e, _, delimiter =
34
+ input:find('%-?(["\']?)([%a_][%w_]*)%1[\n\r\f;]+', index)
35
+ if s == index and delimiter then
36
+ local _, e = input:find('[\n\r\f]+'..delimiter, e)
37
+ return e and e + 1 or #input + 1
38
+ end
39
+ end))
40
+ local bash_string = sq_str + dq_str + ex_str + heredoc
41
+
42
+ -- Numbers.
43
+ local number = token(l.NUMBER, l.float + l.integer)
44
+
45
+ -- Keywords.
46
+ local keyword = token(l.KEYWORD, l.word_match({
47
+ 'if', 'then', 'elif', 'else', 'fi', 'case', 'in', 'esac', 'while', 'for',
48
+ 'do', 'done', 'continue', 'local', 'return', 'select',
49
+ -- Operators. These could be split into individual tokens...
50
+ '-a', '-b', '-c', '-d', '-e', '-f', '-g', '-h', '-k', '-p', '-r', '-s', '-t',
51
+ '-u', '-w', '-x', '-O', '-G', '-L', '-S', '-N', '-nt', '-ot', '-ef', '-o',
52
+ '-z', '-n', '-eq', '-ne', '-lt', '-le', '-gt', '-ge'
53
+ }, '-'))
54
+
55
+ -- Common commands ... this is not exhaustive nor does it need to be.
56
+ local command = token("command", l.word_match({
57
+ 'awk', 'cat', 'cmp', 'cp', 'curl', 'cut', 'date', 'find', 'grep', 'gunzip', 'gvim',
58
+ 'gzip', 'kill', 'lua', 'make', 'mkdir', 'mv', 'php', 'pkill', 'python', 'rm',
59
+ 'rmdir', 'rsync', 'ruby', 'scp', 'sed', 'sleep', 'ssh', 'sudo', 'tar', 'unlink',
60
+ 'wget', 'zip'
61
+ }, '-'))
62
+
63
+ -- Builtins
64
+ local builtin = token("builtin", l.word_match({
65
+ 'alias', 'bind', 'builtin', 'caller', 'command', 'declare', 'echo', 'enable',
66
+ 'help', 'let', 'local', 'logout', 'mapfile', 'printf', 'read', 'readarray',
67
+ 'source', 'type', 'typeset', 'ulimit', 'unalias',
68
+ }, '-'))
69
+
70
+ -- Filenames. This is a bit sloppy, but tries to discern filenames from other identifiers
71
+ -- Very much a case of R&D 'suck it and see'
72
+ local filename = token("filename", P('/')^0 * (bash_word + '.') * (
73
+ '/' + bash_word + '.' )^0 * ('.' * bash_word )^0 )
74
+
75
+ local ip = (l.integer * P('.') * l.integer * P('.') * l.integer * P('.') * l.integer)
76
+
77
+ local protocol = ((P('https') + 'http' + 'ftp' + 'irc') * '://') + 'mailto:'
78
+ local remainder = ((1-S'\r\n\f\t\v ,."}])') + (S',."}])' * (1-S'\r\n\f\t\v ')))^0
79
+ local url = protocol * remainder
80
+
81
+ -- Identifiers.
82
+ local identifier = token(l.IDENTIFIER, url + ip + bash_word)
83
+
84
+ -- Variables.
85
+ local ex_variable = token("ex_variable",
86
+ '$' * l.delimited_range('()', true, true))
87
+
88
+ local variable = token(l.VARIABLE,
89
+ '$' * (S('!#?*@$') + l.digit^1 + bash_word +
90
+ l.delimited_range('{}', true, true)))
91
+
92
+ local var = ex_variable + variable
93
+
94
+ -- Operators. These could be split into individual tokens...
95
+ local operator = token(l.OPERATOR, S('=!<>+-/*^&|~.,:;?()[]{}'))
96
+
97
+ M._rules = {
98
+ {'whitespace', ws},
99
+ {'keyword', keyword},
100
+ {'builtin', builtin},
101
+ {'command', command},
102
+ {'identifier', identifier},
103
+ {'filename', filename},
104
+ {'string', bash_string},
105
+ {'comment', comment},
106
+ {'number', number},
107
+ {'variable', var},
108
+ {'operator', operator},
109
+ }
110
+
111
+ -- This is the main function for lexing bash data. It recurses and uses
112
+ -- the dqstr sub-lexer instance provided (we don't instantiate it directly
113
+ -- to allow the caller to cache the instance and avoid recompiling the grammar)
114
--- Lex `str` as bash and replace the value of nested tokens with token lists.
-- Backtick strings and `$(...)` substitutions are re-lexed with this lexer;
-- double-quoted strings and heredocs are handed to the double-quote lexer.
-- @param self this bash lexer (must provide `lex` and `lex_recursive`)
-- @param str text to lex
-- @param bash_dqstr_lexer lexer instance used for double-quoted content
-- @return the token list from `self:lex(str)`, modified in place
function M.lex_recursive( self, str, bash_dqstr_lexer )
  local tokens = self:lex(str)
  for idx = 1, #tokens do
    local tok = tokens[idx]
    local kind = tok['token']
    if kind == "ex_str" then
      -- Strip the surrounding backticks.
      local inner = string.sub(tok['val'], 2, -2)
      tok['val'] = self:lex_recursive(inner, bash_dqstr_lexer)
    elseif kind == "ex_variable" then
      -- Strip the leading "$(" and the trailing ")".
      local inner = string.sub(tok['val'], 3, -2)
      tok['val'] = self:lex_recursive(inner, bash_dqstr_lexer)
    elseif kind == "dq_str" then
      -- Strip the surrounding double quotes.
      local inner = string.sub(tok['val'], 2, -2)
      tok['val'] = bash_dqstr_lexer:lex_recursive(inner, self)
    elseif kind == "heredoc" then
      -- The heredoc text is passed through untrimmed.
      tok['val'] = bash_dqstr_lexer:lex_recursive(tok['val'], self)
    end
  end
  return tokens
end
131
+
132
+ return M
133
+
134
+
@@ -0,0 +1,62 @@
1
+ -- Copyright (C) 2015 Immunio, Inc.
2
+
3
+ -- Lexer for bash magic double quotes
4
+
5
+ -- NOTE: not covered by Scintillua MIT license in this directory.
6
+
7
+ -- While our lexer has the ability to embed this sort of thing as a child of another lexer
8
+ -- I didn't bother here due to the recursion; we need to lex the parent (bash) language
9
+ -- for some tokens which would be very complex at best. It's cleaner to use two lexers
10
+ -- and handle the recursion in higher level lua at a minute performance cost.
11
+
12
+ local l = require('lexer')
13
-- Only `token` is needed by this lexer. No `word_match` alias is bound:
-- nothing in this file uses one, and `bash_word_match` is not defined anywhere
-- visible here, so reading it would only yield nil.
local token = l.token
14
+ local P, R, S = lpeg.P, lpeg.R, lpeg.S
15
+
16
+ local M = {_NAME = 'bash_dqstr'}
17
+
18
+ -- Whitespace.
19
+ local ws = token(l.WHITESPACE, l.space^1)
20
+
21
+ -- Generic token.
22
+ local bash_word = (l.alpha + '_') * (l.alnum + '_' + '\\ ')^0
23
+
24
+ -- Strings.
25
+ -- Shell substitution.
26
+ local ex_str = token('ex_str', l.delimited_range('`'))
27
+
28
+ -- Other string data
29
+ local bash_string = token('str_data', (l.any - '$' - '`')^1)
30
+
31
+ -- Variables.
32
+ -- Shell Substitution.
33
+ local ex_variable = token("ex_variable",
34
+ '$' * l.delimited_range('()', true, true))
35
+ -- Other variables
36
+ local variable = token(l.VARIABLE,
37
+ '$' * (S('!#?*@$') + l.digit^1 + bash_word +
38
+ l.delimited_range('{}', true, true)))
39
+
40
+ local var = ex_variable + variable
41
+
42
+ M._rules = {
43
+ {'variable', var},
44
+ {'ex_str', ex_str},
45
+ {'string', bash_string},
46
+ }
47
+
48
--- Lex double-quoted string content and re-lex embedded shell substitutions.
-- Backtick strings and `$(...)` substitutions are handed to the bash lexer;
-- every other token is left as produced by `self:lex`.
-- @param self this double-quote lexer (must provide `lex`)
-- @param str text to lex
-- @param bash_lexer lexer instance used for the substituted commands
-- @return the token list from `self:lex(str)`, modified in place
function M.lex_recursive( self, str, bash_lexer )
  local tokens = self:lex(str)
  for idx = 1, #tokens do
    local tok = tokens[idx]
    local kind = tok['token']
    if kind == "ex_str" then
      -- Strip the surrounding backticks.
      local inner = string.sub(tok['val'], 2, -2)
      tok['val'] = bash_lexer:lex_recursive(inner, self)
    elseif kind == "ex_variable" then
      -- Strip the leading "$(" and the trailing ")".
      local inner = string.sub(tok['val'], 3, -2)
      tok['val'] = bash_lexer:lex_recursive(inner, self)
    end
  end
  return tokens
end
59
+
60
+ return M
61
+
62
+