immunio 1.2.1 → 2.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (291) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +13 -5
  3. data/ext/immunio/Rakefile +14 -6
  4. data/lib/immunio/context.rb +2 -0
  5. data/lib/immunio/plugins/action_view.rb +7 -668
  6. data/lib/immunio/plugins/action_view/action_view.rb +22 -0
  7. data/lib/immunio/plugins/action_view/active_support_hash.rb +29 -0
  8. data/lib/immunio/plugins/action_view/cache_store.rb +24 -0
  9. data/lib/immunio/plugins/action_view/erubi.rb +38 -0
  10. data/lib/immunio/plugins/action_view/erubis.rb +39 -0
  11. data/lib/immunio/plugins/action_view/fragment_caching.rb +29 -0
  12. data/lib/immunio/plugins/action_view/haml.rb +46 -0
  13. data/lib/immunio/plugins/action_view/slim.rb +42 -0
  14. data/lib/immunio/plugins/action_view/template.rb +431 -0
  15. data/lib/immunio/plugins/action_view/template_rendering.rb +45 -0
  16. data/lib/immunio/plugins/http_tracker.rb +2 -0
  17. data/lib/immunio/plugins/io.rb +34 -0
  18. data/lib/immunio/version.rb +1 -1
  19. data/lua-hooks/Makefile +36 -9
  20. data/lua-hooks/ext/luajit/COPYRIGHT +1 -1
  21. data/lua-hooks/ext/luajit/Makefile +22 -15
  22. data/lua-hooks/ext/luajit/README +2 -2
  23. data/lua-hooks/ext/luajit/doc/bluequad-print.css +1 -1
  24. data/lua-hooks/ext/luajit/doc/bluequad.css +1 -1
  25. data/lua-hooks/ext/luajit/doc/changes.html +69 -3
  26. data/lua-hooks/ext/luajit/doc/contact.html +10 -3
  27. data/lua-hooks/ext/luajit/doc/ext_c_api.html +2 -2
  28. data/lua-hooks/ext/luajit/doc/ext_ffi.html +2 -2
  29. data/lua-hooks/ext/luajit/doc/ext_ffi_api.html +2 -2
  30. data/lua-hooks/ext/luajit/doc/ext_ffi_semantics.html +3 -4
  31. data/lua-hooks/ext/luajit/doc/ext_ffi_tutorial.html +2 -2
  32. data/lua-hooks/ext/luajit/doc/ext_jit.html +3 -3
  33. data/lua-hooks/ext/luajit/doc/ext_profiler.html +2 -2
  34. data/lua-hooks/ext/luajit/doc/extensions.html +47 -20
  35. data/lua-hooks/ext/luajit/doc/faq.html +2 -2
  36. data/lua-hooks/ext/luajit/doc/install.html +74 -45
  37. data/lua-hooks/ext/luajit/doc/luajit.html +5 -5
  38. data/lua-hooks/ext/luajit/doc/running.html +3 -3
  39. data/lua-hooks/ext/luajit/doc/status.html +13 -8
  40. data/lua-hooks/ext/luajit/dynasm/dasm_arm.h +1 -1
  41. data/lua-hooks/ext/luajit/dynasm/dasm_arm.lua +1 -1
  42. data/lua-hooks/ext/luajit/dynasm/dasm_arm64.h +1 -1
  43. data/lua-hooks/ext/luajit/dynasm/dasm_arm64.lua +1 -1
  44. data/lua-hooks/ext/luajit/dynasm/dasm_mips.h +8 -5
  45. data/lua-hooks/ext/luajit/dynasm/dasm_mips.lua +66 -11
  46. data/lua-hooks/ext/luajit/dynasm/dasm_mips64.lua +12 -0
  47. data/lua-hooks/ext/luajit/dynasm/dasm_ppc.h +1 -1
  48. data/lua-hooks/ext/luajit/dynasm/dasm_ppc.lua +1 -1
  49. data/lua-hooks/ext/luajit/dynasm/dasm_proto.h +1 -1
  50. data/lua-hooks/ext/luajit/dynasm/dasm_x64.lua +1 -1
  51. data/lua-hooks/ext/luajit/dynasm/dasm_x86.h +1 -1
  52. data/lua-hooks/ext/luajit/dynasm/dasm_x86.lua +5 -1
  53. data/lua-hooks/ext/luajit/dynasm/dynasm.lua +2 -2
  54. data/lua-hooks/ext/luajit/etc/luajit.1 +1 -1
  55. data/lua-hooks/ext/luajit/etc/luajit.pc +1 -1
  56. data/lua-hooks/ext/luajit/src/Makefile +15 -11
  57. data/lua-hooks/ext/luajit/src/Makefile.dep +16 -16
  58. data/lua-hooks/ext/luajit/src/host/buildvm.c +2 -2
  59. data/lua-hooks/ext/luajit/src/host/buildvm.h +1 -1
  60. data/lua-hooks/ext/luajit/src/host/buildvm_asm.c +9 -4
  61. data/lua-hooks/ext/luajit/src/host/buildvm_fold.c +2 -2
  62. data/lua-hooks/ext/luajit/src/host/buildvm_lib.c +1 -1
  63. data/lua-hooks/ext/luajit/src/host/buildvm_libbc.h +14 -3
  64. data/lua-hooks/ext/luajit/src/host/buildvm_peobj.c +27 -3
  65. data/lua-hooks/ext/luajit/src/host/genlibbc.lua +1 -1
  66. data/lua-hooks/ext/luajit/src/host/genminilua.lua +6 -5
  67. data/lua-hooks/ext/luajit/src/host/minilua.c +1 -1
  68. data/lua-hooks/ext/luajit/src/jit/bc.lua +1 -1
  69. data/lua-hooks/ext/luajit/src/jit/bcsave.lua +8 -8
  70. data/lua-hooks/ext/luajit/src/jit/dis_arm.lua +2 -2
  71. data/lua-hooks/ext/luajit/src/jit/dis_arm64.lua +1216 -0
  72. data/lua-hooks/ext/luajit/src/jit/dis_arm64be.lua +12 -0
  73. data/lua-hooks/ext/luajit/src/jit/dis_mips.lua +35 -20
  74. data/lua-hooks/ext/luajit/src/jit/dis_mips64.lua +17 -0
  75. data/lua-hooks/ext/luajit/src/jit/dis_mips64el.lua +17 -0
  76. data/lua-hooks/ext/luajit/src/jit/dis_mipsel.lua +1 -1
  77. data/lua-hooks/ext/luajit/src/jit/dis_ppc.lua +2 -2
  78. data/lua-hooks/ext/luajit/src/jit/dis_x64.lua +1 -1
  79. data/lua-hooks/ext/luajit/src/jit/dis_x86.lua +7 -4
  80. data/lua-hooks/ext/luajit/src/jit/dump.lua +17 -12
  81. data/lua-hooks/ext/luajit/src/jit/p.lua +3 -2
  82. data/lua-hooks/ext/luajit/src/jit/v.lua +2 -2
  83. data/lua-hooks/ext/luajit/src/jit/zone.lua +1 -1
  84. data/lua-hooks/ext/luajit/src/lauxlib.h +14 -20
  85. data/lua-hooks/ext/luajit/src/lib_aux.c +38 -27
  86. data/lua-hooks/ext/luajit/src/lib_base.c +12 -5
  87. data/lua-hooks/ext/luajit/src/lib_bit.c +1 -1
  88. data/lua-hooks/ext/luajit/src/lib_debug.c +5 -5
  89. data/lua-hooks/ext/luajit/src/lib_ffi.c +2 -2
  90. data/lua-hooks/ext/luajit/src/lib_init.c +16 -16
  91. data/lua-hooks/ext/luajit/src/lib_io.c +6 -7
  92. data/lua-hooks/ext/luajit/src/lib_jit.c +14 -4
  93. data/lua-hooks/ext/luajit/src/lib_math.c +1 -5
  94. data/lua-hooks/ext/luajit/src/lib_os.c +1 -1
  95. data/lua-hooks/ext/luajit/src/lib_package.c +14 -23
  96. data/lua-hooks/ext/luajit/src/lib_string.c +1 -5
  97. data/lua-hooks/ext/luajit/src/lib_table.c +21 -1
  98. data/lua-hooks/ext/luajit/src/lj.supp +3 -3
  99. data/lua-hooks/ext/luajit/src/lj_alloc.c +174 -83
  100. data/lua-hooks/ext/luajit/src/lj_api.c +97 -18
  101. data/lua-hooks/ext/luajit/src/lj_arch.h +54 -22
  102. data/lua-hooks/ext/luajit/src/lj_asm.c +172 -53
  103. data/lua-hooks/ext/luajit/src/lj_asm.h +1 -1
  104. data/lua-hooks/ext/luajit/src/lj_asm_arm.h +19 -16
  105. data/lua-hooks/ext/luajit/src/lj_asm_arm64.h +2022 -0
  106. data/lua-hooks/ext/luajit/src/lj_asm_mips.h +564 -158
  107. data/lua-hooks/ext/luajit/src/lj_asm_ppc.h +19 -18
  108. data/lua-hooks/ext/luajit/src/lj_asm_x86.h +578 -92
  109. data/lua-hooks/ext/luajit/src/lj_bc.c +1 -1
  110. data/lua-hooks/ext/luajit/src/lj_bc.h +1 -1
  111. data/lua-hooks/ext/luajit/src/lj_bcdump.h +1 -1
  112. data/lua-hooks/ext/luajit/src/lj_bcread.c +1 -1
  113. data/lua-hooks/ext/luajit/src/lj_bcwrite.c +1 -1
  114. data/lua-hooks/ext/luajit/src/lj_buf.c +1 -1
  115. data/lua-hooks/ext/luajit/src/lj_buf.h +1 -1
  116. data/lua-hooks/ext/luajit/src/lj_carith.c +1 -1
  117. data/lua-hooks/ext/luajit/src/lj_carith.h +1 -1
  118. data/lua-hooks/ext/luajit/src/lj_ccall.c +172 -7
  119. data/lua-hooks/ext/luajit/src/lj_ccall.h +21 -5
  120. data/lua-hooks/ext/luajit/src/lj_ccallback.c +71 -17
  121. data/lua-hooks/ext/luajit/src/lj_ccallback.h +1 -1
  122. data/lua-hooks/ext/luajit/src/lj_cconv.c +4 -2
  123. data/lua-hooks/ext/luajit/src/lj_cconv.h +1 -1
  124. data/lua-hooks/ext/luajit/src/lj_cdata.c +7 -5
  125. data/lua-hooks/ext/luajit/src/lj_cdata.h +1 -1
  126. data/lua-hooks/ext/luajit/src/lj_clib.c +5 -5
  127. data/lua-hooks/ext/luajit/src/lj_clib.h +1 -1
  128. data/lua-hooks/ext/luajit/src/lj_cparse.c +11 -6
  129. data/lua-hooks/ext/luajit/src/lj_cparse.h +1 -1
  130. data/lua-hooks/ext/luajit/src/lj_crecord.c +70 -14
  131. data/lua-hooks/ext/luajit/src/lj_crecord.h +1 -1
  132. data/lua-hooks/ext/luajit/src/lj_ctype.c +1 -1
  133. data/lua-hooks/ext/luajit/src/lj_ctype.h +8 -8
  134. data/lua-hooks/ext/luajit/src/lj_debug.c +1 -1
  135. data/lua-hooks/ext/luajit/src/lj_debug.h +1 -1
  136. data/lua-hooks/ext/luajit/src/lj_def.h +6 -9
  137. data/lua-hooks/ext/luajit/src/lj_dispatch.c +3 -3
  138. data/lua-hooks/ext/luajit/src/lj_dispatch.h +2 -1
  139. data/lua-hooks/ext/luajit/src/lj_emit_arm.h +5 -4
  140. data/lua-hooks/ext/luajit/src/lj_emit_arm64.h +419 -0
  141. data/lua-hooks/ext/luajit/src/lj_emit_mips.h +100 -20
  142. data/lua-hooks/ext/luajit/src/lj_emit_ppc.h +4 -4
  143. data/lua-hooks/ext/luajit/src/lj_emit_x86.h +116 -25
  144. data/lua-hooks/ext/luajit/src/lj_err.c +34 -13
  145. data/lua-hooks/ext/luajit/src/lj_err.h +1 -1
  146. data/lua-hooks/ext/luajit/src/lj_errmsg.h +1 -1
  147. data/lua-hooks/ext/luajit/src/lj_ff.h +1 -1
  148. data/lua-hooks/ext/luajit/src/lj_ffrecord.c +58 -49
  149. data/lua-hooks/ext/luajit/src/lj_ffrecord.h +1 -1
  150. data/lua-hooks/ext/luajit/src/lj_frame.h +33 -6
  151. data/lua-hooks/ext/luajit/src/lj_func.c +4 -2
  152. data/lua-hooks/ext/luajit/src/lj_func.h +1 -1
  153. data/lua-hooks/ext/luajit/src/lj_gc.c +16 -7
  154. data/lua-hooks/ext/luajit/src/lj_gc.h +1 -1
  155. data/lua-hooks/ext/luajit/src/lj_gdbjit.c +31 -1
  156. data/lua-hooks/ext/luajit/src/lj_gdbjit.h +1 -1
  157. data/lua-hooks/ext/luajit/src/lj_ir.c +69 -96
  158. data/lua-hooks/ext/luajit/src/lj_ir.h +29 -18
  159. data/lua-hooks/ext/luajit/src/lj_ircall.h +24 -30
  160. data/lua-hooks/ext/luajit/src/lj_iropt.h +9 -9
  161. data/lua-hooks/ext/luajit/src/lj_jit.h +67 -9
  162. data/lua-hooks/ext/luajit/src/lj_lex.c +1 -1
  163. data/lua-hooks/ext/luajit/src/lj_lex.h +1 -1
  164. data/lua-hooks/ext/luajit/src/lj_lib.c +1 -1
  165. data/lua-hooks/ext/luajit/src/lj_lib.h +1 -1
  166. data/lua-hooks/ext/luajit/src/lj_load.c +1 -1
  167. data/lua-hooks/ext/luajit/src/lj_mcode.c +11 -10
  168. data/lua-hooks/ext/luajit/src/lj_mcode.h +1 -1
  169. data/lua-hooks/ext/luajit/src/lj_meta.c +1 -1
  170. data/lua-hooks/ext/luajit/src/lj_meta.h +1 -1
  171. data/lua-hooks/ext/luajit/src/lj_obj.c +1 -1
  172. data/lua-hooks/ext/luajit/src/lj_obj.h +7 -3
  173. data/lua-hooks/ext/luajit/src/lj_opt_dce.c +1 -1
  174. data/lua-hooks/ext/luajit/src/lj_opt_fold.c +84 -17
  175. data/lua-hooks/ext/luajit/src/lj_opt_loop.c +1 -1
  176. data/lua-hooks/ext/luajit/src/lj_opt_mem.c +3 -3
  177. data/lua-hooks/ext/luajit/src/lj_opt_narrow.c +24 -22
  178. data/lua-hooks/ext/luajit/src/lj_opt_sink.c +11 -6
  179. data/lua-hooks/ext/luajit/src/lj_opt_split.c +11 -2
  180. data/lua-hooks/ext/luajit/src/lj_parse.c +9 -7
  181. data/lua-hooks/ext/luajit/src/lj_parse.h +1 -1
  182. data/lua-hooks/ext/luajit/src/lj_profile.c +1 -1
  183. data/lua-hooks/ext/luajit/src/lj_profile.h +1 -1
  184. data/lua-hooks/ext/luajit/src/lj_record.c +201 -117
  185. data/lua-hooks/ext/luajit/src/lj_record.h +1 -1
  186. data/lua-hooks/ext/luajit/src/lj_snap.c +72 -26
  187. data/lua-hooks/ext/luajit/src/lj_snap.h +1 -1
  188. data/lua-hooks/ext/luajit/src/lj_state.c +6 -6
  189. data/lua-hooks/ext/luajit/src/lj_state.h +2 -2
  190. data/lua-hooks/ext/luajit/src/lj_str.c +1 -1
  191. data/lua-hooks/ext/luajit/src/lj_str.h +1 -1
  192. data/lua-hooks/ext/luajit/src/lj_strfmt.c +7 -3
  193. data/lua-hooks/ext/luajit/src/lj_strfmt.h +1 -1
  194. data/lua-hooks/ext/luajit/src/lj_strfmt_num.c +4 -3
  195. data/lua-hooks/ext/luajit/src/lj_strscan.c +1 -1
  196. data/lua-hooks/ext/luajit/src/lj_strscan.h +1 -1
  197. data/lua-hooks/ext/luajit/src/lj_tab.c +1 -2
  198. data/lua-hooks/ext/luajit/src/lj_tab.h +1 -1
  199. data/lua-hooks/ext/luajit/src/lj_target.h +3 -3
  200. data/lua-hooks/ext/luajit/src/lj_target_arm.h +1 -1
  201. data/lua-hooks/ext/luajit/src/lj_target_arm64.h +239 -7
  202. data/lua-hooks/ext/luajit/src/lj_target_mips.h +111 -22
  203. data/lua-hooks/ext/luajit/src/lj_target_ppc.h +1 -1
  204. data/lua-hooks/ext/luajit/src/lj_target_x86.h +21 -4
  205. data/lua-hooks/ext/luajit/src/lj_trace.c +63 -18
  206. data/lua-hooks/ext/luajit/src/lj_trace.h +2 -1
  207. data/lua-hooks/ext/luajit/src/lj_traceerr.h +1 -1
  208. data/lua-hooks/ext/luajit/src/lj_udata.c +1 -1
  209. data/lua-hooks/ext/luajit/src/lj_udata.h +1 -1
  210. data/lua-hooks/ext/luajit/src/lj_vm.h +5 -1
  211. data/lua-hooks/ext/luajit/src/lj_vmevent.c +1 -1
  212. data/lua-hooks/ext/luajit/src/lj_vmevent.h +1 -1
  213. data/lua-hooks/ext/luajit/src/lj_vmmath.c +1 -1
  214. data/lua-hooks/ext/luajit/src/ljamalg.c +1 -1
  215. data/lua-hooks/ext/luajit/src/lua.h +9 -1
  216. data/lua-hooks/ext/luajit/src/luaconf.h +3 -7
  217. data/lua-hooks/ext/luajit/src/luajit.c +69 -54
  218. data/lua-hooks/ext/luajit/src/luajit.h +4 -4
  219. data/lua-hooks/ext/luajit/src/lualib.h +1 -1
  220. data/lua-hooks/ext/luajit/src/msvcbuild.bat +12 -4
  221. data/lua-hooks/ext/luajit/src/vm_arm.dasc +1 -1
  222. data/lua-hooks/ext/luajit/src/vm_arm64.dasc +255 -32
  223. data/lua-hooks/ext/luajit/src/vm_mips.dasc +26 -23
  224. data/lua-hooks/ext/luajit/src/vm_mips64.dasc +5062 -0
  225. data/lua-hooks/ext/luajit/src/vm_ppc.dasc +1 -1
  226. data/lua-hooks/ext/luajit/src/vm_x64.dasc +24 -25
  227. data/lua-hooks/ext/luajit/src/vm_x86.dasc +77 -4
  228. data/lua-hooks/libluahooks.darwin.a +0 -0
  229. data/lua-hooks/libluahooks.linux.a +0 -0
  230. data/lua-hooks/options.mk +1 -1
  231. metadata +37 -77
  232. data/lua-hooks/ext/all.c +0 -69
  233. data/lua-hooks/ext/libinjection/COPYING +0 -37
  234. data/lua-hooks/ext/libinjection/libinjection.h +0 -65
  235. data/lua-hooks/ext/libinjection/libinjection_html5.c +0 -847
  236. data/lua-hooks/ext/libinjection/libinjection_html5.h +0 -54
  237. data/lua-hooks/ext/libinjection/libinjection_sqli.c +0 -2301
  238. data/lua-hooks/ext/libinjection/libinjection_sqli.h +0 -295
  239. data/lua-hooks/ext/libinjection/libinjection_sqli_data.h +0 -9349
  240. data/lua-hooks/ext/libinjection/libinjection_xss.c +0 -531
  241. data/lua-hooks/ext/libinjection/libinjection_xss.h +0 -21
  242. data/lua-hooks/ext/libinjection/lualib.c +0 -145
  243. data/lua-hooks/ext/libinjection/module.mk +0 -5
  244. data/lua-hooks/ext/lpeg/HISTORY +0 -96
  245. data/lua-hooks/ext/lpeg/lpcap.c +0 -537
  246. data/lua-hooks/ext/lpeg/lpcap.h +0 -56
  247. data/lua-hooks/ext/lpeg/lpcode.c +0 -1014
  248. data/lua-hooks/ext/lpeg/lpcode.h +0 -40
  249. data/lua-hooks/ext/lpeg/lpeg-128.gif +0 -0
  250. data/lua-hooks/ext/lpeg/lpeg.html +0 -1445
  251. data/lua-hooks/ext/lpeg/lpprint.c +0 -244
  252. data/lua-hooks/ext/lpeg/lpprint.h +0 -36
  253. data/lua-hooks/ext/lpeg/lptree.c +0 -1303
  254. data/lua-hooks/ext/lpeg/lptree.h +0 -82
  255. data/lua-hooks/ext/lpeg/lptypes.h +0 -149
  256. data/lua-hooks/ext/lpeg/lpvm.c +0 -364
  257. data/lua-hooks/ext/lpeg/lpvm.h +0 -58
  258. data/lua-hooks/ext/lpeg/makefile +0 -55
  259. data/lua-hooks/ext/lpeg/module.mk +0 -6
  260. data/lua-hooks/ext/lpeg/re.html +0 -498
  261. data/lua-hooks/ext/lua-cmsgpack/.gitignore +0 -13
  262. data/lua-hooks/ext/lua-cmsgpack/CMakeLists.txt +0 -45
  263. data/lua-hooks/ext/lua-cmsgpack/README.md +0 -115
  264. data/lua-hooks/ext/lua-cmsgpack/lua_cmsgpack.c +0 -970
  265. data/lua-hooks/ext/lua-cmsgpack/module.mk +0 -2
  266. data/lua-hooks/ext/lua-cmsgpack/test.lua +0 -570
  267. data/lua-hooks/ext/lua-snapshot/LICENSE +0 -7
  268. data/lua-hooks/ext/lua-snapshot/Makefile +0 -12
  269. data/lua-hooks/ext/lua-snapshot/README.md +0 -18
  270. data/lua-hooks/ext/lua-snapshot/dump.lua +0 -15
  271. data/lua-hooks/ext/lua-snapshot/module.mk +0 -2
  272. data/lua-hooks/ext/lua-snapshot/snapshot.c +0 -462
  273. data/lua-hooks/ext/luautf8/README.md +0 -152
  274. data/lua-hooks/ext/luautf8/lutf8lib.c +0 -1274
  275. data/lua-hooks/ext/luautf8/module.mk +0 -2
  276. data/lua-hooks/ext/luautf8/unidata.h +0 -3064
  277. data/lua-hooks/ext/module.mk +0 -15
  278. data/lua-hooks/ext/modules.h +0 -17
  279. data/lua-hooks/ext/perf/luacpu.c +0 -114
  280. data/lua-hooks/ext/perf/lualoadavg.c +0 -40
  281. data/lua-hooks/ext/perf/luameminfo.c +0 -38
  282. data/lua-hooks/ext/perf/luaoslib.c +0 -203
  283. data/lua-hooks/ext/perf/module.mk +0 -5
  284. data/lua-hooks/ext/sha1/luasha1.c +0 -74
  285. data/lua-hooks/ext/sha1/module.mk +0 -5
  286. data/lua-hooks/ext/sha1/sha1.c +0 -145
  287. data/lua-hooks/ext/sha2/luasha256.c +0 -77
  288. data/lua-hooks/ext/sha2/module.mk +0 -5
  289. data/lua-hooks/ext/sha2/sha256.c +0 -196
  290. data/lua-hooks/ext/sysutils/lua_utils.c +0 -56
  291. data/lua-hooks/ext/sysutils/module.mk +0 -2
@@ -1,54 +0,0 @@
1
- #ifndef LIBINJECTION_HTML5
2
- #define LIBINJECTION_HTML5
3
-
4
- #ifdef __cplusplus
5
- extern "C" {
6
- #endif
7
-
8
- /* pull in size_t */
9
-
10
- #include <stddef.h>
11
-
12
- enum html5_type {
13
- DATA_TEXT
14
- , TAG_NAME_OPEN
15
- , TAG_NAME_CLOSE
16
- , TAG_NAME_SELFCLOSE
17
- , TAG_DATA
18
- , TAG_CLOSE
19
- , ATTR_NAME
20
- , ATTR_VALUE
21
- , TAG_COMMENT
22
- , DOCTYPE
23
- };
24
-
25
- enum html5_flags {
26
- DATA_STATE
27
- , VALUE_NO_QUOTE
28
- , VALUE_SINGLE_QUOTE
29
- , VALUE_DOUBLE_QUOTE
30
- , VALUE_BACK_QUOTE
31
- };
32
-
33
- struct h5_state;
34
- typedef int (*ptr_html5_state)(struct h5_state*);
35
-
36
- typedef struct h5_state {
37
- const char* s;
38
- size_t len;
39
- size_t pos;
40
- int is_close;
41
- ptr_html5_state state;
42
- const char* token_start;
43
- size_t token_len;
44
- enum html5_type token_type;
45
- } h5_state_t;
46
-
47
-
48
- void libinjection_h5_init(h5_state_t* hs, const char* s, size_t len, enum html5_flags);
49
- int libinjection_h5_next(h5_state_t* hs);
50
-
51
- #ifdef __cplusplus
52
- }
53
- #endif
54
- #endif
@@ -1,2301 +0,0 @@
1
- /**
2
- * Copyright 2012,2013 Nick Galbreath
3
- * nickg@client9.com
4
- * BSD License -- see COPYING.txt for details
5
- *
6
- * https://libinjection.client9.com/
7
- *
8
- */
9
-
10
- #include <string.h>
11
- #include <stdlib.h>
12
- #include <stdio.h>
13
- #include <ctype.h>
14
- #include <assert.h>
15
- #include <stddef.h>
16
-
17
- #include "libinjection.h"
18
- #include "libinjection_sqli.h"
19
- #include "libinjection_sqli_data.h"
20
-
21
- #define LIBINJECTION_VERSION "3.9.1"
22
-
23
- #define LIBINJECTION_SQLI_TOKEN_SIZE sizeof(((stoken_t*)(0))->val)
24
- #define LIBINJECTION_SQLI_MAX_TOKENS 5
25
-
26
- #ifndef TRUE
27
- #define TRUE 1
28
- #endif
29
- #ifndef FALSE
30
- #define FALSE 0
31
- #endif
32
-
33
- #define CHAR_NULL '\0'
34
- #define CHAR_SINGLE '\''
35
- #define CHAR_DOUBLE '"'
36
- #define CHAR_TICK '`'
37
-
38
- /* faster than calling out to libc isdigit */
39
- #ifdef ISDIGIT
40
- #undef ISDIGIT
41
- #endif
42
- #define ISDIGIT(a) ((unsigned)((a) - '0') <= 9)
43
-
44
- #if 0
45
- #define FOLD_DEBUG printf("%d \t more=%d pos=%d left=%d\n", __LINE__, more, (int)pos, (int)left);
46
- #else
47
- #define FOLD_DEBUG
48
- #endif
49
-
50
- /*
51
- * not making public just yet
52
- */
53
- typedef enum {
54
- TYPE_NONE = 0
55
- , TYPE_KEYWORD = (int)'k'
56
- , TYPE_UNION = (int)'U'
57
- , TYPE_GROUP = (int)'B'
58
- , TYPE_EXPRESSION = (int)'E'
59
- , TYPE_SQLTYPE = (int)'t'
60
- , TYPE_FUNCTION = (int)'f'
61
- , TYPE_BAREWORD = (int)'n'
62
- , TYPE_NUMBER = (int)'1'
63
- , TYPE_VARIABLE = (int)'v'
64
- , TYPE_STRING = (int)'s'
65
- , TYPE_OPERATOR = (int)'o'
66
- , TYPE_LOGIC_OPERATOR = (int)'&'
67
- , TYPE_COMMENT = (int)'c'
68
- , TYPE_COLLATE = (int)'A'
69
- , TYPE_LEFTPARENS = (int)'('
70
- , TYPE_RIGHTPARENS = (int)')' /* not used? */
71
- , TYPE_LEFTBRACE = (int)'{'
72
- , TYPE_RIGHTBRACE = (int)'}'
73
- , TYPE_DOT = (int)'.'
74
- , TYPE_COMMA = (int)','
75
- , TYPE_COLON = (int)':'
76
- , TYPE_SEMICOLON = (int)';'
77
- , TYPE_TSQL = (int)'T' /* TSQL start */
78
- , TYPE_UNKNOWN = (int)'?'
79
- , TYPE_EVIL = (int)'X' /* unparsable, abort */
80
- , TYPE_FINGERPRINT = (int)'F' /* not really a token */
81
- , TYPE_BACKSLASH = (int)'\\'
82
- } sqli_token_types;
83
-
84
- /**
85
- * Initializes parsing state
86
- *
87
- */
88
- static char flag2delim(int flag)
89
- {
90
- if (flag & FLAG_QUOTE_SINGLE) {
91
- return CHAR_SINGLE;
92
- } else if (flag & FLAG_QUOTE_DOUBLE) {
93
- return CHAR_DOUBLE;
94
- } else {
95
- return CHAR_NULL;
96
- }
97
- }
98
-
99
- /* memchr2 finds a string of 2 characters inside another string
100
- * This is a specialized version of "memmem" or "memchr".
101
- * 'memmem' doesn't exist on all platforms
102
- *
103
- * Porting notes: this is just a special version of
104
- * astring.find("AB")
105
- *
106
- */
107
- static const char *
108
- memchr2(const char *haystack, size_t haystack_len, char c0, char c1)
109
- {
110
- const char *cur = haystack;
111
- const char *last = haystack + haystack_len - 1;
112
-
113
- if (haystack_len < 2) {
114
- return NULL;
115
- }
116
-
117
- while (cur < last) {
118
- /* safe since cur < len - 1 always */
119
- if (cur[0] == c0 && cur[1] == c1) {
120
- return cur;
121
- }
122
- cur += 1;
123
- }
124
-
125
- return NULL;
126
- }
127
-
128
- /**
129
- * memmem might not exist on some systems
130
- */
131
- static const char *
132
- my_memmem(const char* haystack, size_t hlen, const char* needle, size_t nlen)
133
- {
134
- const char* cur;
135
- const char* last;
136
- assert(haystack);
137
- assert(needle);
138
- assert(nlen > 1);
139
- last = haystack + hlen - nlen;
140
- for (cur = haystack; cur <= last; ++cur) {
141
- if (cur[0] == needle[0] && memcmp(cur, needle, nlen) == 0) {
142
- return cur;
143
- }
144
- }
145
- return NULL;
146
- }
147
-
148
- /** Find largest string containing certain characters.
149
- *
150
- * C Standard library 'strspn' only works for 'c-strings' (null terminated)
151
- * This works on arbitrary length.
152
- *
153
- * Performance notes:
154
- * not critical
155
- *
156
- * Porting notes:
157
- * if accept is 'ABC', then this function would be similar to
158
- * a_regexp.match(a_str, '[ABC]*'),
159
- */
160
- static size_t
161
- strlenspn(const char *s, size_t len, const char *accept)
162
- {
163
- size_t i;
164
- for (i = 0; i < len; ++i) {
165
- /* likely we can do better by inlining this function
166
- * but this works for now
167
- */
168
- if (strchr(accept, s[i]) == NULL) {
169
- return i;
170
- }
171
- }
172
- return len;
173
- }
174
-
175
- static size_t
176
- strlencspn(const char *s, size_t len, const char *accept)
177
- {
178
- size_t i;
179
- for (i = 0; i < len; ++i) {
180
- /* likely we can do better by inlining this function
181
- * but this works for now
182
- */
183
- if (strchr(accept, s[i]) != NULL) {
184
- return i;
185
- }
186
- }
187
- return len;
188
- }
189
- static int char_is_white(char ch) {
190
- /* ' ' space is 0x20
191
- '\t' 0x09 \011 horizontal tab
192
- '\n' 0x0a \012 new line
193
- '\v' 0x0b \013 vertical tab
194
- '\f' 0x0c \014 new page
195
- '\r' 0x0d \015 carriage return
196
- 0x00 \000 null (oracle)
197
- 0xa0 \240 is latin1
198
- */
199
- return strchr(" \t\n\v\f\r\240\000", ch) != NULL;
200
- }
201
-
202
- /* DANGER DANGER
203
- * This is -very specialized function-
204
- *
205
- * this compares an ALL_UPPER CASE C STRING
206
- * with an *arbitrary memory* + length
207
- *
208
- * Sane people would just make a copy, up-case
209
- * and use a hash table.
210
- *
211
- * Required since libc version uses the current locale
212
- * and is much slower.
213
- */
214
- static int cstrcasecmp(const char *a, const char *b, size_t n)
215
- {
216
- char cb;
217
-
218
- for (; n > 0; a++, b++, n--) {
219
- cb = *b;
220
- if (cb >= 'a' && cb <= 'z') {
221
- cb -= 0x20;
222
- }
223
- if (*a != cb) {
224
- return *a - cb;
225
- } else if (*a == '\0') {
226
- return -1;
227
- }
228
- }
229
-
230
- return (*a == 0) ? 0 : 1;
231
- }
232
-
233
- /**
234
- * Case sensitive string compare.
235
- * Here only to make code more readable
236
- */
237
- static int streq(const char *a, const char *b)
238
- {
239
- return strcmp(a, b) == 0;
240
- }
241
-
242
- /**
243
- *
244
- *
245
- *
246
- * Porting Notes:
247
- * given a mapping/hash of string to char
248
- * this is just
249
- * typecode = mapping[key.upper()]
250
- */
251
-
252
- static char bsearch_keyword_type(const char *key, size_t len,
253
- const keyword_t * keywords, size_t numb)
254
- {
255
- size_t pos;
256
- size_t left = 0;
257
- size_t right = numb - 1;
258
-
259
- while (left < right) {
260
- pos = (left + right) >> 1;
261
-
262
- /* arg0 = upper case only, arg1 = mixed case */
263
- if (cstrcasecmp(keywords[pos].word, key, len) < 0) {
264
- left = pos + 1;
265
- } else {
266
- right = pos;
267
- }
268
- }
269
- if ((left == right) && cstrcasecmp(keywords[left].word, key, len) == 0) {
270
- return keywords[left].type;
271
- } else {
272
- return CHAR_NULL;
273
- }
274
- }
275
-
276
- static char is_keyword(const char* key, size_t len)
277
- {
278
- return bsearch_keyword_type(key, len, sql_keywords, sql_keywords_sz);
279
- }
280
-
281
- /* st_token methods
282
- *
283
- * The following functions manipulates the stoken_t type
284
- *
285
- *
286
- */
287
-
288
- static void st_clear(stoken_t * st)
289
- {
290
- memset(st, 0, sizeof(stoken_t));
291
- }
292
-
293
- static void st_assign_char(stoken_t * st, const char stype, size_t pos, size_t len,
294
- const char value)
295
- {
296
- /* done to eliminate unused warning */
297
- (void)len;
298
- st->type = (char) stype;
299
- st->pos = pos;
300
- st->len = 1;
301
- st->val[0] = value;
302
- st->val[1] = CHAR_NULL;
303
- }
304
-
305
- static void st_assign(stoken_t * st, const char stype,
306
- size_t pos, size_t len, const char* value)
307
- {
308
- const size_t MSIZE = LIBINJECTION_SQLI_TOKEN_SIZE;
309
- size_t last = len < MSIZE ? len : (MSIZE - 1);
310
- st->type = (char) stype;
311
- st->pos = pos;
312
- st->len = last;
313
- memcpy(st->val, value, last);
314
- st->val[last] = CHAR_NULL;
315
- }
316
-
317
- static void st_copy(stoken_t * dest, const stoken_t * src)
318
- {
319
- memcpy(dest, src, sizeof(stoken_t));
320
- }
321
-
322
- static int st_is_arithmetic_op(const stoken_t* st)
323
- {
324
- const char ch = st->val[0];
325
- return (st->type == TYPE_OPERATOR && st->len == 1 &&
326
- (ch == '*' || ch == '/' || ch == '-' || ch == '+' || ch == '%'));
327
- }
328
-
329
- static int st_is_unary_op(const stoken_t * st)
330
- {
331
- const char* str = st->val;
332
- const size_t len = st->len;
333
-
334
- if (st->type != TYPE_OPERATOR) {
335
- return FALSE;
336
- }
337
-
338
- switch (len) {
339
- case 1:
340
- return *str == '+' || *str == '-' || *str == '!' || *str == '~';
341
- case 2:
342
- return str[0] == '!' && str[1] == '!';
343
- case 3:
344
- return cstrcasecmp("NOT", str, 3) == 0;
345
- default:
346
- return FALSE;
347
- }
348
- }
349
-
350
- /* Parsers
351
- *
352
- *
353
- */
354
-
355
- static size_t parse_white(struct libinjection_sqli_state * sf)
356
- {
357
- return sf->pos + 1;
358
- }
359
-
360
- static size_t parse_operator1(struct libinjection_sqli_state * sf)
361
- {
362
- const char *cs = sf->s;
363
- size_t pos = sf->pos;
364
-
365
- st_assign_char(sf->current, TYPE_OPERATOR, pos, 1, cs[pos]);
366
- return pos + 1;
367
- }
368
-
369
- static size_t parse_other(struct libinjection_sqli_state * sf)
370
- {
371
- const char *cs = sf->s;
372
- size_t pos = sf->pos;
373
-
374
- st_assign_char(sf->current, TYPE_UNKNOWN, pos, 1, cs[pos]);
375
- return pos + 1;
376
- }
377
-
378
- static size_t parse_char(struct libinjection_sqli_state * sf)
379
- {
380
- const char *cs = sf->s;
381
- size_t pos = sf->pos;
382
-
383
- st_assign_char(sf->current, cs[pos], pos, 1, cs[pos]);
384
- return pos + 1;
385
- }
386
-
387
- static size_t parse_eol_comment(struct libinjection_sqli_state * sf)
388
- {
389
- const char *cs = sf->s;
390
- const size_t slen = sf->slen;
391
- size_t pos = sf->pos;
392
-
393
- const char *endpos =
394
- (const char *) memchr((const void *) (cs + pos), '\n', slen - pos);
395
- if (endpos == NULL) {
396
- st_assign(sf->current, TYPE_COMMENT, pos, slen - pos, cs + pos);
397
- return slen;
398
- } else {
399
- st_assign(sf->current, TYPE_COMMENT, pos, (size_t)(endpos - cs) - pos, cs + pos);
400
- return (size_t)((endpos - cs) + 1);
401
- }
402
- }
403
-
404
- /** In Ansi mode, hash is an operator
405
- * In MYSQL mode, it's a EOL comment like '--'
406
- */
407
- static size_t parse_hash(struct libinjection_sqli_state * sf)
408
- {
409
- sf->stats_comment_hash += 1;
410
- if (sf->flags & FLAG_SQL_MYSQL) {
411
- sf->stats_comment_hash += 1;
412
- return parse_eol_comment(sf);
413
- } else {
414
- st_assign_char(sf->current, TYPE_OPERATOR, sf->pos, 1, '#');
415
- return sf->pos + 1;
416
- }
417
- }
418
-
419
- static size_t parse_dash(struct libinjection_sqli_state * sf)
420
- {
421
- const char *cs = sf->s;
422
- const size_t slen = sf->slen;
423
- size_t pos = sf->pos;
424
-
425
- /*
426
- * five cases
427
- * 1) --[white] this is always a SQL comment
428
- * 2) --[EOF] this is a comment
429
- * 3) --[notwhite] in MySQL this is NOT a comment but two unary operators
430
- * 4) --[notwhite] everyone else thinks this is a comment
431
- * 5) -[not dash] '-' is a unary operator
432
- */
433
-
434
- if (pos + 2 < slen && cs[pos + 1] == '-' && char_is_white(cs[pos+2]) ) {
435
- return parse_eol_comment(sf);
436
- } else if (pos +2 == slen && cs[pos + 1] == '-') {
437
- return parse_eol_comment(sf);
438
- } else if (pos + 1 < slen && cs[pos + 1] == '-' && (sf->flags & FLAG_SQL_ANSI)) {
439
- /* --[not-white] not-white case:
440
- *
441
- */
442
- sf->stats_comment_ddx += 1;
443
- return parse_eol_comment(sf);
444
- } else {
445
- st_assign_char(sf->current, TYPE_OPERATOR, pos, 1, '-');
446
- return pos + 1;
447
- }
448
- }
449
-
450
-
451
- /** This detects MySQL comments, comments that
452
- * start with /x! We just ban these now but
453
- * previously we attempted to parse the inside
454
- *
455
- * For reference:
456
- * the form of /x![anything]x/ or /x!12345[anything] x/
457
- *
458
- * Mysql 3 (maybe 4), allowed this:
459
- * /x!0selectx/ 1;
460
- * where 0 could be any number.
461
- *
462
- * The last version of MySQL 3 was in 2003.
463
-
464
- * It is unclear if the MySQL 3 syntax was allowed
465
- * in MySQL 4. The last version of MySQL 4 was in 2008
466
- *
467
- */
468
- static size_t is_mysql_comment(const char *cs, const size_t len, size_t pos)
469
- {
470
- /* so far...
471
- * cs[pos] == '/' && cs[pos+1] == '*'
472
- */
473
-
474
- if (pos + 2 >= len) {
475
- /* not a mysql comment */
476
- return 0;
477
- }
478
-
479
- if (cs[pos + 2] != '!') {
480
- /* not a mysql comment */
481
- return 0;
482
- }
483
-
484
- /*
485
- * this is a mysql comment
486
- * got "/x!"
487
- */
488
- return 1;
489
- }
490
-
491
- static size_t parse_slash(struct libinjection_sqli_state * sf)
492
- {
493
- const char* ptr;
494
- size_t clen;
495
- const char *cs = sf->s;
496
- const size_t slen = sf->slen;
497
- size_t pos = sf->pos;
498
- const char* cur = cs + pos;
499
- char ctype = TYPE_COMMENT;
500
- size_t pos1 = pos + 1;
501
- if (pos1 == slen || cs[pos1] != '*') {
502
- return parse_operator1(sf);
503
- }
504
-
505
- /*
506
- * skip over initial '/x'
507
- */
508
- ptr = memchr2(cur + 2, slen - (pos + 2), '*', '/');
509
-
510
- /*
511
- * (ptr == NULL) causes false positive in cppcheck 1.61
512
- * casting to type seems to fix it
513
- */
514
- if (ptr == (const char*) NULL) {
515
- /* till end of line */
516
- clen = slen - pos;
517
- } else {
518
- clen = (size_t)(ptr + 2 - cur);
519
- }
520
-
521
- /*
522
- * postgresql allows nested comments which makes
523
- * this incompatible with parsing so
524
- * if we find a '/x' inside the comment, then
525
- * make a new token.
526
- *
527
- * Also, Mysql's "conditional" comments for version
528
- * are an automatic black ban!
529
- */
530
-
531
- if (memchr2(cur + 2, (size_t)(ptr - (cur + 1)), '/', '*') != NULL) {
532
- ctype = TYPE_EVIL;
533
- } else if (is_mysql_comment(cs, slen, pos)) {
534
- ctype = TYPE_EVIL;
535
- }
536
-
537
- st_assign(sf->current, ctype, pos, clen, cs + pos);
538
- return pos + clen;
539
- }
540
-
541
-
542
- static size_t parse_backslash(struct libinjection_sqli_state * sf)
543
- {
544
- const char *cs = sf->s;
545
- const size_t slen = sf->slen;
546
- size_t pos = sf->pos;
547
-
548
- /*
549
- * Weird MySQL alias for NULL, "\N" (capital N only)
550
- */
551
- if (pos + 1 < slen && cs[pos +1] == 'N') {
552
- st_assign(sf->current, TYPE_NUMBER, pos, 2, cs + pos);
553
- return pos + 2;
554
- } else {
555
- st_assign_char(sf->current, TYPE_BACKSLASH, pos, 1, cs[pos]);
556
- return pos + 1;
557
- }
558
- }
559
-
560
- static size_t parse_operator2(struct libinjection_sqli_state * sf)
561
- {
562
- char ch;
563
- const char *cs = sf->s;
564
- const size_t slen = sf->slen;
565
- size_t pos = sf->pos;
566
-
567
- if (pos + 1 >= slen) {
568
- return parse_operator1(sf);
569
- }
570
-
571
- if (pos + 2 < slen &&
572
- cs[pos] == '<' &&
573
- cs[pos + 1] == '=' &&
574
- cs[pos + 2] == '>') {
575
- /*
576
- * special 3-char operator
577
- */
578
- st_assign(sf->current, TYPE_OPERATOR, pos, 3, cs + pos);
579
- return pos + 3;
580
- }
581
-
582
- ch = sf->lookup(sf, LOOKUP_OPERATOR, cs + pos, 2);
583
- if (ch != CHAR_NULL) {
584
- st_assign(sf->current, ch, pos, 2, cs+pos);
585
- return pos + 2;
586
- }
587
-
588
- /*
589
- * not an operator.. what to do with the two
590
- * characters we got?
591
- */
592
-
593
- if (cs[pos] == ':') {
594
- /* ':' is not an operator */
595
- st_assign(sf->current, TYPE_COLON, pos, 1, cs+pos);
596
- return pos + 1;
597
- } else {
598
- /*
599
- * must be a single char operator
600
- */
601
- return parse_operator1(sf);
602
- }
603
- }
604
-
605
- /*
606
- * Ok! " \" " one backslash = escaped!
607
- * " \\" " two backslash = not escaped!
608
- * "\\\" " three backslash = escaped!
609
- */
610
- static int is_backslash_escaped(const char* end, const char* start)
611
- {
612
- const char* ptr;
613
- for (ptr = end; ptr >= start; ptr--) {
614
- if (*ptr != '\\') {
615
- break;
616
- }
617
- }
618
- /* if number of backslashes is odd, it is escaped */
619
-
620
- return (end - ptr) & 1;
621
- }
622
-
623
- static size_t is_double_delim_escaped(const char* cur, const char* end)
624
- {
625
- return ((cur + 1) < end) && *(cur+1) == *cur;
626
- }
627
-
628
- /* Look forward for doubling of delimiter
629
- *
630
- * case 'foo''bar' --> foo''bar
631
- *
632
- * ending quote isn't duplicated (i.e. escaped)
633
- * since it's the wrong char or EOL
634
- *
635
- */
636
- static size_t parse_string_core(const char *cs, const size_t len, size_t pos,
637
- stoken_t * st, char delim, size_t offset)
638
- {
639
- /*
640
- * offset is to skip the perhaps first quote char
641
- */
642
- const char *qpos =
643
- (const char *) memchr((const void *) (cs + pos + offset), delim,
644
- len - pos - offset);
645
-
646
- /*
647
- * then keep string open/close info
648
- */
649
- if (offset > 0) {
650
- /*
651
- * this is real quote
652
- */
653
- st->str_open = delim;
654
- } else {
655
- /*
656
- * this was a simulated quote
657
- */
658
- st->str_open = CHAR_NULL;
659
- }
660
-
661
- while (TRUE) {
662
- if (qpos == NULL) {
663
- /*
664
- * string ended with no trailing quote
665
- * assign what we have
666
- */
667
- st_assign(st, TYPE_STRING, pos + offset, len - pos - offset, cs + pos + offset);
668
- st->str_close = CHAR_NULL;
669
- return len;
670
- } else if ( is_backslash_escaped(qpos - 1, cs + pos + offset)) {
671
- /* keep going, move ahead one character */
672
- qpos =
673
- (const char *) memchr((const void *) (qpos + 1), delim,
674
- (size_t)((cs + len) - (qpos + 1)));
675
- continue;
676
- } else if (is_double_delim_escaped(qpos, cs + len)) {
677
- /* keep going, move ahead two characters */
678
- qpos =
679
- (const char *) memchr((const void *) (qpos + 2), delim,
680
- (size_t)((cs + len) - (qpos + 2)));
681
- continue;
682
- } else {
683
- /* hey it's a normal string */
684
- st_assign(st, TYPE_STRING, pos + offset,
685
- (size_t)(qpos - (cs + pos + offset)), cs + pos + offset);
686
- st->str_close = delim;
687
- return (size_t)(qpos - cs + 1);
688
- }
689
- }
690
- }
691
-
692
- /**
693
- * Used when first char is a ' or "
694
- */
695
- static size_t parse_string(struct libinjection_sqli_state * sf)
696
- {
697
- const char *cs = sf->s;
698
- const size_t slen = sf->slen;
699
- size_t pos = sf->pos;
700
-
701
- /*
702
- * assert cs[pos] == single or double quote
703
- */
704
- return parse_string_core(cs, slen, pos, sf->current, cs[pos], 1);
705
- }
706
-
707
- /**
708
- * Used when first char is:
709
- * N or n: mysql "National Character set"
710
- * E : psql "Escaped String"
711
- */
712
- static size_t parse_estring(struct libinjection_sqli_state * sf)
713
- {
714
- const char *cs = sf->s;
715
- const size_t slen = sf->slen;
716
- size_t pos = sf->pos;
717
-
718
- if (pos + 2 >= slen || cs[pos+1] != CHAR_SINGLE) {
719
- return parse_word(sf);
720
- }
721
- return parse_string_core(cs, slen, pos, sf->current, CHAR_SINGLE, 2);
722
- }
723
-
724
- static size_t parse_ustring(struct libinjection_sqli_state * sf)
725
- {
726
- const char *cs = sf->s;
727
- size_t slen = sf->slen;
728
- size_t pos = sf->pos;
729
-
730
- if (pos + 2 < slen && cs[pos+1] == '&' && cs[pos+2] == '\'') {
731
- sf->pos += 2;
732
- pos = parse_string(sf);
733
- sf->current->str_open = 'u';
734
- if (sf->current->str_close == '\'') {
735
- sf->current->str_close = 'u';
736
- }
737
- return pos;
738
- } else {
739
- return parse_word(sf);
740
- }
741
- }
742
-
743
- static size_t parse_qstring_core(struct libinjection_sqli_state * sf, size_t offset)
744
- {
745
- char ch;
746
- const char *strend;
747
- const char *cs = sf->s;
748
- size_t slen = sf->slen;
749
- size_t pos = sf->pos + offset;
750
-
751
- /* if we are already at end of string..
752
- if current char is not q or Q
753
- if we don't have 2 more chars
754
- if char2 != a single quote
755
- then, just treat as word
756
- */
757
- if (pos >= slen ||
758
- (cs[pos] != 'q' && cs[pos] != 'Q') ||
759
- pos + 2 >= slen ||
760
- cs[pos + 1] != '\'') {
761
- return parse_word(sf);
762
- }
763
-
764
- ch = cs[pos + 2];
765
-
766
- /* the ch > 127 is un-needed since
767
- * we assume char is signed
768
- */
769
- if (ch < 33 /* || ch > 127 */) {
770
- return parse_word(sf);
771
- }
772
- switch (ch) {
773
- case '(' : ch = ')'; break;
774
- case '[' : ch = ']'; break;
775
- case '{' : ch = '}'; break;
776
- case '<' : ch = '>'; break;
777
- }
778
-
779
- strend = memchr2(cs + pos + 3, slen - pos - 3, ch, '\'');
780
- if (strend == NULL) {
781
- st_assign(sf->current, TYPE_STRING, pos + 3, slen - pos - 3, cs + pos + 3);
782
- sf->current->str_open = 'q';
783
- sf->current->str_close = CHAR_NULL;
784
- return slen;
785
- } else {
786
- st_assign(sf->current, TYPE_STRING, pos + 3, (size_t)(strend - cs) - pos - 3, cs + pos + 3);
787
- sf->current->str_open = 'q';
788
- sf->current->str_close = 'q';
789
- return (size_t)(strend - cs + 2);
790
- }
791
- }
792
-
793
- /*
794
- * Oracle's q string
795
- */
796
- static size_t parse_qstring(struct libinjection_sqli_state * sf)
797
- {
798
- return parse_qstring_core(sf, 0);
799
- }
800
-
801
- /*
802
- * mysql's N'STRING' or
803
- * ... Oracle's nq string
804
- */
805
- static size_t parse_nqstring(struct libinjection_sqli_state * sf)
806
- {
807
- size_t slen = sf->slen;
808
- size_t pos = sf->pos;
809
- if (pos + 2 < slen && sf->s[pos+1] == CHAR_SINGLE) {
810
- return parse_estring(sf);
811
- }
812
- return parse_qstring_core(sf, 1);
813
- }
814
-
815
- /*
816
- * binary literal string
817
- * re: [bB]'[01]*'
818
- */
819
- static size_t parse_bstring(struct libinjection_sqli_state *sf)
820
- {
821
- size_t wlen;
822
- const char *cs = sf->s;
823
- size_t pos = sf->pos;
824
- size_t slen = sf->slen;
825
-
826
- /* need at least 2 more characters
827
- * if next char isn't a single quote, then
828
- * continue as normal word
829
- */
830
- if (pos + 2 >= slen || cs[pos+1] != '\'') {
831
- return parse_word(sf);
832
- }
833
-
834
- wlen = strlenspn(cs + pos + 2, sf->slen - pos - 2, "01");
835
- if (pos + 2 + wlen >= slen || cs[pos + 2 + wlen] != '\'') {
836
- return parse_word(sf);
837
- }
838
- st_assign(sf->current, TYPE_NUMBER, pos, wlen + 3, cs + pos);
839
- return pos + 2 + wlen + 1;
840
- }
841
-
842
- /*
843
- * hex literal string
844
- * re: [xX]'[0123456789abcdefABCDEF]*'
845
- * mysql has requirement of having EVEN number of chars,
846
- * but pgsql does not
847
- */
848
- static size_t parse_xstring(struct libinjection_sqli_state *sf)
849
- {
850
- size_t wlen;
851
- const char *cs = sf->s;
852
- size_t pos = sf->pos;
853
- size_t slen = sf->slen;
854
-
855
- /* need at least 2 more characters
856
- * if next char isn't a single quote, then
857
- * continue as normal word
858
- */
859
- if (pos + 2 >= slen || cs[pos+1] != '\'') {
860
- return parse_word(sf);
861
- }
862
-
863
- wlen = strlenspn(cs + pos + 2, sf->slen - pos - 2, "0123456789ABCDEFabcdef");
864
- if (pos + 2 + wlen >= slen || cs[pos + 2 + wlen] != '\'') {
865
- return parse_word(sf);
866
- }
867
- st_assign(sf->current, TYPE_NUMBER, pos, wlen + 3, cs + pos);
868
- return pos + 2 + wlen + 1;
869
- }
870
-
871
- /**
872
- * This handles MS SQLSERVER bracket words
873
- * http://stackoverflow.com/questions/3551284/sql-serverwhat-do-brackets-mean-around-column-name
874
- *
875
- */
876
- static size_t parse_bword(struct libinjection_sqli_state * sf)
877
- {
878
- const char *cs = sf->s;
879
- size_t pos = sf->pos;
880
- const char* endptr = (const char*) memchr(cs + pos, ']', sf->slen - pos);
881
- if (endptr == NULL) {
882
- st_assign(sf->current, TYPE_BAREWORD, pos, sf->slen - pos, cs + pos);
883
- return sf->slen;
884
- } else {
885
- st_assign(sf->current, TYPE_BAREWORD, pos, (size_t)(endptr - cs) - pos + 1, cs + pos);
886
- return (size_t)((endptr - cs) + 1);
887
- }
888
- }
889
-
890
- static size_t parse_word(struct libinjection_sqli_state * sf)
891
- {
892
- char ch;
893
- char delim;
894
- size_t i;
895
- const char *cs = sf->s;
896
- size_t pos = sf->pos;
897
- size_t wlen = strlencspn(cs + pos, sf->slen - pos,
898
- " []{}<>:\\?=@!#~+-*/&|^%(),';\t\n\v\f\r\"\240\000");
899
-
900
- st_assign(sf->current, TYPE_BAREWORD, pos, wlen, cs + pos);
901
-
902
- /* now we need to look inside what we good for "." and "`"
903
- * and see if what is before is a keyword or not
904
- */
905
- for (i =0; i < sf->current->len; ++i) {
906
- delim = sf->current->val[i];
907
- if (delim == '.' || delim == '`') {
908
- ch = sf->lookup(sf, LOOKUP_WORD, sf->current->val, i);
909
- if (ch != TYPE_NONE && ch != TYPE_BAREWORD) {
910
- /* needed for swig */
911
- st_clear(sf->current);
912
- /*
913
- * we got something like "SELECT.1"
914
- * or SELECT`column`
915
- */
916
- st_assign(sf->current, ch, pos, i, cs + pos);
917
- return pos + i;
918
- }
919
- }
920
- }
921
-
922
- /*
923
- * do normal lookup with word including '.'
924
- */
925
- if (wlen < LIBINJECTION_SQLI_TOKEN_SIZE) {
926
-
927
- ch = sf->lookup(sf, LOOKUP_WORD, sf->current->val, wlen);
928
- if (ch == CHAR_NULL) {
929
- ch = TYPE_BAREWORD;
930
- }
931
- sf->current->type = ch;
932
- }
933
- return pos + wlen;
934
- }
935
-
936
- /* MySQL backticks are a cross between string and
937
- * and a bare word.
938
- *
939
- */
940
- static size_t parse_tick(struct libinjection_sqli_state* sf)
941
- {
942
- size_t pos = parse_string_core(sf->s, sf->slen, sf->pos, sf->current, CHAR_TICK, 1);
943
-
944
- /* we could check to see if start and end of
945
- * of string are both "`", i.e. make sure we have
946
- * matching set. `foo` vs. `foo
947
- * but I don't think it matters much
948
- */
949
-
950
- /* check value of string to see if it's a keyword,
951
- * function, operator, etc
952
- */
953
- char ch = sf->lookup(sf, LOOKUP_WORD, sf->current->val, sf->current->len);
954
- if (ch == TYPE_FUNCTION) {
955
- /* if it's a function, then convert token */
956
- sf->current->type = TYPE_FUNCTION;
957
- } else {
958
- /* otherwise it's a 'n' type -- mysql treats
959
- * everything as a bare word
960
- */
961
- sf->current->type = TYPE_BAREWORD;
962
- }
963
- return pos;
964
- }
965
-
966
- static size_t parse_var(struct libinjection_sqli_state * sf)
967
- {
968
- size_t xlen;
969
- const char *cs = sf->s;
970
- const size_t slen = sf->slen;
971
- size_t pos = sf->pos + 1;
972
-
973
- /*
974
- * var_count is only used to reconstruct
975
- * the input. It counts the number of '@'
976
- * seen 0 in the case of NULL, 1 or 2
977
- */
978
-
979
- /*
980
- * move past optional other '@'
981
- */
982
- if (pos < slen && cs[pos] == '@') {
983
- pos += 1;
984
- sf->current->count = 2;
985
- } else {
986
- sf->current->count = 1;
987
- }
988
-
989
- /*
990
- * MySQL allows @@`version`
991
- */
992
- if (pos < slen) {
993
- if (cs[pos] == '`') {
994
- sf->pos = pos;
995
- pos = parse_tick(sf);
996
- sf->current->type = TYPE_VARIABLE;
997
- return pos;
998
- } else if (cs[pos] == CHAR_SINGLE || cs[pos] == CHAR_DOUBLE) {
999
- sf->pos = pos;
1000
- pos = parse_string(sf);
1001
- sf->current->type = TYPE_VARIABLE;
1002
- return pos;
1003
- }
1004
- }
1005
-
1006
-
1007
- xlen = strlencspn(cs + pos, slen - pos,
1008
- " <>:\\?=@!#~+-*/&|^%(),';\t\n\v\f\r'`\"");
1009
- if (xlen == 0) {
1010
- st_assign(sf->current, TYPE_VARIABLE, pos, 0, cs + pos);
1011
- return pos;
1012
- } else {
1013
- st_assign(sf->current, TYPE_VARIABLE, pos, xlen, cs + pos);
1014
- return pos + xlen;
1015
- }
1016
- }
1017
-
1018
- static size_t parse_money(struct libinjection_sqli_state *sf)
1019
- {
1020
- size_t xlen;
1021
- const char* strend;
1022
- const char *cs = sf->s;
1023
- const size_t slen = sf->slen;
1024
- size_t pos = sf->pos;
1025
-
1026
- if (pos + 1 == slen) {
1027
- /* end of line */
1028
- st_assign_char(sf->current, TYPE_BAREWORD, pos, 1, '$');
1029
- return slen;
1030
- }
1031
-
1032
- /*
1033
- * $1,000.00 or $1.000,00 ok!
1034
- * This also parses $....,,,111 but that's ok
1035
- */
1036
-
1037
- xlen = strlenspn(cs + pos + 1, slen - pos - 1, "0123456789.,");
1038
- if (xlen == 0) {
1039
- if (cs[pos + 1] == '$') {
1040
- /* we have $$ .. find ending $$ and make string */
1041
- strend = memchr2(cs + pos + 2, slen - pos -2, '$', '$');
1042
- if (strend == NULL) {
1043
- /* fell off edge */
1044
- st_assign(sf->current, TYPE_STRING, pos + 2, slen - (pos + 2), cs + pos + 2);
1045
- sf->current->str_open = '$';
1046
- sf->current->str_close = CHAR_NULL;
1047
- return slen;
1048
- } else {
1049
- st_assign(sf->current, TYPE_STRING, pos + 2,
1050
- (size_t)(strend - (cs + pos + 2)), cs + pos + 2);
1051
- sf->current->str_open = '$';
1052
- sf->current->str_close = '$';
1053
- return (size_t)(strend - cs + 2);
1054
- }
1055
- } else {
1056
- /* ok it's not a number or '$$', but maybe it's pgsql "$ quoted strings" */
1057
- xlen = strlenspn(cs + pos + 1, slen - pos - 1, "abcdefghjiklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");
1058
- if (xlen == 0) {
1059
- /* hmm it's "$" _something_ .. just add $ and keep going*/
1060
- st_assign_char(sf->current, TYPE_BAREWORD, pos, 1, '$');
1061
- return pos + 1;
1062
- }
1063
- /* we have $foobar????? */
1064
- /* is it $foobar$ */
1065
- if (pos + xlen + 1 == slen || cs[pos+xlen+1] != '$') {
1066
- /* not $foobar$, or fell off edge */
1067
- st_assign_char(sf->current, TYPE_BAREWORD, pos, 1, '$');
1068
- return pos + 1;
1069
- }
1070
-
1071
- /* we have $foobar$ ... find it again */
1072
- strend = my_memmem(cs+xlen+2, slen - (pos+xlen+2), cs + pos, xlen+2);
1073
-
1074
- if (strend == NULL) {
1075
- /* fell off edge */
1076
- st_assign(sf->current, TYPE_STRING, pos+xlen+2, slen - pos - xlen - 2, cs+pos+xlen+2);
1077
- sf->current->str_open = '$';
1078
- sf->current->str_close = CHAR_NULL;
1079
- return slen;
1080
- } else {
1081
- /* got one */
1082
- st_assign(sf->current, TYPE_STRING, pos+xlen+2,
1083
- (size_t)(strend - (cs + pos + xlen + 2)), cs+pos+xlen+2);
1084
- sf->current->str_open = '$';
1085
- sf->current->str_close = '$';
1086
- return (size_t)((strend + xlen + 2) - cs);
1087
- }
1088
- }
1089
- } else if (xlen == 1 && cs[pos + 1] == '.') {
1090
- /* $. should parsed as a word */
1091
- return parse_word(sf);
1092
- } else {
1093
- st_assign(sf->current, TYPE_NUMBER, pos, 1 + xlen, cs + pos);
1094
- return pos + 1 + xlen;
1095
- }
1096
- }
1097
-
1098
- static size_t parse_number(struct libinjection_sqli_state * sf)
1099
- {
1100
- size_t xlen;
1101
- size_t start;
1102
- const char* digits = NULL;
1103
- const char *cs = sf->s;
1104
- const size_t slen = sf->slen;
1105
- size_t pos = sf->pos;
1106
- int have_e = 0;
1107
- int have_exp = 0;
1108
-
1109
- /* cs[pos] == '0' has 1/10 chance of being true,
1110
- * while pos+1< slen is almost always true
1111
- */
1112
- if (cs[pos] == '0' && pos + 1 < slen) {
1113
- if (cs[pos + 1] == 'X' || cs[pos + 1] == 'x') {
1114
- digits = "0123456789ABCDEFabcdef";
1115
- } else if (cs[pos + 1] == 'B' || cs[pos + 1] == 'b') {
1116
- digits = "01";
1117
- }
1118
-
1119
- if (digits) {
1120
- xlen = strlenspn(cs + pos + 2, slen - pos - 2, digits);
1121
- if (xlen == 0) {
1122
- st_assign(sf->current, TYPE_BAREWORD, pos, 2, cs + pos);
1123
- return pos + 2;
1124
- } else {
1125
- st_assign(sf->current, TYPE_NUMBER, pos, 2 + xlen, cs + pos);
1126
- return pos + 2 + xlen;
1127
- }
1128
- }
1129
- }
1130
-
1131
- start = pos;
1132
- while (pos < slen && ISDIGIT(cs[pos])) {
1133
- pos += 1;
1134
- }
1135
-
1136
- if (pos < slen && cs[pos] == '.') {
1137
- pos += 1;
1138
- while (pos < slen && ISDIGIT(cs[pos])) {
1139
- pos += 1;
1140
- }
1141
- if (pos - start == 1) {
1142
- /* only one character read so far */
1143
- st_assign_char(sf->current, TYPE_DOT, start, 1, '.');
1144
- return pos;
1145
- }
1146
- }
1147
-
1148
- if (pos < slen) {
1149
- if (cs[pos] == 'E' || cs[pos] == 'e') {
1150
- have_e = 1;
1151
- pos += 1;
1152
- if (pos < slen && (cs[pos] == '+' || cs[pos] == '-')) {
1153
- pos += 1;
1154
- }
1155
- while (pos < slen && ISDIGIT(cs[pos])) {
1156
- have_exp = 1;
1157
- pos += 1;
1158
- }
1159
- }
1160
- }
1161
-
1162
- /* oracle's ending float or double suffix
1163
- * http://docs.oracle.com/cd/B19306_01/server.102/b14200/sql_elements003.htm#i139891
1164
- */
1165
- if (pos < slen && (cs[pos] == 'd' || cs[pos] == 'D' || cs[pos] == 'f' || cs[pos] == 'F')) {
1166
- if (pos + 1 == slen) {
1167
- /* line ends evaluate "... 1.2f$" as '1.2f' */
1168
- pos += 1;
1169
- } else if ((char_is_white(cs[pos+1]) || cs[pos+1] == ';')) {
1170
- /*
1171
- * easy case, evaluate "... 1.2f ... as '1.2f'
1172
- */
1173
- pos += 1;
1174
- } else if (cs[pos+1] == 'u' || cs[pos+1] == 'U') {
1175
- /*
1176
- * a bit of a hack but makes '1fUNION' parse as '1f UNION'
1177
- */
1178
- pos += 1;
1179
- } else {
1180
- /* it's like "123FROM" */
1181
- /* parse as "123" only */
1182
- }
1183
- }
1184
-
1185
- if (have_e == 1 && have_exp == 0) {
1186
- /* very special form of
1187
- * "1234.e"
1188
- * "10.10E"
1189
- * ".E"
1190
- * this is a WORD not a number!! */
1191
- st_assign(sf->current, TYPE_BAREWORD, start, pos - start, cs + start);
1192
- } else {
1193
- st_assign(sf->current, TYPE_NUMBER, start, pos - start, cs + start);
1194
- }
1195
- return pos;
1196
- }
1197
-
1198
- /*
1199
- * API to return version. This allows us to increment the version
1200
- * without having to regenerated the SWIG (or other binding) in minor
1201
- * releases.
1202
- */
1203
- const char* libinjection_version()
1204
- {
1205
- return LIBINJECTION_VERSION;
1206
- }
1207
-
1208
- int libinjection_sqli_tokenize(struct libinjection_sqli_state * sf)
1209
- {
1210
- pt2Function fnptr;
1211
- size_t *pos = &sf->pos;
1212
- stoken_t *current = sf->current;
1213
- const char *s = sf->s;
1214
- const size_t slen = sf->slen;
1215
-
1216
- if (slen == 0) {
1217
- return FALSE;
1218
- }
1219
-
1220
- st_clear(current);
1221
- sf->current = current;
1222
-
1223
- /*
1224
- * if we are at beginning of string
1225
- * and in single-quote or double quote mode
1226
- * then pretend the input starts with a quote
1227
- */
1228
- if (*pos == 0 && (sf->flags & (FLAG_QUOTE_SINGLE | FLAG_QUOTE_DOUBLE))) {
1229
- *pos = parse_string_core(s, slen, 0, current, flag2delim(sf->flags), 0);
1230
- sf->stats_tokens += 1;
1231
- return TRUE;
1232
- }
1233
-
1234
- while (*pos < slen) {
1235
-
1236
- /*
1237
- * get current character
1238
- */
1239
- const unsigned char ch = (unsigned char) (s[*pos]);
1240
-
1241
- /*
1242
- * look up the parser, and call it
1243
- *
1244
- * Porting Note: this is mapping of char to function
1245
- * charparsers[ch]()
1246
- */
1247
- fnptr = char_parse_map[ch];
1248
-
1249
- *pos = (*fnptr) (sf);
1250
-
1251
- /*
1252
- *
1253
- */
1254
- if (current->type != CHAR_NULL) {
1255
- sf->stats_tokens += 1;
1256
- return TRUE;
1257
- }
1258
- }
1259
- return FALSE;
1260
- }
1261
-
1262
- void libinjection_sqli_init(struct libinjection_sqli_state * sf, const char *s, size_t len, int flags)
1263
- {
1264
- if (flags == 0) {
1265
- flags = FLAG_QUOTE_NONE | FLAG_SQL_ANSI;
1266
- }
1267
-
1268
- memset(sf, 0, sizeof(struct libinjection_sqli_state));
1269
- sf->s = s;
1270
- sf->slen = len;
1271
- sf->lookup = libinjection_sqli_lookup_word;
1272
- sf->userdata = 0;
1273
- sf->flags = flags;
1274
- sf->current = &(sf->tokenvec[0]);
1275
- }
1276
-
1277
- void libinjection_sqli_reset(struct libinjection_sqli_state * sf, int flags)
1278
- {
1279
- void *userdata = sf->userdata;
1280
- ptr_lookup_fn lookup = sf->lookup;;
1281
-
1282
- if (flags == 0) {
1283
- flags = FLAG_QUOTE_NONE | FLAG_SQL_ANSI;
1284
- }
1285
- libinjection_sqli_init(sf, sf->s, sf->slen, flags);
1286
- sf->lookup = lookup;
1287
- sf->userdata = userdata;
1288
- }
1289
-
1290
- void libinjection_sqli_callback(struct libinjection_sqli_state * sf, ptr_lookup_fn fn, void* userdata)
1291
- {
1292
- if (fn == NULL) {
1293
- sf->lookup = libinjection_sqli_lookup_word;
1294
- sf->userdata = (void*)(NULL);
1295
- } else {
1296
- sf->lookup = fn;
1297
- sf->userdata = userdata;
1298
- }
1299
- }
1300
-
1301
- /** See if two tokens can be merged since they are compound SQL phrases.
1302
- *
1303
- * This takes two tokens, and, if they are the right type,
1304
- * merges their values together. Then checks to see if the
1305
- * new value is special using the PHRASES mapping.
1306
- *
1307
- * Example: "UNION" + "ALL" ==> "UNION ALL"
1308
- *
1309
- * C Security Notes: this is safe to use C-strings (null-terminated)
1310
- * since the types involved by definition do not have embedded nulls
1311
- * (e.g. there is no keyword with embedded null)
1312
- *
1313
- * Porting Notes: since this is C, it's oddly complicated.
1314
- * This is just: multikeywords[token.value + ' ' + token2.value]
1315
- *
1316
- */
1317
- static int syntax_merge_words(struct libinjection_sqli_state * sf,stoken_t * a, stoken_t * b)
1318
- {
1319
- size_t sz1;
1320
- size_t sz2;
1321
- size_t sz3;
1322
- char tmp[LIBINJECTION_SQLI_TOKEN_SIZE];
1323
- char ch;
1324
-
1325
- /* first token is of right type? */
1326
- if (!
1327
- (a->type == TYPE_KEYWORD ||
1328
- a->type == TYPE_BAREWORD ||
1329
- a->type == TYPE_OPERATOR ||
1330
- a->type == TYPE_UNION ||
1331
- a->type == TYPE_FUNCTION ||
1332
- a->type == TYPE_EXPRESSION ||
1333
- a->type == TYPE_SQLTYPE)) {
1334
- return CHAR_NULL;
1335
- }
1336
-
1337
- if (b->type != TYPE_KEYWORD && b->type != TYPE_BAREWORD &&
1338
- b->type != TYPE_OPERATOR && b->type != TYPE_SQLTYPE &&
1339
- b->type != TYPE_LOGIC_OPERATOR &&
1340
- b->type != TYPE_FUNCTION &&
1341
- b->type != TYPE_UNION && b->type != TYPE_EXPRESSION) {
1342
- return CHAR_NULL;
1343
- }
1344
-
1345
- sz1 = a->len;
1346
- sz2 = b->len;
1347
- sz3 = sz1 + sz2 + 1; /* +1 for space in the middle */
1348
- if (sz3 >= LIBINJECTION_SQLI_TOKEN_SIZE) { /* make sure there is room for ending null */
1349
- return FALSE;
1350
- }
1351
- /*
1352
- * oddly annoying last.val + ' ' + current.val
1353
- */
1354
- memcpy(tmp, a->val, sz1);
1355
- tmp[sz1] = ' ';
1356
- memcpy(tmp + sz1 + 1, b->val, sz2);
1357
- tmp[sz3] = CHAR_NULL;
1358
-
1359
- ch = sf->lookup(sf, LOOKUP_WORD, tmp, sz3);
1360
-
1361
- if (ch != CHAR_NULL) {
1362
- st_assign(a, ch, a->pos, sz3, tmp);
1363
- return TRUE;
1364
- } else {
1365
- return FALSE;
1366
- }
1367
- }
1368
-
1369
- int libinjection_sqli_fold(struct libinjection_sqli_state * sf)
1370
- {
1371
- stoken_t last_comment;
1372
-
1373
- /* POS is the position of where the NEXT token goes */
1374
- size_t pos = 0;
1375
-
1376
- /* LEFT is a count of how many tokens that are already
1377
- folded or processed (i.e. part of the fingerprint) */
1378
- size_t left = 0;
1379
-
1380
- int more = 1;
1381
-
1382
- st_clear(&last_comment);
1383
-
1384
- /* Skip all initial comments, right-parens ( and unary operators
1385
- *
1386
- */
1387
- sf->current = &(sf->tokenvec[0]);
1388
- while (more) {
1389
- more = libinjection_sqli_tokenize(sf);
1390
- if ( ! (sf->current->type == TYPE_COMMENT ||
1391
- sf->current->type == TYPE_LEFTPARENS ||
1392
- sf->current->type == TYPE_SQLTYPE ||
1393
- st_is_unary_op(sf->current))) {
1394
- break;
1395
- }
1396
- }
1397
-
1398
- if (! more) {
1399
- /* If input was only comments, unary or (, then exit */
1400
- return 0;
1401
- } else {
1402
- /* it's some other token */
1403
- pos += 1;
1404
- }
1405
-
1406
- while (1) {
1407
- FOLD_DEBUG;
1408
-
1409
- /* do we have all the max number of tokens? if so do
1410
- * some special cases for 5 tokens
1411
- */
1412
- if (pos >= LIBINJECTION_SQLI_MAX_TOKENS) {
1413
- if (
1414
- (
1415
- sf->tokenvec[0].type == TYPE_NUMBER &&
1416
- (sf->tokenvec[1].type == TYPE_OPERATOR || sf->tokenvec[1].type == TYPE_COMMA) &&
1417
- sf->tokenvec[2].type == TYPE_LEFTPARENS &&
1418
- sf->tokenvec[3].type == TYPE_NUMBER &&
1419
- sf->tokenvec[4].type == TYPE_RIGHTPARENS
1420
- ) ||
1421
- (
1422
- sf->tokenvec[0].type == TYPE_BAREWORD &&
1423
- sf->tokenvec[1].type == TYPE_OPERATOR &&
1424
- sf->tokenvec[2].type == TYPE_LEFTPARENS &&
1425
- (sf->tokenvec[3].type == TYPE_BAREWORD || sf->tokenvec[3].type == TYPE_NUMBER) &&
1426
- sf->tokenvec[4].type == TYPE_RIGHTPARENS
1427
- ) ||
1428
- (
1429
- sf->tokenvec[0].type == TYPE_NUMBER &&
1430
- sf->tokenvec[1].type == TYPE_RIGHTPARENS &&
1431
- sf->tokenvec[2].type == TYPE_COMMA &&
1432
- sf->tokenvec[3].type == TYPE_LEFTPARENS &&
1433
- sf->tokenvec[4].type == TYPE_NUMBER
1434
- )
1435
- )
1436
- {
1437
- if (pos > LIBINJECTION_SQLI_MAX_TOKENS) {
1438
- st_copy(&(sf->tokenvec[1]), &(sf->tokenvec[LIBINJECTION_SQLI_MAX_TOKENS]));
1439
- pos = 2;
1440
- left = 0;
1441
- } else {
1442
- pos = 1;
1443
- left = 0;
1444
- }
1445
- }
1446
- }
1447
-
1448
- if (! more || left >= LIBINJECTION_SQLI_MAX_TOKENS) {
1449
- left = pos;
1450
- break;
1451
- }
1452
-
1453
- /* get up to two tokens */
1454
- while (more && pos <= LIBINJECTION_SQLI_MAX_TOKENS && (pos - left) < 2) {
1455
- sf->current = &(sf->tokenvec[pos]);
1456
- more = libinjection_sqli_tokenize(sf);
1457
- if (more) {
1458
- if (sf->current->type == TYPE_COMMENT) {
1459
- st_copy(&last_comment, sf->current);
1460
- } else {
1461
- last_comment.type = CHAR_NULL;
1462
- pos += 1;
1463
- }
1464
- }
1465
- }
1466
- FOLD_DEBUG;
1467
- /* did we get 2 tokens? if not then we are done */
1468
- if (pos - left < 2) {
1469
- left = pos;
1470
- continue;
1471
- }
1472
-
1473
- /* FOLD: "ss" -> "s"
1474
- * "foo" "bar" is valid SQL
1475
- * just ignore second string
1476
- */
1477
- if (sf->tokenvec[left].type == TYPE_STRING && sf->tokenvec[left+1].type == TYPE_STRING) {
1478
- pos -= 1;
1479
- sf->stats_folds += 1;
1480
- continue;
1481
- } else if (sf->tokenvec[left].type == TYPE_SEMICOLON && sf->tokenvec[left+1].type == TYPE_SEMICOLON) {
1482
- /* not sure how various engines handle
1483
- * 'select 1;;drop table foo' or
1484
- * 'select 1; /x foo x/; drop table foo'
1485
- * to prevent surprises, just fold away repeated semicolons
1486
- */
1487
- pos -= 1;
1488
- sf->stats_folds += 1;
1489
- continue;
1490
- } else if (sf->tokenvec[left].type == TYPE_SEMICOLON &&
1491
- sf->tokenvec[left+1].type == TYPE_FUNCTION &&
1492
- cstrcasecmp("IF", sf->tokenvec[left+1].val, sf->tokenvec[left+1].len) == 0) {
1493
- /* IF is normally a function, except in Transact-SQL where it can be used as a
1494
- * standalone control flow operator, e.g. ; IF 1=1 ...
1495
- * if found after a semicolon, convert from 'f' type to 'T' type
1496
- */
1497
- sf->tokenvec[left+1].type = TYPE_TSQL;
1498
- left += 2;
1499
- continue; /* reparse everything, but we probably can advance left, and pos */
1500
- } else if ((sf->tokenvec[left].type == TYPE_OPERATOR ||
1501
- sf->tokenvec[left].type == TYPE_LOGIC_OPERATOR) &&
1502
- (st_is_unary_op(&sf->tokenvec[left+1]) ||
1503
- sf->tokenvec[left+1].type == TYPE_SQLTYPE)) {
1504
- pos -= 1;
1505
- sf->stats_folds += 1;
1506
- left = 0;
1507
- continue;
1508
- } else if (sf->tokenvec[left].type == TYPE_LEFTPARENS &&
1509
- st_is_unary_op(&sf->tokenvec[left+1])) {
1510
- pos -= 1;
1511
- sf->stats_folds += 1;
1512
- if (left > 0) {
1513
- left -= 1;
1514
- }
1515
- continue;
1516
- } else if (syntax_merge_words(sf, &sf->tokenvec[left], &sf->tokenvec[left+1])) {
1517
- pos -= 1;
1518
- sf->stats_folds += 1;
1519
- if (left > 0) {
1520
- left -= 1;
1521
- }
1522
- continue;
1523
- } else if ((sf->tokenvec[left].type == TYPE_BAREWORD || sf->tokenvec[left].type == TYPE_VARIABLE) &&
1524
- sf->tokenvec[left+1].type == TYPE_LEFTPARENS && (
1525
- /* TSQL functions but common enough to be collumn names */
1526
- cstrcasecmp("USER_ID", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
1527
- cstrcasecmp("USER_NAME", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
1528
-
1529
- /* Function in MYSQL */
1530
- cstrcasecmp("DATABASE", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
1531
- cstrcasecmp("PASSWORD", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
1532
- cstrcasecmp("USER", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
1533
-
1534
- /* Mysql words that act as a variable and are a function */
1535
-
1536
- /* TSQL current_users is fake-variable */
1537
- /* http://msdn.microsoft.com/en-us/library/ms176050.aspx */
1538
- cstrcasecmp("CURRENT_USER", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
1539
- cstrcasecmp("CURRENT_DATE", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
1540
- cstrcasecmp("CURRENT_TIME", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
1541
- cstrcasecmp("CURRENT_TIMESTAMP", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
1542
- cstrcasecmp("LOCALTIME", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
1543
- cstrcasecmp("LOCALTIMESTAMP", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0
1544
- )) {
1545
-
1546
- /* pos is the same
1547
- * other conversions need to go here... for instance
1548
- * password CAN be a function, coalese CAN be a function
1549
- */
1550
- sf->tokenvec[left].type = TYPE_FUNCTION;
1551
- continue;
1552
- } else if (sf->tokenvec[left].type == TYPE_KEYWORD && (
1553
- cstrcasecmp("IN", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
1554
- cstrcasecmp("NOT IN", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0
1555
- )) {
1556
-
1557
- if (sf->tokenvec[left+1].type == TYPE_LEFTPARENS) {
1558
- /* got .... IN ( ... (or 'NOT IN')
1559
- * it's an operator
1560
- */
1561
- sf->tokenvec[left].type = TYPE_OPERATOR;
1562
- } else {
1563
- /*
1564
- * it's a nothing
1565
- */
1566
- sf->tokenvec[left].type = TYPE_BAREWORD;
1567
- }
1568
-
1569
- /* "IN" can be used as "IN BOOLEAN MODE" for mysql
1570
- * in which case merging of words can be done later
1571
- * other wise it acts as an equality operator __ IN (values..)
1572
- *
1573
- * here we got "IN" "(" so it's an operator.
1574
- * also back track to handle "NOT IN"
1575
- * might need to do the same with like
1576
- * two use cases "foo" LIKE "BAR" (normal operator)
1577
- * "foo" = LIKE(1,2)
1578
- */
1579
- continue;
1580
- } else if ((sf->tokenvec[left].type == TYPE_OPERATOR) && (
1581
- cstrcasecmp("LIKE", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 ||
1582
- cstrcasecmp("NOT LIKE", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0)) {
1583
- if (sf->tokenvec[left+1].type == TYPE_LEFTPARENS) {
1584
- /* SELECT LIKE(...
1585
- * it's a function
1586
- */
1587
- sf->tokenvec[left].type = TYPE_FUNCTION;
1588
- }
1589
- } else if (sf->tokenvec[left].type == TYPE_SQLTYPE &&
1590
- (sf->tokenvec[left+1].type == TYPE_BAREWORD ||
1591
- sf->tokenvec[left+1].type == TYPE_NUMBER ||
1592
- sf->tokenvec[left+1].type == TYPE_SQLTYPE ||
1593
- sf->tokenvec[left+1].type == TYPE_LEFTPARENS ||
1594
- sf->tokenvec[left+1].type == TYPE_FUNCTION ||
1595
- sf->tokenvec[left+1].type == TYPE_VARIABLE ||
1596
- sf->tokenvec[left+1].type == TYPE_STRING)) {
1597
- st_copy(&sf->tokenvec[left], &sf->tokenvec[left+1]);
1598
- pos -= 1;
1599
- sf->stats_folds += 1;
1600
- left = 0;
1601
- continue;
1602
- } else if (sf->tokenvec[left].type == TYPE_COLLATE &&
1603
- sf->tokenvec[left+1].type == TYPE_BAREWORD) {
1604
- /*
1605
- * there are too many collation types.. so if the bareword has a "_"
1606
- * then it's TYPE_SQLTYPE
1607
- */
1608
- if (strchr(sf->tokenvec[left+1].val, '_') != NULL) {
1609
- sf->tokenvec[left+1].type = TYPE_SQLTYPE;
1610
- left = 0;
1611
- }
1612
- } else if (sf->tokenvec[left].type == TYPE_BACKSLASH) {
1613
- if (st_is_arithmetic_op(&(sf->tokenvec[left+1]))) {
1614
- /* very weird case in TSQL where '\%1' is parsed as '0 % 1', etc */
1615
- sf->tokenvec[left].type = TYPE_NUMBER;
1616
- } else {
1617
- /* just ignore it.. Again T-SQL seems to parse \1 as "1" */
1618
- st_copy(&sf->tokenvec[left], &sf->tokenvec[left+1]);
1619
- pos -= 1;
1620
- sf->stats_folds += 1;
1621
- }
1622
- left = 0;
1623
- continue;
1624
- } else if (sf->tokenvec[left].type == TYPE_LEFTPARENS &&
1625
- sf->tokenvec[left+1].type == TYPE_LEFTPARENS) {
1626
- pos -= 1;
1627
- left = 0;
1628
- sf->stats_folds += 1;
1629
- continue;
1630
- } else if (sf->tokenvec[left].type == TYPE_RIGHTPARENS &&
1631
- sf->tokenvec[left+1].type == TYPE_RIGHTPARENS) {
1632
- pos -= 1;
1633
- left = 0;
1634
- sf->stats_folds += 1;
1635
- continue;
1636
- } else if (sf->tokenvec[left].type == TYPE_LEFTBRACE &&
1637
- sf->tokenvec[left+1].type == TYPE_BAREWORD) {
1638
-
1639
- /*
1640
- * MySQL Degenerate case --
1641
- *
1642
- * select { ``.``.id }; -- valid !!!
1643
- * select { ``.``.``.id }; -- invalid
1644
- * select ``.``.id; -- invalid
1645
- * select { ``.id }; -- invalid
1646
- *
1647
- * so it appears {``.``.id} is a magic case
1648
- * I suspect this is "current database, current table, field id"
1649
- *
1650
- * The folding code can't look at more than 3 tokens, and
1651
- * I don't want to make two passes.
1652
- *
1653
- * Since "{ ``" so rare, we are just going to blacklist it.
1654
- *
1655
- * Highly likely this will need revisiting!
1656
- *
1657
- * CREDIT @rsalgado 2013-11-25
1658
- */
1659
- if (sf->tokenvec[left+1].len == 0) {
1660
- sf->tokenvec[left+1].type = TYPE_EVIL;
1661
- return (int)(left+2);
1662
- }
1663
- /* weird ODBC / MYSQL {foo expr} --> expr
1664
- * but for this rule we just strip away the "{ foo" part
1665
- */
1666
- left = 0;
1667
- pos -= 2;
1668
- sf->stats_folds += 2;
1669
- continue;
1670
- } else if (sf->tokenvec[left+1].type == TYPE_RIGHTBRACE) {
1671
- pos -= 1;
1672
- left = 0;
1673
- sf->stats_folds += 1;
1674
- continue;
1675
- }
1676
-
1677
- /* all cases of handing 2 tokens is done
1678
- and nothing matched. Get one more token
1679
- */
1680
- FOLD_DEBUG;
1681
- while (more && pos <= LIBINJECTION_SQLI_MAX_TOKENS && pos - left < 3) {
1682
- sf->current = &(sf->tokenvec[pos]);
1683
- more = libinjection_sqli_tokenize(sf);
1684
- if (more) {
1685
- if (sf->current->type == TYPE_COMMENT) {
1686
- st_copy(&last_comment, sf->current);
1687
- } else {
1688
- last_comment.type = CHAR_NULL;
1689
- pos += 1;
1690
- }
1691
- }
1692
- }
1693
-
1694
- /* do we have three tokens? If not then we are done */
1695
- if (pos -left < 3) {
1696
- left = pos;
1697
- continue;
1698
- }
1699
-
1700
- /*
1701
- * now look for three token folding
1702
- */
1703
- if (sf->tokenvec[left].type == TYPE_NUMBER &&
1704
- sf->tokenvec[left+1].type == TYPE_OPERATOR &&
1705
- sf->tokenvec[left+2].type == TYPE_NUMBER) {
1706
- pos -= 2;
1707
- left = 0;
1708
- continue;
1709
- } else if (sf->tokenvec[left].type == TYPE_OPERATOR &&
1710
- sf->tokenvec[left+1].type != TYPE_LEFTPARENS &&
1711
- sf->tokenvec[left+2].type == TYPE_OPERATOR) {
1712
- left = 0;
1713
- pos -= 2;
1714
- continue;
1715
- } else if (sf->tokenvec[left].type == TYPE_LOGIC_OPERATOR &&
1716
- sf->tokenvec[left+2].type == TYPE_LOGIC_OPERATOR) {
1717
- pos -= 2;
1718
- left = 0;
1719
- continue;
1720
- } else if (sf->tokenvec[left].type == TYPE_VARIABLE &&
1721
- sf->tokenvec[left+1].type == TYPE_OPERATOR &&
1722
- (sf->tokenvec[left+2].type == TYPE_VARIABLE ||
1723
- sf->tokenvec[left+2].type == TYPE_NUMBER ||
1724
- sf->tokenvec[left+2].type == TYPE_BAREWORD)) {
1725
- pos -= 2;
1726
- left = 0;
1727
- continue;
1728
- } else if ((sf->tokenvec[left].type == TYPE_BAREWORD ||
1729
- sf->tokenvec[left].type == TYPE_NUMBER ) &&
1730
- sf->tokenvec[left+1].type == TYPE_OPERATOR &&
1731
- (sf->tokenvec[left+2].type == TYPE_NUMBER ||
1732
- sf->tokenvec[left+2].type == TYPE_BAREWORD)) {
1733
- pos -= 2;
1734
- left = 0;
1735
- continue;
1736
- } else if ((sf->tokenvec[left].type == TYPE_BAREWORD ||
1737
- sf->tokenvec[left].type == TYPE_NUMBER ||
1738
- sf->tokenvec[left].type == TYPE_VARIABLE ||
1739
- sf->tokenvec[left].type == TYPE_STRING) &&
1740
- sf->tokenvec[left+1].type == TYPE_OPERATOR &&
1741
- streq(sf->tokenvec[left+1].val, "::") &&
1742
- sf->tokenvec[left+2].type == TYPE_SQLTYPE) {
1743
- pos -= 2;
1744
- left = 0;
1745
- sf->stats_folds += 2;
1746
- continue;
1747
- } else if ((sf->tokenvec[left].type == TYPE_BAREWORD ||
1748
- sf->tokenvec[left].type == TYPE_NUMBER ||
1749
- sf->tokenvec[left].type == TYPE_STRING ||
1750
- sf->tokenvec[left].type == TYPE_VARIABLE) &&
1751
- sf->tokenvec[left+1].type == TYPE_COMMA &&
1752
- (sf->tokenvec[left+2].type == TYPE_NUMBER ||
1753
- sf->tokenvec[left+2].type == TYPE_BAREWORD ||
1754
- sf->tokenvec[left+2].type == TYPE_STRING ||
1755
- sf->tokenvec[left+2].type == TYPE_VARIABLE)) {
1756
- pos -= 2;
1757
- left = 0;
1758
- continue;
1759
- } else if ((sf->tokenvec[left].type == TYPE_EXPRESSION ||
1760
- sf->tokenvec[left].type == TYPE_GROUP ||
1761
- sf->tokenvec[left].type == TYPE_COMMA) &&
1762
- st_is_unary_op(&sf->tokenvec[left+1]) &&
1763
- sf->tokenvec[left+2].type == TYPE_LEFTPARENS) {
1764
- /* got something like SELECT + (, LIMIT + (
1765
- * remove unary operator
1766
- */
1767
- st_copy(&sf->tokenvec[left+1], &sf->tokenvec[left+2]);
1768
- pos -= 1;
1769
- left = 0;
1770
- continue;
1771
- } else if ((sf->tokenvec[left].type == TYPE_KEYWORD ||
1772
- sf->tokenvec[left].type == TYPE_EXPRESSION ||
1773
- sf->tokenvec[left].type == TYPE_GROUP ) &&
1774
- st_is_unary_op(&sf->tokenvec[left+1]) &&
1775
- (sf->tokenvec[left+2].type == TYPE_NUMBER ||
1776
- sf->tokenvec[left+2].type == TYPE_BAREWORD ||
1777
- sf->tokenvec[left+2].type == TYPE_VARIABLE ||
1778
- sf->tokenvec[left+2].type == TYPE_STRING ||
1779
- sf->tokenvec[left+2].type == TYPE_FUNCTION )) {
1780
- /* remove unary operators
1781
- * select - 1
1782
- */
1783
- st_copy(&sf->tokenvec[left+1], &sf->tokenvec[left+2]);
1784
- pos -= 1;
1785
- left = 0;
1786
- continue;
1787
- } else if (sf->tokenvec[left].type == TYPE_COMMA &&
1788
- st_is_unary_op(&sf->tokenvec[left+1]) &&
1789
- (sf->tokenvec[left+2].type == TYPE_NUMBER ||
1790
- sf->tokenvec[left+2].type == TYPE_BAREWORD ||
1791
- sf->tokenvec[left+2].type == TYPE_VARIABLE ||
1792
- sf->tokenvec[left+2].type == TYPE_STRING)) {
1793
- /*
1794
- * interesting case turn ", -1" ->> ",1" PLUS we need to back up
1795
- * one token if possible to see if more folding can be done
1796
- * "1,-1" --> "1"
1797
- */
1798
- st_copy(&sf->tokenvec[left+1], &sf->tokenvec[left+2]);
1799
- left = 0;
1800
- /* pos is >= 3 so this is safe */
1801
- assert(pos >= 3);
1802
- pos -= 3;
1803
- continue;
1804
- } else if (sf->tokenvec[left].type == TYPE_COMMA &&
1805
- st_is_unary_op(&sf->tokenvec[left+1]) &&
1806
- sf->tokenvec[left+2].type == TYPE_FUNCTION) {
1807
-
1808
- /* Separate case from above since you end up with
1809
- * 1,-sin(1) --> 1 (1)
1810
- * Here, just do
1811
- * 1,-sin(1) --> 1,sin(1)
1812
- * just remove unary opartor
1813
- */
1814
- st_copy(&sf->tokenvec[left+1], &sf->tokenvec[left+2]);
1815
- pos -= 1;
1816
- left = 0;
1817
- continue;
1818
- } else if ((sf->tokenvec[left].type == TYPE_BAREWORD) &&
1819
- (sf->tokenvec[left+1].type == TYPE_DOT) &&
1820
- (sf->tokenvec[left+2].type == TYPE_BAREWORD)) {
1821
- /* ignore the '.n'
1822
- * typically is this databasename.table
1823
- */
1824
- assert(pos >= 3);
1825
- pos -= 2;
1826
- left = 0;
1827
- continue;
1828
- } else if ((sf->tokenvec[left].type == TYPE_EXPRESSION) &&
1829
- (sf->tokenvec[left+1].type == TYPE_DOT) &&
1830
- (sf->tokenvec[left+2].type == TYPE_BAREWORD)) {
1831
- /* select . `foo` --> select `foo` */
1832
- st_copy(&sf->tokenvec[left+1], &sf->tokenvec[left+2]);
1833
- pos -= 1;
1834
- left = 0;
1835
- continue;
1836
- }
1837
-
1838
-
1839
- /* no folding -- assume left-most token is
1840
- is good, now use the existing 2 tokens --
1841
- do not get another
1842
- */
1843
-
1844
- left += 1;
1845
-
1846
- } /* while(1) */
1847
-
1848
- /* if we have 4 or less tokens, and we had a comment token
1849
- * at the end, add it back
1850
- */
1851
-
1852
- if (left < LIBINJECTION_SQLI_MAX_TOKENS && last_comment.type == TYPE_COMMENT) {
1853
- st_copy(&sf->tokenvec[left], &last_comment);
1854
- left += 1;
1855
- }
1856
-
1857
- /* sometimes we grab a 6th token to help
1858
- determine the type of token 5.
1859
- */
1860
- if (left > LIBINJECTION_SQLI_MAX_TOKENS) {
1861
- left = LIBINJECTION_SQLI_MAX_TOKENS;
1862
- }
1863
-
1864
- return (int)left;
1865
- }
1866
-
1867
- /* secondary api: detects SQLi in a string, GIVEN a context.
1868
- *
1869
- * A context can be:
1870
- * * CHAR_NULL (\0), process as is
1871
- * * CHAR_SINGLE ('), process pretending input started with a
1872
- * single quote.
1873
- * * CHAR_DOUBLE ("), process pretending input started with a
1874
- * double quote.
1875
- *
1876
- */
1877
- const char* libinjection_sqli_fingerprint(struct libinjection_sqli_state * sql_state, int flags)
1878
- {
1879
- int i;
1880
- int tlen = 0;
1881
-
1882
- libinjection_sqli_reset(sql_state, flags);
1883
-
1884
- tlen = libinjection_sqli_fold(sql_state);
1885
-
1886
- /* Check for magic PHP backquote comment
1887
- * If:
1888
- * * last token is of type "bareword"
1889
- * * And is quoted in a backtick
1890
- * * And isn't closed
1891
- * * And it's empty?
1892
- * Then convert it to comment
1893
- */
1894
- if (tlen > 2 &&
1895
- sql_state->tokenvec[tlen-1].type == TYPE_BAREWORD &&
1896
- sql_state->tokenvec[tlen-1].str_open == CHAR_TICK &&
1897
- sql_state->tokenvec[tlen-1].len == 0 &&
1898
- sql_state->tokenvec[tlen-1].str_close == CHAR_NULL) {
1899
- sql_state->tokenvec[tlen-1].type = TYPE_COMMENT;
1900
- }
1901
-
1902
- for (i = 0; i < tlen; ++i) {
1903
- sql_state->fingerprint[i] = sql_state->tokenvec[i].type;
1904
- }
1905
-
1906
- /*
1907
- * make the fingerprint pattern a c-string (null delimited)
1908
- */
1909
- sql_state->fingerprint[tlen] = CHAR_NULL;
1910
-
1911
- /*
1912
- * check for 'X' in pattern, and then
1913
- * clear out all tokens
1914
- *
1915
- * this means parsing could not be done
1916
- * accurately due to pgsql's double comments
1917
- * or other syntax that isn't consistent.
1918
- * Should be very rare false positive
1919
- */
1920
- if (strchr(sql_state->fingerprint, TYPE_EVIL)) {
1921
- /* needed for SWIG */
1922
- memset((void*)sql_state->fingerprint, 0, LIBINJECTION_SQLI_MAX_TOKENS + 1);
1923
- memset((void*)sql_state->tokenvec[0].val, 0, LIBINJECTION_SQLI_TOKEN_SIZE);
1924
-
1925
- sql_state->fingerprint[0] = TYPE_EVIL;
1926
-
1927
- sql_state->tokenvec[0].type = TYPE_EVIL;
1928
- sql_state->tokenvec[0].val[0] = TYPE_EVIL;
1929
- sql_state->tokenvec[1].type = CHAR_NULL;
1930
- }
1931
-
1932
-
1933
- return sql_state->fingerprint;
1934
- }
1935
-
1936
- int libinjection_sqli_check_fingerprint(struct libinjection_sqli_state* sql_state)
1937
- {
1938
- return libinjection_sqli_blacklist(sql_state) &&
1939
- libinjection_sqli_not_whitelist(sql_state);
1940
- }
1941
-
1942
- char libinjection_sqli_lookup_word(struct libinjection_sqli_state *sql_state, int lookup_type,
1943
- const char* str, size_t len)
1944
- {
1945
- if (lookup_type == LOOKUP_FINGERPRINT) {
1946
- return libinjection_sqli_check_fingerprint(sql_state) ? 'X' : '\0';
1947
- } else {
1948
- return bsearch_keyword_type(str, len, sql_keywords, sql_keywords_sz);
1949
- }
1950
- }
1951
-
1952
- int libinjection_sqli_blacklist(struct libinjection_sqli_state* sql_state)
1953
- {
1954
- /*
1955
- * use minimum of 8 bytes to make sure gcc -fstack-protector
1956
- * works correctly
1957
- */
1958
- char fp2[8];
1959
- char ch;
1960
- size_t i;
1961
- size_t len = strlen(sql_state->fingerprint);
1962
- int patmatch;
1963
-
1964
- if (len < 1) {
1965
- sql_state->reason = __LINE__;
1966
- return FALSE;
1967
- }
1968
-
1969
- /*
1970
- to keep everything compatible, convert the
1971
- v0 fingerprint pattern to v1
1972
- v0: up to 5 chars, mixed case
1973
- v1: 1 char is '0', up to 5 more chars, upper case
1974
- */
1975
-
1976
- fp2[0] = '0';
1977
- for (i = 0; i < len; ++i) {
1978
- ch = sql_state->fingerprint[i];
1979
- if (ch >= 'a' && ch <= 'z') {
1980
- ch -= 0x20;
1981
- }
1982
- fp2[i+1] = ch;
1983
- }
1984
- fp2[i+1] = '\0';
1985
-
1986
- patmatch = is_keyword(fp2, len + 1) == TYPE_FINGERPRINT;
1987
-
1988
- /*
1989
- * No match.
1990
- *
1991
- * Set sql_state->reason to current line number
1992
- * only for debugging purposes.
1993
- */
1994
- if (!patmatch) {
1995
- sql_state->reason = __LINE__;
1996
- return FALSE;
1997
- }
1998
-
1999
- return TRUE;
2000
- }
2001
-
2002
- /*
2003
- * return TRUE if sqli, false is benign
2004
- */
2005
- int libinjection_sqli_not_whitelist(struct libinjection_sqli_state* sql_state)
2006
- {
2007
- /*
2008
- * We assume we got a SQLi match
2009
- * This next part just helps reduce false positives.
2010
- *
2011
- */
2012
- char ch;
2013
- size_t tlen = strlen(sql_state->fingerprint);
2014
-
2015
- if (tlen > 1 && sql_state->fingerprint[tlen-1] == TYPE_COMMENT) {
2016
- /*
2017
- * if ending comment is contains 'sp_password' then it's sqli!
2018
- * MS Audit log apparently ignores anything with
2019
- * 'sp_password' in it. Unable to find primary refernece to
2020
- * this "feature" of SQL Server but seems to be known sqli
2021
- * technique
2022
- */
2023
- if (my_memmem(sql_state->s, sql_state->slen,
2024
- "sp_password", strlen("sp_password"))) {
2025
- sql_state->reason = __LINE__;
2026
- return TRUE;
2027
- }
2028
- }
2029
-
2030
- switch (tlen) {
2031
- case 2:{
2032
- /*
2033
- * case 2 are "very small SQLi" which make them
2034
- * hard to tell from normal input...
2035
- */
2036
-
2037
- if (sql_state->fingerprint[1] == TYPE_UNION) {
2038
- if (sql_state->stats_tokens == 2) {
2039
- /* not sure why but 1U comes up in Sqli attack
2040
- * likely part of parameter splitting/etc.
2041
- * lots of reasons why "1 union" might be normal
2042
- * input, so beep only if other SQLi things are present
2043
- */
2044
- /* it really is a number and 'union'
2045
- * other wise it has folding or comments
2046
- */
2047
- sql_state->reason = __LINE__;
2048
- return FALSE;
2049
- } else {
2050
- sql_state->reason = __LINE__;
2051
- return TRUE;
2052
- }
2053
- }
2054
- /*
2055
- * if 'comment' is '#' ignore.. too many FP
2056
- */
2057
- if (sql_state->tokenvec[1].val[0] == '#') {
2058
- sql_state->reason = __LINE__;
2059
- return FALSE;
2060
- }
2061
-
2062
- /*
2063
- * for fingerprint like 'nc', only comments of /x are treated
2064
- * as SQL... ending comments of "--" and "#" are not sqli
2065
- */
2066
- if (sql_state->tokenvec[0].type == TYPE_BAREWORD &&
2067
- sql_state->tokenvec[1].type == TYPE_COMMENT &&
2068
- sql_state->tokenvec[1].val[0] != '/') {
2069
- sql_state->reason = __LINE__;
2070
- return FALSE;
2071
- }
2072
-
2073
- /*
2074
- * if '1c' ends with '/x' then it's sqli
2075
- */
2076
- if (sql_state->tokenvec[0].type == TYPE_NUMBER &&
2077
- sql_state->tokenvec[1].type == TYPE_COMMENT &&
2078
- sql_state->tokenvec[1].val[0] == '/') {
2079
- return TRUE;
2080
- }
2081
-
2082
- /**
2083
- * there are some odd base64-looking query string values
2084
- * 1234-ABCDEFEhfhihwuefi--
2085
- * which evaluate to "1c"... these are not SQLi
2086
- * but 1234-- probably is.
2087
- * Make sure the "1" in "1c" is actually a true decimal number
2088
- *
2089
- * Need to check -original- string since the folding step
2090
- * may have merged tokens, e.g. "1+FOO" is folded into "1"
2091
- *
2092
- * Note: evasion: 1*1--
2093
- */
2094
- if (sql_state->tokenvec[0].type == TYPE_NUMBER &&
2095
- sql_state->tokenvec[1].type == TYPE_COMMENT) {
2096
- if (sql_state->stats_tokens > 2) {
2097
- /* we have some folding going on, highly likely sqli */
2098
- sql_state->reason = __LINE__;
2099
- return TRUE;
2100
- }
2101
- /*
2102
- * we check that next character after the number is either whitespace,
2103
- * or '/' or a '-' ==> sqli.
2104
- */
2105
- ch = sql_state->s[sql_state->tokenvec[0].len];
2106
- if ( ch <= 32 ) {
2107
- /* next char was whitespace,e.g. "1234 --"
2108
- * this isn't exactly correct.. ideally we should skip over all whitespace
2109
- * but this seems to be ok for now
2110
- */
2111
- return TRUE;
2112
- }
2113
- if (ch == '/' && sql_state->s[sql_state->tokenvec[0].len + 1] == '*') {
2114
- return TRUE;
2115
- }
2116
- if (ch == '-' && sql_state->s[sql_state->tokenvec[0].len + 1] == '-') {
2117
- return TRUE;
2118
- }
2119
-
2120
- sql_state->reason = __LINE__;
2121
- return FALSE;
2122
- }
2123
-
2124
- /*
2125
- * detect obvious sqli scans.. many people put '--' in plain text
2126
- * so only detect if input ends with '--', e.g. 1-- but not 1-- foo
2127
- */
2128
- if ((sql_state->tokenvec[1].len > 2)
2129
- && sql_state->tokenvec[1].val[0] == '-') {
2130
- sql_state->reason = __LINE__;
2131
- return FALSE;
2132
- }
2133
-
2134
- break;
2135
- } /* case 2 */
2136
- case 3:{
2137
- /*
2138
- * ...foo' + 'bar...
2139
- * no opening quote, no closing quote
2140
- * and each string has data
2141
- */
2142
-
2143
- if (streq(sql_state->fingerprint, "sos")
2144
- || streq(sql_state->fingerprint, "s&s")) {
2145
-
2146
- if ((sql_state->tokenvec[0].str_open == CHAR_NULL)
2147
- && (sql_state->tokenvec[2].str_close == CHAR_NULL)
2148
- && (sql_state->tokenvec[0].str_close == sql_state->tokenvec[2].str_open)) {
2149
- /*
2150
- * if ....foo" + "bar....
2151
- */
2152
- sql_state->reason = __LINE__;
2153
- return TRUE;
2154
- }
2155
- if (sql_state->stats_tokens == 3) {
2156
- sql_state->reason = __LINE__;
2157
- return FALSE;
2158
- }
2159
-
2160
- /*
2161
- * not sqli
2162
- */
2163
- sql_state->reason = __LINE__;
2164
- return FALSE;
2165
- } else if (streq(sql_state->fingerprint, "s&n") ||
2166
- streq(sql_state->fingerprint, "n&1") ||
2167
- streq(sql_state->fingerprint, "1&1") ||
2168
- streq(sql_state->fingerprint, "1&v") ||
2169
- streq(sql_state->fingerprint, "1&s")) {
2170
- /* 'sexy and 17' not sqli
2171
- * 'sexy and 17<18' sqli
2172
- */
2173
- if (sql_state->stats_tokens == 3) {
2174
- sql_state->reason = __LINE__;
2175
- return FALSE;
2176
- }
2177
- } else if (sql_state->tokenvec[1].type == TYPE_KEYWORD) {
2178
- if ((sql_state->tokenvec[1].len < 5) ||
2179
- cstrcasecmp("INTO", sql_state->tokenvec[1].val, 4)) {
2180
- /* if it's not "INTO OUTFILE", or "INTO DUMPFILE" (MySQL)
2181
- * then treat as safe
2182
- */
2183
- sql_state->reason = __LINE__;
2184
- return FALSE;
2185
- }
2186
- }
2187
- break;
2188
- } /* case 3 */
2189
- case 4:
2190
- case 5: {
2191
- /* nothing right now */
2192
- break;
2193
- } /* case 5 */
2194
- } /* end switch */
2195
-
2196
- return TRUE;
2197
- }
2198
-
2199
- /** Main API, detects SQLi in an input.
2200
- *
2201
- *
2202
- */
2203
- static int reparse_as_mysql(struct libinjection_sqli_state * sql_state)
2204
- {
2205
- return sql_state->stats_comment_ddx ||
2206
- sql_state->stats_comment_hash;
2207
- }
2208
-
2209
- /*
2210
- * This function is mostly use with SWIG
2211
- */
2212
- struct libinjection_sqli_token*
2213
- libinjection_sqli_get_token(struct libinjection_sqli_state * sql_state, int i)
2214
- {
2215
- if (i < 0 || i > (int)LIBINJECTION_SQLI_MAX_TOKENS) {
2216
- return NULL;
2217
- }
2218
- return &(sql_state->tokenvec[i]);
2219
- }
2220
-
2221
- int libinjection_is_sqli(struct libinjection_sqli_state * sql_state)
2222
- {
2223
- const char *s = sql_state->s;
2224
- size_t slen = sql_state->slen;
2225
-
2226
- /*
2227
- * no input? not sqli
2228
- */
2229
- if (slen == 0) {
2230
- return FALSE;
2231
- }
2232
-
2233
- /*
2234
- * test input "as-is"
2235
- */
2236
- libinjection_sqli_fingerprint(sql_state, FLAG_QUOTE_NONE | FLAG_SQL_ANSI);
2237
- if (sql_state->lookup(sql_state, LOOKUP_FINGERPRINT,
2238
- sql_state->fingerprint, strlen(sql_state->fingerprint))) {
2239
- return TRUE;
2240
- } else if (reparse_as_mysql(sql_state)) {
2241
- libinjection_sqli_fingerprint(sql_state, FLAG_QUOTE_NONE | FLAG_SQL_MYSQL);
2242
- if (sql_state->lookup(sql_state, LOOKUP_FINGERPRINT,
2243
- sql_state->fingerprint, strlen(sql_state->fingerprint))) {
2244
- return TRUE;
2245
- }
2246
- }
2247
-
2248
- /*
2249
- * if input has a single_quote, then
2250
- * test as if input was actually '
2251
- * example: if input if "1' = 1", then pretend it's
2252
- * "'1' = 1"
2253
- * Porting Notes: example the same as doing
2254
- * is_string_sqli(sql_state, "'" + s, slen+1, NULL, fn, arg)
2255
- *
2256
- */
2257
- if (memchr(s, CHAR_SINGLE, slen)) {
2258
- libinjection_sqli_fingerprint(sql_state, FLAG_QUOTE_SINGLE | FLAG_SQL_ANSI);
2259
- if (sql_state->lookup(sql_state, LOOKUP_FINGERPRINT,
2260
- sql_state->fingerprint, strlen(sql_state->fingerprint))) {
2261
- return TRUE;
2262
- } else if (reparse_as_mysql(sql_state)) {
2263
- libinjection_sqli_fingerprint(sql_state, FLAG_QUOTE_SINGLE | FLAG_SQL_MYSQL);
2264
- if (sql_state->lookup(sql_state, LOOKUP_FINGERPRINT,
2265
- sql_state->fingerprint, strlen(sql_state->fingerprint))) {
2266
- return TRUE;
2267
- }
2268
- }
2269
- }
2270
-
2271
- /*
2272
- * same as above but with a double-quote "
2273
- */
2274
- if (memchr(s, CHAR_DOUBLE, slen)) {
2275
- libinjection_sqli_fingerprint(sql_state, FLAG_QUOTE_DOUBLE | FLAG_SQL_MYSQL);
2276
- if (sql_state->lookup(sql_state, LOOKUP_FINGERPRINT,
2277
- sql_state->fingerprint, strlen(sql_state->fingerprint))) {
2278
- return TRUE;
2279
- }
2280
- }
2281
-
2282
- /*
2283
- * Hurray, input is not SQLi
2284
- */
2285
- return FALSE;
2286
- }
2287
-
2288
- int libinjection_sqli(const char* input, size_t slen, char fingerprint[])
2289
- {
2290
- int issqli;
2291
- struct libinjection_sqli_state state;
2292
-
2293
- libinjection_sqli_init(&state, input, slen, 0);
2294
- issqli = libinjection_is_sqli(&state);
2295
- if (issqli) {
2296
- strcpy(fingerprint, state.fingerprint);
2297
- } else {
2298
- fingerprint[0] = '\0';
2299
- }
2300
- return issqli;
2301
- }